Qwen3-VL-Flash

Copied!

Try AI | Add to Compare

Visual Understanding

Overview

Visual Understanding

The Qwen3 series of small-scale visual understanding models effectively integrates thinking and non-thinking modes, delivering superior performance compared to the open-source Qwen3-VL-30B-A3B while maintaining fast response speeds. It features a comprehensive upgrade in image/video understanding, supporting ultra-long contexts such as extended videos and documents, spatial awareness, and object recognition across various domains. Equipped with 2D/3D visual localization capabilities, it is well-suited for tackling complex real-world tasks.

Input

Text, Image, Video

Output

Text

Features

Prefix Completion

Function Calling

Cache

Structured Outputs

Batches

Web Search

Pricing

Input
$0.05 per 1M tokens
Output
$0.40 per 1M tokens
Input (Implicit Cache)
$0.01 per 1M tokens
Input (Batch File)
$0.025 per 1M tokens
Output (Batch File)
$0.20 per 1M tokens
Explicit Cache Creation
$0.0625 per 1M tokens
Explicit Cache Read
$0.005 per 1M tokens

Input
$0.05 per 1M tokens
Output
$0.40 per 1M tokens
Input (Implicit Cache)
$0.01 per 1M tokens
Input (Batch File)
$0.025 per 1M tokens
Output (Batch File)
$0.20 per 1M tokens
Explicit Cache Creation
$0.0625 per 1M tokens
Explicit Cache Read
$0.005 per 1M tokens

Context

262.14K

Max Input

258.04K

Max Output

32.76K

Rate Limits

RPM (Requests Per Minute)
1.20K
TPM (Tokens Per Minute)
1M

API Reference

Get API Key

Copied!

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263

import os
import dashscope
from dashscope import MultiModalConversation
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"

# Single-turn conversation: one user message carrying an image plus the
# text prompt that refers to it.
image_url = "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"
prompt_text = "Solve this problem?"
messages = [
    {"role": "user", "content": [{"image": image_url}, {"text": prompt_text}]},
]

# Keyword arguments for the streaming call, gathered in one place.
call_options = dict(
    # If the DASHSCOPE_API_KEY environment variable is not configured,
    # pass the key directly instead: api_key="sk-xxx"
    api_key=os.environ.get("DASHSCOPE_API_KEY"),
    # Model to query; change the name as needed.
    model="qwen3-vl-flash",
    messages=messages,
    # Stream the result so it can be consumed chunk by chunk.
    stream=True,
    # enable_thinking turns on the reasoning process. qwen-vl-plus and
    # qwen3-vl-plus-2025-09-23 allow toggling it; for
    # qwen3-vl-235b-a22b-thinking it can only be enabled; other Qwen-VL
    # models do not support it.
    enable_thinking=True,
    # thinking_budget caps the number of tokens spent on reasoning; it
    # applies only to qwen-vl-plus, qwen3-vl-plus-2025-09-23, and
    # qwen3-vl-235b-a22b-thinking.
    thinking_budget=50,
)
response = MultiModalConversation.call(**call_options)

# Full reasoning text, accumulated across streamed chunks.
reasoning_content = ""
# Full answer text, accumulated across streamed chunks.
answer_content = ""
# Becomes True when the first answer chunk arrives, i.e. reasoning has ended.
is_answering = False

print("=" * 20 + "Reasoning Process" + "=" * 20)

for chunk in response:
    # Resolve the message once instead of re-walking chunk.output.choices[0]
    # for every access.
    message = chunk.output.choices[0].message
    reasoning_content_chunk = message.get("reasoning_content", None)
    content = message.content

    if content:
        # Answer chunk: print the separator once, then stream the text.
        if not is_answering:
            print("\n" + "=" * 20 + "Complete Response" + "=" * 20)
            is_answering = True
        answer_text = content[0]["text"]
        print(answer_text, end="")
        answer_content += answer_text
    elif reasoning_content_chunk:
        # Reasoning chunk: content is empty and reasoning text is present.
        print(reasoning_content_chunk, end="")
        reasoning_content += reasoning_content_chunk
    # Otherwise both parts are empty or missing, so there is nothing to emit.

# If you need to print the full reasoning process and complete response, uncomment the following lines
# print("=" * 20 + "Full Reasoning Process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Complete Response" + "=" * 20 + "\n")
# print(f"{answer_content}")

import os
import dashscope
from dashscope import MultiModalConversation
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"

# Single-turn conversation: one user message carrying an image plus the
# text prompt that refers to it.
image_url = "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"
prompt_text = "Solve this problem?"
messages = [
    {"role": "user", "content": [{"image": image_url}, {"text": prompt_text}]},
]

# Keyword arguments for the streaming call, gathered in one place.
call_options = dict(
    # If the DASHSCOPE_API_KEY environment variable is not configured,
    # pass the key directly instead: api_key="sk-xxx"
    api_key=os.environ.get("DASHSCOPE_API_KEY"),
    # Model to query; change the name as needed.
    model="qwen3-vl-flash",
    messages=messages,
    # Stream the result so it can be consumed chunk by chunk.
    stream=True,
    # enable_thinking turns on the reasoning process. qwen-vl-plus and
    # qwen3-vl-plus-2025-09-23 allow toggling it; for
    # qwen3-vl-235b-a22b-thinking it can only be enabled; other Qwen-VL
    # models do not support it.
    enable_thinking=True,
    # thinking_budget caps the number of tokens spent on reasoning; it
    # applies only to qwen-vl-plus, qwen3-vl-plus-2025-09-23, and
    # qwen3-vl-235b-a22b-thinking.
    thinking_budget=50,
)
response = MultiModalConversation.call(**call_options)

# Full reasoning text, accumulated across streamed chunks.
reasoning_content = ""
# Full answer text, accumulated across streamed chunks.
answer_content = ""
# Becomes True when the first answer chunk arrives, i.e. reasoning has ended.
is_answering = False

print("=" * 20 + "Reasoning Process" + "=" * 20)

for chunk in response:
    # Resolve the message once instead of re-walking chunk.output.choices[0]
    # for every access.
    message = chunk.output.choices[0].message
    reasoning_content_chunk = message.get("reasoning_content", None)
    content = message.content

    if content:
        # Answer chunk: print the separator once, then stream the text.
        if not is_answering:
            print("\n" + "=" * 20 + "Complete Response" + "=" * 20)
            is_answering = True
        answer_text = content[0]["text"]
        print(answer_text, end="")
        answer_content += answer_text
    elif reasoning_content_chunk:
        # Reasoning chunk: content is empty and reasoning text is present.
        print(reasoning_content_chunk, end="")
        reasoning_content += reasoning_content_chunk
    # Otherwise both parts are empty or missing, so there is nothing to emit.

# If you need to print the full reasoning process and complete response, uncomment the following lines
# print("=" * 20 + "Full Reasoning Process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Complete Response" + "=" * 20 + "\n")
# print(f"{answer_content}")