Qwen3-VL-Flash-2026-01-22

Qwen3-VL-Flash

Copied!

Try AI | Add to Compare

Visual Understanding

Overview

Visual Understanding

The Qwen3 series of small-sized visual understanding models effectively integrates thinking and non-thinking modes. Compared with the snapshot taken on October 15, 2025, the overall performance of the model has improved significantly: it delivers enhanced capabilities in general visual recognition and reasoning, and shows marked improvements in recognition accuracy across various business scenarios such as security, in-store inspections, equipment monitoring, and photo-based problem solving. This version is a snapshot as of January 22, 2026.

Input

Text, Image, Video

Output

Text

Features

Prefix Completion

Function Calling

Cache

Structured Outputs

Batches

Web Search

Pricing

Input
$0.05 per 1M tokens
Output
$0.4 per 1M tokens

Input
$0.05 per 1M tokens
Output
$0.4 per 1M tokens

Context

262.14K

Max Input

258.04K

Max Output

32.76K

Rate Limits

RPM (Requests Per Minute)
60
TPM (Tokens Per Minute)
100K

API Reference

Get API Key

Copied!

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263

import os
import dashscope
from dashscope import MultiModalConversation
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"

# Streaming visual-understanding request: send one image plus a text prompt,
# then print the model's reasoning followed by its final answer as chunks arrive.

# A single user turn that pairs an image URL with a text instruction.
messages = [
    {
        "role": "user",
        "content": [
            {"image": "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"},
            {"text": "Solve this problem?"},
        ],
    }
]

response = MultiModalConversation.call(
    # If the environment variable is not configured, replace the line below with: api_key="sk-xxx",
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="qwen3-vl-flash-2026-01-22",  # The snapshot described on this page; change the model name as needed.
    messages=messages,
    stream=True,
    # enable_thinking turns on the reasoning process.
    # NOTE(review): the upstream sample lists only qwen-vl-plus,
    # qwen3-vl-plus-2025-09-23 and qwen3-vl-235b-a22b-thinking as accepting
    # enable_thinking / thinking_budget -- confirm support for the model in use.
    enable_thinking=True,
    # thinking_budget caps the number of tokens spent on the reasoning process.
    thinking_budget=50,
)

# Accumulated reasoning text across all chunks.
reasoning_content = ""
# Accumulated answer text across all chunks.
answer_content = ""
# Becomes True once the first answer chunk arrives (reasoning has ended).
is_answering = False

print("=" * 20 + "Reasoning Process" + "=" * 20)

for chunk in response:
    # A failed request yields a chunk without usable output; report the API
    # error instead of crashing on chunk.output below. 200 is HTTP OK.
    if chunk.status_code != 200:
        print(f"\nRequest failed: status_code={chunk.status_code}, "
              f"code={chunk.code}, message={chunk.message}")
        break

    message = chunk.output.choices[0].message
    reasoning_chunk = message.get("reasoning_content", None)
    content = message.content

    # Nothing to show when both the reasoning and the answer are empty.
    if content == [] and reasoning_chunk == "":
        continue

    if reasoning_chunk is not None and content == []:
        # Reasoning phase: the answer content is still empty.
        print(reasoning_chunk, end="")
        reasoning_content += reasoning_chunk
    elif content != []:
        # Answer phase: print the banner once, on the first answer chunk.
        if not is_answering:
            print("\n" + "=" * 20 + "Complete Response" + "=" * 20)
            is_answering = True
        answer_chunk = content[0]["text"]
        print(answer_chunk, end="")
        answer_content += answer_chunk

# If you need to print the full reasoning process and complete response, uncomment the following lines
# print("=" * 20 + "Full Reasoning Process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Complete Response" + "=" * 20 + "\n")
# print(f"{answer_content}")

import os
import dashscope
from dashscope import MultiModalConversation
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"

# Streaming visual-understanding request: send one image plus a text prompt,
# then print the model's reasoning followed by its final answer as chunks arrive.

# A single user turn that pairs an image URL with a text instruction.
messages = [
    {
        "role": "user",
        "content": [
            {"image": "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"},
            {"text": "Solve this problem?"},
        ],
    }
]

response = MultiModalConversation.call(
    # If the environment variable is not configured, replace the line below with: api_key="sk-xxx",
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="qwen3-vl-flash-2026-01-22",  # The snapshot described on this page; change the model name as needed.
    messages=messages,
    stream=True,
    # enable_thinking turns on the reasoning process.
    # NOTE(review): the upstream sample lists only qwen-vl-plus,
    # qwen3-vl-plus-2025-09-23 and qwen3-vl-235b-a22b-thinking as accepting
    # enable_thinking / thinking_budget -- confirm support for the model in use.
    enable_thinking=True,
    # thinking_budget caps the number of tokens spent on the reasoning process.
    thinking_budget=50,
)

# Accumulated reasoning text across all chunks.
reasoning_content = ""
# Accumulated answer text across all chunks.
answer_content = ""
# Becomes True once the first answer chunk arrives (reasoning has ended).
is_answering = False

print("=" * 20 + "Reasoning Process" + "=" * 20)

for chunk in response:
    # A failed request yields a chunk without usable output; report the API
    # error instead of crashing on chunk.output below. 200 is HTTP OK.
    if chunk.status_code != 200:
        print(f"\nRequest failed: status_code={chunk.status_code}, "
              f"code={chunk.code}, message={chunk.message}")
        break

    message = chunk.output.choices[0].message
    reasoning_chunk = message.get("reasoning_content", None)
    content = message.content

    # Nothing to show when both the reasoning and the answer are empty.
    if content == [] and reasoning_chunk == "":
        continue

    if reasoning_chunk is not None and content == []:
        # Reasoning phase: the answer content is still empty.
        print(reasoning_chunk, end="")
        reasoning_content += reasoning_chunk
    elif content != []:
        # Answer phase: print the banner once, on the first answer chunk.
        if not is_answering:
            print("\n" + "=" * 20 + "Complete Response" + "=" * 20)
            is_answering = True
        answer_chunk = content[0]["text"]
        print(answer_chunk, end="")
        answer_content += answer_chunk

# If you need to print the full reasoning process and complete response, uncomment the following lines
# print("=" * 20 + "Full Reasoning Process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Complete Response" + "=" * 20 + "\n")
# print(f"{answer_content}")