QVQ-Max

Copied!

Try AI | Add to Compare

Reasoning, Visual Understanding

Overview

Reasoning, Visual Understanding

The Tongyi Qianwen QVQ visual reasoning model supports visual input and chain-of-thought output, demonstrating stronger capabilities in mathematics, programming, visual analysis, creation, and general tasks.

Input

Text, Image, Video

Output

Text

Features

Prefix Completion

Function Calling

Cache

Structured Outputs

Batches

Web Search

Pricing

Input
$1.2 per 1M tokens
Output
$4.8 per 1M tokens

Context

131.07K

Max Input

106.49K

Max Output

8.19K

Rate Limits

RPM (Requests Per Minute)
60
TPM (Tokens Per Minute)
100K

API Reference

Get API Key

Copied!

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657

import os
import dashscope
from dashscope import MultiModalConversation
# Point the SDK at the `dashscope-intl` HTTP endpoint before any request is made.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# A single user turn made of two parts: an image URL and a text instruction.
image_part = {"image": "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"}
text_part = {"text": "Solve this problem"}
messages = [{"role": "user", "content": [image_part, text_part]}]

# Request options:
# - api_key: read from the DASHSCOPE_API_KEY environment variable. If the
#   variable is not set, pass your Model Studio key directly (api_key="sk-xxx").
#   Models in the China (Beijing) region require an API key from that region;
#   get one at https://bailian.console.alibabacloud.com/?tab=model#/api-key
# - model: "qvq-max" is only an example; substitute another model name as needed.
# - stream: True, so `response` is iterated chunk by chunk.
request_options = {
    "api_key": os.environ.get('DASHSCOPE_API_KEY'),
    "model": "qvq-max",
    "messages": messages,
    "stream": True,
}
response = MultiModalConversation.call(**request_options)

# Consume the streamed response. Each chunk carries either a fragment of the
# thinking process (`reasoning_content`) or a fragment of the answer
# (`content`). Fragments are echoed as they arrive and accumulated in full.

# Full thinking process accumulated so far
reasoning_content = ""
# Full response accumulated so far
answer_content = ""
# Becomes True once the first answer fragment arrives, i.e. the
# chain-of-thought has ended and the response has begun
is_answering = False

print("=" * 20 + "Thinking process" + "=" * 20)

for chunk in response:
    # Resolve the message once instead of re-walking the attribute chain.
    message = chunk.output.choices[0].message
    content = message.content
    reasoning_content_chunk = message.get("reasoning_content", None)

    if content == []:
        # No answer text in this chunk. Echo the thinking fragment if there
        # is one; if both the thinking fragment and the answer are empty,
        # there is nothing to do.
        if reasoning_content_chunk not in (None, ""):
            # flush=True: without a trailing newline the text could otherwise
            # stay in the stdout buffer and not appear incrementally.
            print(reasoning_content_chunk, end="", flush=True)
            reasoning_content += reasoning_content_chunk
        continue

    # This chunk is part of the response; print the banner exactly once.
    if not is_answering:
        print("\n" + "=" * 20 + "Full response" + "=" * 20)
        is_answering = True
    answer_text = content[0]["text"]
    print(answer_text, end="", flush=True)
    answer_content += answer_text

# To print the full thinking process and response, uncomment and run the following
# print("=" * 20 + "Thinking process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Full response" + "=" * 20 + "\n")
# print(f"{answer_content}")

import os
import dashscope
from dashscope import MultiModalConversation
# Point the SDK at the `dashscope-intl` HTTP endpoint before any request is made.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# A single user turn made of two parts: an image URL and a text instruction.
image_part = {"image": "https://img.alicdn.com/imgextra/i1/O1CN01gDEY8M1W114Hi3XcN_!!6000000002727-0-tps-1024-406.jpg"}
text_part = {"text": "Solve this problem"}
messages = [{"role": "user", "content": [image_part, text_part]}]

# Request options:
# - api_key: read from the DASHSCOPE_API_KEY environment variable. If the
#   variable is not set, pass your Model Studio key directly (api_key="sk-xxx").
#   Models in the China (Beijing) region require an API key from that region;
#   get one at https://bailian.console.alibabacloud.com/?tab=model#/api-key
# - model: "qvq-max" is only an example; substitute another model name as needed.
# - stream: True, so `response` is iterated chunk by chunk.
request_options = {
    "api_key": os.environ.get('DASHSCOPE_API_KEY'),
    "model": "qvq-max",
    "messages": messages,
    "stream": True,
}
response = MultiModalConversation.call(**request_options)

# Consume the streamed response. Each chunk carries either a fragment of the
# thinking process (`reasoning_content`) or a fragment of the answer
# (`content`). Fragments are echoed as they arrive and accumulated in full.

# Full thinking process accumulated so far
reasoning_content = ""
# Full response accumulated so far
answer_content = ""
# Becomes True once the first answer fragment arrives, i.e. the
# chain-of-thought has ended and the response has begun
is_answering = False

print("=" * 20 + "Thinking process" + "=" * 20)

for chunk in response:
    # Resolve the message once instead of re-walking the attribute chain.
    message = chunk.output.choices[0].message
    content = message.content
    reasoning_content_chunk = message.get("reasoning_content", None)

    if content == []:
        # No answer text in this chunk. Echo the thinking fragment if there
        # is one; if both the thinking fragment and the answer are empty,
        # there is nothing to do.
        if reasoning_content_chunk not in (None, ""):
            # flush=True: without a trailing newline the text could otherwise
            # stay in the stdout buffer and not appear incrementally.
            print(reasoning_content_chunk, end="", flush=True)
            reasoning_content += reasoning_content_chunk
        continue

    # This chunk is part of the response; print the banner exactly once.
    if not is_answering:
        print("\n" + "=" * 20 + "Full response" + "=" * 20)
        is_answering = True
    answer_text = content[0]["text"]
    print(answer_text, end="", flush=True)
    answer_content += answer_text

# To print the full thinking process and response, uncomment and run the following
# print("=" * 20 + "Thinking process" + "=" * 20 + "\n")
# print(f"{reasoning_content}")
# print("=" * 20 + "Full response" + "=" * 20 + "\n")
# print(f"{answer_content}")