Skip to content

Structured Outputs 结构化输出

什么是 Structured Outputs

Structured Outputs 确保模型生成的输出完全匹配您定义的 JSON Schema，从而保证输出格式的确定性。这对于需要以程序方式处理模型输出的应用场景至关重要。

核心特性

| 特性 | 说明 |
| --- | --- |
| 格式保证 | 输出严格遵循 JSON Schema |
| 类型安全 | 支持 JSON Schema 类型验证 |
| 可重复性 | 相同输入产生相同格式输出 |

使用方法

基本语法

python
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "提取以下信息:姓名、年龄、职业"}
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person_info",
            "schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                    "occupation": {"type": "string"}
                },
                "required": ["name", "age", "occupation"]
            }
        }
    }
)

JSON Schema 示例

简单对象

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "recipe",
    "schema": {
      "type": "object",
      "properties": {
        "title": {"type": "string"},
        "ingredients": {
          "type": "array",
          "items": {"type": "string"}
        },
        "steps": {"type": "string"}
      },
      "required": ["title", "ingredients", "steps"]
    }
  }
}

嵌套对象

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "company",
    "schema": {
      "type": "object",
      "properties": {
        "name": {"type": "string"},
        "founded": {"type": "integer"},
        "headquarters": {
          "type": "object",
          "properties": {
            "city": {"type": "string"},
            "country": {"type": "string"}
          }
        },
        "employees": {"type": "integer"}
      },
      "required": ["name", "headquarters"]
    }
  }
}

数组类型

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "product_list",
    "schema": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "id": {"type": "string"},
          "name": {"type": "string"},
          "price": {"type": "number"}
        }
      }
    }
  }
}

完整示例

提取结构化数据

python
from openai import OpenAI
import json

client = OpenAI(
    base_url="https://ai-tokenhub.com/v1",
    api_key="your_api_key"
)

# 定义输出格式
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "news_summary",
        "schema": {
            "type": "object",
            "properties": {
                "headline": {"type": "string"},
                "category": {
                    "type": "string",
                    "enum": ["科技", "体育", "娱乐", "财经", "国际"]
                },
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "sentiment": {
                    "type": "string",
                    "enum": ["正面", "负面", "中性"]
                },
                "word_count": {"type": "integer"}
            },
            "required": ["headline", "category", "key_points", "sentiment"]
        }
    }
}

# 发送请求
news_text = """
在今天的科技发布会上,苹果公司宣布推出新一代 iPhone。
新产品采用了全新的 A19 芯片,性能提升 40%。
分析师认为这将帮助苹果在高端市场保持竞争优势。
"""

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": f"分析以下新闻并提取结构化信息:\n{news_text}"}
    ],
    response_format=response_format
)

# 解析响应
result = json.loads(response.choices[0].message.content)
print(json.dumps(result, indent=2, ensure_ascii=False))

输出示例

json
{
  "headline": "苹果发布新一代 iPhone,搭载 A19 芯片",
  "category": "科技",
  "key_points": [
    "苹果推出新一代 iPhone",
    "采用 A19 芯片,性能提升 40%",
    "分析师看好苹果高端市场前景"
  ],
  "sentiment": "正面",
  "word_count": 58
}

枚举值约束

定义枚举

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "status_report",
    "schema": {
      "type": "object",
      "properties": {
        "status": {
          "type": "string",
          "enum": ["pending", "in_progress", "completed", "failed"]
        },
        "priority": {
          "type": "string",
          "enum": ["low", "medium", "high", "critical"]
        },
        "progress": {
          "type": "number",
          "minimum": 0,
          "maximum": 100
        }
      },
      "required": ["status", "priority"]
    }
  }
}

使用枚举

python
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "分析项目状态:项目正在进行中,已完成 60%,优先级高"}
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "status_report",
            "schema": {
                "type": "object",
                "properties": {
                    "status": {
                        "type": "string",
                        "enum": ["pending", "in_progress", "completed", "failed"]
                    },
                    "priority": {
                        "type": "string",
                        "enum": ["low", "medium", "high", "critical"]
                    },
                    "progress": {
                        "type": "number",
                        "minimum": 0,
                        "maximum": 100
                    }
                },
                "required": ["status", "priority", "progress"]
            }
        }
    }
)

高级用法

联合类型

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "content_item",
    "schema": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "enum": ["article", "video", "podcast"]
        },
        "content": {
          "oneOf": [
            {"type": "object", "properties": {"text": {"type": "string"}}},
            {"type": "object", "properties": {"url": {"type": "string"}, "duration": {"type": "integer"}}}
          ]
        }
      }
    }
  }
}

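条件必填

注意：下面的 Schema 只将 method 声明为必填字段；tracking_number 与 express_tracking 是否必填取决于 method 的取值，但这一条件约束并未在 Schema 中表达，需要在应用代码中自行校验。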

json
{
  "type": "json_schema",
  "json_schema": {
    "name": "shipping_info",
    "schema": {
      "type": "object",
      "properties": {
        "method": {
          "type": "string",
          "enum": ["standard", "express", "overnight"]
        },
        "tracking_number": {"type": "string"},
        "express_tracking": {"type": "string"}
      },
      "required": ["method"]
    }
  }
}

错误处理

格式验证失败

python
import json

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "生成一个产品信息"}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "product",
            "schema": {"type": "object", "properties": {"name": {"type": "string"}}}
        }
    }
)

try:
    result = json.loads(response.choices[0].message.content)
    # 验证必填字段
    if "name" not in result:
        raise ValueError("Missing required field: name")
except json.JSONDecodeError as e:
    print(f"JSON 解析错误: {e}")

最佳实践

1. 清晰的 Schema 命名

python
# ✅ 好的命名
"json_schema": {"name": "customer_order", ...}

# ❌ 不清晰的命名
"json_schema": {"name": "data", ...}

2. 添加字段描述

json
{
  "properties": {
    "customer_id": {
      "type": "string",
      "description": "客户唯一标识符,格式:CUST-XXXXX"
    }
  }
}

3. 合理使用必填字段

python
# 只将真正必填的字段设为 required
"required": ["name", "email"]  # 其他字段可选