pydantic_ai.models

Logic related to making requests to an LLM.

The aim here is to make a common interface for different LLMs, so that the rest of the code can be agnostic to the specific LLM being used.

KnownModelName module-attribute

KnownModelName = TypeAliasType(
    "KnownModelName",
    Literal[
        "anthropic:claude-3-5-haiku-20241022",
        "anthropic:claude-3-5-haiku-latest",
        "anthropic:claude-3-5-sonnet-20240620",
        "anthropic:claude-3-5-sonnet-20241022",
        "anthropic:claude-3-5-sonnet-latest",
        "anthropic:claude-3-7-sonnet-20250219",
        "anthropic:claude-3-7-sonnet-latest",
        "anthropic:claude-3-haiku-20240307",
        "anthropic:claude-3-opus-20240229",
        "anthropic:claude-3-opus-latest",
        "anthropic:claude-4-opus-20250514",
        "anthropic:claude-4-sonnet-20250514",
        "anthropic:claude-opus-4-0",
        "anthropic:claude-opus-4-1-20250805",
        "anthropic:claude-opus-4-20250514",
        "anthropic:claude-sonnet-4-0",
        "anthropic:claude-sonnet-4-20250514",
        "bedrock:amazon.titan-tg1-large",
        "bedrock:amazon.titan-text-lite-v1",
        "bedrock:amazon.titan-text-express-v1",
        "bedrock:us.amazon.nova-pro-v1:0",
        "bedrock:us.amazon.nova-lite-v1:0",
        "bedrock:us.amazon.nova-micro-v1:0",
        "bedrock:anthropic.claude-3-5-sonnet-20241022-v2:0",
        "bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "bedrock:anthropic.claude-3-5-haiku-20241022-v1:0",
        "bedrock:us.anthropic.claude-3-5-haiku-20241022-v1:0",
        "bedrock:anthropic.claude-instant-v1",
        "bedrock:anthropic.claude-v2:1",
        "bedrock:anthropic.claude-v2",
        "bedrock:anthropic.claude-3-sonnet-20240229-v1:0",
        "bedrock:us.anthropic.claude-3-sonnet-20240229-v1:0",
        "bedrock:anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock:us.anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock:anthropic.claude-3-opus-20240229-v1:0",
        "bedrock:us.anthropic.claude-3-opus-20240229-v1:0",
        "bedrock:anthropic.claude-3-5-sonnet-20240620-v1:0",
        "bedrock:us.anthropic.claude-3-5-sonnet-20240620-v1:0",
        "bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0",
        "bedrock:us.anthropic.claude-3-7-sonnet-20250219-v1:0",
        "bedrock:anthropic.claude-opus-4-20250514-v1:0",
        "bedrock:us.anthropic.claude-opus-4-20250514-v1:0",
        "bedrock:anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock:cohere.command-text-v14",
        "bedrock:cohere.command-r-v1:0",
        "bedrock:cohere.command-r-plus-v1:0",
        "bedrock:cohere.command-light-text-v14",
        "bedrock:meta.llama3-8b-instruct-v1:0",
        "bedrock:meta.llama3-70b-instruct-v1:0",
        "bedrock:meta.llama3-1-8b-instruct-v1:0",
        "bedrock:us.meta.llama3-1-8b-instruct-v1:0",
        "bedrock:meta.llama3-1-70b-instruct-v1:0",
        "bedrock:us.meta.llama3-1-70b-instruct-v1:0",
        "bedrock:meta.llama3-1-405b-instruct-v1:0",
        "bedrock:us.meta.llama3-2-11b-instruct-v1:0",
        "bedrock:us.meta.llama3-2-90b-instruct-v1:0",
        "bedrock:us.meta.llama3-2-1b-instruct-v1:0",
        "bedrock:us.meta.llama3-2-3b-instruct-v1:0",
        "bedrock:us.meta.llama3-3-70b-instruct-v1:0",
        "bedrock:mistral.mistral-7b-instruct-v0:2",
        "bedrock:mistral.mixtral-8x7b-instruct-v0:1",
        "bedrock:mistral.mistral-large-2402-v1:0",
        "bedrock:mistral.mistral-large-2407-v1:0",
        "cerebras:gpt-oss-120b",
        "cerebras:llama3.1-8b",
        "cerebras:llama-3.3-70b",
        "cerebras:llama-4-scout-17b-16e-instruct",
        "cerebras:llama-4-maverick-17b-128e-instruct",
        "cerebras:qwen-3-235b-a22b-instruct-2507",
        "cerebras:qwen-3-32b",
        "cerebras:qwen-3-coder-480b",
        "cerebras:qwen-3-235b-a22b-thinking-2507",
        "claude-3-5-haiku-20241022",
        "claude-3-5-haiku-latest",
        "claude-3-5-sonnet-20240620",
        "claude-3-5-sonnet-20241022",
        "claude-3-5-sonnet-latest",
        "claude-3-7-sonnet-20250219",
        "claude-3-7-sonnet-latest",
        "claude-3-haiku-20240307",
        "claude-3-opus-20240229",
        "claude-3-opus-latest",
        "claude-4-opus-20250514",
        "claude-4-sonnet-20250514",
        "claude-opus-4-0",
        "claude-opus-4-1-20250805",
        "claude-opus-4-20250514",
        "claude-sonnet-4-0",
        "claude-sonnet-4-20250514",
        "cohere:c4ai-aya-expanse-32b",
        "cohere:c4ai-aya-expanse-8b",
        "cohere:command",
        "cohere:command-light",
        "cohere:command-light-nightly",
        "cohere:command-nightly",
        "cohere:command-r",
        "cohere:command-r-03-2024",
        "cohere:command-r-08-2024",
        "cohere:command-r-plus",
        "cohere:command-r-plus-04-2024",
        "cohere:command-r-plus-08-2024",
        "cohere:command-r7b-12-2024",
        "deepseek:deepseek-chat",
        "deepseek:deepseek-reasoner",
        "google-gla:gemini-2.0-flash",
        "google-gla:gemini-2.0-flash-lite",
        "google-gla:gemini-2.5-flash",
        "google-gla:gemini-2.5-flash-lite",
        "google-gla:gemini-2.5-pro",
        "google-vertex:gemini-2.0-flash",
        "google-vertex:gemini-2.0-flash-lite",
        "google-vertex:gemini-2.5-flash",
        "google-vertex:gemini-2.5-flash-lite",
        "google-vertex:gemini-2.5-pro",
        "gpt-3.5-turbo",
        "gpt-3.5-turbo-0125",
        "gpt-3.5-turbo-0301",
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-1106",
        "gpt-3.5-turbo-16k",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4",
        "gpt-4-0125-preview",
        "gpt-4-0314",
        "gpt-4-0613",
        "gpt-4-1106-preview",
        "gpt-4-32k",
        "gpt-4-32k-0314",
        "gpt-4-32k-0613",
        "gpt-4-turbo",
        "gpt-4-turbo-2024-04-09",
        "gpt-4-turbo-preview",
        "gpt-4-vision-preview",
        "gpt-4.1",
        "gpt-4.1-2025-04-14",
        "gpt-4.1-mini",
        "gpt-4.1-mini-2025-04-14",
        "gpt-4.1-nano",
        "gpt-4.1-nano-2025-04-14",
        "gpt-4o",
        "gpt-4o-2024-05-13",
        "gpt-4o-2024-08-06",
        "gpt-4o-2024-11-20",
        "gpt-4o-audio-preview",
        "gpt-4o-audio-preview-2024-10-01",
        "gpt-4o-audio-preview-2024-12-17",
        "gpt-4o-audio-preview-2025-06-03",
        "gpt-4o-mini",
        "gpt-4o-mini-2024-07-18",
        "gpt-4o-mini-audio-preview",
        "gpt-4o-mini-audio-preview-2024-12-17",
        "gpt-4o-mini-search-preview",
        "gpt-4o-mini-search-preview-2025-03-11",
        "gpt-4o-search-preview",
        "gpt-4o-search-preview-2025-03-11",
        "gpt-5",
        "gpt-5-2025-08-07",
        "gpt-5-chat-latest",
        "gpt-5-mini",
        "gpt-5-mini-2025-08-07",
        "gpt-5-nano",
        "gpt-5-nano-2025-08-07",
        "grok:grok-4",
        "grok:grok-4-0709",
        "grok:grok-3",
        "grok:grok-3-mini",
        "grok:grok-3-fast",
        "grok:grok-3-mini-fast",
        "grok:grok-2-vision-1212",
        "grok:grok-2-image-1212",
        "groq:distil-whisper-large-v3-en",
        "groq:gemma2-9b-it",
        "groq:llama-3.3-70b-versatile",
        "groq:llama-3.1-8b-instant",
        "groq:llama-guard-3-8b",
        "groq:llama3-70b-8192",
        "groq:llama3-8b-8192",
        "groq:moonshotai/kimi-k2-instruct",
        "groq:whisper-large-v3",
        "groq:whisper-large-v3-turbo",
        "groq:playai-tts",
        "groq:playai-tts-arabic",
        "groq:qwen-qwq-32b",
        "groq:mistral-saba-24b",
        "groq:qwen-2.5-coder-32b",
        "groq:qwen-2.5-32b",
        "groq:deepseek-r1-distill-qwen-32b",
        "groq:deepseek-r1-distill-llama-70b",
        "groq:llama-3.3-70b-specdec",
        "groq:llama-3.2-1b-preview",
        "groq:llama-3.2-3b-preview",
        "groq:llama-3.2-11b-vision-preview",
        "groq:llama-3.2-90b-vision-preview",
        "heroku:claude-3-5-haiku",
        "heroku:claude-3-5-sonnet-latest",
        "heroku:claude-3-7-sonnet",
        "heroku:claude-4-sonnet",
        "heroku:claude-3-haiku",
        "heroku:gpt-oss-120b",
        "heroku:nova-lite",
        "heroku:nova-pro",
        "huggingface:Qwen/QwQ-32B",
        "huggingface:Qwen/Qwen2.5-72B-Instruct",
        "huggingface:Qwen/Qwen3-235B-A22B",
        "huggingface:Qwen/Qwen3-32B",
        "huggingface:deepseek-ai/DeepSeek-R1",
        "huggingface:meta-llama/Llama-3.3-70B-Instruct",
        "huggingface:meta-llama/Llama-4-Maverick-17B-128E-Instruct",
        "huggingface:meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "mistral:codestral-latest",
        "mistral:mistral-large-latest",
        "mistral:mistral-moderation-latest",
        "mistral:mistral-small-latest",
        "moonshotai:moonshot-v1-8k",
        "moonshotai:moonshot-v1-32k",
        "moonshotai:moonshot-v1-128k",
        "moonshotai:moonshot-v1-8k-vision-preview",
        "moonshotai:moonshot-v1-32k-vision-preview",
        "moonshotai:moonshot-v1-128k-vision-preview",
        "moonshotai:kimi-latest",
        "moonshotai:kimi-thinking-preview",
        "moonshotai:kimi-k2-0711-preview",
        "o1",
        "o1-2024-12-17",
        "o1-mini",
        "o1-mini-2024-09-12",
        "o1-preview",
        "o1-preview-2024-09-12",
        "o1-pro",
        "o1-pro-2025-03-19",
        "o3",
        "o3-2025-04-16",
        "o3-deep-research",
        "o3-deep-research-2025-06-26",
        "o3-mini",
        "o3-mini-2025-01-31",
        "o3-pro",
        "o3-pro-2025-06-10",
        "openai:chatgpt-4o-latest",
        "openai:codex-mini-latest",
        "openai:gpt-3.5-turbo",
        "openai:gpt-3.5-turbo-0125",
        "openai:gpt-3.5-turbo-0301",
        "openai:gpt-3.5-turbo-0613",
        "openai:gpt-3.5-turbo-1106",
        "openai:gpt-3.5-turbo-16k",
        "openai:gpt-3.5-turbo-16k-0613",
        "openai:gpt-4",
        "openai:gpt-4-0125-preview",
        "openai:gpt-4-0314",
        "openai:gpt-4-0613",
        "openai:gpt-4-1106-preview",
        "openai:gpt-4-32k",
        "openai:gpt-4-32k-0314",
        "openai:gpt-4-32k-0613",
        "openai:gpt-4-turbo",
        "openai:gpt-4-turbo-2024-04-09",
        "openai:gpt-4-turbo-preview",
        "openai:gpt-4-vision-preview",
        "openai:gpt-4.1",
        "openai:gpt-4.1-2025-04-14",
        "openai:gpt-4.1-mini",
        "openai:gpt-4.1-mini-2025-04-14",
        "openai:gpt-4.1-nano",
        "openai:gpt-4.1-nano-2025-04-14",
        "openai:gpt-4o",
        "openai:gpt-4o-2024-05-13",
        "openai:gpt-4o-2024-08-06",
        "openai:gpt-4o-2024-11-20",
        "openai:gpt-4o-audio-preview",
        "openai:gpt-4o-audio-preview-2024-10-01",
        "openai:gpt-4o-audio-preview-2024-12-17",
        "openai:gpt-4o-audio-preview-2025-06-03",
        "openai:gpt-4o-mini",
        "openai:gpt-4o-mini-2024-07-18",
        "openai:gpt-4o-mini-audio-preview",
        "openai:gpt-4o-mini-audio-preview-2024-12-17",
        "openai:gpt-4o-mini-search-preview",
        "openai:gpt-4o-mini-search-preview-2025-03-11",
        "openai:gpt-4o-search-preview",
        "openai:gpt-4o-search-preview-2025-03-11",
        "openai:gpt-5",
        "openai:gpt-5-2025-08-07",
        "openai:o1",
        "openai:gpt-5-chat-latest",
        "openai:o1-2024-12-17",
        "openai:gpt-5-mini",
        "openai:o1-mini",
        "openai:gpt-5-mini-2025-08-07",
        "openai:o1-mini-2024-09-12",
        "openai:gpt-5-nano",
        "openai:o1-preview",
        "openai:gpt-5-nano-2025-08-07",
        "openai:o1-preview-2024-09-12",
        "openai:o1-pro",
        "openai:o1-pro-2025-03-19",
        "openai:o3",
        "openai:o3-2025-04-16",
        "openai:o3-deep-research",
        "openai:o3-deep-research-2025-06-26",
        "openai:o3-mini",
        "openai:o3-mini-2025-01-31",
        "openai:o4-mini",
        "openai:o4-mini-2025-04-16",
        "openai:o4-mini-deep-research",
        "openai:o4-mini-deep-research-2025-06-26",
        "openai:o3-pro",
        "openai:o3-pro-2025-06-10",
        "openai:computer-use-preview",
        "openai:computer-use-preview-2025-03-11",
        "test",
    ],
)

Known model names that can be used with the model parameter of Agent.

KnownModelName is provided as a concise way to specify a model.
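
For example, one of these strings can be passed directly to Agent; a minimal sketch, assuming an OpenAI API key is configured in the environment:

from pydantic_ai import Agent

# The part before ':' selects the provider, the rest is the model name.
agent = Agent('openai:gpt-4o')
result = agent.run_sync('Hello!')
print(result.output)  # the agent's text output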

ModelRequestParameters dataclass

Configuration for an agent's request to a model, specifically related to tools and output handling.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
@dataclass(repr=False, kw_only=True)
class ModelRequestParameters:
    """Configuration for an agent's request to a model, specifically related to tools and output handling."""

    function_tools: list[ToolDefinition] = field(default_factory=list)
    builtin_tools: list[AbstractBuiltinTool] = field(default_factory=list)

    output_mode: OutputMode = 'text'
    output_object: OutputObjectDefinition | None = None
    output_tools: list[ToolDefinition] = field(default_factory=list)
    allow_text_output: bool = True

    @cached_property
    def tool_defs(self) -> dict[str, ToolDefinition]:
        return {tool_def.name: tool_def for tool_def in [*self.function_tools, *self.output_tools]}

    __repr__ = _utils.dataclasses_no_defaults_repr
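
A minimal construction sketch; the weather tool below is hypothetical, and the ToolDefinition field names are assumptions for illustration:

from pydantic_ai.models import ModelRequestParameters
from pydantic_ai.tools import ToolDefinition

# Hypothetical weather tool, for illustration only:
weather_tool = ToolDefinition(
    name='get_weather',
    description='Return the current weather for a city.',
    parameters_json_schema={'type': 'object', 'properties': {'city': {'type': 'string'}}},
)

params = ModelRequestParameters(function_tools=[weather_tool])
# tool_defs indexes function_tools and output_tools by tool name:
print(params.tool_defs.keys())  # dict_keys(['get_weather'])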

Model

Bases: ABC

Abstract class for a model.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
class Model(ABC):
    """Abstract class for a model."""

    _profile: ModelProfileSpec | None = None
    _settings: ModelSettings | None = None

    def __init__(
        self,
        *,
        settings: ModelSettings | None = None,
        profile: ModelProfileSpec | None = None,
    ) -> None:
        """Initialize the model with optional settings and profile.

        Args:
            settings: Model-specific settings that will be used as defaults for this model.
            profile: The model profile to use.
        """
        self._settings = settings
        self._profile = profile

    @property
    def settings(self) -> ModelSettings | None:
        """Get the model settings."""
        return self._settings

    @abstractmethod
    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Make a request to the model."""
        raise NotImplementedError()

    async def count_tokens(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> RequestUsage:
        """Make a request to the model for counting tokens."""
        # This method is not required, but you need to implement it if you want to support `UsageLimits.count_tokens_before_request`.
        raise NotImplementedError(f'Token counting ahead of the request is not supported by {self.__class__.__name__}')

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
        run_context: RunContext[Any] | None = None,
    ) -> AsyncIterator[StreamedResponse]:
        """Make a request to the model and return a streaming response."""
        # This method is not required, but you need to implement it if you want to support streamed responses
        raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
        # yield is required to make this a generator for type checking
        # noinspection PyUnreachableCode
        yield  # pragma: no cover

    def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
        """Customize the request parameters for the model.

        This method can be overridden by subclasses to modify the request parameters before sending them to the model.
        In particular, this method can be used to make modifications to the generated tool JSON schemas if necessary
        for vendor/model-specific reasons.
        """
        if transformer := self.profile.json_schema_transformer:
            model_request_parameters = replace(
                model_request_parameters,
                function_tools=[_customize_tool_def(transformer, t) for t in model_request_parameters.function_tools],
                output_tools=[_customize_tool_def(transformer, t) for t in model_request_parameters.output_tools],
            )
            if output_object := model_request_parameters.output_object:
                model_request_parameters = replace(
                    model_request_parameters,
                    output_object=_customize_output_object(transformer, output_object),
                )

        return model_request_parameters

    @property
    @abstractmethod
    def model_name(self) -> str:
        """The model name."""
        raise NotImplementedError()

    @cached_property
    def profile(self) -> ModelProfile:
        """The model profile."""
        _profile = self._profile
        if callable(_profile):
            _profile = _profile(self.model_name)

        if _profile is None:
            return DEFAULT_PROFILE

        return _profile

    @property
    @abstractmethod
    def system(self) -> str:
        """The model provider, ex: openai.

        Use to populate the `gen_ai.system` OpenTelemetry semantic convention attribute,
        so should use well-known values listed in
        https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/#gen-ai-system
        when applicable.
        """
        raise NotImplementedError()

    @property
    def base_url(self) -> str | None:
        """The base URL for the provider API, if available."""
        return None

    @staticmethod
    def _get_instructions(messages: list[ModelMessage]) -> str | None:
        """Get instructions from the first ModelRequest found when iterating messages in reverse.

        In the case that a "mock" request was generated to include a tool-return part for a result tool,
        we want to use the instructions from the second-to-most-recent request (which should correspond to the
        original request that generated the response that resulted in the tool-return part).
        """
        last_two_requests: list[ModelRequest] = []
        for message in reversed(messages):
            if isinstance(message, ModelRequest):
                last_two_requests.append(message)
                if len(last_two_requests) == 2:
                    break
                if message.instructions is not None:
                    return message.instructions

        # If we don't have two requests, and we didn't already return instructions, there are definitely not any:
        if len(last_two_requests) != 2:
            return None

        most_recent_request = last_two_requests[0]
        second_most_recent_request = last_two_requests[1]

        # If we've gotten this far and the most recent request consists of only tool-return parts or retry-prompt parts,
        # we use the instructions from the second-to-most-recent request. This is necessary because when handling
        # result tools, we generate a "mock" ModelRequest with a tool-return part for it, and that ModelRequest will not
        # have the relevant instructions from the agent.

        # While it's possible that you could have a message history where the most recent request has only tool returns,
        # I believe there is no way to achieve that would _change_ the instructions without manually crafting the most
        # recent message. That might make sense in principle for some usage pattern, but it's enough of an edge case
        # that I think it's not worth worrying about, since you can work around this by inserting another ModelRequest
        # with no parts at all immediately before the request that has the tool calls (that works because we only look
        # at the two most recent ModelRequests here).

        # If you have a use case where this causes pain, please open a GitHub issue and we can discuss alternatives.

        if all(p.part_kind == 'tool-return' or p.part_kind == 'retry-prompt' for p in most_recent_request.parts):
            return second_most_recent_request.instructions

        return None
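
Putting the abstract surface together, a toy subclass might look like this. This is a sketch: EchoModel is hypothetical and implements only the required members (request, model_name, system).

from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart
from pydantic_ai.models import Model, ModelRequestParameters
from pydantic_ai.settings import ModelSettings


class EchoModel(Model):
    """Hypothetical model returning a fixed reply, showing the minimal Model surface."""

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        # A real implementation with cost or latency should call
        # check_allow_model_requests() here before doing any work.
        return ModelResponse(parts=[TextPart('echo')], model_name=self.model_name)

    @property
    def model_name(self) -> str:
        return 'echo'

    @property
    def system(self) -> str:
        return 'echo-provider'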

__init__

__init__(
    *,
    settings: ModelSettings | None = None,
    profile: ModelProfileSpec | None = None
) -> None

Initialize the model with optional settings and profile.

Parameters:

Name      Type                      Description                                                              Default
settings  ModelSettings | None      Model-specific settings that will be used as defaults for this model.   None
profile   ModelProfileSpec | None   The model profile to use.                                                None
Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def __init__(
    self,
    *,
    settings: ModelSettings | None = None,
    profile: ModelProfileSpec | None = None,
) -> None:
    """Initialize the model with optional settings and profile.

    Args:
        settings: Model-specific settings that will be used as defaults for this model.
        profile: The model profile to use.
    """
    self._settings = settings
    self._profile = profile

settings property

settings: ModelSettings | None

Get the model settings.

request abstractmethod async

request(
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
) -> ModelResponse

Make a request to the model.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
@abstractmethod
async def request(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
) -> ModelResponse:
    """Make a request to the model."""
    raise NotImplementedError()

count_tokens async

count_tokens(
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
) -> RequestUsage

Make a request to the model for counting tokens.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
async def count_tokens(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
) -> RequestUsage:
    """Make a request to the model for counting tokens."""
    # This method is not required, but you need to implement it if you want to support `UsageLimits.count_tokens_before_request`.
    raise NotImplementedError(f'Token counting ahead of the request is not supported by {self.__class__.__name__}')

request_stream async

request_stream(
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
    run_context: RunContext[Any] | None = None,
) -> AsyncIterator[StreamedResponse]

Make a request to the model and return a streaming response.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
@asynccontextmanager
async def request_stream(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
    run_context: RunContext[Any] | None = None,
) -> AsyncIterator[StreamedResponse]:
    """Make a request to the model and return a streaming response."""
    # This method is not required, but you need to implement it if you want to support streamed responses
    raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
    # yield is required to make this a generator for type checking
    # noinspection PyUnreachableCode
    yield  # pragma: no cover

customize_request_parameters

customize_request_parameters(
    model_request_parameters: ModelRequestParameters,
) -> ModelRequestParameters

Customize the request parameters for the model.

This method can be overridden by subclasses to modify the request parameters before sending them to the model. In particular, it can be used to modify the generated tool JSON schemas when necessary for vendor- or model-specific reasons.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
    """Customize the request parameters for the model.

    This method can be overridden by subclasses to modify the request parameters before sending them to the model.
    In particular, this method can be used to make modifications to the generated tool JSON schemas if necessary
    for vendor/model-specific reasons.
    """
    if transformer := self.profile.json_schema_transformer:
        model_request_parameters = replace(
            model_request_parameters,
            function_tools=[_customize_tool_def(transformer, t) for t in model_request_parameters.function_tools],
            output_tools=[_customize_tool_def(transformer, t) for t in model_request_parameters.output_tools],
        )
        if output_object := model_request_parameters.output_object:
            model_request_parameters = replace(
                model_request_parameters,
                output_object=_customize_output_object(transformer, output_object),
            )

    return model_request_parameters
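
A subclass can hook in here to go beyond what the profile's schema transformer does. A sketch, assuming parameters_json_schema is a plain dict; StrictToolsModel is hypothetical and its abstract members (request, model_name, system) are omitted for brevity:

from dataclasses import replace

from pydantic_ai.models import Model, ModelRequestParameters


class StrictToolsModel(Model):  # hypothetical; abstract members omitted
    def customize_request_parameters(
        self, model_request_parameters: ModelRequestParameters
    ) -> ModelRequestParameters:
        params = super().customize_request_parameters(model_request_parameters)
        # Example tweak: forbid extra keys on every function tool's schema.
        return replace(
            params,
            function_tools=[
                replace(t, parameters_json_schema={**t.parameters_json_schema, 'additionalProperties': False})
                for t in params.function_tools
            ],
        )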

model_name abstractmethod property

model_name: str

The model name.

profile cached property

profile: ModelProfile

The model profile.

system abstractmethod property

system: str

The model provider, ex: openai.

Used to populate the gen_ai.system OpenTelemetry semantic convention attribute, so it should use the well-known values listed at https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/#gen-ai-system when applicable.

base_url property

base_url: str | None

The base URL for the provider API, if available.

StreamedResponse dataclass

Bases: ABC

Streamed response from an LLM when calling a tool.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
@dataclass
class StreamedResponse(ABC):
    """Streamed response from an LLM when calling a tool."""

    model_request_parameters: ModelRequestParameters

    final_result_event: FinalResultEvent | None = field(default=None, init=False)

    _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
    _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
    _usage: RequestUsage = field(default_factory=RequestUsage, init=False)

    def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.

        This proxies the `_event_iterator()` and emits all events, while also checking for matches
        on the result schema and emitting a [`FinalResultEvent`][pydantic_ai.messages.FinalResultEvent] if/when the
        first match is found.
        """
        if self._event_iterator is None:

            async def iterator_with_final_event(
                iterator: AsyncIterator[ModelResponseStreamEvent],
            ) -> AsyncIterator[ModelResponseStreamEvent]:
                async for event in iterator:
                    yield event
                    if (
                        final_result_event := _get_final_result_event(event, self.model_request_parameters)
                    ) is not None:
                        self.final_result_event = final_result_event
                        yield final_result_event
                        break

                # If we broke out of the above loop, we need to yield the rest of the events
                # If we didn't, this will just be a no-op
                async for event in iterator:
                    yield event

            self._event_iterator = iterator_with_final_event(self._get_event_iterator())
        return self._event_iterator

    @abstractmethod
    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Return an async iterator of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.

        This method should be implemented by subclasses to translate the vendor-specific stream of events into
        pydantic_ai-format events.

        It should use the `_parts_manager` to handle deltas, and should update the `_usage` attributes as it goes.
        """
        raise NotImplementedError()
        # noinspection PyUnreachableCode
        yield

    def get(self) -> ModelResponse:
        """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
        return ModelResponse(
            parts=self._parts_manager.get_parts(),
            model_name=self.model_name,
            timestamp=self.timestamp,
            usage=self.usage(),
            provider_name=self.provider_name,
        )

    def usage(self) -> RequestUsage:
        """Get the usage of the response so far. This will not be the final usage until the stream is exhausted."""
        return self._usage

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Get the model name of the response."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def provider_name(self) -> str | None:
        """Get the provider name."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def timestamp(self) -> datetime:
        """Get the timestamp of the response."""
        raise NotImplementedError()
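
From the caller's side, a streamed response is consumed roughly like this; a usage sketch, where consume_stream is a hypothetical helper:

from pydantic_ai.messages import ModelMessage
from pydantic_ai.models import Model, ModelRequestParameters


async def consume_stream(model: Model, messages: list[ModelMessage]) -> None:
    params = ModelRequestParameters()
    async with model.request_stream(messages, None, params) as streamed:
        async for event in streamed:
            print(event)  # ModelResponseStreamEvent; includes a FinalResultEvent once matched
        print(streamed.get())    # ModelResponse built from the parts received so far
        print(streamed.usage())  # RequestUsage accumulated so far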

__aiter__

Stream the response as an async iterable of ModelResponseStreamEvents.

This proxies _event_iterator() and emits all events, while also checking for matches on the result schema and emitting a FinalResultEvent if/when the first match is found.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
    """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.

    This proxies the `_event_iterator()` and emits all events, while also checking for matches
    on the result schema and emitting a [`FinalResultEvent`][pydantic_ai.messages.FinalResultEvent] if/when the
    first match is found.
    """
    if self._event_iterator is None:

        async def iterator_with_final_event(
            iterator: AsyncIterator[ModelResponseStreamEvent],
        ) -> AsyncIterator[ModelResponseStreamEvent]:
            async for event in iterator:
                yield event
                if (
                    final_result_event := _get_final_result_event(event, self.model_request_parameters)
                ) is not None:
                    self.final_result_event = final_result_event
                    yield final_result_event
                    break

            # If we broke out of the above loop, we need to yield the rest of the events
            # If we didn't, this will just be a no-op
            async for event in iterator:
                yield event

        self._event_iterator = iterator_with_final_event(self._get_event_iterator())
    return self._event_iterator

get

get() -> ModelResponse

Build a ModelResponse from the data received from the stream so far.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def get(self) -> ModelResponse:
    """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
    return ModelResponse(
        parts=self._parts_manager.get_parts(),
        model_name=self.model_name,
        timestamp=self.timestamp,
        usage=self.usage(),
        provider_name=self.provider_name,
    )

usage

usage() -> RequestUsage

Get the usage of the response so far. This will not be the final usage until the stream is exhausted.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def usage(self) -> RequestUsage:
    """Get the usage of the response so far. This will not be the final usage until the stream is exhausted."""
    return self._usage

model_name abstractmethod property

model_name: str

Get the model name of the response.

provider_name abstractmethod property

provider_name: str | None

Get the provider name.

timestamp abstractmethod property

timestamp: datetime

Get the timestamp of the response.

ALLOW_MODEL_REQUESTS module-attribute

ALLOW_MODEL_REQUESTS = True

Whether to allow requests to models.

This global setting allows you to disable requests to most models, e.g. to make sure you don't accidentally make costly requests to a model during tests.

The testing models TestModel and FunctionModel are not affected by this setting.
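
For example, a test suite can disable real requests globally; a common pattern, e.g. in conftest.py:

import pydantic_ai.models

# Block accidental real model requests for the whole test run.
# Set the attribute on the module so the change is visible everywhere:
pydantic_ai.models.ALLOW_MODEL_REQUESTS = False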

check_allow_model_requests

check_allow_model_requests() -> None

Check if model requests are allowed.

If you're defining your own models that have costs or latency associated with their use, you should call this in Model.request and Model.request_stream.

Raises:

Type          Description
RuntimeError  If model requests are not allowed.

Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
def check_allow_model_requests() -> None:
    """Check if model requests are allowed.

    If you're defining your own models that have costs or latency associated with their use, you should call this in
    [`Model.request`][pydantic_ai.models.Model.request] and [`Model.request_stream`][pydantic_ai.models.Model.request_stream].

    Raises:
        RuntimeError: If model requests are not allowed.
    """
    if not ALLOW_MODEL_REQUESTS:
        raise RuntimeError('Model requests are not allowed, since ALLOW_MODEL_REQUESTS is False')
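
A typical call site, sketched, at the top of a custom model's request implementation:

from pydantic_ai.models import check_allow_model_requests

# Raises RuntimeError when ALLOW_MODEL_REQUESTS is False:
check_allow_model_requests()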

override_allow_model_requests

override_allow_model_requests(
    allow_model_requests: bool,
) -> Iterator[None]

Context manager to temporarily override ALLOW_MODEL_REQUESTS.

Parameters:

Name                  Type  Description                                           Default
allow_model_requests  bool  Whether to allow model requests within the context.   required
Source code in pydantic_ai_slim/pydantic_ai/models/__init__.py
@contextmanager
def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
    """Context manager to temporarily override [`ALLOW_MODEL_REQUESTS`][pydantic_ai.models.ALLOW_MODEL_REQUESTS].

    Args:
        allow_model_requests: Whether to allow model requests within the context.
    """
    global ALLOW_MODEL_REQUESTS
    old_value = ALLOW_MODEL_REQUESTS
    ALLOW_MODEL_REQUESTS = allow_model_requests  # pyright: ignore[reportConstantRedefinition]
    try:
        yield
    finally:
        ALLOW_MODEL_REQUESTS = old_value  # pyright: ignore[reportConstantRedefinition]
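
Typical usage, sketched: re-enable requests for a single block while the global flag is off.

from pydantic_ai import models

models.ALLOW_MODEL_REQUESTS = False  # e.g. disabled globally for the test suite

with models.override_allow_model_requests(True):
    ...  # model requests are allowed only inside this block
# on exit the previous value (False) is restored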