Retell AI

AI voice agent platform

docs.retellai.com ↗
Version
1.0.0
OpenAPI
3.0.3
Endpoints
19
Schemas
32
Updated
3 days ago
AI voice agents
Use this API in your AI agent

Query structured spec data via REST or MCP. Get exactly what your agent needs.

Get API Key

Server URLs

https://api.retellai.com

Authentication

bearer

No endpoints found for this provider.

Schemas

object AgentCreateNewAgentRequest
{
  "allOf": [
    {
      "$ref": "#/components/schemas/BaseAgent"
    },
    {
      "required": [
        "llm_websocket_url",
        "voice_id"
      ]
    }
  ]
}
array AgentGetAllResponse
{
  "type": "array",
  "items": {
    "$ref": "#/components/schemas/AgentResponse"
  }
}
object AgentResponse
{
  "allOf": [
    {
      "properties": {
        "agent_id": {
          "type": "string",
          "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
          "description": "Unique id of agent."
        }
      }
    },
    {
      "$ref": "#/components/schemas/BaseAgent"
    },
    {
      "properties": {
        "last_modification_timestamp": {
          "type": "integer",
          "example": 1703413636133,
          "description": "Last modification timestamp (milliseconds since epoch). Either the time of last update or creation if no updates available."
        }
      }
    }
  ],
  "required": [
    "agent_id",
    "llm_websocket_url",
    "voice_id",
    "last_modification_timestamp"
  ]
}
object BaseAgent
{
  "type": "object",
  "properties": {
    "language": {
      "enum": [
        "en-US",
        "en-IN",
        "en-GB",
        "de-DE",
        "es-ES",
        "es-419",
        "hi-IN",
        "ja-JP",
        "pt-PT",
        "pt-BR"
      ],
      "type": "string",
      "example": "en-US",
      "description": "`Beta feature, use with caution.`\n\n This setting specifies the agent's operational language, including base language and dialect. Speech recognition considers both elements, but text-to-speech currently only recognizes the base language. \n\n For instance, selecting `en-GB` optimizes speech recognition for British English, yet text-to-speech output will be in standard English. If dialect-specific text-to-speech is required, please contact us for support. \n\n- `11lab voices`: supports English(en), German(de), Spanish(es), Hindi(hi), Portuguese(pt) \n\n- `openAI voices`: supports English(en), German(de), Spanish(es), Hindi(hi), Portuguese(pt), Japanese(ja) \n\n- `deepgram voices`: supports English(en) "
    },
    "voice_id": {
      "type": "string",
      "example": "11labs-Adrian",
      "description": "Unique voice id used for the agent. Find list of available voices and their preview in Dashboard."
    },
    "agent_name": {
      "type": "string",
      "example": "Jarvis",
      "description": "The name of the agent. Only used for your own reference."
    },
    "format_text": {
      "type": "boolean",
      "example": true,
      "description": "Whether to format the transcribed text with inverse text normalization. It transforms the spoken form of text into written form for entities like phone number, email address, street address, etc. For example, \"february fourth twenty twenty two\" can be converted into \"february 4th 2022\". If not set, the default is true."
    },
    "voice_speed": {
      "type": "number",
      "example": 1,
      "description": "Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower speech, while higher value means faster speech rate. If unset, default value 1 will apply."
    },
    "webhook_url": {
      "type": "string",
      "example": "https://webhook-url-here",
      "description": "The webhook for agent to listen to call events. See what events it would get at [webhook doc](https://raw.githubusercontent.com). If set, will binds webhook events for this agent to the specified url, and will ignore the account level webhook for this agent. Set to string `null` to remove webhook url from this agent."
    },
    "ambient_sound": {
      "enum": [
        "coffee-shop",
        "convention-hall",
        "summer-outdoor",
        "mountain-outdoor",
        "null"
      ],
      "type": "string",
      "description": "If set, will add ambient environment sound to the call to make experience more realistic. Currently supports the following options:\n\n- `coffee-shop`: Coffee shop ambience with people chatting in background. [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)\n\n- `convention-hall`: Convention hall ambience, with some echo and people chatting in background. [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)\n\n- `summer-outdoor`: Summer outdoor ambience with cicada chirping. [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)\n\n- `mountain-outdoor`: Mountain outdoor ambience with birds singing. [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)\n\n Set to string `null` to remove ambient sound from this agent. "
    },
    "responsiveness": {
      "type": "number",
      "example": 1,
      "description": "Controls how responsive is the agent. Value ranging from [0,1]. Lower value means less responsive agent (wait more, respond slower), while higher value means faster exchanges (respond when it can). If unset, default value 1 will apply."
    },
    "boosted_keywords": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "example": [
        "retell",
        "kroger"
      ],
      "description": "Provide a customized list of keywords to bias the transcriber model, so that these words are more likely to get transcribed. Commonly used for names, brands, street, etc."
    },
    "llm_websocket_url": {
      "type": "string",
      "example": "wss://your-websocket-endpoint",
      "description": "The URL we will establish LLM websocket for getting response, usually your server. Check out [LLM WebSocket](https://raw.githubusercontent.com) for more about request format (sent from us) and response format (send to us)."
    },
    "voice_temperature": {
      "type": "number",
      "example": 1,
      "description": "Controls how stable the voice is. Value ranging from [0,2]. Lower value means more stable, and higher value means more variant speech generation. Currently this setting only applies to `11labs` voices. If unset, default value 1 will apply."
    },
    "enable_backchannel": {
      "type": "boolean",
      "example": true,
      "description": "Controls whether the agent would backchannel (agent interjects the speaker with phrases like \"yeah\", \"uh-huh\" to signify interest and engagement). Backchannel when enabled tends to show up more in longer user utterances. If not set, agent will not backchannel."
    },
    "optOutSensitiveDataStorage": {
      "type": "boolean",
      "example": true,
      "description": "Disable transcripts and recordings storage for enhanced privacy. Access transcripts securely via webhooks."
    }
  }
}
object BookAppointmentCalTool
{
  "type": "object",
  "required": [
    "type",
    "name",
    "cal_api_key",
    "event_type_id"
  ],
  "properties": {
    "name": {
      "type": "string",
      "description": "Name of the tool. Must be unique within all tools available to LLM at any given time (general tools + state tools + state transitions)."
    },
    "type": {
      "enum": [
        "book_appointment_cal"
      ],
      "type": "string"
    },
    "timezone": {
      "type": "string",
      "description": "Timezone to be used when booking appointment, must be in [IANA timezone database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). If not specified, will check if user specified timezone in call, and if not, will use the timezone of the Retell servers."
    },
    "cal_api_key": {
      "type": "string",
      "description": "Cal.com Api key that have access to the cal.com event you want to book appointment."
    },
    "description": {
      "type": "string",
      "description": "Describes when to book the appointment."
    },
    "event_type_id": {
      "type": "number",
      "description": "Cal.com event type id number for the cal.com event you want to book appointment."
    }
  }
}
object CallBase
{
  "type": "object",
  "required": [
    "call_id",
    "agent_id",
    "audio_websocket_protocol",
    "audio_encoding",
    "sample_rate",
    "call_status",
    "start_timestamp"
  ],
  "properties": {
    "call_id": {
      "type": "string",
      "example": "Jabr9TXYYJHfvl6Syypi88rdAHYHmcq6",
      "description": "Unique id of the call. Used to identify in LLM websocket and used to authenticate in audio websocket."
    },
    "agent_id": {
      "type": "string",
      "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
      "description": "Corresponding agent id of this call."
    },
    "metadata": {
      "type": "object",
      "description": "An abtriary object for storage purpose only. You can put anything here like your own id for the call, twilio SID, internal customer id. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "to_number": {
      "type": "string",
      "example": 12137771235,
      "description": "The callee number. This field is storage purpose only, set this if you want the call object to contain it so that it's easier to reference it. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "call_status": {
      "enum": [
        "registered",
        "ongoing",
        "ended",
        "error"
      ],
      "type": "string",
      "example": "registered",
      "description": "Status of call.\n\n- `registered`: Call id issued, ready to make a call using this id.\n\n- `ongoing`: Call connected and ongoing.\n\n- `ended`: The underlying websocket has ended for the call. Either user or agent hanged up, or call transferred. \n\n- `error`: Call encountered error."
    },
    "from_number": {
      "type": "string",
      "example": 12137771234,
      "description": "The caller number. This field is storage purpose only, set this if you want the call object to contain it so that it's easier to reference it. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "sample_rate": {
      "type": "integer",
      "example": 24000,
      "description": "Sample rate of the conversation, the input and output audio bytes will all conform to this rate. Check the audio source, audio format, and voice used for the agent to select one that works. supports value ranging from [8000, 48000]. Note for Twilio `mulaw` encoding, the sample rate has to be 8000.\n\n- `s16le` sample rate recommendation (natively supported, lowest latency): \n  - elevenlabs voices: 16000, 22050, 24000, 44100.\n  - openai voices: 24000.\n\n  - deepgram voices: 8000, 16000, 24000, 32000, 48000."
    },
    "audio_encoding": {
      "enum": [
        "s16le",
        "mulaw"
      ],
      "type": "string",
      "example": "s16le",
      "description": "The audio encoding of the call. The following formats are supported: \n\n- `s16le` 16 bit linear PCM audio, the native format for web audio capture and playback.\n\n- `mulaw` non-linear audio encoding technique used in telephony. Commonly used by Twilio."
    },
    "start_timestamp": {
      "type": "integer",
      "example": 1703302407333,
      "description": "Begin timestamp (milliseconds since epoch) of the call."
    },
    "audio_websocket_protocol": {
      "enum": [
        "web",
        "twilio"
      ],
      "type": "string",
      "example": "twilio",
      "description": "Where the audio websocket would connect from would determine the format / protocol of websocket messages, and would determine how our server read audio bytes and send audio bytes.:\n\n- `web`: The protocol defined by Retell, commonly used for connecting from web frontend. Also useful for those who want to manipulate audio bytes directly.\n\n- `twilio`: The [websocket protocol](https://www.twilio.com/docs/voice/twiml/stream#message-media) defined by Twilio, used when your system uses Twilio, and supplies Retell audio websocket url to Twilio."
    },
    "end_call_after_silence_ms": {
      "type": "integer",
      "example": 600000,
      "description": "If users stay silent for a period, end the call. By default, it is set to 600,000 ms (10 min). The minimum value allowed is 10,000 ms (10 s)."
    },
    "retell_llm_dynamic_variables": {
      "type": "object",
      "example": {
        "customer_name": "John Doe"
      },
      "description": "Add optional dynamic variables in key value pairs of string that injects into your Retell LLM prompt and tool description. Only applicable for Retell LLM.",
      "additionalProperties": {}
    }
  }
}
object CallDetail
{
  "allOf": [
    {
      "$ref": "#/components/schemas/CallBase"
    },
    {
      "properties": {
        "transcript": {
          "type": "string",
          "example": "Agent: hi how are you doing?\nUser: Doing pretty well. How are you?\nAgent: That's great to hear! I'm doing well too, thanks! What's up?\nUser: I don't have anything in particular.\nAgent: Got it, just checking in!\nUser: Alright. See you.\nAgent: have a nice day\n ",
          "description": "Transcription of the call. Available after call ends."
        },
        "e2e_latency": {
          "type": "object",
          "properties": {
            "max": {
              "type": "number",
              "example": 2.7,
              "description": "Maximum end to end latency in the call."
            },
            "min": {
              "type": "number",
              "example": 0.5,
              "description": "Minimum end to end latency in the call."
            },
            "num": {
              "type": "number",
              "example": 10,
              "description": "Number of turn change. We track latency every time turn change between user and agent."
            },
            "p50": {
              "type": "number",
              "example": 0.8,
              "description": "50 percentile of end to end latency."
            },
            "p90": {
              "type": "number",
              "example": 1.2,
              "description": "90 percentile of end to end latency."
            },
            "p95": {
              "type": "number",
              "example": 1.5,
              "description": "95 percentile of end to end latency."
            },
            "p99": {
              "type": "number",
              "example": 2.7,
              "description": "99 percentile of end to end latency."
            }
          },
          "description": "End to end latency (from user stops talking to agent start talking) tracking of the call, available after call ends. This latency does not account for the network trip time from Retell server to user frontend."
        },
        "end_timestamp": {
          "type": "integer",
          "example": 1703302428855,
          "description": "End timestamp (milliseconds since epoch) of the call. Available after call ends."
        },
        "recording_url": {
          "type": "string",
          "example": "https://retellai.s3.us-west-2.amazonaws.com/Jabr9TXYYJHfvl6Syypi88rdAHYHmcq6/recording.wav",
          "description": "Recording of the call. Available after call ends."
        },
        "public_log_url": {
          "type": "string",
          "example": "https://retellai.s3.us-west-2.amazonaws.com/Jabr9TXYYJHfvl6Syypi88rdAHYHmcq6/public_log.txt",
          "description": "Public log of the call, containing details about all the requests and responses received in LLM WebSocket, latency tracking for each turntaking, helpful for debugging and tracing. Available after call ends."
        },
        "transcript_object": {
          "type": "array",
          "items": {
            "$ref": "#/components/schemas/Utterance"
          },
          "description": "Transcript of the call in the format of a list of utterance, with timestamp. Available after call ends."
        }
      }
    }
  ]
}
object CallEstablishConnection401Response
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "API key is missing or invalid."
    }
  }
}
object CallEstablishConnection402Response
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "Trial has ended, please add payment method."
    }
  }
}
object CallEstablishConnection422Response
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "Cannot find requested asset under given api key."
    }
  }
}
object CallEstablishConnection429Response
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "Account rate limited, please throttle your requests."
    }
  }
}
object CallEstablishConnection500Response
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "An unexpected server error occurred."
    }
  }
}
object CallEstablishConnectionRequest
{
  "type": "object",
  "required": [
    "agent_id",
    "audio_websocket_protocol",
    "audio_encoding",
    "sample_rate"
  ],
  "properties": {
    "agent_id": {
      "type": "string",
      "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
      "description": "Unique id of agent used for the call. Your agent would contain the LLM Websocket url used for this call."
    },
    "metadata": {
      "type": "object",
      "description": "An abtriary object for storage purpose only. You can put anything here like your own id for the call, twilio SID, internal customer id. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "to_number": {
      "type": "string",
      "example": 12137771235,
      "description": "The callee number. This field is storage purpose only, set this if you want the call object to contain it so that it's easier to reference it. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "from_number": {
      "type": "string",
      "example": 12137771234,
      "description": "The caller number. This field is storage purpose only, set this if you want the call object to contain it so that it's easier to reference it. Not used for processing, when we connect to your LLM websocket server, you can then get it from the call object."
    },
    "sample_rate": {
      "type": "integer",
      "example": 24000,
      "description": "Sample rate of the conversation, the input and output audio bytes will all conform to this rate. Check the audio source, audio format, and voice used for the agent to select one that works. supports value ranging from [8000, 48000]. Note for Twilio `mulaw` encoding, the sample rate has to be 8000.\n\n- `s16le` sample rate recommendation (natively supported, lowest latency): \n  - elevenlabs voices: 16000, 22050, 24000, 44100.\n  - openai voices: 24000.\n\n  - deepgram voices: 8000, 16000, 24000, 32000, 48000."
    },
    "audio_encoding": {
      "enum": [
        "s16le",
        "mulaw"
      ],
      "type": "string",
      "example": "s16le",
      "description": "The audio encoding of the call. The following formats are supported: \n\n- `s16le` 16 bit linear PCM audio, the native format for web audio capture and playback.\n\n- `mulaw` non-linear audio encoding technique used in telephony. Commonly used by Twilio."
    },
    "audio_websocket_protocol": {
      "enum": [
        "web",
        "twilio"
      ],
      "type": "string",
      "example": "twilio",
      "description": "Where the audio websocket would connect from would determine the format / protocol of websocket messages, and would determine how our server read audio bytes and send audio bytes.:\n\n- `web`: The protocol defined by Retell, commonly used for connecting from web frontend. Also useful for those who want to manipulate audio bytes directly.\n\n- `twilio`: The [websocket protocol](https://www.twilio.com/docs/voice/twiml/stream#message-media) defined by Twilio, used when your system uses Twilio, and supplies Retell audio websocket url to Twilio."
    },
    "end_call_after_silence_ms": {
      "type": "integer",
      "example": 600000,
      "description": "If users stay silent for a period, end the call. By default, it is set to 600,000 ms (10 min). The minimum value allowed is 10,000 ms (10 s)."
    },
    "retell_llm_dynamic_variables": {
      "type": "object",
      "example": {
        "customer_name": "John Doe"
      },
      "description": "Add optional dynamic variables in key value pairs of string that injects into your Retell LLM prompt and tool description. Only applicable for Retell LLM.",
      "additionalProperties": {}
    }
  }
}
object CallEstablishConnectionResponse
{
  "type": "object",
  "properties": {
    "error_message": {
      "type": "string",
      "example": "Invalid request format, please check API reference."
    }
  }
}
array CallGetDetailsResponse
{
  "type": "array",
  "items": {
    "$ref": "#/components/schemas/CallDetail"
  }
}
object CallMakeConnectionRequest
{
  "type": "object",
  "required": [
    "phone_number"
  ],
  "properties": {
    "phone_number": {
      "type": "object",
      "required": [
        "from",
        "to"
      ],
      "properties": {
        "to": {
          "type": "string",
          "example": 12137774445,
          "description": "The number you want to call, in BCP 47 format."
        },
        "from": {
          "type": "string",
          "example": 14157774444,
          "description": "The number you own in BCP 47 format."
        }
      }
    },
    "override_agent_id": {
      "type": "string",
      "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
      "description": "For this particular call, override the agent used with this agent id. This does not bind the agent to this number, this is for one time override."
    },
    "retell_llm_dynamic_variables": {
      "type": "object",
      "example": {
        "customer_name": "John Doe"
      },
      "description": "Add optional dynamic variables in key value pairs of string that injects into your Retell LLM prompt and tool description. Only applicable for Retell LLM.",
      "additionalProperties": {}
    }
  }
}
object CheckAvailabilityCalTool
{
  "type": "object",
  "required": [
    "type",
    "name",
    "cal_api_key",
    "event_type_id"
  ],
  "properties": {
    "name": {
      "type": "string",
      "description": "Name of the tool. Must be unique within all tools available to LLM at any given time (general tools + state tools + state transitions)."
    },
    "type": {
      "enum": [
        "check_availability_cal"
      ],
      "type": "string"
    },
    "timezone": {
      "type": "string",
      "description": "Timezone to be used when checking availability, must be in [IANA timezone database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). If not specified, will check if user specified timezone in call, and if not, will use the timezone of the Retell servers."
    },
    "cal_api_key": {
      "type": "string",
      "description": "Cal.com Api key that have access to the cal.com event you want to check availability for."
    },
    "description": {
      "type": "string",
      "description": "Describes when to check availability."
    },
    "event_type_id": {
      "type": "number",
      "description": "Cal.com event type id number for the cal.com event you want to check availability for."
    }
  }
}
object CustomTool
{
  "type": "object",
  "required": [
    "type",
    "name",
    "url",
    "description",
    "speak_during_execution",
    "speak_after_execution"
  ],
  "properties": {
    "url": {
      "type": "string",
      "description": "The URL we will post the function name and arguments to get a result for the function. Usually this is your server."
    },
    "name": {
      "type": "string",
      "description": "Name of the tool. Must be unique within all tools available to LLM at any given time (general tools + state tools + state edges)."
    },
    "type": {
      "enum": [
        "custom"
      ],
      "type": "string"
    },
    "parameters": {
      "$ref": "#/components/schemas/ToolParameter"
    },
    "description": {
      "type": "string",
      "description": "Describes what this tool does and when to call this tool."
    },
    "speak_after_execution": {
      "type": "boolean",
      "description": "Determines whether the agent would call LLM another time and speak when the result of function is obtained. Usually this needs to get turned on so user can get update for the function call."
    },
    "speak_during_execution": {
      "type": "boolean",
      "description": "Determines whether the agent would say sentence like \"One moment, let me check that.\" when executing the function. Recommend to turn on if your function call takes over 1s (including network) to complete, so that your agent remains responsive."
    },
    "execution_message_description": {
      "type": "string",
      "description": "The description for the sentence agent say during execution. Only applicable when speak_during_execution is true. Can write what to say or even provide examples. The default is \"The message you will say to callee when calling this tool. Make sure it fits into the conversation smoothly.\"."
    }
  }
}
object EndCallTool
{
  "type": "object",
  "required": [
    "type",
    "name"
  ],
  "properties": {
    "name": {
      "type": "string",
      "description": "Name of the tool. Must be unique within all tools available to LLM at any given time (general tools + state tools + state transitions)."
    },
    "type": {
      "enum": [
        "end_call"
      ],
      "type": "string"
    },
    "description": {
      "type": "string",
      "description": "Describes when to end the call."
    }
  }
}
array PhoneNumberGetAllNumbersResponse
{
  "type": "array",
  "items": {
    "$ref": "#/components/schemas/PhoneNumberResponse"
  }
}
object PhoneNumberPurchaseNumberAndBindAgentRequest
{
  "type": "object",
  "required": [
    "agent_id"
  ],
  "properties": {
    "agent_id": {
      "type": "string",
      "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
      "description": "Unique id of agent to bind to newly obtained number. The number will automatically use the agent when doing inbound / outbound calls."
    },
    "area_code": {
      "type": "integer",
      "example": 415,
      "description": "Area code of the number to obtain. Format is a 3 digit integer. Currently only supports US area code."
    }
  }
}
object PhoneNumberResponse
{
  "type": "object",
  "required": [
    "phone_number",
    "phone_number_pretty",
    "agent_id",
    "area_code",
    "last_modification_timestamp"
  ],
  "properties": {
    "agent_id": {
      "type": "string",
      "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
      "description": "Unique id of agent to bind to newly obtained number. The number will automatically use the agent when doing inbound / outbound calls."
    },
    "area_code": {
      "type": "integer",
      "example": 415,
      "description": "Area code of the number to obtain. Format is a 3 digit integer. Currently only supports US area code."
    },
    "phone_number": {
      "type": "string",
      "example": 14157774444,
      "description": "BCP 47 format of the number (+country code, then number with no space, no special characters), used as the unique identifier for phone number APIs."
    },
    "phone_number_pretty": {
      "type": "string",
      "example": "+1 (415) 777-4444",
      "description": "Pretty printed phone number, provided for your reference."
    },
    "last_modification_timestamp": {
      "type": "integer",
      "example": 1703413636133,
      "description": "Last modification timestamp (milliseconds since epoch). Either the time of last update or creation if no updates available."
    }
  }
}
object PhoneNumberUpdateRetellLlmRequest
{
  "type": "object",
  "required": [
    "agent_id"
  ],
  "properties": {
    "agent_id": {
      "type": "string",
      "description": "Unique id of agent to bind to number. The number will automatically use the agent when doing inbound / outbound calls."
    }
  }
}
object RetellLLMBase
{
  "type": "object",
  "properties": {
    "states": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/State"
      },
      "example": [
        {
          "name": "information_collection",
          "edges": [
            {
              "description": "Transition to book an appointment if the user is due for an annual checkup based on the last checkup time collected.",
              "destination_state_name": "appointment_booking",
              "speak_during_transition": false
            }
          ],
          "tools": [
            {
              "name": "transfer_to_support",
              "type": "transfer_call",
              "number": "16175551212",
              "description": "Transfer to the support team when user seems angry or explicitly requests a human agent"
            }
          ],
          "state_prompt": "You will follow the steps below to collect information..."
        },
        {
          "name": "appointment_booking",
          "tools": [
            {
              "name": "book_appointment",
              "type": "book_appointment_cal",
              "timezone": "America/Los_Angeles",
              "cal_api_key": "cal_live_xxxxxxxxxxxx",
              "description": "Book an annual check up when user provided name, email, phone number, and have selected a time.",
              "event_type_id": 60444
            }
          ],
          "state_prompt": "You will follow the steps below to book an appointment..."
        }
      ],
      "description": "States of the LLM. This is to help reduce prompt length and tool choices when the call can be broken into distinct states. With shorter prompts and less tools, the LLM can better focus and follow the rules, minimizing hallucination. If this field is not set, the agent would only have general prompt and general tools (essentially one state)."
    },
    "begin_message": {
      "type": "string",
      "example": "Hey I am a virtual assistant calling from Retell Hospital.",
      "nullable": true,
      "description": "First utterance said by the agent in the call. If not set, LLM will dynamically generate a message. If set to \"\", agent will wait for user to speak first."
    },
    "general_tools": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/Tool"
      },
      "example": [
        {
          "name": "end_call",
          "type": "end_call",
          "description": "End the call with user only when user explicitly requests it."
        }
      ],
      "description": "A list of tools the model may call (to get external knowledge, call API, etc). You can select from some common predefined tools like end call, transfer call, etc; or you can create your own custom tool (last option) for the LLM to use. \n\n- Tools of LLM (with state) = general tools + state tools + state transitions\n\n- Tools of LLM (no state) = general tools"
    },
    "general_prompt": {
      "type": "string",
      "example": "You are ...",
      "description": "General prompt that's appended to system prompt no matter what state the agent is in. \n\n- System prompt (with state) = general prompt + state prompt.\n\n- System prompt (no state) = general prompt."
    },
    "starting_state": {
      "type": "string",
      "example": "information_collection",
      "description": "Name of the starting state. Required if states is not empty."
    }
  }
}
object RetellLLMResponse
{
  "allOf": [
    {
      "properties": {
        "llm_id": {
          "type": "string",
          "example": "oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
          "description": "Unique id of Retell LLM."
        },
        "llm_websocket_url": {
          "type": "string",
          "example": "wss://api.retellai.com/retell-llm/llm-websocket/oBeDLoLOeuAbiuaMFXRtDOLriTJ5tSxD",
          "description": "The LLM Websocket URL constructed from unique id of Retell LLM. Used in agent API to create / update agent."
        }
      }
    },
    {
      "$ref": "#/components/schemas/RetellLLMBase"
    },
    {
      "properties": {
        "last_modification_timestamp": {
          "type": "integer",
          "example": 1703413636133,
          "description": "Last modification timestamp (milliseconds since epoch). Either the time of last update or creation if no updates available."
        }
      }
    }
  ],
  "required": [
    "llm_id",
    "llm_websocket_url",
    "last_modification_timestamp"
  ]
}
array RetellLlmListAllResponse
{
  "type": "array",
  "items": {
    "$ref": "#/components/schemas/RetellLLMResponse"
  }
}
object State
{
  "type": "object",
  "required": [
    "name"
  ],
  "properties": {
    "name": {
      "type": "string",
      "example": "information_collection",
      "description": "Name of the state, must be unique for each state."
    },
    "edges": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/StateEdge"
      },
      "description": "Edges of the state define how and what state can be reached from this state."
    },
    "tools": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/Tool"
      },
      "description": "A list of tools specific to this state the model may call (to get external knowledge, call API, etc). You can select from some common predefined tools like end call, transfer call, etc; or you can create your own custom tool (last option) for the LLM to use. \n\n- Tools of LLM = general tools + state tools + state transitions"
    },
    "state_prompt": {
      "type": "string",
      "example": "## Task\nYou will follow the steps below...",
      "description": "Prompt of the state, will be appended to the system prompt of LLM. \n\n - System prompt = general prompt + state prompt."
    }
  }
}
object StateEdge
{
  "type": "object",
  "required": [
    "destination_state_name",
    "description",
    "speak_during_transition"
  ],
  "properties": {
    "parameters": {
      "$ref": "#/components/schemas/ToolParameter",
      "description": "Describes what parameters you want to extract out when the transition changes. The parameters extracted here can be referenced in prompts & function descriptions of later states via dynamic variables. The parameters the function accepts, described as a JSON Schema object. See [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format."
    },
    "description": {
      "type": "string",
      "description": "Describes what's the transition and at what time / criteria should this transition happen."
    },
    "destination_state_name": {
      "type": "string",
      "description": "The destination state name when going through transition of state via this edge. State transition internally is implemented as a tool call of LLM, and a tool call with name \"transition_to_{destination_state_name}\" will get created. Feel free to reference it inside the prompt."
    },
    "speak_during_transition": {
      "type": "boolean",
      "description": "After the state transitions, the agent would speak based on the new prompt and tools in the new state. This bit here controls whether to speak a transition sentence during the transition (so the agent would say sentences like \"Let's move on to the next section to help you set up an account.\", the state transitions, and the agent continues to speak.). Usually this is not necessary, and is recommended to set to false to avoid LLM repeating itself during and after transition."
    }
  }
}
object Tool
{
  "oneOf": [
    {
      "$ref": "#/components/schemas/EndCallTool"
    },
    {
      "$ref": "#/components/schemas/TransferCallTool"
    },
    {
      "$ref": "#/components/schemas/CheckAvailabilityCalTool"
    },
    {
      "$ref": "#/components/schemas/BookAppointmentCalTool"
    },
    {
      "$ref": "#/components/schemas/CustomTool"
    }
  ]
}
object ToolParameter
{
  "type": "object",
  "required": [
    "type",
    "properties"
  ],
  "properties": {
    "type": {
      "enum": [
        "object"
      ],
      "type": "string",
      "description": "Type must be \"object\" for a JSON Schema object."
    },
    "required": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of names of required properties when generating this parameter. The LLM will do its best to generate the required properties in its function arguments. Each property must exist in properties."
    },
    "properties": {
      "type": "object",
      "description": "The value of properties is an object, where each key is the name of a property and each value is a schema used to validate that property.",
      "additionalProperties": {}
    }
  },
  "description": "The parameters the function accepts, described as a JSON Schema object. See [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. Omitting parameters defines a function with an empty parameter list."
}
object TransferCallTool
{
  "type": "object",
  "required": [
    "type",
    "name",
    "number"
  ],
  "properties": {
    "name": {
      "type": "string",
      "example": "transfer_to_support",
      "description": "Name of the tool. Must be unique within all tools available to LLM at any given time (general tools + state tools + state edges)."
    },
    "type": {
      "enum": [
        "transfer_call"
      ],
      "type": "string"
    },
    "number": {
      "type": "string",
      "description": "The number to transfer to in E.164 format (a + and country code, then the phone number with no space or other special characters). For example, +16175551212."
    },
    "description": {
      "type": "string",
      "description": "Describes when to transfer the call."
    }
  }
}
object Utterance
{
  "type": "object",
  "required": [
    "role",
    "content",
    "words"
  ],
  "properties": {
    "role": {
      "enum": [
        "agent",
        "user"
      ],
      "type": "string",
      "example": "agent",
      "description": "Documents whether this utterance is spoken by agent or user."
    },
    "words": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "end": {
            "type": "number",
            "description": "End time of the word in the call in seconds. This is relative audio time, not wall time."
          },
          "word": {
            "type": "string",
            "description": "Word transcript (with punctuation if applicable)."
          },
          "start": {
            "type": "number",
            "description": "Start time of the word in the call in seconds. This is relative audio time, not wall time."
          }
        }
      },
      "example": [
        {
          "end": 1.3,
          "word": "hi",
          "start": 0.7
        }
      ],
      "description": "Array of words in the utterance with the word timestamp. Useful for understanding what word was spoken at what time. Note that the word timestamp is not guaranteed to be accurate, it's more like an approximation."
    },
    "content": {
      "type": "string",
      "example": "hi how are you doing?",
      "description": "Transcript of the utterance."
    }
  }
}