⚠️ SIP Billing Starts November 10, 2025 - See Ultravox Pricing for details.
Returns details for all created calls in a scheduled call batch
curl --request GET \
--url https://api.ultravox.ai/api/agents/{agent_id}/scheduled_batches/{batch_id}/created_calls \
--header 'X-API-Key: <api-key>'
{
"results": [
{
"callId": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"clientVersion": "<string>",
"created": "2023-11-07T05:31:56Z",
"joined": "2023-11-07T05:31:56Z",
"ended": "2023-11-07T05:31:56Z",
"endReason": "unjoined",
"billedDuration": "<string>",
"billingStatus": "BILLING_STATUS_PENDING",
"firstSpeaker": "FIRST_SPEAKER_AGENT",
"firstSpeakerSettings": {
"user": {
"fallback": {
"delay": "<string>",
"text": "<string>",
"prompt": "<string>"
}
},
"agent": {
"uninterruptible": true,
"text": "<string>",
"prompt": "<string>",
"delay": "<string>"
}
},
"initialOutputMedium": "MESSAGE_MEDIUM_VOICE",
"joinUrl": "<string>",
"shortSummary": "<string>",
"summary": "<string>",
"agent": {
"agentId": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"name": "<string>"
},
"agentId": "<string>",
"experimentalSettings": "<unknown>",
"metadata": {},
"initialState": {},
"requestContext": "<unknown>",
"sipDetails": {
"billedDuration": "<string>",
"terminationReason": "SIP_TERMINATION_NORMAL"
},
"inactivityMessages": [
{
"duration": "<string>",
"message": "<string>",
"endBehavior": "END_BEHAVIOR_UNSPECIFIED"
}
],
"joinTimeout": "30s",
"languageHint": "<string>",
"maxDuration": "3600s",
"medium": {
"webRtc": {
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"twilio": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"serverWebSocket": {
"inputSampleRate": 123,
"outputSampleRate": 123,
"clientBufferSizeMs": 123,
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"telnyx": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"plivo": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"exotel": {},
"sip": {
"incoming": {},
"outgoing": {
"to": "<string>",
"from": "<string>",
"username": "<string>",
"password": "<string>"
}
}
},
"model": "fixie-ai/ultravox",
"recordingEnabled": false,
"systemPrompt": "<string>",
"temperature": 0,
"timeExceededMessage": "<string>",
"voice": "<string>",
"externalVoice": {
"elevenLabs": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"useSpeakerBoost": true,
"style": 123,
"similarityBoost": 123,
"stability": 123,
"pronunciationDictionaries": [
{
"dictionaryId": "<string>",
"versionId": "<string>"
}
],
"optimizeStreamingLatency": 123,
"maxSampleRate": 123
},
"cartesia": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"emotion": "<string>",
"emotions": [
"<string>"
],
"generationConfig": {
"volume": 123,
"speed": 123,
"emotion": "<string>"
}
},
"lmnt": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"conversational": true
},
"google": {
"voiceId": "<string>",
"speakingRate": 123
},
"generic": {
"url": "<string>",
"headers": {},
"body": {},
"responseSampleRate": 123,
"responseWordsPerMinute": 123,
"responseMimeType": "<string>",
"jsonAudioFieldPath": "<string>",
"jsonByteEncoding": "JSON_BYTE_ENCODING_UNSPECIFIED"
}
},
"transcriptOptional": true,
"vadSettings": {
"turnEndpointDelay": "<string>",
"minimumTurnDuration": "<string>",
"minimumInterruptionDuration": "<string>",
"frameActivationThreshold": 123
},
"dataConnectionConfig": {
"websocketUrl": "<string>",
"audioConfig": {
"sampleRate": 123,
"channelMode": "CHANNEL_MODE_UNSPECIFIED"
},
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"callbacks": {
"joined": {
"url": "<string>",
"secrets": [
"<string>"
]
},
"ended": {
"url": "<string>",
"secrets": [
"<string>"
]
},
"billed": {
"url": "<string>",
"secrets": [
"<string>"
]
}
}
}
],
"next": "http://api.example.org/accounts/?cursor=cD00ODY%3D",
"previous": "http://api.example.org/accounts/?cursor=cj0xJnA9NDg3",
"total": 123
}
API key
The pagination cursor value.
Number of results to return per page.
Show child attributes
The version of the client that joined this call.
The reason the call ended.
unjoined - Client never joined; hangup - Client hung up; agent_hangup - Agent hung up; timeout - Call timed out; connection_error - Connection error; system_error - System error.
Allowed values: unjoined, hangup, agent_hangup, timeout, connection_error, system_error
Allowed billing status values: BILLING_STATUS_PENDING, BILLING_STATUS_FREE_CONSOLE, BILLING_STATUS_FREE_ZERO_EFFECTIVE_DURATION, BILLING_STATUS_FREE_MINUTES, BILLING_STATUS_FREE_SYSTEM_ERROR, BILLING_STATUS_FREE_OTHER, BILLING_STATUS_BILLED, BILLING_STATUS_REFUNDED, BILLING_STATUS_UNSPECIFIED
Who was supposed to talk first when the call started. Typically set to FIRST_SPEAKER_USER for outgoing calls and left as the default (FIRST_SPEAKER_AGENT) otherwise.
Allowed values: FIRST_SPEAKER_AGENT, FIRST_SPEAKER_USER
Settings for the initial message to get the call started.
Show child attributes
If set, the user should speak first.
Show child attributes
If set, the agent will start the conversation itself if the user doesn't start speaking within the given delay.
Show child attributes
How long the agent should wait before starting the conversation itself.
A specific greeting the agent should say.
A prompt for the agent to generate a greeting.
If set, the agent should speak first.
Show child attributes
Whether the user should be prevented from interrupting the agent's first message. Defaults to false (meaning the agent is interruptible as usual).
A specific greeting the agent should say.
A prompt for the agent to generate a greeting.
If set, the agent will wait this long before starting its greeting. This may be useful for ensuring the user is ready.
The medium used initially by the agent. May later be changed by the client.
Allowed values: MESSAGE_MEDIUM_VOICE, MESSAGE_MEDIUM_TEXT
A short summary of the call.
A summary of the call.
The ID of the agent used for this call.
Experimental settings for the call.
SIP details for the call, if applicable.
Show child attributes
SIP_TERMINATION_NORMAL, SIP_TERMINATION_INVALID_NUMBER, SIP_TERMINATION_TIMEOUT, SIP_TERMINATION_DESTINATION_UNAVAILABLE, SIP_TERMINATION_BUSY, SIP_TERMINATION_CANCELED, SIP_TERMINATION_REJECTED, SIP_TERMINATION_UNKNOWN Messages spoken by the agent when the user is inactive for the specified duration. Durations are cumulative, so a message m > 1 with duration 30s will be spoken 30 seconds after message m-1.
Show child attributes
The duration after which the message should be spoken.
The message to speak.
The behavior to exhibit when the message is finished being spoken.
Allowed values: END_BEHAVIOR_UNSPECIFIED, END_BEHAVIOR_HANG_UP_SOFT, END_BEHAVIOR_HANG_UP_STRICT
BCP47 language code that may be used to guide speech recognition.
Maximum length: 16
Details about a call's protocol. By default, calls occur over WebRTC using the Ultravox client SDK. Setting a different call medium will prepare the server for a call using a different protocol. At most one call medium may be set.
Show child attributes
The call will use WebRTC with the Ultravox client SDK. This is the default.
Show child attributes
Controls which data messages are enabled for the call.
Show child attributes
Responds to a ping message. (Default: enabled)
Indicates that the agent state has changed. (Default: enabled)
Provides transcripts of the user and agent speech. (Default: enabled)
Requests a client-implemented tool invocation. (Default: enabled)
Requests a data-connection-implemented tool invocation. (Default: enabled for data connections, disabled otherwise)
Requests the client-side audio buffer to be cleared. (Default: enabled for websocket connections, disabled otherwise)
Provides information about the call when it starts. (Default: enabled)
Communicates debug information. (Default: disabled)
Indicates that a call event has been recorded. (Default: disabled)
Indicates that a tool was used. (Default: disabled)
The call will use Twilio's "Media Streams" protocol.
Once you have a join URL from starting a call, include it in your
TwiML like so:
Show child attributes
If set, Ultravox will directly create a call with Twilio. Twilio must be configured for the requesting account.
Show child attributes
The phone number, in E.164 format (e.g. +14155552671), (or sip address) to call.
The phone number or client identifier to use as the caller id. If to is a phone
number, from must be a phone number owned by your Twilio account.
Additional parameters to include in the Twilio call creation request. See https://www.twilio.com/docs/voice/api/call-resource#request-body-parameters
The call will use a plain websocket connection. This is unlikely to yield an acceptable user experience if used from a browser or mobile client, but may be suitable for a server-to-server connection. This option provides a simple way to connect your own server to an Ultravox inference instance.
Show child attributes
The sample rate for input (user) audio. Required.
The desired sample rate for output (agent) audio. If unset, defaults to the input_sample_rate.
The size of the client-side audio buffer in milliseconds. Smaller buffers allow for faster interruptions but may cause audio underflow if network latency fluctuates too greatly. For the best of both worlds, set this to some large value (e.g. 30000) and implement support for playback_clear_buffer messages. Defaults to 60.
Controls which data messages are enabled for the call.
Show child attributes
Responds to a ping message. (Default: enabled)
Indicates that the agent state has changed. (Default: enabled)
Provides transcripts of the user and agent speech. (Default: enabled)
Requests a client-implemented tool invocation. (Default: enabled)
Requests a data-connection-implemented tool invocation. (Default: enabled for data connections, disabled otherwise)
Requests the client-side audio buffer to be cleared. (Default: enabled for websocket connections, disabled otherwise)
Provides information about the call when it starts. (Default: enabled)
Communicates debug information. (Default: disabled)
Indicates that a call event has been recorded. (Default: disabled)
Indicates that a tool was used. (Default: disabled)
The call will use Telnyx's media streaming protocol.
Once you have a join URL from starting a call, include it in your
TexML like so:
Show child attributes
If set, Ultravox will directly create a call with Telnyx. Telnyx must be configured for the requesting account.
Show child attributes
The phone number to call in E.164 format (e.g. +14155552671).
The phone number initiating the call.
Additional parameters to include in the Telnyx call creation request. See https://developers.telnyx.com/api/call-scripting/initiate-texml-call
The call will use Plivo's AudioStreams protocol.
Once you have a join URL from starting a call, include it in your
Plivo XML like so:
Show child attributes
If set, Ultravox will directly create a call with Plivo. Plivo must be configured for the requesting account.
Show child attributes
The phone number(s) or sip URI(s) to call, separated by < if multiple.
The phone number initiating the call, in E.164 format (e.g. +14155552671).
Additional parameters to include in the Plivo call creation request. See https://www.plivo.com/docs/voice/api/call/make-a-call
The call will use Exotel's "Voicebot" protocol. Once you have a join URL from starting a call, provide it to Exotel as the wss target URL for your Voicebot (either directly or more likely dynamically from your own server).
The call will be connected using Session Initiation Protocol (SIP). Note that SIP incurs additional charges and must be enabled for your account.
Show child attributes
Details for an incoming SIP call.
Details for an outgoing SIP call. Ultravox will initiate this call (and there will be no joinUrl).
Show child attributes
The SIP URI to connect to. (Phone numbers are not allowed.)
The SIP URI to connect from. This is the "from" field in the SIP INVITE.
The SIP username to use for authentication.
The password for the specified username.
Required range: 0 <= x <= 1
A voice not known to Ultravox Realtime that can nonetheless be used for a call. Such voices are significantly less validated than normal voices and you'll be responsible for your own TTS-related errors. Exactly one field must be set.
Show child attributes
A voice served by ElevenLabs.
Show child attributes
The ID of the voice in ElevenLabs.
The ElevenLabs model to use.
The speaking rate. Must be between 0.7 and 1.2. Defaults to 1. See https://elevenlabs.io/docs/api-reference/text-to-speech/convert#request.body.voice_settings.speed
The maximum sample rate Ultravox will try to use. ElevenLabs limits your allowed sample rate based on your tier. See https://elevenlabs.io/pricing#pricing-table (and click "Show API details")
A voice served by Cartesia.
Show child attributes
The ID of the voice in Cartesia.
The Cartesia model to use.
(Deprecated) The speaking rate. Must be between -1 and 1. Defaults to 0.
(Deprecated) Use generation_config.emotion instead.
(Deprecated) Use generation_config.emotion instead.
Configure the various attributes of the generated speech.
Show child attributes
Adjust the volume of the generated speech between 0.5x and 2.0x the original volume (default is 1.0x). Valid values are between [0.5, 2.0] inclusive.
Adjust the speed of the generated speech between 0.6x and 2.0x the original speed (default is 1.0x). Valid values are between [0.6, 1.5] inclusive.
The primary emotions are neutral, calm, angry, content, sad, scared. For more options, see Prompting Sonic-3.
A voice served by LMNT.
Show child attributes
The ID of the voice in LMNT.
The LMNT model to use.
The speaking rate. Must be between 0.25 and 2. Defaults to 1. See https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-speed
A voice served by Google, using bidirectional streaming. (For non-streaming or output-only streaming, use generic.)
Show child attributes
The ID (name) of the voice in Google, e.g. "en-US-Chirp3-HD-Charon".
The speaking rate. Must be between 0.25 and 2. Defaults to 1. See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig
A voice served by a generic REST-based TTS API.
Show child attributes
The endpoint to which requests are sent.
The request body to send. Some field should include a placeholder for text represented as {text}. The placeholder will be replaced with the text to synthesize.
The sample rate of the audio returned by the API.
An estimate of the speaking rate of the returned audio in words per minute. This is used for transcript timing while audio is streamed in the response. (Once the response is complete, Ultravox Realtime uses the real audio duration to adjust the timing.) Defaults to 150 and is unused for non-streaming responses.
The real mime type of the content returned by the API. If unset, the Content-Type response header will be used. This is useful for APIs whose response bodies don't strictly adhere to what the API claims via header. For example, if your API claims to return audio/wav but omits the WAV header (thus really returning raw PCM), set this to audio/l16. Similarly, if your API claims to return JSON but actually streams JSON Lines, set this to application/jsonl.
For JSON responses, the path to the field containing base64-encoded audio data. The data must be PCM audio, optionally with a WAV header.
For JSON responses, how audio bytes are encoded into the json_audio_field_path string. Defaults to base64. Also supports hex.
Allowed values: JSON_BYTE_ENCODING_UNSPECIFIED, JSON_BYTE_ENCODING_BASE64, JSON_BYTE_ENCODING_HEX
Indicates whether a transcript is optional for the call.
VAD settings for the call.
Show child attributes
The minimum amount of time the agent will wait to respond after the user seems to be done speaking. Increasing this value will make the agent less eager to respond, which may increase perceived response latency but will also make the agent less likely to jump in before the user is really done speaking.
Built-in VAD currently operates on 32ms frames, so only multiples of 32ms are meaningful. (Anything from 1ms to 31ms will produce the same result.)
Defaults to "0.384s" (384ms) as a starting point, but there's nothing special about this value aside from it corresponding to 12 VAD frames.
The minimum duration of user speech required to be considered a user turn. Increasing this value will cause the agent to ignore short user audio. This may be useful in particularly noisy environments, but it comes at the cost of possibly ignoring very short user responses such as "yes" or "no".
Defaults to "0s" meaning the agent considers all user audio inputs (that make it through built-in noise cancellation).
The minimum duration of user speech required to interrupt the agent. This works the same way as minimumTurnDuration, but allows for a higher threshold for interrupting the agent. (This value will be ignored if it is less than minimumTurnDuration.)
Defaults to "0.09s" (90ms) as a starting point, but there's nothing special about this value.
The threshold for the VAD to consider a frame as speech. This is a value between 0.1 and 1.
Minimum value is 0.1, which is the default value.
Settings for exchanging data messages with an additional participant.
Show child attributes
The websocket URL to which the session will connect to stream data messages.
Audio configuration for the data connection. If not set, no audio will be sent.
Show child attributes
The sample rate of the audio stream. If not set, will default to 16000.
The audio channel mode to use. CHANNEL_MODE_MIXED will combine user and agent audio into a single mono output while CHANNEL_MODE_SEPARATED will result in stereo audio where user and agent are separated. The latter is the default.
Allowed values: CHANNEL_MODE_UNSPECIFIED, CHANNEL_MODE_MIXED, CHANNEL_MODE_SEPARATED
Controls which data messages are enabled for the data connection.
Show child attributes
Responds to a ping message. (Default: enabled)
Indicates that the agent state has changed. (Default: enabled)
Provides transcripts of the user and agent speech. (Default: enabled)
Requests a client-implemented tool invocation. (Default: enabled)
Requests a data-connection-implemented tool invocation. (Default: enabled for data connections, disabled otherwise)
Requests the client-side audio buffer to be cleared. (Default: enabled for websocket connections, disabled otherwise)
Provides information about the call when it starts. (Default: enabled)
Communicates debug information. (Default: disabled)
Indicates that a call event has been recorded. (Default: disabled)
Indicates that a tool was used. (Default: disabled)
Callbacks configuration for the call.
Show child attributes
"http://api.example.org/accounts/?cursor=cD00ODY%3D"
"http://api.example.org/accounts/?cursor=cj0xJnA9NDg3"
123
curl --request GET \
--url https://api.ultravox.ai/api/agents/{agent_id}/scheduled_batches/{batch_id}/created_calls \
--header 'X-API-Key: <api-key>'
{
"results": [
{
"callId": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"clientVersion": "<string>",
"created": "2023-11-07T05:31:56Z",
"joined": "2023-11-07T05:31:56Z",
"ended": "2023-11-07T05:31:56Z",
"endReason": "unjoined",
"billedDuration": "<string>",
"billingStatus": "BILLING_STATUS_PENDING",
"firstSpeaker": "FIRST_SPEAKER_AGENT",
"firstSpeakerSettings": {
"user": {
"fallback": {
"delay": "<string>",
"text": "<string>",
"prompt": "<string>"
}
},
"agent": {
"uninterruptible": true,
"text": "<string>",
"prompt": "<string>",
"delay": "<string>"
}
},
"initialOutputMedium": "MESSAGE_MEDIUM_VOICE",
"joinUrl": "<string>",
"shortSummary": "<string>",
"summary": "<string>",
"agent": {
"agentId": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"name": "<string>"
},
"agentId": "<string>",
"experimentalSettings": "<unknown>",
"metadata": {},
"initialState": {},
"requestContext": "<unknown>",
"sipDetails": {
"billedDuration": "<string>",
"terminationReason": "SIP_TERMINATION_NORMAL"
},
"inactivityMessages": [
{
"duration": "<string>",
"message": "<string>",
"endBehavior": "END_BEHAVIOR_UNSPECIFIED"
}
],
"joinTimeout": "30s",
"languageHint": "<string>",
"maxDuration": "3600s",
"medium": {
"webRtc": {
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"twilio": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"serverWebSocket": {
"inputSampleRate": 123,
"outputSampleRate": 123,
"clientBufferSizeMs": 123,
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"telnyx": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"plivo": {
"outgoing": {
"to": "<string>",
"from": "<string>",
"additionalParams": {}
}
},
"exotel": {},
"sip": {
"incoming": {},
"outgoing": {
"to": "<string>",
"from": "<string>",
"username": "<string>",
"password": "<string>"
}
}
},
"model": "fixie-ai/ultravox",
"recordingEnabled": false,
"systemPrompt": "<string>",
"temperature": 0,
"timeExceededMessage": "<string>",
"voice": "<string>",
"externalVoice": {
"elevenLabs": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"useSpeakerBoost": true,
"style": 123,
"similarityBoost": 123,
"stability": 123,
"pronunciationDictionaries": [
{
"dictionaryId": "<string>",
"versionId": "<string>"
}
],
"optimizeStreamingLatency": 123,
"maxSampleRate": 123
},
"cartesia": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"emotion": "<string>",
"emotions": [
"<string>"
],
"generationConfig": {
"volume": 123,
"speed": 123,
"emotion": "<string>"
}
},
"lmnt": {
"voiceId": "<string>",
"model": "<string>",
"speed": 123,
"conversational": true
},
"google": {
"voiceId": "<string>",
"speakingRate": 123
},
"generic": {
"url": "<string>",
"headers": {},
"body": {},
"responseSampleRate": 123,
"responseWordsPerMinute": 123,
"responseMimeType": "<string>",
"jsonAudioFieldPath": "<string>",
"jsonByteEncoding": "JSON_BYTE_ENCODING_UNSPECIFIED"
}
},
"transcriptOptional": true,
"vadSettings": {
"turnEndpointDelay": "<string>",
"minimumTurnDuration": "<string>",
"minimumInterruptionDuration": "<string>",
"frameActivationThreshold": 123
},
"dataConnectionConfig": {
"websocketUrl": "<string>",
"audioConfig": {
"sampleRate": 123,
"channelMode": "CHANNEL_MODE_UNSPECIFIED"
},
"dataMessages": {
"pong": true,
"state": true,
"transcript": true,
"clientToolInvocation": true,
"dataConnectionToolInvocation": true,
"playbackClearBuffer": true,
"callStarted": true,
"debug": true,
"callEvent": true,
"toolUsed": true
}
},
"callbacks": {
"joined": {
"url": "<string>",
"secrets": [
"<string>"
]
},
"ended": {
"url": "<string>",
"secrets": [
"<string>"
]
},
"billed": {
"url": "<string>",
"secrets": [
"<string>"
]
}
}
}
],
"next": "http://api.example.org/accounts/?cursor=cD00ODY%3D",
"previous": "http://api.example.org/accounts/?cursor=cj0xJnA9NDg3",
"total": 123
}