Audio transcription is handled by the `/chat_generate` endpoint as a preprocessing step when audio input is provided.

To transcribe audio, send your file to the `/chat_generate` endpoint using the `multipart/form-data` format.
```bash
curl -X POST https://api.addisassistant.com/api/v1/chat_generate \
  -H "X-API-Key: YOUR_API_KEY" \
  -F "chat_audio_input=@/path/to/your-audio.wav" \
  -F 'request_data={"target_language": "am"};type=application/json'
```
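The same request can be made from a script. Below is a minimal Node.js sketch, assuming Node 18+ (built-in `fetch`, `FormData`, and `Blob`); the file path and API key are placeholders:

```javascript
import { readFile } from "node:fs/promises";

// Minimal sketch: send a local audio file to /chat_generate (Node 18+).
async function transcribeFile(path) {
  const formData = new FormData();

  // Wrap the raw bytes in a Blob so FormData sends them as a file part
  const bytes = await readFile(path);
  formData.append(
    "chat_audio_input",
    new Blob([bytes], { type: "audio/wav" }),
    "audio.wav",
  );
  formData.append("request_data", JSON.stringify({ target_language: "am" }));

  const response = await fetch(
    "https://api.addisassistant.com/api/v1/chat_generate",
    {
      method: "POST",
      headers: { "X-API-Key": "YOUR_API_KEY" },
      body: formData,
    },
  );
  if (!response.ok) throw new Error(`API error: ${response.status}`);
  return response.json();
}

transcribeFile("/path/to/your-audio.wav").then((result) =>
  console.log(result.transcription_clean),
);
```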
The multipart request uses the following form fields:

| Field | Type | Description |
| --- | --- | --- |
| `chat_audio_input` | File | The audio file to transcribe |
| `request_data` | JSON | Configuration for processing, wrapped as a JSON string |
The `request_data` JSON string supports the following parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| `target_language` | string | Yes | Language code for the response: `am` (Amharic) or `om` (Afan Oromo) |
| `prompt` | string | No | Additional text context that can be combined with the transcribed audio |
| `conversation_history` | array | No | Previous conversation turns (see Conversation Management) |
| `generation_config` | object | No | Configuration for the response generation |
:::important
When using `multipart/form-data` with the `chat_generate` endpoint, all JSON parameters must be wrapped inside a field named `request_data`.
:::
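For example, a `request_data` value that combines a target language with extra text context might look like this (the prompt text is illustrative):

```json
{
  "target_language": "am",
  "prompt": "The speaker is asking about store opening hours."
}
```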
{"response_text": "እሺ፣ ውሎ ደህና መሸ።","finish_reason": "stop","usage_metadata": {"prompt_token_count": 15,"candidates_token_count": 12,"total_token_count": 27},"modelVersion": "Addis-፩-አሌፍ","transcription_raw": "<analysis>gender: male, emotion: neutral</analysis> ውሎ ደህና መሸ ወይ?","transcription_clean": "ውሎ ደህና መሸ ወይ?"}
| Field | Type | Description |
| --- | --- | --- |
| `transcription_raw` | string | Complete transcription with analysis markup as returned by the underlying model |
| `transcription_clean` | string | Cleaned transcription text with analysis tags removed, suitable for display to users |
The `transcription_raw` field may contain markup tags with metadata (such as the speaker's gender and emotion in the example above), while `transcription_clean` offers a user-friendly version for display purposes.
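In most cases you can use `transcription_clean` directly. If you only store `transcription_raw`, a cleaned version can be derived by stripping the markup; a minimal sketch, assuming the `<analysis>…</analysis>` tag format shown in the example response:

```javascript
// Strip <analysis>...</analysis> blocks from a raw transcription.
// Assumes the tag format shown in the example response above.
function cleanTranscription(raw) {
  return raw.replace(/<analysis>[\s\S]*?<\/analysis>/g, "").trim();
}

cleanTranscription(
  "<analysis>gender: male, emotion: neutral</analysis> ውሎ ደህና መሸ ወይ?",
);
// => "ውሎ ደህና መሸ ወይ?"
```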
The following audio formats are supported:

| Format | MIME Types |
| --- | --- |
| WAV | `audio/wav`, `audio/x-wav`, `audio/wave`, `audio/x-pn-wav` |
| MP3 | `audio/mpeg`, `audio/mp3`, `audio/x-mp3` |
| M4A/MP4 | `audio/mp4`, `audio/x-m4a`, `audio/m4a` |
| WebM/Ogg/FLAC | `audio/webm`, `audio/ogg`, `audio/x-flac`, `audio/flac` |
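A client can check a selected file against this list before uploading. A minimal sketch (the type list mirrors the table above; the error handling is illustrative):

```javascript
// MIME types accepted by the endpoint, per the table above.
const SUPPORTED_AUDIO_TYPES = new Set([
  "audio/wav", "audio/x-wav", "audio/wave", "audio/x-pn-wav",
  "audio/mpeg", "audio/mp3", "audio/x-mp3",
  "audio/mp4", "audio/x-m4a", "audio/m4a",
  "audio/webm", "audio/ogg", "audio/x-flac", "audio/flac",
]);

// Validate a File (e.g. from an <input type="file">) before uploading.
function assertSupportedAudio(file) {
  if (!SUPPORTED_AUDIO_TYPES.has(file.type)) {
    throw new Error(`Unsupported audio type: ${file.type || "unknown"}`);
  }
}
```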
```javascript
// Basic audio recording and transcription
async function recordAndTranscribe() {
  // 1. Set up UI elements
  const startButton = document.getElementById("startRecording");
  const stopButton = document.getElementById("stopRecording");
  const statusDiv = document.getElementById("status");
  const resultDiv = document.getElementById("result");

  // 2. Initialize variables
  let mediaRecorder;
  let audioChunks = [];
  let stream;

  // 3. Set up event handlers
  startButton.addEventListener("click", async () => {
    try {
      // Request microphone access
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // Create media recorder
      mediaRecorder = new MediaRecorder(stream);
      audioChunks = [];

      // Collect audio chunks
      mediaRecorder.addEventListener("dataavailable", (event) => {
        audioChunks.push(event.data);
      });

      // Handle the recording stop event (registered once per recorder)
      mediaRecorder.addEventListener("stop", async () => {
        try {
          // Create an audio blob using the recorder's actual MIME type
          // (MediaRecorder typically produces audio/webm, not WAV)
          const audioBlob = new Blob(audioChunks, {
            type: mediaRecorder.mimeType || "audio/webm",
          });

          // Send to API
          const result = await sendAudioForTranscription(audioBlob);

          // Display results
          statusDiv.textContent = "Transcription complete";
          resultDiv.innerHTML = `
            <p><strong>Transcription:</strong> ${result.transcription_clean}</p>
            <p><strong>Response:</strong> ${result.response_text}</p>
          `;

          // Stop the media tracks to release the microphone
          stream.getTracks().forEach((track) => track.stop());

          // Reset buttons
          startButton.disabled = false;
          stopButton.disabled = true;
        } catch (error) {
          statusDiv.textContent = `Error: ${error.message}`;
        }
      });

      // Start recording
      mediaRecorder.start();
      statusDiv.textContent = "Recording...";

      // Enable stop button, disable start button
      startButton.disabled = true;
      stopButton.disabled = false;
    } catch (error) {
      statusDiv.textContent = `Error: ${error.message}`;
    }
  });

  stopButton.addEventListener("click", () => {
    if (mediaRecorder && mediaRecorder.state !== "inactive") {
      mediaRecorder.stop();
      statusDiv.textContent = "Processing...";
    }
  });

  // 4. Function to send audio to the API
  async function sendAudioForTranscription(audioBlob) {
    const formData = new FormData();
    formData.append("chat_audio_input", audioBlob);
    formData.append(
      "request_data",
      JSON.stringify({
        target_language: "am",
      }),
    );

    const response = await fetch(
      "https://api.addisassistant.com/api/v1/chat_generate",
      {
        method: "POST",
        headers: {
          "X-API-Key": "YOUR_API_KEY",
        },
        body: formData,
      },
    );

    if (!response.ok) {
      throw new Error(`API error: ${response.status}`);
    }

    return await response.json();
  }
}

// Initialize the recording functionality
document.addEventListener("DOMContentLoaded", recordAndTranscribe);
```
```javascript
// Handle file upload and transcription
document
  .getElementById("audioFileForm")
  .addEventListener("submit", async (event) => {
    event.preventDefault();

    const fileInput = document.getElementById("audioFile");
    const resultDiv = document.getElementById("transcriptionResult");
    const statusDiv = document.getElementById("status");

    if (!fileInput.files.length) {
      statusDiv.textContent = "Please select an audio file";
      return;
    }

    const audioFile = fileInput.files[0];
    statusDiv.textContent = "Uploading and transcribing...";

    try {
      const formData = new FormData();
      formData.append("chat_audio_input", audioFile);

      // Include additional context if needed
      const additionalText = document.getElementById("additionalText").value;
      const requestData = {
        target_language: "am",
      };
      if (additionalText) {
        requestData.prompt = additionalText;
      }
      formData.append("request_data", JSON.stringify(requestData));

      const response = await fetch(
        "https://api.addisassistant.com/api/v1/chat_generate",
        {
          method: "POST",
          headers: {
            "X-API-Key": "YOUR_API_KEY",
          },
          body: formData,
        },
      );

      if (!response.ok) {
        throw new Error(
          `Server returned ${response.status}: ${response.statusText}`,
        );
      }

      const result = await response.json();

      // Display results
      statusDiv.textContent = "Transcription complete";
      resultDiv.innerHTML = `
        <div class="transcription">
          <h3>Transcription</h3>
          <p>${result.transcription_clean}</p>
        </div>
        <div class="analysis">
          <h3>Raw Transcription</h3>
          <pre>${result.transcription_raw}</pre>
        </div>
        <div class="response">
          <h3>AI Response</h3>
          <p>${result.response_text}</p>
        </div>
      `;
    } catch (error) {
      statusDiv.textContent = `Error: ${error.message}`;
    }
  });
```
Use `transcription_clean` for user-facing displays; reserve `transcription_raw` for cases where you need the embedded analysis metadata.