import express from 'express';
import { createServer } from 'http';
import { PlivoWebSocketServer, StartEvent, MediaEvent } from '@plivo/plivo-stream-sdk';
import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
import OpenAI from 'openai';
import { ElevenLabsClient } from 'elevenlabs';
// Type-only import so the conversations map below can be keyed by socket
// (assumes the Plivo SDK passes `ws` WebSocket instances to its handlers)
import type { WebSocket } from 'ws';
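// These environment variables must be set before starting the server:
// DEEPGRAM_API_KEY, OPENAI_API_KEY, ELEVENLABS_API_KEY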
const app = express();
const server = createServer(app);
// Initialize AI service clients
const deepgram = createClient(process.env.DEEPGRAM_API_KEY);
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const elevenlabs = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });
const SYSTEM_PROMPT = `You are a helpful AI voice assistant. Keep responses
concise and conversational. Respond naturally as if speaking on a phone call.`;
const plivoServer = new PlivoWebSocketServer({ server, path: '/stream' });
// Track conversations per WebSocket connection
const conversations = new Map<WebSocket, { history: any[], dgConnection: any }>();
plivoServer
.onStart(async (event: StartEvent, ws) => {
console.log(`Call started from ${event.start.from}`);
// Set up Deepgram live transcription
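    // Plivo streams call audio as 8 kHz mono mu-law (the same format used for
    // playback below), so the Deepgram options must match that encoding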
const dgConnection = deepgram.listen.live({
model: 'nova-2',
encoding: 'mulaw',
sample_rate: 8000,
channels: 1
});
dgConnection.on(LiveTranscriptionEvents.Transcript, async (data) => {
const transcript = data.channel?.alternatives?.[0]?.transcript;
if (transcript && data.is_final) {
      // Catch pipeline errors so a failed OpenAI/ElevenLabs call does not
      // crash the process with an unhandled rejection
      await processWithAI(ws, transcript).catch((err) =>
        console.error('AI pipeline error:', err)
      );
}
});
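    // Surface transcription errors instead of failing silently (the Deepgram
    // SDK emits LiveTranscriptionEvents.Error on stream problems)
    dgConnection.on(LiveTranscriptionEvents.Error, (err) => {
      console.error('Deepgram error:', err);
    });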
conversations.set(ws, { history: [], dgConnection });
})
.onMedia(async (event: MediaEvent, ws) => {
const conv = conversations.get(ws);
if (conv?.dgConnection) {
// Forward audio to Deepgram
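      // Note: audio that arrives before the Deepgram socket has finished opening
      // may be dropped; buffer it locally if the first words of calls go missing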
const audioBuffer = event.getRawMedia();
conv.dgConnection.send(audioBuffer);
}
})
.onStop((event, ws) => {
const conv = conversations.get(ws);
if (conv?.dgConnection) {
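      // finish() signals end-of-audio so Deepgram can flush any remaining
      // results before the connection closes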
conv.dgConnection.finish();
}
conversations.delete(ws);
})
.start();
async function processWithAI(ws: WebSocket, userText: string) {
const conv = conversations.get(ws);
if (!conv) return;
conv.history.push({ role: 'user', content: userText });
// Get response from OpenAI
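  // The system prompt plus the full turn history is resent on every request,
  // which is how the model keeps the context of the call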
const response = await openai.chat.completions.create({
model: 'gpt-4',
messages: [
{ role: 'system', content: SYSTEM_PROMPT },
...conv.history
]
});
const assistantText = response.choices[0].message?.content || '';
conv.history.push({ role: 'assistant', content: assistantText });
// Convert to speech with ElevenLabs
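  // ulaw_8000 keeps the synthesized speech in the same 8 kHz mu-law format the
  // call uses, so it can be played back to the caller without transcoding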
const audioStream = await elevenlabs.textToSpeech.convert('21m00Tcm4TlvDq8ikWAM', {
text: assistantText,
model_id: 'eleven_turbo_v2',
output_format: 'ulaw_8000'
});
  // Collect the streamed audio into a single buffer; Buffer.from() handles the
  // SDK yielding either Buffer or Uint8Array chunks
  const chunks: Buffer[] = [];
  for await (const chunk of audioStream) {
    chunks.push(Buffer.from(chunk));
  }
const audioBuffer = Buffer.concat(chunks);
// Send audio back to caller
plivoServer.playAudio(ws, 'audio/x-mulaw', 8000, audioBuffer);
}
server.listen(5000, () => {
console.log('Server listening on port 5000');
});
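
// Note: Plivo has to be told to stream call audio to this server. A typical
// setup points the <Stream> element in the call's answer XML at
// wss://<your-public-host>/stream, matching the `path` configured above.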