import { getVoiceClientConfig } from '@server/services/ai-providers.service';
import { resolveApiKey } from '@server/services/provider-keys.service';
import { TTS_OPENAI_MODEL, DEFAULT_TTS_VOICE } from '@server/config/ai-defaults';

/** Synthesized audio returned by {@link synthesizeSpeech}. */
export interface TtsResult {
  /** Audio bytes as returned (or base64-decoded) from the provider response. */
  audioBuffer: Buffer;
  /** MIME type of `audioBuffer`; this module emits 'audio/wav' or 'audio/mpeg'. */
  contentType: string;
}

/**
 * Builds the user-facing error text shown when a provider's TTS API key is
 * unavailable.
 *
 * @param provider Display name of the provider (e.g. "OpenAI").
 * @param envVar Name of the key the user is asked to add (e.g. "OPENAI_API_KEY").
 * @returns The formatted message.
 */
const NO_KEY_MSG = (provider: string, envVar: string): string => {
  return `No ${provider} TTS API key configured — add your ${envVar} in Restaurant Settings → AI Keys.`;
};

/**
 * Converts text to speech using the provider behind the given voice model.
 *
 * When `voiceModelId` is empty, OpenAI is used with `DEFAULT_TTS_VOICE`.
 * Otherwise the voice configuration is loaded and the request is routed by
 * its `providerName`. For each provider the API key is taken from the
 * restaurant's stored keys first (when a `restaurantId` is given) and then
 * from the matching environment variable; the OpenAI branch additionally
 * prefers a key carried on the voice configuration itself.
 *
 * @param text Text to synthesize.
 * @param voiceModelId Voice model to use, or `null` for the OpenAI default voice.
 * @param restaurantId Restaurant whose stored provider keys should be consulted.
 * @param overrideLanguageCode Language code that takes precedence over the
 *   configured one; only the Sarvam branch reads it.
 * @returns The audio bytes and their content type.
 * @throws Error when the selected provider has no API key available, or when
 *   the configured provider is not one of the supported ones.
 */
export async function synthesizeSpeech(
  text: string,
  voiceModelId: string | null,
  restaurantId?: string | null,
  overrideLanguageCode?: string | null
): Promise<TtsResult> {
  // No voice model chosen: fall back to OpenAI with the default voice.
  if (!voiceModelId) {
    const fallbackKey = await resolveApiKey(restaurantId || null, 'openai');
    return synthesizeOpenAI(text, DEFAULT_TTS_VOICE, fallbackKey || undefined);
  }

  const voiceConfig = await getVoiceClientConfig(voiceModelId, restaurantId);

  if (voiceConfig.providerName === 'openai_tts' || voiceConfig.providerName === 'openai_realtime') {
    // Precedence: key on the voice config, then the restaurant's stored key, then the environment.
    const storedKey =
      voiceConfig.apiKey || (restaurantId ? await resolveApiKey(restaurantId, 'openai') : null);
    const openAiKey = storedKey || process.env.OPENAI_API_KEY;
    return synthesizeOpenAI(text, voiceConfig.voiceId, openAiKey || undefined);
  }

  if (voiceConfig.providerName === 'elevenlabs') {
    const storedKey = restaurantId ? await resolveApiKey(restaurantId, 'elevenlabs') : null;
    const elevenLabsKey = storedKey || process.env.ELEVENLABS_API_KEY;
    if (!elevenLabsKey) {
      throw new Error(NO_KEY_MSG('ElevenLabs', 'ELEVENLABS_API_KEY'));
    }
    return synthesizeElevenLabs(text, voiceConfig.voiceId, elevenLabsKey);
  }

  if (voiceConfig.providerName === 'deepgram') {
    const storedKey = restaurantId ? await resolveApiKey(restaurantId, 'deepgram') : null;
    const deepgramKey = storedKey || process.env.DEEPGRAM_API_KEY;
    if (!deepgramKey) {
      throw new Error(NO_KEY_MSG('Deepgram', 'DEEPGRAM_API_KEY'));
    }
    return synthesizeDeepgram(text, voiceConfig.voiceId, deepgramKey);
  }

  if (voiceConfig.providerName === 'sarvam') {
    const storedKey = restaurantId ? await resolveApiKey(restaurantId, 'sarvam') : null;
    const sarvamKey = storedKey || process.env.SARVAM_API_KEY;
    if (!sarvamKey) {
      throw new Error(NO_KEY_MSG('Sarvam', 'SARVAM_API_KEY'));
    }
    // Caller override wins over the configured language; 'hi-IN' is the last resort.
    const language = overrideLanguageCode || voiceConfig.languageCode || 'hi-IN';
    return synthesizeSarvam(text, voiceConfig.voiceId, language, sarvamKey);
  }

  throw new Error(`Unsupported voice provider: ${voiceConfig.providerName}`);
}

/**
 * Requests WAV audio from the OpenAI speech endpoint.
 *
 * @param text Text to synthesize.
 * @param voiceId Voice name; an empty value falls back to `DEFAULT_TTS_VOICE`.
 * @param apiKey Key to authenticate with; when absent, `OPENAI_API_KEY` from
 *   the environment is used.
 * @returns The WAV bytes with content type 'audio/wav'.
 * @throws Error with the missing-key message when no key is available or the
 *   API answers 401/403; otherwise an error carrying the HTTP status and the
 *   parsed error body for any other non-OK response.
 */
async function synthesizeOpenAI(
  text: string,
  voiceId: string,
  apiKey: string | undefined
): Promise<TtsResult> {
  const effectiveKey = apiKey || process.env.OPENAI_API_KEY;
  if (!effectiveKey) {
    throw new Error(NO_KEY_MSG('OpenAI', 'OPENAI_API_KEY'));
  }

  const payload = JSON.stringify({
    model: TTS_OPENAI_MODEL,
    input: text,
    voice: voiceId || DEFAULT_TTS_VOICE,
    response_format: 'wav',
  });

  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: { Authorization: `Bearer ${effectiveKey}`, 'Content-Type': 'application/json' },
    body: payload,
  });

  if (!response.ok) {
    // Rejected credentials are surfaced with the same message as a missing key.
    const rejected = response.status === 401 || response.status === 403;
    if (rejected) {
      throw new Error(NO_KEY_MSG('OpenAI', 'OPENAI_API_KEY'));
    }
    const errorBody = await response.text();
    throw new Error(`OpenAI TTS ${response.status}: ${parseErr(errorBody)}`);
  }

  const wavBytes = await response.arrayBuffer();
  return { audioBuffer: Buffer.from(wavBytes), contentType: 'audio/wav' };
}

/**
 * Requests audio from the ElevenLabs text-to-speech endpoint for one voice.
 *
 * @param text Text to synthesize.
 * @param voiceId ElevenLabs voice identifier; URL-encoded into the request path.
 * @param apiKey Key sent in the `xi-api-key` header.
 * @returns The response bytes with content type 'audio/mpeg'.
 * @throws Error carrying the HTTP status and parsed error body on a non-OK response.
 */
async function synthesizeElevenLabs(text: string, voiceId: string, apiKey: string): Promise<TtsResult> {
  const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`;
  const payload = JSON.stringify({ text, model_id: 'eleven_multilingual_v2' });

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'xi-api-key': apiKey, 'Content-Type': 'application/json', Accept: 'audio/mpeg' },
    body: payload,
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`ElevenLabs TTS ${response.status}: ${parseErr(errorBody)}`);
  }

  const audioBytes = await response.arrayBuffer();
  return { audioBuffer: Buffer.from(audioBytes), contentType: 'audio/mpeg' };
}

/**
 * Requests audio from the Deepgram speak endpoint.
 *
 * @param text Text to synthesize.
 * @param voiceId Deepgram model name passed as the `model` query parameter;
 *   an empty value falls back to 'aura-asteria-en'.
 * @param apiKey Key sent as a `Token` authorization header.
 * @returns The response bytes with content type 'audio/mpeg'.
 * @throws Error carrying the HTTP status and parsed error body on a non-OK response.
 */
async function synthesizeDeepgram(text: string, voiceId: string, apiKey: string): Promise<TtsResult> {
  const modelName = voiceId || 'aura-asteria-en';
  const endpoint = `https://api.deepgram.com/v1/speak?model=${encodeURIComponent(modelName)}`;

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { Authorization: `Token ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`Deepgram TTS ${response.status}: ${parseErr(errorBody)}`);
  }

  const audioBytes = await response.arrayBuffer();
  return { audioBuffer: Buffer.from(audioBytes), contentType: 'audio/mpeg' };
}

/**
 * Requests audio from the Sarvam text-to-speech endpoint.
 *
 * The endpoint answers with JSON; the first entry of its `audios` array is
 * base64-decoded into the returned buffer.
 *
 * @param text Text to synthesize (sent as the single element of `inputs`).
 * @param voiceId Speaker name sent as `speaker`.
 * @param languageCode Value sent as `target_language_code`.
 * @param apiKey Key sent in the `api-subscription-key` header.
 * @returns The decoded bytes with content type 'audio/wav'.
 * @throws Error carrying the HTTP status and parsed error body on a non-OK
 *   response, or when the response contains no audio entry.
 */
async function synthesizeSarvam(text: string, voiceId: string, languageCode: string, apiKey: string): Promise<TtsResult> {
  const requestBody = {
    inputs: [text],
    target_language_code: languageCode,
    speaker: voiceId,
    model: 'bulbul:v3',
  };

  const response = await fetch('https://api.sarvam.ai/text-to-speech', {
    method: 'POST',
    headers: { 'api-subscription-key': apiKey, 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`Sarvam TTS ${response.status}: ${parseErr(errorBody)}`);
  }

  const payload = await response.json() as { audios?: string[] };
  const encodedAudio = payload.audios?.[0];
  if (!encodedAudio) {
    throw new Error('Sarvam TTS returned no audio payload');
  }

  return { audioBuffer: Buffer.from(encodedAudio, 'base64'), contentType: 'audio/wav' };
}

/**
 * Extracts a human-readable message from a provider's error response body.
 *
 * The body is parsed as JSON and the first non-empty string among these
 * fields is returned, in order: `error` (when it is a string),
 * `error.message`, `detail` (when it is a string), `detail.message`,
 * `message`. Every candidate is checked at runtime to be a string, so a
 * structured `detail` or `error` value never leaks out as an object that
 * callers would interpolate as "[object Object]".
 *
 * @param raw Raw response body text.
 * @returns The extracted message, or `raw` unchanged when the body is not
 *   JSON, is not a JSON object, or holds no usable string message.
 */
function parseErr(raw: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return raw;
  }

  // JSON primitives and `null` carry no fields to inspect.
  if (typeof parsed !== 'object' || parsed === null) return raw;

  // Reads `.message` from a nested object; yields undefined for anything else.
  const nestedMessage = (value: unknown): unknown =>
    typeof value === 'object' && value !== null ? (value as { message?: unknown }).message : undefined;

  const body = parsed as { error?: unknown; detail?: unknown; message?: unknown };
  const candidates: unknown[] = [
    body.error,
    nestedMessage(body.error),
    body.detail,
    nestedMessage(body.detail),
    body.message,
  ];

  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate !== '') return candidate;
  }
  return raw;
}
