/**
 * transport.ts — JambonzRealtimeTransportLayer
 *
 * Adapter that bridges a jambonz `listen` audio WebSocket (bidirectional
 * PCM16 LE binary frames) to the OpenAI Realtime API via WebSocket.
 *
 * Mirrors the shape of `TwilioRealtimeTransportLayer` from
 * `@openai/agents-extensions` so a `RealtimeSession` + `RealtimeAgent`
 * (with the full restaurant tool suite) can be driven against jambonz
 * with no behavioural divergence from the Twilio path.
 *
 * jambonz PCM frames are emitted at the sampleRate negotiated in the
 * `listen` verb (we use 24kHz to match OpenAI's pcm16 default), so no
 * resampling is needed — frames are forwarded byte-for-byte.
 */
import { OpenAIRealtimeWebSocket } from '@openai/agents/realtime';
import type {
  OpenAIRealtimeWebSocketOptions,
  RealtimeTransportLayerConnectOptions,
  TransportLayerAudio,
  RealtimeSessionConfig,
} from '@openai/agents/realtime';
import type { WebSocket as NodeWebSocket } from 'ws';

/**
 * Constructor options for `JambonzRealtimeTransportLayer`: everything
 * `OpenAIRealtimeWebSocket` accepts, plus the jambonz-side socket.
 */
export type JambonzRealtimeTransportLayerOptions = OpenAIRealtimeWebSocketOptions & {
  /** The `ws` WebSocket carrying the jambonz `listen` audio stream for one call. */
  jambonzWebSocket: NodeWebSocket;
};

/**
 * Realtime transport that pipes audio between a jambonz `listen` WebSocket
 * and the OpenAI Realtime API.
 *
 * - Binary frames received from jambonz are handed to `sendAudio()` unchanged
 *   while this transport's `status` is `'connected'`.
 * - Text frames from jambonz are parsed as JSON; a `connect` / `session:new`
 *   message is used to capture the call SID (see `getCallSid()`).
 * - Audio produced by the model is written back to jambonz as binary frames.
 * - Session configs default both audio formats to `'pcm16'` unless the caller
 *   sets them explicitly.
 *
 * NOTE(review): nothing here tells jambonz to flush already-queued audio when
 * the model is interrupted — confirm whether barge-in needs that.
 */
export class JambonzRealtimeTransportLayer extends OpenAIRealtimeWebSocket {
  private readonly jambonzWs: NodeWebSocket;
  private jambonzCallSid: string | null = null;
  /** True once the jambonz socket handlers are registered, so repeated `connect()` calls do not stack duplicates. */
  private jambonzListenersAttached = false;

  /**
   * @param options Base OpenAI WebSocket transport options plus the jambonz socket to bridge.
   */
  constructor(options: JambonzRealtimeTransportLayerOptions) {
    super(options);
    this.jambonzWs = options.jambonzWebSocket;
  }

  /**
   * Returns a shallow copy of `partial` with `inputAudioFormat` and
   * `outputAudioFormat` set to `'pcm16'` wherever they are null/undefined.
   * Explicitly provided formats are left untouched.
   */
  private applyAudioFormat(partial?: Partial<RealtimeSessionConfig>): Partial<RealtimeSessionConfig> {
    const cfg: Record<string, unknown> = { ...(partial || {}) };
    if (cfg.inputAudioFormat == null) cfg.inputAudioFormat = 'pcm16';
    if (cfg.outputAudioFormat == null) cfg.outputAudioFormat = 'pcm16';
    return cfg as Partial<RealtimeSessionConfig>;
  }

  /**
   * Registers the jambonz socket handlers (once) and then opens the OpenAI
   * connection with the audio-format defaults applied to the initial session
   * config. The caller's `options` object is not modified.
   *
   * @param options Connect options forwarded to the base transport.
   */
  async connect(options: RealtimeTransportLayerConnectOptions): Promise<void> {
    // Listen before awaiting the OpenAI connection so frames that jambonz
    // sends in the meantime (notably the call metadata) are not missed.
    this.attachJambonzListeners();

    await super.connect({
      ...options,
      initialSessionConfig: this.applyAudioFormat(options.initialSessionConfig),
    });
  }

  /** Applies the `'pcm16'` format defaults, then delegates to the base transport. */
  updateSessionConfig(config: Partial<RealtimeSessionConfig>): void {
    super.updateSessionConfig(this.applyAudioFormat(config));
  }

  /**
   * OpenAI → jambonz: writes each audio delta to the jambonz socket as a
   * binary frame (only while that socket is open), then re-emits the `audio`
   * event for other listeners.
   */
  protected _onAudio(audioEvent: TransportLayerAudio): void {
    try {
      if (this.jambonzWs.readyState === 1 /* OPEN */) {
        this.jambonzWs.send(Buffer.from(audioEvent.data), { binary: true });
      }
    } catch { /* socket closed between the readyState check and send */ }
    this.emit('audio', audioEvent);
  }

  /** Call SID captured from the jambonz `connect` / `session:new` message, or `null` if none has been seen. */
  getCallSid(): string | null { return this.jambonzCallSid; }

  /** Wires the jambonz socket's `message`, `close` and `error` events to this transport exactly once. */
  private attachJambonzListeners(): void {
    if (this.jambonzListenersAttached) return;
    this.jambonzListenersAttached = true;

    this.jambonzWs.on('message', (raw: Buffer | ArrayBuffer | Buffer[], isBinary: boolean) => {
      if (isBinary) {
        this.forwardCallerAudio(raw);
      } else {
        this.handleJambonzTextFrame(raw);
      }
    });

    this.jambonzWs.on('close', () => {
      if (this.status !== 'disconnected') this.close();
    });

    this.jambonzWs.on('error', (error: Error) => {
      // Surface the failure instead of dropping it. Both steps stay
      // best-effort so a missing `error` listener or an already-closed
      // transport cannot throw out of the socket's event handler.
      try { this.emit('error', { type: 'error', error }); } catch { /* no error listener */ }
      try { this.close(); } catch { /* noop */ }
    });
  }

  /** jambonz → OpenAI: forwards one binary frame, dropping it unless the OpenAI side is connected. */
  private forwardCallerAudio(raw: Buffer | ArrayBuffer | Buffer[]): void {
    if (this.status !== 'connected') return;

    const frame = this.toNodeBuffer(raw);
    if (frame.byteLength === 0) return;

    // A Buffer may be a view into a larger pooled ArrayBuffer, so copy out
    // exactly the bytes that belong to this frame.
    const pcm = frame.buffer.slice(frame.byteOffset, frame.byteOffset + frame.byteLength);
    try { this.sendAudio(pcm as ArrayBuffer); } catch { /* connection dropped mid-send */ }
  }

  /**
   * Parses a jambonz text frame and records the call SID from a `connect` /
   * `session:new` message. Deliberately not gated on the OpenAI connection
   * status: this message can arrive before that connection is established.
   * Other JSON messages (e.g. dtmf/hangup) and non-JSON text are ignored.
   */
  private handleJambonzTextFrame(raw: Buffer | ArrayBuffer | Buffer[]): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(this.toNodeBuffer(raw).toString('utf8'));
    } catch {
      return; // ignore non-JSON text frames
    }
    if (typeof parsed !== 'object' || parsed === null) return;

    const msg = parsed as Record<string, unknown>;
    if (msg.event !== 'connect' && msg.type !== 'session:new') return;

    const sid = [msg.callSid, msg.call_sid].find(
      (value): value is string => typeof value === 'string' && value.length > 0,
    );
    this.jambonzCallSid = sid ?? null;
  }

  /** Normalises any payload shape `ws` can deliver (Buffer, ArrayBuffer, or Buffer fragments) to one Buffer. */
  private toNodeBuffer(raw: Buffer | ArrayBuffer | Buffer[]): Buffer {
    if (Array.isArray(raw)) return Buffer.concat(raw);
    if (Buffer.isBuffer(raw)) return raw;
    return Buffer.from(raw);
  }
}
