using System;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;

namespace Convai.Scripts.Runtime.Multiplayer
{
    /// <summary>
    /// UDP Speech Sender - Simple and reliable approach using events
    /// Hooks into AudioManager events to capture when clips are about to be played
    /// </summary>
    public class ConvaiUDPSpeechSender : MonoBehaviour
    {
        [Header("Network Configuration")]
        [SerializeField] private bool enableDebugLogging = true;

        [Header("NPC Source")]
        [SerializeField] private bool useActiveNPC = true;
        [SerializeField] private ConvaiNPC sourceNPC;

        [Header("Audio Settings")]
        [SerializeField] private int maxSamplesPerPacket = 8192;
        [SerializeField] private bool sendTranscripts = true;

        // Network components
        private UdpClient _udpClient;
        private IPEndPoint _targetEndPoint;
        private string targetIP;
        private int targetPort;
        private bool _isInitialized;

        // Speech tracking
        private int _speechSequence;
        private bool _isSendingSpeech;
        private readonly HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();

        // Packet constants
        private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
        private const byte PACKET_TYPE_AUDIO_START = 0x01;
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
        private const byte PACKET_TYPE_AUDIO_END = 0x03;
        private const byte PACKET_TYPE_FINAL = 0x05;
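
        // All packet types share a common header: a 4-byte magic number, a 1-byte
        // packet type, and a 4-byte sequence number. Start packets append clip
        // metadata (total samples, sample rate, channels) plus a length-prefixed
        // UTF-8 transcript; chunk packets append the start-sample position, the
        // sample count, and 16-bit PCM data; end and final packets append a
        // 4-byte zero length field. See the Create*Packet methods below.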

        // Events
        public Action<bool> OnSpeechTransmission;
        public Action<string> OnSpeechSent;

        private void Start()
        {
            // Get network config from global instance
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.ipAddress;
                targetPort = cfg.port;

                // Subscribe to peer discovery if enabled
                if (cfg.useAutoDiscovery && UDPPeerDiscovery.Instance != null)
                {
                    UDPPeerDiscovery.Instance.OnPeerDiscovered += HandlePeerDiscovered;
                    UDPPeerDiscovery.Instance.OnPeerLost += HandlePeerLost;
                    ConvaiLogger.Info("Speech sender subscribed to peer discovery", ConvaiLogger.LogCategory.Character);
                }
            }
            else
            {
                Debug.LogError("NetworkConfig not found! Please ensure NetworkConfig.asset exists in Resources folder.");
                targetIP = "255.255.255.255";
                targetPort = 1221;
            }

            InitializeNetwork();
            InitializeConvai();
        }

        private void OnDestroy()
        {
            // Unsubscribe from peer discovery
            if (UDPPeerDiscovery.Instance != null)
            {
                UDPPeerDiscovery.Instance.OnPeerDiscovered -= HandlePeerDiscovered;
                UDPPeerDiscovery.Instance.OnPeerLost -= HandlePeerLost;
            }

            CleanupNPCSubscriptions();
            CleanupNetwork();
        }

        private void HandlePeerDiscovered(string peerIP)
        {
            targetIP = peerIP;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(peerIP), targetPort);
            ConvaiLogger.Info($"🔊 Speech sender now targeting peer at {peerIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void HandlePeerLost()
        {
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.fallbackBroadcastIP;
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                ConvaiLogger.Warn($"🔊 Speech sender falling back to broadcast: {targetIP}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeNetwork()
        {
            try
            {
                _udpClient = new UdpClient();
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                _isInitialized = true;

                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeConvai()
        {
            // Prefer local ConvaiNPC on the same GameObject, then fall back to active NPC
            var localNPC = GetComponent<ConvaiNPC>();
            if (localNPC != null)
            {
                sourceNPC = localNPC;
            }
            else if (useActiveNPC)
            {
                sourceNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
            }

            SubscribeToNPCEvents();

            // Subscribe to NPC manager events for late NPC activation
            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
            }
        }

        private void SubscribeToNPCEvents()
        {
            if (sourceNPC?.AudioManager != null)
            {
                // Hook into the character talking events
                sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

                ConvaiLogger.Info($"UDP Speech Sender subscribed to NPC: {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            }
            else
            {
                ConvaiLogger.Warn("No source NPC available for speech transmission", ConvaiLogger.LogCategory.Character);
            }
        }

        private void HandleCharacterTalkingChanged(bool isTalking)
        {
            if (!_isInitialized) return;

            if (isTalking)
            {
                // Start monitoring for audio clips
                StartCoroutine(MonitorAudioClips());
            }
            else
            {
                // End speech transmission
                _ = SendFinalPacket();
            }
        }

        private void HandleTranscriptAvailable(string transcript)
        {
            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
            {
                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
            }
        }

        private IEnumerator MonitorAudioClips()
        {
            if (sourceNPC?.AudioManager == null) yield break;

            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
            AudioClip lastClip = null;

            while (sourceNPC.IsCharacterTalking)
            {
                if (audioSource?.clip != null && audioSource.clip != lastClip)
                {
                    // New clip detected
                    lastClip = audioSource.clip;

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastClip))
                    {
                        _sentClips.Add(lastClip);

                        // Get the most recently available transcript
                        string transcript = GetRecentTranscript();

                        // Send this clip
                        _ = TransmitAudioClip(lastClip, transcript);
                    }
                }

                yield return new WaitForSeconds(0.1f); // Check every 100 ms
            }

            // Clear sent clips when done
            _sentClips.Clear();
        }

        private string GetRecentTranscript()
        {
            // Try to get transcript from the NPC's recent activity
            // This is a simple approach - in a more complex setup you might want to match clips to transcripts
            return ""; // Transcripts come via the transcript event
        }

        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
        {
            if (!_isInitialized || audioClip == null) return;

            try
            {
                // Start transmission if not already started
                if (!_isSendingSpeech)
                {
                    _isSendingSpeech = true;
                    OnSpeechTransmission?.Invoke(true);

                    ConvaiLogger.Info("🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
                }

                // Use the current speech sequence for this entire clip
                int clipSequence = _speechSequence;

                // Send start packet with metadata
                await SendAudioStartPacket(audioClip, transcript, clipSequence);

                // Send audio data in chunks (all with the same sequence)
                await SendAudioClipInChunks(audioClip, clipSequence);

                // Send end packet for this clip (with the same sequence)
                await SendAudioEndPacket(clipSequence);

                // Only increment sequence after the entire clip is sent
                _speechSequence++;

                OnSpeechSent?.Invoke(transcript);

                if (enableDebugLogging)
                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
        {
            // Get all audio data
            float[] audioData = new float[audioClip.samples];
            audioClip.GetData(audioData, 0);

            // Send in chunks
            int totalSamples = audioData.Length;
            int processedSamples = 0;
            int chunkCount = 0;

            while (processedSamples < totalSamples)
            {
                int remainingSamples = totalSamples - processedSamples;
                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);

                float[] chunkData = new float[currentChunkSize];
                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                processedSamples += currentChunkSize;
                chunkCount++;

                if (enableDebugLogging && chunkCount % 10 == 0)
                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);

                // Small delay to avoid overwhelming the network
                await Task.Delay(5);
            }

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioEndPacket(int sequence)
        {
            byte[] packet = CreateAudioEndPacket(sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendFinalPacket()
        {
            if (!_isSendingSpeech) return;

            try
            {
                byte[] packet = CreateFinalPacket();
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                _isSendingSpeech = false;
                OnSpeechTransmission?.Invoke(false);

                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");

            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x01 = audio start)
            // 4 bytes: Sequence number
            // 4 bytes: Total samples in clip
            // 4 bytes: Sample rate
            // 4 bytes: Channels
            // 4 bytes: Transcript length
            // N bytes: Transcript (UTF-8)

            int headerSize = 25;
            byte[] packet = new byte[headerSize + transcriptBytes.Length];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_START;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
            offset += 4;

            transcriptBytes.CopyTo(packet, offset);

            return packet;
        }

        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
        {
            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x02 = audio chunk)
            // 4 bytes: Sequence number
            // 4 bytes: Start sample position
            // 4 bytes: Sample count in this chunk
            // N bytes: Audio data (as 16-bit PCM)

            int headerSize = 17;
            int audioDataSize = audioData.Length * sizeof(short);
            byte[] packet = new byte[headerSize + audioDataSize];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(startSample).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
            offset += 4;

            // Convert float samples to 16-bit PCM
            for (int i = 0; i < audioData.Length; i++)
            {
                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
                BitConverter.GetBytes(sample).CopyTo(packet, offset);
                offset += 2;
            }

            return packet;
        }
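
        // Illustrative sketch only (not called at runtime): how a receiver could
        // decode the chunk packets produced by CreateAudioChunkPacket. Offsets
        // mirror the 17-byte header documented above; the method name and out
        // parameters are hypothetical and not part of the Convai API.
        private static float[] DecodeAudioChunkPayload(byte[] packet, out int sequence, out int startSample)
        {
            sequence = BitConverter.ToInt32(packet, 5);      // after 4-byte magic + 1-byte type
            startSample = BitConverter.ToInt32(packet, 9);
            int sampleCount = BitConverter.ToInt32(packet, 13);

            float[] samples = new float[sampleCount];
            for (int i = 0; i < sampleCount; i++)
            {
                short pcm = BitConverter.ToInt16(packet, 17 + i * 2);
                samples[i] = pcm / (float)short.MaxValue;    // back to the [-1, 1] range
            }

            return samples;
        }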

        private byte[] CreateAudioEndPacket(int sequence)
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_END;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private byte[] CreateFinalPacket()
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_FINAL;
            offset += 1;

            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private void CleanupNPCSubscriptions()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
            }

            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
            }
        }

        private void CleanupNetwork()
        {
            _udpClient?.Close();
            _udpClient?.Dispose();
            _udpClient = null;
        }

        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
        {
            if (!useActiveNPC) return;

            // Cleanup old subscriptions
            CleanupNPCSubscriptions();

            // Update to new NPC
            sourceNPC = newActiveNPC;
            SubscribeToNPCEvents();
        }

        // Public methods for external control
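        /// <summary>
        /// Overrides the current target with an explicit IP address and port;
        /// subsequent speech packets are sent to this endpoint.
        /// </summary>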
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        public bool IsSendingSpeech => _isSendingSpeech;
        public bool IsInitialized => _isInitialized;
        public ConvaiNPC SourceNPC => sourceNPC;

        // Debug methods
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info("=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Clip sequences sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
        }
    }
}