using System;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;

namespace Convai.Scripts.Runtime.Multiplayer
{
    /// <summary>
    /// UDP Speech Sender - Captures and transmits NPC speech audio to remote player.
    ///
    /// FLOW (Player 1 → Player 2):
    /// 1. Player 2 speaks (via ConvaiSimpleUDPAudioSender on their device)
    /// 2. Player 1 receives voice input (via ConvaiSimpleUDPAudioReceiver)
    /// 3. Player 1's NPC generates response speech (Convai API)
    /// 4. THIS COMPONENT monitors Player 1's NPC AudioSource
    /// 5. When new AudioClips appear, transmit them to Player 2
    /// 6. Player 2's ConvaiUDPSpeechReceiver plays the audio
    ///
    /// This component should be on a NetworkManager or similar persistent object.
    /// It will find and monitor ConvaiNPC components on Avatar objects in the scene.
    /// </summary>
    public class ConvaiUDPSpeechSender : MonoBehaviour
    {
        [Header("Network Configuration")]
        [SerializeField] private bool enableDebugLogging = true;

        [Header("NPC Source")]
        [SerializeField] private bool useActiveNPC = true;
        [SerializeField] private ConvaiNPC sourceNPC;

        [Header("Audio Settings")]
        [SerializeField] private int maxSamplesPerPacket = 8192;

        // Network components
        private UdpClient _udpClient;
        private IPEndPoint _targetEndPoint;
        private string targetIP;
        private int targetPort;
        private bool _isInitialized = false;

        // Speech tracking
        private int _speechSequence = 0;        // Increments once per fully-transmitted clip
        private bool _isSendingSpeech = false;  // True between first clip and final packet
        private readonly HashSet<AudioClip> _sentClips = new HashSet<AudioClip>(); // De-dupe: clips already transmitted

        // Packet constants
        private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
        private const byte PACKET_TYPE_AUDIO_START = 0x01;
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
        private const byte PACKET_TYPE_AUDIO_END = 0x03;
        private const byte PACKET_TYPE_FINAL = 0x05;

        // Events
        /// <summary>Raised with true when a transmission session starts, false when the final packet is sent.</summary>
        public Action<bool> OnSpeechTransmission;
        /// <summary>Raised with the (possibly empty) transcript after each clip is fully transmitted.</summary>
        public Action<string> OnSpeechSent;

        // Metrics for debug UI
        private int _totalClipsSent = 0;
        private DateTime _lastClipSentTime;

        public int TotalClipsSent => _totalClipsSent;
        public float TimeSinceLastSend => _lastClipSentTime != default
            ? (float)(DateTime.UtcNow - _lastClipSentTime).TotalSeconds
            : -1f;
        public string CurrentTargetIP => targetIP;
        public int CurrentTargetPort => targetPort;
        public bool UsingDiscovery => NetworkConfig.Instance?.useAutoDiscovery ?? false;

        private void Start()
        {
            // Get network config from global instance
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.ipAddress;
                targetPort = cfg.port;

                // Subscribe to peer discovery if enabled
                if (cfg.useAutoDiscovery && UDPPeerDiscovery.Instance != null)
                {
                    UDPPeerDiscovery.Instance.OnPeerDiscovered += HandlePeerDiscovered;
                    UDPPeerDiscovery.Instance.OnPeerLost += HandlePeerLost;
                    ConvaiLogger.Info("Speech sender subscribed to peer discovery", ConvaiLogger.LogCategory.Character);
                }
            }
            else
            {
                Debug.LogError("NetworkConfig not found! Please ensure NetworkConfig.asset exists in Resources folder.");
                // Fall back to broadcast on a default port so the component still functions
                targetIP = "255.255.255.255";
                targetPort = 1221;
            }

            InitializeNetwork();
            InitializeConvai();
        }

        private void OnDestroy()
        {
            // Unsubscribe from peer discovery
            if (UDPPeerDiscovery.Instance != null)
            {
                UDPPeerDiscovery.Instance.OnPeerDiscovered -= HandlePeerDiscovered;
                UDPPeerDiscovery.Instance.OnPeerLost -= HandlePeerLost;
            }

            // Unsubscribe from NPC manager events. NOTE: this lives here (not in
            // CleanupNPCSubscriptions) so that switching NPCs does not permanently
            // drop the OnActiveNPCChanged subscription, which is only made once.
            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
            }

            CleanupNPCSubscriptions();
            CleanupNetwork();
        }

        private void Update()
        {
            // Continuously update source NPC if using active NPC mode
            if (useActiveNPC)
            {
                var currentActiveNPC = FindEnabledConvaiNPC();
                if (currentActiveNPC != sourceNPC)
                {
                    // Cleanup old subscriptions
                    CleanupNPCSubscriptions();

                    // Update to new NPC
                    sourceNPC = currentActiveNPC;
                    SubscribeToNPCEvents();

                    if (sourceNPC != null)
                    {
                        ConvaiLogger.Info($"🔄 UDP Speech Sender updated source NPC to: {sourceNPC.characterName} (on {sourceNPC.gameObject.name})", ConvaiLogger.LogCategory.Character);
                    }
                    else
                    {
                        ConvaiLogger.Info($"🔄 UDP Speech Sender cleared source NPC", ConvaiLogger.LogCategory.Character);
                    }
                }
            }
        }

        /// <summary>
        /// Finds an enabled ConvaiNPC in the scene (doesn't rely on ConvaiNPCManager raycasting).
        /// Returns null when no active, enabled NPC exists.
        /// </summary>
        private ConvaiNPC FindEnabledConvaiNPC()
        {
            // Find all ConvaiNPC components in the scene (including inactive GameObjects)
            var allNPCs = FindObjectsOfType<ConvaiNPC>(true);

            // Return the first one that's on an active GameObject
            foreach (var npc in allNPCs)
            {
                if (npc.gameObject.activeInHierarchy && npc.enabled)
                {
                    return npc;
                }
            }

            return null;
        }

        private void HandlePeerDiscovered(string peerIP)
        {
            targetIP = peerIP;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(peerIP), targetPort);
            ConvaiLogger.Info($"🔊 Speech sender now targeting peer at {peerIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void HandlePeerLost()
        {
            // Don't change targetIP - keep sending to the last known peer IP
            // The peer might come back online and we'll automatically reconnect
            ConvaiLogger.Warn($"🔊 Speech sender: Peer connection lost, but continuing to send to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void InitializeNetwork()
        {
            try
            {
                _udpClient = new UdpClient();
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                _isInitialized = true;
                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeConvai()
        {
            // Prefer local ConvaiNPC on the same GameObject, then fall back to finding enabled NPC
            var localNPC = GetComponent<ConvaiNPC>();
            if (localNPC != null)
            {
                sourceNPC = localNPC;
                ConvaiLogger.Info($"Speech Sender: Using local NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            }
            else if (useActiveNPC)
            {
                sourceNPC = FindEnabledConvaiNPC();
                if (sourceNPC != null)
                {
                    ConvaiLogger.Info($"Speech Sender: Found NPC {sourceNPC.characterName} on {sourceNPC.gameObject.name}", ConvaiLogger.LogCategory.Character);
                }
                else
                {
                    ConvaiLogger.Warn("Speech Sender: No ConvaiNPC found in scene yet", ConvaiLogger.LogCategory.Character);
                }
            }

            SubscribeToNPCEvents();

            // Subscribe to NPC manager events for late NPC activation
            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
            }
        }

        private void SubscribeToNPCEvents()
        {
            if (sourceNPC == null)
            {
                ConvaiLogger.Warn("SubscribeToNPCEvents: sourceNPC is null", ConvaiLogger.LogCategory.Character);
                return;
            }

            if (sourceNPC.AudioManager == null)
            {
                ConvaiLogger.Warn($"SubscribeToNPCEvents: AudioManager is null for {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
                return;
            }

            // Hook into the character talking events
            sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
            sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

            ConvaiLogger.Info($"✅ UDP Speech Sender subscribed to NPC: {sourceNPC.characterName} (on {sourceNPC.gameObject.name}), AudioManager: {sourceNPC.AudioManager.name}", ConvaiLogger.LogCategory.Character);

            // Also start continuous monitoring as a fallback in case events don't fire
            StartCoroutine(ContinuousAudioMonitoring());
        }

        private void HandleCharacterTalkingChanged(bool isTalking)
        {
            if (!_isInitialized) return;

            if (isTalking)
            {
                ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} started talking, monitoring audio clips...", ConvaiLogger.LogCategory.Character);

                // Start monitoring for audio clips
                StartCoroutine(MonitorAudioClips());
            }
            else
            {
                ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} stopped talking", ConvaiLogger.LogCategory.Character);

                // End speech transmission (fire-and-forget; errors are logged inside)
                _ = SendFinalPacket();
            }
        }

        private void HandleTranscriptAvailable(string transcript)
        {
            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
            {
                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>
        /// Polls the NPC's AudioSource every 100ms while the character talks and
        /// transmits each newly-appearing AudioClip exactly once.
        /// </summary>
        private IEnumerator MonitorAudioClips()
        {
            if (sourceNPC?.AudioManager == null)
            {
                ConvaiLogger.Error("MonitorAudioClips: AudioManager is null on sourceNPC", ConvaiLogger.LogCategory.Character);
                yield break;
            }

            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
            if (audioSource == null)
            {
                ConvaiLogger.Error($"MonitorAudioClips: No AudioSource found on AudioManager ({sourceNPC.AudioManager.name})", ConvaiLogger.LogCategory.Character);
                yield break;
            }

            ConvaiLogger.Info($"🔊 Started monitoring audio clips on {audioSource.name} for NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);

            AudioClip lastClip = null;
            int checkCount = 0;

            while (sourceNPC != null && sourceNPC.IsCharacterTalking)
            {
                checkCount++;

                // Log periodically to show we're still monitoring
                if (enableDebugLogging && checkCount % 10 == 0)
                {
                    ConvaiLogger.DebugLog($"🔊 Monitoring... check #{checkCount}, current clip: {(audioSource?.clip != null ? audioSource.clip.name : "null")}, isTalking: {sourceNPC.IsCharacterTalking}", ConvaiLogger.LogCategory.Character);
                }

                if (audioSource?.clip != null && audioSource.clip != lastClip)
                {
                    // New clip detected!
                    lastClip = audioSource.clip;
                    ConvaiLogger.Info($"🔊 NEW CLIP DETECTED: {lastClip.name}, length: {lastClip.length:F2}s, samples: {lastClip.samples}, freq: {lastClip.frequency}", ConvaiLogger.LogCategory.Character);

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastClip))
                    {
                        _sentClips.Add(lastClip);

                        // Get the transcript from the most recent available transcript
                        string transcript = GetRecentTranscript();

                        // Send this clip
                        ConvaiLogger.Info($"🔊 TRANSMITTING CLIP to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
                        _ = TransmitAudioClip(lastClip, transcript);
                    }
                    else
                    {
                        ConvaiLogger.Warn($"🔊 Clip already sent, skipping: {lastClip.name}", ConvaiLogger.LogCategory.Character);
                    }
                }

                yield return new WaitForSeconds(0.1f); // Check every 100ms
            }

            ConvaiLogger.Info($"🔊 Stopped monitoring audio clips (NPC stopped talking or was destroyed). Checks performed: {checkCount}", ConvaiLogger.LogCategory.Character);

            // Clear sent clips when done
            _sentClips.Clear();
        }

        /// <summary>
        /// Long-running fallback monitor (started on subscription, stopped via
        /// StopAllCoroutines) that catches clips even if talking events never fire.
        /// </summary>
        private IEnumerator ContinuousAudioMonitoring()
        {
            ConvaiLogger.Info("🔊 Starting continuous audio monitoring as fallback", ConvaiLogger.LogCategory.Character);

            AudioClip lastMonitoredClip = null;

            while (true)
            {
                // Wait a bit between checks
                yield return new WaitForSeconds(0.2f);

                // Check if we still have a valid source NPC
                if (sourceNPC == null || sourceNPC.AudioManager == null)
                {
                    yield return new WaitForSeconds(1f); // Wait longer if no NPC
                    continue;
                }

                // Get the audio source
                AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
                if (audioSource == null)
                {
                    yield return new WaitForSeconds(1f);
                    continue;
                }

                // Check if there's a new audio clip playing
                if (audioSource.clip != null && audioSource.clip != lastMonitoredClip && audioSource.isPlaying)
                {
                    lastMonitoredClip = audioSource.clip;

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastMonitoredClip))
                    {
                        _sentClips.Add(lastMonitoredClip);
                        ConvaiLogger.Info($"🔊 [Continuous Monitor] NEW CLIP DETECTED: {lastMonitoredClip.name}, length: {lastMonitoredClip.length:F2}s", ConvaiLogger.LogCategory.Character);

                        // Start transmission if not already started
                        if (!_isSendingSpeech)
                        {
                            _isSendingSpeech = true;
                            OnSpeechTransmission?.Invoke(true);
                        }

                        string transcript = "";
                        _ = TransmitAudioClip(lastMonitoredClip, transcript);
                    }
                }

                // Clean up old clips from the sent list if NPC is not talking
                if (!sourceNPC.IsCharacterTalking && _sentClips.Count > 0)
                {
                    if (enableDebugLogging)
                        ConvaiLogger.DebugLog($"🔊 [Continuous Monitor] NPC stopped talking, clearing sent clips list ({_sentClips.Count} clips)", ConvaiLogger.LogCategory.Character);
                    _sentClips.Clear();

                    // Send final packet
                    if (_isSendingSpeech)
                    {
                        _ = SendFinalPacket();
                    }
                }
            }
        }

        private string GetRecentTranscript()
        {
            // Try to get transcript from the NPC's recent activity
            // This is a simple approach - in a more complex setup you might
            // want to match clips to transcripts
            return ""; // Transcripts come via the transcript event
        }

        /// <summary>
        /// Transmits a single AudioClip as start packet + N chunk packets + end packet,
        /// all stamped with the same sequence number. Increments the sequence and
        /// raises OnSpeechSent when the whole clip has been sent.
        /// </summary>
        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
        {
            if (!_isInitialized || audioClip == null) return;

            try
            {
                // Start transmission if not already started
                if (!_isSendingSpeech)
                {
                    _isSendingSpeech = true;
                    OnSpeechTransmission?.Invoke(true);
                    ConvaiLogger.Info($"🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
                }

                // Use the current speech sequence for this entire clip
                int clipSequence = _speechSequence;

                // Send start packet with metadata
                await SendAudioStartPacket(audioClip, transcript, clipSequence);

                // Send audio data in chunks (all with the same sequence)
                await SendAudioClipInChunks(audioClip, clipSequence);

                // Send end packet for this clip (with the same sequence)
                await SendAudioEndPacket(clipSequence);

                // Only increment sequence after the entire clip is sent
                _speechSequence++;

                // Update metrics
                _totalClipsSent++;
                _lastClipSentTime = DateTime.UtcNow;

                OnSpeechSent?.Invoke(transcript);

                if (enableDebugLogging)
                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
        {
            // Get all audio data. AudioClip.samples is per-channel; GetData fills an
            // interleaved buffer covering every channel, so size by samples * channels
            // (identical for mono clips, fixes truncation for stereo).
            float[] audioData = new float[audioClip.samples * audioClip.channels];
            audioClip.GetData(audioData, 0);

            // Send in chunks
            int totalSamples = audioData.Length;
            int processedSamples = 0;
            int chunkCount = 0;

            while (processedSamples < totalSamples)
            {
                int remainingSamples = totalSamples - processedSamples;
                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);

                float[] chunkData = new float[currentChunkSize];
                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                processedSamples += currentChunkSize;
                chunkCount++;

                if (enableDebugLogging && chunkCount % 10 == 0)
                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);

                // Small delay to avoid overwhelming the network
                await Task.Delay(5);
            }

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioEndPacket(int sequence)
        {
            byte[] packet = CreateAudioEndPacket(sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendFinalPacket()
        {
            if (!_isSendingSpeech) return;

            try
            {
                byte[] packet = CreateFinalPacket();
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                _isSendingSpeech = false;
                OnSpeechTransmission?.Invoke(false);

                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");

            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x01 = audio start)
            // 4 bytes: Sequence number
            // 4 bytes: Total samples in clip
            // 4 bytes: Sample rate
            // 4 bytes: Channels
            // 4 bytes: Transcript length
            // N bytes: Transcript (UTF-8)
            int headerSize = 25;
            byte[] packet = new byte[headerSize + transcriptBytes.Length];
            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset); offset += 4;
            packet[offset] = PACKET_TYPE_AUDIO_START; offset += 1;
            BitConverter.GetBytes(sequence).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset); offset += 4;
            transcriptBytes.CopyTo(packet, offset);

            return packet;
        }

        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
        {
            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x02 = audio chunk)
            // 4 bytes: Sequence number
            // 4 bytes: Start sample position
            // 4 bytes: Sample count in this chunk
            // N bytes: Audio data (as 16-bit PCM)
            int headerSize = 17;
            int audioDataSize = audioData.Length * sizeof(short);
            byte[] packet = new byte[headerSize + audioDataSize];
            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset); offset += 4;
            packet[offset] = PACKET_TYPE_AUDIO_CHUNK; offset += 1;
            BitConverter.GetBytes(sequence).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(startSample).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset); offset += 4;

            // Convert float samples to 16-bit PCM
            for (int i = 0; i < audioData.Length; i++)
            {
                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
                BitConverter.GetBytes(sample).CopyTo(packet, offset);
                offset += 2;
            }

            return packet;
        }

        private byte[] CreateAudioEndPacket(int sequence)
        {
            byte[] packet = new byte[13]; // Header only
            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset); offset += 4;
            packet[offset] = PACKET_TYPE_AUDIO_END; offset += 1;
            BitConverter.GetBytes(sequence).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private byte[] CreateFinalPacket()
        {
            byte[] packet = new byte[13]; // Header only
            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset); offset += 4;
            packet[offset] = PACKET_TYPE_FINAL; offset += 1;
            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset); offset += 4;
            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private void CleanupNPCSubscriptions()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
            }

            // NOTE: the ConvaiNPCManager.OnActiveNPCChanged subscription is deliberately
            // NOT removed here — it is only established once in InitializeConvai, and
            // removing it on every NPC switch would silently drop all later events.
            // It is released in OnDestroy instead.

            // Stop all coroutines when cleaning up (will restart with new NPC)
            StopAllCoroutines();
        }

        private void CleanupNetwork()
        {
            _udpClient?.Close();
            _udpClient?.Dispose();
            _udpClient = null;
        }

        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
        {
            if (!useActiveNPC) return;

            // Cleanup old subscriptions
            CleanupNPCSubscriptions();

            // Update to new NPC
            sourceNPC = newActiveNPC;
            SubscribeToNPCEvents();
        }

        // Public methods for external control
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        public bool IsSendingSpeech => _isSendingSpeech;
        public bool IsInitialized => _isInitialized;
        public ConvaiNPC SourceNPC => sourceNPC;

        // Debug methods
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info($"=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Packets sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
        }
    }
}