diff --git a/Unity-Master/Assets/Scripts/Multiplayer.meta b/Unity-Master/Assets/Scripts/Multiplayer.meta new file mode 100644 index 0000000..d87a471 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 7bf18a6dad33703489c04336f7f81a3d +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs new file mode 100644 index 0000000..c63bbc1 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs @@ -0,0 +1,364 @@ +using System; +using System.Collections.Generic; +using System.Net; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; +using Convai.Scripts.Runtime.Core; +using Convai.Scripts.Runtime.LoggerSystem; +using Convai.Scripts.Runtime.Utils; +using UnityEngine; + +namespace Convai.Scripts.Runtime.Multiplayer +{ + /// <summary> + /// Simple UDP Audio Receiver V2 - Simulates microphone input by triggering normal Convai flow + /// This approach is much simpler and more reliable than trying to replicate gRPC calls + /// </summary> + public class ConvaiSimpleUDPAudioReceiverV2 : MonoBehaviour + { + [Header("Network Configuration")] + [SerializeField] private int listenPort = 12345; + [SerializeField] private bool enableDebugLogging = true; + + [Header("NPC Target")] + [SerializeField] private bool useActiveNPC = true; + [SerializeField] private ConvaiNPC targetNPC; + + // Events + public Action<bool> OnAudioReceiving; + + // Network components + private UdpClient _udpListener; + private IPEndPoint _remoteEndPoint; + private bool _isListening = false; + private CancellationTokenSource _cancellationTokenSource; + + // Audio state tracking + private bool _isReceivingAudio = false; + private int _expectedSequence = 0; + private const uint MAGIC_NUMBER = 0xC0A1; // Simple 
magic number for packet validation + + // Timing for auto-stop + private float _lastPacketTime; + private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets + + // Packet structure (matching ConvaiSimpleUDPAudioSender) + private struct AudioPacketData + { + public uint magicNumber; + public int sequence; + public int sampleCount; + public int microphonePosition; + public bool isEndSignal; + public short[] audioSamples; + public long timestamp; + } + + private void Start() + { + _cancellationTokenSource = new CancellationTokenSource(); + InitializeNetwork(); + InitializeConvai(); + + // Subscribe to NPC manager events to handle late NPC activation + if (ConvaiNPCManager.Instance != null) + { + ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged; + } + } + + private void OnDestroy() + { + // Unsubscribe from events + if (ConvaiNPCManager.Instance != null) + { + ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged; + } + + StopListening(); + _cancellationTokenSource?.Cancel(); + _cancellationTokenSource?.Dispose(); + } + + private void Update() + { + // Auto-stop listening if no packets received for a while + if (_isReceivingAudio && Time.time - _lastPacketTime > AUTO_STOP_DELAY) + { + StopTalkingSimulation(); + } + } + + private void InitializeNetwork() + { + try + { + StartListening(); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to initialize UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private void InitializeConvai() + { + // Get target NPC + if (useActiveNPC) + { + targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC(); + } + + if (targetNPC == null) + { + ConvaiLogger.Warn("No target NPC found yet, will wait for NPC to become active", ConvaiLogger.LogCategory.Character); + } + else + { + ConvaiLogger.Info($"UDP Audio Receiver V2 initialized with NPC: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character); + } + } + + public void 
StartListening() + { + if (_isListening || _cancellationTokenSource == null) + return; + + try + { + _udpListener = new UdpClient(listenPort); + _isListening = true; + + ConvaiLogger.Info($"Simple UDP Audio Receiver V2 listening on port {listenPort}", ConvaiLogger.LogCategory.Character); + + // Start listening for incoming packets + _ = ListenForAudioPackets(_cancellationTokenSource.Token); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to start UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Error($"Stack trace: {ex.StackTrace}", ConvaiLogger.LogCategory.Character); + } + } + + public void StopListening() + { + if (!_isListening) + return; + + _isListening = false; + _udpListener?.Close(); + _udpListener?.Dispose(); + _udpListener = null; + + // Stop any ongoing simulation + StopTalkingSimulation(); + + ConvaiLogger.Info("Stopped UDP Audio Receiver V2", ConvaiLogger.LogCategory.Character); + } + + private async Task ListenForAudioPackets(CancellationToken cancellationToken) + { + try + { + while (_isListening && !cancellationToken.IsCancellationRequested) + { + var result = await _udpListener.ReceiveAsync(); + _remoteEndPoint = result.RemoteEndPoint; + + await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint); + } + } + catch (ObjectDisposedException) + { + // Normal when stopping + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error in UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private async Task ProcessReceivedPacket(byte[] data, IPEndPoint sender) + { + try + { + var packetData = ParseSimpleAudioPacket(data); + + if (packetData.HasValue) + { + var packet = packetData.Value; + _lastPacketTime = Time.time; + + if (enableDebugLogging) + { + if (packet.isEndSignal) + ConvaiLogger.DebugLog($"Received end signal from {sender}", ConvaiLogger.LogCategory.Character); + else + ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", 
ConvaiLogger.LogCategory.Character); + } + + if (packet.isEndSignal) + { + StopTalkingSimulation(); + OnAudioReceiving?.Invoke(false); + } + else + { + // If this is the first packet, start the talking simulation + if (packet.sequence == 0 && !_isReceivingAudio) + { + StartTalkingSimulation(); + } + + OnAudioReceiving?.Invoke(true); + } + } + else + { + // Not our audio packet format, might be a test message + string message = System.Text.Encoding.UTF8.GetString(data); + if (enableDebugLogging) + ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character); + } + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error processing received packet: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private void StartTalkingSimulation() + { + if (_isReceivingAudio) return; + + MainThreadDispatcher.Instance.RunOnMainThread(() => { + // Update target NPC if using active NPC + if (useActiveNPC) + { + targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC(); + } + + if (targetNPC == null) + { + ConvaiLogger.Warn("No target NPC available for audio simulation", ConvaiLogger.LogCategory.Character); + return; + } + + _isReceivingAudio = true; + _expectedSequence = 0; + + // This is the KEY! Simulate a talk key press to trigger normal Convai flow + ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true); + + ConvaiLogger.Info($"🎤 Started talking simulation for {targetNPC.characterName} (remote player audio)", ConvaiLogger.LogCategory.Character); + }); + } + + private void StopTalkingSimulation() + { + if (!_isReceivingAudio) return; + + MainThreadDispatcher.Instance.RunOnMainThread(() => { + _isReceivingAudio = false; + + // Simulate talk key release to stop recording + ConvaiInputManager.Instance.talkKeyInteract?.Invoke(false); + + ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? 
"NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character); + }); + } + + private AudioPacketData? ParseSimpleAudioPacket(byte[] data) + { + if (data.Length < 24) // Minimum header size + return null; + + try + { + int offset = 0; + + // Read magic number + uint magic = BitConverter.ToUInt32(data, offset); + offset += 4; + + if (magic != MAGIC_NUMBER) + return null; + + // Read header + int sequence = BitConverter.ToInt32(data, offset); + offset += 4; + + int sampleCount = BitConverter.ToInt32(data, offset); + offset += 4; + + int microphonePosition = BitConverter.ToInt32(data, offset); + offset += 4; + + bool isEndSignal = BitConverter.ToBoolean(data, offset); + offset += 1; + + // Skip padding + offset += 3; + + long timestamp = BitConverter.ToInt64(data, offset); + offset += 8; + + // Read audio data + short[] audioSamples = null; + if (!isEndSignal && sampleCount > 0) + { + int audioDataSize = sampleCount * sizeof(short); + if (data.Length >= offset + audioDataSize) + { + audioSamples = new short[sampleCount]; + Buffer.BlockCopy(data, offset, audioSamples, 0, audioDataSize); + } + } + + return new AudioPacketData + { + magicNumber = magic, + sequence = sequence, + sampleCount = sampleCount, + microphonePosition = microphonePosition, + isEndSignal = isEndSignal, + audioSamples = audioSamples, + timestamp = timestamp + }; + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error parsing audio packet: {ex.Message}", ConvaiLogger.LogCategory.Character); + return null; + } + } + + // Event handler for when NPC becomes active + private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC) + { + if (useActiveNPC && newActiveNPC != null) + { + targetNPC = newActiveNPC; + ConvaiLogger.Info($"UDP Audio Receiver V2 updated target NPC to: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character); + } + } + + // Public properties for debugging + public bool IsListening => _isListening; + public bool IsReceivingAudio => _isReceivingAudio; + public ConvaiNPC 
TargetNPC => targetNPC; + + // Debug methods + public void ShowNetworkStatus() + { + ConvaiLogger.Info($"=== Audio Receiver V2 Status ===", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Receiving Audio: {_isReceivingAudio}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Target NPC: {(targetNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Expected Sequence: {_expectedSequence}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Last Packet Time: {_lastPacketTime}", ConvaiLogger.LogCategory.Character); + } + } +} diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta new file mode 100644 index 0000000..8fb7859 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: fa35a6fc55fc4ca44b29b3636484bfd2 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs new file mode 100644 index 0000000..87c1609 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs @@ -0,0 +1,376 @@ +using System; +using System.Net; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; +using Convai.Scripts.Runtime.LoggerSystem; +using Convai.Scripts.Runtime.UI; +using UnityEngine; + +namespace Convai.Scripts.Runtime.Multiplayer +{ + /// + /// Simplified version of UDP Audio Sender that avoids complex chunking + /// This version sends smaller, more frequent packets to avoid array 
 bounds issues + /// + public class ConvaiSimpleUDPAudioSender : MonoBehaviour + { + [Header("Network Settings")] + [SerializeField] private string targetIP = "127.0.0.1"; + [SerializeField] private int targetPort = 12345; + + [Header("Audio Settings")] + [SerializeField] private int recordingFrequency = 16000; + [SerializeField] private int recordingLength = 10; + [SerializeField] private int samplesPerPacket = 1024; // Number of audio samples per packet (not bytes) + + [Header("UI")] + [SerializeField] private KeyCode talkKey = KeyCode.T; + [SerializeField] private bool useHoldToTalk = true; + + [Header("Debug")] + [SerializeField] private bool enableDebugLogging = true; + [SerializeField] private KeyCode testConnectionKey = KeyCode.C; + + private UdpClient _udpClient; + private IPEndPoint _targetEndPoint; + private AudioClip _audioClip; + private bool _isRecording = false; + private CancellationTokenSource _cancellationTokenSource; + + private int _lastMicrophonePosition = 0; + private float[] _audioBuffer; + private string _selectedMicrophone; + private int _packetSequence = 0; + + public event Action<bool> OnRecordingStateChanged; + + private void Start() + { + InitializeNetwork(); + InitializeAudio(); + _cancellationTokenSource = new CancellationTokenSource(); + } + + private void Update() + { + HandleInput(); + } + + private void OnDestroy() + { + StopRecording(); + _cancellationTokenSource?.Cancel(); + _cancellationTokenSource?.Dispose(); + _udpClient?.Close(); + } + + private void InitializeNetwork() + { + try + { + _udpClient = new UdpClient(); + _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort); + ConvaiLogger.Info($"Simple UDP Audio Sender initialized. 
Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to initialize UDP client: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private void InitializeAudio() + { + _selectedMicrophone = MicrophoneManager.Instance.SelectedMicrophoneName; + _audioBuffer = new float[recordingFrequency * recordingLength]; + + if (string.IsNullOrEmpty(_selectedMicrophone)) + { + ConvaiLogger.Error("No microphone selected for UDP audio sender", ConvaiLogger.LogCategory.Character); + } + } + + private void HandleInput() + { + // Handle talk key + if (useHoldToTalk) + { + if (Input.GetKeyDown(talkKey) && !_isRecording) + { + StartRecording(); + } + else if (Input.GetKeyUp(talkKey) && _isRecording) + { + StopRecording(); + } + } + else + { + if (Input.GetKeyDown(talkKey)) + { + if (_isRecording) + StopRecording(); + else + StartRecording(); + } + } + + // Handle test connection key + if (Input.GetKeyDown(testConnectionKey)) + { + TestConnection(); + } + } + + public void StartRecording() + { + if (_isRecording || string.IsNullOrEmpty(_selectedMicrophone)) + return; + + try + { + _audioClip = Microphone.Start(_selectedMicrophone, false, recordingLength, recordingFrequency); + _isRecording = true; + _lastMicrophonePosition = 0; + _packetSequence = 0; + + ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character); + OnRecordingStateChanged?.Invoke(true); + + // Start continuous audio processing + _ = ProcessAudioContinuously(_cancellationTokenSource.Token); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to start recording: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + public void StopRecording() + { + if (!_isRecording) + return; + + try + { + Microphone.End(_selectedMicrophone); + _isRecording = false; + + ConvaiLogger.Info("Stopped recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character); + 
OnRecordingStateChanged?.Invoke(false); + + // Send end-of-recording signal + SendEndOfRecordingSignal(); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to stop recording: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private async Task ProcessAudioContinuously(CancellationToken cancellationToken) + { + while (_isRecording && !cancellationToken.IsCancellationRequested) + { + try + { + await Task.Delay(100, cancellationToken); // Process every 100ms + + if (_audioClip == null || !Microphone.IsRecording(_selectedMicrophone)) + break; + + int currentMicrophonePosition = Microphone.GetPosition(_selectedMicrophone); + int audioDataLength = currentMicrophonePosition - _lastMicrophonePosition; + + if (audioDataLength > 0) + { + // Get audio data from the microphone clip + _audioClip.GetData(_audioBuffer, _lastMicrophonePosition); + + // Send data in smaller chunks to avoid array bounds issues + await SendAudioDataInChunks(_audioBuffer, audioDataLength); + + _lastMicrophonePosition = currentMicrophonePosition; + } + } + catch (Exception ex) when (!(ex is OperationCanceledException)) + { + ConvaiLogger.Error($"Error in audio processing: {ex.Message}", ConvaiLogger.LogCategory.Character); + break; + } + } + } + + private async Task SendAudioDataInChunks(float[] audioData, int totalSamples) + { + int processedSamples = 0; + + while (processedSamples < totalSamples) + { + try + { + int remainingSamples = totalSamples - processedSamples; + int currentChunkSamples = Mathf.Min(samplesPerPacket, remainingSamples); + + // Create a simple packet structure + byte[] packet = CreateSimpleAudioPacket(audioData, processedSamples, currentChunkSamples); + + // Send the packet + await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint); + + if (enableDebugLogging && _packetSequence % 10 == 0) // Log every 10th packet + { + ConvaiLogger.DebugLog($"Sent packet {_packetSequence} with {currentChunkSamples} samples", ConvaiLogger.LogCategory.Character); + 
} + + processedSamples += currentChunkSamples; + _packetSequence++; + + // Small delay to avoid overwhelming the network + await Task.Delay(10); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to send audio chunk: {ex.Message}", ConvaiLogger.LogCategory.Character); + break; + } + } + } + + private byte[] CreateSimpleAudioPacket(float[] audioData, int startIndex, int sampleCount) + { + // Simple packet structure: + // 4 bytes: Magic number (0xC0A1) + // 4 bytes: Packet sequence number + // 4 bytes: Sample count in this packet + // 4 bytes: Start position in stream + // 1 byte: Flags (0 = normal audio, 1 = end of recording) + // N bytes: Audio data (converted to shorts) + + int headerSize = 17; // 4 + 4 + 4 + 4 + 1 + int audioDataSize = sampleCount * sizeof(short); + byte[] packet = new byte[headerSize + audioDataSize]; + + int offset = 0; + + // Magic number + BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset); + offset += 4; + + // Packet sequence + BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset); + offset += 4; + + // Sample count + BitConverter.GetBytes(sampleCount).CopyTo(packet, offset); + offset += 4; + + // Start position + BitConverter.GetBytes(_lastMicrophonePosition + startIndex).CopyTo(packet, offset); + offset += 4; + + // Flags (0 for normal audio) + packet[offset] = 0; + offset += 1; + + // Convert audio samples to bytes (same as Convai approach) + for (int i = 0; i < sampleCount; i++) + { + float sample = audioData[startIndex + i]; + short shortSample = (short)(sample * short.MaxValue); + byte[] shortBytes = BitConverter.GetBytes(shortSample); + packet[offset] = shortBytes[0]; + packet[offset + 1] = shortBytes[1]; + offset += 2; + } + + return packet; + } + + private void SendEndOfRecordingSignal() + { + try + { + // Create end packet + byte[] packet = new byte[17]; // Header only, no audio data + int offset = 0; + + // Magic number + BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset); + offset += 4; + + // 
Packet sequence + BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset); + offset += 4; + + // Sample count (0 for end signal) + BitConverter.GetBytes(0).CopyTo(packet, offset); + offset += 4; + + // Start position + BitConverter.GetBytes(_lastMicrophonePosition).CopyTo(packet, offset); + offset += 4; + + // Flags (1 for end of recording) + packet[offset] = 1; + + _udpClient.SendAsync(packet, packet.Length, _targetEndPoint); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to send end signal: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + // Public methods for external control + public void SetTargetEndpoint(string ip, int port) + { + targetIP = ip; + targetPort = port; + _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port); + } + + public bool IsRecording => _isRecording; + + // Debug and testing methods + public async void TestConnection() + { + if (_udpClient == null) + { + ConvaiLogger.Error("UDP client not initialized", ConvaiLogger.LogCategory.Character); + return; + } + + try + { + ConvaiLogger.Info($"Testing connection to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character); + + // Send a simple test packet + string testMessage = "CONVAI_TEST_CONNECTION"; + byte[] testData = System.Text.Encoding.UTF8.GetBytes(testMessage); + + await _udpClient.SendAsync(testData, testData.Length, _targetEndPoint); + ConvaiLogger.Info("Test packet sent successfully", ConvaiLogger.LogCategory.Character); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Connection test failed: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + public void ShowNetworkStatus() + { + ConvaiLogger.Info($"=== Network Status ===", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"UDP Client: {(_udpClient != null ? 
"Initialized" : "Not initialized")}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Recording: {_isRecording}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Microphone: {_selectedMicrophone}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Packets sent: {_packetSequence}", ConvaiLogger.LogCategory.Character); + + if (_udpClient?.Client?.LocalEndPoint != null) + { + ConvaiLogger.Info($"Local endpoint: {_udpClient.Client.LocalEndPoint}", ConvaiLogger.LogCategory.Character); + } + } + } +} diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta new file mode 100644 index 0000000..52b3811 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: fa5cc94311721d04f8e8821151ffb737 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs new file mode 100644 index 0000000..8b08d97 --- /dev/null +++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs @@ -0,0 +1,639 @@ +using System; +using System.Collections.Generic; +using System.Net; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; +using Convai.Scripts.Runtime.LoggerSystem; +using Convai.Scripts.Runtime.Utils; +using UnityEngine; + +namespace Convai.Scripts.Runtime.Multiplayer +{ + /// + /// UDP Speech Receiver - Receives high-quality Convai speech with proper buffering + /// This version reconstructs the original AudioClip objects for seamless playback + /// + public class ConvaiUDPSpeechReceiver : MonoBehaviour + { + [Header("Network Configuration")] + [SerializeField] 
private int listenPort = 12346; + [SerializeField] private bool enableDebugLogging = true; + + [Header("Audio Playback")] + [SerializeField] private AudioSource speechAudioSource; + [SerializeField] private bool createAudioSourceIfMissing = true; + [SerializeField] private float audioVolume = 1.0f; + [SerializeField] private bool spatialAudio = false; + + [Header("UI")] + [SerializeField] private bool showTranscripts = true; + + // Network components + private UdpClient _udpListener; + private IPEndPoint _remoteEndPoint; + private bool _isListening = false; + private CancellationTokenSource _cancellationTokenSource; + + // Audio reconstruction + private Dictionary<int, IncomingAudioClip> _incomingClips = new Dictionary<int, IncomingAudioClip>(); + private Queue<ReconstructedAudioClip> _playbackQueue = new Queue<ReconstructedAudioClip>(); + private bool _isPlayingSequence = false; + private int _currentSequence = 0; + + // Packet constants (matching sender V3) + private const uint MAGIC_NUMBER = 0xC0A3; + private const byte PACKET_TYPE_AUDIO_START = 0x01; + private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02; + private const byte PACKET_TYPE_AUDIO_END = 0x03; + private const byte PACKET_TYPE_TRANSCRIPT = 0x04; + private const byte PACKET_TYPE_FINAL = 0x05; + + // Events + public Action<bool> OnSpeechReceiving; + public Action<string> OnTranscriptReceived; + public Action<AudioClip> OnAudioClipReceived; + + // Data structures + private struct SpeechPacket + { + public uint magicNumber; + public byte packetType; + public int sequence; + public int totalSamples; + public int sampleRate; + public int channels; + public int startSample; + public int chunkSampleCount; + public short[] audioSamples; + public string transcript; + } + + private class IncomingAudioClip + { + public int totalSamples; + public int sampleRate; + public int channels; + public string transcript; + public float[] audioData; + public bool isComplete; + public bool hasStart; + public bool hasEnd; + public int receivedSamples; + + public IncomingAudioClip(int totalSamples, int sampleRate, int channels, string transcript) 
+ { + this.totalSamples = totalSamples; + this.sampleRate = sampleRate; + this.channels = channels; + this.transcript = transcript; + this.audioData = new float[totalSamples]; + this.isComplete = false; + this.hasStart = false; + this.hasEnd = false; + this.receivedSamples = 0; + } + } + + private struct ReconstructedAudioClip + { + public AudioClip audioClip; + public string transcript; + public bool isFinal; + } + + private void Start() + { + _cancellationTokenSource = new CancellationTokenSource(); + InitializeAudio(); + InitializeNetwork(); + } + + private void OnDestroy() + { + StopListening(); + _cancellationTokenSource?.Cancel(); + _cancellationTokenSource?.Dispose(); + } + + private void Update() + { + // Process playback queue + ProcessPlaybackQueue(); + } + + private void InitializeAudio() + { + if (speechAudioSource == null) + { + speechAudioSource = GetComponent<AudioSource>(); + + if (speechAudioSource == null && createAudioSourceIfMissing) + { + speechAudioSource = gameObject.AddComponent<AudioSource>(); + ConvaiLogger.Info("Created AudioSource for speech playback", ConvaiLogger.LogCategory.Character); + } + } + + if (speechAudioSource != null) + { + speechAudioSource.volume = audioVolume; + speechAudioSource.playOnAwake = false; + speechAudioSource.spatialBlend = spatialAudio ? 
1.0f : 0.0f; + } + else + { + ConvaiLogger.Error("No AudioSource available for speech playback", ConvaiLogger.LogCategory.Character); + } + } + + private void InitializeNetwork() + { + try + { + StartListening(); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to initialize UDP speech receiver: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + public void StartListening() + { + if (_isListening || _cancellationTokenSource == null) + return; + + try + { + _udpListener = new UdpClient(listenPort); + _isListening = true; + + ConvaiLogger.Info($"UDP Speech Receiver listening on port {listenPort}", ConvaiLogger.LogCategory.Character); + + // Start listening for incoming packets + _ = ListenForSpeechPackets(_cancellationTokenSource.Token); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to start UDP speech receiver: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + public void StopListening() + { + if (!_isListening) + return; + + _isListening = false; + _udpListener?.Close(); + _udpListener?.Dispose(); + _udpListener = null; + + // Stop any ongoing playback + StopSpeechPlayback(); + + ConvaiLogger.Info("Stopped UDP Speech Receiver", ConvaiLogger.LogCategory.Character); + } + + private async Task ListenForSpeechPackets(CancellationToken cancellationToken) + { + try + { + while (_isListening && !cancellationToken.IsCancellationRequested) + { + var result = await _udpListener.ReceiveAsync(); + _remoteEndPoint = result.RemoteEndPoint; + + await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint); + } + } + catch (ObjectDisposedException) + { + // Normal when stopping + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error in UDP speech listener: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private async Task ProcessReceivedPacket(byte[] data, IPEndPoint sender) + { + try + { + var packetData = ParseSpeechPacket(data); + + if (packetData.HasValue) + { + var packet = packetData.Value; + + if 
(enableDebugLogging) + { + string typeStr = packet.packetType switch + { + PACKET_TYPE_AUDIO_START => "start", + PACKET_TYPE_AUDIO_CHUNK => "chunk", + PACKET_TYPE_AUDIO_END => "end", + PACKET_TYPE_TRANSCRIPT => "transcript", + PACKET_TYPE_FINAL => "final", + _ => "unknown" + }; + ConvaiLogger.DebugLog($"📥 Received {typeStr} packet {packet.sequence} from {sender}", ConvaiLogger.LogCategory.Character); + } + + switch (packet.packetType) + { + case PACKET_TYPE_AUDIO_START: + HandleAudioStartPacket(packet); + break; + + case PACKET_TYPE_AUDIO_CHUNK: + HandleAudioChunkPacket(packet); + break; + + case PACKET_TYPE_AUDIO_END: + HandleAudioEndPacket(packet); + break; + + case PACKET_TYPE_TRANSCRIPT: + HandleTranscriptPacket(packet); + break; + + case PACKET_TYPE_FINAL: + HandleFinalPacket(); + break; + } + } + else + { + if (enableDebugLogging) + { + // Check if it's a different magic number + if (data.Length >= 4) + { + uint receivedMagic = BitConverter.ToUInt32(data, 0); + ConvaiLogger.Warn($"❌ Invalid speech packet from {sender}. 
Expected magic: 0x{MAGIC_NUMBER:X}, Got: 0x{receivedMagic:X}", ConvaiLogger.LogCategory.Character); + } + else + { + ConvaiLogger.Warn($"❌ Packet too small from {sender}: {data.Length} bytes", ConvaiLogger.LogCategory.Character); + } + } + } + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error processing speech packet: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private void HandleAudioStartPacket(SpeechPacket packet) + { + // Start new speech sequence if this is the first start packet + if (packet.sequence == 0 && !_isPlayingSequence) + { + StartSpeechReception(); + } + + // Create new incoming audio clip + var incomingClip = new IncomingAudioClip(packet.totalSamples, packet.sampleRate, packet.channels, packet.transcript); + incomingClip.hasStart = true; + + _incomingClips[packet.sequence] = incomingClip; + + if (enableDebugLogging) + ConvaiLogger.DebugLog($"🎵 Started receiving audio clip {packet.sequence}: {packet.totalSamples} samples, '{packet.transcript}'", ConvaiLogger.LogCategory.Character); + } + + private void HandleAudioChunkPacket(SpeechPacket packet) + { + if (!_incomingClips.ContainsKey(packet.sequence)) return; + + var incomingClip = _incomingClips[packet.sequence]; + + // Convert short samples back to float and copy to the correct position + if (packet.audioSamples != null && packet.startSample + packet.chunkSampleCount <= incomingClip.totalSamples) + { + for (int i = 0; i < packet.chunkSampleCount; i++) + { + int targetIndex = packet.startSample + i; + if (targetIndex < incomingClip.audioData.Length) + { + incomingClip.audioData[targetIndex] = packet.audioSamples[i] / (float)short.MaxValue; + } + } + + incomingClip.receivedSamples += packet.chunkSampleCount; + } + } + + private void HandleAudioEndPacket(SpeechPacket packet) + { + if (!_incomingClips.ContainsKey(packet.sequence)) return; + + var incomingClip = _incomingClips[packet.sequence]; + incomingClip.hasEnd = true; + + // Check if the clip is complete (has start, 
end, and all samples) + if (incomingClip.hasStart && incomingClip.hasEnd) + { + incomingClip.isComplete = true; + + // Create the AudioClip + CreateAndQueueAudioClip(incomingClip, packet.sequence); + + // Remove from incoming clips + _incomingClips.Remove(packet.sequence); + } + } + + private void HandleTranscriptPacket(SpeechPacket packet) + { + if (showTranscripts && !string.IsNullOrEmpty(packet.transcript)) + { + MainThreadDispatcher.Instance.RunOnMainThread(() => { + OnTranscriptReceived?.Invoke(packet.transcript); + + if (enableDebugLogging) + ConvaiLogger.Info($"📝 Remote NPC said: '{packet.transcript}'", ConvaiLogger.LogCategory.Character); + }); + } + } + + private void HandleFinalPacket() + { + // Process any remaining incomplete clips + ProcessIncompleteClips(); + + // Add final marker to queue + _playbackQueue.Enqueue(new ReconstructedAudioClip + { + audioClip = null, + transcript = "", + isFinal = true + }); + + StopSpeechReception(); + } + + private void ProcessIncompleteClips() + { + // Try to create AudioClips from any clips that might be mostly complete + var keysToRemove = new List<int>(); + + foreach (var kvp in _incomingClips) + { + var incomingClip = kvp.Value; + + // If we received a reasonable amount of data, try to create the clip + if (incomingClip.receivedSamples > incomingClip.totalSamples * 0.8f) // 80% received + { + CreateAndQueueAudioClip(incomingClip, kvp.Key); + keysToRemove.Add(kvp.Key); + } + } + + foreach (var key in keysToRemove) + { + _incomingClips.Remove(key); + } + } + + private void CreateAndQueueAudioClip(IncomingAudioClip incomingClip, int sequence) + { + try + { + // Create AudioClip + AudioClip clip = AudioClip.Create($"RemoteSpeech_{sequence}", + incomingClip.totalSamples, incomingClip.channels, incomingClip.sampleRate, false); + clip.SetData(incomingClip.audioData, 0); + + // Queue for playback + _playbackQueue.Enqueue(new ReconstructedAudioClip + { + audioClip = clip, + transcript = incomingClip.transcript, + isFinal = 
false + }); + + OnAudioClipReceived?.Invoke(clip); + + if (enableDebugLogging) + ConvaiLogger.DebugLog($"✅ Reconstructed audio clip {sequence}: {clip.length:F2}s, '{incomingClip.transcript}'", ConvaiLogger.LogCategory.Character); + } + catch (Exception ex) + { + ConvaiLogger.Error($"Failed to create audio clip from sequence {sequence}: {ex.Message}", ConvaiLogger.LogCategory.Character); + } + } + + private void ProcessPlaybackQueue() + { + // If not currently playing and we have queued clips, start playing + if (!_isPlayingSequence && _playbackQueue.Count > 0 && speechAudioSource != null) + { + PlayNextAudioClip(); + } + + // Check if current clip finished playing + if (_isPlayingSequence && speechAudioSource != null && !speechAudioSource.isPlaying) + { + // Current clip finished, play next one if available + if (_playbackQueue.Count > 0) + { + PlayNextAudioClip(); + } + else + { + _isPlayingSequence = false; + } + } + } + + private void PlayNextAudioClip() + { + if (_playbackQueue.Count == 0 || speechAudioSource == null) return; + + var reconstructedClip = _playbackQueue.Dequeue(); + + if (reconstructedClip.isFinal) + { + _isPlayingSequence = false; + ConvaiLogger.Info("🔊 Finished playing remote speech sequence", ConvaiLogger.LogCategory.Character); + return; + } + + if (reconstructedClip.audioClip != null) + { + speechAudioSource.clip = reconstructedClip.audioClip; + speechAudioSource.Play(); + _isPlayingSequence = true; + + if (enableDebugLogging) + ConvaiLogger.DebugLog($"🔊 Playing remote speech: {reconstructedClip.audioClip.length:F2}s, '{reconstructedClip.transcript}'", ConvaiLogger.LogCategory.Character); + } + } + + private void StartSpeechReception() + { + _isPlayingSequence = false; + _currentSequence = 0; + _incomingClips.Clear(); + _playbackQueue.Clear(); + + OnSpeechReceiving?.Invoke(true); + + ConvaiLogger.Info("🔊 Started receiving remote NPC speech", ConvaiLogger.LogCategory.Character); + } + + private void StopSpeechReception() + { + 
OnSpeechReceiving?.Invoke(false); + + ConvaiLogger.Info("🔊 Stopped receiving remote NPC speech", ConvaiLogger.LogCategory.Character); + } + + private void StopSpeechPlayback() + { + if (speechAudioSource != null && speechAudioSource.isPlaying) + { + speechAudioSource.Stop(); + } + + _isPlayingSequence = false; + _playbackQueue.Clear(); + _incomingClips.Clear(); + } + + private SpeechPacket? ParseSpeechPacket(byte[] data) + { + if (data.Length < 13) // Minimum header size + return null; + + try + { + int offset = 0; + + // Read magic number + uint magic = BitConverter.ToUInt32(data, offset); + offset += 4; + + if (magic != MAGIC_NUMBER) + return null; + + // Read packet type + byte packetType = data[offset]; + offset += 1; + + // Read sequence + int sequence = BitConverter.ToInt32(data, offset); + offset += 4; + + var packet = new SpeechPacket + { + magicNumber = magic, + packetType = packetType, + sequence = sequence + }; + + // Parse based on packet type + switch (packetType) + { + case PACKET_TYPE_AUDIO_START: + if (data.Length < offset + 16) return null; // Need additional fields + + packet.totalSamples = BitConverter.ToInt32(data, offset); + offset += 4; + + packet.sampleRate = BitConverter.ToInt32(data, offset); + offset += 4; + + packet.channels = BitConverter.ToInt32(data, offset); + offset += 4; + + int transcriptLength = BitConverter.ToInt32(data, offset); + offset += 4; + + if (transcriptLength > 0 && data.Length >= offset + transcriptLength) + { + packet.transcript = System.Text.Encoding.UTF8.GetString(data, offset, transcriptLength); + } + break; + + case PACKET_TYPE_AUDIO_CHUNK: + if (data.Length < offset + 8) return null; // Need start sample + count + + packet.startSample = BitConverter.ToInt32(data, offset); + offset += 4; + + packet.chunkSampleCount = BitConverter.ToInt32(data, offset); + offset += 4; + + // Read audio data + if (packet.chunkSampleCount > 0 && data.Length >= offset + packet.chunkSampleCount * 2) + { + packet.audioSamples = new 
short[packet.chunkSampleCount]; + for (int i = 0; i < packet.chunkSampleCount; i++) + { + packet.audioSamples[i] = BitConverter.ToInt16(data, offset); + offset += 2; + } + } + break; + + case PACKET_TYPE_AUDIO_END: + case PACKET_TYPE_FINAL: + // These packets have no additional data beyond the header + break; + + case PACKET_TYPE_TRANSCRIPT: + // Similar to start packet transcript handling + if (data.Length >= offset + 4) + { + int transcriptLen = BitConverter.ToInt32(data, offset); + offset += 4; + + if (transcriptLen > 0 && data.Length >= offset + transcriptLen) + { + packet.transcript = System.Text.Encoding.UTF8.GetString(data, offset, transcriptLen); + } + } + break; + + default: + return null; + } + + return packet; + } + catch (Exception ex) + { + ConvaiLogger.Error($"Error parsing speech packet V2: {ex.Message}", ConvaiLogger.LogCategory.Character); + return null; + } + } + + // Public properties for debugging + public bool IsListening => _isListening; + public bool IsPlayingSequence => _isPlayingSequence; + public int QueuedClipCount => _playbackQueue.Count; + public int IncomingClipCount => _incomingClips.Count; + + // Debug methods + public void ShowNetworkStatus() + { + ConvaiLogger.Info($"=== Speech Receiver Status ===", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Playing Sequence: {_isPlayingSequence}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Current Sequence: {_currentSequence}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Queued Clips: {_playbackQueue.Count}", ConvaiLogger.LogCategory.Character); + ConvaiLogger.Info($"Incoming Clips: {_incomingClips.Count}", ConvaiLogger.LogCategory.Character); + + if (speechAudioSource != null) + { + ConvaiLogger.Info($"Audio Source: {speechAudioSource.name} (Volume: {speechAudioSource.volume})", ConvaiLogger.LogCategory.Character); + } + } + } +} diff --git 
a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta
new file mode 100644
index 0000000..9329146
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 8fe3e5348bc484f44be079d6aaf6e17e
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
new file mode 100644
index 0000000..dd9ef81
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
@@ -0,0 +1,479 @@
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;
using System.Collections;

namespace Convai.Scripts.Runtime.Multiplayer
{
    /// <summary>
    /// UDP Speech Sender - captures the active Convai NPC's speech by watching its
    /// AudioManager events, then streams each AudioClip over UDP as 16-bit PCM packets
    /// (start / chunk / end per clip, plus a final marker when the NPC stops talking).
    /// </summary>
    public class ConvaiUDPSpeechSender : MonoBehaviour
    {
        [Header("Network Configuration")]
        [SerializeField] private string targetIP = "127.0.0.1";
        [SerializeField] private int targetPort = 12346;
        [SerializeField] private bool enableDebugLogging = true;

        [Header("NPC Source")]
        [SerializeField] private bool useActiveNPC = true;
        [SerializeField] private ConvaiNPC sourceNPC;

        [Header("Audio Settings")]
        [SerializeField] private int maxSamplesPerPacket = 8192;
        // NOTE(review): sendTranscripts is serialized but never read in this class — confirm
        // whether transcript packets were meant to be sent, or remove the field.
        [SerializeField] private bool sendTranscripts = true;

        // Network components
        private UdpClient _udpClient;
        private IPEndPoint _targetEndPoint;
        private bool _isInitialized;

        // Speech tracking
        private int _speechSequence;
        private bool _isSendingSpeech;
        // Element type inferred from usage (_sentClips.Contains(lastClip)); the original
        // paste lost the generic argument.
        private readonly HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();

        // Packet constants
        // NOTE(review): this magic (0xC0A3, "V3") must match the paired receiver's constant;
        // ConvaiSimpleUDPAudioReceiverV2 in this folder uses 0xC0A1 — verify which receiver
        // this sender is intended to pair with, otherwise every packet will be rejected.
        private const uint MAGIC_NUMBER = 0xC0A3;
        private const byte PACKET_TYPE_AUDIO_START = 0x01;
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
        private const byte PACKET_TYPE_AUDIO_END = 0x03;
        private const byte PACKET_TYPE_FINAL = 0x05;

        // Events. Payloads inferred from the Invoke call sites:
        // OnSpeechTransmission(bool active), OnSpeechSent(string transcript).
        public Action<bool> OnSpeechTransmission;
        public Action<string> OnSpeechSent;

        private void Start()
        {
            InitializeNetwork();
            InitializeConvai();
        }

        private void OnDestroy()
        {
            CleanupNPCSubscriptions();
            CleanupNetwork();
        }

        /// <summary>Creates the UDP client and resolves the configured target endpoint.</summary>
        private void InitializeNetwork()
        {
            try
            {
                _udpClient = new UdpClient();
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                _isInitialized = true;

                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                // Bad IP string or socket failure; _isInitialized stays false and all
                // send paths become no-ops.
                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>
        /// Resolves the source NPC (the active one, if configured) and subscribes to its
        /// events, plus to the manager so a later NPC switch re-wires the subscriptions.
        /// </summary>
        private void InitializeConvai()
        {
            if (useActiveNPC)
            {
                sourceNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
            }

            SubscribeToNPCEvents();

            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
            }
        }

        /// <summary>Hooks the current source NPC's talking/transcript events, if available.</summary>
        private void SubscribeToNPCEvents()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

                ConvaiLogger.Info($"UDP Speech Sender subscribed to NPC: {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            }
            else
            {
                ConvaiLogger.Warn("No source NPC available for speech transmission", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>
        /// Talking started: begin polling for new AudioClips. Talking stopped: send the
        /// FINAL marker.
        /// NOTE(review): SendFinalPacket is fire-and-forget; if a TransmitAudioClip task is
        /// still in flight the FINAL packet can overtake the last clip's end packet —
        /// the receiver tolerates this via its incomplete-clip salvage, but confirm.
        /// </summary>
        private void HandleCharacterTalkingChanged(bool isTalking)
        {
            if (!_isInitialized) return;

            if (isTalking)
            {
                StartCoroutine(MonitorAudioClips());
            }
            else
            {
                _ = SendFinalPacket();
            }
        }

        /// <summary>Debug-logs transcripts as they arrive from the NPC.</summary>
        private void HandleTranscriptAvailable(string transcript)
        {
            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
            {
                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>
        /// Polls the NPC's AudioSource every 100 ms while it is talking; each newly
        /// assigned clip is transmitted exactly once (tracked via _sentClips).
        /// </summary>
        private IEnumerator MonitorAudioClips()
        {
            if (sourceNPC?.AudioManager == null) yield break;

            // Generic argument reconstructed; the original paste lost it.
            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
            AudioClip lastClip = null;

            while (sourceNPC.IsCharacterTalking)
            {
                if (audioSource?.clip != null && audioSource.clip != lastClip)
                {
                    lastClip = audioSource.clip;

                    // HashSet.Add returns false for duplicates, giving one lookup
                    // instead of Contains + Add.
                    if (_sentClips.Add(lastClip))
                    {
                        string transcript = GetRecentTranscript();

                        // Fire-and-forget: transmission runs on the thread pool while
                        // the coroutine keeps polling.
                        _ = TransmitAudioClip(lastClip, transcript);
                    }
                }

                yield return new WaitForSeconds(0.1f); // Check every 100ms
            }

            _sentClips.Clear();
        }

        /// <summary>
        /// Placeholder: clip-to-transcript matching is not implemented; transcripts are
        /// delivered separately via the transcript event.
        /// </summary>
        private string GetRecentTranscript()
        {
            return "";
        }

        /// <summary>
        /// Streams one AudioClip: start packet (metadata), chunk packets (PCM), end
        /// packet — all tagged with the same sequence number, which is only incremented
        /// after the whole clip has been sent.
        /// </summary>
        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
        {
            if (!_isInitialized || audioClip == null) return;

            try
            {
                if (!_isSendingSpeech)
                {
                    _isSendingSpeech = true;
                    OnSpeechTransmission?.Invoke(true);

                    ConvaiLogger.Info($"🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
                }

                int clipSequence = _speechSequence;

                await SendAudioStartPacket(audioClip, transcript, clipSequence);
                await SendAudioClipInChunks(audioClip, clipSequence);
                await SendAudioEndPacket(clipSequence);

                // Increment only after the full clip is out, so every packet of a clip
                // shares one sequence number.
                _speechSequence++;

                OnSpeechSent?.Invoke(transcript);

                if (enableDebugLogging)
                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>Sends the metadata packet announcing a clip (samples, rate, channels, transcript).</summary>
        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
        }

        /// <summary>
        /// Sends the clip's PCM data in packets of at most maxSamplesPerPacket samples,
        /// pacing with a 5 ms delay between packets to avoid bursting the network.
        /// </summary>
        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
        {
            float[] audioData = new float[audioClip.samples];
            audioClip.GetData(audioData, 0);

            int totalSamples = audioData.Length;
            int processedSamples = 0;
            int chunkCount = 0;

            while (processedSamples < totalSamples)
            {
                int remainingSamples = totalSamples - processedSamples;
                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);

                float[] chunkData = new float[currentChunkSize];
                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                processedSamples += currentChunkSize;
                chunkCount++;

                if (enableDebugLogging && chunkCount % 10 == 0)
                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);

                // Small delay to avoid overwhelming the network
                await Task.Delay(5);
            }

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        /// <summary>Sends the per-clip end marker.</summary>
        private async Task SendAudioEndPacket(int sequence)
        {
            byte[] packet = CreateAudioEndPacket(sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        /// <summary>Sends the whole-sequence FINAL marker and flips the transmission state off.</summary>
        private async Task SendFinalPacket()
        {
            if (!_isSendingSpeech) return;

            try
            {
                byte[] packet = CreateFinalPacket();
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                _isSendingSpeech = false;
                OnSpeechTransmission?.Invoke(false);

                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        /// <summary>
        /// Builds an AUDIO_START packet.
        /// Layout: 4B magic | 1B type | 4B sequence | 4B totalSamples | 4B sampleRate |
        /// 4B channels | 4B transcriptLength | N bytes UTF-8 transcript. Header = 25 bytes.
        /// </summary>
        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");

            int headerSize = 25;
            byte[] packet = new byte[headerSize + transcriptBytes.Length];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_START;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
            offset += 4;

            transcriptBytes.CopyTo(packet, offset);

            return packet;
        }

        /// <summary>
        /// Builds an AUDIO_CHUNK packet.
        /// Layout: 4B magic | 1B type | 4B sequence | 4B startSample | 4B sampleCount |
        /// N*2 bytes of 16-bit PCM (floats clamped to [-1, 1] then scaled). Header = 17 bytes.
        /// The frequency parameter is accepted for signature stability but not serialized
        /// (rate travels in the start packet).
        /// </summary>
        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
        {
            int headerSize = 17;
            int audioDataSize = audioData.Length * sizeof(short);
            byte[] packet = new byte[headerSize + audioDataSize];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(startSample).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
            offset += 4;

            // Convert float samples to 16-bit PCM
            for (int i = 0; i < audioData.Length; i++)
            {
                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
                BitConverter.GetBytes(sample).CopyTo(packet, offset);
                offset += 2;
            }

            return packet;
        }

        /// <summary>
        /// Builds the 13-byte AUDIO_END packet (header + 4 zero padding bytes, matching
        /// the receiver's 13-byte minimum packet size).
        /// </summary>
        private byte[] CreateAudioEndPacket(int sequence)
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_END;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        /// <summary>Builds the 13-byte FINAL packet carrying the last sequence number.</summary>
        private byte[] CreateFinalPacket()
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_FINAL;
            offset += 1;

            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        /// <summary>Unsubscribes from NPC and manager events (prevents dangling-handler leaks).</summary>
        private void CleanupNPCSubscriptions()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
            }

            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
            }
        }

        /// <summary>Closes and releases the UDP client.</summary>
        private void CleanupNetwork()
        {
            _udpClient?.Close();
            _udpClient?.Dispose();
            _udpClient = null;
        }

        /// <summary>Re-targets the subscriptions when the manager switches the active NPC.</summary>
        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
        {
            if (!useActiveNPC) return;

            CleanupNPCSubscriptions();

            sourceNPC = newActiveNPC;
            SubscribeToNPCEvents();
        }

        /// <summary>
        /// Re-points the sender at a new destination. Throws FormatException if
        /// <paramref name="ip"/> is not a valid IP address (same as the original behavior).
        /// </summary>
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        // Public state for external observers
        public bool IsSendingSpeech => _isSendingSpeech;
        public bool IsInitialized => _isInitialized;
        public ConvaiNPC SourceNPC => sourceNPC;

        /// <summary>Logs a snapshot of the sender's configuration and transmission state.</summary>
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info($"=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Packets sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
        }
    }
}
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta
new file mode 100644
index 0000000..d8c06de
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: f903e03686cf216469fb4bf1e6c027d0
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant: