diff --git a/Unity-Master/Assets/Scripts/Multiplayer.meta b/Unity-Master/Assets/Scripts/Multiplayer.meta
new file mode 100644
index 0000000..d87a471
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 7bf18a6dad33703489c04336f7f81a3d
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs
new file mode 100644
index 0000000..c63bbc1
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs
@@ -0,0 +1,364 @@
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading;
+using System.Threading.Tasks;
+using Convai.Scripts.Runtime.Core;
+using Convai.Scripts.Runtime.LoggerSystem;
+using Convai.Scripts.Runtime.Utils;
+using UnityEngine;
+
+namespace Convai.Scripts.Runtime.Multiplayer
+{
+    /// <summary>
+    /// Simple UDP Audio Receiver V2 - simulates microphone input by triggering the normal Convai flow.
+    /// This approach is much simpler and more reliable than trying to replicate gRPC calls.
+    /// </summary>
+ public class ConvaiSimpleUDPAudioReceiverV2 : MonoBehaviour
+ {
+ [Header("Network Configuration")]
+ [SerializeField] private int listenPort = 12345;
+ [SerializeField] private bool enableDebugLogging = true;
+
+ [Header("NPC Target")]
+ [SerializeField] private bool useActiveNPC = true;
+ [SerializeField] private ConvaiNPC targetNPC;
+
+ // Events
+ public Action OnAudioReceiving;
+
        // Network components
        private UdpClient _udpListener;                       // bound to listenPort while listening
        private IPEndPoint _remoteEndPoint;                   // endpoint of the most recent sender
        private bool _isListening = false;
        private CancellationTokenSource _cancellationTokenSource;

        // Audio state tracking
        private bool _isReceivingAudio = false;               // a simulated talk session is in progress
        private int _expectedSequence = 0;                    // reserved for ordering checks (only reset/reported here)
        private const uint MAGIC_NUMBER = 0xC0A1; // Simple magic number for packet validation (shared with the sender)

        // Timing for auto-stop
        private float _lastPacketTime;                        // Time.time when the last packet arrived
        private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets

        // Packet structure (matching ConvaiSimpleUDPAudioSender)
        private struct AudioPacketData
        {
            public uint magicNumber;        // always MAGIC_NUMBER for valid packets
            public int sequence;            // sender's running packet counter
            public int sampleCount;         // PCM samples carried (0 for the end signal)
            public int microphonePosition;  // absolute position within the sender's capture stream
            public bool isEndSignal;        // true for the end-of-recording packet
            public short[] audioSamples;    // 16-bit PCM payload (null for the end signal)
            public long timestamp;          // NOTE(review): the sender's wire format has no timestamp field — confirm
        }
+
+ private void Start()
+ {
+ _cancellationTokenSource = new CancellationTokenSource();
+ InitializeNetwork();
+ InitializeConvai();
+
+ // Subscribe to NPC manager events to handle late NPC activation
+ if (ConvaiNPCManager.Instance != null)
+ {
+ ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
+ }
+ }
+
+ private void OnDestroy()
+ {
+ // Unsubscribe from events
+ if (ConvaiNPCManager.Instance != null)
+ {
+ ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
+ }
+
+ StopListening();
+ _cancellationTokenSource?.Cancel();
+ _cancellationTokenSource?.Dispose();
+ }
+
        private void Update()
        {
            // Auto-stop: if no packets have arrived for AUTO_STOP_DELAY seconds, end the
            // simulated talk session so the NPC processes what it has "heard" so far.
            if (_isReceivingAudio && Time.time - _lastPacketTime > AUTO_STOP_DELAY)
            {
                StopTalkingSimulation();
            }
        }
+
+ private void InitializeNetwork()
+ {
+ try
+ {
+ StartListening();
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to initialize UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ private void InitializeConvai()
+ {
+ // Get target NPC
+ if (useActiveNPC)
+ {
+ targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
+ }
+
+ if (targetNPC == null)
+ {
+ ConvaiLogger.Warn("No target NPC found yet, will wait for NPC to become active", ConvaiLogger.LogCategory.Character);
+ }
+ else
+ {
+ ConvaiLogger.Info($"UDP Audio Receiver V2 initialized with NPC: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ public void StartListening()
+ {
+ if (_isListening || _cancellationTokenSource == null)
+ return;
+
+ try
+ {
+ _udpListener = new UdpClient(listenPort);
+ _isListening = true;
+
+ ConvaiLogger.Info($"Simple UDP Audio Receiver V2 listening on port {listenPort}", ConvaiLogger.LogCategory.Character);
+
+ // Start listening for incoming packets
+ _ = ListenForAudioPackets(_cancellationTokenSource.Token);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to start UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ ConvaiLogger.Error($"Stack trace: {ex.StackTrace}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ public void StopListening()
+ {
+ if (!_isListening)
+ return;
+
+ _isListening = false;
+ _udpListener?.Close();
+ _udpListener?.Dispose();
+ _udpListener = null;
+
+ // Stop any ongoing simulation
+ StopTalkingSimulation();
+
+ ConvaiLogger.Info("Stopped UDP Audio Receiver V2", ConvaiLogger.LogCategory.Character);
+ }
+
        /// <summary>
        /// Background receive loop: awaits datagrams and forwards each one to
        /// <see cref="ProcessReceivedPacket"/> until listening stops or the token is cancelled.
        /// </summary>
        /// <param name="cancellationToken">Signalled from OnDestroy to end the loop.</param>
        private async Task ListenForAudioPackets(CancellationToken cancellationToken)
        {
            try
            {
                while (_isListening && !cancellationToken.IsCancellationRequested)
                {
                    // NOTE(review): this ReceiveAsync() overload takes no token, so a pending
                    // receive only ends when StopListening() closes the socket
                    // (surfacing here as ObjectDisposedException).
                    var result = await _udpListener.ReceiveAsync();
                    _remoteEndPoint = result.RemoteEndPoint;

                    await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint);
                }
            }
            catch (ObjectDisposedException)
            {
                // Expected when the socket is closed during shutdown.
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Error in UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
        /// <summary>
        /// Dispatches one received datagram. Valid audio packets refresh the auto-stop timer
        /// and drive the talk simulation; an end-signal packet stops it; anything that fails
        /// parsing is logged as a UTF-8 test message.
        /// NOTE(review): declared async but contains no await, so it completes synchronously.
        /// </summary>
        /// <param name="data">Raw datagram bytes.</param>
        /// <param name="sender">Endpoint the datagram came from (logging only).</param>
        private async Task ProcessReceivedPacket(byte[] data, IPEndPoint sender)
        {
            try
            {
                var packetData = ParseSimpleAudioPacket(data);

                if (packetData.HasValue)
                {
                    var packet = packetData.Value;
                    // Refresh the silence timer checked in Update().
                    // NOTE(review): Time.time is a main-thread-only Unity API; this presumably
                    // resumes on the main thread via Unity's synchronization context — confirm.
                    _lastPacketTime = Time.time;

                    if (enableDebugLogging)
                    {
                        if (packet.isEndSignal)
                            ConvaiLogger.DebugLog($"Received end signal from {sender}", ConvaiLogger.LogCategory.Character);
                        else
                            ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character);
                    }

                    if (packet.isEndSignal)
                    {
                        StopTalkingSimulation();
                        OnAudioReceiving?.Invoke(false);
                    }
                    else
                    {
                        // If this is the first packet, start the talking simulation
                        if (packet.sequence == 0 && !_isReceivingAudio)
                        {
                            StartTalkingSimulation();
                        }

                        OnAudioReceiving?.Invoke(true);
                    }
                }
                else
                {
                    // Not our audio packet format — treat it as a plain-text test message.
                    string message = System.Text.Encoding.UTF8.GetString(data);
                    if (enableDebugLogging)
                        ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
                }
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Error processing received packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
+ private void StartTalkingSimulation()
+ {
+ if (_isReceivingAudio) return;
+
+ MainThreadDispatcher.Instance.RunOnMainThread(() => {
+ // Update target NPC if using active NPC
+ if (useActiveNPC)
+ {
+ targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
+ }
+
+ if (targetNPC == null)
+ {
+ ConvaiLogger.Warn("No target NPC available for audio simulation", ConvaiLogger.LogCategory.Character);
+ return;
+ }
+
+ _isReceivingAudio = true;
+ _expectedSequence = 0;
+
+ // This is the KEY! Simulate a talk key press to trigger normal Convai flow
+ ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true);
+
+ ConvaiLogger.Info($"🎤 Started talking simulation for {targetNPC.characterName} (remote player audio)", ConvaiLogger.LogCategory.Character);
+ });
+ }
+
+ private void StopTalkingSimulation()
+ {
+ if (!_isReceivingAudio) return;
+
+ MainThreadDispatcher.Instance.RunOnMainThread(() => {
+ _isReceivingAudio = false;
+
+ // Simulate talk key release to stop recording
+ ConvaiInputManager.Instance.talkKeyInteract?.Invoke(false);
+
+ ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? "NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character);
+ });
+ }
+
+ private AudioPacketData? ParseSimpleAudioPacket(byte[] data)
+ {
+ if (data.Length < 24) // Minimum header size
+ return null;
+
+ try
+ {
+ int offset = 0;
+
+ // Read magic number
+ uint magic = BitConverter.ToUInt32(data, offset);
+ offset += 4;
+
+ if (magic != MAGIC_NUMBER)
+ return null;
+
+ // Read header
+ int sequence = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ int sampleCount = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ int microphonePosition = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ bool isEndSignal = BitConverter.ToBoolean(data, offset);
+ offset += 1;
+
+ // Skip padding
+ offset += 3;
+
+ long timestamp = BitConverter.ToInt64(data, offset);
+ offset += 8;
+
+ // Read audio data
+ short[] audioSamples = null;
+ if (!isEndSignal && sampleCount > 0)
+ {
+ int audioDataSize = sampleCount * sizeof(short);
+ if (data.Length >= offset + audioDataSize)
+ {
+ audioSamples = new short[sampleCount];
+ Buffer.BlockCopy(data, offset, audioSamples, 0, audioDataSize);
+ }
+ }
+
+ return new AudioPacketData
+ {
+ magicNumber = magic,
+ sequence = sequence,
+ sampleCount = sampleCount,
+ microphonePosition = microphonePosition,
+ isEndSignal = isEndSignal,
+ audioSamples = audioSamples,
+ timestamp = timestamp
+ };
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Error parsing audio packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ return null;
+ }
+ }
+
+ // Event handler for when NPC becomes active
+ private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
+ {
+ if (useActiveNPC && newActiveNPC != null)
+ {
+ targetNPC = newActiveNPC;
+ ConvaiLogger.Info($"UDP Audio Receiver V2 updated target NPC to: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
        // Public read-only accessors for debugging/inspection.
        public bool IsListening => _isListening;           // socket open and receive loop running
        public bool IsReceivingAudio => _isReceivingAudio; // a talk simulation is in progress
        public ConvaiNPC TargetNPC => targetNPC;           // NPC currently receiving simulated input
+
        // Debug methods

        /// <summary>Logs a snapshot of the receiver's state (port, flags, NPC, timing).</summary>
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info($"=== Audio Receiver V2 Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Receiving Audio: {_isReceivingAudio}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target NPC: {(targetNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Expected Sequence: {_expectedSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Last Packet Time: {_lastPacketTime}", ConvaiLogger.LogCategory.Character);
        }
+ }
+}
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta
new file mode 100644
index 0000000..8fb7859
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioReceiver.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: fa35a6fc55fc4ca44b29b3636484bfd2
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs
new file mode 100644
index 0000000..87c1609
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs
@@ -0,0 +1,376 @@
+using System;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading;
+using System.Threading.Tasks;
+using Convai.Scripts.Runtime.LoggerSystem;
+using Convai.Scripts.Runtime.UI;
+using UnityEngine;
+
+namespace Convai.Scripts.Runtime.Multiplayer
+{
+    /// <summary>
+    /// Simplified version of the UDP Audio Sender that avoids complex chunking.
+    /// This version sends smaller, more frequent packets to avoid array-bounds issues.
+    /// </summary>
+ public class ConvaiSimpleUDPAudioSender : MonoBehaviour
+ {
+ [Header("Network Settings")]
+ [SerializeField] private string targetIP = "127.0.0.1";
+ [SerializeField] private int targetPort = 12345;
+
+ [Header("Audio Settings")]
+ [SerializeField] private int recordingFrequency = 16000;
+ [SerializeField] private int recordingLength = 10;
+ [SerializeField] private int samplesPerPacket = 1024; // Number of audio samples per packet (not bytes)
+
+ [Header("UI")]
+ [SerializeField] private KeyCode talkKey = KeyCode.T;
+ [SerializeField] private bool useHoldToTalk = true;
+
+ [Header("Debug")]
+ [SerializeField] private bool enableDebugLogging = true;
+ [SerializeField] private KeyCode testConnectionKey = KeyCode.C;
+
        private UdpClient _udpClient;                          // send socket (unbound; OS picks the local port)
        private IPEndPoint _targetEndPoint;                    // resolved from targetIP/targetPort
        private AudioClip _audioClip;                          // Unity microphone capture clip
        private bool _isRecording = false;
        private CancellationTokenSource _cancellationTokenSource;

        private int _lastMicrophonePosition = 0;               // last capture position already transmitted
        private float[] _audioBuffer;                          // scratch buffer sized for the whole clip
        private string _selectedMicrophone;                    // device name from MicrophoneManager
        private int _packetSequence = 0;                       // running counter stamped into each packet
+
+ public event Action OnRecordingStateChanged;
+
+ private void Start()
+ {
+ InitializeNetwork();
+ InitializeAudio();
+ _cancellationTokenSource = new CancellationTokenSource();
+ }
+
        private void Update()
        {
            // Poll keyboard input every frame.
            HandleInput();
        }
+
        private void OnDestroy()
        {
            // Stop recording first (sends the end-of-recording signal), then release resources.
            StopRecording();
            _cancellationTokenSource?.Cancel();
            _cancellationTokenSource?.Dispose();
            _udpClient?.Close();
        }
+
+ private void InitializeNetwork()
+ {
+ try
+ {
+ _udpClient = new UdpClient();
+ _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
+ ConvaiLogger.Info($"Simple UDP Audio Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to initialize UDP client: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ private void InitializeAudio()
+ {
+ _selectedMicrophone = MicrophoneManager.Instance.SelectedMicrophoneName;
+ _audioBuffer = new float[recordingFrequency * recordingLength];
+
+ if (string.IsNullOrEmpty(_selectedMicrophone))
+ {
+ ConvaiLogger.Error("No microphone selected for UDP audio sender", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ private void HandleInput()
+ {
+ // Handle talk key
+ if (useHoldToTalk)
+ {
+ if (Input.GetKeyDown(talkKey) && !_isRecording)
+ {
+ StartRecording();
+ }
+ else if (Input.GetKeyUp(talkKey) && _isRecording)
+ {
+ StopRecording();
+ }
+ }
+ else
+ {
+ if (Input.GetKeyDown(talkKey))
+ {
+ if (_isRecording)
+ StopRecording();
+ else
+ StartRecording();
+ }
+ }
+
+ // Handle test connection key
+ if (Input.GetKeyDown(testConnectionKey))
+ {
+ TestConnection();
+ }
+ }
+
+ public void StartRecording()
+ {
+ if (_isRecording || string.IsNullOrEmpty(_selectedMicrophone))
+ return;
+
+ try
+ {
+ _audioClip = Microphone.Start(_selectedMicrophone, false, recordingLength, recordingFrequency);
+ _isRecording = true;
+ _lastMicrophonePosition = 0;
+ _packetSequence = 0;
+
+ ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
+ OnRecordingStateChanged?.Invoke(true);
+
+ // Start continuous audio processing
+ _ = ProcessAudioContinuously(_cancellationTokenSource.Token);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to start recording: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ public void StopRecording()
+ {
+ if (!_isRecording)
+ return;
+
+ try
+ {
+ Microphone.End(_selectedMicrophone);
+ _isRecording = false;
+
+ ConvaiLogger.Info("Stopped recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
+ OnRecordingStateChanged?.Invoke(false);
+
+ // Send end-of-recording signal
+ SendEndOfRecordingSignal();
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to stop recording: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ private async Task ProcessAudioContinuously(CancellationToken cancellationToken)
+ {
+ while (_isRecording && !cancellationToken.IsCancellationRequested)
+ {
+ try
+ {
+ await Task.Delay(100, cancellationToken); // Process every 100ms
+
+ if (_audioClip == null || !Microphone.IsRecording(_selectedMicrophone))
+ break;
+
+ int currentMicrophonePosition = Microphone.GetPosition(_selectedMicrophone);
+ int audioDataLength = currentMicrophonePosition - _lastMicrophonePosition;
+
+ if (audioDataLength > 0)
+ {
+ // Get audio data from the microphone clip
+ _audioClip.GetData(_audioBuffer, _lastMicrophonePosition);
+
+ // Send data in smaller chunks to avoid array bounds issues
+ await SendAudioDataInChunks(_audioBuffer, audioDataLength);
+
+ _lastMicrophonePosition = currentMicrophonePosition;
+ }
+ }
+ catch (Exception ex) when (!(ex is OperationCanceledException))
+ {
+ ConvaiLogger.Error($"Error in audio processing: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ break;
+ }
+ }
+ }
+
+ private async Task SendAudioDataInChunks(float[] audioData, int totalSamples)
+ {
+ int processedSamples = 0;
+
+ while (processedSamples < totalSamples)
+ {
+ try
+ {
+ int remainingSamples = totalSamples - processedSamples;
+ int currentChunkSamples = Mathf.Min(samplesPerPacket, remainingSamples);
+
+ // Create a simple packet structure
+ byte[] packet = CreateSimpleAudioPacket(audioData, processedSamples, currentChunkSamples);
+
+ // Send the packet
+ await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+
+ if (enableDebugLogging && _packetSequence % 10 == 0) // Log every 10th packet
+ {
+ ConvaiLogger.DebugLog($"Sent packet {_packetSequence} with {currentChunkSamples} samples", ConvaiLogger.LogCategory.Character);
+ }
+
+ processedSamples += currentChunkSamples;
+ _packetSequence++;
+
+ // Small delay to avoid overwhelming the network
+ await Task.Delay(10);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to send audio chunk: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ break;
+ }
+ }
+ }
+
+ private byte[] CreateSimpleAudioPacket(float[] audioData, int startIndex, int sampleCount)
+ {
+ // Simple packet structure:
+ // 4 bytes: Magic number (0xC0A1)
+ // 4 bytes: Packet sequence number
+ // 4 bytes: Sample count in this packet
+ // 4 bytes: Start position in stream
+ // 1 byte: Flags (0 = normal audio, 1 = end of recording)
+ // N bytes: Audio data (converted to shorts)
+
+ int headerSize = 17; // 4 + 4 + 4 + 4 + 1
+ int audioDataSize = sampleCount * sizeof(short);
+ byte[] packet = new byte[headerSize + audioDataSize];
+
+ int offset = 0;
+
+ // Magic number
+ BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
+ offset += 4;
+
+ // Packet sequence
+ BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset);
+ offset += 4;
+
+ // Sample count
+ BitConverter.GetBytes(sampleCount).CopyTo(packet, offset);
+ offset += 4;
+
+ // Start position
+ BitConverter.GetBytes(_lastMicrophonePosition + startIndex).CopyTo(packet, offset);
+ offset += 4;
+
+ // Flags (0 for normal audio)
+ packet[offset] = 0;
+ offset += 1;
+
+ // Convert audio samples to bytes (same as Convai approach)
+ for (int i = 0; i < sampleCount; i++)
+ {
+ float sample = audioData[startIndex + i];
+ short shortSample = (short)(sample * short.MaxValue);
+ byte[] shortBytes = BitConverter.GetBytes(shortSample);
+ packet[offset] = shortBytes[0];
+ packet[offset + 1] = shortBytes[1];
+ offset += 2;
+ }
+
+ return packet;
+ }
+
+ private void SendEndOfRecordingSignal()
+ {
+ try
+ {
+ // Create end packet
+ byte[] packet = new byte[17]; // Header only, no audio data
+ int offset = 0;
+
+ // Magic number
+ BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
+ offset += 4;
+
+ // Packet sequence
+ BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset);
+ offset += 4;
+
+ // Sample count (0 for end signal)
+ BitConverter.GetBytes(0).CopyTo(packet, offset);
+ offset += 4;
+
+ // Start position
+ BitConverter.GetBytes(_lastMicrophonePosition).CopyTo(packet, offset);
+ offset += 4;
+
+ // Flags (1 for end of recording)
+ packet[offset] = 1;
+
+ _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to send end signal: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
        // Public methods for external control

        /// <summary>Redirects future packets to a new destination at runtime.</summary>
        /// <param name="ip">IPAddress-parsable address string.</param>
        /// <param name="port">Destination UDP port.</param>
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        // True while microphone capture is active.
        public bool IsRecording => _isRecording;
+
        // Debug and testing methods

        /// <summary>
        /// Sends a small UTF-8 probe packet to the configured endpoint and logs the result.
        /// NOTE(review): `async void` means any exception not caught below is unobservable;
        /// tolerable here only because this is a fire-and-forget debug action.
        /// </summary>
        public async void TestConnection()
        {
            if (_udpClient == null)
            {
                ConvaiLogger.Error("UDP client not initialized", ConvaiLogger.LogCategory.Character);
                return;
            }

            try
            {
                ConvaiLogger.Info($"Testing connection to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);

                // Plain-text payload: the receiver logs anything that fails packet parsing.
                string testMessage = "CONVAI_TEST_CONNECTION";
                byte[] testData = System.Text.Encoding.UTF8.GetBytes(testMessage);

                await _udpClient.SendAsync(testData, testData.Length, _targetEndPoint);
                ConvaiLogger.Info("Test packet sent successfully", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Connection test failed: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
        /// <summary>Logs a snapshot of the sender's state (target, client, mic, counters).</summary>
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info($"=== Network Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"UDP Client: {(_udpClient != null ? "Initialized" : "Not initialized")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Recording: {_isRecording}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Microphone: {_selectedMicrophone}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Packets sent: {_packetSequence}", ConvaiLogger.LogCategory.Character);

            // Local endpoint only exists once the socket has actually sent something.
            if (_udpClient?.Client?.LocalEndPoint != null)
            {
                ConvaiLogger.Info($"Local endpoint: {_udpClient.Client.LocalEndPoint}", ConvaiLogger.LogCategory.Character);
            }
        }
+ }
+}
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta
new file mode 100644
index 0000000..52b3811
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiSimpleUDPAudioSender.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: fa5cc94311721d04f8e8821151ffb737
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs
new file mode 100644
index 0000000..8b08d97
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs
@@ -0,0 +1,639 @@
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading;
+using System.Threading.Tasks;
+using Convai.Scripts.Runtime.LoggerSystem;
+using Convai.Scripts.Runtime.Utils;
+using UnityEngine;
+
+namespace Convai.Scripts.Runtime.Multiplayer
+{
+    /// <summary>
+    /// UDP Speech Receiver - receives high-quality Convai speech with proper buffering.
+    /// This version reconstructs the original AudioClip objects for seamless playback.
+    /// </summary>
+ public class ConvaiUDPSpeechReceiver : MonoBehaviour
+ {
+ [Header("Network Configuration")]
+ [SerializeField] private int listenPort = 12346;
+ [SerializeField] private bool enableDebugLogging = true;
+
+ [Header("Audio Playback")]
+ [SerializeField] private AudioSource speechAudioSource;
+ [SerializeField] private bool createAudioSourceIfMissing = true;
+ [SerializeField] private float audioVolume = 1.0f;
+ [SerializeField] private bool spatialAudio = false;
+
+ [Header("UI")]
+ [SerializeField] private bool showTranscripts = true;
+
+ // Network components
+ private UdpClient _udpListener;
+ private IPEndPoint _remoteEndPoint;
+ private bool _isListening = false;
+ private CancellationTokenSource _cancellationTokenSource;
+
+ // Audio reconstruction
+ private Dictionary _incomingClips = new Dictionary();
+ private Queue _playbackQueue = new Queue();
+ private bool _isPlayingSequence = false;
+ private int _currentSequence = 0;
+
        // Packet constants (matching sender V3)
        private const uint MAGIC_NUMBER = 0xC0A3;          // speech-stream magic (distinct from the 0xC0A1 mic stream)
        private const byte PACKET_TYPE_AUDIO_START = 0x01; // clip header: total samples, rate, channels, transcript
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02; // PCM payload for part of a clip
        private const byte PACKET_TYPE_AUDIO_END = 0x03;   // clip finished
        private const byte PACKET_TYPE_TRANSCRIPT = 0x04;  // transcript-only update
        private const byte PACKET_TYPE_FINAL = 0x05;       // whole speech sequence finished
+
+ // Events
+ public Action OnSpeechReceiving;
+ public Action OnTranscriptReceived;
+ public Action OnAudioClipReceived;
+
        // Data structures

        // One parsed datagram from the speech sender (wire layout owned by ParseSpeechPacket).
        private struct SpeechPacket
        {
            public uint magicNumber;     // must equal MAGIC_NUMBER (0xC0A3)
            public byte packetType;      // one of the PACKET_TYPE_* constants
            public int sequence;         // clip sequence number this packet belongs to
            public int totalSamples;     // full clip length in samples (start packets)
            public int sampleRate;       // clip sample rate in Hz
            public int channels;         // clip channel count
            public int startSample;      // chunk's offset within the clip (chunk packets)
            public int chunkSampleCount; // samples carried by this chunk
            public short[] audioSamples; // 16-bit PCM payload (chunk packets)
            public string transcript;    // transcript text (start/transcript packets)
        }
+
        // Accumulates one clip's samples as chunks arrive, until its start and end packets are seen.
        private class IncomingAudioClip
        {
            public int totalSamples;     // announced clip length in samples
            public int sampleRate;       // clip sample rate in Hz
            public int channels;         // clip channel count
            public string transcript;    // transcript accompanying this clip
            public float[] audioData;    // reconstructed samples, preallocated to totalSamples
            public bool isComplete;      // set once both boundary packets have arrived
            public bool hasStart;        // start (header) packet seen
            public bool hasEnd;          // end packet seen
            public int receivedSamples;  // running count of chunk samples copied in

            // Preallocates the sample buffer from the start packet's metadata.
            public IncomingAudioClip(int totalSamples, int sampleRate, int channels, string transcript)
            {
                this.totalSamples = totalSamples;
                this.sampleRate = sampleRate;
                this.channels = channels;
                this.transcript = transcript;
                this.audioData = new float[totalSamples];
                this.isComplete = false;
                this.hasStart = false;
                this.hasEnd = false;
                this.receivedSamples = 0;
            }
        }
+
        // A playback-ready clip (or, when isFinal is true, the end-of-speech marker).
        private struct ReconstructedAudioClip
        {
            public AudioClip audioClip;  // null for the final marker
            public string transcript;    // transcript for this clip (empty for the marker)
            public bool isFinal;         // true = no more clips in this speech sequence
        }
+
        private void Start()
        {
            // Set up playback audio first, then open the socket so packets have somewhere to go.
            _cancellationTokenSource = new CancellationTokenSource();
            InitializeAudio();
            InitializeNetwork();
        }
+
        private void OnDestroy()
        {
            // Close the socket first so the receive loop exits, then cancel/dispose the token source.
            StopListening();
            _cancellationTokenSource?.Cancel();
            _cancellationTokenSource?.Dispose();
        }
+
        private void Update()
        {
            // Drain the queue of reconstructed clips each frame, on the main thread.
            ProcessPlaybackQueue();
        }
+
+ private void InitializeAudio()
+ {
+ if (speechAudioSource == null)
+ {
+ speechAudioSource = GetComponent();
+
+ if (speechAudioSource == null && createAudioSourceIfMissing)
+ {
+ speechAudioSource = gameObject.AddComponent();
+ ConvaiLogger.Info("Created AudioSource for speech playback", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ if (speechAudioSource != null)
+ {
+ speechAudioSource.volume = audioVolume;
+ speechAudioSource.playOnAwake = false;
+ speechAudioSource.spatialBlend = spatialAudio ? 1.0f : 0.0f;
+ }
+ else
+ {
+ ConvaiLogger.Error("No AudioSource available for speech playback", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
        private void InitializeNetwork()
        {
            // Wrap StartListening so a socket failure logs instead of breaking Start().
            try
            {
                StartListening();
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to initialize UDP speech receiver: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
+ public void StartListening()
+ {
+ if (_isListening || _cancellationTokenSource == null)
+ return;
+
+ try
+ {
+ _udpListener = new UdpClient(listenPort);
+ _isListening = true;
+
+ ConvaiLogger.Info($"UDP Speech Receiver listening on port {listenPort}", ConvaiLogger.LogCategory.Character);
+
+ // Start listening for incoming packets
+ _ = ListenForSpeechPackets(_cancellationTokenSource.Token);
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Failed to start UDP speech receiver: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ }
+ }
+
+ public void StopListening()
+ {
+ if (!_isListening)
+ return;
+
+ _isListening = false;
+ _udpListener?.Close();
+ _udpListener?.Dispose();
+ _udpListener = null;
+
+ // Stop any ongoing playback
+ StopSpeechPlayback();
+
+ ConvaiLogger.Info("Stopped UDP Speech Receiver", ConvaiLogger.LogCategory.Character);
+ }
+
        /// <summary>
        /// Background receive loop: awaits datagrams and forwards each one to
        /// <see cref="ProcessReceivedPacket"/> until listening stops or the token is cancelled.
        /// </summary>
        private async Task ListenForSpeechPackets(CancellationToken cancellationToken)
        {
            try
            {
                while (_isListening && !cancellationToken.IsCancellationRequested)
                {
                    // NOTE(review): this ReceiveAsync() overload takes no token; a pending receive
                    // only ends when StopListening() closes the socket (ObjectDisposedException).
                    var result = await _udpListener.ReceiveAsync();
                    _remoteEndPoint = result.RemoteEndPoint;

                    await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint);
                }
            }
            catch (ObjectDisposedException)
            {
                // Expected when the socket is closed during shutdown.
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Error in UDP speech listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
        /// <summary>
        /// Parses one received datagram and dispatches it by packet type (start / chunk /
        /// end / transcript / final). Invalid packets are logged with the magic mismatch.
        /// NOTE(review): declared async but contains no await, so it completes synchronously.
        /// </summary>
        private async Task ProcessReceivedPacket(byte[] data, IPEndPoint sender)
        {
            try
            {
                var packetData = ParseSpeechPacket(data);

                if (packetData.HasValue)
                {
                    var packet = packetData.Value;

                    if (enableDebugLogging)
                    {
                        // Human-readable packet type for the log line.
                        string typeStr = packet.packetType switch
                        {
                            PACKET_TYPE_AUDIO_START => "start",
                            PACKET_TYPE_AUDIO_CHUNK => "chunk",
                            PACKET_TYPE_AUDIO_END => "end",
                            PACKET_TYPE_TRANSCRIPT => "transcript",
                            PACKET_TYPE_FINAL => "final",
                            _ => "unknown"
                        };
                        ConvaiLogger.DebugLog($"📥 Received {typeStr} packet {packet.sequence} from {sender}", ConvaiLogger.LogCategory.Character);
                    }

                    switch (packet.packetType)
                    {
                        case PACKET_TYPE_AUDIO_START:
                            HandleAudioStartPacket(packet);
                            break;

                        case PACKET_TYPE_AUDIO_CHUNK:
                            HandleAudioChunkPacket(packet);
                            break;

                        case PACKET_TYPE_AUDIO_END:
                            HandleAudioEndPacket(packet);
                            break;

                        case PACKET_TYPE_TRANSCRIPT:
                            HandleTranscriptPacket(packet);
                            break;

                        case PACKET_TYPE_FINAL:
                            HandleFinalPacket();
                            break;
                    }
                }
                else
                {
                    if (enableDebugLogging)
                    {
                        // Distinguish "wrong magic" from "too short" when rejecting a packet.
                        if (data.Length >= 4)
                        {
                            uint receivedMagic = BitConverter.ToUInt32(data, 0);
                            ConvaiLogger.Warn($"❌ Invalid speech packet from {sender}. Expected magic: 0x{MAGIC_NUMBER:X}, Got: 0x{receivedMagic:X}", ConvaiLogger.LogCategory.Character);
                        }
                        else
                        {
                            ConvaiLogger.Warn($"❌ Packet too small from {sender}: {data.Length} bytes", ConvaiLogger.LogCategory.Character);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Error processing speech packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
+
+ private void HandleAudioStartPacket(SpeechPacket packet)
+ {
+ // Start new speech sequence if this is the first start packet
+ if (packet.sequence == 0 && !_isPlayingSequence)
+ {
+ StartSpeechReception();
+ }
+
+ // Create new incoming audio clip
+ var incomingClip = new IncomingAudioClip(packet.totalSamples, packet.sampleRate, packet.channels, packet.transcript);
+ incomingClip.hasStart = true;
+
+ _incomingClips[packet.sequence] = incomingClip;
+
+ if (enableDebugLogging)
+ ConvaiLogger.DebugLog($"🎵 Started receiving audio clip {packet.sequence}: {packet.totalSamples} samples, '{packet.transcript}'", ConvaiLogger.LogCategory.Character);
+ }
+
+ private void HandleAudioChunkPacket(SpeechPacket packet)
+ {
+ if (!_incomingClips.ContainsKey(packet.sequence)) return;
+
+ var incomingClip = _incomingClips[packet.sequence];
+
+ // Convert short samples back to float and copy to the correct position
+ if (packet.audioSamples != null && packet.startSample + packet.chunkSampleCount <= incomingClip.totalSamples)
+ {
+ for (int i = 0; i < packet.chunkSampleCount; i++)
+ {
+ int targetIndex = packet.startSample + i;
+ if (targetIndex < incomingClip.audioData.Length)
+ {
+ incomingClip.audioData[targetIndex] = packet.audioSamples[i] / (float)short.MaxValue;
+ }
+ }
+
+ incomingClip.receivedSamples += packet.chunkSampleCount;
+ }
+ }
+
+        /// Handles an AUDIO_END packet: marks the clip's end as seen and, once both
+        /// start and end markers are present, reconstructs the AudioClip, queues it
+        /// for playback, and drops the reassembly buffer.
+        private void HandleAudioEndPacket(SpeechPacket packet)
+        {
+            // Single dictionary lookup instead of ContainsKey followed by the indexer.
+            if (!_incomingClips.TryGetValue(packet.sequence, out var incomingClip)) return;
+
+            incomingClip.hasEnd = true;
+
+            // Completeness is judged on start+end only — individual chunks may have
+            // been lost; missing samples simply stay silent (zeroed buffer).
+            if (incomingClip.hasStart && incomingClip.hasEnd)
+            {
+                incomingClip.isComplete = true;
+
+                CreateAndQueueAudioClip(incomingClip, packet.sequence);
+
+                _incomingClips.Remove(packet.sequence);
+            }
+        }
+
+        /// Handles a TRANSCRIPT packet: forwards the text to listeners on the Unity
+        /// main thread (UDP receive happens on a background thread).
+        private void HandleTranscriptPacket(SpeechPacket packet)
+        {
+            if (!showTranscripts || string.IsNullOrEmpty(packet.transcript)) return;
+
+            string transcript = packet.transcript;
+            MainThreadDispatcher.Instance.RunOnMainThread(() =>
+            {
+                OnTranscriptReceived?.Invoke(transcript);
+
+                if (enableDebugLogging)
+                    ConvaiLogger.Info($"📝 Remote NPC said: '{transcript}'", ConvaiLogger.LogCategory.Character);
+            });
+        }
+
+        /// Handles the FINAL packet: flushes partially-received clips, enqueues an
+        /// end-of-sequence marker so playback knows when to stop, and ends reception.
+        private void HandleFinalPacket()
+        {
+            ProcessIncompleteClips();
+
+            // Sentinel entry — PlayNextAudioClip treats isFinal as "sequence done".
+            var endMarker = new ReconstructedAudioClip
+            {
+                audioClip = null,
+                transcript = "",
+                isFinal = true
+            };
+            _playbackQueue.Enqueue(endMarker);
+
+            StopSpeechReception();
+        }
+
+        /// Salvages clips that never received their end packet: any clip that is at
+        /// least 80% filled is reconstructed and queued anyway, then discarded.
+        private void ProcessIncompleteClips()
+        {
+            // FIX: the list needs an explicit element type (sequence keys are ints);
+            // `new List()` does not compile.
+            var keysToRemove = new List<int>();
+
+            foreach (var kvp in _incomingClips)
+            {
+                var incomingClip = kvp.Value;
+
+                // 80% of the announced samples present is treated as "good enough".
+                if (incomingClip.receivedSamples > incomingClip.totalSamples * 0.8f)
+                {
+                    CreateAndQueueAudioClip(incomingClip, kvp.Key);
+                    keysToRemove.Add(kvp.Key);
+                }
+            }
+
+            // Removal is deferred so the dictionary is never mutated while enumerating.
+            foreach (var key in keysToRemove)
+            {
+                _incomingClips.Remove(key);
+            }
+        }
+
+        /// Builds a Unity AudioClip from a fully (or mostly) received buffer, queues
+        /// it for sequential playback, and raises OnAudioClipReceived.
+        private void CreateAndQueueAudioClip(IncomingAudioClip incomingClip, int sequence)
+        {
+            try
+            {
+                var clip = AudioClip.Create($"RemoteSpeech_{sequence}",
+                    incomingClip.totalSamples, incomingClip.channels, incomingClip.sampleRate, false);
+                clip.SetData(incomingClip.audioData, 0);
+
+                var queued = new ReconstructedAudioClip
+                {
+                    audioClip = clip,
+                    transcript = incomingClip.transcript,
+                    isFinal = false
+                };
+                _playbackQueue.Enqueue(queued);
+
+                OnAudioClipReceived?.Invoke(clip);
+
+                if (enableDebugLogging)
+                    ConvaiLogger.DebugLog($"✅ Reconstructed audio clip {sequence}: {clip.length:F2}s, '{incomingClip.transcript}'", ConvaiLogger.LogCategory.Character);
+            }
+            catch (Exception ex)
+            {
+                ConvaiLogger.Error($"Failed to create audio clip from sequence {sequence}: {ex.Message}", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// Drives sequential playback (expected to be polled, e.g. from Update):
+        /// starts the next queued clip when idle, and advances or stops once the
+        /// current clip has finished playing.
+        private void ProcessPlaybackQueue()
+        {
+            // No output source — nothing to do (mirrors the original null guards).
+            if (speechAudioSource == null) return;
+
+            // Idle but clips are waiting: kick off playback.
+            if (!_isPlayingSequence && _playbackQueue.Count > 0)
+                PlayNextAudioClip();
+
+            // A sequence is active but the source went silent: the clip ended.
+            if (_isPlayingSequence && !speechAudioSource.isPlaying)
+            {
+                if (_playbackQueue.Count > 0)
+                    PlayNextAudioClip();
+                else
+                    _isPlayingSequence = false;
+            }
+        }
+
+        /// Dequeues the next reconstructed clip and plays it on the speech source;
+        /// a final marker instead ends the playback sequence.
+        private void PlayNextAudioClip()
+        {
+            if (speechAudioSource == null || _playbackQueue.Count == 0) return;
+
+            var next = _playbackQueue.Dequeue();
+
+            if (next.isFinal)
+            {
+                _isPlayingSequence = false;
+                ConvaiLogger.Info("🔊 Finished playing remote speech sequence", ConvaiLogger.LogCategory.Character);
+                return;
+            }
+
+            if (next.audioClip == null) return;
+
+            speechAudioSource.clip = next.audioClip;
+            speechAudioSource.Play();
+            _isPlayingSequence = true;
+
+            if (enableDebugLogging)
+                ConvaiLogger.DebugLog($"🔊 Playing remote speech: {next.audioClip.length:F2}s, '{next.transcript}'", ConvaiLogger.LogCategory.Character);
+        }
+
+        /// Resets all reassembly/playback state and notifies listeners that a new
+        /// remote speech sequence has begun.
+        private void StartSpeechReception()
+        {
+            // Clear any state left over from a previous sequence.
+            _playbackQueue.Clear();
+            _incomingClips.Clear();
+            _currentSequence = 0;
+            _isPlayingSequence = false;
+
+            OnSpeechReceiving?.Invoke(true);
+            ConvaiLogger.Info("🔊 Started receiving remote NPC speech", ConvaiLogger.LogCategory.Character);
+        }
+
+        /// Notifies listeners that the remote speech stream has ended.
+        private void StopSpeechReception()
+        {
+            ConvaiLogger.Info("🔊 Stopped receiving remote NPC speech", ConvaiLogger.LogCategory.Character);
+            OnSpeechReceiving?.Invoke(false);
+        }
+
+        /// Immediately halts any in-progress playback and flushes all buffered state.
+        private void StopSpeechPlayback()
+        {
+            if (speechAudioSource != null && speechAudioSource.isPlaying)
+                speechAudioSource.Stop();
+
+            _incomingClips.Clear();
+            _playbackQueue.Clear();
+            _isPlayingSequence = false;
+        }
+
+ // Parses a raw UDP datagram into a SpeechPacket.
+ // Wire layout: 4B magic | 1B packet type | 4B sequence | type-specific payload.
+ // All multi-byte fields are read with BitConverter, which uses the host's native
+ // byte order — NOTE(review): this assumes sender and receiver share endianness
+ // (true in practice for the platforms the paired sender runs on); confirm if
+ // cross-platform peers are ever expected.
+ // Returns null for undersized, wrong-magic, unknown-type, or truncated packets.
+ private SpeechPacket? ParseSpeechPacket(byte[] data)
+ {
+ // NOTE(review): the raw header is 9 bytes (4+1+4); 13 matches the smallest
+ // packet the sender actually emits (END/FINAL carry 4 trailing zero bytes).
+ if (data.Length < 13) // Minimum header size
+ return null;
+
+ try
+ {
+ int offset = 0;
+
+ // Read magic number
+ uint magic = BitConverter.ToUInt32(data, offset);
+ offset += 4;
+
+ // Reject datagrams that are not part of this protocol.
+ if (magic != MAGIC_NUMBER)
+ return null;
+
+ // Read packet type
+ byte packetType = data[offset];
+ offset += 1;
+
+ // Read sequence
+ int sequence = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ var packet = new SpeechPacket
+ {
+ magicNumber = magic,
+ packetType = packetType,
+ sequence = sequence
+ };
+
+ // Parse based on packet type
+ switch (packetType)
+ {
+ case PACKET_TYPE_AUDIO_START:
+ // START payload: totalSamples, sampleRate, channels, transcriptLength (4B each).
+ if (data.Length < offset + 16) return null; // Need additional fields
+
+ packet.totalSamples = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ packet.sampleRate = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ packet.channels = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ int transcriptLength = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ // Transcript is optional; a truncated transcript is silently dropped.
+ if (transcriptLength > 0 && data.Length >= offset + transcriptLength)
+ {
+ packet.transcript = System.Text.Encoding.UTF8.GetString(data, offset, transcriptLength);
+ }
+ break;
+
+ case PACKET_TYPE_AUDIO_CHUNK:
+ // CHUNK payload: startSample, chunkSampleCount (4B each), then 16-bit PCM samples.
+ if (data.Length < offset + 8) return null; // Need start sample + count
+
+ packet.startSample = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ packet.chunkSampleCount = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ // Read audio data
+ // Each sample is 2 bytes; a short payload leaves audioSamples null.
+ if (packet.chunkSampleCount > 0 && data.Length >= offset + packet.chunkSampleCount * 2)
+ {
+ packet.audioSamples = new short[packet.chunkSampleCount];
+ for (int i = 0; i < packet.chunkSampleCount; i++)
+ {
+ packet.audioSamples[i] = BitConverter.ToInt16(data, offset);
+ offset += 2;
+ }
+ }
+ break;
+
+ case PACKET_TYPE_AUDIO_END:
+ case PACKET_TYPE_FINAL:
+ // These packets have no additional data beyond the header
+ break;
+
+ case PACKET_TYPE_TRANSCRIPT:
+ // Similar to start packet transcript handling
+ if (data.Length >= offset + 4)
+ {
+ int transcriptLen = BitConverter.ToInt32(data, offset);
+ offset += 4;
+
+ if (transcriptLen > 0 && data.Length >= offset + transcriptLen)
+ {
+ packet.transcript = System.Text.Encoding.UTF8.GetString(data, offset, transcriptLen);
+ }
+ }
+ break;
+
+ default:
+ // Unknown packet type — treat as noise.
+ return null;
+ }
+
+ return packet;
+ }
+ catch (Exception ex)
+ {
+ ConvaiLogger.Error($"Error parsing speech packet V2: {ex.Message}", ConvaiLogger.LogCategory.Character);
+ return null;
+ }
+ }
+
+ // Public properties for debugging (read-only views over private state)
+ public bool IsListening => _isListening; // UDP listener currently active
+ public bool IsPlayingSequence => _isPlayingSequence; // mid-playback of a received sequence
+ public int QueuedClipCount => _playbackQueue.Count; // reconstructed clips awaiting playback
+ public int IncomingClipCount => _incomingClips.Count; // clips still being reassembled
+
+        // Debug methods
+        /// Dumps the receiver's current network and playback state to the Convai log.
+        public void ShowNetworkStatus()
+        {
+            var category = ConvaiLogger.LogCategory.Character;
+
+            ConvaiLogger.Info($"=== Speech Receiver Status ===", category);
+            ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", category);
+            ConvaiLogger.Info($"Playing Sequence: {_isPlayingSequence}", category);
+            ConvaiLogger.Info($"Current Sequence: {_currentSequence}", category);
+            ConvaiLogger.Info($"Queued Clips: {_playbackQueue.Count}", category);
+            ConvaiLogger.Info($"Incoming Clips: {_incomingClips.Count}", category);
+
+            if (speechAudioSource != null)
+            {
+                ConvaiLogger.Info($"Audio Source: {speechAudioSource.name} (Volume: {speechAudioSource.volume})", category);
+            }
+        }
+ }
+}
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta
new file mode 100644
index 0000000..9329146
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechReceiver.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 8fe3e5348bc484f44be079d6aaf6e17e
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
new file mode 100644
index 0000000..dd9ef81
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
@@ -0,0 +1,479 @@
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading.Tasks;
+using Convai.Scripts.Runtime.Core;
+using Convai.Scripts.Runtime.LoggerSystem;
+using Convai.Scripts.Runtime.Utils;
+using UnityEngine;
+using System.Collections;
+
+namespace Convai.Scripts.Runtime.Multiplayer
+{
+    /// <summary>
+    /// UDP Speech Sender - Simple and reliable approach using events.
+    /// Hooks into AudioManager events to detect when the NPC starts talking, then
+    /// streams each AudioClip to a remote peer as start/chunk/end UDP packets.
+    /// </summary>
+    public class ConvaiUDPSpeechSender : MonoBehaviour
+    {
+        [Header("Network Configuration")]
+        [SerializeField] private string targetIP = "127.0.0.1";
+        [SerializeField] private int targetPort = 12346;
+        [SerializeField] private bool enableDebugLogging = true;
+
+        [Header("NPC Source")]
+        [SerializeField] private bool useActiveNPC = true;
+        [SerializeField] private ConvaiNPC sourceNPC;
+
+        [Header("Audio Settings")]
+        [SerializeField] private int maxSamplesPerPacket = 8192;
+        // NOTE(review): not read anywhere in this class yet — transcripts are only logged.
+        [SerializeField] private bool sendTranscripts = true;
+
+        // Network components
+        private UdpClient _udpClient;
+        private IPEndPoint _targetEndPoint;
+        private bool _isInitialized = false;
+
+        // Speech tracking
+        private int _speechSequence = 0;        // one sequence number per transmitted clip
+        private bool _isSendingSpeech = false;  // true from first clip until the FINAL packet
+        // FIX: the set needs an explicit element type (it tracks already-sent clips).
+        private HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();
+
+        // Packet constants
+        // NOTE(review): 0xC0A3 must match the paired receiver's magic constant —
+        // confirm against ConvaiUDPSpeechReceiver before deploying.
+        private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
+        private const byte PACKET_TYPE_AUDIO_START = 0x01;
+        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
+        private const byte PACKET_TYPE_AUDIO_END = 0x03;
+        private const byte PACKET_TYPE_FINAL = 0x05;
+
+        // Events
+        // FIX: typed delegates — transmission state is a bool, sent payload is the transcript.
+        public Action<bool> OnSpeechTransmission;
+        public Action<string> OnSpeechSent;
+
+        private void Start()
+        {
+            InitializeNetwork();
+            InitializeConvai();
+        }
+
+        private void OnDestroy()
+        {
+            // Unsubscribe first so no callback fires against a closed socket.
+            CleanupNPCSubscriptions();
+            CleanupNetwork();
+        }
+
+        /// <summary>Creates the UDP client and resolves the target endpoint.</summary>
+        private void InitializeNetwork()
+        {
+            try
+            {
+                _udpClient = new UdpClient();
+                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
+                _isInitialized = true;
+
+                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
+            }
+            catch (Exception ex)
+            {
+                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// <summary>Resolves the source NPC and wires up Convai event subscriptions.</summary>
+        private void InitializeConvai()
+        {
+            // Get target NPC
+            if (useActiveNPC)
+            {
+                sourceNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
+            }
+
+            SubscribeToNPCEvents();
+
+            // Subscribe to NPC manager events for late NPC activation
+            if (ConvaiNPCManager.Instance != null)
+            {
+                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
+            }
+        }
+
+        /// <summary>Hooks talking-state and transcript events on the current source NPC.</summary>
+        private void SubscribeToNPCEvents()
+        {
+            if (sourceNPC?.AudioManager != null)
+            {
+                // Hook into the character talking events
+                sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
+                sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;
+
+                ConvaiLogger.Info($"UDP Speech Sender subscribed to NPC: {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
+            }
+            else
+            {
+                ConvaiLogger.Warn("No source NPC available for speech transmission", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// <summary>
+        /// Starts clip monitoring when the NPC begins talking and sends the FINAL
+        /// packet (fire-and-forget) when it stops.
+        /// </summary>
+        private void HandleCharacterTalkingChanged(bool isTalking)
+        {
+            if (!_isInitialized) return;
+
+            if (isTalking)
+            {
+                // Start monitoring for audio clips.
+                // NOTE(review): a rapid false->true->false toggle could start a second
+                // coroutine before the first exits its loop — confirm AudioManager
+                // never fires overlapping "true" notifications.
+                StartCoroutine(MonitorAudioClips());
+            }
+            else
+            {
+                // End speech transmission (fire-and-forget; errors are logged inside).
+                _ = SendFinalPacket();
+            }
+        }
+
+        /// <summary>Logs incoming transcript fragments for debugging.</summary>
+        private void HandleTranscriptAvailable(string transcript)
+        {
+            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
+            {
+                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// <summary>
+        /// Polls the NPC's AudioSource while it is talking; each newly assigned clip
+        /// is transmitted exactly once.
+        /// </summary>
+        private IEnumerator MonitorAudioClips()
+        {
+            if (sourceNPC?.AudioManager == null) yield break;
+
+            // FIX: GetComponent needs its type argument to compile.
+            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
+            AudioClip lastClip = null;
+
+            while (sourceNPC.IsCharacterTalking)
+            {
+                // NOTE(review): `?.` on a UnityEngine.Object bypasses Unity's
+                // destroyed-object check — acceptable here since the component is not
+                // expected to be destroyed mid-speech, but worth confirming.
+                if (audioSource?.clip != null && audioSource.clip != lastClip)
+                {
+                    // New clip detected!
+                    lastClip = audioSource.clip;
+
+                    // Only send if we haven't sent this clip before
+                    if (!_sentClips.Contains(lastClip))
+                    {
+                        _sentClips.Add(lastClip);
+
+                        // Get the transcript from the most recent available transcript
+                        string transcript = GetRecentTranscript();
+
+                        // Send this clip (fire-and-forget; packets for consecutive clips
+                        // could interleave if a clip takes longer than 100ms to send —
+                        // the per-clip sequence number lets the receiver reassemble).
+                        _ = TransmitAudioClip(lastClip, transcript);
+                    }
+                }
+
+                yield return new WaitForSeconds(0.1f); // Check every 100ms
+            }
+
+            // Clear sent clips when done
+            _sentClips.Clear();
+        }
+
+        /// <summary>
+        /// Placeholder for clip-to-transcript matching; transcripts currently arrive
+        /// only via the transcript event, so this always returns "".
+        /// </summary>
+        private string GetRecentTranscript()
+        {
+            // This is a simple approach - in a more complex setup you might want to match clips to transcripts
+            return ""; // Transcripts come via the transcript event
+        }
+
+        /// <summary>
+        /// Streams one AudioClip: START packet with metadata, PCM chunks, then an END
+        /// packet — all tagged with the same sequence number.
+        /// </summary>
+        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
+        {
+            if (!_isInitialized || audioClip == null) return;
+
+            try
+            {
+                // Start transmission if not already started
+                if (!_isSendingSpeech)
+                {
+                    _isSendingSpeech = true;
+                    OnSpeechTransmission?.Invoke(true);
+
+                    ConvaiLogger.Info($"🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
+                }
+
+                // Use the current speech sequence for this entire clip
+                int clipSequence = _speechSequence;
+
+                // Send start packet with metadata
+                await SendAudioStartPacket(audioClip, transcript, clipSequence);
+
+                // Send audio data in chunks (all with the same sequence)
+                await SendAudioClipInChunks(audioClip, clipSequence);
+
+                // Send end packet for this clip (with the same sequence)
+                await SendAudioEndPacket(clipSequence);
+
+                // Only increment sequence after the entire clip is sent
+                _speechSequence++;
+
+                OnSpeechSent?.Invoke(transcript);
+
+                if (enableDebugLogging)
+                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
+            }
+            catch (Exception ex)
+            {
+                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// <summary>Sends the clip's START packet (sample count, rate, channels, transcript).</summary>
+        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
+        {
+            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
+            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+
+            if (enableDebugLogging)
+                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
+        }
+
+        /// <summary>
+        /// Reads the clip's float samples and sends them as 16-bit PCM chunks of at
+        /// most maxSamplesPerPacket samples, pacing with a small delay between sends.
+        /// </summary>
+        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
+        {
+            // Get all audio data
+            float[] audioData = new float[audioClip.samples];
+            audioClip.GetData(audioData, 0);
+
+            // Send in chunks
+            int totalSamples = audioData.Length;
+            int processedSamples = 0;
+            int chunkCount = 0;
+
+            while (processedSamples < totalSamples)
+            {
+                int remainingSamples = totalSamples - processedSamples;
+                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);
+
+                float[] chunkData = new float[currentChunkSize];
+                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);
+
+                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
+                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+
+                processedSamples += currentChunkSize;
+                chunkCount++;
+
+                if (enableDebugLogging && chunkCount % 10 == 0)
+                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);
+
+                // Small delay to avoid overwhelming the network
+                await Task.Delay(5);
+            }
+
+            if (enableDebugLogging)
+                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
+        }
+
+        /// <summary>Sends the clip's END packet for the given sequence.</summary>
+        private async Task SendAudioEndPacket(int sequence)
+        {
+            byte[] packet = CreateAudioEndPacket(sequence);
+            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+
+            if (enableDebugLogging)
+                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
+        }
+
+        /// <summary>
+        /// Sends the FINAL packet ending the whole speech transmission. No-op unless
+        /// a transmission is in progress.
+        /// </summary>
+        private async Task SendFinalPacket()
+        {
+            if (!_isSendingSpeech) return;
+
+            try
+            {
+                byte[] packet = CreateFinalPacket();
+                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
+
+                _isSendingSpeech = false;
+                OnSpeechTransmission?.Invoke(false);
+
+                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
+            }
+            catch (Exception ex)
+            {
+                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
+            }
+        }
+
+        /// <summary>Builds a START packet: 25-byte header plus UTF-8 transcript bytes.</summary>
+        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
+        {
+            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");
+
+            // Packet structure:
+            // 4 bytes: Magic number
+            // 1 byte: Packet type (0x01 = audio start)
+            // 4 bytes: Sequence number
+            // 4 bytes: Total samples in clip
+            // 4 bytes: Sample rate
+            // 4 bytes: Channels
+            // 4 bytes: Transcript length
+            // N bytes: Transcript (UTF-8)
+
+            int headerSize = 25; // 4+1+4+4+4+4+4
+            byte[] packet = new byte[headerSize + transcriptBytes.Length];
+
+            int offset = 0;
+
+            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
+            offset += 4;
+
+            packet[offset] = PACKET_TYPE_AUDIO_START;
+            offset += 1;
+
+            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
+            offset += 4;
+
+            transcriptBytes.CopyTo(packet, offset);
+
+            return packet;
+        }
+
+        /// <summary>Builds a CHUNK packet: 17-byte header plus samples as 16-bit PCM.</summary>
+        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
+        {
+            // Packet structure:
+            // 4 bytes: Magic number
+            // 1 byte: Packet type (0x02 = audio chunk)
+            // 4 bytes: Sequence number
+            // 4 bytes: Start sample position
+            // 4 bytes: Sample count in this chunk
+            // N bytes: Audio data (as 16-bit PCM)
+
+            int headerSize = 17; // 4+1+4+4+4
+            int audioDataSize = audioData.Length * sizeof(short);
+            byte[] packet = new byte[headerSize + audioDataSize];
+
+            int offset = 0;
+
+            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
+            offset += 4;
+
+            packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
+            offset += 1;
+
+            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(startSample).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
+            offset += 4;
+
+            // Convert float samples to 16-bit PCM (clamped to [-1, 1]).
+            for (int i = 0; i < audioData.Length; i++)
+            {
+                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
+                BitConverter.GetBytes(sample).CopyTo(packet, offset);
+                offset += 2;
+            }
+
+            return packet;
+        }
+
+        /// <summary>Builds an END packet: header plus 4 zero bytes (no payload).</summary>
+        private byte[] CreateAudioEndPacket(int sequence)
+        {
+            byte[] packet = new byte[13]; // Header only
+
+            int offset = 0;
+
+            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
+            offset += 4;
+
+            packet[offset] = PACKET_TYPE_AUDIO_END;
+            offset += 1;
+
+            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data
+
+            return packet;
+        }
+
+        /// <summary>Builds the FINAL packet, carrying the current sequence counter.</summary>
+        private byte[] CreateFinalPacket()
+        {
+            byte[] packet = new byte[13]; // Header only
+
+            int offset = 0;
+
+            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
+            offset += 4;
+
+            packet[offset] = PACKET_TYPE_FINAL;
+            offset += 1;
+
+            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
+            offset += 4;
+
+            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data
+
+            return packet;
+        }
+
+        /// <summary>Removes all event subscriptions (NPC and NPC-manager).</summary>
+        private void CleanupNPCSubscriptions()
+        {
+            if (sourceNPC?.AudioManager != null)
+            {
+                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
+                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
+            }
+
+            if (ConvaiNPCManager.Instance != null)
+            {
+                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
+            }
+        }
+
+        /// <summary>Closes and releases the UDP client.</summary>
+        private void CleanupNetwork()
+        {
+            _udpClient?.Close();
+            _udpClient?.Dispose();
+            _udpClient = null;
+        }
+
+        /// <summary>Re-targets event subscriptions when the active NPC changes.</summary>
+        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
+        {
+            if (!useActiveNPC) return;
+
+            // Cleanup old subscriptions
+            CleanupNPCSubscriptions();
+
+            // Update to new NPC
+            sourceNPC = newActiveNPC;
+            SubscribeToNPCEvents();
+        }
+
+        /// <summary>Points the sender at a new IP/port at runtime.</summary>
+        public void SetTargetEndpoint(string ip, int port)
+        {
+            targetIP = ip;
+            targetPort = port;
+            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
+        }
+
+        // Read-only views over private state, for debugging/UI.
+        public bool IsSendingSpeech => _isSendingSpeech;
+        public bool IsInitialized => _isInitialized;
+        public ConvaiNPC SourceNPC => sourceNPC;
+
+        // Debug methods
+        /// <summary>Dumps the sender's current configuration and state to the Convai log.</summary>
+        public void ShowNetworkStatus()
+        {
+            ConvaiLogger.Info($"=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Packets sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
+            ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
+        }
+    }
+}
diff --git a/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta
new file mode 100644
index 0000000..d8c06de
--- /dev/null
+++ b/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: f903e03686cf216469fb4bf1e6c027d0
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant: