added multiplayer scripts
This commit is contained in:
8
Unity-Master/Assets/Scripts/Multiplayer.meta
Normal file
8
Unity-Master/Assets/Scripts/Multiplayer.meta
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: 7bf18a6dad33703489c04336f7f81a3d
|
||||||
|
folderAsset: yes
|
||||||
|
DefaultImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
@ -0,0 +1,364 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Sockets;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Convai.Scripts.Runtime.Core;
|
||||||
|
using Convai.Scripts.Runtime.LoggerSystem;
|
||||||
|
using Convai.Scripts.Runtime.Utils;
|
||||||
|
using UnityEngine;
|
||||||
|
|
||||||
|
namespace Convai.Scripts.Runtime.Multiplayer
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Simple UDP Audio Receiver V2 - Simulates microphone input by triggering normal Convai flow
|
||||||
|
/// This approach is much simpler and more reliable than trying to replicate gRPC calls
|
||||||
|
/// </summary>
|
||||||
|
public class ConvaiSimpleUDPAudioReceiverV2 : MonoBehaviour
|
||||||
|
{
|
||||||
|
[Header("Network Configuration")]
|
||||||
|
[SerializeField] private int listenPort = 12345;
|
||||||
|
[SerializeField] private bool enableDebugLogging = true;
|
||||||
|
|
||||||
|
[Header("NPC Target")]
|
||||||
|
[SerializeField] private bool useActiveNPC = true;
|
||||||
|
[SerializeField] private ConvaiNPC targetNPC;
|
||||||
|
|
||||||
|
// Events
|
||||||
|
public Action<bool> OnAudioReceiving;
|
||||||
|
|
||||||
|
// Network components
|
||||||
|
private UdpClient _udpListener;
|
||||||
|
private IPEndPoint _remoteEndPoint;
|
||||||
|
private bool _isListening = false;
|
||||||
|
private CancellationTokenSource _cancellationTokenSource;
|
||||||
|
|
||||||
|
// Audio state tracking
|
||||||
|
private bool _isReceivingAudio = false;
|
||||||
|
private int _expectedSequence = 0;
|
||||||
|
private const uint MAGIC_NUMBER = 0xC0A1; // Simple magic number for packet validation
|
||||||
|
|
||||||
|
// Timing for auto-stop
|
||||||
|
private float _lastPacketTime;
|
||||||
|
private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets
|
||||||
|
|
||||||
|
/// <summary>
/// Decoded form of a single datagram, as produced by ParseSimpleAudioPacket.
/// The wire format is intended to match ConvaiSimpleUDPAudioSender.
/// </summary>
private struct AudioPacketData
{
    /// <summary>Leading marker read from the packet; the parser compares it with MAGIC_NUMBER.</summary>
    public uint magicNumber;

    /// <summary>Sequence number read from the packet header.</summary>
    public int sequence;

    /// <summary>Number of audio samples the header announces.</summary>
    public int sampleCount;

    /// <summary>Position value read from the packet header.</summary>
    public int microphonePosition;

    /// <summary>True when the packet marks the end of a recording.</summary>
    public bool isEndSignal;

    /// <summary>Audio payload; left null for end signals, empty packets or short payloads.</summary>
    public short[] audioSamples;

    /// <summary>Timestamp value filled in by the parser.</summary>
    public long timestamp;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity start hook: creates the cancellation source, opens the UDP listener,
/// resolves the initial target NPC and subscribes to active-NPC changes.
/// </summary>
private void Start()
{
    _cancellationTokenSource = new CancellationTokenSource();
    InitializeNetwork();
    InitializeConvai();

    // The NPC may only become active later, so keep listening for changes.
    if (ConvaiNPCManager.Instance == null)
    {
        return;
    }

    ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity teardown hook: detaches from the NPC manager, stops the listener
/// and then cancels and disposes the cancellation source.
/// </summary>
private void OnDestroy()
{
    if (ConvaiNPCManager.Instance != null)
    {
        ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
    }

    StopListening();

    if (_cancellationTokenSource != null)
    {
        _cancellationTokenSource.Cancel();
    }

    if (_cancellationTokenSource != null)
    {
        _cancellationTokenSource.Dispose();
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Per-frame check that ends the talking simulation once no packet has
/// arrived for longer than AUTO_STOP_DELAY seconds.
/// </summary>
private void Update()
{
    if (!_isReceivingAudio)
    {
        return;
    }

    float secondsSinceLastPacket = Time.time - _lastPacketTime;
    if (secondsSinceLastPacket > AUTO_STOP_DELAY)
    {
        StopTalkingSimulation();
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Starts the UDP listener and logs any exception that escapes StartListening.
/// </summary>
private void InitializeNetwork()
{
    try
    {
        StartListening();
    }
    catch (Exception error)
    {
        string reason = error.Message;
        ConvaiLogger.Error($"Failed to initialize UDP listener: {reason}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Resolves the NPC that simulated talk input is aimed at and logs the outcome.
/// When useActiveNPC is set, the serialized target is replaced by the manager's active NPC.
/// </summary>
private void InitializeConvai()
{
    if (useActiveNPC)
    {
        targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
    }

    if (targetNPC != null)
    {
        ConvaiLogger.Info($"UDP Audio Receiver V2 initialized with NPC: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character);
        return;
    }

    ConvaiLogger.Warn("No target NPC found yet, will wait for NPC to become active", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Binds a UDP client to listenPort and starts the background receive loop.
/// Does nothing when already listening or when the cancellation source has not been created yet.
/// </summary>
public void StartListening()
{
    if (_isListening)
    {
        return;
    }

    if (_cancellationTokenSource == null)
    {
        return;
    }

    try
    {
        _udpListener = new UdpClient(listenPort);
        _isListening = true;

        ConvaiLogger.Info($"Simple UDP Audio Receiver V2 listening on port {listenPort}", ConvaiLogger.LogCategory.Character);

        // Fire and forget: the loop ends when listening stops or the token is cancelled.
        CancellationToken token = _cancellationTokenSource.Token;
        _ = ListenForAudioPackets(token);
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Failed to start UDP listener: {error.Message}", ConvaiLogger.LogCategory.Character);
        ConvaiLogger.Error($"Stack trace: {error.StackTrace}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Closes the UDP listener, ends any running talking simulation and logs the stop.
/// Does nothing when the receiver is not listening.
/// </summary>
public void StopListening()
{
    if (!_isListening)
    {
        return;
    }

    _isListening = false;

    if (_udpListener != null)
    {
        _udpListener.Close();
        _udpListener.Dispose();
    }

    _udpListener = null;

    // Release the simulated talk key if a remote speaker was still active.
    StopTalkingSimulation();

    ConvaiLogger.Info("Stopped UDP Audio Receiver V2", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Receive loop: awaits datagrams on the UDP listener and hands each one to
/// ProcessReceivedPacket until listening stops or the token is cancelled.
/// </summary>
/// <param name="cancellationToken">Token checked before each receive.</param>
/// <returns>A task that completes when the loop exits.</returns>
private async Task ListenForAudioPackets(CancellationToken cancellationToken)
{
    // Use a local reference: StopListening() sets the field to null while a
    // receive may still be pending, which previously caused a NullReferenceException here.
    UdpClient listener = _udpListener;
    if (listener == null)
    {
        return;
    }

    try
    {
        while (_isListening && !cancellationToken.IsCancellationRequested)
        {
            var result = await listener.ReceiveAsync();
            _remoteEndPoint = result.RemoteEndPoint;

            await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint);
        }
    }
    catch (ObjectDisposedException)
    {
        // Normal when stopping: the socket was closed under the pending receive.
    }
    catch (Exception) when (!_isListening || cancellationToken.IsCancellationRequested)
    {
        // Closing the socket can also surface as a SocketException on some
        // runtimes; during shutdown that is expected and not worth an error log.
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Error in UDP listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Handles one received datagram. Valid audio packets refresh the last-packet time
/// and start or stop the talking simulation; anything else is treated as a text test message.
/// </summary>
/// <param name="data">Raw datagram bytes.</param>
/// <param name="sender">Endpoint the datagram came from (used for logging only).</param>
private async Task ProcessReceivedPacket(byte[] data, IPEndPoint sender)
{
    try
    {
        AudioPacketData? parsed = ParseSimpleAudioPacket(data);

        if (!parsed.HasValue)
        {
            // Not our audio packet format, might be a test message
            string message = System.Text.Encoding.UTF8.GetString(data);
            if (enableDebugLogging)
            {
                ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
            }

            return;
        }

        AudioPacketData packet = parsed.Value;
        _lastPacketTime = Time.time;

        if (packet.isEndSignal)
        {
            if (enableDebugLogging)
            {
                ConvaiLogger.DebugLog($"Received end signal from {sender}", ConvaiLogger.LogCategory.Character);
            }

            StopTalkingSimulation();
            OnAudioReceiving?.Invoke(false);
            return;
        }

        if (enableDebugLogging)
        {
            ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character);
        }

        // Only the first packet of a stream starts the talking simulation.
        bool isFirstPacket = packet.sequence == 0;
        if (isFirstPacket && !_isReceivingAudio)
        {
            StartTalkingSimulation();
        }

        OnAudioReceiving?.Invoke(true);
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Error processing received packet: {error.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Queues a main-thread action that marks audio as being received and fires the
/// talk-key "pressed" callback. The action is skipped when no target NPC is available.
/// </summary>
private void StartTalkingSimulation()
{
    if (_isReceivingAudio)
    {
        return;
    }

    MainThreadDispatcher.Instance.RunOnMainThread(() =>
    {
        // Re-resolve the target when following the active NPC.
        if (useActiveNPC)
        {
            targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
        }

        if (targetNPC != null)
        {
            _isReceivingAudio = true;
            _expectedSequence = 0;

            // Simulating a talk key press hands control to the normal Convai input flow.
            ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true);

            ConvaiLogger.Info($"🎤 Started talking simulation for {targetNPC.characterName} (remote player audio)", ConvaiLogger.LogCategory.Character);
        }
        else
        {
            ConvaiLogger.Warn("No target NPC available for audio simulation", ConvaiLogger.LogCategory.Character);
        }
    });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Ends the talking simulation: clears the receiving flag and queues a main-thread
/// action that fires the talk-key "released" callback.
/// Does nothing when no simulation is active.
/// </summary>
private void StopTalkingSimulation()
{
    if (!_isReceivingAudio)
    {
        return;
    }

    // Clear the flag immediately rather than inside the queued action. Update()
    // and incoming end signals both check it, so deferring the reset could queue
    // several talk-key releases before the first one runs.
    _isReceivingAudio = false;

    MainThreadDispatcher.Instance.RunOnMainThread(() =>
    {
        // Simulate talk key release to stop recording. The input manager may
        // already be gone during teardown, so guard against a null instance.
        ConvaiInputManager.Instance?.talkKeyInteract?.Invoke(false);

        ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? "NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character);
    });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decodes a datagram written by ConvaiSimpleUDPAudioSender.
/// Layout: magic (4 bytes), sequence (4), sample count (4), stream position (4),
/// flags (1; non-zero = end of recording), followed by 16-bit samples.
/// </summary>
/// <param name="data">Raw datagram bytes.</param>
/// <returns>
/// The decoded packet, or null when the data is too short, carries a different
/// magic number, or fails to parse. audioSamples stays null for end signals,
/// empty packets and payloads shorter than the announced sample count.
/// </returns>
private AudioPacketData? ParseSimpleAudioPacket(byte[] data)
{
    // The sender emits a 17-byte header with no padding and no timestamp.
    // Requiring more would reject its end-of-recording packet and misalign the audio payload.
    const int headerSize = 4 + 4 + 4 + 4 + 1;

    if (data == null || data.Length < headerSize)
    {
        return null;
    }

    try
    {
        int offset = 0;

        uint magic = BitConverter.ToUInt32(data, offset);
        offset += 4;

        if (magic != MAGIC_NUMBER)
        {
            return null;
        }

        int sequence = BitConverter.ToInt32(data, offset);
        offset += 4;

        int sampleCount = BitConverter.ToInt32(data, offset);
        offset += 4;

        int microphonePosition = BitConverter.ToInt32(data, offset);
        offset += 4;

        bool isEndSignal = data[offset] != 0;
        offset += 1;

        short[] audioSamples = null;
        if (!isEndSignal && sampleCount > 0)
        {
            // Compare in sample units so a huge announced count cannot overflow
            // the byte-size computation and trigger an oversized allocation.
            int availableSamples = (data.Length - offset) / sizeof(short);
            if (sampleCount <= availableSamples)
            {
                audioSamples = new short[sampleCount];
                Buffer.BlockCopy(data, offset, audioSamples, 0, sampleCount * sizeof(short));
            }
        }

        return new AudioPacketData
        {
            magicNumber = magic,
            sequence = sequence,
            sampleCount = sampleCount,
            microphonePosition = microphonePosition,
            isEndSignal = isEndSignal,
            audioSamples = audioSamples,
            timestamp = 0 // not part of the sender's wire format
        };
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Error parsing audio packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
        return null;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Callback for the NPC manager's active-NPC change notification.
/// Adopts the new NPC as the target when the receiver follows the active NPC.
/// </summary>
/// <param name="newActiveNPC">The NPC that became active; ignored when null.</param>
private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
{
    if (!useActiveNPC || newActiveNPC == null)
    {
        return;
    }

    targetNPC = newActiveNPC;
    ConvaiLogger.Info($"UDP Audio Receiver V2 updated target NPC to: {targetNPC.characterName}", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
// Public properties for debugging
|
||||||
|
public bool IsListening => _isListening;
|
||||||
|
public bool IsReceivingAudio => _isReceivingAudio;
|
||||||
|
public ConvaiNPC TargetNPC => targetNPC;
|
||||||
|
|
||||||
|
// Debug methods
|
||||||
|
/// <summary>
/// Logs a snapshot of the receiver state (listening, receiving, target NPC,
/// expected sequence and last packet time) for debugging.
/// </summary>
public void ShowNetworkStatus()
{
    var category = ConvaiLogger.LogCategory.Character;
    var npcName = targetNPC?.characterName ?? "None";

    ConvaiLogger.Info("=== Audio Receiver V2 Status ===", category);
    ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", category);
    ConvaiLogger.Info($"Receiving Audio: {_isReceivingAudio}", category);
    ConvaiLogger.Info($"Target NPC: {npcName}", category);
    ConvaiLogger.Info($"Expected Sequence: {_expectedSequence}", category);
    ConvaiLogger.Info($"Last Packet Time: {_lastPacketTime}", category);
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: fa35a6fc55fc4ca44b29b3636484bfd2
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
@ -0,0 +1,376 @@
|
|||||||
|
using System;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Sockets;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Convai.Scripts.Runtime.LoggerSystem;
|
||||||
|
using Convai.Scripts.Runtime.UI;
|
||||||
|
using UnityEngine;
|
||||||
|
|
||||||
|
namespace Convai.Scripts.Runtime.Multiplayer
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Simplified version of UDP Audio Sender that avoids complex chunking
|
||||||
|
/// This version sends smaller, more frequent packets to avoid array bounds issues
|
||||||
|
/// </summary>
|
||||||
|
public class ConvaiSimpleUDPAudioSender : MonoBehaviour
|
||||||
|
{
|
||||||
|
[Header("Network Settings")]
|
||||||
|
[SerializeField] private string targetIP = "127.0.0.1";
|
||||||
|
[SerializeField] private int targetPort = 12345;
|
||||||
|
|
||||||
|
[Header("Audio Settings")]
|
||||||
|
[SerializeField] private int recordingFrequency = 16000;
|
||||||
|
[SerializeField] private int recordingLength = 10;
|
||||||
|
[SerializeField] private int samplesPerPacket = 1024; // Number of audio samples per packet (not bytes)
|
||||||
|
|
||||||
|
[Header("UI")]
|
||||||
|
[SerializeField] private KeyCode talkKey = KeyCode.T;
|
||||||
|
[SerializeField] private bool useHoldToTalk = true;
|
||||||
|
|
||||||
|
[Header("Debug")]
|
||||||
|
[SerializeField] private bool enableDebugLogging = true;
|
||||||
|
[SerializeField] private KeyCode testConnectionKey = KeyCode.C;
|
||||||
|
|
||||||
|
private UdpClient _udpClient;
|
||||||
|
private IPEndPoint _targetEndPoint;
|
||||||
|
private AudioClip _audioClip;
|
||||||
|
private bool _isRecording = false;
|
||||||
|
private CancellationTokenSource _cancellationTokenSource;
|
||||||
|
|
||||||
|
private int _lastMicrophonePosition = 0;
|
||||||
|
private float[] _audioBuffer;
|
||||||
|
private string _selectedMicrophone;
|
||||||
|
private int _packetSequence = 0;
|
||||||
|
|
||||||
|
public event Action<bool> OnRecordingStateChanged;
|
||||||
|
|
||||||
|
/// <summary>
/// Unity start hook: sets up the UDP client, then the microphone buffer,
/// and finally creates the cancellation source used by the send loop.
/// </summary>
private void Start()
{
    InitializeNetwork();
    InitializeAudio();

    var cancellation = new CancellationTokenSource();
    _cancellationTokenSource = cancellation;
}
|
||||||
|
|
||||||
|
/// <summary>Per-frame hook that polls the talk and test-connection keys.</summary>
private void Update() => HandleInput();
|
||||||
|
|
||||||
|
/// <summary>
/// Unity teardown hook: stops recording, cancels and disposes the
/// cancellation source, then closes the UDP client.
/// </summary>
private void OnDestroy()
{
    StopRecording();

    if (_cancellationTokenSource != null)
    {
        _cancellationTokenSource.Cancel();
    }

    if (_cancellationTokenSource != null)
    {
        _cancellationTokenSource.Dispose();
    }

    if (_udpClient != null)
    {
        _udpClient.Close();
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the UDP client and resolves the target endpoint from targetIP / targetPort.
/// Failures are logged rather than thrown.
/// </summary>
private void InitializeNetwork()
{
    try
    {
        _udpClient = new UdpClient();

        IPAddress address = IPAddress.Parse(targetIP);
        _targetEndPoint = new IPEndPoint(address, targetPort);

        ConvaiLogger.Info($"Simple UDP Audio Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Failed to initialize UDP client: {error.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the selected microphone name from MicrophoneManager and allocates a
/// sample buffer sized recordingFrequency * recordingLength. Logs an error when no microphone is selected.
/// </summary>
private void InitializeAudio()
{
    _selectedMicrophone = MicrophoneManager.Instance.SelectedMicrophoneName;

    int bufferLength = recordingFrequency * recordingLength;
    _audioBuffer = new float[bufferLength];

    if (!string.IsNullOrEmpty(_selectedMicrophone))
    {
        return;
    }

    ConvaiLogger.Error("No microphone selected for UDP audio sender", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Polls the keyboard. In hold-to-talk mode, pressing the talk key starts and releasing
/// it stops recording; otherwise each press toggles recording. The test key sends a test packet.
/// </summary>
private void HandleInput()
{
    if (useHoldToTalk)
    {
        if (!_isRecording)
        {
            if (Input.GetKeyDown(talkKey))
            {
                StartRecording();
            }
        }
        else if (Input.GetKeyUp(talkKey))
        {
            StopRecording();
        }
    }
    else if (Input.GetKeyDown(talkKey))
    {
        // Toggle mode: every press flips the recording state.
        if (_isRecording)
        {
            StopRecording();
        }
        else
        {
            StartRecording();
        }
    }

    if (Input.GetKeyDown(testConnectionKey))
    {
        TestConnection();
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Starts microphone capture, resets the stream position and packet counter, raises
/// OnRecordingStateChanged(true) and launches the background send loop.
/// Does nothing when already recording or when no microphone is selected.
/// </summary>
public void StartRecording()
{
    if (_isRecording)
    {
        return;
    }

    if (string.IsNullOrEmpty(_selectedMicrophone))
    {
        return;
    }

    try
    {
        _audioClip = Microphone.Start(_selectedMicrophone, false, recordingLength, recordingFrequency);
        _isRecording = true;
        _lastMicrophonePosition = 0;
        _packetSequence = 0;

        ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
        OnRecordingStateChanged?.Invoke(true);

        // Fire and forget: the loop polls the microphone until recording stops.
        CancellationToken token = _cancellationTokenSource.Token;
        _ = ProcessAudioContinuously(token);
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Failed to start recording: {error.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stops microphone capture, raises OnRecordingStateChanged(false) and sends the
/// end-of-recording packet. Does nothing when not recording.
/// </summary>
public void StopRecording()
{
    if (_isRecording == false)
    {
        return;
    }

    try
    {
        Microphone.End(_selectedMicrophone);
        _isRecording = false;

        ConvaiLogger.Info("Stopped recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
        OnRecordingStateChanged?.Invoke(false);

        // Tell the receiver that this utterance is complete.
        SendEndOfRecordingSignal();
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Failed to stop recording: {error.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Send loop for one recording session: every 100 ms reads the samples recorded
/// since the last poll and transmits them in chunks.
/// </summary>
/// <param name="cancellationToken">Token that aborts the delay and ends the loop.</param>
/// <returns>A task that completes when the loop exits.</returns>
private async Task ProcessAudioContinuously(CancellationToken cancellationToken)
{
    // Remember which clip this loop serves. A StopRecording()/StartRecording()
    // pair issued while this loop is awaiting assigns a new clip and starts a new
    // loop; without this check the old loop would keep running beside it, sending
    // duplicate audio and overwriting the new session's position.
    AudioClip sessionClip = _audioClip;

    while (_isRecording && !cancellationToken.IsCancellationRequested)
    {
        try
        {
            await Task.Delay(100, cancellationToken); // Process every 100ms

            if (!ReferenceEquals(_audioClip, sessionClip))
            {
                break;
            }

            if (_audioClip == null || !Microphone.IsRecording(_selectedMicrophone))
            {
                break;
            }

            int currentMicrophonePosition = Microphone.GetPosition(_selectedMicrophone);
            int audioDataLength = currentMicrophonePosition - _lastMicrophonePosition;

            if (audioDataLength > 0)
            {
                // Copy clip data starting at the last sent position into the shared buffer.
                _audioClip.GetData(_audioBuffer, _lastMicrophonePosition);

                await SendAudioDataInChunks(_audioBuffer, audioDataLength);

                // The session may have been replaced while the chunks were being sent.
                if (!ReferenceEquals(_audioClip, sessionClip))
                {
                    break;
                }

                _lastMicrophonePosition = currentMicrophonePosition;
            }
        }
        catch (Exception ex) when (!(ex is OperationCanceledException))
        {
            ConvaiLogger.Error($"Error in audio processing: {ex.Message}", ConvaiLogger.LogCategory.Character);
            break;
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Splits the first <paramref name="totalSamples"/> samples of <paramref name="audioData"/>
/// into packets of at most samplesPerPacket samples and sends them one by one,
/// pausing 10 ms between packets. Stops at the first send failure.
/// </summary>
/// <param name="audioData">Sample buffer; reading starts at index 0.</param>
/// <param name="totalSamples">Number of samples to send.</param>
private async Task SendAudioDataInChunks(float[] audioData, int totalSamples)
{
    // samplesPerPacket is an inspector field. A value of 0 or less would never
    // advance the loop below and would send empty packets forever.
    int chunkLimit = Mathf.Max(1, samplesPerPacket);
    int processedSamples = 0;

    while (processedSamples < totalSamples)
    {
        try
        {
            int remainingSamples = totalSamples - processedSamples;
            int currentChunkSamples = Mathf.Min(chunkLimit, remainingSamples);

            byte[] packet = CreateSimpleAudioPacket(audioData, processedSamples, currentChunkSamples);

            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging && _packetSequence % 10 == 0) // Log every 10th packet
            {
                ConvaiLogger.DebugLog($"Sent packet {_packetSequence} with {currentChunkSamples} samples", ConvaiLogger.LogCategory.Character);
            }

            processedSamples += currentChunkSamples;
            _packetSequence++;

            // Small delay to avoid overwhelming the network
            await Task.Delay(10);
        }
        catch (Exception ex)
        {
            ConvaiLogger.Error($"Failed to send audio chunk: {ex.Message}", ConvaiLogger.LogCategory.Character);
            break;
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds one audio datagram.
/// Layout: magic 0xC0A1 (4 bytes), packet sequence (4), sample count (4),
/// start position in stream (4), flags (1; 0 = normal audio), then the samples as 16-bit integers.
/// </summary>
/// <param name="audioData">Source samples as floats.</param>
/// <param name="startIndex">Index of the first sample to include.</param>
/// <param name="sampleCount">Number of samples to include.</param>
/// <returns>The serialized packet (17-byte header plus 2 bytes per sample).</returns>
private byte[] CreateSimpleAudioPacket(float[] audioData, int startIndex, int sampleCount)
{
    const int headerSize = 17; // 4 + 4 + 4 + 4 + 1
    int audioDataSize = sampleCount * sizeof(short);
    byte[] packet = new byte[headerSize + audioDataSize];

    int offset = 0;

    // Magic number
    BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
    offset += 4;

    // Packet sequence
    BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset);
    offset += 4;

    // Sample count
    BitConverter.GetBytes(sampleCount).CopyTo(packet, offset);
    offset += 4;

    // Start position
    BitConverter.GetBytes(_lastMicrophonePosition + startIndex).CopyTo(packet, offset);
    offset += 4;

    // Flags (0 for normal audio)
    packet[offset] = 0;
    offset += 1;

    // Convert to 16-bit PCM. Clamp first so a sample outside [-1, 1] saturates
    // instead of going through an out-of-range float-to-short cast.
    short[] pcm = new short[sampleCount];
    for (int i = 0; i < sampleCount; i++)
    {
        float sample = Mathf.Clamp(audioData[startIndex + i], -1f, 1f);
        pcm[i] = (short)(sample * short.MaxValue);
    }

    // One bulk copy in host byte order (the same order BitConverter.GetBytes uses),
    // replacing a 2-byte array allocation per sample.
    Buffer.BlockCopy(pcm, 0, packet, offset, audioDataSize);

    return packet;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Sends the header-only end-of-recording packet (sample count 0, flags 1)
/// to the target endpoint. Failures are logged rather than thrown.
/// </summary>
private void SendEndOfRecordingSignal()
{
    if (_udpClient == null || _targetEndPoint == null)
    {
        ConvaiLogger.Error("Failed to send end signal: UDP client not initialized", ConvaiLogger.LogCategory.Character);
        return;
    }

    try
    {
        byte[] packet = new byte[17]; // Header only, no audio data
        int offset = 0;

        // Magic number
        BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
        offset += 4;

        // Packet sequence
        BitConverter.GetBytes(_packetSequence).CopyTo(packet, offset);
        offset += 4;

        // Sample count (0 for end signal)
        BitConverter.GetBytes(0).CopyTo(packet, offset);
        offset += 4;

        // Start position
        BitConverter.GetBytes(_lastMicrophonePosition).CopyTo(packet, offset);
        offset += 4;

        // Flags (1 for end of recording)
        packet[offset] = 1;

        // Send synchronously. An un-awaited SendAsync reports its failure on a task
        // nobody observes, so the catch below would never see it. It can also still
        // be pending when OnDestroy closes the client right after StopRecording().
        _udpClient.Send(packet, packet.Length, _targetEndPoint);
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Failed to send end signal: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
// Public methods for external control
|
||||||
|
/// <summary>
/// Redirects subsequent packets to a new destination.
/// </summary>
/// <param name="ip">Destination IP address in textual form.</param>
/// <param name="port">Destination UDP port.</param>
/// <exception cref="ArgumentNullException"><paramref name="ip"/> is null.</exception>
/// <exception cref="FormatException"><paramref name="ip"/> is not a valid IP address.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="port"/> is outside the valid port range.</exception>
public void SetTargetEndpoint(string ip, int port)
{
    // Build the endpoint before touching any field. If parsing throws, the
    // sender keeps its previous, consistent target rather than having
    // targetIP / targetPort disagree with _targetEndPoint.
    var endPoint = new IPEndPoint(IPAddress.Parse(ip), port);

    targetIP = ip;
    targetPort = port;
    _targetEndPoint = endPoint;
}
|
||||||
|
|
||||||
|
public bool IsRecording => _isRecording;
|
||||||
|
|
||||||
|
// Debug and testing methods
|
||||||
|
/// <summary>
/// Sends the text datagram "CONVAI_TEST_CONNECTION" to the target endpoint
/// and logs whether the send succeeded.
/// </summary>
public async void TestConnection()
{
    if (_udpClient == null)
    {
        ConvaiLogger.Error("UDP client not initialized", ConvaiLogger.LogCategory.Character);
        return;
    }

    try
    {
        ConvaiLogger.Info($"Testing connection to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);

        // Plain UTF-8 text, not an audio packet.
        byte[] payload = System.Text.Encoding.UTF8.GetBytes("CONVAI_TEST_CONNECTION");
        int payloadLength = payload.Length;

        await _udpClient.SendAsync(payload, payloadLength, _targetEndPoint);
        ConvaiLogger.Info("Test packet sent successfully", ConvaiLogger.LogCategory.Character);
    }
    catch (Exception error)
    {
        ConvaiLogger.Error($"Connection test failed: {error.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Logs a snapshot of the sender state (target, client, recording flag,
/// microphone, packets sent and local endpoint when bound) for debugging.
/// </summary>
public void ShowNetworkStatus()
{
    var category = ConvaiLogger.LogCategory.Character;
    string clientState = _udpClient != null ? "Initialized" : "Not initialized";

    ConvaiLogger.Info("=== Network Status ===", category);
    ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", category);
    ConvaiLogger.Info($"UDP Client: {clientState}", category);
    ConvaiLogger.Info($"Recording: {_isRecording}", category);
    ConvaiLogger.Info($"Microphone: {_selectedMicrophone}", category);
    ConvaiLogger.Info($"Packets sent: {_packetSequence}", category);

    if (_udpClient?.Client?.LocalEndPoint == null)
    {
        return;
    }

    ConvaiLogger.Info($"Local endpoint: {_udpClient.Client.LocalEndPoint}", category);
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: fa5cc94311721d04f8e8821151ffb737
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
@ -0,0 +1,639 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Sockets;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Convai.Scripts.Runtime.LoggerSystem;
|
||||||
|
using Convai.Scripts.Runtime.Utils;
|
||||||
|
using UnityEngine;
|
||||||
|
|
||||||
|
namespace Convai.Scripts.Runtime.Multiplayer
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// UDP Speech Receiver - Receives high-quality Convai speech with proper buffering
|
||||||
|
/// This version reconstructs the original AudioClip objects for seamless playback
|
||||||
|
/// </summary>
|
||||||
|
public class ConvaiUDPSpeechReceiver : MonoBehaviour
|
||||||
|
{
|
||||||
|
[Header("Network Configuration")]
|
||||||
|
[SerializeField] private int listenPort = 12346;
|
||||||
|
[SerializeField] private bool enableDebugLogging = true;
|
||||||
|
|
||||||
|
[Header("Audio Playback")]
|
||||||
|
[SerializeField] private AudioSource speechAudioSource;
|
||||||
|
[SerializeField] private bool createAudioSourceIfMissing = true;
|
||||||
|
[SerializeField] private float audioVolume = 1.0f;
|
||||||
|
[SerializeField] private bool spatialAudio = false;
|
||||||
|
|
||||||
|
[Header("UI")]
|
||||||
|
[SerializeField] private bool showTranscripts = true;
|
||||||
|
|
||||||
|
// Network components
|
||||||
|
private UdpClient _udpListener;
|
||||||
|
private IPEndPoint _remoteEndPoint;
|
||||||
|
private bool _isListening = false;
|
||||||
|
private CancellationTokenSource _cancellationTokenSource;
|
||||||
|
|
||||||
|
// Audio reconstruction
|
||||||
|
private Dictionary<int, IncomingAudioClip> _incomingClips = new Dictionary<int, IncomingAudioClip>();
|
||||||
|
private Queue<ReconstructedAudioClip> _playbackQueue = new Queue<ReconstructedAudioClip>();
|
||||||
|
private bool _isPlayingSequence = false;
|
||||||
|
private int _currentSequence = 0;
|
||||||
|
|
||||||
|
// Packet constants (matching sender V3)
|
||||||
|
private const uint MAGIC_NUMBER = 0xC0A3;
|
||||||
|
private const byte PACKET_TYPE_AUDIO_START = 0x01;
|
||||||
|
private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
|
||||||
|
private const byte PACKET_TYPE_AUDIO_END = 0x03;
|
||||||
|
private const byte PACKET_TYPE_TRANSCRIPT = 0x04;
|
||||||
|
private const byte PACKET_TYPE_FINAL = 0x05;
|
||||||
|
|
||||||
|
// Events
|
||||||
|
public Action<bool> OnSpeechReceiving;
|
||||||
|
public Action<string> OnTranscriptReceived;
|
||||||
|
public Action<AudioClip> OnAudioClipReceived;
|
||||||
|
|
||||||
|
// Data structures
|
||||||
|
/// <summary>
/// In-memory form of one speech datagram.
/// NOTE(review): the parser that fills this struct is outside the visible code;
/// the field meanings below are taken from their names - confirm against it.
/// </summary>
private struct SpeechPacket
{
    public uint magicNumber;      // presumably compared with MAGIC_NUMBER
    public byte packetType;       // presumably one of the PACKET_TYPE_* constants
    public int sequence;
    public int totalSamples;
    public int sampleRate;
    public int channels;
    public int startSample;
    public int chunkSampleCount;
    public short[] audioSamples;
    public string transcript;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Mutable state for one audio clip that is still being received.
/// The constructor allocates a float buffer of totalSamples entries; all
/// progress flags start false and receivedSamples starts at 0.
/// </summary>
private class IncomingAudioClip
{
    public int totalSamples;
    public int sampleRate;
    public int channels;
    public string transcript;
    public float[] audioData;
    public bool isComplete;
    public bool hasStart;
    public bool hasEnd;
    public int receivedSamples;

    /// <summary>Creates an empty clip record with a buffer sized for <paramref name="totalSamples"/>.</summary>
    public IncomingAudioClip(int totalSamples, int sampleRate, int channels, string transcript)
    {
        this.totalSamples = totalSamples;
        this.sampleRate = sampleRate;
        this.channels = channels;
        this.transcript = transcript;

        audioData = new float[totalSamples];

        // Progress tracking starts from a clean slate.
        receivedSamples = 0;
        hasStart = false;
        hasEnd = false;
        isComplete = false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the playback queue: either a rebuilt clip with its transcript,
/// or an end-of-sequence marker (<see cref="isFinal"/> set, no clip).
/// </summary>
private struct ReconstructedAudioClip
{
    // Clip to play; null for the end-of-sequence marker
    public AudioClip audioClip;

    // Text that accompanies the clip
    public string transcript;

    // True when this entry marks the end of the speech sequence
    public bool isFinal;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity start hook: creates the cancellation source, then sets up audio
/// output and the UDP listener, in that order.
/// </summary>
private void Start()
{
    // Must exist before the listener is started.
    _cancellationTokenSource = new CancellationTokenSource();

    InitializeAudio();
    InitializeNetwork();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity destroy hook: stops the listener and releases the cancellation source.
/// </summary>
private void OnDestroy()
{
    StopListening();

    if (_cancellationTokenSource != null)
    {
        _cancellationTokenSource.Cancel();
        _cancellationTokenSource.Dispose();

        // Drop the reference: reading Token on a disposed source throws
        // ObjectDisposedException, and StartListening only guards against null.
        _cancellationTokenSource = null;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity per-frame hook: advances playback of queued remote speech clips.
/// </summary>
private void Update() => ProcessPlaybackQueue();
|
||||||
|
|
||||||
|
/// <summary>
/// Resolves the AudioSource used for playback (assigned, found on this
/// GameObject, or optionally created) and applies volume and spatial settings.
/// Logs an error when no AudioSource could be obtained.
/// </summary>
private void InitializeAudio()
{
    // Nothing assigned: look for one on this GameObject.
    if (speechAudioSource == null)
    {
        speechAudioSource = GetComponent<AudioSource>();
    }

    // Still nothing: create one if the component is configured to.
    if (speechAudioSource == null && createAudioSourceIfMissing)
    {
        speechAudioSource = gameObject.AddComponent<AudioSource>();
        ConvaiLogger.Info("Created AudioSource for speech playback", ConvaiLogger.LogCategory.Character);
    }

    if (speechAudioSource == null)
    {
        ConvaiLogger.Error("No AudioSource available for speech playback", ConvaiLogger.LogCategory.Character);
        return;
    }

    speechAudioSource.volume = audioVolume;
    speechAudioSource.playOnAwake = false;

    // 1 = fully 3D, 0 = fully 2D
    float blend = 0.0f;
    if (spatialAudio)
    {
        blend = 1.0f;
    }
    speechAudioSource.spatialBlend = blend;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Starts the UDP listener; any exception is logged rather than propagated.
/// </summary>
private void InitializeNetwork()
{
    try
    {
        StartListening();
    }
    catch (Exception startupError)
    {
        ConvaiLogger.Error($"Failed to initialize UDP speech receiver: {startupError.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Binds a UDP socket to <c>listenPort</c> and starts the background receive loop.
/// Does nothing when already listening or when no cancellation source exists.
/// Failures are logged; on failure the receiver is left in the not-listening state.
/// </summary>
public void StartListening()
{
    if (_isListening || _cancellationTokenSource == null)
    {
        return;
    }

    try
    {
        // Read the token first so a disposed source fails before a socket is bound.
        CancellationToken token = _cancellationTokenSource.Token;

        _udpListener = new UdpClient(listenPort);
        _isListening = true;

        ConvaiLogger.Info($"UDP Speech Receiver listening on port {listenPort}", ConvaiLogger.LogCategory.Character);

        // Fire-and-forget: the loop logs its own errors and ends when listening stops.
        _ = ListenForSpeechPackets(token);
    }
    catch (Exception ex)
    {
        // Roll back partial startup so the port is released and a retry is possible.
        _isListening = false;
        _udpListener?.Dispose();
        _udpListener = null;

        ConvaiLogger.Error($"Failed to start UDP speech receiver: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stops the UDP listener, releases its socket, and discards any playback in
/// progress. Does nothing when the receiver is not listening.
/// </summary>
public void StopListening()
{
    if (!_isListening)
    {
        return;
    }

    // Clear the flag first so the receive loop's condition fails.
    _isListening = false;

    if (_udpListener != null)
    {
        _udpListener.Close();
        _udpListener.Dispose();
    }
    _udpListener = null;

    // Drop whatever was queued or playing.
    StopSpeechPlayback();

    ConvaiLogger.Info("Stopped UDP Speech Receiver", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Receive loop: awaits datagrams on the UDP listener and hands each one to
/// <c>ProcessReceivedPacket</c> until listening stops or cancellation is requested.
/// </summary>
/// <param name="cancellationToken">Token checked between receives.</param>
private async Task ListenForSpeechPackets(CancellationToken cancellationToken)
{
    try
    {
        while (_isListening && !cancellationToken.IsCancellationRequested)
        {
            // StopListening sets the field to null; take a local copy and bail out
            // instead of dereferencing null.
            UdpClient listener = _udpListener;
            if (listener == null)
            {
                break;
            }

            var result = await listener.ReceiveAsync();
            _remoteEndPoint = result.RemoteEndPoint;

            await ProcessReceivedPacket(result.Buffer, result.RemoteEndPoint);
        }
    }
    catch (ObjectDisposedException)
    {
        // Normal when stopping
    }
    catch (SocketException) when (!_isListening || cancellationToken.IsCancellationRequested)
    {
        // NOTE(review): some runtimes appear to report a receive aborted by closing
        // the socket as SocketException rather than ObjectDisposedException; treated
        // as a normal stop only when we are in fact shutting down.
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Error in UDP speech listener: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses one received datagram and dispatches it to the handler for its packet
/// type. Datagrams that fail to parse are reported (when debug logging is on)
/// and dropped. Never throws: all exceptions are logged.
/// </summary>
/// <param name="data">Raw datagram payload.</param>
/// <param name="sender">Endpoint the datagram came from (used for logging only).</param>
/// <returns>An already-completed task; the work is done synchronously.</returns>
private Task ProcessReceivedPacket(byte[] data, IPEndPoint sender)
{
    try
    {
        SpeechPacket? packetData = ParseSpeechPacket(data);

        if (!packetData.HasValue)
        {
            if (enableDebugLogging)
            {
                if (data.Length < 4)
                {
                    ConvaiLogger.Warn($"❌ Packet too small from {sender}: {data.Length} bytes", ConvaiLogger.LogCategory.Character);
                }
                else
                {
                    uint receivedMagic = BitConverter.ToUInt32(data, 0);
                    if (receivedMagic != MAGIC_NUMBER)
                    {
                        ConvaiLogger.Warn($"❌ Invalid speech packet from {sender}. Expected magic: 0x{MAGIC_NUMBER:X}, Got: 0x{receivedMagic:X}", ConvaiLogger.LogCategory.Character);
                    }
                    else
                    {
                        // The magic matched, so the parser rejected it for another reason;
                        // do not blame the magic number.
                        ConvaiLogger.Warn($"❌ Malformed speech packet from {sender}: {data.Length} bytes (magic OK; truncated or unknown packet type)", ConvaiLogger.LogCategory.Character);
                    }
                }
            }

            return Task.CompletedTask;
        }

        SpeechPacket packet = packetData.Value;

        if (enableDebugLogging)
        {
            string typeStr = packet.packetType switch
            {
                PACKET_TYPE_AUDIO_START => "start",
                PACKET_TYPE_AUDIO_CHUNK => "chunk",
                PACKET_TYPE_AUDIO_END => "end",
                PACKET_TYPE_TRANSCRIPT => "transcript",
                PACKET_TYPE_FINAL => "final",
                _ => "unknown"
            };
            ConvaiLogger.DebugLog($"📥 Received {typeStr} packet {packet.sequence} from {sender}", ConvaiLogger.LogCategory.Character);
        }

        switch (packet.packetType)
        {
            case PACKET_TYPE_AUDIO_START:
                HandleAudioStartPacket(packet);
                break;

            case PACKET_TYPE_AUDIO_CHUNK:
                HandleAudioChunkPacket(packet);
                break;

            case PACKET_TYPE_AUDIO_END:
                HandleAudioEndPacket(packet);
                break;

            case PACKET_TYPE_TRANSCRIPT:
                HandleTranscriptPacket(packet);
                break;

            case PACKET_TYPE_FINAL:
                HandleFinalPacket();
                break;
        }
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Error processing speech packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }

    return Task.CompletedTask;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Handles an audio-start packet: begins a new reception when this is sequence 0
/// and nothing is playing, then registers a reassembly buffer for the clip.
/// Packets whose metadata cannot describe a real clip are ignored.
/// </summary>
/// <param name="packet">Parsed audio-start packet.</param>
private void HandleAudioStartPacket(SpeechPacket packet)
{
    // Start new speech sequence if this is the first start packet
    if (packet.sequence == 0 && !_isPlayingSequence)
    {
        StartSpeechReception();
    }

    // These values come straight off the network. A negative sample count would
    // make the buffer allocation throw, and non-positive values can never form a clip.
    // NOTE(review): there is still no upper bound on totalSamples, so one datagram
    // can request a very large allocation — consider a project-appropriate cap.
    if (packet.totalSamples <= 0 || packet.sampleRate <= 0 || packet.channels <= 0)
    {
        if (enableDebugLogging)
        {
            ConvaiLogger.Warn($"Ignoring audio start packet {packet.sequence} with invalid metadata (samples: {packet.totalSamples}, rate: {packet.sampleRate}, channels: {packet.channels})", ConvaiLogger.LogCategory.Character);
        }
        return;
    }

    // Create new incoming audio clip
    var incomingClip = new IncomingAudioClip(packet.totalSamples, packet.sampleRate, packet.channels, packet.transcript)
    {
        hasStart = true
    };

    // A repeated start packet for the same sequence replaces the earlier buffer.
    _incomingClips[packet.sequence] = incomingClip;

    if (enableDebugLogging)
    {
        ConvaiLogger.DebugLog($"🎵 Started receiving audio clip {packet.sequence}: {packet.totalSamples} samples, '{packet.transcript}'", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Copies the 16-bit samples of an audio-chunk packet into the reassembly buffer
/// of its clip, converting them to floats. Chunks for unknown sequences, chunks
/// without sample data, and chunks that fall outside the buffer are ignored.
/// </summary>
/// <param name="packet">Parsed audio-chunk packet.</param>
private void HandleAudioChunkPacket(SpeechPacket packet)
{
    if (!_incomingClips.TryGetValue(packet.sequence, out IncomingAudioClip incomingClip))
    {
        return;
    }

    short[] samples = packet.audioSamples;
    if (samples == null || packet.startSample < 0 || packet.chunkSampleCount < 0)
    {
        // Offsets and counts come from the network; negative values would index
        // before the buffer or decrease the received-sample counter.
        return;
    }

    // Never read more samples than were actually decoded.
    int count = Math.Min(packet.chunkSampleCount, samples.Length);

    // 64-bit sum so a huge startSample cannot wrap around and pass the bounds check.
    long endExclusive = (long)packet.startSample + count;
    if (endExclusive > incomingClip.totalSamples || endExclusive > incomingClip.audioData.Length)
    {
        return;
    }

    // Convert short samples back to float and copy to the correct position
    float[] target = incomingClip.audioData;
    int writeIndex = packet.startSample;
    for (int i = 0; i < count; i++)
    {
        target[writeIndex + i] = samples[i] / (float)short.MaxValue;
    }

    // NOTE(review): a duplicated datagram is counted twice here.
    incomingClip.receivedSamples += count;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Handles an audio-end packet: marks the clip as ended and, once both start and
/// end have been seen, builds the AudioClip, queues it, and forgets the buffer.
/// End packets for unknown sequences are ignored.
/// </summary>
/// <param name="packet">Parsed audio-end packet.</param>
private void HandleAudioEndPacket(SpeechPacket packet)
{
    int sequence = packet.sequence;

    IncomingAudioClip pending;
    if (!_incomingClips.TryGetValue(sequence, out pending))
    {
        return;
    }

    pending.hasEnd = true;

    // Only the start/end markers are checked; the number of received samples is not.
    if (!pending.hasStart || !pending.hasEnd)
    {
        return;
    }

    pending.isComplete = true;
    CreateAndQueueAudioClip(pending, sequence);
    _incomingClips.Remove(sequence);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Forwards a received transcript to <c>OnTranscriptReceived</c> via the
/// main-thread dispatcher. Skipped when transcripts are disabled or the text is empty.
/// </summary>
/// <param name="packet">Parsed transcript packet.</param>
private void HandleTranscriptPacket(SpeechPacket packet)
{
    if (!showTranscripts || string.IsNullOrEmpty(packet.transcript))
    {
        return;
    }

    MainThreadDispatcher.Instance.RunOnMainThread(() =>
    {
        OnTranscriptReceived?.Invoke(packet.transcript);

        if (enableDebugLogging)
        {
            ConvaiLogger.Info($"📝 Remote NPC said: '{packet.transcript}'", ConvaiLogger.LogCategory.Character);
        }
    });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Handles the final packet of a speech sequence: salvages mostly-received clips,
/// appends an end-of-sequence marker to the playback queue, and signals that
/// reception has stopped.
/// </summary>
private void HandleFinalPacket()
{
    // Give partially received clips a last chance to be queued.
    ProcessIncompleteClips();

    // Marker entry: no clip, empty transcript, flagged as final.
    var endOfSequence = new ReconstructedAudioClip();
    endOfSequence.audioClip = null;
    endOfSequence.transcript = "";
    endOfSequence.isFinal = true;
    _playbackQueue.Enqueue(endOfSequence);

    StopSpeechReception();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Queues every pending clip that has received more than 80% of its announced
/// samples, in ascending sequence order, and removes those clips from the
/// pending set. Clips below the threshold stay pending.
/// </summary>
private void ProcessIncompleteClips()
{
    // Snapshot and sort the keys: Dictionary enumeration order is unspecified, but
    // clips must be queued in sequence order. The snapshot also lets us remove
    // entries while looping.
    var sequences = new List<int>(_incomingClips.Keys);
    sequences.Sort();

    foreach (int sequence in sequences)
    {
        IncomingAudioClip incomingClip = _incomingClips[sequence];

        // If we received a reasonable amount of data, try to create the clip
        if (incomingClip.receivedSamples > incomingClip.totalSamples * 0.8f) // 80% received
        {
            CreateAndQueueAudioClip(incomingClip, sequence);
            _incomingClips.Remove(sequence);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds an AudioClip from a reassembled buffer, appends it to the playback
/// queue, and raises <c>OnAudioClipReceived</c>. Failures are logged, not thrown.
/// </summary>
/// <param name="incomingClip">Reassembled clip data and metadata.</param>
/// <param name="sequence">Sequence number, used in the clip name and log messages.</param>
private void CreateAndQueueAudioClip(IncomingAudioClip incomingClip, int sequence)
{
    try
    {
        string clipName = $"RemoteSpeech_{sequence}";
        AudioClip rebuilt = AudioClip.Create(
            clipName,
            incomingClip.totalSamples,
            incomingClip.channels,
            incomingClip.sampleRate,
            false);
        rebuilt.SetData(incomingClip.audioData, 0);

        var queueEntry = new ReconstructedAudioClip
        {
            audioClip = rebuilt,
            transcript = incomingClip.transcript,
            isFinal = false
        };
        _playbackQueue.Enqueue(queueEntry);

        OnAudioClipReceived?.Invoke(rebuilt);

        if (enableDebugLogging)
        {
            ConvaiLogger.DebugLog($"✅ Reconstructed audio clip {sequence}: {rebuilt.length:F2}s, '{incomingClip.transcript}'", ConvaiLogger.LogCategory.Character);
        }
    }
    catch (Exception buildError)
    {
        ConvaiLogger.Error($"Failed to create audio clip from sequence {sequence}: {buildError.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Per-frame playback driver: starts the next queued clip when idle, and when the
/// current clip has finished either advances to the next one or marks playback idle.
/// </summary>
private void ProcessPlaybackQueue()
{
    // Without an audio source neither step below can do anything.
    if (speechAudioSource == null)
    {
        return;
    }

    // Idle with work waiting: start playing.
    if (!_isPlayingSequence && _playbackQueue.Count > 0)
    {
        PlayNextAudioClip();
    }

    // Playing, but the source has gone quiet: advance or stop.
    if (_isPlayingSequence && !speechAudioSource.isPlaying)
    {
        if (_playbackQueue.Count == 0)
        {
            _isPlayingSequence = false;
        }
        else
        {
            PlayNextAudioClip();
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Dequeues the next playback entry. An end-of-sequence marker ends playback;
/// an entry with a clip is assigned to the audio source and played.
/// </summary>
private void PlayNextAudioClip()
{
    if (_playbackQueue.Count == 0 || speechAudioSource == null)
    {
        return;
    }

    ReconstructedAudioClip next = _playbackQueue.Dequeue();

    if (next.isFinal)
    {
        _isPlayingSequence = false;
        ConvaiLogger.Info("🔊 Finished playing remote speech sequence", ConvaiLogger.LogCategory.Character);
        return;
    }

    AudioClip clipToPlay = next.audioClip;
    if (clipToPlay == null)
    {
        // Entry carried no audio; nothing to play.
        return;
    }

    speechAudioSource.clip = clipToPlay;
    speechAudioSource.Play();
    _isPlayingSequence = true;

    if (enableDebugLogging)
    {
        ConvaiLogger.DebugLog($"🔊 Playing remote speech: {next.audioClip.length:F2}s, '{next.transcript}'", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Resets all reception and playback state for a new speech sequence and raises
/// <c>OnSpeechReceiving(true)</c>.
/// </summary>
private void StartSpeechReception()
{
    // Discard anything left over from a previous sequence.
    _incomingClips.Clear();
    _playbackQueue.Clear();
    _currentSequence = 0;
    _isPlayingSequence = false;

    OnSpeechReceiving?.Invoke(true);

    ConvaiLogger.Info("🔊 Started receiving remote NPC speech", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Raises <c>OnSpeechReceiving(false)</c> and logs that reception has stopped.
/// Queued clips are left untouched.
/// </summary>
private void StopSpeechReception()
{
    Action<bool> listeners = OnSpeechReceiving;
    if (listeners != null)
    {
        listeners(false);
    }

    ConvaiLogger.Info("🔊 Stopped receiving remote NPC speech", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stops the audio source if it is playing and clears all queued and
/// partially received clips.
/// </summary>
private void StopSpeechPlayback()
{
    bool sourceIsPlaying = speechAudioSource != null && speechAudioSource.isPlaying;
    if (sourceIsPlaying)
    {
        speechAudioSource.Stop();
    }

    _isPlayingSequence = false;

    // Drop both finished-but-unplayed clips and clips still being reassembled.
    _playbackQueue.Clear();
    _incomingClips.Clear();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decodes a raw datagram into a <c>SpeechPacket</c>.
/// </summary>
/// <remarks>
/// Layout read here: 4-byte magic, 1-byte packet type, 4-byte sequence, followed by
/// type-specific fields. Multi-byte values are read with <see cref="BitConverter"/>,
/// i.e. in the byte order of the machine running this code.
/// All length fields come from the network and are validated with subtraction
/// or division instead of <c>offset + length</c>, so a hostile value cannot
/// overflow an int and slip past the bounds check.
/// </remarks>
/// <param name="data">Raw datagram payload.</param>
/// <returns>
/// The parsed packet, or null when the datagram is too short, carries a different
/// magic number, has an unknown type, is missing required fields, or fails to parse.
/// </returns>
private SpeechPacket? ParseSpeechPacket(byte[] data)
{
    // NOTE(review): the fields read for every packet total 9 bytes, yet 13 is the
    // enforced minimum; kept as-is because the sender's end/final packet layout is
    // not visible from here.
    if (data == null || data.Length < 13) // Minimum header size
    {
        return null;
    }

    // Reads "4-byte length + UTF-8 bytes" at position 'at'; returns null when the
    // length field is absent, non-positive, or larger than the remaining payload.
    string ReadLengthPrefixedText(int at)
    {
        if (data.Length - at < 4)
        {
            return null;
        }

        int length = BitConverter.ToInt32(data, at);
        at += 4;

        if (length <= 0 || length > data.Length - at)
        {
            return null;
        }

        return System.Text.Encoding.UTF8.GetString(data, at, length);
    }

    try
    {
        int offset = 0;

        // Read magic number
        uint magic = BitConverter.ToUInt32(data, offset);
        offset += 4;

        if (magic != MAGIC_NUMBER)
        {
            return null;
        }

        // Read packet type
        byte packetType = data[offset];
        offset += 1;

        // Read sequence
        int sequence = BitConverter.ToInt32(data, offset);
        offset += 4;

        var packet = new SpeechPacket
        {
            magicNumber = magic,
            packetType = packetType,
            sequence = sequence
        };

        // Parse based on packet type
        switch (packetType)
        {
            case PACKET_TYPE_AUDIO_START:
                // totalSamples + sampleRate + channels + transcript length
                if (data.Length - offset < 16)
                {
                    return null;
                }

                packet.totalSamples = BitConverter.ToInt32(data, offset);
                offset += 4;

                packet.sampleRate = BitConverter.ToInt32(data, offset);
                offset += 4;

                packet.channels = BitConverter.ToInt32(data, offset);
                offset += 4;

                // A missing or truncated transcript leaves the field null;
                // the packet is still accepted.
                packet.transcript = ReadLengthPrefixedText(offset);
                break;

            case PACKET_TYPE_AUDIO_CHUNK:
                // start sample + sample count
                if (data.Length - offset < 8)
                {
                    return null;
                }

                packet.startSample = BitConverter.ToInt32(data, offset);
                offset += 4;

                packet.chunkSampleCount = BitConverter.ToInt32(data, offset);
                offset += 4;

                // Compare against the number of whole samples actually present rather
                // than computing count * 2, which can overflow.
                int availableSamples = (data.Length - offset) / sizeof(short);
                if (packet.chunkSampleCount > 0 && packet.chunkSampleCount <= availableSamples)
                {
                    packet.audioSamples = new short[packet.chunkSampleCount];
                    for (int i = 0; i < packet.chunkSampleCount; i++)
                    {
                        packet.audioSamples[i] = BitConverter.ToInt16(data, offset);
                        offset += sizeof(short);
                    }
                }
                // Otherwise audioSamples stays null and the packet is still returned.
                break;

            case PACKET_TYPE_AUDIO_END:
            case PACKET_TYPE_FINAL:
                // These packets have no additional data beyond the header
                break;

            case PACKET_TYPE_TRANSCRIPT:
                packet.transcript = ReadLengthPrefixedText(offset);
                break;

            default:
                return null;
        }

        return packet;
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Error parsing speech packet V2: {ex.Message}", ConvaiLogger.LogCategory.Character);
        return null;
    }
}
|
||||||
|
|
||||||
|
// Public properties for debugging
|
||||||
|
/// <summary>True while the UDP listener is active.</summary>
public bool IsListening
{
    get { return _isListening; }
}

/// <summary>True while a received speech sequence is being played.</summary>
public bool IsPlayingSequence
{
    get { return _isPlayingSequence; }
}

/// <summary>Number of entries waiting in the playback queue.</summary>
public int QueuedClipCount
{
    get { return _playbackQueue.Count; }
}

/// <summary>Number of clips currently being reassembled.</summary>
public int IncomingClipCount
{
    get { return _incomingClips.Count; }
}
|
||||||
|
|
||||||
|
// Debug methods
|
||||||
|
/// <summary>
/// Debug helper: writes the receiver's current listening, playback, and queue
/// state to the Convai log.
/// </summary>
public void ShowNetworkStatus()
{
    var category = ConvaiLogger.LogCategory.Character;

    ConvaiLogger.Info("=== Speech Receiver Status ===", category);
    ConvaiLogger.Info($"Listening: {_isListening} on port {listenPort}", category);
    ConvaiLogger.Info($"Playing Sequence: {_isPlayingSequence}", category);
    ConvaiLogger.Info($"Current Sequence: {_currentSequence}", category);
    ConvaiLogger.Info($"Queued Clips: {_playbackQueue.Count}", category);
    ConvaiLogger.Info($"Incoming Clips: {_incomingClips.Count}", category);

    if (speechAudioSource == null)
    {
        return;
    }

    ConvaiLogger.Info($"Audio Source: {speechAudioSource.name} (Volume: {speechAudioSource.volume})", category);
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: 8fe3e5348bc484f44be079d6aaf6e17e
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
479
Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
Normal file
479
Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs
Normal file
@ -0,0 +1,479 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Sockets;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Convai.Scripts.Runtime.Core;
|
||||||
|
using Convai.Scripts.Runtime.LoggerSystem;
|
||||||
|
using Convai.Scripts.Runtime.Utils;
|
||||||
|
using UnityEngine;
|
||||||
|
using System.Collections;
|
||||||
|
|
||||||
|
namespace Convai.Scripts.Runtime.Multiplayer
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// UDP Speech Sender - Simple and reliable approach using events
|
||||||
|
/// Hooks into AudioManager events to capture when clips are about to be played
|
||||||
|
/// </summary>
|
||||||
|
public class ConvaiUDPSpeechSender : MonoBehaviour
|
||||||
|
{
|
||||||
|
[Header("Network Configuration")]
|
||||||
|
[SerializeField] private string targetIP = "127.0.0.1";
|
||||||
|
[SerializeField] private int targetPort = 12346;
|
||||||
|
[SerializeField] private bool enableDebugLogging = true;
|
||||||
|
|
||||||
|
[Header("NPC Source")]
|
||||||
|
[SerializeField] private bool useActiveNPC = true;
|
||||||
|
[SerializeField] private ConvaiNPC sourceNPC;
|
||||||
|
|
||||||
|
[Header("Audio Settings")]
|
||||||
|
[SerializeField] private int maxSamplesPerPacket = 8192;
|
||||||
|
[SerializeField] private bool sendTranscripts = true;
|
||||||
|
|
||||||
|
// Network components
|
||||||
|
private UdpClient _udpClient;
|
||||||
|
private IPEndPoint _targetEndPoint;
|
||||||
|
private bool _isInitialized = false;
|
||||||
|
|
||||||
|
// Speech tracking
|
||||||
|
private int _speechSequence = 0;
|
||||||
|
private bool _isSendingSpeech = false;
|
||||||
|
private HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();
|
||||||
|
|
||||||
|
// Packet constants
|
||||||
|
private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
|
||||||
|
private const byte PACKET_TYPE_AUDIO_START = 0x01;
|
||||||
|
private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
|
||||||
|
private const byte PACKET_TYPE_AUDIO_END = 0x03;
|
||||||
|
private const byte PACKET_TYPE_FINAL = 0x05;
|
||||||
|
|
||||||
|
// Events
|
||||||
|
public Action<bool> OnSpeechTransmission;
|
||||||
|
public Action<string> OnSpeechSent;
|
||||||
|
|
||||||
|
/// <summary>
/// Unity start hook: sets up the UDP client first, then the NPC event wiring.
/// </summary>
private void Start()
{
    InitializeNetwork();   // socket + target endpoint
    InitializeConvai();    // NPC lookup + event subscriptions
}
|
||||||
|
|
||||||
|
/// <summary>
/// Unity destroy hook: detaches from NPC events, then releases network resources.
/// </summary>
private void OnDestroy()
{
    CleanupNPCSubscriptions();   // stop receiving NPC callbacks first
    CleanupNetwork();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the UDP client and resolves the target endpoint from
/// <c>targetIP</c>/<c>targetPort</c>. On success the sender is marked initialized;
/// on failure the error is logged and the sender stays uninitialized.
/// </summary>
private void InitializeNetwork()
{
    try
    {
        _udpClient = new UdpClient();

        // Throws on a malformed address, which lands in the catch below.
        IPAddress address = IPAddress.Parse(targetIP);
        _targetEndPoint = new IPEndPoint(address, targetPort);

        _isInitialized = true;

        ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
    }
    catch (Exception setupError)
    {
        ConvaiLogger.Error($"Failed to initialize UDP speech sender: {setupError.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Picks the source NPC (the manager's active NPC when <c>useActiveNPC</c> is set),
/// subscribes to its events, and registers for active-NPC changes.
/// </summary>
private void InitializeConvai()
{
    if (useActiveNPC)
    {
        // Overrides any NPC assigned in the inspector.
        sourceNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
    }

    SubscribeToNPCEvents();

    // Without a manager there is nothing to notify us of later NPC activation.
    if (ConvaiNPCManager.Instance == null)
    {
        return;
    }

    ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Attaches the talking-state and transcript handlers to the source NPC's audio
/// manager, or logs a warning when no NPC / audio manager is available.
/// </summary>
private void SubscribeToNPCEvents()
{
    if (sourceNPC?.AudioManager == null)
    {
        ConvaiLogger.Warn("No source NPC available for speech transmission", ConvaiLogger.LogCategory.Character);
        return;
    }

    // Talking state drives clip monitoring; transcripts are only logged.
    sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
    sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

    ConvaiLogger.Info($"UDP Speech Sender subscribed to NPC: {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reacts to the NPC starting or stopping talking: starts the clip-monitoring
/// coroutine on start, sends the final packet on stop. Ignored until the network
/// has been initialized.
/// </summary>
/// <param name="isTalking">True when the NPC began talking, false when it stopped.</param>
private void HandleCharacterTalkingChanged(bool isTalking)
{
    if (!_isInitialized)
    {
        return;
    }

    if (!isTalking)
    {
        // Speech ended: tell the receiver the sequence is over (fire-and-forget).
        _ = SendFinalPacket();
        return;
    }

    // Speech started: watch the NPC's audio source for new clips.
    StartCoroutine(MonitorAudioClips());
}
|
||||||
|
|
||||||
|
/// <summary>
/// Logs a non-empty NPC transcript when debug logging is enabled. The transcript
/// is not transmitted here.
/// </summary>
/// <param name="transcript">Transcript text reported by the NPC's audio manager.</param>
private void HandleTranscriptAvailable(string transcript)
{
    if (!enableDebugLogging || string.IsNullOrEmpty(transcript))
    {
        return;
    }

    ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Coroutine that polls the source NPC's AudioSource every 100 ms while the NPC is
/// talking and transmits each clip the first time it is seen. Clears the sent-clip
/// set when the NPC stops talking or becomes unavailable.
/// </summary>
private IEnumerator MonitorAudioClips()
{
    // Explicit comparisons rather than ?. so Unity's overloaded == (which also
    // reports destroyed objects as null) is used.
    if (sourceNPC == null || sourceNPC.AudioManager == null)
    {
        yield break;
    }

    AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
    AudioClip lastClip = null;

    // One shared wait object instead of a new allocation every poll.
    var pollInterval = new WaitForSeconds(0.1f); // Check every 100ms

    // sourceNPC is a field and may be cleared or destroyed while we are suspended,
    // so re-check it on every iteration.
    while (sourceNPC != null && sourceNPC.IsCharacterTalking)
    {
        AudioClip currentClip = audioSource != null ? audioSource.clip : null;

        if (currentClip != null && currentClip != lastClip)
        {
            // New clip detected!
            lastClip = currentClip;

            // Add returns false when the clip was already recorded as sent.
            if (_sentClips.Add(currentClip))
            {
                string transcript = GetRecentTranscript();

                // Fire-and-forget; the transmit method logs its own failures.
                _ = TransmitAudioClip(currentClip, transcript);
            }
        }

        yield return pollInterval;
    }

    // Clear sent clips when done
    _sentClips.Clear();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Placeholder for clip-to-transcript matching: always returns an empty string.
/// </summary>
/// <returns>An empty string.</returns>
private string GetRecentTranscript() => string.Empty;
|
||||||
|
|
||||||
|
/// <summary>
/// Sends one audio clip to the receiver as a start packet, a series of chunk
/// packets, and an end packet, all tagged with the same sequence number.
/// Raises <c>OnSpeechTransmission(true)</c> on the first clip of an utterance and
/// <c>OnSpeechSent</c> after each clip. Failures are logged, not thrown.
/// </summary>
/// <param name="audioClip">Clip to transmit; ignored when null.</param>
/// <param name="transcript">Text sent with the start packet; may be null or empty.</param>
private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
{
    if (!_isInitialized || audioClip == null)
    {
        return;
    }

    try
    {
        // Start transmission if not already started
        if (!_isSendingSpeech)
        {
            _isSendingSpeech = true;
            OnSpeechTransmission?.Invoke(true);

            ConvaiLogger.Info($"🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
        }

        // Reserve the sequence number before the first await. This method is started
        // fire-and-forget, so a second clip can begin while this one is still sending;
        // incrementing only at the end would hand both clips the same number.
        int clipSequence = _speechSequence++;

        // Send start packet with metadata
        await SendAudioStartPacket(audioClip, transcript, clipSequence);

        // Send audio data in chunks (all with the same sequence)
        await SendAudioClipInChunks(audioClip, clipSequence);

        // Send end packet for this clip (with the same sequence)
        await SendAudioEndPacket(clipSequence);

        OnSpeechSent?.Invoke(transcript);

        if (enableDebugLogging)
        {
            ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
        }
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds and sends the audio-start packet for a clip.
/// </summary>
/// <param name="audioClip">Clip whose metadata goes into the packet.</param>
/// <param name="transcript">Transcript text carried by the packet.</param>
/// <param name="sequence">Sequence number identifying the clip.</param>
private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
{
    byte[] datagram = CreateAudioStartPacket(audioClip, transcript, sequence);
    int datagramLength = datagram.Length;

    await _udpClient.SendAsync(datagram, datagramLength, _targetEndPoint);

    if (!enableDebugLogging)
    {
        return;
    }

    ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the clip's sample data and sends it as consecutive audio-chunk packets,
/// pausing 5 ms between packets.
/// </summary>
/// <remarks>
/// The configured <c>maxSamplesPerPacket</c> is clamped to at least 1 (zero made this
/// loop spin forever without progress; negative values threw on allocation) and to
/// the number of 16-bit samples that fit in one UDP datagram next to the 17-byte
/// chunk header.
/// NOTE(review): the buffer holds <c>audioClip.samples</c> floats; for multi-channel
/// clips Unity's GetData appears to expect samples * channels interleaved values —
/// confirm whether non-mono clips need support (the receiver sizes its buffer the same way).
/// </remarks>
/// <param name="audioClip">Clip whose samples are sent.</param>
/// <param name="sequence">Sequence number written into every chunk.</param>
private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
{
    const int ChunkHeaderSize = 17;      // matches the header written by CreateAudioChunkPacket
    const int MaxUdpPayloadBytes = 65507; // largest UDP payload over IPv4

    int maxSamplesThatFit = (MaxUdpPayloadBytes - ChunkHeaderSize) / sizeof(short);
    int samplesPerPacket = Mathf.Clamp(maxSamplesPerPacket, 1, maxSamplesThatFit);

    // Get all audio data
    float[] audioData = new float[audioClip.samples];
    audioClip.GetData(audioData, 0);

    int frequency = audioClip.frequency;

    // Send in chunks
    int totalSamples = audioData.Length;
    int processedSamples = 0;
    int chunkCount = 0;

    while (processedSamples < totalSamples)
    {
        int remainingSamples = totalSamples - processedSamples;
        int currentChunkSize = Mathf.Min(samplesPerPacket, remainingSamples);

        float[] chunkData = new float[currentChunkSize];
        Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

        byte[] packet = CreateAudioChunkPacket(chunkData, frequency, processedSamples, sequence);
        await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

        processedSamples += currentChunkSize;
        chunkCount++;

        if (enableDebugLogging && chunkCount % 10 == 0)
        {
            ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        // Small delay to avoid overwhelming the network
        await Task.Delay(5);
    }

    if (enableDebugLogging)
    {
        ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds and sends the audio-end packet that closes a clip.
/// </summary>
/// <param name="sequence">Sequence number of the clip being closed.</param>
private async Task SendAudioEndPacket(int sequence)
{
    byte[] datagram = CreateAudioEndPacket(sequence);
    int datagramLength = datagram.Length;

    await _udpClient.SendAsync(datagram, datagramLength, _targetEndPoint);

    if (!enableDebugLogging)
    {
        return;
    }

    ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Sends the final packet that ends the current speech transmission and raises
/// <c>OnSpeechTransmission(false)</c>. Does nothing when no transmission is active.
/// If the send fails, the error is logged and the transmission is still marked as
/// ended, so the sender does not stay stuck in the "sending" state.
/// </summary>
private async Task SendFinalPacket()
{
    if (!_isSendingSpeech)
    {
        return;
    }

    try
    {
        byte[] packet = CreateFinalPacket();
        await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

        _isSendingSpeech = false;
        OnSpeechTransmission?.Invoke(false);

        ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);

        // The utterance is over whether or not the datagram went out. The flag check
        // avoids a second notification when the failure came after the state reset above.
        if (_isSendingSpeech)
        {
            _isSendingSpeech = false;
            OnSpeechTransmission?.Invoke(false);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes an audio-start packet.
/// </summary>
/// <remarks>
/// Layout (multi-byte values in <see cref="BitConverter"/> byte order):
/// magic (4) | type 0x01 (1) | sequence (4) | total samples (4) |
/// sample rate (4) | channels (4) | transcript length (4) | transcript UTF-8 (N).
/// </remarks>
/// <param name="audioClip">Clip supplying sample count, frequency, and channel count.</param>
/// <param name="transcript">Transcript text; null is written as an empty string.</param>
/// <param name="sequence">Sequence number identifying the clip.</param>
/// <returns>The packet bytes: a 25-byte header followed by the transcript.</returns>
private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
{
    const int fixedHeaderSize = 25;

    byte[] transcriptUtf8 = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");
    byte[] buffer = new byte[fixedHeaderSize + transcriptUtf8.Length];
    int cursor = 0;

    // Writes a field at the cursor and advances past it.
    void Append(byte[] field)
    {
        field.CopyTo(buffer, cursor);
        cursor += field.Length;
    }

    Append(BitConverter.GetBytes(MAGIC_NUMBER));

    buffer[cursor] = PACKET_TYPE_AUDIO_START;
    cursor += 1;

    Append(BitConverter.GetBytes(sequence));
    Append(BitConverter.GetBytes(audioClip.samples));
    Append(BitConverter.GetBytes(audioClip.frequency));
    Append(BitConverter.GetBytes(audioClip.channels));
    Append(BitConverter.GetBytes(transcriptUtf8.Length));
    Append(transcriptUtf8);

    return buffer;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes one "audio chunk" packet carrying a slice of samples as 16-bit PCM.
/// </summary>
/// <remarks>
/// Layout (integers and samples are in platform byte order, as produced by
/// <see cref="BitConverter"/> / <see cref="Buffer.BlockCopy"/>):
/// <list type="bullet">
/// <item><description>4 bytes: magic number</description></item>
/// <item><description>1 byte: packet type (audio chunk)</description></item>
/// <item><description>4 bytes: sequence number</description></item>
/// <item><description>4 bytes: start sample position</description></item>
/// <item><description>4 bytes: sample count in this chunk</description></item>
/// <item><description>N bytes: audio data, 2 bytes per sample</description></item>
/// </list>
/// </remarks>
/// <param name="audioData">Float samples for this chunk; each is clamped to [-1, 1] before conversion.</param>
/// <param name="frequency">Not used by this method; kept so the signature stays unchanged for callers.</param>
/// <param name="startSample">Position of the first sample of this chunk, written to the header.</param>
/// <param name="sequence">Sequence number of this transmission.</param>
/// <returns>The serialized packet.</returns>
private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
{
    const int headerSize = 17; // 4 + 1 + 4 + 4 + 4

    int sampleCount = audioData.Length;
    int audioDataSize = sampleCount * sizeof(short);
    byte[] packet = new byte[headerSize + audioDataSize];

    int offset = 0;

    BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
    offset += 4;

    packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
    offset += 1;

    BitConverter.GetBytes(sequence).CopyTo(packet, offset);
    offset += 4;

    BitConverter.GetBytes(startSample).CopyTo(packet, offset);
    offset += 4;

    BitConverter.GetBytes(sampleCount).CopyTo(packet, offset);
    offset += 4;

    // Convert float samples to 16-bit PCM in a single scratch buffer, then copy
    // the raw bytes in one call. This avoids allocating a temporary 2-byte array
    // per sample while producing the same bytes in the same byte order.
    short[] pcm = new short[sampleCount];
    for (int i = 0; i < sampleCount; i++)
    {
        pcm[i] = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
    }

    Buffer.BlockCopy(pcm, 0, packet, offset, audioDataSize);

    return packet;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes a header-only "audio end" packet: magic number (4 bytes),
/// packet type (1 byte), sequence number (4 bytes) and a zero 4-byte
/// trailing field.
/// </summary>
/// <param name="sequence">Sequence number written into the packet.</param>
/// <returns>A 13-byte packet.</returns>
private byte[] CreateAudioEndPacket(int sequence)
{
    const int packetSize = 13; // header only
    const int typeIndex = 4;
    const int sequenceIndex = 5;
    const int trailerIndex = 9;

    byte[] buffer = new byte[packetSize];

    byte[] magicBytes = BitConverter.GetBytes(MAGIC_NUMBER);
    magicBytes.CopyTo(buffer, 0);

    buffer[typeIndex] = PACKET_TYPE_AUDIO_END;

    byte[] sequenceBytes = BitConverter.GetBytes(sequence);
    sequenceBytes.CopyTo(buffer, sequenceIndex);

    // No additional data: the trailing field is written as zero.
    byte[] emptyTrailer = BitConverter.GetBytes(0);
    emptyTrailer.CopyTo(buffer, trailerIndex);

    return buffer;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes a header-only "final" packet: magic number (4 bytes),
/// packet type (1 byte), the current value of <c>_speechSequence</c>
/// (4 bytes) and a zero 4-byte trailing field.
/// </summary>
/// <returns>A 13-byte packet.</returns>
private byte[] CreateFinalPacket()
{
    const int packetSize = 13; // header only
    const int typeIndex = 4;
    const int sequenceIndex = 5;
    const int trailerIndex = 9;

    byte[] buffer = new byte[packetSize];

    byte[] magicBytes = BitConverter.GetBytes(MAGIC_NUMBER);
    magicBytes.CopyTo(buffer, 0);

    buffer[typeIndex] = PACKET_TYPE_FINAL;

    byte[] sequenceBytes = BitConverter.GetBytes(_speechSequence);
    sequenceBytes.CopyTo(buffer, sequenceIndex);

    // No additional data: the trailing field is written as zero.
    byte[] emptyTrailer = BitConverter.GetBytes(0);
    emptyTrailer.CopyTo(buffer, trailerIndex);

    return buffer;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Detaches this component's handlers from the current source NPC's audio
/// manager (talking-changed and transcript-available events) and from the
/// NPC manager's active-NPC-changed event. Each part is skipped when the
/// corresponding object is null.
/// </summary>
private void CleanupNPCSubscriptions()
{
    var audioManager = sourceNPC?.AudioManager;
    if (audioManager != null)
    {
        audioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
        audioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
    }

    var npcManager = ConvaiNPCManager.Instance;
    if (npcManager != null)
    {
        npcManager.OnActiveNPCChanged -= HandleActiveNPCChanged;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Closes and disposes the UDP client, if one exists, and clears the field.
/// </summary>
private void CleanupNetwork()
{
    if (_udpClient != null)
    {
        _udpClient.Close();
        _udpClient.Dispose();
    }

    _udpClient = null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reacts to a change of the active NPC: drops the subscriptions held on the
/// previous source NPC, switches <c>sourceNPC</c> to
/// <paramref name="newActiveNPC"/> and subscribes to its events.
/// Ignored entirely when <c>useActiveNPC</c> is disabled.
/// </summary>
/// <param name="newActiveNPC">The NPC that has become active.</param>
private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
{
    if (!useActiveNPC) return;

    // Cleanup old subscriptions
    CleanupNPCSubscriptions();

    // Update to new NPC
    sourceNPC = newActiveNPC;
    SubscribeToNPCEvents();

    // CleanupNPCSubscriptions() also detaches this handler from
    // ConvaiNPCManager.OnActiveNPCChanged. Re-attach it so subsequent active-NPC
    // changes are still observed. Removing first keeps exactly one registration
    // even if SubscribeToNPCEvents() has already re-attached the handler.
    if (ConvaiNPCManager.Instance != null)
    {
        ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
        ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
    }
}
|
||||||
|
|
||||||
|
// Public methods for external control
|
||||||
|
/// <summary>
/// Changes the destination that packets are sent to.
/// </summary>
/// <param name="ip">Textual IP address of the receiver.</param>
/// <param name="port">UDP port of the receiver.</param>
/// <exception cref="ArgumentNullException"><paramref name="ip"/> is <c>null</c>.</exception>
/// <exception cref="FormatException"><paramref name="ip"/> is not a valid IP address.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="port"/> is outside the valid port range.</exception>
public void SetTargetEndpoint(string ip, int port)
{
    // Build the endpoint first: parsing and construction can throw, and in
    // that case targetIP / targetPort must not be left out of sync with
    // _targetEndPoint.
    IPEndPoint endPoint = new IPEndPoint(IPAddress.Parse(ip), port);

    targetIP = ip;
    targetPort = port;
    _targetEndPoint = endPoint;
}
|
||||||
|
|
||||||
|
/// <summary>Current value of the internal speech-sending flag.</summary>
public bool IsSendingSpeech
{
    get { return _isSendingSpeech; }
}

/// <summary>Current value of the internal initialized flag.</summary>
public bool IsInitialized
{
    get { return _isInitialized; }
}

/// <summary>The NPC currently used as the speech source; may be null.</summary>
public ConvaiNPC SourceNPC
{
    get { return sourceNPC; }
}
|
||||||
|
|
||||||
|
// Debug methods
|
||||||
|
/// <summary>
/// Writes a status summary to the Convai logger: target address, initialization
/// and sending flags, source NPC name ("None" when unset), the current speech
/// sequence value and the number of tracked sent clips.
/// </summary>
public void ShowNetworkStatus()
{
    var category = ConvaiLogger.LogCategory.Character;

    // Each line is formatted immediately before it is logged, in order.
    void Report(string line)
    {
        ConvaiLogger.Info(line, category);
    }

    Report("=== Speech Sender Status ===");
    Report($"Target: {targetIP}:{targetPort}");
    Report($"Initialized: {_isInitialized}");
    Report($"Sending Speech: {_isSendingSpeech}");

    string npcName = sourceNPC?.characterName ?? "None";
    Report($"Source NPC: {npcName}");

    Report($"Packets sent: {_speechSequence}");
    Report($"Sent clips: {_sentClips.Count}");
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: f903e03686cf216469fb4bf1e6c027d0
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
Reference in New Issue
Block a user