using System;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;

namespace Convai.Scripts.Runtime.Multiplayer
{
    /// <summary>
    /// UDP Speech Sender - Simple and reliable approach using events
    /// Hooks into AudioManager events to capture when clips are about to be played
    /// </summary>
    public class ConvaiUDPSpeechSender : MonoBehaviour
    {
        [Header("Network Configuration")]
        [SerializeField] private bool enableDebugLogging = true;

        [Header("NPC Source")]
        [SerializeField] private bool useActiveNPC = true;
        [SerializeField] private ConvaiNPC sourceNPC;

        [Header("Audio Settings")]
        [SerializeField] private int maxSamplesPerPacket = 8192;
        [SerializeField] private bool sendTranscripts = true;

        // Network components
        private UdpClient _udpClient;
        private IPEndPoint _targetEndPoint;
        private string targetIP;
        private int targetPort;
        private bool _isInitialized;

        // Speech tracking
        private int _speechSequence;
        private bool _isSendingSpeech;
        private readonly HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();

        // Packet constants
        private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
        private const byte PACKET_TYPE_AUDIO_START = 0x01;
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
        private const byte PACKET_TYPE_AUDIO_END = 0x03;
        private const byte PACKET_TYPE_FINAL = 0x05;
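
        // All packet types share a common header: a 4-byte magic number, a 1-byte
        // packet type, and a 4-byte sequence number. Start packets append clip
        // metadata (total samples, sample rate, channels) plus a length-prefixed
        // UTF-8 transcript; chunk packets append the start-sample position, the
        // sample count, and 16-bit PCM data; end and final packets append a
        // 4-byte zero length field. See the Create*Packet methods below.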

        // Events
        public Action<bool> OnSpeechTransmission;
        public Action<string> OnSpeechSent;

        private void Start()
        {
            // Get network config from global instance
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.ipAddress;
                targetPort = cfg.port;

                // Subscribe to peer discovery if enabled
                if (cfg.useAutoDiscovery && UDPPeerDiscovery.Instance != null)
                {
                    UDPPeerDiscovery.Instance.OnPeerDiscovered += HandlePeerDiscovered;
                    UDPPeerDiscovery.Instance.OnPeerLost += HandlePeerLost;
                    ConvaiLogger.Info("Speech sender subscribed to peer discovery", ConvaiLogger.LogCategory.Character);
                }
            }
            else
            {
                Debug.LogError("NetworkConfig not found! Please ensure NetworkConfig.asset exists in Resources folder.");
                targetIP = "255.255.255.255";
                targetPort = 1221;
            }

            InitializeNetwork();
            InitializeConvai();
        }

        private void OnDestroy()
        {
            // Unsubscribe from peer discovery
            if (UDPPeerDiscovery.Instance != null)
            {
                UDPPeerDiscovery.Instance.OnPeerDiscovered -= HandlePeerDiscovered;
                UDPPeerDiscovery.Instance.OnPeerLost -= HandlePeerLost;
            }

            CleanupNPCSubscriptions();
            CleanupNetwork();
        }

        private void HandlePeerDiscovered(string peerIP)
        {
            targetIP = peerIP;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(peerIP), targetPort);
            ConvaiLogger.Info($"🔊 Speech sender now targeting peer at {peerIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void HandlePeerLost()
        {
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.fallbackBroadcastIP;
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                ConvaiLogger.Warn($"🔊 Speech sender falling back to broadcast: {targetIP}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeNetwork()
        {
            try
            {
                _udpClient = new UdpClient();
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                _isInitialized = true;

                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeConvai()
        {
            // Prefer local ConvaiNPC on the same GameObject, then fall back to active NPC
            var localNPC = GetComponent<ConvaiNPC>();
            if (localNPC != null)
            {
                sourceNPC = localNPC;
            }
            else if (useActiveNPC)
            {
                sourceNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
            }

            SubscribeToNPCEvents();

            // Subscribe to NPC manager events for late NPC activation
            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
            }
        }

        private void SubscribeToNPCEvents()
        {
            if (sourceNPC?.AudioManager != null)
            {
                // Hook into the character talking events
                sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

                ConvaiLogger.Info($"UDP Speech Sender subscribed to NPC: {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            }
            else
            {
                ConvaiLogger.Warn("No source NPC available for speech transmission", ConvaiLogger.LogCategory.Character);
            }
        }

        private void HandleCharacterTalkingChanged(bool isTalking)
        {
            if (!_isInitialized) return;

            if (isTalking)
            {
                // Start monitoring for audio clips
                StartCoroutine(MonitorAudioClips());
            }
            else
            {
                // End speech transmission
                _ = SendFinalPacket();
            }
        }

        private void HandleTranscriptAvailable(string transcript)
        {
            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
            {
                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
            }
        }

        private IEnumerator MonitorAudioClips()
        {
            if (sourceNPC?.AudioManager == null) yield break;

            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
            AudioClip lastClip = null;

            while (sourceNPC.IsCharacterTalking)
            {
                if (audioSource?.clip != null && audioSource.clip != lastClip)
                {
                    // New clip detected
                    lastClip = audioSource.clip;

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastClip))
                    {
                        _sentClips.Add(lastClip);

                        // Get the most recently available transcript
                        string transcript = GetRecentTranscript();

                        // Send this clip
                        _ = TransmitAudioClip(lastClip, transcript);
                    }
                }

                yield return new WaitForSeconds(0.1f); // Check every 100 ms
            }

            // Clear sent clips when done
            _sentClips.Clear();
        }

        private string GetRecentTranscript()
        {
            // Try to get transcript from the NPC's recent activity
            // This is a simple approach - in a more complex setup you might want to match clips to transcripts
            return ""; // Transcripts come via the transcript event
        }

        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
        {
            if (!_isInitialized || audioClip == null) return;

            try
            {
                // Start transmission if not already started
                if (!_isSendingSpeech)
                {
                    _isSendingSpeech = true;
                    OnSpeechTransmission?.Invoke(true);

                    ConvaiLogger.Info("🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
                }

                // Use the current speech sequence for this entire clip
                int clipSequence = _speechSequence;

                // Send start packet with metadata
                await SendAudioStartPacket(audioClip, transcript, clipSequence);

                // Send audio data in chunks (all with the same sequence)
                await SendAudioClipInChunks(audioClip, clipSequence);

                // Send end packet for this clip (with the same sequence)
                await SendAudioEndPacket(clipSequence);

                // Only increment sequence after the entire clip is sent
                _speechSequence++;

                OnSpeechSent?.Invoke(transcript);

                if (enableDebugLogging)
                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
        {
            // Get all audio data
            float[] audioData = new float[audioClip.samples];
            audioClip.GetData(audioData, 0);

            // Send in chunks
            int totalSamples = audioData.Length;
            int processedSamples = 0;
            int chunkCount = 0;

            while (processedSamples < totalSamples)
            {
                int remainingSamples = totalSamples - processedSamples;
                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);

                float[] chunkData = new float[currentChunkSize];
                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                processedSamples += currentChunkSize;
                chunkCount++;

                if (enableDebugLogging && chunkCount % 10 == 0)
                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);

                // Small delay to avoid overwhelming the network
                await Task.Delay(5);
            }

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioEndPacket(int sequence)
        {
            byte[] packet = CreateAudioEndPacket(sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendFinalPacket()
        {
            if (!_isSendingSpeech) return;

            try
            {
                byte[] packet = CreateFinalPacket();
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                _isSendingSpeech = false;
                OnSpeechTransmission?.Invoke(false);

                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");

            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x01 = audio start)
            // 4 bytes: Sequence number
            // 4 bytes: Total samples in clip
            // 4 bytes: Sample rate
            // 4 bytes: Channels
            // 4 bytes: Transcript length
            // N bytes: Transcript (UTF-8)

            int headerSize = 25;
            byte[] packet = new byte[headerSize + transcriptBytes.Length];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_START;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
            offset += 4;

            transcriptBytes.CopyTo(packet, offset);

            return packet;
        }

        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
        {
            // Packet structure:
            // 4 bytes: Magic number
            // 1 byte:  Packet type (0x02 = audio chunk)
            // 4 bytes: Sequence number
            // 4 bytes: Start sample position
            // 4 bytes: Sample count in this chunk
            // N bytes: Audio data (as 16-bit PCM)

            int headerSize = 17;
            int audioDataSize = audioData.Length * sizeof(short);
            byte[] packet = new byte[headerSize + audioDataSize];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(startSample).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
            offset += 4;

            // Convert float samples to 16-bit PCM
            for (int i = 0; i < audioData.Length; i++)
            {
                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
                BitConverter.GetBytes(sample).CopyTo(packet, offset);
                offset += 2;
            }

            return packet;
        }
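
        // Illustrative sketch only (not called at runtime): how a receiver could
        // decode the chunk packets produced by CreateAudioChunkPacket. Offsets
        // mirror the 17-byte header documented above; the method name and out
        // parameters are hypothetical and not part of the Convai API.
        private static float[] DecodeAudioChunkPayload(byte[] packet, out int sequence, out int startSample)
        {
            sequence = BitConverter.ToInt32(packet, 5);      // after 4-byte magic + 1-byte type
            startSample = BitConverter.ToInt32(packet, 9);
            int sampleCount = BitConverter.ToInt32(packet, 13);

            float[] samples = new float[sampleCount];
            for (int i = 0; i < sampleCount; i++)
            {
                short pcm = BitConverter.ToInt16(packet, 17 + i * 2);
                samples[i] = pcm / (float)short.MaxValue;    // back to the [-1, 1] range
            }

            return samples;
        }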

        private byte[] CreateAudioEndPacket(int sequence)
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_END;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private byte[] CreateFinalPacket()
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_FINAL;
            offset += 1;

            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private void CleanupNPCSubscriptions()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
            }

            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
            }
        }

        private void CleanupNetwork()
        {
            _udpClient?.Close();
            _udpClient?.Dispose();
            _udpClient = null;
        }

        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
        {
            if (!useActiveNPC) return;

            // Cleanup old subscriptions
            CleanupNPCSubscriptions();

            // Update to new NPC
            sourceNPC = newActiveNPC;
            SubscribeToNPCEvents();
        }

        // Public methods for external control
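        /// <summary>
        /// Overrides the current target with an explicit IP address and port;
        /// subsequent speech packets are sent to this endpoint.
        /// </summary>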
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        public bool IsSendingSpeech => _isSendingSpeech;
        public bool IsInitialized => _isInitialized;
        public ConvaiNPC SourceNPC => sourceNPC;

        // Debug methods
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info("=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Clip sequences sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
        }
    }
}