Master-Arbeit-Tom-Hempel/Unity-Master/Assets/Scripts/Multiplayer/ConvaiUDPSpeechSender.cs

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;
using System.Collections;
namespace Convai.Scripts.Runtime.Multiplayer
{
/// <summary>
/// UDP Speech Sender - Captures and transmits NPC speech audio to the remote player
///
/// FLOW (Player 1 → Player 2):
/// 1. Player 2 speaks (via ConvaiSimpleUDPAudioSender on their device)
/// 2. Player 1 receives voice input (via ConvaiSimpleUDPAudioReceiver)
/// 3. Player 1's NPC generates response speech (Convai API)
/// 4. THIS COMPONENT monitors Player 1's NPC AudioSource
/// 5. When new AudioClips appear, transmit them to Player 2
/// 6. Player 2's ConvaiUDPSpeechReceiver plays the audio
///
/// This component should be on a NetworkManager or similar persistent object.
/// It will find and monitor ConvaiNPC components on Avatar objects in the scene.
/// </summary>
public class ConvaiUDPSpeechSender : MonoBehaviour
{
[Header("Network Configuration")]
[SerializeField] private bool enableDebugLogging = true;
[Header("NPC Source")]
[SerializeField] private bool useActiveNPC = true;
[SerializeField] private ConvaiNPC sourceNPC;
[Header("Audio Settings")]
[SerializeField] private int maxSamplesPerPacket = 8192;
// Network components
private UdpClient _udpClient;
private IPEndPoint _targetEndPoint;
private string targetIP;
private int targetPort;
private bool _isInitialized = false;
// Speech tracking
private int _speechSequence = 0;
private bool _isSendingSpeech = false;
private HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();
// Packet constants
private const uint MAGIC_NUMBER = 0xC0A3; // Protocol magic number (V3) identifying speech packets
private const byte PACKET_TYPE_AUDIO_START = 0x01;
private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
private const byte PACKET_TYPE_AUDIO_END = 0x03;
private const byte PACKET_TYPE_FINAL = 0x05;
// Events
public Action<bool> OnSpeechTransmission;
public Action<string> OnSpeechSent;
// Metrics for debug UI
private int _totalClipsSent = 0;
private DateTime _lastClipSentTime;
public int TotalClipsSent => _totalClipsSent;
public float TimeSinceLastSend => _lastClipSentTime != default ?
(float)(DateTime.UtcNow - _lastClipSentTime).TotalSeconds : -1f;
public string CurrentTargetIP => targetIP;
public int CurrentTargetPort => targetPort;
public bool UsingDiscovery => NetworkConfig.Instance?.useAutoDiscovery ?? false;
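/// <summary>
/// Reads the target endpoint from NetworkConfig (falling back to broadcast defaults),
/// subscribes to peer discovery when enabled, then initializes the UDP socket and NPC hooks.
/// </summary>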
private void Start()
{
// Get network config from global instance
var cfg = NetworkConfig.Instance;
if (cfg != null)
{
targetIP = cfg.ipAddress;
targetPort = cfg.port;
// Subscribe to peer discovery if enabled
if (cfg.useAutoDiscovery && UDPPeerDiscovery.Instance != null)
{
UDPPeerDiscovery.Instance.OnPeerDiscovered += HandlePeerDiscovered;
UDPPeerDiscovery.Instance.OnPeerLost += HandlePeerLost;
ConvaiLogger.Info("Speech sender subscribed to peer discovery", ConvaiLogger.LogCategory.Character);
}
}
else
{
Debug.LogError("NetworkConfig not found! Please ensure NetworkConfig.asset exists in Resources folder.");
targetIP = "255.255.255.255";
targetPort = 1221;
}
InitializeNetwork();
InitializeConvai();
}
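/// <summary>
/// Unsubscribes from peer discovery and NPC events and closes the UDP client.
/// </summary>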
private void OnDestroy()
{
// Unsubscribe from peer discovery
if (UDPPeerDiscovery.Instance != null)
{
UDPPeerDiscovery.Instance.OnPeerDiscovered -= HandlePeerDiscovered;
UDPPeerDiscovery.Instance.OnPeerLost -= HandlePeerLost;
}
CleanupNPCSubscriptions();
CleanupNetwork();
}
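/// <summary>
/// Polls the scene every frame for the currently enabled ConvaiNPC (when useActiveNPC is set)
/// and re-subscribes if the active NPC has changed.
/// </summary>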
private void Update()
{
// Continuously update source NPC if using active NPC mode
if (useActiveNPC)
{
var currentActiveNPC = FindEnabledConvaiNPC();
if (currentActiveNPC != sourceNPC)
{
// Cleanup old subscriptions
CleanupNPCSubscriptions();
// Update to new NPC
sourceNPC = currentActiveNPC;
SubscribeToNPCEvents();
if (sourceNPC != null)
{
ConvaiLogger.Info($"🔄 UDP Speech Sender updated source NPC to: {sourceNPC.characterName} (on {sourceNPC.gameObject.name})", ConvaiLogger.LogCategory.Character);
}
else
{
ConvaiLogger.Info($"🔄 UDP Speech Sender cleared source NPC", ConvaiLogger.LogCategory.Character);
}
}
}
}
/// <summary>
/// Finds an enabled ConvaiNPC in the scene (doesn't rely on ConvaiNPCManager raycasting)
/// </summary>
private ConvaiNPC FindEnabledConvaiNPC()
{
// Find all ConvaiNPC components in the scene (including inactive GameObjects)
var allNPCs = FindObjectsOfType<ConvaiNPC>(true);
// Return the first one that's on an active GameObject
foreach (var npc in allNPCs)
{
if (npc.gameObject.activeInHierarchy && npc.enabled)
{
return npc;
}
}
return null;
}
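/// <summary>
/// Retargets all outgoing speech packets to a peer reported by UDPPeerDiscovery.
/// </summary>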
private void HandlePeerDiscovered(string peerIP)
{
targetIP = peerIP;
_targetEndPoint = new IPEndPoint(IPAddress.Parse(peerIP), targetPort);
ConvaiLogger.Info($"🔊 Speech sender now targeting peer at {peerIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
}
private void HandlePeerLost()
{
// Don't change targetIP - keep sending to the last known peer IP
// The peer might come back online and we'll automatically reconnect
ConvaiLogger.Warn($"🔊 Speech sender: Peer connection lost, but continuing to send to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
}
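/// <summary>
/// Creates the UdpClient and resolves the current target endpoint.
/// </summary>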
private void InitializeNetwork()
{
try
{
_udpClient = new UdpClient();
// Allow the broadcast fallback address (255.255.255.255) as well as unicast peer targets
_udpClient.EnableBroadcast = true;
_targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
_isInitialized = true;
ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
}
catch (Exception ex)
{
ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
}
}
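/// <summary>
/// Picks the source NPC (a ConvaiNPC on this GameObject first, otherwise any enabled NPC
/// in the scene), subscribes to its events, and listens for active-NPC changes.
/// </summary>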
private void InitializeConvai()
{
// Prefer local ConvaiNPC on the same GameObject, then fall back to finding enabled NPC
var localNPC = GetComponent<ConvaiNPC>();
if (localNPC != null)
{
sourceNPC = localNPC;
ConvaiLogger.Info($"Speech Sender: Using local NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
}
else if (useActiveNPC)
{
sourceNPC = FindEnabledConvaiNPC();
if (sourceNPC != null)
{
ConvaiLogger.Info($"Speech Sender: Found NPC {sourceNPC.characterName} on {sourceNPC.gameObject.name}", ConvaiLogger.LogCategory.Character);
}
else
{
ConvaiLogger.Warn("Speech Sender: No ConvaiNPC found in scene yet", ConvaiLogger.LogCategory.Character);
}
}
SubscribeToNPCEvents();
// Subscribe to NPC manager events for late NPC activation
if (ConvaiNPCManager.Instance != null)
{
ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
}
}
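/// <summary>
/// Hooks the source NPC's AudioManager talking/transcript events and starts the
/// continuous monitoring coroutine as a fallback.
/// </summary>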
private void SubscribeToNPCEvents()
{
if (sourceNPC == null)
{
ConvaiLogger.Warn("SubscribeToNPCEvents: sourceNPC is null", ConvaiLogger.LogCategory.Character);
return;
}
if (sourceNPC.AudioManager == null)
{
ConvaiLogger.Warn($"SubscribeToNPCEvents: AudioManager is null for {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
return;
}
// Hook into the character talking events
sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;
ConvaiLogger.Info($"✅ UDP Speech Sender subscribed to NPC: {sourceNPC.characterName} (on {sourceNPC.gameObject.name}), AudioManager: {sourceNPC.AudioManager.name}", ConvaiLogger.LogCategory.Character);
// Also start continuous monitoring as a fallback in case events don't fire
StartCoroutine(ContinuousAudioMonitoring());
}
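/// <summary>
/// Starts clip monitoring when the NPC begins talking and sends the final packet when it stops.
/// </summary>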
private void HandleCharacterTalkingChanged(bool isTalking)
{
if (!_isInitialized) return;
if (isTalking)
{
ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} started talking, monitoring audio clips...", ConvaiLogger.LogCategory.Character);
// Start monitoring for audio clips
StartCoroutine(MonitorAudioClips());
}
else
{
ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} stopped talking", ConvaiLogger.LogCategory.Character);
// End speech transmission
_ = SendFinalPacket();
}
}
private void HandleTranscriptAvailable(string transcript)
{
if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
{
ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
}
}
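/// <summary>
/// Polls the NPC's AudioSource while the character is talking and transmits each new AudioClip exactly once.
/// </summary>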
private IEnumerator MonitorAudioClips()
{
if (sourceNPC?.AudioManager == null)
{
ConvaiLogger.Error("MonitorAudioClips: AudioManager is null on sourceNPC", ConvaiLogger.LogCategory.Character);
yield break;
}
AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
if (audioSource == null)
{
ConvaiLogger.Error($"MonitorAudioClips: No AudioSource found on AudioManager ({sourceNPC.AudioManager.name})", ConvaiLogger.LogCategory.Character);
yield break;
}
ConvaiLogger.Info($"🔊 Started monitoring audio clips on {audioSource.name} for NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
AudioClip lastClip = null;
int checkCount = 0;
while (sourceNPC != null && sourceNPC.IsCharacterTalking)
{
checkCount++;
// Log periodically to show we're still monitoring
if (enableDebugLogging && checkCount % 10 == 0)
{
ConvaiLogger.DebugLog($"🔊 Monitoring... check #{checkCount}, current clip: {(audioSource?.clip != null ? audioSource.clip.name : "null")}, isTalking: {sourceNPC.IsCharacterTalking}", ConvaiLogger.LogCategory.Character);
}
if (audioSource?.clip != null && audioSource.clip != lastClip)
{
// New clip detected!
lastClip = audioSource.clip;
ConvaiLogger.Info($"🔊 NEW CLIP DETECTED: {lastClip.name}, length: {lastClip.length:F2}s, samples: {lastClip.samples}, freq: {lastClip.frequency}", ConvaiLogger.LogCategory.Character);
// Only send if we haven't sent this clip before
if (!_sentClips.Contains(lastClip))
{
_sentClips.Add(lastClip);
// Get the transcript from the most recent available transcript
string transcript = GetRecentTranscript();
// Send this clip
ConvaiLogger.Info($"🔊 TRANSMITTING CLIP to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
_ = TransmitAudioClip(lastClip, transcript);
}
else
{
ConvaiLogger.Warn($"🔊 Clip already sent, skipping: {lastClip.name}", ConvaiLogger.LogCategory.Character);
}
}
yield return new WaitForSeconds(0.1f); // Check every 100ms
}
ConvaiLogger.Info($"🔊 Stopped monitoring audio clips (NPC stopped talking or was destroyed). Checks performed: {checkCount}", ConvaiLogger.LogCategory.Character);
// Clear sent clips when done
_sentClips.Clear();
}
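/// <summary>
/// Fallback monitor that runs for the lifetime of the subscription and transmits new clips
/// even if the talking-changed event never fires; once the NPC goes quiet it clears the
/// sent-clip cache and sends the final packet.
/// </summary>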
private IEnumerator ContinuousAudioMonitoring()
{
ConvaiLogger.Info("🔊 Starting continuous audio monitoring as fallback", ConvaiLogger.LogCategory.Character);
AudioClip lastMonitoredClip = null;
while (true)
{
// Wait a bit between checks
yield return new WaitForSeconds(0.2f);
// Check if we still have a valid source NPC
if (sourceNPC == null || sourceNPC.AudioManager == null)
{
yield return new WaitForSeconds(1f); // Wait longer if no NPC
continue;
}
// Get the audio source
AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
if (audioSource == null)
{
yield return new WaitForSeconds(1f);
continue;
}
// Check if there's a new audio clip playing
if (audioSource.clip != null &&
audioSource.clip != lastMonitoredClip &&
audioSource.isPlaying)
{
lastMonitoredClip = audioSource.clip;
// Only send if we haven't sent this clip before
if (!_sentClips.Contains(lastMonitoredClip))
{
_sentClips.Add(lastMonitoredClip);
ConvaiLogger.Info($"🔊 [Continuous Monitor] NEW CLIP DETECTED: {lastMonitoredClip.name}, length: {lastMonitoredClip.length:F2}s", ConvaiLogger.LogCategory.Character);
// Start transmission if not already started
if (!_isSendingSpeech)
{
_isSendingSpeech = true;
OnSpeechTransmission?.Invoke(true);
}
string transcript = "";
_ = TransmitAudioClip(lastMonitoredClip, transcript);
}
}
// Clean up old clips from the sent list if NPC is not talking
if (!sourceNPC.IsCharacterTalking && _sentClips.Count > 0)
{
if (enableDebugLogging)
ConvaiLogger.DebugLog($"🔊 [Continuous Monitor] NPC stopped talking, clearing sent clips list ({_sentClips.Count} clips)", ConvaiLogger.LogCategory.Character);
_sentClips.Clear();
// Send final packet
if (_isSendingSpeech)
{
_ = SendFinalPacket();
}
}
}
}
private string GetRecentTranscript()
{
// Transcripts arrive separately via the OnAudioTranscriptAvailable event and are not
// matched to individual clips here, so the start packet carries an empty transcript.
return "";
}
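/// <summary>
/// Sends one AudioClip as a start packet, a series of chunk packets and an end packet,
/// all sharing the same sequence number, then updates the transmission metrics.
/// </summary>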
private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
{
if (!_isInitialized || audioClip == null) return;
try
{
// Start transmission if not already started
if (!_isSendingSpeech)
{
_isSendingSpeech = true;
OnSpeechTransmission?.Invoke(true);
ConvaiLogger.Info($"🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
}
// Use the current speech sequence for this entire clip
int clipSequence = _speechSequence;
// Send start packet with metadata
await SendAudioStartPacket(audioClip, transcript, clipSequence);
// Send audio data in chunks (all with the same sequence)
await SendAudioClipInChunks(audioClip, clipSequence);
// Send end packet for this clip (with the same sequence)
await SendAudioEndPacket(clipSequence);
// Only increment sequence after the entire clip is sent
_speechSequence++;
// Update metrics
_totalClipsSent++;
_lastClipSentTime = DateTime.UtcNow;
OnSpeechSent?.Invoke(transcript);
if (enableDebugLogging)
ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
}
catch (Exception ex)
{
ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
}
}
private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
{
byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
if (enableDebugLogging)
ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
}
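/// <summary>
/// Reads the clip's samples and streams them as 16-bit PCM chunks of at most
/// maxSamplesPerPacket samples, pausing briefly between packets.
/// </summary>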
private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
{
// Get all audio data
float[] audioData = new float[audioClip.samples];
audioClip.GetData(audioData, 0);
// Send in chunks
int totalSamples = audioData.Length;
int processedSamples = 0;
int chunkCount = 0;
while (processedSamples < totalSamples)
{
int remainingSamples = totalSamples - processedSamples;
int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);
float[] chunkData = new float[currentChunkSize];
Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);
byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
processedSamples += currentChunkSize;
chunkCount++;
if (enableDebugLogging && chunkCount % 10 == 0)
ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);
// Small delay to avoid overwhelming the network
await Task.Delay(5);
}
if (enableDebugLogging)
ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
}
private async Task SendAudioEndPacket(int sequence)
{
byte[] packet = CreateAudioEndPacket(sequence);
await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
if (enableDebugLogging)
ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
}
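/// <summary>
/// Signals the end of the current speech transmission (PACKET_TYPE_FINAL) and resets the sending flag.
/// </summary>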
private async Task SendFinalPacket()
{
if (!_isSendingSpeech) return;
try
{
byte[] packet = CreateFinalPacket();
await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
_isSendingSpeech = false;
OnSpeechTransmission?.Invoke(false);
ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
}
catch (Exception ex)
{
ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
}
}
private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
{
byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");
// Packet structure:
// 4 bytes: Magic number
// 1 byte: Packet type (0x01 = audio start)
// 4 bytes: Sequence number
// 4 bytes: Total samples in clip
// 4 bytes: Sample rate
// 4 bytes: Channels
// 4 bytes: Transcript length
// N bytes: Transcript (UTF-8)
int headerSize = 25;
byte[] packet = new byte[headerSize + transcriptBytes.Length];
int offset = 0;
BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
offset += 4;
packet[offset] = PACKET_TYPE_AUDIO_START;
offset += 1;
BitConverter.GetBytes(sequence).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
offset += 4;
transcriptBytes.CopyTo(packet, offset);
return packet;
}
private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
{
// Packet structure:
// 4 bytes: Magic number
// 1 byte: Packet type (0x02 = audio chunk)
// 4 bytes: Sequence number
// 4 bytes: Start sample position
// 4 bytes: Sample count in this chunk
// N bytes: Audio data (as 16-bit PCM)
int headerSize = 17;
int audioDataSize = audioData.Length * sizeof(short);
byte[] packet = new byte[headerSize + audioDataSize];
int offset = 0;
BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
offset += 4;
packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
offset += 1;
BitConverter.GetBytes(sequence).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(startSample).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
offset += 4;
// Convert float samples to 16-bit PCM
for (int i = 0; i < audioData.Length; i++)
{
short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
BitConverter.GetBytes(sample).CopyTo(packet, offset);
offset += 2;
}
return packet;
}
private byte[] CreateAudioEndPacket(int sequence)
{
byte[] packet = new byte[13]; // Header only
int offset = 0;
BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
offset += 4;
packet[offset] = PACKET_TYPE_AUDIO_END;
offset += 1;
BitConverter.GetBytes(sequence).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data
return packet;
}
private byte[] CreateFinalPacket()
{
byte[] packet = new byte[13]; // Header only
int offset = 0;
BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
offset += 4;
packet[offset] = PACKET_TYPE_FINAL;
offset += 1;
BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
offset += 4;
BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data
return packet;
}
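/// <summary>
/// Detaches all NPC and manager event handlers and stops the monitoring coroutines.
/// </summary>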
private void CleanupNPCSubscriptions()
{
if (sourceNPC?.AudioManager != null)
{
sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
}
if (ConvaiNPCManager.Instance != null)
{
ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
}
// Stop all coroutines when cleaning up (will restart with new NPC)
StopAllCoroutines();
}
private void CleanupNetwork()
{
_udpClient?.Close();
_udpClient?.Dispose();
_udpClient = null;
}
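/// <summary>
/// Switches the monitored NPC when ConvaiNPCManager reports a new active character.
/// </summary>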
private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
{
if (!useActiveNPC) return;
// Cleanup old subscriptions
CleanupNPCSubscriptions();
// Update to new NPC
sourceNPC = newActiveNPC;
SubscribeToNPCEvents();
}
// Public methods for external control
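/// <summary>
/// Overrides the configured target and sends all subsequent packets to the given address.
/// </summary>
/// <example>
/// A minimal usage sketch (the variable name and IP are illustrative; 1221 is the fallback port used above):
/// <code>
/// speechSender.SetTargetEndpoint("192.168.0.42", 1221);
/// </code>
/// </example>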
public void SetTargetEndpoint(string ip, int port)
{
targetIP = ip;
targetPort = port;
_targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
}
public bool IsSendingSpeech => _isSendingSpeech;
public bool IsInitialized => _isInitialized;
public ConvaiNPC SourceNPC => sourceNPC;
// Debug methods
public void ShowNetworkStatus()
{
ConvaiLogger.Info($"=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Packets sent: {_speechSequence}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Sent clips: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
}
}
}