using System;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils;
using UnityEngine;

namespace Convai.Scripts.Runtime.Multiplayer
{
    /// <summary>
    /// UDP Speech Sender - Captures and transmits NPC speech audio to remote player
    ///
    /// FLOW (Player 1 → Player 2):
    /// 1. Player 2 speaks (via ConvaiSimpleUDPAudioSender on their device)
    /// 2. Player 1 receives voice input (via ConvaiSimpleUDPAudioReceiver)
    /// 3. Player 1's NPC generates response speech (Convai API)
    /// 4. THIS COMPONENT monitors Player 1's NPC AudioSource
    /// 5. When new AudioClips appear, transmit them to Player 2
    /// 6. Player 2's ConvaiUDPSpeechReceiver plays the audio
    ///
    /// This component should be on a NetworkManager or similar persistent object.
    /// It will find and monitor ConvaiNPC components on Avatar objects in the scene.
    /// </summary>
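    /// <example>
    /// A minimal usage sketch (hypothetical calling script; assumes this component already sits on a
    /// persistent GameObject referenced as <c>speechSender</c> and that the remote peer runs a
    /// ConvaiUDPSpeechReceiver):
    /// <code>
    /// // Retarget at runtime, e.g. after reading the peer address from a lobby UI (address is illustrative):
    /// speechSender.SetTargetEndpoint("192.168.1.42", 1221);
    /// speechSender.OnSpeechTransmission += sending => Debug.Log($"Transmitting speech: {sending}");
    /// speechSender.OnSpeechSent += transcript => Debug.Log($"Clip sent, transcript: '{transcript}'");
    /// </code>
    /// </example>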
    public class ConvaiUDPSpeechSender : MonoBehaviour
    {
        [Header("Network Configuration")]
        [SerializeField] private bool enableDebugLogging = true;

        [Header("NPC Source")]
        [SerializeField] private bool useActiveNPC = true;
        [SerializeField] private ConvaiNPC sourceNPC;

        [Header("Audio Settings")]
        [SerializeField] private int maxSamplesPerPacket = 8192;
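        // Sizing note for maxSamplesPerPacket above: at the default 8192 samples, one audio chunk packet is
        // 17 header bytes + 8192 * 2 bytes of 16-bit PCM = 16,401 bytes. That is well under the ~65,507-byte
        // IPv4 UDP payload limit, but large enough to be IP-fragmented on a typical 1500-byte-MTU network.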
        [SerializeField] private bool sendTranscripts = true;

        // Network components
        private UdpClient _udpClient;
        private IPEndPoint _targetEndPoint;
        private string targetIP;
        private int targetPort;
        private bool _isInitialized = false;

        // Speech tracking
        private int _speechSequence = 0;
        private bool _isSendingSpeech = false;
        private HashSet<AudioClip> _sentClips = new HashSet<AudioClip>();

        // Packet constants
        private const uint MAGIC_NUMBER = 0xC0A3; // V3 magic number
        private const byte PACKET_TYPE_AUDIO_START = 0x01;
        private const byte PACKET_TYPE_AUDIO_CHUNK = 0x02;
        private const byte PACKET_TYPE_AUDIO_END = 0x03;
        private const byte PACKET_TYPE_FINAL = 0x05;
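
        // Every packet type defined above shares the same 9-byte prefix before its type-specific payload
        // (multi-byte fields are written with BitConverter, i.e. native byte order, little-endian on
        // common Unity targets):
        //   [0..3] uint  MAGIC_NUMBER
        //   [4]    byte  packet type (0x01 start, 0x02 chunk, 0x03 end, 0x05 final)
        //   [5..8] int   speech sequence number
        // The Create*Packet methods below document the per-type fields that follow this prefix.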

        // Events
        public Action<bool> OnSpeechTransmission;
        public Action<string> OnSpeechSent;

        // Metrics for debug UI
        private int _totalClipsSent = 0;
        private DateTime _lastClipSentTime;

        public int TotalClipsSent => _totalClipsSent;
        public float TimeSinceLastSend => _lastClipSentTime != default
            ? (float)(DateTime.UtcNow - _lastClipSentTime).TotalSeconds
            : -1f;
        public string CurrentTargetIP => targetIP;
        public int CurrentTargetPort => targetPort;
        public bool UsingDiscovery => NetworkConfig.Instance?.useAutoDiscovery ?? false;

        private void Start()
        {
            // Get network config from global instance
            var cfg = NetworkConfig.Instance;
            if (cfg != null)
            {
                targetIP = cfg.ipAddress;
                targetPort = cfg.port;

                // Subscribe to peer discovery if enabled
                if (cfg.useAutoDiscovery && UDPPeerDiscovery.Instance != null)
                {
                    UDPPeerDiscovery.Instance.OnPeerDiscovered += HandlePeerDiscovered;
                    UDPPeerDiscovery.Instance.OnPeerLost += HandlePeerLost;
                    ConvaiLogger.Info("Speech sender subscribed to peer discovery", ConvaiLogger.LogCategory.Character);
                }
            }
            else
            {
                Debug.LogError("NetworkConfig not found! Please ensure NetworkConfig.asset exists in the Resources folder.");
                targetIP = "255.255.255.255";
                targetPort = 1221;
            }

            InitializeNetwork();
            InitializeConvai();
        }

        private void OnDestroy()
        {
            // Unsubscribe from peer discovery
            if (UDPPeerDiscovery.Instance != null)
            {
                UDPPeerDiscovery.Instance.OnPeerDiscovered -= HandlePeerDiscovered;
                UDPPeerDiscovery.Instance.OnPeerLost -= HandlePeerLost;
            }

            CleanupNPCSubscriptions();
            CleanupNetwork();
        }

        private void Update()
        {
            // Continuously update source NPC if using active NPC mode
            if (useActiveNPC)
            {
                var currentActiveNPC = FindEnabledConvaiNPC();
                if (currentActiveNPC != sourceNPC)
                {
                    // Cleanup old subscriptions
                    CleanupNPCSubscriptions();

                    // Update to new NPC
                    sourceNPC = currentActiveNPC;
                    SubscribeToNPCEvents();

                    if (sourceNPC != null)
                    {
                        ConvaiLogger.Info($"🔄 UDP Speech Sender updated source NPC to: {sourceNPC.characterName} (on {sourceNPC.gameObject.name})", ConvaiLogger.LogCategory.Character);
                    }
                    else
                    {
                        ConvaiLogger.Info("🔄 UDP Speech Sender cleared source NPC", ConvaiLogger.LogCategory.Character);
                    }
                }
            }
        }

        /// <summary>
        /// Finds an enabled ConvaiNPC in the scene (doesn't rely on ConvaiNPCManager raycasting)
        /// </summary>
        private ConvaiNPC FindEnabledConvaiNPC()
        {
            // Find all ConvaiNPC components in the scene (including inactive GameObjects)
            var allNPCs = FindObjectsOfType<ConvaiNPC>(true);

            // Return the first one that's on an active GameObject
            foreach (var npc in allNPCs)
            {
                if (npc.gameObject.activeInHierarchy && npc.enabled)
                {
                    return npc;
                }
            }

            return null;
        }

        private void HandlePeerDiscovered(string peerIP)
        {
            targetIP = peerIP;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(peerIP), targetPort);
            ConvaiLogger.Info($"🔊 Speech sender now targeting peer at {peerIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void HandlePeerLost()
        {
            // Don't change targetIP - keep sending to the last known peer IP.
            // The peer might come back online and we'll automatically reconnect.
            ConvaiLogger.Warn($"🔊 Speech sender: Peer connection lost, but continuing to send to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
        }

        private void InitializeNetwork()
        {
            try
            {
                _udpClient = new UdpClient();
                // Allow broadcast targets; otherwise sends to the 255.255.255.255 fallback address would throw.
                _udpClient.EnableBroadcast = true;
                _targetEndPoint = new IPEndPoint(IPAddress.Parse(targetIP), targetPort);
                _isInitialized = true;

                ConvaiLogger.Info($"UDP Speech Sender initialized. Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to initialize UDP speech sender: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private void InitializeConvai()
        {
            // Prefer local ConvaiNPC on the same GameObject, then fall back to finding enabled NPC
            var localNPC = GetComponent<ConvaiNPC>();
            if (localNPC != null)
            {
                sourceNPC = localNPC;
                ConvaiLogger.Info($"Speech Sender: Using local NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            }
            else if (useActiveNPC)
            {
                sourceNPC = FindEnabledConvaiNPC();
                if (sourceNPC != null)
                {
                    ConvaiLogger.Info($"Speech Sender: Found NPC {sourceNPC.characterName} on {sourceNPC.gameObject.name}", ConvaiLogger.LogCategory.Character);
                }
                else
                {
                    ConvaiLogger.Warn("Speech Sender: No ConvaiNPC found in scene yet", ConvaiLogger.LogCategory.Character);
                }
            }

            SubscribeToNPCEvents();

            // Subscribe to NPC manager events for late NPC activation
            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged += HandleActiveNPCChanged;
            }
        }

        private void SubscribeToNPCEvents()
        {
            if (sourceNPC == null)
            {
                ConvaiLogger.Warn("SubscribeToNPCEvents: sourceNPC is null", ConvaiLogger.LogCategory.Character);
                return;
            }

            if (sourceNPC.AudioManager == null)
            {
                ConvaiLogger.Warn($"SubscribeToNPCEvents: AudioManager is null for {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
                return;
            }

            // Hook into the character talking events
            sourceNPC.AudioManager.OnCharacterTalkingChanged += HandleCharacterTalkingChanged;
            sourceNPC.AudioManager.OnAudioTranscriptAvailable += HandleTranscriptAvailable;

            ConvaiLogger.Info($"✅ UDP Speech Sender subscribed to NPC: {sourceNPC.characterName} (on {sourceNPC.gameObject.name}), AudioManager: {sourceNPC.AudioManager.name}", ConvaiLogger.LogCategory.Character);

            // Also start continuous monitoring as a fallback in case events don't fire
            StartCoroutine(ContinuousAudioMonitoring());
        }

        private void HandleCharacterTalkingChanged(bool isTalking)
        {
            if (!_isInitialized) return;

            if (isTalking)
            {
                ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} started talking, monitoring audio clips...", ConvaiLogger.LogCategory.Character);
                // Start monitoring for audio clips
                StartCoroutine(MonitorAudioClips());
            }
            else
            {
                ConvaiLogger.Info($"🔊 NPC {sourceNPC.characterName} stopped talking", ConvaiLogger.LogCategory.Character);
                // End speech transmission
                _ = SendFinalPacket();
            }
        }

        private void HandleTranscriptAvailable(string transcript)
        {
            if (enableDebugLogging && !string.IsNullOrEmpty(transcript))
            {
                ConvaiLogger.DebugLog($"📝 NPC transcript: '{transcript}'", ConvaiLogger.LogCategory.Character);
            }
        }

        private IEnumerator MonitorAudioClips()
        {
            if (sourceNPC?.AudioManager == null)
            {
                ConvaiLogger.Error("MonitorAudioClips: AudioManager is null on sourceNPC", ConvaiLogger.LogCategory.Character);
                yield break;
            }

            AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
            if (audioSource == null)
            {
                ConvaiLogger.Error($"MonitorAudioClips: No AudioSource found on AudioManager ({sourceNPC.AudioManager.name})", ConvaiLogger.LogCategory.Character);
                yield break;
            }

            ConvaiLogger.Info($"🔊 Started monitoring audio clips on {audioSource.name} for NPC {sourceNPC.characterName}", ConvaiLogger.LogCategory.Character);
            AudioClip lastClip = null;
            int checkCount = 0;

            while (sourceNPC != null && sourceNPC.IsCharacterTalking)
            {
                checkCount++;

                // Log periodically to show we're still monitoring
                if (enableDebugLogging && checkCount % 10 == 0)
                {
                    ConvaiLogger.DebugLog($"🔊 Monitoring... check #{checkCount}, current clip: {(audioSource?.clip != null ? audioSource.clip.name : "null")}, isTalking: {sourceNPC.IsCharacterTalking}", ConvaiLogger.LogCategory.Character);
                }

                if (audioSource?.clip != null && audioSource.clip != lastClip)
                {
                    // New clip detected!
                    lastClip = audioSource.clip;
                    ConvaiLogger.Info($"🔊 NEW CLIP DETECTED: {lastClip.name}, length: {lastClip.length:F2}s, samples: {lastClip.samples}, freq: {lastClip.frequency}", ConvaiLogger.LogCategory.Character);

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastClip))
                    {
                        _sentClips.Add(lastClip);

                        // Get the transcript from the most recent available transcript
                        string transcript = GetRecentTranscript();

                        // Send this clip
                        ConvaiLogger.Info($"🔊 TRANSMITTING CLIP to {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
                        _ = TransmitAudioClip(lastClip, transcript);
                    }
                    else
                    {
                        ConvaiLogger.Warn($"🔊 Clip already sent, skipping: {lastClip.name}", ConvaiLogger.LogCategory.Character);
                    }
                }

                yield return new WaitForSeconds(0.1f); // Check every 100 ms
            }

            ConvaiLogger.Info($"🔊 Stopped monitoring audio clips (NPC stopped talking or was destroyed). Checks performed: {checkCount}", ConvaiLogger.LogCategory.Character);
            // Clear sent clips when done
            _sentClips.Clear();
        }

        private IEnumerator ContinuousAudioMonitoring()
        {
            ConvaiLogger.Info("🔊 Starting continuous audio monitoring as fallback", ConvaiLogger.LogCategory.Character);
            AudioClip lastMonitoredClip = null;

            while (true)
            {
                // Wait a bit between checks
                yield return new WaitForSeconds(0.2f);

                // Check if we still have a valid source NPC
                if (sourceNPC == null || sourceNPC.AudioManager == null)
                {
                    yield return new WaitForSeconds(1f); // Wait longer if no NPC
                    continue;
                }

                // Get the audio source
                AudioSource audioSource = sourceNPC.AudioManager.GetComponent<AudioSource>();
                if (audioSource == null)
                {
                    yield return new WaitForSeconds(1f);
                    continue;
                }

                // Check if there's a new audio clip playing
                if (audioSource.clip != null &&
                    audioSource.clip != lastMonitoredClip &&
                    audioSource.isPlaying)
                {
                    lastMonitoredClip = audioSource.clip;

                    // Only send if we haven't sent this clip before
                    if (!_sentClips.Contains(lastMonitoredClip))
                    {
                        _sentClips.Add(lastMonitoredClip);

                        ConvaiLogger.Info($"🔊 [Continuous Monitor] NEW CLIP DETECTED: {lastMonitoredClip.name}, length: {lastMonitoredClip.length:F2}s", ConvaiLogger.LogCategory.Character);

                        // Start transmission if not already started
                        if (!_isSendingSpeech)
                        {
                            _isSendingSpeech = true;
                            OnSpeechTransmission?.Invoke(true);
                        }

                        string transcript = "";
                        _ = TransmitAudioClip(lastMonitoredClip, transcript);
                    }
                }

                // Clean up old clips from the sent list if NPC is not talking
                if (!sourceNPC.IsCharacterTalking && _sentClips.Count > 0)
                {
                    if (enableDebugLogging)
                        ConvaiLogger.DebugLog($"🔊 [Continuous Monitor] NPC stopped talking, clearing sent clips list ({_sentClips.Count} clips)", ConvaiLogger.LogCategory.Character);

                    _sentClips.Clear();

                    // Send final packet
                    if (_isSendingSpeech)
                    {
                        _ = SendFinalPacket();
                    }
                }
            }
        }

        private string GetRecentTranscript()
        {
            // Try to get transcript from the NPC's recent activity.
            // This is a simple approach - in a more complex setup you might want to match clips to transcripts.
            return ""; // Transcripts come via the transcript event
        }

        private async Task TransmitAudioClip(AudioClip audioClip, string transcript)
        {
            if (!_isInitialized || audioClip == null) return;

            try
            {
                // Start transmission if not already started
                if (!_isSendingSpeech)
                {
                    _isSendingSpeech = true;
                    OnSpeechTransmission?.Invoke(true);

                    ConvaiLogger.Info("🔊 Starting speech transmission", ConvaiLogger.LogCategory.Character);
                }

                // Use the current speech sequence for this entire clip
                int clipSequence = _speechSequence;

                // Send start packet with metadata
                await SendAudioStartPacket(audioClip, transcript, clipSequence);

                // Send audio data in chunks (all with the same sequence)
                await SendAudioClipInChunks(audioClip, clipSequence);

                // Send end packet for this clip (with the same sequence)
                await SendAudioEndPacket(clipSequence);

                // Only increment sequence after the entire clip is sent
                _speechSequence++;

                // Update metrics
                _totalClipsSent++;
                _lastClipSentTime = DateTime.UtcNow;

                OnSpeechSent?.Invoke(transcript);

                if (enableDebugLogging)
                    ConvaiLogger.DebugLog($"✅ Transmitted speech clip: {audioClip.length:F2}s (sequence {clipSequence})", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to transmit AudioClip: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }
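
        // For a single clip, the resulting wire traffic is therefore:
        //   AUDIO_START(seq) → AUDIO_CHUNK(seq, startSample 0) → AUDIO_CHUNK(seq, 8192) → ... → AUDIO_END(seq)
        // followed by one FINAL packet once the NPC stops talking (see SendFinalPacket below).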

        private async Task SendAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] packet = CreateAudioStartPacket(audioClip, transcript, sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent start packet {sequence}: {audioClip.samples} samples", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioClipInChunks(AudioClip audioClip, int sequence)
        {
            // Get all audio data.
            // Note: the buffer is sized for clip.samples, which assumes a mono clip; a multi-channel clip
            // would need samples * channels floats to capture every interleaved sample.
            float[] audioData = new float[audioClip.samples];
            audioClip.GetData(audioData, 0);

            // Send in chunks
            int totalSamples = audioData.Length;
            int processedSamples = 0;
            int chunkCount = 0;

            while (processedSamples < totalSamples)
            {
                int remainingSamples = totalSamples - processedSamples;
                int currentChunkSize = Mathf.Min(maxSamplesPerPacket, remainingSamples);

                float[] chunkData = new float[currentChunkSize];
                Array.Copy(audioData, processedSamples, chunkData, 0, currentChunkSize);

                byte[] packet = CreateAudioChunkPacket(chunkData, audioClip.frequency, processedSamples, sequence);
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                processedSamples += currentChunkSize;
                chunkCount++;

                if (enableDebugLogging && chunkCount % 10 == 0)
                    ConvaiLogger.DebugLog($"📤 Sent chunk {chunkCount} for sequence {sequence}", ConvaiLogger.LogCategory.Character);

                // Small delay to avoid overwhelming the network
                await Task.Delay(5);
            }

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent {chunkCount} audio chunks for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendAudioEndPacket(int sequence)
        {
            byte[] packet = CreateAudioEndPacket(sequence);
            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            if (enableDebugLogging)
                ConvaiLogger.DebugLog($"📤 Sent end packet for sequence {sequence}", ConvaiLogger.LogCategory.Character);
        }

        private async Task SendFinalPacket()
        {
            if (!_isSendingSpeech) return;

            try
            {
                byte[] packet = CreateFinalPacket();
                await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

                _isSendingSpeech = false;
                OnSpeechTransmission?.Invoke(false);

                ConvaiLogger.Info("🔊 Speech transmission completed", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
                ConvaiLogger.Error($"Failed to send final packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

        private byte[] CreateAudioStartPacket(AudioClip audioClip, string transcript, int sequence)
        {
            byte[] transcriptBytes = System.Text.Encoding.UTF8.GetBytes(transcript ?? "");

            // Packet structure:
            //  4 bytes: Magic number
            //  1 byte:  Packet type (0x01 = audio start)
            //  4 bytes: Sequence number
            //  4 bytes: Total samples in clip
            //  4 bytes: Sample rate
            //  4 bytes: Channels
            //  4 bytes: Transcript length
            //  N bytes: Transcript (UTF-8)

            int headerSize = 25;
            byte[] packet = new byte[headerSize + transcriptBytes.Length];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_START;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.samples).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.frequency).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioClip.channels).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(transcriptBytes.Length).CopyTo(packet, offset);
            offset += 4;

            transcriptBytes.CopyTo(packet, offset);

            return packet;
        }

        private byte[] CreateAudioChunkPacket(float[] audioData, int frequency, int startSample, int sequence)
        {
            // Packet structure:
            //  4 bytes: Magic number
            //  1 byte:  Packet type (0x02 = audio chunk)
            //  4 bytes: Sequence number
            //  4 bytes: Start sample position
            //  4 bytes: Sample count in this chunk
            //  N bytes: Audio data (as 16-bit PCM)

            int headerSize = 17;
            int audioDataSize = audioData.Length * sizeof(short);
            byte[] packet = new byte[headerSize + audioDataSize];

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_CHUNK;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(startSample).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(audioData.Length).CopyTo(packet, offset);
            offset += 4;

            // Convert float samples to 16-bit PCM
            for (int i = 0; i < audioData.Length; i++)
            {
                short sample = (short)(Mathf.Clamp(audioData[i], -1f, 1f) * short.MaxValue);
                BitConverter.GetBytes(sample).CopyTo(packet, offset);
                offset += 2;
            }

            return packet;
        }
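
        // A receiver reversing the chunk encoding above would read each sample roughly like this
        // (illustrative sketch, not the actual ConvaiUDPSpeechReceiver code):
        //   short pcm = BitConverter.ToInt16(packet, offset);
        //   float sample = pcm / (float)short.MaxValue;
        // and copy the recovered floats into its clip at startSample via AudioClip.SetData.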

        private byte[] CreateAudioEndPacket(int sequence)
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_AUDIO_END;
            offset += 1;

            BitConverter.GetBytes(sequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private byte[] CreateFinalPacket()
        {
            byte[] packet = new byte[13]; // Header only

            int offset = 0;

            BitConverter.GetBytes(MAGIC_NUMBER).CopyTo(packet, offset);
            offset += 4;

            packet[offset] = PACKET_TYPE_FINAL;
            offset += 1;

            BitConverter.GetBytes(_speechSequence).CopyTo(packet, offset);
            offset += 4;

            BitConverter.GetBytes(0).CopyTo(packet, offset); // No additional data

            return packet;
        }

        private void CleanupNPCSubscriptions()
        {
            if (sourceNPC?.AudioManager != null)
            {
                sourceNPC.AudioManager.OnCharacterTalkingChanged -= HandleCharacterTalkingChanged;
                sourceNPC.AudioManager.OnAudioTranscriptAvailable -= HandleTranscriptAvailable;
            }

            if (ConvaiNPCManager.Instance != null)
            {
                ConvaiNPCManager.Instance.OnActiveNPCChanged -= HandleActiveNPCChanged;
            }

            // Stop all coroutines when cleaning up (will restart with new NPC)
            StopAllCoroutines();
        }

        private void CleanupNetwork()
        {
            _udpClient?.Close();
            _udpClient?.Dispose();
            _udpClient = null;
        }

        private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)
        {
            if (!useActiveNPC) return;

            // Cleanup old subscriptions
            CleanupNPCSubscriptions();

            // Update to new NPC
            sourceNPC = newActiveNPC;
            SubscribeToNPCEvents();
        }

        // Public methods for external control
        public void SetTargetEndpoint(string ip, int port)
        {
            targetIP = ip;
            targetPort = port;
            _targetEndPoint = new IPEndPoint(IPAddress.Parse(ip), port);
        }

        public bool IsSendingSpeech => _isSendingSpeech;
        public bool IsInitialized => _isInitialized;
        public ConvaiNPC SourceNPC => sourceNPC;

        // Debug methods
        public void ShowNetworkStatus()
        {
            ConvaiLogger.Info("=== Speech Sender Status ===", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Target: {targetIP}:{targetPort}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Initialized: {_isInitialized}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sending Speech: {_isSendingSpeech}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Source NPC: {(sourceNPC?.characterName ?? "None")}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Clips sent (speech sequence): {_speechSequence}", ConvaiLogger.LogCategory.Character);
            ConvaiLogger.Info($"Sent clips pending cleanup: {_sentClips.Count}", ConvaiLogger.LogCategory.Character);
        }
    }
}