Final fixes for audio + installed ParallelSync
@@ -8,11 +8,12 @@ using Convai.Scripts.Runtime.Core;
 using Convai.Scripts.Runtime.LoggerSystem;
 using Convai.Scripts.Runtime.Utils;
 using UnityEngine;
+using System.IO;

 namespace Convai.Scripts.Runtime.Multiplayer
 {
     /// <summary>
-    /// Simple UDP Audio Receiver - Simulates microphone input by triggering normal Convai flow
+    /// Simple UDP Audio Receiver V2 - Simulates microphone input by triggering normal Convai flow
     /// This approach is much simpler and more reliable than trying to replicate gRPC calls
     /// </summary>
     public class ConvaiSimpleUDPAudioReceiver : MonoBehaviour
@@ -39,27 +40,44 @@ namespace Convai.Scripts.Runtime.Multiplayer
         // Audio state tracking
         private bool _isReceivingAudio = false;
         private int _expectedSequence = 0;
-        private const uint AUDIO_MAGIC = 0xC0A1; // Audio packet magic
-        private const uint ACK_MAGIC = 0xC0A2; // Ack packet magic
+        private const uint MAGIC_NUMBER = 0xC0A1; // Simple magic number for packet validation
+        private const uint ACK_MAGIC = 0xC0A2; // ACK magic to confirm START control

         // Timing for auto-stop
         private float _lastPacketTime;
         private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets

         // Packet structure (matching ConvaiSimpleUDPAudioSender)
         private struct AudioPacketData
         {
             public uint magicNumber;
             public int sequence;
             public int sampleCount;
             public int microphonePosition;
             public bool isEndSignal;
             public bool isStartSignal;
             public short[] audioSamples;
+            public long timestamp;
         }

+        [Header("Recording Storage")]
+        [SerializeField] private bool saveReceivedAudio = true;
+        [SerializeField] private int receivedSampleRate = 16000; // Should match sender
+        [SerializeField] private string outputFilePrefix = "received_audio";
+
+        private readonly object _audioBufferLock = new object();
+        private List<short> _receivedSamples = new List<short>(64 * 1024);
+        private Dictionary<int, short[]> _pendingPackets = new Dictionary<int, short[]>();
+        private int _nextSequenceToWrite = 0;
+        private DateTime _sessionStartTime;
+        private bool _saveInProgress = false;
+        private string _persistentDataPath;
+
         private void Start()
         {
             _cancellationTokenSource = new CancellationTokenSource();
+            _persistentDataPath = Application.persistentDataPath;
             // Apply global config if enabled
             if (useGlobalNetworkConfig)
             {
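Note: the datagram layout that ParseSimpleAudioPacket (later in this diff) expects is a 17-byte header — MAGIC_NUMBER (4 bytes), sequence (4), sampleCount (4), microphonePosition (4), flags (1 byte, where 1 = END and 2 = START) — followed by 16-bit PCM samples. Below is a minimal sketch of a packet builder in that layout; the real ConvaiSimpleUDPAudioSender is not shown in this commit, so the helper name and shape are illustrative only (assumes using System;).

    // Illustration only: builds a datagram in the 17-byte header layout that
    // ParseSimpleAudioPacket in this commit accepts. Not the actual sender code.
    static byte[] BuildAudioPacket(int sequence, short[] samples, int micPosition, byte flags)
    {
        int sampleBytes = samples != null ? samples.Length * sizeof(short) : 0;
        byte[] packet = new byte[17 + sampleBytes];
        int offset = 0;

        BitConverter.GetBytes(0xC0A1u).CopyTo(packet, offset); offset += 4;             // MAGIC_NUMBER
        BitConverter.GetBytes(sequence).CopyTo(packet, offset); offset += 4;            // sequence
        BitConverter.GetBytes(samples?.Length ?? 0).CopyTo(packet, offset); offset += 4; // sampleCount
        BitConverter.GetBytes(micPosition).CopyTo(packet, offset); offset += 4;         // microphonePosition
        packet[offset] = flags; offset += 1;                                            // 0 = audio, 1 = END, 2 = START

        if (sampleBytes > 0)
            Buffer.BlockCopy(samples, 0, packet, offset, sampleBytes);                  // 16-bit PCM payload

        return packet;
    }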
@@ -131,13 +149,8 @@ namespace Convai.Scripts.Runtime.Multiplayer

         private void InitializeConvai()
         {
-            // Prefer local ConvaiNPC on the same GameObject, then fall back to active NPC
-            var localNPC = GetComponent<ConvaiNPC>();
-            if (localNPC != null)
-            {
-                targetNPC = localNPC;
-            }
-            else if (useActiveNPC)
+            // Get target NPC
+            if (useActiveNPC)
             {
                 targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
             }
@@ -216,9 +229,9 @@ namespace Convai.Scripts.Runtime.Multiplayer
         {
             try
             {
-                var packetData = ParseSimpleAudioPacket(data, sender);
+                var packetData = ParseSimpleAudioPacket(data);

-                if (packetData.HasValue)
+                if (packetData.HasValue)
                 {
                     var packet = packetData.Value;
                     _lastPacketTime = Time.time;
@@ -231,23 +244,37 @@ namespace Convai.Scripts.Runtime.Multiplayer
                         ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character);
                     }

-                    if (packet.isEndSignal)
+                    // Handle START control: acknowledge and begin simulation
+                    if (packet.isStartSignal)
+                    {
+                        SendStartAck(sender);
+                        if (!_isReceivingAudio)
+                        {
+                            StartTalkingSimulation();
+                        }
+                        OnAudioReceiving?.Invoke(true);
+                        return;
+                    }
+
+                    if (packet.isEndSignal)
                     {
                         StopTalkingSimulation();
                         OnAudioReceiving?.Invoke(false);
                     }
                     else
                     {
-                        if (packet.isStartSignal)
-                        {
-                            // START packet acknowledged earlier
-                        }
                         // If this is the first packet, start the talking simulation
                         if (packet.sequence == 0 && !_isReceivingAudio)
                         {
                             StartTalkingSimulation();
                         }
+
+                        // Buffer audio samples for saving
+                        if (packet.audioSamples != null && packet.audioSamples.Length > 0)
+                        {
+                            BufferAudioPacket(packet.sequence, packet.audioSamples);
+                        }

                         OnAudioReceiving?.Invoke(true);
                     }
                 }
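Note: the handler above treats a flags value of 2 as a START control (acknowledge, then begin the simulated talk), 1 as END, and anything else as an audio payload to buffer. A hypothetical consumer of the OnAudioReceiving callback invoked here might look like the sketch below; it assumes the member is exposed as a public Action<bool>-style event, which is not visible in this diff.

    // Hypothetical subscription to the OnAudioReceiving callback invoked above.
    // Assumes: public event Action<bool> OnAudioReceiving; declared elsewhere in the class.
    var receiver = GetComponent<ConvaiSimpleUDPAudioReceiver>();
    receiver.OnAudioReceiving += isActive =>
    {
        // e.g. drive a "remote player is speaking" indicator (speakingIndicator is hypothetical)
        speakingIndicator.SetActive(isActive);
    };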
@@ -255,7 +282,8 @@ namespace Convai.Scripts.Runtime.Multiplayer
                 {
                     // Not our audio packet format, might be a test message
                     string message = System.Text.Encoding.UTF8.GetString(data);
-                    ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
+                    if (enableDebugLogging)
+                        ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
                 }
             }
             catch (Exception ex)
@@ -283,6 +311,13 @@ namespace Convai.Scripts.Runtime.Multiplayer

             _isReceivingAudio = true;
             _expectedSequence = 0;
+            _nextSequenceToWrite = 0;
+            _sessionStartTime = DateTime.UtcNow;
+            lock (_audioBufferLock)
+            {
+                _receivedSamples.Clear();
+                _pendingPackets.Clear();
+            }

             // This is the KEY! Simulate a talk key press to trigger normal Convai flow
             ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true);
@@ -302,42 +337,47 @@ namespace Convai.Scripts.Runtime.Multiplayer
                 ConvaiInputManager.Instance.talkKeyInteract?.Invoke(false);

                 ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? "NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character);
+
+                if (saveReceivedAudio)
+                {
+                    TrySaveReceivedAudioAsync();
+                }
             });
         }

-        private AudioPacketData? ParseSimpleAudioPacket(byte[] data, IPEndPoint sender)
+        private AudioPacketData? ParseSimpleAudioPacket(byte[] data)
         {
-            // Sender uses a 17-byte header (no timestamp/padding). We also support older 24+ byte format gracefully.
-            if (data.Length < 17)
+            if (data.Length < 17) // Minimum header size to match sender
                 return null;

            try
            {
                int offset = 0;

                // Read magic number
                uint magic = BitConverter.ToUInt32(data, offset);
                offset += 4;
-                if (magic != AUDIO_MAGIC)
-                {
-                    // Might be a test message or something else
+
+                if (magic != MAGIC_NUMBER)
                     return null;
-                }

                // Read header (matching sender's 17-byte format)
                int sequence = BitConverter.ToInt32(data, offset);
                offset += 4;

                int sampleCount = BitConverter.ToInt32(data, offset);
                offset += 4;

                int microphonePosition = BitConverter.ToInt32(data, offset);
                offset += 4;
-                byte flag = data[offset];
+
+                byte flags = data[offset];
                offset += 1;

-                bool isEndSignal = (flag == 1);
-                bool isStartSignal = (flag == 2);
-
-                // Send ACK immediately (for START and audio packets)
-                SendAck(sender, sequence);
-
+
+                bool isEndSignal = (flags == 1);
+                bool isStartSignal = (flags == 2);

                // Read audio data
                short[] audioSamples = null;
                if (!isEndSignal && !isStartSignal && sampleCount > 0)
                {
@@ -348,15 +388,17 @@ namespace Convai.Scripts.Runtime.Multiplayer
                        Buffer.BlockCopy(data, offset, audioSamples, 0, audioDataSize);
                    }
                }

                return new AudioPacketData
                {
                    magicNumber = magic,
                    sequence = sequence,
                    sampleCount = sampleCount,
                    microphonePosition = microphonePosition,
                    isEndSignal = isEndSignal,
                    isStartSignal = isStartSignal,
-                    audioSamples = audioSamples
+                    audioSamples = audioSamples,
+                    timestamp = 0 // Not provided in sender format
                };
            }
            catch (Exception ex)
@@ -366,21 +408,137 @@ namespace Convai.Scripts.Runtime.Multiplayer
            }
        }

-        private void SendAck(IPEndPoint recipient, int sequence)
+        private void SendStartAck(IPEndPoint sender)
        {
            try
            {
-                using (var client = new UdpClient())
-                {
-                    byte[] ack = new byte[8];
-                    Buffer.BlockCopy(BitConverter.GetBytes(ACK_MAGIC), 0, ack, 0, 4);
-                    Buffer.BlockCopy(BitConverter.GetBytes(sequence), 0, ack, 4, 4);
-                    client.Send(ack, ack.Length, recipient);
-                }
+                if (_udpListener == null || sender == null)
+                    return;
+
+                byte[] ack = new byte[8];
+                BitConverter.GetBytes(ACK_MAGIC).CopyTo(ack, 0);
+                BitConverter.GetBytes(-1).CopyTo(ack, 4);
+                _udpListener.SendAsync(ack, ack.Length, sender);
+
+                if (enableDebugLogging)
+                    ConvaiLogger.DebugLog($"Sent START ACK to {sender}", ConvaiLogger.LogCategory.Character);
            }
            catch (Exception ex)
            {
-                ConvaiLogger.Warn($"Failed to send ACK: {ex.Message}", ConvaiLogger.LogCategory.Character);
+                ConvaiLogger.Warn($"Failed to send START ACK: {ex.Message}", ConvaiLogger.LogCategory.Character);
            }
        }

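Note: the START ACK written above is 8 bytes — ACK_MAGIC (0xC0A2) in the first four bytes and -1 in the last four, where the removed per-packet SendAck used to carry the acknowledged sequence. Below is a sketch of how the sender side might recognize it; the actual handling in ConvaiSimpleUDPAudioSender is not part of this diff.

    // Illustration only: recognizing the 8-byte START ACK emitted by SendStartAck above.
    static bool IsStartAck(byte[] datagram)
    {
        if (datagram == null || datagram.Length < 8)
            return false;

        uint magic = BitConverter.ToUInt32(datagram, 0);  // expect 0xC0A2 (ACK_MAGIC)
        int sequence = BitConverter.ToInt32(datagram, 4); // -1 marks a START ACK rather than a per-packet ACK
        return magic == 0xC0A2 && sequence == -1;
    }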
+        private void BufferAudioPacket(int sequence, short[] samples)
+        {
+            if (samples == null || samples.Length == 0)
+                return;
+
+            lock (_audioBufferLock)
+            {
+                if (sequence < _nextSequenceToWrite)
+                {
+                    return; // old/duplicate packet
+                }
+
+                if (sequence == _nextSequenceToWrite)
+                {
+                    _receivedSamples.AddRange(samples);
+                    _nextSequenceToWrite++;
+
+                    // Flush any contiguous pending packets
+                    while (_pendingPackets.TryGetValue(_nextSequenceToWrite, out var nextSamples))
+                    {
+                        _receivedSamples.AddRange(nextSamples);
+                        _pendingPackets.Remove(_nextSequenceToWrite);
+                        _nextSequenceToWrite++;
+                    }
+                }
+                else
+                {
+                    // Store for later when gap is filled
+                    _pendingPackets[sequence] = samples;
+                }
+            }
+        }
+
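Note: BufferAudioPacket appends samples strictly in sequence order — the packet matching _nextSequenceToWrite is written immediately and any contiguous run parked in _pendingPackets is flushed, while out-of-order packets wait until the gap is filled. A standalone illustration of the same idea follows (plain collections inside any method, not the class itself; assumes using System.Collections.Generic;).

    // Standalone sketch of the flush-on-gap-fill idea used above.
    var pending = new Dictionary<int, short[]>();
    var ordered = new List<short>();
    int next = 0;

    void Accept(int seq, short[] chunk)
    {
        if (seq < next) return;                              // old/duplicate
        if (seq != next) { pending[seq] = chunk; return; }   // gap: park it

        ordered.AddRange(chunk); next++;
        while (pending.TryGetValue(next, out var parked))    // flush the contiguous run
        {
            ordered.AddRange(parked);
            pending.Remove(next);
            next++;
        }
    }

    Accept(0, new short[] { 1 });
    Accept(2, new short[] { 3 }); // parked: sequence 1 is still missing
    Accept(1, new short[] { 2 }); // writes 1, then flushes 2 -> ordered == [1, 2, 3]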
+        private void TrySaveReceivedAudioAsync()
+        {
+            if (_saveInProgress)
+                return;
+
+            short[] dataToSave;
+            DateTime sessionStart;
+            lock (_audioBufferLock)
+            {
+                if (_receivedSamples == null || _receivedSamples.Count == 0)
+                {
+                    if (enableDebugLogging)
+                        ConvaiLogger.Info("No received audio to save.", ConvaiLogger.LogCategory.Character);
+                    return;
+                }
+                dataToSave = _receivedSamples.ToArray();
+                _receivedSamples.Clear();
+                _pendingPackets.Clear();
+                sessionStart = _sessionStartTime;
+            }
+
+            _saveInProgress = true;
+            Task.Run(() =>
+            {
+                try
+                {
+                    string timestamp = sessionStart.ToLocalTime().ToString("yyyyMMdd_HHmmss");
+                    string fileName = $"{outputFilePrefix}_{timestamp}.wav";
+                    string dir = _persistentDataPath;
+                    string path = Path.Combine(dir, fileName);
+                    WriteWav(path, dataToSave, receivedSampleRate, 1);
+                    ConvaiLogger.Info($"Saved received audio to: {path}", ConvaiLogger.LogCategory.Character);
+                }
+                catch (Exception ex)
+                {
+                    ConvaiLogger.Error($"Failed to save received audio: {ex.Message}", ConvaiLogger.LogCategory.Character);
+                }
+                finally
+                {
+                    _saveInProgress = false;
+                }
+            });
+        }
+
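Note: Application.persistentDataPath is read once in Start() and cached in _persistentDataPath because the WAV write above runs inside Task.Run, and Unity only allows that property to be read from the main thread. A minimal sketch of the same cache-then-save pattern (the class and file names here are illustrative; assumes using System.IO, System.Threading.Tasks, and UnityEngine):

    // Illustration only: cache the main-thread-only path once, use it off-thread later.
    public class SaveOffThreadExample : MonoBehaviour
    {
        private string _cachedPath;

        private void Start()
        {
            _cachedPath = Application.persistentDataPath; // main thread only
        }

        private void SaveAsync(byte[] bytes)
        {
            Task.Run(() => File.WriteAllBytes(Path.Combine(_cachedPath, "clip.wav"), bytes));
        }
    }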
+        private void WriteWav(string path, short[] samples, int sampleRate, int channels)
+        {
+            using (var fs = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
+            using (var writer = new BinaryWriter(fs))
+            {
+                int bitsPerSample = 16;
+                int byteRate = sampleRate * channels * (bitsPerSample / 8);
+                int blockAlign = channels * (bitsPerSample / 8);
+                int dataSize = samples.Length * (bitsPerSample / 8);
+                int fileSize = 44 - 8 + dataSize;
+
+                // RIFF header
+                writer.Write(System.Text.Encoding.ASCII.GetBytes("RIFF"));
+                writer.Write(fileSize);
+                writer.Write(System.Text.Encoding.ASCII.GetBytes("WAVE"));
+
+                // fmt chunk
+                writer.Write(System.Text.Encoding.ASCII.GetBytes("fmt "));
+                writer.Write(16); // Subchunk1Size for PCM
+                writer.Write((short)1); // AudioFormat = PCM
+                writer.Write((short)channels); // NumChannels
+                writer.Write(sampleRate); // SampleRate
+                writer.Write(byteRate); // ByteRate
+                writer.Write((short)blockAlign); // BlockAlign
+                writer.Write((short)bitsPerSample); // BitsPerSample
+
+                // data chunk
+                writer.Write(System.Text.Encoding.ASCII.GetBytes("data"));
+                writer.Write(dataSize);
+                for (int i = 0; i < samples.Length; i++)
+                {
+                    writer.Write(samples[i]);
+                }
+            }
+        }
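Note: WriteWav emits the standard 44-byte PCM WAV header; fileSize is 44 - 8 + dataSize because the RIFF chunk size excludes the "RIFF" tag and the size field itself. A quick, illustrative sanity check for a file it produces (place inside any method; path stands for the value logged by the save step):

    // Illustration only: verify the RIFF/WAVE markers and the declared data size.
    byte[] wav = File.ReadAllBytes(path);
    bool looksValid =
        System.Text.Encoding.ASCII.GetString(wav, 0, 4) == "RIFF" &&
        System.Text.Encoding.ASCII.GetString(wav, 8, 4) == "WAVE" &&
        BitConverter.ToInt32(wav, 40) == wav.Length - 44; // "data" chunk size vs. payload length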