final fixes for audio + installed ParrelSync

This commit is contained in:
tom.hempel
2025-09-26 16:40:21 +02:00
parent 7d65d1b799
commit 40fd408908
10 changed files with 365 additions and 50 deletions

View File

@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: bdf5ff7ba76a952439899cd468855cdb
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 53e7ebc83181ffc4eb83d06a00cda31d
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 065ee3e807568e04b8e6bfd81da61980
NativeFormatImporter:
externalObjects: {}
mainObjectFileID: 11400000
userData:
assetBundleName:
assetBundleVariant:

Binary file not shown.

View File

@ -8,11 +8,12 @@ using Convai.Scripts.Runtime.Core;
using Convai.Scripts.Runtime.LoggerSystem; using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.Utils; using Convai.Scripts.Runtime.Utils;
using UnityEngine; using UnityEngine;
using System.IO;
namespace Convai.Scripts.Runtime.Multiplayer namespace Convai.Scripts.Runtime.Multiplayer
{ {
/// <summary> /// <summary>
/// Simple UDP Audio Receiver - Simulates microphone input by triggering normal Convai flow /// Simple UDP Audio Receiver V2 - Simulates microphone input by triggering normal Convai flow
/// This approach is much simpler and more reliable than trying to replicate gRPC calls /// This approach is much simpler and more reliable than trying to replicate gRPC calls
/// </summary> /// </summary>
public class ConvaiSimpleUDPAudioReceiver : MonoBehaviour public class ConvaiSimpleUDPAudioReceiver : MonoBehaviour
@ -39,27 +40,44 @@ namespace Convai.Scripts.Runtime.Multiplayer
// Audio state tracking // Audio state tracking
private bool _isReceivingAudio = false; private bool _isReceivingAudio = false;
private int _expectedSequence = 0; private int _expectedSequence = 0;
private const uint AUDIO_MAGIC = 0xC0A1; // Audio packet magic private const uint MAGIC_NUMBER = 0xC0A1; // Simple magic number for packet validation
private const uint ACK_MAGIC = 0xC0A2; // Ack packet magic private const uint ACK_MAGIC = 0xC0A2; // ACK magic to confirm START control
// Timing for auto-stop // Timing for auto-stop
private float _lastPacketTime; private float _lastPacketTime;
private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets private const float AUTO_STOP_DELAY = 1.0f; // Stop listening after 1 second of no packets
// Packet structure (matching ConvaiSimpleUDPAudioSender) // Packet structure (matching ConvaiSimpleUDPAudioSender)
private struct AudioPacketData private struct AudioPacketData
{ {
public uint magicNumber;
public int sequence; public int sequence;
public int sampleCount; public int sampleCount;
public int microphonePosition; public int microphonePosition;
public bool isEndSignal; public bool isEndSignal;
public bool isStartSignal; public bool isStartSignal;
public short[] audioSamples; public short[] audioSamples;
public long timestamp;
} }
[Header("Recording Storage")]
[SerializeField] private bool saveReceivedAudio = true;
[SerializeField] private int receivedSampleRate = 16000; // Should match sender
[SerializeField] private string outputFilePrefix = "received_audio";
private readonly object _audioBufferLock = new object();
private List<short> _receivedSamples = new List<short>(64 * 1024);
private Dictionary<int, short[]> _pendingPackets = new Dictionary<int, short[]>();
private int _nextSequenceToWrite = 0;
private DateTime _sessionStartTime;
private bool _saveInProgress = false;
private string _persistentDataPath;
private void Start() private void Start()
{ {
_cancellationTokenSource = new CancellationTokenSource(); _cancellationTokenSource = new CancellationTokenSource();
_persistentDataPath = Application.persistentDataPath;
// Apply global config if enabled // Apply global config if enabled
if (useGlobalNetworkConfig) if (useGlobalNetworkConfig)
{ {
@ -131,13 +149,8 @@ namespace Convai.Scripts.Runtime.Multiplayer
private void InitializeConvai() private void InitializeConvai()
{ {
// Prefer local ConvaiNPC on the same GameObject, then fall back to active NPC // Get target NPC
var localNPC = GetComponent<ConvaiNPC>(); if (useActiveNPC)
if (localNPC != null)
{
targetNPC = localNPC;
}
else if (useActiveNPC)
{ {
targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC(); targetNPC = ConvaiNPCManager.Instance?.GetActiveConvaiNPC();
} }
@ -216,9 +229,9 @@ namespace Convai.Scripts.Runtime.Multiplayer
{ {
try try
{ {
var packetData = ParseSimpleAudioPacket(data, sender); var packetData = ParseSimpleAudioPacket(data);
if (packetData.HasValue) if (packetData.HasValue)
{ {
var packet = packetData.Value; var packet = packetData.Value;
_lastPacketTime = Time.time; _lastPacketTime = Time.time;
@ -231,23 +244,37 @@ namespace Convai.Scripts.Runtime.Multiplayer
ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character); ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character);
} }
if (packet.isEndSignal) // Handle START control: acknowledge and begin simulation
if (packet.isStartSignal)
{
SendStartAck(sender);
if (!_isReceivingAudio)
{
StartTalkingSimulation();
}
OnAudioReceiving?.Invoke(true);
return;
}
if (packet.isEndSignal)
{ {
StopTalkingSimulation(); StopTalkingSimulation();
OnAudioReceiving?.Invoke(false); OnAudioReceiving?.Invoke(false);
} }
else else
{ {
if (packet.isStartSignal)
{
// START packet acknowledged earlier
}
// If this is the first packet, start the talking simulation // If this is the first packet, start the talking simulation
if (packet.sequence == 0 && !_isReceivingAudio) if (packet.sequence == 0 && !_isReceivingAudio)
{ {
StartTalkingSimulation(); StartTalkingSimulation();
} }
// Buffer audio samples for saving
if (packet.audioSamples != null && packet.audioSamples.Length > 0)
{
BufferAudioPacket(packet.sequence, packet.audioSamples);
}
OnAudioReceiving?.Invoke(true); OnAudioReceiving?.Invoke(true);
} }
} }
@ -255,7 +282,8 @@ namespace Convai.Scripts.Runtime.Multiplayer
{ {
// Not our audio packet format, might be a test message // Not our audio packet format, might be a test message
string message = System.Text.Encoding.UTF8.GetString(data); string message = System.Text.Encoding.UTF8.GetString(data);
ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character); if (enableDebugLogging)
ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
} }
} }
catch (Exception ex) catch (Exception ex)
@ -283,6 +311,13 @@ namespace Convai.Scripts.Runtime.Multiplayer
_isReceivingAudio = true; _isReceivingAudio = true;
_expectedSequence = 0; _expectedSequence = 0;
_nextSequenceToWrite = 0;
_sessionStartTime = DateTime.UtcNow;
lock (_audioBufferLock)
{
_receivedSamples.Clear();
_pendingPackets.Clear();
}
// This is the KEY! Simulate a talk key press to trigger normal Convai flow // This is the KEY! Simulate a talk key press to trigger normal Convai flow
ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true); ConvaiInputManager.Instance.talkKeyInteract?.Invoke(true);
@ -302,42 +337,47 @@ namespace Convai.Scripts.Runtime.Multiplayer
ConvaiInputManager.Instance.talkKeyInteract?.Invoke(false); ConvaiInputManager.Instance.talkKeyInteract?.Invoke(false);
ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? "NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character); ConvaiLogger.Info($"🎤 Stopped talking simulation for {targetNPC?.characterName ?? "NPC"} (remote player audio)", ConvaiLogger.LogCategory.Character);
if (saveReceivedAudio)
{
TrySaveReceivedAudioAsync();
}
}); });
} }
private AudioPacketData? ParseSimpleAudioPacket(byte[] data, IPEndPoint sender) private AudioPacketData? ParseSimpleAudioPacket(byte[] data)
{ {
// Sender uses a 17-byte header (no timestamp/padding). We also support older 24+ byte format gracefully. if (data.Length < 17) // Minimum header size to match sender
if (data.Length < 17)
return null; return null;
try try
{ {
int offset = 0; int offset = 0;
// Read magic number
uint magic = BitConverter.ToUInt32(data, offset); uint magic = BitConverter.ToUInt32(data, offset);
offset += 4; offset += 4;
if (magic != AUDIO_MAGIC)
{
// Might be a test message or something else
return null;
}
if (magic != MAGIC_NUMBER)
return null;
// Read header (matching sender's 17-byte format)
int sequence = BitConverter.ToInt32(data, offset); int sequence = BitConverter.ToInt32(data, offset);
offset += 4; offset += 4;
int sampleCount = BitConverter.ToInt32(data, offset); int sampleCount = BitConverter.ToInt32(data, offset);
offset += 4; offset += 4;
int microphonePosition = BitConverter.ToInt32(data, offset); int microphonePosition = BitConverter.ToInt32(data, offset);
offset += 4; offset += 4;
byte flag = data[offset];
byte flags = data[offset];
offset += 1; offset += 1;
bool isEndSignal = (flag == 1); bool isEndSignal = (flags == 1);
bool isStartSignal = (flag == 2); bool isStartSignal = (flags == 2);
// Send ACK immediately (for START and audio packets)
SendAck(sender, sequence);
// Read audio data
short[] audioSamples = null; short[] audioSamples = null;
if (!isEndSignal && !isStartSignal && sampleCount > 0) if (!isEndSignal && !isStartSignal && sampleCount > 0)
{ {
@ -351,12 +391,14 @@ namespace Convai.Scripts.Runtime.Multiplayer
return new AudioPacketData return new AudioPacketData
{ {
magicNumber = magic,
sequence = sequence, sequence = sequence,
sampleCount = sampleCount, sampleCount = sampleCount,
microphonePosition = microphonePosition, microphonePosition = microphonePosition,
isEndSignal = isEndSignal, isEndSignal = isEndSignal,
isStartSignal = isStartSignal, isStartSignal = isStartSignal,
audioSamples = audioSamples audioSamples = audioSamples,
timestamp = 0 // Not provided in sender format
}; };
} }
catch (Exception ex) catch (Exception ex)
@ -366,21 +408,137 @@ namespace Convai.Scripts.Runtime.Multiplayer
} }
} }
private void SendAck(IPEndPoint recipient, int sequence) private void SendStartAck(IPEndPoint sender)
{ {
try try
{ {
using (var client = new UdpClient()) if (_udpListener == null || sender == null)
{ return;
byte[] ack = new byte[8];
Buffer.BlockCopy(BitConverter.GetBytes(ACK_MAGIC), 0, ack, 0, 4); byte[] ack = new byte[8];
Buffer.BlockCopy(BitConverter.GetBytes(sequence), 0, ack, 4, 4); BitConverter.GetBytes(ACK_MAGIC).CopyTo(ack, 0);
client.Send(ack, ack.Length, recipient); BitConverter.GetBytes(-1).CopyTo(ack, 4);
} _udpListener.SendAsync(ack, ack.Length, sender);
if (enableDebugLogging)
ConvaiLogger.DebugLog($"Sent START ACK to {sender}", ConvaiLogger.LogCategory.Character);
} }
catch (Exception ex) catch (Exception ex)
{ {
ConvaiLogger.Warn($"Failed to send ACK: {ex.Message}", ConvaiLogger.LogCategory.Character); ConvaiLogger.Warn($"Failed to send START ACK: {ex.Message}", ConvaiLogger.LogCategory.Character);
}
}
/// <summary>
/// Adds one packet's samples to the recording buffer, keeping them in sequence order.
/// The packet that matches the next expected sequence is appended at once, followed by
/// any directly following packets that were parked earlier. Packets ahead of the expected
/// sequence are parked in <c>_pendingPackets</c>; packets behind it are ignored.
/// </summary>
/// <param name="sequence">Sequence number carried by the packet.</param>
/// <param name="samples">16-bit PCM samples of the packet; null or empty input is ignored.</param>
private void BufferAudioPacket(int sequence, short[] samples)
{
    bool hasPayload = samples != null && samples.Length > 0;
    if (!hasPayload)
        return;

    lock (_audioBufferLock)
    {
        // Already written (or a duplicate of something already written): nothing to do.
        if (sequence < _nextSequenceToWrite)
            return;

        // Arrived ahead of a gap: park it until the missing packets show up.
        if (sequence > _nextSequenceToWrite)
        {
            _pendingPackets[sequence] = samples;
            return;
        }

        // Exactly the packet we were waiting for.
        _receivedSamples.AddRange(samples);
        _nextSequenceToWrite++;

        // Drain every parked packet that is now contiguous with the written data.
        short[] parked;
        while (_pendingPackets.TryGetValue(_nextSequenceToWrite, out parked))
        {
            _receivedSamples.AddRange(parked);
            _pendingPackets.Remove(_nextSequenceToWrite);
            _nextSequenceToWrite++;
        }
    }
}
/// <summary>
/// Snapshots the audio received during the current session and writes it to a WAV file
/// on a background task. Does nothing if a previous save is still running.
/// </summary>
/// <remarks>
/// Packets that are still parked in <c>_pendingPackets</c> when the session ends are
/// waiting on a sequence number that never arrived. They are appended in ascending
/// sequence order before the snapshot is taken, so a single lost datagram produces a
/// short skip in the file rather than discarding everything received after the gap.
/// </remarks>
private void TrySaveReceivedAudioAsync()
{
    if (_saveInProgress)
        return;

    short[] dataToSave;
    DateTime sessionStart;
    lock (_audioBufferLock)
    {
        // Salvage packets stranded behind a gap that was never filled.
        if (_receivedSamples != null && _pendingPackets.Count > 0)
        {
            var strandedSequences = new List<int>(_pendingPackets.Keys);
            strandedSequences.Sort();
            foreach (int strandedSequence in strandedSequences)
            {
                _receivedSamples.AddRange(_pendingPackets[strandedSequence]);
            }
            _pendingPackets.Clear();
        }

        if (_receivedSamples == null || _receivedSamples.Count == 0)
        {
            if (enableDebugLogging)
                ConvaiLogger.Info("No received audio to save.", ConvaiLogger.LogCategory.Character);
            return;
        }

        // Copy out and reset while holding the lock so the writer task owns its own data.
        dataToSave = _receivedSamples.ToArray();
        _receivedSamples.Clear();
        _pendingPackets.Clear();
        sessionStart = _sessionStartTime;
    }

    _saveInProgress = true;
    Task.Run(() =>
    {
        try
        {
            // File name is derived from the session start time (second resolution).
            string timestamp = sessionStart.ToLocalTime().ToString("yyyyMMdd_HHmmss");
            string fileName = $"{outputFilePrefix}_{timestamp}.wav";
            string dir = _persistentDataPath;
            string path = Path.Combine(dir, fileName);
            WriteWav(path, dataToSave, receivedSampleRate, 1);
            ConvaiLogger.Info($"Saved received audio to: {path}", ConvaiLogger.LogCategory.Character);
        }
        catch (Exception ex)
        {
            ConvaiLogger.Error($"Failed to save received audio: {ex.Message}", ConvaiLogger.LogCategory.Character);
        }
        finally
        {
            _saveInProgress = false;
        }
    });
}
/// <summary>
/// Writes 16-bit PCM samples to <paramref name="path"/> as a WAV file with a 44-byte
/// RIFF/WAVE header. An existing file at that path is overwritten.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="samples">PCM samples to write, in playback order.</param>
/// <param name="sampleRate">Sample rate stored in the header, in Hz.</param>
/// <param name="channels">Channel count stored in the header.</param>
private void WriteWav(string path, short[] samples, int sampleRate, int channels)
{
    using (var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        using (var output = new BinaryWriter(stream))
        {
            const int bitsPerSample = 16;
            const int bytesPerSample = bitsPerSample / 8;
            const int fmtChunkSize = 16;   // size of the PCM "fmt " chunk payload
            const short pcmFormatTag = 1;  // AudioFormat = PCM

            var ascii = System.Text.Encoding.ASCII;
            int frameSize = channels * bytesPerSample;
            int bytesPerSecond = sampleRate * channels * bytesPerSample;
            int payloadSize = samples.Length * bytesPerSample;
            int riffChunkSize = 44 - 8 + payloadSize; // whole file minus "RIFF" tag and this field

            // RIFF header
            output.Write(ascii.GetBytes("RIFF"));
            output.Write(riffChunkSize);
            output.Write(ascii.GetBytes("WAVE"));

            // fmt chunk
            output.Write(ascii.GetBytes("fmt "));
            output.Write(fmtChunkSize);
            output.Write(pcmFormatTag);
            output.Write((short)channels);
            output.Write(sampleRate);
            output.Write(bytesPerSecond);
            output.Write((short)frameSize);
            output.Write((short)bitsPerSample);

            // data chunk
            output.Write(ascii.GetBytes("data"));
            output.Write(payloadSize);
            foreach (short sample in samples)
            {
                output.Write(sample);
            }
        }
    }
}

View File

@ -9,6 +9,7 @@ using UnityEngine;
using UnityEngine.XR; using UnityEngine.XR;
using UnityEngine.InputSystem; using UnityEngine.InputSystem;
using UnityEngine.InputSystem.XR; using UnityEngine.InputSystem.XR;
using System.IO;
namespace Convai.Scripts.Runtime.Multiplayer namespace Convai.Scripts.Runtime.Multiplayer
{ {
@ -70,6 +71,16 @@ namespace Convai.Scripts.Runtime.Multiplayer
public event Action<bool> OnRecordingStateChanged; public event Action<bool> OnRecordingStateChanged;
[Header("Recording Storage")]
[SerializeField] private bool saveLocalAudio = true;
[SerializeField] private int localSampleRate = 16000;
[SerializeField] private string localFilePrefix = "sender_audio";
private readonly object _localAudioLock = new object();
private readonly System.Collections.Generic.List<short> _localSamples = new System.Collections.Generic.List<short>(128 * 1024);
private bool _localSaveInProgress = false;
private DateTime _localSessionStart;
private string _persistentDataPath;
private void Start() private void Start()
{ {
// Apply global config if enabled // Apply global config if enabled
@ -84,6 +95,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
} }
InitializeNetwork(); InitializeNetwork();
InitializeAudio(); InitializeAudio();
_persistentDataPath = Application.persistentDataPath;
_cancellationTokenSource = new CancellationTokenSource(); _cancellationTokenSource = new CancellationTokenSource();
_ackCancellationTokenSource = new CancellationTokenSource(); _ackCancellationTokenSource = new CancellationTokenSource();
@ -423,6 +435,11 @@ namespace Convai.Scripts.Runtime.Multiplayer
_lastMicrophonePosition = 0; _lastMicrophonePosition = 0;
_packetSequence = 0; _packetSequence = 0;
_startAckReceived = false; _startAckReceived = false;
_localSessionStart = DateTime.UtcNow;
lock (_localAudioLock)
{
_localSamples.Clear();
}
ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character); ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
OnRecordingStateChanged?.Invoke(true); OnRecordingStateChanged?.Invoke(true);
@ -454,6 +471,11 @@ namespace Convai.Scripts.Runtime.Multiplayer
// Send end-of-recording signal // Send end-of-recording signal
SendEndOfRecordingSignal(); SendEndOfRecordingSignal();
if (saveLocalAudio)
{
TrySaveLocalAudioAsync();
}
} }
catch (Exception ex) catch (Exception ex)
{ {
@ -542,6 +564,12 @@ namespace Convai.Scripts.Runtime.Multiplayer
// Create a simple packet structure // Create a simple packet structure
byte[] packet = CreateSimpleAudioPacket(audioData, processedSamples, currentChunkSamples); byte[] packet = CreateSimpleAudioPacket(audioData, processedSamples, currentChunkSamples);
// Buffer locally for saving
if (saveLocalAudio)
{
AppendLocalAudio(audioData, processedSamples, currentChunkSamples);
}
// Send the packet // Send the packet
await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint); await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
@ -649,6 +677,100 @@ namespace Convai.Scripts.Runtime.Multiplayer
} }
} }
/// <summary>
/// Converts a slice of float samples to 16-bit PCM and appends it to the local
/// recording buffer under <c>_localAudioLock</c>.
/// </summary>
/// <param name="source">Float samples; each value is clamped to [-1, 1] before conversion.</param>
/// <param name="startIndex">Index of the first sample in <paramref name="source"/> to append.</param>
/// <param name="count">Number of samples to append; values of zero or less are ignored.</param>
private void AppendLocalAudio(float[] source, int startIndex, int count)
{
    bool nothingToAppend = source == null || count <= 0;
    if (nothingToAppend)
        return;

    lock (_localAudioLock)
    {
        int offset = 0;
        while (offset < count)
        {
            // Clamp first so out-of-range input cannot wrap when narrowed to short.
            float clamped = Mathf.Clamp(source[startIndex + offset], -1f, 1f);
            _localSamples.Add((short)(clamped * short.MaxValue));
            offset++;
        }
    }
}
/// <summary>
/// Snapshots the locally recorded audio and writes it to a WAV file on a background
/// task. Does nothing if a previous save is still running or nothing was recorded.
/// </summary>
/// <remarks>
/// The buffer is snapshotted immediately, then the task waits briefly and collects any
/// chunks appended in the meantime. Previously the snapshot was the only read, so the
/// delay had no effect and late chunks were never written. Late chunks are merged only
/// while <c>_localSessionStart</c> still equals the value captured here; if a new
/// recording has started, the buffer belongs to that session and is left untouched.
/// </remarks>
private void TrySaveLocalAudioAsync()
{
    if (_localSaveInProgress)
        return;

    short[] initialSamples;
    DateTime sessionStart;
    lock (_localAudioLock)
    {
        if (_localSamples.Count == 0)
        {
            if (enableDebugLogging)
                ConvaiLogger.Info("No local audio to save.", ConvaiLogger.LogCategory.Character);
            return;
        }

        initialSamples = _localSamples.ToArray();
        _localSamples.Clear();
        sessionStart = _localSessionStart;
    }

    _localSaveInProgress = true;
    Task.Run(async () =>
    {
        try
        {
            // Small delay to allow any final chunks to enqueue
            await Task.Delay(100);

            // Pick up chunks that were appended after the initial snapshot.
            short[] lateSamples = null;
            lock (_localAudioLock)
            {
                if (_localSessionStart == sessionStart && _localSamples.Count > 0)
                {
                    lateSamples = _localSamples.ToArray();
                    _localSamples.Clear();
                }
            }

            short[] dataToSave = initialSamples;
            if (lateSamples != null)
            {
                dataToSave = new short[initialSamples.Length + lateSamples.Length];
                Array.Copy(initialSamples, 0, dataToSave, 0, initialSamples.Length);
                Array.Copy(lateSamples, 0, dataToSave, initialSamples.Length, lateSamples.Length);
            }

            string timestamp = sessionStart.ToLocalTime().ToString("yyyyMMdd_HHmmss");
            string fileName = $"{localFilePrefix}_{timestamp}.wav";
            string dir = _persistentDataPath;
            string path = Path.Combine(dir, fileName);
            WriteWav(path, dataToSave, localSampleRate, 1);
            ConvaiLogger.Info($"Saved local audio to: {path}", ConvaiLogger.LogCategory.Character);
        }
        catch (Exception ex)
        {
            ConvaiLogger.Error($"Failed to save local audio: {ex.Message}", ConvaiLogger.LogCategory.Character);
        }
        finally
        {
            _localSaveInProgress = false;
        }
    });
}
/// <summary>
/// Writes 16-bit PCM samples to <paramref name="path"/> as a WAV file with a 44-byte
/// RIFF/WAVE header. An existing file at that path is overwritten.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="samples">PCM samples to write, in playback order.</param>
/// <param name="sampleRate">Sample rate stored in the header, in Hz.</param>
/// <param name="channels">Channel count stored in the header.</param>
private void WriteWav(string path, short[] samples, int sampleRate, int channels)
{
    using (var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        using (var output = new BinaryWriter(stream))
        {
            const int bitsPerSample = 16;
            const int bytesPerSample = bitsPerSample / 8;
            const int fmtChunkSize = 16;   // size of the PCM "fmt " chunk payload
            const short pcmFormatTag = 1;  // AudioFormat = PCM

            var ascii = System.Text.Encoding.ASCII;
            int frameSize = channels * bytesPerSample;
            int bytesPerSecond = sampleRate * channels * bytesPerSample;
            int payloadSize = samples.Length * bytesPerSample;
            int riffChunkSize = 44 - 8 + payloadSize; // whole file minus "RIFF" tag and this field

            // RIFF header
            output.Write(ascii.GetBytes("RIFF"));
            output.Write(riffChunkSize);
            output.Write(ascii.GetBytes("WAVE"));

            // fmt chunk
            output.Write(ascii.GetBytes("fmt "));
            output.Write(fmtChunkSize);
            output.Write(pcmFormatTag);
            output.Write((short)channels);
            output.Write(sampleRate);
            output.Write(bytesPerSecond);
            output.Write((short)frameSize);
            output.Write((short)bitsPerSample);

            // data chunk
            output.Write(ascii.GetBytes("data"));
            output.Write(payloadSize);
            foreach (short sample in samples)
            {
                output.Write(sample);
            }
        }
    }
}
private async Task SendStartOfRecordingSignalAndAwaitAck() private async Task SendStartOfRecordingSignalAndAwaitAck()
{ {
try try

Binary file not shown.

View File

@ -34,6 +34,7 @@
"com.unity.xr.interaction.toolkit": "2.6.4", "com.unity.xr.interaction.toolkit": "2.6.4",
"com.unity.xr.management": "4.5.1", "com.unity.xr.management": "4.5.1",
"com.unity.xr.openxr": "1.12.1", "com.unity.xr.openxr": "1.12.1",
"com.veriorpies.parrelsync": "https://github.com/VeriorPies/ParrelSync.git?path=/ParrelSync",
"com.unity.modules.ai": "1.0.0", "com.unity.modules.ai": "1.0.0",
"com.unity.modules.androidjni": "1.0.0", "com.unity.modules.androidjni": "1.0.0",
"com.unity.modules.animation": "1.0.0", "com.unity.modules.animation": "1.0.0",

View File

@ -398,6 +398,13 @@
}, },
"url": "https://packages.unity.com" "url": "https://packages.unity.com"
}, },
"com.veriorpies.parrelsync": {
"version": "https://github.com/VeriorPies/ParrelSync.git?path=/ParrelSync",
"depth": 0,
"source": "git",
"dependencies": {},
"hash": "610157ad762084380380148ba8ce14e266a6da97"
},
"com.unity.modules.ai": { "com.unity.modules.ai": {
"version": "1.0.0", "version": "1.0.0",
"depth": 0, "depth": 0,