Further work on bug fixing

This commit is contained in:
tom.hempel
2025-09-26 10:13:33 +02:00
parent 7aeb173789
commit 7d65d1b799
6 changed files with 1492 additions and 43 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,14 @@
fileFormatVersion: 2
guid: 7bb00533d62df7744bfa20da5215c4bf
ScriptedImporter:
internalIDToNameTable: []
externalObjects: {}
serializedVersion: 2
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 8404be70184654265930450def6a9037, type: 3}
generateWrapperCode: 0
wrapperCodePath:
wrapperClassName:
wrapperCodeNamespace:

View File

@ -39,7 +39,8 @@ namespace Convai.Scripts.Runtime.Multiplayer
// Audio state tracking
private bool _isReceivingAudio = false;
private int _expectedSequence = 0;
private const uint MAGIC_NUMBER = 0xC0A1; // Simple magic number for packet validation
private const uint AUDIO_MAGIC = 0xC0A1; // Audio packet magic
private const uint ACK_MAGIC = 0xC0A2; // Ack packet magic
// Timing for auto-stop
private float _lastPacketTime;
@ -48,13 +49,12 @@ namespace Convai.Scripts.Runtime.Multiplayer
// Packet structure (matching ConvaiSimpleUDPAudioSender)
private struct AudioPacketData
{
public uint magicNumber;
public int sequence;
public int sampleCount;
public int microphonePosition;
public bool isEndSignal;
public bool isStartSignal;
public short[] audioSamples;
public long timestamp;
}
private void Start()
@ -216,9 +216,9 @@ namespace Convai.Scripts.Runtime.Multiplayer
{
try
{
var packetData = ParseSimpleAudioPacket(data);
var packetData = ParseSimpleAudioPacket(data, sender);
if (packetData.HasValue)
if (packetData.HasValue)
{
var packet = packetData.Value;
_lastPacketTime = Time.time;
@ -231,13 +231,17 @@ namespace Convai.Scripts.Runtime.Multiplayer
ConvaiLogger.DebugLog($"Received audio packet {packet.sequence} with {packet.sampleCount} samples", ConvaiLogger.LogCategory.Character);
}
if (packet.isEndSignal)
if (packet.isEndSignal)
{
StopTalkingSimulation();
OnAudioReceiving?.Invoke(false);
}
else
{
if (packet.isStartSignal)
{
// START packet acknowledged earlier
}
// If this is the first packet, start the talking simulation
if (packet.sequence == 0 && !_isReceivingAudio)
{
@ -251,8 +255,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
{
// Not our audio packet format, might be a test message
string message = System.Text.Encoding.UTF8.GetString(data);
if (enableDebugLogging)
ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
ConvaiLogger.Info($"Received test message from {sender}: {message}", ConvaiLogger.LogCategory.Character);
}
}
catch (Exception ex)
@ -302,44 +305,41 @@ namespace Convai.Scripts.Runtime.Multiplayer
});
}
private AudioPacketData? ParseSimpleAudioPacket(byte[] data)
private AudioPacketData? ParseSimpleAudioPacket(byte[] data, IPEndPoint sender)
{
if (data.Length < 24) // Minimum header size
// Sender uses a 17-byte header (no timestamp/padding). We also support older 24+ byte format gracefully.
if (data.Length < 17)
return null;
try
{
int offset = 0;
// Read magic number
uint magic = BitConverter.ToUInt32(data, offset);
offset += 4;
if (magic != MAGIC_NUMBER)
if (magic != AUDIO_MAGIC)
{
// Might be a test message or something else
return null;
// Read header
}
int sequence = BitConverter.ToInt32(data, offset);
offset += 4;
int sampleCount = BitConverter.ToInt32(data, offset);
offset += 4;
int microphonePosition = BitConverter.ToInt32(data, offset);
offset += 4;
bool isEndSignal = BitConverter.ToBoolean(data, offset);
byte flag = data[offset];
offset += 1;
// Skip padding
offset += 3;
long timestamp = BitConverter.ToInt64(data, offset);
offset += 8;
// Read audio data
bool isEndSignal = (flag == 1);
bool isStartSignal = (flag == 2);
// Send ACK immediately (for START and audio packets)
SendAck(sender, sequence);
short[] audioSamples = null;
if (!isEndSignal && sampleCount > 0)
if (!isEndSignal && !isStartSignal && sampleCount > 0)
{
int audioDataSize = sampleCount * sizeof(short);
if (data.Length >= offset + audioDataSize)
@ -348,16 +348,15 @@ namespace Convai.Scripts.Runtime.Multiplayer
Buffer.BlockCopy(data, offset, audioSamples, 0, audioDataSize);
}
}
return new AudioPacketData
{
magicNumber = magic,
sequence = sequence,
sampleCount = sampleCount,
microphonePosition = microphonePosition,
isEndSignal = isEndSignal,
audioSamples = audioSamples,
timestamp = timestamp
isStartSignal = isStartSignal,
audioSamples = audioSamples
};
}
catch (Exception ex)
@ -366,6 +365,24 @@ namespace Convai.Scripts.Runtime.Multiplayer
return null;
}
}
// Sends an 8-byte acknowledgement (ACK_MAGIC + sequence) back to the packet's
// origin. A throwaway UdpClient is used per ACK; the reply still reaches the
// sender's listening socket because it targets the sender's observed endpoint.
private void SendAck(IPEndPoint recipient, int sequence)
{
    try
    {
        byte[] payload = new byte[8];
        Buffer.BlockCopy(BitConverter.GetBytes(ACK_MAGIC), 0, payload, 0, 4);
        Buffer.BlockCopy(BitConverter.GetBytes(sequence), 0, payload, 4, 4);

        using (var ackClient = new UdpClient())
        {
            ackClient.Send(payload, payload.Length, recipient);
        }
    }
    catch (Exception ex)
    {
        // ACKs are best-effort; never let a send failure break packet handling.
        ConvaiLogger.Warn($"Failed to send ACK: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
// Event handler for when NPC becomes active
private void HandleActiveNPCChanged(ConvaiNPC newActiveNPC)

View File

@ -6,6 +6,9 @@ using System.Threading.Tasks;
using Convai.Scripts.Runtime.LoggerSystem;
using Convai.Scripts.Runtime.UI;
using UnityEngine;
using UnityEngine.XR;
using UnityEngine.InputSystem;
using UnityEngine.InputSystem.XR;
namespace Convai.Scripts.Runtime.Multiplayer
{
@ -30,6 +33,11 @@ namespace Convai.Scripts.Runtime.Multiplayer
[SerializeField] private KeyCode talkKey = KeyCode.T;
[SerializeField] private bool useHoldToTalk = true;
[SerializeField] private KeyCode controllerTalkButton = KeyCode.JoystickButton0; // A button on most controllers
[SerializeField] private bool useXRControllerAButton = true; // OpenXR primaryButton
[SerializeField] private XRNode xrControllerNode = XRNode.RightHand; // Quest A button is on right hand
[SerializeField] private bool useInputSystemXR = true; // Use new Input System for XR button
[SerializeField] private bool enableBButtonTest = true; // Press B/secondary to send test packet
[SerializeField] private InputActionReference sendVoiceActionReference; // Optional external action (e.g., QuestPro primaryButton)
[Header("Debug")]
[SerializeField] private bool enableDebugLogging = true;
@ -40,11 +48,25 @@ namespace Convai.Scripts.Runtime.Multiplayer
private AudioClip _audioClip;
private bool _isRecording = false;
private CancellationTokenSource _cancellationTokenSource;
private CancellationTokenSource _ackCancellationTokenSource;
private int _lastMicrophonePosition = 0;
private float[] _audioBuffer;
private string _selectedMicrophone;
private int _packetSequence = 0;
private volatile bool _startAckReceived = false;
private bool _xrAButtonPrevPressed = false;
private InputAction _xrTalkAction;
private InputAction _xrTestAction;
private bool _usingExternalTalkAction = false;
private InputAction _externalTalkAction;
// Protocol constants
private const uint AUDIO_MAGIC = 0xC0A1;
private const uint ACK_MAGIC = 0xC0A2;
private const byte FLAG_AUDIO = 0;
private const byte FLAG_END = 1;
private const byte FLAG_START = 2;
public event Action<bool> OnRecordingStateChanged;
@ -63,6 +85,27 @@ namespace Convai.Scripts.Runtime.Multiplayer
InitializeNetwork();
InitializeAudio();
_cancellationTokenSource = new CancellationTokenSource();
_ackCancellationTokenSource = new CancellationTokenSource();
// Start ACK listener
_ = ListenForAcks(_ackCancellationTokenSource.Token);
// Setup Input System action for XR A/primary button
if (useInputSystemXR)
{
if (sendVoiceActionReference != null && sendVoiceActionReference.action != null)
{
SetupExternalTalkInputAction(sendVoiceActionReference.action);
}
else
{
SetupXRTalkInputAction();
}
if (enableBButtonTest)
{
SetupXRTestInputAction();
}
}
}
private void Update()
@ -75,6 +118,13 @@ namespace Convai.Scripts.Runtime.Multiplayer
StopRecording();
_cancellationTokenSource?.Cancel();
_cancellationTokenSource?.Dispose();
_ackCancellationTokenSource?.Cancel();
_ackCancellationTokenSource?.Dispose();
if (_usingExternalTalkAction)
TeardownExternalTalkInputAction();
else
TeardownXRTalkInputAction();
TeardownXRTestInputAction();
_udpClient?.Close();
}
@ -127,21 +177,34 @@ namespace Convai.Scripts.Runtime.Multiplayer
private void HandleInput()
{
// Input System XR events handle XR button press/release.
// Here we keep keyboard/legacy controller as fallback.
bool xrDown = false;
bool xrUp = false;
if (useXRControllerAButton && !useInputSystemXR)
{
bool xrPressed = GetXRPrimaryButtonPressed(xrControllerNode);
xrDown = xrPressed && !_xrAButtonPrevPressed;
xrUp = !xrPressed && _xrAButtonPrevPressed;
_xrAButtonPrevPressed = xrPressed;
}
// Handle talk key
if (useHoldToTalk)
{
if ((Input.GetKeyDown(talkKey) || Input.GetKeyDown(controllerTalkButton)) && !_isRecording)
if ((Input.GetKeyDown(talkKey) || Input.GetKeyDown(controllerTalkButton) || xrDown) && !_isRecording)
{
StartRecording();
}
else if ((Input.GetKeyUp(talkKey) || Input.GetKeyUp(controllerTalkButton)) && _isRecording)
else if ((Input.GetKeyUp(talkKey) || Input.GetKeyUp(controllerTalkButton) || xrUp) && _isRecording)
{
StopRecording();
}
}
else
{
if (Input.GetKeyDown(talkKey) || Input.GetKeyDown(controllerTalkButton))
if (Input.GetKeyDown(talkKey) || Input.GetKeyDown(controllerTalkButton) || xrDown)
{
if (_isRecording)
StopRecording();
@ -156,6 +219,196 @@ namespace Convai.Scripts.Runtime.Multiplayer
TestConnection();
}
}
// Creates a programmatic Input System action bound to the primary/A button of
// the configured XR hand (plus gamepad south as an in-editor fallback) and
// wires it to start/stop recording according to the hold-to-talk setting.
private void SetupXRTalkInputAction()
{
    try
    {
        _xrTalkAction = new InputAction("XRTalk", InputActionType.Button);

        string hand = xrControllerNode == XRNode.LeftHand ? "{LeftHand}" : "{RightHand}";
        string[] bindingPaths =
        {
            $"<XRController>{hand}/primaryButton",
            $"<OculusTouchController>{hand}/primaryButton",
            $"<MetaTouchController>{hand}/primaryButton",
            $"<QuestProTouchController>{hand}/primaryButton",
            $"<XRController>{hand}/buttonSouth",
            "<Gamepad>/buttonSouth",
        };
        foreach (string path in bindingPaths)
            _xrTalkAction.AddBinding(path);

        if (useHoldToTalk)
        {
            // Press starts recording, release stops it.
            _xrTalkAction.started += ctx => { if (!_isRecording) StartRecording(); };
            _xrTalkAction.canceled += ctx => { if (_isRecording) StopRecording(); };
        }
        else
        {
            // Each press toggles the recording state.
            _xrTalkAction.started += ctx => { if (_isRecording) StopRecording(); else StartRecording(); };
        }

        _xrTalkAction.Enable();
    }
    catch (Exception ex)
    {
        ConvaiLogger.Warn($"Failed to setup XR InputAction: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
// Disables and disposes the programmatically created talk action, if any.
private void TeardownXRTalkInputAction()
{
    var action = _xrTalkAction;
    if (action == null)
        return;

    try
    {
        action.Disable();
        action.Dispose();
        _xrTalkAction = null;
    }
    catch (Exception)
    {
        // Teardown is best-effort during shutdown; swallow failures.
    }
}
// Wires an externally supplied InputAction (e.g. a serialized
// InputActionReference such as a QuestPro primaryButton) as the talk trigger.
// On failure, falls back to the programmatic XR action.
private void SetupExternalTalkInputAction(InputAction action)
{
    try
    {
        _usingExternalTalkAction = true;
        _externalTalkAction = action;

        if (useHoldToTalk)
        {
            // Press starts, release stops.
            action.started += ctx =>
            {
                if (!_isRecording)
                    StartRecording();
            };
            action.canceled += ctx =>
            {
                if (_isRecording)
                    StopRecording();
            };
        }
        else
        {
            // Single press toggles recording.
            action.started += ctx =>
            {
                if (_isRecording)
                    StopRecording();
                else
                    StartRecording();
            };
        }

        if (!action.enabled)
            action.Enable();
    }
    catch (Exception ex)
    {
        ConvaiLogger.Warn($"Failed to setup external talk action: {ex.Message}", ConvaiLogger.LogCategory.Character);
        _usingExternalTalkAction = false;
        _externalTalkAction = null;
        // Fall back to the programmatic XR action.
        SetupXRTalkInputAction();
    }
}
// Releases our reference to the externally owned talk action.
//
// NOTE(review): the previous implementation attempted to unsubscribe with
// freshly created lambdas (`action.started -= ctx => ...`). In C#, removing a
// delegate instance that was never added is a silent no-op, so those calls
// never detached anything — the handlers registered in
// SetupExternalTalkInputAction remain attached to the external action for its
// lifetime. Those ineffective removals have been deleted here (behavior is
// unchanged). TODO: store the exact delegate instances at subscription time
// and remove those same instances here to actually detach the handlers.
//
// We deliberately do not Disable()/Dispose() the action: it is owned by the
// caller (an InputActionReference asset), not by this component.
private void TeardownExternalTalkInputAction()
{
    _externalTalkAction = null;
    _usingExternalTalkAction = false;
}
// Binds the secondary/B button of the configured hand (plus gamepad east as a
// fallback) to sending a UDP connectivity-test packet.
private void SetupXRTestInputAction()
{
    try
    {
        _xrTestAction = new InputAction("XRTest", InputActionType.Button);

        string hand = xrControllerNode == XRNode.LeftHand ? "{LeftHand}" : "{RightHand}";
        string[] bindingPaths =
        {
            // B button is typically secondaryButton or buttonEast.
            $"<XRController>{hand}/secondaryButton",
            $"<OculusTouchController>{hand}/secondaryButton",
            $"<MetaTouchController>{hand}/secondaryButton",
            $"<XRController>{hand}/buttonEast",
            "<Gamepad>/buttonEast",
        };
        foreach (string path in bindingPaths)
            _xrTestAction.AddBinding(path);

        _xrTestAction.started += ctx => { SendTestPacket(); };
        _xrTestAction.Enable();
    }
    catch (Exception ex)
    {
        ConvaiLogger.Warn($"Failed to setup XR Test InputAction: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
// Disables and disposes the programmatically created test action, if any.
private void TeardownXRTestInputAction()
{
    var action = _xrTestAction;
    if (action == null)
        return;

    try
    {
        action.Disable();
        action.Dispose();
        _xrTestAction = null;
    }
    catch (Exception)
    {
        // Teardown is best-effort during shutdown; swallow failures.
    }
}
// Sends a plain-text UDP message so the receiving side can confirm basic
// connectivity. `async void` is tolerated here because this is only invoked
// from an input-event callback (fire-and-forget).
private async void SendTestPacket()
{
    try
    {
        if (_udpClient == null || _targetEndPoint == null)
        {
            ConvaiLogger.Error("UDP client not initialized for test packet", ConvaiLogger.LogCategory.Character);
            return;
        }

        byte[] payload = System.Text.Encoding.UTF8.GetBytes("Hello this is a Test");
        await _udpClient.SendAsync(payload, payload.Length, _targetEndPoint);

        ConvaiLogger.Info("Sent test packet: 'Hello this is a Test'", ConvaiLogger.LogCategory.Character);
    }
    catch (Exception ex)
    {
        ConvaiLogger.Error($"Failed to send test packet: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
// Polls the legacy XR input API for the primary (A) button state of the given
// hand. Returns false when the device is missing, invalid, or the query fails.
private bool GetXRPrimaryButtonPressed(XRNode hand)
{
    try
    {
        var controller = InputDevices.GetDeviceAtXRNode(hand);
        if (controller.isValid &&
            controller.TryGetFeatureValue(UnityEngine.XR.CommonUsages.primaryButton, out bool isPressed))
        {
            return isPressed;
        }
    }
    catch (Exception)
    {
        // Any device query failure is treated as "not pressed".
    }
    return false;
}
public void StartRecording()
{
@ -169,10 +422,14 @@ namespace Convai.Scripts.Runtime.Multiplayer
_isRecording = true;
_lastMicrophonePosition = 0;
_packetSequence = 0;
_startAckReceived = false;
ConvaiLogger.Info("Started recording for UDP transmission (Simple)", ConvaiLogger.LogCategory.Character);
OnRecordingStateChanged?.Invoke(true);
// Send START control and wait briefly for ACK to ensure receiver is ready
_ = SendStartOfRecordingSignalAndAwaitAck();
// Start continuous audio processing
_ = ProcessAudioContinuously(_cancellationTokenSource.Token);
}
@ -324,7 +581,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
int offset = 0;
// Magic number
BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
BitConverter.GetBytes(AUDIO_MAGIC).CopyTo(packet, offset);
offset += 4;
// Packet sequence
@ -340,7 +597,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
offset += 4;
// Flags (0 for normal audio)
packet[offset] = 0;
packet[offset] = FLAG_AUDIO;
offset += 1;
// Convert audio samples to bytes (same as Convai approach)
@ -366,7 +623,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
int offset = 0;
// Magic number
BitConverter.GetBytes((uint)0xC0A1).CopyTo(packet, offset);
BitConverter.GetBytes(AUDIO_MAGIC).CopyTo(packet, offset);
offset += 4;
// Packet sequence
@ -382,7 +639,7 @@ namespace Convai.Scripts.Runtime.Multiplayer
offset += 4;
// Flags (1 for end of recording)
packet[offset] = 1;
packet[offset] = FLAG_END;
_udpClient.SendAsync(packet, packet.Length, _targetEndPoint);
}
@ -391,6 +648,90 @@ namespace Convai.Scripts.Runtime.Multiplayer
ConvaiLogger.Error($"Failed to send end signal: {ex.Message}", ConvaiLogger.LogCategory.Character);
}
}
// Sends a START control packet (17-byte header, sequence -1, FLAG_START) and
// polls for the receiver's ACK, retrying up to three times so audio is not
// streamed before the receiver is ready. The ACK flag is flipped by
// ListenForAcks; this method only waits on it.
private async Task SendStartOfRecordingSignalAndAwaitAck()
{
    const int maxAttempts = 3;
    const int ackTimeoutMs = 250;
    const int pollIntervalMs = 10;

    try
    {
        for (int attempt = 1; attempt <= maxAttempts; attempt++)
        {
            if (_startAckReceived)
                break;

            // Rebuild per attempt: _lastMicrophonePosition may change between tries.
            byte[] packet = new byte[17];
            BitConverter.GetBytes(AUDIO_MAGIC).CopyTo(packet, 0);
            BitConverter.GetBytes(-1).CopyTo(packet, 4);                     // sentinel sequence for START
            BitConverter.GetBytes(0).CopyTo(packet, 8);                      // no audio samples
            BitConverter.GetBytes(_lastMicrophonePosition).CopyTo(packet, 12);
            packet[16] = FLAG_START;

            await _udpClient.SendAsync(packet, packet.Length, _targetEndPoint);

            // Poll for the ACK until the per-attempt timeout elapses.
            for (int waited = 0; waited < ackTimeoutMs && !_startAckReceived; waited += pollIntervalMs)
                await Task.Delay(pollIntervalMs);

            if (_startAckReceived)
            {
                if (enableDebugLogging)
                    ConvaiLogger.DebugLog("Received START ACK from receiver", ConvaiLogger.LogCategory.Character);
                break;
            }

            if (enableDebugLogging)
                ConvaiLogger.Warn($"No START ACK (attempt {attempt}/{maxAttempts}), retrying...", ConvaiLogger.LogCategory.Character);
        }
    }
    catch (Exception ex)
    {
        ConvaiLogger.Warn($"Error during START ACK process: {ex.Message}", ConvaiLogger.LogCategory.Character);
    }
}
// Background loop that receives ACK datagrams on our own UDP socket (the
// receiver replies to this client's ephemeral local port). An ACK is 8 bytes:
// ACK_MAGIC followed by the acknowledged sequence; sequence -1 acknowledges
// the START control packet and sets _startAckReceived.
//
// Fix: the previous version swallowed every unexpected exception and looped
// immediately, so a persistent receive failure (e.g. _udpClient not yet
// initialized, or a socket-level error) would busy-spin this task at full
// speed. We now back off briefly before retrying.
private async Task ListenForAcks(CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        try
        {
            var result = await _udpClient.ReceiveAsync();
            var data = result.Buffer;
            if (data == null || data.Length < 8)
                continue;

            uint magic = BitConverter.ToUInt32(data, 0);
            if (magic != ACK_MAGIC)
                continue; // not an ACK; ignore (audio traffic uses AUDIO_MAGIC)

            int seq = BitConverter.ToInt32(data, 4);
            if (seq == -1)
            {
                _startAckReceived = true; // START handshake acknowledged
            }
        }
        catch (ObjectDisposedException)
        {
            // Socket closed during shutdown — exit the loop.
            break;
        }
        catch (Exception)
        {
            // Best-effort listener: ignore the error, but back off briefly so a
            // persistent failure cannot busy-spin this loop.
            try
            {
                await Task.Delay(50, token);
            }
            catch (OperationCanceledException)
            {
                break;
            }
        }
    }
}
// Public methods for external control
public void SetTargetEndpoint(string ip, int port)

Binary file not shown.

Binary file not shown.