normalize media pipeline at client boundary
- AudioParams.framing field: client declares "raw" or "adts"
- Client strips ADTS from audio before sending (strip_adts)
- Client does H.264 NAL inspection for keyframe detection (h264_is_keyframe)
- Server uses declared sample_rate/channels for ADTS synthesis instead of hardcoded 48kHz/stereo
- Server gates ADTS wrapping on framing field instead of per-packet sniffing

New backends only need to pipe output to demux_and_send() — server and Python unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,48 @@ use tracing::{error, info, warn};
|
||||
|
||||
use crate::encoder::{EncodedPacket, MediaType};
|
||||
|
||||
/// Check whether H.264 data contains an IDR (keyframe) NAL unit.
///
/// Scans `data` for NAL start codes (`00 00 01` or `00 00 00 01`) and
/// inspects the byte that follows each one. The lower 5 bits of that byte
/// are the NAL unit type; type 5 is an IDR slice.
///
/// Returns `true` as soon as an IDR NAL is found, and `false` if none is
/// found, including for empty input or input too short to hold a start
/// code plus a header byte.
pub fn h264_is_keyframe(data: &[u8]) -> bool {
    /// Mask selecting the NAL unit type from the NAL header byte.
    const NAL_TYPE_MASK: u8 = 0x1F;
    /// NAL unit type of an IDR slice.
    const NAL_TYPE_IDR: u8 = 5;

    // A four-byte start code (00 00 00 01) ends with the three-byte one
    // (00 00 01), so matching "00 00 01 <header>" in every 4-byte window
    // covers both forms. Examining every window, rather than jumping past
    // the header byte after a match, means a start code that begins on the
    // byte right after another start code is never skipped.
    data.windows(4).any(|window| {
        matches!(
            window,
            [0, 0, 1, header] if *header & NAL_TYPE_MASK == NAL_TYPE_IDR
        )
    })
}
|
||||
|
||||
/// Strip a leading ADTS header from AAC data if one is present.
///
/// An ADTS header is 7 bytes without a CRC, or 9 bytes with one. The header
/// is recognised by its 12-bit syncword (`0xFFF`) together with the 2-bit
/// `layer` field, which is always zero for ADTS. Checking `layer` as well
/// keeps buffers that merely start with `0xFF 0xFx` from being truncated.
///
/// Returns a copy of the payload that follows the header. The input is
/// returned unchanged (copied) when it does not start with an ADTS header,
/// or when it is no longer than the header itself.
fn strip_adts(data: &[u8]) -> Vec<u8> {
    /// Header length when `protection_absent` is 1 (no CRC).
    const HEADER_LEN: usize = 7;
    /// Header length when `protection_absent` is 0 (2-byte CRC appended).
    const HEADER_LEN_WITH_CRC: usize = 9;
    /// Bits of the second byte that must match: low syncword nibble + layer.
    const SYNC_AND_LAYER_MASK: u8 = 0xF6;
    /// Expected value under the mask: syncword bits set, layer == 0.
    const SYNC_AND_LAYER_BITS: u8 = 0xF0;

    let looks_like_adts = data.len() >= HEADER_LEN
        && data[0] == 0xFF
        && (data[1] & SYNC_AND_LAYER_MASK) == SYNC_AND_LAYER_BITS;

    if looks_like_adts {
        // protection_absent == 0 means a CRC follows the fixed header.
        let crc_present = (data[1] & 0x01) == 0;
        let header_len = if crc_present {
            HEADER_LEN_WITH_CRC
        } else {
            HEADER_LEN
        };

        // Only strip when a payload actually follows the header; a
        // header-only buffer is passed through untouched.
        if data.len() > header_len {
            return data[header_len..].to_vec();
        }
    }

    data.to_vec()
}
|
||||
|
||||
|
||||
pub struct SubprocessConfig {
|
||||
pub device: String,
|
||||
pub fps: u32,
|
||||
@@ -324,9 +366,14 @@ fn demux_and_send(
|
||||
}
|
||||
} else if let Some((audio_idx, audio_tb_num, audio_tb_den)) = audio_info {
|
||||
if stream_idx == audio_idx {
|
||||
// Strip ADTS header if present — normalize to raw AAC on the wire.
|
||||
// mpegts backends (e.g. gpu-screen-recorder) wrap AAC in ADTS;
|
||||
// NUT (ffmpeg) sends raw AAC. Stripping here makes the wire
|
||||
// format consistent regardless of capture backend.
|
||||
let audio_data = strip_adts(&data);
|
||||
let encoded = EncodedPacket {
|
||||
media_type: MediaType::Audio,
|
||||
data,
|
||||
data: audio_data,
|
||||
pts: packet.pts().unwrap_or(0),
|
||||
dts: packet.dts().unwrap_or(0),
|
||||
keyframe: packet.is_key(),
|
||||
|
||||
@@ -58,6 +58,7 @@ async fn main() -> Result<()> {
|
||||
sample_rate: 48000,
|
||||
channels: 2,
|
||||
codec: "aac".into(),
|
||||
framing: "raw".into(),
|
||||
},
|
||||
};
|
||||
protocol::write_packet(&mut writer, &session_start.to_wire_packet()?).await?;
|
||||
|
||||
Reference in New Issue
Block a user