a-zlm/ext-codec/VP8Rtp.cpp

357 lines
11 KiB
C++
Raw Normal View History

2026-01-14 15:38:20 +08:00
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "VP8Rtp.h"
#include "Extension/Frame.h"
#include "Common/config.h"
namespace mediakit{
const int16_t kNoPictureId = -1;
const int8_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const int kNoKeyIdx = -1;
// internal bits
constexpr int kXBit = 0x80;
constexpr int kNBit = 0x20;
constexpr int kSBit = 0x10;
constexpr int kKeyIdxField = 0x1F;
constexpr int kIBit = 0x80;
constexpr int kLBit = 0x40;
constexpr int kTBit = 0x20;
constexpr int kKBit = 0x10;
constexpr int kYBit = 0x20;
constexpr int kFailedToParse = 0;
// VP8 payload descriptor
// https://datatracker.ietf.org/doc/html/rfc7741#section-4.2
//
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |X|R|N|S|R| PID | (REQUIRED)
// +-+-+-+-+-+-+-+-+
// X: |I|L|T|K| RSV | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// I: |M| PictureID | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// | PictureID |
// +-+-+-+-+-+-+-+-+
// L: | TL0PICIDX | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// T/K: |TID|Y| KEYIDX | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
struct RTPVideoHeaderVP8 {
void InitRTPVideoHeaderVP8();
int Size() const;
int Write(uint8_t *data, int size) const;
int Read(const uint8_t *data, int data_length);
bool isFirstPacket() const { return beginningOfPartition && partitionId == 0; }
friend bool operator!=(const RTPVideoHeaderVP8 &lhs, const RTPVideoHeaderVP8 &rhs) { return !(lhs == rhs); }
friend bool operator==(const RTPVideoHeaderVP8 &lhs, const RTPVideoHeaderVP8 &rhs) {
return lhs.nonReference == rhs.nonReference && lhs.pictureId == rhs.pictureId && lhs.tl0PicIdx == rhs.tl0PicIdx && lhs.temporalIdx == rhs.temporalIdx
&& lhs.layerSync == rhs.layerSync && lhs.keyIdx == rhs.keyIdx && lhs.partitionId == rhs.partitionId
&& lhs.beginningOfPartition == rhs.beginningOfPartition;
}
bool nonReference; // Frame is discardable.
int16_t pictureId; // Picture ID index, 15 bits;
// kNoPictureId if PictureID does not exist.
int8_t tl0PicIdx; // TL0PIC_IDX, 8 bits;
// kNoTl0PicIdx means no value provided.
uint8_t temporalIdx; // Temporal layer index, or kNoTemporalIdx.
bool layerSync; // This frame is a layer sync frame.
// Disabled if temporalIdx == kNoTemporalIdx.
int8_t keyIdx; // 5 bits; kNoKeyIdx means not used.
int8_t partitionId; // VP8 partition ID
bool beginningOfPartition; // True if this packet is the first
// in a VP8 partition. Otherwise false
};
void RTPVideoHeaderVP8::InitRTPVideoHeaderVP8() {
nonReference = false;
pictureId = kNoPictureId;
tl0PicIdx = kNoTl0PicIdx;
temporalIdx = kNoTemporalIdx;
layerSync = false;
keyIdx = kNoKeyIdx;
partitionId = 0;
beginningOfPartition = false;
}
int RTPVideoHeaderVP8::Size() const {
bool tid_present = this->temporalIdx != kNoTemporalIdx;
bool keyid_present = this->keyIdx != kNoKeyIdx;
bool tl0_pid_present = this->tl0PicIdx != kNoTl0PicIdx;
bool pid_present = this->pictureId != kNoPictureId;
int ret = 2;
if (pid_present)
ret += 2;
if (tl0_pid_present)
ret++;
if (tid_present || keyid_present)
ret++;
return ret == 2 ? 1 : ret;
}
int RTPVideoHeaderVP8::Write(uint8_t *data, int size) const {
int ret = 0;
bool tid_present = this->temporalIdx != kNoTemporalIdx;
bool keyid_present = this->keyIdx != kNoKeyIdx;
bool tl0_pid_present = this->tl0PicIdx != kNoTl0PicIdx;
bool pid_present = this->pictureId != kNoPictureId;
uint8_t x_field = 0;
if (pid_present)
x_field |= kIBit;
if (tl0_pid_present)
x_field |= kLBit;
if (tid_present)
x_field |= kTBit;
if (keyid_present)
x_field |= kKBit;
uint8_t flags = 0;
if (x_field != 0)
flags |= kXBit;
if (this->nonReference)
flags |= kNBit;
// Create header as first packet in the frame. NextPacket() will clear it
// after first use.
flags |= kSBit;
data[ret++] = flags;
if (x_field == 0) {
return ret;
}
data[ret++] = x_field;
if (pid_present) {
const uint16_t pic_id = static_cast<uint16_t>(this->pictureId);
data[ret++] = (0x80 | ((pic_id >> 8) & 0x7F));
data[ret++] = (pic_id & 0xFF);
}
if (tl0_pid_present) {
data[ret++] = this->tl0PicIdx;
}
if (tid_present || keyid_present) {
uint8_t data_field = 0;
if (tid_present) {
data_field |= this->temporalIdx << 6;
if (this->layerSync)
data_field |= kYBit;
}
if (keyid_present) {
data_field |= (this->keyIdx & kKeyIdxField);
}
data[ret++] = data_field;
}
return ret;
}
int RTPVideoHeaderVP8::Read(const uint8_t *data, int data_length) {
// RTC_DCHECK_GT(data_length, 0);
int parsed_bytes = 0;
// Parse mandatory first byte of payload descriptor.
bool extension = (*data & 0x80) ? true : false; // X bit
this->nonReference = (*data & 0x20) ? true : false; // N bit
this->beginningOfPartition = (*data & 0x10) ? true : false; // S bit
this->partitionId = (*data & 0x07); // PID field
data++;
parsed_bytes++;
data_length--;
if (!extension)
return parsed_bytes;
if (data_length == 0)
return kFailedToParse;
// Optional X field is present.
bool has_picture_id = (*data & 0x80) ? true : false; // I bit
bool has_tl0_pic_idx = (*data & 0x40) ? true : false; // L bit
bool has_tid = (*data & 0x20) ? true : false; // T bit
bool has_key_idx = (*data & 0x10) ? true : false; // K bit
// Advance data and decrease remaining payload size.
data++;
parsed_bytes++;
data_length--;
if (has_picture_id) {
if (data_length == 0)
return kFailedToParse;
this->pictureId = (*data & 0x7F);
if (*data & 0x80) {
data++;
parsed_bytes++;
if (--data_length == 0)
return kFailedToParse;
// PictureId is 15 bits
this->pictureId = (this->pictureId << 8) + *data;
}
data++;
parsed_bytes++;
data_length--;
}
if (has_tl0_pic_idx) {
if (data_length == 0)
return kFailedToParse;
this->tl0PicIdx = *data;
data++;
parsed_bytes++;
data_length--;
}
if (has_tid || has_key_idx) {
if (data_length == 0)
return kFailedToParse;
if (has_tid) {
this->temporalIdx = ((*data >> 6) & 0x03);
this->layerSync = (*data & 0x20) ? true : false; // Y bit
}
if (has_key_idx) {
this->keyIdx = *data & 0x1F;
}
data++;
parsed_bytes++;
data_length--;
}
return parsed_bytes;
}
/////////////////////////////////////////////////
// VP8RtpDecoder
VP8RtpDecoder::VP8RtpDecoder() {
obtainFrame();
}
void VP8RtpDecoder::obtainFrame() {
_frame = FrameImp::create<VP8Frame>();
}
bool VP8RtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
auto seq = rtp->getSeq();
bool ret = decodeRtp(rtp);
if (!_gop_dropped && seq != (uint16_t)(_last_seq + 1) && _last_seq) {
_gop_dropped = true;
WarnL << "start drop vp8 gop, last seq:" << _last_seq << ", rtp:\r\n" << rtp->dumpString();
}
_last_seq = seq;
return ret;
}
bool VP8RtpDecoder::decodeRtp(const RtpPacket::Ptr &rtp) {
auto payload_size = rtp->getPayloadSize();
if (payload_size <= 0) {
// No actual payload
return false;
}
auto payload = rtp->getPayload();
auto stamp = rtp->getStampMS();
auto seq = rtp->getSeq();
RTPVideoHeaderVP8 info;
int offset = info.Read(payload, payload_size);
if (!offset) {
//_frame_drop = true;
return false;
}
bool start = info.isFirstPacket();
if (start) {
_frame->_pts = stamp;
_frame->_buffer.clear();
_frame_drop = false;
}
if (_frame_drop) {
// This frame is incomplete
return false;
}
if (!start && seq != (uint16_t)(_last_seq + 1)) {
// 中间的或末尾的rtp包其seq必须连续否则说明rtp丢包那么该帧不完整必须得丢弃
_frame_drop = true;
_frame->_buffer.clear();
return false;
}
// Append data
_frame->_buffer.append((char *)payload + offset, payload_size - offset);
bool end = rtp->getHeader()->mark;
if (end) {
// 确保下一次fu必须收到第一个包
_frame_drop = true;
// 该帧最后一个rtp包,输出frame [AUTO-TRANSLATED:a648aaa5]
// The last rtp packet of this frame, output frame
outputFrame(rtp);
}
return (info.isFirstPacket() && (payload[offset] & 0x01) == 0);
}
void VP8RtpDecoder::outputFrame(const RtpPacket::Ptr &rtp) {
if (_frame->dropAble()) {
// 不参与dts生成 [AUTO-TRANSLATED:dff3b747]
// Not involved in dts generation
_frame->_dts = _frame->_pts;
} else {
// rtsp没有dts那么根据pts排序算法生成dts [AUTO-TRANSLATED:f37c17f3]
// Rtsp does not have dts, so dts is generated according to the pts sorting algorithm
_dts_generator.getDts(_frame->_pts, _frame->_dts);
}
if (_frame->keyFrame() && _gop_dropped) {
_gop_dropped = false;
InfoL << "new gop received, rtp:\r\n" << rtp->dumpString();
}
if (!_gop_dropped || _frame->configFrame()) {
RtpCodec::inputFrame(_frame);
}
obtainFrame();
}
////////////////////////////////////////////////////////////////////////
bool VP8RtpEncoder::inputFrame(const Frame::Ptr &frame) {
RTPVideoHeaderVP8 info;
info.InitRTPVideoHeaderVP8();
info.beginningOfPartition = true;
info.nonReference = !frame->dropAble();
uint8_t header[20];
int header_size = info.Write(header, sizeof(header));
int pdu_size = getRtpInfo().getMaxSize() - header_size;
const char *ptr = frame->data() + frame->prefixSize();
size_t len = frame->size() - frame->prefixSize();
bool key = frame->keyFrame();
bool mark = false;
for (size_t pos = 0; pos < len; pos += pdu_size) {
if (static_cast<int>(len - pos) <= pdu_size) {
pdu_size = len - pos;
mark = true;
}
auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, pdu_size + header_size, mark, frame->pts());
if (rtp) {
uint8_t *payload = rtp->getPayload();
memcpy(payload, header, header_size);
memcpy(payload + header_size, ptr + pos, pdu_size);
RtpCodec::inputRtp(rtp, key);
}
key = false;
header[0] &= (~kSBit); // Clear 'Start of partition' bit.
}
return true;
}
} // namespace mediakit