WIP

2025-11-13 19:56:23 +00:00 · 2024-03-30 22:31:33 +01:00
parent 3e688ddcfb
commit c599657882
2 changed files with 66 additions and 59 deletions
--- a/lib/micro-rtsp-server/include/micro_rtsp_streamer.h
+++ b/lib/micro-rtsp-server/include/micro_rtsp_streamer.h
@@ -2,14 +2,14 @@

 #include <jpg.h>

-
 class micro_rtsp_streamer
 {
 public:
-    micro_rtsp_streamer();
-    size_t create_jpg_packet(const uint8_t *jpg, const uint8_t *jpg_end, uint32_t timestamp);
-    private :
+    micro_rtsp_streamer(const uint16_t width, const uint16_t height); // stores the frame size; the .cpp definition also sets ssrc_ from esp_random() and sequenceNumber_ to 0
+    size_t create_jpg_packet(const uint8_t *jpg, uint8_t **jpg_current, const uint8_t *jpg_end, const uint32_t timestamp); // the .cpp derives the fragment offset as *jpg_current - jpg and a length as jpg_end - *jpg_current

+private:
+    uint16_t width_, height_; // frame size in pixels; the .cpp shifts these right by 3 to fill the 8-pixel-block width/height header fields
    uint32_t ssrc_;
    uint16_t sequenceNumber_;
 };
--- a/lib/micro-rtsp-server/src/micro_rtsp_streamer.cpp
+++ b/lib/micro-rtsp-server/src/micro_rtsp_streamer.cpp
@@ -1,13 +1,13 @@
-#include "micro_rtsp_streamer.h"
-
-//#include "rtp_payloads.h"
-
 #include <stddef.h>
+#include <memory.h>

+#include "micro_rtsp_streamer.h"
 #include "esp_random.h"

 // https://github.com/txgcwm/Linux-C-Examples/blob/master/h264/h264dec/rtcp.h

+#define RTP_PAYLOAD_JPG 26
+
 // RTP data header (http://www.ietf.org/rfc/rfc3550.txt)
 struct rtp_hdr
 {
@@ -18,9 +18,9 @@ struct rtp_hdr
    uint16_t m : 1;       // marker bit
    uint16_t pt : 7;      // payload type
    uint16_t seq : 16;    // sequence number
-    uint32_t ts;      // timestamp
-    uint32_t ssrc;    // synchronization source
-    uint32_t csrc[];  // optional CSRC list
+    uint32_t ts;          // timestamp
+    uint32_t ssrc;        // synchronization source
+    uint32_t csrc[];      // optional CSRC list
 } rtp_hdr;

 // https://datatracker.ietf.org/doc/html/rfc2435
@@ -32,10 +32,10 @@ struct jpeghdr
 {
    uint32_t tspec : 8; // type-specific field
    uint32_t off : 24;  // fragment byte offset
-    uint8_t type;        // id of jpeg decoder params
-    uint8_t q;           // quantization factor (or table id)
-    uint8_t width;       // frame width in 8 pixel blocks
-    uint8_t height;      // frame height in 8 pixel blocks
+    uint8_t type;       // id of jpeg decoder params
+    uint8_t q;          // quantization factor (or table id)
+    uint8_t width;      // frame width in 8 pixel blocks
+    uint8_t height;     // frame height in 8 pixel blocks
 };

 struct jpeghdr_rst
@@ -55,8 +55,10 @@ struct jpeghdr_qtable

 #define RTP_JPEG_RESTART 0x40

-micro_rtsp_streamer::micro_rtsp_streamer()
+micro_rtsp_streamer::micro_rtsp_streamer(const uint16_t width, const uint16_t height)
 {
+    width_ = width;
+    height_ = height;
    // Random number
    ssrc_ = esp_random();
    sequenceNumber_ = 0;
@@ -64,67 +66,72 @@ micro_rtsp_streamer::micro_rtsp_streamer()

 #define MAX_ESP32_MTU 1440

-size_t micro_rtsp_streamer::create_jpg_packet(const uint8_t *jpg, const uint8_t *jpg_end, uint32_t timestamp)
+size_t micro_rtsp_streamer::create_jpg_packet(const uint8_t *jpg, uint8_t **jpg_current, const uint8_t *jpg_end, const uint32_t timestamp)
 {
+    const int MAX_FRAGMENT_SIZE = 1100; // FIXME, pick more carefully
    int fragmentLen = MAX_FRAGMENT_SIZE;
-    if (fragmentLen + fragmentOffset > jpegLen) // Shrink last fragment if needed
-        fragmentLen = jpegLen - fragmentOffset;
+    auto jpegLen = jpg_end - *jpg_current;

-    bool isLastFragment = (fragmentOffset + fragmentLen) == jpegLen;
+    auto offset = *jpg_current - jpg;
+    if (fragmentLen + offset > jpegLen) // Shrink last fragment if needed
+        fragmentLen = jpegLen - offset;

-    struct rtp_header header = {
+    // bool isLastFragment = (fragmentOffset + fragmentLen) == jpegLen;
+
+    struct rtp_hdr header = {
        .version = 2,
+        .m = 1, // TODO: set to 1 only on the last fragment
+        .pt = RTP_PAYLOAD_JPG,
        .seq = sequenceNumber_,
-        .marker = 1, // TODO = 1 if last fragfment
-        .pt = rtp_payload.JPEG,
        .ts = timestamp,
        .ssrc = ssrc_};

    struct jpeghdr jpghdr = {
-        .tspec = 0,           // type-specific field
-        .off = offset,        // fragment byte offset
-        .type = 0,            // id of jpeg decoder params
-        .q = 0x5e,            // quantization factor (or table id)
-        .width = width >> 3,  // frame width in 8 pixel blocks
-        .height = height >> 3 // frame height in 8 pixel blocks
+        .tspec = 0,            // type-specific field
+        .off = offset,         // fragment byte offset
+        .type = 0,             // id of jpeg decoder params
+        .q = 0x5e,             // quantization factor (or table id)
+        .width = width_ >> 3,  // frame width in 8 pixel blocks
+        .height = height_ >> 3 // frame height in 8 pixel blocks
    };

-    memset(RtpBuf, 0x00, sizeof(RtpBuf));
+    uint8_t rtp_buffer[0x800];
+    //    memset(RtpBuf, 0x00, sizeof(RtpBuf));
    // Prepare the first 4 byte of the packet. This is the Rtp over Rtsp header in case of TCP based transport
-    RtpBuf[0] = '$'; // magic number
-    RtpBuf[1] = 0;   // number of multiplexed subchannel on RTPS connection - here the RTP channel
-    RtpBuf[2] = (RtpPacketSize & 0x0000FF00) >> 8;
-    RtpBuf[3] = (RtpPacketSize & 0x000000FF);
+    rtp_buffer[0] = (uint8_t)'$'; // magic number
+    rtp_buffer[1] = 0;            // multiplexed subchannel number on the RTSP connection - here the RTP channel
+                                  //    rtp_buffer[2] = (RtpPacketSize & 0xFF00) >> 8;
+                                  //    rtp_buffer[3] = (RtpPacketSize & 0x00FF);
    // Prepare the 12 byte RTP header
-    RtpBuf[4] = 0x80;                                  // RTP version
-    RtpBuf[5] = 0x1a | (isLastFragment ? 0x80 : 0x00); // JPEG payload (26) and marker bit
-    RtpBuf[7] = m_SequenceNumber & 0x0FF;              // each packet is counted with a sequence counter
-    RtpBuf[6] = m_SequenceNumber >> 8;
-    RtpBuf[8] = (m_Timestamp & 0xFF000000) >> 24; // each image gets a timestamp
-    RtpBuf[9] = (m_Timestamp & 0x00FF0000) >> 16;
-    RtpBuf[10] = (m_Timestamp & 0x0000FF00) >> 8;
-    RtpBuf[11] = (m_Timestamp & 0x000000FF);
-    RtpBuf[12] = 0x13; // 4 byte SSRC (sychronization source identifier)
-    RtpBuf[13] = 0xf9; // we just an arbitrary number here to keep it simple
-    RtpBuf[14] = 0x7e;
-    RtpBuf[15] = 0x67;
+    // RtpBuf[4] = 0x80;                                  // RTP version
+    // RtpBuf[5] = 0x1a | (isLastFragment ? 0x80 : 0x00); // JPEG payload (26) and marker bit
+    // RtpBuf[7] = m_SequenceNumber & 0x0FF;              // each packet is counted with a sequence counter
+    // RtpBuf[6] = m_SequenceNumber >> 8;
+    // RtpBuf[8] = (m_Timestamp & 0xFF000000) >> 24; // each image gets a timestamp
+    // RtpBuf[9] = (m_Timestamp & 0x00FF0000) >> 16;
+    // RtpBuf[10] = (m_Timestamp & 0x0000FF00) >> 8;
+    // RtpBuf[11] = (m_Timestamp & 0x000000FF);
+    // RtpBuf[12] = 0x13; // 4 byte SSRC (synchronization source identifier)
+    // RtpBuf[13] = 0xf9; // we just use an arbitrary number here to keep it simple
+    // RtpBuf[14] = 0x7e;
+    // RtpBuf[15] = 0x67;

    // Prepare the 8 byte payload JPEG header
-    RtpBuf[16] = 0x00;                                // type specific
-    RtpBuf[17] = (fragmentOffset & 0x00FF0000) >> 16; // 3 byte fragmentation offset for fragmented images
-    RtpBuf[18] = (fragmentOffset & 0x0000FF00) >> 8;
-    RtpBuf[19] = (fragmentOffset & 0x000000FF);
+    // RtpBuf[16] = 0x00;                                // type specific
+    // RtpBuf[17] = (fragmentOffset & 0x00FF0000) >> 16; // 3 byte fragmentation offset for fragmented images
+    // RtpBuf[18] = (fragmentOffset & 0x0000FF00) >> 8;
+    // RtpBuf[19] = (fragmentOffset & 0x000000FF);

-    /*    These sampling factors indicate that the chrominance components of
-       type 0 video is downsampled horizontally by 2 (often called 4:2:2)
-       while the chrominance components of type 1 video are downsampled both
-       horizontally and vertically by 2 (often called 4:2:0). */
-    RtpBuf[20] = 0x00;         // type (fixme might be wrong for camera data) https://tools.ietf.org/html/rfc2435
-    RtpBuf[21] = q;            // quality scale factor was 0x5e
-    RtpBuf[22] = m_width / 8;  // width  / 8
-    RtpBuf[23] = m_height / 8; // height / 8
+    // /*    These sampling factors indicate that the chrominance components of
+    //    type 0 video is downsampled horizontally by 2 (often called 4:2:2)
+    //    while the chrominance components of type 1 video are downsampled both
+    //    horizontally and vertically by 2 (often called 4:2:0). */
+    // RtpBuf[20] = 0x00;         // type (fixme might be wrong for camera data) https://tools.ietf.org/html/rfc2435
+    // RtpBuf[21] = q;            // quality scale factor was 0x5e
+    // RtpBuf[22] = width_ / 8;  // width  / 8
+    // RtpBuf[23] = height_ / 8; // height / 8

-    int headerLen = 24; // Inlcuding jpeg header but not qant table header
+    int headerLen = 24; // Including jpeg header but not quant table header
    if (includeQuantTbl)
    { // we need a quant header - but only in first packet of the frame
        // printf("inserting quanttbl\n");