361 lines
12 KiB
C++
361 lines
12 KiB
C++
#include "screen_encoder.h"
|
|
|
|
#include <chrono>
|
|
#include <codecapi.h>
|
|
#include <mfapi.h>
|
|
#include <mferror.h>
|
|
#include <mfidl.h>
|
|
|
|
using Microsoft::WRL::ComPtr;
|
|
|
|
static constexpr LONGLONG kFrameDuration = 333333; // 100ns units ≈ 30fps
|
|
static constexpr UINT32 kBitrate = 6'000'000;
|
|
|
|
// Locates the H.264 parameter sets inside an Annex-B buffer.
//
// A NAL unit is recognised by a start code (00 00 00 01 or 00 00 01) that is
// followed by at least one more byte; its type is the low five bits of that
// byte. The span reported runs from the start code of the LAST SPS (type 7)
// to the end of the LAST PPS (type 8), where "end" is the offset of the next
// NAL unit's start code, or `size` when the PPS is the final unit.
//
// data, size  Buffer to scan. `data` may be null only when `size` is 0.
// sps_start   Out: offset of the SPS start code.
// pps_end     Out: one past the last byte belonging to the PPS.
//
// Returns true and fills both outputs when a non-empty span exists. Returns
// false and leaves both outputs untouched otherwise (no SPS, no PPS, or the
// last PPS does not come after the last SPS).
static bool ExtractSpsPps(const uint8_t* data, size_t size,
                          size_t& sps_start, size_t& pps_end) {
  constexpr int kNalSps = 7;
  constexpr int kNalPps = 8;

  bool have_sps = false;
  bool have_pps = false;
  bool prev_was_pps = false;  // the most recently seen NAL unit was a PPS
  size_t sps_at = 0;
  size_t pps_limit = size;    // end of the last PPS seen so far

  // pos + 4 <= size keeps data[pos .. pos+3] readable.
  for (size_t pos = 0; pos + 4 <= size;) {
    const uint8_t* p = data + pos;
    size_t prefix = 0;
    if (p[0] == 0 && p[1] == 0) {
      if (p[2] == 0 && p[3] == 1 && pos + 4 < size) {
        prefix = 4;  // header byte p[4] exists
      } else if (p[2] == 1) {
        prefix = 3;  // header byte p[3] exists by the loop condition
      }
    }
    if (prefix == 0) {
      ++pos;
      continue;
    }

    const int type = p[prefix] & 0x1F;

    // Any NAL unit that directly follows a PPS marks where that PPS ends.
    if (prev_was_pps) pps_limit = pos;
    prev_was_pps = (type == kNalPps);

    if (type == kNalPps) {
      have_pps = true;
      pps_limit = size;  // runs to the end unless another unit follows
    } else if (type == kNalSps) {
      have_sps = true;
      sps_at = pos;
    }
    pos += prefix;
  }

  if (!have_sps || !have_pps || sps_at >= pps_limit) return false;

  sps_start = sps_at;
  pps_end = pps_limit;
  return true;
}
|
|
|
|
// ─── ScreenEncoder ───────────────────────────────────────────────────────────
|
|
|
|
// Construction does no work of its own; the capture thread is only launched
// by Start().
ScreenEncoder::ScreenEncoder() = default;
|
|
|
|
// Tears the session down on destruction: Stop() joins the capture thread and
// drops the sink and the COM objects.
ScreenEncoder::~ScreenEncoder() {
  Stop();
}
|
|
|
|
void ScreenEncoder::Start(
|
|
std::unique_ptr<flutter::EventSink<flutter::EncodableValue>> sink) {
|
|
Stop();
|
|
{
|
|
std::lock_guard<std::mutex> lk(sink_mu_);
|
|
sink_ = std::move(sink);
|
|
}
|
|
config_sent_ = false;
|
|
sample_ts_ = 0;
|
|
running_ = true;
|
|
thread_ = std::thread(&ScreenEncoder::CaptureLoop, this);
|
|
}
|
|
|
|
void ScreenEncoder::Stop() {
|
|
running_ = false;
|
|
if (thread_.joinable()) thread_.join();
|
|
{
|
|
std::lock_guard<std::mutex> lk(sink_mu_);
|
|
sink_.reset();
|
|
}
|
|
encoder_.Reset();
|
|
dupl_.Reset();
|
|
staging_.Reset();
|
|
d3d_ctx_.Reset();
|
|
d3d_dev_.Reset();
|
|
enc_width_ = enc_height_ = 0;
|
|
}
|
|
|
|
// Raises the force-keyframe flag. The capture loop reads and clears it with
// exchange(false) when it next has a frame to encode.
void ScreenEncoder::ForceKeyframe() {
  force_kf_ = true;
}
|
|
|
|
// ─── D3D / DXGI init ─────────────────────────────────────────────────────────
|
|
|
|
bool ScreenEncoder::InitD3D() {
|
|
D3D_FEATURE_LEVEL level;
|
|
HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0,
|
|
nullptr, 0, D3D11_SDK_VERSION,
|
|
&d3d_dev_, &level, &d3d_ctx_);
|
|
if (FAILED(hr)) return false;
|
|
|
|
ComPtr<IDXGIDevice> dxgi_dev;
|
|
ComPtr<IDXGIAdapter> adapter;
|
|
ComPtr<IDXGIOutput> output;
|
|
ComPtr<IDXGIOutput1> output1;
|
|
|
|
d3d_dev_.As(&dxgi_dev);
|
|
dxgi_dev->GetAdapter(&adapter);
|
|
if (FAILED(adapter->EnumOutputs(0, &output))) return false;
|
|
if (FAILED(output.As(&output1))) return false;
|
|
|
|
hr = output1->DuplicateOutput(d3d_dev_.Get(), &dupl_);
|
|
return SUCCEEDED(hr);
|
|
}
|
|
|
|
// ─── Encoder init ────────────────────────────────────────────────────────────
|
|
|
|
// Creates and configures the H.264 encoder transform for width x height.
//
// Activates the first synchronous video-encoder MFT that can output H.264,
// sets an H.264 High-profile output type (30 fps, kBitrate, progressive, max
// keyframe spacing 30) and an NV12 input type of the same size, then sends
// the flush / begin-streaming / start-of-stream messages.
//
// Returns true on success with enc_width_/enc_height_ updated. On any failure
// returns false with encoder_ released and the recorded size reset to 0x0, so
// a half-configured transform is never left behind.
bool ScreenEncoder::InitEncoder(UINT width, UINT height) {
  const auto fail = [this] {
    encoder_.Reset();
    enc_width_ = enc_height_ = 0;
    return false;
  };

  MFT_REGISTER_TYPE_INFO out_info{MFMediaType_Video, MFVideoFormat_H264};

  UINT32 count = 0;
  IMFActivate** activates = nullptr;
  HRESULT hr = MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER,
                         MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_SORTANDFILTER,
                         nullptr, &out_info, &activates, &count);
  if (FAILED(hr)) return fail();

  hr = (count > 0) ? activates[0]->ActivateObject(IID_PPV_ARGS(&encoder_))
                   : E_FAIL;
  // The activation objects and the array are ours to free whether or not
  // activation worked. CoTaskMemFree accepts a null pointer.
  for (UINT32 i = 0; i < count; ++i) activates[i]->Release();
  CoTaskMemFree(activates);
  if (FAILED(hr)) return fail();

  // Output: H264
  ComPtr<IMFMediaType> out_type;
  if (FAILED(MFCreateMediaType(&out_type))) return fail();
  out_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  out_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
  MFSetAttributeSize(out_type.Get(), MF_MT_FRAME_SIZE, width, height);
  MFSetAttributeRatio(out_type.Get(), MF_MT_FRAME_RATE, 30, 1);
  out_type->SetUINT32(MF_MT_AVG_BITRATE, kBitrate);
  out_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
  out_type->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High);
  // Keyframe every 30 frames via media type attribute. It has to be on the
  // type BEFORE SetOutputType; setting it afterwards only changes our local
  // copy of the type.
  // NOTE(review): confirm the selected encoder honours this attribute.
  out_type->SetUINT32(MF_MT_MAX_KEYFRAME_SPACING, 30);
  if (FAILED(encoder_->SetOutputType(0, out_type.Get(), 0))) return fail();

  // Input: NV12
  ComPtr<IMFMediaType> in_type;
  if (FAILED(MFCreateMediaType(&in_type))) return fail();
  in_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  in_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
  MFSetAttributeSize(in_type.Get(), MF_MT_FRAME_SIZE, width, height);
  MFSetAttributeRatio(in_type.Get(), MF_MT_FRAME_RATE, 30, 1);
  in_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
  in_type->SetUINT32(MF_MT_DEFAULT_STRIDE, static_cast<UINT32>(width));
  if (FAILED(encoder_->SetInputType(0, in_type.Get(), 0))) return fail();

  // Best-effort notifications; their results were never acted on.
  encoder_->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
  encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
  encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);

  enc_width_ = width;
  enc_height_ = height;
  return true;
}
|
|
|
|
// ─── Frame capture ───────────────────────────────────────────────────────────
|
|
|
|
bool ScreenEncoder::CaptureFrame(std::vector<uint8_t>& bgra,
|
|
UINT& width, UINT& height) {
|
|
DXGI_OUTDUPL_FRAME_INFO info{};
|
|
ComPtr<IDXGIResource> res;
|
|
|
|
HRESULT hr = dupl_->AcquireNextFrame(16, &info, &res);
|
|
if (hr == DXGI_ERROR_WAIT_TIMEOUT) return false;
|
|
if (FAILED(hr)) {
|
|
dupl_.Reset();
|
|
staging_.Reset();
|
|
InitD3D();
|
|
return false;
|
|
}
|
|
|
|
ComPtr<ID3D11Texture2D> tex;
|
|
res.As(&tex);
|
|
|
|
D3D11_TEXTURE2D_DESC desc{};
|
|
tex->GetDesc(&desc);
|
|
width = desc.Width;
|
|
height = desc.Height;
|
|
|
|
// Recreate staging texture on size change
|
|
if (!staging_) {
|
|
D3D11_TEXTURE2D_DESC sd{};
|
|
sd.Width = width;
|
|
sd.Height = height;
|
|
sd.MipLevels = 1;
|
|
sd.ArraySize = 1;
|
|
sd.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
sd.SampleDesc.Count = 1;
|
|
sd.Usage = D3D11_USAGE_STAGING;
|
|
sd.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
|
d3d_dev_->CreateTexture2D(&sd, nullptr, &staging_);
|
|
}
|
|
|
|
d3d_ctx_->CopyResource(staging_.Get(), tex.Get());
|
|
dupl_->ReleaseFrame();
|
|
|
|
D3D11_MAPPED_SUBRESOURCE mapped{};
|
|
hr = d3d_ctx_->Map(staging_.Get(), 0, D3D11_MAP_READ, 0, &mapped);
|
|
if (FAILED(hr)) return false;
|
|
|
|
bgra.resize(width * height * 4);
|
|
const uint8_t* src = static_cast<const uint8_t*>(mapped.pData);
|
|
for (UINT row = 0; row < height; ++row)
|
|
memcpy(&bgra[row * width * 4], src + row * mapped.RowPitch, width * 4);
|
|
|
|
d3d_ctx_->Unmap(staging_.Get(), 0);
|
|
return true;
|
|
}
|
|
|
|
// ─── Color conversion: BGRA → NV12 ──────────────────────────────────────────
|
|
|
|
void ScreenEncoder::BgraToNv12(const uint8_t* bgra, std::vector<uint8_t>& nv12,
|
|
UINT w, UINT h) {
|
|
nv12.resize(w * h * 3 / 2);
|
|
uint8_t* Y = nv12.data();
|
|
uint8_t* UV = Y + w * h;
|
|
|
|
for (UINT row = 0; row < h; ++row) {
|
|
for (UINT col = 0; col < w; ++col) {
|
|
const uint8_t* p = bgra + (row * w + col) * 4;
|
|
const int b = p[0], g = p[1], r = p[2];
|
|
Y[row * w + col] =
|
|
(uint8_t)(((66*r + 129*g + 25*b + 128) >> 8) + 16);
|
|
|
|
if ((row & 1) == 0 && (col & 1) == 0) {
|
|
const UINT off = (row / 2) * w + col;
|
|
UV[off] = (uint8_t)(((-38*r - 74*g + 112*b + 128) >> 8) + 128);
|
|
UV[off+1] = (uint8_t)(((112*r - 94*g - 18*b + 128) >> 8) + 128);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ─── Encode one frame ────────────────────────────────────────────────────────
|
|
|
|
void ScreenEncoder::EncodeFrame(const std::vector<uint8_t>& bgra,
|
|
UINT w, UINT h, bool keyframe) {
|
|
if (enc_width_ != w || enc_height_ != h) {
|
|
encoder_.Reset();
|
|
config_sent_ = false;
|
|
if (!InitEncoder(w, h)) return;
|
|
}
|
|
|
|
// Build NV12 input sample
|
|
std::vector<uint8_t> nv12;
|
|
BgraToNv12(bgra.data(), nv12, w, h);
|
|
|
|
ComPtr<IMFSample> in_sample;
|
|
ComPtr<IMFMediaBuffer> in_buf;
|
|
MFCreateMemoryBuffer((DWORD)nv12.size(), &in_buf);
|
|
{
|
|
BYTE* ptr = nullptr;
|
|
in_buf->Lock(&ptr, nullptr, nullptr);
|
|
memcpy(ptr, nv12.data(), nv12.size());
|
|
in_buf->Unlock();
|
|
}
|
|
in_buf->SetCurrentLength((DWORD)nv12.size());
|
|
MFCreateSample(&in_sample);
|
|
in_sample->SetSampleTime(sample_ts_);
|
|
in_sample->SetSampleDuration(kFrameDuration);
|
|
in_sample->AddBuffer(in_buf.Get());
|
|
if (keyframe) in_sample->SetUINT32(MFSampleExtension_CleanPoint, 1);
|
|
sample_ts_ += kFrameDuration;
|
|
|
|
if (FAILED(encoder_->ProcessInput(0, in_sample.Get(), 0))) return;
|
|
|
|
// Drain output samples
|
|
MFT_OUTPUT_STREAM_INFO si{};
|
|
encoder_->GetOutputStreamInfo(0, &si);
|
|
|
|
while (true) {
|
|
ComPtr<IMFSample> out_sample;
|
|
ComPtr<IMFMediaBuffer> out_buf;
|
|
if (!(si.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES)) {
|
|
MFCreateMemoryBuffer(si.cbSize ? si.cbSize : w * h * 2, &out_buf);
|
|
MFCreateSample(&out_sample);
|
|
out_sample->AddBuffer(out_buf.Get());
|
|
}
|
|
|
|
MFT_OUTPUT_DATA_BUFFER out_data{};
|
|
out_data.pSample = out_sample.Get();
|
|
DWORD status = 0;
|
|
HRESULT hr = encoder_->ProcessOutput(0, 1, &out_data, &status);
|
|
if (out_data.pEvents) out_data.pEvents->Release();
|
|
if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) break;
|
|
if (FAILED(hr)) break;
|
|
|
|
ComPtr<IMFSample> result(out_data.pSample);
|
|
if (!result) break;
|
|
|
|
ComPtr<IMFMediaBuffer> flat;
|
|
result->ConvertToContiguousBuffer(&flat);
|
|
BYTE* enc = nullptr; DWORD enc_len = 0;
|
|
flat->Lock(&enc, nullptr, &enc_len);
|
|
|
|
if (!config_sent_) {
|
|
size_t sps_start = 0, pps_end = 0;
|
|
if (ExtractSpsPps(enc, enc_len, sps_start, pps_end)) {
|
|
std::vector<uint8_t> cfg(1 + (pps_end - sps_start));
|
|
cfg[0] = 0x01;
|
|
memcpy(&cfg[1], enc + sps_start, pps_end - sps_start);
|
|
SendEvent(std::move(cfg));
|
|
config_sent_ = true;
|
|
}
|
|
}
|
|
|
|
const int64_t now_ms =
|
|
std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
std::chrono::system_clock::now().time_since_epoch())
|
|
.count();
|
|
std::vector<uint8_t> frame(1 + 8 + enc_len);
|
|
frame[0] = 0x02;
|
|
for (int i = 0; i < 8; ++i)
|
|
frame[1 + i] = static_cast<uint8_t>(now_ms >> (56 - i * 8));
|
|
memcpy(&frame[9], enc, enc_len);
|
|
|
|
flat->Unlock();
|
|
SendEvent(std::move(frame));
|
|
}
|
|
}
|
|
|
|
void ScreenEncoder::SendEvent(std::vector<uint8_t> data) {
|
|
std::lock_guard<std::mutex> lk(sink_mu_);
|
|
if (sink_) sink_->Success(flutter::EncodableValue(std::move(data)));
|
|
}
|
|
|
|
// ─── Capture loop ────────────────────────────────────────────────────────────
|
|
|
|
void ScreenEncoder::CaptureLoop() {
|
|
CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
|
|
|
if (!InitD3D()) {
|
|
CoUninitialize();
|
|
return;
|
|
}
|
|
|
|
while (running_) {
|
|
std::vector<uint8_t> bgra;
|
|
UINT w = 0, h = 0;
|
|
if (CaptureFrame(bgra, w, h)) {
|
|
bool kf = force_kf_.exchange(false);
|
|
EncodeFrame(bgra, w, h, kf);
|
|
}
|
|
}
|
|
|
|
if (encoder_) {
|
|
encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
|
|
encoder_->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, 0);
|
|
}
|
|
CoUninitialize();
|
|
}
|