#include "screen_encoder.h"

// NOTE(review): the header names of the five angle-bracket includes were not
// readable in the reviewed copy of this file. The list below is reconstructed
// from the identifiers this translation unit uses — reconcile it with version
// control before merging.
#include <d3d11.h>
#include <dxgi1_2.h>
#include <mfapi.h>
#include <mferror.h>
#include <mftransform.h>
#include <codecapi.h>
#include <icodecapi.h>
#include <wrl/client.h>

#include <chrono>
#include <cstdint>
#include <cstring>
#include <thread>
#include <vector>

using Microsoft::WRL::ComPtr;

static constexpr LONGLONG kFrameDuration = 333333;  // 100ns units ≈ 30fps
static constexpr UINT32 kBitrate = 6'000'000;
static constexpr UINT32 kKeyframeInterval = 30;  // frames between keyframes

// Scan an Annex-B bitstream for the span covering the SPS (type 7) through
// the end of the PPS (type 8). Returns false if either is missing.
//
// When several SPS/PPS NAL units are present the last one of each kind wins.
// On success [sps_start, pps_end) is the byte range starting at the SPS start
// code and ending at the start code of the NAL unit following the PPS (or at
// `size` if the PPS is the final NAL unit). Returns false as well when the
// PPS does not come after the SPS, because the range would then be empty.
static bool ExtractSpsPps(const uint8_t* data, size_t size,
                          size_t& sps_start, size_t& pps_end) {
  struct Nal {
    size_t start;  // offset of the first byte of the start code
    int type;      // nal_unit_type (low 5 bits of the NAL header byte)
  };
  std::vector<Nal> nals;

  size_t i = 0;
  while (i + 4 <= size) {
    // The loop condition guarantees data[i .. i+3] is readable, so a 3-byte
    // start code always has its NAL header byte in range; a 4-byte start code
    // additionally needs data[i+4].
    size_t hdr_len = 0;
    if (data[i] == 0 && data[i + 1] == 0) {
      if (data[i + 2] == 0 && data[i + 3] == 1 && i + 4 < size) {
        hdr_len = 4;
      } else if (data[i + 2] == 1) {
        hdr_len = 3;
      }
    }
    if (hdr_len == 0) {
      ++i;
      continue;
    }
    nals.push_back({i, data[i + hdr_len] & 0x1F});
    i += hdr_len;
  }

  size_t sps_idx = SIZE_MAX;
  size_t pps_idx = SIZE_MAX;
  for (size_t j = 0; j < nals.size(); ++j) {
    if (nals[j].type == 7) sps_idx = j;
    if (nals[j].type == 8) pps_idx = j;
  }
  if (sps_idx == SIZE_MAX || pps_idx == SIZE_MAX) return false;

  sps_start = nals[sps_idx].start;
  const size_t after_pps = pps_idx + 1;
  pps_end = (after_pps < nals.size()) ? nals[after_pps].start : size;
  return sps_start < pps_end;
}

// Best-effort helper: sets a VT_UI4 codec property on `mft` through ICodecAPI.
// Failures (interface or property not supported) are deliberately ignored so
// that encoders lacking the property keep working with their defaults.
static void SetCodecUInt32(IMFTransform* mft, const GUID& property,
                           UINT32 value) {
  ComPtr<ICodecAPI> codec;
  if (!mft || FAILED(mft->QueryInterface(IID_PPV_ARGS(&codec)))) return;
  VARIANT v{};
  v.vt = VT_UI4;
  v.ulVal = value;
  codec->SetValue(&property, &v);
}

// ─── ScreenEncoder ───────────────────────────────────────────────────────────

ScreenEncoder::ScreenEncoder() = default;

// Stops the capture thread (if any) before members are destroyed.
ScreenEncoder::~ScreenEncoder() { Stop(); }

// Starts capturing and encoding on a new thread, delivering events to `sink`.
// Any capture that is already running is stopped first.
void ScreenEncoder::Start(
    std::unique_ptr<flutter::EventSink<flutter::EncodableValue>> sink) {
  Stop();
  {
    std::lock_guard lk(sink_mu_);
    sink_ = std::move(sink);
  }
  config_sent_ = false;
  sample_ts_ = 0;
  running_ = true;
  thread_ = std::thread(&ScreenEncoder::CaptureLoop, this);
}

// Signals the capture thread to exit, joins it, then drops the sink and all
// device/encoder objects. Safe to call when nothing is running.
void ScreenEncoder::Stop() {
  running_ = false;
  if (thread_.joinable()) thread_.join();
  {
    std::lock_guard lk(sink_mu_);
    sink_.reset();
  }
  encoder_.Reset();
  dupl_.Reset();
  staging_.Reset();
  d3d_ctx_.Reset();
  d3d_dev_.Reset();
  enc_width_ = enc_height_ = 0;
}

// Requests that the next encoded frame be a keyframe. The flag is consumed by
// the capture loop.
void ScreenEncoder::ForceKeyframe() { force_kf_ = true; }

// ─── D3D / DXGI init ─────────────────────────────────────────────────────────

// Creates the D3D11 device/context on the default hardware adapter and a
// desktop duplication for that adapter's output 0.
// Returns false if any step fails; `dupl_` is only valid on success.
bool ScreenEncoder::InitD3D() {
  D3D_FEATURE_LEVEL level{};
  HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0,
                                 nullptr, 0, D3D11_SDK_VERSION, &d3d_dev_,
                                 &level, &d3d_ctx_);
  if (FAILED(hr)) return false;

  ComPtr<IDXGIDevice> dxgi_dev;
  ComPtr<IDXGIAdapter> adapter;
  ComPtr<IDXGIOutput> output;
  ComPtr<IDXGIOutput1> output1;
  // Every step is checked: a failed query would otherwise leave a null
  // pointer that the next line dereferences.
  if (FAILED(d3d_dev_.As(&dxgi_dev))) return false;
  if (FAILED(dxgi_dev->GetAdapter(&adapter))) return false;
  if (FAILED(adapter->EnumOutputs(0, &output))) return false;
  if (FAILED(output.As(&output1))) return false;

  hr = output1->DuplicateOutput(d3d_dev_.Get(), &dupl_);
  return SUCCEEDED(hr);
}

// ─── Encoder init ────────────────────────────────────────────────────────────

// Activates the first synchronous H.264 encoder MFT reported by MFTEnumEx and
// configures it for NV12 input / H.264 High-profile output at `width` x
// `height`, 30 fps, kBitrate. On success records the size in enc_width_ /
// enc_height_ and returns true; on failure returns false (encoder_ may be left
// partially configured — callers must not use it).
//
// NOTE(review): no MFStartup call is visible in this file; presumably the
// plugin host performs it — confirm.
bool ScreenEncoder::InitEncoder(UINT width, UINT height) {
  MFT_REGISTER_TYPE_INFO out_info{MFMediaType_Video, MFVideoFormat_H264};
  UINT32 count = 0;
  IMFActivate** activates = nullptr;
  HRESULT hr = MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER,
                         MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_SORTANDFILTER,
                         nullptr, &out_info, &activates, &count);
  if (FAILED(hr)) return false;
  if (count == 0) {
    CoTaskMemFree(activates);
    return false;
  }

  hr = activates[0]->ActivateObject(IID_PPV_ARGS(&encoder_));
  for (UINT32 i = 0; i < count; ++i) activates[i]->Release();
  CoTaskMemFree(activates);
  if (FAILED(hr)) return false;

  // Keyframe interval through the codec API (best effort).
  SetCodecUInt32(encoder_.Get(), CODECAPI_AVEncMPVGOPSize, kKeyframeInterval);

  // Output: H264
  ComPtr<IMFMediaType> out_type;
  if (FAILED(MFCreateMediaType(&out_type))) return false;
  out_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  out_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
  MFSetAttributeSize(out_type.Get(), MF_MT_FRAME_SIZE, width, height);
  MFSetAttributeRatio(out_type.Get(), MF_MT_FRAME_RATE, 30, 1);
  out_type->SetUINT32(MF_MT_AVG_BITRATE, kBitrate);
  out_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
  out_type->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High);
  // Keyframe every kKeyframeInterval frames via media type attribute. This
  // has to be on the type *before* it is handed to the encoder; setting it on
  // our local object afterwards changes nothing the encoder can see.
  out_type->SetUINT32(MF_MT_MAX_KEYFRAME_SPACING, kKeyframeInterval);
  if (FAILED(encoder_->SetOutputType(0, out_type.Get(), 0))) return false;

  // Input: NV12
  ComPtr<IMFMediaType> in_type;
  if (FAILED(MFCreateMediaType(&in_type))) return false;
  in_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  in_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
  MFSetAttributeSize(in_type.Get(), MF_MT_FRAME_SIZE, width, height);
  MFSetAttributeRatio(in_type.Get(), MF_MT_FRAME_RATE, 30, 1);
  in_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
  in_type->SetUINT32(MF_MT_DEFAULT_STRIDE, static_cast<UINT32>(width));
  if (FAILED(encoder_->SetInputType(0, in_type.Get(), 0))) return false;

  encoder_->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
  encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
  encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);

  enc_width_ = width;
  enc_height_ = height;
  return true;
}

// ─── Frame capture ───────────────────────────────────────────────────────────

// Acquires the next desktop frame (16 ms timeout) and copies it into `bgra`
// as tightly packed 32-bit BGRA rows.
//
// `width` / `height` receive the size of the data in `bgra`. Odd desktop
// dimensions are cropped down to the next even value because the NV12
// conversion downstream works on 2x2 pixel blocks.
//
// Returns false when no frame was produced: timeout, duplication lost (a
// re-init is attempted), or any D3D failure.
bool ScreenEncoder::CaptureFrame(std::vector<uint8_t>& bgra,
                                 UINT& width, UINT& height) {
  if (!dupl_) {
    // An earlier re-init failed. Retry, and back off briefly on failure so
    // the capture loop does not spin.
    staging_.Reset();
    if (!InitD3D()) {
      dupl_.Reset();
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
      return false;
    }
  }

  DXGI_OUTDUPL_FRAME_INFO info{};
  ComPtr<IDXGIResource> res;
  HRESULT hr = dupl_->AcquireNextFrame(16, &info, &res);
  if (hr == DXGI_ERROR_WAIT_TIMEOUT) return false;
  if (FAILED(hr)) {
    // Duplication is no longer usable; rebuild device + duplication. The
    // staging texture belongs to the old device and must go too.
    dupl_.Reset();
    staging_.Reset();
    if (!InitD3D()) dupl_.Reset();
    return false;
  }

  // From here on a frame is held and must be released on every path, so the
  // GPU-side work is grouped and ReleaseFrame() follows unconditionally.
  UINT full_w = 0;
  UINT full_h = 0;
  const bool copied = [&] {
    ComPtr<ID3D11Texture2D> tex;
    if (FAILED(res.As(&tex))) return false;
    D3D11_TEXTURE2D_DESC desc{};
    tex->GetDesc(&desc);

    // (Re)create the staging texture when missing or when the size changed.
    bool need_staging = !staging_;
    if (staging_) {
      D3D11_TEXTURE2D_DESC cur{};
      staging_->GetDesc(&cur);
      need_staging = cur.Width != desc.Width || cur.Height != desc.Height;
    }
    if (need_staging) {
      D3D11_TEXTURE2D_DESC sd{};
      sd.Width = desc.Width;
      sd.Height = desc.Height;
      sd.MipLevels = 1;
      sd.ArraySize = 1;
      sd.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
      sd.SampleDesc.Count = 1;
      sd.Usage = D3D11_USAGE_STAGING;
      sd.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
      if (FAILED(d3d_dev_->CreateTexture2D(&sd, nullptr, &staging_))) {
        staging_.Reset();
        return false;
      }
    }

    d3d_ctx_->CopyResource(staging_.Get(), tex.Get());
    full_w = desc.Width;
    full_h = desc.Height;
    return true;
  }();
  res.Reset();
  dupl_->ReleaseFrame();
  if (!copied) return false;

  // Crop to even dimensions (see function comment).
  width = full_w & ~1u;
  height = full_h & ~1u;
  if (width == 0 || height == 0) return false;

  D3D11_MAPPED_SUBRESOURCE mapped{};
  hr = d3d_ctx_->Map(staging_.Get(), 0, D3D11_MAP_READ, 0, &mapped);
  if (FAILED(hr)) return false;

  // Rows in the mapped texture are RowPitch bytes apart; repack them tightly.
  const size_t row_bytes = static_cast<size_t>(width) * 4;
  bgra.resize(row_bytes * height);
  const uint8_t* src = static_cast<const uint8_t*>(mapped.pData);
  for (UINT row = 0; row < height; ++row) {
    memcpy(bgra.data() + row * row_bytes,
           src + static_cast<size_t>(row) * mapped.RowPitch, row_bytes);
  }
  d3d_ctx_->Unmap(staging_.Get(), 0);
  return true;
}

// ─── Color conversion: BGRA → NV12 ──────────────────────────────────────────

// Converts tightly packed BGRA (`w` x `h`, 4 bytes per pixel) into NV12:
// a full-resolution Y plane followed by an interleaved U/V plane with one
// sample per 2x2 block, taken from the block's top-left pixel.
//
// `nv12` is resized to w*h*3/2 bytes. A trailing odd row or column has no
// complete 2x2 block and gets no chroma sample, which keeps every write
// inside the buffer.
void ScreenEncoder::BgraToNv12(const uint8_t* bgra, std::vector<uint8_t>& nv12,
                               UINT w, UINT h) {
  const size_t luma_size = static_cast<size_t>(w) * h;
  nv12.resize(luma_size * 3 / 2);
  uint8_t* y_plane = nv12.data();
  uint8_t* uv_plane = y_plane + luma_size;

  for (UINT row = 0; row < h; ++row) {
    const bool chroma_row = (row & 1) == 0 && row + 1 < h;
    for (UINT col = 0; col < w; ++col) {
      const size_t px = static_cast<size_t>(row) * w + col;
      const uint8_t* p = bgra + px * 4;
      const int b = p[0], g = p[1], r = p[2];

      y_plane[px] =
          static_cast<uint8_t>(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16);

      if (chroma_row && (col & 1) == 0 && col + 1 < w) {
        const size_t off = static_cast<size_t>(row / 2) * w + col;
        uv_plane[off] = static_cast<uint8_t>(
            ((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128);
        uv_plane[off + 1] = static_cast<uint8_t>(
            ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);
      }
    }
  }
}

// ─── Encode one frame ────────────────────────────────────────────────────────

// Encodes one BGRA frame and sends every output sample the encoder produces.
//
// Events sent through SendEvent():
//   0x01 | <SPS..PPS bytes>                      once per encoder instance
//   0x02 | <8-byte big-endian ms since epoch> | <encoded bytes>   per sample
//
// The encoder is (re)created whenever there is none or the frame size differs
// from the size it was configured for. `keyframe` asks the encoder to emit a
// keyframe for this input.
void ScreenEncoder::EncodeFrame(const std::vector<uint8_t>& bgra,
                                UINT w, UINT h, bool keyframe) {
  if (w == 0 || h == 0 || bgra.size() < static_cast<size_t>(w) * h * 4) return;

  if (!encoder_ || enc_width_ != w || enc_height_ != h) {
    // Clear the cached size first so a failed init can never be mistaken for
    // a configured encoder on a later frame.
    encoder_.Reset();
    enc_width_ = enc_height_ = 0;
    config_sent_ = false;
    if (!InitEncoder(w, h)) {
      encoder_.Reset();
      return;
    }
  }

  // Build NV12 input sample
  std::vector<uint8_t> nv12;
  BgraToNv12(bgra.data(), nv12, w, h);
  const DWORD nv12_len = static_cast<DWORD>(nv12.size());

  ComPtr<IMFMediaBuffer> in_buf;
  if (FAILED(MFCreateMemoryBuffer(nv12_len, &in_buf))) return;
  {
    BYTE* ptr = nullptr;
    if (FAILED(in_buf->Lock(&ptr, nullptr, nullptr))) return;
    memcpy(ptr, nv12.data(), nv12.size());
    in_buf->Unlock();
  }
  in_buf->SetCurrentLength(nv12_len);

  ComPtr<IMFSample> in_sample;
  if (FAILED(MFCreateSample(&in_sample))) return;
  in_sample->SetSampleTime(sample_ts_);
  in_sample->SetSampleDuration(kFrameDuration);
  in_sample->AddBuffer(in_buf.Get());
  if (keyframe) {
    // The sample attribute alone only describes the input; the encoder is
    // asked for a keyframe through its codec API property.
    SetCodecUInt32(encoder_.Get(), CODECAPI_AVEncVideoForceKeyFrame, 1);
    in_sample->SetUINT32(MFSampleExtension_CleanPoint, 1);
  }
  sample_ts_ += kFrameDuration;

  if (FAILED(encoder_->ProcessInput(0, in_sample.Get(), 0))) return;

  // Drain output samples
  MFT_OUTPUT_STREAM_INFO si{};
  encoder_->GetOutputStreamInfo(0, &si);
  const bool mft_allocates =
      (si.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) != 0;

  while (true) {
    ComPtr<IMFSample> out_sample;
    if (!mft_allocates) {
      ComPtr<IMFMediaBuffer> out_buf;
      if (FAILED(MFCreateMemoryBuffer(si.cbSize ? si.cbSize : w * h * 2,
                                      &out_buf)) ||
          FAILED(MFCreateSample(&out_sample)) ||
          FAILED(out_sample->AddBuffer(out_buf.Get()))) {
        break;
      }
    }

    MFT_OUTPUT_DATA_BUFFER out_data{};
    out_data.pSample = out_sample.Get();
    DWORD status = 0;
    const HRESULT hr = encoder_->ProcessOutput(0, 1, &out_data, &status);
    if (out_data.pEvents) out_data.pEvents->Release();

    // A sample allocated by the MFT arrives with a reference we own: adopt it
    // without AddRef so it is released exactly once. Our own sample is
    // already owned by out_sample.
    ComPtr<IMFSample> result;
    if (mft_allocates) {
      result.Attach(out_data.pSample);
    } else {
      result = out_sample;
    }
    // Includes MF_E_TRANSFORM_NEED_MORE_INPUT, the normal end of the drain.
    if (FAILED(hr) || !result) break;

    ComPtr<IMFMediaBuffer> flat;
    if (FAILED(result->ConvertToContiguousBuffer(&flat))) break;
    BYTE* enc = nullptr;
    DWORD enc_len = 0;
    if (FAILED(flat->Lock(&enc, nullptr, &enc_len))) break;

    if (!config_sent_) {
      size_t sps_start = 0, pps_end = 0;
      if (ExtractSpsPps(enc, enc_len, sps_start, pps_end)) {
        const size_t cfg_len = pps_end - sps_start;
        std::vector<uint8_t> cfg(1 + cfg_len);
        cfg[0] = 0x01;
        memcpy(cfg.data() + 1, enc + sps_start, cfg_len);
        SendEvent(std::move(cfg));
        config_sent_ = true;
      }
    }

    const int64_t now_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::system_clock::now().time_since_epoch())
            .count();

    std::vector<uint8_t> frame(1 + 8 + enc_len);
    frame[0] = 0x02;
    for (int i = 0; i < 8; ++i) {
      frame[1 + i] = static_cast<uint8_t>(now_ms >> (56 - i * 8));
    }
    // data() + 9 rather than &frame[9]: the latter indexes one past the end
    // when the payload is empty.
    if (enc_len != 0) memcpy(frame.data() + 9, enc, enc_len);
    flat->Unlock();

    SendEvent(std::move(frame));
  }
}

// Delivers `data` to the current sink, if any, while holding sink_mu_.
//
// NOTE(review): this is called from the capture thread. Confirm that the
// sink implementation may be invoked off the platform thread.
void ScreenEncoder::SendEvent(std::vector<uint8_t> data) {
  std::lock_guard lk(sink_mu_);
  if (sink_) sink_->Success(flutter::EncodableValue(std::move(data)));
}

// ─── Capture loop ────────────────────────────────────────────────────────────

// Thread entry point: initialises COM and D3D, then captures and encodes
// frames until running_ is cleared. Exits immediately if D3D cannot be
// initialised.
void ScreenEncoder::CaptureLoop() {
  // CoUninitialize must only balance a successful CoInitializeEx.
  const bool com_ready =
      SUCCEEDED(CoInitializeEx(nullptr, COINIT_MULTITHREADED));

  if (InitD3D()) {
    // Reused across iterations so the frame buffer is allocated once.
    std::vector<uint8_t> bgra;
    while (running_) {
      UINT w = 0, h = 0;
      if (CaptureFrame(bgra, w, h)) {
        const bool kf = force_kf_.exchange(false);
        EncodeFrame(bgra, w, h, kf);
      }
    }

    if (encoder_) {
      encoder_->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
      encoder_->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, 0);
    }
  }

  // Release everything created on this thread before leaving the COM
  // apartment, instead of leaving it to Stop() on another thread afterwards.
  encoder_.Reset();
  dupl_.Reset();
  staging_.Reset();
  d3d_ctx_.Reset();
  d3d_dev_.Reset();
  enc_width_ = enc_height_ = 0;

  if (com_ready) CoUninitialize();
}