//! waycap_rs/encoders/nvenc_encoder.rs

1use std::ptr::null_mut;
2
3use crossbeam::channel::{bounded, Receiver, Sender};
4use cust::{
5    prelude::Context,
6    sys::{
7        cuCtxSetCurrent, cuGraphicsMapResources, cuGraphicsResourceSetMapFlags_v2,
8        cuGraphicsSubResourceGetMappedArray, cuGraphicsUnmapResources,
9        cuGraphicsUnregisterResource, cuMemcpy2D_v2, CUDA_MEMCPY2D_v2, CUarray, CUdeviceptr,
10        CUgraphicsResource, CUmemorytype, CUresult,
11    },
12};
13use ffmpeg_next::{
14    self as ffmpeg,
15    ffi::{
16        av_buffer_ref, av_buffer_unref, av_hwdevice_ctx_alloc, av_hwdevice_ctx_init,
17        av_hwframe_ctx_init, av_hwframe_get_buffer, AVHWDeviceContext, AVHWFramesContext,
18        AVPixelFormat,
19    },
20    Rational,
21};
22use pipewire as pw;
23
24use crate::{
25    encoders::video::{PipewireSPA, ProcessingThread, VideoEncoder},
26    types::{
27        config::QualityPreset,
28        error::{Result, WaycapError},
29        video_frame::{EncodedVideoFrame, RawVideoFrame},
30    },
31    utils::{extract_dmabuf_planes, TIME_UNIT_NS},
32    waycap_egl::EglContext,
33};
34use khronos_egl::Image;
35
36use super::{
37    cuda::{cuGraphicsGLRegisterImage, AVCUDADeviceContext},
38    video::{create_hw_frame_ctx, GOP_SIZE},
39};
40
// DRM format modifiers advertised to PipeWire so the compositor exports
// DMA-BUF frames in a layout the NVIDIA GPU can import directly.
//
// Values taken from what OBS negotiates. Written in hex so the modifier
// structure is visible: the 0x03 top byte is the NVIDIA vendor prefix
// (block-linear layouts), and 0x00ff_ffff_ffff_ffff is
// DRM_FORMAT_MOD_INVALID, i.e. "no explicit modifier / implicit layout".
const NVIDIA_MODIFIERS: &[i64] = &[
    0x0300_0000_0060_6010,
    0x0300_0000_0060_6011,
    0x0300_0000_0060_6012,
    0x0300_0000_0060_6013,
    0x0300_0000_0060_6014,
    0x0300_0000_0060_6015,
    0x0300_0000_00e0_8010,
    0x0300_0000_00e0_8011,
    0x0300_0000_00e0_8012,
    0x0300_0000_00e0_8013,
    0x0300_0000_00e0_8014,
    0x0300_0000_00e0_8015,
    0x00ff_ffff_ffff_ffff, // DRM_FORMAT_MOD_INVALID
];
59
/// Encoder which provides frames encoded using Nvenc
///
/// Only available for Nvidia GPUs
pub struct NvencEncoder {
    // The ffmpeg nvenc encoder; `None` after `drop_processor()` and between resets.
    encoder: Option<ffmpeg::codec::encoder::Video>,
    // Capture dimensions in pixels; used to (re)create the encoder and EGL context.
    width: u32,
    height: u32,
    // ffmpeg codec name, e.g. "h264_nvenc"; kept so `reset()` can rebuild the encoder.
    encoder_name: String,
    quality: QualityPreset,
    // Receiving end of the encoded-frame channel; `output()` hands out clones of it.
    encoded_frame_recv: Option<Receiver<EncodedVideoFrame>>,
    // Sending end used by the processing thread to publish encoded packets.
    encoded_frame_sender: Sender<EncodedVideoFrame>,

    // CUDA context shared with ffmpeg's hw device context.
    cuda_ctx: Context,
    // CUDA handle for the registered GL texture; null until `init_gl` runs.
    graphics_resource: CUgraphicsResource,
    egl_context: Option<Box<EglContext>>, // boxed egl context because its huge
    // GL texture id the DMA-BUF frames are imported into; 0 until `init_gl` runs.
    egl_texture: u32,
}
77
// SAFETY(review): the raw CUDA/EGL handles held here are not inherently
// thread-safe; these impls assume the encoder is driven from a single
// processing thread at a time (setup/process/teardown) — TODO confirm.
unsafe impl Send for NvencEncoder {}
unsafe impl Sync for NvencEncoder {}
80
81impl VideoEncoder for NvencEncoder {
82    type Output = EncodedVideoFrame;
83    fn reset(&mut self) -> Result<()> {
84        self.drop_processor();
85        let new_encoder = Self::create_encoder(
86            self.width,
87            self.height,
88            &self.encoder_name,
89            &self.quality,
90            &self.cuda_ctx,
91        )?;
92
93        self.encoder = Some(new_encoder);
94        Ok(())
95    }
96
97    fn drop_processor(&mut self) {
98        self.encoder.take();
99    }
100
101    fn output(&mut self) -> Option<Receiver<EncodedVideoFrame>> {
102        self.encoded_frame_recv.clone()
103    }
104
105    fn drain(&mut self) -> Result<()> {
106        if let Some(ref mut encoder) = self.encoder {
107            // Drain encoder
108            encoder.send_eof()?;
109            let mut packet = ffmpeg::codec::packet::Packet::empty();
110            while encoder.receive_packet(&mut packet).is_ok() {} // Discard these frames
111        }
112        Ok(())
113    }
114
115    fn get_encoder(&self) -> &Option<ffmpeg::codec::encoder::Video> {
116        &self.encoder
117    }
118}
119impl ProcessingThread for NvencEncoder {
120    fn thread_setup(&mut self) -> Result<()> {
121        self.egl_context = Some(Box::new(EglContext::new(
122            self.width as i32,
123            self.height as i32,
124        )?));
125        self.make_current()?;
126        self.init_gl(None)?;
127        Ok(())
128    }
129
130    fn thread_teardown(&mut self) -> Result<()> {
131        self.egl_context.as_mut().unwrap().release_current()
132    }
133
134    fn process(&mut self, frame: RawVideoFrame) -> Result<()> {
135        match egl_img_from_dmabuf(self.egl_context.as_ref().unwrap(), &frame) {
136            Ok(img) => {
137                if let Some(ref mut encoder) = self.encoder {
138                    let mut cuda_frame = ffmpeg::util::frame::Video::new(
139                        ffmpeg_next::format::Pixel::CUDA,
140                        encoder.width(),
141                        encoder.height(),
142                    );
143
144                    unsafe {
145                        let ret = av_hwframe_get_buffer(
146                            (*encoder.as_ptr()).hw_frames_ctx,
147                            cuda_frame.as_mut_ptr(),
148                            0,
149                        );
150                        if ret < 0 {
151                            return Err(WaycapError::Encoding(format!(
152                                "Failed to allocate CUDA frame buffer: {ret}",
153                            )));
154                        }
155
156                        let result =
157                            cuGraphicsMapResources(1, &mut self.graphics_resource, null_mut());
158                        if result != CUresult::CUDA_SUCCESS {
159                            gl::BindTexture(gl::TEXTURE_2D, 0);
160                            return Err(WaycapError::Encoding(format!(
161                                "Error mapping GL image to CUDA: {result:?}",
162                            )));
163                        }
164
165                        let mut cuda_array: CUarray = null_mut();
166
167                        let result = cuGraphicsSubResourceGetMappedArray(
168                            &mut cuda_array,
169                            self.graphics_resource,
170                            0,
171                            0,
172                        );
173                        if result != CUresult::CUDA_SUCCESS {
174                            cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
175                            gl::BindTexture(gl::TEXTURE_2D, 0);
176                            return Err(WaycapError::Encoding(format!(
177                                "Error getting CUDA Array: {result:?}",
178                            )));
179                        }
180
181                        let copy_params = CUDA_MEMCPY2D_v2 {
182                            srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
183                            srcArray: cuda_array,
184                            srcXInBytes: 0,
185                            srcY: 0,
186                            srcHost: std::ptr::null(),
187                            srcDevice: 0,
188                            srcPitch: 0,
189
190                            dstMemoryType: CUmemorytype::CU_MEMORYTYPE_DEVICE,
191                            dstDevice: (*cuda_frame.as_ptr()).data[0] as CUdeviceptr,
192                            dstPitch: (*cuda_frame.as_ptr()).linesize[0] as usize,
193                            dstXInBytes: 0,
194                            dstY: 0,
195                            dstHost: std::ptr::null_mut(),
196                            dstArray: std::ptr::null_mut(),
197
198                            // RGBA is 4 bytes per pixel
199                            WidthInBytes: (encoder.width() * 4) as usize,
200                            Height: encoder.height() as usize,
201                        };
202
203                        let result = cuMemcpy2D_v2(&copy_params);
204                        if result != CUresult::CUDA_SUCCESS {
205                            cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
206                            gl::BindTexture(gl::TEXTURE_2D, 0);
207                            return Err(WaycapError::Encoding(format!(
208                                "Error mapping cuda frame: {result:?}",
209                            )));
210                        }
211
212                        // Cleanup
213                        let result =
214                            cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
215                        if result != CUresult::CUDA_SUCCESS {
216                            return Err(WaycapError::Encoding(format!(
217                                "Could not unmap resource: {result:?}",
218                            )));
219                        }
220
221                        gl::BindTexture(gl::TEXTURE_2D, 0);
222                    }
223
224                    cuda_frame.set_pts(Some(frame.timestamp));
225                    encoder.send_frame(&cuda_frame)?;
226
227                    let mut packet = ffmpeg::codec::packet::Packet::empty();
228                    if encoder.receive_packet(&mut packet).is_ok() {
229                        if let Some(data) = packet.data() {
230                            match self.encoded_frame_sender.try_send(EncodedVideoFrame {
231                                data: data.to_vec(),
232                                is_keyframe: packet.is_key(),
233                                pts: packet.pts().unwrap_or(0),
234                                dts: packet.dts().unwrap_or(0),
235                            }) {
236                                Ok(_) => {}
237                                Err(crossbeam::channel::TrySendError::Full(_)) => {
238                                    log::error!(
239                                        "Could not send encoded video frame. Receiver is full"
240                                    );
241                                }
242                                Err(crossbeam::channel::TrySendError::Disconnected(_)) => {
243                                    log::error!(
244                                        "Could not send encoded video frame. Receiver disconnected"
245                                    );
246                                }
247                            }
248                        };
249                    }
250                }
251                self.egl_context.as_ref().unwrap().destroy_image(img)?;
252            }
253            Err(e) => log::error!("Could not process dma buf frame: {e:?}"),
254        }
255        Ok(())
256    }
257}
258
impl PipewireSPA for NvencEncoder {
    /// Build the PipeWire stream parameter (SPA pod) describing the video
    /// formats this encoder accepts as DMA-BUF input.
    fn get_spa_definition() -> Result<pw::spa::pod::Object> {
        // Advertise NVIDIA's DRM format modifiers so the compositor exports
        // DMA-BUF frames in a layout the GPU can import directly.
        let nvidia_mod_property = pw::spa::pod::Property {
            key: pw::spa::param::format::FormatProperties::VideoModifier.as_raw(),
            flags: pw::spa::pod::PropertyFlags::empty(),
            value: pw::spa::pod::Value::Choice(pw::spa::pod::ChoiceValue::Long(
                pw::spa::utils::Choice::<i64>(
                    pw::spa::utils::ChoiceFlags::empty(),
                    pw::spa::utils::ChoiceEnum::<i64>::Enum {
                        default: NVIDIA_MODIFIERS[0],
                        alternatives: NVIDIA_MODIFIERS.to_vec(),
                    },
                ),
            )),
        };

        Ok(pw::spa::pod::object!(
            pw::spa::utils::SpaTypes::ObjectParamFormat,
            pw::spa::param::ParamType::EnumFormat,
            pw::spa::pod::property!(
                pw::spa::param::format::FormatProperties::MediaType,
                Id,
                pw::spa::param::format::MediaType::Video
            ),
            pw::spa::pod::property!(
                pw::spa::param::format::FormatProperties::MediaSubtype,
                Id,
                pw::spa::param::format::MediaSubtype::Raw
            ),
            nvidia_mod_property,
            // Accepted pixel formats, first entry is the preferred default.
            pw::spa::pod::property!(
                pw::spa::param::format::FormatProperties::VideoFormat,
                Choice,
                Enum,
                Id,
                pw::spa::param::video::VideoFormat::NV12,
                pw::spa::param::video::VideoFormat::I420,
                pw::spa::param::video::VideoFormat::BGRA
            ),
            pw::spa::pod::property!(
                pw::spa::param::format::FormatProperties::VideoSize,
                Choice,
                Range,
                Rectangle,
                pw::spa::utils::Rectangle {
                    width: 2560,
                    height: 1440
                }, // Default
                pw::spa::utils::Rectangle {
                    width: 1,
                    height: 1
                }, // Min
                pw::spa::utils::Rectangle {
                    width: 4096,
                    height: 4096
                } // Max
            ),
            pw::spa::pod::property!(
                pw::spa::param::format::FormatProperties::VideoFramerate,
                Choice,
                Range,
                Fraction,
                pw::spa::utils::Fraction { num: 240, denom: 1 }, // Default
                pw::spa::utils::Fraction { num: 0, denom: 1 },   // Min
                pw::spa::utils::Fraction { num: 244, denom: 1 }  // Max
            ),
        ))
    }
}
328
329fn egl_img_from_dmabuf(egl_ctx: &EglContext, raw_frame: &RawVideoFrame) -> Result<Image> {
330    let dma_buf_planes = extract_dmabuf_planes(raw_frame)?;
331
332    let format = drm_fourcc::DrmFourcc::Argb8888 as u32;
333    let modifier = raw_frame.modifier;
334
335    let egl_image = egl_ctx.create_image_from_dmabuf(
336        &dma_buf_planes,
337        format,
338        raw_frame.dimensions.width,
339        raw_frame.dimensions.height,
340        modifier,
341    )?;
342
343    egl_ctx.update_texture_from_image(egl_image)?;
344
345    Ok(egl_image)
346}
347
348impl NvencEncoder {
349    pub(crate) fn new(width: u32, height: u32, quality: QualityPreset) -> Result<Self> {
350        let encoder_name = "h264_nvenc";
351
352        let (frame_tx, frame_rx): (Sender<EncodedVideoFrame>, Receiver<EncodedVideoFrame>) =
353            bounded(10);
354        let cuda_ctx = cust::quick_init().unwrap();
355
356        let encoder = Self::create_encoder(width, height, encoder_name, &quality, &cuda_ctx)?;
357
358        Ok(Self {
359            encoder: Some(encoder),
360            width,
361            height,
362            encoder_name: encoder_name.to_string(),
363            quality,
364            encoded_frame_recv: Some(frame_rx),
365            encoded_frame_sender: frame_tx,
366            cuda_ctx,
367            graphics_resource: null_mut(),
368            egl_context: None,
369            egl_texture: 0,
370        })
371    }
372
373    fn create_encoder(
374        width: u32,
375        height: u32,
376        encoder: &str,
377        quality: &QualityPreset,
378        cuda_ctx: &Context,
379    ) -> Result<ffmpeg::codec::encoder::Video> {
380        let encoder_codec =
381            ffmpeg::codec::encoder::find_by_name(encoder).ok_or(ffmpeg::Error::EncoderNotFound)?;
382
383        let mut encoder_ctx = ffmpeg::codec::context::Context::new_with_codec(encoder_codec)
384            .encoder()
385            .video()?;
386
387        encoder_ctx.set_width(width);
388        encoder_ctx.set_height(height);
389        encoder_ctx.set_format(ffmpeg::format::Pixel::CUDA);
390        encoder_ctx.set_bit_rate(16_000_000);
391
392        unsafe {
393            // Set up the cuda context
394            let nvenc_device =
395                av_hwdevice_ctx_alloc(ffmpeg_next::ffi::AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA);
396
397            if nvenc_device.is_null() {
398                return Err(WaycapError::Init(
399                    "Could not initialize nvenc device".into(),
400                ));
401            }
402
403            let hw_device_ctx = (*nvenc_device).data as *mut AVHWDeviceContext;
404            let cuda_device_ctx = (*hw_device_ctx).hwctx as *mut AVCUDADeviceContext;
405            (*cuda_device_ctx).cuda_ctx = cuda_ctx.as_raw();
406
407            let err = av_hwdevice_ctx_init(nvenc_device);
408
409            if err < 0 {
410                return Err(WaycapError::Init(format!(
411                    "Error trying to initialize hw device context: {err:?}",
412                )));
413            }
414
415            let hw_device_ctx = (*nvenc_device).data as *mut AVHWDeviceContext;
416            let cuda_device_ctx = (*hw_device_ctx).hwctx as *mut AVCUDADeviceContext;
417            (*cuda_device_ctx).cuda_ctx = cuda_ctx.as_raw();
418
419            let mut frame_ctx = create_hw_frame_ctx(nvenc_device)?;
420
421            if frame_ctx.is_null() {
422                return Err(WaycapError::Init(
423                    "Could not initialize hw frame context".into(),
424                ));
425            }
426
427            let hw_frame_context = &mut *((*frame_ctx).data as *mut AVHWFramesContext);
428
429            hw_frame_context.width = width as i32;
430            hw_frame_context.height = height as i32;
431            hw_frame_context.sw_format = AVPixelFormat::AV_PIX_FMT_RGBA;
432            hw_frame_context.format = encoder_ctx.format().into();
433            hw_frame_context.device_ctx = hw_device_ctx;
434            // Decides buffer size if we do not pop frame from the encoder we cannot
435            // keep pushing. Smaller better as we reserve less GPU memory
436            hw_frame_context.initial_pool_size = 2;
437
438            let err = av_hwframe_ctx_init(frame_ctx);
439            if err < 0 {
440                return Err(WaycapError::Init(format!(
441                    "Error trying to initialize hw frame context: {err:?}",
442                )));
443            }
444
445            (*encoder_ctx.as_mut_ptr()).hw_device_ctx = av_buffer_ref(nvenc_device);
446            (*encoder_ctx.as_mut_ptr()).hw_frames_ctx = av_buffer_ref(frame_ctx);
447
448            av_buffer_unref(&mut frame_ctx);
449        }
450
451        encoder_ctx.set_time_base(Rational::new(1, TIME_UNIT_NS as i32));
452        encoder_ctx.set_gop(GOP_SIZE);
453
454        let encoder_params = ffmpeg::codec::Parameters::new();
455
456        let opts = Self::get_encoder_params(quality);
457
458        encoder_ctx.set_parameters(encoder_params)?;
459        let encoder = encoder_ctx.open_with(opts)?;
460
461        Ok(encoder)
462    }
463
464    fn get_encoder_params(quality: &QualityPreset) -> ffmpeg::Dictionary<'_> {
465        let mut opts = ffmpeg::Dictionary::new();
466        opts.set("vsync", "vfr");
467        opts.set("rc", "vbr");
468        opts.set("tune", "hq");
469        match quality {
470            QualityPreset::Low => {
471                opts.set("preset", "p2");
472                opts.set("cq", "30");
473                opts.set("b:v", "20M");
474            }
475            QualityPreset::Medium => {
476                opts.set("preset", "p4");
477                opts.set("cq", "25");
478                opts.set("b:v", "40M");
479            }
480            QualityPreset::High => {
481                opts.set("preset", "p7");
482                opts.set("cq", "20");
483                opts.set("b:v", "80M");
484            }
485            QualityPreset::Ultra => {
486                opts.set("preset", "p7");
487                opts.set("cq", "15");
488                opts.set("b:v", "120M");
489            }
490        }
491        opts
492    }
493
494    fn init_gl(&mut self, texture_id: Option<u32>) -> Result<()> {
495        self.egl_texture = match texture_id {
496            Some(texture_id) => texture_id,
497            None => {
498                self.egl_context
499                    .as_ref()
500                    .unwrap()
501                    .create_persistent_texture()?;
502                self.egl_context.as_ref().unwrap().get_texture_id().unwrap()
503            }
504        };
505
506        unsafe {
507            // Try to register GL texture with CUDA
508            let result = cuGraphicsGLRegisterImage(
509                &mut self.graphics_resource,
510                self.egl_texture,
511                gl::TEXTURE_2D, // GL_TEXTURE_2D
512                0x00,           // CU_GRAPHICS_REGISTER_FLAGS_READ_NONE
513            );
514
515            if result != CUresult::CUDA_SUCCESS {
516                return Err(WaycapError::Init(format!(
517                    "Error registering GL texture to CUDA: {result:?}",
518                )));
519            }
520
521            let result = cuGraphicsResourceSetMapFlags_v2(self.graphics_resource, 0);
522
523            if result != CUresult::CUDA_SUCCESS {
524                cuGraphicsUnregisterResource(self.graphics_resource);
525                gl::BindTexture(gl::TEXTURE_2D, 0);
526                return Err(WaycapError::Init(format!(
527                    "Failed to set graphics resource map flags: {result:?}",
528                )));
529            }
530        }
531
532        Ok(())
533    }
534
535    /// Set cuda  context to current thread
536    fn make_current(&self) -> Result<()> {
537        unsafe { cuCtxSetCurrent(self.cuda_ctx.as_raw()) };
538        Ok(())
539    }
540}
541
542impl Drop for NvencEncoder {
543    fn drop(&mut self) {
544        if let Err(e) = self.drain() {
545            if let WaycapError::FFmpeg(ffmpeg::Error::Other { errno: 541478725 }) = e {
546                // This seems normal when a stream is empty,
547                // like when its been drained before (in Capture::finish for example)
548                // see: https://trac.ffmpeg.org/ticket/7290
549            } else {
550                log::error!("Error while draining nvenc encoder during drop: {e:?}");
551            }
552        }
553        self.drop_processor();
554
555        self.egl_context.as_ref().unwrap().make_current().unwrap();
556        if let Err(e) = self.make_current() {
557            log::error!("Could not make context current during drop: {e:?}");
558        }
559
560        let result = unsafe { cuGraphicsUnregisterResource(self.graphics_resource) };
561        if result != CUresult::CUDA_SUCCESS {
562            log::error!("Error cleaning up graphics resource: {result:?}");
563        }
564    }
565}