waycap_rs/encoders/
nvenc_encoder.rs

1use std::{any::Any, ptr::null_mut};
2
3use cust::{
4    prelude::Context,
5    sys::{
6        cuCtxSetCurrent, cuGraphicsMapResources, cuGraphicsResourceSetMapFlags_v2,
7        cuGraphicsSubResourceGetMappedArray, cuGraphicsUnmapResources,
8        cuGraphicsUnregisterResource, cuMemcpy2D_v2, CUDA_MEMCPY2D_v2, CUarray, CUdeviceptr,
9        CUgraphicsResource, CUmemorytype, CUresult,
10    },
11};
12use ffmpeg_next::{
13    self as ffmpeg,
14    ffi::{
15        av_buffer_ref, av_buffer_unref, av_hwdevice_ctx_alloc, av_hwdevice_ctx_init,
16        av_hwframe_ctx_init, av_hwframe_get_buffer, AVHWDeviceContext, AVHWFramesContext,
17        AVPixelFormat,
18    },
19    Rational,
20};
21use ringbuf::{
22    traits::{Producer, Split},
23    HeapCons, HeapProd, HeapRb,
24};
25
26use crate::types::{
27    config::QualityPreset,
28    error::{Result, WaycapError},
29    video_frame::{EncodedVideoFrame, RawVideoFrame},
30};
31
32use super::{
33    cuda::{cuGraphicsGLRegisterImage, AVCUDADeviceContext},
34    video::{create_hw_frame_ctx, VideoEncoder, GOP_SIZE},
35};
36
/// H.264 encoder built on FFmpeg's `h264_nvenc` codec that takes its input
/// from a GL texture registered with CUDA.
pub struct NvencEncoder {
    /// The open FFmpeg encoder; `None` after `drop_encoder` until `reset`
    /// builds a new one.
    encoder: Option<ffmpeg::codec::encoder::Video>,
    /// Frame width the encoder is (re)created with.
    width: u32,
    /// Frame height the encoder is (re)created with.
    height: u32,
    /// FFmpeg codec name used to look the encoder up (`"h264_nvenc"` as set in `new`).
    encoder_name: String,
    /// Quality preset used to derive the codec options on (re)creation.
    quality: QualityPreset,
    /// Consumer half of the encoded-frame ring buffer; handed out once via
    /// `take_encoded_recv`.
    encoded_frame_recv: Option<HeapCons<EncodedVideoFrame>>,
    /// Producer half of the encoded-frame ring buffer, filled by `process` and `drain`.
    encoded_frame_sender: Option<HeapProd<EncodedVideoFrame>>,

    /// CUDA context shared with the FFmpeg CUDA hardware device context.
    cuda_ctx: Context,
    /// CUDA handle for the registered GL texture; null until `init_gl` succeeds.
    graphics_resource: CUgraphicsResource,
    /// GL texture id most recently passed to `init_gl` (0 before that).
    egl_texture: u32,
}
50
// The struct stores a raw `CUgraphicsResource` handle, so `Send`/`Sync` are
// asserted manually here rather than derived by the compiler.
// NOTE(review): presumably callers make the CUDA context current (see
// `make_current`) on whichever thread uses the encoder and never touch it from
// two threads at once -- confirm, especially for `Sync`.
unsafe impl Send for NvencEncoder {}
unsafe impl Sync for NvencEncoder {}
53
54impl VideoEncoder for NvencEncoder {
55    fn new(width: u32, height: u32, quality: QualityPreset) -> Result<Self>
56    where
57        Self: Sized,
58    {
59        let encoder_name = "h264_nvenc";
60        let video_ring_buffer = HeapRb::<EncodedVideoFrame>::new(120);
61        let (video_ring_sender, video_ring_receiver) = video_ring_buffer.split();
62        let cuda_ctx = cust::quick_init().unwrap();
63
64        let encoder = Self::create_encoder(width, height, encoder_name, &quality, &cuda_ctx)?;
65
66        Ok(Self {
67            encoder: Some(encoder),
68            width,
69            height,
70            encoder_name: encoder_name.to_string(),
71            quality,
72            encoded_frame_recv: Some(video_ring_receiver),
73            encoded_frame_sender: Some(video_ring_sender),
74            cuda_ctx,
75            graphics_resource: null_mut(),
76            egl_texture: 0,
77        })
78    }
79
    /// Returns `self` as `&dyn Any` so callers holding a trait object can
    /// downcast to the concrete `NvencEncoder`.
    fn as_any(&self) -> &dyn Any {
        self
    }
83
84    fn process(&mut self, frame: &RawVideoFrame) -> Result<()> {
85        if let Some(ref mut encoder) = self.encoder {
86            let mut cuda_frame = ffmpeg::util::frame::Video::new(
87                ffmpeg_next::format::Pixel::CUDA,
88                encoder.width(),
89                encoder.height(),
90            );
91
92            unsafe {
93                let ret = av_hwframe_get_buffer(
94                    (*encoder.as_ptr()).hw_frames_ctx,
95                    cuda_frame.as_mut_ptr(),
96                    0,
97                );
98                if ret < 0 {
99                    return Err(WaycapError::Encoding(format!(
100                        "Failed to allocate CUDA frame buffer: {}",
101                        ret
102                    )));
103                }
104
105                let result = cuGraphicsMapResources(1, &mut self.graphics_resource, null_mut());
106                if result != CUresult::CUDA_SUCCESS {
107                    gl::BindTexture(gl::TEXTURE_2D, 0);
108                    return Err(WaycapError::Encoding(format!(
109                        "Error mapping GL image to CUDA: {:?}",
110                        result
111                    )));
112                }
113
114                let mut cuda_array: CUarray = null_mut();
115
116                let result = cuGraphicsSubResourceGetMappedArray(
117                    &mut cuda_array,
118                    self.graphics_resource,
119                    0,
120                    0,
121                );
122                if result != CUresult::CUDA_SUCCESS {
123                    cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
124                    gl::BindTexture(gl::TEXTURE_2D, 0);
125                    return Err(WaycapError::Encoding(format!(
126                        "Error getting CUDA Array: {:?}",
127                        result
128                    )));
129                }
130
131                let copy_params = CUDA_MEMCPY2D_v2 {
132                    srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
133                    srcArray: cuda_array,
134                    srcXInBytes: 0,
135                    srcY: 0,
136                    srcHost: std::ptr::null(),
137                    srcDevice: 0,
138                    srcPitch: 0,
139
140                    dstMemoryType: CUmemorytype::CU_MEMORYTYPE_DEVICE,
141                    dstDevice: (*cuda_frame.as_ptr()).data[0] as CUdeviceptr,
142                    dstPitch: (*cuda_frame.as_ptr()).linesize[0] as usize,
143                    dstXInBytes: 0,
144                    dstY: 0,
145                    dstHost: std::ptr::null_mut(),
146                    dstArray: std::ptr::null_mut(),
147
148                    // RGBA is 4 bytes per pixel
149                    WidthInBytes: (encoder.width() * 4) as usize,
150                    Height: encoder.height() as usize,
151                };
152
153                let result = cuMemcpy2D_v2(&copy_params);
154                if result != CUresult::CUDA_SUCCESS {
155                    cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
156                    gl::BindTexture(gl::TEXTURE_2D, 0);
157                    return Err(WaycapError::Encoding(format!(
158                        "Error mapping cuda frame: {:?}",
159                        result
160                    )));
161                }
162
163                // Cleanup
164                let result = cuGraphicsUnmapResources(1, &mut self.graphics_resource, null_mut());
165                if result != CUresult::CUDA_SUCCESS {
166                    return Err(WaycapError::Encoding(format!(
167                        "Could not unmap resource: {:?}",
168                        result
169                    )));
170                }
171
172                gl::BindTexture(gl::TEXTURE_2D, 0);
173            }
174
175            cuda_frame.set_pts(Some(frame.timestamp));
176            encoder.send_frame(&cuda_frame)?;
177
178            let mut packet = ffmpeg::codec::packet::Packet::empty();
179            if encoder.receive_packet(&mut packet).is_ok() {
180                if let Some(data) = packet.data() {
181                    if let Some(ref mut sender) = self.encoded_frame_sender {
182                        if sender
183                            .try_push(EncodedVideoFrame {
184                                data: data.to_vec(),
185                                is_keyframe: packet.is_key(),
186                                pts: packet.pts().unwrap_or(0),
187                                dts: packet.dts().unwrap_or(0),
188                            })
189                            .is_err()
190                        {
191                            log::error!("Could not send encoded packet to the ringbuf");
192                        }
193                    }
194                };
195            }
196        }
197        Ok(())
198    }
199
200    fn drain(&mut self) -> Result<()> {
201        if let Some(ref mut encoder) = self.encoder {
202            // Drain encoder
203            encoder.send_eof()?;
204            let mut packet = ffmpeg::codec::packet::Packet::empty();
205            while encoder.receive_packet(&mut packet).is_ok() {
206                if let Some(data) = packet.data() {
207                    if let Some(ref mut sender) = self.encoded_frame_sender {
208                        if sender
209                            .try_push(EncodedVideoFrame {
210                                data: data.to_vec(),
211                                is_keyframe: packet.is_key(),
212                                pts: packet.pts().unwrap_or(0),
213                                dts: packet.dts().unwrap_or(0),
214                            })
215                            .is_err()
216                        {
217                            log::error!("Could not send encoded packet to the ringbuf");
218                        }
219                    }
220                };
221                packet = ffmpeg::codec::packet::Packet::empty();
222            }
223        }
224        Ok(())
225    }
226
227    fn reset(&mut self) -> Result<()> {
228        self.drop_encoder();
229        let new_encoder = Self::create_encoder(
230            self.width,
231            self.height,
232            &self.encoder_name,
233            &self.quality,
234            &self.cuda_ctx,
235        )?;
236
237        self.encoder = Some(new_encoder);
238        Ok(())
239    }
240
    /// Borrows the underlying FFmpeg encoder; the option is `None` while the
    /// encoder is dropped.
    fn get_encoder(&self) -> &Option<ffmpeg::codec::encoder::Video> {
        &self.encoder
    }
244
245    fn drop_encoder(&mut self) {
246        self.encoder.take();
247    }
248
249    fn take_encoded_recv(&mut self) -> Option<HeapCons<EncodedVideoFrame>> {
250        self.encoded_frame_recv.take()
251    }
252}
253
254impl NvencEncoder {
255    fn create_encoder(
256        width: u32,
257        height: u32,
258        encoder: &str,
259        quality: &QualityPreset,
260        cuda_ctx: &Context,
261    ) -> Result<ffmpeg::codec::encoder::Video> {
262        let encoder_codec =
263            ffmpeg::codec::encoder::find_by_name(encoder).ok_or(ffmpeg::Error::EncoderNotFound)?;
264
265        let mut encoder_ctx = ffmpeg::codec::context::Context::new_with_codec(encoder_codec)
266            .encoder()
267            .video()?;
268
269        encoder_ctx.set_width(width);
270        encoder_ctx.set_height(height);
271        encoder_ctx.set_format(ffmpeg::format::Pixel::CUDA);
272        encoder_ctx.set_bit_rate(16_000_000);
273
274        unsafe {
275            // Set up the cuda context
276            let nvenc_device =
277                av_hwdevice_ctx_alloc(ffmpeg_next::ffi::AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA);
278
279            if nvenc_device.is_null() {
280                return Err(WaycapError::Init(
281                    "Could not initialize nvenc device".into(),
282                ));
283            }
284
285            let hw_device_ctx = (*nvenc_device).data as *mut AVHWDeviceContext;
286            let cuda_device_ctx = (*hw_device_ctx).hwctx as *mut AVCUDADeviceContext;
287            (*cuda_device_ctx).cuda_ctx = cuda_ctx.as_raw();
288
289            let err = av_hwdevice_ctx_init(nvenc_device);
290
291            if err < 0 {
292                return Err(WaycapError::Init(format!(
293                    "Error trying to initialize hw device context: {:?}",
294                    err
295                )));
296            }
297
298            let hw_device_ctx = (*nvenc_device).data as *mut AVHWDeviceContext;
299            let cuda_device_ctx = (*hw_device_ctx).hwctx as *mut AVCUDADeviceContext;
300            (*cuda_device_ctx).cuda_ctx = cuda_ctx.as_raw();
301
302            let mut frame_ctx = create_hw_frame_ctx(nvenc_device)?;
303
304            if frame_ctx.is_null() {
305                return Err(WaycapError::Init(
306                    "Could not initialize hw frame context".into(),
307                ));
308            }
309
310            let hw_frame_context = &mut *((*frame_ctx).data as *mut AVHWFramesContext);
311
312            hw_frame_context.width = width as i32;
313            hw_frame_context.height = height as i32;
314            hw_frame_context.sw_format = AVPixelFormat::AV_PIX_FMT_RGBA;
315            hw_frame_context.format = encoder_ctx.format().into();
316            hw_frame_context.device_ctx = hw_device_ctx;
317            // Decides buffer size if we do not pop frame from the encoder we cannot
318            // enqueue more than these many -- maybe adjust but for now setting it to
319            // doble target fps
320            hw_frame_context.initial_pool_size = 120;
321
322            let err = av_hwframe_ctx_init(frame_ctx);
323            if err < 0 {
324                return Err(WaycapError::Init(format!(
325                    "Error trying to initialize hw frame context: {:?}",
326                    err
327                )));
328            }
329
330            (*encoder_ctx.as_mut_ptr()).hw_device_ctx = av_buffer_ref(nvenc_device);
331            (*encoder_ctx.as_mut_ptr()).hw_frames_ctx = av_buffer_ref(frame_ctx);
332
333            av_buffer_unref(&mut frame_ctx);
334        }
335
336        encoder_ctx.set_time_base(Rational::new(1, 1_000_000));
337        encoder_ctx.set_gop(GOP_SIZE);
338
339        let encoder_params = ffmpeg::codec::Parameters::new();
340
341        let opts = Self::get_encoder_params(quality);
342
343        encoder_ctx.set_parameters(encoder_params)?;
344        let encoder = encoder_ctx.open_with(opts)?;
345
346        Ok(encoder)
347    }
348
349    fn get_encoder_params(quality: &QualityPreset) -> ffmpeg::Dictionary {
350        let mut opts = ffmpeg::Dictionary::new();
351        opts.set("vsync", "vfr");
352        opts.set("rc", "vbr");
353        opts.set("tune", "hq");
354        match quality {
355            QualityPreset::Low => {
356                opts.set("preset", "p2");
357                opts.set("cq", "30");
358                opts.set("b:v", "20M");
359            }
360            QualityPreset::Medium => {
361                opts.set("preset", "p4");
362                opts.set("cq", "25");
363                opts.set("b:v", "40M");
364            }
365            QualityPreset::High => {
366                opts.set("preset", "p7");
367                opts.set("cq", "20");
368                opts.set("b:v", "80M");
369            }
370            QualityPreset::Ultra => {
371                opts.set("preset", "p7");
372                opts.set("cq", "15");
373                opts.set("b:v", "120M");
374            }
375        }
376        opts
377    }
378
379    pub fn init_gl(&mut self, texture_id: u32) -> Result<()> {
380        unsafe {
381            self.egl_texture = texture_id;
382            // Try to register GL texture with CUDA
383            let result = cuGraphicsGLRegisterImage(
384                &mut self.graphics_resource,
385                self.egl_texture,
386                gl::TEXTURE_2D, // GL_TEXTURE_2D
387                0x00,           // CU_GRAPHICS_REGISTER_FLAGS_READ_NONE
388            );
389
390            if result != CUresult::CUDA_SUCCESS {
391                return Err(WaycapError::Init(format!(
392                    "Error registering GL texture to CUDA: {:?}",
393                    result
394                )));
395            }
396
397            let result = cuGraphicsResourceSetMapFlags_v2(self.graphics_resource, 0);
398
399            if result != CUresult::CUDA_SUCCESS {
400                cuGraphicsUnregisterResource(self.graphics_resource);
401                gl::BindTexture(gl::TEXTURE_2D, 0);
402                return Err(WaycapError::Init(format!(
403                    "Failed to set graphics resource map flags: {:?}",
404                    result
405                )));
406            }
407        }
408
409        Ok(())
410    }
411
412    pub fn make_current(&self) -> Result<()> {
413        unsafe { cuCtxSetCurrent(self.cuda_ctx.as_raw()) };
414        Ok(())
415    }
416}
417
418impl Drop for NvencEncoder {
419    fn drop(&mut self) {
420        if let Err(e) = self.drain() {
421            log::error!("Error while draining nvenc encoder during drop: {:?}", e);
422        }
423        self.drop_encoder();
424
425        if let Err(e) = self.make_current() {
426            log::error!("Could not make context current during drop: {:?}", e);
427        }
428
429        let result = unsafe { cuGraphicsUnregisterResource(self.graphics_resource) };
430        if result != CUresult::CUDA_SUCCESS {
431            log::error!("Error cleaning up graphics resource: {:?}", result);
432        }
433    }
434}