FFmpeg  4.4.8
vf_nnedi.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010-2011 Kevin Stone
3  * Copyright (C) 2016 Paul B Mahol
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <float.h>
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/common.h"
26 #include "libavutil/float_dsp.h"
27 #include "libavutil/imgutils.h"
28 #include "libavutil/mem_internal.h"
29 #include "libavutil/opt.h"
30 #include "libavutil/pixdesc.h"
31 #include "avfilter.h"
32 #include "filters.h"
33 #include "formats.h"
34 #include "internal.h"
35 #include "video.h"
36 
/* Exact size in bytes the weights file must have; the loader compares the
 * file size and the number of bytes read against this value. */
37 static const size_t NNEDI_WEIGHTS_SIZE = 13574928;
/* Predictor window width and height, indexed by the "nsize" option value
 * (s8x6, s16x6, s32x6, s48x6, s8x4, s16x4, s32x4). */
38 static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
39 static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
/* Predictor neuron count, indexed by the "nns" option value (n16 ... n256). */
40 static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };
41 
42 typedef struct PrescreenerCoefficients {
43  DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
44  DECLARE_ALIGNED(32, float, bias_l0)[4];
45 
46  DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
47  DECLARE_ALIGNED(32, float, bias_l1)[4];
48 
49  DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
50  DECLARE_ALIGNED(32, float, bias_l2)[4];
52 
53 typedef struct PredictorCoefficients {
54  int xdim, ydim, nns, nsize;
55  float *data;
56  float *softmax_q1;
57  float *elliott_q1;
60  float *softmax_q2;
61  float *elliott_q2;
65 
66 typedef struct NNEDIContext {
67  const AVClass *class;
68 
69  char *weights_file;
70 
72  int eof;
74 
76  int depth;
77  int nb_planes;
79  int linesize[4];
80  int planewidth[4];
81  int planeheight[4];
82  int field_n;
83 
86 
87  float half;
88  float in_scale;
89  float out_scale;
90 
91  // Parameters
92  int deint;
93  int field;
95  int nsize;
96  int nnsparam;
97  int qual;
98  int etype;
99  int pscrn;
100 
103  float **input_buf;
104  float **output_buf;
105 
106  void (*read)(const uint8_t *src, float *dst,
107  int src_stride, int dst_stride,
108  int width, int height, float scale);
109  void (*write)(const float *src, uint8_t *dst,
110  int src_stride, int dst_stride,
111  int width, int height, int depth, float scale);
113  const void *src, ptrdiff_t src_stride,
114  uint8_t *prescreen, int N,
115  const PrescreenerCoefficients *const coeffs);
116 } NNEDIContext;
117 
/* Byte offset of an option's backing field inside NNEDIContext. */
118 #define OFFSET(x) offsetof(NNEDIContext, x)
/* Option flag sets; RFLAGS is FLAGS plus AV_OPT_FLAG_RUNTIME_PARAM. */
119 #define RFLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
120 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
121 
/*
 * Option table of the filter. Each AV_OPT_TYPE_INT/STRING entry is backed by
 * the NNEDIContext field named in OFFSET(); the AV_OPT_TYPE_CONST entries that
 * follow it give symbolic names to its values (grouped by the unit string in
 * the last column). Only "weights" uses FLAGS; every other entry uses RFLAGS.
 */
122 static const AVOption nnedi_options[] = {
123  {"weights", "set weights file", OFFSET(weights_file), AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
124  {"deint", "set which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "deint" },
125  {"all", "deinterlace all frames", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "deint" },
126  {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "deint" },
 // field: negative values take the field order from frame flags; -2 and 2..3 output both fields.
127  {"field", "set mode of operation", OFFSET(field), AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, RFLAGS, "field" },
128  {"af", "use frame flags, both fields", 0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, RFLAGS, "field" },
129  {"a", "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, RFLAGS, "field" },
130  {"t", "use top field only", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "field" },
131  {"b", "use bottom field only", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "field" },
132  {"tf", "use both fields, top first", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "field" },
133  {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "field" },
 // planes: bit mask, bit p selects plane p for processing.
134  {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 15, RFLAGS },
 // nsize: index into the NNEDI_XDIM / NNEDI_YDIM tables.
135  {"nsize", "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, RFLAGS, "nsize" },
136  {"s8x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nsize" },
137  {"s16x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nsize" },
138  {"s32x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nsize" },
139  {"s48x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nsize" },
140  {"s8x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nsize" },
141  {"s16x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, RFLAGS, "nsize" },
142  {"s32x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, RFLAGS, "nsize" },
 // nns: index into the NNEDI_NNS table.
143  {"nns", "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, RFLAGS, "nns" },
144  {"n16", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nns" },
145  {"n32", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nns" },
146  {"n64", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nns" },
147  {"n128", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nns" },
148  {"n256", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nns" },
 // qual: value 2 makes the predictor evaluate the second (q2) weight set as well.
149  {"qual", "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, RFLAGS, "qual" },
150  {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "qual" },
151  {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "qual" },
152  {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "etype" },
153  {"a", "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
154  {"abs","weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
155  {"s", "weights trained to minimize squared error", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
156  {"mse","weights trained to minimize squared error", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
 // pscrn: 0 disables prescreening, 1 selects the old prescreener, 2..4 the new ones.
157  {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 4, RFLAGS, "pscrn" },
158  {"none", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "pscrn" },
159  {"original", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "pscrn" },
160  {"new", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "pscrn" },
161  {"new2", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "pscrn" },
162  {"new3", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "pscrn" },
163  { NULL }
164 };
165 
167 
168 static int config_output(AVFilterLink *outlink)
169 {
170  AVFilterContext *ctx = outlink->src;
171 
172  outlink->time_base.num = ctx->inputs[0]->time_base.num;
173  outlink->time_base.den = ctx->inputs[0]->time_base.den * 2;
174  outlink->w = ctx->inputs[0]->w;
175  outlink->h = ctx->inputs[0]->h;
176 
177  outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
178  (AVRational){2, 1});
179 
180  return 0;
181 }
182 
184 {
185  static const enum AVPixelFormat pix_fmts[] = {
209  };
210 
212  if (!fmts_list)
213  return AVERROR(ENOMEM);
214  return ff_set_common_formats(ctx, fmts_list);
215 }
216 
217 static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input,
218  int n, float scale, float bias)
219 {
220  float sum, y;
221 
222  sum = s->fdsp->scalarproduct_float(kernel, input, n);
223 
224  y = sum * scale + bias + 1e-20f;
225 
226  return y;
227 }
228 
/* Elliott activation: x / (1 + |x|). */
static float elliott(float x)
{
    const float magnitude = fabsf(x);

    return x / (1.0f + magnitude);
}
233 
/* Apply the elliott() activation in place to the first size elements of input. */
static void transform_elliott(float *input, int size)
{
    int idx = 0;

    while (idx < size) {
        input[idx] = elliott(input[idx]);
        idx++;
    }
}
239 
241  const void *src, ptrdiff_t src_stride,
242  uint8_t *prescreen, int N,
243  const PrescreenerCoefficients *const m_data)
244 {
245  NNEDIContext *s = ctx->priv;
246  const float *src_p = src;
247 
248  // Adjust source pointer to point to top-left of filter window.
249  const float *window = src_p - 2 * src_stride - 5;
250 
251  for (int j = 0; j < N; j++) {
252  LOCAL_ALIGNED_32(float, input, [48]);
253  float state[12];
254 
255  for (int i = 0; i < 4; i++)
256  memcpy(input + i * 12, window + i * src_stride + j, 12 * sizeof(float));
257 
258  // Layer 0.
259  for (int n = 0; n < 4; n++)
260  state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 48, 1.0f, m_data->bias_l0[n]);
261  transform_elliott(state + 1, 3);
262 
263  // Layer 1.
264  for (int n = 0; n < 4; n++)
265  state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
266  transform_elliott(state + 4, 3);
267 
268  // Layer 2.
269  for (int n = 0; n < 4; n++)
270  state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);
271 
272  prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0;
273  }
274 }
275 
277  const void *src, ptrdiff_t src_stride,
278  uint8_t *prescreen, int N,
279  const PrescreenerCoefficients *const m_data)
280 {
281  NNEDIContext *s = ctx->priv;
282  const float *src_p = src;
283 
284  // Adjust source pointer to point to top-left of filter window.
285  const float *window = src_p - 2 * src_stride - 6;
286 
287  for (int j = 0; j < N; j += 4) {
288  LOCAL_ALIGNED_32(float, input, [64]);
289  float state[8];
290 
291  for (int i = 0; i < 4; i++)
292  memcpy(input + i * 16, window + i * src_stride + j, 16 * sizeof(float));
293 
294  for (int n = 0; n < 4; n++)
295  state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 64, 1.0f, m_data->bias_l0[n]);
297 
298  for (int n = 0; n < 4; n++)
299  state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
300 
301  for (int n = 0; n < 4; n++)
302  prescreen[j + n] = state[n + 4] > 0.f;
303  }
304 }
305 
306 static int filter_offset(int nn, const PredictorCoefficients *const model)
307 {
308  return nn * model->nsize;
309 }
310 
311 static const float *softmax_q1_filter(int nn,
312  const PredictorCoefficients *const model)
313 {
314  return model->softmax_q1 + filter_offset(nn, model);
315 }
316 
317 static const float *elliott_q1_filter(int nn,
318  const PredictorCoefficients *const model)
319 {
320  return model->elliott_q1 + filter_offset(nn, model);
321 }
322 
323 static const float *softmax_q2_filter(int nn,
324  const PredictorCoefficients *const model)
325 {
326  return model->softmax_q2 + filter_offset(nn, model);
327 }
328 
329 static const float *elliott_q2_filter(int nn,
330  const PredictorCoefficients *const model)
331 {
332  return model->elliott_q2 + filter_offset(nn, model);
333 }
334 
335 static void gather_input(const float *src, ptrdiff_t src_stride,
336  float *buf, float mstd[4],
337  const PredictorCoefficients *const model)
338 {
339  const float scale = 1.f / model->nsize;
340  float sum = 0.f;
341  float sum_sq = 0.f;
342  float tmp;
343 
344  for (int i = 0; i < model->ydim; i++) {
345  memcpy(buf, src, model->xdim * sizeof(float));
346 
347  for (int j = 0; j < model->xdim; j++) {
348  const float val = src[j];
349 
350  sum += val;
351  sum_sq += val * val;
352  }
353 
354  src += src_stride;
355  buf += model->xdim;
356  }
357 
358  mstd[0] = sum * scale;
359  mstd[3] = 0.f;
360 
361  tmp = sum_sq * scale - mstd[0] * mstd[0];
362  if (tmp < FLT_EPSILON) {
363  mstd[1] = 0.0f;
364  mstd[2] = 0.0f;
365  } else {
366  mstd[1] = sqrtf(tmp);
367  mstd[2] = 1.0f / mstd[1];
368  }
369 }
370 
/* expf() of x after clamping x to [-80, 80]. */
static float softmax_exp(float x)
{
    const float clamped = av_clipf(x, -80.f, 80.f);

    return expf(clamped);
}
375 
/* Replace each of the first size elements of input by softmax_exp() of it. */
static void transform_softmax_exp(float *input, int size)
{
    int idx = 0;

    while (idx < size) {
        input[idx] = softmax_exp(input[idx]);
        idx++;
    }
}
381 
/*
 * Weighted average of elliott(el[i]) using softmax[i] as weights, over n
 * neurons. The result, scaled by 5 and by the deviation mstd[1] and offset
 * by the mean mstd[0], is accumulated into mstd[3]. When the weight sum is
 * not above 1e-10f only the mean is accumulated.
 */
static void wae5(const float *softmax, const float *el,
                 int n, float mstd[4])
{
    float weighted = 0.0f;
    float weight_total = 0.0f;
    int idx = 0;

    while (idx < n) {
        weighted += softmax[idx] * elliott(el[idx]);
        weight_total += softmax[idx];
        idx++;
    }

    if (!(weight_total > 1e-10f)) {
        mstd[3] += mstd[0];
        return;
    }

    mstd[3] += (5.0f * weighted) / weight_total * mstd[1] + mstd[0];
}
397 
399  const void *src, ptrdiff_t src_stride, void *dst,
400  const uint8_t *prescreen, int N,
401  const PredictorCoefficients *const model, int use_q2)
402 {
403  const NNEDIContext *const s = ctx->priv;
404  const float *src_p = src;
405  float *dst_p = dst;
406 
407  // Adjust source pointer to point to top-left of filter window.
408  const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
409  const int filter_size = model->nsize;
410  const int nns = model->nns;
411 
412  for (int i = 0; i < N; i++) {
413  LOCAL_ALIGNED_32(float, input, [48 * 6]);
414  float activation[256 * 2];
415  float mstd[4];
416  float scale;
417 
418  if (prescreen[i])
419  continue;
420 
421  gather_input(window + i, src_stride, input, mstd, model);
422  scale = mstd[2];
423 
424  for (int nn = 0; nn < nns; nn++)
425  activation[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, filter_size, scale, model->softmax_bias_q1[nn]);
426 
427  for (int nn = 0; nn < nns; nn++)
428  activation[nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]);
429 
430  transform_softmax_exp(activation, nns);
431  wae5(activation, activation + nns, nns, mstd);
432 
433  if (use_q2) {
434  for (int nn = 0; nn < nns; nn++)
435  activation[nn] = dot_dsp(s, softmax_q2_filter(nn, model), input, filter_size, scale, model->softmax_bias_q2[nn]);
436 
437  for (int nn = 0; nn < nns; nn++)
438  activation[nns + nn] = dot_dsp(s, elliott_q2_filter(nn, model), input, filter_size, scale, model->elliott_bias_q2[nn]);
439 
440  transform_softmax_exp(activation, nns);
441  wae5(activation, activation + nns, nns, mstd);
442  }
443 
444  dst_p[i] = mstd[3] * (use_q2 ? 0.5f : 1.f);
445  }
446 }
447 
/**
 * Convert rows of 8-bit samples to float, adding 32 mirrored samples of
 * padding on each side of every row.
 *
 * For each row, dst[0..width-1] receives the samples, dst[-1..-32] the
 * left-edge mirror and dst[width..width+31] the right-edge mirror, so dst
 * must have 32 writable floats before and after each row.
 *
 * Mirror indices are clamped to the row: for rows narrower than 32 samples
 * the padding repeats the far edge sample instead of reading outside
 * [0, width). For width >= 32 the result is unchanged.
 *
 * @param src        first source row
 * @param dst        first destination row (start of the sample area)
 * @param src_stride distance between source rows, in bytes
 * @param dst_stride distance between destination rows, in floats
 * @param width      samples per row; nothing is done when <= 0
 * @param height     number of rows
 * @param scale      unused for 8-bit input
 */
static void read_bytes(const uint8_t *src, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    if (width <= 0)
        return;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < 32; x++) {
            // Clamp so narrow rows never read past the last sample.
            const int sx = x < width ? x : width - 1;

            dst[-x - 1] = src[sx];
        }

        for (int x = 0; x < width; x++)
            dst[x] = src[x];

        for (int x = 0; x < 32; x++) {
            // Clamp so narrow rows never read before the first sample.
            const int sx = x < width ? width - x - 1 : 0;

            dst[width + x] = src[sx];
        }

        dst += dst_stride;
        src += src_stride;
    }
}
466 
/**
 * Convert rows of 16-bit samples to float (multiplied by scale), adding 32
 * mirrored samples of padding on each side of every row.
 *
 * For each row, dst[0..width-1] receives the samples, dst[-1..-32] the
 * left-edge mirror and dst[width..width+31] the right-edge mirror, so dst
 * must have 32 writable floats before and after each row.
 *
 * Mirror indices are clamped to the row: for rows narrower than 32 samples
 * the padding repeats the far edge sample instead of reading outside
 * [0, width). For width >= 32 the result is unchanged.
 *
 * @param srcp       first source row, holding uint16_t samples
 * @param dst        first destination row (start of the sample area)
 * @param src_stride distance between source rows, in bytes
 * @param dst_stride distance between destination rows, in floats
 * @param width      samples per row; nothing is done when <= 0
 * @param height     number of rows
 * @param scale      factor applied to every sample
 */
static void read_words(const uint8_t *srcp, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const uint16_t *src = (const uint16_t *)srcp;

    if (width <= 0)
        return;

    // The stride is given in bytes; step in 16-bit samples.
    src_stride /= 2;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < 32; x++) {
            // Clamp so narrow rows never read past the last sample.
            const int sx = x < width ? x : width - 1;

            dst[-x - 1] = src[sx] * scale;
        }

        for (int x = 0; x < width; x++)
            dst[x] = src[x] * scale;

        for (int x = 0; x < 32; x++) {
            // Clamp so narrow rows never read before the first sample.
            const int sx = x < width ? width - x - 1 : 0;

            dst[width + x] = src[sx] * scale;
        }

        dst += dst_stride;
        src += src_stride;
    }
}
489 
/*
 * Store height rows of width float samples as 8-bit values, clipping each
 * with av_clip_uint8(). Strides are in elements of the respective buffer;
 * depth and scale are not used for 8-bit output.
 */
static void write_bytes(const float *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    for (int row = 0; row < height; row++, dst += dst_stride, src += src_stride) {
        for (int col = 0; col < width; col++)
            dst[col] = av_clip_uint8(src[col]);
    }
}
503 
/*
 * Store height rows of width float samples as 16-bit values: each sample is
 * multiplied by scale and clipped to depth bits with av_clip_uintp2_c().
 * dst_stride is given in bytes, src_stride in floats.
 */
static void write_words(const float *src, uint8_t *dstp,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    uint16_t *out = (uint16_t *)dstp;
    const int out_step = dst_stride / 2; // bytes -> 16-bit samples

    for (int row = 0; row < height; row++, out += out_step, src += src_stride) {
        for (int col = 0; col < width; col++)
            out[col] = av_clip_uintp2_c(src[col] * scale, depth);
    }
}
521 
/*
 * Four-tap vertical interpolation for the pixels flagged in prescreen.
 *
 * src points at float samples with rows src_stride floats apart; the taps
 * are the four rows starting two rows before src, weighted
 * -3/32, 19/32, 19/32, -3/32. Only positions i < n with a non-zero
 * prescreen[i] are written to dst (float array); the others are untouched.
 */
static void interpolation(const void *src, ptrdiff_t src_stride,
                          void *dst, const uint8_t *prescreen, int n)
{
    const float outer_tap = -3.0f / 32.0f;
    const float inner_tap = 19.0f / 32.0f;
    const float *row0 = (const float *)src - 2 * src_stride;
    float *out = dst;

    for (int col = 0; col < n; col++) {
        float acc;

        if (prescreen[col] == 0)
            continue;

        acc  = 0.0f;
        acc += outer_tap * row0[0 * src_stride + col];
        acc += inner_tap * row0[1 * src_stride + col];
        acc += inner_tap * row0[2 * src_stride + col];
        acc += outer_tap * row0[3 * src_stride + col];

        out[col] = acc;
    }
}
543 
/*
 * Slice worker: produces the rows [slice_start, slice_end) of every plane of
 * the output frame passed in arg, reading from the stored frame s->prev.
 *
 * For each processed plane, the rows of one parity are copied unchanged from
 * the input and the rows of the other parity are synthesized: the kept rows
 * are converted to float into this job's srcbuf, run through the optional
 * prescreener, the predictor and the interpolation fallback, and the
 * resulting float rows are written back with s->write.
 * Planes not selected by the process_plane bit mask are copied as they are.
 *
 * jobnr selects the per-job scratch buffers; always returns 0.
 */
544 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
545 {
546  const NNEDIContext *const s = ctx->priv;
547  AVFrame *out = arg;
548  AVFrame *in = s->prev;
549  const float in_scale = s->in_scale;
550  const float out_scale = s->out_scale;
551  const int depth = s->depth;
552  const int interlaced = in->interlaced_frame;
 // tff decides which row parity is copied through (slice_start + tff) and
 // which one is synthesized (slice_start + !tff); slice_start is always even.
553  const int tff = s->field_n == (s->field < 0 ? interlaced ? in->top_field_first : 1 :
554  (s->field & 1) ^ 1);
555 
556 
557  for (int p = 0; p < s->nb_planes; p++) {
558  const int height = s->planeheight[p];
559  const int width = s->planewidth[p];
 // Slice bounds are computed on height / 2 and doubled, so both are even.
560  const int slice_start = 2 * (ff_slice_pos(height / 2, jobnr, nb_jobs));
561  const int slice_end = 2 * (ff_slice_pos(height / 2, jobnr + 1, nb_jobs));
562  const uint8_t *src_data = in->data[p];
563  uint8_t *dst_data = out->data[p];
564  uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
565  const int src_linesize = in->linesize[p];
566  const int dst_linesize = out->linesize[p];
567  uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
568  float *srcbuf = s->input_buf[jobnr];
 // Each float row holds width samples plus 32 floats of padding per side.
569  const int srcbuf_stride = width + 64;
570  float *dstbuf = s->output_buf[jobnr];
571  const int dstbuf_stride = width;
572  const int slice_height = (slice_end - slice_start) / 2;
573  const int last_slice = slice_end == height;
574  const uint8_t *in_line;
575  uint8_t *out_line;
576  int y_out;
577 
 // Plane not selected for processing: plain copy of this slice.
578  if (!(s->process_plane & (1 << p))) {
579  av_image_copy_plane(dst, out->linesize[p],
580  in->data[p] + slice_start * in->linesize[p],
581  in->linesize[p],
582  s->linesize[p], slice_end - slice_start);
583  continue;
584  }
585 
 // Copy the kept rows (every second row) straight to the output.
586  y_out = slice_start + (tff ^ (slice_start & 1));
587  in_line = src_data + (y_out * src_linesize);
588  out_line = dst_data + (y_out * dst_linesize);
589 
590  while (y_out < slice_end) {
591  memcpy(out_line, in_line, s->linesize[p]);
592  y_out += 2;
593  in_line += src_linesize * 2;
594  out_line += dst_linesize * 2;
595  }
596 
 // First row to synthesize in this slice.
597  y_out = slice_start + ((!tff) ^ (slice_start & 1));
598 
 // Three context rows above the slice (y_out - 5, - 3, - 1); the row index
 // is clamped from below to tff.
599  s->read(src_data + FFMAX(y_out - 5, tff) * src_linesize,
600  srcbuf + 32,
601  src_linesize * 2, srcbuf_stride,
602  width, 1, in_scale);
603  srcbuf += srcbuf_stride;
604 
605  s->read(src_data + FFMAX(y_out - 3, tff) * src_linesize,
606  srcbuf + 32,
607  src_linesize * 2, srcbuf_stride,
608  width, 1, in_scale);
609  srcbuf += srcbuf_stride;
610 
611  s->read(src_data + FFMAX(y_out - 1, tff) * src_linesize,
612  srcbuf + 32,
613  src_linesize * 2, srcbuf_stride,
614  width, 1, in_scale);
615  srcbuf += srcbuf_stride;
616 
617  in_line = src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize;
618  out_line = dst_data + (y_out * dst_linesize);
619 
 // Bulk read of the kept rows inside the slice (source stride of two rows);
 // one row fewer when this slice ends at the bottom of the plane.
620  s->read(in_line, srcbuf + 32, src_linesize * 2, srcbuf_stride,
621  width, slice_height - last_slice, in_scale);
622 
623  y_out += (slice_height - last_slice) * 2;
624 
 // Three trailing rows (y_out + 1, + 3, + 5); the row index is clamped
 // from above to height - 1 - !tff.
625  s->read(src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize,
626  srcbuf + 32 + srcbuf_stride * (slice_height - last_slice),
627  src_linesize * 2, srcbuf_stride,
628  width, 1, in_scale);
629 
630  s->read(src_data + FFMIN(y_out + 3, height - 1 - !tff) * src_linesize,
631  srcbuf + 32 + srcbuf_stride * (slice_height + 1 - last_slice),
632  src_linesize * 2, srcbuf_stride,
633  width, 1, in_scale);
634 
635  s->read(src_data + FFMIN(y_out + 5, height - 1 - !tff) * src_linesize,
636  srcbuf + 32 + srcbuf_stride * (slice_height + 2 - last_slice),
637  src_linesize * 2, srcbuf_stride,
638  width, 1, in_scale);
639 
 // One synthesized row per iteration (y advances by two output rows).
640  for (int y = 0; y < slice_end - slice_start; y += 2) {
 // pscrn 1 uses prescreen[0] (old), pscrn 2..4 use prescreen[1] (new).
641  if (s->pscrn > 0)
642  s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
643  srcbuf_stride, prescreen_buf, width,
644  &s->prescreener[s->pscrn - 1]);
645 
 // The predictor skips pixels whose prescreen flag is set.
646  predictor(ctx,
647  srcbuf + (y / 2) * srcbuf_stride + 32,
648  srcbuf_stride,
649  dstbuf + (y / 2) * dstbuf_stride,
650  prescreen_buf, width,
651  &s->coeffs[s->etype][s->nnsparam][s->nsize], s->qual == 2);
652 
 // Flagged pixels get the four-tap interpolation instead.
653  if (s->pscrn > 0)
654  interpolation(srcbuf + (y / 2) * srcbuf_stride + 32,
655  srcbuf_stride,
656  dstbuf + (y / 2) * dstbuf_stride,
657  prescreen_buf, width);
658  }
659 
 // Store the synthesized rows into every second output row.
660  s->write(dstbuf, out_line, dstbuf_stride, dst_linesize * 2,
661  width, slice_height, depth, out_scale);
662  }
663 
664  return 0;
665 }
666 
667 static int get_frame(AVFilterContext *ctx, int is_second)
668 {
669  NNEDIContext *s = ctx->priv;
670  AVFilterLink *outlink = ctx->outputs[0];
671  AVFrame *dst;
672 
673  dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
674  if (!dst)
675  return AVERROR(ENOMEM);
676  av_frame_copy_props(dst, s->prev);
677  dst->interlaced_frame = 0;
678  dst->pts = s->pts;
679 
680  ctx->internal->execute(ctx, filter_slice, dst, NULL, FFMIN(s->planeheight[1] / 2, s->nb_threads));
681 
682  if (s->field == -2 || s->field > 1)
683  s->field_n = !s->field_n;
684 
685  return ff_filter_frame(outlink, dst);
686 }
687 
688 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
689 {
690  AVFilterContext *ctx = inlink->dst;
691  NNEDIContext *s = ctx->priv;
692  int ret;
693 
694  if (!s->prev) {
695  s->prev = in;
696  return 0;
697  }
698 
699  if ((s->deint && !in->interlaced_frame) || ctx->is_disabled) {
700  s->prev->pts *= 2;
701  ret = ff_filter_frame(ctx->outputs[0], s->prev);
702  s->prev = in;
703  return ret;
704  }
705 
706  s->pts = s->prev->pts * 2;
707  ret = get_frame(ctx, 0);
708  if (ret < 0 || (s->field > -2 && s->field < 2)) {
709  av_frame_free(&s->prev);
710  s->prev = in;
711  return ret;
712  }
713 
714  s->pts = s->prev->pts + in->pts;
715  ret = get_frame(ctx, 1);
716  av_frame_free(&s->prev);
717  s->prev = in;
718  return ret;
719 }
720 
721 static int request_frame(AVFilterLink *link)
722 {
723  AVFilterContext *ctx = link->src;
724  NNEDIContext *s = ctx->priv;
725  int ret;
726 
727  if (s->eof)
728  return AVERROR_EOF;
729 
730  ret = ff_request_frame(ctx->inputs[0]);
731 
732  if (ret == AVERROR_EOF && s->prev) {
733  AVFrame *next = av_frame_clone(s->prev);
734 
735  if (!next)
736  return AVERROR(ENOMEM);
737 
738  next->pts = s->prev->pts + av_rescale_q(1, av_inv_q(ctx->outputs[0]->frame_rate),
739  ctx->outputs[0]->time_base);
740  s->eof = 1;
741 
742  ret = filter_frame(ctx->inputs[0], next);
743  } else if (ret < 0) {
744  return ret;
745  }
746 
747  return ret;
748 }
749 
/* Copy n floats from the read cursor *data into dst and advance the cursor
 * past them. */
static void copy_weights(float *dst, int n, const float **data)
{
    const float *const cursor = *data;

    memcpy(dst, cursor, n * sizeof(float));
    *data = cursor + n;
}
755 
/* Carve size floats off the front of the pool at *ptr: returns the current
 * pool position and advances *ptr past the carved region. */
static float *allocate(float **ptr, int size)
{
    float *const chunk = *ptr;

    *ptr = chunk + size;

    return chunk;
}
764 
765 static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns)
766 {
767  int filter_size = nns * xdim * ydim;
768  int bias_size = nns;
769  float *data;
770 
771  data = av_calloc(filter_size + bias_size, 4 * sizeof(float));
772  if (!data)
773  return AVERROR(ENOMEM);
774 
775  coeffs->data = data;
776  coeffs->xdim = xdim;
777  coeffs->ydim = ydim;
778  coeffs->nsize = xdim * ydim;
779  coeffs->nns = nns;
780 
781  coeffs->softmax_q1 = allocate(&data, filter_size);
782  coeffs->elliott_q1 = allocate(&data, filter_size);
783  coeffs->softmax_bias_q1 = allocate(&data, bias_size);
784  coeffs->elliott_bias_q1 = allocate(&data, bias_size);
785 
786  coeffs->softmax_q2 = allocate(&data, filter_size);
787  coeffs->elliott_q2 = allocate(&data, filter_size);
788  coeffs->softmax_bias_q2 = allocate(&data, bias_size);
789  coeffs->elliott_bias_q2 = allocate(&data, bias_size);
790 
791  return 0;
792 }
793 
794 static int read_weights(AVFilterContext *ctx, const float *bdata)
795 {
796  NNEDIContext *s = ctx->priv;
797  int ret;
798 
799  copy_weights(&s->prescreener[0].kernel_l0[0][0], 4 * 48, &bdata);
800  copy_weights(s->prescreener[0].bias_l0, 4, &bdata);
801 
802  copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata);
803  copy_weights(s->prescreener[0].bias_l1, 4, &bdata);
804 
805  copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata);
806  copy_weights(s->prescreener[0].bias_l2, 4, &bdata);
807 
808  for (int i = 0; i < 3; i++) {
809  PrescreenerCoefficients *data = &s->prescreener[i + 1];
810  float kernel_l0_shuffled[4 * 64];
811  float kernel_l1_shuffled[4 * 4];
812 
813  copy_weights(kernel_l0_shuffled, 4 * 64, &bdata);
814  copy_weights(data->bias_l0, 4, &bdata);
815 
816  copy_weights(kernel_l1_shuffled, 4 * 4, &bdata);
817  copy_weights(data->bias_l1, 4, &bdata);
818 
819  for (int n = 0; n < 4; n++) {
820  for (int k = 0; k < 64; k++)
821  data->kernel_l0[n][k] = kernel_l0_shuffled[(k / 8) * 32 + n * 8 + k % 8];
822  for (int k = 0; k < 4; k++)
823  data->kernel_l1[n][k] = kernel_l1_shuffled[k * 4 + n];
824  }
825  }
826 
827  for (int m = 0; m < 2; m++) {
828  // Grouping by neuron count.
829  for (int i = 0; i < 5; i++) {
830  const int nns = NNEDI_NNS[i];
831 
832  // Grouping by window size.
833  for (int j = 0; j < 7; j++) {
834  PredictorCoefficients *model = &s->coeffs[m][i][j];
835  const int xdim = NNEDI_XDIM[j];
836  const int ydim = NNEDI_YDIM[j];
837  const int filter_size = xdim * ydim;
838 
839  ret = allocate_model(model, xdim, ydim, nns);
840  if (ret < 0)
841  return ret;
842 
843  // Quality 1 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
844  copy_weights(model->softmax_q1, nns * filter_size, &bdata);
845  copy_weights(model->elliott_q1, nns * filter_size, &bdata);
846 
847  // Quality 1 model bias. NNS[i] * 2 coefficients.
848  copy_weights(model->softmax_bias_q1, nns, &bdata);
849  copy_weights(model->elliott_bias_q1, nns, &bdata);
850 
851  // Quality 2 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
852  copy_weights(model->softmax_q2, nns * filter_size, &bdata);
853  copy_weights(model->elliott_q2, nns * filter_size, &bdata);
854 
855  // Quality 2 model bias. NNS[i] * 2 coefficients.
856  copy_weights(model->softmax_bias_q2, nns, &bdata);
857  copy_weights(model->elliott_bias_q2, nns, &bdata);
858  }
859  }
860  }
861 
862  return 0;
863 }
864 
/* Arithmetic mean of the first size elements of input. */
static float mean(const float *input, int size)
{
    float total = 0.f;
    int idx = 0;

    while (idx < size) {
        total += input[idx];
        idx++;
    }

    return total / size;
}
874 
/* In place: subtract mean from each of the first size elements of input and
 * divide the difference by half. */
static void transform(float *input, int size, float mean, float half)
{
    int idx = 0;

    while (idx < size) {
        const float centered = input[idx] - mean;

        input[idx] = centered / half;
        idx++;
    }
}
880 
881 static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
882 {
883  for (int n = 0; n < 4; n++) {
884  float m = mean(coeffs->kernel_l0[n], 48);
885 
886  transform(coeffs->kernel_l0[n], 48, m, half);
887  }
888 }
889 
890 static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
891 {
892  for (int n = 0; n < 4; n++) {
893  float m = mean(coeffs->kernel_l0[n], 64);
894 
895  transform(coeffs->kernel_l0[n], 64, m, half);
896  }
897 }
898 
900 {
901  const int filter_size = model->nsize;
902  const int nns = model->nns;
903  const float scale = 1.f / nns;
904 
905  double softmax_means[256]; // Average of individual softmax filters.
906  double elliott_means[256]; // Average of individual elliott filters.
907  double mean_filter[48 * 6] = { 0 }; // Pointwise average of all softmax filters.
908  double mean_bias;
909 
910  // Quality 1.
911  for (int nn = 0; nn < nns; nn++) {
912  softmax_means[nn] = mean(model->softmax_q1 + nn * filter_size, filter_size);
913  elliott_means[nn] = mean(model->elliott_q1 + nn * filter_size, filter_size);
914 
915  for (int k = 0; k < filter_size; k++)
916  mean_filter[k] += model->softmax_q1[nn * filter_size + k] - softmax_means[nn];
917  }
918 
919  for (int k = 0; k < filter_size; k++)
920  mean_filter[k] *= scale;
921 
922  mean_bias = mean(model->softmax_bias_q1, nns);
923 
924  for (int nn = 0; nn < nns; nn++) {
925  for (int k = 0; k < filter_size; k++) {
926  model->softmax_q1[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k];
927  model->elliott_q1[nn * filter_size + k] -= elliott_means[nn];
928  }
929  model->softmax_bias_q1[nn] -= mean_bias;
930  }
931 
932  // Quality 2.
933  memset(mean_filter, 0, sizeof(mean_filter));
934 
935  for (int nn = 0; nn < nns; nn++) {
936  softmax_means[nn] = mean(model->softmax_q2 + nn * filter_size, filter_size);
937  elliott_means[nn] = mean(model->elliott_q2 + nn * filter_size, filter_size);
938 
939  for (int k = 0; k < filter_size; k++) {
940  mean_filter[k] += model->softmax_q2[nn * filter_size + k] - softmax_means[nn];
941  }
942  }
943 
944  for (int k = 0; k < filter_size; k++)
945  mean_filter[k] *= scale;
946 
947  mean_bias = mean(model->softmax_bias_q2, nns);
948 
949  for (int nn = 0; nn < nns; nn++) {
950  for (int k = 0; k < filter_size; k++) {
951  model->softmax_q2[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k];
952  model->elliott_q2[nn * filter_size + k] -= elliott_means[nn];
953  }
954 
955  model->softmax_bias_q2[nn] -= mean_bias;
956  }
957 }
958 
960 {
961  NNEDIContext *s = ctx->priv;
962  FILE *weights_file = NULL;
963  int64_t weights_size;
964  float *bdata;
965  size_t bytes_read;
966  int ret = 0;
967 
968  weights_file = av_fopen_utf8(s->weights_file, "rb");
969  if (!weights_file) {
970  av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
971  return AVERROR(EINVAL);
972  }
973 
974  if (fseek(weights_file, 0, SEEK_END)) {
975  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
976  fclose(weights_file);
977  return AVERROR(EINVAL);
978  }
979 
980  weights_size = ftell(weights_file);
981 
982  if (weights_size == -1) {
983  fclose(weights_file);
984  av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
985  return AVERROR(EINVAL);
986  } else if (weights_size != NNEDI_WEIGHTS_SIZE) {
987  fclose(weights_file);
988  av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
989  return AVERROR(EINVAL);
990  }
991 
992  if (fseek(weights_file, 0, SEEK_SET)) {
993  fclose(weights_file);
994  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
995  return AVERROR(EINVAL);
996  }
997 
998  bdata = av_malloc(NNEDI_WEIGHTS_SIZE);
999  if (!bdata) {
1000  fclose(weights_file);
1001  return AVERROR(ENOMEM);
1002  }
1003 
1004  bytes_read = fread(bdata, 1, NNEDI_WEIGHTS_SIZE, weights_file);
1005  if (bytes_read != NNEDI_WEIGHTS_SIZE) {
1006  fclose(weights_file);
1007  ret = AVERROR_INVALIDDATA;
1008  av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
1009  goto fail;
1010  }
1011 
1012  fclose(weights_file);
1013 
1014  s->fdsp = avpriv_float_dsp_alloc(0);
1015  if (!s->fdsp) {
1016  ret = AVERROR(ENOMEM);
1017  goto fail;
1018  }
1019 
1020  ret = read_weights(ctx, bdata);
1021  if (ret < 0)
1022  goto fail;
1023 
1024 fail:
1025  av_free(bdata);
1026  return ret;
1027 }
1028 
1029 static int config_input(AVFilterLink *inlink)
1030 {
1031  AVFilterContext *ctx = inlink->dst;
1032  NNEDIContext *s = ctx->priv;
1034  int ret;
1035 
1036  s->depth = desc->comp[0].depth;
1037  s->nb_threads = ff_filter_get_nb_threads(ctx);
1038  s->nb_planes = av_pix_fmt_count_planes(inlink->format);
1039  if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
1040  return ret;
1041 
1042  s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
1043  s->planewidth[0] = s->planewidth[3] = inlink->w;
1044  s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
1045  s->planeheight[0] = s->planeheight[3] = inlink->h;
1046 
1047  s->half = ((1 << 8) - 1) / 2.f;
1048  s->out_scale = 1 << (s->depth - 8);
1049  s->in_scale = 1.f / s->out_scale;
1050 
1051  switch (s->depth) {
1052  case 8:
1053  s->read = read_bytes;
1054  s->write = write_bytes;
1055  break;
1056  default:
1057  s->read = read_words;
1058  s->write = write_words;
1059  break;
1060  }
1061 
1062  subtract_mean_old(&s->prescreener[0], s->half);
1063  subtract_mean_new(&s->prescreener[1], s->half);
1064  subtract_mean_new(&s->prescreener[2], s->half);
1065  subtract_mean_new(&s->prescreener[3], s->half);
1066 
1067  s->prescreen[0] = process_old;
1068  s->prescreen[1] = process_new;
1069 
1070  for (int i = 0; i < 2; i++) {
1071  for (int j = 0; j < 5; j++) {
1072  for (int k = 0; k < 7; k++)
1073  subtract_mean_predictor(&s->coeffs[i][j][k]);
1074  }
1075  }
1076 
1077  s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6);
1078  s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf));
1079  if (!s->input_buf)
1080  return AVERROR(ENOMEM);
1081 
1082  for (int i = 0; i < s->nb_threads; i++) {
1083  s->input_buf[i] = av_calloc(s->input_size, sizeof(**s->input_buf));
1084  if (!s->input_buf[i])
1085  return AVERROR(ENOMEM);
1086  }
1087 
1088  s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf));
1089  if (!s->output_buf)
1090  return AVERROR(ENOMEM);
1091 
1092  for (int i = 0; i < s->nb_threads; i++) {
1093  s->output_buf[i] = av_calloc(s->input_size, sizeof(**s->output_buf));
1094  if (!s->output_buf[i])
1095  return AVERROR(ENOMEM);
1096  }
1097 
1098  s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf));
1099  if (!s->prescreen_buf)
1100  return AVERROR(ENOMEM);
1101 
1102  for (int i = 0; i < s->nb_threads; i++) {
1103  s->prescreen_buf[i] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf));
1104  if (!s->prescreen_buf[i])
1105  return AVERROR(ENOMEM);
1106  }
1107 
1108  return 0;
1109 }
1110 
1112 {
1113  NNEDIContext *s = ctx->priv;
1114 
1115  for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++)
1116  av_freep(&s->prescreen_buf[i]);
1117 
1118  av_freep(&s->prescreen_buf);
1119 
1120  for (int i = 0; i < s->nb_threads && s->input_buf; i++)
1121  av_freep(&s->input_buf[i]);
1122 
1123  av_freep(&s->input_buf);
1124 
1125  for (int i = 0; i < s->nb_threads && s->output_buf; i++)
1126  av_freep(&s->output_buf[i]);
1127 
1128  av_freep(&s->output_buf);
1129  av_freep(&s->fdsp);
1130 
1131  for (int i = 0; i < 2; i++) {
1132  for (int j = 0; j < 5; j++) {
1133  for (int k = 0; k < 7; k++) {
1134  av_freep(&s->coeffs[i][j][k].data);
1135  }
1136  }
1137  }
1138 
1139  av_frame_free(&s->prev);
1140 }
1141 
/* Input pads: a single video pad; frames arrive through filter_frame() and
 * the link is configured by config_input(). */
1142 static const AVFilterPad inputs[] = {
1143  {
1144  .name = "default",
1145  .type = AVMEDIA_TYPE_VIDEO,
1146  .filter_frame = filter_frame,
1147  .config_props = config_input,
1148  },
1149  { NULL }
1150 };
1151 
/* Output pads: a single video pad; the link is configured by config_output()
 * and frame requests are served by request_frame(). */
1152 static const AVFilterPad outputs[] = {
1153  {
1154  .name = "default",
1155  .type = AVMEDIA_TYPE_VIDEO,
1156  .config_props = config_output,
1157  .request_frame = request_frame,
1158  },
1159  { NULL }
1160 };
1161 
1163  .name = "nnedi",
1164  .description = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
1165  .priv_size = sizeof(NNEDIContext),
1166  .priv_class = &nnedi_class,
1167  .init = init,
1168  .uninit = uninit,
1170  .inputs = inputs,
1171  .outputs = outputs,
1174 };
static double val(void *priv, double ch)
Definition: aeval.c:76
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_acrusher.c:336
#define N
Definition: af_mcompand.c:54
#define av_cold
Definition: attributes.h:88
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
uint8_t
simple assert() macros that are a bit more flexible than ISO C assert().
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:882
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:408
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
Definition: avfilter.c:802
Main libavfilter public API header.
#define flags(name, subs,...)
Definition: cbs_av1.c:572
#define s(width, name)
Definition: cbs_vp9.c:257
static struct @321 state
#define fail()
Definition: checkasm.h:133
common internal and external API header
#define FFMIN(a, b)
Definition: common.h:105
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
static av_always_inline av_const unsigned av_clip_uintp2_c(int a, int p)
Clip a signed integer to an unsigned power of two range.
Definition: common.h:302
#define FFMAX(a, b)
Definition: common.h:103
#define av_clip_uint8
Definition: common.h:128
#define av_clipf
Definition: common.h:170
#define NULL
Definition: coverity.c:32
long long int64_t
Definition: coverity.c:34
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
static SDL_Window * window
Definition: ffplay.c:366
static int ff_slice_pos(int total, int jobnr, int nb_jobs)
Compute the boundary index for a slice when work of size total is split into nb_jobs slices.
Definition: filters.h:271
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
@ AV_OPT_TYPE_INT
Definition: opt.h:225
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concurrently.
Definition: avfilter.h:117
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() callback(s) called as usual even when the enable expression is false.
Definition: avfilter.h:134
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define AVERROR(e)
Definition: error.h:43
AVFrame * av_frame_clone(const AVFrame *src)
Create a new frame that references the same data as src.
Definition: frame.c:540
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
Definition: frame.c:203
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:658
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:117
FILE * av_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
Definition: file_open.c:158
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:373
int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width)
Fill plane linesizes for an image with pixel format pix_fmt and width width.
Definition: imgutils.c:89
misc image utilities
int i
Definition: input.c:407
const char * arg
Definition: jacosubdec.c:66
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:309
#define expf(x)
Definition: libm.h:283
const char * desc
Definition: libsvtav1.c:79
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
static uint8_t half(int a, int b)
Definition: mobiclip.c:541
static int slice_end(AVCodecContext *avctx, AVFrame *pict)
Handle slice ends.
Definition: mpeg12dec.c:2033
const char data[16]
Definition: mxf.c:142
uint8_t interlaced
Definition: mxfenc.c:2208
AVOptions.
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2613
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2573
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:420
#define AV_PIX_FMT_YUV420P16
Definition: pixfmt.h:410
#define AV_PIX_FMT_YUV444P12
Definition: pixfmt.h:406
#define AV_PIX_FMT_YUV444P9
Definition: pixfmt.h:398
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:399
#define AV_PIX_FMT_YUV440P12
Definition: pixfmt.h:405
#define AV_PIX_FMT_GRAY9
Definition: pixfmt.h:379
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:421
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:414
#define AV_PIX_FMT_YUV422P9
Definition: pixfmt.h:397
#define AV_PIX_FMT_YUVA444P10
Definition: pixfmt.h:438
#define AV_PIX_FMT_YUVA420P16
Definition: pixfmt.h:441
#define AV_PIX_FMT_YUV420P12
Definition: pixfmt.h:403
#define AV_PIX_FMT_YUVA420P10
Definition: pixfmt.h:436
#define AV_PIX_FMT_YUVA422P9
Definition: pixfmt.h:434
#define AV_PIX_FMT_YUV422P12
Definition: pixfmt.h:404
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:415
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:400
#define AV_PIX_FMT_GRAY12
Definition: pixfmt.h:381
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:416
#define AV_PIX_FMT_YUV420P9
Definition: pixfmt.h:396
#define AV_PIX_FMT_YUVA420P9
Definition: pixfmt.h:433
#define AV_PIX_FMT_YUVA422P10
Definition: pixfmt.h:437
#define AV_PIX_FMT_YUV420P14
Definition: pixfmt.h:407
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:74
@ AV_PIX_FMT_YUVA420P
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:100
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
@ AV_PIX_FMT_YUVA444P
planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
Definition: pixfmt.h:177
@ AV_PIX_FMT_YUVJ411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV411P and setting color_range
Definition: pixfmt.h:258
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:215
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting color_range
Definition: pixfmt.h:79
@ AV_PIX_FMT_YUVA422P
planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
Definition: pixfmt.h:176
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:168
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
#define AV_PIX_FMT_YUVA422P12
Definition: pixfmt.h:439
#define AV_PIX_FMT_YUV422P14
Definition: pixfmt.h:408
#define AV_PIX_FMT_GRAY10
Definition: pixfmt.h:380
#define AV_PIX_FMT_GRAY14
Definition: pixfmt.h:382
#define AV_PIX_FMT_YUV422P16
Definition: pixfmt.h:411
#define AV_PIX_FMT_YUV440P10
Definition: pixfmt.h:401
#define AV_PIX_FMT_GRAY16
Definition: pixfmt.h:383
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:419
#define AV_PIX_FMT_YUVA444P16
Definition: pixfmt.h:443
#define AV_PIX_FMT_YUVA422P16
Definition: pixfmt.h:442
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:418
#define AV_PIX_FMT_YUV444P14
Definition: pixfmt.h:409
#define AV_PIX_FMT_YUVA444P9
Definition: pixfmt.h:435
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:417
#define AV_PIX_FMT_YUVA444P12
Definition: pixfmt.h:440
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:412
#define AV_PIX_FMT_YUV444P10
Definition: pixfmt.h:402
typedef void(RENAME(mix_any_func_type))
Describe the class of an AVClass context structure.
Definition: log.h:67
An instance of a filter.
Definition: avfilter.h:341
A list of supported formats for one end of a filter link.
Definition: formats.h:65
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
AVFormatInternal * internal
An opaque field for libavformat internal usage.
Definition: avformat.h:1699
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:411
int interlaced_frame
The content of the picture is interlaced.
Definition: frame.h:465
AVOption.
Definition: opt.h:248
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:81
Rational number (pair of numerator and denominator).
Definition: rational.h:58
int num
Numerator.
Definition: rational.h:59
int den
Denominator.
Definition: rational.h:60
float half
Definition: vf_nnedi.c:87
PrescreenerCoefficients prescreener[4]
Definition: vf_nnedi.c:84
int64_t pts
Definition: vf_nnedi.c:73
float out_scale
Definition: vf_nnedi.c:89
char * weights_file
Definition: vf_nnedi.c:69
int process_plane
Definition: vf_nnedi.c:94
int planewidth[4]
Definition: vf_nnedi.c:80
void(* read)(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:106
int planeheight[4]
Definition: vf_nnedi.c:81
void(* prescreen[2])(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, uint8_t *prescreen, int N, const PrescreenerCoefficients *const coeffs)
Definition: vf_nnedi.c:112
int input_size
Definition: vf_nnedi.c:101
AVFrame * prev
Definition: vf_nnedi.c:71
uint8_t ** prescreen_buf
Definition: vf_nnedi.c:102
PredictorCoefficients coeffs[2][5][7]
Definition: vf_nnedi.c:85
void(* write)(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:109
AVFloatDSPContext * fdsp
Definition: vf_nnedi.c:75
float in_scale
Definition: vf_nnedi.c:88
int nb_planes
Definition: vf_nnedi.c:77
float ** output_buf
Definition: vf_nnedi.c:104
float ** input_buf
Definition: vf_nnedi.c:103
int nnsparam
Definition: vf_nnedi.c:96
int field_n
Definition: vf_nnedi.c:82
int linesize[4]
Definition: vf_nnedi.c:79
int nb_threads
Definition: vf_nnedi.c:78
float * elliott_bias_q2
Definition: vf_nnedi.c:63
float * elliott_bias_q1
Definition: vf_nnedi.c:59
float * softmax_bias_q2
Definition: vf_nnedi.c:62
float * softmax_bias_q1
Definition: vf_nnedi.c:58
float kernel_l0[4][16 *4]
Definition: vf_nnedi.c:43
float kernel_l2[4][8]
Definition: vf_nnedi.c:49
float kernel_l1[4][4]
Definition: vf_nnedi.c:46
#define av_free(p)
#define av_freep(p)
#define av_malloc(s)
#define av_log(a,...)
static uint8_t tmp[11]
Definition: aes_ctr.c:27
#define src
Definition: vp8dsp.c:255
FILE * out
Definition: movenc.c:54
AVFormatContext * ctx
Definition: movenc.c:48
#define height
#define width
int size
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:490
static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
Definition: vf_nnedi.c:544
static int filter_offset(int nn, const PredictorCoefficients *const model)
Definition: vf_nnedi.c:306
static float * allocate(float **ptr, int size)
Definition: vf_nnedi.c:756
static float mean(const float *input, int size)
Definition: vf_nnedi.c:865
static const float * softmax_q2_filter(int nn, const PredictorCoefficients *const model)
Definition: vf_nnedi.c:323
static int read_weights(AVFilterContext *ctx, const float *bdata)
Definition: vf_nnedi.c:794
static void gather_input(const float *src, ptrdiff_t src_stride, float *buf, float mstd[4], const PredictorCoefficients *const model)
Definition: vf_nnedi.c:335
AVFilter ff_vf_nnedi
Definition: vf_nnedi.c:1162
static const float * elliott_q2_filter(int nn, const PredictorCoefficients *const model)
Definition: vf_nnedi.c:329
static const size_t NNEDI_WEIGHTS_SIZE
Definition: vf_nnedi.c:37
static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
Definition: vf_nnedi.c:890
static int query_formats(AVFilterContext *ctx)
Definition: vf_nnedi.c:183
static int config_input(AVFilterLink *inlink)
Definition: vf_nnedi.c:1029
static int get_frame(AVFilterContext *ctx, int is_second)
Definition: vf_nnedi.c:667
#define FLAGS
Definition: vf_nnedi.c:120
static const AVFilterPad inputs[]
Definition: vf_nnedi.c:1142
AVFILTER_DEFINE_CLASS(nnedi)
static float elliott(float x)
Definition: vf_nnedi.c:229
static void read_words(const uint8_t *srcp, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:467
static const uint8_t NNEDI_YDIM[]
Definition: vf_nnedi.c:39
static const AVFilterPad outputs[]
Definition: vf_nnedi.c:1152
static const AVOption nnedi_options[]
Definition: vf_nnedi.c:122
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:448
static int request_frame(AVFilterLink *link)
Definition: vf_nnedi.c:721
static void process_old(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, uint8_t *prescreen, int N, const PrescreenerCoefficients *const m_data)
Definition: vf_nnedi.c:240
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_nnedi.c:688
static void write_words(const float *src, uint8_t *dstp, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:504
#define RFLAGS
Definition: vf_nnedi.c:119
static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns)
Definition: vf_nnedi.c:765
static void transform_softmax_exp(float *input, int size)
Definition: vf_nnedi.c:376
static const uint8_t NNEDI_XDIM[]
Definition: vf_nnedi.c:38
static void wae5(const float *softmax, const float *el, int n, float mstd[4])
Definition: vf_nnedi.c:382
static av_cold int init(AVFilterContext *ctx)
Definition: vf_nnedi.c:959
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_nnedi.c:1111
static void copy_weights(float *dst, int n, const float **data)
Definition: vf_nnedi.c:750
static void transform(float *input, int size, float mean, float half)
Definition: vf_nnedi.c:875
#define OFFSET(x)
Definition: vf_nnedi.c:118
static int config_output(AVFilterLink *outlink)
Definition: vf_nnedi.c:168
static float softmax_exp(float x)
Definition: vf_nnedi.c:371
static const uint16_t NNEDI_NNS[]
Definition: vf_nnedi.c:40
static void process_new(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, uint8_t *prescreen, int N, const PrescreenerCoefficients *const m_data)
Definition: vf_nnedi.c:276
static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
Definition: vf_nnedi.c:881
static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input, int n, float scale, float bias)
Definition: vf_nnedi.c:217
static void predictor(AVFilterContext *ctx, const void *src, ptrdiff_t src_stride, void *dst, const uint8_t *prescreen, int N, const PredictorCoefficients *const model, int use_q2)
Definition: vf_nnedi.c:398
static const float * elliott_q1_filter(int nn, const PredictorCoefficients *const model)
Definition: vf_nnedi.c:317
static const float * softmax_q1_filter(int nn, const PredictorCoefficients *const model)
Definition: vf_nnedi.c:311
static void subtract_mean_predictor(PredictorCoefficients *model)
Definition: vf_nnedi.c:899
static void transform_elliott(float *input, int size)
Definition: vf_nnedi.c:234
static void interpolation(const void *src, ptrdiff_t src_stride, void *dst, const uint8_t *prescreen, int n)
Definition: vf_nnedi.c:522
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:104