src/dsp/dsp.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963

/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBGAV1_SRC_DSP_DSP_H_
#define LIBGAV1_SRC_DSP_DSP_H_

#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include "src/dsp/common.h"
#include "src/dsp/constants.h"
#include "src/dsp/film_grain_common.h"
#include "src/utils/cpu.h"
#include "src/utils/reference_info.h"
#include "src/utils/types.h"

namespace libgav1 {
namespace dsp {

// Default LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS to 0 unless the build has already
// defined it.
#if !defined(LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS)
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0
#endif

// Intra predictors dispatched through IntraPredictorFuncs.
// kNumIntraPredictors is the number of predictors listed before it; it sizes
// the second dimension of IntraPredictorFuncs.
enum IntraPredictor : uint8_t {
  kIntraPredictorDcFill,
  kIntraPredictorDcTop,
  kIntraPredictorDcLeft,
  kIntraPredictorDc,
  kIntraPredictorVertical,
  kIntraPredictorHorizontal,
  kIntraPredictorPaeth,
  kIntraPredictorSmooth,
  kIntraPredictorSmoothVertical,
  kIntraPredictorSmoothHorizontal,
  kNumIntraPredictors
};

// List of valid 1D transforms.
// kNumTransform1ds is the number of transforms listed before it; it sizes the
// first dimension of InverseTransformAddFuncs.
enum Transform1d : uint8_t {
  kTransform1dDct,   // Discrete Cosine Transform.
  kTransform1dAdst,  // Asymmetric Discrete Sine Transform.
  kTransform1dIdentity,
  kTransform1dWht,  // Walsh Hadamard Transform.
  kNumTransform1ds
};

// List of valid 1D transform sizes. Not all transforms may be available for all
// the sizes.
// kNumTransform1dSizes is the number of sizes listed before it; it sizes the
// second dimension of InverseTransformAddFuncs.
enum Transform1dSize : uint8_t {
  kTransform1dSize4,
  kTransform1dSize8,
  kTransform1dSize16,
  kTransform1dSize32,
  kTransform1dSize64,
  kNumTransform1dSizes
};

// The maximum width of the loop filter; fewer pixels may be filtered depending
// on strength thresholds.
// kNumLoopFilterSizes is the number of sizes listed before it; it sizes the
// first dimension of LoopFilterFuncs.
enum LoopFilterSize : uint8_t {
  kLoopFilterSize4,
  kLoopFilterSize6,
  kLoopFilterSize8,
  kLoopFilterSize14,
  kNumLoopFilterSizes
};

// Indices into the final dimension of InverseTransformAddFuncs, selecting the
// row or the column transform.
enum : uint8_t {
  kRow = 0,
  kColumn = 1,
};

//------------------------------------------------------------------------------
// ToString()
//
// These functions are meant to be used only in debug logging and within tests.
// They are defined inline to avoid including the strings in the release
// library when logging is disabled; unreferenced functions will not be added to
// any object file in that case.

// Returns the enumerator name of |predictor| as a string literal (the count
// value kNumIntraPredictors included). Calls abort() if |predictor| does not
// hold one of the IntraPredictor enumerators.
inline const char* ToString(const IntraPredictor predictor) {
  const char* name = nullptr;
  switch (predictor) {
    case kIntraPredictorDcFill:
      name = "kIntraPredictorDcFill";
      break;
    case kIntraPredictorDcTop:
      name = "kIntraPredictorDcTop";
      break;
    case kIntraPredictorDcLeft:
      name = "kIntraPredictorDcLeft";
      break;
    case kIntraPredictorDc:
      name = "kIntraPredictorDc";
      break;
    case kIntraPredictorVertical:
      name = "kIntraPredictorVertical";
      break;
    case kIntraPredictorHorizontal:
      name = "kIntraPredictorHorizontal";
      break;
    case kIntraPredictorPaeth:
      name = "kIntraPredictorPaeth";
      break;
    case kIntraPredictorSmooth:
      name = "kIntraPredictorSmooth";
      break;
    case kIntraPredictorSmoothVertical:
      name = "kIntraPredictorSmoothVertical";
      break;
    case kIntraPredictorSmoothHorizontal:
      name = "kIntraPredictorSmoothHorizontal";
      break;
    case kNumIntraPredictors:
      name = "kNumIntraPredictors";
      break;
  }
  // No case matched: the value is outside the enum.
  if (name == nullptr) abort();
  return name;
}

// Returns the enumerator name of |transform| as a string literal (the count
// value kNumTransform1ds included). Calls abort() if |transform| does not hold
// one of the Transform1d enumerators.
inline const char* ToString(const Transform1d transform) {
  const char* name = nullptr;
  switch (transform) {
    case kTransform1dDct:
      name = "kTransform1dDct";
      break;
    case kTransform1dAdst:
      name = "kTransform1dAdst";
      break;
    case kTransform1dIdentity:
      name = "kTransform1dIdentity";
      break;
    case kTransform1dWht:
      name = "kTransform1dWht";
      break;
    case kNumTransform1ds:
      name = "kNumTransform1ds";
      break;
  }
  // No case matched: the value is outside the enum.
  if (name == nullptr) abort();
  return name;
}

// Returns the enumerator name of |transform_size| as a string literal (the
// count value kNumTransform1dSizes included). Calls abort() if
// |transform_size| does not hold one of the Transform1dSize enumerators.
inline const char* ToString(const Transform1dSize transform_size) {
  const char* name = nullptr;
  switch (transform_size) {
    case kTransform1dSize4:
      name = "kTransform1dSize4";
      break;
    case kTransform1dSize8:
      name = "kTransform1dSize8";
      break;
    case kTransform1dSize16:
      name = "kTransform1dSize16";
      break;
    case kTransform1dSize32:
      name = "kTransform1dSize32";
      break;
    case kTransform1dSize64:
      name = "kTransform1dSize64";
      break;
    case kNumTransform1dSizes:
      name = "kNumTransform1dSizes";
      break;
  }
  // No case matched: the value is outside the enum.
  if (name == nullptr) abort();
  return name;
}

// Returns the enumerator name of |filter_size| as a string literal (the count
// value kNumLoopFilterSizes included). Calls abort() if |filter_size| does not
// hold one of the LoopFilterSize enumerators.
inline const char* ToString(const LoopFilterSize filter_size) {
  const char* name = nullptr;
  switch (filter_size) {
    case kLoopFilterSize4:
      name = "kLoopFilterSize4";
      break;
    case kLoopFilterSize6:
      name = "kLoopFilterSize6";
      break;
    case kLoopFilterSize8:
      name = "kLoopFilterSize8";
      break;
    case kLoopFilterSize14:
      name = "kLoopFilterSize14";
      break;
    case kNumLoopFilterSizes:
      name = "kNumLoopFilterSizes";
      break;
  }
  // No case matched: the value is outside the enum.
  if (name == nullptr) abort();
  return name;
}

// Returns the enumerator name of |filter_type| as a string literal (the count
// value kNumLoopFilterTypes included). Calls abort() if |filter_type| matches
// none of the cases handled here.
inline const char* ToString(const LoopFilterType filter_type) {
  const char* name = nullptr;
  switch (filter_type) {
    case kLoopFilterTypeVertical:
      name = "kLoopFilterTypeVertical";
      break;
    case kLoopFilterTypeHorizontal:
      name = "kLoopFilterTypeHorizontal";
      break;
    case kNumLoopFilterTypes:
      name = "kNumLoopFilterTypes";
      break;
  }
  // No case matched: the value is not one handled above.
  if (name == nullptr) abort();
  return name;
}

//------------------------------------------------------------------------------
// Intra predictors. Section 7.11.2.
// These require access to one or both of the top row and left column. Some may
// access the top-left (top[-1]), top-right (top[width+N]), bottom-left
// (left[height+N]) or upper-left (left[-1]).

// Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11),
// 7.11.2.5, 7.11.2.6.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left
// of |dst|. top-left and bottom-left may be accessed.
// The pointer arguments do not alias one another.
using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
                                    const void* top, const void* left);
// Table of intra predictor functions. The first dimension has
// kNumTransformSizes entries; the second has kNumIntraPredictors entries, one
// per IntraPredictor value.
using IntraPredictorFuncs =
    IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors];

// Directional intra predictor function signature, zone 1 (0 < angle < 90).
// Section 7.11.2.4 (#7).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |width| and |height| give the dimensions of the block.
// |xstep| is the scaled starting index to |top| from
// kDirectionalIntraPredictorDerivative. |upsampled_top| indicates whether
// |top| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. top-right
// (top[width+N]) is accessed.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* top, int width,
                                                    int height, int xstep,
                                                    bool upsampled_top);

// Directional intra predictor function signature, zone 2 (90 < angle < 180).
// Section 7.11.2.4 (#8).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left of
// |dst|. |width| and |height| give the dimensions of the block. |xstep| and
// |ystep| are the scaled starting index to |top| and |left|, respectively,
// from kDirectionalIntraPredictorDerivative. |upsampled_top| and
// |upsampled_left| indicate whether |top| and |left| have been upsampled as
// described in '7.11.2.11. Intra edge upsample process'. This can occur in
// cases with |width| + |height| <= 16. top-left (top[-1]) and upper-left
// (left[-1]) are accessed, up to [-2] in each if |upsampled_top| /
// |upsampled_left| are set.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone2Func = void (*)(
    void* dst, ptrdiff_t stride, const void* top, const void* left, int width,
    int height, int xstep, int ystep, bool upsampled_top, bool upsampled_left);

// Directional intra predictor function signature, zone 3 (180 < angle < 270).
// Section 7.11.2.4 (#9).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |left| is an aligned vector of the
// column to the left of |dst|. |width| and |height| give the dimensions of the
// block. |ystep| is the scaled starting index to |left| from
// kDirectionalIntraPredictorDerivative. |upsampled_left| indicates whether
// |left| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. bottom-left
// (left[height+N]) is accessed.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* left, int width,
                                                    int height, int ystep,
                                                    bool upsampled_left);

// Filter intra predictor function signature. Section 7.11.2.3.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left
// of |dst|. |pred| selects which FilterIntraPredictor to apply. |width| and
// |height| are the size of the block in pixels.
// The pointer arguments do not alias one another.
using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
                                          const void* top, const void* left,
                                          FilterIntraPredictor pred, int width,
                                          int height);

//------------------------------------------------------------------------------
// Chroma from Luma (Cfl) prediction. Section 7.11.5.

// Chroma from Luma (Cfl) intra prediction function signature. |dst| is an
// unaligned pointer to the output block. Pixel size is determined by bitdepth
// with |stride| given in bytes. |luma| is a kCflLumaBufferStride x
// kCflLumaBufferStride buffer containing subsampled luma pixels with 3
// fractional bits of precision. |alpha| is the signed Cfl alpha value for the
// appropriate plane.
using CflIntraPredictorFunc = void (*)(
    void* dst, ptrdiff_t stride,
    const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha);
// Table of Cfl intra predictors with kNumTransformSizes entries.
using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes];

// Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned
// pointer to the output block. |source| is an unaligned pointer to the input
// block. Pixel size is determined by bitdepth with |stride| given in bytes.
// NOTE(review): |max_luma_width| and |max_luma_height| are not described here;
// presumably they bound the luma area that may be read from |source| -- confirm
// against the implementations.
using CflSubsamplerFunc =
    void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
             int max_luma_width, int max_luma_height, const void* source,
             ptrdiff_t stride);
// Table of Cfl subsamplers. The first dimension has kNumTransformSizes entries
// and the second has kNumSubsamplingTypes entries.
using CflSubsamplerFuncs =
    CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes];

//------------------------------------------------------------------------------
// Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4.

// Intra edge filter function signature. |buffer| is a pointer to the top_row or
// left_column that needs to be filtered. Typically the -1'th index of |top_row|
// and |left_column| need to be filtered as well, so the caller can merely pass
// a pointer to top_row[-1] or left_column[-1] as |buffer|. Pixel size is
// determined by bitdepth. |size| is the number of pixels to be filtered.
// |strength| is the filter strength. Section 7.11.2.12 in the spec.
using IntraEdgeFilterFunc = void (*)(void* buffer, int size, int strength);

// Intra edge upsampler function signature. |buffer| is a pointer to the top_row
// or left_column that needs to be upsampled. Pixel size is determined by
// bitdepth. |size| is the number of pixels to be upsampled; valid values are:
// 4, 8, 12, 16. This function needs access to negative indices -1 and -2 of
// the |buffer|. |buffer| is the only pointer argument, so the result is
// returned through it. Section 7.11.2.11 in the spec.
using IntraEdgeUpsamplerFunc = void (*)(void* buffer, int size);

//------------------------------------------------------------------------------
// Inverse transform add function signature.
//
// Steps 2 and 3 of section 7.12.3 (contains the implementation of section
// 7.13.3).
// Apply the inverse transforms and add the residual to the destination frame
// for the transform type |tx_type| and block size |tx_size| starting at
// position |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D of
// Pixel values. |adjusted_tx_height| is the number of rows to process based on
// the non-zero coefficient count in the block. It will be 1 (non-zero
// coefficient count == 1), 4 or a multiple of 8 up to 32 or the original
// transform height, whichever is less. |src_buffer| is a pointer to an Array2D
// of Residual values. On input |src_buffer| contains the dequantized values, on
// output it contains the residual.
// The pointer arguments do not alias one another.
using InverseTransformAddFunc = void (*)(TransformType tx_type,
                                         TransformSize tx_size,
                                         int adjusted_tx_height,
                                         void* src_buffer, int start_x,
                                         int start_y, void* dst_frame);
// The first dimension has one entry per Transform1d and the second one entry
// per Transform1dSize. The final dimension holds row and column transforms
// indexed with kRow and kColumn.
using InverseTransformAddFuncs =
    InverseTransformAddFunc[kNumTransform1ds][kNumTransform1dSizes][2];

//------------------------------------------------------------------------------
// Post processing.

// Loop filter function signature. Section 7.14.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes.
// <threshold param> <spec name> <range>
// |outer_thresh|    blimit      [7, 193]
// |inner_thresh|    limit       [1, 63]
// |hev_thresh|      thresh      [0, 63]
// These are scaled by the implementation by 'bitdepth - 8' to produce
// the spec variables blimitBd, limitBd and threshBd.
// Note these functions are not called when the loop filter level is 0.
using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh,
                                int inner_thresh, int hev_thresh);
// Table of loop filters. The first dimension has one entry per LoopFilterSize
// and the second has kNumLoopFilterTypes entries.
using LoopFilterFuncs =
    LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes];

// Cdef direction function signature. Section 7.15.2.
// |src| is a pointer to the source block. Pixel size is determined by bitdepth
// with |stride| given in bytes. |direction| and |variance| are output
// parameters and must not be nullptr; the function itself returns nothing.
// The pointer arguments do not alias one another.
using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride,
                                   uint8_t* direction, int* variance);

// Cdef filtering function signature. Section 7.15.3.
// |source| is a pointer to the input block padded with kCdefLargeValue if at a
// frame border. |source_stride| is given in units of uint16_t.
// |block_height| is the height of the input block. The block width is not a
// parameter of this signature; it is selected by the first index of
// CdefFilteringFuncs.
// |primary_strength|, |secondary_strength|, and |damping| are Cdef filtering
// parameters.
// |direction| is the filtering direction.
// |dest| is the output buffer. |dest_stride| is given in bytes.
// The pointer arguments do not alias one another.
using CdefFilteringFunc = void (*)(const uint16_t* source,
                                   ptrdiff_t source_stride, int block_height,
                                   int primary_strength, int secondary_strength,
                                   int damping, int direction, void* dest,
                                   ptrdiff_t dest_stride);

// The first index is block width: [0]: 4, [1]: 8. The second is based on
// non-zero strengths: [0]: |primary_strength| and |secondary_strength|, [1]:
// |primary_strength| only, [2]: |secondary_strength| only.
using CdefFilteringFuncs = CdefFilteringFunc[2][3];

// Upscaling coefficients function signature. Section 7.16.
// This is an auxiliary function for SIMD optimizations and has no corresponding
// C function. Different SIMD versions may have different outputs. So it must
// pair with the corresponding version of SuperResFunc.
// |upscaled_width| is the width of the output frame.
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |initial_subpixel_x| is a base offset from which |step| increments.
// |coefficients| is the output: the upscale filter used by each pixel in a row.
using SuperResCoefficientsFunc = void (*)(int upscaled_width,
                                          int initial_subpixel_x, int step,
                                          void* coefficients);

// Upscaling process function signature. Section 7.16.
// |coefficients| is the upscale filter used by each pixel in a row, as produced
// by the paired SuperResCoefficientsFunc. It is not used by the C function.
// |source| is the input frame buffer. It will be line extended.
// |source_stride| is given in pixels.
// |dest| is the output buffer.
// |dest_stride| is given in pixels.
// |height| is the height of the block to be processed.
// |downscaled_width| is the width of the input frame.
// |upscaled_width| is the width of the output frame.
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |initial_subpixel_x| is a base offset from which |step| increments.
// The pointer arguments do not alias one another.
using SuperResFunc = void (*)(const void* coefficients, void* source,
                              ptrdiff_t source_stride, int height,
                              int downscaled_width, int upscaled_width,
                              int initial_subpixel_x, int step, void* dest,
                              ptrdiff_t dest_stride);

// Loop restoration function signature. Sections 7.16, 7.17.
// |restoration_info| contains loop restoration information, such as filter
// type, strength.
// |source| is the input frame buffer, which is deblocked and cdef filtered.
// |top_border| and |bottom_border| are the top and bottom borders.
// |dest| is the output.
// |stride| is given in pixels, and shared by |source| and |dest|.
// |top_border_stride| and |bottom_border_stride| are given in pixels.
// NOTE(review): |width| and |height| are not described here; presumably they
// are the dimensions of the area to restore -- confirm against the callers.
// |restoration_buffer| contains buffers required for self guided filter and
// wiener filter. They must be initialized before calling.
// The pointer arguments do not alias one another.
using LoopRestorationFunc = void (*)(
    const RestorationUnitInfo& restoration_info, const void* source,
    ptrdiff_t stride, const void* top_border, ptrdiff_t top_border_stride,
    const void* bottom_border, ptrdiff_t bottom_border_stride, int width,
    int height, RestorationBuffer* restoration_buffer, void* dest);

// Index 0 is Wiener Filter.
// Index 1 is Self Guided Restoration Filter.
// This can be accessed as LoopRestorationType - 2.
using LoopRestorationFuncs = LoopRestorationFunc[2];

// Convolve function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |horizontal_filter_id| and |vertical_filter_id| are the filter ids.
// |width| and |height| are width and height of the block to be filtered.
// |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in
// x/y direction.
// NOTE(review): |ref_last_x| and |ref_last_y| are not parameters of this
// signature; the two lines above look stale -- confirm and remove.
// |prediction| is the output block (output frame buffer).
// NOTE(review): |pred_stride| is presumably the stride of |prediction|; its
// units are not stated here -- confirm.
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
// The pointer arguments do not alias one another.
using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride,
                              int horizontal_filter_index,
                              int vertical_filter_index,
                              int horizontal_filter_id, int vertical_filter_id,
                              int width, int height, void* prediction,
                              ptrdiff_t pred_stride);

// Convolve functions signature. Each points to one convolve function with
// a specific setting:
// ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter]
// [has_horizontal_filter].
// If is_compound is false, the prediction is clipped to Pixel.
// If is_compound is true, the range of prediction is:
//   8bpp:  [-5132,  9212] (int16_t)
//   10bpp: [ 3988, 61532] (uint16_t)
//   12bpp: [ 3974, 61559] (uint16_t)
// See src/dsp/convolve.cc
using ConvolveFuncs = ConvolveFunc[2][2][2][2];

// Convolve + scale function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024.
// |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel.
// |width| and |height| are width and height of the block to be filtered.
// |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in
// x/y direction.
// NOTE(review): |ref_last_x| and |ref_last_y| are not parameters of this
// signature; the two lines above look stale -- confirm and remove.
// |prediction| is the output block (output frame buffer).
// NOTE(review): |pred_stride| is presumably the stride of |prediction|; its
// units are not stated here -- confirm.
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
// The pointer arguments do not alias one another.
using ConvolveScaleFunc = void (*)(const void* reference,
                                   ptrdiff_t reference_stride,
                                   int horizontal_filter_index,
                                   int vertical_filter_index, int subpixel_x,
                                   int subpixel_y, int step_x, int step_y,
                                   int width, int height, void* prediction,
                                   ptrdiff_t pred_stride);

// Convolve functions signature for scaling version.
// 0: single predictor. 1: compound predictor.
using ConvolveScaleFuncs = ConvolveScaleFunc[2];

// Weight mask function signature. Section 7.11.3.12.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// The prediction width and height are not parameters of this signature; they
// are selected by the width and height indices of WeightMaskFuncs.
// The stride for the input buffers is equal to the prediction width.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |mask| is the output buffer. |mask_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using WeightMaskFunc = void (*)(const void* prediction_0,
                                const void* prediction_1, uint8_t* mask,
                                ptrdiff_t mask_stride);

// Weight mask functions signature. The dimensions (in order) are:
//   * Width index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * Height index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * mask_is_inverse.
using WeightMaskFuncs = WeightMaskFunc[6][6][2];

// Average blending function signature.
// Two predictors are averaged to generate the output.
// Input predictor values are int16_t. Output type is uint8_t, with actual
// range of Pixel value.
// NOTE(review): the types above appear to describe bitdepth == 8 only; the
// |prediction_1| description below gives uint16_t inputs for other bitdepths
// -- confirm the output type in that case.
// Average blending is at the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE).
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using AverageBlendFunc = void (*)(const void* prediction_0,
                                  const void* prediction_1, int width,
                                  int height, void* dest,
                                  ptrdiff_t dest_stride);

// Distance weighted blending function signature.
// Weights are generated in Section 7.11.3.15.
// Weighted blending is at the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE).
// This function takes two blocks (inter frame prediction) and produces a
// weighted output.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |weight_0| is the weight for the first block. It is derived from the relative
// distance of the first reference frame and the current frame.
// |weight_1| is the weight for the second block. It is derived from the
// relative distance of the second reference frame and the current frame.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
                                           const void* prediction_1,
                                           uint8_t weight_0, uint8_t weight_1,
                                           int width, int height, void* dest,
                                           ptrdiff_t dest_stride);

// Mask blending function signature. Section 7.11.3.14.
// This function takes two blocks and produces a blended output stored into the
// output block |dest|. The blending is a weighted average process, controlled
// by the values of |mask|.
// |prediction_0| is the first input block. When the prediction mode is
// inter_intra (or wedge_inter_intra), this refers to the inter frame
// prediction. It is int16_t* when bitdepth == 8 and uint16_t* otherwise.
// The stride for |prediction_0| is equal to |width|.
// |prediction_1| is the second input block. When the prediction mode is
// inter_intra (or wedge_inter_intra), this refers to the intra frame
// prediction and uses Pixel values. It is only used for intra frame prediction
// when bitdepth >= 10. It is int16_t* when bitdepth == 8 and uint16_t*
// otherwise.
// |prediction_stride_1| is the stride, given in units of [u]int16_t. When
// |is_inter_intra| is false (compound prediction) then |prediction_stride_1| is
// equal to |width|.
// |mask| is an integer array whose values indicate the blending weights.
// |mask_stride| is the corresponding stride.
// |width|, |height| are the same for both input blocks.
// If it's inter_intra (or wedge_inter_intra), the valid range of block size is
// [8x8, 32x32], no 4:1/1:4 blocks (Section 5.11.28). Otherwise (including
// difference weighted prediction and compound average prediction), the valid
// range is [8x8, 128x128].
// If there's subsampling, the corresponding width and height are halved for
// chroma planes.
// |is_inter_intra| stands for the prediction mode. If it is true, one of the
// prediction blocks is from intra prediction of the current frame. Otherwise,
// both prediction blocks are inter frame predictions.
// |is_wedge_inter_intra| indicates if the mask is for the wedge prediction.
// Note: |is_inter_intra| and |is_wedge_inter_intra| are not parameters of this
// signature; |is_inter_intra| is used as an index into the MaskBlendFuncs
// table.
// |dest| is the output block.
// |dest_stride| is the corresponding stride for |dest|.
// The pointer arguments do not alias one another.
using MaskBlendFunc = void (*)(const void* prediction_0,
                               const void* prediction_1,
                               ptrdiff_t prediction_stride_1,
                               const uint8_t* mask, ptrdiff_t mask_stride,
                               int width, int height, void* dest,
                               ptrdiff_t dest_stride);

// Table of mask blending functions. Each entry points to one function with
// a specific setting:
// MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra].
// The first dimension has 3 entries (the sum of the two subsampling flags) and
// the second has 2 entries (|is_inter_intra| false/true).
using MaskBlendFuncs = MaskBlendFunc[3][2];

// This function is similar to the MaskBlendFunc. It is only used when
// |is_inter_intra| is true and |bitdepth| == 8.
// |prediction_[01]| are Pixel values (uint8_t).
// |prediction_1| is also the output buffer, which is why it is not const and
// why this signature has no separate |dest| / |dest_stride| parameters.
// NOTE(review): the remaining parameters presumably have the same meaning as
// in MaskBlendFunc; the units of |prediction_stride_1| are not stated here --
// confirm against the implementations.
// The pointer arguments do not alias one another.
using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0,
                                             uint8_t* prediction_1,
                                             ptrdiff_t prediction_stride_1,
                                             const uint8_t* mask,
                                             ptrdiff_t mask_stride, int width,
                                             int height);

// Table of InterIntra8bpp mask blending functions (3 entries). When
// is_wedge_inter_intra is false, the function at index 0 must be used.
// Otherwise, the function at index subsampling_x + subsampling_y must be used.
using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3];

// Obmc (overlapped block motion compensation) blending function signature.
// Section 7.11.3.10.
// This function takes two blocks and produces a blended output stored into the
// first input block. The blending is a weighted average process, controlled by
// values of the mask.
// Obmc is not a compound mode. It is different from other compound blending,
// in terms of precision. The current block is computed using convolution with
// clipping to the range of pixel values. Its above and left blocks are also
// clipped. Therefore the obmc blending process doesn't need to clip the
// output.
// |prediction| is the first input block, which will be overwritten with the
// blended result.
// |prediction_stride| is the stride, given in bytes.
// |width|, |height| are the same for both input blocks. The range is [4x2,
// 32x32] for kObmcDirectionVertical and [2x4, 32x32] for
// kObmcDirectionHorizontal, see Section 7.11.3.9.
// |obmc_prediction| is the second input block. It is read-only.
// |obmc_prediction_stride| is its stride, given in bytes.
// The pointer arguments do not alias one another.
using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride,
                               int width, int height,
                               const void* obmc_prediction,
                               ptrdiff_t obmc_prediction_stride);
// Table with kNumObmcDirections entries. NOTE(review): presumably indexed by
// the obmc direction (kObmcDirectionVertical / kObmcDirectionHorizontal) --
// confirm against the callers.
using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections];

// Warp function signature. Section 7.11.3.5.
// This function applies warp filtering for each 8x8 block inside the current
// coding block. The filtering process is similar to 2d convolve filtering.
// The horizontal filter is applied followed by the vertical filter.
// The function has to calculate corresponding pixel positions before and
// after warping.
// |source| is the input reference frame buffer.
// |source_stride|, |source_width|, |source_height| are the corresponding frame
// stride, width, and height. |source_stride| is given in bytes.
// |warp_params| is the matrix of warp motion: warp_params[i] = mN.
//         [x'     (m2 m3 m0   [x
//     z .  y'  =   m4 m5 m1 *  y
//          1]      m6 m7 1)    1]
// |subsampling_x/y| is the current frame's plane subsampling factor.
// |block_start_x| and |block_start_y| are the starting position of the current
// coding block.
// |block_width| and |block_height| are the width and height of the current
// coding block. |block_width| and |block_height| are at least 8.
// |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the
// comments in the definition of struct GlobalMotion for the range of their
// values.
// |dest| is the output buffer of type Pixel. The output values are clipped to
// Pixel values.
// |dest_stride| is the stride, in units of bytes.
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For vertical filtering kInterRoundBitsVertical &
// kInterRoundBitsVertical12bpp will be used.
//
// NOTE: WarpFunc assumes the source frame has left, right, top, and bottom
// borders that extend the frame boundary pixels.
// * The left and right borders must be at least 13 pixels wide. In addition,
//   Warp_NEON() may read up to 14 bytes after a row in the |source| buffer.
//   Therefore, there must be at least one extra padding byte after the right
//   border of the last row in the source buffer.
// * The top and bottom borders must be at least 13 pixels high.
// The pointer arguments do not alias one another.
using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride,
                          int source_width, int source_height,
                          const int* warp_params, int subsampling_x,
                          int subsampling_y, int block_start_x,
                          int block_start_y, int block_width, int block_height,
                          int16_t alpha, int16_t beta, int16_t gamma,
                          int16_t delta, void* dest, ptrdiff_t dest_stride);

// Warp for compound predictions. Section 7.11.3.5.
// Shares the WarpFunc signature, but |dest| is a uint16_t predictor buffer and
// |dest_stride| is given in units of uint16_t.
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For vertical filtering the compound rounding constant, which is always
// 7, will be used. Note that the signature has no |inter_round_bits_vertical|
// parameter; the value is fixed by the function being called.
// NOTE(review): this comment previously referred to the vertical constant as
// both kCompoundInterRoundBitsVertical and kInterRoundBitsCompondVertical
// (sic); confirm the actual identifier, presumably
// kInterRoundBitsCompoundVertical.
using WarpCompoundFunc = WarpFunc;

// Number of possible values of auto_regression_coeff_lag.
// NOTE(review): presumably the lags are 0..3 -- confirm against
// FilmGrainParams.
constexpr int kNumAutoRegressionLags = 4;
// Applies an auto-regressive filter to the white noise in |luma_grain_buffer|.
// Section 7.18.3.3, second code block
// |params| are parameters read from frame header, mainly providing
// auto_regression_coeff_y for the filter and auto_regression_shift to right
// shift the filter sum by. Note: This method assumes
// params.auto_regression_coeff_lag is not 0. Do not call this method if
// params.auto_regression_coeff_lag is 0.
// |luma_grain_buffer| is both the input and the output; it is filtered in
// place.
using LumaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
                                        void* luma_grain_buffer);
// Function index is auto_regression_coeff_lag - 1. There is no entry for a lag
// of 0, hence the table has kNumAutoRegressionLags - 1 entries.
using LumaAutoRegressionFuncs =
    LumaAutoRegressionFunc[kNumAutoRegressionLags - 1];

// Applies an auto-regressive filter to the white noise in u_grain and v_grain.
// Section 7.18.3.3, third code block
// The |luma_grain_buffer| provides samples that are added to the autoregressive
// sum when num_y_points > 0. It is read-only.
// |subsampling_x| and |subsampling_y| are the chroma subsampling factors.
// |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise
// that were generated from the stored Gaussian sequence, and are overwritten
// with the results of the autoregressive filter. |params| are parameters read
// from frame header, mainly providing auto_regression_coeff_u and
// auto_regression_coeff_v for each chroma plane's filter, and
// auto_regression_shift to right shift the filter sums by.
// The pointer arguments do not alias one another.
using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
                                          const void* luma_grain_buffer,
                                          int subsampling_x, int subsampling_y,
                                          void* u_grain_buffer,
                                          void* v_grain_buffer);
// The first index is use_luma. NOTE(review): use_luma presumably corresponds
// to num_y_points > 0, and the second index is presumably
// auto_regression_coeff_lag (including 0, unlike the luma table) -- confirm
// against the callers.
using ChromaAutoRegressionFuncs =
    ChromaAutoRegressionFunc[/*use_luma*/ 2][kNumAutoRegressionLags];

// Build an image-wide "stripe" of grain noise for every 32 rows in the image.
// Section 7.18.3.5, first code block.
// Each 32x32 luma block is copied at a random offset specified via
// |grain_seed| from the grain template produced by autoregression, and the same
// is done for chroma grains, subject to subsampling.
// |grain_buffer| is the (read-only) grain template for the plane being
// processed.
// |width| and |height| are the dimensions of the overall image.
// |subsampling_x| and |subsampling_y| are the subsampling factors of the
// plane.
// |noise_stripes_buffer| points to an Array2DView with one row for each stripe.
// It is the output of this function.
// Because this function treats all planes identically and independently, it is
// simplified to take one grain buffer at a time. This means duplicating some
// random number generations, but that work can be reduced in other ways.
// The pointer arguments do not alias one another.
using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer,
                                           int grain_seed, int width,
                                           int height, int subsampling_x,
                                           int subsampling_y,
                                           void* noise_stripes_buffer);
// Function index is overlap_flag.
using ConstructNoiseStripesFuncs =
    ConstructNoiseStripesFunc[/*overlap_flag*/ 2];

// Compute the one or two overlap rows for each stripe copied to the noise
// image.
// Section 7.18.3.5, second code block.
// |noise_stripes_buffer| points to an Array2DView with one row for each
// stripe. It is read-only.
// |width| and |height| are the dimensions of the overall image.
// |subsampling_x| and |subsampling_y| are the subsampling factors of the
// plane.
// |noise_image_buffer| points to an Array2D containing the allocated plane for
// this frame. It is the output of this function.
// Because this function treats all planes identically and independently, it is
// simplified to take one grain buffer at a time.
// The pointer arguments do not alias one another.
using ConstructNoiseImageOverlapFunc =
    void (*)(const void* noise_stripes_buffer, int width, int height,
             int subsampling_x, int subsampling_y, void* noise_image_buffer);

// Populate a scaling lookup table with interpolated values of a piecewise
// linear function where values in |point_value| are mapped to the values in
// |point_scaling|.
// |num_points| can be between 0 and 15. When 0, the lookup table is set to
// zero.
// |point_value| and |point_scaling| have |num_points| valid elements.
// |scaling_lut| is the lookup table to populate.
// |scaling_lut_length| is presumably the number of entries in |scaling_lut| --
// TODO confirm.
// The pointer arguments do not alias one another.
using InitializeScalingLutFunc = void (*)(int num_points,
                                          const uint8_t point_value[],
                                          const uint8_t point_scaling[],
                                          int16_t* scaling_lut,
                                          const int scaling_lut_length);

// Blend noise with image. Section 7.18.3.5, third code block.
// This comment describes both BlendNoiseWithImageLumaFunc and
// BlendNoiseWithImageChromaFunc.
// |noise_image_ptr| is the (read-only) noise image to blend in.
// |width| is the width of each row, while |height| is how many rows to compute.
// |start_height| is an offset for the noise image, to support multithreading.
// |min_value| and |max_value| are computed by the caller of these functions,
// according to the code in the spec. NOTE(review): |max_value| is presumably
// the spec's max_luma for the luma function and max_chroma for the chroma
// functions -- confirm against the callers.
// |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded
// frame. They are blended with the film grain noise and written to
// |dest_plane_y| and |dest_plane_uv| as final output for display.
// source_plane_* and dest_plane_* may point to the same buffer, in which case
// the film grain noise is added in place.
// |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from
// the frame's raw pixel value, to a scaling factor for the noise sample.
// |scaling_shift| is applied as a right shift after scaling, so that scaling
// down is possible. It is found in FilmGrainParams, but supplied directly to
// BlendNoiseWithImageLumaFunc because it's the only member used.
// The dest plane may point to the source plane, depending on the value of
// frame_header.show_existing_frame. |noise_image_ptr| and scaling_lut.* do not
// alias other arguments.
using BlendNoiseWithImageLumaFunc = void (*)(
    const void* noise_image_ptr, int min_value, int max_value,
    int scaling_shift, int width, int height, int start_height,
    const int16_t* scaling_lut_y, const void* source_plane_y,
    ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y);

// Chroma version of the noise blending function. The parameters it shares with
// BlendNoiseWithImageLumaFunc are described in the comment preceding that
// alias. Differences visible in the signature:
// |plane| identifies the plane being processed.
// |params| is passed as a whole instead of a single |scaling_shift| value.
// |subsampling_x| and |subsampling_y| are the chroma subsampling factors.
// Both the luma source plane (|source_plane_y|, read-only) and the chroma
// source plane (|source_plane_uv|) are supplied; only |dest_plane_uv| is
// written.
using BlendNoiseWithImageChromaFunc = void (*)(
    Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
    int min_value, int max_value, int width, int height, int start_height,
    int subsampling_x, int subsampling_y, const int16_t* scaling_lut,
    const void* source_plane_y, ptrdiff_t source_stride_y,
    const void* source_plane_uv, ptrdiff_t source_stride_uv,
    void* dest_plane_uv, ptrdiff_t dest_stride_uv);

// Function index is chroma_scaling_from_luma.
using BlendNoiseWithImageChromaFuncs =
    BlendNoiseWithImageChromaFunc[/*chroma_scaling_from_luma*/ 2];

//------------------------------------------------------------------------------

// Groups the film grain function pointers and function tables declared above.
// An instance is stored in Dsp::film_grain.
struct FilmGrainFuncs {
  // Indexed by auto_regression_coeff_lag - 1.
  LumaAutoRegressionFuncs luma_auto_regression;
  // First index is use_luma.
  ChromaAutoRegressionFuncs chroma_auto_regression;
  // Indexed by overlap_flag.
  ConstructNoiseStripesFuncs construct_noise_stripes;
  ConstructNoiseImageOverlapFunc construct_noise_image_overlap;
  InitializeScalingLutFunc initialize_scaling_lut;
  BlendNoiseWithImageLumaFunc blend_noise_luma;
  // Indexed by chroma_scaling_from_luma.
  BlendNoiseWithImageChromaFuncs blend_noise_chroma;
};

// Motion field projection function signature. Section 7.9.
// |reference_info| provides reference information for motion field projection.
// It is read-only.
// |reference_to_current_with_sign| is the precalculated reference frame id
// distance from current frame.
// |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others.
// |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile.
// |x8_start| and |x8_end| are the start and end 8x8 columns of the current
// tile.
// |motion_field| is the output which saves the projected motion field
// information. It is the only writable argument.
// Note: Only the entry from the 8-bit Dsp table is used as this function is
// bitdepth agnostic.
using MotionFieldProjectionKernelFunc = void (*)(
    const ReferenceInfo& reference_info, int reference_to_current_with_sign,
    int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end,
    TemporalMotionField* motion_field);

// Compound temporal motion vector projection function signature.
// Section 7.9.3 and 7.10.2.10.
// |temporal_mvs| is the aligned set of temporal reference motion vectors.
// |temporal_reference_offsets| specifies the number of frames covered by the
// original motion vector.
// |reference_offsets| specifies the number of frames to be covered by the
// projected motion vector. It has two elements.
// |count| is the number of the temporal motion vectors.
// |candidate_mvs| is the aligned set of projected motion vectors. It is the
// output of this function.
// The pointer arguments do not alias one another.
// Note: Only the entry from the 8-bit Dsp table is used as this function is
// bitdepth agnostic.
using MvProjectionCompoundFunc = void (*)(
    const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
    const int reference_offsets[2], int count,
    CompoundMotionVector* candidate_mvs);

// Single temporal motion vector projection function signature.
// Section 7.9.3 and 7.10.2.10.
// |temporal_mvs| is the aligned set of temporal reference motion vectors.
// |temporal_reference_offsets| specifies the number of frames covered by the
// original motion vector.
// |reference_offset| specifies the number of frames to be covered by the
// projected motion vector. Unlike MvProjectionCompoundFunc, it is a single
// value.
// |count| is the number of the temporal motion vectors.
// |candidate_mvs| is the aligned set of projected motion vectors. It is the
// output of this function.
// The pointer arguments do not alias one another.
// Note: Only the entry from the 8-bit Dsp table is used as this function is
// bitdepth agnostic.
using MvProjectionSingleFunc = void (*)(
    const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
    int reference_offset, int count, MotionVector* candidate_mvs);

// Table of dsp function pointers and function tables. A table is looked up by
// bitdepth with GetDspTable(); members are listed in alphabetical order.
struct Dsp {
  AverageBlendFunc average_blend;
  CdefDirectionFunc cdef_direction;
  CdefFilteringFuncs cdef_filters;
  CflIntraPredictorFuncs cfl_intra_predictors;
  CflSubsamplerFuncs cfl_subsamplers;
  ConvolveFuncs convolve;
  ConvolveScaleFuncs convolve_scale;
  DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1;
  DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2;
  DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3;
  DistanceWeightedBlendFunc distance_weighted_blend;
  FilmGrainFuncs film_grain;
  FilterIntraPredictorFunc filter_intra_predictor;
  // Only used when bitdepth == 8; see InterIntraMaskBlendFunc8bpp.
  InterIntraMaskBlendFuncs8bpp inter_intra_mask_blend_8bpp;
  IntraEdgeFilterFunc intra_edge_filter;
  IntraEdgeUpsamplerFunc intra_edge_upsampler;
  IntraPredictorFuncs intra_predictors;
  InverseTransformAddFuncs inverse_transforms;
  LoopFilterFuncs loop_filters;
  LoopRestorationFuncs loop_restorations;
  // Indexed by [subsampling_x + subsampling_y][is_inter_intra].
  MaskBlendFuncs mask_blend;
  // The three motion vector members below are bitdepth agnostic; only the
  // entries from the 8-bit Dsp table are used.
  MotionFieldProjectionKernelFunc motion_field_projection_kernel;
  // NOTE(review): the meaning of the index of the 3-entry arrays is not
  // documented here -- confirm against the callers.
  MvProjectionCompoundFunc mv_projection_compound[3];
  MvProjectionSingleFunc mv_projection_single[3];
  ObmcBlendFuncs obmc_blend;
  SuperResCoefficientsFunc super_res_coefficients;
  SuperResFunc super_res;
  // Same signature as |warp|, but for compound predictions.
  WarpCompoundFunc warp_compound;
  WarpFunc warp;
  WeightMaskFuncs weight_mask;
};

// Initializes function pointers based on build config and runtime
// environment. Must be called once before first use. This function is
// thread-safe.
// NOTE(review): presumably this populates the tables returned by
// GetDspTable() -- confirm against the definition.
void DspInit();

// Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
// exist. The returned table is read-only; callers must check for nullptr
// before dereferencing it. See dsp_internal::GetWritableDspTable() for the
// writable variant.
const Dsp* GetDspTable(int bitdepth);

}  // namespace dsp

namespace dsp_internal {

// Visual Studio builds don't have a way to detect SSE4_1. Only exclude the C
// functions if /arch:AVX2 is used across all sources.
// The override applies only to Visual Studio builds that target x86 or x64
// (_M_IX86 / _M_X64), which is where SSE4_1 is relevant. Other Visual Studio
// targets and other compilers keep whatever value
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS already has.
#if !LIBGAV1_TARGETING_AVX2 && \
    (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#undef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 1
#endif

// Returns true if a more highly optimized version of |func| is not defined for
// the associated bitdepth or if it is forcibly enabled with
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds
// to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated
// with the module.
// |func| is one of:
//   - FunctionName, e.g., SelfGuidedFilter.
//   - [sub-table-index1][...-indexN] e.g.,
//     TransformSize4x4_IntraPredictorDc. The indices correspond to enum values
//     used as lookups with leading 'k' removed.
//
// NEON support is the only extension available for ARM and it is always
// required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always
// true and can be omitted.
//
// Each macro below pastes |func| onto the LIBGAV1_Dsp<bitdepth>bpp_ prefix and
// compares the resulting define against the LIBGAV1_CPU_* value of the
// instruction set named in the macro. The expansions are intended for use in
// preprocessor conditionals.

// 8bpp: true if the AVX2 version of |func| is the selected implementation.
#define DSP_ENABLED_8BPP_AVX2(func)    \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_AVX2)
// 10bpp: true if the AVX2 version of |func| is the selected implementation.
#define DSP_ENABLED_10BPP_AVX2(func)   \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_AVX2)
// 8bpp: true if the SSE4_1 version of |func| is the selected implementation.
#define DSP_ENABLED_8BPP_SSE4_1(func)  \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_SSE4_1)
// 10bpp: true if the SSE4_1 version of |func| is the selected implementation.
#define DSP_ENABLED_10BPP_SSE4_1(func) \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_SSE4_1)

// Initializes C-only function pointers. Note some entries may be set to
// nullptr if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is not defined. This is meant
// for use in tests only. Unlike dsp::DspInit(), it is not thread-safe.
void DspInit_C();

// Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
// exist. This version is meant for use by test or dsp/*Init() functions only.
// Unlike dsp::GetDspTable(), the returned table is not const, so its entries
// can be assigned.
dsp::Dsp* GetWritableDspTable(int bitdepth);

}  // namespace dsp_internal
}  // namespace libgav1

#endif  // LIBGAV1_SRC_DSP_DSP_H_