diff options
Diffstat (limited to 'src/dsp/dsp.h')
-rw-r--r-- | src/dsp/dsp.h | 910 |
1 files changed, 910 insertions, 0 deletions
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h new file mode 100644 index 0000000..fcbac3a --- /dev/null +++ b/src/dsp/dsp.h @@ -0,0 +1,910 @@ +/* + * Copyright 2019 The libgav1 Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIBGAV1_SRC_DSP_DSP_H_ +#define LIBGAV1_SRC_DSP_DSP_H_ + +#include <cstddef> // ptrdiff_t +#include <cstdint> +#include <cstdlib> + +#include "src/dsp/common.h" +#include "src/dsp/constants.h" +#include "src/dsp/film_grain_common.h" +#include "src/utils/cpu.h" +#include "src/utils/reference_info.h" +#include "src/utils/types.h" + +namespace libgav1 { +namespace dsp { + +#if !defined(LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS) +#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0 +#endif + +enum IntraPredictor : uint8_t { + kIntraPredictorDcFill, + kIntraPredictorDcTop, + kIntraPredictorDcLeft, + kIntraPredictorDc, + kIntraPredictorVertical, + kIntraPredictorHorizontal, + kIntraPredictorPaeth, + kIntraPredictorSmooth, + kIntraPredictorSmoothVertical, + kIntraPredictorSmoothHorizontal, + kNumIntraPredictors +}; + +// List of valid 1D transforms. +enum Transform1D : uint8_t { + k1DTransformDct, // Discrete Cosine Transform. + k1DTransformAdst, // Asymmetric Discrete Sine Transform. + k1DTransformIdentity, + k1DTransformWht, // Walsh Hadamard Transform. + kNum1DTransforms +}; + +// List of valid 1D transform sizes. Not all transforms may be available for all +// the sizes. +enum TransformSize1D : uint8_t { + k1DTransformSize4, + k1DTransformSize8, + k1DTransformSize16, + k1DTransformSize32, + k1DTransformSize64, + kNum1DTransformSizes +}; + +// The maximum width of the loop filter, fewer pixels may be filtered depending +// on strength thresholds. +enum LoopFilterSize : uint8_t { + kLoopFilterSize4, + kLoopFilterSize6, + kLoopFilterSize8, + kLoopFilterSize14, + kNumLoopFilterSizes +}; + +enum : uint8_t { + kRow = 0, + kColumn = 1, +}; + +//------------------------------------------------------------------------------ +// ToString() +// +// These functions are meant to be used only in debug logging and within tests. +// They are defined inline to avoid including the strings in the release +// library when logging is disabled; unreferenced functions will not be added to +// any object file in that case. + +inline const char* ToString(const IntraPredictor predictor) { + switch (predictor) { + case kIntraPredictorDcFill: + return "kIntraPredictorDcFill"; + case kIntraPredictorDcTop: + return "kIntraPredictorDcTop"; + case kIntraPredictorDcLeft: + return "kIntraPredictorDcLeft"; + case kIntraPredictorDc: + return "kIntraPredictorDc"; + case kIntraPredictorVertical: + return "kIntraPredictorVertical"; + case kIntraPredictorHorizontal: + return "kIntraPredictorHorizontal"; + case kIntraPredictorPaeth: + return "kIntraPredictorPaeth"; + case kIntraPredictorSmooth: + return "kIntraPredictorSmooth"; + case kIntraPredictorSmoothVertical: + return "kIntraPredictorSmoothVertical"; + case kIntraPredictorSmoothHorizontal: + return "kIntraPredictorSmoothHorizontal"; + case kNumIntraPredictors: + return "kNumIntraPredictors"; + } + abort(); +} + +inline const char* ToString(const Transform1D transform) { + switch (transform) { + case k1DTransformDct: + return "k1DTransformDct"; + case k1DTransformAdst: + return "k1DTransformAdst"; + case k1DTransformIdentity: + return "k1DTransformIdentity"; + case k1DTransformWht: + return "k1DTransformWht"; + case kNum1DTransforms: + return "kNum1DTransforms"; + } + abort(); +} + +inline const char* ToString(const TransformSize1D transform_size) { + switch (transform_size) { + case k1DTransformSize4: + return "k1DTransformSize4"; + case k1DTransformSize8: + return "k1DTransformSize8"; + case k1DTransformSize16: + return "k1DTransformSize16"; + case k1DTransformSize32: + return "k1DTransformSize32"; + case k1DTransformSize64: + return "k1DTransformSize64"; + case kNum1DTransformSizes: + return "kNum1DTransformSizes"; + } + abort(); +} + +inline const char* ToString(const LoopFilterSize filter_size) { + switch (filter_size) { + case kLoopFilterSize4: + return "kLoopFilterSize4"; + case kLoopFilterSize6: + return "kLoopFilterSize6"; + case kLoopFilterSize8: + return "kLoopFilterSize8"; + case kLoopFilterSize14: + return "kLoopFilterSize14"; + case kNumLoopFilterSizes: + return "kNumLoopFilterSizes"; + } + abort(); +} + +inline const char* ToString(const LoopFilterType filter_type) { + switch (filter_type) { + case kLoopFilterTypeVertical: + return "kLoopFilterTypeVertical"; + case kLoopFilterTypeHorizontal: + return "kLoopFilterTypeHorizontal"; + case kNumLoopFilterTypes: + return "kNumLoopFilterTypes"; + } + abort(); +} + +//------------------------------------------------------------------------------ +// Intra predictors. Section 7.11.2. +// These require access to one or both of the top row and left column. Some may +// access the top-left (top[-1]), top-right (top[width+N]), bottom-left +// (left[height+N]) or upper-left (left[-1]). + +// Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11), +// 7.11.2.5, 7.11.2.6. +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to +// the row above |dst|. |left| is an aligned vector of the column to the left +// of |dst|. top-left and bottom-left may be accessed. +using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride, + const void* top, const void* left); +using IntraPredictorFuncs = + IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors]; + +// Directional intra predictor function signature, zone 1 (0 < angle < 90). +// Section 7.11.2.4 (#7). +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to +// the row above |dst|. |width| and |height| give the dimensions of the block. +// |xstep| is the scaled starting index to |top| from +// kDirectionalIntraPredictorDerivative. |upsampled_top| indicates whether +// |top| has been upsampled as described in '7.11.2.11. Intra edge upsample +// process'. This can occur in cases with |width| + |height| <= 16. top-right +// is accessed. +using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride, + const void* top, int width, + int height, int xstep, + bool upsampled_top); + +// Directional intra predictor function signature, zone 2 (90 < angle < 180). +// Section 7.11.2.4 (#8). +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to +// the row above |dst|. |left| is an aligned vector of the column to the left of +// |dst|. |width| and |height| give the dimensions of the block. |xstep| and +// |ystep| are the scaled starting index to |top| and |left|, respectively, +// from kDirectionalIntraPredictorDerivative. |upsampled_top| and +// |upsampled_left| indicate whether |top| and |left| have been upsampled as +// described in '7.11.2.11. Intra edge upsample process'. This can occur in +// cases with |width| + |height| <= 16. top-left and upper-left are accessed, +// up to [-2] in each if |upsampled_top/left| are set. +using DirectionalIntraPredictorZone2Func = void (*)( + void* dst, ptrdiff_t stride, const void* top, const void* left, int width, + int height, int xstep, int ystep, bool upsampled_top, bool upsampled_left); + +// Directional intra predictor function signature, zone 3 (180 < angle < 270). +// Section 7.11.2.4 (#9). +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. |left| is an aligned vector of the +// column to the left of |dst|. |width| and |height| give the dimensions of the +// block. |ystep| is the scaled starting index to |left| from +// kDirectionalIntraPredictorDerivative. |upsampled_left| indicates whether +// |left| has been upsampled as described in '7.11.2.11. Intra edge upsample +// process'. This can occur in cases with |width| + |height| <= 16. bottom-left +// is accessed. +using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride, + const void* left, int width, + int height, int ystep, + bool upsampled_left); + +// Filter intra predictor function signature. Section 7.11.2.3. +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to +// the row above |dst|. |left| is an aligned vector of the column to the left +// of |dst|. |width| and |height| are the size of the block in pixels. +using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride, + const void* top, const void* left, + FilterIntraPredictor pred, int width, + int height); + +//------------------------------------------------------------------------------ +// Chroma from Luma (Cfl) prediction. Section 7.11.5. + +// Chroma from Luma (Cfl) intra prediction function signature. |dst| is an +// unaligned pointer to the output block. Pixel size is determined by bitdepth +// with |stride| given in bytes. |luma| contains subsampled luma pixels with 3 +// fractional bits of precision. |alpha| is the signed Cfl alpha value for the +// appropriate plane. +using CflIntraPredictorFunc = void (*)( + void* dst, ptrdiff_t stride, + const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha); +using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes]; + +// Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned +// pointer to the output block. |src| is an unaligned pointer to the input +// block. Pixel size is determined by bitdepth with |stride| given in bytes. +using CflSubsamplerFunc = + void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], + int max_luma_width, int max_luma_height, const void* source, + ptrdiff_t stride); +using CflSubsamplerFuncs = + CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes]; + +//------------------------------------------------------------------------------ +// Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4. + +// Intra edge filter function signature. |buffer| is a pointer to the top_row or +// left_column that needs to be filtered. Typically the -1'th index of |top_row| +// and |left_column| need to be filtered as well, so the caller can merely pass +// the |buffer| as top_row[-1] or left_column[-1]. Pixel size is determined by +// bitdepth. |size| is the number of pixels to be filtered. |strength| is the +// filter strength. Section 7.11.2.12 in the spec. +using IntraEdgeFilterFunc = void (*)(void* buffer, int size, int strength); + +// Intra edge upsampler function signature. |buffer| is a pointer to the top_row +// or left_column that needs to be upsampled. Pixel size is determined by +// bitdepth. |size| is the number of pixels to be upsampled; valid values are: +// 4, 8, 12, 16. This function needs access to negative indices -1 and -2 of +// the |buffer|. Section 7.11.2.11 in the spec. +using IntraEdgeUpsamplerFunc = void (*)(void* buffer, int size); + +//------------------------------------------------------------------------------ +// Inverse transform add function signature. +// +// Steps 2 and 3 of section 7.12.3 (contains the implementation of section +// 7.13.3). +// Apply the inverse transforms and add the residual to the destination frame +// for the transform type and block size |tx_size| starting at position +// |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D. +// |adjusted_tx_height| is the number of rows to process based on the non-zero +// coefficient count in the block. It will be 1 (non-zero coefficient count == +// 1), 4 or a multiple of 8 up to 32 or the original transform height, +// whichever is less. +using InverseTransformAddFunc = void (*)(TransformType tx_type, + TransformSize tx_size, + int adjusted_tx_height, + void* src_buffer, int start_x, + int start_y, void* dst_frame); +// The final dimension holds row and column transforms indexed with kRow and +// kColumn. +using InverseTransformAddFuncs = + InverseTransformAddFunc[kNum1DTransforms][kNum1DTransformSizes][2]; + +//------------------------------------------------------------------------------ +// Post processing. + +// Loop filter function signature. Section 7.14. +// |dst| is an unaligned pointer to the output block. Pixel size is determined +// by bitdepth with |stride| given in bytes. +using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh, + int inner_thresh, int hev_thresh); +using LoopFilterFuncs = + LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes]; + +// Cdef direction function signature. Section 7.15.2. +// |src| is a pointer to the source block. Pixel size is determined by bitdepth +// with |stride| given in bytes. |direction| and |variance| are output +// parameters and must not be nullptr. +using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride, + uint8_t* direction, int* variance); + +// Cdef filtering function signature. Section 7.15.3. +// |source| is a pointer to the input block padded with kCdefLargeValue if at a +// frame border. |source_stride| is given in units of uint16_t. +// |block_width|, |block_height| are the width/height of the input block. +// |primary_strength|, |secondary_strength|, and |damping| are Cdef filtering +// parameters. +// |direction| is the filtering direction. +// |dest| is the output buffer. |dest_stride| is given in bytes. +using CdefFilteringFunc = void (*)(const uint16_t* source, + ptrdiff_t source_stride, int block_height, + int primary_strength, int secondary_strength, + int damping, int direction, void* dest, + ptrdiff_t dest_stride); + +// The first index is block width: [0]: 4, [1]: 8. The second is based on +// non-zero strengths: [0]: |primary_strength| and |secondary_strength|, [1]: +// |primary_strength| only, [2]: |secondary_strength| only. +using CdefFilteringFuncs = CdefFilteringFunc[2][3]; + +// Upscaling coefficients function signature. Section 7.16. +// This is an auxiliary function for SIMD optimizations and has no corresponding +// C function. Different SIMD versions may have different outputs. So it must +// pair with the corresponding version of SuperResFunc. +// |upscaled_width| is the width of the output frame. +// |step| is the number of subpixels to move the kernel for the next destination +// pixel. +// |initial_subpixel_x| is a base offset from which |step| increments. +// |coefficients| is the upscale filter used by each pixel in a row. +using SuperResCoefficientsFunc = void (*)(int upscaled_width, + int initial_subpixel_x, int step, + void* coefficients); + +// Upscaling process function signature. Section 7.16. +// |coefficients| is the upscale filter used by each pixel in a row. It is not +// used by the C function. +// |source| is the input frame buffer. It will be line extended. +// |dest| is the output buffer. +// |stride| is given in pixels, and shared by |source| and |dest|. +// |height| is the height of the block to be processed. +// |downscaled_width| is the width of the input frame. +// |upscaled_width| is the width of the output frame. +// |step| is the number of subpixels to move the kernel for the next destination +// pixel. +// |initial_subpixel_x| is a base offset from which |step| increments. +using SuperResFunc = void (*)(const void* coefficients, void* source, + ptrdiff_t stride, int height, + int downscaled_width, int upscaled_width, + int initial_subpixel_x, int step, void* dest); + +// Loop restoration function signature. Sections 7.16, 7.17. +// |restoration_info| contains loop restoration information, such as filter +// type, strength. +// |source| is the input frame buffer, which is deblocked and cdef filtered. +// |top_border| and |bottom_border| are the top and bottom borders. +// |dest| is the output. +// |stride| is given in pixels, and shared by |source|, |top_border|, +// |bottom_border| and |dest|. +// |restoration_buffer| contains buffers required for self guided filter and +// wiener filter. They must be initialized before calling. +using LoopRestorationFunc = void (*)( + const RestorationUnitInfo& restoration_info, const void* source, + const void* top_border, const void* bottom_border, ptrdiff_t stride, + int width, int height, RestorationBuffer* restoration_buffer, void* dest); + +// Index 0 is Wiener Filter. +// Index 1 is Self Guided Restoration Filter. +// This can be accessed as LoopRestorationType - 2. +using LoopRestorationFuncs = LoopRestorationFunc[2]; + +// Convolve function signature. Section 7.11.3.4. +// This function applies a horizontal filter followed by a vertical filter. +// |reference| is the input block (reference frame buffer). |reference_stride| +// is the corresponding frame stride. +// |vertical_filter_index|/|horizontal_filter_index| is the index to +// retrieve the type of filter to be applied for vertical/horizontal direction +// from the filter lookup table 'kSubPixelFilters'. +// |horizontal_filter_id| and |vertical_filter_id| are the filter ids. +// |width| and |height| are width and height of the block to be filtered. +// |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in +// x/y direction. +// |prediction| is the output block (output frame buffer). +// Rounding precision is derived from the function being called. For horizontal +// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be +// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be +// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will +// be used. +using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride, + int horizontal_filter_index, + int vertical_filter_index, + int horizontal_filter_id, int vertical_filter_id, + int width, int height, void* prediction, + ptrdiff_t pred_stride); + +// Convolve functions signature. Each points to one convolve function with +// a specific setting: +// ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter] +// [has_horizontal_filter]. +// If is_compound is false, the prediction is clipped to Pixel. +// If is_compound is true, the range of prediction is: +// 8bpp: [-5132, 9212] (int16_t) +// 10bpp: [ 3988, 61532] (uint16_t) +// 12bpp: [ 3974, 61559] (uint16_t) +// See src/dsp/convolve.cc +using ConvolveFuncs = ConvolveFunc[2][2][2][2]; + +// Convolve + scale function signature. Section 7.11.3.4. +// This function applies a horizontal filter followed by a vertical filter. +// |reference| is the input block (reference frame buffer). |reference_stride| +// is the corresponding frame stride. +// |vertical_filter_index|/|horizontal_filter_index| is the index to +// retrieve the type of filter to be applied for vertical/horizontal direction +// from the filter lookup table 'kSubPixelFilters'. +// |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024. +// |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel. +// |width| and |height| are width and height of the block to be filtered. +// |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in +// x/y direction. +// |prediction| is the output block (output frame buffer). +// Rounding precision is derived from the function being called. For horizontal +// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be +// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be +// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will +// be used. +using ConvolveScaleFunc = void (*)(const void* reference, + ptrdiff_t reference_stride, + int horizontal_filter_index, + int vertical_filter_index, int subpixel_x, + int subpixel_y, int step_x, int step_y, + int width, int height, void* prediction, + ptrdiff_t pred_stride); + +// Convolve functions signature for scaling version. +// 0: single predictor. 1: compound predictor. +using ConvolveScaleFuncs = ConvolveScaleFunc[2]; + +// Weight mask function signature. Section 7.11.3.12. +// |prediction_0| is the first input block. +// |prediction_1| is the second input block. Both blocks are int16_t* when +// bitdepth == 8 and uint16_t* otherwise. +// |width| and |height| are the prediction width and height. +// The stride for the input buffers is equal to |width|. +// The valid range of block size is [8x8, 128x128] for the luma plane. +// |mask| is the output buffer. |mask_stride| is the output buffer stride. +using WeightMaskFunc = void (*)(const void* prediction_0, + const void* prediction_1, uint8_t* mask, + ptrdiff_t mask_stride); + +// Weight mask functions signature. The dimensions (in order) are: +// * Width index (4 => 0, 8 => 1, 16 => 2 and so on). +// * Height index (4 => 0, 8 => 1, 16 => 2 and so on). +// * mask_is_inverse. +using WeightMaskFuncs = WeightMaskFunc[6][6][2]; + +// Average blending function signature. +// Two predictors are averaged to generate the output. +// Input predictor values are int16_t. Output type is uint8_t, with actual +// range of Pixel value. +// Average blending is in the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE). +// |prediction_0| is the first input block. +// |prediction_1| is the second input block. Both blocks are int16_t* when +// bitdepth == 8 and uint16_t* otherwise. +// |width| and |height| are the same for the first and second input blocks. +// The stride for the input buffers is equal to |width|. +// The valid range of block size is [8x8, 128x128] for the luma plane. +// |dest| is the output buffer. |dest_stride| is the output buffer stride. +using AverageBlendFunc = void (*)(const void* prediction_0, + const void* prediction_1, int width, + int height, void* dest, + ptrdiff_t dest_stride); + +// Distance weighted blending function signature. +// Weights are generated in Section 7.11.3.15. +// Weighted blending is in the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE). +// This function takes two blocks (inter frame prediction) and produces a +// weighted output. +// |prediction_0| is the first input block. +// |prediction_1| is the second input block. Both blocks are int16_t* when +// bitdepth == 8 and uint16_t* otherwise. +// |weight_0| is the weight for the first block. It is derived from the relative +// distance of the first reference frame and the current frame. +// |weight_1| is the weight for the second block. It is derived from the +// relative distance of the second reference frame and the current frame. +// |width| and |height| are the same for the first and second input blocks. +// The stride for the input buffers is equal to |width|. +// The valid range of block size is [8x8, 128x128] for the luma plane. +// |dest| is the output buffer. |dest_stride| is the output buffer stride. +using DistanceWeightedBlendFunc = void (*)(const void* prediction_0, + const void* prediction_1, + uint8_t weight_0, uint8_t weight_1, + int width, int height, void* dest, + ptrdiff_t dest_stride); + +// Mask blending function signature. Section 7.11.3.14. +// This function takes two blocks and produces a blended output stored into the +// output block |dest|. The blending is a weighted average process, controlled +// by values of the mask. +// |prediction_0| is the first input block. When prediction mode is inter_intra +// (or wedge_inter_intra), this refers to the inter frame prediction. It is +// int16_t* when bitdepth == 8 and uint16_t* otherwise. +// The stride for |prediction_0| is equal to |width|. +// |prediction_1| is the second input block. When prediction mode is inter_intra +// (or wedge_inter_intra), this refers to the intra frame prediction and uses +// Pixel values. It is only used for intra frame prediction when bitdepth >= 10. +// It is int16_t* when bitdepth == 8 and uint16_t* otherwise. +// |prediction_stride_1| is the stride, given in units of [u]int16_t. When +// |is_inter_intra| is false (compound prediction) then |prediction_stride_1| is +// equal to |width|. +// |mask| is an integer array, whose value indicates the weight of the blending. +// |mask_stride| is corresponding stride. +// |width|, |height| are the same for both input blocks. +// If it's inter_intra (or wedge_inter_intra), the valid range of block size is +// [8x8, 32x32]. Otherwise (including difference weighted prediction and +// compound average prediction), the valid range is [8x8, 128x128]. +// If there's subsampling, the corresponding width and height are halved for +// chroma planes. +// |subsampling_x|, |subsampling_y| are the subsampling factors. +// |is_inter_intra| stands for the prediction mode. If it is true, one of the +// prediction blocks is from intra prediction of current frame. Otherwise, two +// prediction blocks are both inter frame predictions. +// |is_wedge_inter_intra| indicates if the mask is for the wedge prediction. +// |dest| is the output block. +// |dest_stride| is the corresponding stride for dest. +using MaskBlendFunc = void (*)(const void* prediction_0, + const void* prediction_1, + ptrdiff_t prediction_stride_1, + const uint8_t* mask, ptrdiff_t mask_stride, + int width, int height, void* dest, + ptrdiff_t dest_stride); + +// Mask blending functions signature. Each points to one function with +// a specific setting: +// MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra]. +using MaskBlendFuncs = MaskBlendFunc[3][2]; + +// This function is similar to the MaskBlendFunc. It is only used when +// |is_inter_intra| is true and |bitdepth| == 8. +// |prediction_[01]| are Pixel values (uint8_t). +// |prediction_1| is also the output buffer. +using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0, + uint8_t* prediction_1, + ptrdiff_t prediction_stride_1, + const uint8_t* mask, + ptrdiff_t mask_stride, int width, + int height); + +// InterIntra8bpp mask blending functions signature. When is_wedge_inter_intra +// is false, the function at index 0 must be used. Otherwise, the function at +// index subsampling_x + subsampling_y must be used. +using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3]; + +// Obmc (overlapped block motion compensation) blending function signature. +// Section 7.11.3.10. +// This function takes two blocks and produces a blended output stored into the +// first input block. The blending is a weighted average process, controlled by +// values of the mask. +// Obmc is not a compound mode. It is different from other compound blending, +// in terms of precision. The current block is computed using convolution with +// clipping to the range of pixel values. Its above and left blocks are also +// clipped. Therefore obmc blending process doesn't need to clip the output. +// |prediction| is the first input block, which will be overwritten. +// |prediction_stride| is the stride, given in bytes. +// |width|, |height| are the same for both input blocks. +// |obmc_prediction| is the second input block. +// |obmc_prediction_stride| is its stride, given in bytes. +using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride, + int width, int height, + const void* obmc_prediction, + ptrdiff_t obmc_prediction_stride); +using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections]; + +// Warp function signature. Section 7.11.3.5. +// This function applies warp filtering for each 8x8 block inside the current +// coding block. The filtering process is similar to 2d convolve filtering. +// The horizontal filter is applied followed by the vertical filter. +// The function has to calculate corresponding pixel positions before and +// after warping. +// |source| is the input reference frame buffer. +// |source_stride|, |source_width|, |source_height| are corresponding frame +// stride, width, and height. |source_stride| is given in bytes. +// |warp_params| is the matrix of warp motion: warp_params[i] = mN. +// [x' (m2 m3 m0 [x +// z . y' = m4 m5 m1 * y +// 1] m6 m7 1) 1] +// |subsampling_x/y| is the current frame's plane subsampling factor. +// |block_start_x| and |block_start_y| are the starting position the current +// coding block. +// |block_width| and |block_height| are width and height of the current coding +// block. |block_width| and |block_height| are at least 8. +// |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the +// comments in the definition of struct GlobalMotion for the range of their +// values. +// |dest| is the output buffer of type Pixel. The output values are clipped to +// Pixel values. +// |dest_stride| is the stride, in units of bytes. +// Rounding precision is derived from the function being called. For horizontal +// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be +// used. For vertical filtering kInterRoundBitsVertical & +// kInterRoundBitsVertical12bpp will be used. +// +// NOTE: WarpFunc assumes the source frame has left, right, top, and bottom +// borders that extend the frame boundary pixels. +// * The left and right borders must be at least 13 pixels wide. In addition, +// Warp_NEON() may read up to 14 bytes after a row in the |source| buffer. +// Therefore, there must be at least one extra padding byte after the right +// border of the last row in the source buffer. +// * The top and bottom borders must be at least 13 pixels high. +using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride, + int source_width, int source_height, + const int* warp_params, int subsampling_x, + int subsampling_y, int block_start_x, + int block_start_y, int block_width, int block_height, + int16_t alpha, int16_t beta, int16_t gamma, + int16_t delta, void* dest, ptrdiff_t dest_stride); + +// Warp for compound predictions. Section 7.11.3.5. +// Similar to WarpFunc, but |dest| is a uint16_t predictor buffer, +// |dest_stride| is given in units of uint16_t and |inter_round_bits_vertical| +// is always 7 (kCompoundInterRoundBitsVertical). +// Rounding precision is derived from the function being called. For horizontal +// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be +// used. For vertical filtering kInterRoundBitsCompondVertical will be used. +using WarpCompoundFunc = WarpFunc; + +constexpr int kNumAutoRegressionLags = 4; +// Applies an auto-regressive filter to the white noise in |luma_grain_buffer|. +// Section 7.18.3.3, second code block +// |params| are parameters read from frame header, mainly providing +// auto_regression_coeff_y for the filter and auto_regression_shift to right +// shift the filter sum by. Note: This method assumes +// params.auto_regression_coeff_lag is not 0. Do not call this method if +// params.auto_regression_coeff_lag is 0. +using LumaAutoRegressionFunc = void (*)(const FilmGrainParams& params, + void* luma_grain_buffer); +// Function index is auto_regression_coeff_lag - 1. +using LumaAutoRegressionFuncs = + LumaAutoRegressionFunc[kNumAutoRegressionLags - 1]; + +// Applies an auto-regressive filter to the white noise in u_grain and v_grain. +// Section 7.18.3.3, third code block +// The |luma_grain_buffer| provides samples that are added to the autoregressive +// sum when num_y_points > 0. +// |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise +// that were generated from the stored Gaussian sequence, and are overwritten +// with the results of the autoregressive filter. |params| are parameters read +// from frame header, mainly providing auto_regression_coeff_u and +// auto_regression_coeff_v for each chroma plane's filter, and +// auto_regression_shift to right shift the filter sums by. +using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params, + const void* luma_grain_buffer, + int subsampling_x, int subsampling_y, + void* u_grain_buffer, + void* v_grain_buffer); +using ChromaAutoRegressionFuncs = + ChromaAutoRegressionFunc[/*use_luma*/ 2][kNumAutoRegressionLags]; + +// Build an image-wide "stripe" of grain noise for every 32 rows in the image. +// Section 7.18.3.5, first code block. +// Each 32x32 luma block is copied at a random offset specified via +// |grain_seed| from the grain template produced by autoregression, and the same +// is done for chroma grains, subject to subsampling. +// |width| and |height| are the dimensions of the overall image. +// |noise_stripes_buffer| points to an Array2DView with one row for each stripe. +// Because this function treats all planes identically and independently, it is +// simplified to take one grain buffer at a time. This means duplicating some +// random number generations, but that work can be reduced in other ways. +using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer, + int grain_seed, int width, + int height, int subsampling_x, + int subsampling_y, + void* noise_stripes_buffer); +using ConstructNoiseStripesFuncs = + ConstructNoiseStripesFunc[/*overlap_flag*/ 2]; + +// Compute the one or two overlap rows for each stripe copied to the noise +// image. +// Section 7.18.3.5, second code block. |width| and |height| are the +// dimensions of the overall image. |noise_stripes_buffer| points to an +// Array2DView with one row for each stripe. |noise_image_buffer| points to an +// Array2D containing the allocated plane for this frame. Because this function +// treats all planes identically and independently, it is simplified to take one +// grain buffer at a time. +using ConstructNoiseImageOverlapFunc = + void (*)(const void* noise_stripes_buffer, int width, int height, + int subsampling_x, int subsampling_y, void* noise_image_buffer); + +// Populate a scaling lookup table with interpolated values of a piecewise +// linear function where values in |point_value| are mapped to the values in +// |point_scaling|. +// |num_points| can be between 0 and 15. When 0, the lookup table is set to +// zero. +// |point_value| and |point_scaling| have |num_points| valid elements. +using InitializeScalingLutFunc = void (*)( + int num_points, const uint8_t point_value[], const uint8_t point_scaling[], + uint8_t scaling_lut[kScalingLookupTableSize]); + +// Blend noise with image. Section 7.18.3.5, third code block. +// |width| is the width of each row, while |height| is how many rows to compute. +// |start_height| is an offset for the noise image, to support multithreading. +// |min_value|, |max_luma|, and |max_chroma| are computed by the caller of these +// functions, according to the code in the spec. +// |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded +// frame. They are blended with the film grain noise and written to +// |dest_plane_y| and |dest_plane_uv| as final output for display. +// source_plane_* and dest_plane_* may point to the same buffer, in which case +// the film grain noise is added in place. +// |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from +// the frame's raw pixel value, to a scaling factor for the noise sample. +// |scaling_shift| is applied as a right shift after scaling, so that scaling +// down is possible. It is found in FilmGrainParams, but supplied directly to +// BlendNoiseWithImageLumaFunc because it's the only member used. +using BlendNoiseWithImageLumaFunc = + void (*)(const void* noise_image_ptr, int min_value, int max_value, + int scaling_shift, int width, int height, int start_height, + const uint8_t scaling_lut_y[kScalingLookupTableSize], + const void* source_plane_y, ptrdiff_t source_stride_y, + void* dest_plane_y, ptrdiff_t dest_stride_y); + +using BlendNoiseWithImageChromaFunc = void (*)( + Plane plane, const FilmGrainParams& params, const void* noise_image_ptr, + int min_value, int max_value, int width, int height, int start_height, + int subsampling_x, int subsampling_y, + const uint8_t scaling_lut[kScalingLookupTableSize], + const void* source_plane_y, ptrdiff_t source_stride_y, + const void* source_plane_uv, ptrdiff_t source_stride_uv, + void* dest_plane_uv, ptrdiff_t dest_stride_uv); + +using BlendNoiseWithImageChromaFuncs = + BlendNoiseWithImageChromaFunc[/*chroma_scaling_from_luma*/ 2]; + +//------------------------------------------------------------------------------ + +struct FilmGrainFuncs { + LumaAutoRegressionFuncs luma_auto_regression; + ChromaAutoRegressionFuncs chroma_auto_regression; + ConstructNoiseStripesFuncs construct_noise_stripes; + ConstructNoiseImageOverlapFunc construct_noise_image_overlap; + InitializeScalingLutFunc initialize_scaling_lut; + BlendNoiseWithImageLumaFunc blend_noise_luma; + BlendNoiseWithImageChromaFuncs blend_noise_chroma; +}; + +// Motion field projection function signature. Section 7.9. +// |reference_info| provides reference information for motion field projection. +// |reference_to_current_with_sign| is the precalculated reference frame id +// distance from current frame. +// |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others. +// |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile. +// |x8_start| and |x8_end| are the start and end 8x8 columns of the current +// tile. +// |motion_field| is the output which saves the projected motion field +// information. +using MotionFieldProjectionKernelFunc = void (*)( + const ReferenceInfo& reference_info, int reference_to_current_with_sign, + int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end, + TemporalMotionField* motion_field); + +// Compound temporal motion vector projection function signature. +// Section 7.9.3 and 7.10.2.10. +// |temporal_mvs| is the set of temporal reference motion vectors. +// |temporal_reference_offsets| specifies the number of frames covered by the +// original motion vector. +// |reference_offsets| specifies the number of frames to be covered by the +// projected motion vector. +// |count| is the number of the temporal motion vectors. +// |candidate_mvs| is the set of projected motion vectors. +using MvProjectionCompoundFunc = void (*)( + const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, + const int reference_offsets[2], int count, + CompoundMotionVector* candidate_mvs); + +// Single temporal motion vector projection function signature. +// Section 7.9.3 and 7.10.2.10. +// |temporal_mvs| is the set of temporal reference motion vectors. +// |temporal_reference_offsets| specifies the number of frames covered by the +// original motion vector. +// |reference_offset| specifies the number of frames to be covered by the +// projected motion vector. +// |count| is the number of the temporal motion vectors. +// |candidate_mvs| is the set of projected motion vectors. +using MvProjectionSingleFunc = void (*)( + const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, + int reference_offset, int count, MotionVector* candidate_mvs); + +struct Dsp { + AverageBlendFunc average_blend; + CdefDirectionFunc cdef_direction; + CdefFilteringFuncs cdef_filters; + CflIntraPredictorFuncs cfl_intra_predictors; + CflSubsamplerFuncs cfl_subsamplers; + ConvolveFuncs convolve; + ConvolveScaleFuncs convolve_scale; + DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1; + DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2; + DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3; + DistanceWeightedBlendFunc distance_weighted_blend; + FilmGrainFuncs film_grain; + FilterIntraPredictorFunc filter_intra_predictor; + InterIntraMaskBlendFuncs8bpp inter_intra_mask_blend_8bpp; + IntraEdgeFilterFunc intra_edge_filter; + IntraEdgeUpsamplerFunc intra_edge_upsampler; + IntraPredictorFuncs intra_predictors; + InverseTransformAddFuncs inverse_transforms; + LoopFilterFuncs loop_filters; + LoopRestorationFuncs loop_restorations; + MaskBlendFuncs mask_blend; + MotionFieldProjectionKernelFunc motion_field_projection_kernel; + MvProjectionCompoundFunc mv_projection_compound[3]; + MvProjectionSingleFunc mv_projection_single[3]; + ObmcBlendFuncs obmc_blend; + SuperResCoefficientsFunc super_res_coefficients; + SuperResFunc super_res; + WarpCompoundFunc warp_compound; + WarpFunc warp; + WeightMaskFuncs weight_mask; +}; + +// Initializes function pointers based on build config and runtime +// environment. Must be called once before first use. This function is +// thread-safe. +void DspInit(); + +// Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't +// exist. +const Dsp* GetDspTable(int bitdepth); + +} // namespace dsp + +namespace dsp_internal { + +// Visual Studio builds don't have a way to detect SSE4_1. Only exclude the C +// functions if /arch:AVX2 is used across all sources. +#if !LIBGAV1_TARGETING_AVX2 && \ + (defined(_MSC_VER) || (defined(_M_IX86) || defined(_M_X64))) +#undef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS +#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 1 +#endif + +// Returns true if a more highly optimized version of |func| is not defined for +// the associated bitdepth or if it is forcibly enabled with +// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds +// to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated +// with the module. +// |func| is one of: +// - FunctionName, e.g., SelfGuidedFilter. +// - [sub-table-index1][...-indexN] e.g., +// TransformSize4x4_IntraPredictorDc. The indices correspond to enum values +// used as lookups with leading 'k' removed. +// +// NEON support is the only extension available for ARM and it is always +// required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always +// true and can be omitted. +#define DSP_ENABLED_8BPP_AVX2(func) \ + (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \ + LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_AVX2) +#define DSP_ENABLED_10BPP_AVX2(func) \ + (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \ + LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_AVX2) +#define DSP_ENABLED_8BPP_SSE4_1(func) \ + (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \ + LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_SSE4_1) +#define DSP_ENABLED_10BPP_SSE4_1(func) \ + (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \ + LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_SSE4_1) + +// Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't +// exist. This version is meant for use by test or dsp/*Init() functions only. +dsp::Dsp* GetWritableDspTable(int bitdepth); + +} // namespace dsp_internal +} // namespace libgav1 + +#endif // LIBGAV1_SRC_DSP_DSP_H_ |