diff options
Diffstat (limited to 'src/tile/prediction.cc')
-rw-r--r-- | src/tile/prediction.cc | 1361 |
1 files changed, 1361 insertions, 0 deletions
diff --git a/src/tile/prediction.cc b/src/tile/prediction.cc new file mode 100644 index 0000000..c5560a6 --- /dev/null +++ b/src/tile/prediction.cc @@ -0,0 +1,1361 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> + +#include "src/buffer_pool.h" +#include "src/dsp/constants.h" +#include "src/dsp/dsp.h" +#include "src/motion_vector.h" +#include "src/obu_parser.h" +#include "src/prediction_mask.h" +#include "src/tile.h" +#include "src/utils/array_2d.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/logging.h" +#include "src/utils/memory.h" +#include "src/utils/types.h" +#include "src/warp_prediction.h" +#include "src/yuv_buffer.h" + +namespace libgav1 { +namespace { + +// Import all the constants in the anonymous namespace. +#include "src/inter_intra_masks.inc" + +// Precision bits when scaling reference frames. +constexpr int kReferenceScaleShift = 14; +constexpr int kAngleStep = 3; +constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = { + 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0}; + +// The following modes need both the left_column and top_row for intra +// prediction. For directional modes left/top requirement is inferred based on +// the prediction angle. For Dc modes, left/top requirement is inferred based on +// whether or not left/top is available. +constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical, + kPredictionModePaeth); + +int16_t GetDirectionalIntraPredictorDerivative(const int angle) { + assert(angle >= 3); + assert(angle <= 87); + return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1]; +} + +// Maps the block_size to an index as follows: +// kBlock8x8 => 0. +// kBlock8x16 => 1. +// kBlock8x32 => 2. +// kBlock16x8 => 3. +// kBlock16x16 => 4. +// kBlock16x32 => 5. +// kBlock32x8 => 6. +// kBlock32x16 => 7. +// kBlock32x32 => 8. +int GetWedgeBlockSizeIndex(BlockSize block_size) { + assert(block_size >= kBlock8x8); + return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) - + static_cast<int>(block_size >= kBlock32x8); +} + +// Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively. +int GetInterIntraMaskLookupIndex(int dimension) { + assert(dimension == 4 || dimension == 8 || dimension == 16 || + dimension == 32); + return FloorLog2(dimension) - 2; +} + +// 7.11.2.9. +int GetIntraEdgeFilterStrength(int width, int height, int filter_type, + int delta) { + const int sum = width + height; + delta = std::abs(delta); + if (filter_type == 0) { + if (sum <= 8) { + if (delta >= 56) return 1; + } else if (sum <= 16) { + if (delta >= 40) return 1; + } else if (sum <= 24) { + if (delta >= 32) return 3; + if (delta >= 16) return 2; + if (delta >= 8) return 1; + } else if (sum <= 32) { + if (delta >= 32) return 3; + if (delta >= 4) return 2; + return 1; + } else { + return 3; + } + } else { + if (sum <= 8) { + if (delta >= 64) return 2; + if (delta >= 40) return 1; + } else if (sum <= 16) { + if (delta >= 48) return 2; + if (delta >= 20) return 1; + } else if (sum <= 24) { + if (delta >= 4) return 3; + } else { + return 3; + } + } + return 0; +} + +// 7.11.2.10. +bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) { + const int sum = width + height; + delta = std::abs(delta); + // This function should not be called when the prediction angle is 90 or 180. + assert(delta != 0); + if (delta >= 40) return false; + return (filter_type == 1) ? sum <= 8 : sum <= 16; +} + +constexpr uint8_t kQuantizedDistanceWeight[4][2] = { + {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}}; + +constexpr uint8_t kQuantizedDistanceLookup[4][2] = { + {9, 7}, {11, 5}, {12, 4}, {13, 3}}; + +void GetDistanceWeights(const int distance[2], int weight[2]) { + // Note: distance[0] and distance[1] correspond to relative distance + // between current frame and reference frame [1] and [0], respectively. + const int order = static_cast<int>(distance[0] <= distance[1]); + if (distance[0] == 0 || distance[1] == 0) { + weight[0] = kQuantizedDistanceLookup[3][order]; + weight[1] = kQuantizedDistanceLookup[3][1 - order]; + } else { + int i; + for (i = 0; i < 3; ++i) { + const int weight_0 = kQuantizedDistanceWeight[i][order]; + const int weight_1 = kQuantizedDistanceWeight[i][1 - order]; + if (order == 0) { + if (distance[0] * weight_0 < distance[1] * weight_1) break; + } else { + if (distance[0] * weight_0 > distance[1] * weight_1) break; + } + } + weight[0] = kQuantizedDistanceLookup[i][order]; + weight[1] = kQuantizedDistanceLookup[i][1 - order]; + } +} + +dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left, + bool has_top) { + if (mode == kPredictionModeDc) { + if (has_left && has_top) { + return dsp::kIntraPredictorDc; + } + if (has_left) { + return dsp::kIntraPredictorDcLeft; + } + if (has_top) { + return dsp::kIntraPredictorDcTop; + } + return dsp::kIntraPredictorDcFill; + } + switch (mode) { + case kPredictionModePaeth: + return dsp::kIntraPredictorPaeth; + case kPredictionModeSmooth: + return dsp::kIntraPredictorSmooth; + case kPredictionModeSmoothVertical: + return dsp::kIntraPredictorSmoothVertical; + case kPredictionModeSmoothHorizontal: + return dsp::kIntraPredictorSmoothHorizontal; + default: + return dsp::kNumIntraPredictors; + } +} + +uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane, + const int x, const int y, const int bitdepth) { +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (bitdepth > 8) { + Array2DView<uint16_t> buffer16( + buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t), + reinterpret_cast<uint16_t*>(&buffer[plane][0][0])); + return reinterpret_cast<uint8_t*>(&buffer16[y][x]); + } +#endif // LIBGAV1_MAX_BITDEPTH >= 10 + static_cast<void>(bitdepth); + return &buffer[plane][y][x]; +} + +int GetPixelPositionFromHighScale(int start, int step, int offset) { + return (start + step * offset) >> kScaleSubPixelBits; +} + +dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra, + bool is_wedge_inter_intra, + int subsampling_x, int subsampling_y) { + return (is_inter_intra && !is_wedge_inter_intra) + ? dsp.mask_blend[0][/*is_inter_intra=*/true] + : dsp.mask_blend[subsampling_x + subsampling_y][is_inter_intra]; +} + +} // namespace + +template <typename Pixel> +void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool has_top_right, + bool has_bottom_left, PredictionMode mode, + TransformSize tx_size) { + const int width = 1 << kTransformWidthLog2[tx_size]; + const int height = 1 << kTransformHeightLog2[tx_size]; + const int x_shift = subsampling_x_[plane]; + const int y_shift = subsampling_y_[plane]; + const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1; + const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1; + // For performance reasons, do not initialize the following two buffers. + alignas(kMaxAlignment) Pixel top_row_data[160]; + alignas(kMaxAlignment) Pixel left_column_data[160]; +#if LIBGAV1_MSAN + if (IsDirectionalMode(mode)) { + memset(top_row_data, 0, sizeof(top_row_data)); + memset(left_column_data, 0, sizeof(left_column_data)); + } +#endif + // Some predictors use |top_row_data| and |left_column_data| with a negative + // offset to access pixels to the top-left of the current block. So have some + // space before the arrays to allow populating those without having to move + // the rest of the array. + Pixel* const top_row = top_row_data + 16; + Pixel* const left_column = left_column_data + 16; + const int bitdepth = sequence_header_.color_config.bitdepth; + const int top_and_left_size = width + height; + const bool is_directional_mode = IsDirectionalMode(mode); + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + const bool use_filter_intra = + (plane == kPlaneY && prediction_parameters.use_filter_intra); + const int prediction_angle = + is_directional_mode + ? kPredictionModeToAngle[mode] + + prediction_parameters.angle_delta[GetPlaneType(plane)] * + kAngleStep + : 0; + // Directional prediction requires buffers larger than the width or height. + const int top_size = is_directional_mode ? top_and_left_size : width; + const int left_size = is_directional_mode ? top_and_left_size : height; + const int top_right_size = + is_directional_mode ? (has_top_right ? 2 : 1) * width : width; + const int bottom_left_size = + is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height; + + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle < 180) || + (mode == kPredictionModeDc && has_top); + const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle > 90) || + (mode == kPredictionModeDc && has_left); + + const Pixel* top_row_src = buffer[y - 1]; + + // Determine if we need to retrieve the top row from + // |intra_prediction_buffer_|. + if ((needs_top || needs_left) && use_intra_prediction_buffer_) { + // Superblock index of block.row4x4. block.row4x4 is always in luma + // dimension (no subsampling). + const int current_superblock_index = + block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4); + // Superblock index of y - 1. y is in the plane dimension (chroma planes + // could be subsampled). + const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) - + subsampling_y_[plane]; + const int top_row_superblock_index = (y - 1) >> plane_shift; + // If the superblock index of y - 1 is not that of the current superblock, + // then we will have to retrieve the top row from the + // |intra_prediction_buffer_|. + if (current_superblock_index != top_row_superblock_index) { + top_row_src = reinterpret_cast<const Pixel*>( + (*intra_prediction_buffer_)[plane].get()); + } + } + + if (needs_top) { + // Compute top_row. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + top_row[-1] = has_top ? top_row_src[left_index] : buffer[y][left_index]; + } else { + top_row[-1] = 1 << (bitdepth - 1); + } + if (!has_top && has_left) { + Memset(top_row, buffer[y][x - 1], top_size); + } else if (!has_top && !has_left) { + Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size); + } else { + const int top_limit = std::min(max_x - x + 1, top_right_size); + memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel)); + // Even though it is safe to call Memset with a size of 0, accessing + // top_row_src[top_limit - x + 1] is not allowed when this condition is + // false. + if (top_size - top_limit > 0) { + Memset(top_row + top_limit, top_row_src[top_limit + x - 1], + top_size - top_limit); + } + } + } + if (needs_left) { + // Compute left_column. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + left_column[-1] = + has_top ? top_row_src[left_index] : buffer[y][left_index]; + } else { + left_column[-1] = 1 << (bitdepth - 1); + } + if (!has_left && has_top) { + Memset(left_column, top_row_src[x], left_size); + } else if (!has_left && !has_top) { + Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size); + } else { + const int left_limit = std::min(max_y - y + 1, bottom_left_size); + for (int i = 0; i < left_limit; ++i) { + left_column[i] = buffer[y + i][x - 1]; + } + // Even though it is safe to call Memset with a size of 0, accessing + // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is + // false. + if (left_size - left_limit > 0) { + Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1], + left_size - left_limit); + } + } + } + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t dest_stride = buffer_[plane].columns(); + if (use_filter_intra) { + dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column, + prediction_parameters.filter_intra_mode, width, + height); + } else if (is_directional_mode) { + DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left, + needs_top, prediction_angle, width, height, max_x, + max_y, tx_size, top_row, left_column); + } else { + const dsp::IntraPredictor predictor = + GetIntraPredictor(mode, has_left, has_top); + assert(predictor != dsp::kNumIntraPredictors); + dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row, + left_column); + } +} + +template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#endif + +constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical); + +bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const { + const BlockParameters& bp = *block_parameters_holder_.Find(row, column); + PredictionMode mode; + if (plane == kPlaneY) { + mode = bp.y_mode; + } else { + if (bp.reference_frame[0] > kReferenceFrameIntra) return false; + mode = bp.uv_mode; + } + return kPredictionModeSmoothMask.Contains(mode); +} + +int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + if (block.top_available[plane]) { + const int row = block.row4x4 - 1 - (block.row4x4 & subsampling_y); + const int column = block.column4x4 + (~block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + if (block.left_available[plane]) { + const int row = block.row4x4 + (~block.row4x4 & subsampling_y); + const int column = block.column4x4 - 1 - (block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + return 0; +} + +template <typename Pixel> +void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool needs_left, + bool needs_top, int prediction_angle, + int width, int height, int max_x, int max_y, + TransformSize tx_size, Pixel* const top_row, + Pixel* const left_column) { + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t stride = buffer_[plane].columns(); + if (prediction_angle == 90) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical]( + dest, stride, top_row, left_column); + return; + } + if (prediction_angle == 180) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal]( + dest, stride, top_row, left_column); + return; + } + + bool upsampled_top = false; + bool upsampled_left = false; + if (sequence_header_.enable_intra_edge_filter) { + const int filter_type = GetIntraEdgeFilterType(block, plane); + if (prediction_angle > 90 && prediction_angle < 180 && + (width + height) >= 24) { + // 7.11.2.7. + left_column[-1] = top_row[-1] = RightShiftWithRounding( + left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4); + } + if (has_top && needs_top) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 90); + if (strength > 0) { + const int num_pixels = std::min(width, max_x - x + 1) + + ((prediction_angle < 90) ? height : 0) + 1; + dsp_.intra_edge_filter(top_row - 1, num_pixels, strength); + } + } + if (has_left && needs_left) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 180); + if (strength > 0) { + const int num_pixels = std::min(height, max_y - y + 1) + + ((prediction_angle > 180) ? width : 0) + 1; + dsp_.intra_edge_filter(left_column - 1, num_pixels, strength); + } + } + upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 90); + if (upsampled_top && needs_top) { + const int num_pixels = width + ((prediction_angle < 90) ? height : 0); + dsp_.intra_edge_upsampler(top_row, num_pixels); + } + upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 180); + if (upsampled_left && needs_left) { + const int num_pixels = height + ((prediction_angle > 180) ? width : 0); + dsp_.intra_edge_upsampler(left_column, num_pixels); + } + } + + if (prediction_angle < 90) { + const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle); + dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height, + dx, upsampled_top); + } else if (prediction_angle < 180) { + const int dx = + GetDirectionalIntraPredictorDerivative(180 - prediction_angle); + const int dy = + GetDirectionalIntraPredictorDerivative(prediction_angle - 90); + dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column, + width, height, dx, dy, upsampled_top, + upsampled_left); + } else { + assert(prediction_angle < 270); + const int dy = + GetDirectionalIntraPredictorDerivative(270 - prediction_angle); + dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width, + height, dy, upsampled_left); + } +} + +template <typename Pixel> +void Tile::PalettePrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, const int x, + const int y, const TransformSize tx_size) { + const int tx_width = kTransformWidth[tx_size]; + const int tx_height = kTransformHeight[tx_size]; + const uint16_t* const palette = block.bp->palette_mode_info.color[plane]; + const PlaneType plane_type = GetPlaneType(plane); + const int x4 = MultiplyBy4(x); + const int y4 = MultiplyBy4(y); + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + for (int row = 0; row < tx_height; ++row) { + assert(block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row] != nullptr); + for (int column = 0; column < tx_width; ++column) { + buffer[start_y + row][start_x + column] = + palette[block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row][x4 + column]]; + } + } +} + +template void Tile::PalettePrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, const int y, const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::PalettePrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, const int y, const TransformSize tx_size); +#endif + +template <typename Pixel> +void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, + const TransformSize tx_size) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + Array2DView<Pixel> y_buffer( + buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0])); + if (!block.scratch_buffer->cfl_luma_buffer_valid) { + const int luma_x = start_x << subsampling_x; + const int luma_y = start_y << subsampling_y; + dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y]( + block.scratch_buffer->cfl_luma_buffer, + prediction_parameters.max_luma_width - luma_x, + prediction_parameters.max_luma_height - luma_y, + reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]), + buffer_[kPlaneY].columns()); + block.scratch_buffer->cfl_luma_buffer_valid = true; + } + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + dsp_.cfl_intra_predictors[tx_size]( + reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]), + buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer, + (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u + : prediction_parameters.cfl_alpha_v); +} + +template void Tile::ChromaFromLumaPrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::ChromaFromLumaPrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#endif + +void Tile::InterIntraPrediction( + uint16_t* const prediction_0, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, + const PredictionParameters& prediction_parameters, + const int prediction_width, const int prediction_height, + const int subsampling_x, const int subsampling_y, uint8_t* const dest, + const ptrdiff_t dest_stride) { + assert(prediction_mask != nullptr); + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra || + prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge); + // The first buffer of InterIntra is from inter prediction. + // The second buffer is from intra prediction. +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (sequence_header_.color_config.bitdepth > 8) { + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true, + prediction_parameters.is_wedge_inter_intra, subsampling_x, + subsampling_y)( + prediction_0, reinterpret_cast<uint16_t*>(dest), + dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, dest, dest_stride); + return; + } +#endif + const int function_index = prediction_parameters.is_wedge_inter_intra + ? subsampling_x + subsampling_y + : 0; + // |is_inter_intra| prediction values are stored in a Pixel buffer but it is + // currently declared as a uint16_t buffer. + // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and + // remove the reinterpret_cast. + dsp_.inter_intra_mask_blend_8bpp[function_index]( + reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride, + prediction_mask, prediction_mask_stride, prediction_width, + prediction_height); +} + +void Tile::CompoundInterPrediction( + const Block& block, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, const int prediction_width, + const int prediction_height, const int subsampling_x, + const int subsampling_y, const int candidate_row, + const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) { + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + + void* prediction[2]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + const int bitdepth = sequence_header_.color_config.bitdepth; + if (bitdepth > 8) { + prediction[0] = block.scratch_buffer->prediction_buffer[0]; + prediction[1] = block.scratch_buffer->prediction_buffer[1]; + } else { +#endif + prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0]; + prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + } +#endif + + switch (prediction_parameters.compound_prediction_type) { + case kCompoundPredictionTypeWedge: + case kCompoundPredictionTypeDiffWeighted: + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false, + prediction_parameters.is_wedge_inter_intra, + subsampling_x, subsampling_y)( + prediction[0], prediction[1], + /*prediction_stride=*/prediction_width, prediction_mask, + prediction_mask_stride, prediction_width, prediction_height, dest, + dest_stride); + break; + case kCompoundPredictionTypeDistance: + DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width, + prediction_height, candidate_row, + candidate_column, dest, dest_stride); + break; + default: + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeAverage); + dsp_.average_blend(prediction[0], prediction[1], prediction_width, + prediction_height, dest, dest_stride); + break; + } +} + +GlobalMotion* Tile::GetWarpParams( + const Block& block, const Plane plane, const int prediction_width, + const int prediction_height, + const PredictionParameters& prediction_parameters, + const ReferenceFrameType reference_type, bool* const is_local_valid, + GlobalMotion* const global_motion_params, + GlobalMotion* const local_warp_params) const { + if (prediction_width < 8 || prediction_height < 8 || + frame_header_.force_integer_mv == 1) { + return nullptr; + } + if (plane == kPlaneY) { + *is_local_valid = + prediction_parameters.motion_mode == kMotionModeLocalWarp && + WarpEstimation( + prediction_parameters.num_warp_samples, DivideBy4(prediction_width), + DivideBy4(prediction_height), block.row4x4, block.column4x4, + block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates, + local_warp_params) && + SetupShear(local_warp_params); + } + if (prediction_parameters.motion_mode == kMotionModeLocalWarp && + *is_local_valid) { + return local_warp_params; + } + if (!IsScaled(reference_type)) { + GlobalMotionTransformationType global_motion_type = + (reference_type != kReferenceFrameIntra) + ? global_motion_params->type + : kNumGlobalMotionTransformationTypes; + const bool is_global_valid = + IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) && + SetupShear(global_motion_params); + // Valid global motion type implies reference type can't be intra. + assert(!is_global_valid || reference_type != kReferenceFrameIntra); + if (is_global_valid) return global_motion_params; + } + return nullptr; +} + +bool Tile::InterPrediction(const Block& block, const Plane plane, const int x, + const int y, const int prediction_width, + const int prediction_height, int candidate_row, + int candidate_column, bool* const is_local_valid, + GlobalMotion* const local_warp_params) { + const int bitdepth = sequence_header_.color_config.bitdepth; + const BlockParameters& bp = *block.bp; + const BlockParameters& bp_reference = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const bool is_compound = + bp_reference.reference_frame[1] > kReferenceFrameIntra; + assert(bp.is_inter); + const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra; + + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t dest_stride = buffer_[plane].columns(); // In bytes. + for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) { + const ReferenceFrameType reference_type = + bp_reference.reference_frame[index]; + GlobalMotion global_motion_params = + frame_header_.global_motion[reference_type]; + GlobalMotion* warp_params = + GetWarpParams(block, plane, prediction_width, prediction_height, + prediction_parameters, reference_type, is_local_valid, + &global_motion_params, local_warp_params); + if (warp_params != nullptr) { + if (!BlockWarpProcess(block, plane, index, x, y, prediction_width, + prediction_height, warp_params, is_compound, + is_inter_intra, dest, dest_stride)) { + return false; + } + } else { + const int reference_index = + prediction_parameters.use_intra_block_copy + ? -1 + : frame_header_.reference_frame_index[reference_type - + kReferenceFrameLast]; + if (!BlockInterPrediction( + block, plane, reference_index, bp_reference.mv.mv[index], x, y, + prediction_width, prediction_height, candidate_row, + candidate_column, block.scratch_buffer->prediction_buffer[index], + is_compound, is_inter_intra, dest, dest_stride)) { + return false; + } + } + } + + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + ptrdiff_t prediction_mask_stride = 0; + const uint8_t* prediction_mask = nullptr; + if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge) { + const Array2D<uint8_t>& wedge_mask = + wedge_masks_[GetWedgeBlockSizeIndex(block.size)] + [prediction_parameters.wedge_sign] + [prediction_parameters.wedge_index]; + prediction_mask = wedge_mask[0]; + prediction_mask_stride = wedge_mask.columns(); + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra) { + // 7.11.3.13. The inter intra masks are precomputed and stored as a set of + // look up tables. + assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes); + prediction_mask = + kInterIntraMasks[prediction_parameters.inter_intra_mode] + [GetInterIntraMaskLookupIndex(prediction_width)] + [GetInterIntraMaskLookupIndex(prediction_height)]; + prediction_mask_stride = prediction_width; + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeDiffWeighted) { + if (plane == kPlaneY) { + assert(prediction_width >= 8); + assert(prediction_height >= 8); + dsp_.weight_mask[FloorLog2(prediction_width) - 3] + [FloorLog2(prediction_height) - 3] + [static_cast<int>(prediction_parameters.mask_is_inverse)]( + block.scratch_buffer->prediction_buffer[0], + block.scratch_buffer->prediction_buffer[1], + block.scratch_buffer->weight_mask, + kMaxSuperBlockSizeInPixels); + } + prediction_mask = block.scratch_buffer->weight_mask; + prediction_mask_stride = kMaxSuperBlockSizeInPixels; + } + + if (is_compound) { + CompoundInterPrediction(block, prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, subsampling_x, + subsampling_y, candidate_row, candidate_column, + dest, dest_stride); + } else if (prediction_parameters.motion_mode == kMotionModeObmc) { + // Obmc mode is allowed only for single reference (!is_compound). + return ObmcPrediction(block, plane, prediction_width, prediction_height); + } else if (is_inter_intra) { + // InterIntra and obmc must be mutually exclusive. + InterIntraPrediction( + block.scratch_buffer->prediction_buffer[0], prediction_mask, + prediction_mask_stride, prediction_parameters, prediction_width, + prediction_height, subsampling_x, subsampling_y, dest, dest_stride); + } + return true; +} + +bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv, + const Plane plane, + const int reference_frame_index, const int width, + const int height, const int x, const int y, + const int candidate_row, + const int candidate_column, + const ObmcDirection blending_direction) { + const int bitdepth = sequence_header_.color_config.bitdepth; + // Obmc's prediction needs to be clipped before blending with above/left + // prediction blocks. + // Obmc prediction is used only when is_compound is false. So it is safe to + // use prediction_buffer[1] as a temporary buffer for the Obmc prediction. + static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >= + 64 * 64 * sizeof(uint16_t), + ""); + auto* const obmc_buffer = + reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]); + const ptrdiff_t obmc_buffer_stride = + (bitdepth == 8) ? width : width * sizeof(uint16_t); + if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y, + width, height, candidate_row, candidate_column, + nullptr, false, false, obmc_buffer, + obmc_buffer_stride)) { + return false; + } + + uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t prediction_stride = buffer_[plane].columns(); + dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width, + height, obmc_buffer, obmc_buffer_stride); + return true; +} + +bool Tile::ObmcPrediction(const Block& block, const Plane plane, + const int width, const int height) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + if (block.top_available[kPlaneY] && + !IsBlockSmallerThan8x8(block.residual_size[plane])) { + const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]); + const int column4x4_max = + std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4); + const int candidate_row = block.row4x4 - 1; + const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y; + int column4x4 = block.column4x4; + const int prediction_height = std::min(height >> 1, 32 >> subsampling_y); + for (int i = 0, step; i < num_limit && column4x4 < column4x4_max; + column4x4 += step) { + const int candidate_column = column4x4 | 1; + const BlockParameters& bp_top = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_top.size; + step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16); + if (bp_top.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_top.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_width = + std::min(width, MultiplyBy4(step) >> subsampling_x); + if (!ObmcBlockPrediction( + block, bp_top.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, + MultiplyBy4(column4x4) >> subsampling_x, block_start_y, + candidate_row, candidate_column, kObmcDirectionVertical)) { + return false; + } + } + } + } + + if (block.left_available[kPlaneY]) { + const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]); + const int row4x4_max = + std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4); + const int candidate_column = block.column4x4 - 1; + int row4x4 = block.row4x4; + const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x; + const int prediction_width = std::min(width >> 1, 32 >> subsampling_x); + for (int i = 0, step; i < num_limit && row4x4 < row4x4_max; + row4x4 += step) { + const int candidate_row = row4x4 | 1; + const BlockParameters& bp_left = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_left.size; + step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16); + if (bp_left.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_left.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_height = + std::min(height, MultiplyBy4(step) >> subsampling_y); + if (!ObmcBlockPrediction( + block, bp_left.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, block_start_x, + MultiplyBy4(row4x4) >> subsampling_y, candidate_row, + candidate_column, kObmcDirectionHorizontal)) { + return false; + } + } + } + } + return true; +} + +void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1, + const int width, const int height, + const int candidate_row, + const int candidate_column, uint8_t* dest, + ptrdiff_t dest_stride) { + int distance[2]; + int weight[2]; + for (int reference = 0; reference < 2; ++reference) { + const BlockParameters& bp = + *block_parameters_holder_.Find(candidate_row, candidate_column); + // Note: distance[0] and distance[1] correspond to relative distance + // between current frame and reference frame [1] and [0], respectively. + distance[1 - reference] = std::min( + std::abs(static_cast<int>( + current_frame_.reference_info() + ->relative_distance_from[bp.reference_frame[reference]])), + static_cast<int>(kMaxFrameDistance)); + } + GetDistanceWeights(distance, weight); + + dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1], + width, height, dest, dest_stride); +} + +void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane, + const int reference_frame_index, const int x, + const int y, int* const start_x, + int* const start_y, int* const step_x, + int* const step_y) { + const int reference_upscaled_width = + (reference_frame_index == -1) + ? frame_header_.upscaled_width + : reference_frames_[reference_frame_index]->upscaled_width(); + const int reference_height = + (reference_frame_index == -1) + ? frame_header_.height + : reference_frames_[reference_frame_index]->frame_height(); + assert(2 * frame_header_.width >= reference_upscaled_width && + 2 * frame_header_.height >= reference_height && + frame_header_.width <= 16 * reference_upscaled_width && + frame_header_.height <= 16 * reference_height); + const bool is_scaled_x = reference_upscaled_width != frame_header_.width; + const bool is_scaled_y = reference_height != frame_header_.height; + const int half_sample = 1 << (kSubPixelBits - 1); + int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]); + int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]); + const int rounding_offset = + DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits)); + if (is_scaled_x) { + const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) + + DivideBy2(frame_header_.width)) / + frame_header_.width; + *step_x = RightShiftWithRoundingSigned( + scale_x, kReferenceScaleShift - kScaleSubPixelBits); + orig_x += half_sample; + // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can + // be up to 15 bits. So we use int64_t to hold base_x. + const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x - + (half_sample << kReferenceScaleShift); + *start_x = + RightShiftWithRoundingSigned( + base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_x = 1 << kScaleSubPixelBits; + *start_x = LeftShift(orig_x, 6) + rounding_offset; + } + if (is_scaled_y) { + const int scale_y = ((reference_height << kReferenceScaleShift) + + DivideBy2(frame_header_.height)) / + frame_header_.height; + *step_y = RightShiftWithRoundingSigned( + scale_y, kReferenceScaleShift - kScaleSubPixelBits); + orig_y += half_sample; + const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y - + (half_sample << kReferenceScaleShift); + *start_y = + RightShiftWithRoundingSigned( + base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_y = 1 << kScaleSubPixelBits; + *start_y = LeftShift(orig_y, 6) + rounding_offset; + } +} + +// static. +bool Tile::GetReferenceBlockPosition( + const int reference_frame_index, const bool is_scaled, const int width, + const int height, const int ref_start_x, const int ref_last_x, + const int ref_start_y, const int ref_last_y, const int start_x, + const int start_y, const int step_x, const int step_y, + const int left_border, const int right_border, const int top_border, + const int bottom_border, int* ref_block_start_x, int* ref_block_start_y, + int* ref_block_end_x) { + *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0); + *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0); + if (reference_frame_index == -1) { + return false; + } + *ref_block_start_x -= kConvolveBorderLeftTop; + *ref_block_start_y -= kConvolveBorderLeftTop; + *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) + + kConvolveBorderRight; + int ref_block_end_y = + GetPixelPositionFromHighScale(start_y, step_y, height - 1) + + kConvolveBorderBottom; + if (is_scaled) { + const int block_height = + (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> + kScaleSubPixelBits) + + kSubPixelTaps; + ref_block_end_y = *ref_block_start_y + block_height - 1; + } + // Determines if we need to extend beyond the left/right/top/bottom border. + return *ref_block_start_x < (ref_start_x - left_border) || + *ref_block_end_x > (ref_last_x + right_border) || + *ref_block_start_y < (ref_start_y - top_border) || + ref_block_end_y > (ref_last_y + bottom_border); +} + +// Builds a block as the input for convolve, by copying the content of +// reference frame (either a decoded reference frame, or current frame). +// |block_extended_width| is the combined width of the block and its borders. +template <typename Pixel> +void Tile::BuildConvolveBlock( + const Plane plane, const int reference_frame_index, const bool is_scaled, + const int height, const int ref_start_x, const int ref_last_x, + const int ref_start_y, const int ref_last_y, const int step_y, + const int ref_block_start_x, const int ref_block_end_x, + const int ref_block_start_y, uint8_t* block_buffer, + ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) { + const YuvBuffer* const reference_buffer = + (reference_frame_index == -1) + ? current_frame_.buffer() + : reference_frames_[reference_frame_index]->buffer(); + Array2DView<const Pixel> reference_block( + reference_buffer->height(plane), + reference_buffer->stride(plane) / sizeof(Pixel), + reinterpret_cast<const Pixel*>(reference_buffer->data(plane))); + auto* const block_head = reinterpret_cast<Pixel*>(block_buffer); + convolve_buffer_stride /= sizeof(Pixel); + int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom; + if (is_scaled) { + block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> + kScaleSubPixelBits) + + kSubPixelTaps; + } + const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x); + const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y); + const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x); + const int block_width = copy_end_x - copy_start_x + 1; + const bool extend_left = ref_block_start_x < ref_start_x; + const bool extend_right = ref_block_end_x > ref_last_x; + const bool out_of_left = copy_start_x > ref_block_end_x; + const bool out_of_right = copy_end_x < ref_block_start_x; + if (out_of_left || out_of_right) { + const int ref_x = out_of_left ? copy_start_x : copy_end_x; + Pixel* buf_ptr = block_head; + for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) { + Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width); + if (ref_block_start_y + y >= ref_start_y && + ref_block_start_y + y < ref_last_y) { + ++ref_y; + } + buf_ptr += convolve_buffer_stride; + } + } else { + Pixel* buf_ptr = block_head; + const int left_width = copy_start_x - ref_block_start_x; + for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) { + if (extend_left) { + Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width); + } + memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x], + block_width * sizeof(Pixel)); + if (extend_right) { + Memset(buf_ptr + left_width + block_width, + reference_block[ref_y][copy_end_x], + block_extended_width - left_width - block_width); + } + if (ref_block_start_y + y >= ref_start_y && + ref_block_start_y + y < ref_last_y) { + ++ref_y; + } + buf_ptr += convolve_buffer_stride; + } + } +} + +bool Tile::BlockInterPrediction( + const Block& block, const Plane plane, const int reference_frame_index, + const MotionVector& mv, const int x, const int y, const int width, + const int height, const int candidate_row, const int candidate_column, + uint16_t* const prediction, const bool is_compound, + const bool is_inter_intra, uint8_t* const dest, + const ptrdiff_t dest_stride) { + const BlockParameters& bp = + *block_parameters_holder_.Find(candidate_row, candidate_column); + int start_x; + int start_y; + int step_x; + int step_y; + ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y, + &step_x, &step_y); + const int horizontal_filter_index = bp.interpolation_filter[1]; + const int vertical_filter_index = bp.interpolation_filter[0]; + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + // reference_frame_index equal to -1 indicates using current frame as + // reference. + const YuvBuffer* const reference_buffer = + (reference_frame_index == -1) + ? current_frame_.buffer() + : reference_frames_[reference_frame_index]->buffer(); + const int reference_upscaled_width = + (reference_frame_index == -1) + ? MultiplyBy4(frame_header_.columns4x4) + : reference_frames_[reference_frame_index]->upscaled_width(); + const int reference_height = + (reference_frame_index == -1) + ? MultiplyBy4(frame_header_.rows4x4) + : reference_frames_[reference_frame_index]->frame_height(); + const int ref_start_x = 0; + const int ref_last_x = + SubsampledValue(reference_upscaled_width, subsampling_x) - 1; + const int ref_start_y = 0; + const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1; + + const bool is_scaled = (reference_frame_index != -1) && + (frame_header_.width != reference_upscaled_width || + frame_header_.height != reference_height); + const int bitdepth = sequence_header_.color_config.bitdepth; + const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t); + int ref_block_start_x; + int ref_block_start_y; + int ref_block_end_x; + const bool extend_block = GetReferenceBlockPosition( + reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x, + ref_start_y, ref_last_y, start_x, start_y, step_x, step_y, + reference_buffer->left_border(plane), + reference_buffer->right_border(plane), + reference_buffer->top_border(plane), + reference_buffer->bottom_border(plane), &ref_block_start_x, + &ref_block_start_y, &ref_block_end_x); + + // In frame parallel mode, ensure that the reference block has been decoded + // and available for referencing. + if (reference_frame_index != -1 && frame_parallel_) { + int reference_y_max; + if (is_scaled) { + // TODO(vigneshv): For now, we wait for the entire reference frame to be + // decoded if we are using scaled references. This will eventually be + // fixed. + reference_y_max = reference_height; + } else { + reference_y_max = + std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y); + // For U and V planes with subsampling, we need to multiply + // reference_y_max by 2 since we only track the progress of Y planes. + reference_y_max = LeftShift(reference_y_max, subsampling_y); + } + if (reference_frame_progress_cache_[reference_frame_index] < + reference_y_max && + !reference_frames_[reference_frame_index]->WaitUntil( + reference_y_max, + &reference_frame_progress_cache_[reference_frame_index])) { + return false; + } + } + + const uint8_t* block_start = nullptr; + ptrdiff_t convolve_buffer_stride; + if (!extend_block) { + const YuvBuffer* const reference_buffer = + (reference_frame_index == -1) + ? current_frame_.buffer() + : reference_frames_[reference_frame_index]->buffer(); + convolve_buffer_stride = reference_buffer->stride(plane); + if (reference_frame_index == -1 || is_scaled) { + block_start = reference_buffer->data(plane) + + ref_block_start_y * reference_buffer->stride(plane) + + ref_block_start_x * pixel_size; + } else { + block_start = reference_buffer->data(plane) + + (ref_block_start_y + kConvolveBorderLeftTop) * + reference_buffer->stride(plane) + + (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size; + } + } else { + // The block width can be at most 2 times as much as current + // block's width because of scaling. + auto block_extended_width = Align<ptrdiff_t>( + (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) * + pixel_size, + kMaxAlignment); + convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride; +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (bitdepth > 8) { + BuildConvolveBlock<uint16_t>( + plane, reference_frame_index, is_scaled, height, ref_start_x, + ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x, + ref_block_end_x, ref_block_start_y, + block.scratch_buffer->convolve_block_buffer.get(), + convolve_buffer_stride, block_extended_width); + } else { +#endif + BuildConvolveBlock<uint8_t>( + plane, reference_frame_index, is_scaled, height, ref_start_x, + ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x, + ref_block_end_x, ref_block_start_y, + block.scratch_buffer->convolve_block_buffer.get(), + convolve_buffer_stride, block_extended_width); +#if LIBGAV1_MAX_BITDEPTH >= 10 + } +#endif + block_start = block.scratch_buffer->convolve_block_buffer.get() + + (is_scaled ? 0 + : kConvolveBorderLeftTop * convolve_buffer_stride + + kConvolveBorderLeftTop * pixel_size); + } + + void* const output = + (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest); + ptrdiff_t output_stride = (is_compound || is_inter_intra) + ? /*prediction_stride=*/width + : dest_stride; +#if LIBGAV1_MAX_BITDEPTH >= 10 + // |is_inter_intra| calculations are written to the |prediction| buffer. + // Unlike the |is_compound| calculations the output is Pixel and not uint16_t. + // convolve_func() expects |output_stride| to be in bytes and not Pixels. + // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to + // account for this. + if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) { + output_stride *= 2; + } +#endif + assert(output != nullptr); + if (is_scaled) { + dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound]; + assert(convolve_func != nullptr); + + convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index, + vertical_filter_index, start_x, start_y, step_x, step_y, + width, height, output, output_stride); + } else { + const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask; + const int vertical_filter_id = (start_y >> 6) & kSubPixelMask; + + dsp::ConvolveFunc convolve_func = + dsp_.convolve[reference_frame_index == -1][is_compound] + [vertical_filter_id != 0][horizontal_filter_id != 0]; + assert(convolve_func != nullptr); + + convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index, + vertical_filter_index, horizontal_filter_id, + vertical_filter_id, width, height, output, output_stride); + } + return true; +} + +bool Tile::BlockWarpProcess(const Block& block, const Plane plane, + const int index, const int block_start_x, + const int block_start_y, const int width, + const int height, GlobalMotion* const warp_params, + const bool is_compound, const bool is_inter_intra, + uint8_t* const dest, const ptrdiff_t dest_stride) { + assert(width >= 8 && height >= 8); + const BlockParameters& bp = *block.bp; + const int reference_frame_index = + frame_header_.reference_frame_index[bp.reference_frame[index] - + kReferenceFrameLast]; + const uint8_t* const source = + reference_frames_[reference_frame_index]->buffer()->data(plane); + ptrdiff_t source_stride = + reference_frames_[reference_frame_index]->buffer()->stride(plane); + const int source_width = + reference_frames_[reference_frame_index]->buffer()->width(plane); + const int source_height = + reference_frames_[reference_frame_index]->buffer()->height(plane); + uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index]; + + // In frame parallel mode, ensure that the reference block has been decoded + // and available for referencing. + if (frame_parallel_) { + int reference_y_max = -1; + // Find out the maximum y-coordinate for warping. + for (int start_y = block_start_y; start_y < block_start_y + height; + start_y += 8) { + for (int start_x = block_start_x; start_x < block_start_x + width; + start_x += 8) { + const int src_x = (start_x + 4) << subsampling_x_[plane]; + const int src_y = (start_y + 4) << subsampling_y_[plane]; + const int dst_y = src_x * warp_params->params[4] + + src_y * warp_params->params[5] + + warp_params->params[1]; + const int y4 = dst_y >> subsampling_y_[plane]; + const int iy4 = y4 >> kWarpedModelPrecisionBits; + reference_y_max = std::max(iy4 + 8, reference_y_max); + } + } + // For U and V planes with subsampling, we need to multiply reference_y_max + // by 2 since we only track the progress of Y planes. + reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]); + if (reference_frame_progress_cache_[reference_frame_index] < + reference_y_max && + !reference_frames_[reference_frame_index]->WaitUntil( + reference_y_max, + &reference_frame_progress_cache_[reference_frame_index])) { + return false; + } + } + if (is_compound) { + dsp_.warp_compound(source, source_stride, source_width, source_height, + warp_params->params, subsampling_x_[plane], + subsampling_y_[plane], block_start_x, block_start_y, + width, height, warp_params->alpha, warp_params->beta, + warp_params->gamma, warp_params->delta, prediction, + /*prediction_stride=*/width); + } else { + void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest; + ptrdiff_t output_stride = + is_inter_intra ? /*prediction_stride=*/width : dest_stride; +#if LIBGAV1_MAX_BITDEPTH >= 10 + // |is_inter_intra| calculations are written to the |prediction| buffer. + // Unlike the |is_compound| calculations the output is Pixel and not + // uint16_t. warp_clip() expects |output_stride| to be in bytes and not + // Pixels. |prediction_stride| is in units of uint16_t. Adjust + // |output_stride| to account for this. + if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) { + output_stride *= 2; + } +#endif + dsp_.warp(source, source_stride, source_width, source_height, + warp_params->params, subsampling_x_[plane], subsampling_y_[plane], + block_start_x, block_start_y, width, height, warp_params->alpha, + warp_params->beta, warp_params->gamma, warp_params->delta, output, + output_stride); + } + return true; +} + +} // namespace libgav1 |