diff options
author | qinxialei <xialeiqin@gmail.com> | 2020-10-29 11:26:59 +0800 |
---|---|---|
committer | qinxialei <xialeiqin@gmail.com> | 2020-10-29 11:26:59 +0800 |
commit | e8d277081293b6fb2a5d469616baaa7a06f52496 (patch) | |
tree | 1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/tile | |
download | libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.gz libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.bz2 libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.zip |
Import Upstream version 0.16.0
Diffstat (limited to 'src/tile')
-rw-r--r-- | src/tile/bitstream/mode_info.cc | 1303 | ||||
-rw-r--r-- | src/tile/bitstream/palette.cc | 319 | ||||
-rw-r--r-- | src/tile/bitstream/partition.cc | 148 | ||||
-rw-r--r-- | src/tile/bitstream/transform_size.cc | 222 | ||||
-rw-r--r-- | src/tile/prediction.cc | 1361 | ||||
-rw-r--r-- | src/tile/tile.cc | 2573 |
6 files changed, 5926 insertions, 0 deletions
diff --git a/src/tile/bitstream/mode_info.cc b/src/tile/bitstream/mode_info.cc new file mode 100644 index 0000000..0b22eb0 --- /dev/null +++ b/src/tile/bitstream/mode_info.cc @@ -0,0 +1,1303 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> +#include <vector> + +#include "src/buffer_pool.h" +#include "src/dsp/constants.h" +#include "src/motion_vector.h" +#include "src/obu_parser.h" +#include "src/prediction_mask.h" +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/array_2d.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/logging.h" +#include "src/utils/segmentation.h" +#include "src/utils/segmentation_map.h" +#include "src/utils/types.h" + +namespace libgav1 { +namespace { + +constexpr int kDeltaQSmall = 3; +constexpr int kDeltaLfSmall = 3; + +constexpr uint8_t kIntraYModeContext[kIntraPredictionModesY] = { + 0, 1, 2, 3, 4, 4, 4, 4, 3, 0, 1, 2, 0}; + +constexpr uint8_t kSizeGroup[kMaxBlockSizes] = { + 0, 0, 0, 0, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 3, 3, 2, 3, 3, 3, 3, 3}; + +constexpr int kCompoundModeNewMvContexts = 5; +constexpr uint8_t 
kCompoundModeContextMap[3][kCompoundModeNewMvContexts] = { + {0, 1, 1, 1, 1}, {1, 2, 3, 4, 4}, {4, 4, 5, 6, 7}}; + +enum CflSign : uint8_t { + kCflSignZero = 0, + kCflSignNegative = 1, + kCflSignPositive = 2 +}; + +// For each possible value of the combined signs (which is read from the +// bitstream), this array stores the following: sign_u, sign_v, alpha_u_context, +// alpha_v_context. Only positive entries are used. Entry at index i is computed +// as follows: +// sign_u = i / 3 +// sign_v = i % 3 +// alpha_u_context = i - 2 +// alpha_v_context = (sign_v - 1) * 3 + sign_u +constexpr int8_t kCflAlphaLookup[kCflAlphaSignsSymbolCount][4] = { + {0, 1, -2, 0}, {0, 2, -1, 3}, {1, 0, 0, -2}, {1, 1, 1, 1}, + {1, 2, 2, 4}, {2, 0, 3, -1}, {2, 1, 4, 2}, {2, 2, 5, 5}, +}; + +constexpr BitMaskSet kPredictionModeHasNearMvMask(kPredictionModeNearMv, + kPredictionModeNearNearMv, + kPredictionModeNearNewMv, + kPredictionModeNewNearMv); + +constexpr BitMaskSet kIsInterIntraModeAllowedMask(kBlock8x8, kBlock8x16, + kBlock16x8, kBlock16x16, + kBlock16x32, kBlock32x16, + kBlock32x32); + +bool IsBackwardReference(ReferenceFrameType type) { + return type >= kReferenceFrameBackward && type <= kReferenceFrameAlternate; +} + +bool IsSameDirectionReferencePair(ReferenceFrameType type1, + ReferenceFrameType type2) { + return (type1 >= kReferenceFrameBackward) == + (type2 >= kReferenceFrameBackward); +} + +// This is called neg_deinterleave() in the spec. +int DecodeSegmentId(int diff, int reference, int max) { + if (reference == 0) return diff; + if (reference >= max - 1) return max - diff - 1; + const int value = ((diff & 1) != 0) ? reference + ((diff + 1) >> 1) + : reference - (diff >> 1); + const int reference2 = (reference << 1); + if (reference2 < max) { + return (diff <= reference2) ? value : diff; + } + return (diff <= ((max - reference - 1) << 1)) ? value : max - (diff + 1); +} + +// This is called DrlCtxStack in section 7.10.2.14 of the spec. 
+// In the spec, the weights of all the nearest mvs are incremented by a bonus +// weight which is larger than any natural weight, and the weights of the mvs +// are compared with this bonus weight to determine their contexts. We replace +// this procedure by introducing |nearest_mv_count| in PredictionParameters, +// which records the count of the nearest mvs. Since all the nearest mvs are in +// the beginning of the mv stack, the |index| of a mv in the mv stack can be +// compared with |nearest_mv_count| to get that mv's context. +int GetRefMvIndexContext(int nearest_mv_count, int index) { + if (index + 1 < nearest_mv_count) { + return 0; + } + if (index + 1 == nearest_mv_count) { + return 1; + } + return 2; +} + +// Returns true if both the width and height of the block is less than 64. +bool IsBlockDimensionLessThan64(BlockSize size) { + return size <= kBlock32x32 && size != kBlock16x64; +} + +int GetUseCompoundReferenceContext(const Tile::Block& block) { + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + if (block.IsTopSingle() && block.IsLeftSingle()) { + return static_cast<int>(IsBackwardReference(block.TopReference(0))) ^ + static_cast<int>(IsBackwardReference(block.LeftReference(0))); + } + if (block.IsTopSingle()) { + return 2 + static_cast<int>(IsBackwardReference(block.TopReference(0)) || + block.IsTopIntra()); + } + if (block.IsLeftSingle()) { + return 2 + static_cast<int>(IsBackwardReference(block.LeftReference(0)) || + block.IsLeftIntra()); + } + return 4; + } + if (block.top_available[kPlaneY]) { + return block.IsTopSingle() + ? static_cast<int>(IsBackwardReference(block.TopReference(0))) + : 3; + } + if (block.left_available[kPlaneY]) { + return block.IsLeftSingle() + ? static_cast<int>(IsBackwardReference(block.LeftReference(0))) + : 3; + } + return 1; +} + +// Calculates count0 by calling block.CountReferences() on the frame types from +// type0_start to type0_end, inclusive, and summing the results. 
+// Calculates count1 by calling block.CountReferences() on the frame types from +// type1_start to type1_end, inclusive, and summing the results. +// Compares count0 with count1 and returns 0, 1 or 2. +// +// See count_refs and ref_count_ctx in 8.3.2. +int GetReferenceContext(const Tile::Block& block, + ReferenceFrameType type0_start, + ReferenceFrameType type0_end, + ReferenceFrameType type1_start, + ReferenceFrameType type1_end) { + int count0 = 0; + int count1 = 0; + for (int type = type0_start; type <= type0_end; ++type) { + count0 += block.CountReferences(static_cast<ReferenceFrameType>(type)); + } + for (int type = type1_start; type <= type1_end; ++type) { + count1 += block.CountReferences(static_cast<ReferenceFrameType>(type)); + } + return (count0 < count1) ? 0 : (count0 == count1 ? 1 : 2); +} + +} // namespace + +bool Tile::ReadSegmentId(const Block& block) { + int top_left = -1; + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + top_left = + block_parameters_holder_.Find(block.row4x4 - 1, block.column4x4 - 1) + ->segment_id; + } + int top = -1; + if (block.top_available[kPlaneY]) { + top = block.bp_top->segment_id; + } + int left = -1; + if (block.left_available[kPlaneY]) { + left = block.bp_left->segment_id; + } + int pred; + if (top == -1) { + pred = (left == -1) ? 0 : left; + } else if (left == -1) { + pred = top; + } else { + pred = (top_left == top) ? 
top : left; + } + BlockParameters& bp = *block.bp; + if (bp.skip) { + bp.segment_id = pred; + return true; + } + int context = 0; + if (top_left < 0) { + context = 0; + } else if (top_left == top && top_left == left) { + context = 2; + } else if (top_left == top || top_left == left || top == left) { + context = 1; + } + uint16_t* const segment_id_cdf = + symbol_decoder_context_.segment_id_cdf[context]; + const int encoded_segment_id = + reader_.ReadSymbol<kMaxSegments>(segment_id_cdf); + bp.segment_id = + DecodeSegmentId(encoded_segment_id, pred, + frame_header_.segmentation.last_active_segment_id + 1); + // Check the bitstream conformance requirement in Section 6.10.8 of the spec. + if (bp.segment_id < 0 || + bp.segment_id > frame_header_.segmentation.last_active_segment_id) { + LIBGAV1_DLOG( + ERROR, + "Corrupted segment_ids: encoded %d, last active %d, postprocessed %d", + encoded_segment_id, frame_header_.segmentation.last_active_segment_id, + bp.segment_id); + return false; + } + return true; +} + +bool Tile::ReadIntraSegmentId(const Block& block) { + BlockParameters& bp = *block.bp; + if (!frame_header_.segmentation.enabled) { + bp.segment_id = 0; + return true; + } + return ReadSegmentId(block); +} + +void Tile::ReadSkip(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.segmentation.segment_id_pre_skip && + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip)) { + bp.skip = true; + return; + } + int context = 0; + if (block.top_available[kPlaneY] && block.bp_top->skip) { + ++context; + } + if (block.left_available[kPlaneY] && block.bp_left->skip) { + ++context; + } + uint16_t* const skip_cdf = symbol_decoder_context_.skip_cdf[context]; + bp.skip = reader_.ReadSymbol(skip_cdf); +} + +void Tile::ReadSkipMode(const Block& block) { + BlockParameters& bp = *block.bp; + if (!frame_header_.skip_mode_present || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + 
frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv) || + IsBlockDimension4(block.size)) { + bp.skip_mode = false; + return; + } + const int context = + (block.left_available[kPlaneY] + ? static_cast<int>(block.bp_left->skip_mode) + : 0) + + (block.top_available[kPlaneY] ? static_cast<int>(block.bp_top->skip_mode) + : 0); + bp.skip_mode = + reader_.ReadSymbol(symbol_decoder_context_.skip_mode_cdf[context]); +} + +void Tile::ReadCdef(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip || frame_header_.coded_lossless || + !sequence_header_.enable_cdef || frame_header_.allow_intrabc) { + return; + } + const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64]; + const int cdef_mask4x4 = ~(cdef_size4x4 - 1); + const int row4x4 = block.row4x4 & cdef_mask4x4; + const int column4x4 = block.column4x4 & cdef_mask4x4; + const int row = DivideBy16(row4x4); + const int column = DivideBy16(column4x4); + if (cdef_index_[row][column] == -1) { + cdef_index_[row][column] = + frame_header_.cdef.bits > 0 + ? 
static_cast<int16_t>(reader_.ReadLiteral(frame_header_.cdef.bits)) + : 0; + for (int i = row4x4; i < row4x4 + block.height4x4; i += cdef_size4x4) { + for (int j = column4x4; j < column4x4 + block.width4x4; + j += cdef_size4x4) { + cdef_index_[DivideBy16(i)][DivideBy16(j)] = cdef_index_[row][column]; + } + } + } +} + +int Tile::ReadAndClipDelta(uint16_t* const cdf, int delta_small, int scale, + int min_value, int max_value, int value) { + int abs = reader_.ReadSymbol<kDeltaSymbolCount>(cdf); + if (abs == delta_small) { + const int remaining_bit_count = + static_cast<int>(reader_.ReadLiteral(3)) + 1; + const int abs_remaining_bits = + static_cast<int>(reader_.ReadLiteral(remaining_bit_count)); + abs = abs_remaining_bits + (1 << remaining_bit_count) + 1; + } + if (abs != 0) { + const bool sign = static_cast<bool>(reader_.ReadBit()); + const int scaled_abs = abs << scale; + const int reduced_delta = sign ? -scaled_abs : scaled_abs; + value += reduced_delta; + value = Clip3(value, min_value, max_value); + } + return value; +} + +void Tile::ReadQuantizerIndexDelta(const Block& block) { + assert(read_deltas_); + BlockParameters& bp = *block.bp; + if ((block.size == SuperBlockSize() && bp.skip)) { + return; + } + current_quantizer_index_ = + ReadAndClipDelta(symbol_decoder_context_.delta_q_cdf, kDeltaQSmall, + frame_header_.delta_q.scale, kMinLossyQuantizer, + kMaxQuantizer, current_quantizer_index_); +} + +void Tile::ReadLoopFilterDelta(const Block& block) { + assert(read_deltas_); + BlockParameters& bp = *block.bp; + if (!frame_header_.delta_lf.present || + (block.size == SuperBlockSize() && bp.skip)) { + return; + } + int frame_lf_count = 1; + if (frame_header_.delta_lf.multi) { + frame_lf_count = kFrameLfCount - (PlaneCount() > 1 ? 0 : 2); + } + bool recompute_deblock_filter_levels = false; + for (int i = 0; i < frame_lf_count; ++i) { + uint16_t* const delta_lf_abs_cdf = + frame_header_.delta_lf.multi + ? 
symbol_decoder_context_.delta_lf_multi_cdf[i] + : symbol_decoder_context_.delta_lf_cdf; + const int8_t old_delta_lf = delta_lf_[i]; + delta_lf_[i] = ReadAndClipDelta( + delta_lf_abs_cdf, kDeltaLfSmall, frame_header_.delta_lf.scale, + -kMaxLoopFilterValue, kMaxLoopFilterValue, delta_lf_[i]); + recompute_deblock_filter_levels = + recompute_deblock_filter_levels || (old_delta_lf != delta_lf_[i]); + } + delta_lf_all_zero_ = + (delta_lf_[0] | delta_lf_[1] | delta_lf_[2] | delta_lf_[3]) == 0; + if (!delta_lf_all_zero_ && recompute_deblock_filter_levels) { + post_filter_.ComputeDeblockFilterLevels(delta_lf_, deblock_filter_levels_); + } +} + +void Tile::ReadPredictionModeY(const Block& block, bool intra_y_mode) { + uint16_t* cdf; + if (intra_y_mode) { + const PredictionMode top_mode = + block.top_available[kPlaneY] ? block.bp_top->y_mode : kPredictionModeDc; + const PredictionMode left_mode = block.left_available[kPlaneY] + ? block.bp_left->y_mode + : kPredictionModeDc; + const int top_context = kIntraYModeContext[top_mode]; + const int left_context = kIntraYModeContext[left_mode]; + cdf = symbol_decoder_context_ + .intra_frame_y_mode_cdf[top_context][left_context]; + } else { + cdf = symbol_decoder_context_.y_mode_cdf[kSizeGroup[block.size]]; + } + block.bp->y_mode = static_cast<PredictionMode>( + reader_.ReadSymbol<kIntraPredictionModesY>(cdf)); +} + +void Tile::ReadIntraAngleInfo(const Block& block, PlaneType plane_type) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.angle_delta[plane_type] = 0; + const PredictionMode mode = + (plane_type == kPlaneTypeY) ? 
bp.y_mode : bp.uv_mode; + if (IsBlockSmallerThan8x8(block.size) || !IsDirectionalMode(mode)) return; + uint16_t* const cdf = + symbol_decoder_context_.angle_delta_cdf[mode - kPredictionModeVertical]; + prediction_parameters.angle_delta[plane_type] = + reader_.ReadSymbol<kAngleDeltaSymbolCount>(cdf); + prediction_parameters.angle_delta[plane_type] -= kMaxAngleDelta; +} + +void Tile::ReadCflAlpha(const Block& block) { + const int signs = reader_.ReadSymbol<kCflAlphaSignsSymbolCount>( + symbol_decoder_context_.cfl_alpha_signs_cdf); + const int8_t* const cfl_lookup = kCflAlphaLookup[signs]; + const auto sign_u = static_cast<CflSign>(cfl_lookup[0]); + const auto sign_v = static_cast<CflSign>(cfl_lookup[1]); + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.cfl_alpha_u = 0; + if (sign_u != kCflSignZero) { + assert(cfl_lookup[2] >= 0); + prediction_parameters.cfl_alpha_u = + reader_.ReadSymbol<kCflAlphaSymbolCount>( + symbol_decoder_context_.cfl_alpha_cdf[cfl_lookup[2]]) + + 1; + if (sign_u == kCflSignNegative) prediction_parameters.cfl_alpha_u *= -1; + } + prediction_parameters.cfl_alpha_v = 0; + if (sign_v != kCflSignZero) { + assert(cfl_lookup[3] >= 0); + prediction_parameters.cfl_alpha_v = + reader_.ReadSymbol<kCflAlphaSymbolCount>( + symbol_decoder_context_.cfl_alpha_cdf[cfl_lookup[3]]) + + 1; + if (sign_v == kCflSignNegative) prediction_parameters.cfl_alpha_v *= -1; + } +} + +void Tile::ReadPredictionModeUV(const Block& block) { + BlockParameters& bp = *block.bp; + bool chroma_from_luma_allowed; + if (frame_header_.segmentation.lossless[bp.segment_id]) { + chroma_from_luma_allowed = block.residual_size[kPlaneU] == kBlock4x4; + } else { + chroma_from_luma_allowed = IsBlockDimensionLessThan64(block.size); + } + uint16_t* const cdf = + symbol_decoder_context_ + .uv_mode_cdf[static_cast<int>(chroma_from_luma_allowed)][bp.y_mode]; + if (chroma_from_luma_allowed) { + bp.uv_mode = static_cast<PredictionMode>( + 
reader_.ReadSymbol<kIntraPredictionModesUV>(cdf)); + } else { + bp.uv_mode = static_cast<PredictionMode>( + reader_.ReadSymbol<kIntraPredictionModesUV - 1>(cdf)); + } +} + +int Tile::ReadMotionVectorComponent(const Block& block, const int component) { + const int context = + static_cast<int>(block.bp->prediction_parameters->use_intra_block_copy); + const bool sign = reader_.ReadSymbol( + symbol_decoder_context_.mv_sign_cdf[component][context]); + const int mv_class = reader_.ReadSymbol<kMvClassSymbolCount>( + symbol_decoder_context_.mv_class_cdf[component][context]); + int magnitude = 1; + int value; + uint16_t* fraction_cdf; + uint16_t* precision_cdf; + if (mv_class == 0) { + value = static_cast<int>(reader_.ReadSymbol( + symbol_decoder_context_.mv_class0_bit_cdf[component][context])); + fraction_cdf = symbol_decoder_context_ + .mv_class0_fraction_cdf[component][context][value]; + precision_cdf = symbol_decoder_context_ + .mv_class0_high_precision_cdf[component][context]; + } else { + assert(mv_class <= kMvBitSymbolCount); + value = 0; + for (int i = 0; i < mv_class; ++i) { + const int bit = static_cast<int>(reader_.ReadSymbol( + symbol_decoder_context_.mv_bit_cdf[component][context][i])); + value |= bit << i; + } + magnitude += 2 << (mv_class + 2); + fraction_cdf = symbol_decoder_context_.mv_fraction_cdf[component][context]; + precision_cdf = + symbol_decoder_context_.mv_high_precision_cdf[component][context]; + } + const int fraction = + (frame_header_.force_integer_mv == 0) + ? reader_.ReadSymbol<kMvFractionSymbolCount>(fraction_cdf) + : 3; + const int precision = + frame_header_.allow_high_precision_mv + ? static_cast<int>(reader_.ReadSymbol(precision_cdf)) + : 1; + magnitude += (value << 3) | (fraction << 1) | precision; + return sign ? 
-magnitude : magnitude; +} + +void Tile::ReadMotionVector(const Block& block, int index) { + BlockParameters& bp = *block.bp; + const int context = + static_cast<int>(block.bp->prediction_parameters->use_intra_block_copy); + const auto mv_joint = + static_cast<MvJointType>(reader_.ReadSymbol<kNumMvJointTypes>( + symbol_decoder_context_.mv_joint_cdf[context])); + if (mv_joint == kMvJointTypeHorizontalZeroVerticalNonZero || + mv_joint == kMvJointTypeNonZero) { + bp.mv.mv[index].mv[0] = ReadMotionVectorComponent(block, 0); + } + if (mv_joint == kMvJointTypeHorizontalNonZeroVerticalZero || + mv_joint == kMvJointTypeNonZero) { + bp.mv.mv[index].mv[1] = ReadMotionVectorComponent(block, 1); + } +} + +void Tile::ReadFilterIntraModeInfo(const Block& block) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.use_filter_intra = false; + if (!sequence_header_.enable_filter_intra || bp.y_mode != kPredictionModeDc || + bp.palette_mode_info.size[kPlaneTypeY] != 0 || + !IsBlockDimensionLessThan64(block.size)) { + return; + } + prediction_parameters.use_filter_intra = reader_.ReadSymbol( + symbol_decoder_context_.use_filter_intra_cdf[block.size]); + if (prediction_parameters.use_filter_intra) { + prediction_parameters.filter_intra_mode = static_cast<FilterIntraPredictor>( + reader_.ReadSymbol<kNumFilterIntraPredictors>( + symbol_decoder_context_.filter_intra_mode_cdf)); + } +} + +bool Tile::DecodeIntraModeInfo(const Block& block) { + BlockParameters& bp = *block.bp; + bp.skip = false; + if (frame_header_.segmentation.segment_id_pre_skip && + !ReadIntraSegmentId(block)) { + return false; + } + bp.skip_mode = false; + ReadSkip(block); + if (!frame_header_.segmentation.segment_id_pre_skip && + !ReadIntraSegmentId(block)) { + return false; + } + ReadCdef(block); + if (read_deltas_) { + ReadQuantizerIndexDelta(block); + ReadLoopFilterDelta(block); + read_deltas_ = false; + } + 
PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.use_intra_block_copy = false; + if (frame_header_.allow_intrabc) { + prediction_parameters.use_intra_block_copy = + reader_.ReadSymbol(symbol_decoder_context_.intra_block_copy_cdf); + } + if (prediction_parameters.use_intra_block_copy) { + bp.is_inter = true; + bp.reference_frame[0] = kReferenceFrameIntra; + bp.reference_frame[1] = kReferenceFrameNone; + bp.y_mode = kPredictionModeDc; + bp.uv_mode = kPredictionModeDc; + prediction_parameters.motion_mode = kMotionModeSimple; + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + bp.palette_mode_info.size[kPlaneTypeY] = 0; + bp.palette_mode_info.size[kPlaneTypeUV] = 0; + bp.interpolation_filter[0] = kInterpolationFilterBilinear; + bp.interpolation_filter[1] = kInterpolationFilterBilinear; + MvContexts dummy_mode_contexts; + FindMvStack(block, /*is_compound=*/false, &dummy_mode_contexts); + return AssignIntraMv(block); + } + bp.is_inter = false; + return ReadIntraBlockModeInfo(block, /*intra_y_mode=*/true); +} + +int8_t Tile::ComputePredictedSegmentId(const Block& block) const { + // If prev_segment_ids_ is null, treat it as if it pointed to a segmentation + // map containing all 0s. 
+ if (prev_segment_ids_ == nullptr) return 0; + + const int x_limit = std::min(frame_header_.columns4x4 - block.column4x4, + static_cast<int>(block.width4x4)); + const int y_limit = std::min(frame_header_.rows4x4 - block.row4x4, + static_cast<int>(block.height4x4)); + int8_t id = 7; + for (int y = 0; y < y_limit; ++y) { + for (int x = 0; x < x_limit; ++x) { + const int8_t prev_segment_id = + prev_segment_ids_->segment_id(block.row4x4 + y, block.column4x4 + x); + id = std::min(id, prev_segment_id); + } + } + return id; +} + +bool Tile::ReadInterSegmentId(const Block& block, bool pre_skip) { + BlockParameters& bp = *block.bp; + if (!frame_header_.segmentation.enabled) { + bp.segment_id = 0; + return true; + } + if (!frame_header_.segmentation.update_map) { + bp.segment_id = ComputePredictedSegmentId(block); + return true; + } + if (pre_skip) { + if (!frame_header_.segmentation.segment_id_pre_skip) { + bp.segment_id = 0; + return true; + } + } else if (bp.skip) { + bp.use_predicted_segment_id = false; + return ReadSegmentId(block); + } + if (frame_header_.segmentation.temporal_update) { + const int context = + (block.left_available[kPlaneY] + ? static_cast<int>(block.bp_left->use_predicted_segment_id) + : 0) + + (block.top_available[kPlaneY] + ? 
static_cast<int>(block.bp_top->use_predicted_segment_id) + : 0); + bp.use_predicted_segment_id = reader_.ReadSymbol( + symbol_decoder_context_.use_predicted_segment_id_cdf[context]); + if (bp.use_predicted_segment_id) { + bp.segment_id = ComputePredictedSegmentId(block); + return true; + } + } + return ReadSegmentId(block); +} + +void Tile::ReadIsInter(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.is_inter = true; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame)) { + bp.is_inter = + frame_header_.segmentation + .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame] != + kReferenceFrameIntra; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.is_inter = true; + return; + } + int context = 0; + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + context = (block.IsTopIntra() && block.IsLeftIntra()) + ? 3 + : static_cast<int>(block.IsTopIntra() || block.IsLeftIntra()); + } else if (block.top_available[kPlaneY] || block.left_available[kPlaneY]) { + context = 2 * static_cast<int>(block.top_available[kPlaneY] + ? 
block.IsTopIntra() + : block.IsLeftIntra()); + } + bp.is_inter = + reader_.ReadSymbol(symbol_decoder_context_.is_inter_cdf[context]); +} + +bool Tile::ReadIntraBlockModeInfo(const Block& block, bool intra_y_mode) { + BlockParameters& bp = *block.bp; + bp.reference_frame[0] = kReferenceFrameIntra; + bp.reference_frame[1] = kReferenceFrameNone; + ReadPredictionModeY(block, intra_y_mode); + ReadIntraAngleInfo(block, kPlaneTypeY); + if (block.HasChroma()) { + ReadPredictionModeUV(block); + if (bp.uv_mode == kPredictionModeChromaFromLuma) { + ReadCflAlpha(block); + } + ReadIntraAngleInfo(block, kPlaneTypeUV); + } + ReadPaletteModeInfo(block); + ReadFilterIntraModeInfo(block); + return true; +} + +CompoundReferenceType Tile::ReadCompoundReferenceType(const Block& block) { + // compound and inter. + const bool top_comp_inter = block.top_available[kPlaneY] && + !block.IsTopIntra() && !block.IsTopSingle(); + const bool left_comp_inter = block.left_available[kPlaneY] && + !block.IsLeftIntra() && !block.IsLeftSingle(); + // unidirectional compound. + const bool top_uni_comp = + top_comp_inter && IsSameDirectionReferencePair(block.TopReference(0), + block.TopReference(1)); + const bool left_uni_comp = + left_comp_inter && IsSameDirectionReferencePair(block.LeftReference(0), + block.LeftReference(1)); + int context; + if (block.top_available[kPlaneY] && !block.IsTopIntra() && + block.left_available[kPlaneY] && !block.IsLeftIntra()) { + const int same_direction = static_cast<int>(IsSameDirectionReferencePair( + block.TopReference(0), block.LeftReference(0))); + if (!top_comp_inter && !left_comp_inter) { + context = 1 + MultiplyBy2(same_direction); + } else if (!top_comp_inter) { + context = left_uni_comp ? 3 + same_direction : 1; + } else if (!left_comp_inter) { + context = top_uni_comp ? 
3 + same_direction : 1; + } else { + if (!top_uni_comp && !left_uni_comp) { + context = 0; + } else if (!top_uni_comp || !left_uni_comp) { + context = 2; + } else { + context = 3 + static_cast<int>( + (block.TopReference(0) == kReferenceFrameBackward) == + (block.LeftReference(0) == kReferenceFrameBackward)); + } + } + } else if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + if (top_comp_inter) { + context = 1 + MultiplyBy2(static_cast<int>(top_uni_comp)); + } else if (left_comp_inter) { + context = 1 + MultiplyBy2(static_cast<int>(left_uni_comp)); + } else { + context = 2; + } + } else if (top_comp_inter) { + context = MultiplyBy4(static_cast<int>(top_uni_comp)); + } else if (left_comp_inter) { + context = MultiplyBy4(static_cast<int>(left_uni_comp)); + } else { + context = 2; + } + return static_cast<CompoundReferenceType>(reader_.ReadSymbol( + symbol_decoder_context_.compound_reference_type_cdf[context])); +} + +template <bool is_single, bool is_backward, int index> +uint16_t* Tile::GetReferenceCdf( + const Block& block, + CompoundReferenceType type /*= kNumCompoundReferenceTypes*/) { + int context = 0; + if ((type == kCompoundReferenceUnidirectional && index == 0) || + (is_single && index == 1)) { + // uni_comp_ref and single_ref_p1. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameGolden, + kReferenceFrameBackward, kReferenceFrameAlternate); + } else if (type == kCompoundReferenceUnidirectional && index == 1) { + // uni_comp_ref_p1. + context = + GetReferenceContext(block, kReferenceFrameLast2, kReferenceFrameLast2, + kReferenceFrameLast3, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceUnidirectional && index == 2) || + (type == kCompoundReferenceBidirectional && index == 2) || + (is_single && index == 5)) { + // uni_comp_ref_p2, comp_ref_p2 and single_ref_p5. 
+ context = + GetReferenceContext(block, kReferenceFrameLast3, kReferenceFrameLast3, + kReferenceFrameGolden, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceBidirectional && index == 0) || + (is_single && index == 3)) { + // comp_ref and single_ref_p3. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameLast2, + kReferenceFrameLast3, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceBidirectional && index == 1) || + (is_single && index == 4)) { + // comp_ref_p1 and single_ref_p4. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameLast, + kReferenceFrameLast2, kReferenceFrameLast2); + } else if ((is_single && index == 2) || (is_backward && index == 0)) { + // single_ref_p2 and comp_bwdref. + context = GetReferenceContext( + block, kReferenceFrameBackward, kReferenceFrameAlternate2, + kReferenceFrameAlternate, kReferenceFrameAlternate); + } else if ((is_single && index == 6) || (is_backward && index == 1)) { + // single_ref_p6 and comp_bwdref_p1. + context = GetReferenceContext( + block, kReferenceFrameBackward, kReferenceFrameBackward, + kReferenceFrameAlternate2, kReferenceFrameAlternate2); + } + if (is_single) { + // The index parameter for single references is offset by one since the spec + // uses 1-based index for these elements. 
+ return symbol_decoder_context_.single_reference_cdf[context][index - 1]; + } + if (is_backward) { + return symbol_decoder_context_ + .compound_backward_reference_cdf[context][index]; + } + return symbol_decoder_context_.compound_reference_cdf[type][context][index]; +} + +void Tile::ReadReferenceFrames(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.reference_frame[0] = frame_header_.skip_mode_frame[0]; + bp.reference_frame[1] = frame_header_.skip_mode_frame[1]; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame)) { + bp.reference_frame[0] = static_cast<ReferenceFrameType>( + frame_header_.segmentation + .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame]); + bp.reference_frame[1] = kReferenceFrameNone; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameNone; + return; + } + const bool use_compound_reference = + frame_header_.reference_mode_select && + std::min(block.width4x4, block.height4x4) >= 2 && + reader_.ReadSymbol(symbol_decoder_context_.use_compound_reference_cdf + [GetUseCompoundReferenceContext(block)]); + if (use_compound_reference) { + CompoundReferenceType reference_type = ReadCompoundReferenceType(block); + if (reference_type == kCompoundReferenceUnidirectional) { + // uni_comp_ref. + if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 0>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameBackward; + bp.reference_frame[1] = kReferenceFrameAlternate; + return; + } + // uni_comp_ref_p1. + if (!reader_.ReadSymbol( + GetReferenceCdf<false, false, 1>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameLast2; + return; + } + // uni_comp_ref_p2. 
+ if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 2>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameGolden; + return; + } + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameLast3; + return; + } + assert(reference_type == kCompoundReferenceBidirectional); + // comp_ref. + if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 0>(block, reference_type))) { + // comp_ref_p2. + bp.reference_frame[0] = + reader_.ReadSymbol( + GetReferenceCdf<false, false, 2>(block, reference_type)) + ? kReferenceFrameGolden + : kReferenceFrameLast3; + } else { + // comp_ref_p1. + bp.reference_frame[0] = + reader_.ReadSymbol( + GetReferenceCdf<false, false, 1>(block, reference_type)) + ? kReferenceFrameLast2 + : kReferenceFrameLast; + } + // comp_bwdref. + if (reader_.ReadSymbol(GetReferenceCdf<false, true, 0>(block))) { + bp.reference_frame[1] = kReferenceFrameAlternate; + } else { + // comp_bwdref_p1. + bp.reference_frame[1] = + reader_.ReadSymbol(GetReferenceCdf<false, true, 1>(block)) + ? kReferenceFrameAlternate2 + : kReferenceFrameBackward; + } + return; + } + assert(!use_compound_reference); + bp.reference_frame[1] = kReferenceFrameNone; + // single_ref_p1. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 1>(block))) { + // single_ref_p2. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 2>(block))) { + bp.reference_frame[0] = kReferenceFrameAlternate; + return; + } + // single_ref_p6. + bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 6>(block)) + ? kReferenceFrameAlternate2 + : kReferenceFrameBackward; + return; + } + // single_ref_p3. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 3>(block))) { + // single_ref_p5. + bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 5>(block)) + ? kReferenceFrameGolden + : kReferenceFrameLast3; + return; + } + // single_ref_p4. 
+ bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 4>(block)) + ? kReferenceFrameLast2 + : kReferenceFrameLast; +} + +void Tile::ReadInterPredictionModeY(const Block& block, + const MvContexts& mode_contexts) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.y_mode = kPredictionModeNearestNearestMv; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.y_mode = kPredictionModeGlobalMv; + return; + } + if (bp.reference_frame[1] > kReferenceFrameIntra) { + const int idx0 = mode_contexts.reference_mv >> 1; + const int idx1 = + std::min(mode_contexts.new_mv, kCompoundModeNewMvContexts - 1); + const int context = kCompoundModeContextMap[idx0][idx1]; + const int offset = reader_.ReadSymbol<kNumCompoundInterPredictionModes>( + symbol_decoder_context_.compound_prediction_mode_cdf[context]); + bp.y_mode = + static_cast<PredictionMode>(kPredictionModeNearestNearestMv + offset); + return; + } + // new_mv. + if (!reader_.ReadSymbol( + symbol_decoder_context_.new_mv_cdf[mode_contexts.new_mv])) { + bp.y_mode = kPredictionModeNewMv; + return; + } + // zero_mv. + if (!reader_.ReadSymbol( + symbol_decoder_context_.zero_mv_cdf[mode_contexts.zero_mv])) { + bp.y_mode = kPredictionModeGlobalMv; + return; + } + // ref_mv. + bp.y_mode = + reader_.ReadSymbol( + symbol_decoder_context_.reference_mv_cdf[mode_contexts.reference_mv]) + ? 
kPredictionModeNearMv + : kPredictionModeNearestMv; +} + +void Tile::ReadRefMvIndex(const Block& block) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.ref_mv_index = 0; + if (bp.y_mode != kPredictionModeNewMv && + bp.y_mode != kPredictionModeNewNewMv && + !kPredictionModeHasNearMvMask.Contains(bp.y_mode)) { + return; + } + const int start = + static_cast<int>(kPredictionModeHasNearMvMask.Contains(bp.y_mode)); + prediction_parameters.ref_mv_index = start; + for (int i = start; i < start + 2; ++i) { + if (prediction_parameters.ref_mv_count <= i + 1) break; + // drl_mode in the spec. + const bool ref_mv_index_bit = reader_.ReadSymbol( + symbol_decoder_context_.ref_mv_index_cdf[GetRefMvIndexContext( + prediction_parameters.nearest_mv_count, i)]); + prediction_parameters.ref_mv_index = i + static_cast<int>(ref_mv_index_bit); + if (!ref_mv_index_bit) return; + } +} + +void Tile::ReadInterIntraMode(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.inter_intra_mode = kNumInterIntraModes; + prediction_parameters.is_wedge_inter_intra = false; + if (bp.skip_mode || !sequence_header_.enable_interintra_compound || + is_compound || !kIsInterIntraModeAllowedMask.Contains(block.size)) { + return; + } + // kSizeGroup[block.size] is guaranteed to be non-zero because of the block + // size constraint enforced in the above condition. 
+ assert(kSizeGroup[block.size] - 1 >= 0); + if (!reader_.ReadSymbol( + symbol_decoder_context_ + .is_inter_intra_cdf[kSizeGroup[block.size] - 1])) { + prediction_parameters.inter_intra_mode = kNumInterIntraModes; + return; + } + prediction_parameters.inter_intra_mode = + static_cast<InterIntraMode>(reader_.ReadSymbol<kNumInterIntraModes>( + symbol_decoder_context_ + .inter_intra_mode_cdf[kSizeGroup[block.size] - 1])); + bp.reference_frame[1] = kReferenceFrameIntra; + prediction_parameters.angle_delta[kPlaneTypeY] = 0; + prediction_parameters.angle_delta[kPlaneTypeUV] = 0; + prediction_parameters.use_filter_intra = false; + prediction_parameters.is_wedge_inter_intra = reader_.ReadSymbol( + symbol_decoder_context_.is_wedge_inter_intra_cdf[block.size]); + if (!prediction_parameters.is_wedge_inter_intra) return; + prediction_parameters.wedge_index = + reader_.ReadSymbol<kWedgeIndexSymbolCount>( + symbol_decoder_context_.wedge_index_cdf[block.size]); + prediction_parameters.wedge_sign = 0; +} + +void Tile::ReadMotionMode(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + const auto global_motion_type = + frame_header_.global_motion[bp.reference_frame[0]].type; + if (bp.skip_mode || !frame_header_.is_motion_mode_switchable || + IsBlockDimension4(block.size) || + (frame_header_.force_integer_mv == 0 && + (bp.y_mode == kPredictionModeGlobalMv || + bp.y_mode == kPredictionModeGlobalGlobalMv) && + global_motion_type > kGlobalMotionTransformationTypeTranslation) || + is_compound || bp.reference_frame[1] == kReferenceFrameIntra || + !block.HasOverlappableCandidates()) { + prediction_parameters.motion_mode = kMotionModeSimple; + return; + } + prediction_parameters.num_warp_samples = 0; + int num_samples_scanned = 0; + memset(prediction_parameters.warp_estimate_candidates, 0, + sizeof(prediction_parameters.warp_estimate_candidates)); + FindWarpSamples(block, 
&prediction_parameters.num_warp_samples, + &num_samples_scanned, + prediction_parameters.warp_estimate_candidates); + if (frame_header_.force_integer_mv != 0 || + prediction_parameters.num_warp_samples == 0 || + !frame_header_.allow_warped_motion || IsScaled(bp.reference_frame[0])) { + prediction_parameters.motion_mode = + reader_.ReadSymbol(symbol_decoder_context_.use_obmc_cdf[block.size]) + ? kMotionModeObmc + : kMotionModeSimple; + return; + } + prediction_parameters.motion_mode = + static_cast<MotionMode>(reader_.ReadSymbol<kNumMotionModes>( + symbol_decoder_context_.motion_mode_cdf[block.size])); +} + +uint16_t* Tile::GetIsExplicitCompoundTypeCdf(const Block& block) { + int context = 0; + if (block.top_available[kPlaneY]) { + if (!block.IsTopSingle()) { + context += static_cast<int>(block.bp_top->is_explicit_compound_type); + } else if (block.TopReference(0) == kReferenceFrameAlternate) { + context += 3; + } + } + if (block.left_available[kPlaneY]) { + if (!block.IsLeftSingle()) { + context += static_cast<int>(block.bp_left->is_explicit_compound_type); + } else if (block.LeftReference(0) == kReferenceFrameAlternate) { + context += 3; + } + } + return symbol_decoder_context_.is_explicit_compound_type_cdf[std::min( + context, kIsExplicitCompoundTypeContexts - 1)]; +} + +uint16_t* Tile::GetIsCompoundTypeAverageCdf(const Block& block) { + const BlockParameters& bp = *block.bp; + const ReferenceInfo& reference_info = *current_frame_.reference_info(); + const int forward = + std::abs(reference_info.relative_distance_from[bp.reference_frame[0]]); + const int backward = + std::abs(reference_info.relative_distance_from[bp.reference_frame[1]]); + int context = (forward == backward) ? 
3 : 0; + if (block.top_available[kPlaneY]) { + if (!block.IsTopSingle()) { + context += static_cast<int>(block.bp_top->is_compound_type_average); + } else if (block.TopReference(0) == kReferenceFrameAlternate) { + ++context; + } + } + if (block.left_available[kPlaneY]) { + if (!block.IsLeftSingle()) { + context += static_cast<int>(block.bp_left->is_compound_type_average); + } else if (block.LeftReference(0) == kReferenceFrameAlternate) { + ++context; + } + } + return symbol_decoder_context_.is_compound_type_average_cdf[context]; +} + +void Tile::ReadCompoundType(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + bp.is_explicit_compound_type = false; + bp.is_compound_type_average = true; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + if (bp.skip_mode) { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + return; + } + if (is_compound) { + if (sequence_header_.enable_masked_compound) { + bp.is_explicit_compound_type = + reader_.ReadSymbol(GetIsExplicitCompoundTypeCdf(block)); + } + if (bp.is_explicit_compound_type) { + if (kIsWedgeCompoundModeAllowed.Contains(block.size)) { + // Only kCompoundPredictionTypeWedge and + // kCompoundPredictionTypeDiffWeighted are signaled explicitly. + prediction_parameters.compound_prediction_type = + static_cast<CompoundPredictionType>(reader_.ReadSymbol( + symbol_decoder_context_.compound_type_cdf[block.size])); + } else { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeDiffWeighted; + } + } else { + if (sequence_header_.enable_jnt_comp) { + bp.is_compound_type_average = + reader_.ReadSymbol(GetIsCompoundTypeAverageCdf(block)); + prediction_parameters.compound_prediction_type = + bp.is_compound_type_average ? 
kCompoundPredictionTypeAverage + : kCompoundPredictionTypeDistance; + } else { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + return; + } + } + if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge) { + prediction_parameters.wedge_index = + reader_.ReadSymbol<kWedgeIndexSymbolCount>( + symbol_decoder_context_.wedge_index_cdf[block.size]); + prediction_parameters.wedge_sign = static_cast<int>(reader_.ReadBit()); + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeDiffWeighted) { + prediction_parameters.mask_is_inverse = + static_cast<bool>(reader_.ReadBit()); + } + return; + } + if (prediction_parameters.inter_intra_mode != kNumInterIntraModes) { + prediction_parameters.compound_prediction_type = + prediction_parameters.is_wedge_inter_intra + ? kCompoundPredictionTypeWedge + : kCompoundPredictionTypeIntra; + return; + } + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; +} + +uint16_t* Tile::GetInterpolationFilterCdf(const Block& block, int direction) { + const BlockParameters& bp = *block.bp; + int context = MultiplyBy8(direction) + + MultiplyBy4(static_cast<int>(bp.reference_frame[1] > + kReferenceFrameIntra)); + int top_type = kNumExplicitInterpolationFilters; + if (block.top_available[kPlaneY]) { + if (block.bp_top->reference_frame[0] == bp.reference_frame[0] || + block.bp_top->reference_frame[1] == bp.reference_frame[0]) { + top_type = block.bp_top->interpolation_filter[direction]; + } + } + int left_type = kNumExplicitInterpolationFilters; + if (block.left_available[kPlaneY]) { + if (block.bp_left->reference_frame[0] == bp.reference_frame[0] || + block.bp_left->reference_frame[1] == bp.reference_frame[0]) { + left_type = block.bp_left->interpolation_filter[direction]; + } + } + if (left_type == top_type) { + context += left_type; + } else if (left_type == kNumExplicitInterpolationFilters) { + context += top_type; + } else 
if (top_type == kNumExplicitInterpolationFilters) { + context += left_type; + } else { + context += kNumExplicitInterpolationFilters; + } + return symbol_decoder_context_.interpolation_filter_cdf[context]; +} + +void Tile::ReadInterpolationFilter(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.interpolation_filter != kInterpolationFilterSwitchable) { + static_assert( + sizeof(bp.interpolation_filter) / sizeof(bp.interpolation_filter[0]) == + 2, + "Interpolation filter array size is not 2"); + for (auto& interpolation_filter : bp.interpolation_filter) { + interpolation_filter = frame_header_.interpolation_filter; + } + return; + } + bool interpolation_filter_present = true; + if (bp.skip_mode || + block.bp->prediction_parameters->motion_mode == kMotionModeLocalWarp) { + interpolation_filter_present = false; + } else if (!IsBlockDimension4(block.size) && + bp.y_mode == kPredictionModeGlobalMv) { + interpolation_filter_present = + frame_header_.global_motion[bp.reference_frame[0]].type == + kGlobalMotionTransformationTypeTranslation; + } else if (!IsBlockDimension4(block.size) && + bp.y_mode == kPredictionModeGlobalGlobalMv) { + interpolation_filter_present = + frame_header_.global_motion[bp.reference_frame[0]].type == + kGlobalMotionTransformationTypeTranslation || + frame_header_.global_motion[bp.reference_frame[1]].type == + kGlobalMotionTransformationTypeTranslation; + } + for (int i = 0; i < (sequence_header_.enable_dual_filter ? 2 : 1); ++i) { + bp.interpolation_filter[i] = + interpolation_filter_present + ? 
static_cast<InterpolationFilter>(
                  reader_.ReadSymbol<kNumExplicitInterpolationFilters>(
                      GetInterpolationFilterCdf(block, i)))
            : kInterpolationFilterEightTap;
  }
  if (!sequence_header_.enable_dual_filter) {
    // Only one filter is coded when dual filter is disabled; it applies to
    // both directions.
    bp.interpolation_filter[1] = bp.interpolation_filter[0];
  }
}

// Reads all the mode info for an inter coded block: reference frames,
// Y prediction mode, ref_mv_index, motion vectors, inter-intra mode, motion
// mode, compound type and interpolation filters. Returns false if motion
// vector assignment (AssignInterMv) fails.
bool Tile::ReadInterBlockModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  // Inter blocks never use palette mode, so clear the palette sizes up front.
  bp.palette_mode_info.size[kPlaneTypeY] = 0;
  bp.palette_mode_info.size[kPlaneTypeUV] = 0;
  ReadReferenceFrames(block);
  const bool is_compound = bp.reference_frame[1] > kReferenceFrameIntra;
  MvContexts mode_contexts;
  FindMvStack(block, is_compound, &mode_contexts);
  ReadInterPredictionModeY(block, mode_contexts);
  ReadRefMvIndex(block);
  if (!AssignInterMv(block, is_compound)) return false;
  ReadInterIntraMode(block, is_compound);
  ReadMotionMode(block, is_compound);
  ReadCompoundType(block, is_compound);
  ReadInterpolationFilter(block);
  return true;
}

// Decodes the mode info of one block for a frame that is not intra-only
// (dispatched from DecodeModeInfo below). Reads segment id, skip/skip_mode,
// cdef, quantizer/loop-filter deltas and the is_inter flag, then delegates to
// the inter or intra block reader. Returns false on failure.
bool Tile::DecodeInterModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  block.bp->prediction_parameters->use_intra_block_copy = false;
  bp.skip = false;
  if (!ReadInterSegmentId(block, /*pre_skip=*/true)) return false;
  ReadSkipMode(block);
  if (bp.skip_mode) {
    // skip_mode implies skip; no skip bit is read in this case.
    bp.skip = true;
  } else {
    ReadSkip(block);
  }
  if (!frame_header_.segmentation.segment_id_pre_skip &&
      !ReadInterSegmentId(block, /*pre_skip=*/false)) {
    return false;
  }
  ReadCdef(block);
  if (read_deltas_) {
    ReadQuantizerIndexDelta(block);
    ReadLoopFilterDelta(block);
    // Deltas are read at most once per superblock; clear the flag so
    // subsequent blocks skip this branch.
    read_deltas_ = false;
  }
  ReadIsInter(block);
  return bp.is_inter ? ReadInterBlockModeInfo(block)
                     : ReadIntraBlockModeInfo(block, /*intra_y_mode=*/false);
}

// Top-level mode info dispatch: intra-only frames use the intra path,
// everything else goes through DecodeInterModeInfo.
bool Tile::DecodeModeInfo(const Block& block) {
  return IsIntraFrame(frame_header_.frame_type) ?
DecodeIntraModeInfo(block) + : DecodeInterModeInfo(block); +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/palette.cc b/src/tile/bitstream/palette.cc new file mode 100644 index 0000000..674d210 --- /dev/null +++ b/src/tile/bitstream/palette.cc @@ -0,0 +1,319 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <memory> + +#include "src/obu_parser.h" +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/memory.h" +#include "src/utils/types.h" + +namespace libgav1 { + +int Tile::GetPaletteCache(const Block& block, PlaneType plane_type, + uint16_t* const cache) { + const int top_size = + (block.top_available[kPlaneY] && Mod64(MultiplyBy4(block.row4x4)) != 0) + ? block.bp_top->palette_mode_info.size[plane_type] + : 0; + const int left_size = block.left_available[kPlaneY] + ? block.bp_left->palette_mode_info.size[plane_type] + : 0; + if (left_size == 0 && top_size == 0) return 0; + // Merge the left and top colors in sorted order and store them in |cache|. + uint16_t dummy[1]; + const uint16_t* top = (top_size > 0) + ? block.bp_top->palette_mode_info.color[plane_type] + : dummy; + const uint16_t* left = + (left_size > 0) ? 
block.bp_left->palette_mode_info.color[plane_type]
                     : dummy;
  std::merge(top, top + top_size, left, left + left_size, cache);
  // Deduplicate the entries in |cache| and return the number of unique
  // entries.
  return static_cast<int>(
      std::distance(cache, std::unique(cache, cache + left_size + top_size)));
}

// Reads the palette colors for |plane|. Colors are coded in three stages:
// reused entries from the neighbor cache (one use_palette_color_cache bit
// each), then a literal base color, then strictly ascending deltas. The two
// sorted runs (cached and newly decoded) are merged at the end. For chroma,
// the V plane colors follow, either delta coded or as plain literals.
void Tile::ReadPaletteColors(const Block& block, Plane plane) {
  const PlaneType plane_type = GetPlaneType(plane);
  uint16_t cache[2 * kMaxPaletteSize];
  const int n = GetPaletteCache(block, plane_type, cache);
  BlockParameters& bp = *block.bp;
  const uint8_t palette_size = bp.palette_mode_info.size[plane_type];
  uint16_t* const palette_color = bp.palette_mode_info.color[plane];
  const int8_t bitdepth = sequence_header_.color_config.bitdepth;
  int index = 0;
  for (int i = 0; i < n && index < palette_size; ++i) {
    if (reader_.ReadBit() != 0) {  // use_palette_color_cache.
      palette_color[index++] = cache[i];
    }
  }
  // Boundary between the cached run and the newly decoded ascending run;
  // used by the inplace_merge below.
  const int merge_pivot = index;
  if (index < palette_size) {
    palette_color[index++] =
        static_cast<uint16_t>(reader_.ReadLiteral(bitdepth));
  }
  const int max_value = (1 << bitdepth) - 1;
  if (index < palette_size) {
    int bits = bitdepth - 3 + static_cast<int>(reader_.ReadLiteral(2));
    do {
      // For luma the coded delta is offset by 1 (colors are strictly
      // increasing); chroma U allows equal consecutive colors.
      const int delta = static_cast<int>(reader_.ReadLiteral(bits)) +
                        (plane_type == kPlaneTypeY ? 1 : 0);
      palette_color[index] =
          std::min(palette_color[index - 1] + delta, max_value);
      if (palette_color[index] + (plane_type == kPlaneTypeY ? 1 : 0) >=
          max_value) {
        // Once the color exceeds max_value, all others can be set to max_value
        // (since they are computed as a delta on top of the current color and
        // then clipped).
        Memset(&palette_color[index + 1], max_value, palette_size - index - 1);
        break;
      }
      // Shrink the coded delta width as the remaining value range shrinks.
      const int range = (1 << bitdepth) - palette_color[index] -
                        (plane_type == kPlaneTypeY ? 1 : 0);
      bits = std::min(bits, CeilLog2(range));
    } while (++index < palette_size);
  }
  // Palette colors are generated using two ascending arrays. So sorting them
  // is simply a matter of merging the two sorted portions of the array.
  std::inplace_merge(palette_color, palette_color + merge_pivot,
                     palette_color + palette_size);
  if (plane_type == kPlaneTypeUV) {
    uint16_t* const palette_color_v = bp.palette_mode_info.color[kPlaneV];
    if (reader_.ReadBit() != 0) {  // delta_encode_palette_colors_v.
      const int bits = bitdepth - 4 + static_cast<int>(reader_.ReadLiteral(2));
      palette_color_v[0] = reader_.ReadLiteral(bitdepth);
      for (int i = 1; i < palette_size; ++i) {
        int delta = static_cast<int>(reader_.ReadLiteral(bits));
        if (delta != 0 && reader_.ReadBit() != 0) delta = -delta;
        // This line is equivalent to the following lines in the spec:
        // val = palette_colors_v[ idx - 1 ] + palette_delta_v
        // if ( val < 0 ) val += maxVal
        // if ( val >= maxVal ) val -= maxVal
        // palette_colors_v[ idx ] = Clip1( val )
        //
        // The difference is that in the code, max_value is (1 << bitdepth) - 1.
        // So "& max_value" has the desired effect of computing both the "if"
        // conditions and the Clip.
        palette_color_v[i] = (palette_color_v[i - 1] + delta) & max_value;
      }
    } else {
      for (int i = 0; i < palette_size; ++i) {
        palette_color_v[i] =
            static_cast<uint16_t>(reader_.ReadLiteral(bitdepth));
      }
    }
  }
}

// Reads has_palette_y/has_palette_uv and, when set, the palette size and the
// palette colors for the corresponding planes. Palette mode is only allowed
// for block sizes in [8x8, 64x64] when screen content tools are enabled, and
// only for DC prediction modes.
void Tile::ReadPaletteModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  if (IsBlockSmallerThan8x8(block.size) || block.size > kBlock64x64 ||
      !frame_header_.allow_screen_content_tools) {
    bp.palette_mode_info.size[kPlaneTypeY] = 0;
    bp.palette_mode_info.size[kPlaneTypeUV] = 0;
    return;
  }
  const int block_size_context =
      k4x4WidthLog2[block.size] + k4x4HeightLog2[block.size] - 2;
  if (bp.y_mode == kPredictionModeDc) {
    // Context counts how many of the top/left neighbors use a Y palette.
    const int context =
        static_cast<int>(block.top_available[kPlaneY] &&
                         block.bp_top->palette_mode_info.size[kPlaneTypeY] >
                             0) +
        static_cast<int>(block.left_available[kPlaneY] &&
                         block.bp_left->palette_mode_info.size[kPlaneTypeY] >
                             0);
    const bool has_palette_y = reader_.ReadSymbol(
        symbol_decoder_context_.has_palette_y_cdf[block_size_context][context]);
    if (has_palette_y) {
      bp.palette_mode_info.size[kPlaneTypeY] =
          kMinPaletteSize +
          reader_.ReadSymbol<kPaletteSizeSymbolCount>(
              symbol_decoder_context_.palette_y_size_cdf[block_size_context]);
      ReadPaletteColors(block, kPlaneY);
    }
  }
  if (bp.uv_mode == kPredictionModeDc && block.HasChroma()) {
    const int context =
        static_cast<int>(bp.palette_mode_info.size[kPlaneTypeY] > 0);
    const bool has_palette_uv =
        reader_.ReadSymbol(symbol_decoder_context_.has_palette_uv_cdf[context]);
    if (has_palette_uv) {
      bp.palette_mode_info.size[kPlaneTypeUV] =
          kMinPaletteSize +
          reader_.ReadSymbol<kPaletteSizeSymbolCount>(
              symbol_decoder_context_.palette_uv_size_cdf[block_size_context]);
      ReadPaletteColors(block, kPlaneU);
    }
  }
}

void Tile::PopulatePaletteColorContexts(
    const Block& block, PlaneType plane_type, int i, int start, int end,
    uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize],
    uint8_t 
color_context[kMaxPaletteSquare]) {
  const PredictionParameters& prediction_parameters =
      *block.bp->prediction_parameters;
  // Walks one anti-diagonal (|i| = row + column) of the color index map from
  // |start| down to |end|, deriving for each position the symbol context and
  // the color ordering from the top, left and top-left neighbors.
  for (int column = start, counter = 0; column >= end; --column, ++counter) {
    const int row = i - column;
    assert(row > 0 || column > 0);
    const uint8_t top =
        (row > 0)
            ? prediction_parameters.color_index_map[plane_type][row - 1][column]
            : 0;
    const uint8_t left =
        (column > 0)
            ? prediction_parameters.color_index_map[plane_type][row][column - 1]
            : 0;
    uint8_t index_mask;
    static_assert(kMaxPaletteSize <= 8, "");
    int index;
    if (column <= 0) {
      // First column: only the top neighbor exists.
      color_context[counter] = 0;
      color_order[counter][0] = top;
      index_mask = 1 << top;
      index = 1;
    } else if (row <= 0) {
      // First row: only the left neighbor exists.
      color_context[counter] = 0;
      color_order[counter][0] = left;
      index_mask = 1 << left;
      index = 1;
    } else {
      const uint8_t top_left =
          prediction_parameters
              .color_index_map[plane_type][row - 1][column - 1];
      index_mask = (1 << top) | (1 << left) | (1 << top_left);
      // Context and ordering depend on which of the three neighbors agree.
      if (top == left && top == top_left) {
        color_context[counter] = 4;
        color_order[counter][0] = top;
        index = 1;
      } else if (top == left) {
        color_context[counter] = 3;
        color_order[counter][0] = top;
        color_order[counter][1] = top_left;
        index = 2;
      } else if (top == top_left) {
        color_context[counter] = 2;
        color_order[counter][0] = top_left;
        color_order[counter][1] = left;
        index = 2;
      } else if (left == top_left) {
        color_context[counter] = 2;
        color_order[counter][0] = top_left;
        color_order[counter][1] = top;
        index = 2;
      } else {
        color_context[counter] = 1;
        color_order[counter][0] = std::min(top, left);
        color_order[counter][1] = std::max(top, left);
        color_order[counter][2] = top_left;
        index = 3;
      }
    }
    // Even though only the first |palette_size| entries of this array are ever
    // used, it is faster to populate all 8 because of the vectorization of the
    // constant sized loop.
    for (uint8_t j = 0; j < kMaxPaletteSize; ++j) {
      if (BitMaskSet::MaskContainsValue(index_mask, j)) continue;
      color_order[counter][index++] = j;
    }
  }
}

// Reads the per-pixel palette color indices for all planes that use palette
// mode, filling prediction_parameters.color_index_map. Decoding proceeds in
// anti-diagonal order; positions outside the visible (on-screen) area are
// extended from the last decoded row/column. Returns false if the index map
// allocation fails.
bool Tile::ReadPaletteTokens(const Block& block) {
  const PaletteModeInfo& palette_mode_info = block.bp->palette_mode_info;
  PredictionParameters& prediction_parameters =
      *block.bp->prediction_parameters;
  for (int plane_type = kPlaneTypeY;
       plane_type < (block.HasChroma() ? kNumPlaneTypes : kPlaneTypeUV);
       ++plane_type) {
    const int palette_size = palette_mode_info.size[plane_type];
    if (palette_size == 0) continue;
    int block_height = block.height;
    int block_width = block.width;
    // The "screen" dimensions clip the block to the frame boundary.
    int screen_height = std::min(
        block_height, MultiplyBy4(frame_header_.rows4x4 - block.row4x4));
    int screen_width = std::min(
        block_width, MultiplyBy4(frame_header_.columns4x4 - block.column4x4));
    if (plane_type == kPlaneTypeUV) {
      block_height >>= sequence_header_.color_config.subsampling_y;
      block_width >>= sequence_header_.color_config.subsampling_x;
      screen_height >>= sequence_header_.color_config.subsampling_y;
      screen_width >>= sequence_header_.color_config.subsampling_x;
      if (block_height < 4) {
        block_height += 2;
        screen_height += 2;
      }
      if (block_width < 4) {
        block_width += 2;
        screen_width += 2;
      }
    }
    if (!prediction_parameters.color_index_map[plane_type].Reset(
            block_height, block_width, /*zero_initialize=*/false)) {
      return false;
    }
    int first_value = 0;
    reader_.DecodeUniform(palette_size, &first_value);
    prediction_parameters.color_index_map[plane_type][0][0] = first_value;
    for (int i = 1; i < screen_height + screen_width - 1; ++i) {
      const int start = std::min(i, screen_width - 1);
      const int end = std::max(0, i - screen_height + 1);
      uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize];
      uint8_t color_context[kMaxPaletteSquare];
      PopulatePaletteColorContexts(block, static_cast<PlaneType>(plane_type), i,
                                   start, end, color_order, color_context);
      for (int j 
= start, counter = 0; j >= end; --j, ++counter) { + uint16_t* const cdf = + symbol_decoder_context_ + .palette_color_index_cdf[plane_type] + [palette_size - kMinPaletteSize] + [color_context[counter]]; + const int color_order_index = reader_.ReadSymbol(cdf, palette_size); + prediction_parameters.color_index_map[plane_type][i - j][j] = + color_order[counter][color_order_index]; + } + } + if (screen_width < block_width) { + for (int i = 0; i < screen_height; ++i) { + memset( + &prediction_parameters.color_index_map[plane_type][i][screen_width], + prediction_parameters + .color_index_map[plane_type][i][screen_width - 1], + block_width - screen_width); + } + } + for (int i = screen_height; i < block_height; ++i) { + memcpy( + prediction_parameters.color_index_map[plane_type][i], + prediction_parameters.color_index_map[plane_type][screen_height - 1], + block_width); + } + } + return true; +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/partition.cc b/src/tile/bitstream/partition.cc new file mode 100644 index 0000000..f3dbbb0 --- /dev/null +++ b/src/tile/bitstream/partition.cc @@ -0,0 +1,148 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <cassert> +#include <cstdint> + +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/types.h" + +namespace libgav1 { +namespace { + +uint16_t PartitionCdfGatherHorizontalAlike(const uint16_t* const partition_cdf, + BlockSize block_size) { + // The spec computes the cdf value using the following formula (not writing + // partition_cdf[] and using short forms for partition names for clarity): + // cdf = None - H + V - S + S - HTS + HTS - HBS + HBS - VLS; + // if (block_size != 128x128) { + // cdf += VRS - H4; + // } + // After canceling out the repeated terms with opposite signs, we have: + // cdf = None - H + V - VLS; + // if (block_size != 128x128) { + // cdf += VRS - H4; + // } + uint16_t cdf = partition_cdf[kPartitionNone] - + partition_cdf[kPartitionHorizontal] + + partition_cdf[kPartitionVertical] - + partition_cdf[kPartitionVerticalWithLeftSplit]; + if (block_size != kBlock128x128) { + cdf += partition_cdf[kPartitionVerticalWithRightSplit] - + partition_cdf[kPartitionHorizontal4]; + } + return cdf; +} + +uint16_t PartitionCdfGatherVerticalAlike(const uint16_t* const partition_cdf, + BlockSize block_size) { + // The spec computes the cdf value using the following formula (not writing + // partition_cdf[] and using short forms for partition names for clarity): + // cdf = H - V + V - S + HBS - VLS + VLS - VRS + S - HTS; + // if (block_size != 128x128) { + // cdf += H4 - V4; + // } + // V4 is always zero. So, after canceling out the repeated terms with opposite + // signs, we have: + // cdf = H + HBS - VRS - HTS; + // if (block_size != 128x128) { + // cdf += H4; + // } + // VRS is zero for 128x128 blocks. 
So, further simplifying we have: + // cdf = H + HBS - HTS; + // if (block_size != 128x128) { + // cdf += H4 - VRS; + // } + uint16_t cdf = partition_cdf[kPartitionHorizontal] + + partition_cdf[kPartitionHorizontalWithBottomSplit] - + partition_cdf[kPartitionHorizontalWithTopSplit]; + if (block_size != kBlock128x128) { + cdf += partition_cdf[kPartitionHorizontal4] - + partition_cdf[kPartitionVerticalWithRightSplit]; + } + return cdf; +} + +} // namespace + +uint16_t* Tile::GetPartitionCdf(int row4x4, int column4x4, + BlockSize block_size) { + const int block_size_log2 = k4x4WidthLog2[block_size]; + int top = 0; + if (IsTopInside(row4x4)) { + top = static_cast<int>( + k4x4WidthLog2[block_parameters_holder_.Find(row4x4 - 1, column4x4) + ->size] < block_size_log2); + } + int left = 0; + if (IsLeftInside(column4x4)) { + left = static_cast<int>( + k4x4HeightLog2[block_parameters_holder_.Find(row4x4, column4x4 - 1) + ->size] < block_size_log2); + } + const int context = left * 2 + top; + return symbol_decoder_context_.partition_cdf[block_size_log2 - 1][context]; +} + +bool Tile::ReadPartition(int row4x4, int column4x4, BlockSize block_size, + bool has_rows, bool has_columns, + Partition* const partition) { + if (IsBlockSmallerThan8x8(block_size)) { + *partition = kPartitionNone; + return true; + } + if (!has_rows && !has_columns) { + *partition = kPartitionSplit; + return true; + } + uint16_t* const partition_cdf = + GetPartitionCdf(row4x4, column4x4, block_size); + if (partition_cdf == nullptr) { + return false; + } + if (has_rows && has_columns) { + const int bsize_log2 = k4x4WidthLog2[block_size]; + // The partition block size should be 8x8 or above. 
+ assert(bsize_log2 > 0); + if (bsize_log2 == 1) { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kPartitionSplit + 1>(partition_cdf)); + } else if (bsize_log2 == 5) { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kPartitionVerticalWithRightSplit + 1>( + partition_cdf)); + } else { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kMaxPartitionTypes>(partition_cdf)); + } + } else if (has_columns) { + const uint16_t cdf = + PartitionCdfGatherVerticalAlike(partition_cdf, block_size); + *partition = reader_.ReadSymbolWithoutCdfUpdate(cdf) ? kPartitionSplit + : kPartitionHorizontal; + } else { + const uint16_t cdf = + PartitionCdfGatherHorizontalAlike(partition_cdf, block_size); + *partition = reader_.ReadSymbolWithoutCdfUpdate(cdf) ? kPartitionSplit + : kPartitionVertical; + } + return true; +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/transform_size.cc b/src/tile/bitstream/transform_size.cc new file mode 100644 index 0000000..b79851d --- /dev/null +++ b/src/tile/bitstream/transform_size.cc @@ -0,0 +1,222 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

#include <algorithm>
#include <cstdint>
#include <cstring>

#include "src/dsp/constants.h"
#include "src/obu_parser.h"
#include "src/symbol_decoder_context.h"
#include "src/tile.h"
#include "src/utils/array_2d.h"
#include "src/utils/block_parameters_holder.h"
#include "src/utils/common.h"
#include "src/utils/constants.h"
#include "src/utils/entropy_decoder.h"
#include "src/utils/segmentation.h"
#include "src/utils/stack.h"
#include "src/utils/types.h"

namespace libgav1 {
namespace {

constexpr uint8_t kMaxVariableTransformTreeDepth = 2;
// Max_Tx_Depth array from section 5.11.5 in the spec with the following
// modification: If the element is not zero, it is subtracted by one. That is
// the only way in which this array is being used.
constexpr int kTxDepthCdfIndex[kMaxBlockSizes] = {
    0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3};

// Largest rectangular transform size usable for each block size (the block's
// own shape, capped at 64x64 in each dimension).
constexpr TransformSize kMaxTransformSizeRectangle[kMaxBlockSizes] = {
    kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
    kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
    kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x64,
    kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x64, kTransformSize64x16, kTransformSize64x32,
    kTransformSize64x64, kTransformSize64x64, kTransformSize64x64,
    kTransformSize64x64};

// Maps a pixel dimension to the largest square transform size that does not
// exceed it (128 also maps to 64x64, the largest transform).
TransformSize GetSquareTransformSize(uint8_t pixels) {
  switch (pixels) {
    case 128:
    case 64:
      return kTransformSize64x64;
    case 32:
      return kTransformSize32x32;
    case 16:
      return kTransformSize16x16;
    case 8:
      return kTransformSize8x8;
    default:
      return kTransformSize4x4;
  }
}

}  // namespace

// Returns the width in pixels of the transform block directly above
// (row4x4, column4x4). Returns 64 when the top neighbor is unavailable; for
// skipped inter neighbors (or any inter neighbor when |ignore_skip| is set)
// the neighbor's full block width is used instead of its transform width.
int Tile::GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
                               bool ignore_skip) {
  if (row4x4 == block.row4x4) {
    if (!block.top_available[kPlaneY]) return 64;
    const BlockParameters& bp_top =
        *block_parameters_holder_.Find(row4x4 - 1, column4x4);
    if ((ignore_skip || bp_top.skip) && bp_top.is_inter) {
      return kBlockWidthPixels[bp_top.size];
    }
  }
  return kTransformWidth[inter_transform_sizes_[row4x4 - 1][column4x4]];
}

// Mirror of GetTopTransformWidth for the left neighbor: returns the height in
// pixels of the transform block directly to the left of (row4x4, column4x4).
int Tile::GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
                                 bool ignore_skip) {
  if (column4x4 == block.column4x4) {
    if (!block.left_available[kPlaneY]) return 64;
    const BlockParameters& bp_left =
        *block_parameters_holder_.Find(row4x4, column4x4 - 1);
    if ((ignore_skip || bp_left.skip) && bp_left.is_inter) {
      return kBlockHeightPixels[bp_left.size];
    }
  }
  return kTransformHeight[inter_transform_sizes_[row4x4][column4x4 - 1]];
}

// Determines the (fixed) transform size of the block: 4x4 for lossless
// segments; the maximal rectangular size when tx_mode is not kTxModeSelect,
// the block is 4x4, or the block is a skipped inter block; otherwise a
// tx_depth symbol is decoded (contexted on the neighbors' transform
// dimensions) and the maximal size is split tx_depth times.
TransformSize Tile::ReadFixedTransformSize(const Block& block) {
  BlockParameters& bp = *block.bp;
  if (frame_header_.segmentation.lossless[bp.segment_id]) {
    return kTransformSize4x4;
  }
  const TransformSize max_rect_tx_size = kMaxTransformSizeRectangle[block.size];
  const bool allow_select = !bp.skip || !bp.is_inter;
  if (block.size == kBlock4x4 || !allow_select ||
      frame_header_.tx_mode != kTxModeSelect) {
    return max_rect_tx_size;
  }
  const int max_tx_width = kTransformWidth[max_rect_tx_size];
  const int max_tx_height = kTransformHeight[max_rect_tx_size];
  const int top_width =
      block.top_available[kPlaneY]
          ? GetTopTransformWidth(block, block.row4x4, block.column4x4, true)
          : 0;
  const int left_height =
      block.left_available[kPlaneY]
          ? GetLeftTransformHeight(block, block.row4x4, block.column4x4, true)
          : 0;
  const auto context = static_cast<int>(top_width >= max_tx_width) +
                       static_cast<int>(left_height >= max_tx_height);
  const int cdf_index = kTxDepthCdfIndex[block.size];
  uint16_t* const cdf =
      symbol_decoder_context_.tx_depth_cdf[cdf_index][context];
  // cdf_index 0 corresponds to block sizes whose tx_depth is at most 1
  // (binary symbol); all others use a 3-ary symbol.
  const int tx_depth = (cdf_index == 0)
                           ? 
static_cast<int>(reader_.ReadSymbol(cdf)) + : reader_.ReadSymbol<3>(cdf); + assert(tx_depth < 3); + TransformSize tx_size = max_rect_tx_size; + if (tx_depth == 0) return tx_size; + tx_size = kSplitTransformSize[tx_size]; + if (tx_depth == 1) return tx_size; + return kSplitTransformSize[tx_size]; +} + +void Tile::ReadVariableTransformTree(const Block& block, int row4x4, + int column4x4, TransformSize tx_size) { + const uint8_t pixels = std::max(block.width, block.height); + const TransformSize max_tx_size = GetSquareTransformSize(pixels); + const int context_delta = (kNumSquareTransformSizes - 1 - + TransformSizeToSquareTransformIndex(max_tx_size)) * + 6; + + // Branching factor is 4 and maximum depth is 2. So the maximum stack size + // necessary is (4 - 1) + 4 = 7. + Stack<TransformTreeNode, 7> stack; + stack.Push(TransformTreeNode(column4x4, row4x4, tx_size, 0)); + + do { + TransformTreeNode node = stack.Pop(); + const int tx_width4x4 = kTransformWidth4x4[node.tx_size]; + const int tx_height4x4 = kTransformHeight4x4[node.tx_size]; + if (node.tx_size != kTransformSize4x4 && + node.depth != kMaxVariableTransformTreeDepth) { + const auto top = + static_cast<int>(GetTopTransformWidth(block, node.y, node.x, false) < + kTransformWidth[node.tx_size]); + const auto left = static_cast<int>( + GetLeftTransformHeight(block, node.y, node.x, false) < + kTransformHeight[node.tx_size]); + const int context = + static_cast<int>(max_tx_size > kTransformSize8x8 && + kTransformSizeSquareMax[node.tx_size] != + max_tx_size) * + 3 + + context_delta + top + left; + // tx_split. + if (reader_.ReadSymbol(symbol_decoder_context_.tx_split_cdf[context])) { + const TransformSize sub_tx_size = kSplitTransformSize[node.tx_size]; + const int step_width4x4 = kTransformWidth4x4[sub_tx_size]; + const int step_height4x4 = kTransformHeight4x4[sub_tx_size]; + // The loops have to run in reverse order because we use a stack for + // DFS. 
+ for (int i = tx_height4x4 - step_height4x4; i >= 0; + i -= step_height4x4) { + for (int j = tx_width4x4 - step_width4x4; j >= 0; + j -= step_width4x4) { + if (node.y + i >= frame_header_.rows4x4 || + node.x + j >= frame_header_.columns4x4) { + continue; + } + stack.Push(TransformTreeNode(node.x + j, node.y + i, sub_tx_size, + node.depth + 1)); + } + } + continue; + } + } + // tx_split is false. + for (int i = 0; i < tx_height4x4; ++i) { + static_assert(sizeof(TransformSize) == 1, ""); + memset(&inter_transform_sizes_[node.y + i][node.x], node.tx_size, + tx_width4x4); + } + block_parameters_holder_.Find(node.y, node.x)->transform_size = + node.tx_size; + } while (!stack.Empty()); +} + +void Tile::DecodeTransformSize(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.tx_mode == kTxModeSelect && block.size > kBlock4x4 && + bp.is_inter && !bp.skip && + !frame_header_.segmentation.lossless[bp.segment_id]) { + const TransformSize max_tx_size = kMaxTransformSizeRectangle[block.size]; + const int tx_width4x4 = kTransformWidth4x4[max_tx_size]; + const int tx_height4x4 = kTransformHeight4x4[max_tx_size]; + for (int row = block.row4x4; row < block.row4x4 + block.height4x4; + row += tx_height4x4) { + for (int column = block.column4x4; + column < block.column4x4 + block.width4x4; column += tx_width4x4) { + ReadVariableTransformTree(block, row, column, max_tx_size); + } + } + } else { + bp.transform_size = ReadFixedTransformSize(block); + for (int row = block.row4x4; row < block.row4x4 + block.height4x4; ++row) { + static_assert(sizeof(TransformSize) == 1, ""); + memset(&inter_transform_sizes_[row][block.column4x4], bp.transform_size, + block.width4x4); + } + } +} + +} // namespace libgav1 diff --git a/src/tile/prediction.cc b/src/tile/prediction.cc new file mode 100644 index 0000000..c5560a6 --- /dev/null +++ b/src/tile/prediction.cc @@ -0,0 +1,1361 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> + +#include "src/buffer_pool.h" +#include "src/dsp/constants.h" +#include "src/dsp/dsp.h" +#include "src/motion_vector.h" +#include "src/obu_parser.h" +#include "src/prediction_mask.h" +#include "src/tile.h" +#include "src/utils/array_2d.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/logging.h" +#include "src/utils/memory.h" +#include "src/utils/types.h" +#include "src/warp_prediction.h" +#include "src/yuv_buffer.h" + +namespace libgav1 { +namespace { + +// Import all the constants in the anonymous namespace. +#include "src/inter_intra_masks.inc" + +// Precision bits when scaling reference frames. +constexpr int kReferenceScaleShift = 14; +constexpr int kAngleStep = 3; +constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = { + 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0}; + +// The following modes need both the left_column and top_row for intra +// prediction. For directional modes left/top requirement is inferred based on +// the prediction angle. For Dc modes, left/top requirement is inferred based on +// whether or not left/top is available. 
+constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical, + kPredictionModePaeth); + +int16_t GetDirectionalIntraPredictorDerivative(const int angle) { + assert(angle >= 3); + assert(angle <= 87); + return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1]; +} + +// Maps the block_size to an index as follows: +// kBlock8x8 => 0. +// kBlock8x16 => 1. +// kBlock8x32 => 2. +// kBlock16x8 => 3. +// kBlock16x16 => 4. +// kBlock16x32 => 5. +// kBlock32x8 => 6. +// kBlock32x16 => 7. +// kBlock32x32 => 8. +int GetWedgeBlockSizeIndex(BlockSize block_size) { + assert(block_size >= kBlock8x8); + return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) - + static_cast<int>(block_size >= kBlock32x8); +} + +// Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively. +int GetInterIntraMaskLookupIndex(int dimension) { + assert(dimension == 4 || dimension == 8 || dimension == 16 || + dimension == 32); + return FloorLog2(dimension) - 2; +} + +// 7.11.2.9. +int GetIntraEdgeFilterStrength(int width, int height, int filter_type, + int delta) { + const int sum = width + height; + delta = std::abs(delta); + if (filter_type == 0) { + if (sum <= 8) { + if (delta >= 56) return 1; + } else if (sum <= 16) { + if (delta >= 40) return 1; + } else if (sum <= 24) { + if (delta >= 32) return 3; + if (delta >= 16) return 2; + if (delta >= 8) return 1; + } else if (sum <= 32) { + if (delta >= 32) return 3; + if (delta >= 4) return 2; + return 1; + } else { + return 3; + } + } else { + if (sum <= 8) { + if (delta >= 64) return 2; + if (delta >= 40) return 1; + } else if (sum <= 16) { + if (delta >= 48) return 2; + if (delta >= 20) return 1; + } else if (sum <= 24) { + if (delta >= 4) return 3; + } else { + return 3; + } + } + return 0; +} + +// 7.11.2.10. 
+bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) { + const int sum = width + height; + delta = std::abs(delta); + // This function should not be called when the prediction angle is 90 or 180. + assert(delta != 0); + if (delta >= 40) return false; + return (filter_type == 1) ? sum <= 8 : sum <= 16; +} + +constexpr uint8_t kQuantizedDistanceWeight[4][2] = { + {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}}; + +constexpr uint8_t kQuantizedDistanceLookup[4][2] = { + {9, 7}, {11, 5}, {12, 4}, {13, 3}}; + +void GetDistanceWeights(const int distance[2], int weight[2]) { + // Note: distance[0] and distance[1] correspond to relative distance + // between current frame and reference frame [1] and [0], respectively. + const int order = static_cast<int>(distance[0] <= distance[1]); + if (distance[0] == 0 || distance[1] == 0) { + weight[0] = kQuantizedDistanceLookup[3][order]; + weight[1] = kQuantizedDistanceLookup[3][1 - order]; + } else { + int i; + for (i = 0; i < 3; ++i) { + const int weight_0 = kQuantizedDistanceWeight[i][order]; + const int weight_1 = kQuantizedDistanceWeight[i][1 - order]; + if (order == 0) { + if (distance[0] * weight_0 < distance[1] * weight_1) break; + } else { + if (distance[0] * weight_0 > distance[1] * weight_1) break; + } + } + weight[0] = kQuantizedDistanceLookup[i][order]; + weight[1] = kQuantizedDistanceLookup[i][1 - order]; + } +} + +dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left, + bool has_top) { + if (mode == kPredictionModeDc) { + if (has_left && has_top) { + return dsp::kIntraPredictorDc; + } + if (has_left) { + return dsp::kIntraPredictorDcLeft; + } + if (has_top) { + return dsp::kIntraPredictorDcTop; + } + return dsp::kIntraPredictorDcFill; + } + switch (mode) { + case kPredictionModePaeth: + return dsp::kIntraPredictorPaeth; + case kPredictionModeSmooth: + return dsp::kIntraPredictorSmooth; + case kPredictionModeSmoothVertical: + return dsp::kIntraPredictorSmoothVertical; + case 
kPredictionModeSmoothHorizontal: + return dsp::kIntraPredictorSmoothHorizontal; + default: + return dsp::kNumIntraPredictors; + } +} + +uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane, + const int x, const int y, const int bitdepth) { +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (bitdepth > 8) { + Array2DView<uint16_t> buffer16( + buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t), + reinterpret_cast<uint16_t*>(&buffer[plane][0][0])); + return reinterpret_cast<uint8_t*>(&buffer16[y][x]); + } +#endif // LIBGAV1_MAX_BITDEPTH >= 10 + static_cast<void>(bitdepth); + return &buffer[plane][y][x]; +} + +int GetPixelPositionFromHighScale(int start, int step, int offset) { + return (start + step * offset) >> kScaleSubPixelBits; +} + +dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra, + bool is_wedge_inter_intra, + int subsampling_x, int subsampling_y) { + return (is_inter_intra && !is_wedge_inter_intra) + ? dsp.mask_blend[0][/*is_inter_intra=*/true] + : dsp.mask_blend[subsampling_x + subsampling_y][is_inter_intra]; +} + +} // namespace + +template <typename Pixel> +void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool has_top_right, + bool has_bottom_left, PredictionMode mode, + TransformSize tx_size) { + const int width = 1 << kTransformWidthLog2[tx_size]; + const int height = 1 << kTransformHeightLog2[tx_size]; + const int x_shift = subsampling_x_[plane]; + const int y_shift = subsampling_y_[plane]; + const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1; + const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1; + // For performance reasons, do not initialize the following two buffers. 
+ alignas(kMaxAlignment) Pixel top_row_data[160]; + alignas(kMaxAlignment) Pixel left_column_data[160]; +#if LIBGAV1_MSAN + if (IsDirectionalMode(mode)) { + memset(top_row_data, 0, sizeof(top_row_data)); + memset(left_column_data, 0, sizeof(left_column_data)); + } +#endif + // Some predictors use |top_row_data| and |left_column_data| with a negative + // offset to access pixels to the top-left of the current block. So have some + // space before the arrays to allow populating those without having to move + // the rest of the array. + Pixel* const top_row = top_row_data + 16; + Pixel* const left_column = left_column_data + 16; + const int bitdepth = sequence_header_.color_config.bitdepth; + const int top_and_left_size = width + height; + const bool is_directional_mode = IsDirectionalMode(mode); + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + const bool use_filter_intra = + (plane == kPlaneY && prediction_parameters.use_filter_intra); + const int prediction_angle = + is_directional_mode + ? kPredictionModeToAngle[mode] + + prediction_parameters.angle_delta[GetPlaneType(plane)] * + kAngleStep + : 0; + // Directional prediction requires buffers larger than the width or height. + const int top_size = is_directional_mode ? top_and_left_size : width; + const int left_size = is_directional_mode ? top_and_left_size : height; + const int top_right_size = + is_directional_mode ? (has_top_right ? 2 : 1) * width : width; + const int bottom_left_size = + is_directional_mode ? (has_bottom_left ? 
2 : 1) * height : height; + + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle < 180) || + (mode == kPredictionModeDc && has_top); + const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle > 90) || + (mode == kPredictionModeDc && has_left); + + const Pixel* top_row_src = buffer[y - 1]; + + // Determine if we need to retrieve the top row from + // |intra_prediction_buffer_|. + if ((needs_top || needs_left) && use_intra_prediction_buffer_) { + // Superblock index of block.row4x4. block.row4x4 is always in luma + // dimension (no subsampling). + const int current_superblock_index = + block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4); + // Superblock index of y - 1. y is in the plane dimension (chroma planes + // could be subsampled). + const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) - + subsampling_y_[plane]; + const int top_row_superblock_index = (y - 1) >> plane_shift; + // If the superblock index of y - 1 is not that of the current superblock, + // then we will have to retrieve the top row from the + // |intra_prediction_buffer_|. + if (current_superblock_index != top_row_superblock_index) { + top_row_src = reinterpret_cast<const Pixel*>( + (*intra_prediction_buffer_)[plane].get()); + } + } + + if (needs_top) { + // Compute top_row. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + top_row[-1] = has_top ? 
top_row_src[left_index] : buffer[y][left_index]; + } else { + top_row[-1] = 1 << (bitdepth - 1); + } + if (!has_top && has_left) { + Memset(top_row, buffer[y][x - 1], top_size); + } else if (!has_top && !has_left) { + Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size); + } else { + const int top_limit = std::min(max_x - x + 1, top_right_size); + memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel)); + // Even though it is safe to call Memset with a size of 0, accessing + // top_row_src[top_limit - x + 1] is not allowed when this condition is + // false. + if (top_size - top_limit > 0) { + Memset(top_row + top_limit, top_row_src[top_limit + x - 1], + top_size - top_limit); + } + } + } + if (needs_left) { + // Compute left_column. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + left_column[-1] = + has_top ? top_row_src[left_index] : buffer[y][left_index]; + } else { + left_column[-1] = 1 << (bitdepth - 1); + } + if (!has_left && has_top) { + Memset(left_column, top_row_src[x], left_size); + } else if (!has_left && !has_top) { + Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size); + } else { + const int left_limit = std::min(max_y - y + 1, bottom_left_size); + for (int i = 0; i < left_limit; ++i) { + left_column[i] = buffer[y + i][x - 1]; + } + // Even though it is safe to call Memset with a size of 0, accessing + // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is + // false. 
+ if (left_size - left_limit > 0) { + Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1], + left_size - left_limit); + } + } + } + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t dest_stride = buffer_[plane].columns(); + if (use_filter_intra) { + dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column, + prediction_parameters.filter_intra_mode, width, + height); + } else if (is_directional_mode) { + DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left, + needs_top, prediction_angle, width, height, max_x, + max_y, tx_size, top_row, left_column); + } else { + const dsp::IntraPredictor predictor = + GetIntraPredictor(mode, has_left, has_top); + assert(predictor != dsp::kNumIntraPredictors); + dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row, + left_column); + } +} + +template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#endif + +constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical); + +bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const { + const BlockParameters& bp = *block_parameters_holder_.Find(row, column); + PredictionMode mode; + if (plane == kPlaneY) { + mode = bp.y_mode; + } else { + if (bp.reference_frame[0] > kReferenceFrameIntra) return false; + mode = bp.uv_mode; + } + return kPredictionModeSmoothMask.Contains(mode); +} + +int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = 
subsampling_y_[plane]; + if (block.top_available[plane]) { + const int row = block.row4x4 - 1 - (block.row4x4 & subsampling_y); + const int column = block.column4x4 + (~block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + if (block.left_available[plane]) { + const int row = block.row4x4 + (~block.row4x4 & subsampling_y); + const int column = block.column4x4 - 1 - (block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + return 0; +} + +template <typename Pixel> +void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool needs_left, + bool needs_top, int prediction_angle, + int width, int height, int max_x, int max_y, + TransformSize tx_size, Pixel* const top_row, + Pixel* const left_column) { + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t stride = buffer_[plane].columns(); + if (prediction_angle == 90) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical]( + dest, stride, top_row, left_column); + return; + } + if (prediction_angle == 180) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal]( + dest, stride, top_row, left_column); + return; + } + + bool upsampled_top = false; + bool upsampled_left = false; + if (sequence_header_.enable_intra_edge_filter) { + const int filter_type = GetIntraEdgeFilterType(block, plane); + if (prediction_angle > 90 && prediction_angle < 180 && + (width + height) >= 24) { + // 7.11.2.7. 
+ left_column[-1] = top_row[-1] = RightShiftWithRounding( + left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4); + } + if (has_top && needs_top) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 90); + if (strength > 0) { + const int num_pixels = std::min(width, max_x - x + 1) + + ((prediction_angle < 90) ? height : 0) + 1; + dsp_.intra_edge_filter(top_row - 1, num_pixels, strength); + } + } + if (has_left && needs_left) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 180); + if (strength > 0) { + const int num_pixels = std::min(height, max_y - y + 1) + + ((prediction_angle > 180) ? width : 0) + 1; + dsp_.intra_edge_filter(left_column - 1, num_pixels, strength); + } + } + upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 90); + if (upsampled_top && needs_top) { + const int num_pixels = width + ((prediction_angle < 90) ? height : 0); + dsp_.intra_edge_upsampler(top_row, num_pixels); + } + upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 180); + if (upsampled_left && needs_left) { + const int num_pixels = height + ((prediction_angle > 180) ? 
width : 0); + dsp_.intra_edge_upsampler(left_column, num_pixels); + } + } + + if (prediction_angle < 90) { + const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle); + dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height, + dx, upsampled_top); + } else if (prediction_angle < 180) { + const int dx = + GetDirectionalIntraPredictorDerivative(180 - prediction_angle); + const int dy = + GetDirectionalIntraPredictorDerivative(prediction_angle - 90); + dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column, + width, height, dx, dy, upsampled_top, + upsampled_left); + } else { + assert(prediction_angle < 270); + const int dy = + GetDirectionalIntraPredictorDerivative(270 - prediction_angle); + dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width, + height, dy, upsampled_left); + } +} + +template <typename Pixel> +void Tile::PalettePrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, const int x, + const int y, const TransformSize tx_size) { + const int tx_width = kTransformWidth[tx_size]; + const int tx_height = kTransformHeight[tx_size]; + const uint16_t* const palette = block.bp->palette_mode_info.color[plane]; + const PlaneType plane_type = GetPlaneType(plane); + const int x4 = MultiplyBy4(x); + const int y4 = MultiplyBy4(y); + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + for (int row = 0; row < tx_height; ++row) { + assert(block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row] != nullptr); + for (int column = 0; column < tx_width; ++column) { + buffer[start_y + row][start_x + column] = + palette[block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row][x4 + column]]; + } + } +} + +template void Tile::PalettePrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, 
const int y, const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::PalettePrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, const int y, const TransformSize tx_size); +#endif + +template <typename Pixel> +void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, + const TransformSize tx_size) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + Array2DView<Pixel> y_buffer( + buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0])); + if (!block.scratch_buffer->cfl_luma_buffer_valid) { + const int luma_x = start_x << subsampling_x; + const int luma_y = start_y << subsampling_y; + dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y]( + block.scratch_buffer->cfl_luma_buffer, + prediction_parameters.max_luma_width - luma_x, + prediction_parameters.max_luma_height - luma_y, + reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]), + buffer_[kPlaneY].columns()); + block.scratch_buffer->cfl_luma_buffer_valid = true; + } + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + dsp_.cfl_intra_predictors[tx_size]( + reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]), + buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer, + (plane == kPlaneU) ? 
prediction_parameters.cfl_alpha_u + : prediction_parameters.cfl_alpha_v); +} + +template void Tile::ChromaFromLumaPrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::ChromaFromLumaPrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#endif + +void Tile::InterIntraPrediction( + uint16_t* const prediction_0, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, + const PredictionParameters& prediction_parameters, + const int prediction_width, const int prediction_height, + const int subsampling_x, const int subsampling_y, uint8_t* const dest, + const ptrdiff_t dest_stride) { + assert(prediction_mask != nullptr); + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra || + prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge); + // The first buffer of InterIntra is from inter prediction. + // The second buffer is from intra prediction. +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (sequence_header_.color_config.bitdepth > 8) { + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true, + prediction_parameters.is_wedge_inter_intra, subsampling_x, + subsampling_y)( + prediction_0, reinterpret_cast<uint16_t*>(dest), + dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, dest, dest_stride); + return; + } +#endif + const int function_index = prediction_parameters.is_wedge_inter_intra + ? subsampling_x + subsampling_y + : 0; + // |is_inter_intra| prediction values are stored in a Pixel buffer but it is + // currently declared as a uint16_t buffer. + // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and + // remove the reinterpret_cast. 
+ dsp_.inter_intra_mask_blend_8bpp[function_index]( + reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride, + prediction_mask, prediction_mask_stride, prediction_width, + prediction_height); +} + +void Tile::CompoundInterPrediction( + const Block& block, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, const int prediction_width, + const int prediction_height, const int subsampling_x, + const int subsampling_y, const int candidate_row, + const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) { + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + + void* prediction[2]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + const int bitdepth = sequence_header_.color_config.bitdepth; + if (bitdepth > 8) { + prediction[0] = block.scratch_buffer->prediction_buffer[0]; + prediction[1] = block.scratch_buffer->prediction_buffer[1]; + } else { +#endif + prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0]; + prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + } +#endif + + switch (prediction_parameters.compound_prediction_type) { + case kCompoundPredictionTypeWedge: + case kCompoundPredictionTypeDiffWeighted: + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false, + prediction_parameters.is_wedge_inter_intra, + subsampling_x, subsampling_y)( + prediction[0], prediction[1], + /*prediction_stride=*/prediction_width, prediction_mask, + prediction_mask_stride, prediction_width, prediction_height, dest, + dest_stride); + break; + case kCompoundPredictionTypeDistance: + DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width, + prediction_height, candidate_row, + candidate_column, dest, dest_stride); + break; + default: + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeAverage); + dsp_.average_blend(prediction[0], prediction[1], prediction_width, + prediction_height, dest, dest_stride); 
+ break; + } +} + +GlobalMotion* Tile::GetWarpParams( + const Block& block, const Plane plane, const int prediction_width, + const int prediction_height, + const PredictionParameters& prediction_parameters, + const ReferenceFrameType reference_type, bool* const is_local_valid, + GlobalMotion* const global_motion_params, + GlobalMotion* const local_warp_params) const { + if (prediction_width < 8 || prediction_height < 8 || + frame_header_.force_integer_mv == 1) { + return nullptr; + } + if (plane == kPlaneY) { + *is_local_valid = + prediction_parameters.motion_mode == kMotionModeLocalWarp && + WarpEstimation( + prediction_parameters.num_warp_samples, DivideBy4(prediction_width), + DivideBy4(prediction_height), block.row4x4, block.column4x4, + block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates, + local_warp_params) && + SetupShear(local_warp_params); + } + if (prediction_parameters.motion_mode == kMotionModeLocalWarp && + *is_local_valid) { + return local_warp_params; + } + if (!IsScaled(reference_type)) { + GlobalMotionTransformationType global_motion_type = + (reference_type != kReferenceFrameIntra) + ? global_motion_params->type + : kNumGlobalMotionTransformationTypes; + const bool is_global_valid = + IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) && + SetupShear(global_motion_params); + // Valid global motion type implies reference type can't be intra. 
+ assert(!is_global_valid || reference_type != kReferenceFrameIntra); + if (is_global_valid) return global_motion_params; + } + return nullptr; +} + +bool Tile::InterPrediction(const Block& block, const Plane plane, const int x, + const int y, const int prediction_width, + const int prediction_height, int candidate_row, + int candidate_column, bool* const is_local_valid, + GlobalMotion* const local_warp_params) { + const int bitdepth = sequence_header_.color_config.bitdepth; + const BlockParameters& bp = *block.bp; + const BlockParameters& bp_reference = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const bool is_compound = + bp_reference.reference_frame[1] > kReferenceFrameIntra; + assert(bp.is_inter); + const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra; + + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t dest_stride = buffer_[plane].columns(); // In bytes. + for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) { + const ReferenceFrameType reference_type = + bp_reference.reference_frame[index]; + GlobalMotion global_motion_params = + frame_header_.global_motion[reference_type]; + GlobalMotion* warp_params = + GetWarpParams(block, plane, prediction_width, prediction_height, + prediction_parameters, reference_type, is_local_valid, + &global_motion_params, local_warp_params); + if (warp_params != nullptr) { + if (!BlockWarpProcess(block, plane, index, x, y, prediction_width, + prediction_height, warp_params, is_compound, + is_inter_intra, dest, dest_stride)) { + return false; + } + } else { + const int reference_index = + prediction_parameters.use_intra_block_copy + ? 
-1 + : frame_header_.reference_frame_index[reference_type - + kReferenceFrameLast]; + if (!BlockInterPrediction( + block, plane, reference_index, bp_reference.mv.mv[index], x, y, + prediction_width, prediction_height, candidate_row, + candidate_column, block.scratch_buffer->prediction_buffer[index], + is_compound, is_inter_intra, dest, dest_stride)) { + return false; + } + } + } + + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + ptrdiff_t prediction_mask_stride = 0; + const uint8_t* prediction_mask = nullptr; + if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge) { + const Array2D<uint8_t>& wedge_mask = + wedge_masks_[GetWedgeBlockSizeIndex(block.size)] + [prediction_parameters.wedge_sign] + [prediction_parameters.wedge_index]; + prediction_mask = wedge_mask[0]; + prediction_mask_stride = wedge_mask.columns(); + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra) { + // 7.11.3.13. The inter intra masks are precomputed and stored as a set of + // look up tables. 
+ assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes); + prediction_mask = + kInterIntraMasks[prediction_parameters.inter_intra_mode] + [GetInterIntraMaskLookupIndex(prediction_width)] + [GetInterIntraMaskLookupIndex(prediction_height)]; + prediction_mask_stride = prediction_width; + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeDiffWeighted) { + if (plane == kPlaneY) { + assert(prediction_width >= 8); + assert(prediction_height >= 8); + dsp_.weight_mask[FloorLog2(prediction_width) - 3] + [FloorLog2(prediction_height) - 3] + [static_cast<int>(prediction_parameters.mask_is_inverse)]( + block.scratch_buffer->prediction_buffer[0], + block.scratch_buffer->prediction_buffer[1], + block.scratch_buffer->weight_mask, + kMaxSuperBlockSizeInPixels); + } + prediction_mask = block.scratch_buffer->weight_mask; + prediction_mask_stride = kMaxSuperBlockSizeInPixels; + } + + if (is_compound) { + CompoundInterPrediction(block, prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, subsampling_x, + subsampling_y, candidate_row, candidate_column, + dest, dest_stride); + } else if (prediction_parameters.motion_mode == kMotionModeObmc) { + // Obmc mode is allowed only for single reference (!is_compound). + return ObmcPrediction(block, plane, prediction_width, prediction_height); + } else if (is_inter_intra) { + // InterIntra and obmc must be mutually exclusive. 
+ InterIntraPrediction( + block.scratch_buffer->prediction_buffer[0], prediction_mask, + prediction_mask_stride, prediction_parameters, prediction_width, + prediction_height, subsampling_x, subsampling_y, dest, dest_stride); + } + return true; +} + +bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv, + const Plane plane, + const int reference_frame_index, const int width, + const int height, const int x, const int y, + const int candidate_row, + const int candidate_column, + const ObmcDirection blending_direction) { + const int bitdepth = sequence_header_.color_config.bitdepth; + // Obmc's prediction needs to be clipped before blending with above/left + // prediction blocks. + // Obmc prediction is used only when is_compound is false. So it is safe to + // use prediction_buffer[1] as a temporary buffer for the Obmc prediction. + static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >= + 64 * 64 * sizeof(uint16_t), + ""); + auto* const obmc_buffer = + reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]); + const ptrdiff_t obmc_buffer_stride = + (bitdepth == 8) ? 
width : width * sizeof(uint16_t); + if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y, + width, height, candidate_row, candidate_column, + nullptr, false, false, obmc_buffer, + obmc_buffer_stride)) { + return false; + } + + uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t prediction_stride = buffer_[plane].columns(); + dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width, + height, obmc_buffer, obmc_buffer_stride); + return true; +} + +bool Tile::ObmcPrediction(const Block& block, const Plane plane, + const int width, const int height) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + if (block.top_available[kPlaneY] && + !IsBlockSmallerThan8x8(block.residual_size[plane])) { + const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]); + const int column4x4_max = + std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4); + const int candidate_row = block.row4x4 - 1; + const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y; + int column4x4 = block.column4x4; + const int prediction_height = std::min(height >> 1, 32 >> subsampling_y); + for (int i = 0, step; i < num_limit && column4x4 < column4x4_max; + column4x4 += step) { + const int candidate_column = column4x4 | 1; + const BlockParameters& bp_top = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_top.size; + step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16); + if (bp_top.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_top.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_width = + std::min(width, MultiplyBy4(step) >> subsampling_x); + if (!ObmcBlockPrediction( + block, bp_top.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, + 
MultiplyBy4(column4x4) >> subsampling_x, block_start_y, + candidate_row, candidate_column, kObmcDirectionVertical)) { + return false; + } + } + } + } + + if (block.left_available[kPlaneY]) { + const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]); + const int row4x4_max = + std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4); + const int candidate_column = block.column4x4 - 1; + int row4x4 = block.row4x4; + const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x; + const int prediction_width = std::min(width >> 1, 32 >> subsampling_x); + for (int i = 0, step; i < num_limit && row4x4 < row4x4_max; + row4x4 += step) { + const int candidate_row = row4x4 | 1; + const BlockParameters& bp_left = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_left.size; + step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16); + if (bp_left.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_left.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_height = + std::min(height, MultiplyBy4(step) >> subsampling_y); + if (!ObmcBlockPrediction( + block, bp_left.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, block_start_x, + MultiplyBy4(row4x4) >> subsampling_y, candidate_row, + candidate_column, kObmcDirectionHorizontal)) { + return false; + } + } + } + } + return true; +} + +void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1, + const int width, const int height, + const int candidate_row, + const int candidate_column, uint8_t* dest, + ptrdiff_t dest_stride) { + int distance[2]; + int weight[2]; + for (int reference = 0; reference < 2; ++reference) { + const BlockParameters& bp = + *block_parameters_holder_.Find(candidate_row, candidate_column); + // Note: distance[0] and distance[1] correspond to relative distance + // 
between current frame and reference frame [1] and [0], respectively. + distance[1 - reference] = std::min( + std::abs(static_cast<int>( + current_frame_.reference_info() + ->relative_distance_from[bp.reference_frame[reference]])), + static_cast<int>(kMaxFrameDistance)); + } + GetDistanceWeights(distance, weight); + + dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1], + width, height, dest, dest_stride); +} + +void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane, + const int reference_frame_index, const int x, + const int y, int* const start_x, + int* const start_y, int* const step_x, + int* const step_y) { + const int reference_upscaled_width = + (reference_frame_index == -1) + ? frame_header_.upscaled_width + : reference_frames_[reference_frame_index]->upscaled_width(); + const int reference_height = + (reference_frame_index == -1) + ? frame_header_.height + : reference_frames_[reference_frame_index]->frame_height(); + assert(2 * frame_header_.width >= reference_upscaled_width && + 2 * frame_header_.height >= reference_height && + frame_header_.width <= 16 * reference_upscaled_width && + frame_header_.height <= 16 * reference_height); + const bool is_scaled_x = reference_upscaled_width != frame_header_.width; + const bool is_scaled_y = reference_height != frame_header_.height; + const int half_sample = 1 << (kSubPixelBits - 1); + int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]); + int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]); + const int rounding_offset = + DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits)); + if (is_scaled_x) { + const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) + + DivideBy2(frame_header_.width)) / + frame_header_.width; + *step_x = RightShiftWithRoundingSigned( + scale_x, kReferenceScaleShift - kScaleSubPixelBits); + orig_x += half_sample; + // When frame size is 4k and above, orig_x can be above 16 bits, scale_x 
can + // be up to 15 bits. So we use int64_t to hold base_x. + const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x - + (half_sample << kReferenceScaleShift); + *start_x = + RightShiftWithRoundingSigned( + base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_x = 1 << kScaleSubPixelBits; + *start_x = LeftShift(orig_x, 6) + rounding_offset; + } + if (is_scaled_y) { + const int scale_y = ((reference_height << kReferenceScaleShift) + + DivideBy2(frame_header_.height)) / + frame_header_.height; + *step_y = RightShiftWithRoundingSigned( + scale_y, kReferenceScaleShift - kScaleSubPixelBits); + orig_y += half_sample; + const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y - + (half_sample << kReferenceScaleShift); + *start_y = + RightShiftWithRoundingSigned( + base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_y = 1 << kScaleSubPixelBits; + *start_y = LeftShift(orig_y, 6) + rounding_offset; + } +} + +// static. 
+bool Tile::GetReferenceBlockPosition( + const int reference_frame_index, const bool is_scaled, const int width, + const int height, const int ref_start_x, const int ref_last_x, + const int ref_start_y, const int ref_last_y, const int start_x, + const int start_y, const int step_x, const int step_y, + const int left_border, const int right_border, const int top_border, + const int bottom_border, int* ref_block_start_x, int* ref_block_start_y, + int* ref_block_end_x) { + *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0); + *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0); + if (reference_frame_index == -1) { + return false; + } + *ref_block_start_x -= kConvolveBorderLeftTop; + *ref_block_start_y -= kConvolveBorderLeftTop; + *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) + + kConvolveBorderRight; + int ref_block_end_y = + GetPixelPositionFromHighScale(start_y, step_y, height - 1) + + kConvolveBorderBottom; + if (is_scaled) { + const int block_height = + (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> + kScaleSubPixelBits) + + kSubPixelTaps; + ref_block_end_y = *ref_block_start_y + block_height - 1; + } + // Determines if we need to extend beyond the left/right/top/bottom border. + return *ref_block_start_x < (ref_start_x - left_border) || + *ref_block_end_x > (ref_last_x + right_border) || + *ref_block_start_y < (ref_start_y - top_border) || + ref_block_end_y > (ref_last_y + bottom_border); +} + +// Builds a block as the input for convolve, by copying the content of +// reference frame (either a decoded reference frame, or current frame). +// |block_extended_width| is the combined width of the block and its borders. 
// Copies (and, where the request runs past the reference frame, edge-extends)
// the source pixels needed by the convolution into |block_buffer|. Rows above
// or below the valid reference area repeat the nearest valid row; columns are
// clamped/memset similarly. Template parameter Pixel is uint8_t or uint16_t
// depending on bitdepth.
template <typename Pixel>
void Tile::BuildConvolveBlock(
    const Plane plane, const int reference_frame_index, const bool is_scaled,
    const int height, const int ref_start_x, const int ref_last_x,
    const int ref_start_y, const int ref_last_y, const int step_y,
    const int ref_block_start_x, const int ref_block_end_x,
    const int ref_block_start_y, uint8_t* block_buffer,
    ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
  const YuvBuffer* const reference_buffer =
      (reference_frame_index == -1)
          ? current_frame_.buffer()
          : reference_frames_[reference_frame_index]->buffer();
  Array2DView<const Pixel> reference_block(
      reference_buffer->height(plane),
      reference_buffer->stride(plane) / sizeof(Pixel),
      reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
  auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
  // Stride was given in bytes; convert to Pixel units.
  convolve_buffer_stride /= sizeof(Pixel);
  int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
  if (is_scaled) {
    block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
                    kScaleSubPixelBits) +
                   kSubPixelTaps;
  }
  const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
  const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
  const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
  const int block_width = copy_end_x - copy_start_x + 1;
  const bool extend_left = ref_block_start_x < ref_start_x;
  const bool extend_right = ref_block_end_x > ref_last_x;
  const bool out_of_left = copy_start_x > ref_block_end_x;
  const bool out_of_right = copy_end_x < ref_block_start_x;
  if (out_of_left || out_of_right) {
    // The requested block lies entirely outside the reference horizontally:
    // every output row is a solid fill of the nearest edge pixel.
    const int ref_x = out_of_left ? copy_start_x : copy_end_x;
    Pixel* buf_ptr = block_head;
    for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
      Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
      // Advance the source row only while it stays inside the valid range;
      // otherwise the last valid row is repeated (vertical edge extension).
      if (ref_block_start_y + y >= ref_start_y &&
          ref_block_start_y + y < ref_last_y) {
        ++ref_y;
      }
      buf_ptr += convolve_buffer_stride;
    }
  } else {
    Pixel* buf_ptr = block_head;
    const int left_width = copy_start_x - ref_block_start_x;
    for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
      if (extend_left) {
        Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
      }
      memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
             block_width * sizeof(Pixel));
      if (extend_right) {
        Memset(buf_ptr + left_width + block_width,
               reference_block[ref_y][copy_end_x],
               block_extended_width - left_width - block_width);
      }
      if (ref_block_start_y + y >= ref_start_y &&
          ref_block_start_y + y < ref_last_y) {
        ++ref_y;
      }
      buf_ptr += convolve_buffer_stride;
    }
  }
}

// Performs translational motion compensation for a single reference of one
// block/plane: scales the motion vector, reads (or assembles) the source
// pixels, waits for the reference rows in frame-parallel mode, and runs the
// appropriate convolve kernel into |dest| (or |prediction| when compound or
// inter-intra). Returns false on a frame-parallel wait failure.
bool Tile::BlockInterPrediction(
    const Block& block, const Plane plane, const int reference_frame_index,
    const MotionVector& mv, const int x, const int y, const int width,
    const int height, const int candidate_row, const int candidate_column,
    uint16_t* const prediction, const bool is_compound,
    const bool is_inter_intra, uint8_t* const dest,
    const ptrdiff_t dest_stride) {
  const BlockParameters& bp =
      *block_parameters_holder_.Find(candidate_row, candidate_column);
  int start_x;
  int start_y;
  int step_x;
  int step_y;
  ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
                    &step_x, &step_y);
  const int horizontal_filter_index = bp.interpolation_filter[1];
  const int vertical_filter_index = bp.interpolation_filter[0];
  const int subsampling_x = subsampling_x_[plane];
  const int subsampling_y = subsampling_y_[plane];
  // reference_frame_index equal to -1 indicates using current frame as
  // reference.
  const YuvBuffer* const reference_buffer =
      (reference_frame_index == -1)
          ? current_frame_.buffer()
          : reference_frames_[reference_frame_index]->buffer();
  const int reference_upscaled_width =
      (reference_frame_index == -1)
          ? MultiplyBy4(frame_header_.columns4x4)
          : reference_frames_[reference_frame_index]->upscaled_width();
  const int reference_height =
      (reference_frame_index == -1)
          ? MultiplyBy4(frame_header_.rows4x4)
          : reference_frames_[reference_frame_index]->frame_height();
  const int ref_start_x = 0;
  const int ref_last_x =
      SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
  const int ref_start_y = 0;
  const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;

  const bool is_scaled = (reference_frame_index != -1) &&
                         (frame_header_.width != reference_upscaled_width ||
                          frame_header_.height != reference_height);
  const int bitdepth = sequence_header_.color_config.bitdepth;
  const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
  int ref_block_start_x;
  int ref_block_start_y;
  int ref_block_end_x;
  const bool extend_block = GetReferenceBlockPosition(
      reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
      ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
      reference_buffer->left_border(plane),
      reference_buffer->right_border(plane),
      reference_buffer->top_border(plane),
      reference_buffer->bottom_border(plane), &ref_block_start_x,
      &ref_block_start_y, &ref_block_end_x);

  // In frame parallel mode, ensure that the reference block has been decoded
  // and available for referencing.
  if (reference_frame_index != -1 && frame_parallel_) {
    int reference_y_max;
    if (is_scaled) {
      // TODO(vigneshv): For now, we wait for the entire reference frame to be
      // decoded if we are using scaled references. This will eventually be
      // fixed.
      reference_y_max = reference_height;
    } else {
      reference_y_max =
          std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
      // For U and V planes with subsampling, we need to multiply
      // reference_y_max by 2 since we only track the progress of Y planes.
      reference_y_max = LeftShift(reference_y_max, subsampling_y);
    }
    if (reference_frame_progress_cache_[reference_frame_index] <
            reference_y_max &&
        !reference_frames_[reference_frame_index]->WaitUntil(
            reference_y_max,
            &reference_frame_progress_cache_[reference_frame_index])) {
      return false;
    }
  }

  const uint8_t* block_start = nullptr;
  ptrdiff_t convolve_buffer_stride;
  if (!extend_block) {
    // Fast path: convolve directly out of the reference frame buffer.
    const YuvBuffer* const reference_buffer =
        (reference_frame_index == -1)
            ? current_frame_.buffer()
            : reference_frames_[reference_frame_index]->buffer();
    convolve_buffer_stride = reference_buffer->stride(plane);
    if (reference_frame_index == -1 || is_scaled) {
      block_start = reference_buffer->data(plane) +
                    ref_block_start_y * reference_buffer->stride(plane) +
                    ref_block_start_x * pixel_size;
    } else {
      block_start = reference_buffer->data(plane) +
                    (ref_block_start_y + kConvolveBorderLeftTop) *
                        reference_buffer->stride(plane) +
                    (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
    }
  } else {
    // Slow path: assemble an edge-extended copy in the scratch buffer.
    // The block width can be at most 2 times as much as current
    // block's width because of scaling.
    auto block_extended_width = Align<ptrdiff_t>(
        (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
            pixel_size,
        kMaxAlignment);
    convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (bitdepth > 8) {
      BuildConvolveBlock<uint16_t>(
          plane, reference_frame_index, is_scaled, height, ref_start_x,
          ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
          ref_block_end_x, ref_block_start_y,
          block.scratch_buffer->convolve_block_buffer.get(),
          convolve_buffer_stride, block_extended_width);
    } else {
#endif
      BuildConvolveBlock<uint8_t>(
          plane, reference_frame_index, is_scaled, height, ref_start_x,
          ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
          ref_block_end_x, ref_block_start_y,
          block.scratch_buffer->convolve_block_buffer.get(),
          convolve_buffer_stride, block_extended_width);
#if LIBGAV1_MAX_BITDEPTH >= 10
    }
#endif
    block_start = block.scratch_buffer->convolve_block_buffer.get() +
                  (is_scaled ? 0
                             : kConvolveBorderLeftTop * convolve_buffer_stride +
                                   kConvolveBorderLeftTop * pixel_size);
  }

  void* const output =
      (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
  ptrdiff_t output_stride = (is_compound || is_inter_intra)
                                ? /*prediction_stride=*/width
                                : dest_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
  // |is_inter_intra| calculations are written to the |prediction| buffer.
  // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
  // convolve_func() expects |output_stride| to be in bytes and not Pixels.
  // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
  // account for this.
  if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
    output_stride *= 2;
  }
#endif
  assert(output != nullptr);
  if (is_scaled) {
    dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
    assert(convolve_func != nullptr);

    convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
                  vertical_filter_index, start_x, start_y, step_x, step_y,
                  width, height, output, output_stride);
  } else {
    const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
    const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;

    dsp::ConvolveFunc convolve_func =
        dsp_.convolve[reference_frame_index == -1][is_compound]
                     [vertical_filter_id != 0][horizontal_filter_id != 0];
    assert(convolve_func != nullptr);

    convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
                  vertical_filter_index, horizontal_filter_id,
                  vertical_filter_id, width, height, output, output_stride);
  }
  return true;
}

// Performs warped motion compensation for one reference of a block/plane,
// dispatching to the warp or warp_compound dsp kernel. In frame-parallel mode
// it first derives the maximum reference row any 8x8 warp tile will touch and
// waits for it. Returns false on a frame-parallel wait failure.
bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
                            const int index, const int block_start_x,
                            const int block_start_y, const int width,
                            const int height, GlobalMotion* const warp_params,
                            const bool is_compound, const bool is_inter_intra,
                            uint8_t* const dest, const ptrdiff_t dest_stride) {
  assert(width >= 8 && height >= 8);
  const BlockParameters& bp = *block.bp;
  const int reference_frame_index =
      frame_header_.reference_frame_index[bp.reference_frame[index] -
                                          kReferenceFrameLast];
  const uint8_t* const source =
      reference_frames_[reference_frame_index]->buffer()->data(plane);
  ptrdiff_t source_stride =
      reference_frames_[reference_frame_index]->buffer()->stride(plane);
  const int source_width =
      reference_frames_[reference_frame_index]->buffer()->width(plane);
  const int source_height =
      reference_frames_[reference_frame_index]->buffer()->height(plane);
  uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];

  // In frame parallel mode, ensure that the reference block has been decoded
  // and available for referencing.
  if (frame_parallel_) {
    int reference_y_max = -1;
    // Find out the maximum y-coordinate for warping.
    for (int start_y = block_start_y; start_y < block_start_y + height;
         start_y += 8) {
      for (int start_x = block_start_x; start_x < block_start_x + width;
           start_x += 8) {
        const int src_x = (start_x + 4) << subsampling_x_[plane];
        const int src_y = (start_y + 4) << subsampling_y_[plane];
        const int dst_y = src_x * warp_params->params[4] +
                          src_y * warp_params->params[5] +
                          warp_params->params[1];
        const int y4 = dst_y >> subsampling_y_[plane];
        const int iy4 = y4 >> kWarpedModelPrecisionBits;
        reference_y_max = std::max(iy4 + 8, reference_y_max);
      }
    }
    // For U and V planes with subsampling, we need to multiply reference_y_max
    // by 2 since we only track the progress of Y planes.
    reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
    if (reference_frame_progress_cache_[reference_frame_index] <
            reference_y_max &&
        !reference_frames_[reference_frame_index]->WaitUntil(
            reference_y_max,
            &reference_frame_progress_cache_[reference_frame_index])) {
      return false;
    }
  }
  if (is_compound) {
    dsp_.warp_compound(source, source_stride, source_width, source_height,
                       warp_params->params, subsampling_x_[plane],
                       subsampling_y_[plane], block_start_x, block_start_y,
                       width, height, warp_params->alpha, warp_params->beta,
                       warp_params->gamma, warp_params->delta, prediction,
                       /*prediction_stride=*/width);
  } else {
    void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
    ptrdiff_t output_stride =
        is_inter_intra ? /*prediction_stride=*/width : dest_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
    // |is_inter_intra| calculations are written to the |prediction| buffer.
    // Unlike the |is_compound| calculations the output is Pixel and not
    // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
    // Pixels. |prediction_stride| is in units of uint16_t. Adjust
    // |output_stride| to account for this.
    if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
      output_stride *= 2;
    }
#endif
    dsp_.warp(source, source_stride, source_width, source_height,
              warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
              block_start_x, block_start_y, width, height, warp_params->alpha,
              warp_params->beta, warp_params->gamma, warp_params->delta, output,
              output_stride);
  }
  return true;
}

}  // namespace libgav1
diff --git a/src/tile/tile.cc b/src/tile/tile.cc
new file mode 100644
index 0000000..ee48f17
--- /dev/null
+++ b/src/tile/tile.cc
@@ -0,0 +1,2573 @@
// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "src/tile.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <climits> +#include <cstdlib> +#include <cstring> +#include <memory> +#include <new> +#include <numeric> +#include <type_traits> +#include <utility> + +#include "src/frame_scratch_buffer.h" +#include "src/motion_vector.h" +#include "src/reconstruction.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/logging.h" +#include "src/utils/segmentation.h" +#include "src/utils/stack.h" + +namespace libgav1 { +namespace { + +// Import all the constants in the anonymous namespace. +#include "src/scan_tables.inc" + +// Range above kNumQuantizerBaseLevels which the exponential golomb coding +// process is activated. +constexpr int kQuantizerCoefficientBaseRange = 12; +constexpr int kNumQuantizerBaseLevels = 2; +constexpr int kCoeffBaseRangeMaxIterations = + kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1); +constexpr int kEntropyContextLeft = 0; +constexpr int kEntropyContextTop = 1; + +constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3}, + {2, 4, 4, 4, 5}, + {2, 4, 4, 4, 5}, + {2, 4, 4, 4, 5}, + {3, 5, 5, 5, 6}}; + +// The space complexity of DFS is O(branching_factor * max_depth). For the +// parameter tree, branching_factor = 4 (there could be up to 4 children for +// every node) and max_depth (excluding the root) = 5 (to go from a 128x128 +// block all the way to a 4x4 block). The worse-case stack size is 16, by +// counting the number of 'o' nodes in the diagram: +// +// | 128x128 The highest level (corresponding to the +// | root of the tree) has no node in the stack. +// |-----------------+ +// | | | | +// | o o o 64x64 +// | +// |-----------------+ +// | | | | +// | o o o 32x32 Higher levels have three nodes in the stack, +// | because we pop one node off the stack before +// |-----------------+ pushing its four children onto the stack. 
+// | | | | +// | o o o 16x16 +// | +// |-----------------+ +// | | | | +// | o o o 8x8 +// | +// |-----------------+ +// | | | | +// o o o o 4x4 Only the lowest level has four nodes in the +// stack. +constexpr int kDfsStackSize = 16; + +// Mask indicating whether the transform sets contain a particular transform +// type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set. +constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = { + BitMaskSet(0x1), BitMaskSet(0xE0F), BitMaskSet(0x20F), + BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)}; + +constexpr PredictionMode + kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = { + kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal, + kPredictionModeD157, kPredictionModeDc}; + +// Mask used to determine the index for mode_deltas lookup. +constexpr BitMaskSet kPredictionModeDeltasMask( + kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv, + kPredictionModeNearestNearestMv, kPredictionModeNearNearMv, + kPredictionModeNearestNewMv, kPredictionModeNewNearestMv, + kPredictionModeNearNewMv, kPredictionModeNewNearMv, + kPredictionModeNewNewMv); + +// This is computed as: +// min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4. 
+constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = { + 0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6}; + +/* clang-format off */ +constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = { + {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0}, + {0, 0, 0, 0, 0}}, + {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0}, + {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}}, + {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0}, + {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 
21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}}; +/* clang-format on */ + +// Extended the table size from 3 to 16 by repeating the last element to avoid +// the clips to row or column indices. +constexpr uint8_t kCoeffBasePositionContextOffset[16] = { + 26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; + +constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = { + kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal, + kPredictionModeSmooth}; + +// Number of horizontal luma samples before intra block copy can be used. +constexpr int kIntraBlockCopyDelayPixels = 256; +// Number of 64 by 64 blocks before intra block copy can be used. +constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64; + +// Index [i][j] corresponds to the transform size of width 1 << (i + 2) and +// height 1 << (j + 2). +constexpr TransformSize k4x4SizeToTransformSize[5][5] = { + {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16, + kNumTransformSizes, kNumTransformSizes}, + {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16, + kTransformSize8x32, kNumTransformSizes}, + {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16, + kTransformSize16x32, kTransformSize16x64}, + {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16, + kTransformSize32x32, kTransformSize32x64}, + {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16, + kTransformSize64x32, kTransformSize64x64}}; + +// Defined in section 9.3 of the spec. 
// Indexed by the chroma prediction mode; used by ComputeTransformType() for
// intra blocks (see the |bp.uv_mode| lookup there).
constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
    kTransformTypeDctDct, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeDctDct, kTransformTypeAdstAdst, kTransformTypeDctAdst,
    kTransformTypeAdstDct, kTransformTypeAdstDct, kTransformTypeDctAdst,
    kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeAdstAdst, kTransformTypeDctDct};

// Defined in section 5.11.47 of the spec. This array does not contain an entry
// for kTransformSetDctOnly, so the first dimension needs to be
// |kNumTransformSets| - 1.
constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
    {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeIdentityDct, kTransformTypeDctIdentity,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
      kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
      kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
      kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
      kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};

// Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32};

// This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and 64*x
// transforms replaced with *x32 and 32x* respectively.
constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
    kTransformSize32x32};

// ith entry of this array is computed as:
// DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
//           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
//           1)
constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
    0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};

// Defaults used when the corresponding loop restoration parameters are not
// explicitly coded. NOTE(review): inferred from the names (self-guided
// projection / Wiener filter); confirm against the parameter-reading code.
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};

constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};

// Maps compound prediction modes into single modes. For e.g.
// kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
// and kPredictionModeNewMv for index 1. It is used to simplify the logic in
// AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
constexpr PredictionMode
    kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
        {kPredictionModeNearestMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNearMv},
        {kPredictionModeNearestMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearMv},
        {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
        {kPredictionModeNewMv, kPredictionModeNewMv},
};

// Returns the single-reference prediction mode for reference |index| (0 or 1)
// of the compound mode |y_mode|. Non-compound modes are returned unchanged.
PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
  if (y_mode < kPredictionModeNearestNearestMv) {
    return y_mode;
  }
  const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
  assert(lookup_index >= 0);
  return kCompoundToSinglePredictionMode[lookup_index][index];
}

// log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
// dqDenom is always a power of two and hence right shift can be used instead of
// division.
constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};

// Returns the minimum of |length| or |max|-|start|. This is used to clamp array
// indices when accessing arrays whose bound is equal to |max|.
int GetNumElements(int length, int start, int max) {
  return std::min(length, max - start);
}

// Writes |value| into a |rows| x |columns| region starting at |dst|, where
// |stride| is the row pitch in elements.
template <typename T>
void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  // Specialize all columns cases (values in kTransformWidth4x4[]) for better
  // performance.
  switch (columns) {
    case 1:
      MemSetBlock<T>(rows, 1, value, dst, stride);
      break;
    case 2:
      MemSetBlock<T>(rows, 2, value, dst, stride);
      break;
    case 4:
      MemSetBlock<T>(rows, 4, value, dst, stride);
      break;
    case 8:
      MemSetBlock<T>(rows, 8, value, dst, stride);
      break;
    default:
      assert(columns == 16);
      MemSetBlock<T>(rows, 16, value, dst, stride);
      break;
  }
}

// Records |tx_type| for the |w4| x |h4| region of 4x4 blocks whose top-left
// corner is (|x4|, |y4|); offsets are taken relative to the block's own
// top-left corner before writing into |transform_types|.
void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
                      TransformType tx_type,
                      TransformType transform_types[32][32]) {
  const int y_offset = y4 - block.row4x4;
  const int x_offset = x4 - block.column4x4;
  TransformType* const dst = &transform_types[y_offset][x_offset];
  SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
}

// Fills a |rows| x |columns| region of the motion field with
// |reference_frame_to_store| and |mv_to_store|. memset on the reference frame
// array is safe because each entry is a single byte (checked below).
void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
                         const MotionVector& mv_to_store, ptrdiff_t stride,
                         int rows, int columns,
                         ReferenceFrameType* reference_frame_row_start,
                         MotionVector* mv) {
  static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
  do {
    // Don't switch the following two memory setting functions.
    // Some ARM CPUs are quite sensitive to the order.
    memset(reference_frame_row_start, reference_frame_to_store, columns);
    std::fill(mv, mv + columns, mv_to_store);
    reference_frame_row_start += stride;
    mv += stride;
  } while (--rows != 0);
}

// Inverse transform process assumes that the quantized coefficients are stored
// as a virtual 2d array of size |tx_width| x tx_height. If transform width is
// 64, then this assumption is broken because the scan order used for populating
// the coefficients for such transforms is the same as the one used for
// corresponding transform with width 32 (e.g. the scan order used for 64x16 is
// the same as the one used for 32x16). So we must restore the coefficients to
// their correct positions and clean the positions they occupied.
+template <typename ResidualType> +void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width, + ResidualType* residual) { + if (tx_width != 64) return; + const int rows = clamped_tx_height - 2; + auto* src = residual + 32 * rows; + residual += 64 * rows; + // Process 2 rows in each loop in reverse order to avoid overwrite. + int x = rows >> 1; + do { + // The 2 rows can be processed in order. + memcpy(residual, src, 32 * sizeof(src[0])); + memcpy(residual + 64, src + 32, 32 * sizeof(src[0])); + memset(src + 32, 0, 32 * sizeof(src[0])); + src -= 64; + residual -= 128; + } while (--x); + // Process the second row. The first row is already correct. + memcpy(residual + 64, src + 32, 32 * sizeof(src[0])); + memset(src + 32, 0, 32 * sizeof(src[0])); +} + +void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) { + // 7.10.2.14 (part 1). (also contains implementations of 5.11.53 + // and 5.11.54). + constexpr int kMvBorder4x4 = 4; + const int row_border = kMvBorder4x4 + block.height4x4; + const int column_border = kMvBorder4x4 + block.width4x4; + const int macroblocks_to_top_edge = -block.row4x4; + const int macroblocks_to_bottom_edge = + block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4; + const int macroblocks_to_left_edge = -block.column4x4; + const int macroblocks_to_right_edge = + block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4; + min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border); + min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border); + max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border); + max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border); +} + +// Section 8.3.2 in the spec, under coeff_base_eob. 
int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
  if (index == 0) return 0;
  const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
  const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
  const int tx_height = kTransformHeight[adjusted_tx_size];
  if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
  if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
  return 3;
}

// Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
// on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
// the end of block case.
int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
                                TransformClass tx_class) {
  if (pos == 0) return 0;
  const int tx_width = 1 << adjusted_tx_width_log2;
  const int row = pos >> adjusted_tx_width_log2;
  const int column = pos & (tx_width - 1);
  // This return statement is equivalent to:
  //   return ((tx_class == kTransformClass2D && (row | column) < 2) ||
  //           (tx_class == kTransformClassHorizontal && column == 0) ||
  //           (tx_class == kTransformClassVertical && row == 0))
  //              ? 7
  //              : 14;
  return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
                 static_cast<int>((row | column) < 2)) |
                (tx_class & static_cast<int>(column == 0)) |
                ((tx_class >> 1) & static_cast<int>(row == 0)));
}

}  // namespace

// Binds this tile to the frame-level state and scratch buffers and computes
// its 4x4 and superblock extents. Work that can fail (allocations, motion
// field setup) is deferred to Init().
Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
           const ObuSequenceHeader& sequence_header,
           const ObuFrameHeader& frame_header,
           RefCountedBuffer* const current_frame, const DecoderState& state,
           FrameScratchBuffer* const frame_scratch_buffer,
           const WedgeMaskArray& wedge_masks,
           const QuantizerMatrix& quantizer_matrix,
           SymbolDecoderContext* const saved_symbol_decoder_context,
           const SegmentationMap* prev_segment_ids,
           PostFilter* const post_filter, const dsp::Dsp* const dsp,
           ThreadPool* const thread_pool,
           BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
           bool use_intra_prediction_buffer)
    : number_(tile_number),
      row_(number_ / frame_header.tile_info.tile_columns),
      column_(number_ % frame_header.tile_info.tile_columns),
      data_(data),
      size_(size),
      read_deltas_(false),
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      current_quantizer_index_(frame_header.quantizer.base_index),
      sequence_header_(sequence_header),
      frame_header_(frame_header),
      reference_frame_sign_bias_(state.reference_frame_sign_bias),
      reference_frames_(state.reference_frame),
      motion_field_(frame_scratch_buffer->motion_field),
      reference_order_hint_(state.reference_order_hint),
      wedge_masks_(wedge_masks),
      quantizer_matrix_(quantizer_matrix),
      reader_(data_, size_, frame_header_.enable_cdf_update),
      symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
      saved_symbol_decoder_context_(saved_symbol_decoder_context),
      prev_segment_ids_(prev_segment_ids),
      dsp_(*dsp),
      post_filter_(*post_filter),
      block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
      quantizer_(sequence_header_.color_config.bitdepth,
                 &frame_header_.quantizer),
      residual_size_((sequence_header_.color_config.bitdepth == 8)
                         ? sizeof(int16_t)
                         : sizeof(int32_t)),
      intra_block_copy_lag_(
          frame_header_.allow_intrabc
              ? (sequence_header_.use_128x128_superblock ? 3 : 5)
              : 1),
      current_frame_(*current_frame),
      cdef_index_(frame_scratch_buffer->cdef_index),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      thread_pool_(thread_pool),
      residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
      tile_scratch_buffer_pool_(
          &frame_scratch_buffer->tile_scratch_buffer_pool),
      pending_tiles_(pending_tiles),
      frame_parallel_(frame_parallel),
      use_intra_prediction_buffer_(use_intra_prediction_buffer),
      intra_prediction_buffer_(
          use_intra_prediction_buffer_
              ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
              : nullptr) {
  row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
  row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
  column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
  column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  // Superblocks are square, so the height log2 equals the width log2.
  const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
  superblock_rows_ =
      (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
  superblock_columns_ =
      (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
      block_width4x4_log2;
  // If |split_parse_and_decode_| is true, we do the necessary setup for
  // splitting the parsing and the decoding steps. This is done in the following
  // two cases:
  //   1) If there is multi-threading within a tile (this is done if
  //   |thread_pool_| is not nullptr and if there are at least as many
  //   superblock columns as |intra_block_copy_lag_|).
  //   2) If |frame_parallel| is true.
  split_parse_and_decode_ = (thread_pool_ != nullptr &&
                             superblock_columns_ > intra_block_copy_lag_) ||
                            frame_parallel;
  if (frame_parallel_) {
    reference_frame_progress_cache_.fill(INT_MIN);
  }
  memset(delta_lf_, 0, sizeof(delta_lf_));
  delta_lf_all_zero_ = true;
  const YuvBuffer& buffer = post_filter_.frame_buffer();
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    // Verify that the borders are big enough for Reconstruct(). max_tx_length
    // is the maximum value of tx_width and tx_height for the plane.
    const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
    // Reconstruct() may overwrite on the right. Since the right border of a
    // row is followed in memory by the left border of the next row, the
    // number of extra pixels to the right of a row is at least the sum of the
    // left and right borders.
    //
    // Note: This assertion actually checks the sum of the left and right
    // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
    // and vertically shifted version of |buffer|. Since the sum of the left and
    // right borders is not changed by the shift, we can just check the sum of
    // the left and right borders of |buffer|.
    assert(buffer.left_border(plane) + buffer.right_border(plane) >=
           max_tx_length - 1);
    // Reconstruct() may overwrite on the bottom. We need an extra border row
    // on the bottom because we need the left border of that row.
    //
    // Note: This assertion checks the bottom border of
    // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
    // shift that the PostFilter constructor applied to |buffer| and reduce the
    // bottom border by that amount.
#ifndef NDEBUG
    const int vertical_shift = static_cast<int>(
        (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
        buffer.stride(plane));
    const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
    assert(bottom_border >= max_tx_length);
#endif
    // In AV1, a transform block of height H starts at a y coordinate that is
    // a multiple of H. If a transform block at the bottom of the frame has
    // height H, then Reconstruct() will write up to the row with index
    // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
    // rows Reconstruct() may write to is
    // Align(buffer.height(plane), max_tx_length).
    buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
                         buffer.stride(plane),
                         post_filter_.GetUnfilteredBuffer(plane));
    const int plane_height =
        SubsampledValue(frame_header_.height, subsampling_y_[plane]);
    deblock_row_limit_[plane] =
        std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
                                            << subsampling_y_[plane]);
    const int plane_width =
        SubsampledValue(frame_header_.width, subsampling_x_[plane]);
    deblock_column_limit_[plane] =
        std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
                                               << subsampling_x_[plane]);
  }
}

// Allocates the per-tile entropy context buffers and either the threaded
// residual buffers or a single residual buffer, then sets up the motion field
// when use_ref_frame_mvs is enabled. Returns false on allocation failure.
bool Tile::Init() {
  assert(coefficient_levels_.size() == dc_categories_.size());
  for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
    const int contexts_per_plane = (i == kEntropyContextLeft)
                                       ?
        frame_header_.rows4x4
        : frame_header_.columns4x4;
    if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
      return false;
    }
    if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
      return false;
    }
  }
  if (split_parse_and_decode_) {
    assert(residual_buffer_pool_ != nullptr);
    if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
                                         /*zero_initialize=*/false)) {
      LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
      return false;
    }
  } else {
    // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
    // checks when parsing quantized coefficients.
    residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
        32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
    if (residual_buffer_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
      return false;
    }
    prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
    if (prediction_parameters_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
      return false;
    }
  }
  if (frame_header_.use_ref_frame_mvs) {
    assert(sequence_header_.enable_order_hint);
    SetupMotionField(frame_header_, current_frame_, reference_frames_,
                     row4x4_start_, row4x4_end_, column4x4_start_,
                     column4x4_end_, &motion_field_);
  }
  ResetLoopRestorationParams();
  return true;
}

// Processes (parses and/or decodes, per |processing_mode|) every superblock
// of the superblock row starting at |row4x4|. Rows outside the tile are
// ignored. Returns false if any superblock fails.
template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
bool Tile::ProcessSuperBlockRow(int row4x4,
                                TileScratchBuffer* const scratch_buffer) {
  if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
  assert(scratch_buffer != nullptr);
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
       column4x4 += block_width4x4) {
    if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4, scratch_buffer,
                           processing_mode)) {
      LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
                   row4x4, column4x4);
      return false;
    }
  }
  if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
    SaveSymbolDecoderContext();
  }
  if (processing_mode == kProcessingModeDecodeOnly ||
      processing_mode == kProcessingModeParseAndDecode) {
    PopulateIntraPredictionBuffer(row4x4);
  }
  return true;
}

// Used in frame parallel mode. The symbol decoder context need not be saved in
// this case since it was done when parsing was complete.
template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
    int row4x4, TileScratchBuffer* scratch_buffer);
// Used in non frame parallel mode.
template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
    int row4x4, TileScratchBuffer* scratch_buffer);

// Saves the symbol decoder context into |saved_symbol_decoder_context_| when
// this tile is the frame's designated context-update tile.
void Tile::SaveSymbolDecoderContext() {
  if (frame_header_.enable_frame_end_update_cdf &&
      number_ == frame_header_.tile_info.context_update_id) {
    *saved_symbol_decoder_context_ = symbol_decoder_context_;
  }
}

bool Tile::ParseAndDecode() {
  // If this is the main thread, we build the loop filter bit masks when parsing
  // so that it happens in the current thread. This ensures that the main thread
  // does as much work as possible.
  if (split_parse_and_decode_) {
    if (!ThreadedParseAndDecode()) return false;
    SaveSymbolDecoderContext();
    return true;
  }
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    pending_tiles_->Decrement(false);
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
       row4x4 += block_width4x4) {
    if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
            row4x4, scratch_buffer.get())) {
      pending_tiles_->Decrement(false);
      return false;
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  pending_tiles_->Decrement(true);
  return true;
}

// Runs the parse-only pass over every superblock row of the tile and then
// saves the symbol decoder context.
bool Tile::Parse() {
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
       row4x4 += block_width4x4) {
    if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
            row4x4, scratch_buffer.get())) {
      return false;
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  SaveSymbolDecoderContext();
  return true;
}

// Runs the decode-only pass. After each superblock row it applies the deblock
// filtering this tile can do on its own and, once every tile column of the
// frame has finished the row, notifies the post filtering thread via
// |superblock_row_progress_condvar|.
bool Tile::Decode(
    std::mutex* const mutex, int* const superblock_row_progress,
    std::condition_variable* const superblock_row_progress_condvar) {
  // Same values as kNum4x4BlocksWide[SuperBlockSize()] and its log2.
  const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
  const int block_width4x4_log2 =
      sequence_header_.use_128x128_superblock ? 5 : 4;
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
       row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
    if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
            row4x4, scratch_buffer.get())) {
      return false;
    }
    if (post_filter_.DoDeblock()) {
      // Apply vertical deblock filtering for all the columns in this tile
      // except for the first 64 columns.
      post_filter_.ApplyDeblockFilter(
          kLoopFilterTypeVertical, row4x4,
          column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
          block_width4x4);
      // If this is the first superblock row of the tile, then we cannot apply
      // horizontal deblocking here since we don't know if the top row is
      // available. So it will be done by the calling thread in that case.
      if (row4x4 != row4x4_start_) {
        // Apply horizontal deblock filtering for all the columns in this tile
        // except for the first and the last 64 columns.
        // Note about the last tile of each row: For the last tile,
        // column4x4_end may not be a multiple of 16. In that case it is still
        // okay to simply subtract 16 since ApplyDeblockFilter() will only do
        // the filters in increments of 64 columns (or 32 columns for chroma
        // with subsampling).
        post_filter_.ApplyDeblockFilter(
            kLoopFilterTypeHorizontal, row4x4,
            column4x4_start_ + kNum4x4InLoopFilterUnit,
            column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
      }
    }
    bool notify;
    {
      std::unique_lock<std::mutex> lock(*mutex);
      notify = ++superblock_row_progress[index] ==
               frame_header_.tile_info.tile_columns;
    }
    if (notify) {
      // We are done decoding this superblock row. Notify the post filtering
      // thread.
      superblock_row_progress_condvar[index].notify_one();
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  return true;
}

// Parses all superblocks on the current thread while scheduling their decode
// jobs on |thread_pool_| as soon as their dependencies (see CanDecode()) are
// met.
bool Tile::ThreadedParseAndDecode() {
  {
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
      pending_tiles_->Decrement(false);
      LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
      return false;
    }
    // Account for the parsing job.
    ++threading_.pending_jobs;
  }

  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];

  // Begin parsing.
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    pending_tiles_->Decrement(false);
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
       row4x4 += block_width4x4, ++row_index) {
    for (int column4x4 = column4x4_start_, column_index = 0;
         column4x4 < column4x4_end_;
         column4x4 += block_width4x4, ++column_index) {
      if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4,
                             scratch_buffer.get(), kProcessingModeParseOnly)) {
        std::lock_guard<std::mutex> lock(threading_.mutex);
        threading_.abort = true;
        break;
      }
      std::unique_lock<std::mutex> lock(threading_.mutex);
      if (threading_.abort) break;
      threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
      // Schedule the decoding of this superblock if it is allowed.
      if (CanDecode(row_index, column_index)) {
        ++threading_.pending_jobs;
        threading_.sb_state[row_index][column_index] =
            kSuperBlockStateScheduled;
        lock.unlock();
        thread_pool_->Schedule(
            [this, row_index, column_index, block_width4x4]() {
              DecodeSuperBlock(row_index, column_index, block_width4x4);
            });
      }
    }
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (threading_.abort) break;
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));

  // We are done parsing. We can return here since the calling thread will make
  // sure that it waits for all the superblocks to be decoded.
  //
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  threading_.mutex.lock();
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  threading_.mutex.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
  return job_succeeded;
}

// Returns true if the superblock at (|row_index|, |column_index|) is parsed
// and all the superblocks it depends on have been decoded.
// Caller must hold threading_.mutex (NOTE(review): implied by the unguarded
// reads of |threading_.sb_state|; confirm with the call sites).
bool Tile::CanDecode(int row_index, int column_index) const {
  assert(row_index >= 0);
  assert(column_index >= 0);
  // If |threading_.sb_state[row_index][column_index]| is not equal to
  // kSuperBlockStateParsed, then return false. This is ok because if
  // |threading_.sb_state[row_index][column_index]| is equal to:
  //   kSuperBlockStateNone - then the superblock is not yet parsed.
  //   kSuperBlockStateScheduled - then the superblock is already scheduled for
  //   decode.
  //   kSuperBlockStateDecoded - then the superblock has already been decoded.
  if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
      threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
    return false;
  }
  // First superblock has no dependencies.
  if (row_index == 0 && column_index == 0) {
    return true;
  }
  // Superblocks in the first row only depend on the superblock to the left of
  // it.
  if (row_index == 0) {
    return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
  }
  // All other superblocks depend on superblock to the left of it (if one
  // exists) and superblock to the top right with a lag of
  // |intra_block_copy_lag_| (if one exists).
  const int top_right_column_index =
      std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
  return threading_.sb_state[row_index - 1][top_right_column_index] ==
             kSuperBlockStateDecoded &&
         (column_index == 0 ||
          threading_.sb_state[row_index][column_index - 1] ==
              kSuperBlockStateDecoded);
}

// Worker-thread entry point: decodes one superblock and then schedules any
// neighbors that become decodable as a result.
void Tile::DecodeSuperBlock(int row_index, int column_index,
                            int block_width4x4) {
  const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
  const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  bool ok = scratch_buffer != nullptr;
  if (ok) {
    ok = ProcessSuperBlock(row4x4, column4x4, block_width4x4,
                           scratch_buffer.get(), kProcessingModeDecodeOnly);
    tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  }
  std::unique_lock<std::mutex> lock(threading_.mutex);
  if (ok) {
    threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
    // Candidate rows and columns that we could potentially begin the decoding
    // (if it is allowed to do so). The candidates are:
    //   1) The superblock to the bottom-left of the current superblock with a
    //   lag of |intra_block_copy_lag_| (or the beginning of the next superblock
    //   row in case there are less than |intra_block_copy_lag_| superblock
    //   columns in the Tile).
    //   2) The superblock to the right of the current superblock.
    const int candidate_row_indices[] = {row_index + 1, row_index};
    const int candidate_column_indices[] = {
        std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
    for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
         ++i) {
      const int candidate_row_index = candidate_row_indices[i];
      const int candidate_column_index = candidate_column_indices[i];
      if (!CanDecode(candidate_row_index, candidate_column_index)) {
        continue;
      }
      ++threading_.pending_jobs;
      threading_.sb_state[candidate_row_index][candidate_column_index] =
          kSuperBlockStateScheduled;
      lock.unlock();
      thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
                              block_width4x4]() {
        DecodeSuperBlock(candidate_row_index, candidate_column_index,
                         block_width4x4);
      });
      lock.lock();
    }
  } else {
    threading_.abort = true;
  }
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  lock.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
}

// Copies the bottom row of pixels of the superblock row starting at |row4x4|
// into |intra_prediction_buffer_| for each plane. Does nothing for the last
// superblock row of the tile or when the buffer is not in use.
void Tile::PopulateIntraPredictionBuffer(int row4x4) {
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
    return;
  }
  const size_t pixel_size =
      (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
                                                   : sizeof(uint16_t));
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    const int row_to_copy =
        (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
    const size_t pixels_to_copy =
        (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
         subsampling_x_[plane]) *
        pixel_size;
    const size_t column_start =
        MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
    void* start;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (sequence_header_.color_config.bitdepth > 8) {
      Array2DView<uint16_t> buffer(
          buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
          reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
      start = &buffer[row_to_copy][column_start];
    } else  // NOLINT
#endif
    {
      start = &buffer_[plane][row_to_copy][column_start];
    }
    memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
           start, pixels_to_copy);
  }
}

// Derives the context used when decoding whether a transform block is all
// zero, from the coefficient levels (and, for chroma, DC categories) of the
// regions above and to the left of the transform block.
int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
                                     TransformSize tx_size, int x4, int y4,
                                     int w4, int h4) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];

  const int tx_width = kTransformWidth[tx_size];
  const int tx_height = kTransformHeight[tx_size];
  const BlockSize plane_size = block.residual_size[plane];
  const int block_width = kBlockWidthPixels[plane_size];
  const int block_height = kBlockHeightPixels[plane_size];

  int top = 0;
  int left = 0;
  const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
  const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
  if (plane == kPlaneY) {
    if (block_width == tx_width && block_height == tx_height) return 0;
    const uint8_t* coefficient_levels =
        &coefficient_levels_[kEntropyContextTop][plane][x4];
    for (int i = 0; i < num_top_elements; ++i) {
      top = std::max(top, static_cast<int>(coefficient_levels[i]));
    }
    coefficient_levels =
        &coefficient_levels_[kEntropyContextLeft][plane][y4];
    for (int i = 0; i < num_left_elements; ++i) {
      left = std::max(left, static_cast<int>(coefficient_levels[i]));
    }
    assert(top <= 4);
    assert(left <= 4);
    // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
    // for top and left.
    return kAllZeroContextsByTopLeft[top][left];
  }
  const uint8_t* coefficient_levels =
      &coefficient_levels_[kEntropyContextTop][plane][x4];
  const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
  for (int i = 0; i < num_top_elements; ++i) {
    top |= coefficient_levels[i];
    top |= dc_categories[i];
  }
  coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
  dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
  for (int i = 0; i < num_left_elements; ++i) {
    left |= coefficient_levels[i];
    left |= dc_categories[i];
  }
  return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
         3 * static_cast<int>(block_width * block_height >
                              tx_width * tx_height);
}

// Selects which set of transform types is allowed for |tx_size| given
// inter/intra status and the reduced_tx_set frame header flag.
TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
  const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
  const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
  if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
  if (is_inter) {
    if (frame_header_.reduced_tx_set ||
        tx_size_square_max == kTransformSize32x32) {
      return kTransformSetInter3;
    }
    if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
    return kTransformSetInter1;
  }
  if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
  if (frame_header_.reduced_tx_set ||
      tx_size_square_min == kTransformSize16x16) {
    return kTransformSetIntra2;
  }
  return kTransformSetIntra1;
}

// Returns the transform type for the transform block at (|block_x|,
// |block_y|). Lossless segments and 64x64-class transforms always use
// DCT_DCT; for luma the previously recorded type is returned, and for chroma
// the type is derived from the co-located luma block (inter) or the chroma
// prediction mode (intra), falling back to DCT_DCT when the derived type is
// not in the allowed set.
TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
                                         TransformSize tx_size, int block_x,
                                         int block_y) {
  const BlockParameters& bp = *block.bp;
  const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
  if (frame_header_.segmentation.lossless[bp.segment_id] ||
      tx_size_square_max == kTransformSize64x64) {
    return kTransformTypeDctDct;
  }
  if (plane == kPlaneY) {
    return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
  }
  const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
  TransformType tx_type;
  if (bp.is_inter) {
    const int x4 =
        std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
    const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
    tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
  } else {
    tx_type = kModeToTransformType[bp.uv_mode];
  }
  return kTransformTypeInSetMask[tx_set].Contains(tx_type)
             ? tx_type
             : kTransformTypeDctDct;
}

void Tile::ReadTransformType(const Block& block, int x4, int y4,
                             TransformSize tx_size) {
  BlockParameters& bp = *block.bp;
  const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);

  TransformType tx_type = kTransformTypeDctDct;
  if (tx_set != kTransformSetDctOnly &&
      frame_header_.segmentation.qindex[bp.segment_id] > 0) {
    const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
    const int cdf_tx_size_index =
        TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
    uint16_t* cdf;
    if (bp.is_inter) {
      cdf = symbol_decoder_context_
                .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
      switch (tx_set) {
        case kTransformSetInter1:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<16>(cdf));
          break;
        case kTransformSetInter2:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<12>(cdf));
          break;
        default:
          assert(tx_set == kTransformSetInter3);
          tx_type = static_cast<TransformType>(reader_.ReadSymbol(cdf));
          break;
      }
    } else {
      const PredictionMode intra_direction =
          block.bp->prediction_parameters->use_filter_intra
              ?
kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters + ->filter_intra_mode] + : bp.y_mode; + cdf = + symbol_decoder_context_ + .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction]; + assert(tx_set == kTransformSetIntra1 || tx_set == kTransformSetIntra2); + tx_type = static_cast<TransformType>((tx_set == kTransformSetIntra1) + ? reader_.ReadSymbol<7>(cdf) + : reader_.ReadSymbol<5>(cdf)); + } + + // This array does not contain an entry for kTransformSetDctOnly, so the + // first dimension needs to be offset by 1. + tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type]; + } + SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size], + kTransformHeight4x4[tx_size], tx_type, transform_types_); +} + +// Section 8.3.2 in the spec, under coeff_base and coeff_br. +// Bottom boundary checks are avoided by the padded rows. +// For a coefficient near the right boundary, the two right neighbors and the +// one bottom-right neighbor may be out of boundary. We don't check the right +// boundary for them, because the out of boundary neighbors project to positions +// above the diagonal line which goes through the current coefficient and these +// positions are still all 0s according to the diagonal scan order. 
// Decodes coeff_base and coeff_br for a block whose transform class is 2D.
// Coefficients are parsed in reverse scan order: the caller has already read
// the level at scan position |eob| - 1; this function reads positions
// |eob| - 2 down to 1 in the loop, and position 0 (the DC position) in a
// separate section at the end. |level_buffer| records the base-level symbol
// (before coeff_br extension) at each position, which feeds the neighbor-sum
// context of later-parsed (earlier-in-scan) coefficients; |quantized_buffer|
// records the extended level used for the coeff_br contexts. Signs and
// dequantization are applied later by the caller.
template <typename ResidualType>
void Tile::ReadCoeffBase2D(
    const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  for (int i = eob - 2; i >= 1; --i) {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    // Sum of the five already-parsed neighbors: {0,1}, {1,0}, {1,1}, {0,2}
    // and {2,0}. Bottom neighbors never read out of bounds thanks to the
    // zero-initialized padding rows (see the comment above this function).
    const int neighbor_sum = 1 + levels[1] + levels[tx_width] +
                             levels[tx_width + 1] + levels[2] +
                             levels[MultiplyBy2(tx_width)];
    const int context =
        ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
        kCoeffBaseContextOffset[tx_size][std::min(row, 4)][std::min(column, 4)];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +         // {0, 1}
                                          quantized[tx_width] +      // {1, 0}
                                          quantized[tx_width + 1]));  // {1, 1}
      context += 14 >> static_cast<int>((row | column) < 2);
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
  // Read position 0.
  {
    auto* const quantized = &quantized_buffer[0];
    // Position 0 always uses coeff_base context 0.
    int level = reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[0]);
    level_buffer[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int context =
          std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
                                quantized[tx_width] +       // {1, 0}
                                quantized[tx_width + 1]));  // {1, 1}
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
}

// Section 8.3.2 in the spec, under coeff_base and coeff_br.
// Bottom boundary checks are avoided by the padded rows.
// For a coefficient near the right boundary, the four right neighbors may be
// out of boundary. We don't do the boundary check for the first three right
// neighbors, because even for the transform blocks with smallest width 4, the
// first three out of boundary neighbors project to positions left of the
// current coefficient and these positions are still all 0s according to the
// column scan order. However, when transform block width is 4 and the current
// coefficient is on the right boundary, its fourth right neighbor projects to
// the under position on the same column, which could be nonzero. Therefore, we
// must skip the fourth right neighbor. To make it simple, for any coefficient,
// we always do the boundary check for its fourth right neighbor.
// Decodes coeff_base and coeff_br for the horizontal transform class, in
// reverse scan order from position |eob| - 2 down to 0 (the level at
// |eob| - 1 has already been read by the caller). The context neighbors are
// the four positions to the right plus the one below; see the comment above
// for why only the fourth right neighbor needs an explicit boundary check.
template <typename ResidualType>
void Tile::ReadCoeffBaseHorizontal(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (levels[1] +                                    // {0, 1}
             levels[tx_width] +                             // {1, 0}
             levels[2] +                                    // {0, 2}
             levels[3] +                                    // {0, 3}
             ((column + 4 < tx_width) ? levels[4] : 0));    // {0, 4}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[column];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +     // {0, 1}
                                          quantized[tx_width] +  // {1, 0}
                                          quantized[2]));        // {0, 2}
      if (pos != 0) {
        context += 14 >> static_cast<int>(column == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}

// Section 8.3.2 in the spec, under coeff_base and coeff_br.
// Bottom boundary checks are avoided by the padded rows.
// Right boundary check is performed explicitly.
// Decodes coeff_base and coeff_br for the vertical transform class, in
// reverse scan order from position |eob| - 2 down to 0 (the level at
// |eob| - 1 has already been read by the caller). The context neighbors are
// the four positions below plus the one to the right; only the right
// neighbor needs a boundary check (see the comment above).
template <typename ResidualType>
void Tile::ReadCoeffBaseVertical(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (((column + 1 < tx_width) ? levels[1] : 0) +  // {0, 1}
             levels[tx_width] +                           // {1, 0}
             levels[MultiplyBy2(tx_width)] +              // {2, 0}
             levels[tx_width * 3] +                       // {3, 0}
             levels[MultiplyBy4(tx_width)]);              // {4, 0}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[row];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
      int context =
          std::min(6, DivideBy2(1 + quantized_column1 +              // {0, 1}
                                quantized[tx_width] +                // {1, 0}
                                quantized[MultiplyBy2(tx_width)]));  // {2, 0}
      if (pos != 0) {
        context += 14 >> static_cast<int>(row == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}

// Derives the context for decoding the DC sign from the DC sign categories of
// the neighboring transform blocks above and to the left of this one.
int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
  // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
  int8_t dc_sign = std::accumulate(
      dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
  dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
  dc_sign = std::accumulate(
      dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
  // This return statement is equivalent to:
  //   if (dc_sign < 0) return 1;
  //   if (dc_sign > 0) return 2;
  //   return 0;
  // And it is better than:
  //   return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
  return static_cast<int>(dc_sign < 0) +
         MultiplyBy2(static_cast<int>(dc_sign > 0));
}

// Records the coefficient level and DC sign category of this transform block
// into the top and left entropy context arrays, for use as context by
// subsequently parsed neighboring blocks. The stores are clipped against the
// frame boundary via GetNumElements().
void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
                              uint8_t coefficient_level, int8_t dc_category) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
  memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
         num_top_elements);
  memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
         num_top_elements);
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
  const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
  memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
         coefficient_level, num_left_elements);
  memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
         num_left_elements);
}

// For the coefficient at scan index |i| (or the DC coefficient when
// |is_dc_coefficient| is true): reads the sign, reads the Exp-Golomb suffix
// when the level saturated the coeff_br range, dequantizes the level and
// stores the signed, clipped result back into |residual_buffer|. Updates
// |dc_category| (DC only) and accumulates into |coefficient_level|.
// Returns false if the bitstream contains an invalid Golomb length.
template <typename ResidualType, bool is_dc_coefficient>
bool Tile::ReadSignAndApplyDequantization(
    const uint16_t* const scan, int i, int q_value,
    const uint8_t* const quantizer_matrix, int shift, int max_value,
    uint16_t* const dc_sign_cdf, int8_t* const dc_category,
    int* const coefficient_level, ResidualType* residual_buffer) {
  const int pos = is_dc_coefficient ? 0 : scan[i];
  // If residual_buffer[pos] is zero, then the rest of the function has no
  // effect.
  int level = residual_buffer[pos];
  if (level == 0) return true;
  // The DC sign is context-coded; AC signs are raw bits.
  const int sign = is_dc_coefficient
                       ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
                       : reader_.ReadBit();
  if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
    // Exp-Golomb suffix: unary length prefix followed by |length| - 1 bits.
    int length = 0;
    bool golomb_length_bit = false;
    do {
      golomb_length_bit = static_cast<bool>(reader_.ReadBit());
      ++length;
      if (length > 20) {
        LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
        return false;
      }
    } while (!golomb_length_bit);
    int x = 1;
    for (int i = length - 2; i >= 0; --i) {
      x = (x << 1) | reader_.ReadBit();
    }
    level += x - 1;
  }
  if (is_dc_coefficient) {
    *dc_category = (sign != 0) ? -1 : 1;
  }
  level &= 0xfffff;
  *coefficient_level += level;
  // Apply dequantization. Step 1 of section 7.12.3 in the spec.
  int q = q_value;
  if (quantizer_matrix != nullptr) {
    q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
  }
  // The intermediate multiplication can exceed 32 bits, so it has to be
  // performed by promoting one of the values to int64_t.
  int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
  dequantized_value >>= shift;
  // At this point:
  //   * |dequantized_value| is always non-negative.
  //   * |sign| can be either 0 or 1.
  //   * min_value = -(max_value + 1).
  // We need to apply the following:
  // dequantized_value = sign ? -dequantized_value : dequantized_value;
  // dequantized_value = Clip3(dequantized_value, min_value, max_value);
  //
  // Note that -x == ~(x - 1).
  //
  // Now, The above two lines can be done with a std::min and xor as follows:
  dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
  residual_buffer[pos] = dequantized_value;
  return true;
}

// Reads the incremental coeff_br (base range) symbols and returns their sum.
// Parsing stops early when a symbol is less than the maximum increment, or
// after kCoeffBaseRangeMaxIterations symbols.
int Tile::ReadCoeffBaseRange(uint16_t* cdf) {
  int level = 0;
  for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
    const int coeff_base_range =
        reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(cdf);
    level += coeff_base_range;
    if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
  }
  return level;
}

// Parses and dequantizes the transform coefficients of one transform block
// (Section 5.11.39 in the spec): all_zero flag, transform type, end-of-block
// position, base levels and level extensions, then signs and dequantization.
// Returns the end-of-block position (0 when the block is all zero) or -1 on
// parse failure.
template <typename ResidualType>
int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
                                    int start_x, int start_y,
                                    TransformSize tx_size,
                                    TransformType* const tx_type) {
  const int x4 = DivideBy4(start_x);
  const int y4 = DivideBy4(start_y);
  const int w4 = kTransformWidth4x4[tx_size];
  const int h4 = kTransformHeight4x4[tx_size];
  const int tx_size_context = kTransformSizeContext[tx_size];
  int context =
      GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
  const bool all_zero = reader_.ReadSymbol(
      symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
  if (all_zero) {
    if (plane == kPlaneY) {
      SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
                       transform_types_);
    }
    SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
    // This is not used in this case, so it can be set to any value.
    *tx_type = kNumTransformTypes;
    return 0;
  }
  const int tx_width = kTransformWidth[tx_size];
  const int tx_height = kTransformHeight[tx_size];
  const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
  const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
  const int tx_padding =
      (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
  auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
  // Clear padding to avoid bottom boundary checks when parsing quantized
  // coefficients.
  memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
  uint8_t level_buffer[(32 + kResidualPaddingVertical) * 32];
  memset(
      level_buffer, 0,
      kTransformWidth[adjusted_tx_size] * kTransformHeight[adjusted_tx_size] +
          tx_padding);
  const int clamped_tx_height = std::min(tx_height, 32);
  if (plane == kPlaneY) {
    ReadTransformType(block, x4, y4, tx_size);
  }
  BlockParameters& bp = *block.bp;
  *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
  const int eob_multi_size = kEobMultiSizeLookup[tx_size];
  const PlaneType plane_type = GetPlaneType(plane);
  const TransformClass tx_class = GetTransformClass(*tx_type);
  context = static_cast<int>(tx_class != kTransformClass2D);
  // Decode the end-of-block position class; the CDF is selected by the
  // (adjusted) number of coefficients in the transform block.
  int eob_pt = 1;
  switch (eob_multi_size) {
    case 0:
      eob_pt += reader_.ReadSymbol<kEobPt16SymbolCount>(
          symbol_decoder_context_.eob_pt_16_cdf[plane_type][context]);
      break;
    case 1:
      eob_pt += reader_.ReadSymbol<kEobPt32SymbolCount>(
          symbol_decoder_context_.eob_pt_32_cdf[plane_type][context]);
      break;
    case 2:
      eob_pt += reader_.ReadSymbol<kEobPt64SymbolCount>(
          symbol_decoder_context_.eob_pt_64_cdf[plane_type][context]);
      break;
    case 3:
      eob_pt += reader_.ReadSymbol<kEobPt128SymbolCount>(
          symbol_decoder_context_.eob_pt_128_cdf[plane_type][context]);
      break;
    case 4:
      eob_pt += reader_.ReadSymbol<kEobPt256SymbolCount>(
          symbol_decoder_context_.eob_pt_256_cdf[plane_type][context]);
      break;
    case 5:
      eob_pt += reader_.ReadSymbol<kEobPt512SymbolCount>(
          symbol_decoder_context_.eob_pt_512_cdf[plane_type]);
      break;
    case 6:
    default:
      eob_pt += reader_.ReadSymbol<kEobPt1024SymbolCount>(
          symbol_decoder_context_.eob_pt_1024_cdf[plane_type]);
      break;
  }
  // Refine eob within the class: one context-coded extra bit, then raw bits.
  int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
  if (eob_pt >= 3) {
    context = eob_pt - 3;
    const bool eob_extra = reader_.ReadSymbol(
        symbol_decoder_context_
            .eob_extra_cdf[tx_size_context][plane_type][context]);
    if (eob_extra) eob += 1 << (eob_pt - 3);
    for (int i = 1; i < eob_pt - 2; ++i) {
      assert(eob_pt - i >= 3);
      assert(eob_pt <= kEobPt1024SymbolCount);
      if (static_cast<bool>(reader_.ReadBit())) {
        eob += 1 << (eob_pt - i - 3);
      }
    }
  }
  const uint16_t* scan = kScan[tx_class][tx_size];
  const int clamped_tx_size_context = std::min(tx_size_context, 3);
  auto coeff_base_range_cdf =
      symbol_decoder_context_
          .coeff_base_range_cdf[clamped_tx_size_context][plane_type];
  // Read the last coefficient.
  {
    context = GetCoeffBaseContextEob(tx_size, eob - 1);
    const uint16_t pos = scan[eob - 1];
    int level =
        1 + reader_.ReadSymbol<kCoeffBaseEobSymbolCount>(
                symbol_decoder_context_
                    .coeff_base_eob_cdf[tx_size_context][plane_type][context]);
    level_buffer[pos] = level;
    if (level > kNumQuantizerBaseLevels) {
      level +=
          ReadCoeffBaseRange(coeff_base_range_cdf[GetCoeffBaseRangeContextEob(
              adjusted_tx_width_log2, pos, tx_class)]);
    }
    residual[pos] = level;
  }
  if (eob > 1) {
    // Read all the other coefficients.
    // Lookup used to call the right variant of ReadCoeffBase*() based on the
    // transform class.
    static constexpr void (Tile::*kGetCoeffBaseFunc[])(
        const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
        int eob,
        uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
        uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                     [kCoeffBaseRangeSymbolCount + 1],
        ResidualType* quantized_buffer,
        uint8_t* level_buffer) = {&Tile::ReadCoeffBase2D<ResidualType>,
                                  &Tile::ReadCoeffBaseHorizontal<ResidualType>,
                                  &Tile::ReadCoeffBaseVertical<ResidualType>};
    (this->*kGetCoeffBaseFunc[tx_class])(
        scan, tx_size, adjusted_tx_width_log2, eob,
        symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
        coeff_base_range_cdf, residual, level_buffer);
  }
  const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
  const int current_quantizer_index = GetQIndex(
      frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
  const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
  const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
  const int shift = kQuantizationShift[tx_size];
  const uint8_t* const quantizer_matrix =
      (frame_header_.quantizer.use_matrix &&
       *tx_type < kTransformTypeIdentityIdentity &&
       !frame_header_.segmentation.lossless[bp.segment_id] &&
       frame_header_.quantizer.matrix_level[plane] < 15)
          ? quantizer_matrix_[frame_header_.quantizer.matrix_level[plane]]
                             [plane_type][adjusted_tx_size]
                                 .get()
          : nullptr;
  int coefficient_level = 0;
  int8_t dc_category = 0;
  uint16_t* const dc_sign_cdf =
      (residual[0] != 0)
          ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
                x4, y4, w4, h4, plane)]
          : nullptr;
  assert(scan[0] == 0);
  if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
          scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
          &dc_category, &coefficient_level, residual)) {
    return -1;
  }
  if (eob > 1) {
    int i = 1;
    do {
      if (!ReadSignAndApplyDequantization<ResidualType,
                                          /*is_dc_coefficient=*/false>(
              scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
              nullptr, &coefficient_level, residual)) {
        return -1;
      }
    } while (++i < eob);
    MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
  }
  SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
                     dc_category);
  if (split_parse_and_decode_) {
    *block.residual += tx_width * tx_height * residual_size_;
  }
  return eob;
}

// CALL_BITDEPTH_FUNCTION is a macro that calls the appropriate template
// |function| depending on the value of |sequence_header_.color_config.bitdepth|
// with the variadic arguments.
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)         \
  do {                                                \
    if (sequence_header_.color_config.bitdepth > 8) { \
      function<uint16_t>(__VA_ARGS__);                \
    } else {                                          \
      function<uint8_t>(__VA_ARGS__);                 \
    }                                                 \
  } while (false)
#else
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif

// Processes one transform block: runs intra prediction (when decoding an
// intra block), then, unless the block is skipped, parses and/or reconstructs
// its coefficients depending on |mode|:
//   * kProcessingModeParseOnly: coefficients are parsed and the transform
//     parameters are pushed onto a queue for a later decode pass.
//   * kProcessingModeDecodeOnly: previously queued parameters are popped and
//     the block is reconstructed.
//   * kProcessingModeParseAndDecode: both in one pass.
// Returns false on parse failure.
bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
                          int base_y, TransformSize tx_size, int x, int y,
                          ProcessingMode mode) {
  BlockParameters& bp = *block.bp;
  const int subsampling_x = subsampling_x_[plane];
  const int subsampling_y = subsampling_y_[plane];
  const int start_x = base_x + MultiplyBy4(x);
  const int start_y = base_y + MultiplyBy4(y);
  const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
  const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
  // Blocks that start beyond the frame boundary are trivially done.
  if (start_x >= max_x || start_y >= max_y) return true;
  const int row = DivideBy4(start_y << subsampling_y);
  const int column = DivideBy4(start_x << subsampling_x);
  const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
  const int sub_block_row4x4 = row & mask;
  const int sub_block_column4x4 = column & mask;
  const int step_x = kTransformWidth4x4[tx_size];
  const int step_y = kTransformHeight4x4[tx_size];
  const bool do_decode = mode == kProcessingModeDecodeOnly ||
                         mode == kProcessingModeParseAndDecode;
  if (do_decode && !bp.is_inter) {
    if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
      CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
                             x, y, tx_size);
    } else {
      const PredictionMode mode =
          (plane == kPlaneY)
              ? bp.y_mode
              : (bp.uv_mode == kPredictionModeChromaFromLuma ? kPredictionModeDc
                                                             : bp.uv_mode);
      // Positions of the top-right and bottom-left neighbors in the
      // block_decoded bitmap, used to decide edge pixel availability.
      const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
      const int tr_column4x4 =
          (sub_block_column4x4 >> subsampling_x) + step_x + 1;
      const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
      const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
      const bool has_left = x > 0 || block.left_available[plane];
      const bool has_top = y > 0 || block.top_available[plane];

      CALL_BITDEPTH_FUNCTION(
          IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
          block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
          block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
          mode, tx_size);
      if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
        CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
                               start_y, tx_size);
      }
    }
    if (plane == kPlaneY) {
      block.bp->prediction_parameters->max_luma_width =
          start_x + MultiplyBy4(step_x);
      block.bp->prediction_parameters->max_luma_height =
          start_y + MultiplyBy4(step_y);
      block.scratch_buffer->cfl_luma_buffer_valid = false;
    }
  }
  if (!bp.skip) {
    const int sb_row_index = SuperBlockRowIndex(block.row4x4);
    const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
    if (mode == kProcessingModeDecodeOnly) {
      TransformParameterQueue& tx_params =
          *residual_buffer_threaded_[sb_row_index][sb_column_index]
               ->transform_parameters();
      ReconstructBlock(block, plane, start_x, start_y, tx_size,
                       tx_params.Type(), tx_params.NonZeroCoeffCount());
      tx_params.Pop();
    } else {
      TransformType tx_type;
      int non_zero_coeff_count;
#if LIBGAV1_MAX_BITDEPTH >= 10
      if (sequence_header_.color_config.bitdepth > 8) {
        non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
            block, plane, start_x, start_y, tx_size, &tx_type);
      } else  // NOLINT
#endif
      {
        non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
            block, plane, start_x, start_y, tx_size, &tx_type);
      }
      if (non_zero_coeff_count < 0) return false;
      if (mode == kProcessingModeParseAndDecode) {
        ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
                         non_zero_coeff_count);
      } else {
        assert(mode == kProcessingModeParseOnly);
        residual_buffer_threaded_[sb_row_index][sb_column_index]
            ->transform_parameters()
            ->Push(non_zero_coeff_count, tx_type);
      }
    }
  }
  if (do_decode) {
    // Mark the 4x4 units covered by this transform block as decoded so later
    // intra predictions know their neighbors are available.
    bool* block_decoded =
        &block.scratch_buffer
             ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
                            [(sub_block_column4x4 >> subsampling_x) + 1];
    SetBlockValues<bool>(step_y, step_x, true, block_decoded,
                         TileScratchBuffer::kBlockDecodedStride);
  }
  return true;
}

// Iteratively (with an explicit stack, avoiding recursion) splits the luma
// residual area down to the transform sizes recorded in
// |inter_transform_sizes_| and processes each leaf via TransformBlock().
// Returns false on parse failure.
bool Tile::TransformTree(const Block& block, int start_x, int start_y,
                         BlockSize plane_size, ProcessingMode mode) {
  assert(plane_size <= kBlock64x64);
  // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
  // required is (4 - 1) * 4 + 1 = 13.
  Stack<TransformTreeNode, 13> stack;
  // It is okay to cast BlockSize to TransformSize here since the enum are
  // equivalent for all BlockSize values <= kBlock64x64.
  stack.Push(TransformTreeNode(start_x, start_y,
                               static_cast<TransformSize>(plane_size)));

  do {
    TransformTreeNode node = stack.Pop();
    const int row = DivideBy4(node.y);
    const int column = DivideBy4(node.x);
    if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
      continue;
    }
    const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
    const int width = kTransformWidth[node.tx_size];
    const int height = kTransformHeight[node.tx_size];
    // Leaf node: the candidate size fits within the signaled transform size.
    if (width <= kTransformWidth[inter_tx_size] &&
        height <= kTransformHeight[inter_tx_size]) {
      if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
                          mode)) {
        return false;
      }
      continue;
    }
    // The split transform size look up gives the right transform size that we
    // should push in the stack.
    //   if (width > height) => transform size whose width is half.
    //   if (width < height) => transform size whose height is half.
    //   if (width == height) => transform size whose width and height are half.
    const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
    const int half_width = DivideBy2(width);
    if (width > height) {
      stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    const int half_height = DivideBy2(height);
    if (width < height) {
      stack.Push(
          TransformTreeNode(node.x, node.y + half_height, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    // Square node: split into four quadrants, pushed so that the top-left is
    // processed first.
    stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
                                 split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
    stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
  } while (!stack.Empty());
  return true;
}

// Applies the inverse transform to the parsed residual and adds the result to
// the prediction in |buffer_|. No-op when there are no nonzero coefficients.
void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
                            int start_y, TransformSize tx_size,
                            TransformType tx_type, int non_zero_coeff_count) {
  // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
  assert(non_zero_coeff_count >= 0);
  if (non_zero_coeff_count == 0) return;
#if LIBGAV1_MAX_BITDEPTH >= 10
  if (sequence_header_.color_config.bitdepth > 8) {
    Array2DView<uint16_t> buffer(
        buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
        reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation.lossless[block.bp->segment_id],
                reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
                &buffer, non_zero_coeff_count);
  } else  // NOLINT
#endif
  {
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation.lossless[block.bp->segment_id],
                reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
                &buffer_[plane], non_zero_coeff_count);
  }
  if (split_parse_and_decode_) {
    // Advance the shared residual pointer past this transform block.
    *block.residual +=
        kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
  }
}

// Processes the residual of a whole block: iterates over up to 64x64-pixel
// chunks and all planes, dispatching to TransformTree() for non-lossless
// inter luma (which can split further) and to a flat TransformBlock() grid
// otherwise. Returns false on parse failure.
bool Tile::Residual(const Block& block, ProcessingMode mode) {
  const int width_chunks = std::max(1, block.width >> 6);
  const int height_chunks = std::max(1, block.height >> 6);
  const BlockSize size_chunk4x4 =
      (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
  const BlockParameters& bp = *block.bp;
  for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
    for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
      const int num_planes = block.HasChroma() ? PlaneCount() : 1;
      int plane = kPlaneY;
      do {
        const int subsampling_x = subsampling_x_[plane];
        const int subsampling_y = subsampling_y_[plane];
        // For Y Plane, when lossless is true |bp.transform_size| is always
        // kTransformSize4x4. So we can simply use |bp.transform_size| here as
        // the Y plane's transform size (part of Section 5.11.37 in the spec).
        const TransformSize tx_size =
            (plane == kPlaneY) ? bp.transform_size : bp.uv_transform_size;
        const BlockSize plane_size =
            kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
        assert(plane_size != kBlockInvalid);
        if (bp.is_inter &&
            !frame_header_.segmentation.lossless[bp.segment_id] &&
            plane == kPlaneY) {
          const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
          const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
          const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
          if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
            return false;
          }
        } else {
          const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
          const int step_x = kTransformWidth4x4[tx_size];
          const int step_y = kTransformHeight4x4[tx_size];
          const int num4x4_wide = kNum4x4BlocksWide[plane_size];
          const int num4x4_high = kNum4x4BlocksHigh[plane_size];
          for (int y = 0; y < num4x4_high; y += step_y) {
            for (int x = 0; x < num4x4_wide; x += step_x) {
              if (!TransformBlock(
                      block, static_cast<Plane>(plane), base_x, base_y, tx_size,
                      x + (MultiplyBy16(chunk_x) >> subsampling_x),
                      y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
                return false;
              }
            }
          }
        }
      } while (++plane < num_planes);
    }
  }
  return true;
}

// The purpose of this function is to limit the maximum size of motion vectors
// and also, if use_intra_block_copy is true, to additionally constrain the
// motion vector so that the data is fetched from parts of the tile that have
// already been decoded and are not too close to the current block (in order to
// make a pipelined decoder implementation feasible).
+bool Tile::IsMvValid(const Block& block, bool is_compound) const { + const BlockParameters& bp = *block.bp; + for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) { + for (int mv_component : bp.mv.mv[i].mv) { + if (std::abs(mv_component) >= (1 << 14)) { + return false; + } + } + } + if (!block.bp->prediction_parameters->use_intra_block_copy) { + return true; + } + if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) { + return false; + } + const int delta_row = bp.mv.mv[0].mv[0] >> 3; + const int delta_column = bp.mv.mv[0].mv[1] >> 3; + int src_top_edge = MultiplyBy4(block.row4x4) + delta_row; + int src_left_edge = MultiplyBy4(block.column4x4) + delta_column; + const int src_bottom_edge = src_top_edge + block.height; + const int src_right_edge = src_left_edge + block.width; + if (block.HasChroma()) { + if (block.width < 8 && subsampling_x_[kPlaneU] != 0) { + src_left_edge -= 4; + } + if (block.height < 8 && subsampling_y_[kPlaneU] != 0) { + src_top_edge -= 4; + } + } + if (src_top_edge < MultiplyBy4(row4x4_start_) || + src_left_edge < MultiplyBy4(column4x4_start_) || + src_bottom_edge > MultiplyBy4(row4x4_end_) || + src_right_edge > MultiplyBy4(column4x4_end_)) { + return false; + } + // sb_height_log2 = use_128x128_superblock ? 
log2(128) : log2(64) + const int sb_height_log2 = + 6 + static_cast<int>(sequence_header_.use_128x128_superblock); + const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2; + const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6; + const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2; + const int src_64x64_block_column = (src_right_edge - 1) >> 6; + const int total_64x64_blocks_per_row = + ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1; + const int active_64x64_block = + active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column; + const int src_64x64_block = + src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column; + if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) { + return false; + } + + // Wavefront constraint: use only top left area of frame for reference. + if (src_sb_row > active_sb_row) return false; + const int gradient = + 1 + kIntraBlockCopyDelay64x64Blocks + + static_cast<int>(sequence_header_.use_128x128_superblock); + const int wavefront_offset = gradient * (active_sb_row - src_sb_row); + return src_64x64_block_column < active_64x64_block_column - + kIntraBlockCopyDelay64x64Blocks + + wavefront_offset; +} + +bool Tile::AssignInterMv(const Block& block, bool is_compound) { + int min[2]; + int max[2]; + GetClampParameters(block, min, max); + BlockParameters& bp = *block.bp; + const PredictionParameters& prediction_parameters = *bp.prediction_parameters; + if (is_compound) { + for (int i = 0; i < 2; ++i) { + const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode); + MotionVector predicted_mv; + if (mode == kPredictionModeGlobalMv) { + predicted_mv = prediction_parameters.global_mv[i]; + } else { + const int ref_mv_index = (mode == kPredictionModeNearestMv || + (mode == kPredictionModeNewMv && + prediction_parameters.ref_mv_count <= 1)) + ? 
0 + : prediction_parameters.ref_mv_index; + predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i); + if (ref_mv_index < prediction_parameters.ref_mv_count) { + predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]); + predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]); + } + } + if (mode == kPredictionModeNewMv) { + ReadMotionVector(block, i); + bp.mv.mv[i].mv[0] += predicted_mv.mv[0]; + bp.mv.mv[i].mv[1] += predicted_mv.mv[1]; + } else { + bp.mv.mv[i] = predicted_mv; + } + } + } else { + const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode); + MotionVector predicted_mv; + if (mode == kPredictionModeGlobalMv) { + predicted_mv = prediction_parameters.global_mv[0]; + } else { + const int ref_mv_index = (mode == kPredictionModeNearestMv || + (mode == kPredictionModeNewMv && + prediction_parameters.ref_mv_count <= 1)) + ? 0 + : prediction_parameters.ref_mv_index; + predicted_mv = prediction_parameters.reference_mv(ref_mv_index); + if (ref_mv_index < prediction_parameters.ref_mv_count) { + predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]); + predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]); + } + } + if (mode == kPredictionModeNewMv) { + ReadMotionVector(block, 0); + bp.mv.mv[0].mv[0] += predicted_mv.mv[0]; + bp.mv.mv[0].mv[1] += predicted_mv.mv[1]; + } else { + bp.mv.mv[0] = predicted_mv; + } + } + return IsMvValid(block, is_compound); +} + +bool Tile::AssignIntraMv(const Block& block) { + // TODO(linfengz): Check if the clamping process is necessary. 
+ int min[2]; + int max[2]; + GetClampParameters(block, min, max); + BlockParameters& bp = *block.bp; + const PredictionParameters& prediction_parameters = *bp.prediction_parameters; + const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0); + ReadMotionVector(block, 0); + if (ref_mv_0.mv32 == 0) { + const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1); + if (ref_mv_1.mv32 == 0) { + const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()]; + if (block.row4x4 - super_block_size4x4 < row4x4_start_) { + bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4); + bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels); + } else { + bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4); + } + } else { + bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]); + bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]); + } + } else { + bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]); + bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]); + } + return IsMvValid(block, /*is_compound=*/false); +} + +void Tile::ResetEntropyContext(const Block& block) { + const int num_planes = block.HasChroma() ? 
PlaneCount() : 1; + int plane = kPlaneY; + do { + const int subsampling_x = subsampling_x_[plane]; + const int start_x = block.column4x4 >> subsampling_x; + const int end_x = + std::min((block.column4x4 + block.width4x4) >> subsampling_x, + frame_header_.columns4x4); + memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0, + end_x - start_x); + memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0, + end_x - start_x); + const int subsampling_y = subsampling_y_[plane]; + const int start_y = block.row4x4 >> subsampling_y; + const int end_y = + std::min((block.row4x4 + block.height4x4) >> subsampling_y, + frame_header_.rows4x4); + memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0, + end_y - start_y); + memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0, + end_y - start_y); + } while (++plane < num_planes); +} + +bool Tile::ComputePrediction(const Block& block) { + const BlockParameters& bp = *block.bp; + if (!bp.is_inter) return true; + const int mask = + (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) - + 1; + const int sub_block_row4x4 = block.row4x4 & mask; + const int sub_block_column4x4 = block.column4x4 & mask; + const int plane_count = block.HasChroma() ? PlaneCount() : 1; + // Returns true if this block applies local warping. The state is determined + // in the Y plane and carried for use in the U/V planes. + // But the U/V planes will not apply warping when the block size is smaller + // than 8x8, even if this variable is true. + bool is_local_valid = false; + // Local warping parameters, similar usage as is_local_valid. 
+ GlobalMotion local_warp_params; + int plane = kPlaneY; + do { + const int8_t subsampling_x = subsampling_x_[plane]; + const int8_t subsampling_y = subsampling_y_[plane]; + const BlockSize plane_size = block.residual_size[plane]; + const int block_width4x4 = kNum4x4BlocksWide[plane_size]; + const int block_height4x4 = kNum4x4BlocksHigh[plane_size]; + const int block_width = MultiplyBy4(block_width4x4); + const int block_height = MultiplyBy4(block_height4x4); + const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x); + const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y); + if (bp.reference_frame[1] == kReferenceFrameIntra) { + const int tr_row4x4 = sub_block_row4x4 >> subsampling_y; + const int tr_column4x4 = + (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1; + const int bl_row4x4 = + (sub_block_row4x4 >> subsampling_y) + block_height4x4; + const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1; + const TransformSize tx_size = + k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]] + [k4x4HeightLog2[plane_size]]; + const bool has_left = block.left_available[plane]; + const bool has_top = block.top_available[plane]; + CALL_BITDEPTH_FUNCTION( + IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y, + has_left, has_top, + block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4], + block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4], + kInterIntraToIntraMode[block.bp->prediction_parameters + ->inter_intra_mode], + tx_size); + } + int candidate_row = block.row4x4; + int candidate_column = block.column4x4; + bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra; + if (!some_use_intra && plane != 0) { + candidate_row = (candidate_row >> subsampling_y) << subsampling_y; + candidate_column = (candidate_column >> subsampling_x) << subsampling_x; + if (candidate_row != block.row4x4) { + // Top block. 
+ const BlockParameters& bp_top = + *block_parameters_holder_.Find(candidate_row, block.column4x4); + some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra; + if (!some_use_intra && candidate_column != block.column4x4) { + // Top-left block. + const BlockParameters& bp_top_left = + *block_parameters_holder_.Find(candidate_row, candidate_column); + some_use_intra = + bp_top_left.reference_frame[0] == kReferenceFrameIntra; + } + } + if (!some_use_intra && candidate_column != block.column4x4) { + // Left block. + const BlockParameters& bp_left = + *block_parameters_holder_.Find(block.row4x4, candidate_column); + some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra; + } + } + int prediction_width; + int prediction_height; + if (some_use_intra) { + candidate_row = block.row4x4; + candidate_column = block.column4x4; + prediction_width = block_width; + prediction_height = block_height; + } else { + prediction_width = block.width >> subsampling_x; + prediction_height = block.height >> subsampling_y; + } + int r = 0; + int y = 0; + do { + int c = 0; + int x = 0; + do { + if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x, + base_y + y, prediction_width, prediction_height, + candidate_row + r, candidate_column + c, + &is_local_valid, &local_warp_params)) { + return false; + } + ++c; + x += prediction_width; + } while (x < block_width); + ++r; + y += prediction_height; + } while (y < block_height); + } while (++plane < plane_count); + return true; +} + +#undef CALL_BITDEPTH_FUNCTION + +void Tile::PopulateDeblockFilterLevel(const Block& block) { + if (!post_filter_.DoDeblock()) return; + BlockParameters& bp = *block.bp; + const int mode_id = + static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode)); + for (int i = 0; i < kFrameLfCount; ++i) { + if (delta_lf_all_zero_) { + bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel( + bp.segment_id, i, bp.reference_frame[0], mode_id); + } else { + 
bp.deblock_filter_level[i] = + deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]] + [mode_id]; + } + } +} + +bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size, + ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + // Do not process the block if the starting point is beyond the visible frame. + // This is equivalent to the has_row/has_column check in the + // decode_partition() section of the spec when partition equals + // kPartitionHorizontal or kPartitionVertical. + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + return true; + } + BlockParameters& bp = *tree->parameters(); + block_parameters_holder_.FillCache(row4x4, column4x4, block_size, &bp); + Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual); + bp.size = block_size; + bp.prediction_parameters = + split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>( + new (std::nothrow) PredictionParameters()) + : std::move(prediction_parameters_); + if (bp.prediction_parameters == nullptr) return false; + if (!DecodeModeInfo(block)) return false; + bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv || + bp.y_mode == kPredictionModeGlobalGlobalMv) && + !IsBlockDimension4(bp.size); + PopulateDeblockFilterLevel(block); + if (!ReadPaletteTokens(block)) return false; + DecodeTransformSize(block); + // Part of Section 5.11.37 in the spec (implemented as a simple lookup). + bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id] + ? 
kTransformSize4x4 + : kUVTransformSize[block.residual_size[kPlaneU]]; + if (bp.skip) ResetEntropyContext(block); + if (split_parse_and_decode_) { + if (!Residual(block, kProcessingModeParseOnly)) return false; + } else { + if (!ComputePrediction(block) || + !Residual(block, kProcessingModeParseAndDecode)) { + return false; + } + } + // If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all + // blocks. We don't need to call save bp.segment_id in the current frame + // because the current frame's segmentation map will be cleared to all 0s. + // + // If frame_header_.segmentation.enabled is true and + // frame_header_.segmentation.update_map is false, we will copy the previous + // frame's segmentation map to the current frame. So we don't need to call + // save bp.segment_id in the current frame. + if (frame_header_.segmentation.enabled && + frame_header_.segmentation.update_map) { + const int x_limit = std::min(frame_header_.columns4x4 - column4x4, + static_cast<int>(block.width4x4)); + const int y_limit = std::min(frame_header_.rows4x4 - row4x4, + static_cast<int>(block.height4x4)); + current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit, + y_limit, bp.segment_id); + } + StoreMotionFieldMvsIntoCurrentFrame(block); + if (!split_parse_and_decode_) { + prediction_parameters_ = std::move(bp.prediction_parameters); + } + return true; +} + +bool Tile::DecodeBlock(ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + const int row4x4 = tree->row4x4(); + const int column4x4 = tree->column4x4(); + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + return true; + } + const BlockSize block_size = tree->block_size(); + Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual); + if (!ComputePrediction(block) || + !Residual(block, kProcessingModeDecodeOnly)) { + return false; + } + block.bp->prediction_parameters.reset(nullptr); + return true; 
+} + +bool Tile::ProcessPartition(int row4x4_start, int column4x4_start, + ParameterTree* const root, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + Stack<ParameterTree*, kDfsStackSize> stack; + + // Set up the first iteration. + ParameterTree* node = root; + int row4x4 = row4x4_start; + int column4x4 = column4x4_start; + BlockSize block_size = SuperBlockSize(); + + // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked. + // Otherwise, the children are pushed into the stack for future processing. + do { + if (!stack.Empty()) { + // Set up subsequent iterations. + node = stack.Pop(); + row4x4 = node->row4x4(); + column4x4 = node->column4x4(); + block_size = node->block_size(); + } + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + continue; + } + const int block_width4x4 = kNum4x4BlocksWide[block_size]; + assert(block_width4x4 == kNum4x4BlocksHigh[block_size]); + const int half_block4x4 = block_width4x4 >> 1; + const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4; + const bool has_columns = + (column4x4 + half_block4x4) < frame_header_.columns4x4; + Partition partition; + if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns, + &partition)) { + LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d", + row4x4, column4x4); + return false; + } + const BlockSize sub_size = kSubSize[partition][block_size]; + // Section 6.10.4: It is a requirement of bitstream conformance that + // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID + // every time subSize is computed. 
+ if (sub_size == kBlockInvalid || + kPlaneResidualSize[sub_size] + [sequence_header_.color_config.subsampling_x] + [sequence_header_.color_config.subsampling_y] == + kBlockInvalid) { + LIBGAV1_DLOG( + ERROR, + "Invalid sub-block/plane size for row: %d column: %d partition: " + "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d", + row4x4, column4x4, partition, block_size, sub_size, + sequence_header_.color_config.subsampling_x, + sequence_header_.color_config.subsampling_y); + return false; + } + if (!node->SetPartitionType(partition)) { + LIBGAV1_DLOG(ERROR, "node->SetPartitionType() failed."); + return false; + } + switch (partition) { + case kPartitionNone: + if (!ProcessBlock(row4x4, column4x4, sub_size, node, scratch_buffer, + residual)) { + return false; + } + break; + case kPartitionSplit: + // The children must be added in reverse order since a stack is being + // used. + for (int i = 3; i >= 0; --i) { + ParameterTree* const child = node->children(i); + assert(child != nullptr); + stack.Push(child); + } + break; + case kPartitionHorizontal: + case kPartitionVertical: + case kPartitionHorizontalWithTopSplit: + case kPartitionHorizontalWithBottomSplit: + case kPartitionVerticalWithLeftSplit: + case kPartitionVerticalWithRightSplit: + case kPartitionHorizontal4: + case kPartitionVertical4: + for (int i = 0; i < 4; ++i) { + ParameterTree* const child = node->children(i); + // Once a null child is seen, all the subsequent children will also be + // null. 
+ if (child == nullptr) break; + if (!ProcessBlock(child->row4x4(), child->column4x4(), + child->block_size(), child, scratch_buffer, + residual)) { + return false; + } + } + break; + } + } while (!stack.Empty()); + return true; +} + +void Tile::ResetLoopRestorationParams() { + for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) { + for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) { + reference_unit_info_[plane].sgr_proj_info.multiplier[i] = + kSgrProjDefaultMultiplier[i]; + for (int j = 0; j < kNumWienerCoefficients; ++j) { + reference_unit_info_[plane].wiener_info.filter[i][j] = + kWienerDefaultFilter[j]; + } + } + } +} + +void Tile::ResetCdef(const int row4x4, const int column4x4) { + if (!sequence_header_.enable_cdef) return; + const int row = DivideBy16(row4x4); + const int column = DivideBy16(column4x4); + cdef_index_[row][column] = -1; + if (sequence_header_.use_128x128_superblock) { + const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64]; + const int border_row = DivideBy16(row4x4 + cdef_size4x4); + const int border_column = DivideBy16(column4x4 + cdef_size4x4); + cdef_index_[row][border_column] = -1; + cdef_index_[border_row][column] = -1; + cdef_index_[border_row][border_column] = -1; + } +} + +void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer, + int row4x4, int column4x4) { + // Set everything to false. + memset(scratch_buffer->block_decoded, 0, + sizeof(scratch_buffer->block_decoded)); + // Set specific edge cases to true. + const int sb_size4 = sequence_header_.use_128x128_superblock ? 
32 : 16; + for (int plane = kPlaneY; plane < PlaneCount(); ++plane) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x; + const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y; + // The memset is equivalent to the following lines in the spec: + // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) { + // if ( y < 0 && x < sbWidth4 ) { + // BlockDecoded[plane][y][x] = 1 + // } + // } + const int num_elements = + std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1; + memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements); + // The for loop is equivalent to the following lines in the spec: + // for ( y = -1; y <= ( sbSize4 >> subY ); y++ ) + // if ( x < 0 && y < sbHeight4 ) + // BlockDecoded[plane][y][x] = 1 + // } + // } + // BlockDecoded[plane][sbSize4 >> subY][-1] = 0 + for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4); + ++y) { + scratch_buffer->block_decoded[plane][y + 1][0] = true; + } + } +} + +bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4, + TileScratchBuffer* const scratch_buffer, + ProcessingMode mode) { + const bool parsing = + mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode; + const bool decoding = mode == kProcessingModeDecodeOnly || + mode == kProcessingModeParseAndDecode; + if (parsing) { + read_deltas_ = frame_header_.delta_q.present; + ResetCdef(row4x4, column4x4); + } + if (decoding) { + ClearBlockDecoded(scratch_buffer, row4x4, column4x4); + } + const BlockSize block_size = SuperBlockSize(); + if (parsing) { + ReadLoopRestorationCoefficients(row4x4, column4x4, block_size); + } + const int row = row4x4 / block_width4x4; + const int column = column4x4 / block_width4x4; + if (parsing && decoding) { + uint8_t* residual_buffer = residual_buffer_.get(); + if (!ProcessPartition(row4x4, column4x4, + 
block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4, + column4x4); + return false; + } + return true; + } + const int sb_row_index = SuperBlockRowIndex(row4x4); + const int sb_column_index = SuperBlockColumnIndex(column4x4); + if (parsing) { + residual_buffer_threaded_[sb_row_index][sb_column_index] = + residual_buffer_pool_->Get(); + if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) { + LIBGAV1_DLOG(ERROR, "Failed to get residual buffer."); + return false; + } + uint8_t* residual_buffer = + residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer(); + if (!ProcessPartition(row4x4, column4x4, + block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4, + column4x4); + return false; + } + } else { + uint8_t* residual_buffer = + residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer(); + if (!DecodeSuperBlock(block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d", + row4x4, column4x4); + return false; + } + residual_buffer_pool_->Release( + std::move(residual_buffer_threaded_[sb_row_index][sb_column_index])); + } + return true; +} + +bool Tile::DecodeSuperBlock(ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + Stack<ParameterTree*, kDfsStackSize> stack; + stack.Push(tree); + do { + ParameterTree* const node = stack.Pop(); + if (node->partition() != kPartitionNone) { + for (int i = 3; i >= 0; --i) { + if (node->children(i) == nullptr) continue; + stack.Push(node->children(i)); + } + continue; + } + if (!DecodeBlock(node, scratch_buffer, residual)) { + LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d", + node->row4x4(), node->column4x4()); + return false; + } + } while 
(!stack.Empty()); + return true; +} + +void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4, + BlockSize block_size) { + if (frame_header_.allow_intrabc) return; + LoopRestorationInfo* const restoration_info = post_filter_.restoration_info(); + const bool is_superres_scaled = + frame_header_.width != frame_header_.upscaled_width; + for (int plane = kPlaneY; plane < PlaneCount(); ++plane) { + LoopRestorationUnitInfo unit_info; + if (restoration_info->PopulateUnitInfoForSuperBlock( + static_cast<Plane>(plane), block_size, is_superres_scaled, + frame_header_.superres_scale_denominator, row4x4, column4x4, + &unit_info)) { + for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end; + ++unit_row) { + for (int unit_column = unit_info.column_start; + unit_column < unit_info.column_end; ++unit_column) { + const int unit_id = unit_row * restoration_info->num_horizontal_units( + static_cast<Plane>(plane)) + + unit_column; + restoration_info->ReadUnitCoefficients( + &reader_, &symbol_decoder_context_, static_cast<Plane>(plane), + unit_id, &reference_unit_info_); + } + } + } + } +} + +void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) { + if (frame_header_.refresh_frame_flags == 0 || + IsIntraFrame(frame_header_.frame_type)) { + return; + } + // Iterate over odd rows/columns beginning at the first odd row/column for the + // block. It is done this way because motion field mvs are only needed at a + // 8x8 granularity. + const int row_start4x4 = block.row4x4 | 1; + const int row_limit4x4 = + std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4); + if (row_start4x4 >= row_limit4x4) return; + const int column_start4x4 = block.column4x4 | 1; + const int column_limit4x4 = + std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4); + if (column_start4x4 >= column_limit4x4) return; + + // The largest reference MV component that can be saved. 
+ constexpr int kRefMvsLimit = (1 << 12) - 1; + const BlockParameters& bp = *block.bp; + ReferenceInfo* reference_info = current_frame_.reference_info(); + for (int i = 1; i >= 0; --i) { + const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i]; + // Must make a local copy so that StoreMotionFieldMvs() knows there is no + // overlap between load and store. + const MotionVector mv_to_store = bp.mv.mv[i]; + const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]); + const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]); + if (reference_frame_to_store > kReferenceFrameIntra && + // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two + // absolute values and then compare with kRefMvsLimit to save a branch. + // The next line is equivalent to: + // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit + (mv_row | mv_column) <= kRefMvsLimit && + reference_info->relative_distance_from[reference_frame_to_store] < 0) { + const int row_start8x8 = DivideBy2(row_start4x4); + const int row_limit8x8 = DivideBy2(row_limit4x4); + const int column_start8x8 = DivideBy2(column_start4x4); + const int column_limit8x8 = DivideBy2(column_limit4x4); + const int rows = row_limit8x8 - row_start8x8; + const int columns = column_limit8x8 - column_start8x8; + const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4()); + ReferenceFrameType* const reference_frame_row_start = + &reference_info + ->motion_field_reference_frame[row_start8x8][column_start8x8]; + MotionVector* const mv = + &reference_info->motion_field_mv[row_start8x8][column_start8x8]; + + // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined + // and simplifies std::fill() for these cases. + if (columns <= 1) { + // Don't change the above condition to (columns == 1). + // Condition (columns <= 1) may help the compiler simplify the inlining + // of the general case of StoreMotionFieldMvs() by eliminating the + // (columns == 0) case. 
+ assert(columns == 1); + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 1, reference_frame_row_start, mv); + } else if (columns == 2) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 2, reference_frame_row_start, mv); + } else if (columns == 4) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 4, reference_frame_row_start, mv); + } else if (columns == 8) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 8, reference_frame_row_start, mv); + } else if (columns == 16) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 16, reference_frame_row_start, mv); + } else if (columns < 16) { + // This always true condition (columns < 16) may help the compiler + // simplify the inlining of the following function. + // This general case is rare and usually only happens to the blocks + // which contain the right boundary of the frame. + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + columns, reference_frame_row_start, mv); + } else { + assert(false); + } + return; + } + } +} + +} // namespace libgav1 |