diff options
author | qinxialei <xialeiqin@gmail.com> | 2020-10-29 11:26:59 +0800 |
---|---|---|
committer | qinxialei <xialeiqin@gmail.com> | 2020-10-29 11:26:59 +0800 |
commit | e8d277081293b6fb2a5d469616baaa7a06f52496 (patch) | |
tree | 1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/tile | |
download | libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.gz libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.bz2 libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.zip |
Import Upstream version 0.16.0
Diffstat (limited to 'src/tile')
-rw-r--r-- | src/tile/bitstream/mode_info.cc | 1303 | ||||
-rw-r--r-- | src/tile/bitstream/palette.cc | 319 | ||||
-rw-r--r-- | src/tile/bitstream/partition.cc | 148 | ||||
-rw-r--r-- | src/tile/bitstream/transform_size.cc | 222 | ||||
-rw-r--r-- | src/tile/prediction.cc | 1361 | ||||
-rw-r--r-- | src/tile/tile.cc | 2573 |
6 files changed, 5926 insertions, 0 deletions
diff --git a/src/tile/bitstream/mode_info.cc b/src/tile/bitstream/mode_info.cc new file mode 100644 index 0000000..0b22eb0 --- /dev/null +++ b/src/tile/bitstream/mode_info.cc @@ -0,0 +1,1303 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> +#include <vector> + +#include "src/buffer_pool.h" +#include "src/dsp/constants.h" +#include "src/motion_vector.h" +#include "src/obu_parser.h" +#include "src/prediction_mask.h" +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/array_2d.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/logging.h" +#include "src/utils/segmentation.h" +#include "src/utils/segmentation_map.h" +#include "src/utils/types.h" + +namespace libgav1 { +namespace { + +constexpr int kDeltaQSmall = 3; +constexpr int kDeltaLfSmall = 3; + +constexpr uint8_t kIntraYModeContext[kIntraPredictionModesY] = { + 0, 1, 2, 3, 4, 4, 4, 4, 3, 0, 1, 2, 0}; + +constexpr uint8_t kSizeGroup[kMaxBlockSizes] = { + 0, 0, 0, 0, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 3, 3, 2, 3, 3, 3, 3, 3}; + +constexpr int kCompoundModeNewMvContexts = 5; +constexpr uint8_t 
kCompoundModeContextMap[3][kCompoundModeNewMvContexts] = { + {0, 1, 1, 1, 1}, {1, 2, 3, 4, 4}, {4, 4, 5, 6, 7}}; + +enum CflSign : uint8_t { + kCflSignZero = 0, + kCflSignNegative = 1, + kCflSignPositive = 2 +}; + +// For each possible value of the combined signs (which is read from the +// bitstream), this array stores the following: sign_u, sign_v, alpha_u_context, +// alpha_v_context. Only positive entries are used. Entry at index i is computed +// as follows: +// sign_u = i / 3 +// sign_v = i % 3 +// alpha_u_context = i - 2 +// alpha_v_context = (sign_v - 1) * 3 + sign_u +constexpr int8_t kCflAlphaLookup[kCflAlphaSignsSymbolCount][4] = { + {0, 1, -2, 0}, {0, 2, -1, 3}, {1, 0, 0, -2}, {1, 1, 1, 1}, + {1, 2, 2, 4}, {2, 0, 3, -1}, {2, 1, 4, 2}, {2, 2, 5, 5}, +}; + +constexpr BitMaskSet kPredictionModeHasNearMvMask(kPredictionModeNearMv, + kPredictionModeNearNearMv, + kPredictionModeNearNewMv, + kPredictionModeNewNearMv); + +constexpr BitMaskSet kIsInterIntraModeAllowedMask(kBlock8x8, kBlock8x16, + kBlock16x8, kBlock16x16, + kBlock16x32, kBlock32x16, + kBlock32x32); + +bool IsBackwardReference(ReferenceFrameType type) { + return type >= kReferenceFrameBackward && type <= kReferenceFrameAlternate; +} + +bool IsSameDirectionReferencePair(ReferenceFrameType type1, + ReferenceFrameType type2) { + return (type1 >= kReferenceFrameBackward) == + (type2 >= kReferenceFrameBackward); +} + +// This is called neg_deinterleave() in the spec. +int DecodeSegmentId(int diff, int reference, int max) { + if (reference == 0) return diff; + if (reference >= max - 1) return max - diff - 1; + const int value = ((diff & 1) != 0) ? reference + ((diff + 1) >> 1) + : reference - (diff >> 1); + const int reference2 = (reference << 1); + if (reference2 < max) { + return (diff <= reference2) ? value : diff; + } + return (diff <= ((max - reference - 1) << 1)) ? value : max - (diff + 1); +} + +// This is called DrlCtxStack in section 7.10.2.14 of the spec. 
+// In the spec, the weights of all the nearest mvs are incremented by a bonus +// weight which is larger than any natural weight, and the weights of the mvs +// are compared with this bonus weight to determine their contexts. We replace +// this procedure by introducing |nearest_mv_count| in PredictionParameters, +// which records the count of the nearest mvs. Since all the nearest mvs are in +// the beginning of the mv stack, the |index| of a mv in the mv stack can be +// compared with |nearest_mv_count| to get that mv's context. +int GetRefMvIndexContext(int nearest_mv_count, int index) { + if (index + 1 < nearest_mv_count) { + return 0; + } + if (index + 1 == nearest_mv_count) { + return 1; + } + return 2; +} + +// Returns true if both the width and height of the block is less than 64. +bool IsBlockDimensionLessThan64(BlockSize size) { + return size <= kBlock32x32 && size != kBlock16x64; +} + +int GetUseCompoundReferenceContext(const Tile::Block& block) { + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + if (block.IsTopSingle() && block.IsLeftSingle()) { + return static_cast<int>(IsBackwardReference(block.TopReference(0))) ^ + static_cast<int>(IsBackwardReference(block.LeftReference(0))); + } + if (block.IsTopSingle()) { + return 2 + static_cast<int>(IsBackwardReference(block.TopReference(0)) || + block.IsTopIntra()); + } + if (block.IsLeftSingle()) { + return 2 + static_cast<int>(IsBackwardReference(block.LeftReference(0)) || + block.IsLeftIntra()); + } + return 4; + } + if (block.top_available[kPlaneY]) { + return block.IsTopSingle() + ? static_cast<int>(IsBackwardReference(block.TopReference(0))) + : 3; + } + if (block.left_available[kPlaneY]) { + return block.IsLeftSingle() + ? static_cast<int>(IsBackwardReference(block.LeftReference(0))) + : 3; + } + return 1; +} + +// Calculates count0 by calling block.CountReferences() on the frame types from +// type0_start to type0_end, inclusive, and summing the results. 
+// Calculates count1 by calling block.CountReferences() on the frame types from +// type1_start to type1_end, inclusive, and summing the results. +// Compares count0 with count1 and returns 0, 1 or 2. +// +// See count_refs and ref_count_ctx in 8.3.2. +int GetReferenceContext(const Tile::Block& block, + ReferenceFrameType type0_start, + ReferenceFrameType type0_end, + ReferenceFrameType type1_start, + ReferenceFrameType type1_end) { + int count0 = 0; + int count1 = 0; + for (int type = type0_start; type <= type0_end; ++type) { + count0 += block.CountReferences(static_cast<ReferenceFrameType>(type)); + } + for (int type = type1_start; type <= type1_end; ++type) { + count1 += block.CountReferences(static_cast<ReferenceFrameType>(type)); + } + return (count0 < count1) ? 0 : (count0 == count1 ? 1 : 2); +} + +} // namespace + +bool Tile::ReadSegmentId(const Block& block) { + int top_left = -1; + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + top_left = + block_parameters_holder_.Find(block.row4x4 - 1, block.column4x4 - 1) + ->segment_id; + } + int top = -1; + if (block.top_available[kPlaneY]) { + top = block.bp_top->segment_id; + } + int left = -1; + if (block.left_available[kPlaneY]) { + left = block.bp_left->segment_id; + } + int pred; + if (top == -1) { + pred = (left == -1) ? 0 : left; + } else if (left == -1) { + pred = top; + } else { + pred = (top_left == top) ? 
top : left; + } + BlockParameters& bp = *block.bp; + if (bp.skip) { + bp.segment_id = pred; + return true; + } + int context = 0; + if (top_left < 0) { + context = 0; + } else if (top_left == top && top_left == left) { + context = 2; + } else if (top_left == top || top_left == left || top == left) { + context = 1; + } + uint16_t* const segment_id_cdf = + symbol_decoder_context_.segment_id_cdf[context]; + const int encoded_segment_id = + reader_.ReadSymbol<kMaxSegments>(segment_id_cdf); + bp.segment_id = + DecodeSegmentId(encoded_segment_id, pred, + frame_header_.segmentation.last_active_segment_id + 1); + // Check the bitstream conformance requirement in Section 6.10.8 of the spec. + if (bp.segment_id < 0 || + bp.segment_id > frame_header_.segmentation.last_active_segment_id) { + LIBGAV1_DLOG( + ERROR, + "Corrupted segment_ids: encoded %d, last active %d, postprocessed %d", + encoded_segment_id, frame_header_.segmentation.last_active_segment_id, + bp.segment_id); + return false; + } + return true; +} + +bool Tile::ReadIntraSegmentId(const Block& block) { + BlockParameters& bp = *block.bp; + if (!frame_header_.segmentation.enabled) { + bp.segment_id = 0; + return true; + } + return ReadSegmentId(block); +} + +void Tile::ReadSkip(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.segmentation.segment_id_pre_skip && + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip)) { + bp.skip = true; + return; + } + int context = 0; + if (block.top_available[kPlaneY] && block.bp_top->skip) { + ++context; + } + if (block.left_available[kPlaneY] && block.bp_left->skip) { + ++context; + } + uint16_t* const skip_cdf = symbol_decoder_context_.skip_cdf[context]; + bp.skip = reader_.ReadSymbol(skip_cdf); +} + +void Tile::ReadSkipMode(const Block& block) { + BlockParameters& bp = *block.bp; + if (!frame_header_.skip_mode_present || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + 
frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv) || + IsBlockDimension4(block.size)) { + bp.skip_mode = false; + return; + } + const int context = + (block.left_available[kPlaneY] + ? static_cast<int>(block.bp_left->skip_mode) + : 0) + + (block.top_available[kPlaneY] ? static_cast<int>(block.bp_top->skip_mode) + : 0); + bp.skip_mode = + reader_.ReadSymbol(symbol_decoder_context_.skip_mode_cdf[context]); +} + +void Tile::ReadCdef(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip || frame_header_.coded_lossless || + !sequence_header_.enable_cdef || frame_header_.allow_intrabc) { + return; + } + const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64]; + const int cdef_mask4x4 = ~(cdef_size4x4 - 1); + const int row4x4 = block.row4x4 & cdef_mask4x4; + const int column4x4 = block.column4x4 & cdef_mask4x4; + const int row = DivideBy16(row4x4); + const int column = DivideBy16(column4x4); + if (cdef_index_[row][column] == -1) { + cdef_index_[row][column] = + frame_header_.cdef.bits > 0 + ? 
static_cast<int16_t>(reader_.ReadLiteral(frame_header_.cdef.bits)) + : 0; + for (int i = row4x4; i < row4x4 + block.height4x4; i += cdef_size4x4) { + for (int j = column4x4; j < column4x4 + block.width4x4; + j += cdef_size4x4) { + cdef_index_[DivideBy16(i)][DivideBy16(j)] = cdef_index_[row][column]; + } + } + } +} + +int Tile::ReadAndClipDelta(uint16_t* const cdf, int delta_small, int scale, + int min_value, int max_value, int value) { + int abs = reader_.ReadSymbol<kDeltaSymbolCount>(cdf); + if (abs == delta_small) { + const int remaining_bit_count = + static_cast<int>(reader_.ReadLiteral(3)) + 1; + const int abs_remaining_bits = + static_cast<int>(reader_.ReadLiteral(remaining_bit_count)); + abs = abs_remaining_bits + (1 << remaining_bit_count) + 1; + } + if (abs != 0) { + const bool sign = static_cast<bool>(reader_.ReadBit()); + const int scaled_abs = abs << scale; + const int reduced_delta = sign ? -scaled_abs : scaled_abs; + value += reduced_delta; + value = Clip3(value, min_value, max_value); + } + return value; +} + +void Tile::ReadQuantizerIndexDelta(const Block& block) { + assert(read_deltas_); + BlockParameters& bp = *block.bp; + if ((block.size == SuperBlockSize() && bp.skip)) { + return; + } + current_quantizer_index_ = + ReadAndClipDelta(symbol_decoder_context_.delta_q_cdf, kDeltaQSmall, + frame_header_.delta_q.scale, kMinLossyQuantizer, + kMaxQuantizer, current_quantizer_index_); +} + +void Tile::ReadLoopFilterDelta(const Block& block) { + assert(read_deltas_); + BlockParameters& bp = *block.bp; + if (!frame_header_.delta_lf.present || + (block.size == SuperBlockSize() && bp.skip)) { + return; + } + int frame_lf_count = 1; + if (frame_header_.delta_lf.multi) { + frame_lf_count = kFrameLfCount - (PlaneCount() > 1 ? 0 : 2); + } + bool recompute_deblock_filter_levels = false; + for (int i = 0; i < frame_lf_count; ++i) { + uint16_t* const delta_lf_abs_cdf = + frame_header_.delta_lf.multi + ? 
symbol_decoder_context_.delta_lf_multi_cdf[i] + : symbol_decoder_context_.delta_lf_cdf; + const int8_t old_delta_lf = delta_lf_[i]; + delta_lf_[i] = ReadAndClipDelta( + delta_lf_abs_cdf, kDeltaLfSmall, frame_header_.delta_lf.scale, + -kMaxLoopFilterValue, kMaxLoopFilterValue, delta_lf_[i]); + recompute_deblock_filter_levels = + recompute_deblock_filter_levels || (old_delta_lf != delta_lf_[i]); + } + delta_lf_all_zero_ = + (delta_lf_[0] | delta_lf_[1] | delta_lf_[2] | delta_lf_[3]) == 0; + if (!delta_lf_all_zero_ && recompute_deblock_filter_levels) { + post_filter_.ComputeDeblockFilterLevels(delta_lf_, deblock_filter_levels_); + } +} + +void Tile::ReadPredictionModeY(const Block& block, bool intra_y_mode) { + uint16_t* cdf; + if (intra_y_mode) { + const PredictionMode top_mode = + block.top_available[kPlaneY] ? block.bp_top->y_mode : kPredictionModeDc; + const PredictionMode left_mode = block.left_available[kPlaneY] + ? block.bp_left->y_mode + : kPredictionModeDc; + const int top_context = kIntraYModeContext[top_mode]; + const int left_context = kIntraYModeContext[left_mode]; + cdf = symbol_decoder_context_ + .intra_frame_y_mode_cdf[top_context][left_context]; + } else { + cdf = symbol_decoder_context_.y_mode_cdf[kSizeGroup[block.size]]; + } + block.bp->y_mode = static_cast<PredictionMode>( + reader_.ReadSymbol<kIntraPredictionModesY>(cdf)); +} + +void Tile::ReadIntraAngleInfo(const Block& block, PlaneType plane_type) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.angle_delta[plane_type] = 0; + const PredictionMode mode = + (plane_type == kPlaneTypeY) ? 
bp.y_mode : bp.uv_mode; + if (IsBlockSmallerThan8x8(block.size) || !IsDirectionalMode(mode)) return; + uint16_t* const cdf = + symbol_decoder_context_.angle_delta_cdf[mode - kPredictionModeVertical]; + prediction_parameters.angle_delta[plane_type] = + reader_.ReadSymbol<kAngleDeltaSymbolCount>(cdf); + prediction_parameters.angle_delta[plane_type] -= kMaxAngleDelta; +} + +void Tile::ReadCflAlpha(const Block& block) { + const int signs = reader_.ReadSymbol<kCflAlphaSignsSymbolCount>( + symbol_decoder_context_.cfl_alpha_signs_cdf); + const int8_t* const cfl_lookup = kCflAlphaLookup[signs]; + const auto sign_u = static_cast<CflSign>(cfl_lookup[0]); + const auto sign_v = static_cast<CflSign>(cfl_lookup[1]); + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.cfl_alpha_u = 0; + if (sign_u != kCflSignZero) { + assert(cfl_lookup[2] >= 0); + prediction_parameters.cfl_alpha_u = + reader_.ReadSymbol<kCflAlphaSymbolCount>( + symbol_decoder_context_.cfl_alpha_cdf[cfl_lookup[2]]) + + 1; + if (sign_u == kCflSignNegative) prediction_parameters.cfl_alpha_u *= -1; + } + prediction_parameters.cfl_alpha_v = 0; + if (sign_v != kCflSignZero) { + assert(cfl_lookup[3] >= 0); + prediction_parameters.cfl_alpha_v = + reader_.ReadSymbol<kCflAlphaSymbolCount>( + symbol_decoder_context_.cfl_alpha_cdf[cfl_lookup[3]]) + + 1; + if (sign_v == kCflSignNegative) prediction_parameters.cfl_alpha_v *= -1; + } +} + +void Tile::ReadPredictionModeUV(const Block& block) { + BlockParameters& bp = *block.bp; + bool chroma_from_luma_allowed; + if (frame_header_.segmentation.lossless[bp.segment_id]) { + chroma_from_luma_allowed = block.residual_size[kPlaneU] == kBlock4x4; + } else { + chroma_from_luma_allowed = IsBlockDimensionLessThan64(block.size); + } + uint16_t* const cdf = + symbol_decoder_context_ + .uv_mode_cdf[static_cast<int>(chroma_from_luma_allowed)][bp.y_mode]; + if (chroma_from_luma_allowed) { + bp.uv_mode = static_cast<PredictionMode>( + 
reader_.ReadSymbol<kIntraPredictionModesUV>(cdf)); + } else { + bp.uv_mode = static_cast<PredictionMode>( + reader_.ReadSymbol<kIntraPredictionModesUV - 1>(cdf)); + } +} + +int Tile::ReadMotionVectorComponent(const Block& block, const int component) { + const int context = + static_cast<int>(block.bp->prediction_parameters->use_intra_block_copy); + const bool sign = reader_.ReadSymbol( + symbol_decoder_context_.mv_sign_cdf[component][context]); + const int mv_class = reader_.ReadSymbol<kMvClassSymbolCount>( + symbol_decoder_context_.mv_class_cdf[component][context]); + int magnitude = 1; + int value; + uint16_t* fraction_cdf; + uint16_t* precision_cdf; + if (mv_class == 0) { + value = static_cast<int>(reader_.ReadSymbol( + symbol_decoder_context_.mv_class0_bit_cdf[component][context])); + fraction_cdf = symbol_decoder_context_ + .mv_class0_fraction_cdf[component][context][value]; + precision_cdf = symbol_decoder_context_ + .mv_class0_high_precision_cdf[component][context]; + } else { + assert(mv_class <= kMvBitSymbolCount); + value = 0; + for (int i = 0; i < mv_class; ++i) { + const int bit = static_cast<int>(reader_.ReadSymbol( + symbol_decoder_context_.mv_bit_cdf[component][context][i])); + value |= bit << i; + } + magnitude += 2 << (mv_class + 2); + fraction_cdf = symbol_decoder_context_.mv_fraction_cdf[component][context]; + precision_cdf = + symbol_decoder_context_.mv_high_precision_cdf[component][context]; + } + const int fraction = + (frame_header_.force_integer_mv == 0) + ? reader_.ReadSymbol<kMvFractionSymbolCount>(fraction_cdf) + : 3; + const int precision = + frame_header_.allow_high_precision_mv + ? static_cast<int>(reader_.ReadSymbol(precision_cdf)) + : 1; + magnitude += (value << 3) | (fraction << 1) | precision; + return sign ? 
-magnitude : magnitude; +} + +void Tile::ReadMotionVector(const Block& block, int index) { + BlockParameters& bp = *block.bp; + const int context = + static_cast<int>(block.bp->prediction_parameters->use_intra_block_copy); + const auto mv_joint = + static_cast<MvJointType>(reader_.ReadSymbol<kNumMvJointTypes>( + symbol_decoder_context_.mv_joint_cdf[context])); + if (mv_joint == kMvJointTypeHorizontalZeroVerticalNonZero || + mv_joint == kMvJointTypeNonZero) { + bp.mv.mv[index].mv[0] = ReadMotionVectorComponent(block, 0); + } + if (mv_joint == kMvJointTypeHorizontalNonZeroVerticalZero || + mv_joint == kMvJointTypeNonZero) { + bp.mv.mv[index].mv[1] = ReadMotionVectorComponent(block, 1); + } +} + +void Tile::ReadFilterIntraModeInfo(const Block& block) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.use_filter_intra = false; + if (!sequence_header_.enable_filter_intra || bp.y_mode != kPredictionModeDc || + bp.palette_mode_info.size[kPlaneTypeY] != 0 || + !IsBlockDimensionLessThan64(block.size)) { + return; + } + prediction_parameters.use_filter_intra = reader_.ReadSymbol( + symbol_decoder_context_.use_filter_intra_cdf[block.size]); + if (prediction_parameters.use_filter_intra) { + prediction_parameters.filter_intra_mode = static_cast<FilterIntraPredictor>( + reader_.ReadSymbol<kNumFilterIntraPredictors>( + symbol_decoder_context_.filter_intra_mode_cdf)); + } +} + +bool Tile::DecodeIntraModeInfo(const Block& block) { + BlockParameters& bp = *block.bp; + bp.skip = false; + if (frame_header_.segmentation.segment_id_pre_skip && + !ReadIntraSegmentId(block)) { + return false; + } + bp.skip_mode = false; + ReadSkip(block); + if (!frame_header_.segmentation.segment_id_pre_skip && + !ReadIntraSegmentId(block)) { + return false; + } + ReadCdef(block); + if (read_deltas_) { + ReadQuantizerIndexDelta(block); + ReadLoopFilterDelta(block); + read_deltas_ = false; + } + 
PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.use_intra_block_copy = false; + if (frame_header_.allow_intrabc) { + prediction_parameters.use_intra_block_copy = + reader_.ReadSymbol(symbol_decoder_context_.intra_block_copy_cdf); + } + if (prediction_parameters.use_intra_block_copy) { + bp.is_inter = true; + bp.reference_frame[0] = kReferenceFrameIntra; + bp.reference_frame[1] = kReferenceFrameNone; + bp.y_mode = kPredictionModeDc; + bp.uv_mode = kPredictionModeDc; + prediction_parameters.motion_mode = kMotionModeSimple; + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + bp.palette_mode_info.size[kPlaneTypeY] = 0; + bp.palette_mode_info.size[kPlaneTypeUV] = 0; + bp.interpolation_filter[0] = kInterpolationFilterBilinear; + bp.interpolation_filter[1] = kInterpolationFilterBilinear; + MvContexts dummy_mode_contexts; + FindMvStack(block, /*is_compound=*/false, &dummy_mode_contexts); + return AssignIntraMv(block); + } + bp.is_inter = false; + return ReadIntraBlockModeInfo(block, /*intra_y_mode=*/true); +} + +int8_t Tile::ComputePredictedSegmentId(const Block& block) const { + // If prev_segment_ids_ is null, treat it as if it pointed to a segmentation + // map containing all 0s. 
+ if (prev_segment_ids_ == nullptr) return 0; + + const int x_limit = std::min(frame_header_.columns4x4 - block.column4x4, + static_cast<int>(block.width4x4)); + const int y_limit = std::min(frame_header_.rows4x4 - block.row4x4, + static_cast<int>(block.height4x4)); + int8_t id = 7; + for (int y = 0; y < y_limit; ++y) { + for (int x = 0; x < x_limit; ++x) { + const int8_t prev_segment_id = + prev_segment_ids_->segment_id(block.row4x4 + y, block.column4x4 + x); + id = std::min(id, prev_segment_id); + } + } + return id; +} + +bool Tile::ReadInterSegmentId(const Block& block, bool pre_skip) { + BlockParameters& bp = *block.bp; + if (!frame_header_.segmentation.enabled) { + bp.segment_id = 0; + return true; + } + if (!frame_header_.segmentation.update_map) { + bp.segment_id = ComputePredictedSegmentId(block); + return true; + } + if (pre_skip) { + if (!frame_header_.segmentation.segment_id_pre_skip) { + bp.segment_id = 0; + return true; + } + } else if (bp.skip) { + bp.use_predicted_segment_id = false; + return ReadSegmentId(block); + } + if (frame_header_.segmentation.temporal_update) { + const int context = + (block.left_available[kPlaneY] + ? static_cast<int>(block.bp_left->use_predicted_segment_id) + : 0) + + (block.top_available[kPlaneY] + ? 
static_cast<int>(block.bp_top->use_predicted_segment_id) + : 0); + bp.use_predicted_segment_id = reader_.ReadSymbol( + symbol_decoder_context_.use_predicted_segment_id_cdf[context]); + if (bp.use_predicted_segment_id) { + bp.segment_id = ComputePredictedSegmentId(block); + return true; + } + } + return ReadSegmentId(block); +} + +void Tile::ReadIsInter(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.is_inter = true; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame)) { + bp.is_inter = + frame_header_.segmentation + .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame] != + kReferenceFrameIntra; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.is_inter = true; + return; + } + int context = 0; + if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + context = (block.IsTopIntra() && block.IsLeftIntra()) + ? 3 + : static_cast<int>(block.IsTopIntra() || block.IsLeftIntra()); + } else if (block.top_available[kPlaneY] || block.left_available[kPlaneY]) { + context = 2 * static_cast<int>(block.top_available[kPlaneY] + ? 
block.IsTopIntra() + : block.IsLeftIntra()); + } + bp.is_inter = + reader_.ReadSymbol(symbol_decoder_context_.is_inter_cdf[context]); +} + +bool Tile::ReadIntraBlockModeInfo(const Block& block, bool intra_y_mode) { + BlockParameters& bp = *block.bp; + bp.reference_frame[0] = kReferenceFrameIntra; + bp.reference_frame[1] = kReferenceFrameNone; + ReadPredictionModeY(block, intra_y_mode); + ReadIntraAngleInfo(block, kPlaneTypeY); + if (block.HasChroma()) { + ReadPredictionModeUV(block); + if (bp.uv_mode == kPredictionModeChromaFromLuma) { + ReadCflAlpha(block); + } + ReadIntraAngleInfo(block, kPlaneTypeUV); + } + ReadPaletteModeInfo(block); + ReadFilterIntraModeInfo(block); + return true; +} + +CompoundReferenceType Tile::ReadCompoundReferenceType(const Block& block) { + // compound and inter. + const bool top_comp_inter = block.top_available[kPlaneY] && + !block.IsTopIntra() && !block.IsTopSingle(); + const bool left_comp_inter = block.left_available[kPlaneY] && + !block.IsLeftIntra() && !block.IsLeftSingle(); + // unidirectional compound. + const bool top_uni_comp = + top_comp_inter && IsSameDirectionReferencePair(block.TopReference(0), + block.TopReference(1)); + const bool left_uni_comp = + left_comp_inter && IsSameDirectionReferencePair(block.LeftReference(0), + block.LeftReference(1)); + int context; + if (block.top_available[kPlaneY] && !block.IsTopIntra() && + block.left_available[kPlaneY] && !block.IsLeftIntra()) { + const int same_direction = static_cast<int>(IsSameDirectionReferencePair( + block.TopReference(0), block.LeftReference(0))); + if (!top_comp_inter && !left_comp_inter) { + context = 1 + MultiplyBy2(same_direction); + } else if (!top_comp_inter) { + context = left_uni_comp ? 3 + same_direction : 1; + } else if (!left_comp_inter) { + context = top_uni_comp ? 
3 + same_direction : 1; + } else { + if (!top_uni_comp && !left_uni_comp) { + context = 0; + } else if (!top_uni_comp || !left_uni_comp) { + context = 2; + } else { + context = 3 + static_cast<int>( + (block.TopReference(0) == kReferenceFrameBackward) == + (block.LeftReference(0) == kReferenceFrameBackward)); + } + } + } else if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) { + if (top_comp_inter) { + context = 1 + MultiplyBy2(static_cast<int>(top_uni_comp)); + } else if (left_comp_inter) { + context = 1 + MultiplyBy2(static_cast<int>(left_uni_comp)); + } else { + context = 2; + } + } else if (top_comp_inter) { + context = MultiplyBy4(static_cast<int>(top_uni_comp)); + } else if (left_comp_inter) { + context = MultiplyBy4(static_cast<int>(left_uni_comp)); + } else { + context = 2; + } + return static_cast<CompoundReferenceType>(reader_.ReadSymbol( + symbol_decoder_context_.compound_reference_type_cdf[context])); +} + +template <bool is_single, bool is_backward, int index> +uint16_t* Tile::GetReferenceCdf( + const Block& block, + CompoundReferenceType type /*= kNumCompoundReferenceTypes*/) { + int context = 0; + if ((type == kCompoundReferenceUnidirectional && index == 0) || + (is_single && index == 1)) { + // uni_comp_ref and single_ref_p1. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameGolden, + kReferenceFrameBackward, kReferenceFrameAlternate); + } else if (type == kCompoundReferenceUnidirectional && index == 1) { + // uni_comp_ref_p1. + context = + GetReferenceContext(block, kReferenceFrameLast2, kReferenceFrameLast2, + kReferenceFrameLast3, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceUnidirectional && index == 2) || + (type == kCompoundReferenceBidirectional && index == 2) || + (is_single && index == 5)) { + // uni_comp_ref_p2, comp_ref_p2 and single_ref_p5. 
+ context = + GetReferenceContext(block, kReferenceFrameLast3, kReferenceFrameLast3, + kReferenceFrameGolden, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceBidirectional && index == 0) || + (is_single && index == 3)) { + // comp_ref and single_ref_p3. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameLast2, + kReferenceFrameLast3, kReferenceFrameGolden); + } else if ((type == kCompoundReferenceBidirectional && index == 1) || + (is_single && index == 4)) { + // comp_ref_p1 and single_ref_p4. + context = + GetReferenceContext(block, kReferenceFrameLast, kReferenceFrameLast, + kReferenceFrameLast2, kReferenceFrameLast2); + } else if ((is_single && index == 2) || (is_backward && index == 0)) { + // single_ref_p2 and comp_bwdref. + context = GetReferenceContext( + block, kReferenceFrameBackward, kReferenceFrameAlternate2, + kReferenceFrameAlternate, kReferenceFrameAlternate); + } else if ((is_single && index == 6) || (is_backward && index == 1)) { + // single_ref_p6 and comp_bwdref_p1. + context = GetReferenceContext( + block, kReferenceFrameBackward, kReferenceFrameBackward, + kReferenceFrameAlternate2, kReferenceFrameAlternate2); + } + if (is_single) { + // The index parameter for single references is offset by one since the spec + // uses 1-based index for these elements. 
+ return symbol_decoder_context_.single_reference_cdf[context][index - 1]; + } + if (is_backward) { + return symbol_decoder_context_ + .compound_backward_reference_cdf[context][index]; + } + return symbol_decoder_context_.compound_reference_cdf[type][context][index]; +} + +void Tile::ReadReferenceFrames(const Block& block) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.reference_frame[0] = frame_header_.skip_mode_frame[0]; + bp.reference_frame[1] = frame_header_.skip_mode_frame[1]; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureReferenceFrame)) { + bp.reference_frame[0] = static_cast<ReferenceFrameType>( + frame_header_.segmentation + .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame]); + bp.reference_frame[1] = kReferenceFrameNone; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameNone; + return; + } + const bool use_compound_reference = + frame_header_.reference_mode_select && + std::min(block.width4x4, block.height4x4) >= 2 && + reader_.ReadSymbol(symbol_decoder_context_.use_compound_reference_cdf + [GetUseCompoundReferenceContext(block)]); + if (use_compound_reference) { + CompoundReferenceType reference_type = ReadCompoundReferenceType(block); + if (reference_type == kCompoundReferenceUnidirectional) { + // uni_comp_ref. + if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 0>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameBackward; + bp.reference_frame[1] = kReferenceFrameAlternate; + return; + } + // uni_comp_ref_p1. + if (!reader_.ReadSymbol( + GetReferenceCdf<false, false, 1>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameLast2; + return; + } + // uni_comp_ref_p2. 
+ if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 2>(block, reference_type))) { + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameGolden; + return; + } + bp.reference_frame[0] = kReferenceFrameLast; + bp.reference_frame[1] = kReferenceFrameLast3; + return; + } + assert(reference_type == kCompoundReferenceBidirectional); + // comp_ref. + if (reader_.ReadSymbol( + GetReferenceCdf<false, false, 0>(block, reference_type))) { + // comp_ref_p2. + bp.reference_frame[0] = + reader_.ReadSymbol( + GetReferenceCdf<false, false, 2>(block, reference_type)) + ? kReferenceFrameGolden + : kReferenceFrameLast3; + } else { + // comp_ref_p1. + bp.reference_frame[0] = + reader_.ReadSymbol( + GetReferenceCdf<false, false, 1>(block, reference_type)) + ? kReferenceFrameLast2 + : kReferenceFrameLast; + } + // comp_bwdref. + if (reader_.ReadSymbol(GetReferenceCdf<false, true, 0>(block))) { + bp.reference_frame[1] = kReferenceFrameAlternate; + } else { + // comp_bwdref_p1. + bp.reference_frame[1] = + reader_.ReadSymbol(GetReferenceCdf<false, true, 1>(block)) + ? kReferenceFrameAlternate2 + : kReferenceFrameBackward; + } + return; + } + assert(!use_compound_reference); + bp.reference_frame[1] = kReferenceFrameNone; + // single_ref_p1. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 1>(block))) { + // single_ref_p2. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 2>(block))) { + bp.reference_frame[0] = kReferenceFrameAlternate; + return; + } + // single_ref_p6. + bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 6>(block)) + ? kReferenceFrameAlternate2 + : kReferenceFrameBackward; + return; + } + // single_ref_p3. + if (reader_.ReadSymbol(GetReferenceCdf<true, false, 3>(block))) { + // single_ref_p5. + bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 5>(block)) + ? kReferenceFrameGolden + : kReferenceFrameLast3; + return; + } + // single_ref_p4. 
+ bp.reference_frame[0] = + reader_.ReadSymbol(GetReferenceCdf<true, false, 4>(block)) + ? kReferenceFrameLast2 + : kReferenceFrameLast; +} + +void Tile::ReadInterPredictionModeY(const Block& block, + const MvContexts& mode_contexts) { + BlockParameters& bp = *block.bp; + if (bp.skip_mode) { + bp.y_mode = kPredictionModeNearestNearestMv; + return; + } + if (frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureSkip) || + frame_header_.segmentation.FeatureActive(bp.segment_id, + kSegmentFeatureGlobalMv)) { + bp.y_mode = kPredictionModeGlobalMv; + return; + } + if (bp.reference_frame[1] > kReferenceFrameIntra) { + const int idx0 = mode_contexts.reference_mv >> 1; + const int idx1 = + std::min(mode_contexts.new_mv, kCompoundModeNewMvContexts - 1); + const int context = kCompoundModeContextMap[idx0][idx1]; + const int offset = reader_.ReadSymbol<kNumCompoundInterPredictionModes>( + symbol_decoder_context_.compound_prediction_mode_cdf[context]); + bp.y_mode = + static_cast<PredictionMode>(kPredictionModeNearestNearestMv + offset); + return; + } + // new_mv. + if (!reader_.ReadSymbol( + symbol_decoder_context_.new_mv_cdf[mode_contexts.new_mv])) { + bp.y_mode = kPredictionModeNewMv; + return; + } + // zero_mv. + if (!reader_.ReadSymbol( + symbol_decoder_context_.zero_mv_cdf[mode_contexts.zero_mv])) { + bp.y_mode = kPredictionModeGlobalMv; + return; + } + // ref_mv. + bp.y_mode = + reader_.ReadSymbol( + symbol_decoder_context_.reference_mv_cdf[mode_contexts.reference_mv]) + ? 
kPredictionModeNearMv + : kPredictionModeNearestMv; +} + +void Tile::ReadRefMvIndex(const Block& block) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.ref_mv_index = 0; + if (bp.y_mode != kPredictionModeNewMv && + bp.y_mode != kPredictionModeNewNewMv && + !kPredictionModeHasNearMvMask.Contains(bp.y_mode)) { + return; + } + const int start = + static_cast<int>(kPredictionModeHasNearMvMask.Contains(bp.y_mode)); + prediction_parameters.ref_mv_index = start; + for (int i = start; i < start + 2; ++i) { + if (prediction_parameters.ref_mv_count <= i + 1) break; + // drl_mode in the spec. + const bool ref_mv_index_bit = reader_.ReadSymbol( + symbol_decoder_context_.ref_mv_index_cdf[GetRefMvIndexContext( + prediction_parameters.nearest_mv_count, i)]); + prediction_parameters.ref_mv_index = i + static_cast<int>(ref_mv_index_bit); + if (!ref_mv_index_bit) return; + } +} + +void Tile::ReadInterIntraMode(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + prediction_parameters.inter_intra_mode = kNumInterIntraModes; + prediction_parameters.is_wedge_inter_intra = false; + if (bp.skip_mode || !sequence_header_.enable_interintra_compound || + is_compound || !kIsInterIntraModeAllowedMask.Contains(block.size)) { + return; + } + // kSizeGroup[block.size] is guaranteed to be non-zero because of the block + // size constraint enforced in the above condition. 
+ assert(kSizeGroup[block.size] - 1 >= 0); + if (!reader_.ReadSymbol( + symbol_decoder_context_ + .is_inter_intra_cdf[kSizeGroup[block.size] - 1])) { + prediction_parameters.inter_intra_mode = kNumInterIntraModes; + return; + } + prediction_parameters.inter_intra_mode = + static_cast<InterIntraMode>(reader_.ReadSymbol<kNumInterIntraModes>( + symbol_decoder_context_ + .inter_intra_mode_cdf[kSizeGroup[block.size] - 1])); + bp.reference_frame[1] = kReferenceFrameIntra; + prediction_parameters.angle_delta[kPlaneTypeY] = 0; + prediction_parameters.angle_delta[kPlaneTypeUV] = 0; + prediction_parameters.use_filter_intra = false; + prediction_parameters.is_wedge_inter_intra = reader_.ReadSymbol( + symbol_decoder_context_.is_wedge_inter_intra_cdf[block.size]); + if (!prediction_parameters.is_wedge_inter_intra) return; + prediction_parameters.wedge_index = + reader_.ReadSymbol<kWedgeIndexSymbolCount>( + symbol_decoder_context_.wedge_index_cdf[block.size]); + prediction_parameters.wedge_sign = 0; +} + +void Tile::ReadMotionMode(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + const auto global_motion_type = + frame_header_.global_motion[bp.reference_frame[0]].type; + if (bp.skip_mode || !frame_header_.is_motion_mode_switchable || + IsBlockDimension4(block.size) || + (frame_header_.force_integer_mv == 0 && + (bp.y_mode == kPredictionModeGlobalMv || + bp.y_mode == kPredictionModeGlobalGlobalMv) && + global_motion_type > kGlobalMotionTransformationTypeTranslation) || + is_compound || bp.reference_frame[1] == kReferenceFrameIntra || + !block.HasOverlappableCandidates()) { + prediction_parameters.motion_mode = kMotionModeSimple; + return; + } + prediction_parameters.num_warp_samples = 0; + int num_samples_scanned = 0; + memset(prediction_parameters.warp_estimate_candidates, 0, + sizeof(prediction_parameters.warp_estimate_candidates)); + FindWarpSamples(block, 
&prediction_parameters.num_warp_samples, + &num_samples_scanned, + prediction_parameters.warp_estimate_candidates); + if (frame_header_.force_integer_mv != 0 || + prediction_parameters.num_warp_samples == 0 || + !frame_header_.allow_warped_motion || IsScaled(bp.reference_frame[0])) { + prediction_parameters.motion_mode = + reader_.ReadSymbol(symbol_decoder_context_.use_obmc_cdf[block.size]) + ? kMotionModeObmc + : kMotionModeSimple; + return; + } + prediction_parameters.motion_mode = + static_cast<MotionMode>(reader_.ReadSymbol<kNumMotionModes>( + symbol_decoder_context_.motion_mode_cdf[block.size])); +} + +uint16_t* Tile::GetIsExplicitCompoundTypeCdf(const Block& block) { + int context = 0; + if (block.top_available[kPlaneY]) { + if (!block.IsTopSingle()) { + context += static_cast<int>(block.bp_top->is_explicit_compound_type); + } else if (block.TopReference(0) == kReferenceFrameAlternate) { + context += 3; + } + } + if (block.left_available[kPlaneY]) { + if (!block.IsLeftSingle()) { + context += static_cast<int>(block.bp_left->is_explicit_compound_type); + } else if (block.LeftReference(0) == kReferenceFrameAlternate) { + context += 3; + } + } + return symbol_decoder_context_.is_explicit_compound_type_cdf[std::min( + context, kIsExplicitCompoundTypeContexts - 1)]; +} + +uint16_t* Tile::GetIsCompoundTypeAverageCdf(const Block& block) { + const BlockParameters& bp = *block.bp; + const ReferenceInfo& reference_info = *current_frame_.reference_info(); + const int forward = + std::abs(reference_info.relative_distance_from[bp.reference_frame[0]]); + const int backward = + std::abs(reference_info.relative_distance_from[bp.reference_frame[1]]); + int context = (forward == backward) ? 
3 : 0; + if (block.top_available[kPlaneY]) { + if (!block.IsTopSingle()) { + context += static_cast<int>(block.bp_top->is_compound_type_average); + } else if (block.TopReference(0) == kReferenceFrameAlternate) { + ++context; + } + } + if (block.left_available[kPlaneY]) { + if (!block.IsLeftSingle()) { + context += static_cast<int>(block.bp_left->is_compound_type_average); + } else if (block.LeftReference(0) == kReferenceFrameAlternate) { + ++context; + } + } + return symbol_decoder_context_.is_compound_type_average_cdf[context]; +} + +void Tile::ReadCompoundType(const Block& block, bool is_compound) { + BlockParameters& bp = *block.bp; + bp.is_explicit_compound_type = false; + bp.is_compound_type_average = true; + PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + if (bp.skip_mode) { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + return; + } + if (is_compound) { + if (sequence_header_.enable_masked_compound) { + bp.is_explicit_compound_type = + reader_.ReadSymbol(GetIsExplicitCompoundTypeCdf(block)); + } + if (bp.is_explicit_compound_type) { + if (kIsWedgeCompoundModeAllowed.Contains(block.size)) { + // Only kCompoundPredictionTypeWedge and + // kCompoundPredictionTypeDiffWeighted are signaled explicitly. + prediction_parameters.compound_prediction_type = + static_cast<CompoundPredictionType>(reader_.ReadSymbol( + symbol_decoder_context_.compound_type_cdf[block.size])); + } else { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeDiffWeighted; + } + } else { + if (sequence_header_.enable_jnt_comp) { + bp.is_compound_type_average = + reader_.ReadSymbol(GetIsCompoundTypeAverageCdf(block)); + prediction_parameters.compound_prediction_type = + bp.is_compound_type_average ? 
kCompoundPredictionTypeAverage + : kCompoundPredictionTypeDistance; + } else { + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; + return; + } + } + if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge) { + prediction_parameters.wedge_index = + reader_.ReadSymbol<kWedgeIndexSymbolCount>( + symbol_decoder_context_.wedge_index_cdf[block.size]); + prediction_parameters.wedge_sign = static_cast<int>(reader_.ReadBit()); + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeDiffWeighted) { + prediction_parameters.mask_is_inverse = + static_cast<bool>(reader_.ReadBit()); + } + return; + } + if (prediction_parameters.inter_intra_mode != kNumInterIntraModes) { + prediction_parameters.compound_prediction_type = + prediction_parameters.is_wedge_inter_intra + ? kCompoundPredictionTypeWedge + : kCompoundPredictionTypeIntra; + return; + } + prediction_parameters.compound_prediction_type = + kCompoundPredictionTypeAverage; +} + +uint16_t* Tile::GetInterpolationFilterCdf(const Block& block, int direction) { + const BlockParameters& bp = *block.bp; + int context = MultiplyBy8(direction) + + MultiplyBy4(static_cast<int>(bp.reference_frame[1] > + kReferenceFrameIntra)); + int top_type = kNumExplicitInterpolationFilters; + if (block.top_available[kPlaneY]) { + if (block.bp_top->reference_frame[0] == bp.reference_frame[0] || + block.bp_top->reference_frame[1] == bp.reference_frame[0]) { + top_type = block.bp_top->interpolation_filter[direction]; + } + } + int left_type = kNumExplicitInterpolationFilters; + if (block.left_available[kPlaneY]) { + if (block.bp_left->reference_frame[0] == bp.reference_frame[0] || + block.bp_left->reference_frame[1] == bp.reference_frame[0]) { + left_type = block.bp_left->interpolation_filter[direction]; + } + } + if (left_type == top_type) { + context += left_type; + } else if (left_type == kNumExplicitInterpolationFilters) { + context += top_type; + } else 
if (top_type == kNumExplicitInterpolationFilters) { + context += left_type; + } else { + context += kNumExplicitInterpolationFilters; + } + return symbol_decoder_context_.interpolation_filter_cdf[context]; +} + +void Tile::ReadInterpolationFilter(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.interpolation_filter != kInterpolationFilterSwitchable) { + static_assert( + sizeof(bp.interpolation_filter) / sizeof(bp.interpolation_filter[0]) == + 2, + "Interpolation filter array size is not 2"); + for (auto& interpolation_filter : bp.interpolation_filter) { + interpolation_filter = frame_header_.interpolation_filter; + } + return; + } + bool interpolation_filter_present = true; + if (bp.skip_mode || + block.bp->prediction_parameters->motion_mode == kMotionModeLocalWarp) { + interpolation_filter_present = false; + } else if (!IsBlockDimension4(block.size) && + bp.y_mode == kPredictionModeGlobalMv) { + interpolation_filter_present = + frame_header_.global_motion[bp.reference_frame[0]].type == + kGlobalMotionTransformationTypeTranslation; + } else if (!IsBlockDimension4(block.size) && + bp.y_mode == kPredictionModeGlobalGlobalMv) { + interpolation_filter_present = + frame_header_.global_motion[bp.reference_frame[0]].type == + kGlobalMotionTransformationTypeTranslation || + frame_header_.global_motion[bp.reference_frame[1]].type == + kGlobalMotionTransformationTypeTranslation; + } + for (int i = 0; i < (sequence_header_.enable_dual_filter ? 2 : 1); ++i) { + bp.interpolation_filter[i] = + interpolation_filter_present + ? 
static_cast<InterpolationFilter>(
                  reader_.ReadSymbol<kNumExplicitInterpolationFilters>(
                      GetInterpolationFilterCdf(block, i)))
            : kInterpolationFilterEightTap;
  }
  if (!sequence_header_.enable_dual_filter) {
    // Only one filter is coded when dual filter is disabled; it applies to
    // both directions.
    bp.interpolation_filter[1] = bp.interpolation_filter[0];
  }
}

// Reads all the mode info for an inter coded block: reference frames,
// Y prediction mode, ref_mv_index, motion vectors, inter-intra mode, motion
// mode, compound type and interpolation filters. Returns false if motion
// vector assignment (AssignInterMv) fails.
bool Tile::ReadInterBlockModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  // Inter blocks never use palette mode, so clear the palette sizes up front.
  bp.palette_mode_info.size[kPlaneTypeY] = 0;
  bp.palette_mode_info.size[kPlaneTypeUV] = 0;
  ReadReferenceFrames(block);
  const bool is_compound = bp.reference_frame[1] > kReferenceFrameIntra;
  MvContexts mode_contexts;
  FindMvStack(block, is_compound, &mode_contexts);
  ReadInterPredictionModeY(block, mode_contexts);
  ReadRefMvIndex(block);
  if (!AssignInterMv(block, is_compound)) return false;
  ReadInterIntraMode(block, is_compound);
  ReadMotionMode(block, is_compound);
  ReadCompoundType(block, is_compound);
  ReadInterpolationFilter(block);
  return true;
}

// Decodes the mode info of one block for a frame that is not intra-only
// (dispatched from DecodeModeInfo below). Reads segment id, skip/skip_mode,
// cdef, quantizer/loop-filter deltas and the is_inter flag, then delegates to
// the inter or intra block reader. Returns false on failure.
bool Tile::DecodeInterModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  block.bp->prediction_parameters->use_intra_block_copy = false;
  bp.skip = false;
  if (!ReadInterSegmentId(block, /*pre_skip=*/true)) return false;
  ReadSkipMode(block);
  if (bp.skip_mode) {
    // skip_mode implies skip; no skip bit is read in this case.
    bp.skip = true;
  } else {
    ReadSkip(block);
  }
  if (!frame_header_.segmentation.segment_id_pre_skip &&
      !ReadInterSegmentId(block, /*pre_skip=*/false)) {
    return false;
  }
  ReadCdef(block);
  if (read_deltas_) {
    ReadQuantizerIndexDelta(block);
    ReadLoopFilterDelta(block);
    // Deltas are read at most once per superblock; clear the flag so
    // subsequent blocks skip this branch.
    read_deltas_ = false;
  }
  ReadIsInter(block);
  return bp.is_inter ? ReadInterBlockModeInfo(block)
                     : ReadIntraBlockModeInfo(block, /*intra_y_mode=*/false);
}

// Top-level mode info dispatch: intra-only frames use the intra path,
// everything else goes through DecodeInterModeInfo.
bool Tile::DecodeModeInfo(const Block& block) {
  return IsIntraFrame(frame_header_.frame_type) ?
DecodeIntraModeInfo(block) + : DecodeInterModeInfo(block); +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/palette.cc b/src/tile/bitstream/palette.cc new file mode 100644 index 0000000..674d210 --- /dev/null +++ b/src/tile/bitstream/palette.cc @@ -0,0 +1,319 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <memory> + +#include "src/obu_parser.h" +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/memory.h" +#include "src/utils/types.h" + +namespace libgav1 { + +int Tile::GetPaletteCache(const Block& block, PlaneType plane_type, + uint16_t* const cache) { + const int top_size = + (block.top_available[kPlaneY] && Mod64(MultiplyBy4(block.row4x4)) != 0) + ? block.bp_top->palette_mode_info.size[plane_type] + : 0; + const int left_size = block.left_available[kPlaneY] + ? block.bp_left->palette_mode_info.size[plane_type] + : 0; + if (left_size == 0 && top_size == 0) return 0; + // Merge the left and top colors in sorted order and store them in |cache|. + uint16_t dummy[1]; + const uint16_t* top = (top_size > 0) + ? block.bp_top->palette_mode_info.color[plane_type] + : dummy; + const uint16_t* left = + (left_size > 0) ? 
block.bp_left->palette_mode_info.color[plane_type]
                     : dummy;
  std::merge(top, top + top_size, left, left + left_size, cache);
  // Deduplicate the entries in |cache| and return the number of unique
  // entries.
  return static_cast<int>(
      std::distance(cache, std::unique(cache, cache + left_size + top_size)));
}

// Reads the palette colors for |plane|. Colors are coded in three stages:
// reused entries from the neighbor cache (one use_palette_color_cache bit
// each), then a literal base color, then strictly ascending deltas. The two
// sorted runs (cached and newly decoded) are merged at the end. For chroma,
// the V plane colors follow, either delta coded or as plain literals.
void Tile::ReadPaletteColors(const Block& block, Plane plane) {
  const PlaneType plane_type = GetPlaneType(plane);
  uint16_t cache[2 * kMaxPaletteSize];
  const int n = GetPaletteCache(block, plane_type, cache);
  BlockParameters& bp = *block.bp;
  const uint8_t palette_size = bp.palette_mode_info.size[plane_type];
  uint16_t* const palette_color = bp.palette_mode_info.color[plane];
  const int8_t bitdepth = sequence_header_.color_config.bitdepth;
  int index = 0;
  for (int i = 0; i < n && index < palette_size; ++i) {
    if (reader_.ReadBit() != 0) {  // use_palette_color_cache.
      palette_color[index++] = cache[i];
    }
  }
  // Boundary between the cached run and the newly decoded ascending run;
  // used by the inplace_merge below.
  const int merge_pivot = index;
  if (index < palette_size) {
    palette_color[index++] =
        static_cast<uint16_t>(reader_.ReadLiteral(bitdepth));
  }
  const int max_value = (1 << bitdepth) - 1;
  if (index < palette_size) {
    int bits = bitdepth - 3 + static_cast<int>(reader_.ReadLiteral(2));
    do {
      // For luma the coded delta is offset by 1 (colors are strictly
      // increasing); chroma U allows equal consecutive colors.
      const int delta = static_cast<int>(reader_.ReadLiteral(bits)) +
                        (plane_type == kPlaneTypeY ? 1 : 0);
      palette_color[index] =
          std::min(palette_color[index - 1] + delta, max_value);
      if (palette_color[index] + (plane_type == kPlaneTypeY ? 1 : 0) >=
          max_value) {
        // Once the color exceeds max_value, all others can be set to max_value
        // (since they are computed as a delta on top of the current color and
        // then clipped).
        Memset(&palette_color[index + 1], max_value, palette_size - index - 1);
        break;
      }
      // Shrink the coded delta width as the remaining value range shrinks.
      const int range = (1 << bitdepth) - palette_color[index] -
                        (plane_type == kPlaneTypeY ? 1 : 0);
      bits = std::min(bits, CeilLog2(range));
    } while (++index < palette_size);
  }
  // Palette colors are generated using two ascending arrays. So sorting them
  // is simply a matter of merging the two sorted portions of the array.
  std::inplace_merge(palette_color, palette_color + merge_pivot,
                     palette_color + palette_size);
  if (plane_type == kPlaneTypeUV) {
    uint16_t* const palette_color_v = bp.palette_mode_info.color[kPlaneV];
    if (reader_.ReadBit() != 0) {  // delta_encode_palette_colors_v.
      const int bits = bitdepth - 4 + static_cast<int>(reader_.ReadLiteral(2));
      palette_color_v[0] = reader_.ReadLiteral(bitdepth);
      for (int i = 1; i < palette_size; ++i) {
        int delta = static_cast<int>(reader_.ReadLiteral(bits));
        if (delta != 0 && reader_.ReadBit() != 0) delta = -delta;
        // This line is equivalent to the following lines in the spec:
        // val = palette_colors_v[ idx - 1 ] + palette_delta_v
        // if ( val < 0 ) val += maxVal
        // if ( val >= maxVal ) val -= maxVal
        // palette_colors_v[ idx ] = Clip1( val )
        //
        // The difference is that in the code, max_value is (1 << bitdepth) - 1.
        // So "& max_value" has the desired effect of computing both the "if"
        // conditions and the Clip.
        palette_color_v[i] = (palette_color_v[i - 1] + delta) & max_value;
      }
    } else {
      for (int i = 0; i < palette_size; ++i) {
        palette_color_v[i] =
            static_cast<uint16_t>(reader_.ReadLiteral(bitdepth));
      }
    }
  }
}

// Reads has_palette_y/has_palette_uv and, when set, the palette size and the
// palette colors for the corresponding planes. Palette mode is only allowed
// for block sizes in [8x8, 64x64] when screen content tools are enabled, and
// only for DC prediction modes.
void Tile::ReadPaletteModeInfo(const Block& block) {
  BlockParameters& bp = *block.bp;
  if (IsBlockSmallerThan8x8(block.size) || block.size > kBlock64x64 ||
      !frame_header_.allow_screen_content_tools) {
    bp.palette_mode_info.size[kPlaneTypeY] = 0;
    bp.palette_mode_info.size[kPlaneTypeUV] = 0;
    return;
  }
  const int block_size_context =
      k4x4WidthLog2[block.size] + k4x4HeightLog2[block.size] - 2;
  if (bp.y_mode == kPredictionModeDc) {
    // Context counts how many of the top/left neighbors use a Y palette.
    const int context =
        static_cast<int>(block.top_available[kPlaneY] &&
                         block.bp_top->palette_mode_info.size[kPlaneTypeY] >
                             0) +
        static_cast<int>(block.left_available[kPlaneY] &&
                         block.bp_left->palette_mode_info.size[kPlaneTypeY] >
                             0);
    const bool has_palette_y = reader_.ReadSymbol(
        symbol_decoder_context_.has_palette_y_cdf[block_size_context][context]);
    if (has_palette_y) {
      bp.palette_mode_info.size[kPlaneTypeY] =
          kMinPaletteSize +
          reader_.ReadSymbol<kPaletteSizeSymbolCount>(
              symbol_decoder_context_.palette_y_size_cdf[block_size_context]);
      ReadPaletteColors(block, kPlaneY);
    }
  }
  if (bp.uv_mode == kPredictionModeDc && block.HasChroma()) {
    const int context =
        static_cast<int>(bp.palette_mode_info.size[kPlaneTypeY] > 0);
    const bool has_palette_uv =
        reader_.ReadSymbol(symbol_decoder_context_.has_palette_uv_cdf[context]);
    if (has_palette_uv) {
      bp.palette_mode_info.size[kPlaneTypeUV] =
          kMinPaletteSize +
          reader_.ReadSymbol<kPaletteSizeSymbolCount>(
              symbol_decoder_context_.palette_uv_size_cdf[block_size_context]);
      ReadPaletteColors(block, kPlaneU);
    }
  }
}

void Tile::PopulatePaletteColorContexts(
    const Block& block, PlaneType plane_type, int i, int start, int end,
    uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize],
    uint8_t 
color_context[kMaxPaletteSquare]) {
  const PredictionParameters& prediction_parameters =
      *block.bp->prediction_parameters;
  // Walks one anti-diagonal (|i| = row + column) of the color index map from
  // |start| down to |end|, deriving for each position the symbol context and
  // the color ordering from the top, left and top-left neighbors.
  for (int column = start, counter = 0; column >= end; --column, ++counter) {
    const int row = i - column;
    assert(row > 0 || column > 0);
    const uint8_t top =
        (row > 0)
            ? prediction_parameters.color_index_map[plane_type][row - 1][column]
            : 0;
    const uint8_t left =
        (column > 0)
            ? prediction_parameters.color_index_map[plane_type][row][column - 1]
            : 0;
    uint8_t index_mask;
    static_assert(kMaxPaletteSize <= 8, "");
    int index;
    if (column <= 0) {
      // First column: only the top neighbor exists.
      color_context[counter] = 0;
      color_order[counter][0] = top;
      index_mask = 1 << top;
      index = 1;
    } else if (row <= 0) {
      // First row: only the left neighbor exists.
      color_context[counter] = 0;
      color_order[counter][0] = left;
      index_mask = 1 << left;
      index = 1;
    } else {
      const uint8_t top_left =
          prediction_parameters
              .color_index_map[plane_type][row - 1][column - 1];
      index_mask = (1 << top) | (1 << left) | (1 << top_left);
      // Context and ordering depend on which of the three neighbors agree.
      if (top == left && top == top_left) {
        color_context[counter] = 4;
        color_order[counter][0] = top;
        index = 1;
      } else if (top == left) {
        color_context[counter] = 3;
        color_order[counter][0] = top;
        color_order[counter][1] = top_left;
        index = 2;
      } else if (top == top_left) {
        color_context[counter] = 2;
        color_order[counter][0] = top_left;
        color_order[counter][1] = left;
        index = 2;
      } else if (left == top_left) {
        color_context[counter] = 2;
        color_order[counter][0] = top_left;
        color_order[counter][1] = top;
        index = 2;
      } else {
        color_context[counter] = 1;
        color_order[counter][0] = std::min(top, left);
        color_order[counter][1] = std::max(top, left);
        color_order[counter][2] = top_left;
        index = 3;
      }
    }
    // Even though only the first |palette_size| entries of this array are ever
    // used, it is faster to populate all 8 because of the vectorization of the
    // constant sized loop.
    for (uint8_t j = 0; j < kMaxPaletteSize; ++j) {
      if (BitMaskSet::MaskContainsValue(index_mask, j)) continue;
      color_order[counter][index++] = j;
    }
  }
}

// Reads the per-pixel palette color indices for all planes that use palette
// mode, filling prediction_parameters.color_index_map. Decoding proceeds in
// anti-diagonal order; positions outside the visible (on-screen) area are
// extended from the last decoded row/column. Returns false if the index map
// allocation fails.
bool Tile::ReadPaletteTokens(const Block& block) {
  const PaletteModeInfo& palette_mode_info = block.bp->palette_mode_info;
  PredictionParameters& prediction_parameters =
      *block.bp->prediction_parameters;
  for (int plane_type = kPlaneTypeY;
       plane_type < (block.HasChroma() ? kNumPlaneTypes : kPlaneTypeUV);
       ++plane_type) {
    const int palette_size = palette_mode_info.size[plane_type];
    if (palette_size == 0) continue;
    int block_height = block.height;
    int block_width = block.width;
    // The "screen" dimensions clip the block to the frame boundary.
    int screen_height = std::min(
        block_height, MultiplyBy4(frame_header_.rows4x4 - block.row4x4));
    int screen_width = std::min(
        block_width, MultiplyBy4(frame_header_.columns4x4 - block.column4x4));
    if (plane_type == kPlaneTypeUV) {
      block_height >>= sequence_header_.color_config.subsampling_y;
      block_width >>= sequence_header_.color_config.subsampling_x;
      screen_height >>= sequence_header_.color_config.subsampling_y;
      screen_width >>= sequence_header_.color_config.subsampling_x;
      if (block_height < 4) {
        block_height += 2;
        screen_height += 2;
      }
      if (block_width < 4) {
        block_width += 2;
        screen_width += 2;
      }
    }
    if (!prediction_parameters.color_index_map[plane_type].Reset(
            block_height, block_width, /*zero_initialize=*/false)) {
      return false;
    }
    int first_value = 0;
    reader_.DecodeUniform(palette_size, &first_value);
    prediction_parameters.color_index_map[plane_type][0][0] = first_value;
    for (int i = 1; i < screen_height + screen_width - 1; ++i) {
      const int start = std::min(i, screen_width - 1);
      const int end = std::max(0, i - screen_height + 1);
      uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize];
      uint8_t color_context[kMaxPaletteSquare];
      PopulatePaletteColorContexts(block, static_cast<PlaneType>(plane_type), i,
                                   start, end, color_order, color_context);
      for (int j 
= start, counter = 0; j >= end; --j, ++counter) { + uint16_t* const cdf = + symbol_decoder_context_ + .palette_color_index_cdf[plane_type] + [palette_size - kMinPaletteSize] + [color_context[counter]]; + const int color_order_index = reader_.ReadSymbol(cdf, palette_size); + prediction_parameters.color_index_map[plane_type][i - j][j] = + color_order[counter][color_order_index]; + } + } + if (screen_width < block_width) { + for (int i = 0; i < screen_height; ++i) { + memset( + &prediction_parameters.color_index_map[plane_type][i][screen_width], + prediction_parameters + .color_index_map[plane_type][i][screen_width - 1], + block_width - screen_width); + } + } + for (int i = screen_height; i < block_height; ++i) { + memcpy( + prediction_parameters.color_index_map[plane_type][i], + prediction_parameters.color_index_map[plane_type][screen_height - 1], + block_width); + } + } + return true; +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/partition.cc b/src/tile/bitstream/partition.cc new file mode 100644 index 0000000..f3dbbb0 --- /dev/null +++ b/src/tile/bitstream/partition.cc @@ -0,0 +1,148 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <cassert> +#include <cstdint> + +#include "src/symbol_decoder_context.h" +#include "src/tile.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/entropy_decoder.h" +#include "src/utils/types.h" + +namespace libgav1 { +namespace { + +uint16_t PartitionCdfGatherHorizontalAlike(const uint16_t* const partition_cdf, + BlockSize block_size) { + // The spec computes the cdf value using the following formula (not writing + // partition_cdf[] and using short forms for partition names for clarity): + // cdf = None - H + V - S + S - HTS + HTS - HBS + HBS - VLS; + // if (block_size != 128x128) { + // cdf += VRS - H4; + // } + // After canceling out the repeated terms with opposite signs, we have: + // cdf = None - H + V - VLS; + // if (block_size != 128x128) { + // cdf += VRS - H4; + // } + uint16_t cdf = partition_cdf[kPartitionNone] - + partition_cdf[kPartitionHorizontal] + + partition_cdf[kPartitionVertical] - + partition_cdf[kPartitionVerticalWithLeftSplit]; + if (block_size != kBlock128x128) { + cdf += partition_cdf[kPartitionVerticalWithRightSplit] - + partition_cdf[kPartitionHorizontal4]; + } + return cdf; +} + +uint16_t PartitionCdfGatherVerticalAlike(const uint16_t* const partition_cdf, + BlockSize block_size) { + // The spec computes the cdf value using the following formula (not writing + // partition_cdf[] and using short forms for partition names for clarity): + // cdf = H - V + V - S + HBS - VLS + VLS - VRS + S - HTS; + // if (block_size != 128x128) { + // cdf += H4 - V4; + // } + // V4 is always zero. So, after canceling out the repeated terms with opposite + // signs, we have: + // cdf = H + HBS - VRS - HTS; + // if (block_size != 128x128) { + // cdf += H4; + // } + // VRS is zero for 128x128 blocks. 
So, further simplifying we have: + // cdf = H + HBS - HTS; + // if (block_size != 128x128) { + // cdf += H4 - VRS; + // } + uint16_t cdf = partition_cdf[kPartitionHorizontal] + + partition_cdf[kPartitionHorizontalWithBottomSplit] - + partition_cdf[kPartitionHorizontalWithTopSplit]; + if (block_size != kBlock128x128) { + cdf += partition_cdf[kPartitionHorizontal4] - + partition_cdf[kPartitionVerticalWithRightSplit]; + } + return cdf; +} + +} // namespace + +uint16_t* Tile::GetPartitionCdf(int row4x4, int column4x4, + BlockSize block_size) { + const int block_size_log2 = k4x4WidthLog2[block_size]; + int top = 0; + if (IsTopInside(row4x4)) { + top = static_cast<int>( + k4x4WidthLog2[block_parameters_holder_.Find(row4x4 - 1, column4x4) + ->size] < block_size_log2); + } + int left = 0; + if (IsLeftInside(column4x4)) { + left = static_cast<int>( + k4x4HeightLog2[block_parameters_holder_.Find(row4x4, column4x4 - 1) + ->size] < block_size_log2); + } + const int context = left * 2 + top; + return symbol_decoder_context_.partition_cdf[block_size_log2 - 1][context]; +} + +bool Tile::ReadPartition(int row4x4, int column4x4, BlockSize block_size, + bool has_rows, bool has_columns, + Partition* const partition) { + if (IsBlockSmallerThan8x8(block_size)) { + *partition = kPartitionNone; + return true; + } + if (!has_rows && !has_columns) { + *partition = kPartitionSplit; + return true; + } + uint16_t* const partition_cdf = + GetPartitionCdf(row4x4, column4x4, block_size); + if (partition_cdf == nullptr) { + return false; + } + if (has_rows && has_columns) { + const int bsize_log2 = k4x4WidthLog2[block_size]; + // The partition block size should be 8x8 or above. 
+ assert(bsize_log2 > 0); + if (bsize_log2 == 1) { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kPartitionSplit + 1>(partition_cdf)); + } else if (bsize_log2 == 5) { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kPartitionVerticalWithRightSplit + 1>( + partition_cdf)); + } else { + *partition = static_cast<Partition>( + reader_.ReadSymbol<kMaxPartitionTypes>(partition_cdf)); + } + } else if (has_columns) { + const uint16_t cdf = + PartitionCdfGatherVerticalAlike(partition_cdf, block_size); + *partition = reader_.ReadSymbolWithoutCdfUpdate(cdf) ? kPartitionSplit + : kPartitionHorizontal; + } else { + const uint16_t cdf = + PartitionCdfGatherHorizontalAlike(partition_cdf, block_size); + *partition = reader_.ReadSymbolWithoutCdfUpdate(cdf) ? kPartitionSplit + : kPartitionVertical; + } + return true; +} + +} // namespace libgav1 diff --git a/src/tile/bitstream/transform_size.cc b/src/tile/bitstream/transform_size.cc new file mode 100644 index 0000000..b79851d --- /dev/null +++ b/src/tile/bitstream/transform_size.cc @@ -0,0 +1,222 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

#include <algorithm>
#include <cstdint>
#include <cstring>

#include "src/dsp/constants.h"
#include "src/obu_parser.h"
#include "src/symbol_decoder_context.h"
#include "src/tile.h"
#include "src/utils/array_2d.h"
#include "src/utils/block_parameters_holder.h"
#include "src/utils/common.h"
#include "src/utils/constants.h"
#include "src/utils/entropy_decoder.h"
#include "src/utils/segmentation.h"
#include "src/utils/stack.h"
#include "src/utils/types.h"

namespace libgav1 {
namespace {

constexpr uint8_t kMaxVariableTransformTreeDepth = 2;
// Max_Tx_Depth array from section 5.11.5 in the spec with the following
// modification: If the element is not zero, it is subtracted by one. That is
// the only way in which this array is being used.
constexpr int kTxDepthCdfIndex[kMaxBlockSizes] = {
    0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3};

// Largest rectangular transform size usable for each block size (the block's
// own shape, capped at 64x64 in each dimension).
constexpr TransformSize kMaxTransformSizeRectangle[kMaxBlockSizes] = {
    kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
    kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
    kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x64,
    kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x64, kTransformSize64x16, kTransformSize64x32,
    kTransformSize64x64, kTransformSize64x64, kTransformSize64x64,
    kTransformSize64x64};

// Maps a pixel dimension to the largest square transform size that does not
// exceed it (128 also maps to 64x64, the largest transform).
TransformSize GetSquareTransformSize(uint8_t pixels) {
  switch (pixels) {
    case 128:
    case 64:
      return kTransformSize64x64;
    case 32:
      return kTransformSize32x32;
    case 16:
      return kTransformSize16x16;
    case 8:
      return kTransformSize8x8;
    default:
      return kTransformSize4x4;
  }
}

}  // namespace

// Returns the width in pixels of the transform block directly above
// (row4x4, column4x4). Returns 64 when the top neighbor is unavailable; for
// skipped inter neighbors (or any inter neighbor when |ignore_skip| is set)
// the neighbor's full block width is used instead of its transform width.
int Tile::GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
                               bool ignore_skip) {
  if (row4x4 == block.row4x4) {
    if (!block.top_available[kPlaneY]) return 64;
    const BlockParameters& bp_top =
        *block_parameters_holder_.Find(row4x4 - 1, column4x4);
    if ((ignore_skip || bp_top.skip) && bp_top.is_inter) {
      return kBlockWidthPixels[bp_top.size];
    }
  }
  return kTransformWidth[inter_transform_sizes_[row4x4 - 1][column4x4]];
}

// Mirror of GetTopTransformWidth for the left neighbor: returns the height in
// pixels of the transform block directly to the left of (row4x4, column4x4).
int Tile::GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
                                 bool ignore_skip) {
  if (column4x4 == block.column4x4) {
    if (!block.left_available[kPlaneY]) return 64;
    const BlockParameters& bp_left =
        *block_parameters_holder_.Find(row4x4, column4x4 - 1);
    if ((ignore_skip || bp_left.skip) && bp_left.is_inter) {
      return kBlockHeightPixels[bp_left.size];
    }
  }
  return kTransformHeight[inter_transform_sizes_[row4x4][column4x4 - 1]];
}

// Determines the (fixed) transform size of the block: 4x4 for lossless
// segments; the maximal rectangular size when tx_mode is not kTxModeSelect,
// the block is 4x4, or the block is a skipped inter block; otherwise a
// tx_depth symbol is decoded (contexted on the neighbors' transform
// dimensions) and the maximal size is split tx_depth times.
TransformSize Tile::ReadFixedTransformSize(const Block& block) {
  BlockParameters& bp = *block.bp;
  if (frame_header_.segmentation.lossless[bp.segment_id]) {
    return kTransformSize4x4;
  }
  const TransformSize max_rect_tx_size = kMaxTransformSizeRectangle[block.size];
  const bool allow_select = !bp.skip || !bp.is_inter;
  if (block.size == kBlock4x4 || !allow_select ||
      frame_header_.tx_mode != kTxModeSelect) {
    return max_rect_tx_size;
  }
  const int max_tx_width = kTransformWidth[max_rect_tx_size];
  const int max_tx_height = kTransformHeight[max_rect_tx_size];
  const int top_width =
      block.top_available[kPlaneY]
          ? GetTopTransformWidth(block, block.row4x4, block.column4x4, true)
          : 0;
  const int left_height =
      block.left_available[kPlaneY]
          ? GetLeftTransformHeight(block, block.row4x4, block.column4x4, true)
          : 0;
  const auto context = static_cast<int>(top_width >= max_tx_width) +
                       static_cast<int>(left_height >= max_tx_height);
  const int cdf_index = kTxDepthCdfIndex[block.size];
  uint16_t* const cdf =
      symbol_decoder_context_.tx_depth_cdf[cdf_index][context];
  // cdf_index 0 corresponds to block sizes whose tx_depth is at most 1
  // (binary symbol); all others use a 3-ary symbol.
  const int tx_depth = (cdf_index == 0)
                           ? 
static_cast<int>(reader_.ReadSymbol(cdf)) + : reader_.ReadSymbol<3>(cdf); + assert(tx_depth < 3); + TransformSize tx_size = max_rect_tx_size; + if (tx_depth == 0) return tx_size; + tx_size = kSplitTransformSize[tx_size]; + if (tx_depth == 1) return tx_size; + return kSplitTransformSize[tx_size]; +} + +void Tile::ReadVariableTransformTree(const Block& block, int row4x4, + int column4x4, TransformSize tx_size) { + const uint8_t pixels = std::max(block.width, block.height); + const TransformSize max_tx_size = GetSquareTransformSize(pixels); + const int context_delta = (kNumSquareTransformSizes - 1 - + TransformSizeToSquareTransformIndex(max_tx_size)) * + 6; + + // Branching factor is 4 and maximum depth is 2. So the maximum stack size + // necessary is (4 - 1) + 4 = 7. + Stack<TransformTreeNode, 7> stack; + stack.Push(TransformTreeNode(column4x4, row4x4, tx_size, 0)); + + do { + TransformTreeNode node = stack.Pop(); + const int tx_width4x4 = kTransformWidth4x4[node.tx_size]; + const int tx_height4x4 = kTransformHeight4x4[node.tx_size]; + if (node.tx_size != kTransformSize4x4 && + node.depth != kMaxVariableTransformTreeDepth) { + const auto top = + static_cast<int>(GetTopTransformWidth(block, node.y, node.x, false) < + kTransformWidth[node.tx_size]); + const auto left = static_cast<int>( + GetLeftTransformHeight(block, node.y, node.x, false) < + kTransformHeight[node.tx_size]); + const int context = + static_cast<int>(max_tx_size > kTransformSize8x8 && + kTransformSizeSquareMax[node.tx_size] != + max_tx_size) * + 3 + + context_delta + top + left; + // tx_split. + if (reader_.ReadSymbol(symbol_decoder_context_.tx_split_cdf[context])) { + const TransformSize sub_tx_size = kSplitTransformSize[node.tx_size]; + const int step_width4x4 = kTransformWidth4x4[sub_tx_size]; + const int step_height4x4 = kTransformHeight4x4[sub_tx_size]; + // The loops have to run in reverse order because we use a stack for + // DFS. 
+ for (int i = tx_height4x4 - step_height4x4; i >= 0; + i -= step_height4x4) { + for (int j = tx_width4x4 - step_width4x4; j >= 0; + j -= step_width4x4) { + if (node.y + i >= frame_header_.rows4x4 || + node.x + j >= frame_header_.columns4x4) { + continue; + } + stack.Push(TransformTreeNode(node.x + j, node.y + i, sub_tx_size, + node.depth + 1)); + } + } + continue; + } + } + // tx_split is false. + for (int i = 0; i < tx_height4x4; ++i) { + static_assert(sizeof(TransformSize) == 1, ""); + memset(&inter_transform_sizes_[node.y + i][node.x], node.tx_size, + tx_width4x4); + } + block_parameters_holder_.Find(node.y, node.x)->transform_size = + node.tx_size; + } while (!stack.Empty()); +} + +void Tile::DecodeTransformSize(const Block& block) { + BlockParameters& bp = *block.bp; + if (frame_header_.tx_mode == kTxModeSelect && block.size > kBlock4x4 && + bp.is_inter && !bp.skip && + !frame_header_.segmentation.lossless[bp.segment_id]) { + const TransformSize max_tx_size = kMaxTransformSizeRectangle[block.size]; + const int tx_width4x4 = kTransformWidth4x4[max_tx_size]; + const int tx_height4x4 = kTransformHeight4x4[max_tx_size]; + for (int row = block.row4x4; row < block.row4x4 + block.height4x4; + row += tx_height4x4) { + for (int column = block.column4x4; + column < block.column4x4 + block.width4x4; column += tx_width4x4) { + ReadVariableTransformTree(block, row, column, max_tx_size); + } + } + } else { + bp.transform_size = ReadFixedTransformSize(block); + for (int row = block.row4x4; row < block.row4x4 + block.height4x4; ++row) { + static_assert(sizeof(TransformSize) == 1, ""); + memset(&inter_transform_sizes_[row][block.column4x4], bp.transform_size, + block.width4x4); + } + } +} + +} // namespace libgav1 diff --git a/src/tile/prediction.cc b/src/tile/prediction.cc new file mode 100644 index 0000000..c5560a6 --- /dev/null +++ b/src/tile/prediction.cc @@ -0,0 +1,1361 @@ +// Copyright 2019 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> + +#include "src/buffer_pool.h" +#include "src/dsp/constants.h" +#include "src/dsp/dsp.h" +#include "src/motion_vector.h" +#include "src/obu_parser.h" +#include "src/prediction_mask.h" +#include "src/tile.h" +#include "src/utils/array_2d.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/block_parameters_holder.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/logging.h" +#include "src/utils/memory.h" +#include "src/utils/types.h" +#include "src/warp_prediction.h" +#include "src/yuv_buffer.h" + +namespace libgav1 { +namespace { + +// Import all the constants in the anonymous namespace. +#include "src/inter_intra_masks.inc" + +// Precision bits when scaling reference frames. +constexpr int kReferenceScaleShift = 14; +constexpr int kAngleStep = 3; +constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = { + 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0}; + +// The following modes need both the left_column and top_row for intra +// prediction. For directional modes left/top requirement is inferred based on +// the prediction angle. For Dc modes, left/top requirement is inferred based on +// whether or not left/top is available. 
+constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical, + kPredictionModePaeth); + +int16_t GetDirectionalIntraPredictorDerivative(const int angle) { + assert(angle >= 3); + assert(angle <= 87); + return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1]; +} + +// Maps the block_size to an index as follows: +// kBlock8x8 => 0. +// kBlock8x16 => 1. +// kBlock8x32 => 2. +// kBlock16x8 => 3. +// kBlock16x16 => 4. +// kBlock16x32 => 5. +// kBlock32x8 => 6. +// kBlock32x16 => 7. +// kBlock32x32 => 8. +int GetWedgeBlockSizeIndex(BlockSize block_size) { + assert(block_size >= kBlock8x8); + return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) - + static_cast<int>(block_size >= kBlock32x8); +} + +// Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively. +int GetInterIntraMaskLookupIndex(int dimension) { + assert(dimension == 4 || dimension == 8 || dimension == 16 || + dimension == 32); + return FloorLog2(dimension) - 2; +} + +// 7.11.2.9. +int GetIntraEdgeFilterStrength(int width, int height, int filter_type, + int delta) { + const int sum = width + height; + delta = std::abs(delta); + if (filter_type == 0) { + if (sum <= 8) { + if (delta >= 56) return 1; + } else if (sum <= 16) { + if (delta >= 40) return 1; + } else if (sum <= 24) { + if (delta >= 32) return 3; + if (delta >= 16) return 2; + if (delta >= 8) return 1; + } else if (sum <= 32) { + if (delta >= 32) return 3; + if (delta >= 4) return 2; + return 1; + } else { + return 3; + } + } else { + if (sum <= 8) { + if (delta >= 64) return 2; + if (delta >= 40) return 1; + } else if (sum <= 16) { + if (delta >= 48) return 2; + if (delta >= 20) return 1; + } else if (sum <= 24) { + if (delta >= 4) return 3; + } else { + return 3; + } + } + return 0; +} + +// 7.11.2.10. 
+bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) { + const int sum = width + height; + delta = std::abs(delta); + // This function should not be called when the prediction angle is 90 or 180. + assert(delta != 0); + if (delta >= 40) return false; + return (filter_type == 1) ? sum <= 8 : sum <= 16; +} + +constexpr uint8_t kQuantizedDistanceWeight[4][2] = { + {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}}; + +constexpr uint8_t kQuantizedDistanceLookup[4][2] = { + {9, 7}, {11, 5}, {12, 4}, {13, 3}}; + +void GetDistanceWeights(const int distance[2], int weight[2]) { + // Note: distance[0] and distance[1] correspond to relative distance + // between current frame and reference frame [1] and [0], respectively. + const int order = static_cast<int>(distance[0] <= distance[1]); + if (distance[0] == 0 || distance[1] == 0) { + weight[0] = kQuantizedDistanceLookup[3][order]; + weight[1] = kQuantizedDistanceLookup[3][1 - order]; + } else { + int i; + for (i = 0; i < 3; ++i) { + const int weight_0 = kQuantizedDistanceWeight[i][order]; + const int weight_1 = kQuantizedDistanceWeight[i][1 - order]; + if (order == 0) { + if (distance[0] * weight_0 < distance[1] * weight_1) break; + } else { + if (distance[0] * weight_0 > distance[1] * weight_1) break; + } + } + weight[0] = kQuantizedDistanceLookup[i][order]; + weight[1] = kQuantizedDistanceLookup[i][1 - order]; + } +} + +dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left, + bool has_top) { + if (mode == kPredictionModeDc) { + if (has_left && has_top) { + return dsp::kIntraPredictorDc; + } + if (has_left) { + return dsp::kIntraPredictorDcLeft; + } + if (has_top) { + return dsp::kIntraPredictorDcTop; + } + return dsp::kIntraPredictorDcFill; + } + switch (mode) { + case kPredictionModePaeth: + return dsp::kIntraPredictorPaeth; + case kPredictionModeSmooth: + return dsp::kIntraPredictorSmooth; + case kPredictionModeSmoothVertical: + return dsp::kIntraPredictorSmoothVertical; + case 
kPredictionModeSmoothHorizontal: + return dsp::kIntraPredictorSmoothHorizontal; + default: + return dsp::kNumIntraPredictors; + } +} + +uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane, + const int x, const int y, const int bitdepth) { +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (bitdepth > 8) { + Array2DView<uint16_t> buffer16( + buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t), + reinterpret_cast<uint16_t*>(&buffer[plane][0][0])); + return reinterpret_cast<uint8_t*>(&buffer16[y][x]); + } +#endif // LIBGAV1_MAX_BITDEPTH >= 10 + static_cast<void>(bitdepth); + return &buffer[plane][y][x]; +} + +int GetPixelPositionFromHighScale(int start, int step, int offset) { + return (start + step * offset) >> kScaleSubPixelBits; +} + +dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra, + bool is_wedge_inter_intra, + int subsampling_x, int subsampling_y) { + return (is_inter_intra && !is_wedge_inter_intra) + ? dsp.mask_blend[0][/*is_inter_intra=*/true] + : dsp.mask_blend[subsampling_x + subsampling_y][is_inter_intra]; +} + +} // namespace + +template <typename Pixel> +void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool has_top_right, + bool has_bottom_left, PredictionMode mode, + TransformSize tx_size) { + const int width = 1 << kTransformWidthLog2[tx_size]; + const int height = 1 << kTransformHeightLog2[tx_size]; + const int x_shift = subsampling_x_[plane]; + const int y_shift = subsampling_y_[plane]; + const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1; + const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1; + // For performance reasons, do not initialize the following two buffers. 
+ alignas(kMaxAlignment) Pixel top_row_data[160]; + alignas(kMaxAlignment) Pixel left_column_data[160]; +#if LIBGAV1_MSAN + if (IsDirectionalMode(mode)) { + memset(top_row_data, 0, sizeof(top_row_data)); + memset(left_column_data, 0, sizeof(left_column_data)); + } +#endif + // Some predictors use |top_row_data| and |left_column_data| with a negative + // offset to access pixels to the top-left of the current block. So have some + // space before the arrays to allow populating those without having to move + // the rest of the array. + Pixel* const top_row = top_row_data + 16; + Pixel* const left_column = left_column_data + 16; + const int bitdepth = sequence_header_.color_config.bitdepth; + const int top_and_left_size = width + height; + const bool is_directional_mode = IsDirectionalMode(mode); + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + const bool use_filter_intra = + (plane == kPlaneY && prediction_parameters.use_filter_intra); + const int prediction_angle = + is_directional_mode + ? kPredictionModeToAngle[mode] + + prediction_parameters.angle_delta[GetPlaneType(plane)] * + kAngleStep + : 0; + // Directional prediction requires buffers larger than the width or height. + const int top_size = is_directional_mode ? top_and_left_size : width; + const int left_size = is_directional_mode ? top_and_left_size : height; + const int top_right_size = + is_directional_mode ? (has_top_right ? 2 : 1) * width : width; + const int bottom_left_size = + is_directional_mode ? (has_bottom_left ? 
2 : 1) * height : height; + + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle < 180) || + (mode == kPredictionModeDc && has_top); + const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) || + (is_directional_mode && prediction_angle > 90) || + (mode == kPredictionModeDc && has_left); + + const Pixel* top_row_src = buffer[y - 1]; + + // Determine if we need to retrieve the top row from + // |intra_prediction_buffer_|. + if ((needs_top || needs_left) && use_intra_prediction_buffer_) { + // Superblock index of block.row4x4. block.row4x4 is always in luma + // dimension (no subsampling). + const int current_superblock_index = + block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4); + // Superblock index of y - 1. y is in the plane dimension (chroma planes + // could be subsampled). + const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) - + subsampling_y_[plane]; + const int top_row_superblock_index = (y - 1) >> plane_shift; + // If the superblock index of y - 1 is not that of the current superblock, + // then we will have to retrieve the top row from the + // |intra_prediction_buffer_|. + if (current_superblock_index != top_row_superblock_index) { + top_row_src = reinterpret_cast<const Pixel*>( + (*intra_prediction_buffer_)[plane].get()); + } + } + + if (needs_top) { + // Compute top_row. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + top_row[-1] = has_top ? 
top_row_src[left_index] : buffer[y][left_index]; + } else { + top_row[-1] = 1 << (bitdepth - 1); + } + if (!has_top && has_left) { + Memset(top_row, buffer[y][x - 1], top_size); + } else if (!has_top && !has_left) { + Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size); + } else { + const int top_limit = std::min(max_x - x + 1, top_right_size); + memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel)); + // Even though it is safe to call Memset with a size of 0, accessing + // top_row_src[top_limit - x + 1] is not allowed when this condition is + // false. + if (top_size - top_limit > 0) { + Memset(top_row + top_limit, top_row_src[top_limit + x - 1], + top_size - top_limit); + } + } + } + if (needs_left) { + // Compute left_column. + if (has_top || has_left) { + const int left_index = has_left ? x - 1 : x; + left_column[-1] = + has_top ? top_row_src[left_index] : buffer[y][left_index]; + } else { + left_column[-1] = 1 << (bitdepth - 1); + } + if (!has_left && has_top) { + Memset(left_column, top_row_src[x], left_size); + } else if (!has_left && !has_top) { + Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size); + } else { + const int left_limit = std::min(max_y - y + 1, bottom_left_size); + for (int i = 0; i < left_limit; ++i) { + left_column[i] = buffer[y + i][x - 1]; + } + // Even though it is safe to call Memset with a size of 0, accessing + // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is + // false. 
+ if (left_size - left_limit > 0) { + Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1], + left_size - left_limit); + } + } + } + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t dest_stride = buffer_[plane].columns(); + if (use_filter_intra) { + dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column, + prediction_parameters.filter_intra_mode, width, + height); + } else if (is_directional_mode) { + DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left, + needs_top, prediction_angle, width, height, max_x, + max_y, tx_size, top_row, left_column); + } else { + const dsp::IntraPredictor predictor = + GetIntraPredictor(mode, has_left, has_top); + assert(predictor != dsp::kNumIntraPredictors); + dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row, + left_column); + } +} + +template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane, + int x, int y, bool has_left, + bool has_top, bool has_top_right, + bool has_bottom_left, + PredictionMode mode, + TransformSize tx_size); +#endif + +constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth, + kPredictionModeSmoothHorizontal, + kPredictionModeSmoothVertical); + +bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const { + const BlockParameters& bp = *block_parameters_holder_.Find(row, column); + PredictionMode mode; + if (plane == kPlaneY) { + mode = bp.y_mode; + } else { + if (bp.reference_frame[0] > kReferenceFrameIntra) return false; + mode = bp.uv_mode; + } + return kPredictionModeSmoothMask.Contains(mode); +} + +int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = 
subsampling_y_[plane]; + if (block.top_available[plane]) { + const int row = block.row4x4 - 1 - (block.row4x4 & subsampling_y); + const int column = block.column4x4 + (~block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + if (block.left_available[plane]) { + const int row = block.row4x4 + (~block.row4x4 & subsampling_y); + const int column = block.column4x4 - 1 - (block.column4x4 & subsampling_x); + if (IsSmoothPrediction(row, column, plane)) return 1; + } + return 0; +} + +template <typename Pixel> +void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y, + bool has_left, bool has_top, bool needs_left, + bool needs_top, int prediction_angle, + int width, int height, int max_x, int max_y, + TransformSize tx_size, Pixel* const top_row, + Pixel* const left_column) { + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + Pixel* const dest = &buffer[y][x]; + const ptrdiff_t stride = buffer_[plane].columns(); + if (prediction_angle == 90) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical]( + dest, stride, top_row, left_column); + return; + } + if (prediction_angle == 180) { + dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal]( + dest, stride, top_row, left_column); + return; + } + + bool upsampled_top = false; + bool upsampled_left = false; + if (sequence_header_.enable_intra_edge_filter) { + const int filter_type = GetIntraEdgeFilterType(block, plane); + if (prediction_angle > 90 && prediction_angle < 180 && + (width + height) >= 24) { + // 7.11.2.7. 
+ left_column[-1] = top_row[-1] = RightShiftWithRounding( + left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4); + } + if (has_top && needs_top) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 90); + if (strength > 0) { + const int num_pixels = std::min(width, max_x - x + 1) + + ((prediction_angle < 90) ? height : 0) + 1; + dsp_.intra_edge_filter(top_row - 1, num_pixels, strength); + } + } + if (has_left && needs_left) { + const int strength = GetIntraEdgeFilterStrength( + width, height, filter_type, prediction_angle - 180); + if (strength > 0) { + const int num_pixels = std::min(height, max_y - y + 1) + + ((prediction_angle > 180) ? width : 0) + 1; + dsp_.intra_edge_filter(left_column - 1, num_pixels, strength); + } + } + upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 90); + if (upsampled_top && needs_top) { + const int num_pixels = width + ((prediction_angle < 90) ? height : 0); + dsp_.intra_edge_upsampler(top_row, num_pixels); + } + upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type, + prediction_angle - 180); + if (upsampled_left && needs_left) { + const int num_pixels = height + ((prediction_angle > 180) ? 
width : 0); + dsp_.intra_edge_upsampler(left_column, num_pixels); + } + } + + if (prediction_angle < 90) { + const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle); + dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height, + dx, upsampled_top); + } else if (prediction_angle < 180) { + const int dx = + GetDirectionalIntraPredictorDerivative(180 - prediction_angle); + const int dy = + GetDirectionalIntraPredictorDerivative(prediction_angle - 90); + dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column, + width, height, dx, dy, upsampled_top, + upsampled_left); + } else { + assert(prediction_angle < 270); + const int dy = + GetDirectionalIntraPredictorDerivative(270 - prediction_angle); + dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width, + height, dy, upsampled_left); + } +} + +template <typename Pixel> +void Tile::PalettePrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, const int x, + const int y, const TransformSize tx_size) { + const int tx_width = kTransformWidth[tx_size]; + const int tx_height = kTransformHeight[tx_size]; + const uint16_t* const palette = block.bp->palette_mode_info.color[plane]; + const PlaneType plane_type = GetPlaneType(plane); + const int x4 = MultiplyBy4(x); + const int y4 = MultiplyBy4(y); + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + for (int row = 0; row < tx_height; ++row) { + assert(block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row] != nullptr); + for (int column = 0; column < tx_width; ++column) { + buffer[start_y + row][start_x + column] = + palette[block.bp->prediction_parameters + ->color_index_map[plane_type][y4 + row][x4 + column]]; + } + } +} + +template void Tile::PalettePrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, 
const int y, const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::PalettePrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const int x, const int y, const TransformSize tx_size); +#endif + +template <typename Pixel> +void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane, + const int start_x, const int start_y, + const TransformSize tx_size) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + Array2DView<Pixel> y_buffer( + buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0])); + if (!block.scratch_buffer->cfl_luma_buffer_valid) { + const int luma_x = start_x << subsampling_x; + const int luma_y = start_y << subsampling_y; + dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y]( + block.scratch_buffer->cfl_luma_buffer, + prediction_parameters.max_luma_width - luma_x, + prediction_parameters.max_luma_height - luma_y, + reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]), + buffer_[kPlaneY].columns()); + block.scratch_buffer->cfl_luma_buffer_valid = true; + } + Array2DView<Pixel> buffer(buffer_[plane].rows(), + buffer_[plane].columns() / sizeof(Pixel), + reinterpret_cast<Pixel*>(&buffer_[plane][0][0])); + dsp_.cfl_intra_predictors[tx_size]( + reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]), + buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer, + (plane == kPlaneU) ? 
prediction_parameters.cfl_alpha_u + : prediction_parameters.cfl_alpha_v); +} + +template void Tile::ChromaFromLumaPrediction<uint8_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#if LIBGAV1_MAX_BITDEPTH >= 10 +template void Tile::ChromaFromLumaPrediction<uint16_t>( + const Block& block, const Plane plane, const int start_x, const int start_y, + const TransformSize tx_size); +#endif + +void Tile::InterIntraPrediction( + uint16_t* const prediction_0, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, + const PredictionParameters& prediction_parameters, + const int prediction_width, const int prediction_height, + const int subsampling_x, const int subsampling_y, uint8_t* const dest, + const ptrdiff_t dest_stride) { + assert(prediction_mask != nullptr); + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra || + prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge); + // The first buffer of InterIntra is from inter prediction. + // The second buffer is from intra prediction. +#if LIBGAV1_MAX_BITDEPTH >= 10 + if (sequence_header_.color_config.bitdepth > 8) { + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true, + prediction_parameters.is_wedge_inter_intra, subsampling_x, + subsampling_y)( + prediction_0, reinterpret_cast<uint16_t*>(dest), + dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, dest, dest_stride); + return; + } +#endif + const int function_index = prediction_parameters.is_wedge_inter_intra + ? subsampling_x + subsampling_y + : 0; + // |is_inter_intra| prediction values are stored in a Pixel buffer but it is + // currently declared as a uint16_t buffer. + // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and + // remove the reinterpret_cast. 
+ dsp_.inter_intra_mask_blend_8bpp[function_index]( + reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride, + prediction_mask, prediction_mask_stride, prediction_width, + prediction_height); +} + +void Tile::CompoundInterPrediction( + const Block& block, const uint8_t* const prediction_mask, + const ptrdiff_t prediction_mask_stride, const int prediction_width, + const int prediction_height, const int subsampling_x, + const int subsampling_y, const int candidate_row, + const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) { + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + + void* prediction[2]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + const int bitdepth = sequence_header_.color_config.bitdepth; + if (bitdepth > 8) { + prediction[0] = block.scratch_buffer->prediction_buffer[0]; + prediction[1] = block.scratch_buffer->prediction_buffer[1]; + } else { +#endif + prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0]; + prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1]; +#if LIBGAV1_MAX_BITDEPTH >= 10 + } +#endif + + switch (prediction_parameters.compound_prediction_type) { + case kCompoundPredictionTypeWedge: + case kCompoundPredictionTypeDiffWeighted: + GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false, + prediction_parameters.is_wedge_inter_intra, + subsampling_x, subsampling_y)( + prediction[0], prediction[1], + /*prediction_stride=*/prediction_width, prediction_mask, + prediction_mask_stride, prediction_width, prediction_height, dest, + dest_stride); + break; + case kCompoundPredictionTypeDistance: + DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width, + prediction_height, candidate_row, + candidate_column, dest, dest_stride); + break; + default: + assert(prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeAverage); + dsp_.average_blend(prediction[0], prediction[1], prediction_width, + prediction_height, dest, dest_stride); 
+ break; + } +} + +GlobalMotion* Tile::GetWarpParams( + const Block& block, const Plane plane, const int prediction_width, + const int prediction_height, + const PredictionParameters& prediction_parameters, + const ReferenceFrameType reference_type, bool* const is_local_valid, + GlobalMotion* const global_motion_params, + GlobalMotion* const local_warp_params) const { + if (prediction_width < 8 || prediction_height < 8 || + frame_header_.force_integer_mv == 1) { + return nullptr; + } + if (plane == kPlaneY) { + *is_local_valid = + prediction_parameters.motion_mode == kMotionModeLocalWarp && + WarpEstimation( + prediction_parameters.num_warp_samples, DivideBy4(prediction_width), + DivideBy4(prediction_height), block.row4x4, block.column4x4, + block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates, + local_warp_params) && + SetupShear(local_warp_params); + } + if (prediction_parameters.motion_mode == kMotionModeLocalWarp && + *is_local_valid) { + return local_warp_params; + } + if (!IsScaled(reference_type)) { + GlobalMotionTransformationType global_motion_type = + (reference_type != kReferenceFrameIntra) + ? global_motion_params->type + : kNumGlobalMotionTransformationTypes; + const bool is_global_valid = + IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) && + SetupShear(global_motion_params); + // Valid global motion type implies reference type can't be intra. 
+ assert(!is_global_valid || reference_type != kReferenceFrameIntra); + if (is_global_valid) return global_motion_params; + } + return nullptr; +} + +bool Tile::InterPrediction(const Block& block, const Plane plane, const int x, + const int y, const int prediction_width, + const int prediction_height, int candidate_row, + int candidate_column, bool* const is_local_valid, + GlobalMotion* const local_warp_params) { + const int bitdepth = sequence_header_.color_config.bitdepth; + const BlockParameters& bp = *block.bp; + const BlockParameters& bp_reference = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const bool is_compound = + bp_reference.reference_frame[1] > kReferenceFrameIntra; + assert(bp.is_inter); + const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra; + + const PredictionParameters& prediction_parameters = + *block.bp->prediction_parameters; + uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t dest_stride = buffer_[plane].columns(); // In bytes. + for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) { + const ReferenceFrameType reference_type = + bp_reference.reference_frame[index]; + GlobalMotion global_motion_params = + frame_header_.global_motion[reference_type]; + GlobalMotion* warp_params = + GetWarpParams(block, plane, prediction_width, prediction_height, + prediction_parameters, reference_type, is_local_valid, + &global_motion_params, local_warp_params); + if (warp_params != nullptr) { + if (!BlockWarpProcess(block, plane, index, x, y, prediction_width, + prediction_height, warp_params, is_compound, + is_inter_intra, dest, dest_stride)) { + return false; + } + } else { + const int reference_index = + prediction_parameters.use_intra_block_copy + ? 
-1 + : frame_header_.reference_frame_index[reference_type - + kReferenceFrameLast]; + if (!BlockInterPrediction( + block, plane, reference_index, bp_reference.mv.mv[index], x, y, + prediction_width, prediction_height, candidate_row, + candidate_column, block.scratch_buffer->prediction_buffer[index], + is_compound, is_inter_intra, dest, dest_stride)) { + return false; + } + } + } + + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + ptrdiff_t prediction_mask_stride = 0; + const uint8_t* prediction_mask = nullptr; + if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeWedge) { + const Array2D<uint8_t>& wedge_mask = + wedge_masks_[GetWedgeBlockSizeIndex(block.size)] + [prediction_parameters.wedge_sign] + [prediction_parameters.wedge_index]; + prediction_mask = wedge_mask[0]; + prediction_mask_stride = wedge_mask.columns(); + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeIntra) { + // 7.11.3.13. The inter intra masks are precomputed and stored as a set of + // look up tables. 
+ assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes); + prediction_mask = + kInterIntraMasks[prediction_parameters.inter_intra_mode] + [GetInterIntraMaskLookupIndex(prediction_width)] + [GetInterIntraMaskLookupIndex(prediction_height)]; + prediction_mask_stride = prediction_width; + } else if (prediction_parameters.compound_prediction_type == + kCompoundPredictionTypeDiffWeighted) { + if (plane == kPlaneY) { + assert(prediction_width >= 8); + assert(prediction_height >= 8); + dsp_.weight_mask[FloorLog2(prediction_width) - 3] + [FloorLog2(prediction_height) - 3] + [static_cast<int>(prediction_parameters.mask_is_inverse)]( + block.scratch_buffer->prediction_buffer[0], + block.scratch_buffer->prediction_buffer[1], + block.scratch_buffer->weight_mask, + kMaxSuperBlockSizeInPixels); + } + prediction_mask = block.scratch_buffer->weight_mask; + prediction_mask_stride = kMaxSuperBlockSizeInPixels; + } + + if (is_compound) { + CompoundInterPrediction(block, prediction_mask, prediction_mask_stride, + prediction_width, prediction_height, subsampling_x, + subsampling_y, candidate_row, candidate_column, + dest, dest_stride); + } else if (prediction_parameters.motion_mode == kMotionModeObmc) { + // Obmc mode is allowed only for single reference (!is_compound). + return ObmcPrediction(block, plane, prediction_width, prediction_height); + } else if (is_inter_intra) { + // InterIntra and obmc must be mutually exclusive. 
+ InterIntraPrediction( + block.scratch_buffer->prediction_buffer[0], prediction_mask, + prediction_mask_stride, prediction_parameters, prediction_width, + prediction_height, subsampling_x, subsampling_y, dest, dest_stride); + } + return true; +} + +bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv, + const Plane plane, + const int reference_frame_index, const int width, + const int height, const int x, const int y, + const int candidate_row, + const int candidate_column, + const ObmcDirection blending_direction) { + const int bitdepth = sequence_header_.color_config.bitdepth; + // Obmc's prediction needs to be clipped before blending with above/left + // prediction blocks. + // Obmc prediction is used only when is_compound is false. So it is safe to + // use prediction_buffer[1] as a temporary buffer for the Obmc prediction. + static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >= + 64 * 64 * sizeof(uint16_t), + ""); + auto* const obmc_buffer = + reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]); + const ptrdiff_t obmc_buffer_stride = + (bitdepth == 8) ? 
width : width * sizeof(uint16_t); + if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y, + width, height, candidate_row, candidate_column, + nullptr, false, false, obmc_buffer, + obmc_buffer_stride)) { + return false; + } + + uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth); + const ptrdiff_t prediction_stride = buffer_[plane].columns(); + dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width, + height, obmc_buffer, obmc_buffer_stride); + return true; +} + +bool Tile::ObmcPrediction(const Block& block, const Plane plane, + const int width, const int height) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + if (block.top_available[kPlaneY] && + !IsBlockSmallerThan8x8(block.residual_size[plane])) { + const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]); + const int column4x4_max = + std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4); + const int candidate_row = block.row4x4 - 1; + const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y; + int column4x4 = block.column4x4; + const int prediction_height = std::min(height >> 1, 32 >> subsampling_y); + for (int i = 0, step; i < num_limit && column4x4 < column4x4_max; + column4x4 += step) { + const int candidate_column = column4x4 | 1; + const BlockParameters& bp_top = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_top.size; + step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16); + if (bp_top.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_top.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_width = + std::min(width, MultiplyBy4(step) >> subsampling_x); + if (!ObmcBlockPrediction( + block, bp_top.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, + 
MultiplyBy4(column4x4) >> subsampling_x, block_start_y, + candidate_row, candidate_column, kObmcDirectionVertical)) { + return false; + } + } + } + } + + if (block.left_available[kPlaneY]) { + const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]); + const int row4x4_max = + std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4); + const int candidate_column = block.column4x4 - 1; + int row4x4 = block.row4x4; + const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x; + const int prediction_width = std::min(width >> 1, 32 >> subsampling_x); + for (int i = 0, step; i < num_limit && row4x4 < row4x4_max; + row4x4 += step) { + const int candidate_row = row4x4 | 1; + const BlockParameters& bp_left = + *block_parameters_holder_.Find(candidate_row, candidate_column); + const int candidate_block_size = bp_left.size; + step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16); + if (bp_left.reference_frame[0] > kReferenceFrameIntra) { + i++; + const int candidate_reference_frame_index = + frame_header_.reference_frame_index[bp_left.reference_frame[0] - + kReferenceFrameLast]; + const int prediction_height = + std::min(height, MultiplyBy4(step) >> subsampling_y); + if (!ObmcBlockPrediction( + block, bp_left.mv.mv[0], plane, candidate_reference_frame_index, + prediction_width, prediction_height, block_start_x, + MultiplyBy4(row4x4) >> subsampling_y, candidate_row, + candidate_column, kObmcDirectionHorizontal)) { + return false; + } + } + } + } + return true; +} + +void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1, + const int width, const int height, + const int candidate_row, + const int candidate_column, uint8_t* dest, + ptrdiff_t dest_stride) { + int distance[2]; + int weight[2]; + for (int reference = 0; reference < 2; ++reference) { + const BlockParameters& bp = + *block_parameters_holder_.Find(candidate_row, candidate_column); + // Note: distance[0] and distance[1] correspond to relative distance + // 
between current frame and reference frame [1] and [0], respectively. + distance[1 - reference] = std::min( + std::abs(static_cast<int>( + current_frame_.reference_info() + ->relative_distance_from[bp.reference_frame[reference]])), + static_cast<int>(kMaxFrameDistance)); + } + GetDistanceWeights(distance, weight); + + dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1], + width, height, dest, dest_stride); +} + +void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane, + const int reference_frame_index, const int x, + const int y, int* const start_x, + int* const start_y, int* const step_x, + int* const step_y) { + const int reference_upscaled_width = + (reference_frame_index == -1) + ? frame_header_.upscaled_width + : reference_frames_[reference_frame_index]->upscaled_width(); + const int reference_height = + (reference_frame_index == -1) + ? frame_header_.height + : reference_frames_[reference_frame_index]->frame_height(); + assert(2 * frame_header_.width >= reference_upscaled_width && + 2 * frame_header_.height >= reference_height && + frame_header_.width <= 16 * reference_upscaled_width && + frame_header_.height <= 16 * reference_height); + const bool is_scaled_x = reference_upscaled_width != frame_header_.width; + const bool is_scaled_y = reference_height != frame_header_.height; + const int half_sample = 1 << (kSubPixelBits - 1); + int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]); + int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]); + const int rounding_offset = + DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits)); + if (is_scaled_x) { + const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) + + DivideBy2(frame_header_.width)) / + frame_header_.width; + *step_x = RightShiftWithRoundingSigned( + scale_x, kReferenceScaleShift - kScaleSubPixelBits); + orig_x += half_sample; + // When frame size is 4k and above, orig_x can be above 16 bits, scale_x 
can + // be up to 15 bits. So we use int64_t to hold base_x. + const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x - + (half_sample << kReferenceScaleShift); + *start_x = + RightShiftWithRoundingSigned( + base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_x = 1 << kScaleSubPixelBits; + *start_x = LeftShift(orig_x, 6) + rounding_offset; + } + if (is_scaled_y) { + const int scale_y = ((reference_height << kReferenceScaleShift) + + DivideBy2(frame_header_.height)) / + frame_header_.height; + *step_y = RightShiftWithRoundingSigned( + scale_y, kReferenceScaleShift - kScaleSubPixelBits); + orig_y += half_sample; + const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y - + (half_sample << kReferenceScaleShift); + *start_y = + RightShiftWithRoundingSigned( + base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) + + rounding_offset; + } else { + *step_y = 1 << kScaleSubPixelBits; + *start_y = LeftShift(orig_y, 6) + rounding_offset; + } +} + +// static. 
+bool Tile::GetReferenceBlockPosition( + const int reference_frame_index, const bool is_scaled, const int width, + const int height, const int ref_start_x, const int ref_last_x, + const int ref_start_y, const int ref_last_y, const int start_x, + const int start_y, const int step_x, const int step_y, + const int left_border, const int right_border, const int top_border, + const int bottom_border, int* ref_block_start_x, int* ref_block_start_y, + int* ref_block_end_x) { + *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0); + *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0); + if (reference_frame_index == -1) { + return false; + } + *ref_block_start_x -= kConvolveBorderLeftTop; + *ref_block_start_y -= kConvolveBorderLeftTop; + *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) + + kConvolveBorderRight; + int ref_block_end_y = + GetPixelPositionFromHighScale(start_y, step_y, height - 1) + + kConvolveBorderBottom; + if (is_scaled) { + const int block_height = + (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> + kScaleSubPixelBits) + + kSubPixelTaps; + ref_block_end_y = *ref_block_start_y + block_height - 1; + } + // Determines if we need to extend beyond the left/right/top/bottom border. + return *ref_block_start_x < (ref_start_x - left_border) || + *ref_block_end_x > (ref_last_x + right_border) || + *ref_block_start_y < (ref_start_y - top_border) || + ref_block_end_y > (ref_last_y + bottom_border); +} + +// Builds a block as the input for convolve, by copying the content of +// reference frame (either a decoded reference frame, or current frame). +// |block_extended_width| is the combined width of the block and its borders. 
// Copies (and, where the request runs past the reference frame, edge-extends)
// the source pixels needed by the convolution into |block_buffer|. Rows above
// or below the valid reference area repeat the nearest valid row; columns are
// clamped/memset similarly. Template parameter Pixel is uint8_t or uint16_t
// depending on bitdepth.
template <typename Pixel>
void Tile::BuildConvolveBlock(
    const Plane plane, const int reference_frame_index, const bool is_scaled,
    const int height, const int ref_start_x, const int ref_last_x,
    const int ref_start_y, const int ref_last_y, const int step_y,
    const int ref_block_start_x, const int ref_block_end_x,
    const int ref_block_start_y, uint8_t* block_buffer,
    ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
  const YuvBuffer* const reference_buffer =
      (reference_frame_index == -1)
          ? current_frame_.buffer()
          : reference_frames_[reference_frame_index]->buffer();
  Array2DView<const Pixel> reference_block(
      reference_buffer->height(plane),
      reference_buffer->stride(plane) / sizeof(Pixel),
      reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
  auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
  // Stride was given in bytes; convert to Pixel units.
  convolve_buffer_stride /= sizeof(Pixel);
  int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
  if (is_scaled) {
    block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
                    kScaleSubPixelBits) +
                   kSubPixelTaps;
  }
  const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
  const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
  const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
  const int block_width = copy_end_x - copy_start_x + 1;
  const bool extend_left = ref_block_start_x < ref_start_x;
  const bool extend_right = ref_block_end_x > ref_last_x;
  const bool out_of_left = copy_start_x > ref_block_end_x;
  const bool out_of_right = copy_end_x < ref_block_start_x;
  if (out_of_left || out_of_right) {
    // The requested block lies entirely outside the reference horizontally:
    // every output row is a solid fill of the nearest edge pixel.
    const int ref_x = out_of_left ? copy_start_x : copy_end_x;
    Pixel* buf_ptr = block_head;
    for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
      Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
      // Advance the source row only while it stays inside the valid range;
      // otherwise the last valid row is repeated (vertical edge extension).
      if (ref_block_start_y + y >= ref_start_y &&
          ref_block_start_y + y < ref_last_y) {
        ++ref_y;
      }
      buf_ptr += convolve_buffer_stride;
    }
  } else {
    Pixel* buf_ptr = block_head;
    const int left_width = copy_start_x - ref_block_start_x;
    for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
      if (extend_left) {
        Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
      }
      memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
             block_width * sizeof(Pixel));
      if (extend_right) {
        Memset(buf_ptr + left_width + block_width,
               reference_block[ref_y][copy_end_x],
               block_extended_width - left_width - block_width);
      }
      if (ref_block_start_y + y >= ref_start_y &&
          ref_block_start_y + y < ref_last_y) {
        ++ref_y;
      }
      buf_ptr += convolve_buffer_stride;
    }
  }
}

// Performs translational motion compensation for a single reference of one
// block/plane: scales the motion vector, reads (or assembles) the source
// pixels, waits for the reference rows in frame-parallel mode, and runs the
// appropriate convolve kernel into |dest| (or |prediction| when compound or
// inter-intra). Returns false on a frame-parallel wait failure.
bool Tile::BlockInterPrediction(
    const Block& block, const Plane plane, const int reference_frame_index,
    const MotionVector& mv, const int x, const int y, const int width,
    const int height, const int candidate_row, const int candidate_column,
    uint16_t* const prediction, const bool is_compound,
    const bool is_inter_intra, uint8_t* const dest,
    const ptrdiff_t dest_stride) {
  const BlockParameters& bp =
      *block_parameters_holder_.Find(candidate_row, candidate_column);
  int start_x;
  int start_y;
  int step_x;
  int step_y;
  ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
                    &step_x, &step_y);
  const int horizontal_filter_index = bp.interpolation_filter[1];
  const int vertical_filter_index = bp.interpolation_filter[0];
  const int subsampling_x = subsampling_x_[plane];
  const int subsampling_y = subsampling_y_[plane];
  // reference_frame_index equal to -1 indicates using current frame as
  // reference.
  const YuvBuffer* const reference_buffer =
      (reference_frame_index == -1)
          ? current_frame_.buffer()
          : reference_frames_[reference_frame_index]->buffer();
  const int reference_upscaled_width =
      (reference_frame_index == -1)
          ? MultiplyBy4(frame_header_.columns4x4)
          : reference_frames_[reference_frame_index]->upscaled_width();
  const int reference_height =
      (reference_frame_index == -1)
          ? MultiplyBy4(frame_header_.rows4x4)
          : reference_frames_[reference_frame_index]->frame_height();
  const int ref_start_x = 0;
  const int ref_last_x =
      SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
  const int ref_start_y = 0;
  const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;

  const bool is_scaled = (reference_frame_index != -1) &&
                         (frame_header_.width != reference_upscaled_width ||
                          frame_header_.height != reference_height);
  const int bitdepth = sequence_header_.color_config.bitdepth;
  const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
  int ref_block_start_x;
  int ref_block_start_y;
  int ref_block_end_x;
  const bool extend_block = GetReferenceBlockPosition(
      reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
      ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
      reference_buffer->left_border(plane),
      reference_buffer->right_border(plane),
      reference_buffer->top_border(plane),
      reference_buffer->bottom_border(plane), &ref_block_start_x,
      &ref_block_start_y, &ref_block_end_x);

  // In frame parallel mode, ensure that the reference block has been decoded
  // and available for referencing.
  if (reference_frame_index != -1 && frame_parallel_) {
    int reference_y_max;
    if (is_scaled) {
      // TODO(vigneshv): For now, we wait for the entire reference frame to be
      // decoded if we are using scaled references. This will eventually be
      // fixed.
      reference_y_max = reference_height;
    } else {
      reference_y_max =
          std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
      // For U and V planes with subsampling, we need to multiply
      // reference_y_max by 2 since we only track the progress of Y planes.
      reference_y_max = LeftShift(reference_y_max, subsampling_y);
    }
    if (reference_frame_progress_cache_[reference_frame_index] <
            reference_y_max &&
        !reference_frames_[reference_frame_index]->WaitUntil(
            reference_y_max,
            &reference_frame_progress_cache_[reference_frame_index])) {
      return false;
    }
  }

  const uint8_t* block_start = nullptr;
  ptrdiff_t convolve_buffer_stride;
  if (!extend_block) {
    // Fast path: convolve directly out of the reference frame buffer.
    const YuvBuffer* const reference_buffer =
        (reference_frame_index == -1)
            ? current_frame_.buffer()
            : reference_frames_[reference_frame_index]->buffer();
    convolve_buffer_stride = reference_buffer->stride(plane);
    if (reference_frame_index == -1 || is_scaled) {
      block_start = reference_buffer->data(plane) +
                    ref_block_start_y * reference_buffer->stride(plane) +
                    ref_block_start_x * pixel_size;
    } else {
      block_start = reference_buffer->data(plane) +
                    (ref_block_start_y + kConvolveBorderLeftTop) *
                        reference_buffer->stride(plane) +
                    (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
    }
  } else {
    // Slow path: assemble an edge-extended copy in the scratch buffer.
    // The block width can be at most 2 times as much as current
    // block's width because of scaling.
    auto block_extended_width = Align<ptrdiff_t>(
        (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
            pixel_size,
        kMaxAlignment);
    convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (bitdepth > 8) {
      BuildConvolveBlock<uint16_t>(
          plane, reference_frame_index, is_scaled, height, ref_start_x,
          ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
          ref_block_end_x, ref_block_start_y,
          block.scratch_buffer->convolve_block_buffer.get(),
          convolve_buffer_stride, block_extended_width);
    } else {
#endif
      BuildConvolveBlock<uint8_t>(
          plane, reference_frame_index, is_scaled, height, ref_start_x,
          ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
          ref_block_end_x, ref_block_start_y,
          block.scratch_buffer->convolve_block_buffer.get(),
          convolve_buffer_stride, block_extended_width);
#if LIBGAV1_MAX_BITDEPTH >= 10
    }
#endif
    block_start = block.scratch_buffer->convolve_block_buffer.get() +
                  (is_scaled ? 0
                             : kConvolveBorderLeftTop * convolve_buffer_stride +
                                   kConvolveBorderLeftTop * pixel_size);
  }

  void* const output =
      (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
  ptrdiff_t output_stride = (is_compound || is_inter_intra)
                                ? /*prediction_stride=*/width
                                : dest_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
  // |is_inter_intra| calculations are written to the |prediction| buffer.
  // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
  // convolve_func() expects |output_stride| to be in bytes and not Pixels.
  // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
  // account for this.
  if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
    output_stride *= 2;
  }
#endif
  assert(output != nullptr);
  if (is_scaled) {
    dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
    assert(convolve_func != nullptr);

    convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
                  vertical_filter_index, start_x, start_y, step_x, step_y,
                  width, height, output, output_stride);
  } else {
    const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
    const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;

    dsp::ConvolveFunc convolve_func =
        dsp_.convolve[reference_frame_index == -1][is_compound]
                     [vertical_filter_id != 0][horizontal_filter_id != 0];
    assert(convolve_func != nullptr);

    convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
                  vertical_filter_index, horizontal_filter_id,
                  vertical_filter_id, width, height, output, output_stride);
  }
  return true;
}

// Performs warped motion compensation for one reference of a block/plane,
// dispatching to the warp or warp_compound dsp kernel. In frame-parallel mode
// it first derives the maximum reference row any 8x8 warp tile will touch and
// waits for it. Returns false on a frame-parallel wait failure.
bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
                            const int index, const int block_start_x,
                            const int block_start_y, const int width,
                            const int height, GlobalMotion* const warp_params,
                            const bool is_compound, const bool is_inter_intra,
                            uint8_t* const dest, const ptrdiff_t dest_stride) {
  assert(width >= 8 && height >= 8);
  const BlockParameters& bp = *block.bp;
  const int reference_frame_index =
      frame_header_.reference_frame_index[bp.reference_frame[index] -
                                          kReferenceFrameLast];
  const uint8_t* const source =
      reference_frames_[reference_frame_index]->buffer()->data(plane);
  ptrdiff_t source_stride =
      reference_frames_[reference_frame_index]->buffer()->stride(plane);
  const int source_width =
      reference_frames_[reference_frame_index]->buffer()->width(plane);
  const int source_height =
      reference_frames_[reference_frame_index]->buffer()->height(plane);
  uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];

  // In frame parallel mode, ensure that the reference block has been decoded
  // and available for referencing.
  if (frame_parallel_) {
    int reference_y_max = -1;
    // Find out the maximum y-coordinate for warping.
    for (int start_y = block_start_y; start_y < block_start_y + height;
         start_y += 8) {
      for (int start_x = block_start_x; start_x < block_start_x + width;
           start_x += 8) {
        const int src_x = (start_x + 4) << subsampling_x_[plane];
        const int src_y = (start_y + 4) << subsampling_y_[plane];
        const int dst_y = src_x * warp_params->params[4] +
                          src_y * warp_params->params[5] +
                          warp_params->params[1];
        const int y4 = dst_y >> subsampling_y_[plane];
        const int iy4 = y4 >> kWarpedModelPrecisionBits;
        reference_y_max = std::max(iy4 + 8, reference_y_max);
      }
    }
    // For U and V planes with subsampling, we need to multiply reference_y_max
    // by 2 since we only track the progress of Y planes.
    reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
    if (reference_frame_progress_cache_[reference_frame_index] <
            reference_y_max &&
        !reference_frames_[reference_frame_index]->WaitUntil(
            reference_y_max,
            &reference_frame_progress_cache_[reference_frame_index])) {
      return false;
    }
  }
  if (is_compound) {
    dsp_.warp_compound(source, source_stride, source_width, source_height,
                       warp_params->params, subsampling_x_[plane],
                       subsampling_y_[plane], block_start_x, block_start_y,
                       width, height, warp_params->alpha, warp_params->beta,
                       warp_params->gamma, warp_params->delta, prediction,
                       /*prediction_stride=*/width);
  } else {
    void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
    ptrdiff_t output_stride =
        is_inter_intra ? /*prediction_stride=*/width : dest_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
    // |is_inter_intra| calculations are written to the |prediction| buffer.
    // Unlike the |is_compound| calculations the output is Pixel and not
    // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
    // Pixels. |prediction_stride| is in units of uint16_t. Adjust
    // |output_stride| to account for this.
    if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
      output_stride *= 2;
    }
#endif
    dsp_.warp(source, source_stride, source_width, source_height,
              warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
              block_start_x, block_start_y, width, height, warp_params->alpha,
              warp_params->beta, warp_params->gamma, warp_params->delta, output,
              output_stride);
  }
  return true;
}

}  // namespace libgav1
diff --git a/src/tile/tile.cc b/src/tile/tile.cc
new file mode 100644
index 0000000..ee48f17
--- /dev/null
+++ b/src/tile/tile.cc
@@ -0,0 +1,2573 @@
// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "src/tile.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <climits> +#include <cstdlib> +#include <cstring> +#include <memory> +#include <new> +#include <numeric> +#include <type_traits> +#include <utility> + +#include "src/frame_scratch_buffer.h" +#include "src/motion_vector.h" +#include "src/reconstruction.h" +#include "src/utils/bit_mask_set.h" +#include "src/utils/common.h" +#include "src/utils/constants.h" +#include "src/utils/logging.h" +#include "src/utils/segmentation.h" +#include "src/utils/stack.h" + +namespace libgav1 { +namespace { + +// Import all the constants in the anonymous namespace. +#include "src/scan_tables.inc" + +// Range above kNumQuantizerBaseLevels which the exponential golomb coding +// process is activated. +constexpr int kQuantizerCoefficientBaseRange = 12; +constexpr int kNumQuantizerBaseLevels = 2; +constexpr int kCoeffBaseRangeMaxIterations = + kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1); +constexpr int kEntropyContextLeft = 0; +constexpr int kEntropyContextTop = 1; + +constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3}, + {2, 4, 4, 4, 5}, + {2, 4, 4, 4, 5}, + {2, 4, 4, 4, 5}, + {3, 5, 5, 5, 6}}; + +// The space complexity of DFS is O(branching_factor * max_depth). For the +// parameter tree, branching_factor = 4 (there could be up to 4 children for +// every node) and max_depth (excluding the root) = 5 (to go from a 128x128 +// block all the way to a 4x4 block). The worse-case stack size is 16, by +// counting the number of 'o' nodes in the diagram: +// +// | 128x128 The highest level (corresponding to the +// | root of the tree) has no node in the stack. +// |-----------------+ +// | | | | +// | o o o 64x64 +// | +// |-----------------+ +// | | | | +// | o o o 32x32 Higher levels have three nodes in the stack, +// | because we pop one node off the stack before +// |-----------------+ pushing its four children onto the stack. 
+// | | | | +// | o o o 16x16 +// | +// |-----------------+ +// | | | | +// | o o o 8x8 +// | +// |-----------------+ +// | | | | +// o o o o 4x4 Only the lowest level has four nodes in the +// stack. +constexpr int kDfsStackSize = 16; + +// Mask indicating whether the transform sets contain a particular transform +// type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set. +constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = { + BitMaskSet(0x1), BitMaskSet(0xE0F), BitMaskSet(0x20F), + BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)}; + +constexpr PredictionMode + kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = { + kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal, + kPredictionModeD157, kPredictionModeDc}; + +// Mask used to determine the index for mode_deltas lookup. +constexpr BitMaskSet kPredictionModeDeltasMask( + kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv, + kPredictionModeNearestNearestMv, kPredictionModeNearNearMv, + kPredictionModeNearestNewMv, kPredictionModeNewNearestMv, + kPredictionModeNearNewMv, kPredictionModeNewNearMv, + kPredictionModeNewNewMv); + +// This is computed as: +// min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4. 
+constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = { + 0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6}; + +/* clang-format off */ +constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = { + {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0}, + {0, 0, 0, 0, 0}}, + {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0}, + {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}}, + {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0}, + {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 
21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21}, + {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}}, + {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21}, + {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}}; +/* clang-format on */ + +// Extended the table size from 3 to 16 by repeating the last element to avoid +// the clips to row or column indices. +constexpr uint8_t kCoeffBasePositionContextOffset[16] = { + 26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; + +constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = { + kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal, + kPredictionModeSmooth}; + +// Number of horizontal luma samples before intra block copy can be used. +constexpr int kIntraBlockCopyDelayPixels = 256; +// Number of 64 by 64 blocks before intra block copy can be used. +constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64; + +// Index [i][j] corresponds to the transform size of width 1 << (i + 2) and +// height 1 << (j + 2). +constexpr TransformSize k4x4SizeToTransformSize[5][5] = { + {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16, + kNumTransformSizes, kNumTransformSizes}, + {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16, + kTransformSize8x32, kNumTransformSizes}, + {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16, + kTransformSize16x32, kTransformSize16x64}, + {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16, + kTransformSize32x32, kTransformSize32x64}, + {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16, + kTransformSize64x32, kTransformSize64x64}}; + +// Defined in section 9.3 of the spec. 
// Indexed by the chroma prediction mode; used by ComputeTransformType() for
// intra blocks (see the |bp.uv_mode| lookup there).
constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
    kTransformTypeDctDct, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeDctDct, kTransformTypeAdstAdst, kTransformTypeDctAdst,
    kTransformTypeAdstDct, kTransformTypeAdstDct, kTransformTypeDctAdst,
    kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeAdstAdst, kTransformTypeDctDct};

// Defined in section 5.11.47 of the spec. This array does not contain an entry
// for kTransformSetDctOnly, so the first dimension needs to be
// |kNumTransformSets| - 1.
constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
    {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeIdentityDct, kTransformTypeDctIdentity,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
      kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
      kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
      kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
      kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};

// Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32};

// This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and 64*x
// transforms replaced with *x32 and 32x* respectively.
constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
    kTransformSize32x32};

// ith entry of this array is computed as:
// DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
//           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
//           1)
constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
    0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};

// Defaults used when the corresponding loop restoration parameters are not
// explicitly coded. NOTE(review): inferred from the names (self-guided
// projection / Wiener filter); confirm against the parameter-reading code.
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};

constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};

// Maps compound prediction modes into single modes. For e.g.
// kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
// and kPredictionModeNewMv for index 1. It is used to simplify the logic in
// AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
constexpr PredictionMode
    kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
        {kPredictionModeNearestMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNearMv},
        {kPredictionModeNearestMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearMv},
        {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
        {kPredictionModeNewMv, kPredictionModeNewMv},
};

// Returns the single-reference prediction mode for reference |index| (0 or 1)
// of the compound mode |y_mode|. Non-compound modes are returned unchanged.
PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
  if (y_mode < kPredictionModeNearestNearestMv) {
    return y_mode;
  }
  const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
  assert(lookup_index >= 0);
  return kCompoundToSinglePredictionMode[lookup_index][index];
}

// log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
// dqDenom is always a power of two and hence right shift can be used instead of
// division.
constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};

// Returns the minimum of |length| or |max|-|start|. This is used to clamp array
// indices when accessing arrays whose bound is equal to |max|.
int GetNumElements(int length, int start, int max) {
  return std::min(length, max - start);
}

// Writes |value| into a |rows| x |columns| region starting at |dst|, where
// |stride| is the row pitch in elements.
template <typename T>
void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  // Specialize all columns cases (values in kTransformWidth4x4[]) for better
  // performance.
  switch (columns) {
    case 1:
      MemSetBlock<T>(rows, 1, value, dst, stride);
      break;
    case 2:
      MemSetBlock<T>(rows, 2, value, dst, stride);
      break;
    case 4:
      MemSetBlock<T>(rows, 4, value, dst, stride);
      break;
    case 8:
      MemSetBlock<T>(rows, 8, value, dst, stride);
      break;
    default:
      assert(columns == 16);
      MemSetBlock<T>(rows, 16, value, dst, stride);
      break;
  }
}

// Records |tx_type| for the |w4| x |h4| region of 4x4 blocks whose top-left
// corner is (|x4|, |y4|); offsets are taken relative to the block's own
// top-left corner before writing into |transform_types|.
void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
                      TransformType tx_type,
                      TransformType transform_types[32][32]) {
  const int y_offset = y4 - block.row4x4;
  const int x_offset = x4 - block.column4x4;
  TransformType* const dst = &transform_types[y_offset][x_offset];
  SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
}

// Fills a |rows| x |columns| region of the motion field with
// |reference_frame_to_store| and |mv_to_store|. memset on the reference frame
// array is safe because each entry is a single byte (checked below).
void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
                         const MotionVector& mv_to_store, ptrdiff_t stride,
                         int rows, int columns,
                         ReferenceFrameType* reference_frame_row_start,
                         MotionVector* mv) {
  static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
  do {
    // Don't switch the following two memory setting functions.
    // Some ARM CPUs are quite sensitive to the order.
    memset(reference_frame_row_start, reference_frame_to_store, columns);
    std::fill(mv, mv + columns, mv_to_store);
    reference_frame_row_start += stride;
    mv += stride;
  } while (--rows != 0);
}

// Inverse transform process assumes that the quantized coefficients are stored
// as a virtual 2d array of size |tx_width| x tx_height. If transform width is
// 64, then this assumption is broken because the scan order used for populating
// the coefficients for such transforms is the same as the one used for
// corresponding transform with width 32 (e.g. the scan order used for 64x16 is
// the same as the one used for 32x16). So we must restore the coefficients to
// their correct positions and clean the positions they occupied.
+template <typename ResidualType> +void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width, + ResidualType* residual) { + if (tx_width != 64) return; + const int rows = clamped_tx_height - 2; + auto* src = residual + 32 * rows; + residual += 64 * rows; + // Process 2 rows in each loop in reverse order to avoid overwrite. + int x = rows >> 1; + do { + // The 2 rows can be processed in order. + memcpy(residual, src, 32 * sizeof(src[0])); + memcpy(residual + 64, src + 32, 32 * sizeof(src[0])); + memset(src + 32, 0, 32 * sizeof(src[0])); + src -= 64; + residual -= 128; + } while (--x); + // Process the second row. The first row is already correct. + memcpy(residual + 64, src + 32, 32 * sizeof(src[0])); + memset(src + 32, 0, 32 * sizeof(src[0])); +} + +void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) { + // 7.10.2.14 (part 1). (also contains implementations of 5.11.53 + // and 5.11.54). + constexpr int kMvBorder4x4 = 4; + const int row_border = kMvBorder4x4 + block.height4x4; + const int column_border = kMvBorder4x4 + block.width4x4; + const int macroblocks_to_top_edge = -block.row4x4; + const int macroblocks_to_bottom_edge = + block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4; + const int macroblocks_to_left_edge = -block.column4x4; + const int macroblocks_to_right_edge = + block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4; + min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border); + min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border); + max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border); + max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border); +} + +// Section 8.3.2 in the spec, under coeff_base_eob. 
int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
  if (index == 0) return 0;
  const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
  const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
  const int tx_height = kTransformHeight[adjusted_tx_size];
  if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
  if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
  return 3;
}

// Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
// on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
// the end of block case.
int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
                                TransformClass tx_class) {
  if (pos == 0) return 0;
  const int tx_width = 1 << adjusted_tx_width_log2;
  const int row = pos >> adjusted_tx_width_log2;
  const int column = pos & (tx_width - 1);
  // This return statement is equivalent to:
  //   return ((tx_class == kTransformClass2D && (row | column) < 2) ||
  //           (tx_class == kTransformClassHorizontal && column == 0) ||
  //           (tx_class == kTransformClassVertical && row == 0))
  //              ? 7
  //              : 14;
  return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
                 static_cast<int>((row | column) < 2)) |
                (tx_class & static_cast<int>(column == 0)) |
                ((tx_class >> 1) & static_cast<int>(row == 0)));
}

}  // namespace

// Binds this tile to the frame-level state and scratch buffers and computes
// its 4x4 and superblock extents. Work that can fail (allocations, motion
// field setup) is deferred to Init().
Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
           const ObuSequenceHeader& sequence_header,
           const ObuFrameHeader& frame_header,
           RefCountedBuffer* const current_frame, const DecoderState& state,
           FrameScratchBuffer* const frame_scratch_buffer,
           const WedgeMaskArray& wedge_masks,
           const QuantizerMatrix& quantizer_matrix,
           SymbolDecoderContext* const saved_symbol_decoder_context,
           const SegmentationMap* prev_segment_ids,
           PostFilter* const post_filter, const dsp::Dsp* const dsp,
           ThreadPool* const thread_pool,
           BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
           bool use_intra_prediction_buffer)
    : number_(tile_number),
      row_(number_ / frame_header.tile_info.tile_columns),
      column_(number_ % frame_header.tile_info.tile_columns),
      data_(data),
      size_(size),
      read_deltas_(false),
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      current_quantizer_index_(frame_header.quantizer.base_index),
      sequence_header_(sequence_header),
      frame_header_(frame_header),
      reference_frame_sign_bias_(state.reference_frame_sign_bias),
      reference_frames_(state.reference_frame),
      motion_field_(frame_scratch_buffer->motion_field),
      reference_order_hint_(state.reference_order_hint),
      wedge_masks_(wedge_masks),
      quantizer_matrix_(quantizer_matrix),
      reader_(data_, size_, frame_header_.enable_cdf_update),
      symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
      saved_symbol_decoder_context_(saved_symbol_decoder_context),
      prev_segment_ids_(prev_segment_ids),
      dsp_(*dsp),
      post_filter_(*post_filter),
      block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
      quantizer_(sequence_header_.color_config.bitdepth,
                 &frame_header_.quantizer),
      residual_size_((sequence_header_.color_config.bitdepth == 8)
                         ? sizeof(int16_t)
                         : sizeof(int32_t)),
      intra_block_copy_lag_(
          frame_header_.allow_intrabc
              ? (sequence_header_.use_128x128_superblock ? 3 : 5)
              : 1),
      current_frame_(*current_frame),
      cdef_index_(frame_scratch_buffer->cdef_index),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      thread_pool_(thread_pool),
      residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
      tile_scratch_buffer_pool_(
          &frame_scratch_buffer->tile_scratch_buffer_pool),
      pending_tiles_(pending_tiles),
      frame_parallel_(frame_parallel),
      use_intra_prediction_buffer_(use_intra_prediction_buffer),
      intra_prediction_buffer_(
          use_intra_prediction_buffer_
              ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
              : nullptr) {
  row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
  row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
  column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
  column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  // Superblocks are square, so the height log2 equals the width log2.
  const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
  superblock_rows_ =
      (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
  superblock_columns_ =
      (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
      block_width4x4_log2;
  // If |split_parse_and_decode_| is true, we do the necessary setup for
  // splitting the parsing and the decoding steps. This is done in the following
  // two cases:
  //   1) If there is multi-threading within a tile (this is done if
  //   |thread_pool_| is not nullptr and if there are at least as many
  //   superblock columns as |intra_block_copy_lag_|).
  //   2) If |frame_parallel| is true.
  split_parse_and_decode_ = (thread_pool_ != nullptr &&
                             superblock_columns_ > intra_block_copy_lag_) ||
                            frame_parallel;
  if (frame_parallel_) {
    reference_frame_progress_cache_.fill(INT_MIN);
  }
  memset(delta_lf_, 0, sizeof(delta_lf_));
  delta_lf_all_zero_ = true;
  const YuvBuffer& buffer = post_filter_.frame_buffer();
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    // Verify that the borders are big enough for Reconstruct(). max_tx_length
    // is the maximum value of tx_width and tx_height for the plane.
    const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
    // Reconstruct() may overwrite on the right. Since the right border of a
    // row is followed in memory by the left border of the next row, the
    // number of extra pixels to the right of a row is at least the sum of the
    // left and right borders.
    //
    // Note: This assertion actually checks the sum of the left and right
    // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
    // and vertically shifted version of |buffer|. Since the sum of the left and
    // right borders is not changed by the shift, we can just check the sum of
    // the left and right borders of |buffer|.
    assert(buffer.left_border(plane) + buffer.right_border(plane) >=
           max_tx_length - 1);
    // Reconstruct() may overwrite on the bottom. We need an extra border row
    // on the bottom because we need the left border of that row.
    //
    // Note: This assertion checks the bottom border of
    // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
    // shift that the PostFilter constructor applied to |buffer| and reduce the
    // bottom border by that amount.
#ifndef NDEBUG
    const int vertical_shift = static_cast<int>(
        (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
        buffer.stride(plane));
    const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
    assert(bottom_border >= max_tx_length);
#endif
    // In AV1, a transform block of height H starts at a y coordinate that is
    // a multiple of H. If a transform block at the bottom of the frame has
    // height H, then Reconstruct() will write up to the row with index
    // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
    // rows Reconstruct() may write to is
    // Align(buffer.height(plane), max_tx_length).
    buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
                         buffer.stride(plane),
                         post_filter_.GetUnfilteredBuffer(plane));
    const int plane_height =
        SubsampledValue(frame_header_.height, subsampling_y_[plane]);
    deblock_row_limit_[plane] =
        std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
                                            << subsampling_y_[plane]);
    const int plane_width =
        SubsampledValue(frame_header_.width, subsampling_x_[plane]);
    deblock_column_limit_[plane] =
        std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
                                               << subsampling_x_[plane]);
  }
}

// Allocates the per-tile entropy context buffers and either the threaded
// residual buffers or a single residual buffer, then sets up the motion field
// when use_ref_frame_mvs is enabled. Returns false on allocation failure.
bool Tile::Init() {
  assert(coefficient_levels_.size() == dc_categories_.size());
  for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
    const int contexts_per_plane = (i == kEntropyContextLeft)
                                       ?
        frame_header_.rows4x4
        : frame_header_.columns4x4;
    if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
      return false;
    }
    if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
      return false;
    }
  }
  if (split_parse_and_decode_) {
    assert(residual_buffer_pool_ != nullptr);
    if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
                                         /*zero_initialize=*/false)) {
      LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
      return false;
    }
  } else {
    // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
    // checks when parsing quantized coefficients.
    residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
        32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
    if (residual_buffer_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
      return false;
    }
    prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
    if (prediction_parameters_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
      return false;
    }
  }
  if (frame_header_.use_ref_frame_mvs) {
    assert(sequence_header_.enable_order_hint);
    SetupMotionField(frame_header_, current_frame_, reference_frames_,
                     row4x4_start_, row4x4_end_, column4x4_start_,
                     column4x4_end_, &motion_field_);
  }
  ResetLoopRestorationParams();
  return true;
}

// Processes (parses and/or decodes, per |processing_mode|) every superblock
// of the superblock row starting at |row4x4|. Rows outside the tile are
// ignored. Returns false if any superblock fails.
template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
bool Tile::ProcessSuperBlockRow(int row4x4,
                                TileScratchBuffer* const scratch_buffer) {
  if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
  assert(scratch_buffer != nullptr);
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
       column4x4 += block_width4x4) {
    if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4, scratch_buffer,
                           processing_mode)) {
      LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
                   row4x4, column4x4);
      return false;
    }
  }
  if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
    SaveSymbolDecoderContext();
  }
  if (processing_mode == kProcessingModeDecodeOnly ||
      processing_mode == kProcessingModeParseAndDecode) {
    PopulateIntraPredictionBuffer(row4x4);
  }
  return true;
}

// Used in frame parallel mode. The symbol decoder context need not be saved in
// this case since it was done when parsing was complete.
template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
    int row4x4, TileScratchBuffer* scratch_buffer);
// Used in non frame parallel mode.
template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
    int row4x4, TileScratchBuffer* scratch_buffer);

// Saves the symbol decoder context into |saved_symbol_decoder_context_| when
// this tile is the frame's designated context-update tile.
void Tile::SaveSymbolDecoderContext() {
  if (frame_header_.enable_frame_end_update_cdf &&
      number_ == frame_header_.tile_info.context_update_id) {
    *saved_symbol_decoder_context_ = symbol_decoder_context_;
  }
}

bool Tile::ParseAndDecode() {
  // If this is the main thread, we build the loop filter bit masks when parsing
  // so that it happens in the current thread. This ensures that the main thread
  // does as much work as possible.
  if (split_parse_and_decode_) {
    if (!ThreadedParseAndDecode()) return false;
    SaveSymbolDecoderContext();
    return true;
  }
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    pending_tiles_->Decrement(false);
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
       row4x4 += block_width4x4) {
    if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
            row4x4, scratch_buffer.get())) {
      pending_tiles_->Decrement(false);
      return false;
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  pending_tiles_->Decrement(true);
  return true;
}

// Runs the parse-only pass over every superblock row of the tile and then
// saves the symbol decoder context.
bool Tile::Parse() {
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
       row4x4 += block_width4x4) {
    if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
            row4x4, scratch_buffer.get())) {
      return false;
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  SaveSymbolDecoderContext();
  return true;
}

// Runs the decode-only pass. After each superblock row it applies the deblock
// filtering this tile can do on its own and, once every tile column of the
// frame has finished the row, notifies the post filtering thread via
// |superblock_row_progress_condvar|.
bool Tile::Decode(
    std::mutex* const mutex, int* const superblock_row_progress,
    std::condition_variable* const superblock_row_progress_condvar) {
  // Same values as kNum4x4BlocksWide[SuperBlockSize()] and its log2.
  const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
  const int block_width4x4_log2 =
      sequence_header_.use_128x128_superblock ? 5 : 4;
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
       row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
    if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
            row4x4, scratch_buffer.get())) {
      return false;
    }
    if (post_filter_.DoDeblock()) {
      // Apply vertical deblock filtering for all the columns in this tile
      // except for the first 64 columns.
      post_filter_.ApplyDeblockFilter(
          kLoopFilterTypeVertical, row4x4,
          column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
          block_width4x4);
      // If this is the first superblock row of the tile, then we cannot apply
      // horizontal deblocking here since we don't know if the top row is
      // available. So it will be done by the calling thread in that case.
      if (row4x4 != row4x4_start_) {
        // Apply horizontal deblock filtering for all the columns in this tile
        // except for the first and the last 64 columns.
        // Note about the last tile of each row: For the last tile,
        // column4x4_end may not be a multiple of 16. In that case it is still
        // okay to simply subtract 16 since ApplyDeblockFilter() will only do
        // the filters in increments of 64 columns (or 32 columns for chroma
        // with subsampling).
        post_filter_.ApplyDeblockFilter(
            kLoopFilterTypeHorizontal, row4x4,
            column4x4_start_ + kNum4x4InLoopFilterUnit,
            column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
      }
    }
    bool notify;
    {
      std::unique_lock<std::mutex> lock(*mutex);
      notify = ++superblock_row_progress[index] ==
               frame_header_.tile_info.tile_columns;
    }
    if (notify) {
      // We are done decoding this superblock row. Notify the post filtering
      // thread.
      superblock_row_progress_condvar[index].notify_one();
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  return true;
}

// Parses all superblocks on the current thread while scheduling their decode
// jobs on |thread_pool_| as soon as their dependencies (see CanDecode()) are
// met.
bool Tile::ThreadedParseAndDecode() {
  {
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
      pending_tiles_->Decrement(false);
      LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
      return false;
    }
    // Account for the parsing job.
    ++threading_.pending_jobs;
  }

  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];

  // Begin parsing.
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    pending_tiles_->Decrement(false);
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
       row4x4 += block_width4x4, ++row_index) {
    for (int column4x4 = column4x4_start_, column_index = 0;
         column4x4 < column4x4_end_;
         column4x4 += block_width4x4, ++column_index) {
      if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4,
                             scratch_buffer.get(), kProcessingModeParseOnly)) {
        std::lock_guard<std::mutex> lock(threading_.mutex);
        threading_.abort = true;
        break;
      }
      std::unique_lock<std::mutex> lock(threading_.mutex);
      if (threading_.abort) break;
      threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
      // Schedule the decoding of this superblock if it is allowed.
      if (CanDecode(row_index, column_index)) {
        ++threading_.pending_jobs;
        threading_.sb_state[row_index][column_index] =
            kSuperBlockStateScheduled;
        lock.unlock();
        thread_pool_->Schedule(
            [this, row_index, column_index, block_width4x4]() {
              DecodeSuperBlock(row_index, column_index, block_width4x4);
            });
      }
    }
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (threading_.abort) break;
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));

  // We are done parsing. We can return here since the calling thread will make
  // sure that it waits for all the superblocks to be decoded.
  //
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  threading_.mutex.lock();
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  threading_.mutex.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
  return job_succeeded;
}

// Returns true if the superblock at (|row_index|, |column_index|) is parsed
// and all the superblocks it depends on have been decoded.
// Caller must hold threading_.mutex (NOTE(review): implied by the unguarded
// reads of |threading_.sb_state|; confirm with the call sites).
bool Tile::CanDecode(int row_index, int column_index) const {
  assert(row_index >= 0);
  assert(column_index >= 0);
  // If |threading_.sb_state[row_index][column_index]| is not equal to
  // kSuperBlockStateParsed, then return false. This is ok because if
  // |threading_.sb_state[row_index][column_index]| is equal to:
  //   kSuperBlockStateNone - then the superblock is not yet parsed.
  //   kSuperBlockStateScheduled - then the superblock is already scheduled for
  //   decode.
  //   kSuperBlockStateDecoded - then the superblock has already been decoded.
  if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
      threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
    return false;
  }
  // First superblock has no dependencies.
  if (row_index == 0 && column_index == 0) {
    return true;
  }
  // Superblocks in the first row only depend on the superblock to the left of
  // it.
  if (row_index == 0) {
    return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
  }
  // All other superblocks depend on superblock to the left of it (if one
  // exists) and superblock to the top right with a lag of
  // |intra_block_copy_lag_| (if one exists).
  const int top_right_column_index =
      std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
  return threading_.sb_state[row_index - 1][top_right_column_index] ==
             kSuperBlockStateDecoded &&
         (column_index == 0 ||
          threading_.sb_state[row_index][column_index - 1] ==
              kSuperBlockStateDecoded);
}

// Worker-thread entry point: decodes one superblock and then schedules any
// neighbors that become decodable as a result.
void Tile::DecodeSuperBlock(int row_index, int column_index,
                            int block_width4x4) {
  const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
  const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  bool ok = scratch_buffer != nullptr;
  if (ok) {
    ok = ProcessSuperBlock(row4x4, column4x4, block_width4x4,
                           scratch_buffer.get(), kProcessingModeDecodeOnly);
    tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  }
  std::unique_lock<std::mutex> lock(threading_.mutex);
  if (ok) {
    threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
    // Candidate rows and columns that we could potentially begin the decoding
    // (if it is allowed to do so). The candidates are:
    //   1) The superblock to the bottom-left of the current superblock with a
    //   lag of |intra_block_copy_lag_| (or the beginning of the next superblock
    //   row in case there are less than |intra_block_copy_lag_| superblock
    //   columns in the Tile).
    //   2) The superblock to the right of the current superblock.
    const int candidate_row_indices[] = {row_index + 1, row_index};
    const int candidate_column_indices[] = {
        std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
    for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
         ++i) {
      const int candidate_row_index = candidate_row_indices[i];
      const int candidate_column_index = candidate_column_indices[i];
      if (!CanDecode(candidate_row_index, candidate_column_index)) {
        continue;
      }
      ++threading_.pending_jobs;
      threading_.sb_state[candidate_row_index][candidate_column_index] =
          kSuperBlockStateScheduled;
      lock.unlock();
      thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
                              block_width4x4]() {
        DecodeSuperBlock(candidate_row_index, candidate_column_index,
                         block_width4x4);
      });
      lock.lock();
    }
  } else {
    threading_.abort = true;
  }
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  lock.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
}

// Copies the bottom row of pixels of the superblock row starting at |row4x4|
// into |intra_prediction_buffer_| for each plane. Does nothing for the last
// superblock row of the tile or when the buffer is not in use.
void Tile::PopulateIntraPredictionBuffer(int row4x4) {
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
    return;
  }
  const size_t pixel_size =
      (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
                                                   : sizeof(uint16_t));
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    const int row_to_copy =
        (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
    const size_t pixels_to_copy =
        (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
         subsampling_x_[plane]) *
        pixel_size;
    const size_t column_start =
        MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
    void* start;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (sequence_header_.color_config.bitdepth > 8) {
      Array2DView<uint16_t> buffer(
          buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
          reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
      start = &buffer[row_to_copy][column_start];
    } else  // NOLINT
#endif
    {
      start = &buffer_[plane][row_to_copy][column_start];
    }
    memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
           start, pixels_to_copy);
  }
}

// Derives the context used when decoding whether a transform block is all
// zero, from the coefficient levels (and, for chroma, DC categories) of the
// regions above and to the left of the transform block.
int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
                                     TransformSize tx_size, int x4, int y4,
                                     int w4, int h4) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];

  const int tx_width = kTransformWidth[tx_size];
  const int tx_height = kTransformHeight[tx_size];
  const BlockSize plane_size = block.residual_size[plane];
  const int block_width = kBlockWidthPixels[plane_size];
  const int block_height = kBlockHeightPixels[plane_size];

  int top = 0;
  int left = 0;
  const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
  const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
  if (plane == kPlaneY) {
    if (block_width == tx_width && block_height == tx_height) return 0;
    const uint8_t* coefficient_levels =
        &coefficient_levels_[kEntropyContextTop][plane][x4];
    for (int i = 0; i < num_top_elements; ++i) {
      top = std::max(top, static_cast<int>(coefficient_levels[i]));
    }
    coefficient_levels =
        &coefficient_levels_[kEntropyContextLeft][plane][y4];
    for (int i = 0; i < num_left_elements; ++i) {
      left = std::max(left, static_cast<int>(coefficient_levels[i]));
    }
    assert(top <= 4);
    assert(left <= 4);
    // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
    // for top and left.
    return kAllZeroContextsByTopLeft[top][left];
  }
  const uint8_t* coefficient_levels =
      &coefficient_levels_[kEntropyContextTop][plane][x4];
  const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
  for (int i = 0; i < num_top_elements; ++i) {
    top |= coefficient_levels[i];
    top |= dc_categories[i];
  }
  coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
  dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
  for (int i = 0; i < num_left_elements; ++i) {
    left |= coefficient_levels[i];
    left |= dc_categories[i];
  }
  return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
         3 * static_cast<int>(block_width * block_height >
                              tx_width * tx_height);
}

// Selects which set of transform types is allowed for |tx_size| given
// inter/intra status and the reduced_tx_set frame header flag.
TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
  const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
  const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
  if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
  if (is_inter) {
    if (frame_header_.reduced_tx_set ||
        tx_size_square_max == kTransformSize32x32) {
      return kTransformSetInter3;
    }
    if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
    return kTransformSetInter1;
  }
  if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
  if (frame_header_.reduced_tx_set ||
      tx_size_square_min == kTransformSize16x16) {
    return kTransformSetIntra2;
  }
  return kTransformSetIntra1;
}

// Returns the transform type for the transform block at (|block_x|,
// |block_y|). Lossless segments and 64x64-class transforms always use
// DCT_DCT; for luma the previously recorded type is returned, and for chroma
// the type is derived from the co-located luma block (inter) or the chroma
// prediction mode (intra), falling back to DCT_DCT when the derived type is
// not in the allowed set.
TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
                                         TransformSize tx_size, int block_x,
                                         int block_y) {
  const BlockParameters& bp = *block.bp;
  const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
  if (frame_header_.segmentation.lossless[bp.segment_id] ||
      tx_size_square_max == kTransformSize64x64) {
    return kTransformTypeDctDct;
  }
  if (plane == kPlaneY) {
    return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
  }
  const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
  TransformType tx_type;
  if (bp.is_inter) {
    const int x4 =
        std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
    const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
    tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
  } else {
    tx_type = kModeToTransformType[bp.uv_mode];
  }
  return kTransformTypeInSetMask[tx_set].Contains(tx_type)
             ? tx_type
             : kTransformTypeDctDct;
}

void Tile::ReadTransformType(const Block& block, int x4, int y4,
                             TransformSize tx_size) {
  BlockParameters& bp = *block.bp;
  const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);

  TransformType tx_type = kTransformTypeDctDct;
  if (tx_set != kTransformSetDctOnly &&
      frame_header_.segmentation.qindex[bp.segment_id] > 0) {
    const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
    const int cdf_tx_size_index =
        TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
    uint16_t* cdf;
    if (bp.is_inter) {
      cdf = symbol_decoder_context_
                .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
      switch (tx_set) {
        case kTransformSetInter1:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<16>(cdf));
          break;
        case kTransformSetInter2:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<12>(cdf));
          break;
        default:
          assert(tx_set == kTransformSetInter3);
          tx_type = static_cast<TransformType>(reader_.ReadSymbol(cdf));
          break;
      }
    } else {
      const PredictionMode intra_direction =
          block.bp->prediction_parameters->use_filter_intra
              ?
kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters + ->filter_intra_mode] + : bp.y_mode; + cdf = + symbol_decoder_context_ + .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction]; + assert(tx_set == kTransformSetIntra1 || tx_set == kTransformSetIntra2); + tx_type = static_cast<TransformType>((tx_set == kTransformSetIntra1) + ? reader_.ReadSymbol<7>(cdf) + : reader_.ReadSymbol<5>(cdf)); + } + + // This array does not contain an entry for kTransformSetDctOnly, so the + // first dimension needs to be offset by 1. + tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type]; + } + SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size], + kTransformHeight4x4[tx_size], tx_type, transform_types_); +} + +// Section 8.3.2 in the spec, under coeff_base and coeff_br. +// Bottom boundary checks are avoided by the padded rows. +// For a coefficient near the right boundary, the two right neighbors and the +// one bottom-right neighbor may be out of boundary. We don't check the right +// boundary for them, because the out of boundary neighbors project to positions +// above the diagonal line which goes through the current coefficient and these +// positions are still all 0s according to the diagonal scan order. 
// Decodes coeff_base and coeff_br for a block whose transform class is 2D.
// Coefficients are parsed in reverse scan order: the caller has already read
// the level at scan position |eob| - 1; this function reads positions
// |eob| - 2 down to 1 in the loop, and position 0 (the DC position) in a
// separate section at the end. |level_buffer| records the base-level symbol
// (before coeff_br extension) at each position, which feeds the neighbor-sum
// context of later-parsed (earlier-in-scan) coefficients; |quantized_buffer|
// records the extended level used for the coeff_br contexts. Signs and
// dequantization are applied later by the caller.
template <typename ResidualType>
void Tile::ReadCoeffBase2D(
    const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  for (int i = eob - 2; i >= 1; --i) {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    // Sum of the five already-parsed neighbors: {0,1}, {1,0}, {1,1}, {0,2}
    // and {2,0}. Bottom neighbors never read out of bounds thanks to the
    // zero-initialized padding rows (see the comment above this function).
    const int neighbor_sum = 1 + levels[1] + levels[tx_width] +
                             levels[tx_width + 1] + levels[2] +
                             levels[MultiplyBy2(tx_width)];
    const int context =
        ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
        kCoeffBaseContextOffset[tx_size][std::min(row, 4)][std::min(column, 4)];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +         // {0, 1}
                                          quantized[tx_width] +      // {1, 0}
                                          quantized[tx_width + 1]));  // {1, 1}
      context += 14 >> static_cast<int>((row | column) < 2);
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
  // Read position 0.
  {
    auto* const quantized = &quantized_buffer[0];
    // Position 0 always uses coeff_base context 0.
    int level = reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[0]);
    level_buffer[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int context =
          std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
                                quantized[tx_width] +       // {1, 0}
                                quantized[tx_width + 1]));  // {1, 1}
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
}

// Section 8.3.2 in the spec, under coeff_base and coeff_br.
// Bottom boundary checks are avoided by the padded rows.
// For a coefficient near the right boundary, the four right neighbors may be
// out of boundary. We don't do the boundary check for the first three right
// neighbors, because even for the transform blocks with smallest width 4, the
// first three out of boundary neighbors project to positions left of the
// current coefficient and these positions are still all 0s according to the
// column scan order. However, when transform block width is 4 and the current
// coefficient is on the right boundary, its fourth right neighbor projects to
// the under position on the same column, which could be nonzero. Therefore, we
// must skip the fourth right neighbor. To make it simple, for any coefficient,
// we always do the boundary check for its fourth right neighbor.
// Decodes coeff_base and coeff_br for the horizontal transform class, in
// reverse scan order from position |eob| - 2 down to 0 (the level at
// |eob| - 1 has already been read by the caller). The context neighbors are
// the four positions to the right plus the one below; see the comment above
// for why only the fourth right neighbor needs an explicit boundary check.
template <typename ResidualType>
void Tile::ReadCoeffBaseHorizontal(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (levels[1] +                                    // {0, 1}
             levels[tx_width] +                             // {1, 0}
             levels[2] +                                    // {0, 2}
             levels[3] +                                    // {0, 3}
             ((column + 4 < tx_width) ? levels[4] : 0));    // {0, 4}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[column];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +     // {0, 1}
                                          quantized[tx_width] +  // {1, 0}
                                          quantized[2]));        // {0, 2}
      if (pos != 0) {
        context += 14 >> static_cast<int>(column == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}

// Section 8.3.2 in the spec, under coeff_base and coeff_br.
// Bottom boundary checks are avoided by the padded rows.
// Right boundary check is performed explicitly.
// Decodes coeff_base and coeff_br for the vertical transform class, in
// reverse scan order from position |eob| - 2 down to 0 (the level at
// |eob| - 1 has already been read by the caller). The context neighbors are
// the four positions below plus the one to the right; only the right
// neighbor needs a boundary check (see the comment above).
template <typename ResidualType>
void Tile::ReadCoeffBaseVertical(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (((column + 1 < tx_width) ? levels[1] : 0) +  // {0, 1}
             levels[tx_width] +                           // {1, 0}
             levels[MultiplyBy2(tx_width)] +              // {2, 0}
             levels[tx_width * 3] +                       // {3, 0}
             levels[MultiplyBy4(tx_width)]);              // {4, 0}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[row];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
      int context =
          std::min(6, DivideBy2(1 + quantized_column1 +              // {0, 1}
                                quantized[tx_width] +                // {1, 0}
                                quantized[MultiplyBy2(tx_width)]));  // {2, 0}
      if (pos != 0) {
        context += 14 >> static_cast<int>(row == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}

// Derives the context for decoding the DC sign from the DC sign categories of
// the neighboring transform blocks above and to the left of this one.
int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
  // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
  int8_t dc_sign = std::accumulate(
      dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
  dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
  dc_sign = std::accumulate(
      dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
  // This return statement is equivalent to:
  //   if (dc_sign < 0) return 1;
  //   if (dc_sign > 0) return 2;
  //   return 0;
  // And it is better than:
  //   return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
  return static_cast<int>(dc_sign < 0) +
         MultiplyBy2(static_cast<int>(dc_sign > 0));
}

// Records the coefficient level and DC sign category of this transform block
// into the top and left entropy context arrays, for use as context by
// subsequently parsed neighboring blocks. The stores are clipped against the
// frame boundary via GetNumElements().
void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
                              uint8_t coefficient_level, int8_t dc_category) {
  const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
  const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
  memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
         num_top_elements);
  memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
         num_top_elements);
  const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
  const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
  memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
         coefficient_level, num_left_elements);
  memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
         num_left_elements);
}

// For the coefficient at scan index |i| (or the DC coefficient when
// |is_dc_coefficient| is true): reads the sign, reads the Exp-Golomb suffix
// when the level saturated the coeff_br range, dequantizes the level and
// stores the signed, clipped result back into |residual_buffer|. Updates
// |dc_category| (DC only) and accumulates into |coefficient_level|.
// Returns false if the bitstream contains an invalid Golomb length.
template <typename ResidualType, bool is_dc_coefficient>
bool Tile::ReadSignAndApplyDequantization(
    const uint16_t* const scan, int i, int q_value,
    const uint8_t* const quantizer_matrix, int shift, int max_value,
    uint16_t* const dc_sign_cdf, int8_t* const dc_category,
    int* const coefficient_level, ResidualType* residual_buffer) {
  const int pos = is_dc_coefficient ? 0 : scan[i];
  // If residual_buffer[pos] is zero, then the rest of the function has no
  // effect.
  int level = residual_buffer[pos];
  if (level == 0) return true;
  // The DC sign is context-coded; AC signs are raw bits.
  const int sign = is_dc_coefficient
                       ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
                       : reader_.ReadBit();
  if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
    // Exp-Golomb suffix: unary length prefix followed by |length| - 1 bits.
    int length = 0;
    bool golomb_length_bit = false;
    do {
      golomb_length_bit = static_cast<bool>(reader_.ReadBit());
      ++length;
      if (length > 20) {
        LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
        return false;
      }
    } while (!golomb_length_bit);
    int x = 1;
    for (int i = length - 2; i >= 0; --i) {
      x = (x << 1) | reader_.ReadBit();
    }
    level += x - 1;
  }
  if (is_dc_coefficient) {
    *dc_category = (sign != 0) ? -1 : 1;
  }
  level &= 0xfffff;
  *coefficient_level += level;
  // Apply dequantization. Step 1 of section 7.12.3 in the spec.
  int q = q_value;
  if (quantizer_matrix != nullptr) {
    q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
  }
  // The intermediate multiplication can exceed 32 bits, so it has to be
  // performed by promoting one of the values to int64_t.
  int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
  dequantized_value >>= shift;
  // At this point:
  //   * |dequantized_value| is always non-negative.
  //   * |sign| can be either 0 or 1.
  //   * min_value = -(max_value + 1).
  // We need to apply the following:
  // dequantized_value = sign ? -dequantized_value : dequantized_value;
  // dequantized_value = Clip3(dequantized_value, min_value, max_value);
  //
  // Note that -x == ~(x - 1).
  //
  // Now, The above two lines can be done with a std::min and xor as follows:
  dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
  residual_buffer[pos] = dequantized_value;
  return true;
}

// Reads the incremental coeff_br (base range) symbols and returns their sum.
// Parsing stops early when a symbol is less than the maximum increment, or
// after kCoeffBaseRangeMaxIterations symbols.
int Tile::ReadCoeffBaseRange(uint16_t* cdf) {
  int level = 0;
  for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
    const int coeff_base_range =
        reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(cdf);
    level += coeff_base_range;
    if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
  }
  return level;
}

// Parses and dequantizes the transform coefficients of one transform block
// (Section 5.11.39 in the spec): all_zero flag, transform type, end-of-block
// position, base levels and level extensions, then signs and dequantization.
// Returns the end-of-block position (0 when the block is all zero) or -1 on
// parse failure.
template <typename ResidualType>
int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
                                    int start_x, int start_y,
                                    TransformSize tx_size,
                                    TransformType* const tx_type) {
  const int x4 = DivideBy4(start_x);
  const int y4 = DivideBy4(start_y);
  const int w4 = kTransformWidth4x4[tx_size];
  const int h4 = kTransformHeight4x4[tx_size];
  const int tx_size_context = kTransformSizeContext[tx_size];
  int context =
      GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
  const bool all_zero = reader_.ReadSymbol(
      symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
  if (all_zero) {
    if (plane == kPlaneY) {
      SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
                       transform_types_);
    }
    SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
    // This is not used in this case, so it can be set to any value.
    *tx_type = kNumTransformTypes;
    return 0;
  }
  const int tx_width = kTransformWidth[tx_size];
  const int tx_height = kTransformHeight[tx_size];
  const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
  const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
  const int tx_padding =
      (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
  auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
  // Clear padding to avoid bottom boundary checks when parsing quantized
  // coefficients.
  memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
  uint8_t level_buffer[(32 + kResidualPaddingVertical) * 32];
  memset(
      level_buffer, 0,
      kTransformWidth[adjusted_tx_size] * kTransformHeight[adjusted_tx_size] +
          tx_padding);
  const int clamped_tx_height = std::min(tx_height, 32);
  if (plane == kPlaneY) {
    ReadTransformType(block, x4, y4, tx_size);
  }
  BlockParameters& bp = *block.bp;
  *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
  const int eob_multi_size = kEobMultiSizeLookup[tx_size];
  const PlaneType plane_type = GetPlaneType(plane);
  const TransformClass tx_class = GetTransformClass(*tx_type);
  context = static_cast<int>(tx_class != kTransformClass2D);
  // Decode the end-of-block position class; the CDF is selected by the
  // (adjusted) number of coefficients in the transform block.
  int eob_pt = 1;
  switch (eob_multi_size) {
    case 0:
      eob_pt += reader_.ReadSymbol<kEobPt16SymbolCount>(
          symbol_decoder_context_.eob_pt_16_cdf[plane_type][context]);
      break;
    case 1:
      eob_pt += reader_.ReadSymbol<kEobPt32SymbolCount>(
          symbol_decoder_context_.eob_pt_32_cdf[plane_type][context]);
      break;
    case 2:
      eob_pt += reader_.ReadSymbol<kEobPt64SymbolCount>(
          symbol_decoder_context_.eob_pt_64_cdf[plane_type][context]);
      break;
    case 3:
      eob_pt += reader_.ReadSymbol<kEobPt128SymbolCount>(
          symbol_decoder_context_.eob_pt_128_cdf[plane_type][context]);
      break;
    case 4:
      eob_pt += reader_.ReadSymbol<kEobPt256SymbolCount>(
          symbol_decoder_context_.eob_pt_256_cdf[plane_type][context]);
      break;
    case 5:
      eob_pt += reader_.ReadSymbol<kEobPt512SymbolCount>(
          symbol_decoder_context_.eob_pt_512_cdf[plane_type]);
      break;
    case 6:
    default:
      eob_pt += reader_.ReadSymbol<kEobPt1024SymbolCount>(
          symbol_decoder_context_.eob_pt_1024_cdf[plane_type]);
      break;
  }
  // Refine eob within the class: one context-coded extra bit, then raw bits.
  int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
  if (eob_pt >= 3) {
    context = eob_pt - 3;
    const bool eob_extra = reader_.ReadSymbol(
        symbol_decoder_context_
            .eob_extra_cdf[tx_size_context][plane_type][context]);
    if (eob_extra) eob += 1 << (eob_pt - 3);
    for (int i = 1; i < eob_pt - 2; ++i) {
      assert(eob_pt - i >= 3);
      assert(eob_pt <= kEobPt1024SymbolCount);
      if (static_cast<bool>(reader_.ReadBit())) {
        eob += 1 << (eob_pt - i - 3);
      }
    }
  }
  const uint16_t* scan = kScan[tx_class][tx_size];
  const int clamped_tx_size_context = std::min(tx_size_context, 3);
  auto coeff_base_range_cdf =
      symbol_decoder_context_
          .coeff_base_range_cdf[clamped_tx_size_context][plane_type];
  // Read the last coefficient.
  {
    context = GetCoeffBaseContextEob(tx_size, eob - 1);
    const uint16_t pos = scan[eob - 1];
    int level =
        1 + reader_.ReadSymbol<kCoeffBaseEobSymbolCount>(
                symbol_decoder_context_
                    .coeff_base_eob_cdf[tx_size_context][plane_type][context]);
    level_buffer[pos] = level;
    if (level > kNumQuantizerBaseLevels) {
      level +=
          ReadCoeffBaseRange(coeff_base_range_cdf[GetCoeffBaseRangeContextEob(
              adjusted_tx_width_log2, pos, tx_class)]);
    }
    residual[pos] = level;
  }
  if (eob > 1) {
    // Read all the other coefficients.
    // Lookup used to call the right variant of ReadCoeffBase*() based on the
    // transform class.
    static constexpr void (Tile::*kGetCoeffBaseFunc[])(
        const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
        int eob,
        uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
        uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                     [kCoeffBaseRangeSymbolCount + 1],
        ResidualType* quantized_buffer,
        uint8_t* level_buffer) = {&Tile::ReadCoeffBase2D<ResidualType>,
                                  &Tile::ReadCoeffBaseHorizontal<ResidualType>,
                                  &Tile::ReadCoeffBaseVertical<ResidualType>};
    (this->*kGetCoeffBaseFunc[tx_class])(
        scan, tx_size, adjusted_tx_width_log2, eob,
        symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
        coeff_base_range_cdf, residual, level_buffer);
  }
  const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
  const int current_quantizer_index = GetQIndex(
      frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
  const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
  const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
  const int shift = kQuantizationShift[tx_size];
  const uint8_t* const quantizer_matrix =
      (frame_header_.quantizer.use_matrix &&
       *tx_type < kTransformTypeIdentityIdentity &&
       !frame_header_.segmentation.lossless[bp.segment_id] &&
       frame_header_.quantizer.matrix_level[plane] < 15)
          ? quantizer_matrix_[frame_header_.quantizer.matrix_level[plane]]
                             [plane_type][adjusted_tx_size]
                                 .get()
          : nullptr;
  int coefficient_level = 0;
  int8_t dc_category = 0;
  uint16_t* const dc_sign_cdf =
      (residual[0] != 0)
          ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
                x4, y4, w4, h4, plane)]
          : nullptr;
  assert(scan[0] == 0);
  if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
          scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
          &dc_category, &coefficient_level, residual)) {
    return -1;
  }
  if (eob > 1) {
    int i = 1;
    do {
      if (!ReadSignAndApplyDequantization<ResidualType,
                                          /*is_dc_coefficient=*/false>(
              scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
              nullptr, &coefficient_level, residual)) {
        return -1;
      }
    } while (++i < eob);
    MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
  }
  SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
                     dc_category);
  if (split_parse_and_decode_) {
    *block.residual += tx_width * tx_height * residual_size_;
  }
  return eob;
}

// CALL_BITDEPTH_FUNCTION is a macro that calls the appropriate template
// |function| depending on the value of |sequence_header_.color_config.bitdepth|
// with the variadic arguments.
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)         \
  do {                                                \
    if (sequence_header_.color_config.bitdepth > 8) { \
      function<uint16_t>(__VA_ARGS__);                \
    } else {                                          \
      function<uint8_t>(__VA_ARGS__);                 \
    }                                                 \
  } while (false)
#else
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif

// Processes one transform block: runs intra prediction (when decoding an
// intra block), then, unless the block is skipped, parses and/or reconstructs
// its coefficients depending on |mode|:
//   * kProcessingModeParseOnly: coefficients are parsed and the transform
//     parameters are pushed onto a queue for a later decode pass.
//   * kProcessingModeDecodeOnly: previously queued parameters are popped and
//     the block is reconstructed.
//   * kProcessingModeParseAndDecode: both in one pass.
// Returns false on parse failure.
bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
                          int base_y, TransformSize tx_size, int x, int y,
                          ProcessingMode mode) {
  BlockParameters& bp = *block.bp;
  const int subsampling_x = subsampling_x_[plane];
  const int subsampling_y = subsampling_y_[plane];
  const int start_x = base_x + MultiplyBy4(x);
  const int start_y = base_y + MultiplyBy4(y);
  const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
  const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
  // Blocks that start beyond the frame boundary are trivially done.
  if (start_x >= max_x || start_y >= max_y) return true;
  const int row = DivideBy4(start_y << subsampling_y);
  const int column = DivideBy4(start_x << subsampling_x);
  const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
  const int sub_block_row4x4 = row & mask;
  const int sub_block_column4x4 = column & mask;
  const int step_x = kTransformWidth4x4[tx_size];
  const int step_y = kTransformHeight4x4[tx_size];
  const bool do_decode = mode == kProcessingModeDecodeOnly ||
                         mode == kProcessingModeParseAndDecode;
  if (do_decode && !bp.is_inter) {
    if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
      CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
                             x, y, tx_size);
    } else {
      const PredictionMode mode =
          (plane == kPlaneY)
              ? bp.y_mode
              : (bp.uv_mode == kPredictionModeChromaFromLuma ? kPredictionModeDc
                                                             : bp.uv_mode);
      // Positions of the top-right and bottom-left neighbors in the
      // block_decoded bitmap, used to decide edge pixel availability.
      const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
      const int tr_column4x4 =
          (sub_block_column4x4 >> subsampling_x) + step_x + 1;
      const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
      const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
      const bool has_left = x > 0 || block.left_available[plane];
      const bool has_top = y > 0 || block.top_available[plane];

      CALL_BITDEPTH_FUNCTION(
          IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
          block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
          block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
          mode, tx_size);
      if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
        CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
                               start_y, tx_size);
      }
    }
    if (plane == kPlaneY) {
      block.bp->prediction_parameters->max_luma_width =
          start_x + MultiplyBy4(step_x);
      block.bp->prediction_parameters->max_luma_height =
          start_y + MultiplyBy4(step_y);
      block.scratch_buffer->cfl_luma_buffer_valid = false;
    }
  }
  if (!bp.skip) {
    const int sb_row_index = SuperBlockRowIndex(block.row4x4);
    const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
    if (mode == kProcessingModeDecodeOnly) {
      TransformParameterQueue& tx_params =
          *residual_buffer_threaded_[sb_row_index][sb_column_index]
               ->transform_parameters();
      ReconstructBlock(block, plane, start_x, start_y, tx_size,
                       tx_params.Type(), tx_params.NonZeroCoeffCount());
      tx_params.Pop();
    } else {
      TransformType tx_type;
      int non_zero_coeff_count;
#if LIBGAV1_MAX_BITDEPTH >= 10
      if (sequence_header_.color_config.bitdepth > 8) {
        non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
            block, plane, start_x, start_y, tx_size, &tx_type);
      } else  // NOLINT
#endif
      {
        non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
            block, plane, start_x, start_y, tx_size, &tx_type);
      }
      if (non_zero_coeff_count < 0) return false;
      if (mode == kProcessingModeParseAndDecode) {
        ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
                         non_zero_coeff_count);
      } else {
        assert(mode == kProcessingModeParseOnly);
        residual_buffer_threaded_[sb_row_index][sb_column_index]
            ->transform_parameters()
            ->Push(non_zero_coeff_count, tx_type);
      }
    }
  }
  if (do_decode) {
    // Mark the 4x4 units covered by this transform block as decoded so later
    // intra predictions know their neighbors are available.
    bool* block_decoded =
        &block.scratch_buffer
             ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
                            [(sub_block_column4x4 >> subsampling_x) + 1];
    SetBlockValues<bool>(step_y, step_x, true, block_decoded,
                         TileScratchBuffer::kBlockDecodedStride);
  }
  return true;
}

// Iteratively (with an explicit stack, avoiding recursion) splits the luma
// residual area down to the transform sizes recorded in
// |inter_transform_sizes_| and processes each leaf via TransformBlock().
// Returns false on parse failure.
bool Tile::TransformTree(const Block& block, int start_x, int start_y,
                         BlockSize plane_size, ProcessingMode mode) {
  assert(plane_size <= kBlock64x64);
  // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
  // required is (4 - 1) * 4 + 1 = 13.
  Stack<TransformTreeNode, 13> stack;
  // It is okay to cast BlockSize to TransformSize here since the enum are
  // equivalent for all BlockSize values <= kBlock64x64.
  stack.Push(TransformTreeNode(start_x, start_y,
                               static_cast<TransformSize>(plane_size)));

  do {
    TransformTreeNode node = stack.Pop();
    const int row = DivideBy4(node.y);
    const int column = DivideBy4(node.x);
    if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
      continue;
    }
    const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
    const int width = kTransformWidth[node.tx_size];
    const int height = kTransformHeight[node.tx_size];
    // Leaf node: the candidate size fits within the signaled transform size.
    if (width <= kTransformWidth[inter_tx_size] &&
        height <= kTransformHeight[inter_tx_size]) {
      if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
                          mode)) {
        return false;
      }
      continue;
    }
    // The split transform size look up gives the right transform size that we
    // should push in the stack.
    //   if (width > height) => transform size whose width is half.
    //   if (width < height) => transform size whose height is half.
    //   if (width == height) => transform size whose width and height are half.
    const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
    const int half_width = DivideBy2(width);
    if (width > height) {
      stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    const int half_height = DivideBy2(height);
    if (width < height) {
      stack.Push(
          TransformTreeNode(node.x, node.y + half_height, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    // Square node: split into four quadrants, pushed so that the top-left is
    // processed first.
    stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
                                 split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
    stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
  } while (!stack.Empty());
  return true;
}

// Applies the inverse transform to the parsed residual and adds the result to
// the prediction in |buffer_|. No-op when there are no nonzero coefficients.
void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
                            int start_y, TransformSize tx_size,
                            TransformType tx_type, int non_zero_coeff_count) {
  // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
  assert(non_zero_coeff_count >= 0);
  if (non_zero_coeff_count == 0) return;
#if LIBGAV1_MAX_BITDEPTH >= 10
  if (sequence_header_.color_config.bitdepth > 8) {
    Array2DView<uint16_t> buffer(
        buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
        reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation.lossless[block.bp->segment_id],
                reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
                &buffer, non_zero_coeff_count);
  } else  // NOLINT
#endif
  {
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation.lossless[block.bp->segment_id],
                reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
                &buffer_[plane], non_zero_coeff_count);
  }
  if (split_parse_and_decode_) {
    // Advance the shared residual pointer past this transform block.
    *block.residual +=
        kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
  }
}

// Processes the residual of a whole block: iterates over up to 64x64-pixel
// chunks and all planes, dispatching to TransformTree() for non-lossless
// inter luma (which can split further) and to a flat TransformBlock() grid
// otherwise. Returns false on parse failure.
bool Tile::Residual(const Block& block, ProcessingMode mode) {
  const int width_chunks = std::max(1, block.width >> 6);
  const int height_chunks = std::max(1, block.height >> 6);
  const BlockSize size_chunk4x4 =
      (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
  const BlockParameters& bp = *block.bp;
  for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
    for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
      const int num_planes = block.HasChroma() ? PlaneCount() : 1;
      int plane = kPlaneY;
      do {
        const int subsampling_x = subsampling_x_[plane];
        const int subsampling_y = subsampling_y_[plane];
        // For Y Plane, when lossless is true |bp.transform_size| is always
        // kTransformSize4x4. So we can simply use |bp.transform_size| here as
        // the Y plane's transform size (part of Section 5.11.37 in the spec).
        const TransformSize tx_size =
            (plane == kPlaneY) ? bp.transform_size : bp.uv_transform_size;
        const BlockSize plane_size =
            kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
        assert(plane_size != kBlockInvalid);
        if (bp.is_inter &&
            !frame_header_.segmentation.lossless[bp.segment_id] &&
            plane == kPlaneY) {
          const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
          const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
          const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
          if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
            return false;
          }
        } else {
          const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
          const int step_x = kTransformWidth4x4[tx_size];
          const int step_y = kTransformHeight4x4[tx_size];
          const int num4x4_wide = kNum4x4BlocksWide[plane_size];
          const int num4x4_high = kNum4x4BlocksHigh[plane_size];
          for (int y = 0; y < num4x4_high; y += step_y) {
            for (int x = 0; x < num4x4_wide; x += step_x) {
              if (!TransformBlock(
                      block, static_cast<Plane>(plane), base_x, base_y, tx_size,
                      x + (MultiplyBy16(chunk_x) >> subsampling_x),
                      y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
                return false;
              }
            }
          }
        }
      } while (++plane < num_planes);
    }
  }
  return true;
}

// The purpose of this function is to limit the maximum size of motion vectors
// and also, if use_intra_block_copy is true, to additionally constrain the
// motion vector so that the data is fetched from parts of the tile that have
// already been decoded and are not too close to the current block (in order to
// make a pipelined decoder implementation feasible).
+bool Tile::IsMvValid(const Block& block, bool is_compound) const { + const BlockParameters& bp = *block.bp; + for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) { + for (int mv_component : bp.mv.mv[i].mv) { + if (std::abs(mv_component) >= (1 << 14)) { + return false; + } + } + } + if (!block.bp->prediction_parameters->use_intra_block_copy) { + return true; + } + if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) { + return false; + } + const int delta_row = bp.mv.mv[0].mv[0] >> 3; + const int delta_column = bp.mv.mv[0].mv[1] >> 3; + int src_top_edge = MultiplyBy4(block.row4x4) + delta_row; + int src_left_edge = MultiplyBy4(block.column4x4) + delta_column; + const int src_bottom_edge = src_top_edge + block.height; + const int src_right_edge = src_left_edge + block.width; + if (block.HasChroma()) { + if (block.width < 8 && subsampling_x_[kPlaneU] != 0) { + src_left_edge -= 4; + } + if (block.height < 8 && subsampling_y_[kPlaneU] != 0) { + src_top_edge -= 4; + } + } + if (src_top_edge < MultiplyBy4(row4x4_start_) || + src_left_edge < MultiplyBy4(column4x4_start_) || + src_bottom_edge > MultiplyBy4(row4x4_end_) || + src_right_edge > MultiplyBy4(column4x4_end_)) { + return false; + } + // sb_height_log2 = use_128x128_superblock ? 
log2(128) : log2(64) + const int sb_height_log2 = + 6 + static_cast<int>(sequence_header_.use_128x128_superblock); + const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2; + const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6; + const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2; + const int src_64x64_block_column = (src_right_edge - 1) >> 6; + const int total_64x64_blocks_per_row = + ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1; + const int active_64x64_block = + active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column; + const int src_64x64_block = + src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column; + if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) { + return false; + } + + // Wavefront constraint: use only top left area of frame for reference. + if (src_sb_row > active_sb_row) return false; + const int gradient = + 1 + kIntraBlockCopyDelay64x64Blocks + + static_cast<int>(sequence_header_.use_128x128_superblock); + const int wavefront_offset = gradient * (active_sb_row - src_sb_row); + return src_64x64_block_column < active_64x64_block_column - + kIntraBlockCopyDelay64x64Blocks + + wavefront_offset; +} + +bool Tile::AssignInterMv(const Block& block, bool is_compound) { + int min[2]; + int max[2]; + GetClampParameters(block, min, max); + BlockParameters& bp = *block.bp; + const PredictionParameters& prediction_parameters = *bp.prediction_parameters; + if (is_compound) { + for (int i = 0; i < 2; ++i) { + const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode); + MotionVector predicted_mv; + if (mode == kPredictionModeGlobalMv) { + predicted_mv = prediction_parameters.global_mv[i]; + } else { + const int ref_mv_index = (mode == kPredictionModeNearestMv || + (mode == kPredictionModeNewMv && + prediction_parameters.ref_mv_count <= 1)) + ? 
0 + : prediction_parameters.ref_mv_index; + predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i); + if (ref_mv_index < prediction_parameters.ref_mv_count) { + predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]); + predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]); + } + } + if (mode == kPredictionModeNewMv) { + ReadMotionVector(block, i); + bp.mv.mv[i].mv[0] += predicted_mv.mv[0]; + bp.mv.mv[i].mv[1] += predicted_mv.mv[1]; + } else { + bp.mv.mv[i] = predicted_mv; + } + } + } else { + const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode); + MotionVector predicted_mv; + if (mode == kPredictionModeGlobalMv) { + predicted_mv = prediction_parameters.global_mv[0]; + } else { + const int ref_mv_index = (mode == kPredictionModeNearestMv || + (mode == kPredictionModeNewMv && + prediction_parameters.ref_mv_count <= 1)) + ? 0 + : prediction_parameters.ref_mv_index; + predicted_mv = prediction_parameters.reference_mv(ref_mv_index); + if (ref_mv_index < prediction_parameters.ref_mv_count) { + predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]); + predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]); + } + } + if (mode == kPredictionModeNewMv) { + ReadMotionVector(block, 0); + bp.mv.mv[0].mv[0] += predicted_mv.mv[0]; + bp.mv.mv[0].mv[1] += predicted_mv.mv[1]; + } else { + bp.mv.mv[0] = predicted_mv; + } + } + return IsMvValid(block, is_compound); +} + +bool Tile::AssignIntraMv(const Block& block) { + // TODO(linfengz): Check if the clamping process is necessary. 
+ int min[2]; + int max[2]; + GetClampParameters(block, min, max); + BlockParameters& bp = *block.bp; + const PredictionParameters& prediction_parameters = *bp.prediction_parameters; + const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0); + ReadMotionVector(block, 0); + if (ref_mv_0.mv32 == 0) { + const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1); + if (ref_mv_1.mv32 == 0) { + const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()]; + if (block.row4x4 - super_block_size4x4 < row4x4_start_) { + bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4); + bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels); + } else { + bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4); + } + } else { + bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]); + bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]); + } + } else { + bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]); + bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]); + } + return IsMvValid(block, /*is_compound=*/false); +} + +void Tile::ResetEntropyContext(const Block& block) { + const int num_planes = block.HasChroma() ? 
PlaneCount() : 1; + int plane = kPlaneY; + do { + const int subsampling_x = subsampling_x_[plane]; + const int start_x = block.column4x4 >> subsampling_x; + const int end_x = + std::min((block.column4x4 + block.width4x4) >> subsampling_x, + frame_header_.columns4x4); + memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0, + end_x - start_x); + memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0, + end_x - start_x); + const int subsampling_y = subsampling_y_[plane]; + const int start_y = block.row4x4 >> subsampling_y; + const int end_y = + std::min((block.row4x4 + block.height4x4) >> subsampling_y, + frame_header_.rows4x4); + memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0, + end_y - start_y); + memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0, + end_y - start_y); + } while (++plane < num_planes); +} + +bool Tile::ComputePrediction(const Block& block) { + const BlockParameters& bp = *block.bp; + if (!bp.is_inter) return true; + const int mask = + (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) - + 1; + const int sub_block_row4x4 = block.row4x4 & mask; + const int sub_block_column4x4 = block.column4x4 & mask; + const int plane_count = block.HasChroma() ? PlaneCount() : 1; + // Returns true if this block applies local warping. The state is determined + // in the Y plane and carried for use in the U/V planes. + // But the U/V planes will not apply warping when the block size is smaller + // than 8x8, even if this variable is true. + bool is_local_valid = false; + // Local warping parameters, similar usage as is_local_valid. 
+ GlobalMotion local_warp_params; + int plane = kPlaneY; + do { + const int8_t subsampling_x = subsampling_x_[plane]; + const int8_t subsampling_y = subsampling_y_[plane]; + const BlockSize plane_size = block.residual_size[plane]; + const int block_width4x4 = kNum4x4BlocksWide[plane_size]; + const int block_height4x4 = kNum4x4BlocksHigh[plane_size]; + const int block_width = MultiplyBy4(block_width4x4); + const int block_height = MultiplyBy4(block_height4x4); + const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x); + const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y); + if (bp.reference_frame[1] == kReferenceFrameIntra) { + const int tr_row4x4 = sub_block_row4x4 >> subsampling_y; + const int tr_column4x4 = + (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1; + const int bl_row4x4 = + (sub_block_row4x4 >> subsampling_y) + block_height4x4; + const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1; + const TransformSize tx_size = + k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]] + [k4x4HeightLog2[plane_size]]; + const bool has_left = block.left_available[plane]; + const bool has_top = block.top_available[plane]; + CALL_BITDEPTH_FUNCTION( + IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y, + has_left, has_top, + block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4], + block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4], + kInterIntraToIntraMode[block.bp->prediction_parameters + ->inter_intra_mode], + tx_size); + } + int candidate_row = block.row4x4; + int candidate_column = block.column4x4; + bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra; + if (!some_use_intra && plane != 0) { + candidate_row = (candidate_row >> subsampling_y) << subsampling_y; + candidate_column = (candidate_column >> subsampling_x) << subsampling_x; + if (candidate_row != block.row4x4) { + // Top block. 
+ const BlockParameters& bp_top = + *block_parameters_holder_.Find(candidate_row, block.column4x4); + some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra; + if (!some_use_intra && candidate_column != block.column4x4) { + // Top-left block. + const BlockParameters& bp_top_left = + *block_parameters_holder_.Find(candidate_row, candidate_column); + some_use_intra = + bp_top_left.reference_frame[0] == kReferenceFrameIntra; + } + } + if (!some_use_intra && candidate_column != block.column4x4) { + // Left block. + const BlockParameters& bp_left = + *block_parameters_holder_.Find(block.row4x4, candidate_column); + some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra; + } + } + int prediction_width; + int prediction_height; + if (some_use_intra) { + candidate_row = block.row4x4; + candidate_column = block.column4x4; + prediction_width = block_width; + prediction_height = block_height; + } else { + prediction_width = block.width >> subsampling_x; + prediction_height = block.height >> subsampling_y; + } + int r = 0; + int y = 0; + do { + int c = 0; + int x = 0; + do { + if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x, + base_y + y, prediction_width, prediction_height, + candidate_row + r, candidate_column + c, + &is_local_valid, &local_warp_params)) { + return false; + } + ++c; + x += prediction_width; + } while (x < block_width); + ++r; + y += prediction_height; + } while (y < block_height); + } while (++plane < plane_count); + return true; +} + +#undef CALL_BITDEPTH_FUNCTION + +void Tile::PopulateDeblockFilterLevel(const Block& block) { + if (!post_filter_.DoDeblock()) return; + BlockParameters& bp = *block.bp; + const int mode_id = + static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode)); + for (int i = 0; i < kFrameLfCount; ++i) { + if (delta_lf_all_zero_) { + bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel( + bp.segment_id, i, bp.reference_frame[0], mode_id); + } else { + 
bp.deblock_filter_level[i] = + deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]] + [mode_id]; + } + } +} + +bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size, + ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + // Do not process the block if the starting point is beyond the visible frame. + // This is equivalent to the has_row/has_column check in the + // decode_partition() section of the spec when partition equals + // kPartitionHorizontal or kPartitionVertical. + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + return true; + } + BlockParameters& bp = *tree->parameters(); + block_parameters_holder_.FillCache(row4x4, column4x4, block_size, &bp); + Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual); + bp.size = block_size; + bp.prediction_parameters = + split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>( + new (std::nothrow) PredictionParameters()) + : std::move(prediction_parameters_); + if (bp.prediction_parameters == nullptr) return false; + if (!DecodeModeInfo(block)) return false; + bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv || + bp.y_mode == kPredictionModeGlobalGlobalMv) && + !IsBlockDimension4(bp.size); + PopulateDeblockFilterLevel(block); + if (!ReadPaletteTokens(block)) return false; + DecodeTransformSize(block); + // Part of Section 5.11.37 in the spec (implemented as a simple lookup). + bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id] + ? 
kTransformSize4x4 + : kUVTransformSize[block.residual_size[kPlaneU]]; + if (bp.skip) ResetEntropyContext(block); + if (split_parse_and_decode_) { + if (!Residual(block, kProcessingModeParseOnly)) return false; + } else { + if (!ComputePrediction(block) || + !Residual(block, kProcessingModeParseAndDecode)) { + return false; + } + } + // If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all + // blocks. We don't need to call save bp.segment_id in the current frame + // because the current frame's segmentation map will be cleared to all 0s. + // + // If frame_header_.segmentation.enabled is true and + // frame_header_.segmentation.update_map is false, we will copy the previous + // frame's segmentation map to the current frame. So we don't need to call + // save bp.segment_id in the current frame. + if (frame_header_.segmentation.enabled && + frame_header_.segmentation.update_map) { + const int x_limit = std::min(frame_header_.columns4x4 - column4x4, + static_cast<int>(block.width4x4)); + const int y_limit = std::min(frame_header_.rows4x4 - row4x4, + static_cast<int>(block.height4x4)); + current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit, + y_limit, bp.segment_id); + } + StoreMotionFieldMvsIntoCurrentFrame(block); + if (!split_parse_and_decode_) { + prediction_parameters_ = std::move(bp.prediction_parameters); + } + return true; +} + +bool Tile::DecodeBlock(ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + const int row4x4 = tree->row4x4(); + const int column4x4 = tree->column4x4(); + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + return true; + } + const BlockSize block_size = tree->block_size(); + Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual); + if (!ComputePrediction(block) || + !Residual(block, kProcessingModeDecodeOnly)) { + return false; + } + block.bp->prediction_parameters.reset(nullptr); + return true; 
+} + +bool Tile::ProcessPartition(int row4x4_start, int column4x4_start, + ParameterTree* const root, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + Stack<ParameterTree*, kDfsStackSize> stack; + + // Set up the first iteration. + ParameterTree* node = root; + int row4x4 = row4x4_start; + int column4x4 = column4x4_start; + BlockSize block_size = SuperBlockSize(); + + // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked. + // Otherwise, the children are pushed into the stack for future processing. + do { + if (!stack.Empty()) { + // Set up subsequent iterations. + node = stack.Pop(); + row4x4 = node->row4x4(); + column4x4 = node->column4x4(); + block_size = node->block_size(); + } + if (row4x4 >= frame_header_.rows4x4 || + column4x4 >= frame_header_.columns4x4) { + continue; + } + const int block_width4x4 = kNum4x4BlocksWide[block_size]; + assert(block_width4x4 == kNum4x4BlocksHigh[block_size]); + const int half_block4x4 = block_width4x4 >> 1; + const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4; + const bool has_columns = + (column4x4 + half_block4x4) < frame_header_.columns4x4; + Partition partition; + if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns, + &partition)) { + LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d", + row4x4, column4x4); + return false; + } + const BlockSize sub_size = kSubSize[partition][block_size]; + // Section 6.10.4: It is a requirement of bitstream conformance that + // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID + // every time subSize is computed. 
+ if (sub_size == kBlockInvalid || + kPlaneResidualSize[sub_size] + [sequence_header_.color_config.subsampling_x] + [sequence_header_.color_config.subsampling_y] == + kBlockInvalid) { + LIBGAV1_DLOG( + ERROR, + "Invalid sub-block/plane size for row: %d column: %d partition: " + "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d", + row4x4, column4x4, partition, block_size, sub_size, + sequence_header_.color_config.subsampling_x, + sequence_header_.color_config.subsampling_y); + return false; + } + if (!node->SetPartitionType(partition)) { + LIBGAV1_DLOG(ERROR, "node->SetPartitionType() failed."); + return false; + } + switch (partition) { + case kPartitionNone: + if (!ProcessBlock(row4x4, column4x4, sub_size, node, scratch_buffer, + residual)) { + return false; + } + break; + case kPartitionSplit: + // The children must be added in reverse order since a stack is being + // used. + for (int i = 3; i >= 0; --i) { + ParameterTree* const child = node->children(i); + assert(child != nullptr); + stack.Push(child); + } + break; + case kPartitionHorizontal: + case kPartitionVertical: + case kPartitionHorizontalWithTopSplit: + case kPartitionHorizontalWithBottomSplit: + case kPartitionVerticalWithLeftSplit: + case kPartitionVerticalWithRightSplit: + case kPartitionHorizontal4: + case kPartitionVertical4: + for (int i = 0; i < 4; ++i) { + ParameterTree* const child = node->children(i); + // Once a null child is seen, all the subsequent children will also be + // null. 
+ if (child == nullptr) break; + if (!ProcessBlock(child->row4x4(), child->column4x4(), + child->block_size(), child, scratch_buffer, + residual)) { + return false; + } + } + break; + } + } while (!stack.Empty()); + return true; +} + +void Tile::ResetLoopRestorationParams() { + for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) { + for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) { + reference_unit_info_[plane].sgr_proj_info.multiplier[i] = + kSgrProjDefaultMultiplier[i]; + for (int j = 0; j < kNumWienerCoefficients; ++j) { + reference_unit_info_[plane].wiener_info.filter[i][j] = + kWienerDefaultFilter[j]; + } + } + } +} + +void Tile::ResetCdef(const int row4x4, const int column4x4) { + if (!sequence_header_.enable_cdef) return; + const int row = DivideBy16(row4x4); + const int column = DivideBy16(column4x4); + cdef_index_[row][column] = -1; + if (sequence_header_.use_128x128_superblock) { + const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64]; + const int border_row = DivideBy16(row4x4 + cdef_size4x4); + const int border_column = DivideBy16(column4x4 + cdef_size4x4); + cdef_index_[row][border_column] = -1; + cdef_index_[border_row][column] = -1; + cdef_index_[border_row][border_column] = -1; + } +} + +void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer, + int row4x4, int column4x4) { + // Set everything to false. + memset(scratch_buffer->block_decoded, 0, + sizeof(scratch_buffer->block_decoded)); + // Set specific edge cases to true. + const int sb_size4 = sequence_header_.use_128x128_superblock ? 
32 : 16; + for (int plane = kPlaneY; plane < PlaneCount(); ++plane) { + const int subsampling_x = subsampling_x_[plane]; + const int subsampling_y = subsampling_y_[plane]; + const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x; + const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y; + // The memset is equivalent to the following lines in the spec: + // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) { + // if ( y < 0 && x < sbWidth4 ) { + // BlockDecoded[plane][y][x] = 1 + // } + // } + const int num_elements = + std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1; + memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements); + // The for loop is equivalent to the following lines in the spec: + // for ( y = -1; y <= ( sbSize4 >> subY ); y++ ) + // if ( x < 0 && y < sbHeight4 ) + // BlockDecoded[plane][y][x] = 1 + // } + // } + // BlockDecoded[plane][sbSize4 >> subY][-1] = 0 + for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4); + ++y) { + scratch_buffer->block_decoded[plane][y + 1][0] = true; + } + } +} + +bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4, + TileScratchBuffer* const scratch_buffer, + ProcessingMode mode) { + const bool parsing = + mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode; + const bool decoding = mode == kProcessingModeDecodeOnly || + mode == kProcessingModeParseAndDecode; + if (parsing) { + read_deltas_ = frame_header_.delta_q.present; + ResetCdef(row4x4, column4x4); + } + if (decoding) { + ClearBlockDecoded(scratch_buffer, row4x4, column4x4); + } + const BlockSize block_size = SuperBlockSize(); + if (parsing) { + ReadLoopRestorationCoefficients(row4x4, column4x4, block_size); + } + const int row = row4x4 / block_width4x4; + const int column = column4x4 / block_width4x4; + if (parsing && decoding) { + uint8_t* residual_buffer = residual_buffer_.get(); + if (!ProcessPartition(row4x4, column4x4, + 
block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4, + column4x4); + return false; + } + return true; + } + const int sb_row_index = SuperBlockRowIndex(row4x4); + const int sb_column_index = SuperBlockColumnIndex(column4x4); + if (parsing) { + residual_buffer_threaded_[sb_row_index][sb_column_index] = + residual_buffer_pool_->Get(); + if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) { + LIBGAV1_DLOG(ERROR, "Failed to get residual buffer."); + return false; + } + uint8_t* residual_buffer = + residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer(); + if (!ProcessPartition(row4x4, column4x4, + block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4, + column4x4); + return false; + } + } else { + uint8_t* residual_buffer = + residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer(); + if (!DecodeSuperBlock(block_parameters_holder_.Tree(row, column), + scratch_buffer, &residual_buffer)) { + LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d", + row4x4, column4x4); + return false; + } + residual_buffer_pool_->Release( + std::move(residual_buffer_threaded_[sb_row_index][sb_column_index])); + } + return true; +} + +bool Tile::DecodeSuperBlock(ParameterTree* const tree, + TileScratchBuffer* const scratch_buffer, + ResidualPtr* residual) { + Stack<ParameterTree*, kDfsStackSize> stack; + stack.Push(tree); + do { + ParameterTree* const node = stack.Pop(); + if (node->partition() != kPartitionNone) { + for (int i = 3; i >= 0; --i) { + if (node->children(i) == nullptr) continue; + stack.Push(node->children(i)); + } + continue; + } + if (!DecodeBlock(node, scratch_buffer, residual)) { + LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d", + node->row4x4(), node->column4x4()); + return false; + } + } while 
(!stack.Empty()); + return true; +} + +void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4, + BlockSize block_size) { + if (frame_header_.allow_intrabc) return; + LoopRestorationInfo* const restoration_info = post_filter_.restoration_info(); + const bool is_superres_scaled = + frame_header_.width != frame_header_.upscaled_width; + for (int plane = kPlaneY; plane < PlaneCount(); ++plane) { + LoopRestorationUnitInfo unit_info; + if (restoration_info->PopulateUnitInfoForSuperBlock( + static_cast<Plane>(plane), block_size, is_superres_scaled, + frame_header_.superres_scale_denominator, row4x4, column4x4, + &unit_info)) { + for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end; + ++unit_row) { + for (int unit_column = unit_info.column_start; + unit_column < unit_info.column_end; ++unit_column) { + const int unit_id = unit_row * restoration_info->num_horizontal_units( + static_cast<Plane>(plane)) + + unit_column; + restoration_info->ReadUnitCoefficients( + &reader_, &symbol_decoder_context_, static_cast<Plane>(plane), + unit_id, &reference_unit_info_); + } + } + } + } +} + +void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) { + if (frame_header_.refresh_frame_flags == 0 || + IsIntraFrame(frame_header_.frame_type)) { + return; + } + // Iterate over odd rows/columns beginning at the first odd row/column for the + // block. It is done this way because motion field mvs are only needed at a + // 8x8 granularity. + const int row_start4x4 = block.row4x4 | 1; + const int row_limit4x4 = + std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4); + if (row_start4x4 >= row_limit4x4) return; + const int column_start4x4 = block.column4x4 | 1; + const int column_limit4x4 = + std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4); + if (column_start4x4 >= column_limit4x4) return; + + // The largest reference MV component that can be saved. 
+ constexpr int kRefMvsLimit = (1 << 12) - 1; + const BlockParameters& bp = *block.bp; + ReferenceInfo* reference_info = current_frame_.reference_info(); + for (int i = 1; i >= 0; --i) { + const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i]; + // Must make a local copy so that StoreMotionFieldMvs() knows there is no + // overlap between load and store. + const MotionVector mv_to_store = bp.mv.mv[i]; + const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]); + const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]); + if (reference_frame_to_store > kReferenceFrameIntra && + // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two + // absolute values and then compare with kRefMvsLimit to save a branch. + // The next line is equivalent to: + // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit + (mv_row | mv_column) <= kRefMvsLimit && + reference_info->relative_distance_from[reference_frame_to_store] < 0) { + const int row_start8x8 = DivideBy2(row_start4x4); + const int row_limit8x8 = DivideBy2(row_limit4x4); + const int column_start8x8 = DivideBy2(column_start4x4); + const int column_limit8x8 = DivideBy2(column_limit4x4); + const int rows = row_limit8x8 - row_start8x8; + const int columns = column_limit8x8 - column_start8x8; + const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4()); + ReferenceFrameType* const reference_frame_row_start = + &reference_info + ->motion_field_reference_frame[row_start8x8][column_start8x8]; + MotionVector* const mv = + &reference_info->motion_field_mv[row_start8x8][column_start8x8]; + + // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined + // and simplifies std::fill() for these cases. + if (columns <= 1) { + // Don't change the above condition to (columns == 1). + // Condition (columns <= 1) may help the compiler simplify the inlining + // of the general case of StoreMotionFieldMvs() by eliminating the + // (columns == 0) case. 
+ assert(columns == 1); + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 1, reference_frame_row_start, mv); + } else if (columns == 2) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 2, reference_frame_row_start, mv); + } else if (columns == 4) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 4, reference_frame_row_start, mv); + } else if (columns == 8) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 8, reference_frame_row_start, mv); + } else if (columns == 16) { + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + 16, reference_frame_row_start, mv); + } else if (columns < 16) { + // This always true condition (columns < 16) may help the compiler + // simplify the inlining of the following function. + // This general case is rare and usually only happens to the blocks + // which contain the right boundary of the frame. + StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows, + columns, reference_frame_row_start, mv); + } else { + assert(false); + } + return; + } + } +} + +} // namespace libgav1 |