author     qinxialei <xialeiqin@gmail.com>  2020-10-29 11:26:59 +0800
committer  qinxialei <xialeiqin@gmail.com>  2020-10-29 11:26:59 +0800
commit     e8d277081293b6fb2a5d469616baaa7a06f52496 (patch)
tree       1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/utils
Import Upstream version 0.16.0
Diffstat (limited to 'src/utils')
-rw-r--r--  src/utils/array_2d.h  131
-rw-r--r--  src/utils/bit_mask_set.h  79
-rw-r--r--  src/utils/bit_reader.cc  117
-rw-r--r--  src/utils/bit_reader.h  49
-rw-r--r--  src/utils/block_parameters_holder.cc  107
-rw-r--r--  src/utils/block_parameters_holder.h  85
-rw-r--r--  src/utils/blocking_counter.h  97
-rw-r--r--  src/utils/common.h  534
-rw-r--r--  src/utils/compiler_attributes.h  181
-rw-r--r--  src/utils/constants.cc  874
-rw-r--r--  src/utils/constants.h  744
-rw-r--r--  src/utils/cpu.cc  84
-rw-r--r--  src/utils/cpu.h  107
-rw-r--r--  src/utils/dynamic_buffer.h  82
-rw-r--r--  src/utils/entropy_decoder.cc  1117
-rw-r--r--  src/utils/entropy_decoder.h  123
-rw-r--r--  src/utils/executor.cc  21
-rw-r--r--  src/utils/executor.h  36
-rw-r--r--  src/utils/libgav1_utils.cmake  72
-rw-r--r--  src/utils/logging.cc  65
-rw-r--r--  src/utils/logging.h  85
-rw-r--r--  src/utils/memory.h  237
-rw-r--r--  src/utils/parameter_tree.cc  133
-rw-r--r--  src/utils/parameter_tree.h  113
-rw-r--r--  src/utils/queue.h  105
-rw-r--r--  src/utils/raw_bit_reader.cc  224
-rw-r--r--  src/utils/raw_bit_reader.h  78
-rw-r--r--  src/utils/reference_info.h  92
-rw-r--r--  src/utils/segmentation.cc  31
-rw-r--r--  src/utils/segmentation.h  32
-rw-r--r--  src/utils/segmentation_map.cc  49
-rw-r--r--  src/utils/segmentation_map.h  71
-rw-r--r--  src/utils/stack.h  59
-rw-r--r--  src/utils/threadpool.cc  323
-rw-r--r--  src/utils/threadpool.h  167
-rw-r--r--  src/utils/types.h  525
-rw-r--r--  src/utils/unbounded_queue.h  245
-rw-r--r--  src/utils/vector.h  352
38 files changed, 7626 insertions(+), 0 deletions(-)
diff --git a/src/utils/array_2d.h b/src/utils/array_2d.h
new file mode 100644
index 0000000..2df6241
--- /dev/null
+++ b/src/utils/array_2d.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_ARRAY_2D_H_
+#define LIBGAV1_SRC_UTILS_ARRAY_2D_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <type_traits>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// Exposes a 1D allocated memory buffer as a 2D array.
+template <typename T>
+class Array2DView {
+ public:
+ Array2DView() = default;
+ Array2DView(int rows, int columns, T* const data) {
+ Reset(rows, columns, data);
+ }
+
+ // Copyable and Movable.
+ Array2DView(const Array2DView& rhs) = default;
+ Array2DView& operator=(const Array2DView& rhs) = default;
+
+ void Reset(int rows, int columns, T* const data) {
+ rows_ = rows;
+ columns_ = columns;
+ data_ = data;
+ }
+
+ int rows() const { return rows_; }
+ int columns() const { return columns_; }
+
+ T* operator[](int row) { return const_cast<T*>(GetRow(row)); }
+
+ const T* operator[](int row) const { return GetRow(row); }
+
+ private:
+ const T* GetRow(int row) const {
+ assert(row < rows_);
+ const ptrdiff_t offset = static_cast<ptrdiff_t>(row) * columns_;
+ return data_ + offset;
+ }
+
+ int rows_ = 0;
+ int columns_ = 0;
+ T* data_ = nullptr;
+};
+
+// Allocates and owns the contiguous memory and exposes an Array2DView of
+// dimension |rows| x |columns|.
+template <typename T>
+class Array2D {
+ public:
+ Array2D() = default;
+
+ // Copyable and Movable.
+ Array2D(const Array2D& rhs) = default;
+ Array2D& operator=(const Array2D& rhs) = default;
+
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows, int columns,
+ bool zero_initialize = true) {
+ size_ = rows * columns;
+ // If T is not a trivial type, we should always reallocate the data_
+ // buffer, so that the destructors of any existing objects are invoked.
+ if (!std::is_trivial<T>::value || allocated_size_ < size_) {
+ // Note: This invokes the global operator new if T is a non-class type,
+ // such as integer or enum types, or a class type that is not derived
+ // from libgav1::Allocable, such as std::unique_ptr. If we enforce a
+ // maximum allocation size or keep track of our own heap memory
+ // consumption, we will need to handle the allocations here that use the
+ // global operator new.
+ if (zero_initialize) {
+ data_.reset(new (std::nothrow) T[size_]());
+ } else {
+ data_.reset(new (std::nothrow) T[size_]);
+ }
+ if (data_ == nullptr) {
+ allocated_size_ = 0;
+ return false;
+ }
+ allocated_size_ = size_;
+ } else if (zero_initialize) {
+ // Cast the data_ pointer to void* to avoid the GCC -Wclass-memaccess
+ // warning. The memset is safe because T is a trivial type.
+ void* dest = data_.get();
+ memset(dest, 0, sizeof(T) * size_);
+ }
+ data_view_.Reset(rows, columns, data_.get());
+ return true;
+ }
+
+ int rows() const { return data_view_.rows(); }
+ int columns() const { return data_view_.columns(); }
+ size_t size() const { return size_; }
+ T* data() { return data_.get(); }
+ const T* data() const { return data_.get(); }
+
+ T* operator[](int row) { return data_view_[row]; }
+
+ const T* operator[](int row) const { return data_view_[row]; }
+
+ private:
+ std::unique_ptr<T[]> data_ = nullptr;
+ size_t allocated_size_ = 0;
+ size_t size_ = 0;
+ Array2DView<T> data_view_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_ARRAY_2D_H_
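A minimal usage sketch for the Array2D class above (illustrative only, not part of the imported sources; FillIdentity is a hypothetical helper). Because allocation uses std::nothrow, Reset() can fail and its LIBGAV1_MUST_USE_RESULT return value has to be checked:

#include "src/utils/array_2d.h"

// Hypothetical example: builds an n x n identity matrix.
bool FillIdentity(libgav1::Array2D<int>* matrix, int n) {
  // Reset() allocates (or reuses) the contiguous buffer; check for failure.
  if (!matrix->Reset(n, n, /*zero_initialize=*/true)) return false;
  for (int i = 0; i < n; ++i) {
    (*matrix)[i][i] = 1;  // operator[] returns a pointer to row i.
  }
  return true;
}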
diff --git a/src/utils/bit_mask_set.h b/src/utils/bit_mask_set.h
new file mode 100644
index 0000000..7371753
--- /dev/null
+++ b/src/utils/bit_mask_set.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
+#define LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+// This class is used to check if a given value is equal to one of the several
+// predetermined values using a bit mask instead of a chain of comparisons and
+// ||s. This usually results in fewer instructions.
+//
+// Usage:
+// constexpr BitMaskSet set(value1, value2);
+// set.Contains(value1) => returns true.
+// set.Contains(value3) => returns false.
+class BitMaskSet {
+ public:
+ explicit constexpr BitMaskSet(uint32_t mask) : mask_(mask) {}
+
+ constexpr BitMaskSet(int v1, int v2) : mask_((1U << v1) | (1U << v2)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
+ int v8, int v9)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7) | (1U << v8) | (1U << v9)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
+ int v8, int v9, int v10)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7) | (1U << v8) | (1U << v9) | (1U << v10)) {
+ }
+
+ constexpr bool Contains(uint8_t value) const {
+ return MaskContainsValue(mask_, value);
+ }
+
+ static constexpr bool MaskContainsValue(uint32_t mask, uint8_t value) {
+ return ((mask >> value) & 1) != 0;
+ }
+
+ private:
+ const uint32_t mask_;
+};
+
+} // namespace libgav1
+#endif // LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
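To illustrate the "fewer instructions" claim above, a small sketch (not part of the imported sources; kSmallSet and IsSmall are hypothetical names). The set {3, 5, 7} folds into the compile-time constant mask 0xA8, so a membership test is one shift plus one AND:

#include <cstdint>

#include "src/utils/bit_mask_set.h"

constexpr libgav1::BitMaskSet kSmallSet(3, 5, 7);  // mask_ == 0xA8.

// Equivalent to (x == 3 || x == 5 || x == 7), without the comparison chain.
bool IsSmall(uint8_t x) { return kSmallSet.Contains(x); }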
diff --git a/src/utils/bit_reader.cc b/src/utils/bit_reader.cc
new file mode 100644
index 0000000..3234128
--- /dev/null
+++ b/src/utils/bit_reader.cc
@@ -0,0 +1,117 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/bit_reader.h"
+
+#include <cassert>
+#include <cstdint>
+
+#include "src/utils/common.h"
+
+namespace libgav1 {
+namespace {
+
+bool Assign(int* const value, int assignment, bool return_value) {
+ *value = assignment;
+ return return_value;
+}
+
+// 5.9.29.
+int InverseRecenter(int r, int v) {
+ if (v > (r << 1)) {
+ return v;
+ }
+ if ((v & 1) != 0) {
+ return r - ((v + 1) >> 1);
+ }
+ return r + (v >> 1);
+}
+
+} // namespace
+
+bool BitReader::DecodeSignedSubexpWithReference(int low, int high,
+ int reference, int control,
+ int* const value) {
+ if (!DecodeUnsignedSubexpWithReference(high - low, reference - low, control,
+ value)) {
+ return false;
+ }
+ *value += low;
+ return true;
+}
+
+bool BitReader::DecodeUniform(int n, int* const value) {
+ if (n <= 1) {
+ return Assign(value, 0, true);
+ }
+ const int w = FloorLog2(n) + 1;
+ const int m = (1 << w) - n;
+ assert(w - 1 < 32);
+ const int v = static_cast<int>(ReadLiteral(w - 1));
+ if (v == -1) {
+ return Assign(value, 0, false);
+ }
+ if (v < m) {
+ return Assign(value, v, true);
+ }
+ const int extra_bit = ReadBit();
+ if (extra_bit == -1) {
+ return Assign(value, 0, false);
+ }
+ return Assign(value, (v << 1) - m + extra_bit, true);
+}
+
+bool BitReader::DecodeUnsignedSubexpWithReference(int mx, int reference,
+ int control,
+ int* const value) {
+ int v;
+ if (!DecodeSubexp(mx, control, &v)) return false;
+ if ((reference << 1) <= mx) {
+ *value = InverseRecenter(reference, v);
+ } else {
+ *value = mx - 1 - InverseRecenter(mx - 1 - reference, v);
+ }
+ return true;
+}
+
+bool BitReader::DecodeSubexp(int num_symbols, int control, int* const value) {
+ int i = 0;
+ int mk = 0;
+ while (true) {
+ const int b = (i != 0) ? control + i - 1 : control;
+ if (b >= 32) {
+ return Assign(value, 0, false);
+ }
+ const int a = 1 << b;
+ if (num_symbols <= mk + 3 * a) {
+ if (!DecodeUniform(num_symbols - mk, value)) return false;
+ *value += mk;
+ return true;
+ }
+ const int8_t subexp_more_bits = ReadBit();
+ if (subexp_more_bits == -1) return false;
+ if (subexp_more_bits != 0) {
+ ++i;
+ mk += a;
+ } else {
+ const int subexp_bits = static_cast<int>(ReadLiteral(b));
+ if (subexp_bits == -1) {
+ return Assign(value, 0, false);
+ }
+ return Assign(value, subexp_bits + mk, true);
+ }
+ }
+}
+
+} // namespace libgav1
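As a worked example of DecodeUniform() above (the ns(n) coding from the spec): for n = 5, w = FloorLog2(5) + 1 = 3 and m = (1 << 3) - 5 = 3. The reader first takes w - 1 = 2 bits, giving v in [0, 3]. If v < m, the result is v itself, so the 2-bit codes 00, 01, 10 decode to 0, 1, 2. Otherwise one extra bit e is read and the result is (v << 1) - m + e = 3 + e, so the 3-bit codes 110 and 111 decode to 3 and 4. All five values are therefore coded in 2 or 3 bits.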
diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h
new file mode 100644
index 0000000..5a10e12
--- /dev/null
+++ b/src/utils/bit_reader.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BIT_READER_H_
+#define LIBGAV1_SRC_UTILS_BIT_READER_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+class BitReader {
+ public:
+ virtual ~BitReader() = default;
+
+ virtual int ReadBit() = 0;
+ // |num_bits| has to be <= 32. The function returns a value in the range [0,
+ // 2^num_bits - 1] (inclusive) on success and -1 on failure.
+ virtual int64_t ReadLiteral(int num_bits) = 0;
+
+ bool DecodeSignedSubexpWithReference(int low, int high, int reference,
+ int control, int* value); // 5.9.26.
+ // Decodes a nonnegative integer with maximum number of values |n| (i.e.,
+ // output in range 0..n-1) by following the process specified in Section
+ // 4.10.7 ns(n) and Section 4.10.10 NS(n) of the spec.
+ bool DecodeUniform(int n, int* value);
+
+ private:
+ // Helper functions for DecodeSignedSubexpWithReference.
+ bool DecodeUnsignedSubexpWithReference(int mx, int reference, int control,
+ int* value); // 5.9.27.
+ bool DecodeSubexp(int num_symbols, int control, int* value); // 5.9.28.
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BIT_READER_H_
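ReadBit() and ReadLiteral() are the only pure virtuals, so the subexponential decoders above can be exercised with a trivial in-memory reader. A sketch under that assumption (VectorBitReader is hypothetical; the library's real implementations live in raw_bit_reader and entropy_decoder, per the diffstat):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include "src/utils/bit_reader.h"

class VectorBitReader : public libgav1::BitReader {
 public:
  explicit VectorBitReader(std::vector<uint8_t> data)
      : data_(std::move(data)) {}

  // Returns 0 or 1, or -1 once the buffer is exhausted.
  int ReadBit() override {
    if (bit_offset_ >= data_.size() * 8) return -1;
    const int bit = (data_[bit_offset_ >> 3] >> (7 - (bit_offset_ & 7))) & 1;
    ++bit_offset_;
    return bit;
  }

  // Reads |num_bits| (<= 32) bits MSB-first; returns -1 on failure.
  int64_t ReadLiteral(int num_bits) override {
    int64_t value = 0;
    for (int i = 0; i < num_bits; ++i) {
      const int bit = ReadBit();
      if (bit == -1) return -1;
      value = (value << 1) | bit;
    }
    return value;
  }

 private:
  std::vector<uint8_t> data_;
  size_t bit_offset_ = 0;
};

With this, DecodeUniform() and the subexp helpers above can be driven directly from a byte vector.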
diff --git a/src/utils/block_parameters_holder.cc b/src/utils/block_parameters_holder.cc
new file mode 100644
index 0000000..3ccdb9b
--- /dev/null
+++ b/src/utils/block_parameters_holder.cc
@@ -0,0 +1,107 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/block_parameters_holder.h"
+
+#include <algorithm>
+
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/logging.h"
+#include "src/utils/parameter_tree.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+namespace {
+
+// Returns the number of super block rows/columns for |value4x4| where value4x4
+// is either rows4x4 or columns4x4.
+int RowsOrColumns4x4ToSuperBlocks(int value4x4, bool use_128x128_superblock) {
+ return use_128x128_superblock ? DivideBy128(MultiplyBy4(value4x4) + 127)
+ : DivideBy64(MultiplyBy4(value4x4) + 63);
+}
+
+} // namespace
+
+bool BlockParametersHolder::Reset(int rows4x4, int columns4x4,
+ bool use_128x128_superblock) {
+ rows4x4_ = rows4x4;
+ columns4x4_ = columns4x4;
+ use_128x128_superblock_ = use_128x128_superblock;
+ if (!block_parameters_cache_.Reset(rows4x4_, columns4x4_)) {
+ LIBGAV1_DLOG(ERROR, "block_parameters_cache_.Reset() failed.");
+ return false;
+ }
+ const int rows =
+ RowsOrColumns4x4ToSuperBlocks(rows4x4_, use_128x128_superblock_);
+ const int columns =
+ RowsOrColumns4x4ToSuperBlocks(columns4x4_, use_128x128_superblock_);
+ const BlockSize sb_size =
+ use_128x128_superblock_ ? kBlock128x128 : kBlock64x64;
+ const int multiplier = kNum4x4BlocksWide[sb_size];
+ if (!trees_.Reset(rows, columns, /*zero_initialize=*/false)) {
+ LIBGAV1_DLOG(ERROR, "trees_.Reset() failed.");
+ return false;
+ }
+ for (int i = 0; i < rows; ++i) {
+ for (int j = 0; j < columns; ++j) {
+ trees_[i][j] =
+ ParameterTree::Create(i * multiplier, j * multiplier, sb_size);
+ if (trees_[i][j] == nullptr) {
+ LIBGAV1_DLOG(ERROR, "Allocation of trees_[%d][%d] failed.", i, j);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void BlockParametersHolder::FillCache(int row4x4, int column4x4,
+ BlockSize block_size,
+ BlockParameters* const bp) {
+ int rows = std::min(static_cast<int>(kNum4x4BlocksHigh[block_size]),
+ rows4x4_ - row4x4);
+ const int columns = std::min(static_cast<int>(kNum4x4BlocksWide[block_size]),
+ columns4x4_ - column4x4);
+ auto* bp_dst = &block_parameters_cache_[row4x4][column4x4];
+ // Specialize columns cases (values in kNum4x4BlocksWide[]) for better
+ // performance.
+ if (columns == 1) {
+ SetBlock<BlockParameters*>(rows, 1, bp, bp_dst, columns4x4_);
+ } else if (columns == 2) {
+ SetBlock<BlockParameters*>(rows, 2, bp, bp_dst, columns4x4_);
+ } else if (columns == 4) {
+ SetBlock<BlockParameters*>(rows, 4, bp, bp_dst, columns4x4_);
+ } else if (columns == 8) {
+ SetBlock<BlockParameters*>(rows, 8, bp, bp_dst, columns4x4_);
+ } else if (columns == 16) {
+ SetBlock<BlockParameters*>(rows, 16, bp, bp_dst, columns4x4_);
+ } else if (columns == 32) {
+ SetBlock<BlockParameters*>(rows, 32, bp, bp_dst, columns4x4_);
+ } else {
+ do {
+ // The following loop has better performance than using std::fill().
+      // std::fill() has some overhead from checking for a zero loop count.
+ int x = columns;
+ auto* d = bp_dst;
+ do {
+ *d++ = bp;
+ } while (--x != 0);
+ bp_dst += columns4x4_;
+ } while (--rows != 0);
+ }
+}
+
+} // namespace libgav1
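As a concrete example of FillCache() above: a kBlock16x8 block covers 4 columns by 2 rows of 4x4 units, so for row4x4 = 2 and column4x4 = 4 the specialized SetBlock<> call writes the same BlockParameters pointer into the eight cache cells at rows 2..3, columns 4..7. This is what lets Find() resolve any 4x4 position inside a block with a single array lookup instead of a tree traversal.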
diff --git a/src/utils/block_parameters_holder.h b/src/utils/block_parameters_holder.h
new file mode 100644
index 0000000..35543c3
--- /dev/null
+++ b/src/utils/block_parameters_holder.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
+#define LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
+
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/parameter_tree.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// Holds a 2D array of |ParameterTree| objects. Each tree stores the parameters
+// corresponding to a superblock.
+class BlockParametersHolder {
+ public:
+ BlockParametersHolder() = default;
+
+ // Not copyable or movable.
+ BlockParametersHolder(const BlockParametersHolder&) = delete;
+ BlockParametersHolder& operator=(const BlockParametersHolder&) = delete;
+
+ // If |use_128x128_superblock| is true, 128x128 superblocks will be used,
+ // otherwise 64x64 superblocks will be used.
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows4x4, int columns4x4,
+ bool use_128x128_superblock);
+
+ // Finds the BlockParameters corresponding to |row4x4| and |column4x4|. This
+ // is done as a simple look up of the |block_parameters_cache_| matrix.
+ // Returns nullptr if the BlockParameters cannot be found.
+ BlockParameters* Find(int row4x4, int column4x4) const {
+ return block_parameters_cache_[row4x4][column4x4];
+ }
+
+ BlockParameters** Address(int row4x4, int column4x4) {
+ return block_parameters_cache_.data() + row4x4 * columns4x4_ + column4x4;
+ }
+
+ BlockParameters* const* Address(int row4x4, int column4x4) const {
+ return block_parameters_cache_.data() + row4x4 * columns4x4_ + column4x4;
+ }
+
+ int columns4x4() const { return columns4x4_; }
+
+ // Returns the ParameterTree corresponding to superblock starting at (|row|,
+ // |column|).
+ ParameterTree* Tree(int row, int column) { return trees_[row][column].get(); }
+
+ // Fills the cache matrix for the block starting at |row4x4|, |column4x4| of
+ // size |block_size| with the pointer |bp|.
+ void FillCache(int row4x4, int column4x4, BlockSize block_size,
+ BlockParameters* bp);
+
+ private:
+ int rows4x4_ = 0;
+ int columns4x4_ = 0;
+ bool use_128x128_superblock_ = false;
+ Array2D<std::unique_ptr<ParameterTree>> trees_;
+
+ // This is a 2d array of size |rows4x4_| * |columns4x4_|. This is filled in by
+ // FillCache() and used by Find() to perform look ups using exactly one look
+ // up (instead of traversing the entire tree).
+ Array2D<BlockParameters*> block_parameters_cache_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
diff --git a/src/utils/blocking_counter.h b/src/utils/blocking_counter.h
new file mode 100644
index 0000000..6d664f8
--- /dev/null
+++ b/src/utils/blocking_counter.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
+#define LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
+
+#include <cassert>
+#include <condition_variable> // NOLINT (unapproved c++11 header)
+#include <mutex> // NOLINT (unapproved c++11 header)
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// Implementation of a Blocking Counter that is used for the "fork-join"
+// use case. Typical usage would be as follows:
+// BlockingCounter counter(num_jobs);
+// - spawn the jobs.
+// - call counter.Wait() on the master thread.
+// - worker threads will call counter.Decrement().
+// - master thread will return from counter.Wait() when all workers are
+// complete.
+template <bool has_failure_status>
+class BlockingCounterImpl {
+ public:
+ explicit BlockingCounterImpl(int initial_count)
+ : count_(initial_count), job_failed_(false) {}
+
+ // Increment the counter by |count|. This must be called before Wait() is
+ // called. This must be called from the same thread that will call Wait().
+ void IncrementBy(int count) {
+ assert(count >= 0);
+ std::unique_lock<std::mutex> lock(mutex_);
+ count_ += count;
+ }
+
+ // Decrement the counter by 1. This function can be called only when
+  // |has_failure_status| is false (i.e., when this class is being used with
+  // the |BlockingCounter| alias).
+ void Decrement() {
+ static_assert(!has_failure_status, "");
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (--count_ == 0) {
+ condition_.notify_one();
+ }
+ }
+
+ // Decrement the counter by 1. This function can be called only when
+  // |has_failure_status| is true (i.e., when this class is being used with the
+  // |BlockingCounterWithStatus| alias). |job_succeeded| is used to update the
+ // state of |job_failed_|.
+ void Decrement(bool job_succeeded) {
+ static_assert(has_failure_status, "");
+ std::unique_lock<std::mutex> lock(mutex_);
+ job_failed_ |= !job_succeeded;
+ if (--count_ == 0) {
+ condition_.notify_one();
+ }
+ }
+
+ // Block until the counter becomes 0. This function can be called only once
+ // per object. If |has_failure_status| is true, true is returned if all the
+ // jobs succeeded and false is returned if any of the jobs failed. If
+ // |has_failure_status| is false, this function always returns true.
+ bool Wait() {
+ std::unique_lock<std::mutex> lock(mutex_);
+ condition_.wait(lock, [this]() { return count_ == 0; });
+ // If |has_failure_status| is false, we simply return true.
+ return has_failure_status ? !job_failed_ : true;
+ }
+
+ private:
+ std::mutex mutex_;
+ std::condition_variable condition_;
+ int count_ LIBGAV1_GUARDED_BY(mutex_);
+ bool job_failed_ LIBGAV1_GUARDED_BY(mutex_);
+};
+
+using BlockingCounterWithStatus = BlockingCounterImpl<true>;
+using BlockingCounter = BlockingCounterImpl<false>;
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
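The fork-join pattern from the usage comment, written out as a sketch (illustrative only; RunJobs is hypothetical, and the library itself dispatches through its ThreadPool rather than raw std::thread):

#include <thread>
#include <vector>

#include "src/utils/blocking_counter.h"

void RunJobs(int num_jobs) {
  libgav1::BlockingCounter counter(num_jobs);
  std::vector<std::thread> workers;
  workers.reserve(num_jobs);
  for (int i = 0; i < num_jobs; ++i) {
    workers.emplace_back([&counter]() {
      // ... perform this job's work here ...
      counter.Decrement();  // Each worker signals completion once.
    });
  }
  counter.Wait();  // Blocks until all |num_jobs| decrements have happened.
  for (auto& worker : workers) worker.join();
}

With BlockingCounterWithStatus, each worker would instead call counter.Decrement(job_succeeded) and Wait() would report whether any job failed.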
diff --git a/src/utils/common.h b/src/utils/common.h
new file mode 100644
index 0000000..ae43c2b
--- /dev/null
+++ b/src/utils/common.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_COMMON_H_
+#define LIBGAV1_SRC_UTILS_COMMON_H_
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#pragma intrinsic(_BitScanReverse64)
+#define HAVE_BITSCANREVERSE64
+#endif // defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#endif // defined(_MSC_VER)
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <type_traits>
+
+#include "src/utils/bit_mask_set.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// Aligns |value| to the desired |alignment|. |alignment| must be a power of 2.
+template <typename T>
+inline T Align(T value, T alignment) {
+ assert(alignment != 0);
+ const T alignment_mask = alignment - 1;
+ return (value + alignment_mask) & ~alignment_mask;
+}
+
+// Aligns |addr| to the desired |alignment|. |alignment| must be a power of 2.
+inline uint8_t* AlignAddr(uint8_t* const addr, const uintptr_t alignment) {
+ const auto value = reinterpret_cast<uintptr_t>(addr);
+ return reinterpret_cast<uint8_t*>(Align(value, alignment));
+}
+
+inline int32_t Clip3(int32_t value, int32_t low, int32_t high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
+template <typename Pixel>
+void ExtendLine(void* const line_start, const int width, const int left,
+ const int right) {
+ auto* const start = static_cast<Pixel*>(line_start);
+ const Pixel* src = start;
+ Pixel* dst = start - left;
+ // Copy to left and right borders.
+ Memset(dst, src[0], left);
+ Memset(dst + left + width, src[width - 1], right);
+}
+
+// The following 2 templates set a block of data with uncontiguous memory to
+// |value|. The compilers usually generate several branches to handle different
+// cases of |columns| when inlining memset() and std::fill(), and these branches
+// are unfortunately within the loop of |rows|. So calling these templates
+// directly could be inefficient. It is recommended to specialize common cases
+// of |columns|, such as 1, 2, 4, 8, 16 and 32, etc. in advance before
+// processing the generic case of |columns|. The code size may be larger, but
+// there would be big speed gains.
+// Call template MemSetBlock<> when sizeof(|T|) is 1.
+// Call template SetBlock<> when sizeof(|T|) is larger than 1.
+template <typename T>
+void MemSetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
+ static_assert(sizeof(T) == 1, "");
+ do {
+ memset(dst, value, columns);
+ dst += stride;
+ } while (--rows != 0);
+}
+
+template <typename T>
+void SetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
+ do {
+ std::fill(dst, dst + columns, value);
+ dst += stride;
+ } while (--rows != 0);
+}
+
+#if defined(__GNUC__)
+
+inline int CountLeadingZeros(uint32_t n) {
+ assert(n != 0);
+ return __builtin_clz(n);
+}
+
+inline int CountLeadingZeros(uint64_t n) {
+ assert(n != 0);
+ return __builtin_clzll(n);
+}
+
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ return __builtin_ctz(n);
+}
+
+#elif defined(_MSC_VER)
+
+inline int CountLeadingZeros(uint32_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+ const unsigned char bit_set = _BitScanReverse(&first_set_bit, n);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 31 ^ static_cast<int>(first_set_bit);
+}
+
+inline int CountLeadingZeros(uint64_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+#if defined(HAVE_BITSCANREVERSE64)
+ const unsigned char bit_set =
+ _BitScanReverse64(&first_set_bit, static_cast<unsigned __int64>(n));
+#else // !defined(HAVE_BITSCANREVERSE64)
+ const auto n_hi = static_cast<unsigned long>(n >> 32); // NOLINT(runtime/int)
+ if (n_hi != 0) {
+ const unsigned char bit_set = _BitScanReverse(&first_set_bit, n_hi);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 31 ^ static_cast<int>(first_set_bit);
+ }
+ const unsigned char bit_set = _BitScanReverse(
+ &first_set_bit, static_cast<unsigned long>(n)); // NOLINT(runtime/int)
+#endif // defined(HAVE_BITSCANREVERSE64)
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 63 ^ static_cast<int>(first_set_bit);
+}
+
+#undef HAVE_BITSCANREVERSE64
+
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+ const unsigned char bit_set = _BitScanForward(&first_set_bit, n);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return static_cast<int>(first_set_bit);
+}
+
+#else // !defined(__GNUC__) && !defined(_MSC_VER)
+
+template <const int kMSB, typename T>
+inline int CountLeadingZeros(T n) {
+ assert(n != 0);
+ const T msb = T{1} << kMSB;
+ int count = 0;
+ while ((n & msb) == 0) {
+ ++count;
+ n <<= 1;
+ }
+ return count;
+}
+
+inline int CountLeadingZeros(uint32_t n) { return CountLeadingZeros<31>(n); }
+
+inline int CountLeadingZeros(uint64_t n) { return CountLeadingZeros<63>(n); }
+
+// This is the algorithm on the left in Figure 5-23, Hacker's Delight, Second
+// Edition, page 109. The book says:
+// If the number of trailing 0's is expected to be small or large, then the
+// simple loops shown in Figure 5-23 are quite fast.
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ // Create a word with 1's at the positions of the trailing 0's in |n|, and
+ // 0's elsewhere (e.g., 01011000 => 00000111).
+ n = ~n & (n - 1);
+ int count = 0;
+ while (n != 0) {
+ ++count;
+ n >>= 1;
+ }
+ return count;
+}
+
+#endif // defined(__GNUC__)
+
+inline int FloorLog2(int32_t n) {
+ assert(n > 0);
+ return 31 ^ CountLeadingZeros(static_cast<uint32_t>(n));
+}
+
+inline int FloorLog2(uint32_t n) {
+ assert(n > 0);
+ return 31 ^ CountLeadingZeros(n);
+}
+
+inline int FloorLog2(int64_t n) {
+ assert(n > 0);
+ return 63 ^ CountLeadingZeros(static_cast<uint64_t>(n));
+}
+
+inline int FloorLog2(uint64_t n) {
+ assert(n > 0);
+ return 63 ^ CountLeadingZeros(n);
+}
+
+inline int CeilLog2(unsigned int n) {
+ // The expression FloorLog2(n - 1) + 1 is undefined not only for n == 0 but
+ // also for n == 1, so this expression must be guarded by the n < 2 test. An
+ // alternative implementation is:
+ // return (n == 0) ? 0 : FloorLog2(n) + static_cast<int>((n & (n - 1)) != 0);
+ return (n < 2) ? 0 : FloorLog2(n - 1) + 1;
+}
+
+inline int RightShiftWithCeiling(int value, int bits) {
+ assert(bits > 0);
+ return (value + (1 << bits) - 1) >> bits;
+}
+
+inline int32_t RightShiftWithRounding(int32_t value, int bits) {
+ assert(bits >= 0);
+ return (value + ((1 << bits) >> 1)) >> bits;
+}
+
+inline uint32_t RightShiftWithRounding(uint32_t value, int bits) {
+ assert(bits >= 0);
+ return (value + ((1 << bits) >> 1)) >> bits;
+}
+
+// This variant is used when |value| can exceed 32 bits, although the final
+// result must always fit into int32_t.
+inline int32_t RightShiftWithRounding(int64_t value, int bits) {
+ assert(bits >= 0);
+ return static_cast<int32_t>((value + ((int64_t{1} << bits) >> 1)) >> bits);
+}
+
+inline int32_t RightShiftWithRoundingSigned(int32_t value, int bits) {
+ assert(bits > 0);
+ // The next line is equivalent to:
+ // return (value >= 0) ? RightShiftWithRounding(value, bits)
+ // : -RightShiftWithRounding(-value, bits);
+ return RightShiftWithRounding(value + (value >> 31), bits);
+}
+
+// This variant is used when |value| can exceed 32 bits, although the final
+// result must always fit into int32_t.
+inline int32_t RightShiftWithRoundingSigned(int64_t value, int bits) {
+ assert(bits > 0);
+ // The next line is equivalent to:
+ // return (value >= 0) ? RightShiftWithRounding(value, bits)
+ // : -RightShiftWithRounding(-value, bits);
+ return RightShiftWithRounding(value + (value >> 63), bits);
+}
+
+constexpr int DivideBy2(int n) { return n >> 1; }
+constexpr int DivideBy4(int n) { return n >> 2; }
+constexpr int DivideBy8(int n) { return n >> 3; }
+constexpr int DivideBy16(int n) { return n >> 4; }
+constexpr int DivideBy32(int n) { return n >> 5; }
+constexpr int DivideBy64(int n) { return n >> 6; }
+constexpr int DivideBy128(int n) { return n >> 7; }
+
+// Convert |value| to unsigned before shifting to avoid undefined behavior with
+// negative values.
+inline int LeftShift(int value, int bits) {
+ assert(bits >= 0);
+ assert(value >= -(int64_t{1} << (31 - bits)));
+ assert(value <= (int64_t{1} << (31 - bits)) - ((bits == 0) ? 1 : 0));
+ return static_cast<int>(static_cast<uint32_t>(value) << bits);
+}
+inline int MultiplyBy2(int n) { return LeftShift(n, 1); }
+inline int MultiplyBy4(int n) { return LeftShift(n, 2); }
+inline int MultiplyBy8(int n) { return LeftShift(n, 3); }
+inline int MultiplyBy16(int n) { return LeftShift(n, 4); }
+inline int MultiplyBy32(int n) { return LeftShift(n, 5); }
+inline int MultiplyBy64(int n) { return LeftShift(n, 6); }
+
+constexpr int Mod32(int n) { return n & 0x1f; }
+constexpr int Mod64(int n) { return n & 0x3f; }
+
+//------------------------------------------------------------------------------
+// Bitstream functions
+
+constexpr bool IsIntraFrame(FrameType type) {
+ return type == kFrameKey || type == kFrameIntraOnly;
+}
+
+inline TransformClass GetTransformClass(TransformType tx_type) {
+ constexpr BitMaskSet kTransformClassVerticalMask(
+ kTransformTypeIdentityDct, kTransformTypeIdentityAdst,
+ kTransformTypeIdentityFlipadst);
+ if (kTransformClassVerticalMask.Contains(tx_type)) {
+ return kTransformClassVertical;
+ }
+ constexpr BitMaskSet kTransformClassHorizontalMask(
+ kTransformTypeDctIdentity, kTransformTypeAdstIdentity,
+ kTransformTypeFlipadstIdentity);
+ if (kTransformClassHorizontalMask.Contains(tx_type)) {
+ return kTransformClassHorizontal;
+ }
+ return kTransformClass2D;
+}
+
+inline int RowOrColumn4x4ToPixel(int row_or_column4x4, Plane plane,
+ int8_t subsampling) {
+ return MultiplyBy4(row_or_column4x4) >> (plane == kPlaneY ? 0 : subsampling);
+}
+
+constexpr PlaneType GetPlaneType(Plane plane) {
+ return static_cast<PlaneType>(plane != kPlaneY);
+}
+
+// 5.11.44.
+constexpr bool IsDirectionalMode(PredictionMode mode) {
+ return mode >= kPredictionModeVertical && mode <= kPredictionModeD67;
+}
+
+// 5.9.3.
+//
+// |a| and |b| are order hints, treated as unsigned order_hint_bits-bit
+// integers. |order_hint_shift_bits| equals (32 - order_hint_bits) % 32.
+// order_hint_bits is at most 8, so |order_hint_shift_bits| is zero or a
+// value between 24 and 31 (inclusive).
+//
+// If |order_hint_shift_bits| is zero, |a| and |b| are both zeros, and the
+// result is zero. If |order_hint_shift_bits| is not zero, returns the
+// signed difference |a| - |b| using "modular arithmetic". More precisely, the
+// signed difference |a| - |b| is treated as a signed order_hint_bits-bit
+// integer and cast to an int. The returned difference is between
+// -(1 << (order_hint_bits - 1)) and (1 << (order_hint_bits - 1)) - 1
+// (inclusive).
+//
+// NOTE: |a| and |b| are the order_hint_bits least significant bits of the
+// actual values. This function returns the signed difference between the
+// actual values. The returned difference is correct as long as the actual
+// values are not more than 1 << (order_hint_bits - 1) - 1 apart.
+//
+// Example: Suppose order_hint_bits is 4 and |order_hint_shift_bits|
+// is 28. Then |a| and |b| are in the range [0, 15], and the actual values for
+// |a| and |b| must not be more than 7 apart. (If the actual values for |a| and
+// |b| are exactly 8 apart, this function cannot tell whether the actual value
+// for |a| is before or after the actual value for |b|.)
+//
+// First, consider the order hints 2 and 6. For this simple case, we have
+// GetRelativeDistance(2, 6, 28) = 2 - 6 = -4, and
+// GetRelativeDistance(6, 2, 28) = 6 - 2 = 4.
+//
+// On the other hand, consider the order hints 2 and 14. The order hints are
+// 12 (> 7) apart, so we need to use the actual values instead. The actual
+// values may be 34 (= 2 mod 16) and 30 (= 14 mod 16), respectively. Therefore
+// we have
+// GetRelativeDistance(2, 14, 28) = 34 - 30 = 4, and
+// GetRelativeDistance(14, 2, 28) = 30 - 34 = -4.
+//
+// The following comments apply only to specific CPUs' SIMD implementations,
+// such as intrinsics code.
+// For the 2 shift operations in this function, if the SIMD packed data is
+// 16-bit wide, try to use |order_hint_shift_bits| - 16 as the number of bits to
+// shift; if the SIMD packed data is 8-bit wide, try to use
+// |order_hint_shift_bits| - 24 as the number of bits to shift.
+// |order_hint_shift_bits| - 16 and |order_hint_shift_bits| - 24 could be -16 or
+// -24. In these cases diff is 0, and the behavior of left or right shifting -16
+// or -24 bits is defined for x86 SIMD instructions and ARM NEON instructions,
+// and the result of shifting 0 is still 0. There is no guarantee that this
+// behavior and result apply to other CPUs' SIMD instructions.
+inline int GetRelativeDistance(const unsigned int a, const unsigned int b,
+ const unsigned int order_hint_shift_bits) {
+ const int diff = a - b;
+ assert(order_hint_shift_bits <= 31);
+ if (order_hint_shift_bits == 0) {
+ assert(a == 0);
+ assert(b == 0);
+ } else {
+ assert(order_hint_shift_bits >= 24); // i.e., order_hint_bits <= 8
+ assert(a < (1u << (32 - order_hint_shift_bits)));
+ assert(b < (1u << (32 - order_hint_shift_bits)));
+ assert(diff < (1 << (32 - order_hint_shift_bits)));
+ assert(diff >= -(1 << (32 - order_hint_shift_bits)));
+ }
+ // Sign extend the result of subtracting the values.
+ // Cast to unsigned int and then left shift to avoid undefined behavior with
+ // negative values. Cast to int to do the sign extension through right shift.
+ // This requires the right shift of a signed integer be an arithmetic shift,
+ // which is true for clang, gcc, and Visual C++.
+ // These two casts do not generate extra instructions.
+ // Don't use LeftShift(diff) since a valid diff may fail its assertions.
+  // For example, in GetRelativeDistance(2, 14, 28), diff equals -12 and is
+  // less than the minimum allowed value of LeftShift(), which is -8.
+ // The next 3 lines are equivalent to:
+ // const int order_hint_bits = Mod32(32 - order_hint_shift_bits);
+ // const int m = (1 << order_hint_bits) >> 1;
+ // return (diff & (m - 1)) - (diff & m);
+ return static_cast<int>(static_cast<unsigned int>(diff)
+ << order_hint_shift_bits) >>
+ order_hint_shift_bits;
+}
+
+// Applies |sign| (must be 0 or -1) to |value|, i.e.,
+// return (sign == 0) ? value : -value;
+// and does so without a branch.
+constexpr int ApplySign(int value, int sign) { return (value ^ sign) - sign; }
+
+// 7.9.3. (without the clamp for numerator and denominator).
+inline void GetMvProjection(const MotionVector& mv, int numerator,
+ int division_multiplier,
+ MotionVector* projection_mv) {
+  // Allow numerator and denominator to be 0 so that this function can be
+  // called
+ // unconditionally. When numerator is 0, |projection_mv| will be 0, and this
+ // is what we want.
+ assert(std::abs(numerator) <= kMaxFrameDistance);
+ for (int i = 0; i < 2; ++i) {
+ projection_mv->mv[i] =
+ Clip3(RightShiftWithRoundingSigned(
+ mv.mv[i] * numerator * division_multiplier, 14),
+ -kProjectionMvClamp, kProjectionMvClamp);
+ }
+}
+
+// 7.9.4.
+constexpr int Project(int value, int delta, int dst_sign) {
+ return value + ApplySign(delta / 64, dst_sign);
+}
+
+inline bool IsBlockSmallerThan8x8(BlockSize size) {
+ return size < kBlock8x8 && size != kBlock4x16;
+}
+
+// Returns true if either the width or the height of the block is equal to
+// four.
+inline bool IsBlockDimension4(BlockSize size) {
+ return size < kBlock8x8 || size == kBlock16x4;
+}
+
+// Converts bitdepth 8, 10, and 12 to array index 0, 1, and 2, respectively.
+constexpr int BitdepthToArrayIndex(int bitdepth) { return (bitdepth - 8) >> 1; }
+
+// Maps a square transform to an index in the range [0, 4]. kTransformSize4x4
+// maps to 0, kTransformSize8x8 maps to 1, and so on.
+inline int TransformSizeToSquareTransformIndex(TransformSize tx_size) {
+ assert(kTransformWidth[tx_size] == kTransformHeight[tx_size]);
+
+ // The values of the square transform sizes happen to be in the right
+ // ranges, so we can just divide them by 4 to get the indexes.
+ static_assert(
+ std::is_unsigned<std::underlying_type<TransformSize>::type>::value, "");
+ static_assert(kTransformSize4x4 < 4, "");
+ static_assert(4 <= kTransformSize8x8 && kTransformSize8x8 < 8, "");
+ static_assert(8 <= kTransformSize16x16 && kTransformSize16x16 < 12, "");
+ static_assert(12 <= kTransformSize32x32 && kTransformSize32x32 < 16, "");
+ static_assert(16 <= kTransformSize64x64 && kTransformSize64x64 < 20, "");
+ return DivideBy4(tx_size);
+}
+
+// Gets the corresponding Y/U/V position, to set and get filter masks
+// in deblock filtering.
+// Returns luma_position for the Y plane, whose subsampling must be 0.
+// Returns the odd position for the U/V planes if there is subsampling.
+constexpr int GetDeblockPosition(const int luma_position,
+ const int subsampling) {
+ return luma_position | subsampling;
+}
+
+// Returns the size of the residual buffer required to hold the residual values
+// for a block or frame of size |rows| by |columns| (taking into account
+// |subsampling_x|, |subsampling_y| and |residual_size|). |residual_size| is the
+// number of bytes required to represent one residual value.
+inline size_t GetResidualBufferSize(const int rows, const int columns,
+ const int subsampling_x,
+ const int subsampling_y,
+ const size_t residual_size) {
+ // The subsampling multipliers are:
+ // Both x and y are subsampled: 3 / 2.
+ // Only x or y is subsampled: 2 / 1 (which is equivalent to 4 / 2).
+ // Both x and y are not subsampled: 3 / 1 (which is equivalent to 6 / 2).
+ // So we compute the final subsampling multiplier as follows:
+ // multiplier = (2 + (4 >> subsampling_x >> subsampling_y)) / 2.
+ // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary checks
+ // when parsing quantized coefficients.
+ const int subsampling_multiplier_num =
+ 2 + (4 >> subsampling_x >> subsampling_y);
+ const int number_elements =
+ (rows * columns * subsampling_multiplier_num) >> 1;
+ const int tx_padding = 32 * kResidualPaddingVertical;
+ return residual_size * (number_elements + tx_padding);
+}
+
+// This function is equivalent to:
+// std::min({kTransformWidthLog2[tx_size] - 2,
+// kTransformWidthLog2[left_tx_size] - 2,
+// 2});
+constexpr LoopFilterTransformSizeId GetTransformSizeIdWidth(
+ TransformSize tx_size, TransformSize left_tx_size) {
+ return static_cast<LoopFilterTransformSizeId>(
+ static_cast<int>(tx_size > kTransformSize4x16 &&
+ left_tx_size > kTransformSize4x16) +
+ static_cast<int>(tx_size > kTransformSize8x32 &&
+ left_tx_size > kTransformSize8x32));
+}
+
+// This is used for 7.11.3.4 Block Inter Prediction Process, to select convolve
+// filters.
+inline int GetFilterIndex(const int filter_index, const int length) {
+ if (length <= 4) {
+ if (filter_index == kInterpolationFilterEightTap ||
+ filter_index == kInterpolationFilterEightTapSharp) {
+ return 4;
+ }
+ if (filter_index == kInterpolationFilterEightTapSmooth) {
+ return 5;
+ }
+ }
+ return filter_index;
+}
+
+// This has the same results as RightShiftWithRounding() since |subsampling|
+// can
+// only be 0 or 1.
+constexpr int SubsampledValue(int value, int subsampling) {
+ return (value + subsampling) >> subsampling;
+}
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_COMMON_H_
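The wrap-around examples in the GetRelativeDistance() comment can be checked numerically; a small standalone sketch (CheckRelativeDistance is a hypothetical test helper):

#include <cassert>

#include "src/utils/common.h"

void CheckRelativeDistance() {
  using libgav1::GetRelativeDistance;
  // order_hint_bits = 4, so order_hint_shift_bits = 28.
  assert(GetRelativeDistance(2, 6, 28) == -4);
  assert(GetRelativeDistance(6, 2, 28) == 4);
  // Order hints 2 and 14 are 12 apart; the sign-extended modular difference
  // wraps to +/-4, matching the comment above.
  assert(GetRelativeDistance(2, 14, 28) == 4);
  assert(GetRelativeDistance(14, 2, 28) == -4);
}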
diff --git a/src/utils/compiler_attributes.h b/src/utils/compiler_attributes.h
new file mode 100644
index 0000000..e122426
--- /dev/null
+++ b/src/utils/compiler_attributes.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
+#define LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
+
+// A collection of compiler attribute checks and defines to control for
+// compatibility across toolchains.
+
+//------------------------------------------------------------------------------
+// Language version, attribute and feature helpers.
+
+// Detect c++17 support. Visual Studio sets __cplusplus to 199711L by default
+// unless compiled with /Zc:__cplusplus, use the value controlled by /std
+// instead.
+// https://docs.microsoft.com/en-us/cpp/build/reference/zc-cplusplus
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define LIBGAV1_CXX17 1
+#else
+#define LIBGAV1_CXX17 0
+#endif
+
+#if defined(__has_attribute)
+#define LIBGAV1_HAS_ATTRIBUTE __has_attribute
+#else
+#define LIBGAV1_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__has_feature)
+#define LIBGAV1_HAS_FEATURE __has_feature
+#else
+#define LIBGAV1_HAS_FEATURE(x) 0
+#endif
+
+//------------------------------------------------------------------------------
+// Sanitizer attributes.
+
+#if LIBGAV1_HAS_FEATURE(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#define LIBGAV1_ASAN 1
+#else
+#define LIBGAV1_ASAN 0
+#endif
+
+#if LIBGAV1_HAS_FEATURE(memory_sanitizer)
+#define LIBGAV1_MSAN 1
+#else
+#define LIBGAV1_MSAN 0
+#endif
+
+#if LIBGAV1_HAS_FEATURE(thread_sanitizer) || defined(__SANITIZE_THREAD__)
+#define LIBGAV1_TSAN 1
+#else
+#define LIBGAV1_TSAN 0
+#endif
+
+//------------------------------------------------------------------------------
+// AddressSanitizer support.
+
+// Define the macros for AddressSanitizer manual memory poisoning. See
+// https://github.com/google/sanitizers/wiki/AddressSanitizerManualPoisoning.
+#if LIBGAV1_ASAN
+#include <sanitizer/asan_interface.h>
+#else
+#define ASAN_POISON_MEMORY_REGION(addr, size) \
+ (static_cast<void>(addr), static_cast<void>(size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+ (static_cast<void>(addr), static_cast<void>(size))
+#endif
+
+//------------------------------------------------------------------------------
+// Function attributes.
+// GCC: https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+// Clang: https://clang.llvm.org/docs/AttributeReference.html
+
+#if defined(__GNUC__)
+#define LIBGAV1_ALWAYS_INLINE __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#define LIBGAV1_ALWAYS_INLINE __forceinline
+#else
+#define LIBGAV1_ALWAYS_INLINE inline
+#endif
+
+// LIBGAV1_MUST_USE_RESULT
+//
+// Tells the compiler to warn about unused results.
+//
+// When annotating a function, it must appear as the first part of the
+// declaration or definition. The compiler will warn if the return value from
+// such a function is unused:
+//
+// LIBGAV1_MUST_USE_RESULT Sprocket* AllocateSprocket();
+// AllocateSprocket(); // Triggers a warning.
+//
+// When annotating a class, it is equivalent to annotating every function which
+// returns an instance.
+//
+// class LIBGAV1_MUST_USE_RESULT Sprocket {};
+// Sprocket(); // Triggers a warning.
+//
+// Sprocket MakeSprocket();
+// MakeSprocket(); // Triggers a warning.
+//
+// Note that references and pointers are not instances:
+//
+// Sprocket* SprocketPointer();
+// SprocketPointer(); // Does *not* trigger a warning.
+//
+// LIBGAV1_MUST_USE_RESULT allows using cast-to-void to suppress the unused
+// result warning. For that reason, warn_unused_result is used only for clang,
+// not for gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425
+#if LIBGAV1_HAS_ATTRIBUTE(nodiscard)
+#define LIBGAV1_MUST_USE_RESULT [[nodiscard]]
+#elif defined(__clang__) && LIBGAV1_HAS_ATTRIBUTE(warn_unused_result)
+#define LIBGAV1_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define LIBGAV1_MUST_USE_RESULT
+#endif
+
+// LIBGAV1_PRINTF_ATTRIBUTE
+//
+// Tells the compiler to perform `printf` format string checking if the
+// compiler supports it; see the 'format' attribute in
+// <https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html>.
+//
+// Note: As the GCC manual states, "[s]ince non-static C++ methods
+// have an implicit 'this' argument, the arguments of such methods
+// should be counted from two, not one."
+#if LIBGAV1_HAS_ATTRIBUTE(format) || (defined(__GNUC__) && !defined(__clang__))
+#define LIBGAV1_PRINTF_ATTRIBUTE(string_index, first_to_check) \
+ __attribute__((__format__(__printf__, string_index, first_to_check)))
+#else
+#define LIBGAV1_PRINTF_ATTRIBUTE(string_index, first_to_check)
+#endif
+
+//------------------------------------------------------------------------------
+// Thread annotations.
+
+// LIBGAV1_GUARDED_BY()
+//
+// Documents if a shared field or global variable needs to be protected by a
+// mutex. LIBGAV1_GUARDED_BY() allows the user to specify a particular mutex
+// that should be held when accessing the annotated variable.
+//
+// Although this annotation cannot be applied to local variables, a local
+// variable and its associated mutex can often be combined into a small class
+// or struct, thereby allowing the annotation.
+//
+// Example:
+//
+// class Foo {
+// Mutex mu_;
+// int p1_ LIBGAV1_GUARDED_BY(mu_);
+// ...
+// };
+// TODO(b/132506370): this can be reenabled after a local MutexLock
+// implementation is added with proper thread annotations.
+#if 0 // LIBGAV1_HAS_ATTRIBUTE(guarded_by)
+#define LIBGAV1_GUARDED_BY(x) __attribute__((guarded_by(x)))
+#else
+#define LIBGAV1_GUARDED_BY(x)
+#endif
+
+//------------------------------------------------------------------------------
+
+#undef LIBGAV1_HAS_ATTRIBUTE
+#undef LIBGAV1_HAS_FEATURE
+
+#endif // LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
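Unlike LIBGAV1_MUST_USE_RESULT, the LIBGAV1_PRINTF_ATTRIBUTE macro above has no inline example, so a short sketch (LogFormatted is a hypothetical function): with the attribute applied, GCC and Clang type-check the variadic arguments against the format string.

// Format string is argument 1; variadic arguments start at argument 2.
LIBGAV1_PRINTF_ATTRIBUTE(1, 2) void LogFormatted(const char* format, ...);
// LogFormatted("%d items", "many");  // Mismatched %d: warns under -Wformat.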
diff --git a/src/utils/constants.cc b/src/utils/constants.cc
new file mode 100644
index 0000000..80d7acb
--- /dev/null
+++ b/src/utils/constants.cc
@@ -0,0 +1,874 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+
+const uint8_t k4x4WidthLog2[kMaxBlockSizes] = {0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5};
+
+const uint8_t k4x4HeightLog2[kMaxBlockSizes] = {
+ 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 5, 4, 5};
+
+const uint8_t kNum4x4BlocksWide[kMaxBlockSizes] = {
+ 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 32, 32};
+
+const uint8_t kNum4x4BlocksHigh[kMaxBlockSizes] = {
+ 1, 2, 4, 1, 2, 4, 8, 1, 2, 4, 8, 16, 2, 4, 8, 16, 4, 8, 16, 32, 16, 32};
+
+const uint8_t kBlockWidthPixels[kMaxBlockSizes] = {
+ 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16,
+ 16, 32, 32, 32, 32, 64, 64, 64, 64, 128, 128};
+
+const uint8_t kBlockHeightPixels[kMaxBlockSizes] = {
+ 4, 8, 16, 4, 8, 16, 32, 4, 8, 16, 32,
+ 64, 8, 16, 32, 64, 16, 32, 64, 128, 64, 128};
+
+// 9.3 -- Partition_Subsize[]
+const BlockSize kSubSize[kMaxPartitionTypes][kMaxBlockSizes] = {
+ // kPartitionNone
+ {kBlock4x4, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x64, kBlockInvalid,
+ kBlockInvalid, kBlock128x128},
+ // kPartitionHorizontal
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionVertical
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x32, kBlockInvalid,
+ kBlockInvalid, kBlock64x64},
+ // kPartitionHorizontalWithTopSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionHorizontalWithBottomSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionVerticalWithLeftSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionVerticalWithRightSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionHorizontal4
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x16, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid},
+ // kPartitionVertical4
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x64, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid}};
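+
+// Example (illustrative, derived from the table above):
+//   kSubSize[kPartitionHorizontal][kBlock64x64] == kBlock64x32
+//   kSubSize[kPartitionVertical4][kBlock64x64] == kBlock16x64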
+
+// 5.11.38 (implemented as a simple lookup; the first dimension is the block
+// size, the second and third are subsampling_x and subsampling_y).
+const BlockSize kPlaneResidualSize[kMaxBlockSizes][2][2] = {
+ {{kBlock4x4, kBlock4x4}, {kBlock4x4, kBlock4x4}},
+ {{kBlock4x8, kBlock4x4}, {kBlockInvalid, kBlock4x4}},
+ {{kBlock4x16, kBlock4x8}, {kBlockInvalid, kBlock4x8}},
+ {{kBlock8x4, kBlockInvalid}, {kBlock4x4, kBlock4x4}},
+ {{kBlock8x8, kBlock8x4}, {kBlock4x8, kBlock4x4}},
+ {{kBlock8x16, kBlock8x8}, {kBlockInvalid, kBlock4x8}},
+ {{kBlock8x32, kBlock8x16}, {kBlockInvalid, kBlock4x16}},
+ {{kBlock16x4, kBlockInvalid}, {kBlock8x4, kBlock8x4}},
+ {{kBlock16x8, kBlockInvalid}, {kBlock8x8, kBlock8x4}},
+ {{kBlock16x16, kBlock16x8}, {kBlock8x16, kBlock8x8}},
+ {{kBlock16x32, kBlock16x16}, {kBlockInvalid, kBlock8x16}},
+ {{kBlock16x64, kBlock16x32}, {kBlockInvalid, kBlock8x32}},
+ {{kBlock32x8, kBlockInvalid}, {kBlock16x8, kBlock16x4}},
+ {{kBlock32x16, kBlockInvalid}, {kBlock16x16, kBlock16x8}},
+ {{kBlock32x32, kBlock32x16}, {kBlock16x32, kBlock16x16}},
+ {{kBlock32x64, kBlock32x32}, {kBlockInvalid, kBlock16x32}},
+ {{kBlock64x16, kBlockInvalid}, {kBlock32x16, kBlock32x8}},
+ {{kBlock64x32, kBlockInvalid}, {kBlock32x32, kBlock32x16}},
+ {{kBlock64x64, kBlock64x32}, {kBlock32x64, kBlock32x32}},
+ {{kBlock64x128, kBlock64x64}, {kBlockInvalid, kBlock32x64}},
+ {{kBlock128x64, kBlockInvalid}, {kBlock64x64, kBlock64x32}},
+ {{kBlock128x128, kBlock128x64}, {kBlock64x128, kBlock64x64}}};
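+
+// Example (illustrative): with 4:2:0 subsampling (subsampling_x = 1,
+// subsampling_y = 1), the chroma residual size of a kBlock8x8 block is
+//   kPlaneResidualSize[kBlock8x8][1][1] == kBlock4x4.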
+
+const int16_t kProjectionMvDivisionLookup[kMaxFrameDistance + 1] = {
+ 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, 2048, 1820, 1638,
+ 1489, 1365, 1260, 1170, 1092, 1024, 963, 910, 862, 819, 780,
+ 744, 712, 682, 655, 630, 606, 585, 564, 546, 528};
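+
+// Each nonzero entry above equals
+// (1 << kDivisorLookupPrecisionBits) / frame_distance (integer division), so
+// dividing by a frame distance reduces to a multiply and a shift.
+// Illustrative sketch (not part of the upstream source):
+//   const int scaled =
+//       (mv_diff * kProjectionMvDivisionLookup[distance]) >> 14;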
+
+const uint8_t kTransformWidth[kNumTransformSizes] = {
+ 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 32, 32, 32, 32, 64, 64, 64};
+
+const uint8_t kTransformHeight[kNumTransformSizes] = {
+ 4, 8, 16, 4, 8, 16, 32, 4, 8, 16, 32, 64, 8, 16, 32, 64, 16, 32, 64};
+
+const uint8_t kTransformWidth4x4[kNumTransformSizes] = {
+ 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16};
+
+const uint8_t kTransformHeight4x4[kNumTransformSizes] = {
+ 1, 2, 4, 1, 2, 4, 8, 1, 2, 4, 8, 16, 2, 4, 8, 16, 4, 8, 16};
+
+const uint8_t kTransformWidthLog2[kNumTransformSizes] = {
+ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6};
+
+const uint8_t kTransformHeightLog2[kNumTransformSizes] = {
+ 2, 3, 4, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 4, 5, 6};
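+
+// The transform dimension tables above are mutually consistent; illustrative
+// invariants (not part of the upstream source):
+//   kTransformWidth[i] == 1 << kTransformWidthLog2[i]
+//   kTransformWidth4x4[i] == kTransformWidth[i] >> 2
+// and likewise for the height tables.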
+
+// 9.3 -- Split_Tx_Size[]
+const TransformSize kSplitTransformSize[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize4x4, kTransformSize4x8,
+ kTransformSize4x4, kTransformSize4x4, kTransformSize8x8,
+ kTransformSize8x16, kTransformSize8x4, kTransformSize8x8,
+ kTransformSize8x8, kTransformSize16x16, kTransformSize16x32,
+ kTransformSize16x8, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
+ kTransformSize32x32};
+
+// Square transform of size min(w,h).
+const TransformSize kTransformSizeSquareMin[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize4x4, kTransformSize4x4,
+ kTransformSize4x4, kTransformSize8x8, kTransformSize8x8,
+ kTransformSize8x8, kTransformSize4x4, kTransformSize8x8,
+ kTransformSize16x16, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize8x8, kTransformSize16x16, kTransformSize32x32,
+ kTransformSize32x32, kTransformSize16x16, kTransformSize32x32,
+ kTransformSize64x64};
+
+// Square transform of size max(w,h).
+const TransformSize kTransformSizeSquareMax[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize8x8, kTransformSize16x16,
+ kTransformSize8x8, kTransformSize8x8, kTransformSize16x16,
+ kTransformSize32x32, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize16x16, kTransformSize32x32, kTransformSize64x64,
+ kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
+ kTransformSize64x64, kTransformSize64x64, kTransformSize64x64,
+ kTransformSize64x64};
+
+const uint8_t kNumTransformTypesInSet[kNumTransformSets] = {1, 7, 5, 16, 12, 2};
+
+const uint8_t kSgrProjParams[1 << kSgrProjParamsBits][4] = {
+ {2, 12, 1, 4}, {2, 15, 1, 6}, {2, 18, 1, 8}, {2, 21, 1, 9},
+ {2, 24, 1, 10}, {2, 29, 1, 11}, {2, 36, 1, 12}, {2, 45, 1, 13},
+ {2, 56, 1, 14}, {2, 68, 1, 15}, {0, 0, 1, 5}, {0, 0, 1, 8},
+ {0, 0, 1, 11}, {0, 0, 1, 14}, {2, 30, 0, 0}, {2, 75, 0, 0}};
+
+const int8_t kSgrProjMultiplierMin[2] = {-96, -32};
+
+const int8_t kSgrProjMultiplierMax[2] = {31, 95};
+
+const int8_t kWienerTapsMin[3] = {-5, -23, -17};
+
+const int8_t kWienerTapsMax[3] = {10, 8, 46};
+
+// This was modified from Upscale_Filter as defined in AV1 Section 7.16, in
+// order to support 16-bit packed NEON operations.
+// The sign of each tap is: - + - + + - + -
+alignas(16) const uint8_t
+ kUpscaleFilterUnsigned[kSuperResFilterShifts][kSuperResFilterTaps] = {
+ {0, 0, 0, 128, 0, 0, 0, 0}, {0, 0, 1, 128, 2, 1, 0, 0},
+ {0, 1, 3, 127, 4, 2, 1, 0}, {0, 1, 4, 127, 6, 3, 1, 0},
+ {0, 2, 6, 126, 8, 3, 1, 0}, {0, 2, 7, 125, 11, 4, 1, 0},
+ {1, 2, 8, 125, 13, 5, 2, 0}, {1, 3, 9, 124, 15, 6, 2, 0},
+ {1, 3, 10, 123, 18, 6, 2, 1}, {1, 3, 11, 122, 20, 7, 3, 1},
+ {1, 4, 12, 121, 22, 8, 3, 1}, {1, 4, 13, 120, 25, 9, 3, 1},
+ {1, 4, 14, 118, 28, 9, 3, 1}, {1, 4, 15, 117, 30, 10, 4, 1},
+ {1, 5, 16, 116, 32, 11, 4, 1}, {1, 5, 16, 114, 35, 12, 4, 1},
+ {1, 5, 17, 112, 38, 12, 4, 1}, {1, 5, 18, 111, 40, 13, 5, 1},
+ {1, 5, 18, 109, 43, 14, 5, 1}, {1, 6, 19, 107, 45, 14, 5, 1},
+ {1, 6, 19, 105, 48, 15, 5, 1}, {1, 6, 19, 103, 51, 16, 5, 1},
+ {1, 6, 20, 101, 53, 16, 6, 1}, {1, 6, 20, 99, 56, 17, 6, 1},
+ {1, 6, 20, 97, 58, 17, 6, 1}, {1, 6, 20, 95, 61, 18, 6, 1},
+ {2, 7, 20, 93, 64, 18, 6, 2}, {2, 7, 20, 91, 66, 19, 6, 1},
+ {2, 7, 20, 88, 69, 19, 6, 1}, {2, 7, 20, 86, 71, 19, 6, 1},
+ {2, 7, 20, 84, 74, 20, 7, 2}, {2, 7, 20, 81, 76, 20, 7, 1},
+ {2, 7, 20, 79, 79, 20, 7, 2}, {1, 7, 20, 76, 81, 20, 7, 2},
+ {2, 7, 20, 74, 84, 20, 7, 2}, {1, 6, 19, 71, 86, 20, 7, 2},
+ {1, 6, 19, 69, 88, 20, 7, 2}, {1, 6, 19, 66, 91, 20, 7, 2},
+ {2, 6, 18, 64, 93, 20, 7, 2}, {1, 6, 18, 61, 95, 20, 6, 1},
+ {1, 6, 17, 58, 97, 20, 6, 1}, {1, 6, 17, 56, 99, 20, 6, 1},
+ {1, 6, 16, 53, 101, 20, 6, 1}, {1, 5, 16, 51, 103, 19, 6, 1},
+ {1, 5, 15, 48, 105, 19, 6, 1}, {1, 5, 14, 45, 107, 19, 6, 1},
+ {1, 5, 14, 43, 109, 18, 5, 1}, {1, 5, 13, 40, 111, 18, 5, 1},
+ {1, 4, 12, 38, 112, 17, 5, 1}, {1, 4, 12, 35, 114, 16, 5, 1},
+ {1, 4, 11, 32, 116, 16, 5, 1}, {1, 4, 10, 30, 117, 15, 4, 1},
+ {1, 3, 9, 28, 118, 14, 4, 1}, {1, 3, 9, 25, 120, 13, 4, 1},
+ {1, 3, 8, 22, 121, 12, 4, 1}, {1, 3, 7, 20, 122, 11, 3, 1},
+ {1, 2, 6, 18, 123, 10, 3, 1}, {0, 2, 6, 15, 124, 9, 3, 1},
+ {0, 2, 5, 13, 125, 8, 2, 1}, {0, 1, 4, 11, 125, 7, 2, 0},
+ {0, 1, 3, 8, 126, 6, 2, 0}, {0, 1, 3, 6, 127, 4, 1, 0},
+ {0, 1, 2, 4, 127, 3, 1, 0}, {0, 0, 1, 2, 128, 1, 0, 0},
+};
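+
+// Example (illustrative): applying the sign pattern - + - + + - + - to a row
+// of kUpscaleFilterUnsigned recovers the signed Upscale_Filter taps from the
+// spec, e.g. row index 2, {0, 1, 3, 127, 4, 2, 1, 0}, corresponds to
+// {0, 1, -3, 127, 4, -2, 1, 0}.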
+
+alignas(8) const int8_t
+ kWarpedFilters8[3 * kWarpedPixelPrecisionShifts + 1][8] = {
+ // [-1, 0).
+ {0, 0, 127, 1, 0, 0, 0, 0},
+ {0, -1, 127, 2, 0, 0, 0, 0},
+ {1, -3, 127, 4, -1, 0, 0, 0},
+ {1, -4, 126, 6, -2, 1, 0, 0},
+ {1, -5, 126, 8, -3, 1, 0, 0},
+ {1, -6, 125, 11, -4, 1, 0, 0},
+ {1, -7, 124, 13, -4, 1, 0, 0},
+ {2, -8, 123, 15, -5, 1, 0, 0},
+ {2, -9, 122, 18, -6, 1, 0, 0},
+ {2, -10, 121, 20, -6, 1, 0, 0},
+ {2, -11, 120, 22, -7, 2, 0, 0},
+ {2, -12, 119, 25, -8, 2, 0, 0},
+ {3, -13, 117, 27, -8, 2, 0, 0},
+ {3, -13, 116, 29, -9, 2, 0, 0},
+ {3, -14, 114, 32, -10, 3, 0, 0},
+ {3, -15, 113, 35, -10, 2, 0, 0},
+ {3, -15, 111, 37, -11, 3, 0, 0},
+ {3, -16, 109, 40, -11, 3, 0, 0},
+ {3, -16, 108, 42, -12, 3, 0, 0},
+ {4, -17, 106, 45, -13, 3, 0, 0},
+ {4, -17, 104, 47, -13, 3, 0, 0},
+ {4, -17, 102, 50, -14, 3, 0, 0},
+ {4, -17, 100, 52, -14, 3, 0, 0},
+ {4, -18, 98, 55, -15, 4, 0, 0},
+ {4, -18, 96, 58, -15, 3, 0, 0},
+ {4, -18, 94, 60, -16, 4, 0, 0},
+ {4, -18, 91, 63, -16, 4, 0, 0},
+ {4, -18, 89, 65, -16, 4, 0, 0},
+ {4, -18, 87, 68, -17, 4, 0, 0},
+ {4, -18, 85, 70, -17, 4, 0, 0},
+ {4, -18, 82, 73, -17, 4, 0, 0},
+ {4, -18, 80, 75, -17, 4, 0, 0},
+ {4, -18, 78, 78, -18, 4, 0, 0},
+ {4, -17, 75, 80, -18, 4, 0, 0},
+ {4, -17, 73, 82, -18, 4, 0, 0},
+ {4, -17, 70, 85, -18, 4, 0, 0},
+ {4, -17, 68, 87, -18, 4, 0, 0},
+ {4, -16, 65, 89, -18, 4, 0, 0},
+ {4, -16, 63, 91, -18, 4, 0, 0},
+ {4, -16, 60, 94, -18, 4, 0, 0},
+ {3, -15, 58, 96, -18, 4, 0, 0},
+ {4, -15, 55, 98, -18, 4, 0, 0},
+ {3, -14, 52, 100, -17, 4, 0, 0},
+ {3, -14, 50, 102, -17, 4, 0, 0},
+ {3, -13, 47, 104, -17, 4, 0, 0},
+ {3, -13, 45, 106, -17, 4, 0, 0},
+ {3, -12, 42, 108, -16, 3, 0, 0},
+ {3, -11, 40, 109, -16, 3, 0, 0},
+ {3, -11, 37, 111, -15, 3, 0, 0},
+ {2, -10, 35, 113, -15, 3, 0, 0},
+ {3, -10, 32, 114, -14, 3, 0, 0},
+ {2, -9, 29, 116, -13, 3, 0, 0},
+ {2, -8, 27, 117, -13, 3, 0, 0},
+ {2, -8, 25, 119, -12, 2, 0, 0},
+ {2, -7, 22, 120, -11, 2, 0, 0},
+ {1, -6, 20, 121, -10, 2, 0, 0},
+ {1, -6, 18, 122, -9, 2, 0, 0},
+ {1, -5, 15, 123, -8, 2, 0, 0},
+ {1, -4, 13, 124, -7, 1, 0, 0},
+ {1, -4, 11, 125, -6, 1, 0, 0},
+ {1, -3, 8, 126, -5, 1, 0, 0},
+ {1, -2, 6, 126, -4, 1, 0, 0},
+ {0, -1, 4, 127, -3, 1, 0, 0},
+ {0, 0, 2, 127, -1, 0, 0, 0},
+ // [0, 1).
+ {0, 0, 0, 127, 1, 0, 0, 0},
+ {0, 0, -1, 127, 2, 0, 0, 0},
+ {0, 1, -3, 127, 4, -2, 1, 0},
+ {0, 1, -5, 127, 6, -2, 1, 0},
+ {0, 2, -6, 126, 8, -3, 1, 0},
+ {-1, 2, -7, 126, 11, -4, 2, -1},
+ {-1, 3, -8, 125, 13, -5, 2, -1},
+ {-1, 3, -10, 124, 16, -6, 3, -1},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-1, 4, -12, 122, 20, -7, 3, -1},
+ {-1, 4, -13, 121, 23, -8, 3, -1},
+ {-2, 5, -14, 120, 25, -9, 4, -1},
+ {-1, 5, -15, 119, 27, -10, 4, -1},
+ {-1, 5, -16, 118, 30, -11, 4, -1},
+ {-2, 6, -17, 116, 33, -12, 5, -1},
+ {-2, 6, -17, 114, 35, -12, 5, -1},
+ {-2, 6, -18, 113, 38, -13, 5, -1},
+ {-2, 7, -19, 111, 41, -14, 6, -2},
+ {-2, 7, -19, 110, 43, -15, 6, -2},
+ {-2, 7, -20, 108, 46, -15, 6, -2},
+ {-2, 7, -20, 106, 49, -16, 6, -2},
+ {-2, 7, -21, 104, 51, -16, 7, -2},
+ {-2, 7, -21, 102, 54, -17, 7, -2},
+ {-2, 8, -21, 100, 56, -18, 7, -2},
+ {-2, 8, -22, 98, 59, -18, 7, -2},
+ {-2, 8, -22, 96, 62, -19, 7, -2},
+ {-2, 8, -22, 94, 64, -19, 7, -2},
+ {-2, 8, -22, 91, 67, -20, 8, -2},
+ {-2, 8, -22, 89, 69, -20, 8, -2},
+ {-2, 8, -22, 87, 72, -21, 8, -2},
+ {-2, 8, -21, 84, 74, -21, 8, -2},
+ {-2, 8, -22, 82, 77, -21, 8, -2},
+ {-2, 8, -21, 79, 79, -21, 8, -2},
+ {-2, 8, -21, 77, 82, -22, 8, -2},
+ {-2, 8, -21, 74, 84, -21, 8, -2},
+ {-2, 8, -21, 72, 87, -22, 8, -2},
+ {-2, 8, -20, 69, 89, -22, 8, -2},
+ {-2, 8, -20, 67, 91, -22, 8, -2},
+ {-2, 7, -19, 64, 94, -22, 8, -2},
+ {-2, 7, -19, 62, 96, -22, 8, -2},
+ {-2, 7, -18, 59, 98, -22, 8, -2},
+ {-2, 7, -18, 56, 100, -21, 8, -2},
+ {-2, 7, -17, 54, 102, -21, 7, -2},
+ {-2, 7, -16, 51, 104, -21, 7, -2},
+ {-2, 6, -16, 49, 106, -20, 7, -2},
+ {-2, 6, -15, 46, 108, -20, 7, -2},
+ {-2, 6, -15, 43, 110, -19, 7, -2},
+ {-2, 6, -14, 41, 111, -19, 7, -2},
+ {-1, 5, -13, 38, 113, -18, 6, -2},
+ {-1, 5, -12, 35, 114, -17, 6, -2},
+ {-1, 5, -12, 33, 116, -17, 6, -2},
+ {-1, 4, -11, 30, 118, -16, 5, -1},
+ {-1, 4, -10, 27, 119, -15, 5, -1},
+ {-1, 4, -9, 25, 120, -14, 5, -2},
+ {-1, 3, -8, 23, 121, -13, 4, -1},
+ {-1, 3, -7, 20, 122, -12, 4, -1},
+ {-1, 3, -7, 18, 123, -11, 4, -1},
+ {-1, 3, -6, 16, 124, -10, 3, -1},
+ {-1, 2, -5, 13, 125, -8, 3, -1},
+ {-1, 2, -4, 11, 126, -7, 2, -1},
+ {0, 1, -3, 8, 126, -6, 2, 0},
+ {0, 1, -2, 6, 127, -5, 1, 0},
+ {0, 1, -2, 4, 127, -3, 1, 0},
+ {0, 0, 0, 2, 127, -1, 0, 0},
+ // [1, 2).
+ {0, 0, 0, 1, 127, 0, 0, 0},
+ {0, 0, 0, -1, 127, 2, 0, 0},
+ {0, 0, 1, -3, 127, 4, -1, 0},
+ {0, 0, 1, -4, 126, 6, -2, 1},
+ {0, 0, 1, -5, 126, 8, -3, 1},
+ {0, 0, 1, -6, 125, 11, -4, 1},
+ {0, 0, 1, -7, 124, 13, -4, 1},
+ {0, 0, 2, -8, 123, 15, -5, 1},
+ {0, 0, 2, -9, 122, 18, -6, 1},
+ {0, 0, 2, -10, 121, 20, -6, 1},
+ {0, 0, 2, -11, 120, 22, -7, 2},
+ {0, 0, 2, -12, 119, 25, -8, 2},
+ {0, 0, 3, -13, 117, 27, -8, 2},
+ {0, 0, 3, -13, 116, 29, -9, 2},
+ {0, 0, 3, -14, 114, 32, -10, 3},
+ {0, 0, 3, -15, 113, 35, -10, 2},
+ {0, 0, 3, -15, 111, 37, -11, 3},
+ {0, 0, 3, -16, 109, 40, -11, 3},
+ {0, 0, 3, -16, 108, 42, -12, 3},
+ {0, 0, 4, -17, 106, 45, -13, 3},
+ {0, 0, 4, -17, 104, 47, -13, 3},
+ {0, 0, 4, -17, 102, 50, -14, 3},
+ {0, 0, 4, -17, 100, 52, -14, 3},
+ {0, 0, 4, -18, 98, 55, -15, 4},
+ {0, 0, 4, -18, 96, 58, -15, 3},
+ {0, 0, 4, -18, 94, 60, -16, 4},
+ {0, 0, 4, -18, 91, 63, -16, 4},
+ {0, 0, 4, -18, 89, 65, -16, 4},
+ {0, 0, 4, -18, 87, 68, -17, 4},
+ {0, 0, 4, -18, 85, 70, -17, 4},
+ {0, 0, 4, -18, 82, 73, -17, 4},
+ {0, 0, 4, -18, 80, 75, -17, 4},
+ {0, 0, 4, -18, 78, 78, -18, 4},
+ {0, 0, 4, -17, 75, 80, -18, 4},
+ {0, 0, 4, -17, 73, 82, -18, 4},
+ {0, 0, 4, -17, 70, 85, -18, 4},
+ {0, 0, 4, -17, 68, 87, -18, 4},
+ {0, 0, 4, -16, 65, 89, -18, 4},
+ {0, 0, 4, -16, 63, 91, -18, 4},
+ {0, 0, 4, -16, 60, 94, -18, 4},
+ {0, 0, 3, -15, 58, 96, -18, 4},
+ {0, 0, 4, -15, 55, 98, -18, 4},
+ {0, 0, 3, -14, 52, 100, -17, 4},
+ {0, 0, 3, -14, 50, 102, -17, 4},
+ {0, 0, 3, -13, 47, 104, -17, 4},
+ {0, 0, 3, -13, 45, 106, -17, 4},
+ {0, 0, 3, -12, 42, 108, -16, 3},
+ {0, 0, 3, -11, 40, 109, -16, 3},
+ {0, 0, 3, -11, 37, 111, -15, 3},
+ {0, 0, 2, -10, 35, 113, -15, 3},
+ {0, 0, 3, -10, 32, 114, -14, 3},
+ {0, 0, 2, -9, 29, 116, -13, 3},
+ {0, 0, 2, -8, 27, 117, -13, 3},
+ {0, 0, 2, -8, 25, 119, -12, 2},
+ {0, 0, 2, -7, 22, 120, -11, 2},
+ {0, 0, 1, -6, 20, 121, -10, 2},
+ {0, 0, 1, -6, 18, 122, -9, 2},
+ {0, 0, 1, -5, 15, 123, -8, 2},
+ {0, 0, 1, -4, 13, 124, -7, 1},
+ {0, 0, 1, -4, 11, 125, -6, 1},
+ {0, 0, 1, -3, 8, 126, -5, 1},
+ {0, 0, 1, -2, 6, 126, -4, 1},
+ {0, 0, 0, -1, 4, 127, -3, 1},
+ {0, 0, 0, 0, 2, 127, -1, 0},
+ // dummy, replicate row index 191.
+ {0, 0, 0, 0, 2, 127, -1, 0}};
+
+alignas(16) const int16_t
+ kWarpedFilters[3 * kWarpedPixelPrecisionShifts + 1][8] = {
+ // [-1, 0).
+ {0, 0, 127, 1, 0, 0, 0, 0},
+ {0, -1, 127, 2, 0, 0, 0, 0},
+ {1, -3, 127, 4, -1, 0, 0, 0},
+ {1, -4, 126, 6, -2, 1, 0, 0},
+ {1, -5, 126, 8, -3, 1, 0, 0},
+ {1, -6, 125, 11, -4, 1, 0, 0},
+ {1, -7, 124, 13, -4, 1, 0, 0},
+ {2, -8, 123, 15, -5, 1, 0, 0},
+ {2, -9, 122, 18, -6, 1, 0, 0},
+ {2, -10, 121, 20, -6, 1, 0, 0},
+ {2, -11, 120, 22, -7, 2, 0, 0},
+ {2, -12, 119, 25, -8, 2, 0, 0},
+ {3, -13, 117, 27, -8, 2, 0, 0},
+ {3, -13, 116, 29, -9, 2, 0, 0},
+ {3, -14, 114, 32, -10, 3, 0, 0},
+ {3, -15, 113, 35, -10, 2, 0, 0},
+ {3, -15, 111, 37, -11, 3, 0, 0},
+ {3, -16, 109, 40, -11, 3, 0, 0},
+ {3, -16, 108, 42, -12, 3, 0, 0},
+ {4, -17, 106, 45, -13, 3, 0, 0},
+ {4, -17, 104, 47, -13, 3, 0, 0},
+ {4, -17, 102, 50, -14, 3, 0, 0},
+ {4, -17, 100, 52, -14, 3, 0, 0},
+ {4, -18, 98, 55, -15, 4, 0, 0},
+ {4, -18, 96, 58, -15, 3, 0, 0},
+ {4, -18, 94, 60, -16, 4, 0, 0},
+ {4, -18, 91, 63, -16, 4, 0, 0},
+ {4, -18, 89, 65, -16, 4, 0, 0},
+ {4, -18, 87, 68, -17, 4, 0, 0},
+ {4, -18, 85, 70, -17, 4, 0, 0},
+ {4, -18, 82, 73, -17, 4, 0, 0},
+ {4, -18, 80, 75, -17, 4, 0, 0},
+ {4, -18, 78, 78, -18, 4, 0, 0},
+ {4, -17, 75, 80, -18, 4, 0, 0},
+ {4, -17, 73, 82, -18, 4, 0, 0},
+ {4, -17, 70, 85, -18, 4, 0, 0},
+ {4, -17, 68, 87, -18, 4, 0, 0},
+ {4, -16, 65, 89, -18, 4, 0, 0},
+ {4, -16, 63, 91, -18, 4, 0, 0},
+ {4, -16, 60, 94, -18, 4, 0, 0},
+ {3, -15, 58, 96, -18, 4, 0, 0},
+ {4, -15, 55, 98, -18, 4, 0, 0},
+ {3, -14, 52, 100, -17, 4, 0, 0},
+ {3, -14, 50, 102, -17, 4, 0, 0},
+ {3, -13, 47, 104, -17, 4, 0, 0},
+ {3, -13, 45, 106, -17, 4, 0, 0},
+ {3, -12, 42, 108, -16, 3, 0, 0},
+ {3, -11, 40, 109, -16, 3, 0, 0},
+ {3, -11, 37, 111, -15, 3, 0, 0},
+ {2, -10, 35, 113, -15, 3, 0, 0},
+ {3, -10, 32, 114, -14, 3, 0, 0},
+ {2, -9, 29, 116, -13, 3, 0, 0},
+ {2, -8, 27, 117, -13, 3, 0, 0},
+ {2, -8, 25, 119, -12, 2, 0, 0},
+ {2, -7, 22, 120, -11, 2, 0, 0},
+ {1, -6, 20, 121, -10, 2, 0, 0},
+ {1, -6, 18, 122, -9, 2, 0, 0},
+ {1, -5, 15, 123, -8, 2, 0, 0},
+ {1, -4, 13, 124, -7, 1, 0, 0},
+ {1, -4, 11, 125, -6, 1, 0, 0},
+ {1, -3, 8, 126, -5, 1, 0, 0},
+ {1, -2, 6, 126, -4, 1, 0, 0},
+ {0, -1, 4, 127, -3, 1, 0, 0},
+ {0, 0, 2, 127, -1, 0, 0, 0},
+ // [0, 1).
+ {0, 0, 0, 127, 1, 0, 0, 0},
+ {0, 0, -1, 127, 2, 0, 0, 0},
+ {0, 1, -3, 127, 4, -2, 1, 0},
+ {0, 1, -5, 127, 6, -2, 1, 0},
+ {0, 2, -6, 126, 8, -3, 1, 0},
+ {-1, 2, -7, 126, 11, -4, 2, -1},
+ {-1, 3, -8, 125, 13, -5, 2, -1},
+ {-1, 3, -10, 124, 16, -6, 3, -1},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-1, 4, -12, 122, 20, -7, 3, -1},
+ {-1, 4, -13, 121, 23, -8, 3, -1},
+ {-2, 5, -14, 120, 25, -9, 4, -1},
+ {-1, 5, -15, 119, 27, -10, 4, -1},
+ {-1, 5, -16, 118, 30, -11, 4, -1},
+ {-2, 6, -17, 116, 33, -12, 5, -1},
+ {-2, 6, -17, 114, 35, -12, 5, -1},
+ {-2, 6, -18, 113, 38, -13, 5, -1},
+ {-2, 7, -19, 111, 41, -14, 6, -2},
+ {-2, 7, -19, 110, 43, -15, 6, -2},
+ {-2, 7, -20, 108, 46, -15, 6, -2},
+ {-2, 7, -20, 106, 49, -16, 6, -2},
+ {-2, 7, -21, 104, 51, -16, 7, -2},
+ {-2, 7, -21, 102, 54, -17, 7, -2},
+ {-2, 8, -21, 100, 56, -18, 7, -2},
+ {-2, 8, -22, 98, 59, -18, 7, -2},
+ {-2, 8, -22, 96, 62, -19, 7, -2},
+ {-2, 8, -22, 94, 64, -19, 7, -2},
+ {-2, 8, -22, 91, 67, -20, 8, -2},
+ {-2, 8, -22, 89, 69, -20, 8, -2},
+ {-2, 8, -22, 87, 72, -21, 8, -2},
+ {-2, 8, -21, 84, 74, -21, 8, -2},
+ {-2, 8, -22, 82, 77, -21, 8, -2},
+ {-2, 8, -21, 79, 79, -21, 8, -2},
+ {-2, 8, -21, 77, 82, -22, 8, -2},
+ {-2, 8, -21, 74, 84, -21, 8, -2},
+ {-2, 8, -21, 72, 87, -22, 8, -2},
+ {-2, 8, -20, 69, 89, -22, 8, -2},
+ {-2, 8, -20, 67, 91, -22, 8, -2},
+ {-2, 7, -19, 64, 94, -22, 8, -2},
+ {-2, 7, -19, 62, 96, -22, 8, -2},
+ {-2, 7, -18, 59, 98, -22, 8, -2},
+ {-2, 7, -18, 56, 100, -21, 8, -2},
+ {-2, 7, -17, 54, 102, -21, 7, -2},
+ {-2, 7, -16, 51, 104, -21, 7, -2},
+ {-2, 6, -16, 49, 106, -20, 7, -2},
+ {-2, 6, -15, 46, 108, -20, 7, -2},
+ {-2, 6, -15, 43, 110, -19, 7, -2},
+ {-2, 6, -14, 41, 111, -19, 7, -2},
+ {-1, 5, -13, 38, 113, -18, 6, -2},
+ {-1, 5, -12, 35, 114, -17, 6, -2},
+ {-1, 5, -12, 33, 116, -17, 6, -2},
+ {-1, 4, -11, 30, 118, -16, 5, -1},
+ {-1, 4, -10, 27, 119, -15, 5, -1},
+ {-1, 4, -9, 25, 120, -14, 5, -2},
+ {-1, 3, -8, 23, 121, -13, 4, -1},
+ {-1, 3, -7, 20, 122, -12, 4, -1},
+ {-1, 3, -7, 18, 123, -11, 4, -1},
+ {-1, 3, -6, 16, 124, -10, 3, -1},
+ {-1, 2, -5, 13, 125, -8, 3, -1},
+ {-1, 2, -4, 11, 126, -7, 2, -1},
+ {0, 1, -3, 8, 126, -6, 2, 0},
+ {0, 1, -2, 6, 127, -5, 1, 0},
+ {0, 1, -2, 4, 127, -3, 1, 0},
+ {0, 0, 0, 2, 127, -1, 0, 0},
+ // [1, 2).
+ {0, 0, 0, 1, 127, 0, 0, 0},
+ {0, 0, 0, -1, 127, 2, 0, 0},
+ {0, 0, 1, -3, 127, 4, -1, 0},
+ {0, 0, 1, -4, 126, 6, -2, 1},
+ {0, 0, 1, -5, 126, 8, -3, 1},
+ {0, 0, 1, -6, 125, 11, -4, 1},
+ {0, 0, 1, -7, 124, 13, -4, 1},
+ {0, 0, 2, -8, 123, 15, -5, 1},
+ {0, 0, 2, -9, 122, 18, -6, 1},
+ {0, 0, 2, -10, 121, 20, -6, 1},
+ {0, 0, 2, -11, 120, 22, -7, 2},
+ {0, 0, 2, -12, 119, 25, -8, 2},
+ {0, 0, 3, -13, 117, 27, -8, 2},
+ {0, 0, 3, -13, 116, 29, -9, 2},
+ {0, 0, 3, -14, 114, 32, -10, 3},
+ {0, 0, 3, -15, 113, 35, -10, 2},
+ {0, 0, 3, -15, 111, 37, -11, 3},
+ {0, 0, 3, -16, 109, 40, -11, 3},
+ {0, 0, 3, -16, 108, 42, -12, 3},
+ {0, 0, 4, -17, 106, 45, -13, 3},
+ {0, 0, 4, -17, 104, 47, -13, 3},
+ {0, 0, 4, -17, 102, 50, -14, 3},
+ {0, 0, 4, -17, 100, 52, -14, 3},
+ {0, 0, 4, -18, 98, 55, -15, 4},
+ {0, 0, 4, -18, 96, 58, -15, 3},
+ {0, 0, 4, -18, 94, 60, -16, 4},
+ {0, 0, 4, -18, 91, 63, -16, 4},
+ {0, 0, 4, -18, 89, 65, -16, 4},
+ {0, 0, 4, -18, 87, 68, -17, 4},
+ {0, 0, 4, -18, 85, 70, -17, 4},
+ {0, 0, 4, -18, 82, 73, -17, 4},
+ {0, 0, 4, -18, 80, 75, -17, 4},
+ {0, 0, 4, -18, 78, 78, -18, 4},
+ {0, 0, 4, -17, 75, 80, -18, 4},
+ {0, 0, 4, -17, 73, 82, -18, 4},
+ {0, 0, 4, -17, 70, 85, -18, 4},
+ {0, 0, 4, -17, 68, 87, -18, 4},
+ {0, 0, 4, -16, 65, 89, -18, 4},
+ {0, 0, 4, -16, 63, 91, -18, 4},
+ {0, 0, 4, -16, 60, 94, -18, 4},
+ {0, 0, 3, -15, 58, 96, -18, 4},
+ {0, 0, 4, -15, 55, 98, -18, 4},
+ {0, 0, 3, -14, 52, 100, -17, 4},
+ {0, 0, 3, -14, 50, 102, -17, 4},
+ {0, 0, 3, -13, 47, 104, -17, 4},
+ {0, 0, 3, -13, 45, 106, -17, 4},
+ {0, 0, 3, -12, 42, 108, -16, 3},
+ {0, 0, 3, -11, 40, 109, -16, 3},
+ {0, 0, 3, -11, 37, 111, -15, 3},
+ {0, 0, 2, -10, 35, 113, -15, 3},
+ {0, 0, 3, -10, 32, 114, -14, 3},
+ {0, 0, 2, -9, 29, 116, -13, 3},
+ {0, 0, 2, -8, 27, 117, -13, 3},
+ {0, 0, 2, -8, 25, 119, -12, 2},
+ {0, 0, 2, -7, 22, 120, -11, 2},
+ {0, 0, 1, -6, 20, 121, -10, 2},
+ {0, 0, 1, -6, 18, 122, -9, 2},
+ {0, 0, 1, -5, 15, 123, -8, 2},
+ {0, 0, 1, -4, 13, 124, -7, 1},
+ {0, 0, 1, -4, 11, 125, -6, 1},
+ {0, 0, 1, -3, 8, 126, -5, 1},
+ {0, 0, 1, -2, 6, 126, -4, 1},
+ {0, 0, 0, -1, 4, 127, -3, 1},
+ {0, 0, 0, 0, 2, 127, -1, 0},
+ // dummy, replicate row index 191.
+ {0, 0, 0, 0, 2, 127, -1, 0}};
+
+// Every value in |kSubPixelFilters| is even. Divide by 2 to simplify
+// calculations by reducing the range by 1 bit.
+alignas(8) const int8_t kHalfSubPixelFilters[6][16][8] = {
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, -3, 63, 4, -1, 0, 0},
+ {0, 1, -5, 61, 9, -2, 0, 0},
+ {0, 1, -6, 58, 14, -4, 1, 0},
+ {0, 1, -7, 55, 19, -5, 1, 0},
+ {0, 1, -7, 51, 24, -6, 1, 0},
+ {0, 1, -8, 47, 29, -6, 1, 0},
+ {0, 1, -7, 42, 33, -6, 1, 0},
+ {0, 1, -7, 38, 38, -7, 1, 0},
+ {0, 1, -6, 33, 42, -7, 1, 0},
+ {0, 1, -6, 29, 47, -8, 1, 0},
+ {0, 1, -6, 24, 51, -7, 1, 0},
+ {0, 1, -5, 19, 55, -7, 1, 0},
+ {0, 1, -4, 14, 58, -6, 1, 0},
+ {0, 0, -2, 9, 61, -5, 1, 0},
+ {0, 0, -1, 4, 63, -3, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 14, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, -1, 8, 27, 24, 6, 0, 0},
+ {0, -1, 7, 26, 26, 7, -1, 0},
+ {0, 0, 6, 24, 27, 8, -1, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 14, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {-1, 1, -3, 63, 4, -1, 1, 0},
+ {-1, 3, -6, 62, 8, -3, 2, -1},
+ {-1, 4, -9, 60, 13, -5, 3, -1},
+ {-2, 5, -11, 58, 19, -7, 3, -1},
+ {-2, 5, -11, 54, 24, -9, 4, -1},
+ {-2, 5, -12, 50, 30, -10, 4, -1},
+ {-2, 5, -12, 45, 35, -11, 5, -1},
+ {-2, 6, -12, 40, 40, -12, 6, -2},
+ {-1, 5, -11, 35, 45, -12, 5, -2},
+ {-1, 4, -10, 30, 50, -12, 5, -2},
+ {-1, 4, -9, 24, 54, -11, 5, -2},
+ {-1, 3, -7, 19, 58, -11, 5, -2},
+ {-1, 3, -5, 13, 60, -9, 4, -1},
+ {-1, 2, -3, 8, 62, -6, 3, -1},
+ {0, 1, -1, 4, 63, -3, 1, -1}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 0, 60, 4, 0, 0, 0},
+ {0, 0, 0, 56, 8, 0, 0, 0},
+ {0, 0, 0, 52, 12, 0, 0, 0},
+ {0, 0, 0, 48, 16, 0, 0, 0},
+ {0, 0, 0, 44, 20, 0, 0, 0},
+ {0, 0, 0, 40, 24, 0, 0, 0},
+ {0, 0, 0, 36, 28, 0, 0, 0},
+ {0, 0, 0, 32, 32, 0, 0, 0},
+ {0, 0, 0, 28, 36, 0, 0, 0},
+ {0, 0, 0, 24, 40, 0, 0, 0},
+ {0, 0, 0, 20, 44, 0, 0, 0},
+ {0, 0, 0, 16, 48, 0, 0, 0},
+ {0, 0, 0, 12, 52, 0, 0, 0},
+ {0, 0, 0, 8, 56, 0, 0, 0},
+ {0, 0, 0, 4, 60, 0, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, -2, 63, 4, -1, 0, 0},
+ {0, 0, -4, 61, 9, -2, 0, 0},
+ {0, 0, -5, 58, 14, -3, 0, 0},
+ {0, 0, -6, 55, 19, -4, 0, 0},
+ {0, 0, -6, 51, 24, -5, 0, 0},
+ {0, 0, -7, 47, 29, -5, 0, 0},
+ {0, 0, -6, 42, 33, -5, 0, 0},
+ {0, 0, -6, 38, 38, -6, 0, 0},
+ {0, 0, -5, 33, 42, -6, 0, 0},
+ {0, 0, -5, 29, 47, -7, 0, 0},
+ {0, 0, -5, 24, 51, -6, 0, 0},
+ {0, 0, -4, 19, 55, -6, 0, 0},
+ {0, 0, -3, 14, 58, -5, 0, 0},
+ {0, 0, -2, 9, 61, -4, 0, 0},
+ {0, 0, -1, 4, 63, -2, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 15, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 0, 7, 27, 24, 6, 0, 0},
+ {0, 0, 6, 26, 26, 6, 0, 0},
+ {0, 0, 6, 24, 27, 7, 0, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 15, 0, 0}}};
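+
+// Illustrative sketch (not part of the upstream source), assuming a Clip3()
+// helper: because the taps are halved, applying one of these filters rounds
+// with a final shift of kFilterBits - 1 instead of kFilterBits:
+//   int sum = 0;
+//   for (int k = 0; k < kSubPixelTaps; ++k) {
+//     sum += kHalfSubPixelFilters[filter_index][subpixel][k] * src[k - 3];
+//   }
+//   const int pixel = Clip3((sum + 32) >> (kFilterBits - 1), 0, 255);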
+
+// Absolute values of |kHalfSubPixelFilters|. Used in situations where we know
+// the pattern of the signs and account for it in other ways.
+const uint8_t kAbsHalfSubPixelFilters[6][16][8] = {
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 3, 63, 4, 1, 0, 0},
+ {0, 1, 5, 61, 9, 2, 0, 0},
+ {0, 1, 6, 58, 14, 4, 1, 0},
+ {0, 1, 7, 55, 19, 5, 1, 0},
+ {0, 1, 7, 51, 24, 6, 1, 0},
+ {0, 1, 8, 47, 29, 6, 1, 0},
+ {0, 1, 7, 42, 33, 6, 1, 0},
+ {0, 1, 7, 38, 38, 7, 1, 0},
+ {0, 1, 6, 33, 42, 7, 1, 0},
+ {0, 1, 6, 29, 47, 8, 1, 0},
+ {0, 1, 6, 24, 51, 7, 1, 0},
+ {0, 1, 5, 19, 55, 7, 1, 0},
+ {0, 1, 4, 14, 58, 6, 1, 0},
+ {0, 0, 2, 9, 61, 5, 1, 0},
+ {0, 0, 1, 4, 63, 3, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 14, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 1, 8, 27, 24, 6, 0, 0},
+ {0, 1, 7, 26, 26, 7, 1, 0},
+ {0, 0, 6, 24, 27, 8, 1, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 14, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {1, 1, 3, 63, 4, 1, 1, 0},
+ {1, 3, 6, 62, 8, 3, 2, 1},
+ {1, 4, 9, 60, 13, 5, 3, 1},
+ {2, 5, 11, 58, 19, 7, 3, 1},
+ {2, 5, 11, 54, 24, 9, 4, 1},
+ {2, 5, 12, 50, 30, 10, 4, 1},
+ {2, 5, 12, 45, 35, 11, 5, 1},
+ {2, 6, 12, 40, 40, 12, 6, 2},
+ {1, 5, 11, 35, 45, 12, 5, 2},
+ {1, 4, 10, 30, 50, 12, 5, 2},
+ {1, 4, 9, 24, 54, 11, 5, 2},
+ {1, 3, 7, 19, 58, 11, 5, 2},
+ {1, 3, 5, 13, 60, 9, 4, 1},
+ {1, 2, 3, 8, 62, 6, 3, 1},
+ {0, 1, 1, 4, 63, 3, 1, 1}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 0, 60, 4, 0, 0, 0},
+ {0, 0, 0, 56, 8, 0, 0, 0},
+ {0, 0, 0, 52, 12, 0, 0, 0},
+ {0, 0, 0, 48, 16, 0, 0, 0},
+ {0, 0, 0, 44, 20, 0, 0, 0},
+ {0, 0, 0, 40, 24, 0, 0, 0},
+ {0, 0, 0, 36, 28, 0, 0, 0},
+ {0, 0, 0, 32, 32, 0, 0, 0},
+ {0, 0, 0, 28, 36, 0, 0, 0},
+ {0, 0, 0, 24, 40, 0, 0, 0},
+ {0, 0, 0, 20, 44, 0, 0, 0},
+ {0, 0, 0, 16, 48, 0, 0, 0},
+ {0, 0, 0, 12, 52, 0, 0, 0},
+ {0, 0, 0, 8, 56, 0, 0, 0},
+ {0, 0, 0, 4, 60, 0, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 2, 63, 4, 1, 0, 0},
+ {0, 0, 4, 61, 9, 2, 0, 0},
+ {0, 0, 5, 58, 14, 3, 0, 0},
+ {0, 0, 6, 55, 19, 4, 0, 0},
+ {0, 0, 6, 51, 24, 5, 0, 0},
+ {0, 0, 7, 47, 29, 5, 0, 0},
+ {0, 0, 6, 42, 33, 5, 0, 0},
+ {0, 0, 6, 38, 38, 6, 0, 0},
+ {0, 0, 5, 33, 42, 6, 0, 0},
+ {0, 0, 5, 29, 47, 7, 0, 0},
+ {0, 0, 5, 24, 51, 6, 0, 0},
+ {0, 0, 4, 19, 55, 6, 0, 0},
+ {0, 0, 3, 14, 58, 5, 0, 0},
+ {0, 0, 2, 9, 61, 4, 0, 0},
+ {0, 0, 1, 4, 63, 2, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 15, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 0, 7, 27, 24, 6, 0, 0},
+ {0, 0, 6, 26, 26, 6, 0, 0},
+ {0, 0, 6, 24, 27, 7, 0, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 15, 0, 0}}};
+
+// 9.3 -- Dr_Intra_Derivative[]
+// This is a more compact version of the table from the spec; angle / 2 - 1 is
+// used as the lookup index. Note that angle / 3 - 1 would work too, but the
+// calculation is more costly.
+const int16_t kDirectionalIntraPredictorDerivative[44] = {
+ // Approx angle
+ 1023, 0, // 3, ...
+ 547, // 6, ...
+ 372, 0, 0, // 9, ...
+ 273, // 14, ...
+ 215, 0, // 17, ...
+ 178, // 20, ...
+ 151, 0, // 23, ... (113 & 203 are base angles)
+ 132, // 26, ...
+ 116, 0, // 29, ...
+ 102, 0, // 32, ...
+ 90, // 36, ...
+ 80, 0, // 39, ...
+ 71, // 42, ...
+ 64, 0, // 45, ... (45 & 135 are base angles)
+ 57, // 48, ...
+ 51, 0, // 51, ...
+ 45, 0, // 54, ...
+ 40, // 58, ...
+ 35, 0, // 61, ...
+ 31, // 64, ...
+ 27, 0, // 67, ... (67 & 157 are base angles)
+ 23, // 70, ...
+ 19, 0, // 73, ...
+ 15, 0, // 76, ...
+ 11, 0, // 81, ...
+ 7, // 84, ...
+ 3, // 87, ...
+};
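+
+// Example (illustrative): the step for a directional prediction angle is
+// looked up as
+//   dx = kDirectionalIntraPredictorDerivative[angle / 2 - 1];
+// e.g. angle 6 maps to index 2, value 547.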
+
+const uint8_t kDeblockFilterLevelIndex[kMaxPlanes][kNumLoopFilterTypes] = {
+ {0, 1}, {2, 2}, {3, 3}};
+
+} // namespace libgav1
diff --git a/src/utils/constants.h b/src/utils/constants.h
new file mode 100644
index 0000000..34cf56d
--- /dev/null
+++ b/src/utils/constants.h
@@ -0,0 +1,744 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_CONSTANTS_H_
+#define LIBGAV1_SRC_UTILS_CONSTANTS_H_
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "src/utils/bit_mask_set.h"
+
+namespace libgav1 {
+
+// Returns the number of elements between begin (inclusive) and end (inclusive).
+constexpr int EnumRangeLength(int begin, int end) { return end - begin + 1; }
+
+enum {
+// Maximum number of threads that the library will ever create.
+#if defined(LIBGAV1_MAX_THREADS) && LIBGAV1_MAX_THREADS > 0
+ kMaxThreads = LIBGAV1_MAX_THREADS
+#else
+ kMaxThreads = 128
+#endif
+}; // anonymous enum
+
+enum {
+ kInvalidMvValue = -32768,
+ kCdfMaxProbability = 32768,
+ kBlockWidthCount = 5,
+ kMaxSegments = 8,
+ kMinQuantizer = 0,
+ kMinLossyQuantizer = 1,
+ kMaxQuantizer = 255,
+ // Quantizer matrix is used only when level < 15.
+ kNumQuantizerLevelsForQuantizerMatrix = 15,
+ kFrameLfCount = 4,
+ kMaxLoopFilterValue = 63,
+ kNum4x4In64x64 = 256,
+ kMaxAngleDelta = 3,
+ kDirectionalIntraModes = 8,
+ kMaxSuperBlockSizeLog2 = 7,
+ kMinSuperBlockSizeLog2 = 6,
+ kGlobalMotionReadControl = 3,
+ kSuperResScaleNumerator = 8,
+ kBooleanSymbolCount = 2,
+ kRestorationTypeSymbolCount = 3,
+ kSgrProjParamsBits = 4,
+ kSgrProjPrecisionBits = 7,
+ // Padding on left and right side of a restoration block.
+ // 3 is enough, but padding to 4 is more efficient, and makes the temporary
+ // source buffer 8-pixel aligned.
+ kRestorationHorizontalBorder = 4,
+ // Padding on top and bottom side of a restoration block.
+ kRestorationVerticalBorder = 2,
+ kCdefBorder = 2, // Padding on each side of a cdef block.
+ kConvolveBorderLeftTop = 3, // Left/top padding of a convolve block.
+ // Right/bottom padding of a convolve block. This needs to be 4 at minimum,
+ // but was increased to simplify the SIMD loads in
+ // ConvolveCompoundScale2D_NEON() and ConvolveScale2D_NEON().
+ kConvolveBorderRight = 8,
+ kConvolveBorderBottom = 4,
+ kSubPixelTaps = 8,
+ kWienerFilterBits = 7,
+ kWienerFilterTaps = 7,
+ kMaxPaletteSize = 8,
+ kMinPaletteSize = 2,
+ kMaxPaletteSquare = 64,
+ kBorderPixels = 64,
+ // The final blending process for film grain needs room to overwrite and read
+ // with SIMD instructions. The maximum overwrite is 7 pixels, but the border
+ // is required to be a multiple of 32 by YuvBuffer::Realloc, so that
+ // subsampled chroma borders are 16-aligned.
+ kBorderPixelsFilmGrain = 32,
+ // These constants are the minimum left, right, top, and bottom border sizes
+ // in pixels as an extension of the frame boundary. The minimum border sizes
+ // are derived from the following requirements:
+ // - Warp_C() may read up to 13 pixels before or after a row.
+ // - Warp_NEON() may read up to 13 pixels before a row. It may read up to 14
+ // pixels after a row, but the value of the last read pixel is not used.
+ // - Warp_C() and Warp_NEON() may read up to 13 pixels above the top row and
+ // 13 pixels below the bottom row.
+ kMinLeftBorderPixels = 13,
+ kMinRightBorderPixels = 13,
+ kMinTopBorderPixels = 13,
+ kMinBottomBorderPixels = 13,
+ kWarpedModelPrecisionBits = 16,
+ kMaxRefMvStackSize = 8,
+ kMaxLeastSquaresSamples = 8,
+ kMaxTemporalMvCandidates = 19,
+  // The SIMD implementations of motion vector projection functions always
+ // process 2 or 4 elements together, so we pad the corresponding buffers to
+ // size 20.
+ kMaxTemporalMvCandidatesWithPadding = 20,
+ kMaxSuperBlockSizeInPixels = 128,
+ kMaxScaledSuperBlockSizeInPixels = 128 * 2,
+ kMaxSuperBlockSizeSquareInPixels = 128 * 128,
+ kNum4x4InLoopFilterUnit = 16,
+ kNum4x4InLoopRestorationUnit = 16,
+ kProjectionMvClamp = (1 << 14) - 1, // == 16383
+ kProjectionMvMaxHorizontalOffset = 8,
+ kCdefUnitSize = 64,
+ kCdefUnitSizeWithBorders = kCdefUnitSize + 2 * kCdefBorder,
+ kRestorationUnitOffset = 8,
+ // Loop restoration's processing unit size is fixed as 64x64.
+ kRestorationUnitHeight = 64,
+ kRestorationUnitWidth = 256,
+ kRestorationUnitHeightWithBorders =
+ kRestorationUnitHeight + 2 * kRestorationVerticalBorder,
+ kRestorationUnitWidthWithBorders =
+ kRestorationUnitWidth + 2 * kRestorationHorizontalBorder,
+ kSuperResFilterBits = 6,
+ kSuperResFilterShifts = 1 << kSuperResFilterBits,
+ kSuperResFilterTaps = 8,
+ kSuperResScaleBits = 14,
+ kSuperResExtraBits = kSuperResScaleBits - kSuperResFilterBits,
+ kSuperResScaleMask = (1 << 14) - 1,
+ kSuperResHorizontalBorder = 4,
+ kSuperResVerticalBorder = 1,
+ // The SIMD implementations of superres calculate up to 15 extra upscaled
+  // pixels, which will over-read up to 15 downscaled pixels at the end of each
+ // row. Set the padding to 16 for alignment purposes.
+ kSuperResHorizontalPadding = 16,
+  // TODO(chengchen): consider merging these constants:
+  // kFilterBits, kWienerFilterBits, and kSgrProjPrecisionBits, which are all
+  // 7. They are designed to match AV1 convolution, which increases
+  // coefficient values by up to 7 bits. We could combine them and use
+  // kFilterBits only.
+ kFilterBits = 7,
+  // A sub pixel in AV1 represents a pixel location that is not at an integer
+  // position. Sub pixels are in units of 1/16 (1 << kSubPixelBits) of an
+  // integer pixel. Sub pixel values are interpolated using adjacent integer
+  // pixel values; the interpolation is a filtering process.
+ kSubPixelBits = 4,
+ kSubPixelMask = (1 << kSubPixelBits) - 1,
+ // Precision bits when computing inter prediction locations.
+ kScaleSubPixelBits = 10,
+ kWarpParamRoundingBits = 6,
+ // Number of fractional bits of lookup in divisor lookup table.
+ kDivisorLookupBits = 8,
+ // Number of fractional bits of entries in divisor lookup table.
+ kDivisorLookupPrecisionBits = 14,
+ // Number of phases used in warped filtering.
+ kWarpedPixelPrecisionShifts = 1 << 6,
+ kResidualPaddingVertical = 4,
+ kWedgeMaskMasterSize = 64,
+ kMaxFrameDistance = 31,
+ kReferenceFrameScalePrecision = 14,
+ kNumWienerCoefficients = 3,
+ kLoopFilterMaxModeDeltas = 2,
+ kMaxCdefStrengths = 8,
+ kCdefLargeValue = 0x4000, // Used to indicate where CDEF is not available.
+ kMaxTileColumns = 64,
+ kMaxTileRows = 64,
+ kMaxOperatingPoints = 32,
+ // There can be a maximum of 4 spatial layers and 8 temporal layers.
+ kMaxLayers = 32,
+ // The cache line size should ideally be queried at run time. 64 is a common
+ // cache line size of x86 CPUs. Web searches showed the cache line size of ARM
+  // CPUs is 32 or 64 bytes. So aligning to a 64-byte boundary will work for
+  // all CPUs that we care about, even though it is excessive for some ARM
+  // CPUs.
+ //
+ // On Linux, the cache line size can be looked up with the command:
+ // getconf LEVEL1_DCACHE_LINESIZE
+ kCacheLineSize = 64,
+}; // anonymous enum
+
+enum FrameType : uint8_t {
+ kFrameKey,
+ kFrameInter,
+ kFrameIntraOnly,
+ kFrameSwitch
+};
+
+enum Plane : uint8_t { kPlaneY, kPlaneU, kPlaneV };
+enum : uint8_t { kMaxPlanesMonochrome = kPlaneY + 1, kMaxPlanes = kPlaneV + 1 };
+
+// The plane types, called luma and chroma in the spec.
+enum PlaneType : uint8_t { kPlaneTypeY, kPlaneTypeUV, kNumPlaneTypes };
+
+enum ReferenceFrameType : int8_t {
+ kReferenceFrameNone = -1,
+ kReferenceFrameIntra,
+ kReferenceFrameLast,
+ kReferenceFrameLast2,
+ kReferenceFrameLast3,
+ kReferenceFrameGolden,
+ kReferenceFrameBackward,
+ kReferenceFrameAlternate2,
+ kReferenceFrameAlternate,
+ kNumReferenceFrameTypes,
+ kNumInterReferenceFrameTypes =
+ EnumRangeLength(kReferenceFrameLast, kReferenceFrameAlternate),
+ kNumForwardReferenceTypes =
+ EnumRangeLength(kReferenceFrameLast, kReferenceFrameGolden),
+ kNumBackwardReferenceTypes =
+ EnumRangeLength(kReferenceFrameBackward, kReferenceFrameAlternate)
+};
+
+enum {
+ // Unidirectional compound reference pairs that are signaled explicitly:
+ // {kReferenceFrameLast, kReferenceFrameLast2},
+ // {kReferenceFrameLast, kReferenceFrameLast3},
+ // {kReferenceFrameLast, kReferenceFrameGolden},
+ // {kReferenceFrameBackward, kReferenceFrameAlternate}
+ kExplicitUnidirectionalCompoundReferences = 4,
+ // Other unidirectional compound reference pairs:
+ // {kReferenceFrameLast2, kReferenceFrameLast3},
+ // {kReferenceFrameLast2, kReferenceFrameGolden},
+ // {kReferenceFrameLast3, kReferenceFrameGolden},
+ // {kReferenceFrameBackward, kReferenceFrameAlternate2},
+ // {kReferenceFrameAlternate2, kReferenceFrameAlternate}
+ kUnidirectionalCompoundReferences =
+ kExplicitUnidirectionalCompoundReferences + 5,
+}; // anonymous enum
+
+enum BlockSize : uint8_t {
+ kBlock4x4,
+ kBlock4x8,
+ kBlock4x16,
+ kBlock8x4,
+ kBlock8x8,
+ kBlock8x16,
+ kBlock8x32,
+ kBlock16x4,
+ kBlock16x8,
+ kBlock16x16,
+ kBlock16x32,
+ kBlock16x64,
+ kBlock32x8,
+ kBlock32x16,
+ kBlock32x32,
+ kBlock32x64,
+ kBlock64x16,
+ kBlock64x32,
+ kBlock64x64,
+ kBlock64x128,
+ kBlock128x64,
+ kBlock128x128,
+ kMaxBlockSizes,
+ kBlockInvalid
+};
+
+// Partition types. R: Recursive
+//
+// None Horizontal Vertical Split
+// +-------+ +-------+ +---+---+ +---+---+
+// | | | | | | | | R | R |
+// | | +-------+ | | | +---+---+
+// | | | | | | | | R | R |
+// +-------+ +-------+ +---+---+ +---+---+
+//
+// Horizontal Horizontal Vertical Vertical
+// with top with bottom with left with right
+// split split split split
+// +---+---+ +-------+ +---+---+ +---+---+
+// | | | | | | | | | | |
+// +---+---+ +---+---+ +---+ | | +---+
+// | | | | | | | | | | |
+// +-------+ +---+---+ +---+---+ +---+---+
+//
+// Horizontal4 Vertical4
+// +-----+ +-+-+-+
+// +-----+ | | | |
+// +-----+ | | | |
+// +-----+ +-+-+-+
+enum Partition : uint8_t {
+ kPartitionNone,
+ kPartitionHorizontal,
+ kPartitionVertical,
+ kPartitionSplit,
+ kPartitionHorizontalWithTopSplit,
+ kPartitionHorizontalWithBottomSplit,
+ kPartitionVerticalWithLeftSplit,
+ kPartitionVerticalWithRightSplit,
+ kPartitionHorizontal4,
+ kPartitionVertical4
+};
+enum : uint8_t { kMaxPartitionTypes = kPartitionVertical4 + 1 };
+
+enum PredictionMode : uint8_t {
+ // Intra prediction modes.
+ kPredictionModeDc,
+ kPredictionModeVertical,
+ kPredictionModeHorizontal,
+ kPredictionModeD45,
+ kPredictionModeD135,
+ kPredictionModeD113,
+ kPredictionModeD157,
+ kPredictionModeD203,
+ kPredictionModeD67,
+ kPredictionModeSmooth,
+ kPredictionModeSmoothVertical,
+ kPredictionModeSmoothHorizontal,
+ kPredictionModePaeth,
+ kPredictionModeChromaFromLuma,
+ // Single inter prediction modes.
+ kPredictionModeNearestMv,
+ kPredictionModeNearMv,
+ kPredictionModeGlobalMv,
+ kPredictionModeNewMv,
+ // Compound inter prediction modes.
+ kPredictionModeNearestNearestMv,
+ kPredictionModeNearNearMv,
+ kPredictionModeNearestNewMv,
+ kPredictionModeNewNearestMv,
+ kPredictionModeNearNewMv,
+ kPredictionModeNewNearMv,
+ kPredictionModeGlobalGlobalMv,
+ kPredictionModeNewNewMv,
+ kNumPredictionModes,
+ kNumCompoundInterPredictionModes =
+ EnumRangeLength(kPredictionModeNearestNearestMv, kPredictionModeNewNewMv),
+ kIntraPredictionModesY =
+ EnumRangeLength(kPredictionModeDc, kPredictionModePaeth),
+ kIntraPredictionModesUV =
+ EnumRangeLength(kPredictionModeDc, kPredictionModeChromaFromLuma),
+ kPredictionModeInvalid = 255
+};
+
+enum InterIntraMode : uint8_t {
+ kInterIntraModeDc,
+ kInterIntraModeVertical,
+ kInterIntraModeHorizontal,
+ kInterIntraModeSmooth,
+ kNumInterIntraModes
+};
+
+enum MotionMode : uint8_t {
+ kMotionModeSimple,
+ kMotionModeObmc, // Overlapped block motion compensation.
+ kMotionModeLocalWarp,
+ kNumMotionModes
+};
+
+enum TxMode : uint8_t {
+ kTxModeOnly4x4,
+ kTxModeLargest,
+ kTxModeSelect,
+ kNumTxModes
+};
+
+// These enums are named as kType1Type2 where Type1 is the transform type for
+// the rows and Type2 is the transform type for the columns.
+enum TransformType : uint8_t {
+ kTransformTypeDctDct,
+ kTransformTypeAdstDct,
+ kTransformTypeDctAdst,
+ kTransformTypeAdstAdst,
+ kTransformTypeFlipadstDct,
+ kTransformTypeDctFlipadst,
+ kTransformTypeFlipadstFlipadst,
+ kTransformTypeAdstFlipadst,
+ kTransformTypeFlipadstAdst,
+ kTransformTypeIdentityIdentity,
+ kTransformTypeIdentityDct,
+ kTransformTypeDctIdentity,
+ kTransformTypeIdentityAdst,
+ kTransformTypeAdstIdentity,
+ kTransformTypeIdentityFlipadst,
+ kTransformTypeFlipadstIdentity,
+ kNumTransformTypes
+};
+
+constexpr BitMaskSet kTransformFlipColumnsMask(kTransformTypeFlipadstDct,
+ kTransformTypeFlipadstAdst,
+ kTransformTypeFlipadstIdentity,
+ kTransformTypeFlipadstFlipadst);
+constexpr BitMaskSet kTransformFlipRowsMask(kTransformTypeDctFlipadst,
+ kTransformTypeAdstFlipadst,
+ kTransformTypeIdentityFlipadst,
+ kTransformTypeFlipadstFlipadst);
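+
+// Example usage (illustrative, assuming BitMaskSet's Contains() accessor):
+//   const bool flip_columns = kTransformFlipColumnsMask.Contains(tx_type);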
+
+enum TransformSize : uint8_t {
+ kTransformSize4x4,
+ kTransformSize4x8,
+ kTransformSize4x16,
+ kTransformSize8x4,
+ kTransformSize8x8,
+ kTransformSize8x16,
+ kTransformSize8x32,
+ kTransformSize16x4,
+ kTransformSize16x8,
+ kTransformSize16x16,
+ kTransformSize16x32,
+ kTransformSize16x64,
+ kTransformSize32x8,
+ kTransformSize32x16,
+ kTransformSize32x32,
+ kTransformSize32x64,
+ kTransformSize64x16,
+ kTransformSize64x32,
+ kTransformSize64x64,
+ kNumTransformSizes
+};
+
+enum TransformSet : uint8_t {
+ // DCT Only (1).
+ kTransformSetDctOnly,
+ // 2D-DCT and 2D-ADST without flip (4) + Identity (1) + 1D Horizontal/Vertical
+ // DCT (2) = Total (7).
+ kTransformSetIntra1,
+ // 2D-DCT and 2D-ADST without flip (4) + Identity (1) = Total (5).
+ kTransformSetIntra2,
+ // All transforms = Total (16).
+ kTransformSetInter1,
+ // 2D-DCT and 2D-ADST with flip (9) + Identity (1) + 1D Horizontal/Vertical
+ // DCT (2) = Total (12).
+ kTransformSetInter2,
+ // DCT (1) + Identity (1) = Total (2).
+ kTransformSetInter3,
+ kNumTransformSets
+};
+
+enum TransformClass : uint8_t {
+ kTransformClass2D,
+ kTransformClassHorizontal,
+ kTransformClassVertical,
+ kNumTransformClasses
+};
+
+enum FilterIntraPredictor : uint8_t {
+ kFilterIntraPredictorDc,
+ kFilterIntraPredictorVertical,
+ kFilterIntraPredictorHorizontal,
+ kFilterIntraPredictorD157,
+ kFilterIntraPredictorPaeth,
+ kNumFilterIntraPredictors
+};
+
+enum ObmcDirection : uint8_t {
+ kObmcDirectionVertical,
+ kObmcDirectionHorizontal,
+ kNumObmcDirections
+};
+
+// In AV1 the name of the filter refers to the direction of filter application.
+// Horizontal refers to the column edge and vertical to the row edge.
+enum LoopFilterType : uint8_t {
+ kLoopFilterTypeVertical,
+ kLoopFilterTypeHorizontal,
+ kNumLoopFilterTypes
+};
+
+enum LoopFilterTransformSizeId : uint8_t {
+ kLoopFilterTransformSizeId4x4,
+ kLoopFilterTransformSizeId8x8,
+ kLoopFilterTransformSizeId16x16,
+ kNumLoopFilterTransformSizeIds
+};
+
+enum LoopRestorationType : uint8_t {
+ kLoopRestorationTypeNone,
+ kLoopRestorationTypeSwitchable,
+ kLoopRestorationTypeWiener,
+  kLoopRestorationTypeSgrProj,  // Self-guided projection filter.
+ kNumLoopRestorationTypes
+};
+
+enum CompoundReferenceType : uint8_t {
+ kCompoundReferenceUnidirectional,
+ kCompoundReferenceBidirectional,
+ kNumCompoundReferenceTypes
+};
+
+enum CompoundPredictionType : uint8_t {
+ kCompoundPredictionTypeWedge,
+ kCompoundPredictionTypeDiffWeighted,
+ kCompoundPredictionTypeAverage,
+ kCompoundPredictionTypeIntra,
+ kCompoundPredictionTypeDistance,
+ kNumCompoundPredictionTypes,
+ // Number of compound prediction types that are explicitly signaled in the
+ // bitstream (in the compound_type syntax element).
+ kNumExplicitCompoundPredictionTypes = 2
+};
+
+enum InterpolationFilter : uint8_t {
+ kInterpolationFilterEightTap,
+ kInterpolationFilterEightTapSmooth,
+ kInterpolationFilterEightTapSharp,
+ kInterpolationFilterBilinear,
+ kInterpolationFilterSwitchable,
+ kNumInterpolationFilters,
+ // Number of interpolation filters that can be explicitly signaled in the
+ // compressed headers (when the uncompressed headers allow switchable
+ // interpolation filters) of the bitstream.
+ kNumExplicitInterpolationFilters = EnumRangeLength(
+ kInterpolationFilterEightTap, kInterpolationFilterEightTapSharp)
+};
+
+enum MvJointType : uint8_t {
+ kMvJointTypeZero,
+ kMvJointTypeHorizontalNonZeroVerticalZero,
+ kMvJointTypeHorizontalZeroVerticalNonZero,
+ kMvJointTypeNonZero,
+ kNumMvJointTypes
+};
+
+enum ObuType : int8_t {
+ kObuInvalid = -1,
+ kObuSequenceHeader = 1,
+ kObuTemporalDelimiter = 2,
+ kObuFrameHeader = 3,
+ kObuTileGroup = 4,
+ kObuMetadata = 5,
+ kObuFrame = 6,
+ kObuRedundantFrameHeader = 7,
+ kObuTileList = 8,
+ kObuPadding = 15,
+};
+
+//------------------------------------------------------------------------------
+// ToString()
+//
+// These functions are meant to be used only in debug logging and within tests.
+// They are defined inline to avoid including the strings in the release
+// library when logging is disabled; unreferenced functions will not be added to
+// any object file in that case.
+
+inline const char* ToString(const BlockSize size) {
+ switch (size) {
+ case kBlock4x4:
+ return "kBlock4x4";
+ case kBlock4x8:
+ return "kBlock4x8";
+ case kBlock4x16:
+ return "kBlock4x16";
+ case kBlock8x4:
+ return "kBlock8x4";
+ case kBlock8x8:
+ return "kBlock8x8";
+ case kBlock8x16:
+ return "kBlock8x16";
+ case kBlock8x32:
+ return "kBlock8x32";
+ case kBlock16x4:
+ return "kBlock16x4";
+ case kBlock16x8:
+ return "kBlock16x8";
+ case kBlock16x16:
+ return "kBlock16x16";
+ case kBlock16x32:
+ return "kBlock16x32";
+ case kBlock16x64:
+ return "kBlock16x64";
+ case kBlock32x8:
+ return "kBlock32x8";
+ case kBlock32x16:
+ return "kBlock32x16";
+ case kBlock32x32:
+ return "kBlock32x32";
+ case kBlock32x64:
+ return "kBlock32x64";
+ case kBlock64x16:
+ return "kBlock64x16";
+ case kBlock64x32:
+ return "kBlock64x32";
+ case kBlock64x64:
+ return "kBlock64x64";
+ case kBlock64x128:
+ return "kBlock64x128";
+ case kBlock128x64:
+ return "kBlock128x64";
+ case kBlock128x128:
+ return "kBlock128x128";
+ case kMaxBlockSizes:
+ return "kMaxBlockSizes";
+ case kBlockInvalid:
+ return "kBlockInvalid";
+ }
+ abort();
+}
+
+inline const char* ToString(const InterIntraMode mode) {
+ switch (mode) {
+ case kInterIntraModeDc:
+ return "kInterIntraModeDc";
+ case kInterIntraModeVertical:
+ return "kInterIntraModeVertical";
+ case kInterIntraModeHorizontal:
+ return "kInterIntraModeHorizontal";
+ case kInterIntraModeSmooth:
+ return "kInterIntraModeSmooth";
+ case kNumInterIntraModes:
+ return "kNumInterIntraModes";
+ }
+ abort();
+}
+
+inline const char* ToString(const ObmcDirection direction) {
+ switch (direction) {
+ case kObmcDirectionVertical:
+ return "kObmcDirectionVertical";
+ case kObmcDirectionHorizontal:
+ return "kObmcDirectionHorizontal";
+ case kNumObmcDirections:
+ return "kNumObmcDirections";
+ }
+ abort();
+}
+
+inline const char* ToString(const LoopRestorationType type) {
+ switch (type) {
+ case kLoopRestorationTypeNone:
+ return "kLoopRestorationTypeNone";
+ case kLoopRestorationTypeSwitchable:
+ return "kLoopRestorationTypeSwitchable";
+ case kLoopRestorationTypeWiener:
+ return "kLoopRestorationTypeWiener";
+ case kLoopRestorationTypeSgrProj:
+ return "kLoopRestorationTypeSgrProj";
+ case kNumLoopRestorationTypes:
+ return "kNumLoopRestorationTypes";
+ }
+ abort();
+}
+
+inline const char* ToString(const TransformType type) {
+ switch (type) {
+ case kTransformTypeDctDct:
+ return "kTransformTypeDctDct";
+ case kTransformTypeAdstDct:
+ return "kTransformTypeAdstDct";
+ case kTransformTypeDctAdst:
+ return "kTransformTypeDctAdst";
+ case kTransformTypeAdstAdst:
+ return "kTransformTypeAdstAdst";
+ case kTransformTypeFlipadstDct:
+ return "kTransformTypeFlipadstDct";
+ case kTransformTypeDctFlipadst:
+ return "kTransformTypeDctFlipadst";
+ case kTransformTypeFlipadstFlipadst:
+ return "kTransformTypeFlipadstFlipadst";
+ case kTransformTypeAdstFlipadst:
+ return "kTransformTypeAdstFlipadst";
+ case kTransformTypeFlipadstAdst:
+ return "kTransformTypeFlipadstAdst";
+ case kTransformTypeIdentityIdentity:
+ return "kTransformTypeIdentityIdentity";
+ case kTransformTypeIdentityDct:
+ return "kTransformTypeIdentityDct";
+ case kTransformTypeDctIdentity:
+ return "kTransformTypeDctIdentity";
+ case kTransformTypeIdentityAdst:
+ return "kTransformTypeIdentityAdst";
+ case kTransformTypeAdstIdentity:
+ return "kTransformTypeAdstIdentity";
+ case kTransformTypeIdentityFlipadst:
+ return "kTransformTypeIdentityFlipadst";
+ case kTransformTypeFlipadstIdentity:
+ return "kTransformTypeFlipadstIdentity";
+    // Case included to quiet the compiler.
+ case kNumTransformTypes:
+ return "kNumTransformTypes";
+ }
+ abort();
+}
+
+//------------------------------------------------------------------------------
+
+extern const uint8_t k4x4WidthLog2[kMaxBlockSizes];
+
+extern const uint8_t k4x4HeightLog2[kMaxBlockSizes];
+
+extern const uint8_t kNum4x4BlocksWide[kMaxBlockSizes];
+
+extern const uint8_t kNum4x4BlocksHigh[kMaxBlockSizes];
+
+extern const uint8_t kBlockWidthPixels[kMaxBlockSizes];
+
+extern const uint8_t kBlockHeightPixels[kMaxBlockSizes];
+
+extern const BlockSize kSubSize[kMaxPartitionTypes][kMaxBlockSizes];
+
+extern const BlockSize kPlaneResidualSize[kMaxBlockSizes][2][2];
+
+extern const int16_t kProjectionMvDivisionLookup[kMaxFrameDistance + 1];
+
+extern const uint8_t kTransformWidth[kNumTransformSizes];
+
+extern const uint8_t kTransformHeight[kNumTransformSizes];
+
+extern const uint8_t kTransformWidth4x4[kNumTransformSizes];
+
+extern const uint8_t kTransformHeight4x4[kNumTransformSizes];
+
+extern const uint8_t kTransformWidthLog2[kNumTransformSizes];
+
+extern const uint8_t kTransformHeightLog2[kNumTransformSizes];
+
+extern const TransformSize kSplitTransformSize[kNumTransformSizes];
+
+// Square transform of size min(w,h).
+extern const TransformSize kTransformSizeSquareMin[kNumTransformSizes];
+
+// Square transform of size max(w,h).
+extern const TransformSize kTransformSizeSquareMax[kNumTransformSizes];
+
+extern const uint8_t kNumTransformTypesInSet[kNumTransformSets];
+
+extern const uint8_t kSgrProjParams[1 << kSgrProjParamsBits][4];
+
+extern const int8_t kSgrProjMultiplierMin[2];
+
+extern const int8_t kSgrProjMultiplierMax[2];
+
+extern const int8_t kWienerTapsMin[3];
+
+extern const int8_t kWienerTapsMax[3];
+
+extern const uint8_t kUpscaleFilterUnsigned[kSuperResFilterShifts]
+ [kSuperResFilterTaps];
+
+// An int8_t version of the kWarpedFilters array.
+// Note: The array could be removed with a performance penalty.
+extern const int8_t kWarpedFilters8[3 * kWarpedPixelPrecisionShifts + 1][8];
+
+extern const int16_t kWarpedFilters[3 * kWarpedPixelPrecisionShifts + 1][8];
+
+extern const int8_t kHalfSubPixelFilters[6][16][8];
+
+extern const uint8_t kAbsHalfSubPixelFilters[6][16][8];
+
+extern const int16_t kDirectionalIntraPredictorDerivative[44];
+
+extern const uint8_t kDeblockFilterLevelIndex[kMaxPlanes][kNumLoopFilterTypes];
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_CONSTANTS_H_
diff --git a/src/utils/cpu.cc b/src/utils/cpu.cc
new file mode 100644
index 0000000..a6b7057
--- /dev/null
+++ b/src/utils/cpu.cc
@@ -0,0 +1,84 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/cpu.h"
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#include <cpuid.h>
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#include <immintrin.h> // _xgetbv
+#include <intrin.h>
+#endif
+
+namespace libgav1 {
+
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
+namespace {
+
+#if defined(__GNUC__)
+void CpuId(int leaf, uint32_t info[4]) {
+ __cpuid_count(leaf, 0 /*ecx=subleaf*/, info[0], info[1], info[2], info[3]);
+}
+
+uint64_t Xgetbv() {
+ const uint32_t ecx = 0; // ecx specifies the extended control register
+ uint32_t eax;
+ uint32_t edx;
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(ecx));
+ return (static_cast<uint64_t>(edx) << 32) | eax;
+}
+#else // _MSC_VER
+void CpuId(int leaf, uint32_t info[4]) {
+ __cpuidex(reinterpret_cast<int*>(info), leaf, 0 /*ecx=subleaf*/);
+}
+
+uint64_t Xgetbv() { return _xgetbv(0); }
+#endif // __GNUC__
+
+} // namespace
+
+uint32_t GetCpuInfo() {
+ uint32_t info[4];
+
+ // Get the highest feature value cpuid supports
+ CpuId(0, info);
+ const int max_cpuid_value = info[0];
+ if (max_cpuid_value < 1) return 0;
+
+ CpuId(1, info);
+ uint32_t features = 0;
+ if ((info[3] & (1 << 26)) != 0) features |= kSSE2;
+ if ((info[2] & (1 << 9)) != 0) features |= kSSSE3;
+ if ((info[2] & (1 << 19)) != 0) features |= kSSE4_1;
+
+ // Bits 27 (OSXSAVE) & 28 (256-bit AVX)
+ if ((info[2] & (3 << 27)) == (3 << 27)) {
+    // XMM and YMM state enabled by the OS: XCR0 bits 1 and 2 (mask 0x6).
+ if ((Xgetbv() & 0x6) == 0x6) {
+ features |= kAVX;
+ if (max_cpuid_value >= 7) {
+ CpuId(7, info);
+ if ((info[1] & (1 << 5)) != 0) features |= kAVX2;
+ }
+ }
+ }
+
+ return features;
+}
+#else
+uint32_t GetCpuInfo() { return 0; }
+#endif // x86 || x86_64
+
+} // namespace libgav1
diff --git a/src/utils/cpu.h b/src/utils/cpu.h
new file mode 100644
index 0000000..630b251
--- /dev/null
+++ b/src/utils/cpu.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_CPU_H_
+#define LIBGAV1_SRC_UTILS_CPU_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+#if defined(__i386__) || defined(__x86_64__)
+#define LIBGAV1_X86
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#define LIBGAV1_X86
+#define LIBGAV1_X86_MSVC
+#endif
+
+#if defined(LIBGAV1_X86)
+
+#if !defined(LIBGAV1_ENABLE_SSE4_1)
+#define LIBGAV1_ENABLE_SSE4_1 1
+#endif
+
+#if LIBGAV1_ENABLE_SSE4_1
+#if !defined(LIBGAV1_ENABLE_AVX2)
+#define LIBGAV1_ENABLE_AVX2 1
+#endif // !defined(LIBGAV1_ENABLE_AVX2)
+#else // !LIBGAV1_ENABLE_SSE4_1
+// Disable AVX2 when SSE4.1 is disabled as it may rely on shared components.
+#undef LIBGAV1_ENABLE_AVX2
+#define LIBGAV1_ENABLE_AVX2 0
+#endif // LIBGAV1_ENABLE_SSE4_1
+
+#else // !LIBGAV1_X86
+
+#undef LIBGAV1_ENABLE_AVX2
+#define LIBGAV1_ENABLE_AVX2 0
+#undef LIBGAV1_ENABLE_SSE4_1
+#define LIBGAV1_ENABLE_SSE4_1 0
+
+#endif // LIBGAV1_X86
+
+// For x86 LIBGAV1_TARGETING_* indicate the source being built is targeting
+// (at least) that instruction set. This prevents disabling other instruction
+// sets if the current instruction set isn't a global target, e.g., building
+// *_avx2.cc w/-mavx2, but the remaining files without the flag.
+#if LIBGAV1_ENABLE_AVX2 && defined(__AVX2__)
+#define LIBGAV1_TARGETING_AVX2 1
+#else
+#define LIBGAV1_TARGETING_AVX2 0
+#endif
+
+// Note: LIBGAV1_X86_MSVC isn't completely correct for Visual Studio, but there
+// is no equivalent to __SSE4_1__. LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS will be
+// enabled in dsp.h to compensate for this.
+#if LIBGAV1_ENABLE_SSE4_1 && (defined(__SSE4_1__) || defined(LIBGAV1_X86_MSVC))
+#define LIBGAV1_TARGETING_SSE4_1 1
+#else
+#define LIBGAV1_TARGETING_SSE4_1 0
+#endif
+
+#undef LIBGAV1_X86
+
+#if !defined(LIBGAV1_ENABLE_NEON)
+// TODO(jzern): add support for _M_ARM64.
+#if defined(__ARM_NEON__) || defined(__aarch64__) || \
+ (defined(_MSC_VER) && defined(_M_ARM))
+#define LIBGAV1_ENABLE_NEON 1
+#else
+#define LIBGAV1_ENABLE_NEON 0
+#endif
+#endif // !defined(LIBGAV1_ENABLE_NEON)
+
+enum CpuFeatures : uint8_t {
+ kSSE2 = 1 << 0,
+#define LIBGAV1_CPU_SSE2 (1 << 0)
+ kSSSE3 = 1 << 1,
+#define LIBGAV1_CPU_SSSE3 (1 << 1)
+ kSSE4_1 = 1 << 2,
+#define LIBGAV1_CPU_SSE4_1 (1 << 2)
+ kAVX = 1 << 3,
+#define LIBGAV1_CPU_AVX (1 << 3)
+ kAVX2 = 1 << 4,
+#define LIBGAV1_CPU_AVX2 (1 << 4)
+ kNEON = 1 << 5,
+#define LIBGAV1_CPU_NEON (1 << 5)
+};
+
+// Returns a bit-wise OR of CpuFeatures supported by this platform.
+uint32_t GetCpuInfo();
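+
+// Example (illustrative only, not part of this header): runtime dispatch on
+// the detected features might look like:
+//   const uint32_t features = GetCpuInfo();
+//   if ((features & kAVX2) != 0) {
+//     // Use the AVX2 code path.
+//   } else if ((features & kSSE4_1) != 0) {
+//     // Use the SSE4.1 code path.
+//   }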
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_CPU_H_
diff --git a/src/utils/dynamic_buffer.h b/src/utils/dynamic_buffer.h
new file mode 100644
index 0000000..b51345a
--- /dev/null
+++ b/src/utils/dynamic_buffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2020 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
+#define LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
+
+#include <memory>
+#include <new>
+
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+template <typename T>
+class DynamicBuffer {
+ public:
+ T* get() { return buffer_.get(); }
+ const T* get() const { return buffer_.get(); }
+
+ // Resizes the buffer so that it can hold at least |size| elements. Existing
+ // contents will be destroyed when resizing to a larger size.
+ //
+ // Returns true on success. If Resize() returns false, then subsequent calls
+ // to get() will return nullptr.
+ bool Resize(size_t size) {
+ if (size <= size_) return true;
+ buffer_.reset(new (std::nothrow) T[size]);
+ if (buffer_ == nullptr) {
+ size_ = 0;
+ return false;
+ }
+ size_ = size;
+ return true;
+ }
+
+ private:
+ std::unique_ptr<T[]> buffer_;
+ size_t size_ = 0;
+};
+
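+// Usage sketch (illustrative; |num_values| is a hypothetical name): Resize()
+// only grows the buffer, so it can be reused across calls without
+// reallocating:
+//   DynamicBuffer<int16_t> buffer;
+//   if (!buffer.Resize(num_values)) return false;  // Allocation failure.
+//   int16_t* const values = buffer.get();  // Valid for |num_values| entries.
+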
+template <typename T, int alignment>
+class AlignedDynamicBuffer {
+ public:
+ T* get() { return buffer_.get(); }
+
+ // Resizes the buffer so that it can hold at least |size| elements. Existing
+ // contents will be destroyed when resizing to a larger size.
+ //
+ // Returns true on success. If Resize() returns false, then subsequent calls
+ // to get() will return nullptr.
+ bool Resize(size_t size) {
+ if (size <= size_) return true;
+ buffer_ = MakeAlignedUniquePtr<T>(alignment, size);
+ if (buffer_ == nullptr) {
+ size_ = 0;
+ return false;
+ }
+ size_ = size;
+ return true;
+ }
+
+ private:
+ AlignedUniquePtr<T> buffer_;
+ size_t size_ = 0;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
diff --git a/src/utils/entropy_decoder.cc b/src/utils/entropy_decoder.cc
new file mode 100644
index 0000000..bf21199
--- /dev/null
+++ b/src/utils/entropy_decoder.cc
@@ -0,0 +1,1117 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/entropy_decoder.h"
+
+#include <cassert>
+#include <cstring>
+
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/cpu.h"
+
+#if defined(__ARM_NEON__) || defined(__aarch64__) || \
+ (defined(_MSC_VER) && defined(_M_ARM))
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_NEON 1
+#else
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_NEON 0
+#endif
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+#include <arm_neon.h>
+#endif
+
+#if defined(__SSE2__) || defined(LIBGAV1_X86_MSVC)
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2 1
+#else
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2 0
+#endif
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+#include <emmintrin.h>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr uint32_t kReadBitMask = ~255;
+constexpr int kCdfPrecision = 6;
+constexpr int kMinimumProbabilityPerSymbol = 4;
+
+// This function computes the "cur" variable as specified inside the do-while
+// loop in Section 8.2.6 of the spec. Its return value is monotonically
+// decreasing as the value of |index| increases (note that the |cdf| array is
+// sorted in decreasing order).
+uint32_t ScaleCdf(uint32_t values_in_range_shifted, const uint16_t* const cdf,
+ int index, int symbol_count) {
+ return ((values_in_range_shifted * (cdf[index] >> kCdfPrecision)) >> 1) +
+ (kMinimumProbabilityPerSymbol * (symbol_count - index));
+}
+
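+// Worked example for ScaleCdf (illustrative values): with
+// values_in_range_shifted == 128 (values_in_range_ == 32768),
+// cdf[index] == 16384, and symbol_count - index == 3, the result is
+// ((128 * (16384 >> 6)) >> 1) + 4 * 3 == 16396.
+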
+void UpdateCdf(uint16_t* const cdf, const int symbol_count, const int symbol) {
+ const uint16_t count = cdf[symbol_count];
+ // rate is computed in the spec as:
+ // 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
+ // In this case cdf[N] is |count|.
+ // Min(FloorLog2(N), 2) is 1 for symbol_count == {2, 3} and 2 for all
+ // symbol_count > 3. So the equation becomes:
+ // 4 + (count > 15) + (count > 31) + (symbol_count > 3).
+ // Note that the largest value for count is 32 (it is not incremented beyond
+ // 32). So using that information:
+ // count >> 4 is 0 for count from 0 to 15.
+ // count >> 4 is 1 for count from 16 to 31.
+ // count >> 4 is 2 for count == 32.
+ // Now, the equation becomes:
+ // 4 + (count >> 4) + (symbol_count > 3).
+ // Since (count >> 4) can only be 0 or 1 or 2, the addition could be replaced
+ // with bitwise or:
+ // (4 | (count >> 4)) + (symbol_count > 3).
+ // but using addition will allow the compiler to eliminate an operation when
+ // symbol_count is known and this function is inlined.
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count > 3);
+ // Hints for further optimizations:
+ //
+ // 1. clang can vectorize this for loop with width 4, even though the loop
+ // contains an if-else statement. Therefore, it may be advantageous to use
+ // "i < symbol_count" as the loop condition when symbol_count is 8, 12, or 16
+ // (a multiple of 4 that's not too small).
+ //
+ // 2. The for loop can be rewritten in the following form, which would enable
+ // clang to vectorize the loop with width 8:
+ //
+ // const int rounding = (1 << rate) - 1;
+ // for (int i = 0; i < symbol_count - 1; ++i) {
+ // const uint16_t a = (i < symbol) ? kCdfMaxProbability : rounding;
+ // cdf[i] += static_cast<int16_t>(a - cdf[i]) >> rate;
+ // }
+ //
+ // The subtraction (a - cdf[i]) relies on the overflow semantics of unsigned
+ // integer arithmetic. The result of the unsigned subtraction is cast to a
+ // signed integer and right-shifted. This requires the right shift of a
+ // signed integer be an arithmetic shift, which is true for clang, gcc, and
+ // Visual C++.
+ assert(symbol_count - 1 > 0);
+ int i = 0;
+ do {
+ if (i < symbol) {
+ cdf[i] += (kCdfMaxProbability - cdf[i]) >> rate;
+ } else {
+ cdf[i] -= cdf[i] >> rate;
+ }
+ } while (++i < symbol_count - 1);
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+}
+
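+// Worked example (illustrative values): with symbol_count == 8 and
+// count == 20, rate == (20 >> 4) + 4 + 1 == 6, so each cdf entry moves by
+// roughly 1/64 of its remaining distance toward kCdfMaxProbability (for
+// i < symbol) or toward 0 (for i >= symbol).
+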
+// Define the UpdateCdfN functions. UpdateCdfN is a specialized implementation
+// of UpdateCdf based on the fact that symbol_count == N. UpdateCdfN uses the
+// SIMD instruction sets if available.
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+// The UpdateCdf() method contains the following for loop:
+//
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// if (i < symbol) {
+// cdf[i] += (kCdfMaxProbability - cdf[i]) >> rate;
+// } else {
+// cdf[i] -= cdf[i] >> rate;
+// }
+// }
+//
+// It can be rewritten in the following two forms, which are amenable to SIMD
+// implementations:
+//
+// const int rounding = (1 << rate) - 1;
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// const uint16_t a = (i < symbol) ? kCdfMaxProbability : rounding;
+// cdf[i] += static_cast<int16_t>(a - cdf[i]) >> rate;
+// }
+//
+// or:
+//
+// const int rounding = (1 << rate) - 1;
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// const uint16_t a = (i < symbol) ? (kCdfMaxProbability - rounding) : 0;
+// cdf[i] -= static_cast<int16_t>(cdf[i] - a) >> rate;
+// }
+//
+// The following ARM NEON implementations use a modified version of the first
+// form, using the comparison mask and unsigned rollover to avoid the need to
+// calculate rounding.
+//
+// The cdf array has symbol_count + 1 elements. The first symbol_count elements
+// are the CDF. The last element is a count that is initialized to 0 and may
+// grow up to 32. The for loop in UpdateCdf updates the CDF in the array. Since
+// cdf[symbol_count - 1] is always 0, the for loop does not update
+// cdf[symbol_count - 1]. However, it would be correct to have the for loop
+// update cdf[symbol_count - 1] anyway: since symbol_count - 1 >= symbol, the
+// for loop would take the else branch when i is symbol_count - 1:
+// cdf[i] -= cdf[i] >> rate;
+// Since cdf[symbol_count - 1] is 0, cdf[symbol_count - 1] would still be 0
+// after the update. The ARM NEON implementations take advantage of this in the
+// following two cases:
+// 1. When symbol_count is 8 or 16, the vectorized code updates the first
+// symbol_count elements in the array.
+// 2. When symbol_count is 7, the vectorized code updates all 8 elements of
+// the cdf array. Since an invalid CDF value is written into cdf[7], the
+// count in cdf[7] needs to be fixed up after the vectorized code.
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const uint16_t count = cdf[5];
+ const int rate = (count >> 4) + 5;
+ const uint16x4_t cdf_max_probability = vdup_n_u16(kCdfMaxProbability);
+ const uint16x4_t index = vcreate_u16(0x0003000200010000);
+ const uint16x4_t symbol_vec = vdup_n_u16(symbol);
+ const uint16x4_t mask = vcge_u16(index, symbol_vec);
+ // i < symbol: 32768, i >= symbol: 65535.
+ const uint16x4_t a = vorr_u16(mask, cdf_max_probability);
+ // i < symbol: 32768 - cdf, i >= symbol: 65535 - cdf.
+ const int16x4_t diff = vreinterpret_s16_u16(vsub_u16(a, cdf_vec));
+ // i < symbol: cdf - 0, i >= symbol: cdf - 65535.
+ const uint16x4_t cdf_offset = vsub_u16(cdf_vec, mask);
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ // i < symbol: (32768 - cdf) >> rate, i >= symbol: (65535 (-1) - cdf) >> rate.
+ const uint16x4_t delta = vreinterpret_u16_s16(vshl_s16(diff, negative_rate));
+ // i < symbol: (cdf - 0) + ((32768 - cdf) >> rate).
+ // i >= symbol: (cdf - 65535) + ((65535 - cdf) >> rate).
+ cdf_vec = vadd_u16(cdf_offset, delta);
+ vst1_u16(cdf, cdf_vec);
+ cdf[5] = count + static_cast<uint16_t>(count < 32);
+}
+
+// This version works for |symbol_count| = 7, 8, or 9.
+// See UpdateCdf5 for implementation details.
+template <int symbol_count>
+void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+ static_assert(symbol_count >= 7 && symbol_count <= 9, "");
+ uint16x8_t cdf_vec = vld1q_u16(cdf);
+ const uint16_t count = cdf[symbol_count];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ const uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ const int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ const uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t delta =
+ vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec);
+ cdf[symbol_count] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<7>(cdf, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<8>(cdf, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<9>(cdf, symbol);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec = vld1q_u16(cdf + 2);
+ const uint16_t count = cdf[11];
+ cdf[11] = count + static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ if (symbol > 1) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t index = vcombine_u16(vcreate_u16(0x0005000400030002),
+ vcreate_u16(0x0009000800070006));
+ const uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ const uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ const int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ const uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ const uint16x8_t delta =
+ vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 2, cdf_vec);
+ } else {
+ if (symbol != 0) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t delta = vshlq_u16(cdf_vec, negative_rate);
+ cdf_vec = vsubq_u16(cdf_vec, delta);
+ vst1q_u16(cdf + 2, cdf_vec);
+ }
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec0 = vld1q_u16(cdf);
+ uint16x8_t cdf_vec1 = vld1q_u16(cdf + 4);
+ const uint16_t count = cdf[13];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+
+ uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec0));
+ uint16x8_t cdf_offset = vsubq_u16(cdf_vec0, mask);
+ uint16x8_t delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec0 = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec0);
+
+ index = vcombine_u16(vcreate_u16(0x0007000600050004),
+ vcreate_u16(0x000b000a00090008));
+ mask = vcgeq_u16(index, symbol_vec);
+ a = vorrq_u16(mask, cdf_max_probability);
+ diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec1));
+ cdf_offset = vsubq_u16(cdf_vec1, mask);
+ delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec1 = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 4, cdf_vec1);
+
+ cdf[13] = count + static_cast<uint16_t>(count < 32);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec = vld1q_u16(cdf);
+ const uint16_t count = cdf[16];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+
+ uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ uint16x8_t delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec);
+
+ cdf_vec = vld1q_u16(cdf + 8);
+ index = vcombine_u16(vcreate_u16(0x000b000a00090008),
+ vcreate_u16(0x000f000e000d000c));
+ mask = vcgeq_u16(index, symbol_vec);
+ a = vorrq_u16(mask, cdf_max_probability);
+ diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ cdf_offset = vsubq_u16(cdf_vec, mask);
+ delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 8, cdf_vec);
+
+ cdf[16] = count + static_cast<uint16_t>(count < 32);
+}
+
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+
+inline __m128i LoadLo8(const void* a) {
+ return _mm_loadl_epi64(static_cast<const __m128i*>(a));
+}
+
+inline __m128i LoadUnaligned16(const void* a) {
+ return _mm_loadu_si128(static_cast<const __m128i*>(a));
+}
+
+inline void StoreLo8(void* a, const __m128i v) {
+ _mm_storel_epi64(static_cast<__m128i*>(a), v);
+}
+
+inline void StoreUnaligned16(void* a, const __m128i v) {
+ _mm_storeu_si128(static_cast<__m128i*>(a), v);
+}
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec = LoadLo8(cdf);
+ const uint16_t count = cdf[5];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_shufflelo_epi16(_mm_cvtsi32_si128(kCdfMaxProbability), 0);
+ const __m128i index = _mm_set_epi32(0x0, 0x0, 0x00040003, 0x00020001);
+ const __m128i symbol_vec = _mm_shufflelo_epi16(_mm_cvtsi32_si128(symbol), 0);
+ // i >= symbol.
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ // i < symbol: 32768, i >= symbol: 65535.
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ // i < symbol: 32768 - cdf, i >= symbol: 65535 - cdf.
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ // i < symbol: cdf - 0, i >= symbol: cdf - 65535.
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ // i < symbol: (32768 - cdf) >> rate, i >= symbol: (65535 (-1) - cdf) >> rate.
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ // i < symbol: (cdf - 0) + ((32768 - cdf) >> rate).
+ // i >= symbol: (cdf - 65535) + ((65535 - cdf) >> rate).
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreLo8(cdf, cdf_vec);
+ cdf[5] = count + static_cast<uint16_t>(count < 32);
+}
+
+// This version works for |symbol_count| = 7, 8, or 9.
+// See UpdateCdf5 for implementation details.
+template <int symbol_count>
+void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+ static_assert(symbol_count >= 7 && symbol_count <= 9, "");
+ __m128i cdf_vec = LoadUnaligned16(cdf);
+ const uint16_t count = cdf[symbol_count];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i index =
+ _mm_set_epi32(0x00080007, 0x00060005, 0x00040003, 0x00020001);
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf, cdf_vec);
+ cdf[symbol_count] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<7>(cdf, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<8>(cdf, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<9>(cdf, symbol);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec = LoadUnaligned16(cdf + 2);
+ const uint16_t count = cdf[11];
+ cdf[11] = count + static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ if (symbol > 1) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i index =
+ _mm_set_epi32(0x000a0009, 0x00080007, 0x00060005, 0x00040003);
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf + 2, cdf_vec);
+ } else {
+ if (symbol != 0) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ const __m128i delta = _mm_sra_epi16(cdf_vec, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_sub_epi16(cdf_vec, delta);
+ StoreUnaligned16(cdf + 2, cdf_vec);
+ }
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec0 = LoadLo8(cdf);
+ __m128i cdf_vec1 = LoadUnaligned16(cdf + 4);
+ const uint16_t count = cdf[13];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+
+ const __m128i index = _mm_set_epi32(0x0, 0x0, 0x00040003, 0x00020001);
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec0);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec0, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec0 = _mm_add_epi16(cdf_offset, delta);
+ StoreLo8(cdf, cdf_vec0);
+
+ const __m128i index1 =
+ _mm_set_epi32(0x000c000b, 0x000a0009, 0x00080007, 0x00060005);
+ const __m128i mask1 = _mm_cmpgt_epi16(index1, symbol_vec);
+ const __m128i a1 = _mm_or_si128(mask1, cdf_max_probability);
+ const __m128i diff1 = _mm_sub_epi16(a1, cdf_vec1);
+ const __m128i cdf_offset1 = _mm_sub_epi16(cdf_vec1, mask1);
+ const __m128i delta1 = _mm_sra_epi16(diff1, _mm_cvtsi32_si128(rate));
+ cdf_vec1 = _mm_add_epi16(cdf_offset1, delta1);
+ StoreUnaligned16(cdf + 4, cdf_vec1);
+
+ cdf[13] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec0 = LoadUnaligned16(cdf);
+ const uint16_t count = cdf[16];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+
+ const __m128i index =
+ _mm_set_epi32(0x00080007, 0x00060005, 0x00040003, 0x00020001);
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec0);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec0, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec0 = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf, cdf_vec0);
+
+ __m128i cdf_vec1 = LoadUnaligned16(cdf + 8);
+ const __m128i index1 =
+ _mm_set_epi32(0x0010000f, 0x000e000d, 0x000c000b, 0x000a0009);
+ const __m128i mask1 = _mm_cmpgt_epi16(index1, symbol_vec);
+ const __m128i a1 = _mm_or_si128(mask1, cdf_max_probability);
+ const __m128i diff1 = _mm_sub_epi16(a1, cdf_vec1);
+ const __m128i cdf_offset1 = _mm_sub_epi16(cdf_vec1, mask1);
+ const __m128i delta1 = _mm_sra_epi16(diff1, _mm_cvtsi32_si128(rate));
+ cdf_vec1 = _mm_add_epi16(cdf_offset1, delta1);
+ StoreUnaligned16(cdf + 8, cdf_vec1);
+
+ cdf[16] = count + static_cast<uint16_t>(count < 32);
+}
+
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 5, symbol);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 7, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 8, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 9, symbol);
+}
+
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 11, symbol);
+}
+
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 13, symbol);
+}
+
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 16, symbol);
+}
+
+#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+inline DaalaBitReader::WindowSize HostToBigEndian(
+ const DaalaBitReader::WindowSize x) {
+ static_assert(sizeof(x) == 4 || sizeof(x) == 8, "");
+#if defined(__GNUC__)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return (sizeof(x) == 8) ? __builtin_bswap64(x) : __builtin_bswap32(x);
+#else
+ return x;
+#endif
+#elif defined(_WIN32)
+ // Note Windows targets are assumed to be little endian.
+ return static_cast<DaalaBitReader::WindowSize>(
+ (sizeof(x) == 8) ? _byteswap_uint64(static_cast<unsigned __int64>(x))
+ : _byteswap_ulong(static_cast<unsigned long>(x)));
+#else
+#error Unknown compiler!
+#endif // defined(__GNUC__)
+}
+
+} // namespace
+
+#if !LIBGAV1_CXX17
+constexpr int DaalaBitReader::kWindowSize; // static.
+#endif
+
+DaalaBitReader::DaalaBitReader(const uint8_t* data, size_t size,
+ bool allow_update_cdf)
+ : data_(data),
+ data_end_(data + size),
+ data_memcpy_end_((size >= sizeof(WindowSize))
+ ? data + size - sizeof(WindowSize) + 1
+ : data),
+ allow_update_cdf_(allow_update_cdf),
+ values_in_range_(kCdfMaxProbability) {
+ if (data_ < data_memcpy_end_) {
+ // This is a simplified version of PopulateBits() which loads 8 extra bits
+ // and skips the unnecessary shifts of value and window_diff_.
+ WindowSize value;
+ memcpy(&value, data_, sizeof(value));
+ data_ += sizeof(value);
+ window_diff_ = HostToBigEndian(value) ^ -1;
+ // Note the initial value of bits_ is larger than kMaxCachedBits as it's
+ // used to restore the most significant 0 bit that would be present after
+ // PopulateBits() when we extract the first symbol value.
+ // As shown in Section 8.2.2 Initialization process for symbol decoder,
+ // which uses a fixed offset to read the symbol values, the most
+ // significant bit is always 0:
+ // The variable numBits is set equal to Min( sz * 8, 15).
+ // The variable buf is read using the f(numBits) parsing process.
+ // The variable paddedBuf is set equal to ( buf << (15 - numBits) ).
+ // The variable SymbolValue is set to ((1 << 15) - 1) ^ paddedBuf.
+ bits_ = kWindowSize - 15;
+ return;
+ }
+ window_diff_ = 0;
+ bits_ = -15;
+ PopulateBits();
+}
+
+// This is similar to the ReadSymbol() implementation but it is optimized based
+// on the following facts:
+// * The probability is fixed at half. So some multiplications can be replaced
+// with bit operations.
+// * Symbol count is fixed at 2.
+int DaalaBitReader::ReadBit() {
+ const uint32_t curr =
+ ((values_in_range_ & kReadBitMask) >> 1) + kMinimumProbabilityPerSymbol;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ int bit = 1;
+ if (symbol_value >= curr) {
+ values_in_range_ -= curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ bit = 0;
+ } else {
+ values_in_range_ = curr;
+ }
+ NormalizeRange();
+ return bit;
+}
+
+int64_t DaalaBitReader::ReadLiteral(int num_bits) {
+ assert(num_bits <= 32);
+ assert(num_bits > 0);
+ uint32_t literal = 0;
+ int bit = num_bits - 1;
+ do {
+ // ARM can combine a shift operation with a constant number of bits with
+ // some other operations, such as the OR operation.
+ // Here is an ARM disassembly example:
+ // orr w1, w0, w1, lsl #1
+ // which left shifts register w1 by 1 bit and OR the shift result with
+ // register w0.
+ // The next 2 lines are equivalent to:
+ // literal |= static_cast<uint32_t>(ReadBit()) << bit;
+ literal <<= 1;
+ literal |= static_cast<uint32_t>(ReadBit());
+ } while (--bit >= 0);
+ return literal;
+}
+
+int DaalaBitReader::ReadSymbol(uint16_t* const cdf, int symbol_count) {
+ const int symbol = ReadSymbolImpl(cdf, symbol_count);
+ if (allow_update_cdf_) {
+ UpdateCdf(cdf, symbol_count, symbol);
+ }
+ return symbol;
+}
+
+bool DaalaBitReader::ReadSymbol(uint16_t* cdf) {
+ assert(cdf[1] == 0);
+ const bool symbol = ReadSymbolImpl(cdf[0]) != 0;
+ if (allow_update_cdf_) {
+ const uint16_t count = cdf[2];
+ // rate is computed in the spec as:
+ // 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
+ // In this case N is 2 and cdf[N] is |count|. So the equation becomes:
+ // 4 + (count > 15) + (count > 31)
+ // Note that the largest value for count is 32 (it is not incremented beyond
+ // 32). So using that information:
+ // count >> 4 is 0 for count from 0 to 15.
+ // count >> 4 is 1 for count from 16 to 31.
+ // count >> 4 is 2 for count == 32.
+ // Now, the equation becomes:
+ // 4 + (count >> 4).
+ // Since (count >> 4) can only be 0 or 1 or 2, the addition can be replaced
+ // with bitwise or. So the final equation is:
+ // 4 | (count >> 4).
+ const int rate = 4 | (count >> 4);
+ if (symbol) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ }
+ cdf[2] += static_cast<uint16_t>(count < 32);
+ }
+ return symbol;
+}
+
+bool DaalaBitReader::ReadSymbolWithoutCdfUpdate(uint16_t cdf) {
+ return ReadSymbolImpl(cdf) != 0;
+}
+
+template <int symbol_count>
+int DaalaBitReader::ReadSymbol(uint16_t* const cdf) {
+ static_assert(symbol_count >= 3 && symbol_count <= 16, "");
+ if (symbol_count == 3 || symbol_count == 4) {
+ return ReadSymbol3Or4(cdf, symbol_count);
+ }
+ int symbol;
+ if (symbol_count == 8) {
+ symbol = ReadSymbolImpl8(cdf);
+ } else if (symbol_count <= 13) {
+ symbol = ReadSymbolImpl(cdf, symbol_count);
+ } else {
+ symbol = ReadSymbolImplBinarySearch(cdf, symbol_count);
+ }
+ if (allow_update_cdf_) {
+ if (symbol_count == 5) {
+ UpdateCdf5(cdf, symbol);
+ } else if (symbol_count == 7) {
+ UpdateCdf7(cdf, symbol);
+ } else if (symbol_count == 8) {
+ UpdateCdf8(cdf, symbol);
+ } else if (symbol_count == 9) {
+ UpdateCdf9(cdf, symbol);
+ } else if (symbol_count == 11) {
+ UpdateCdf11(cdf, symbol);
+ } else if (symbol_count == 13) {
+ UpdateCdf13(cdf, symbol);
+ } else if (symbol_count == 16) {
+ UpdateCdf16(cdf, symbol);
+ } else {
+ UpdateCdf(cdf, symbol_count, symbol);
+ }
+ }
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImpl(const uint16_t* const cdf,
+ int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ --symbol_count;
+ uint32_t curr = values_in_range_;
+ int symbol = -1;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * symbol_count;
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over.
+ do {
+ prev = curr;
+ curr = (((values_in_range_ >> 8) * (cdf[++symbol] >> kCdfPrecision)) >> 1) +
+ delta;
+ delta -= kMinimumProbabilityPerSymbol;
+ } while (symbol_value < curr);
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImplBinarySearch(const uint16_t* const cdf,
+ int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ assert(symbol_count > 1 && symbol_count <= 16);
+ --symbol_count;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over. Since the CDFs are sorted, we can use binary
+ // search to do this. Let |symbol| be the index of the first |cdf| array
+ // entry whose scaled cdf value is less than or equal to |symbol_value|. The
+ // binary search maintains the invariant:
+ // low <= symbol <= high + 1
+ // and terminates when low == high + 1.
+ int low = 0;
+ int high = symbol_count - 1;
+ // The binary search maintains the invariants that |prev| is the scaled cdf
+ // value for low - 1 and |curr| is the scaled cdf value for high + 1. (By
+ // convention, the scaled cdf value for -1 is values_in_range_.) When the
+ // binary search terminates, |prev| is the scaled cdf value for symbol - 1
+ // and |curr| is the scaled cdf value for |symbol|.
+ uint32_t prev = values_in_range_;
+ uint32_t curr = 0;
+ const uint32_t values_in_range_shifted = values_in_range_ >> 8;
+ do {
+ const int mid = DivideBy2(low + high);
+ const uint32_t scaled_cdf =
+ ScaleCdf(values_in_range_shifted, cdf, mid, symbol_count);
+ if (symbol_value < scaled_cdf) {
+ low = mid + 1;
+ prev = scaled_cdf;
+ } else {
+ high = mid - 1;
+ curr = scaled_cdf;
+ }
+ } while (low <= high);
+ assert(low == high + 1);
+ // At this point, |low| is the symbol that has been decoded.
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return low;
+}
+
+int DaalaBitReader::ReadSymbolImpl(uint16_t cdf) {
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ const uint32_t curr =
+ (((values_in_range_ >> 8) * (cdf >> kCdfPrecision)) >> 1) +
+ kMinimumProbabilityPerSymbol;
+ const int symbol = static_cast<int>(symbol_value < curr);
+ if (symbol == 1) {
+ values_in_range_ = curr;
+ } else {
+ values_in_range_ -= curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ }
+ NormalizeRange();
+ return symbol;
+}
+
+// Equivalent to ReadSymbol(cdf, [3,4]), with the ReadSymbolImpl and UpdateCdf
+// calls inlined.
+int DaalaBitReader::ReadSymbol3Or4(uint16_t* const cdf,
+ const int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ uint32_t curr = values_in_range_;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * (symbol_count - 1);
+ const uint32_t values_in_range_shifted = values_in_range_ >> 8;
+
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over. If allow_update_cdf_ is true, update the |cdf|
+ // array.
+ //
+ // The original code is:
+ //
+ // int symbol = -1;
+ // do {
+ // prev = curr;
+ // curr =
+ // ((values_in_range_shifted * (cdf[++symbol] >> kCdfPrecision)) >> 1)
+ // + delta;
+ // delta -= kMinimumProbabilityPerSymbol;
+ // } while (symbol_value < curr);
+ // if (allow_update_cdf_) {
+ // UpdateCdf(cdf, [3,4], symbol);
+ // }
+ //
+ // The do-while loop is unrolled with three or four iterations, and the
+ // UpdateCdf call is inlined and merged into the iterations.
+ int symbol = 0;
+ // Iteration 0.
+ prev = curr;
+ curr =
+ ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) {
+ // symbol == 0.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/0).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count == 4);
+ if (symbol_count == 4) {
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+ // 1. On Motorola Moto G5 Plus (running 32-bit Android 8.1.0), the ARM
+ // NEON code is slower. Consider using the C version if __arm__ is
+ // defined.
+ // 2. The ARM NEON code (compiled for arm64) is slightly slower on
+ // Samsung Galaxy S8+ (SM-G955FD).
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ const uint16x4_t delta = vshl_u16(cdf_vec, negative_rate);
+ cdf_vec = vsub_u16(cdf_vec, delta);
+ vst1_u16(cdf, cdf_vec);
+#elif LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ __m128i cdf_vec = LoadLo8(cdf);
+ const __m128i delta = _mm_sra_epi16(cdf_vec, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_sub_epi16(cdf_vec, delta);
+ StoreLo8(cdf, cdf_vec);
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ cdf[2] -= cdf[2] >> rate;
+#endif
+ } else { // symbol_count == 3.
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ }
+ goto found;
+ }
+ ++symbol;
+ delta -= kMinimumProbabilityPerSymbol;
+ // Iteration 1.
+ prev = curr;
+ curr =
+ ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) {
+ // symbol == 1.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/1).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count == 4);
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ if (symbol_count == 4) cdf[2] -= cdf[2] >> rate;
+ }
+ goto found;
+ }
+ ++symbol;
+ if (symbol_count == 4) {
+ delta -= kMinimumProbabilityPerSymbol;
+ // Iteration 2.
+ prev = curr;
+ curr = ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) +
+ delta;
+ if (symbol_value >= curr) {
+ // symbol == 2.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, 4, /*symbol=*/2).
+ const uint16_t count = cdf[4];
+ cdf[4] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ cdf[2] -= cdf[2] >> rate;
+ }
+ goto found;
+ }
+ ++symbol;
+ }
+ // |delta| is 0 for the last iteration.
+ // Iteration 2 (symbol_count == 3) or 3 (symbol_count == 4).
+ prev = curr;
+ // Since cdf[symbol_count - 1] is 0 and |delta| is 0, |curr| is also 0.
+ curr = 0;
+ // symbol == [2,3].
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/[2,3]).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (4 | (count >> 4)) + static_cast<int>(symbol_count == 4);
+ if (symbol_count == 4) {
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+ // On Motorola Moto G5 Plus (running 32-bit Android 8.1.0), the ARM NEON
+ // code is a tiny bit slower. Consider using the C version if __arm__ is
+ // defined.
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const uint16x4_t cdf_max_probability = vdup_n_u16(kCdfMaxProbability);
+ const int16x4_t diff =
+ vreinterpret_s16_u16(vsub_u16(cdf_max_probability, cdf_vec));
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ const uint16x4_t delta =
+ vreinterpret_u16_s16(vshl_s16(diff, negative_rate));
+ cdf_vec = vadd_u16(cdf_vec, delta);
+ vst1_u16(cdf, cdf_vec);
+ cdf[3] = 0;
+#elif LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ __m128i cdf_vec = LoadLo8(cdf);
+ const __m128i cdf_max_probability =
+ _mm_shufflelo_epi16(_mm_cvtsi32_si128(kCdfMaxProbability), 0);
+ const __m128i diff = _mm_sub_epi16(cdf_max_probability, cdf_vec);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_vec, delta);
+ StoreLo8(cdf, cdf_vec);
+ cdf[3] = 0;
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ cdf[2] += (kCdfMaxProbability - cdf[2]) >> rate;
+#endif
+ } else { // symbol_count == 3.
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ }
+ }
+found:
+ // End of unrolled do-while loop.
+
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImpl8(const uint16_t* const cdf) {
+ assert(cdf[7] == 0);
+ uint32_t curr = values_in_range_;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * 7;
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over.
+ //
+ // The original code is:
+ //
+ // int symbol = -1;
+ // do {
+ // prev = curr;
+ // curr =
+ // (((values_in_range_ >> 8) * (cdf[++symbol] >> kCdfPrecision)) >> 1)
+ // + delta;
+ // delta -= kMinimumProbabilityPerSymbol;
+ // } while (symbol_value < curr);
+ //
+ // The do-while loop is unrolled with eight iterations.
+ int symbol = 0;
+
+#define READ_SYMBOL_ITERATION \
+ prev = curr; \
+ curr = (((values_in_range_ >> 8) * (cdf[symbol] >> kCdfPrecision)) >> 1) + \
+ delta; \
+ if (symbol_value >= curr) goto found; \
+ ++symbol; \
+ delta -= kMinimumProbabilityPerSymbol
+
+ READ_SYMBOL_ITERATION; // Iteration 0.
+ READ_SYMBOL_ITERATION; // Iteration 1.
+ READ_SYMBOL_ITERATION; // Iteration 2.
+ READ_SYMBOL_ITERATION; // Iteration 3.
+ READ_SYMBOL_ITERATION; // Iteration 4.
+ READ_SYMBOL_ITERATION; // Iteration 5.
+
+ // The last two iterations can be simplified, so they don't use the
+ // READ_SYMBOL_ITERATION macro.
+#undef READ_SYMBOL_ITERATION
+
+ // Iteration 6.
+ prev = curr;
+ curr =
+ (((values_in_range_ >> 8) * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) goto found; // symbol == 6.
+ ++symbol;
+ // |delta| is 0 for the last iteration.
+ // Iteration 7.
+ prev = curr;
+ // Since cdf[7] is 0 and |delta| is 0, |curr| is also 0.
+ curr = 0;
+ // symbol == 7.
+found:
+ // End of unrolled do-while loop.
+
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+void DaalaBitReader::PopulateBits() {
+ constexpr int kMaxCachedBits = kWindowSize - 16;
+#if defined(__aarch64__)
+ // Fast path: read eight bytes and add the first six bytes to window_diff_.
+ // This fast path makes the following assumptions.
+ // 1. We assume that unaligned load of uint64_t is fast.
+ // 2. When there are enough bytes in data_, the for loop below reads 6 or 7
+ // bytes depending on the value of bits_. This fast path always reads 6
+ // bytes, which results in more calls to PopulateBits(). We assume that
+ // making more calls to a faster PopulateBits() is overall a win.
+ // NOTE: Although this fast path could also be used on x86_64, it hurts
+ // performance (measured on Lenovo ThinkStation P920 running Linux). (The
+ // reason is still unknown.) Therefore this fast path is only used on arm64.
+ static_assert(kWindowSize == 64, "");
+ if (data_ < data_memcpy_end_) {
+ uint64_t value;
+ // arm64 supports unaligned loads, so this memcpy call is compiled to a
+ // single ldr instruction.
+ memcpy(&value, data_, sizeof(value));
+ data_ += kMaxCachedBits >> 3;
+ value = HostToBigEndian(value) ^ -1;
+ value >>= kWindowSize - kMaxCachedBits;
+ window_diff_ = value | (window_diff_ << kMaxCachedBits);
+ bits_ += kMaxCachedBits;
+ return;
+ }
+#endif
+
+ const uint8_t* data = data_;
+ int bits = bits_;
+ WindowSize window_diff = window_diff_;
+
+ int count = kWindowSize - 9 - (bits + 15);
+ // The fast path above, if compiled, would cause clang 8.0.7 to vectorize
+ // this loop. Since -15 <= bits_ <= -1, this loop has at most 6 or 7
+ // iterations when WindowSize is 64 bits. So it is not profitable to
+ // vectorize this loop. Note that clang 8.0.7 does not vectorize this loop if
+ // the fast path above is not compiled.
+
+#ifdef __clang__
+#pragma clang loop vectorize(disable) interleave(disable)
+#endif
+ for (; count >= 0 && data < data_end_; count -= 8) {
+ const uint8_t value = *data++ ^ -1;
+ window_diff = static_cast<WindowSize>(value) | (window_diff << 8);
+ bits += 8;
+ }
+ assert(bits <= kMaxCachedBits);
+ if (data == data_end_) {
+ // Shift in some 1s. This is equivalent to providing fake 0 data bits.
+ window_diff = ((window_diff + 1) << (kMaxCachedBits - bits)) - 1;
+ bits = kMaxCachedBits;
+ }
+
+ data_ = data;
+ bits_ = bits;
+ window_diff_ = window_diff;
+}
+
+void DaalaBitReader::NormalizeRange() {
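+ // values_in_range_ is always less than 2^16, so FloorLog2() returns a value
+ // in [0, 15] and 15 ^ FloorLog2(x) == 15 - FloorLog2(x): the shift needed to
+ // bring values_in_range_ back into [2^15, 2^16).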
+ const int bits_used = 15 ^ FloorLog2(values_in_range_);
+ bits_ -= bits_used;
+ values_in_range_ <<= bits_used;
+ if (bits_ < 0) PopulateBits();
+}
+
+// Explicit instantiations.
+template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+
+} // namespace libgav1
diff --git a/src/utils/entropy_decoder.h b/src/utils/entropy_decoder.h
new file mode 100644
index 0000000..c066b98
--- /dev/null
+++ b/src/utils/entropy_decoder.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
+#define LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "src/utils/bit_reader.h"
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+class DaalaBitReader : public BitReader {
+ public:
+ // WindowSize must be an unsigned integer type with at least 32 bits. Use the
+ // largest type with fast arithmetic. size_t should meet these requirements.
+ using WindowSize = size_t;
+
+ DaalaBitReader(const uint8_t* data, size_t size, bool allow_update_cdf);
+ ~DaalaBitReader() override = default;
+
+ // Move only.
+ DaalaBitReader(DaalaBitReader&& rhs) noexcept;
+ DaalaBitReader& operator=(DaalaBitReader&& rhs) noexcept;
+
+ int ReadBit() final;
+ int64_t ReadLiteral(int num_bits) override;
+ // ReadSymbol() calls for which the |symbol_count| is only known at runtime
+ // will use this variant.
+ int ReadSymbol(uint16_t* cdf, int symbol_count);
+ // ReadSymbol() calls for which the |symbol_count| is equal to 2 (boolean
+ // symbols) will use this variant.
+ bool ReadSymbol(uint16_t* cdf);
+ bool ReadSymbolWithoutCdfUpdate(uint16_t cdf);
+ // Use either linear search or binary search for decoding the symbol depending
+ // on |symbol_count|. ReadSymbol calls for which the |symbol_count| is known
+ // at compile time will use this variant.
+ template <int symbol_count>
+ int ReadSymbol(uint16_t* cdf);
+
+ private:
+ static constexpr int kWindowSize = static_cast<int>(sizeof(WindowSize)) * 8;
+ static_assert(kWindowSize >= 32, "");
+
+ // Reads a symbol using the |cdf| table which contains the probabilities of
+ // each symbol. On a high level, this function does the following:
+ // 1) Scale the |cdf| values.
+ // 2) Find the index in the |cdf| array where the scaled CDF value crosses
+ // the modified |window_diff_| threshold.
+ // 3) That index is the symbol that has been decoded.
+ // 4) Update |window_diff_| and |values_in_range_| based on the symbol that
+ // has been decoded.
+ inline int ReadSymbolImpl(const uint16_t* cdf, int symbol_count);
+ // Similar to ReadSymbolImpl but it uses binary search to perform step 2 in
+ // the comment above. As of now, this function is called when |symbol_count|
+ // is greater than or equal to 14.
+ inline int ReadSymbolImplBinarySearch(const uint16_t* cdf, int symbol_count);
+ // Specialized implementation of ReadSymbolImpl based on the fact that
+ // symbol_count == 2.
+ inline int ReadSymbolImpl(uint16_t cdf);
+ // ReadSymbolN is a specialization of ReadSymbol for symbol_count == N.
+ LIBGAV1_ALWAYS_INLINE int ReadSymbol3Or4(uint16_t* cdf, int symbol_count);
+ // ReadSymbolImplN is a specialization of ReadSymbolImpl for
+ // symbol_count == N.
+ LIBGAV1_ALWAYS_INLINE int ReadSymbolImpl8(const uint16_t* cdf);
+ inline void PopulateBits();
+ // Normalizes the range so that 32768 <= |values_in_range_| < 65536. Also
+ // calls PopulateBits() if necessary.
+ inline void NormalizeRange();
+
+ const uint8_t* data_;
+ const uint8_t* const data_end_;
+ // If |data_| < |data_memcpy_end_|, then we can read sizeof(WindowSize) bytes
+ // from |data_|. Note with sizeof(WindowSize) == 4 this is only used in the
+ // constructor, not PopulateBits().
+ const uint8_t* const data_memcpy_end_;
+ const bool allow_update_cdf_;
+ // Number of cached bits of data in the current value.
+ int bits_;
+ // Number of values in the current range. Declared as uint32_t for better
+ // performance but only the lower 16 bits are used.
+ uint32_t values_in_range_;
+ // The difference between the high end of the current range and the coded
+ // value minus 1. The 16 bits above |bits_| of this variable are used to
+ // decode the next symbol. It is filled in whenever |bits_| is less than 0.
+ // Note this implementation differs from the spec as it trades the need to
+ // shift in 1s in NormalizeRange() with an extra shift in PopulateBits(),
+ // which occurs less frequently.
+ WindowSize window_diff_;
+};
+
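+// Usage sketch (illustrative; |data|, |size| and the cdf arrays are
+// hypothetical): reading from a tile bitstream might look like:
+//   DaalaBitReader reader(data, size, /*allow_update_cdf=*/true);
+//   const bool flag = reader.ReadSymbol(boolean_cdf);      // 2-symbol CDF.
+//   const int mode = reader.ReadSymbol<4>(four_symbol_cdf);
+//   const int64_t literal = reader.ReadLiteral(/*num_bits=*/3);
+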
+extern template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
diff --git a/src/utils/executor.cc b/src/utils/executor.cc
new file mode 100644
index 0000000..6934057
--- /dev/null
+++ b/src/utils/executor.cc
@@ -0,0 +1,21 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/executor.h"
+
+namespace libgav1 {
+
+Executor::~Executor() = default;
+
+} // namespace libgav1
diff --git a/src/utils/executor.h b/src/utils/executor.h
new file mode 100644
index 0000000..21abdf8
--- /dev/null
+++ b/src/utils/executor.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_EXECUTOR_H_
+#define LIBGAV1_SRC_UTILS_EXECUTOR_H_
+
+#include <functional>
+
+namespace libgav1 {
+
+class Executor {
+ public:
+ virtual ~Executor();
+
+ // Schedules the specified "callback" for execution in this executor.
+ // Depending on the subclass implementation, this may block in some
+ // situations.
+ virtual void Schedule(std::function<void()> callback) = 0;
+};
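+
+// Sketch of a trivial subclass (illustrative only, not part of the library):
+// an executor that runs each callback inline on the calling thread.
+//   class InlineExecutor : public Executor {
+//    public:
+//     void Schedule(std::function<void()> callback) override { callback(); }
+//   };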
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_EXECUTOR_H_
diff --git a/src/utils/libgav1_utils.cmake b/src/utils/libgav1_utils.cmake
new file mode 100644
index 0000000..8b6ec4b
--- /dev/null
+++ b/src/utils/libgav1_utils.cmake
@@ -0,0 +1,72 @@
+# Copyright 2019 The libgav1 Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if(LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_)
+ return()
+endif() # LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_
+set(LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_ 1)
+
+list(APPEND libgav1_utils_sources
+ "${libgav1_source}/utils/array_2d.h"
+ "${libgav1_source}/utils/bit_mask_set.h"
+ "${libgav1_source}/utils/bit_reader.cc"
+ "${libgav1_source}/utils/bit_reader.h"
+ "${libgav1_source}/utils/block_parameters_holder.cc"
+ "${libgav1_source}/utils/block_parameters_holder.h"
+ "${libgav1_source}/utils/blocking_counter.h"
+ "${libgav1_source}/utils/common.h"
+ "${libgav1_source}/utils/compiler_attributes.h"
+ "${libgav1_source}/utils/constants.cc"
+ "${libgav1_source}/utils/constants.h"
+ "${libgav1_source}/utils/cpu.cc"
+ "${libgav1_source}/utils/cpu.h"
+ "${libgav1_source}/utils/dynamic_buffer.h"
+ "${libgav1_source}/utils/entropy_decoder.cc"
+ "${libgav1_source}/utils/entropy_decoder.h"
+ "${libgav1_source}/utils/executor.cc"
+ "${libgav1_source}/utils/executor.h"
+ "${libgav1_source}/utils/logging.cc"
+ "${libgav1_source}/utils/logging.h"
+ "${libgav1_source}/utils/memory.h"
+ "${libgav1_source}/utils/parameter_tree.cc"
+ "${libgav1_source}/utils/parameter_tree.h"
+ "${libgav1_source}/utils/queue.h"
+ "${libgav1_source}/utils/raw_bit_reader.cc"
+ "${libgav1_source}/utils/raw_bit_reader.h"
+ "${libgav1_source}/utils/reference_info.h"
+ "${libgav1_source}/utils/segmentation.cc"
+ "${libgav1_source}/utils/segmentation.h"
+ "${libgav1_source}/utils/segmentation_map.cc"
+ "${libgav1_source}/utils/segmentation_map.h"
+ "${libgav1_source}/utils/stack.h"
+ "${libgav1_source}/utils/threadpool.cc"
+ "${libgav1_source}/utils/threadpool.h"
+ "${libgav1_source}/utils/types.h"
+ "${libgav1_source}/utils/unbounded_queue.h"
+ "${libgav1_source}/utils/vector.h")
+
+macro(libgav1_add_utils_targets)
+ libgav1_add_library(NAME
+ libgav1_utils
+ TYPE
+ OBJECT
+ SOURCES
+ ${libgav1_utils_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_include_paths}
+ ${libgav1_gtest_include_paths})
+
+endmacro()
diff --git a/src/utils/logging.cc b/src/utils/logging.cc
new file mode 100644
index 0000000..9a43c22
--- /dev/null
+++ b/src/utils/logging.cc
@@ -0,0 +1,65 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/logging.h"
+
+#include <cstdarg>
+#include <cstdio>
+#include <sstream>
+#include <thread> // NOLINT (unapproved c++11 header)
+
+#if !defined(LIBGAV1_LOG_LEVEL)
+#define LIBGAV1_LOG_LEVEL (1 << 30)
+#endif
+
+namespace libgav1 {
+namespace internal {
+#if LIBGAV1_ENABLE_LOGGING
+namespace {
+
+const char* LogSeverityName(LogSeverity severity) {
+ switch (severity) {
+ case LogSeverity::kInfo:
+ return "INFO";
+ case LogSeverity::kError:
+ return "ERROR";
+ case LogSeverity::kWarning:
+ return "WARNING";
+ }
+ return "UNKNOWN";
+}
+
+} // namespace
+
+void Log(LogSeverity severity, const char* file, int line, const char* format,
+ ...) {
+ if (LIBGAV1_LOG_LEVEL < static_cast<int>(severity)) return;
+ std::ostringstream ss;
+ ss << std::hex << std::this_thread::get_id();
+ fprintf(stderr, "%s %s %s:%d] ", LogSeverityName(severity), ss.str().c_str(),
+ file, line);
+
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+}
+#else // !LIBGAV1_ENABLE_LOGGING
+void Log(LogSeverity /*severity*/, const char* /*file*/, int /*line*/,
+ const char* /*format*/, ...) {}
+#endif // LIBGAV1_ENABLE_LOGGING
+
+} // namespace internal
+} // namespace libgav1
diff --git a/src/utils/logging.h b/src/utils/logging.h
new file mode 100644
index 0000000..48928db
--- /dev/null
+++ b/src/utils/logging.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_LOGGING_H_
+#define LIBGAV1_SRC_UTILS_LOGGING_H_
+
+#include <cstddef>
+
+#include "src/utils/compiler_attributes.h"
+
+#if !defined(LIBGAV1_ENABLE_LOGGING)
+#if defined(NDEBUG) || defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
+#define LIBGAV1_ENABLE_LOGGING 0
+#else
+#define LIBGAV1_ENABLE_LOGGING 1
+#endif
+#endif
+
+#if LIBGAV1_ENABLE_LOGGING
+// LIBGAV1_DLOG(severity, printf-format-string)
+// Debug logging that can optionally be enabled in release builds by explicitly
+// setting LIBGAV1_ENABLE_LOGGING.
+// Severity is given as an all-caps version of enum LogSeverity with the
+// leading 'k' removed: LIBGAV1_DLOG(INFO, "...");
+#define LIBGAV1_DLOG(severity, ...) \
+ do { \
+ constexpr const char* libgav1_logging_internal_basename = \
+ ::libgav1::internal::Basename(__FILE__, sizeof(__FILE__) - 1); \
+ ::libgav1::internal::Log(LIBGAV1_LOGGING_INTERNAL_##severity, \
+ libgav1_logging_internal_basename, __LINE__, \
+ __VA_ARGS__); \
+ } while (0)
+#else
+#define LIBGAV1_DLOG(severity, ...) \
+ do { \
+ } while (0)
+#endif // LIBGAV1_ENABLE_LOGGING
+
+#define LIBGAV1_LOGGING_INTERNAL_ERROR ::libgav1::internal::LogSeverity::kError
+#define LIBGAV1_LOGGING_INTERNAL_WARNING \
+ ::libgav1::internal::LogSeverity::kWarning
+#define LIBGAV1_LOGGING_INTERNAL_INFO ::libgav1::internal::LogSeverity::kInfo
+
+namespace libgav1 {
+namespace internal {
+
+enum class LogSeverity : int {
+ kError,
+ kWarning,
+ kInfo,
+};
+
+// Helper function to implement LIBGAV1_DLOG
+// Logs |format, ...| at |severity| level, reporting it as called from
+// |file|:|line|.
+void Log(libgav1::internal::LogSeverity severity, const char* file, int line,
+ const char* format, ...) LIBGAV1_PRINTF_ATTRIBUTE(4, 5);
+
+// Compile-time function to get the 'base' file_name, that is, the part of
+// a file_name after the last '/' or '\' path separator. The search starts at
+// the end of the string; the second parameter is the length of the string.
+constexpr const char* Basename(const char* file_name, size_t offset) {
+ return (offset == 0 || file_name[offset - 1] == '/' ||
+ file_name[offset - 1] == '\\')
+ ? file_name + offset
+ : Basename(file_name, offset - 1);
+}
+
+} // namespace internal
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_LOGGING_H_
diff --git a/src/utils/memory.h b/src/utils/memory.h
new file mode 100644
index 0000000..219a83f
--- /dev/null
+++ b/src/utils/memory.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_MEMORY_H_
+#define LIBGAV1_SRC_UTILS_MEMORY_H_
+
+#if defined(__ANDROID__) || defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <new>
+
+namespace libgav1 {
+
+enum {
+// The byte alignment required for buffers used with SIMD code to be read or
+// written with aligned operations.
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+ defined(_M_X64)
+ kMaxAlignment = 32, // extended alignment is safe on x86.
+#else
+ kMaxAlignment = alignof(max_align_t),
+#endif
+};
+
+// AlignedAlloc, AlignedFree
+//
+// void* AlignedAlloc(size_t alignment, size_t size);
+// Allocate aligned memory.
+// |alignment| must be a power of 2.
+// Unlike posix_memalign(), |alignment| may be smaller than sizeof(void*).
+// Unlike aligned_alloc(), |size| does not need to be a multiple of
+// |alignment|.
+// The returned pointer should be freed by AlignedFree().
+//
+// void AlignedFree(void* aligned_memory);
+// Free aligned memory.
+
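+// Illustrative usage (a sketch; the buffer name and sizes are hypothetical):
+//   void* buffer = AlignedAlloc(/*alignment=*/32, /*size=*/1024);
+//   if (buffer != nullptr) {
+//     // ... use |buffer| with aligned SIMD loads/stores ...
+//     AlignedFree(buffer);
+//   }
+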
+#if defined(_MSC_VER) // MSVC
+
+inline void* AlignedAlloc(size_t alignment, size_t size) {
+ return _aligned_malloc(size, alignment);
+}
+
+inline void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); }
+
+#else // !defined(_MSC_VER)
+
+inline void* AlignedAlloc(size_t alignment, size_t size) {
+#if defined(__ANDROID__)
+ // Although posix_memalign() was introduced in Android API level 17, it is
+ // more convenient to use memalign(). Unlike glibc, Android does not consider
+ // memalign() an obsolete function.
+ return memalign(alignment, size);
+#else // !defined(__ANDROID__)
+ void* ptr = nullptr;
+ // posix_memalign requires that the requested alignment be at least
+ // sizeof(void*). In this case, fall back on malloc which should return
+ // memory aligned to at least the size of a pointer.
+ const size_t required_alignment = sizeof(void*);
+ if (alignment < required_alignment) return malloc(size);
+ const int error = posix_memalign(&ptr, alignment, size);
+ if (error != 0) {
+ errno = error;
+ return nullptr;
+ }
+ return ptr;
+#endif // defined(__ANDROID__)
+}
+
+inline void AlignedFree(void* aligned_memory) { free(aligned_memory); }
+
+#endif // defined(_MSC_VER)
+
+inline void Memset(uint8_t* const dst, int value, size_t count) {
+ memset(dst, value, count);
+}
+
+inline void Memset(uint16_t* const dst, int value, size_t count) {
+ for (size_t i = 0; i < count; ++i) {
+ dst[i] = static_cast<uint16_t>(value);
+ }
+}
+
+struct MallocDeleter {
+ void operator()(void* ptr) const { free(ptr); }
+};
+
+struct AlignedDeleter {
+ void operator()(void* ptr) const { AlignedFree(ptr); }
+};
+
+template <typename T>
+using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
+
+// Allocates aligned memory for an array of |count| elements of type T.
+template <typename T>
+inline AlignedUniquePtr<T> MakeAlignedUniquePtr(size_t alignment,
+ size_t count) {
+ return AlignedUniquePtr<T>(
+ static_cast<T*>(AlignedAlloc(alignment, count * sizeof(T))));
+}
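+
+// For example (illustrative; the type, alignment, and count are hypothetical),
+// a 16-byte-aligned array of 64 int16_t values:
+//   AlignedUniquePtr<int16_t> buf = MakeAlignedUniquePtr<int16_t>(16, 64);
+//   if (buf == nullptr) { /* allocation failed */ }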
+
+// A base class with custom new and delete operators. The exception-throwing
+// new operators are deleted. The "new (std::nothrow)" form must be used.
+//
+// The new operators return nullptr if the requested size is greater than
+// 0x40000000 bytes (1 GB). TODO(wtc): Make the maximum allocable memory size
+// a compile-time configuration macro.
+//
+// See https://en.cppreference.com/w/cpp/memory/new/operator_new and
+// https://en.cppreference.com/w/cpp/memory/new/operator_delete.
+//
+// NOTE: The allocation and deallocation functions are static member functions
+// whether the keyword 'static' is used or not.
+struct Allocable {
+ // Class-specific allocation functions.
+ static void* operator new(size_t size) = delete;
+ static void* operator new[](size_t size) = delete;
+
+ // Class-specific non-throwing allocation functions
+ static void* operator new(size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+ return ::operator new(size, tag);
+ }
+ static void* operator new[](size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+ return ::operator new[](size, tag);
+ }
+
+ // Class-specific deallocation functions.
+ static void operator delete(void* ptr) noexcept { ::operator delete(ptr); }
+ static void operator delete[](void* ptr) noexcept {
+ ::operator delete[](ptr);
+ }
+
+ // Only called if new (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
+ ::operator delete(ptr, tag);
+ }
+ // Only called if new[] (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
+ ::operator delete[](ptr, tag);
+ }
+};
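+
+// Illustrative use of Allocable (|MyType| is a hypothetical class): derived
+// classes must be allocated with the non-throwing form of new.
+//   class MyType : public Allocable { ... };
+//   std::unique_ptr<MyType> t(new (std::nothrow) MyType());
+//   if (t == nullptr) { /* handle allocation failure */ }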
+
+// A variant of Allocable that forces allocations to be aligned to
+// kMaxAlignment bytes. This is intended for use with classes that use
+// alignas() with this value. C++17 aligned new/delete are used if available,
+// otherwise we use AlignedAlloc/Free.
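+//
+// Illustrative use (|Buffer| is a hypothetical class name):
+//   struct alignas(kMaxAlignment) Buffer : public MaxAlignedAllocable { ... };
+//   std::unique_ptr<Buffer> b(new (std::nothrow) Buffer());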
+struct MaxAlignedAllocable {
+ // Class-specific allocation functions.
+ static void* operator new(size_t size) = delete;
+ static void* operator new[](size_t size) = delete;
+
+ // Class-specific non-throwing allocation functions
+ static void* operator new(size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+#ifdef __cpp_aligned_new
+ return ::operator new(size, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ return AlignedAlloc(kMaxAlignment, size);
+#endif
+ }
+ static void* operator new[](size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+#ifdef __cpp_aligned_new
+ return ::operator new[](size, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ return AlignedAlloc(kMaxAlignment, size);
+#endif
+ }
+
+ // Class-specific deallocation functions.
+ static void operator delete(void* ptr) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete(ptr, std::align_val_t(kMaxAlignment));
+#else
+ AlignedFree(ptr);
+#endif
+ }
+ static void operator delete[](void* ptr) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete[](ptr, std::align_val_t(kMaxAlignment));
+#else
+ AlignedFree(ptr);
+#endif
+ }
+
+ // Only called if new (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete(ptr, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ AlignedFree(ptr);
+#endif
+ }
+ // Only called if new[] (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete[](ptr, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ AlignedFree(ptr);
+#endif
+ }
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_MEMORY_H_
diff --git a/src/utils/parameter_tree.cc b/src/utils/parameter_tree.cc
new file mode 100644
index 0000000..9426ce6
--- /dev/null
+++ b/src/utils/parameter_tree.cc
@@ -0,0 +1,133 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/parameter_tree.h"
+
+#include <cassert>
+#include <memory>
+#include <new>
+
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/logging.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// static
+std::unique_ptr<ParameterTree> ParameterTree::Create(int row4x4, int column4x4,
+ BlockSize block_size,
+ bool is_leaf) {
+ std::unique_ptr<ParameterTree> tree(
+ new (std::nothrow) ParameterTree(row4x4, column4x4, block_size));
+ if (tree != nullptr && is_leaf && !tree->SetPartitionType(kPartitionNone)) {
+ tree = nullptr;
+ }
+ return tree;
+}
+
+bool ParameterTree::SetPartitionType(Partition partition) {
+ assert(!partition_type_set_);
+ partition_ = partition;
+ partition_type_set_ = true;
+ const int block_width4x4 = kNum4x4BlocksWide[block_size_];
+ const int half_block4x4 = block_width4x4 >> 1;
+ const int quarter_block4x4 = half_block4x4 >> 1;
+ const BlockSize sub_size = kSubSize[partition][block_size_];
+ const BlockSize split_size = kSubSize[kPartitionSplit][block_size_];
+ assert(partition == kPartitionNone || sub_size != kBlockInvalid);
+ switch (partition) {
+ case kPartitionNone:
+ parameters_.reset(new (std::nothrow) BlockParameters());
+ return parameters_ != nullptr;
+ case kPartitionHorizontal:
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr;
+ case kPartitionVertical:
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr;
+ case kPartitionSplit:
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, sub_size, false);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, false);
+ children_[2] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, false);
+ children_[3] = ParameterTree::Create(
+ row4x4_ + half_block4x4, column4x4_ + half_block4x4, sub_size, false);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr && children_[3] != nullptr;
+ case kPartitionHorizontalWithTopSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, split_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ split_size, true);
+ children_[2] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionHorizontalWithBottomSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ split_size, true);
+ children_[2] =
+ ParameterTree::Create(row4x4_ + half_block4x4,
+ column4x4_ + half_block4x4, split_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionVerticalWithLeftSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, split_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ split_size, true);
+ children_[2] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionVerticalWithRightSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ split_size, true);
+ children_[2] =
+ ParameterTree::Create(row4x4_ + half_block4x4,
+ column4x4_ + half_block4x4, split_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionHorizontal4:
+ for (int i = 0; i < 4; ++i) {
+ children_[i] = ParameterTree::Create(row4x4_ + i * quarter_block4x4,
+ column4x4_, sub_size, true);
+ if (children_[i] == nullptr) return false;
+ }
+ return true;
+ default:
+ assert(partition == kPartitionVertical4);
+ for (int i = 0; i < 4; ++i) {
+ children_[i] = ParameterTree::Create(
+ row4x4_, column4x4_ + i * quarter_block4x4, sub_size, true);
+ if (children_[i] == nullptr) return false;
+ }
+ return true;
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/parameter_tree.h b/src/utils/parameter_tree.h
new file mode 100644
index 0000000..935f3eb
--- /dev/null
+++ b/src/utils/parameter_tree.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
+#define LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
+
+#include <cassert>
+#include <memory>
+
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+class ParameterTree : public Allocable {
+ public:
+ // Creates a parameter tree to store the parameters of a block of size
+ // |block_size| starting at coordinates |row4x4| and |column4x4|. If |is_leaf|
+ // is set to true, the memory will be allocated for the BlockParameters for
+ // this node. Otherwise, no memory will be allocated. If |is_leaf| is set to
+ // false, |block_size| must be a square block, i.e.,
+ // kBlockWidthPixels[block_size] must be equal to
+ // kBlockHeightPixels[block_size].
+ static std::unique_ptr<ParameterTree> Create(int row4x4, int column4x4,
+ BlockSize block_size,
+ bool is_leaf = false);
+
+ // Move only (not Copyable).
+ ParameterTree(ParameterTree&& other) = default;
+ ParameterTree& operator=(ParameterTree&& other) = default;
+ ParameterTree(const ParameterTree&) = delete;
+ ParameterTree& operator=(const ParameterTree&) = delete;
+
+ // Set the partition type of the current node to |partition|.
+ // if (partition == kPartitionNone) {
+ // Memory will be allocated for the BlockParameters for this node.
+ // } else if (partition != kPartitionSplit) {
+ // The appropriate child nodes will be populated and memory will be
+ // allocated for the BlockParameters of the children.
+ // } else {
+ // The appropriate child nodes will be populated but they are considered to
+ // be hanging, i.e., future calls to SetPartitionType() on the child nodes
+ // will have to set them or their descendants to a terminal type.
+ // }
+ // This function must be called only once per node.
+ LIBGAV1_MUST_USE_RESULT bool SetPartitionType(Partition partition);
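+  //
+  // Illustrative usage (the coordinates and block size are hypothetical):
+  //   auto tree = ParameterTree::Create(0, 0, kBlock64x64);
+  //   if (tree != nullptr && tree->SetPartitionType(kPartitionSplit)) {
+  //     // tree->children(0)..children(3) are the four hanging 32x32 nodes.
+  //   }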
+
+ // Basic getters.
+ int row4x4() const { return row4x4_; }
+ int column4x4() const { return column4x4_; }
+ BlockSize block_size() const { return block_size_; }
+ Partition partition() const { return partition_; }
+ ParameterTree* children(int index) const {
+ assert(index < 4);
+ return children_[index].get();
+ }
+ // Returns the BlockParameters object of the current node if one exists.
+ // Otherwise returns nullptr. This function will return a valid
+ // BlockParameters object only for leaf nodes.
+ BlockParameters* parameters() const { return parameters_.get(); }
+
+ private:
+ ParameterTree(int row4x4, int column4x4, BlockSize block_size)
+ : row4x4_(row4x4), column4x4_(column4x4), block_size_(block_size) {}
+
+ Partition partition_ = kPartitionNone;
+ std::unique_ptr<BlockParameters> parameters_ = nullptr;
+ int row4x4_ = -1;
+ int column4x4_ = -1;
+ BlockSize block_size_ = kBlockInvalid;
+ bool partition_type_set_ = false;
+
+ // Child values are defined as follows for various partition types:
+ // * Horizontal: 0 top partition; 1 bottom partition; 2 nullptr; 3 nullptr;
+ // * Vertical: 0 left partition; 1 right partition; 2 nullptr; 3 nullptr;
+  // * Split: 0 top-left partition; 1 top-right partition; 2 bottom-left
+ // partition; 3 bottom-right partition;
+ // * HorizontalWithTopSplit: 0 top-left partition; 1 top-right partition; 2
+ // bottom partition; 3 nullptr;
+ // * HorizontalWithBottomSplit: 0 top partition; 1 bottom-left partition; 2
+ // bottom-right partition; 3 nullptr;
+ // * VerticalWithLeftSplit: 0 top-left partition; 1 bottom-left partition; 2
+ // right partition; 3 nullptr;
+  // * VerticalWithRightSplit: 0 left partition; 1 top-right partition; 2
+ // bottom-right partition; 3 nullptr;
+ // * Horizontal4: 0 top partition; 1 second top partition; 2 third top
+ // partition; 3 bottom partition;
+ // * Vertical4: 0 left partition; 1 second left partition; 2 third left
+ // partition; 3 right partition;
+ std::unique_ptr<ParameterTree> children_[4] = {};
+
+ friend class ParameterTreeTest;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
diff --git a/src/utils/queue.h b/src/utils/queue.h
new file mode 100644
index 0000000..cffb9ca
--- /dev/null
+++ b/src/utils/queue.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_QUEUE_H_
+#define LIBGAV1_SRC_UTILS_QUEUE_H_
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <new>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// A FIFO queue of a fixed capacity.
+//
+// WARNING: No error checking is performed.
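+//
+// Illustrative usage (the element type and capacity are hypothetical):
+//   Queue<int> queue;
+//   if (!queue.Init(/*capacity=*/8)) return;  // Allocation may fail.
+//   queue.Push(42);
+//   const int front = queue.Front();  // front == 42
+//   queue.Pop();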
+template <typename T>
+class Queue {
+ public:
+ LIBGAV1_MUST_USE_RESULT bool Init(size_t capacity) {
+ elements_.reset(new (std::nothrow) T[capacity]);
+ if (elements_ == nullptr) return false;
+ capacity_ = capacity;
+ return true;
+ }
+
+ // Pushes the element |value| to the end of the queue. It is an error to call
+ // Push() when the queue is full.
+ void Push(T&& value) {
+ assert(size_ < capacity_);
+ elements_[end_++] = std::move(value);
+ if (end_ == capacity_) end_ = 0;
+ ++size_;
+ }
+
+ // Removes the element at the front of the queue. It is an error to call Pop()
+ // when the queue is empty.
+ void Pop() {
+ assert(size_ != 0);
+ const T element = std::move(elements_[begin_++]);
+ static_cast<void>(element);
+ if (begin_ == capacity_) begin_ = 0;
+ --size_;
+ }
+
+ // Returns a reference to the element at the front of the queue. It is an
+ // error to call Front() when the queue is empty.
+ T& Front() {
+ assert(size_ != 0);
+ return elements_[begin_];
+ }
+
+ // Returns a reference to the element at the back of the queue. It is an error
+ // to call Back() when the queue is empty.
+ T& Back() {
+ assert(size_ != 0);
+ const size_t back = ((end_ == 0) ? capacity_ : end_) - 1;
+ return elements_[back];
+ }
+
+ // Clears the queue.
+ void Clear() {
+ while (!Empty()) {
+ Pop();
+ }
+ }
+
+ // Returns true if the queue is empty.
+ bool Empty() const { return size_ == 0; }
+
+ // Returns true if the queue is full.
+ bool Full() const { return size_ >= capacity_; }
+
+ // Returns the number of elements in the queue.
+ size_t Size() const { return size_; }
+
+ private:
+ // An array of |capacity| elements. Used as a circular array.
+ std::unique_ptr<T[]> elements_;
+ size_t capacity_ = 0;
+ // The index of the element to be removed by Pop().
+ size_t begin_ = 0;
+ // The index where the new element is inserted by Push().
+ size_t end_ = 0;
+ size_t size_ = 0;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_QUEUE_H_
diff --git a/src/utils/raw_bit_reader.cc b/src/utils/raw_bit_reader.cc
new file mode 100644
index 0000000..15e980d
--- /dev/null
+++ b/src/utils/raw_bit_reader.cc
@@ -0,0 +1,224 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/raw_bit_reader.h"
+
+#include <cassert>
+#include <limits>
+
+#include "src/utils/common.h"
+#include "src/utils/logging.h"
+
+// Note <cinttypes> is only needed when logging is enabled (for the PRI*
+// macros). It depends on the definition of LIBGAV1_ENABLE_LOGGING from
+// logging.h, thus the non-standard header ordering.
+#if LIBGAV1_ENABLE_LOGGING
+#include <cinttypes>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr int kMaximumLeb128Size = 8;
+constexpr uint8_t kLeb128ValueByteMask = 0x7f;
+constexpr uint8_t kLeb128TerminationByteMask = 0x80;
+
+uint8_t Mod8(size_t n) {
+  // The last 3 bits of |n| give the value of n mod 8.
+ return n & 0x07;
+}
+
+size_t DivideBy8(size_t n, bool ceil) { return (n + (ceil ? 7 : 0)) >> 3; }
+
+} // namespace
+
+RawBitReader::RawBitReader(const uint8_t* data, size_t size)
+ : data_(data), bit_offset_(0), size_(size) {
+ assert(data_ != nullptr || size_ == 0);
+}
+
+int RawBitReader::ReadBitImpl() {
+ const size_t byte_offset = DivideBy8(bit_offset_, false);
+ const uint8_t byte = data_[byte_offset];
+ const uint8_t shift = 7 - Mod8(bit_offset_);
+ ++bit_offset_;
+ return static_cast<int>((byte >> shift) & 0x01);
+}
+
+int RawBitReader::ReadBit() {
+ if (Finished()) return -1;
+ return ReadBitImpl();
+}
+
+int64_t RawBitReader::ReadLiteral(int num_bits) {
+ assert(num_bits <= 32);
+ if (!CanReadLiteral(num_bits)) return -1;
+ assert(num_bits > 0);
+ uint32_t literal = 0;
+ int bit = num_bits - 1;
+ do {
+ // ARM can combine a shift operation with a constant number of bits with
+ // some other operations, such as the OR operation.
+ // Here is an ARM disassembly example:
+ // orr w1, w0, w1, lsl #1
+ // which left shifts register w1 by 1 bit and OR the shift result with
+    // which left-shifts register w1 by 1 bit and ORs the shifted result with
+    // register w0.
+ // literal |= static_cast<uint32_t>(ReadBitImpl()) << bit;
+ literal <<= 1;
+ literal |= static_cast<uint32_t>(ReadBitImpl());
+ } while (--bit >= 0);
+ return literal;
+}
+
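+// su(1+num_bits): a (num_bits + 1)-bit value whose top bit is the sign. For
+// example (illustrative), with num_bits = 4 the five bits 1 0 0 1 1 read as
+// 19; the sign bit (16) is set, so the result is 19 - 2 * 16 = -13.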
+bool RawBitReader::ReadInverseSignedLiteral(int num_bits, int* const value) {
+ assert(num_bits + 1 < 32);
+ *value = static_cast<int>(ReadLiteral(num_bits + 1));
+ if (*value == -1) return false;
+ const int sign_bit = 1 << num_bits;
+ if ((*value & sign_bit) != 0) {
+ *value -= 2 * sign_bit;
+ }
+ return true;
+}
+
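+// le(n): |num_bytes| bytes assembled least significant byte first. For
+// example (illustrative), the bytes 0x01 0x02 with num_bytes = 2 yield
+// 0x01 | (0x02 << 8) = 0x0201 = 513.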
+bool RawBitReader::ReadLittleEndian(int num_bytes, size_t* const value) {
+ // We must be at a byte boundary.
+ assert(Mod8(bit_offset_) == 0);
+ assert(num_bytes <= 4);
+ static_assert(sizeof(size_t) >= 4, "");
+ if (value == nullptr) return false;
+ size_t byte_offset = DivideBy8(bit_offset_, false);
+ if (Finished() || byte_offset + num_bytes > size_) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read Little Endian value.");
+ return false;
+ }
+ *value = 0;
+ for (int i = 0; i < num_bytes; ++i) {
+ const size_t byte = data_[byte_offset];
+ *value |= (byte << (i * 8));
+ ++byte_offset;
+ }
+ bit_offset_ = byte_offset * 8;
+ return true;
+}
+
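+// leb128(): each byte contributes its low 7 bits, least significant group
+// first, and the high bit of each byte signals continuation. For example
+// (illustrative), the two-byte sequence 0xe5 0x0e decodes to
+// (0xe5 & 0x7f) | ((0x0e & 0x7f) << 7) = 0x65 | 0x700 = 0x765 = 1893.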
+bool RawBitReader::ReadUnsignedLeb128(size_t* const value) {
+ // We must be at a byte boundary.
+ assert(Mod8(bit_offset_) == 0);
+ if (value == nullptr) return false;
+ uint64_t value64 = 0;
+ for (int i = 0; i < kMaximumLeb128Size; ++i) {
+ if (Finished()) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read LEB128 value.");
+ return false;
+ }
+ const size_t byte_offset = DivideBy8(bit_offset_, false);
+ const uint8_t byte = data_[byte_offset];
+ bit_offset_ += 8;
+ value64 |= static_cast<uint64_t>(byte & kLeb128ValueByteMask) << (i * 7);
+ if ((byte & kLeb128TerminationByteMask) == 0) {
+ if (value64 != static_cast<size_t>(value64) ||
+ value64 > std::numeric_limits<uint32_t>::max()) {
+ LIBGAV1_DLOG(
+ ERROR, "LEB128 value (%" PRIu64 ") exceeded uint32_t maximum (%u).",
+ value64, std::numeric_limits<uint32_t>::max());
+ return false;
+ }
+ *value = static_cast<size_t>(value64);
+ return true;
+ }
+ }
+ LIBGAV1_DLOG(
+ ERROR,
+ "Exceeded kMaximumLeb128Size (%d) when trying to read LEB128 value",
+ kMaximumLeb128Size);
+ return false;
+}
+
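+// uvlc(): |leading_zeros| zero bits, a one bit, then |leading_zeros| literal
+// bits; the decoded value is literal + (1 << leading_zeros) - 1. For example
+// (illustrative), the bits 0 0 1 1 0 have two leading zeros and literal
+// 0b10 = 2, so they decode to 2 + 3 = 5.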
+bool RawBitReader::ReadUvlc(uint32_t* const value) {
+ if (value == nullptr) return false;
+ int leading_zeros = 0;
+ while (true) {
+ const int bit = ReadBit();
+ if (bit == -1) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read uvlc value.");
+ return false;
+ }
+ if (bit == 1) break;
+ ++leading_zeros;
+ if (leading_zeros == 32) {
+ LIBGAV1_DLOG(ERROR,
+ "Exceeded maximum size (32) when trying to read uvlc value");
+ return false;
+ }
+ }
+ int literal;
+ if (leading_zeros != 0) {
+ literal = static_cast<int>(ReadLiteral(leading_zeros));
+ if (literal == -1) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read uvlc value.");
+ return false;
+ }
+ literal += (1U << leading_zeros) - 1;
+ } else {
+ literal = 0;
+ }
+ *value = literal;
+ return true;
+}
+
+bool RawBitReader::AlignToNextByte() {
+ while ((bit_offset_ & 7) != 0) {
+ if (ReadBit() != 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool RawBitReader::VerifyAndSkipTrailingBits(size_t num_bits) {
+ if (ReadBit() != 1) return false;
+ for (size_t i = 0; i < num_bits - 1; ++i) {
+ if (ReadBit() != 0) return false;
+ }
+ return true;
+}
+
+bool RawBitReader::SkipBytes(size_t num_bytes) {
+ // If we are not at a byte boundary, return false.
+ return ((bit_offset_ & 7) != 0) ? false : SkipBits(num_bytes * 8);
+}
+
+bool RawBitReader::SkipBits(size_t num_bits) {
+ // If the reader is already finished, return false.
+ if (Finished()) return false;
+ // If skipping |num_bits| runs out of buffer, return false.
+ const size_t bit_offset = bit_offset_ + num_bits - 1;
+ if (DivideBy8(bit_offset, false) >= size_) return false;
+ bit_offset_ += num_bits;
+ return true;
+}
+
+bool RawBitReader::CanReadLiteral(size_t num_bits) const {
+ if (Finished()) return false;
+ const size_t bit_offset = bit_offset_ + num_bits - 1;
+ return DivideBy8(bit_offset, false) < size_;
+}
+
+bool RawBitReader::Finished() const {
+ return DivideBy8(bit_offset_, false) >= size_;
+}
+
+} // namespace libgav1
diff --git a/src/utils/raw_bit_reader.h b/src/utils/raw_bit_reader.h
new file mode 100644
index 0000000..76e7bfa
--- /dev/null
+++ b/src/utils/raw_bit_reader.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
+#define LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "src/utils/bit_reader.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+class RawBitReader : public BitReader, public Allocable {
+ public:
+ RawBitReader(const uint8_t* data, size_t size);
+ ~RawBitReader() override = default;
+
+ int ReadBit() override;
+ int64_t ReadLiteral(int num_bits) override; // f(n) in the spec.
+ bool ReadInverseSignedLiteral(int num_bits,
+ int* value); // su(1+num_bits) in the spec.
+ bool ReadLittleEndian(int num_bytes,
+ size_t* value); // le(n) in the spec.
+ bool ReadUnsignedLeb128(size_t* value); // leb128() in the spec.
+ // Reads a variable length unsigned number and stores it in |*value|. On a
+  // successful return, |*value| is in the range of 0 to UINT32_MAX - 1,
+ // inclusive.
+ bool ReadUvlc(uint32_t* value); // uvlc() in the spec.
+ bool Finished() const;
+ size_t bit_offset() const { return bit_offset_; }
+ // Return the bytes consumed so far (rounded up).
+ size_t byte_offset() const { return (bit_offset() + 7) >> 3; }
+ size_t size() const { return size_; }
+ // Move to the next byte boundary if not already at one. Return false if any
+ // of the bits being skipped over is non-zero. Return true otherwise. If this
+ // function returns false, the reader is left in an undefined state and must
+ // not be used further. section 5.3.5.
+ bool AlignToNextByte();
+ // Make sure that the trailing bits structure is as expected and skip over it.
+ // section 5.3.4.
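+  // For example (illustrative), with num_bits = 8 the expected trailing-bits
+  // pattern is a one bit followed by seven zero bits.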
+ bool VerifyAndSkipTrailingBits(size_t num_bits);
+ // Skip |num_bytes| bytes. This only works if the current position is at a
+ // byte boundary. The function returns false if the current position is not at
+ // a byte boundary or if skipping |num_bytes| causes the reader to run out of
+ // buffer. Returns true otherwise.
+ bool SkipBytes(size_t num_bytes);
+ // Skip |num_bits| bits. The function returns false if skipping |num_bits|
+ // causes the reader to run out of buffer. Returns true otherwise.
+ bool SkipBits(size_t num_bits);
+
+ private:
+ // Returns true if it is safe to read a literal of size |num_bits|.
+ bool CanReadLiteral(size_t num_bits) const;
+ int ReadBitImpl();
+
+ const uint8_t* const data_;
+ size_t bit_offset_;
+ const size_t size_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
diff --git a/src/utils/reference_info.h b/src/utils/reference_info.h
new file mode 100644
index 0000000..a660791
--- /dev/null
+++ b/src/utils/reference_info.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2020 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
+#define LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
+
+#include <array>
+#include <cstdint>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// This struct collects some members related to reference frames in one place to
+// make it easier to pass them as parameters to some dsp functions.
+struct ReferenceInfo {
+ // Initialize |motion_field_reference_frame| so that
+ // Tile::StoreMotionFieldMvsIntoCurrentFrame() can skip some updates when
+ // the updates are the same as the initialized value.
+ // Set to kReferenceFrameIntra instead of kReferenceFrameNone to simplify
+ // branch conditions in motion field projection.
+  // Initializing contiguous memory in a single pass is very fast. Splitting
+  // the initialization across threads is not recommended unless the memory
+  // each thread initializes is itself contiguous.
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows, int columns) {
+ return motion_field_reference_frame.Reset(rows, columns,
+ /*zero_initialize=*/true) &&
+ motion_field_mv.Reset(
+ rows, columns,
+#if LIBGAV1_MSAN
+ // It is set in Tile::StoreMotionFieldMvsIntoCurrentFrame() only
+ // for qualified blocks. In MotionFieldProjectionKernel() dsp
+          // optimizations, it is read whether or not it was set.
+ /*zero_initialize=*/true
+#else
+ /*zero_initialize=*/false
+#endif
+ );
+ }
+
+ // All members are used by inter frames only.
+ // For intra frames, they are not initialized.
+
+ std::array<uint8_t, kNumReferenceFrameTypes> order_hint;
+
+ // An example when |relative_distance_from| does not equal
+ // -|relative_distance_to|:
+ // |relative_distance_from| = GetRelativeDistance(7, 71, 25) = -64
+ // -|relative_distance_to| = -GetRelativeDistance(71, 7, 25) = 64
+ // This is why we need both |relative_distance_from| and
+ // |relative_distance_to|.
+ // |relative_distance_from|: Relative distances from reference frames to this
+ // frame.
+ std::array<int8_t, kNumReferenceFrameTypes> relative_distance_from;
+ // |relative_distance_to|: Relative distances to reference frames.
+ std::array<int8_t, kNumReferenceFrameTypes> relative_distance_to;
+
+ // Skip motion field projection of specific types of frames if their
+ // |relative_distance_to| is negative or too large.
+ std::array<bool, kNumReferenceFrameTypes> skip_references;
+ // Lookup table to get motion field projection division multiplier of specific
+ // types of frames. Derived from kProjectionMvDivisionLookup.
+ std::array<int16_t, kNumReferenceFrameTypes> projection_divisions;
+
+  // The current frame's |motion_field_reference_frame| and |motion_field_mv|
+  // are guaranteed to be allocated only when refresh_frame_flags is not 0.
+ // Array of size (rows4x4 / 2) x (columns4x4 / 2). Entry at i, j corresponds
+ // to MfRefFrames[i * 2 + 1][j * 2 + 1] in the spec.
+ Array2D<ReferenceFrameType> motion_field_reference_frame;
+ // Array of size (rows4x4 / 2) x (columns4x4 / 2). Entry at i, j corresponds
+ // to MfMvs[i * 2 + 1][j * 2 + 1] in the spec.
+ Array2D<MotionVector> motion_field_mv;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
diff --git a/src/utils/segmentation.cc b/src/utils/segmentation.cc
new file mode 100644
index 0000000..75fa776
--- /dev/null
+++ b/src/utils/segmentation.cc
@@ -0,0 +1,31 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation.h"
+
+namespace libgav1 {
+
+const int8_t kSegmentationFeatureBits[kSegmentFeatureMax] = {8, 6, 6, 6,
+ 6, 3, 0, 0};
+const int kSegmentationFeatureMaxValues[kSegmentFeatureMax] = {
+ 255,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ 7,
+ 0,
+ 0};
+
+} // namespace libgav1
diff --git a/src/utils/segmentation.h b/src/utils/segmentation.h
new file mode 100644
index 0000000..67ff74c
--- /dev/null
+++ b/src/utils/segmentation.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_SEGMENTATION_H_
+#define LIBGAV1_SRC_UTILS_SEGMENTATION_H_
+
+#include <cstdint>
+
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+extern const int8_t kSegmentationFeatureBits[kSegmentFeatureMax];
+extern const int kSegmentationFeatureMaxValues[kSegmentFeatureMax];
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_SEGMENTATION_H_
diff --git a/src/utils/segmentation_map.cc b/src/utils/segmentation_map.cc
new file mode 100644
index 0000000..4284ca2
--- /dev/null
+++ b/src/utils/segmentation_map.cc
@@ -0,0 +1,49 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation_map.h"
+
+#include <cassert>
+#include <cstring>
+#include <new>
+
+namespace libgav1 {
+
+bool SegmentationMap::Allocate(int32_t rows4x4, int32_t columns4x4) {
+ rows4x4_ = rows4x4;
+ columns4x4_ = columns4x4;
+ segment_id_buffer_.reset(new (std::nothrow) int8_t[rows4x4_ * columns4x4_]);
+ if (segment_id_buffer_ == nullptr) return false;
+ segment_id_.Reset(rows4x4_, columns4x4_, segment_id_buffer_.get());
+ return true;
+}
+
+void SegmentationMap::Clear() {
+ memset(segment_id_buffer_.get(), 0, rows4x4_ * columns4x4_);
+}
+
+void SegmentationMap::CopyFrom(const SegmentationMap& from) {
+ assert(rows4x4_ == from.rows4x4_ && columns4x4_ == from.columns4x4_);
+ memcpy(segment_id_buffer_.get(), from.segment_id_buffer_.get(),
+ rows4x4_ * columns4x4_);
+}
+
+void SegmentationMap::FillBlock(int row4x4, int column4x4, int block_width4x4,
+ int block_height4x4, int8_t segment_id) {
+ for (int y = 0; y < block_height4x4; ++y) {
+ memset(&segment_id_[row4x4 + y][column4x4], segment_id, block_width4x4);
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/segmentation_map.h b/src/utils/segmentation_map.h
new file mode 100644
index 0000000..499be24
--- /dev/null
+++ b/src/utils/segmentation_map.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
+#define LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// SegmentationMap stores the segment id associated with each 4x4 block in the
+// frame.
+class SegmentationMap {
+ public:
+ SegmentationMap() = default;
+
+ // Not copyable or movable
+ SegmentationMap(const SegmentationMap&) = delete;
+ SegmentationMap& operator=(const SegmentationMap&) = delete;
+
+ // Allocates an internal buffer of the given dimensions to hold the
+ // segmentation map. The memory in the buffer is not initialized. Returns
+ // true on success, false on failure (for example, out of memory).
+ LIBGAV1_MUST_USE_RESULT bool Allocate(int32_t rows4x4, int32_t columns4x4);
+
+ int8_t segment_id(int row4x4, int column4x4) const {
+ return segment_id_[row4x4][column4x4];
+ }
+
+ // Sets every element in the segmentation map to 0.
+ void Clear();
+
+ // Copies the entire segmentation map. |from| must be of the same dimensions.
+ void CopyFrom(const SegmentationMap& from);
+
+ // Sets the region of segmentation map covered by the block to |segment_id|.
+ // The block is located at |row4x4|, |column4x4| and has dimensions
+ // |block_width4x4| and |block_height4x4|.
+ void FillBlock(int row4x4, int column4x4, int block_width4x4,
+ int block_height4x4, int8_t segment_id);
+
+ private:
+ int32_t rows4x4_ = 0;
+ int32_t columns4x4_ = 0;
+
+ // segment_id_ is a rows4x4_ by columns4x4_ 2D array. The underlying data
+ // buffer is dynamically allocated and owned by segment_id_buffer_.
+ std::unique_ptr<int8_t[]> segment_id_buffer_;
+ Array2DView<int8_t> segment_id_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
diff --git a/src/utils/stack.h b/src/utils/stack.h
new file mode 100644
index 0000000..39133b9
--- /dev/null
+++ b/src/utils/stack.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_STACK_H_
+#define LIBGAV1_SRC_UTILS_STACK_H_
+
+#include <cassert>
+#include <utility>
+
+namespace libgav1 {
+
+// A LIFO stack of a fixed capacity. The elements are moved using std::move, so
+// the element type T has to be movable.
+//
+// WARNING: No error checking is performed.
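+//
+// Illustrative usage (the element type and capacity are hypothetical):
+//   Stack<int, 8> stack;
+//   stack.Push(1);
+//   stack.Push(2);
+//   const int top = stack.Pop();  // top == 2; the stack now holds only 1.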
+template <typename T, int capacity>
+class Stack {
+ public:
+ // Pushes the element |value| to the top of the stack. It is an error to call
+ // Push() when the stack is full.
+ void Push(T value) {
+ ++top_;
+ assert(top_ < capacity);
+ elements_[top_] = std::move(value);
+ }
+
+ // Returns the element at the top of the stack and removes it from the stack.
+ // It is an error to call Pop() when the stack is empty.
+ T Pop() {
+ assert(top_ >= 0);
+ return std::move(elements_[top_--]);
+ }
+
+ // Returns true if the stack is empty.
+ bool Empty() const { return top_ < 0; }
+
+ private:
+ static_assert(capacity > 0, "");
+ T elements_[capacity];
+ // The array index of the top of the stack. The stack is empty if top_ is -1.
+ int top_ = -1;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_STACK_H_
diff --git a/src/utils/threadpool.cc b/src/utils/threadpool.cc
new file mode 100644
index 0000000..8c8f4fe
--- /dev/null
+++ b/src/utils/threadpool.cc
@@ -0,0 +1,323 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/threadpool.h"
+
+#if defined(_MSC_VER)
+#include <process.h>
+#include <windows.h>
+#else // defined(_MSC_VER)
+#include <pthread.h>
+#endif // defined(_MSC_VER)
+#if defined(__ANDROID__) || defined(__GLIBC__)
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
+
+#if defined(__ANDROID__)
+#include <chrono> // NOLINT (unapproved c++11 header)
+#endif
+
+// The glibc wrapper for the gettid() system call was added in glibc 2.30.
+// Emulate it for older versions of glibc.
+#if defined(__GLIBC_PREREQ)
+#if !__GLIBC_PREREQ(2, 30)
+
+#include <sys/syscall.h>
+
+static pid_t gettid() { return static_cast<pid_t>(syscall(SYS_gettid)); }
+
+#endif
+#endif // defined(__GLIBC_PREREQ)
+
+namespace libgav1 {
+
+#if defined(__ANDROID__)
+namespace {
+
+using Clock = std::chrono::steady_clock;
+using Duration = Clock::duration;
+constexpr Duration kBusyWaitDuration =
+ std::chrono::duration_cast<Duration>(std::chrono::duration<double>(2e-3));
+
+} // namespace
+#endif // defined(__ANDROID__)
+
+// static
+std::unique_ptr<ThreadPool> ThreadPool::Create(int num_threads) {
+ return Create(/*name_prefix=*/"", num_threads);
+}
+
+// static
+std::unique_ptr<ThreadPool> ThreadPool::Create(const char name_prefix[],
+ int num_threads) {
+ if (name_prefix == nullptr || num_threads <= 0) return nullptr;
+ std::unique_ptr<WorkerThread*[]> threads(new (std::nothrow)
+ WorkerThread*[num_threads]);
+ if (threads == nullptr) return nullptr;
+ std::unique_ptr<ThreadPool> pool(new (std::nothrow) ThreadPool(
+ name_prefix, std::move(threads), num_threads));
+ if (pool != nullptr && !pool->StartWorkers()) {
+ pool = nullptr;
+ }
+ return pool;
+}
+
+ThreadPool::ThreadPool(const char name_prefix[],
+ std::unique_ptr<WorkerThread*[]> threads,
+ int num_threads)
+ : threads_(std::move(threads)), num_threads_(num_threads) {
+ threads_[0] = nullptr;
+ assert(name_prefix != nullptr);
+ const size_t name_prefix_len =
+ std::min(strlen(name_prefix), sizeof(name_prefix_) - 1);
+ memcpy(name_prefix_, name_prefix, name_prefix_len);
+ name_prefix_[name_prefix_len] = '\0';
+}
+
+ThreadPool::~ThreadPool() { Shutdown(); }
+
+void ThreadPool::Schedule(std::function<void()> closure) {
+ LockMutex();
+ if (!queue_.GrowIfNeeded()) {
+ // queue_ is full and we can't grow it. Run |closure| directly.
+ UnlockMutex();
+ closure();
+ return;
+ }
+ queue_.Push(std::move(closure));
+ UnlockMutex();
+ SignalOne();
+}
+
+int ThreadPool::num_threads() const { return num_threads_; }
+
+// A simple implementation that mirrors the non-portable Thread. We may
+// choose to expand this in the future as a portable implementation of
+// Thread, or replace it at such a time as one is implemented.
+class ThreadPool::WorkerThread : public Allocable {
+ public:
+ // Creates and starts a thread that runs pool->WorkerFunction().
+ explicit WorkerThread(ThreadPool* pool);
+
+ // Not copyable or movable.
+ WorkerThread(const WorkerThread&) = delete;
+ WorkerThread& operator=(const WorkerThread&) = delete;
+
+ // REQUIRES: Join() must have been called if Start() was called and
+ // succeeded.
+ ~WorkerThread() = default;
+
+ LIBGAV1_MUST_USE_RESULT bool Start();
+
+ // Joins with the running thread.
+ void Join();
+
+ private:
+#if defined(_MSC_VER)
+ static unsigned int __stdcall ThreadBody(void* arg);
+#else
+ static void* ThreadBody(void* arg);
+#endif
+
+ void SetupName();
+ void Run();
+
+ ThreadPool* pool_;
+#if defined(_MSC_VER)
+ HANDLE handle_;
+#else
+ pthread_t thread_;
+#endif
+};
+
+ThreadPool::WorkerThread::WorkerThread(ThreadPool* pool) : pool_(pool) {}
+
+#if defined(_MSC_VER)
+
+bool ThreadPool::WorkerThread::Start() {
+ // Since our code calls the C run-time library (CRT), use _beginthreadex
+ // rather than CreateThread. Microsoft documentation says "If a thread
+ // created using CreateThread calls the CRT, the CRT may terminate the
+ // process in low-memory conditions."
+ uintptr_t handle = _beginthreadex(
+ /*security=*/nullptr, /*stack_size=*/0, ThreadBody, this,
+ /*initflag=*/CREATE_SUSPENDED, /*thrdaddr=*/nullptr);
+ if (handle == 0) return false;
+ handle_ = reinterpret_cast<HANDLE>(handle);
+ ResumeThread(handle_);
+ return true;
+}
+
+void ThreadPool::WorkerThread::Join() {
+ WaitForSingleObject(handle_, INFINITE);
+ CloseHandle(handle_);
+}
+
+unsigned int ThreadPool::WorkerThread::ThreadBody(void* arg) {
+ auto* thread = static_cast<WorkerThread*>(arg);
+ thread->Run();
+ return 0;
+}
+
+void ThreadPool::WorkerThread::SetupName() {
+ // Not currently supported on Windows.
+}
+
+#else // defined(_MSC_VER)
+
+bool ThreadPool::WorkerThread::Start() {
+ return pthread_create(&thread_, nullptr, ThreadBody, this) == 0;
+}
+
+void ThreadPool::WorkerThread::Join() { pthread_join(thread_, nullptr); }
+
+void* ThreadPool::WorkerThread::ThreadBody(void* arg) {
+ auto* thread = static_cast<WorkerThread*>(arg);
+ thread->Run();
+ return nullptr;
+}
+
+void ThreadPool::WorkerThread::SetupName() {
+ if (pool_->name_prefix_[0] != '\0') {
+#if defined(__APPLE__)
+ // Apple's version of pthread_setname_np takes one argument and operates on
+ // the current thread only. Also, pthread_mach_thread_np is Apple-specific.
+ // The maximum size of the |name| buffer was noted in the Chromium source
+ // code and was confirmed by experiments.
+ char name[64];
+ mach_port_t id = pthread_mach_thread_np(pthread_self());
+ int rv = snprintf(name, sizeof(name), "%s/%" PRId64, pool_->name_prefix_,
+ static_cast<int64_t>(id));
+ assert(rv >= 0);
+ rv = pthread_setname_np(name);
+ assert(rv == 0);
+ static_cast<void>(rv);
+#elif defined(__ANDROID__) || defined(__GLIBC__)
+ // If the |name| buffer is longer than 16 bytes, pthread_setname_np fails
+ // with error 34 (ERANGE) on Android.
+ char name[16];
+ pid_t id = gettid();
+ int rv = snprintf(name, sizeof(name), "%s/%" PRId64, pool_->name_prefix_,
+ static_cast<int64_t>(id));
+ assert(rv >= 0);
+ rv = pthread_setname_np(pthread_self(), name);
+ assert(rv == 0);
+ static_cast<void>(rv);
+#endif
+ }
+}
+
+#endif // defined(_MSC_VER)
+
+void ThreadPool::WorkerThread::Run() {
+ SetupName();
+ pool_->WorkerFunction();
+}
+
+bool ThreadPool::StartWorkers() {
+ if (!queue_.Init()) return false;
+ for (int i = 0; i < num_threads_; ++i) {
+ threads_[i] = new (std::nothrow) WorkerThread(this);
+ if (threads_[i] == nullptr) return false;
+ if (!threads_[i]->Start()) {
+ delete threads_[i];
+ threads_[i] = nullptr;
+ return false;
+ }
+ }
+ return true;
+}
+
+void ThreadPool::WorkerFunction() {
+ LockMutex();
+ while (true) {
+ if (queue_.Empty()) {
+ if (exit_threads_) {
+ break; // Queue is empty and exit was requested.
+ }
+#if defined(__ANDROID__)
+      // On Android, if we go to a conditional wait right away, the CPU governor
+ // kicks in and starts shutting the cores down. So we do a very small busy
+ // wait to see if we get our next job within that period. This
+ // significantly improves the performance of common cases of tile parallel
+ // decoding. If we don't receive a job in the busy wait time, we then go
+ // to an actual conditional wait as usual.
+ UnlockMutex();
+ bool found_job = false;
+ const auto wait_start = Clock::now();
+ while (Clock::now() - wait_start < kBusyWaitDuration) {
+ LockMutex();
+ if (!queue_.Empty()) {
+ found_job = true;
+ break;
+ }
+ UnlockMutex();
+ }
+ // If |found_job| is true, we simply continue since we already hold the
+ // mutex and we know for sure that the |queue_| is not empty.
+ if (found_job) continue;
+      // Since |found_job| was false, the mutex is not being held at this
+ // point.
+ LockMutex();
+ // Ensure that the queue is still empty.
+ if (!queue_.Empty()) continue;
+ if (exit_threads_) {
+ break; // Queue is empty and exit was requested.
+ }
+#endif // defined(__ANDROID__)
+ // Queue is still empty, wait for signal or broadcast.
+ Wait();
+ } else {
+ // Take a job from the queue.
+ std::function<void()> job = std::move(queue_.Front());
+ queue_.Pop();
+
+ UnlockMutex();
+ // Note that it is good practice to surround this with a try/catch so
+ // the thread pool doesn't go to hell if the job throws an exception.
+ // This is omitted here because Google3 doesn't like exceptions.
+ std::move(job)();
+ job = nullptr;
+
+ LockMutex();
+ }
+ }
+ UnlockMutex();
+}
+
+void ThreadPool::Shutdown() {
+ // Tell worker threads how to exit.
+ LockMutex();
+ exit_threads_ = true;
+ UnlockMutex();
+ SignalAll();
+
+ // Join all workers. This will block.
+ for (int i = 0; i < num_threads_; ++i) {
+ if (threads_[i] == nullptr) break;
+ threads_[i]->Join();
+ delete threads_[i];
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/threadpool.h b/src/utils/threadpool.h
new file mode 100644
index 0000000..fac875e
--- /dev/null
+++ b/src/utils/threadpool.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_THREADPOOL_H_
+#define LIBGAV1_SRC_UTILS_THREADPOOL_H_
+
+#include <functional>
+#include <memory>
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if !defined(LIBGAV1_THREADPOOL_USE_STD_MUTEX)
+#if defined(__ANDROID__) || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE)
+#define LIBGAV1_THREADPOOL_USE_STD_MUTEX 1
+#else
+#define LIBGAV1_THREADPOOL_USE_STD_MUTEX 0
+#endif
+#endif
+
+#if LIBGAV1_THREADPOOL_USE_STD_MUTEX
+#include <condition_variable> // NOLINT (unapproved c++11 header)
+#include <mutex> // NOLINT (unapproved c++11 header)
+#else
+// absl::Mutex & absl::CondVar are significantly faster than the pthread
+// variants on platforms other than Android. iOS may deadlock on Shutdown()
+// using absl, see b/142251739.
+#include "absl/base/thread_annotations.h"
+#include "absl/synchronization/mutex.h"
+#endif
+
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/executor.h"
+#include "src/utils/memory.h"
+#include "src/utils/unbounded_queue.h"
+
+namespace libgav1 {
+
+// An implementation of ThreadPool using POSIX threads (pthreads) or Windows
+// threads.
+//
+// - The pool allocates a fixed number of worker threads on instantiation.
+// - The worker threads will pick up work jobs as they arrive.
+// - If all workers are busy, work jobs are queued for later execution.
+//
+// The thread pool is shut down when the pool is destroyed.
+//
+// Example usage of the thread pool:
+// {
+// std::unique_ptr<ThreadPool> pool = ThreadPool::Create(4);
+// for (int i = 0; i < 100; ++i) { // Dispatch 100 jobs.
+// pool->Schedule([&my_data]() { MyFunction(&my_data); });
+// }
+// } // ThreadPool gets destroyed only when all jobs are done.
+class ThreadPool : public Executor, public Allocable {
+ public:
+ // Creates the thread pool with the specified number of worker threads.
+ // If num_threads is 1, the closures are run in FIFO order.
+ static std::unique_ptr<ThreadPool> Create(int num_threads);
+
+ // Like the above factory method, but also sets the name prefix for threads.
+ static std::unique_ptr<ThreadPool> Create(const char name_prefix[],
+ int num_threads);
+
+ // The destructor will shut down the thread pool; all jobs already in the
+ // queue are executed before destruction completes. Note that after
+ // shutdown, the thread pool does not accept further jobs.
+ ~ThreadPool() override;
+
+ // Adds the specified "closure" to the queue for processing. If worker threads
+ // are available, "closure" will run immediately. Otherwise "closure" is
+ // queued for later execution.
+ //
+ // NOTE: If the internal queue is full and cannot be resized because of an
+ // out-of-memory error, the current thread runs "closure" before returning
+ // from Schedule(). For our use cases, this seems better than the
+ // alternatives:
+ // 1. Return a failure status.
+ // 2. Have the current thread wait until the queue is not full.
+ void Schedule(std::function<void()> closure) override;
+
+ int num_threads() const;
+
+ private:
+ class WorkerThread;
+
+ // Creates the thread pool with the specified number of worker threads.
+ // If num_threads is 1, the closures are run in FIFO order.
+ ThreadPool(const char name_prefix[], std::unique_ptr<WorkerThread*[]> threads,
+ int num_threads);
+
+ // Starts the worker pool.
+ LIBGAV1_MUST_USE_RESULT bool StartWorkers();
+
+ void WorkerFunction();
+
+ // Shuts down the thread pool, i.e. worker threads finish their work and
+ // pick up new jobs until the queue is empty. This call will block until
+ // the shutdown is complete.
+ //
+ // Note: If a worker encounters an empty queue after this call, it will exit.
+ // Other workers might still be running, and if the queue fills up again, the
+ // thread pool will continue to operate with a decreased number of workers.
+ // It is up to the caller to prevent adding new jobs.
+ void Shutdown();
+
+#if LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ void LockMutex() { queue_mutex_.lock(); }
+ void UnlockMutex() { queue_mutex_.unlock(); }
+
+ void Wait() {
+ // Adopt the mutex that LockMutex() already holds; wait() atomically
+ // releases it and reacquires it when woken. release() then drops
+ // ownership so the unique_lock destructor does not unlock it again.
+ std::unique_lock<std::mutex> queue_lock(queue_mutex_, std::adopt_lock);
+ condition_.wait(queue_lock);
+ queue_lock.release();
+ }
+
+ void SignalOne() { condition_.notify_one(); }
+ void SignalAll() { condition_.notify_all(); }
+
+ std::condition_variable condition_;
+ std::mutex queue_mutex_;
+
+#else // !LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ void LockMutex() ABSL_EXCLUSIVE_LOCK_FUNCTION() { queue_mutex_.Lock(); }
+ void UnlockMutex() ABSL_UNLOCK_FUNCTION() { queue_mutex_.Unlock(); }
+ void Wait() { condition_.Wait(&queue_mutex_); }
+ void SignalOne() { condition_.Signal(); }
+ void SignalAll() { condition_.SignalAll(); }
+
+ absl::CondVar condition_;
+ absl::Mutex queue_mutex_;
+
+#endif // LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ UnboundedQueue<std::function<void()>> queue_ LIBGAV1_GUARDED_BY(queue_mutex_);
+ // If not all the worker threads are created, the first entry after the
+ // created worker threads is a null pointer.
+ const std::unique_ptr<WorkerThread*[]> threads_;
+
+ bool exit_threads_ LIBGAV1_GUARDED_BY(queue_mutex_) = false;
+ const int num_threads_ = 0;
+ // name_prefix_ is a C string, whose length is restricted to 16 characters,
+ // including the terminating null byte ('\0'). This restriction comes from
+ // the Linux pthread_setname_np() function.
+ char name_prefix_[16];
+};
+
+} // namespace libgav1
+
+#undef LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+#endif // LIBGAV1_SRC_UTILS_THREADPOOL_H_
diff --git a/src/utils/types.h b/src/utils/types.h
new file mode 100644
index 0000000..374f06b
--- /dev/null
+++ b/src/utils/types.h
@@ -0,0 +1,525 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_TYPES_H_
+#define LIBGAV1_SRC_UTILS_TYPES_H_
+
+#include <array>
+#include <cstdint>
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+struct MotionVector : public Allocable {
+ static constexpr int kRow = 0;
+ static constexpr int kColumn = 1;
+
+ MotionVector() = default;
+ MotionVector(const MotionVector& mv) = default;
+
+ MotionVector& operator=(const MotionVector& rhs) {
+ mv32 = rhs.mv32;
+ return *this;
+ }
+
+ bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; }
+
+ union {
+ // Motion vectors will always fit in int16_t, and using int16_t here
+ // instead of int saves significant memory, since some of the frame-sized
+ // structures store motion vectors.
+ int16_t mv[2];
+ // A uint32_t view into the |mv| array. Useful for cases where both motion
+ // vectors have to be copied or compared with a single 32-bit instruction.
+ uint32_t mv32;
+ };
+};
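+
+// For illustration: the |mv32| view lets both components be handled in a
+// single operation, e.g. |mv.mv32 = 0;| zeroes the row and column components
+// at once; operator= and operator== above rely on exactly this.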
+
+union CompoundMotionVector {
+ CompoundMotionVector() = default;
+ CompoundMotionVector(const CompoundMotionVector& mv) = default;
+
+ CompoundMotionVector& operator=(const CompoundMotionVector& rhs) {
+ mv64 = rhs.mv64;
+ return *this;
+ }
+
+ bool operator==(const CompoundMotionVector& rhs) const {
+ return mv64 == rhs.mv64;
+ }
+
+ MotionVector mv[2];
+ // A uint64_t view into the |mv| array. Useful for cases where both motion
+ // vectors have to be copied or compared with a single 64-bit instruction.
+ uint64_t mv64;
+};
+
+// Stores the motion information used for motion field estimation.
+struct TemporalMotionField : public Allocable {
+ Array2D<MotionVector> mv;
+ Array2D<int8_t> reference_offset;
+};
+
+// MvContexts contains the contexts used to decode portions of an inter block
+// mode info to set the y_mode field in BlockParameters.
+//
+// The contexts in the struct correspond to the ZeroMvContext, RefMvContext,
+// and NewMvContext variables in the spec.
+struct MvContexts {
+ int zero_mv;
+ int reference_mv;
+ int new_mv;
+};
+
+struct PaletteModeInfo {
+ uint8_t size[kNumPlaneTypes];
+ uint16_t color[kMaxPlanes][kMaxPaletteSize];
+};
+
+// Stores the parameters used by the prediction process. The members of the
+// struct are filled in when parsing the bitstream and used when the prediction
+// is computed. The information in this struct is associated with a single
+// block.
+// Both BlockParameters and PredictionParameters store information pertaining
+// to a block; the difference is that BlockParameters outlives the block
+// itself (for example, some of the variables in BlockParameters are used to
+// compute the context for reading elements in subsequent blocks).
+struct PredictionParameters : public Allocable {
+ // Restores the index into the unsorted mv stack from the least significant
+ // 3 bits of the sorted |weight_index_stack| entry.
+ const MotionVector& reference_mv(int stack_index) const {
+ return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)];
+ }
+ const MotionVector& reference_mv(int stack_index, int mv_index) const {
+ return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]
+ .mv[mv_index];
+ }
+
+ void IncreaseWeight(ptrdiff_t index, int weight) {
+ weight_index_stack[index] += weight << 3;
+ }
+
+ void SetWeightIndexStackEntry(int index, int weight) {
+ weight_index_stack[index] = (weight << 3) + 7 - index;
+ }
+
+ bool use_filter_intra;
+ FilterIntraPredictor filter_intra_mode;
+ int angle_delta[kNumPlaneTypes];
+ int8_t cfl_alpha_u;
+ int8_t cfl_alpha_v;
+ int max_luma_width;
+ int max_luma_height;
+ Array2D<uint8_t> color_index_map[kNumPlaneTypes];
+ bool use_intra_block_copy;
+ InterIntraMode inter_intra_mode;
+ bool is_wedge_inter_intra;
+ int wedge_index;
+ int wedge_sign;
+ bool mask_is_inverse;
+ MotionMode motion_mode;
+ CompoundPredictionType compound_prediction_type;
+ union {
+ // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after
+ // construction. reference_mv() must be called to get the correct element.
+ MotionVector ref_mv_stack[kMaxRefMvStackSize];
+ CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize];
+ };
+ // The least significant 3 bits of |weight_index_stack| store the index
+ // information, and the remaining bits store the weight. The index
+ // information is actually 7 - index, which makes the descending-order sort
+ // stable (it preserves the original order for elements with the same
+ // weight). Sorting an int16_t array is much faster than sorting a struct
+ // array with weight and index stored separately.
+ int16_t weight_index_stack[kMaxRefMvStackSize];
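+ // Worked example of the encoding: SetWeightIndexStackEntry(2, 5) stores
+ // (5 << 3) + 7 - 2 = 45, and reference_mv() later recovers the original
+ // index as 7 - (45 & 7) = 2, wherever sorting has moved the entry.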
+ // In the spec, the weights of all the nearest mvs are incremented by a bonus
+ // weight which is larger than any natural weight, and later the weights of
+ // the mvs are compared with this bonus weight to determine their contexts. We
+ // replace this procedure by introducing |nearest_mv_count|, which records the
+ // count of the nearest mvs. Since all the nearest mvs are in the beginning of
+ // the mv stack, the index of a mv in the mv stack can be compared with
+ // |nearest_mv_count| to get that mv's context.
+ int nearest_mv_count;
+ int ref_mv_count;
+ int ref_mv_index;
+ MotionVector global_mv[2];
+ int num_warp_samples;
+ int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
+};
+
+// A lot of BlockParameters objects are created, so the smallest type is used
+// for each field. The ranges of some fields are documented to justify why
+// their types are large enough.
+struct BlockParameters : public Allocable {
+ BlockSize size;
+ bool skip;
+ // True means that this block will use some default settings (that
+ // correspond to compound prediction) and so most of the mode info is
+ // skipped. False means that the mode info is not skipped.
+ bool skip_mode;
+ bool is_inter;
+ bool is_explicit_compound_type; // comp_group_idx in the spec.
+ bool is_compound_type_average; // compound_idx in the spec.
+ bool is_global_mv_block;
+ bool use_predicted_segment_id; // only valid with temporal update enabled.
+ int8_t segment_id; // segment_id is in the range [0, 7].
+ PredictionMode y_mode;
+ PredictionMode uv_mode;
+ TransformSize transform_size;
+ TransformSize uv_transform_size;
+ InterpolationFilter interpolation_filter[2];
+ ReferenceFrameType reference_frame[2];
+ // The index of this array is as follows:
+ // 0 - Y plane vertical filtering.
+ // 1 - Y plane horizontal filtering.
+ // 2 - U plane (both directions).
+ // 3 - V plane (both directions).
+ uint8_t deblock_filter_level[kFrameLfCount];
+ CompoundMotionVector mv;
+ PaletteModeInfo palette_mode_info;
+ // When |Tile::split_parse_and_decode_| is true, each block gets its own
+ // instance of |prediction_parameters|. When it is false, all the blocks point
+ // to |Tile::prediction_parameters_|. This field is valid only as long as the
+ // block is *being* decoded. The lifetime and usage of this field can be
+ // better understood by following its flow in tile.cc.
+ std::unique_ptr<PredictionParameters> prediction_parameters;
+};
+
+// A five dimensional array used to store the wedge masks. The dimensions are:
+// - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
+// - flip_sign (0 or 1).
+// - wedge_index (0 to 15).
+// - each entry indexed by the three dimensions above is a 2D array of
+// block_width by block_height.
+using WedgeMaskArray =
+ std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>;
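+// For example, a single mask sample would be addressed as
+// wedge_masks[block_size_index][flip_sign][wedge_index][y][x], with the last
+// two indices resolved by Array2D.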
+
+enum GlobalMotionTransformationType : uint8_t {
+ kGlobalMotionTransformationTypeIdentity,
+ kGlobalMotionTransformationTypeTranslation,
+ kGlobalMotionTransformationTypeRotZoom,
+ kGlobalMotionTransformationTypeAffine,
+ kNumGlobalMotionTransformationTypes
+};
+
+// Global motion and warped motion parameters. See the paper for more info:
+// S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally
+// adaptive warped motion compensation in video compression", Proc. IEEE
+// International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017.
+struct GlobalMotion {
+ GlobalMotionTransformationType type;
+ int32_t params[6];
+
+ // Represent two shearing operations. Computed from |params| by SetupShear().
+ //
+ // The least significant six (= kWarpParamRoundingBits) bits are all zeros.
+ // (This means alpha, beta, gamma, and delta could be represented by a 10-bit
+ // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum
+ // value is 32704 = 0x7fc0, the largest int16_t value whose least significant
+ // six bits are all zeros.
+ //
+ // Valid warp parameters (as validated by SetupShear()) have smaller ranges.
+ // Their absolute values are less than 2^14 (= 16384). (This follows from
+ // the warpValid check at the end of Section 7.11.3.6.)
+ //
+ // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which
+ // is outside the range of int16_t. When cast to int16_t, 32768 becomes
+ // -32768. This potential int16_t overflow does not matter because either
+ // 32768 or -32768 causes SetupShear() to return false.
+ int16_t alpha;
+ int16_t beta;
+ int16_t gamma;
+ int16_t delta;
+};
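+
+// Consistency check for the ranges above: with the least significant six bits
+// zero, the representable values are exactly the 10-bit signed range
+// [-512, 511] scaled by 64, i.e. [-32768, 32704] = [INT16_MIN, 0x7fc0].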
+
+// Loop filter parameters:
+//
+// If level[0] and level[1] are both equal to 0, the loop filter process is
+// not invoked.
+//
+// |sharpness| and |delta_enabled| are only used by the loop filter process.
+//
+// The |ref_deltas| and |mode_deltas| arrays are used not only by the loop
+// filter process but also by the reference frame update and loading
+// processes. The loop filter process uses |ref_deltas| and |mode_deltas| only
+// when |delta_enabled| is true.
+struct LoopFilter {
+ // Contains loop filter strength values in the range of [0, 63].
+ std::array<int8_t, kFrameLfCount> level;
+ // Indicates the sharpness level in the range of [0, 7].
+ int8_t sharpness;
+ // Whether the filter level depends on the mode and reference frame used to
+ // predict a block.
+ bool delta_enabled;
+ // Whether additional syntax elements were read that specify which mode and
+ // reference frame deltas are to be updated. Corresponds to the
+ // loop_filter_delta_update field in Section 5.9.11 of the spec.
+ bool delta_update;
+ // Contains the adjustment needed for the filter level based on the chosen
+ // reference frame, in the range of [-64, 63].
+ std::array<int8_t, kNumReferenceFrameTypes> ref_deltas;
+ // Contains the adjustment needed for the filter level based on the chosen
+ // mode, in the range of [-64, 63].
+ std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas;
+};
+
+struct Delta {
+ bool present;
+ uint8_t scale;
+ bool multi;
+};
+
+struct Cdef {
+ uint8_t damping; // damping value from the spec + (bitdepth - 8).
+ uint8_t bits;
+ // All the strength values are the values from the spec and left shifted by
+ // (bitdepth - 8).
+ uint8_t y_primary_strength[kMaxCdefStrengths];
+ uint8_t y_secondary_strength[kMaxCdefStrengths];
+ uint8_t uv_primary_strength[kMaxCdefStrengths];
+ uint8_t uv_secondary_strength[kMaxCdefStrengths];
+};
+
+struct TileInfo {
+ bool uniform_spacing;
+ int sb_rows;
+ int sb_columns;
+ int tile_count;
+ int tile_columns_log2;
+ int tile_columns;
+ int tile_column_start[kMaxTileColumns + 1];
+ // This field is not used by libgav1, but is populated for use by some
+ // hardware decoders. So it must not be removed.
+ int tile_column_width_in_superblocks[kMaxTileColumns + 1];
+ int tile_rows_log2;
+ int tile_rows;
+ int tile_row_start[kMaxTileRows + 1];
+ // This field is not used by libgav1, but is populated for use by some
+ // hardware decoders. So it must not be removed.
+ int tile_row_height_in_superblocks[kMaxTileRows + 1];
+ int16_t context_update_id;
+ uint8_t tile_size_bytes;
+};
+
+struct LoopRestoration {
+ LoopRestorationType type[kMaxPlanes];
+ int unit_size_log2[kMaxPlanes];
+};
+
+// Stores the quantization parameters of Section 5.9.12.
+struct QuantizerParameters {
+ // base_index is in the range [0, 255].
+ uint8_t base_index;
+ int8_t delta_dc[kMaxPlanes];
+ // delta_ac[kPlaneY] is always 0.
+ int8_t delta_ac[kMaxPlanes];
+ bool use_matrix;
+ // The |matrix_level| array is used only when |use_matrix| is true.
+ // matrix_level[plane] specifies the level in the quantizer matrix that
+ // should be used for decoding |plane|. The quantizer matrix has 15 levels,
+ // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If
+ // matrix_level[plane] is 15, the quantizer matrix is not used.
+ int8_t matrix_level[kMaxPlanes];
+};
+
+// The corresponding segment feature constants in the AV1 spec are named
+// SEG_LVL_xxx.
+enum SegmentFeature : uint8_t {
+ kSegmentFeatureQuantizer,
+ kSegmentFeatureLoopFilterYVertical,
+ kSegmentFeatureLoopFilterYHorizontal,
+ kSegmentFeatureLoopFilterU,
+ kSegmentFeatureLoopFilterV,
+ kSegmentFeatureReferenceFrame,
+ kSegmentFeatureSkip,
+ kSegmentFeatureGlobalMv,
+ kSegmentFeatureMax
+};
+
+struct Segmentation {
+ // 5.11.14.
+ // Returns true if the feature is enabled in the segment.
+ bool FeatureActive(int segment_id, SegmentFeature feature) const {
+ return enabled && segment_id < kMaxSegments &&
+ feature_enabled[segment_id][feature];
+ }
+
+ // Returns true if the feature is signed.
+ static bool FeatureSigned(SegmentFeature feature) {
+ // Only the first five segment features are signed, so this comparison
+ // suffices.
+ return feature <= kSegmentFeatureLoopFilterV;
+ }
+
+ bool enabled;
+ bool update_map;
+ bool update_data;
+ bool temporal_update;
+ // True if the segment id will be read before the skip syntax element. False
+ // if the skip syntax element will be read first.
+ bool segment_id_pre_skip;
+ // The highest numbered segment id that has some enabled feature. Used as
+ // the upper bound for decoding segment ids.
+ int8_t last_active_segment_id;
+
+ bool feature_enabled[kMaxSegments][kSegmentFeatureMax];
+ int16_t feature_data[kMaxSegments][kSegmentFeatureMax];
+ bool lossless[kMaxSegments];
+ // Cached values of get_qindex(1, segmentId), to be consumed by
+ // Tile::ReadTransformType(). The values are in the range [0, 255].
+ uint8_t qindex[kMaxSegments];
+};
+
+// Section 6.8.20.
+// Note: In the spec, the film grain section uses YCbCr in variable names,
+// such as num_cb_points and num_cr_points. To keep it consistent with other
+// parts of the code, we use YUV, i.e., num_u_points, num_v_points, etc.
+struct FilmGrainParams {
+ bool apply_grain;
+ bool update_grain;
+ bool chroma_scaling_from_luma;
+ bool overlap_flag;
+ bool clip_to_restricted_range;
+
+ uint8_t num_y_points; // [0, 14].
+ uint8_t num_u_points; // [0, 10].
+ uint8_t num_v_points; // [0, 10].
+ // Must be in [0, 255] and in increasing order (for 10/12 bit, values are
+ // divided by 4 or 16).
+ uint8_t point_y_value[14];
+ uint8_t point_y_scaling[14];
+ uint8_t point_u_value[10];
+ uint8_t point_u_scaling[10];
+ uint8_t point_v_value[10];
+ uint8_t point_v_scaling[10];
+
+ uint8_t chroma_scaling; // [8, 11].
+ uint8_t auto_regression_coeff_lag; // [0, 3].
+ int8_t auto_regression_coeff_y[24]; // [-128, 127]
+ int8_t auto_regression_coeff_u[25]; // [-128, 127]
+ int8_t auto_regression_coeff_v[25]; // [-128, 127]
+ // Shift value: auto regression coeffs range
+ // 6: [-2, 2)
+ // 7: [-1, 1)
+ // 8: [-0.5, 0.5)
+ // 9: [-0.25, 0.25)
+ uint8_t auto_regression_shift;
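+ // For example, with |auto_regression_shift| equal to 6, a stored
+ // coefficient of 32 represents 32 / 2^6 = 0.5.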
+
+ uint16_t grain_seed;
+ int reference_index;
+ int grain_scale_shift;
+ // These multipliers are encoded as nonnegative values by adding 128 first.
+ // The 128 is subtracted during parsing.
+ int8_t u_multiplier; // [-128, 127]
+ int8_t u_luma_multiplier; // [-128, 127]
+ // These offsets are encoded as nonnegative values by adding 256 first. The
+ // 256 is subtracted during parsing.
+ int16_t u_offset; // [-256, 255]
+ int8_t v_multiplier; // [-128, 127]
+ int8_t v_luma_multiplier; // [-128, 127]
+ int16_t v_offset; // [-256, 255]
+};
+
+struct ObuFrameHeader {
+ uint16_t display_frame_id;
+ uint16_t current_frame_id;
+ int64_t frame_offset;
+ uint16_t expected_frame_id[kNumInterReferenceFrameTypes];
+ int32_t width;
+ int32_t height;
+ int32_t columns4x4;
+ int32_t rows4x4;
+ // The render size (render_width and render_height) is a hint to the
+ // application about the desired display size. It has no effect on the
+ // decoding process.
+ int32_t render_width;
+ int32_t render_height;
+ int32_t upscaled_width;
+ LoopRestoration loop_restoration;
+ uint32_t buffer_removal_time[kMaxOperatingPoints];
+ uint32_t frame_presentation_time;
+ // Note: global_motion[0] (for kReferenceFrameIntra) is not used.
+ std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion;
+ TileInfo tile_info;
+ QuantizerParameters quantizer;
+ Segmentation segmentation;
+ bool show_existing_frame;
+ // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is
+ // true.
+ int8_t frame_to_show;
+ FrameType frame_type;
+ bool show_frame;
+ bool showable_frame;
+ bool error_resilient_mode;
+ bool enable_cdf_update;
+ bool frame_size_override_flag;
+ // The order_hint syntax element in the uncompressed header. If
+ // show_existing_frame is false, the OrderHint variable in the spec is equal
+ // to this field, and so this field can be used in place of OrderHint when
+ // show_existing_frame is known to be false, such as during tile decoding.
+ uint8_t order_hint;
+ int8_t primary_reference_frame;
+ bool render_and_frame_size_different;
+ bool use_superres;
+ uint8_t superres_scale_denominator;
+ bool allow_screen_content_tools;
+ bool allow_intrabc;
+ bool frame_refs_short_signaling;
+ // A bitmask that specifies which reference frame slots will be updated with
+ // the current frame after it is decoded.
+ uint8_t refresh_frame_flags;
+ static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 ==
+ kNumReferenceFrameTypes,
+ "");
+ bool found_reference;
+ int8_t force_integer_mv;
+ bool allow_high_precision_mv;
+ InterpolationFilter interpolation_filter;
+ bool is_motion_mode_switchable;
+ bool use_ref_frame_mvs;
+ bool enable_frame_end_update_cdf;
+ // True if all segments are losslessly encoded at the coded resolution.
+ bool coded_lossless;
+ // True if all segments are losslessly encoded at the upscaled resolution.
+ bool upscaled_lossless;
+ TxMode tx_mode;
+ // True means that the mode info for inter blocks contains the syntax
+ // element comp_mode that indicates whether to use single or compound
+ // prediction. False means that all inter blocks will use single prediction.
+ bool reference_mode_select;
+ // The frames to use for compound prediction when skip_mode is true.
+ ReferenceFrameType skip_mode_frame[2];
+ bool skip_mode_present;
+ bool reduced_tx_set;
+ bool allow_warped_motion;
+ Delta delta_q;
+ Delta delta_lf;
+ // A valid value of reference_frame_index[i] is in the range [0, 7]. -1
+ // indicates an invalid value.
+ int8_t reference_frame_index[kNumInterReferenceFrameTypes];
+ // The ref_order_hint[ i ] syntax element in the uncompressed header.
+ // Specifies the expected output order hint for each reference frame.
+ uint8_t reference_order_hint[kNumReferenceFrameTypes];
+ LoopFilter loop_filter;
+ Cdef cdef;
+ FilmGrainParams film_grain_params;
+};
+
+} // namespace libgav1
+#endif // LIBGAV1_SRC_UTILS_TYPES_H_
diff --git a/src/utils/unbounded_queue.h b/src/utils/unbounded_queue.h
new file mode 100644
index 0000000..fa0d303
--- /dev/null
+++ b/src/utils/unbounded_queue.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
+#define LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <new>
+#include <utility>
+
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+// A FIFO queue of an unbounded capacity.
+//
+// This implementation uses the general approach used in std::deque
+// implementations. See, for example,
+// https://stackoverflow.com/questions/6292332/what-really-is-a-deque-in-stl
+//
+// It is much simpler because it just needs to support the queue interface.
+// The blocks are chained into a circular list, not managed by a "map". It
+// does not shrink the internal buffer.
+//
+// An alternative implementation approach is a resizable circular array. See,
+// for example, ResizingArrayQueue.java in https://algs4.cs.princeton.edu/code/
+// and base::circular_deque in Chromium's base/containers library.
+template <typename T>
+class UnboundedQueue {
+ public:
+ UnboundedQueue() = default;
+
+ // Move only.
+ UnboundedQueue(UnboundedQueue&& other)
+ : first_block_(other.first_block_),
+ front_(other.front_),
+ last_block_(other.last_block_),
+ back_(other.back_) {
+ other.first_block_ = nullptr;
+ other.front_ = 0;
+ other.last_block_ = nullptr;
+ other.back_ = 0;
+ }
+ UnboundedQueue& operator=(UnboundedQueue&& other) {
+ if (this != &other) {
+ Destroy();
+ first_block_ = other.first_block_;
+ front_ = other.front_;
+ last_block_ = other.last_block_;
+ back_ = other.back_;
+ other.first_block_ = nullptr;
+ other.front_ = 0;
+ other.last_block_ = nullptr;
+ other.back_ = 0;
+ }
+ return *this;
+ }
+
+ ~UnboundedQueue() { Destroy(); }
+
+ // Allocates two Blocks upfront because most access patterns require at
+ // least two Blocks. Returns false if the allocation of the Blocks failed.
+ LIBGAV1_MUST_USE_RESULT bool Init() {
+ std::unique_ptr<Block> new_block0(new (std::nothrow) Block);
+ std::unique_ptr<Block> new_block1(new (std::nothrow) Block);
+ if (new_block0 == nullptr || new_block1 == nullptr) return false;
+ first_block_ = last_block_ = new_block0.release();
+ new_block1->next = first_block_;
+ last_block_->next = new_block1.release();
+ return true;
+ }
+
+ // Checks if the queue has room for a new element. If the queue is full,
+ // tries to grow it. Returns false if the queue is full and the attempt to
+ // grow it failed.
+ //
+ // NOTE: GrowIfNeeded() must be called before each call to Push(). This
+ // inconvenient design is necessary to guarantee a successful Push() call.
+ //
+ // Push(T&& value) is often called with the argument std::move(value). The
+ // moved-from object |value| won't be usable afterwards, so it would be
+ // problematic if Push(T&& value) failed and we lost access to the original
+ // |value| object.
+ LIBGAV1_MUST_USE_RESULT bool GrowIfNeeded() {
+ assert(last_block_ != nullptr);
+ if (back_ == kBlockCapacity) {
+ if (last_block_->next == first_block_) {
+ // All Blocks are in use.
+ std::unique_ptr<Block> new_block(new (std::nothrow) Block);
+ if (new_block == nullptr) return false;
+ new_block->next = first_block_;
+ last_block_->next = new_block.release();
+ }
+ last_block_ = last_block_->next;
+ back_ = 0;
+ }
+ return true;
+ }
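+
+ // A usage sketch of the GrowIfNeeded()/Push() contract (|queue| and |job|
+ // are hypothetical):
+ //   if (!queue.GrowIfNeeded()) return false;  // |job| is still usable.
+ //   queue.Push(std::move(job));               // Guaranteed to succeed.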
+
+ // Pushes the element |value| to the end of the queue. It is an error to call
+ // Push() when the queue is full.
+ void Push(const T& value) {
+ assert(last_block_ != nullptr);
+ assert(back_ < kBlockCapacity);
+ T* elements = reinterpret_cast<T*>(last_block_->buffer);
+ new (&elements[back_++]) T(value);
+ }
+
+ void Push(T&& value) {
+ assert(last_block_ != nullptr);
+ assert(back_ < kBlockCapacity);
+ T* elements = reinterpret_cast<T*>(last_block_->buffer);
+ new (&elements[back_++]) T(std::move(value));
+ }
+
+ // Returns the element at the front of the queue. It is an error to call
+ // Front() when the queue is empty.
+ T& Front() {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ return elements[front_];
+ }
+
+ const T& Front() const {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ return elements[front_];
+ }
+
+ // Removes the element at the front of the queue from the queue. It is an
+ // error to call Pop() when the queue is empty.
+ void Pop() {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ elements[front_++].~T();
+ if (front_ == kBlockCapacity) {
+ // The first block has become empty.
+ front_ = 0;
+ if (first_block_ == last_block_) {
+ // Only one Block is in use. Simply reset back_.
+ back_ = 0;
+ } else {
+ first_block_ = first_block_->next;
+ }
+ }
+ }
+
+ // Returns true if the queue is empty.
+ bool Empty() const { return first_block_ == last_block_ && front_ == back_; }
+
+ private:
+ // kBlockCapacity is the maximum number of elements each Block can hold.
+ // sizeof(void*) is subtracted from 2048 to account for the |next| pointer in
+ // the Block struct.
+ //
+ // In Linux x86_64, sizeof(std::function<void()>) is 32, so each Block can
+ // hold 63 std::function<void()> objects.
+ //
+ // NOTE: The corresponding value in <deque> in libc++ revision
+ // 245b5ba3448b9d3f6de5962066557e253a6bc9a4 is:
+ // template <class _ValueType, class _DiffType>
+ // struct __deque_block_size {
+ // static const _DiffType value =
+ // sizeof(_ValueType) < 256 ? 4096 / sizeof(_ValueType) : 16;
+ // };
+ //
+ // Note that 4096 / 256 = 16, so apparently this expression is intended to
+ // ensure the block size is at least 4096 bytes and each block can hold at
+ // least 16 elements.
+ static constexpr size_t kBlockCapacity =
+ (sizeof(T) < 128) ? (2048 - sizeof(void*)) / sizeof(T) : 16;
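+ // For example, with 8-byte pointers and sizeof(std::function<void()>) == 32
+ // as noted above: (2048 - 8) / 32 = 63 elements per Block.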
+
+ struct Block : public Allocable {
+ alignas(T) char buffer[kBlockCapacity * sizeof(T)];
+ Block* next;
+ };
+
+ void Destroy() {
+ if (first_block_ == nullptr) return; // An uninitialized queue.
+
+ // First free the unused blocks, which are located after last_block_ and
+ // before first_block_.
+ Block* block = last_block_->next;
+ // Cut the circular list open after last_block_.
+ last_block_->next = nullptr;
+ while (block != first_block_) {
+ Block* next = block->next;
+ delete block;
+ block = next;
+ }
+
+ // Then free the used blocks, destructing any elements they still hold.
+ while (block != nullptr) {
+ const size_t begin = (block == first_block_) ? front_ : 0;
+ const size_t end = (block == last_block_) ? back_ : kBlockCapacity;
+ T* elements = reinterpret_cast<T*>(block->buffer);
+ for (size_t i = begin; i < end; ++i) {
+ elements[i].~T();
+ }
+ Block* next = block->next;
+ delete block;
+ block = next;
+ }
+ }
+
+ // Blocks are chained in a circular singly-linked list. If the list of Blocks
+ // is empty, both first_block_ and last_block_ are null pointers. If the list
+ // is nonempty, first_block_ points to the first used Block and last_block_
+ // points to the last used Block.
+ //
+ // Invariant: If Init() is called and succeeds, the list of Blocks is always
+ // nonempty. This allows all methods (except the destructor) to avoid null
+ // pointer checks for first_block_ and last_block_.
+ Block* first_block_ = nullptr;
+ // The index of the element in first_block_ to be removed by Pop().
+ size_t front_ = 0;
+ Block* last_block_ = nullptr;
+ // The index in last_block_ where the new element is inserted by Push().
+ size_t back_ = 0;
+};
+
+#if !LIBGAV1_CXX17
+template <typename T>
+constexpr size_t UnboundedQueue<T>::kBlockCapacity;
+#endif
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
diff --git a/src/utils/vector.h b/src/utils/vector.h
new file mode 100644
index 0000000..e211240
--- /dev/null
+++ b/src/utils/vector.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// libgav1::Vector implementation
+
+#ifndef LIBGAV1_SRC_UTILS_VECTOR_H_
+#define LIBGAV1_SRC_UTILS_VECTOR_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+#include <utility>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+namespace internal {
+
+static constexpr size_t kMinVectorAllocation = 16;
+
+// Returns the smallest power of two greater than or equal to 'value'.
+inline size_t NextPow2(size_t value) {
+ if (value == 0) return 0;
+ --value;
+ for (size_t i = 1; i < sizeof(size_t) * 8; i *= 2) value |= value >> i;
+ return value + 1;
+}
+
+// Returns the smallest capacity greater than or equal to 'value'.
+inline size_t NextCapacity(size_t value) {
+ if (value == 0) return 0;
+ if (value <= kMinVectorAllocation) return kMinVectorAllocation;
+ return NextPow2(value);
+}
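+
+// For example: NextPow2(33) == 64, NextPow2(64) == 64, NextCapacity(1) == 16
+// (the kMinVectorAllocation floor), and NextCapacity(33) == 64.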
+
+//------------------------------------------------------------------------------
+// Data structure equivalent to std::vector, except that on memory allocation
+// failure it returns false and is left in its last valid state.
+// std::vector with a custom allocator cannot fill this need without
+// exceptions.
+
+template <typename T>
+class VectorBase {
+ public:
+ using iterator = T*;
+ using const_iterator = const T*;
+
+ VectorBase() noexcept = default;
+ // Move only.
+ VectorBase(const VectorBase&) = delete;
+ VectorBase& operator=(const VectorBase&) = delete;
+ VectorBase(VectorBase&& other) noexcept
+ : items_(other.items_),
+ capacity_(other.capacity_),
+ num_items_(other.num_items_) {
+ other.items_ = nullptr;
+ other.capacity_ = 0;
+ other.num_items_ = 0;
+ }
+ VectorBase& operator=(VectorBase&& other) noexcept {
+ if (this != &other) {
+ clear();
+ free(items_);
+ items_ = other.items_;
+ capacity_ = other.capacity_;
+ num_items_ = other.num_items_;
+ other.items_ = nullptr;
+ other.capacity_ = 0;
+ other.num_items_ = 0;
+ }
+ return *this;
+ }
+ ~VectorBase() {
+ clear();
+ free(items_);
+ }
+
+ // Reallocates just enough memory if needed so that 'new_cap' items can fit.
+ LIBGAV1_MUST_USE_RESULT bool reserve(size_t new_cap) {
+ if (capacity_ < new_cap) {
+ T* const new_items = static_cast<T*>(malloc(new_cap * sizeof(T)));
+ if (new_items == nullptr) return false;
+ if (num_items_ > 0) {
+ if (std::is_trivial<T>::value) {
+ // Cast |new_items| and |items_| to void* to avoid the GCC
+ // -Wclass-memaccess warning and additionally the
+ // bugprone-undefined-memory-manipulation clang-tidy warning. The
+ // memcpy is safe because T is a trivial type.
+ memcpy(static_cast<void*>(new_items),
+ static_cast<const void*>(items_), num_items_ * sizeof(T));
+ } else {
+ for (size_t i = 0; i < num_items_; ++i) {
+ new (&new_items[i]) T(std::move(items_[i]));
+ items_[i].~T();
+ }
+ }
+ }
+ free(items_);
+ items_ = new_items;
+ capacity_ = new_cap;
+ }
+ return true;
+ }
+
+ // Reallocates less memory so that only the existing items can fit.
+ bool shrink_to_fit() {
+ if (capacity_ == num_items_) return true;
+ if (num_items_ == 0) {
+ free(items_);
+ items_ = nullptr;
+ capacity_ = 0;
+ return true;
+ }
+ const size_t previous_capacity = capacity_;
+ capacity_ = 0; // Force reserve() to allocate and copy.
+ if (reserve(num_items_)) return true;
+ capacity_ = previous_capacity;
+ return false;
+ }
+
+ // Constructs a new item by copy constructor. May reallocate if
+ // 'resize_if_needed' is true.
+ LIBGAV1_MUST_USE_RESULT bool push_back(const T& value,
+ bool resize_if_needed = true) {
+ if (num_items_ >= capacity_ &&
+ (!resize_if_needed ||
+ !reserve(internal::NextCapacity(num_items_ + 1)))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(value);
+ ++num_items_;
+ return true;
+ }
+
+ // Constructs a new item by copy constructor. reserve() must have been called
+ // with a sufficient capacity.
+ //
+ // WARNING: No error checking is performed.
+ void push_back_unchecked(const T& value) {
+ assert(num_items_ < capacity_);
+ new (&items_[num_items_]) T(value);
+ ++num_items_;
+ }
+
+ // Constructs a new item by move constructor. May reallocate if
+ // 'resize_if_needed' is true.
+ LIBGAV1_MUST_USE_RESULT bool push_back(T&& value,
+ bool resize_if_needed = true) {
+ if (num_items_ >= capacity_ &&
+ (!resize_if_needed ||
+ !reserve(internal::NextCapacity(num_items_ + 1)))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(std::move(value));
+ ++num_items_;
+ return true;
+ }
+
+ // Constructs a new item by move constructor. reserve() must have been called
+ // with a sufficient capacity.
+ //
+ // WARNING: No error checking is performed.
+ void push_back_unchecked(T&& value) {
+ assert(num_items_ < capacity_);
+ new (&items_[num_items_]) T(std::move(value));
+ ++num_items_;
+ }
+
+ // Constructs a new item in place by forwarding the arguments args... to the
+ // constructor. May reallocate.
+ template <typename... Args>
+ LIBGAV1_MUST_USE_RESULT bool emplace_back(Args&&... args) {
+ if (num_items_ >= capacity_ &&
+ !reserve(internal::NextCapacity(num_items_ + 1))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(std::forward<Args>(args)...);
+ ++num_items_;
+ return true;
+ }
+
+ // Destructs the last item.
+ void pop_back() {
+ --num_items_;
+ items_[num_items_].~T();
+ }
+
+ // Destructs the item at 'pos'.
+ void erase(iterator pos) { erase(pos, pos + 1); }
+
+ // Destructs the items in [first,last).
+ void erase(iterator first, iterator last) {
+ for (iterator it = first; it != last; ++it) it->~T();
+ if (last != end()) {
+ if (std::is_trivial<T>::value) {
+ // Cast |first| and |last| to void* to avoid the GCC
+ // -Wclass-memaccess warning and additionally the
+ // bugprone-undefined-memory-manipulation clang-tidy warning. The
+ // memmove is safe because T is a trivial type.
+ memmove(static_cast<void*>(first), static_cast<const void*>(last),
+ (end() - last) * sizeof(T));
+ } else {
+ for (iterator it_src = last, it_dst = first; it_src != end();
+ ++it_src, ++it_dst) {
+ new (it_dst) T(std::move(*it_src));
+ it_src->~T();
+ }
+ }
+ }
+ num_items_ -= std::distance(first, last);
+ }
+
+ // Destructs all the items.
+ void clear() { erase(begin(), end()); }
+
+ // Destroys (including deallocating) all the items.
+ void reset() {
+ clear();
+ if (!shrink_to_fit()) assert(false);
+ }
+
+ // Accessors
+ bool empty() const { return (num_items_ == 0); }
+ size_t size() const { return num_items_; }
+ size_t capacity() const { return capacity_; }
+
+ T* data() { return items_; }
+ T& front() { return items_[0]; }
+ T& back() { return items_[num_items_ - 1]; }
+ T& operator[](size_t i) { return items_[i]; }
+ T& at(size_t i) { return items_[i]; }
+ const T* data() const { return items_; }
+ const T& front() const { return items_[0]; }
+ const T& back() const { return items_[num_items_ - 1]; }
+ const T& operator[](size_t i) const { return items_[i]; }
+ const T& at(size_t i) const { return items_[i]; }
+
+ iterator begin() { return &items_[0]; }
+ const_iterator begin() const { return &items_[0]; }
+ iterator end() { return &items_[num_items_]; }
+ const_iterator end() const { return &items_[num_items_]; }
+
+ void swap(VectorBase& b) {
+ // Although not necessary here, adding "using std::swap;" and then calling
+ // swap() without namespace qualification is recommended. See Effective
+ // C++, Item 25.
+ using std::swap;
+ swap(items_, b.items_);
+ swap(capacity_, b.capacity_);
+ swap(num_items_, b.num_items_);
+ }
+
+ protected:
+ T* items_ = nullptr;
+ size_t capacity_ = 0;
+ size_t num_items_ = 0;
+};
+
+} // namespace internal
+
+//------------------------------------------------------------------------------
+
+// Vector class that does *NOT* construct the content on resize().
+// Should be reserved to plain old data.
+template <typename T>
+class VectorNoCtor : public internal::VectorBase<T> {
+ public:
+ // Grows or shrinks the vector so that 'new_num_items' exist. Items added
+ // when growing are *not* constructed (this class is meant for plain old
+ // data); items removed when shrinking are destructed.
+ // Allocated capacity grows in power-of-two steps.
+ LIBGAV1_MUST_USE_RESULT bool resize(size_t new_num_items) {
+ using super = internal::VectorBase<T>;
+ if (super::num_items_ < new_num_items) {
+ if (super::capacity_ < new_num_items) {
+ if (!super::reserve(internal::NextCapacity(new_num_items))) {
+ return false;
+ }
+ }
+ super::num_items_ = new_num_items;
+ } else {
+ while (super::num_items_ > new_num_items) {
+ --super::num_items_;
+ super::items_[super::num_items_].~T();
+ }
+ }
+ return true;
+ }
+};
+
+// This generic vector class will call the constructors.
+template <typename T>
+class Vector : public internal::VectorBase<T> {
+ public:
+ // Constructs or destructs items so that 'new_num_items' exist.
+ // Allocated capacity grows in power-of-two steps.
+ LIBGAV1_MUST_USE_RESULT bool resize(size_t new_num_items) {
+ using super = internal::VectorBase<T>;
+ if (super::num_items_ < new_num_items) {
+ if (super::capacity_ < new_num_items) {
+ if (!super::reserve(internal::NextCapacity(new_num_items))) {
+ return false;
+ }
+ }
+ while (super::num_items_ < new_num_items) {
+ new (&super::items_[super::num_items_]) T();
+ ++super::num_items_;
+ }
+ } else {
+ while (super::num_items_ > new_num_items) {
+ --super::num_items_;
+ super::items_[super::num_items_].~T();
+ }
+ }
+ return true;
+ }
+};
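+
+// A minimal usage sketch (names are illustrative): allocation failure is
+// reported through a false return value rather than an exception, so every
+// fallible call must be checked.
+//
+//   Vector<int> v;
+//   if (!v.reserve(100)) return false;  // Allocation failed.
+//   for (int i = 0; i < 100; ++i) v.push_back_unchecked(i);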
+
+//------------------------------------------------------------------------------
+
+// Define non-member swap() functions in the namespace in which VectorNoCtor
+// and Vector are implemented. See Effective C++, Item 25.
+
+template <typename T>
+void swap(VectorNoCtor<T>& a, VectorNoCtor<T>& b) {
+ a.swap(b);
+}
+
+template <typename T>
+void swap(Vector<T>& a, Vector<T>& b) {
+ a.swap(b);
+}
+
+//------------------------------------------------------------------------------
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_VECTOR_H_