author     qinxialei <xialeiqin@gmail.com>  2020-10-29 11:26:59 +0800
committer  qinxialei <xialeiqin@gmail.com>  2020-10-29 11:26:59 +0800
commit     e8d277081293b6fb2a5d469616baaa7a06f52496 (patch)
tree       1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/utils
Import Upstream version 0.16.0
Diffstat (limited to 'src/utils')
-rw-r--r--  src/utils/array_2d.h  131
-rw-r--r--  src/utils/bit_mask_set.h  79
-rw-r--r--  src/utils/bit_reader.cc  117
-rw-r--r--  src/utils/bit_reader.h  49
-rw-r--r--  src/utils/block_parameters_holder.cc  107
-rw-r--r--  src/utils/block_parameters_holder.h  85
-rw-r--r--  src/utils/blocking_counter.h  97
-rw-r--r--  src/utils/common.h  534
-rw-r--r--  src/utils/compiler_attributes.h  181
-rw-r--r--  src/utils/constants.cc  874
-rw-r--r--  src/utils/constants.h  744
-rw-r--r--  src/utils/cpu.cc  84
-rw-r--r--  src/utils/cpu.h  107
-rw-r--r--  src/utils/dynamic_buffer.h  82
-rw-r--r--  src/utils/entropy_decoder.cc  1117
-rw-r--r--  src/utils/entropy_decoder.h  123
-rw-r--r--  src/utils/executor.cc  21
-rw-r--r--  src/utils/executor.h  36
-rw-r--r--  src/utils/libgav1_utils.cmake  72
-rw-r--r--  src/utils/logging.cc  65
-rw-r--r--  src/utils/logging.h  85
-rw-r--r--  src/utils/memory.h  237
-rw-r--r--  src/utils/parameter_tree.cc  133
-rw-r--r--  src/utils/parameter_tree.h  113
-rw-r--r--  src/utils/queue.h  105
-rw-r--r--  src/utils/raw_bit_reader.cc  224
-rw-r--r--  src/utils/raw_bit_reader.h  78
-rw-r--r--  src/utils/reference_info.h  92
-rw-r--r--  src/utils/segmentation.cc  31
-rw-r--r--  src/utils/segmentation.h  32
-rw-r--r--  src/utils/segmentation_map.cc  49
-rw-r--r--  src/utils/segmentation_map.h  71
-rw-r--r--  src/utils/stack.h  59
-rw-r--r--  src/utils/threadpool.cc  323
-rw-r--r--  src/utils/threadpool.h  167
-rw-r--r--  src/utils/types.h  525
-rw-r--r--  src/utils/unbounded_queue.h  245
-rw-r--r--  src/utils/vector.h  352
38 files changed, 7626 insertions(+), 0 deletions(-)
diff --git a/src/utils/array_2d.h b/src/utils/array_2d.h
new file mode 100644
index 0000000..2df6241
--- /dev/null
+++ b/src/utils/array_2d.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_ARRAY_2D_H_
+#define LIBGAV1_SRC_UTILS_ARRAY_2D_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <type_traits>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// Exposes a 1D allocated memory buffer as a 2D array.
+template <typename T>
+class Array2DView {
+ public:
+ Array2DView() = default;
+ Array2DView(int rows, int columns, T* const data) {
+ Reset(rows, columns, data);
+ }
+
+ // Copyable and Movable.
+ Array2DView(const Array2DView& rhs) = default;
+ Array2DView& operator=(const Array2DView& rhs) = default;
+
+ void Reset(int rows, int columns, T* const data) {
+ rows_ = rows;
+ columns_ = columns;
+ data_ = data;
+ }
+
+ int rows() const { return rows_; }
+ int columns() const { return columns_; }
+
+ T* operator[](int row) { return const_cast<T*>(GetRow(row)); }
+
+ const T* operator[](int row) const { return GetRow(row); }
+
+ private:
+ const T* GetRow(int row) const {
+ assert(row < rows_);
+ const ptrdiff_t offset = static_cast<ptrdiff_t>(row) * columns_;
+ return data_ + offset;
+ }
+
+ int rows_ = 0;
+ int columns_ = 0;
+ T* data_ = nullptr;
+};
+
+// Allocates and owns the contiguous memory and exposes an Array2DView of
+// dimension |rows| x |columns|.
+template <typename T>
+class Array2D {
+ public:
+ Array2D() = default;
+
+ // Copyable and Movable.
+ Array2D(const Array2D& rhs) = default;
+ Array2D& operator=(const Array2D& rhs) = default;
+
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows, int columns,
+ bool zero_initialize = true) {
+ size_ = rows * columns;
+ // If T is not a trivial type, we should always reallocate the data_
+ // buffer, so that the destructors of any existing objects are invoked.
+ if (!std::is_trivial<T>::value || allocated_size_ < size_) {
+ // Note: This invokes the global operator new if T is a non-class type,
+ // such as integer or enum types, or a class type that is not derived
+ // from libgav1::Allocable, such as std::unique_ptr. If we enforce a
+ // maximum allocation size or keep track of our own heap memory
+ // consumption, we will need to handle the allocations here that use the
+ // global operator new.
+ if (zero_initialize) {
+ data_.reset(new (std::nothrow) T[size_]());
+ } else {
+ data_.reset(new (std::nothrow) T[size_]);
+ }
+ if (data_ == nullptr) {
+ allocated_size_ = 0;
+ return false;
+ }
+ allocated_size_ = size_;
+ } else if (zero_initialize) {
+ // Cast the data_ pointer to void* to avoid the GCC -Wclass-memaccess
+ // warning. The memset is safe because T is a trivial type.
+ void* dest = data_.get();
+ memset(dest, 0, sizeof(T) * size_);
+ }
+ data_view_.Reset(rows, columns, data_.get());
+ return true;
+ }
+
+ int rows() const { return data_view_.rows(); }
+ int columns() const { return data_view_.columns(); }
+ size_t size() const { return size_; }
+ T* data() { return data_.get(); }
+ const T* data() const { return data_.get(); }
+
+ T* operator[](int row) { return data_view_[row]; }
+
+ const T* operator[](int row) const { return data_view_[row]; }
+
+ private:
+ std::unique_ptr<T[]> data_ = nullptr;
+ size_t allocated_size_ = 0;
+ size_t size_ = 0;
+ Array2DView<T> data_view_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_ARRAY_2D_H_
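A minimal usage sketch for the Array2D class above (illustrative only, not part of the imported sources; FillIdentity is a hypothetical helper). Because allocation uses std::nothrow, Reset() can fail and its LIBGAV1_MUST_USE_RESULT return value has to be checked:

#include "src/utils/array_2d.h"

// Hypothetical example: builds an n x n identity matrix.
bool FillIdentity(libgav1::Array2D<int>* matrix, int n) {
  // Reset() allocates (or reuses) the contiguous buffer; check for failure.
  if (!matrix->Reset(n, n, /*zero_initialize=*/true)) return false;
  for (int i = 0; i < n; ++i) {
    (*matrix)[i][i] = 1;  // operator[] returns a pointer to row i.
  }
  return true;
}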
diff --git a/src/utils/bit_mask_set.h b/src/utils/bit_mask_set.h
new file mode 100644
index 0000000..7371753
--- /dev/null
+++ b/src/utils/bit_mask_set.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
+#define LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+// This class is used to check if a given value is equal to one of the several
+// predetermined values using a bit mask instead of a chain of comparisons and
+// ||s. This usually results in fewer instructions.
+//
+// Usage:
+// constexpr BitMaskSet set(value1, value2);
+// set.Contains(value1) => returns true.
+// set.Contains(value3) => returns false.
+class BitMaskSet {
+ public:
+ explicit constexpr BitMaskSet(uint32_t mask) : mask_(mask) {}
+
+ constexpr BitMaskSet(int v1, int v2) : mask_((1U << v1) | (1U << v2)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
+ int v8, int v9)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7) | (1U << v8) | (1U << v9)) {}
+
+ constexpr BitMaskSet(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
+ int v8, int v9, int v10)
+ : mask_((1U << v1) | (1U << v2) | (1U << v3) | (1U << v4) | (1U << v5) |
+ (1U << v6) | (1U << v7) | (1U << v8) | (1U << v9) | (1U << v10)) {
+ }
+
+ constexpr bool Contains(uint8_t value) const {
+ return MaskContainsValue(mask_, value);
+ }
+
+ static constexpr bool MaskContainsValue(uint32_t mask, uint8_t value) {
+ return ((mask >> value) & 1) != 0;
+ }
+
+ private:
+ const uint32_t mask_;
+};
+
+} // namespace libgav1
+#endif // LIBGAV1_SRC_UTILS_BIT_MASK_SET_H_
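To illustrate the "fewer instructions" claim above, a small sketch (not part of the imported sources; kSmallSet and IsSmall are hypothetical names). The set {3, 5, 7} folds into the compile-time constant mask 0xA8, so a membership test is one shift plus one AND:

#include <cstdint>

#include "src/utils/bit_mask_set.h"

constexpr libgav1::BitMaskSet kSmallSet(3, 5, 7);  // mask_ == 0xA8.

// Equivalent to (x == 3 || x == 5 || x == 7), without the comparison chain.
bool IsSmall(uint8_t x) { return kSmallSet.Contains(x); }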
diff --git a/src/utils/bit_reader.cc b/src/utils/bit_reader.cc
new file mode 100644
index 0000000..3234128
--- /dev/null
+++ b/src/utils/bit_reader.cc
@@ -0,0 +1,117 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/bit_reader.h"
+
+#include <cassert>
+#include <cstdint>
+
+#include "src/utils/common.h"
+
+namespace libgav1 {
+namespace {
+
+bool Assign(int* const value, int assignment, bool return_value) {
+ *value = assignment;
+ return return_value;
+}
+
+// 5.9.29.
+int InverseRecenter(int r, int v) {
+ if (v > (r << 1)) {
+ return v;
+ }
+ if ((v & 1) != 0) {
+ return r - ((v + 1) >> 1);
+ }
+ return r + (v >> 1);
+}
+
+} // namespace
+
+bool BitReader::DecodeSignedSubexpWithReference(int low, int high,
+ int reference, int control,
+ int* const value) {
+ if (!DecodeUnsignedSubexpWithReference(high - low, reference - low, control,
+ value)) {
+ return false;
+ }
+ *value += low;
+ return true;
+}
+
+bool BitReader::DecodeUniform(int n, int* const value) {
+ if (n <= 1) {
+ return Assign(value, 0, true);
+ }
+ const int w = FloorLog2(n) + 1;
+ const int m = (1 << w) - n;
+ assert(w - 1 < 32);
+ const int v = static_cast<int>(ReadLiteral(w - 1));
+ if (v == -1) {
+ return Assign(value, 0, false);
+ }
+ if (v < m) {
+ return Assign(value, v, true);
+ }
+ const int extra_bit = ReadBit();
+ if (extra_bit == -1) {
+ return Assign(value, 0, false);
+ }
+ return Assign(value, (v << 1) - m + extra_bit, true);
+}
+
+bool BitReader::DecodeUnsignedSubexpWithReference(int mx, int reference,
+ int control,
+ int* const value) {
+ int v;
+ if (!DecodeSubexp(mx, control, &v)) return false;
+ if ((reference << 1) <= mx) {
+ *value = InverseRecenter(reference, v);
+ } else {
+ *value = mx - 1 - InverseRecenter(mx - 1 - reference, v);
+ }
+ return true;
+}
+
+bool BitReader::DecodeSubexp(int num_symbols, int control, int* const value) {
+ int i = 0;
+ int mk = 0;
+ while (true) {
+ const int b = (i != 0) ? control + i - 1 : control;
+ if (b >= 32) {
+ return Assign(value, 0, false);
+ }
+ const int a = 1 << b;
+ if (num_symbols <= mk + 3 * a) {
+ if (!DecodeUniform(num_symbols - mk, value)) return false;
+ *value += mk;
+ return true;
+ }
+ const int8_t subexp_more_bits = ReadBit();
+ if (subexp_more_bits == -1) return false;
+ if (subexp_more_bits != 0) {
+ ++i;
+ mk += a;
+ } else {
+ const int subexp_bits = static_cast<int>(ReadLiteral(b));
+ if (subexp_bits == -1) {
+ return Assign(value, 0, false);
+ }
+ return Assign(value, subexp_bits + mk, true);
+ }
+ }
+}
+
+} // namespace libgav1
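As a worked example of DecodeUniform() above (the ns(n) coding from the spec): for n = 5, w = FloorLog2(5) + 1 = 3 and m = (1 << 3) - 5 = 3. The reader first takes w - 1 = 2 bits, giving v in [0, 3]. If v < m, the result is v itself, so the 2-bit codes 00, 01, 10 decode to 0, 1, 2. Otherwise one extra bit e is read and the result is (v << 1) - m + e = 3 + e, so the 3-bit codes 110 and 111 decode to 3 and 4. All five values are therefore coded in 2 or 3 bits.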
diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h
new file mode 100644
index 0000000..5a10e12
--- /dev/null
+++ b/src/utils/bit_reader.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BIT_READER_H_
+#define LIBGAV1_SRC_UTILS_BIT_READER_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+class BitReader {
+ public:
+ virtual ~BitReader() = default;
+
+ virtual int ReadBit() = 0;
+ // |num_bits| has to be <= 32. The function returns a value in the range [0,
+ // 2^num_bits - 1] (inclusive) on success and -1 on failure.
+ virtual int64_t ReadLiteral(int num_bits) = 0;
+
+ bool DecodeSignedSubexpWithReference(int low, int high, int reference,
+ int control, int* value); // 5.9.26.
+ // Decodes a nonnegative integer with maximum number of values |n| (i.e.,
+ // output in range 0..n-1) by following the process specified in Section
+ // 4.10.7 ns(n) and Section 4.10.10 NS(n) of the spec.
+ bool DecodeUniform(int n, int* value);
+
+ private:
+ // Helper functions for DecodeSignedSubexpWithReference.
+ bool DecodeUnsignedSubexpWithReference(int mx, int reference, int control,
+ int* value); // 5.9.27.
+ bool DecodeSubexp(int num_symbols, int control, int* value); // 5.9.28.
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BIT_READER_H_
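ReadBit() and ReadLiteral() are the only pure virtuals, so the subexponential decoders above can be exercised with a trivial in-memory reader. A sketch under that assumption (VectorBitReader is hypothetical; the library's real implementations live in raw_bit_reader and entropy_decoder, per the diffstat):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include "src/utils/bit_reader.h"

class VectorBitReader : public libgav1::BitReader {
 public:
  explicit VectorBitReader(std::vector<uint8_t> data)
      : data_(std::move(data)) {}

  // Returns 0 or 1, or -1 once the buffer is exhausted.
  int ReadBit() override {
    if (bit_offset_ >= data_.size() * 8) return -1;
    const int bit = (data_[bit_offset_ >> 3] >> (7 - (bit_offset_ & 7))) & 1;
    ++bit_offset_;
    return bit;
  }

  // Reads |num_bits| (<= 32) bits MSB-first; returns -1 on failure.
  int64_t ReadLiteral(int num_bits) override {
    int64_t value = 0;
    for (int i = 0; i < num_bits; ++i) {
      const int bit = ReadBit();
      if (bit == -1) return -1;
      value = (value << 1) | bit;
    }
    return value;
  }

 private:
  std::vector<uint8_t> data_;
  size_t bit_offset_ = 0;
};

With this, DecodeUniform() and the subexp helpers above can be driven directly from a byte vector.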
diff --git a/src/utils/block_parameters_holder.cc b/src/utils/block_parameters_holder.cc
new file mode 100644
index 0000000..3ccdb9b
--- /dev/null
+++ b/src/utils/block_parameters_holder.cc
@@ -0,0 +1,107 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/block_parameters_holder.h"
+
+#include <algorithm>
+
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/logging.h"
+#include "src/utils/parameter_tree.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+namespace {
+
+// Returns the number of super block rows/columns for |value4x4| where value4x4
+// is either rows4x4 or columns4x4.
+int RowsOrColumns4x4ToSuperBlocks(int value4x4, bool use_128x128_superblock) {
+ return use_128x128_superblock ? DivideBy128(MultiplyBy4(value4x4) + 127)
+ : DivideBy64(MultiplyBy4(value4x4) + 63);
+}
+
+} // namespace
+
+bool BlockParametersHolder::Reset(int rows4x4, int columns4x4,
+ bool use_128x128_superblock) {
+ rows4x4_ = rows4x4;
+ columns4x4_ = columns4x4;
+ use_128x128_superblock_ = use_128x128_superblock;
+ if (!block_parameters_cache_.Reset(rows4x4_, columns4x4_)) {
+ LIBGAV1_DLOG(ERROR, "block_parameters_cache_.Reset() failed.");
+ return false;
+ }
+ const int rows =
+ RowsOrColumns4x4ToSuperBlocks(rows4x4_, use_128x128_superblock_);
+ const int columns =
+ RowsOrColumns4x4ToSuperBlocks(columns4x4_, use_128x128_superblock_);
+ const BlockSize sb_size =
+ use_128x128_superblock_ ? kBlock128x128 : kBlock64x64;
+ const int multiplier = kNum4x4BlocksWide[sb_size];
+ if (!trees_.Reset(rows, columns, /*zero_initialize=*/false)) {
+ LIBGAV1_DLOG(ERROR, "trees_.Reset() failed.");
+ return false;
+ }
+ for (int i = 0; i < rows; ++i) {
+ for (int j = 0; j < columns; ++j) {
+ trees_[i][j] =
+ ParameterTree::Create(i * multiplier, j * multiplier, sb_size);
+ if (trees_[i][j] == nullptr) {
+ LIBGAV1_DLOG(ERROR, "Allocation of trees_[%d][%d] failed.", i, j);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void BlockParametersHolder::FillCache(int row4x4, int column4x4,
+ BlockSize block_size,
+ BlockParameters* const bp) {
+ int rows = std::min(static_cast<int>(kNum4x4BlocksHigh[block_size]),
+ rows4x4_ - row4x4);
+ const int columns = std::min(static_cast<int>(kNum4x4BlocksWide[block_size]),
+ columns4x4_ - column4x4);
+ auto* bp_dst = &block_parameters_cache_[row4x4][column4x4];
+ // Specialize columns cases (values in kNum4x4BlocksWide[]) for better
+ // performance.
+ if (columns == 1) {
+ SetBlock<BlockParameters*>(rows, 1, bp, bp_dst, columns4x4_);
+ } else if (columns == 2) {
+ SetBlock<BlockParameters*>(rows, 2, bp, bp_dst, columns4x4_);
+ } else if (columns == 4) {
+ SetBlock<BlockParameters*>(rows, 4, bp, bp_dst, columns4x4_);
+ } else if (columns == 8) {
+ SetBlock<BlockParameters*>(rows, 8, bp, bp_dst, columns4x4_);
+ } else if (columns == 16) {
+ SetBlock<BlockParameters*>(rows, 16, bp, bp_dst, columns4x4_);
+ } else if (columns == 32) {
+ SetBlock<BlockParameters*>(rows, 32, bp, bp_dst, columns4x4_);
+ } else {
+ do {
+ // The following loop has better performance than using std::fill().
+      // std::fill() has some overhead from checking for a zero loop count.
+ int x = columns;
+ auto* d = bp_dst;
+ do {
+ *d++ = bp;
+ } while (--x != 0);
+ bp_dst += columns4x4_;
+ } while (--rows != 0);
+ }
+}
+
+} // namespace libgav1
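As a concrete example of FillCache() above: a kBlock16x8 block covers 4 columns by 2 rows of 4x4 units, so for row4x4 = 2 and column4x4 = 4 the specialized SetBlock<> call writes the same BlockParameters pointer into the eight cache cells at rows 2..3, columns 4..7. This is what lets Find() resolve any 4x4 position inside a block with a single array lookup instead of a tree traversal.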
diff --git a/src/utils/block_parameters_holder.h b/src/utils/block_parameters_holder.h
new file mode 100644
index 0000000..35543c3
--- /dev/null
+++ b/src/utils/block_parameters_holder.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
+#define LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
+
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/parameter_tree.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// Holds a 2D array of |ParameterTree| objects. Each tree stores the parameters
+// corresponding to a superblock.
+class BlockParametersHolder {
+ public:
+ BlockParametersHolder() = default;
+
+ // Not copyable or movable.
+ BlockParametersHolder(const BlockParametersHolder&) = delete;
+ BlockParametersHolder& operator=(const BlockParametersHolder&) = delete;
+
+ // If |use_128x128_superblock| is true, 128x128 superblocks will be used,
+ // otherwise 64x64 superblocks will be used.
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows4x4, int columns4x4,
+ bool use_128x128_superblock);
+
+ // Finds the BlockParameters corresponding to |row4x4| and |column4x4|. This
+ // is done as a simple look up of the |block_parameters_cache_| matrix.
+ // Returns nullptr if the BlockParameters cannot be found.
+ BlockParameters* Find(int row4x4, int column4x4) const {
+ return block_parameters_cache_[row4x4][column4x4];
+ }
+
+ BlockParameters** Address(int row4x4, int column4x4) {
+ return block_parameters_cache_.data() + row4x4 * columns4x4_ + column4x4;
+ }
+
+ BlockParameters* const* Address(int row4x4, int column4x4) const {
+ return block_parameters_cache_.data() + row4x4 * columns4x4_ + column4x4;
+ }
+
+ int columns4x4() const { return columns4x4_; }
+
+ // Returns the ParameterTree corresponding to superblock starting at (|row|,
+ // |column|).
+ ParameterTree* Tree(int row, int column) { return trees_[row][column].get(); }
+
+ // Fills the cache matrix for the block starting at |row4x4|, |column4x4| of
+ // size |block_size| with the pointer |bp|.
+ void FillCache(int row4x4, int column4x4, BlockSize block_size,
+ BlockParameters* bp);
+
+ private:
+ int rows4x4_ = 0;
+ int columns4x4_ = 0;
+ bool use_128x128_superblock_ = false;
+ Array2D<std::unique_ptr<ParameterTree>> trees_;
+
+ // This is a 2d array of size |rows4x4_| * |columns4x4_|. This is filled in by
+ // FillCache() and used by Find() to perform look ups using exactly one look
+ // up (instead of traversing the entire tree).
+ Array2D<BlockParameters*> block_parameters_cache_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BLOCK_PARAMETERS_HOLDER_H_
diff --git a/src/utils/blocking_counter.h b/src/utils/blocking_counter.h
new file mode 100644
index 0000000..6d664f8
--- /dev/null
+++ b/src/utils/blocking_counter.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
+#define LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
+
+#include <cassert>
+#include <condition_variable> // NOLINT (unapproved c++11 header)
+#include <mutex> // NOLINT (unapproved c++11 header)
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// Implementation of a Blocking Counter that is used for the "fork-join"
+// use case. Typical usage would be as follows:
+// BlockingCounter counter(num_jobs);
+// - spawn the jobs.
+// - call counter.Wait() on the master thread.
+// - worker threads will call counter.Decrement().
+// - master thread will return from counter.Wait() when all workers are
+// complete.
+template <bool has_failure_status>
+class BlockingCounterImpl {
+ public:
+ explicit BlockingCounterImpl(int initial_count)
+ : count_(initial_count), job_failed_(false) {}
+
+ // Increment the counter by |count|. This must be called before Wait() is
+ // called. This must be called from the same thread that will call Wait().
+ void IncrementBy(int count) {
+ assert(count >= 0);
+ std::unique_lock<std::mutex> lock(mutex_);
+ count_ += count;
+ }
+
+ // Decrement the counter by 1. This function can be called only when
+  // |has_failure_status| is false (i.e., when this class is being used with
+  // the |BlockingCounter| alias).
+ void Decrement() {
+ static_assert(!has_failure_status, "");
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (--count_ == 0) {
+ condition_.notify_one();
+ }
+ }
+
+ // Decrement the counter by 1. This function can be called only when
+  // |has_failure_status| is true (i.e., when this class is being used with the
+  // |BlockingCounterWithStatus| alias). |job_succeeded| is used to update the
+ // state of |job_failed_|.
+ void Decrement(bool job_succeeded) {
+ static_assert(has_failure_status, "");
+ std::unique_lock<std::mutex> lock(mutex_);
+ job_failed_ |= !job_succeeded;
+ if (--count_ == 0) {
+ condition_.notify_one();
+ }
+ }
+
+ // Block until the counter becomes 0. This function can be called only once
+ // per object. If |has_failure_status| is true, true is returned if all the
+ // jobs succeeded and false is returned if any of the jobs failed. If
+ // |has_failure_status| is false, this function always returns true.
+ bool Wait() {
+ std::unique_lock<std::mutex> lock(mutex_);
+ condition_.wait(lock, [this]() { return count_ == 0; });
+ // If |has_failure_status| is false, we simply return true.
+ return has_failure_status ? !job_failed_ : true;
+ }
+
+ private:
+ std::mutex mutex_;
+ std::condition_variable condition_;
+ int count_ LIBGAV1_GUARDED_BY(mutex_);
+ bool job_failed_ LIBGAV1_GUARDED_BY(mutex_);
+};
+
+using BlockingCounterWithStatus = BlockingCounterImpl<true>;
+using BlockingCounter = BlockingCounterImpl<false>;
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_BLOCKING_COUNTER_H_
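The fork-join pattern from the usage comment, written out as a sketch (illustrative only; RunJobs is hypothetical, and the library itself dispatches through its ThreadPool rather than raw std::thread):

#include <thread>
#include <vector>

#include "src/utils/blocking_counter.h"

void RunJobs(int num_jobs) {
  libgav1::BlockingCounter counter(num_jobs);
  std::vector<std::thread> workers;
  workers.reserve(num_jobs);
  for (int i = 0; i < num_jobs; ++i) {
    workers.emplace_back([&counter]() {
      // ... perform this job's work here ...
      counter.Decrement();  // Each worker signals completion once.
    });
  }
  counter.Wait();  // Blocks until all |num_jobs| decrements have happened.
  for (auto& worker : workers) worker.join();
}

With BlockingCounterWithStatus, each worker would instead call counter.Decrement(job_succeeded) and Wait() would report whether any job failed.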
diff --git a/src/utils/common.h b/src/utils/common.h
new file mode 100644
index 0000000..ae43c2b
--- /dev/null
+++ b/src/utils/common.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_COMMON_H_
+#define LIBGAV1_SRC_UTILS_COMMON_H_
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#pragma intrinsic(_BitScanReverse64)
+#define HAVE_BITSCANREVERSE64
+#endif // defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#endif // defined(_MSC_VER)
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <type_traits>
+
+#include "src/utils/bit_mask_set.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// Aligns |value| to the desired |alignment|. |alignment| must be a power of 2.
+template <typename T>
+inline T Align(T value, T alignment) {
+ assert(alignment != 0);
+ const T alignment_mask = alignment - 1;
+ return (value + alignment_mask) & ~alignment_mask;
+}
+
+// Aligns |addr| to the desired |alignment|. |alignment| must be a power of 2.
+inline uint8_t* AlignAddr(uint8_t* const addr, const uintptr_t alignment) {
+ const auto value = reinterpret_cast<uintptr_t>(addr);
+ return reinterpret_cast<uint8_t*>(Align(value, alignment));
+}
+
+inline int32_t Clip3(int32_t value, int32_t low, int32_t high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
+template <typename Pixel>
+void ExtendLine(void* const line_start, const int width, const int left,
+ const int right) {
+ auto* const start = static_cast<Pixel*>(line_start);
+ const Pixel* src = start;
+ Pixel* dst = start - left;
+ // Copy to left and right borders.
+ Memset(dst, src[0], left);
+ Memset(dst + left + width, src[width - 1], right);
+}
+
+// The following 2 templates set a block of data with uncontiguous memory to
+// |value|. The compilers usually generate several branches to handle different
+// cases of |columns| when inlining memset() and std::fill(), and these branches
+// are unfortunately within the loop of |rows|. So calling these templates
+// directly could be inefficient. It is recommended to specialize common cases
+// of |columns|, such as 1, 2, 4, 8, 16 and 32, etc. in advance before
+// processing the generic case of |columns|. The code size may be larger, but
+// there would be big speed gains.
+// Call template MemSetBlock<> when sizeof(|T|) is 1.
+// Call template SetBlock<> when sizeof(|T|) is larger than 1.
+template <typename T>
+void MemSetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
+ static_assert(sizeof(T) == 1, "");
+ do {
+ memset(dst, value, columns);
+ dst += stride;
+ } while (--rows != 0);
+}
+
+template <typename T>
+void SetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
+ do {
+ std::fill(dst, dst + columns, value);
+ dst += stride;
+ } while (--rows != 0);
+}
+
+#if defined(__GNUC__)
+
+inline int CountLeadingZeros(uint32_t n) {
+ assert(n != 0);
+ return __builtin_clz(n);
+}
+
+inline int CountLeadingZeros(uint64_t n) {
+ assert(n != 0);
+ return __builtin_clzll(n);
+}
+
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ return __builtin_ctz(n);
+}
+
+#elif defined(_MSC_VER)
+
+inline int CountLeadingZeros(uint32_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+ const unsigned char bit_set = _BitScanReverse(&first_set_bit, n);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 31 ^ static_cast<int>(first_set_bit);
+}
+
+inline int CountLeadingZeros(uint64_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+#if defined(HAVE_BITSCANREVERSE64)
+ const unsigned char bit_set =
+ _BitScanReverse64(&first_set_bit, static_cast<unsigned __int64>(n));
+#else // !defined(HAVE_BITSCANREVERSE64)
+ const auto n_hi = static_cast<unsigned long>(n >> 32); // NOLINT(runtime/int)
+ if (n_hi != 0) {
+ const unsigned char bit_set = _BitScanReverse(&first_set_bit, n_hi);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 31 ^ static_cast<int>(first_set_bit);
+ }
+ const unsigned char bit_set = _BitScanReverse(
+ &first_set_bit, static_cast<unsigned long>(n)); // NOLINT(runtime/int)
+#endif // defined(HAVE_BITSCANREVERSE64)
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return 63 ^ static_cast<int>(first_set_bit);
+}
+
+#undef HAVE_BITSCANREVERSE64
+
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ unsigned long first_set_bit; // NOLINT(runtime/int)
+ const unsigned char bit_set = _BitScanForward(&first_set_bit, n);
+ assert(bit_set != 0);
+ static_cast<void>(bit_set);
+ return static_cast<int>(first_set_bit);
+}
+
+#else // !defined(__GNUC__) && !defined(_MSC_VER)
+
+template <const int kMSB, typename T>
+inline int CountLeadingZeros(T n) {
+ assert(n != 0);
+ const T msb = T{1} << kMSB;
+ int count = 0;
+ while ((n & msb) == 0) {
+ ++count;
+ n <<= 1;
+ }
+ return count;
+}
+
+inline int CountLeadingZeros(uint32_t n) { return CountLeadingZeros<31>(n); }
+
+inline int CountLeadingZeros(uint64_t n) { return CountLeadingZeros<63>(n); }
+
+// This is the algorithm on the left in Figure 5-23, Hacker's Delight, Second
+// Edition, page 109. The book says:
+// If the number of trailing 0's is expected to be small or large, then the
+// simple loops shown in Figure 5-23 are quite fast.
+inline int CountTrailingZeros(uint32_t n) {
+ assert(n != 0);
+ // Create a word with 1's at the positions of the trailing 0's in |n|, and
+ // 0's elsewhere (e.g., 01011000 => 00000111).
+ n = ~n & (n - 1);
+ int count = 0;
+ while (n != 0) {
+ ++count;
+ n >>= 1;
+ }
+ return count;
+}
+
+#endif // defined(__GNUC__)
+
+inline int FloorLog2(int32_t n) {
+ assert(n > 0);
+ return 31 ^ CountLeadingZeros(static_cast<uint32_t>(n));
+}
+
+inline int FloorLog2(uint32_t n) {
+ assert(n > 0);
+ return 31 ^ CountLeadingZeros(n);
+}
+
+inline int FloorLog2(int64_t n) {
+ assert(n > 0);
+ return 63 ^ CountLeadingZeros(static_cast<uint64_t>(n));
+}
+
+inline int FloorLog2(uint64_t n) {
+ assert(n > 0);
+ return 63 ^ CountLeadingZeros(n);
+}
+
+inline int CeilLog2(unsigned int n) {
+ // The expression FloorLog2(n - 1) + 1 is undefined not only for n == 0 but
+ // also for n == 1, so this expression must be guarded by the n < 2 test. An
+ // alternative implementation is:
+ // return (n == 0) ? 0 : FloorLog2(n) + static_cast<int>((n & (n - 1)) != 0);
+ return (n < 2) ? 0 : FloorLog2(n - 1) + 1;
+}
+
+inline int RightShiftWithCeiling(int value, int bits) {
+ assert(bits > 0);
+ return (value + (1 << bits) - 1) >> bits;
+}
+
+inline int32_t RightShiftWithRounding(int32_t value, int bits) {
+ assert(bits >= 0);
+ return (value + ((1 << bits) >> 1)) >> bits;
+}
+
+inline uint32_t RightShiftWithRounding(uint32_t value, int bits) {
+ assert(bits >= 0);
+ return (value + ((1 << bits) >> 1)) >> bits;
+}
+
+// This variant is used when |value| can exceed 32 bits, although the final
+// result must always fit into int32_t.
+inline int32_t RightShiftWithRounding(int64_t value, int bits) {
+ assert(bits >= 0);
+ return static_cast<int32_t>((value + ((int64_t{1} << bits) >> 1)) >> bits);
+}
+
+inline int32_t RightShiftWithRoundingSigned(int32_t value, int bits) {
+ assert(bits > 0);
+ // The next line is equivalent to:
+ // return (value >= 0) ? RightShiftWithRounding(value, bits)
+ // : -RightShiftWithRounding(-value, bits);
+ return RightShiftWithRounding(value + (value >> 31), bits);
+}
+
+// This variant is used when |value| can exceed 32 bits, although the final
+// result must always fit into int32_t.
+inline int32_t RightShiftWithRoundingSigned(int64_t value, int bits) {
+ assert(bits > 0);
+ // The next line is equivalent to:
+ // return (value >= 0) ? RightShiftWithRounding(value, bits)
+ // : -RightShiftWithRounding(-value, bits);
+ return RightShiftWithRounding(value + (value >> 63), bits);
+}
+
+constexpr int DivideBy2(int n) { return n >> 1; }
+constexpr int DivideBy4(int n) { return n >> 2; }
+constexpr int DivideBy8(int n) { return n >> 3; }
+constexpr int DivideBy16(int n) { return n >> 4; }
+constexpr int DivideBy32(int n) { return n >> 5; }
+constexpr int DivideBy64(int n) { return n >> 6; }
+constexpr int DivideBy128(int n) { return n >> 7; }
+
+// Convert |value| to unsigned before shifting to avoid undefined behavior with
+// negative values.
+inline int LeftShift(int value, int bits) {
+ assert(bits >= 0);
+ assert(value >= -(int64_t{1} << (31 - bits)));
+ assert(value <= (int64_t{1} << (31 - bits)) - ((bits == 0) ? 1 : 0));
+ return static_cast<int>(static_cast<uint32_t>(value) << bits);
+}
+inline int MultiplyBy2(int n) { return LeftShift(n, 1); }
+inline int MultiplyBy4(int n) { return LeftShift(n, 2); }
+inline int MultiplyBy8(int n) { return LeftShift(n, 3); }
+inline int MultiplyBy16(int n) { return LeftShift(n, 4); }
+inline int MultiplyBy32(int n) { return LeftShift(n, 5); }
+inline int MultiplyBy64(int n) { return LeftShift(n, 6); }
+
+constexpr int Mod32(int n) { return n & 0x1f; }
+constexpr int Mod64(int n) { return n & 0x3f; }
+
+//------------------------------------------------------------------------------
+// Bitstream functions
+
+constexpr bool IsIntraFrame(FrameType type) {
+ return type == kFrameKey || type == kFrameIntraOnly;
+}
+
+inline TransformClass GetTransformClass(TransformType tx_type) {
+ constexpr BitMaskSet kTransformClassVerticalMask(
+ kTransformTypeIdentityDct, kTransformTypeIdentityAdst,
+ kTransformTypeIdentityFlipadst);
+ if (kTransformClassVerticalMask.Contains(tx_type)) {
+ return kTransformClassVertical;
+ }
+ constexpr BitMaskSet kTransformClassHorizontalMask(
+ kTransformTypeDctIdentity, kTransformTypeAdstIdentity,
+ kTransformTypeFlipadstIdentity);
+ if (kTransformClassHorizontalMask.Contains(tx_type)) {
+ return kTransformClassHorizontal;
+ }
+ return kTransformClass2D;
+}
+
+inline int RowOrColumn4x4ToPixel(int row_or_column4x4, Plane plane,
+ int8_t subsampling) {
+ return MultiplyBy4(row_or_column4x4) >> (plane == kPlaneY ? 0 : subsampling);
+}
+
+constexpr PlaneType GetPlaneType(Plane plane) {
+ return static_cast<PlaneType>(plane != kPlaneY);
+}
+
+// 5.11.44.
+constexpr bool IsDirectionalMode(PredictionMode mode) {
+ return mode >= kPredictionModeVertical && mode <= kPredictionModeD67;
+}
+
+// 5.9.3.
+//
+// |a| and |b| are order hints, treated as unsigned order_hint_bits-bit
+// integers. |order_hint_shift_bits| equals (32 - order_hint_bits) % 32.
+// order_hint_bits is at most 8, so |order_hint_shift_bits| is zero or a
+// value between 24 and 31 (inclusive).
+//
+// If |order_hint_shift_bits| is zero, |a| and |b| are both zeros, and the
+// result is zero. If |order_hint_shift_bits| is not zero, returns the
+// signed difference |a| - |b| using "modular arithmetic". More precisely, the
+// signed difference |a| - |b| is treated as a signed order_hint_bits-bit
+// integer and cast to an int. The returned difference is between
+// -(1 << (order_hint_bits - 1)) and (1 << (order_hint_bits - 1)) - 1
+// (inclusive).
+//
+// NOTE: |a| and |b| are the order_hint_bits least significant bits of the
+// actual values. This function returns the signed difference between the
+// actual values. The returned difference is correct as long as the actual
+// values are not more than 1 << (order_hint_bits - 1) - 1 apart.
+//
+// Example: Suppose order_hint_bits is 4 and |order_hint_shift_bits|
+// is 28. Then |a| and |b| are in the range [0, 15], and the actual values for
+// |a| and |b| must not be more than 7 apart. (If the actual values for |a| and
+// |b| are exactly 8 apart, this function cannot tell whether the actual value
+// for |a| is before or after the actual value for |b|.)
+//
+// First, consider the order hints 2 and 6. For this simple case, we have
+// GetRelativeDistance(2, 6, 28) = 2 - 6 = -4, and
+// GetRelativeDistance(6, 2, 28) = 6 - 2 = 4.
+//
+// On the other hand, consider the order hints 2 and 14. The order hints are
+// 12 (> 7) apart, so we need to use the actual values instead. The actual
+// values may be 34 (= 2 mod 16) and 30 (= 14 mod 16), respectively. Therefore
+// we have
+// GetRelativeDistance(2, 14, 28) = 34 - 30 = 4, and
+// GetRelativeDistance(14, 2, 28) = 30 - 34 = -4.
+//
+// The following comments apply only to specific CPUs' SIMD implementations,
+// such as intrinsics code.
+// For the 2 shift operations in this function, if the SIMD packed data is
+// 16-bit wide, try to use |order_hint_shift_bits| - 16 as the number of bits to
+// shift; if the SIMD packed data is 8-bit wide, try to use
+// |order_hint_shift_bits| - 24 as the number of bits to shift.
+// |order_hint_shift_bits| - 16 and |order_hint_shift_bits| - 24 could be -16 or
+// -24. In these cases diff is 0, and the behavior of left or right shifting -16
+// or -24 bits is defined for x86 SIMD instructions and ARM NEON instructions,
+// and the result of shifting 0 is still 0. There is no guarantee that this
+// behavior and result apply to other CPUs' SIMD instructions.
+inline int GetRelativeDistance(const unsigned int a, const unsigned int b,
+ const unsigned int order_hint_shift_bits) {
+ const int diff = a - b;
+ assert(order_hint_shift_bits <= 31);
+ if (order_hint_shift_bits == 0) {
+ assert(a == 0);
+ assert(b == 0);
+ } else {
+ assert(order_hint_shift_bits >= 24); // i.e., order_hint_bits <= 8
+ assert(a < (1u << (32 - order_hint_shift_bits)));
+ assert(b < (1u << (32 - order_hint_shift_bits)));
+ assert(diff < (1 << (32 - order_hint_shift_bits)));
+ assert(diff >= -(1 << (32 - order_hint_shift_bits)));
+ }
+ // Sign extend the result of subtracting the values.
+ // Cast to unsigned int and then left shift to avoid undefined behavior with
+ // negative values. Cast to int to do the sign extension through right shift.
+ // This requires the right shift of a signed integer be an arithmetic shift,
+ // which is true for clang, gcc, and Visual C++.
+ // These two casts do not generate extra instructions.
+ // Don't use LeftShift(diff) since a valid diff may fail its assertions.
+  // For example, in GetRelativeDistance(2, 14, 28), diff equals -12 and is
+  // less than the minimum allowed value of LeftShift(), which is -8.
+ // The next 3 lines are equivalent to:
+ // const int order_hint_bits = Mod32(32 - order_hint_shift_bits);
+ // const int m = (1 << order_hint_bits) >> 1;
+ // return (diff & (m - 1)) - (diff & m);
+ return static_cast<int>(static_cast<unsigned int>(diff)
+ << order_hint_shift_bits) >>
+ order_hint_shift_bits;
+}
+
+// Applies |sign| (must be 0 or -1) to |value|, i.e.,
+// return (sign == 0) ? value : -value;
+// and does so without a branch.
+constexpr int ApplySign(int value, int sign) { return (value ^ sign) - sign; }
+
+// 7.9.3. (without the clamp for numerator and denominator).
+inline void GetMvProjection(const MotionVector& mv, int numerator,
+ int division_multiplier,
+ MotionVector* projection_mv) {
+  // Allow numerator and denominator to be 0 so that this function can be
+  // called
+ // unconditionally. When numerator is 0, |projection_mv| will be 0, and this
+ // is what we want.
+ assert(std::abs(numerator) <= kMaxFrameDistance);
+ for (int i = 0; i < 2; ++i) {
+ projection_mv->mv[i] =
+ Clip3(RightShiftWithRoundingSigned(
+ mv.mv[i] * numerator * division_multiplier, 14),
+ -kProjectionMvClamp, kProjectionMvClamp);
+ }
+}
+
+// 7.9.4.
+constexpr int Project(int value, int delta, int dst_sign) {
+ return value + ApplySign(delta / 64, dst_sign);
+}
+
+inline bool IsBlockSmallerThan8x8(BlockSize size) {
+ return size < kBlock8x8 && size != kBlock4x16;
+}
+
+// Returns true if either the width or the height of the block is equal to
+// four.
+inline bool IsBlockDimension4(BlockSize size) {
+ return size < kBlock8x8 || size == kBlock16x4;
+}
+
+// Converts bitdepth 8, 10, and 12 to array index 0, 1, and 2, respectively.
+constexpr int BitdepthToArrayIndex(int bitdepth) { return (bitdepth - 8) >> 1; }
+
+// Maps a square transform to an index in the range [0, 4]. kTransformSize4x4
+// maps to 0, kTransformSize8x8 maps to 1, and so on.
+inline int TransformSizeToSquareTransformIndex(TransformSize tx_size) {
+ assert(kTransformWidth[tx_size] == kTransformHeight[tx_size]);
+
+ // The values of the square transform sizes happen to be in the right
+ // ranges, so we can just divide them by 4 to get the indexes.
+ static_assert(
+ std::is_unsigned<std::underlying_type<TransformSize>::type>::value, "");
+ static_assert(kTransformSize4x4 < 4, "");
+ static_assert(4 <= kTransformSize8x8 && kTransformSize8x8 < 8, "");
+ static_assert(8 <= kTransformSize16x16 && kTransformSize16x16 < 12, "");
+ static_assert(12 <= kTransformSize32x32 && kTransformSize32x32 < 16, "");
+ static_assert(16 <= kTransformSize64x64 && kTransformSize64x64 < 20, "");
+ return DivideBy4(tx_size);
+}
+
+// Gets the corresponding Y/U/V position, to set and get filter masks
+// in deblock filtering.
+// Returns luma_position for the Y plane, whose subsampling must be 0.
+// Returns the odd position for the U/V planes if there is subsampling.
+constexpr int GetDeblockPosition(const int luma_position,
+ const int subsampling) {
+ return luma_position | subsampling;
+}
+
+// Returns the size of the residual buffer required to hold the residual values
+// for a block or frame of size |rows| by |columns| (taking into account
+// |subsampling_x|, |subsampling_y| and |residual_size|). |residual_size| is the
+// number of bytes required to represent one residual value.
+inline size_t GetResidualBufferSize(const int rows, const int columns,
+ const int subsampling_x,
+ const int subsampling_y,
+ const size_t residual_size) {
+ // The subsampling multipliers are:
+ // Both x and y are subsampled: 3 / 2.
+ // Only x or y is subsampled: 2 / 1 (which is equivalent to 4 / 2).
+ // Both x and y are not subsampled: 3 / 1 (which is equivalent to 6 / 2).
+ // So we compute the final subsampling multiplier as follows:
+ // multiplier = (2 + (4 >> subsampling_x >> subsampling_y)) / 2.
+ // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary checks
+ // when parsing quantized coefficients.
+ const int subsampling_multiplier_num =
+ 2 + (4 >> subsampling_x >> subsampling_y);
+ const int number_elements =
+ (rows * columns * subsampling_multiplier_num) >> 1;
+ const int tx_padding = 32 * kResidualPaddingVertical;
+ return residual_size * (number_elements + tx_padding);
+}
+
+// This function is equivalent to:
+// std::min({kTransformWidthLog2[tx_size] - 2,
+// kTransformWidthLog2[left_tx_size] - 2,
+// 2});
+constexpr LoopFilterTransformSizeId GetTransformSizeIdWidth(
+ TransformSize tx_size, TransformSize left_tx_size) {
+ return static_cast<LoopFilterTransformSizeId>(
+ static_cast<int>(tx_size > kTransformSize4x16 &&
+ left_tx_size > kTransformSize4x16) +
+ static_cast<int>(tx_size > kTransformSize8x32 &&
+ left_tx_size > kTransformSize8x32));
+}
+
+// This is used for 7.11.3.4 Block Inter Prediction Process, to select convolve
+// filters.
+inline int GetFilterIndex(const int filter_index, const int length) {
+ if (length <= 4) {
+ if (filter_index == kInterpolationFilterEightTap ||
+ filter_index == kInterpolationFilterEightTapSharp) {
+ return 4;
+ }
+ if (filter_index == kInterpolationFilterEightTapSmooth) {
+ return 5;
+ }
+ }
+ return filter_index;
+}
+
+// This has the same results as RightShiftWithRounding() since |subsampling|
+// can
+// only be 0 or 1.
+constexpr int SubsampledValue(int value, int subsampling) {
+ return (value + subsampling) >> subsampling;
+}
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_COMMON_H_
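The wrap-around examples in the GetRelativeDistance() comment can be checked numerically; a small standalone sketch (CheckRelativeDistance is a hypothetical test helper):

#include <cassert>

#include "src/utils/common.h"

void CheckRelativeDistance() {
  using libgav1::GetRelativeDistance;
  // order_hint_bits = 4, so order_hint_shift_bits = 28.
  assert(GetRelativeDistance(2, 6, 28) == -4);
  assert(GetRelativeDistance(6, 2, 28) == 4);
  // Order hints 2 and 14 are 12 apart; the sign-extended modular difference
  // wraps to +/-4, matching the comment above.
  assert(GetRelativeDistance(2, 14, 28) == 4);
  assert(GetRelativeDistance(14, 2, 28) == -4);
}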
diff --git a/src/utils/compiler_attributes.h b/src/utils/compiler_attributes.h
new file mode 100644
index 0000000..e122426
--- /dev/null
+++ b/src/utils/compiler_attributes.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
+#define LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
+
+// A collection of compiler attribute checks and defines to control for
+// compatibility across toolchains.
+
+//------------------------------------------------------------------------------
+// Language version, attribute and feature helpers.
+
+// Detect c++17 support. Visual Studio sets __cplusplus to 199711L by default
+// unless compiled with /Zc:__cplusplus, use the value controlled by /std
+// instead.
+// https://docs.microsoft.com/en-us/cpp/build/reference/zc-cplusplus
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define LIBGAV1_CXX17 1
+#else
+#define LIBGAV1_CXX17 0
+#endif
+
+#if defined(__has_attribute)
+#define LIBGAV1_HAS_ATTRIBUTE __has_attribute
+#else
+#define LIBGAV1_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__has_feature)
+#define LIBGAV1_HAS_FEATURE __has_feature
+#else
+#define LIBGAV1_HAS_FEATURE(x) 0
+#endif
+
+//------------------------------------------------------------------------------
+// Sanitizer attributes.
+
+#if LIBGAV1_HAS_FEATURE(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#define LIBGAV1_ASAN 1
+#else
+#define LIBGAV1_ASAN 0
+#endif
+
+#if LIBGAV1_HAS_FEATURE(memory_sanitizer)
+#define LIBGAV1_MSAN 1
+#else
+#define LIBGAV1_MSAN 0
+#endif
+
+#if LIBGAV1_HAS_FEATURE(thread_sanitizer) || defined(__SANITIZE_THREAD__)
+#define LIBGAV1_TSAN 1
+#else
+#define LIBGAV1_TSAN 0
+#endif
+
+//------------------------------------------------------------------------------
+// AddressSanitizer support.
+
+// Define the macros for AddressSanitizer manual memory poisoning. See
+// https://github.com/google/sanitizers/wiki/AddressSanitizerManualPoisoning.
+#if LIBGAV1_ASAN
+#include <sanitizer/asan_interface.h>
+#else
+#define ASAN_POISON_MEMORY_REGION(addr, size) \
+ (static_cast<void>(addr), static_cast<void>(size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+ (static_cast<void>(addr), static_cast<void>(size))
+#endif
+
+//------------------------------------------------------------------------------
+// Function attributes.
+// GCC: https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+// Clang: https://clang.llvm.org/docs/AttributeReference.html
+
+#if defined(__GNUC__)
+#define LIBGAV1_ALWAYS_INLINE __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#define LIBGAV1_ALWAYS_INLINE __forceinline
+#else
+#define LIBGAV1_ALWAYS_INLINE inline
+#endif
+
+// LIBGAV1_MUST_USE_RESULT
+//
+// Tells the compiler to warn about unused results.
+//
+// When annotating a function, it must appear as the first part of the
+// declaration or definition. The compiler will warn if the return value from
+// such a function is unused:
+//
+// LIBGAV1_MUST_USE_RESULT Sprocket* AllocateSprocket();
+// AllocateSprocket(); // Triggers a warning.
+//
+// When annotating a class, it is equivalent to annotating every function which
+// returns an instance.
+//
+// class LIBGAV1_MUST_USE_RESULT Sprocket {};
+// Sprocket(); // Triggers a warning.
+//
+// Sprocket MakeSprocket();
+// MakeSprocket(); // Triggers a warning.
+//
+// Note that references and pointers are not instances:
+//
+// Sprocket* SprocketPointer();
+// SprocketPointer(); // Does *not* trigger a warning.
+//
+// LIBGAV1_MUST_USE_RESULT allows using cast-to-void to suppress the unused
+// result warning. For that reason, warn_unused_result is used only for clang,
+// not for gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425
+#if LIBGAV1_HAS_ATTRIBUTE(nodiscard)
+#define LIBGAV1_MUST_USE_RESULT [[nodiscard]]
+#elif defined(__clang__) && LIBGAV1_HAS_ATTRIBUTE(warn_unused_result)
+#define LIBGAV1_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define LIBGAV1_MUST_USE_RESULT
+#endif
+
+// LIBGAV1_PRINTF_ATTRIBUTE
+//
+// Tells the compiler to perform `printf` format string checking if the
+// compiler supports it; see the 'format' attribute in
+// <https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html>.
+//
+// Note: As the GCC manual states, "[s]ince non-static C++ methods
+// have an implicit 'this' argument, the arguments of such methods
+// should be counted from two, not one."
+#if LIBGAV1_HAS_ATTRIBUTE(format) || (defined(__GNUC__) && !defined(__clang__))
+#define LIBGAV1_PRINTF_ATTRIBUTE(string_index, first_to_check) \
+ __attribute__((__format__(__printf__, string_index, first_to_check)))
+#else
+#define LIBGAV1_PRINTF_ATTRIBUTE(string_index, first_to_check)
+#endif
+
+//------------------------------------------------------------------------------
+// Thread annotations.
+
+// LIBGAV1_GUARDED_BY()
+//
+// Documents if a shared field or global variable needs to be protected by a
+// mutex. LIBGAV1_GUARDED_BY() allows the user to specify a particular mutex
+// that should be held when accessing the annotated variable.
+//
+// Although this annotation cannot be applied to local variables, a local
+// variable and its associated mutex can often be combined into a small class
+// or struct, thereby allowing the annotation.
+//
+// Example:
+//
+// class Foo {
+// Mutex mu_;
+// int p1_ LIBGAV1_GUARDED_BY(mu_);
+// ...
+// };
+// TODO(b/132506370): this can be reenabled after a local MutexLock
+// implementation is added with proper thread annotations.
+#if 0 // LIBGAV1_HAS_ATTRIBUTE(guarded_by)
+#define LIBGAV1_GUARDED_BY(x) __attribute__((guarded_by(x)))
+#else
+#define LIBGAV1_GUARDED_BY(x)
+#endif
+
+//------------------------------------------------------------------------------
+
+#undef LIBGAV1_HAS_ATTRIBUTE
+#undef LIBGAV1_HAS_FEATURE
+
+#endif // LIBGAV1_SRC_UTILS_COMPILER_ATTRIBUTES_H_
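Unlike LIBGAV1_MUST_USE_RESULT, the LIBGAV1_PRINTF_ATTRIBUTE macro above has no inline example, so a short sketch (LogFormatted is a hypothetical function): with the attribute applied, GCC and Clang type-check the variadic arguments against the format string.

// Format string is argument 1; variadic arguments start at argument 2.
LIBGAV1_PRINTF_ATTRIBUTE(1, 2) void LogFormatted(const char* format, ...);
// LogFormatted("%d items", "many");  // Mismatched %d: warns under -Wformat.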
diff --git a/src/utils/constants.cc b/src/utils/constants.cc
new file mode 100644
index 0000000..80d7acb
--- /dev/null
+++ b/src/utils/constants.cc
@@ -0,0 +1,874 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+
+const uint8_t k4x4WidthLog2[kMaxBlockSizes] = {0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5};
+
+const uint8_t k4x4HeightLog2[kMaxBlockSizes] = {
+ 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 5, 4, 5};
+
+const uint8_t kNum4x4BlocksWide[kMaxBlockSizes] = {
+ 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 32, 32};
+
+const uint8_t kNum4x4BlocksHigh[kMaxBlockSizes] = {
+ 1, 2, 4, 1, 2, 4, 8, 1, 2, 4, 8, 16, 2, 4, 8, 16, 4, 8, 16, 32, 16, 32};
+
+const uint8_t kBlockWidthPixels[kMaxBlockSizes] = {
+ 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16,
+ 16, 32, 32, 32, 32, 64, 64, 64, 64, 128, 128};
+
+const uint8_t kBlockHeightPixels[kMaxBlockSizes] = {
+ 4, 8, 16, 4, 8, 16, 32, 4, 8, 16, 32,
+ 64, 8, 16, 32, 64, 16, 32, 64, 128, 64, 128};
+
+// 9.3 -- Partition_Subsize[]
+const BlockSize kSubSize[kMaxPartitionTypes][kMaxBlockSizes] = {
+ // kPartitionNone
+ {kBlock4x4, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x64, kBlockInvalid,
+ kBlockInvalid, kBlock128x128},
+ // kPartitionHorizontal
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionVertical
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x32, kBlockInvalid,
+ kBlockInvalid, kBlock64x64},
+ // kPartitionHorizontalWithTopSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionHorizontalWithBottomSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x32, kBlockInvalid,
+ kBlockInvalid, kBlock128x64},
+ // kPartitionVerticalWithLeftSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionVerticalWithRightSplit
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x64, kBlockInvalid,
+ kBlockInvalid, kBlock64x128},
+ // kPartitionHorizontal4
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x4,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock32x8,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock64x16, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid},
+ // kPartitionVertical4
+ {kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock4x16,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock8x32,
+ kBlockInvalid, kBlockInvalid, kBlockInvalid, kBlock16x64, kBlockInvalid,
+ kBlockInvalid, kBlockInvalid}};
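+
+// Example (illustrative, derived from the table above):
+//   kSubSize[kPartitionHorizontal][kBlock64x64] == kBlock64x32
+//   kSubSize[kPartitionVertical4][kBlock64x64] == kBlock16x64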
+
+// 5.11.38 (implemented as a simple lookup; the first dimension is the block
+// size, the second and third are subsampling_x and subsampling_y).
+const BlockSize kPlaneResidualSize[kMaxBlockSizes][2][2] = {
+ {{kBlock4x4, kBlock4x4}, {kBlock4x4, kBlock4x4}},
+ {{kBlock4x8, kBlock4x4}, {kBlockInvalid, kBlock4x4}},
+ {{kBlock4x16, kBlock4x8}, {kBlockInvalid, kBlock4x8}},
+ {{kBlock8x4, kBlockInvalid}, {kBlock4x4, kBlock4x4}},
+ {{kBlock8x8, kBlock8x4}, {kBlock4x8, kBlock4x4}},
+ {{kBlock8x16, kBlock8x8}, {kBlockInvalid, kBlock4x8}},
+ {{kBlock8x32, kBlock8x16}, {kBlockInvalid, kBlock4x16}},
+ {{kBlock16x4, kBlockInvalid}, {kBlock8x4, kBlock8x4}},
+ {{kBlock16x8, kBlockInvalid}, {kBlock8x8, kBlock8x4}},
+ {{kBlock16x16, kBlock16x8}, {kBlock8x16, kBlock8x8}},
+ {{kBlock16x32, kBlock16x16}, {kBlockInvalid, kBlock8x16}},
+ {{kBlock16x64, kBlock16x32}, {kBlockInvalid, kBlock8x32}},
+ {{kBlock32x8, kBlockInvalid}, {kBlock16x8, kBlock16x4}},
+ {{kBlock32x16, kBlockInvalid}, {kBlock16x16, kBlock16x8}},
+ {{kBlock32x32, kBlock32x16}, {kBlock16x32, kBlock16x16}},
+ {{kBlock32x64, kBlock32x32}, {kBlockInvalid, kBlock16x32}},
+ {{kBlock64x16, kBlockInvalid}, {kBlock32x16, kBlock32x8}},
+ {{kBlock64x32, kBlockInvalid}, {kBlock32x32, kBlock32x16}},
+ {{kBlock64x64, kBlock64x32}, {kBlock32x64, kBlock32x32}},
+ {{kBlock64x128, kBlock64x64}, {kBlockInvalid, kBlock32x64}},
+ {{kBlock128x64, kBlockInvalid}, {kBlock64x64, kBlock64x32}},
+ {{kBlock128x128, kBlock128x64}, {kBlock64x128, kBlock64x64}}};
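+
+// Example (illustrative): with 4:2:0 subsampling (subsampling_x = 1,
+// subsampling_y = 1), the chroma residual size of a kBlock8x8 block is
+//   kPlaneResidualSize[kBlock8x8][1][1] == kBlock4x4.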
+
+const int16_t kProjectionMvDivisionLookup[kMaxFrameDistance + 1] = {
+ 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, 2048, 1820, 1638,
+ 1489, 1365, 1260, 1170, 1092, 1024, 963, 910, 862, 819, 780,
+ 744, 712, 682, 655, 630, 606, 585, 564, 546, 528};
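+
+// Each nonzero entry above equals
+// (1 << kDivisorLookupPrecisionBits) / frame_distance (integer division), so
+// dividing by a frame distance reduces to a multiply and a shift.
+// Illustrative sketch (not part of the upstream source):
+//   const int scaled =
+//       (mv_diff * kProjectionMvDivisionLookup[distance]) >> 14;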
+
+const uint8_t kTransformWidth[kNumTransformSizes] = {
+ 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 32, 32, 32, 32, 64, 64, 64};
+
+const uint8_t kTransformHeight[kNumTransformSizes] = {
+ 4, 8, 16, 4, 8, 16, 32, 4, 8, 16, 32, 64, 8, 16, 32, 64, 16, 32, 64};
+
+const uint8_t kTransformWidth4x4[kNumTransformSizes] = {
+ 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16};
+
+const uint8_t kTransformHeight4x4[kNumTransformSizes] = {
+ 1, 2, 4, 1, 2, 4, 8, 1, 2, 4, 8, 16, 2, 4, 8, 16, 4, 8, 16};
+
+const uint8_t kTransformWidthLog2[kNumTransformSizes] = {
+ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6};
+
+const uint8_t kTransformHeightLog2[kNumTransformSizes] = {
+ 2, 3, 4, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 4, 5, 6};
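+
+// The transform dimension tables above are mutually consistent; illustrative
+// invariants (not part of the upstream source):
+//   kTransformWidth[i] == 1 << kTransformWidthLog2[i]
+//   kTransformWidth4x4[i] == kTransformWidth[i] >> 2
+// and likewise for the height tables.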
+
+// 9.3 -- Split_Tx_Size[]
+const TransformSize kSplitTransformSize[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize4x4, kTransformSize4x8,
+ kTransformSize4x4, kTransformSize4x4, kTransformSize8x8,
+ kTransformSize8x16, kTransformSize8x4, kTransformSize8x8,
+ kTransformSize8x8, kTransformSize16x16, kTransformSize16x32,
+ kTransformSize16x8, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
+ kTransformSize32x32};
+
+// Square transform of size min(w,h).
+const TransformSize kTransformSizeSquareMin[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize4x4, kTransformSize4x4,
+ kTransformSize4x4, kTransformSize8x8, kTransformSize8x8,
+ kTransformSize8x8, kTransformSize4x4, kTransformSize8x8,
+ kTransformSize16x16, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize8x8, kTransformSize16x16, kTransformSize32x32,
+ kTransformSize32x32, kTransformSize16x16, kTransformSize32x32,
+ kTransformSize64x64};
+
+// Square transform of size max(w,h).
+const TransformSize kTransformSizeSquareMax[kNumTransformSizes] = {
+ kTransformSize4x4, kTransformSize8x8, kTransformSize16x16,
+ kTransformSize8x8, kTransformSize8x8, kTransformSize16x16,
+ kTransformSize32x32, kTransformSize16x16, kTransformSize16x16,
+ kTransformSize16x16, kTransformSize32x32, kTransformSize64x64,
+ kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
+ kTransformSize64x64, kTransformSize64x64, kTransformSize64x64,
+ kTransformSize64x64};
+
+const uint8_t kNumTransformTypesInSet[kNumTransformSets] = {1, 7, 5, 16, 12, 2};
+
+const uint8_t kSgrProjParams[1 << kSgrProjParamsBits][4] = {
+ {2, 12, 1, 4}, {2, 15, 1, 6}, {2, 18, 1, 8}, {2, 21, 1, 9},
+ {2, 24, 1, 10}, {2, 29, 1, 11}, {2, 36, 1, 12}, {2, 45, 1, 13},
+ {2, 56, 1, 14}, {2, 68, 1, 15}, {0, 0, 1, 5}, {0, 0, 1, 8},
+ {0, 0, 1, 11}, {0, 0, 1, 14}, {2, 30, 0, 0}, {2, 75, 0, 0}};
+
+const int8_t kSgrProjMultiplierMin[2] = {-96, -32};
+
+const int8_t kSgrProjMultiplierMax[2] = {31, 95};
+
+const int8_t kWienerTapsMin[3] = {-5, -23, -17};
+
+const int8_t kWienerTapsMax[3] = {10, 8, 46};
+
+// This was modified from Upscale_Filter as defined in AV1 Section 7.16, in
+// order to support 16-bit packed NEON operations.
+// The sign of each tap is: - + - + + - + -
+alignas(16) const uint8_t
+ kUpscaleFilterUnsigned[kSuperResFilterShifts][kSuperResFilterTaps] = {
+ {0, 0, 0, 128, 0, 0, 0, 0}, {0, 0, 1, 128, 2, 1, 0, 0},
+ {0, 1, 3, 127, 4, 2, 1, 0}, {0, 1, 4, 127, 6, 3, 1, 0},
+ {0, 2, 6, 126, 8, 3, 1, 0}, {0, 2, 7, 125, 11, 4, 1, 0},
+ {1, 2, 8, 125, 13, 5, 2, 0}, {1, 3, 9, 124, 15, 6, 2, 0},
+ {1, 3, 10, 123, 18, 6, 2, 1}, {1, 3, 11, 122, 20, 7, 3, 1},
+ {1, 4, 12, 121, 22, 8, 3, 1}, {1, 4, 13, 120, 25, 9, 3, 1},
+ {1, 4, 14, 118, 28, 9, 3, 1}, {1, 4, 15, 117, 30, 10, 4, 1},
+ {1, 5, 16, 116, 32, 11, 4, 1}, {1, 5, 16, 114, 35, 12, 4, 1},
+ {1, 5, 17, 112, 38, 12, 4, 1}, {1, 5, 18, 111, 40, 13, 5, 1},
+ {1, 5, 18, 109, 43, 14, 5, 1}, {1, 6, 19, 107, 45, 14, 5, 1},
+ {1, 6, 19, 105, 48, 15, 5, 1}, {1, 6, 19, 103, 51, 16, 5, 1},
+ {1, 6, 20, 101, 53, 16, 6, 1}, {1, 6, 20, 99, 56, 17, 6, 1},
+ {1, 6, 20, 97, 58, 17, 6, 1}, {1, 6, 20, 95, 61, 18, 6, 1},
+ {2, 7, 20, 93, 64, 18, 6, 2}, {2, 7, 20, 91, 66, 19, 6, 1},
+ {2, 7, 20, 88, 69, 19, 6, 1}, {2, 7, 20, 86, 71, 19, 6, 1},
+ {2, 7, 20, 84, 74, 20, 7, 2}, {2, 7, 20, 81, 76, 20, 7, 1},
+ {2, 7, 20, 79, 79, 20, 7, 2}, {1, 7, 20, 76, 81, 20, 7, 2},
+ {2, 7, 20, 74, 84, 20, 7, 2}, {1, 6, 19, 71, 86, 20, 7, 2},
+ {1, 6, 19, 69, 88, 20, 7, 2}, {1, 6, 19, 66, 91, 20, 7, 2},
+ {2, 6, 18, 64, 93, 20, 7, 2}, {1, 6, 18, 61, 95, 20, 6, 1},
+ {1, 6, 17, 58, 97, 20, 6, 1}, {1, 6, 17, 56, 99, 20, 6, 1},
+ {1, 6, 16, 53, 101, 20, 6, 1}, {1, 5, 16, 51, 103, 19, 6, 1},
+ {1, 5, 15, 48, 105, 19, 6, 1}, {1, 5, 14, 45, 107, 19, 6, 1},
+ {1, 5, 14, 43, 109, 18, 5, 1}, {1, 5, 13, 40, 111, 18, 5, 1},
+ {1, 4, 12, 38, 112, 17, 5, 1}, {1, 4, 12, 35, 114, 16, 5, 1},
+ {1, 4, 11, 32, 116, 16, 5, 1}, {1, 4, 10, 30, 117, 15, 4, 1},
+ {1, 3, 9, 28, 118, 14, 4, 1}, {1, 3, 9, 25, 120, 13, 4, 1},
+ {1, 3, 8, 22, 121, 12, 4, 1}, {1, 3, 7, 20, 122, 11, 3, 1},
+ {1, 2, 6, 18, 123, 10, 3, 1}, {0, 2, 6, 15, 124, 9, 3, 1},
+ {0, 2, 5, 13, 125, 8, 2, 1}, {0, 1, 4, 11, 125, 7, 2, 0},
+ {0, 1, 3, 8, 126, 6, 2, 0}, {0, 1, 3, 6, 127, 4, 1, 0},
+ {0, 1, 2, 4, 127, 3, 1, 0}, {0, 0, 1, 2, 128, 1, 0, 0},
+};
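+
+// Example (illustrative): applying the sign pattern - + - + + - + - to a row
+// of kUpscaleFilterUnsigned recovers the signed Upscale_Filter taps from the
+// spec, e.g. row index 2, {0, 1, 3, 127, 4, 2, 1, 0}, corresponds to
+// {0, 1, -3, 127, 4, -2, 1, 0}.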
+
+alignas(8) const int8_t
+ kWarpedFilters8[3 * kWarpedPixelPrecisionShifts + 1][8] = {
+ // [-1, 0).
+ {0, 0, 127, 1, 0, 0, 0, 0},
+ {0, -1, 127, 2, 0, 0, 0, 0},
+ {1, -3, 127, 4, -1, 0, 0, 0},
+ {1, -4, 126, 6, -2, 1, 0, 0},
+ {1, -5, 126, 8, -3, 1, 0, 0},
+ {1, -6, 125, 11, -4, 1, 0, 0},
+ {1, -7, 124, 13, -4, 1, 0, 0},
+ {2, -8, 123, 15, -5, 1, 0, 0},
+ {2, -9, 122, 18, -6, 1, 0, 0},
+ {2, -10, 121, 20, -6, 1, 0, 0},
+ {2, -11, 120, 22, -7, 2, 0, 0},
+ {2, -12, 119, 25, -8, 2, 0, 0},
+ {3, -13, 117, 27, -8, 2, 0, 0},
+ {3, -13, 116, 29, -9, 2, 0, 0},
+ {3, -14, 114, 32, -10, 3, 0, 0},
+ {3, -15, 113, 35, -10, 2, 0, 0},
+ {3, -15, 111, 37, -11, 3, 0, 0},
+ {3, -16, 109, 40, -11, 3, 0, 0},
+ {3, -16, 108, 42, -12, 3, 0, 0},
+ {4, -17, 106, 45, -13, 3, 0, 0},
+ {4, -17, 104, 47, -13, 3, 0, 0},
+ {4, -17, 102, 50, -14, 3, 0, 0},
+ {4, -17, 100, 52, -14, 3, 0, 0},
+ {4, -18, 98, 55, -15, 4, 0, 0},
+ {4, -18, 96, 58, -15, 3, 0, 0},
+ {4, -18, 94, 60, -16, 4, 0, 0},
+ {4, -18, 91, 63, -16, 4, 0, 0},
+ {4, -18, 89, 65, -16, 4, 0, 0},
+ {4, -18, 87, 68, -17, 4, 0, 0},
+ {4, -18, 85, 70, -17, 4, 0, 0},
+ {4, -18, 82, 73, -17, 4, 0, 0},
+ {4, -18, 80, 75, -17, 4, 0, 0},
+ {4, -18, 78, 78, -18, 4, 0, 0},
+ {4, -17, 75, 80, -18, 4, 0, 0},
+ {4, -17, 73, 82, -18, 4, 0, 0},
+ {4, -17, 70, 85, -18, 4, 0, 0},
+ {4, -17, 68, 87, -18, 4, 0, 0},
+ {4, -16, 65, 89, -18, 4, 0, 0},
+ {4, -16, 63, 91, -18, 4, 0, 0},
+ {4, -16, 60, 94, -18, 4, 0, 0},
+ {3, -15, 58, 96, -18, 4, 0, 0},
+ {4, -15, 55, 98, -18, 4, 0, 0},
+ {3, -14, 52, 100, -17, 4, 0, 0},
+ {3, -14, 50, 102, -17, 4, 0, 0},
+ {3, -13, 47, 104, -17, 4, 0, 0},
+ {3, -13, 45, 106, -17, 4, 0, 0},
+ {3, -12, 42, 108, -16, 3, 0, 0},
+ {3, -11, 40, 109, -16, 3, 0, 0},
+ {3, -11, 37, 111, -15, 3, 0, 0},
+ {2, -10, 35, 113, -15, 3, 0, 0},
+ {3, -10, 32, 114, -14, 3, 0, 0},
+ {2, -9, 29, 116, -13, 3, 0, 0},
+ {2, -8, 27, 117, -13, 3, 0, 0},
+ {2, -8, 25, 119, -12, 2, 0, 0},
+ {2, -7, 22, 120, -11, 2, 0, 0},
+ {1, -6, 20, 121, -10, 2, 0, 0},
+ {1, -6, 18, 122, -9, 2, 0, 0},
+ {1, -5, 15, 123, -8, 2, 0, 0},
+ {1, -4, 13, 124, -7, 1, 0, 0},
+ {1, -4, 11, 125, -6, 1, 0, 0},
+ {1, -3, 8, 126, -5, 1, 0, 0},
+ {1, -2, 6, 126, -4, 1, 0, 0},
+ {0, -1, 4, 127, -3, 1, 0, 0},
+ {0, 0, 2, 127, -1, 0, 0, 0},
+ // [0, 1).
+ {0, 0, 0, 127, 1, 0, 0, 0},
+ {0, 0, -1, 127, 2, 0, 0, 0},
+ {0, 1, -3, 127, 4, -2, 1, 0},
+ {0, 1, -5, 127, 6, -2, 1, 0},
+ {0, 2, -6, 126, 8, -3, 1, 0},
+ {-1, 2, -7, 126, 11, -4, 2, -1},
+ {-1, 3, -8, 125, 13, -5, 2, -1},
+ {-1, 3, -10, 124, 16, -6, 3, -1},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-1, 4, -12, 122, 20, -7, 3, -1},
+ {-1, 4, -13, 121, 23, -8, 3, -1},
+ {-2, 5, -14, 120, 25, -9, 4, -1},
+ {-1, 5, -15, 119, 27, -10, 4, -1},
+ {-1, 5, -16, 118, 30, -11, 4, -1},
+ {-2, 6, -17, 116, 33, -12, 5, -1},
+ {-2, 6, -17, 114, 35, -12, 5, -1},
+ {-2, 6, -18, 113, 38, -13, 5, -1},
+ {-2, 7, -19, 111, 41, -14, 6, -2},
+ {-2, 7, -19, 110, 43, -15, 6, -2},
+ {-2, 7, -20, 108, 46, -15, 6, -2},
+ {-2, 7, -20, 106, 49, -16, 6, -2},
+ {-2, 7, -21, 104, 51, -16, 7, -2},
+ {-2, 7, -21, 102, 54, -17, 7, -2},
+ {-2, 8, -21, 100, 56, -18, 7, -2},
+ {-2, 8, -22, 98, 59, -18, 7, -2},
+ {-2, 8, -22, 96, 62, -19, 7, -2},
+ {-2, 8, -22, 94, 64, -19, 7, -2},
+ {-2, 8, -22, 91, 67, -20, 8, -2},
+ {-2, 8, -22, 89, 69, -20, 8, -2},
+ {-2, 8, -22, 87, 72, -21, 8, -2},
+ {-2, 8, -21, 84, 74, -21, 8, -2},
+ {-2, 8, -22, 82, 77, -21, 8, -2},
+ {-2, 8, -21, 79, 79, -21, 8, -2},
+ {-2, 8, -21, 77, 82, -22, 8, -2},
+ {-2, 8, -21, 74, 84, -21, 8, -2},
+ {-2, 8, -21, 72, 87, -22, 8, -2},
+ {-2, 8, -20, 69, 89, -22, 8, -2},
+ {-2, 8, -20, 67, 91, -22, 8, -2},
+ {-2, 7, -19, 64, 94, -22, 8, -2},
+ {-2, 7, -19, 62, 96, -22, 8, -2},
+ {-2, 7, -18, 59, 98, -22, 8, -2},
+ {-2, 7, -18, 56, 100, -21, 8, -2},
+ {-2, 7, -17, 54, 102, -21, 7, -2},
+ {-2, 7, -16, 51, 104, -21, 7, -2},
+ {-2, 6, -16, 49, 106, -20, 7, -2},
+ {-2, 6, -15, 46, 108, -20, 7, -2},
+ {-2, 6, -15, 43, 110, -19, 7, -2},
+ {-2, 6, -14, 41, 111, -19, 7, -2},
+ {-1, 5, -13, 38, 113, -18, 6, -2},
+ {-1, 5, -12, 35, 114, -17, 6, -2},
+ {-1, 5, -12, 33, 116, -17, 6, -2},
+ {-1, 4, -11, 30, 118, -16, 5, -1},
+ {-1, 4, -10, 27, 119, -15, 5, -1},
+ {-1, 4, -9, 25, 120, -14, 5, -2},
+ {-1, 3, -8, 23, 121, -13, 4, -1},
+ {-1, 3, -7, 20, 122, -12, 4, -1},
+ {-1, 3, -7, 18, 123, -11, 4, -1},
+ {-1, 3, -6, 16, 124, -10, 3, -1},
+ {-1, 2, -5, 13, 125, -8, 3, -1},
+ {-1, 2, -4, 11, 126, -7, 2, -1},
+ {0, 1, -3, 8, 126, -6, 2, 0},
+ {0, 1, -2, 6, 127, -5, 1, 0},
+ {0, 1, -2, 4, 127, -3, 1, 0},
+ {0, 0, 0, 2, 127, -1, 0, 0},
+ // [1, 2).
+ {0, 0, 0, 1, 127, 0, 0, 0},
+ {0, 0, 0, -1, 127, 2, 0, 0},
+ {0, 0, 1, -3, 127, 4, -1, 0},
+ {0, 0, 1, -4, 126, 6, -2, 1},
+ {0, 0, 1, -5, 126, 8, -3, 1},
+ {0, 0, 1, -6, 125, 11, -4, 1},
+ {0, 0, 1, -7, 124, 13, -4, 1},
+ {0, 0, 2, -8, 123, 15, -5, 1},
+ {0, 0, 2, -9, 122, 18, -6, 1},
+ {0, 0, 2, -10, 121, 20, -6, 1},
+ {0, 0, 2, -11, 120, 22, -7, 2},
+ {0, 0, 2, -12, 119, 25, -8, 2},
+ {0, 0, 3, -13, 117, 27, -8, 2},
+ {0, 0, 3, -13, 116, 29, -9, 2},
+ {0, 0, 3, -14, 114, 32, -10, 3},
+ {0, 0, 3, -15, 113, 35, -10, 2},
+ {0, 0, 3, -15, 111, 37, -11, 3},
+ {0, 0, 3, -16, 109, 40, -11, 3},
+ {0, 0, 3, -16, 108, 42, -12, 3},
+ {0, 0, 4, -17, 106, 45, -13, 3},
+ {0, 0, 4, -17, 104, 47, -13, 3},
+ {0, 0, 4, -17, 102, 50, -14, 3},
+ {0, 0, 4, -17, 100, 52, -14, 3},
+ {0, 0, 4, -18, 98, 55, -15, 4},
+ {0, 0, 4, -18, 96, 58, -15, 3},
+ {0, 0, 4, -18, 94, 60, -16, 4},
+ {0, 0, 4, -18, 91, 63, -16, 4},
+ {0, 0, 4, -18, 89, 65, -16, 4},
+ {0, 0, 4, -18, 87, 68, -17, 4},
+ {0, 0, 4, -18, 85, 70, -17, 4},
+ {0, 0, 4, -18, 82, 73, -17, 4},
+ {0, 0, 4, -18, 80, 75, -17, 4},
+ {0, 0, 4, -18, 78, 78, -18, 4},
+ {0, 0, 4, -17, 75, 80, -18, 4},
+ {0, 0, 4, -17, 73, 82, -18, 4},
+ {0, 0, 4, -17, 70, 85, -18, 4},
+ {0, 0, 4, -17, 68, 87, -18, 4},
+ {0, 0, 4, -16, 65, 89, -18, 4},
+ {0, 0, 4, -16, 63, 91, -18, 4},
+ {0, 0, 4, -16, 60, 94, -18, 4},
+ {0, 0, 3, -15, 58, 96, -18, 4},
+ {0, 0, 4, -15, 55, 98, -18, 4},
+ {0, 0, 3, -14, 52, 100, -17, 4},
+ {0, 0, 3, -14, 50, 102, -17, 4},
+ {0, 0, 3, -13, 47, 104, -17, 4},
+ {0, 0, 3, -13, 45, 106, -17, 4},
+ {0, 0, 3, -12, 42, 108, -16, 3},
+ {0, 0, 3, -11, 40, 109, -16, 3},
+ {0, 0, 3, -11, 37, 111, -15, 3},
+ {0, 0, 2, -10, 35, 113, -15, 3},
+ {0, 0, 3, -10, 32, 114, -14, 3},
+ {0, 0, 2, -9, 29, 116, -13, 3},
+ {0, 0, 2, -8, 27, 117, -13, 3},
+ {0, 0, 2, -8, 25, 119, -12, 2},
+ {0, 0, 2, -7, 22, 120, -11, 2},
+ {0, 0, 1, -6, 20, 121, -10, 2},
+ {0, 0, 1, -6, 18, 122, -9, 2},
+ {0, 0, 1, -5, 15, 123, -8, 2},
+ {0, 0, 1, -4, 13, 124, -7, 1},
+ {0, 0, 1, -4, 11, 125, -6, 1},
+ {0, 0, 1, -3, 8, 126, -5, 1},
+ {0, 0, 1, -2, 6, 126, -4, 1},
+ {0, 0, 0, -1, 4, 127, -3, 1},
+ {0, 0, 0, 0, 2, 127, -1, 0},
+ // dummy, replicate row index 191.
+ {0, 0, 0, 0, 2, 127, -1, 0}};
+
+alignas(16) const int16_t
+ kWarpedFilters[3 * kWarpedPixelPrecisionShifts + 1][8] = {
+ // [-1, 0).
+ {0, 0, 127, 1, 0, 0, 0, 0},
+ {0, -1, 127, 2, 0, 0, 0, 0},
+ {1, -3, 127, 4, -1, 0, 0, 0},
+ {1, -4, 126, 6, -2, 1, 0, 0},
+ {1, -5, 126, 8, -3, 1, 0, 0},
+ {1, -6, 125, 11, -4, 1, 0, 0},
+ {1, -7, 124, 13, -4, 1, 0, 0},
+ {2, -8, 123, 15, -5, 1, 0, 0},
+ {2, -9, 122, 18, -6, 1, 0, 0},
+ {2, -10, 121, 20, -6, 1, 0, 0},
+ {2, -11, 120, 22, -7, 2, 0, 0},
+ {2, -12, 119, 25, -8, 2, 0, 0},
+ {3, -13, 117, 27, -8, 2, 0, 0},
+ {3, -13, 116, 29, -9, 2, 0, 0},
+ {3, -14, 114, 32, -10, 3, 0, 0},
+ {3, -15, 113, 35, -10, 2, 0, 0},
+ {3, -15, 111, 37, -11, 3, 0, 0},
+ {3, -16, 109, 40, -11, 3, 0, 0},
+ {3, -16, 108, 42, -12, 3, 0, 0},
+ {4, -17, 106, 45, -13, 3, 0, 0},
+ {4, -17, 104, 47, -13, 3, 0, 0},
+ {4, -17, 102, 50, -14, 3, 0, 0},
+ {4, -17, 100, 52, -14, 3, 0, 0},
+ {4, -18, 98, 55, -15, 4, 0, 0},
+ {4, -18, 96, 58, -15, 3, 0, 0},
+ {4, -18, 94, 60, -16, 4, 0, 0},
+ {4, -18, 91, 63, -16, 4, 0, 0},
+ {4, -18, 89, 65, -16, 4, 0, 0},
+ {4, -18, 87, 68, -17, 4, 0, 0},
+ {4, -18, 85, 70, -17, 4, 0, 0},
+ {4, -18, 82, 73, -17, 4, 0, 0},
+ {4, -18, 80, 75, -17, 4, 0, 0},
+ {4, -18, 78, 78, -18, 4, 0, 0},
+ {4, -17, 75, 80, -18, 4, 0, 0},
+ {4, -17, 73, 82, -18, 4, 0, 0},
+ {4, -17, 70, 85, -18, 4, 0, 0},
+ {4, -17, 68, 87, -18, 4, 0, 0},
+ {4, -16, 65, 89, -18, 4, 0, 0},
+ {4, -16, 63, 91, -18, 4, 0, 0},
+ {4, -16, 60, 94, -18, 4, 0, 0},
+ {3, -15, 58, 96, -18, 4, 0, 0},
+ {4, -15, 55, 98, -18, 4, 0, 0},
+ {3, -14, 52, 100, -17, 4, 0, 0},
+ {3, -14, 50, 102, -17, 4, 0, 0},
+ {3, -13, 47, 104, -17, 4, 0, 0},
+ {3, -13, 45, 106, -17, 4, 0, 0},
+ {3, -12, 42, 108, -16, 3, 0, 0},
+ {3, -11, 40, 109, -16, 3, 0, 0},
+ {3, -11, 37, 111, -15, 3, 0, 0},
+ {2, -10, 35, 113, -15, 3, 0, 0},
+ {3, -10, 32, 114, -14, 3, 0, 0},
+ {2, -9, 29, 116, -13, 3, 0, 0},
+ {2, -8, 27, 117, -13, 3, 0, 0},
+ {2, -8, 25, 119, -12, 2, 0, 0},
+ {2, -7, 22, 120, -11, 2, 0, 0},
+ {1, -6, 20, 121, -10, 2, 0, 0},
+ {1, -6, 18, 122, -9, 2, 0, 0},
+ {1, -5, 15, 123, -8, 2, 0, 0},
+ {1, -4, 13, 124, -7, 1, 0, 0},
+ {1, -4, 11, 125, -6, 1, 0, 0},
+ {1, -3, 8, 126, -5, 1, 0, 0},
+ {1, -2, 6, 126, -4, 1, 0, 0},
+ {0, -1, 4, 127, -3, 1, 0, 0},
+ {0, 0, 2, 127, -1, 0, 0, 0},
+ // [0, 1).
+ {0, 0, 0, 127, 1, 0, 0, 0},
+ {0, 0, -1, 127, 2, 0, 0, 0},
+ {0, 1, -3, 127, 4, -2, 1, 0},
+ {0, 1, -5, 127, 6, -2, 1, 0},
+ {0, 2, -6, 126, 8, -3, 1, 0},
+ {-1, 2, -7, 126, 11, -4, 2, -1},
+ {-1, 3, -8, 125, 13, -5, 2, -1},
+ {-1, 3, -10, 124, 16, -6, 3, -1},
+ {-1, 4, -11, 123, 18, -7, 3, -1},
+ {-1, 4, -12, 122, 20, -7, 3, -1},
+ {-1, 4, -13, 121, 23, -8, 3, -1},
+ {-2, 5, -14, 120, 25, -9, 4, -1},
+ {-1, 5, -15, 119, 27, -10, 4, -1},
+ {-1, 5, -16, 118, 30, -11, 4, -1},
+ {-2, 6, -17, 116, 33, -12, 5, -1},
+ {-2, 6, -17, 114, 35, -12, 5, -1},
+ {-2, 6, -18, 113, 38, -13, 5, -1},
+ {-2, 7, -19, 111, 41, -14, 6, -2},
+ {-2, 7, -19, 110, 43, -15, 6, -2},
+ {-2, 7, -20, 108, 46, -15, 6, -2},
+ {-2, 7, -20, 106, 49, -16, 6, -2},
+ {-2, 7, -21, 104, 51, -16, 7, -2},
+ {-2, 7, -21, 102, 54, -17, 7, -2},
+ {-2, 8, -21, 100, 56, -18, 7, -2},
+ {-2, 8, -22, 98, 59, -18, 7, -2},
+ {-2, 8, -22, 96, 62, -19, 7, -2},
+ {-2, 8, -22, 94, 64, -19, 7, -2},
+ {-2, 8, -22, 91, 67, -20, 8, -2},
+ {-2, 8, -22, 89, 69, -20, 8, -2},
+ {-2, 8, -22, 87, 72, -21, 8, -2},
+ {-2, 8, -21, 84, 74, -21, 8, -2},
+ {-2, 8, -22, 82, 77, -21, 8, -2},
+ {-2, 8, -21, 79, 79, -21, 8, -2},
+ {-2, 8, -21, 77, 82, -22, 8, -2},
+ {-2, 8, -21, 74, 84, -21, 8, -2},
+ {-2, 8, -21, 72, 87, -22, 8, -2},
+ {-2, 8, -20, 69, 89, -22, 8, -2},
+ {-2, 8, -20, 67, 91, -22, 8, -2},
+ {-2, 7, -19, 64, 94, -22, 8, -2},
+ {-2, 7, -19, 62, 96, -22, 8, -2},
+ {-2, 7, -18, 59, 98, -22, 8, -2},
+ {-2, 7, -18, 56, 100, -21, 8, -2},
+ {-2, 7, -17, 54, 102, -21, 7, -2},
+ {-2, 7, -16, 51, 104, -21, 7, -2},
+ {-2, 6, -16, 49, 106, -20, 7, -2},
+ {-2, 6, -15, 46, 108, -20, 7, -2},
+ {-2, 6, -15, 43, 110, -19, 7, -2},
+ {-2, 6, -14, 41, 111, -19, 7, -2},
+ {-1, 5, -13, 38, 113, -18, 6, -2},
+ {-1, 5, -12, 35, 114, -17, 6, -2},
+ {-1, 5, -12, 33, 116, -17, 6, -2},
+ {-1, 4, -11, 30, 118, -16, 5, -1},
+ {-1, 4, -10, 27, 119, -15, 5, -1},
+ {-1, 4, -9, 25, 120, -14, 5, -2},
+ {-1, 3, -8, 23, 121, -13, 4, -1},
+ {-1, 3, -7, 20, 122, -12, 4, -1},
+ {-1, 3, -7, 18, 123, -11, 4, -1},
+ {-1, 3, -6, 16, 124, -10, 3, -1},
+ {-1, 2, -5, 13, 125, -8, 3, -1},
+ {-1, 2, -4, 11, 126, -7, 2, -1},
+ {0, 1, -3, 8, 126, -6, 2, 0},
+ {0, 1, -2, 6, 127, -5, 1, 0},
+ {0, 1, -2, 4, 127, -3, 1, 0},
+ {0, 0, 0, 2, 127, -1, 0, 0},
+ // [1, 2).
+ {0, 0, 0, 1, 127, 0, 0, 0},
+ {0, 0, 0, -1, 127, 2, 0, 0},
+ {0, 0, 1, -3, 127, 4, -1, 0},
+ {0, 0, 1, -4, 126, 6, -2, 1},
+ {0, 0, 1, -5, 126, 8, -3, 1},
+ {0, 0, 1, -6, 125, 11, -4, 1},
+ {0, 0, 1, -7, 124, 13, -4, 1},
+ {0, 0, 2, -8, 123, 15, -5, 1},
+ {0, 0, 2, -9, 122, 18, -6, 1},
+ {0, 0, 2, -10, 121, 20, -6, 1},
+ {0, 0, 2, -11, 120, 22, -7, 2},
+ {0, 0, 2, -12, 119, 25, -8, 2},
+ {0, 0, 3, -13, 117, 27, -8, 2},
+ {0, 0, 3, -13, 116, 29, -9, 2},
+ {0, 0, 3, -14, 114, 32, -10, 3},
+ {0, 0, 3, -15, 113, 35, -10, 2},
+ {0, 0, 3, -15, 111, 37, -11, 3},
+ {0, 0, 3, -16, 109, 40, -11, 3},
+ {0, 0, 3, -16, 108, 42, -12, 3},
+ {0, 0, 4, -17, 106, 45, -13, 3},
+ {0, 0, 4, -17, 104, 47, -13, 3},
+ {0, 0, 4, -17, 102, 50, -14, 3},
+ {0, 0, 4, -17, 100, 52, -14, 3},
+ {0, 0, 4, -18, 98, 55, -15, 4},
+ {0, 0, 4, -18, 96, 58, -15, 3},
+ {0, 0, 4, -18, 94, 60, -16, 4},
+ {0, 0, 4, -18, 91, 63, -16, 4},
+ {0, 0, 4, -18, 89, 65, -16, 4},
+ {0, 0, 4, -18, 87, 68, -17, 4},
+ {0, 0, 4, -18, 85, 70, -17, 4},
+ {0, 0, 4, -18, 82, 73, -17, 4},
+ {0, 0, 4, -18, 80, 75, -17, 4},
+ {0, 0, 4, -18, 78, 78, -18, 4},
+ {0, 0, 4, -17, 75, 80, -18, 4},
+ {0, 0, 4, -17, 73, 82, -18, 4},
+ {0, 0, 4, -17, 70, 85, -18, 4},
+ {0, 0, 4, -17, 68, 87, -18, 4},
+ {0, 0, 4, -16, 65, 89, -18, 4},
+ {0, 0, 4, -16, 63, 91, -18, 4},
+ {0, 0, 4, -16, 60, 94, -18, 4},
+ {0, 0, 3, -15, 58, 96, -18, 4},
+ {0, 0, 4, -15, 55, 98, -18, 4},
+ {0, 0, 3, -14, 52, 100, -17, 4},
+ {0, 0, 3, -14, 50, 102, -17, 4},
+ {0, 0, 3, -13, 47, 104, -17, 4},
+ {0, 0, 3, -13, 45, 106, -17, 4},
+ {0, 0, 3, -12, 42, 108, -16, 3},
+ {0, 0, 3, -11, 40, 109, -16, 3},
+ {0, 0, 3, -11, 37, 111, -15, 3},
+ {0, 0, 2, -10, 35, 113, -15, 3},
+ {0, 0, 3, -10, 32, 114, -14, 3},
+ {0, 0, 2, -9, 29, 116, -13, 3},
+ {0, 0, 2, -8, 27, 117, -13, 3},
+ {0, 0, 2, -8, 25, 119, -12, 2},
+ {0, 0, 2, -7, 22, 120, -11, 2},
+ {0, 0, 1, -6, 20, 121, -10, 2},
+ {0, 0, 1, -6, 18, 122, -9, 2},
+ {0, 0, 1, -5, 15, 123, -8, 2},
+ {0, 0, 1, -4, 13, 124, -7, 1},
+ {0, 0, 1, -4, 11, 125, -6, 1},
+ {0, 0, 1, -3, 8, 126, -5, 1},
+ {0, 0, 1, -2, 6, 126, -4, 1},
+ {0, 0, 0, -1, 4, 127, -3, 1},
+ {0, 0, 0, 0, 2, 127, -1, 0},
+ // dummy, replicate row index 191.
+ {0, 0, 0, 0, 2, 127, -1, 0}};
+
+// Every value in |kSubPixelFilters| is even. Divide by 2 to simplify
+// calculations by reducing the range by 1 bit.
+alignas(8) const int8_t kHalfSubPixelFilters[6][16][8] = {
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, -3, 63, 4, -1, 0, 0},
+ {0, 1, -5, 61, 9, -2, 0, 0},
+ {0, 1, -6, 58, 14, -4, 1, 0},
+ {0, 1, -7, 55, 19, -5, 1, 0},
+ {0, 1, -7, 51, 24, -6, 1, 0},
+ {0, 1, -8, 47, 29, -6, 1, 0},
+ {0, 1, -7, 42, 33, -6, 1, 0},
+ {0, 1, -7, 38, 38, -7, 1, 0},
+ {0, 1, -6, 33, 42, -7, 1, 0},
+ {0, 1, -6, 29, 47, -8, 1, 0},
+ {0, 1, -6, 24, 51, -7, 1, 0},
+ {0, 1, -5, 19, 55, -7, 1, 0},
+ {0, 1, -4, 14, 58, -6, 1, 0},
+ {0, 0, -2, 9, 61, -5, 1, 0},
+ {0, 0, -1, 4, 63, -3, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 14, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, -1, 8, 27, 24, 6, 0, 0},
+ {0, -1, 7, 26, 26, 7, -1, 0},
+ {0, 0, 6, 24, 27, 8, -1, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 14, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {-1, 1, -3, 63, 4, -1, 1, 0},
+ {-1, 3, -6, 62, 8, -3, 2, -1},
+ {-1, 4, -9, 60, 13, -5, 3, -1},
+ {-2, 5, -11, 58, 19, -7, 3, -1},
+ {-2, 5, -11, 54, 24, -9, 4, -1},
+ {-2, 5, -12, 50, 30, -10, 4, -1},
+ {-2, 5, -12, 45, 35, -11, 5, -1},
+ {-2, 6, -12, 40, 40, -12, 6, -2},
+ {-1, 5, -11, 35, 45, -12, 5, -2},
+ {-1, 4, -10, 30, 50, -12, 5, -2},
+ {-1, 4, -9, 24, 54, -11, 5, -2},
+ {-1, 3, -7, 19, 58, -11, 5, -2},
+ {-1, 3, -5, 13, 60, -9, 4, -1},
+ {-1, 2, -3, 8, 62, -6, 3, -1},
+ {0, 1, -1, 4, 63, -3, 1, -1}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 0, 60, 4, 0, 0, 0},
+ {0, 0, 0, 56, 8, 0, 0, 0},
+ {0, 0, 0, 52, 12, 0, 0, 0},
+ {0, 0, 0, 48, 16, 0, 0, 0},
+ {0, 0, 0, 44, 20, 0, 0, 0},
+ {0, 0, 0, 40, 24, 0, 0, 0},
+ {0, 0, 0, 36, 28, 0, 0, 0},
+ {0, 0, 0, 32, 32, 0, 0, 0},
+ {0, 0, 0, 28, 36, 0, 0, 0},
+ {0, 0, 0, 24, 40, 0, 0, 0},
+ {0, 0, 0, 20, 44, 0, 0, 0},
+ {0, 0, 0, 16, 48, 0, 0, 0},
+ {0, 0, 0, 12, 52, 0, 0, 0},
+ {0, 0, 0, 8, 56, 0, 0, 0},
+ {0, 0, 0, 4, 60, 0, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, -2, 63, 4, -1, 0, 0},
+ {0, 0, -4, 61, 9, -2, 0, 0},
+ {0, 0, -5, 58, 14, -3, 0, 0},
+ {0, 0, -6, 55, 19, -4, 0, 0},
+ {0, 0, -6, 51, 24, -5, 0, 0},
+ {0, 0, -7, 47, 29, -5, 0, 0},
+ {0, 0, -6, 42, 33, -5, 0, 0},
+ {0, 0, -6, 38, 38, -6, 0, 0},
+ {0, 0, -5, 33, 42, -6, 0, 0},
+ {0, 0, -5, 29, 47, -7, 0, 0},
+ {0, 0, -5, 24, 51, -6, 0, 0},
+ {0, 0, -4, 19, 55, -6, 0, 0},
+ {0, 0, -3, 14, 58, -5, 0, 0},
+ {0, 0, -2, 9, 61, -4, 0, 0},
+ {0, 0, -1, 4, 63, -2, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 15, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 0, 7, 27, 24, 6, 0, 0},
+ {0, 0, 6, 26, 26, 6, 0, 0},
+ {0, 0, 6, 24, 27, 7, 0, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 15, 0, 0}}};
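+
+// Illustrative sketch (not part of the upstream source), assuming a Clip3()
+// helper: because the taps are halved, applying one of these filters rounds
+// with a final shift of kFilterBits - 1 instead of kFilterBits:
+//   int sum = 0;
+//   for (int k = 0; k < kSubPixelTaps; ++k) {
+//     sum += kHalfSubPixelFilters[filter_index][subpixel][k] * src[k - 3];
+//   }
+//   const int pixel = Clip3((sum + 32) >> (kFilterBits - 1), 0, 255);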
+
+// Absolute values of |kHalfSubPixelFilters|. Used in situations where we know
+// the pattern of the signs and account for it in other ways.
+const uint8_t kAbsHalfSubPixelFilters[6][16][8] = {
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 3, 63, 4, 1, 0, 0},
+ {0, 1, 5, 61, 9, 2, 0, 0},
+ {0, 1, 6, 58, 14, 4, 1, 0},
+ {0, 1, 7, 55, 19, 5, 1, 0},
+ {0, 1, 7, 51, 24, 6, 1, 0},
+ {0, 1, 8, 47, 29, 6, 1, 0},
+ {0, 1, 7, 42, 33, 6, 1, 0},
+ {0, 1, 7, 38, 38, 7, 1, 0},
+ {0, 1, 6, 33, 42, 7, 1, 0},
+ {0, 1, 6, 29, 47, 8, 1, 0},
+ {0, 1, 6, 24, 51, 7, 1, 0},
+ {0, 1, 5, 19, 55, 7, 1, 0},
+ {0, 1, 4, 14, 58, 6, 1, 0},
+ {0, 0, 2, 9, 61, 5, 1, 0},
+ {0, 0, 1, 4, 63, 3, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 1, 14, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 1, 8, 27, 24, 6, 0, 0},
+ {0, 1, 7, 26, 26, 7, 1, 0},
+ {0, 0, 6, 24, 27, 8, 1, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 14, 1, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {1, 1, 3, 63, 4, 1, 1, 0},
+ {1, 3, 6, 62, 8, 3, 2, 1},
+ {1, 4, 9, 60, 13, 5, 3, 1},
+ {2, 5, 11, 58, 19, 7, 3, 1},
+ {2, 5, 11, 54, 24, 9, 4, 1},
+ {2, 5, 12, 50, 30, 10, 4, 1},
+ {2, 5, 12, 45, 35, 11, 5, 1},
+ {2, 6, 12, 40, 40, 12, 6, 2},
+ {1, 5, 11, 35, 45, 12, 5, 2},
+ {1, 4, 10, 30, 50, 12, 5, 2},
+ {1, 4, 9, 24, 54, 11, 5, 2},
+ {1, 3, 7, 19, 58, 11, 5, 2},
+ {1, 3, 5, 13, 60, 9, 4, 1},
+ {1, 2, 3, 8, 62, 6, 3, 1},
+ {0, 1, 1, 4, 63, 3, 1, 1}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 0, 60, 4, 0, 0, 0},
+ {0, 0, 0, 56, 8, 0, 0, 0},
+ {0, 0, 0, 52, 12, 0, 0, 0},
+ {0, 0, 0, 48, 16, 0, 0, 0},
+ {0, 0, 0, 44, 20, 0, 0, 0},
+ {0, 0, 0, 40, 24, 0, 0, 0},
+ {0, 0, 0, 36, 28, 0, 0, 0},
+ {0, 0, 0, 32, 32, 0, 0, 0},
+ {0, 0, 0, 28, 36, 0, 0, 0},
+ {0, 0, 0, 24, 40, 0, 0, 0},
+ {0, 0, 0, 20, 44, 0, 0, 0},
+ {0, 0, 0, 16, 48, 0, 0, 0},
+ {0, 0, 0, 12, 52, 0, 0, 0},
+ {0, 0, 0, 8, 56, 0, 0, 0},
+ {0, 0, 0, 4, 60, 0, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 2, 63, 4, 1, 0, 0},
+ {0, 0, 4, 61, 9, 2, 0, 0},
+ {0, 0, 5, 58, 14, 3, 0, 0},
+ {0, 0, 6, 55, 19, 4, 0, 0},
+ {0, 0, 6, 51, 24, 5, 0, 0},
+ {0, 0, 7, 47, 29, 5, 0, 0},
+ {0, 0, 6, 42, 33, 5, 0, 0},
+ {0, 0, 6, 38, 38, 6, 0, 0},
+ {0, 0, 5, 33, 42, 6, 0, 0},
+ {0, 0, 5, 29, 47, 7, 0, 0},
+ {0, 0, 5, 24, 51, 6, 0, 0},
+ {0, 0, 4, 19, 55, 6, 0, 0},
+ {0, 0, 3, 14, 58, 5, 0, 0},
+ {0, 0, 2, 9, 61, 4, 0, 0},
+ {0, 0, 1, 4, 63, 2, 0, 0}},
+ {{0, 0, 0, 64, 0, 0, 0, 0},
+ {0, 0, 15, 31, 17, 1, 0, 0},
+ {0, 0, 13, 31, 18, 2, 0, 0},
+ {0, 0, 11, 31, 20, 2, 0, 0},
+ {0, 0, 10, 30, 21, 3, 0, 0},
+ {0, 0, 9, 29, 22, 4, 0, 0},
+ {0, 0, 8, 28, 23, 5, 0, 0},
+ {0, 0, 7, 27, 24, 6, 0, 0},
+ {0, 0, 6, 26, 26, 6, 0, 0},
+ {0, 0, 6, 24, 27, 7, 0, 0},
+ {0, 0, 5, 23, 28, 8, 0, 0},
+ {0, 0, 4, 22, 29, 9, 0, 0},
+ {0, 0, 3, 21, 30, 10, 0, 0},
+ {0, 0, 2, 20, 31, 11, 0, 0},
+ {0, 0, 2, 18, 31, 13, 0, 0},
+ {0, 0, 1, 17, 31, 15, 0, 0}}};
+
+// 9.3 -- Dr_Intra_Derivative[]
+// This is a more compact version of the table from the spec; angle / 2 - 1 is
+// used as the lookup index. Note that angle / 3 - 1 would work too, but the
+// calculation is more costly.
+const int16_t kDirectionalIntraPredictorDerivative[44] = {
+ // Approx angle
+ 1023, 0, // 3, ...
+ 547, // 6, ...
+ 372, 0, 0, // 9, ...
+ 273, // 14, ...
+ 215, 0, // 17, ...
+ 178, // 20, ...
+ 151, 0, // 23, ... (113 & 203 are base angles)
+ 132, // 26, ...
+ 116, 0, // 29, ...
+ 102, 0, // 32, ...
+ 90, // 36, ...
+ 80, 0, // 39, ...
+ 71, // 42, ...
+ 64, 0, // 45, ... (45 & 135 are base angles)
+ 57, // 48, ...
+ 51, 0, // 51, ...
+ 45, 0, // 54, ...
+ 40, // 58, ...
+ 35, 0, // 61, ...
+ 31, // 64, ...
+ 27, 0, // 67, ... (67 & 157 are base angles)
+ 23, // 70, ...
+ 19, 0, // 73, ...
+ 15, 0, // 76, ...
+ 11, 0, // 81, ...
+ 7, // 84, ...
+ 3, // 87, ...
+};
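+
+// Example (illustrative): the step for a directional prediction angle is
+// looked up as
+//   dx = kDirectionalIntraPredictorDerivative[angle / 2 - 1];
+// e.g. angle 6 maps to index 2, value 547.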
+
+const uint8_t kDeblockFilterLevelIndex[kMaxPlanes][kNumLoopFilterTypes] = {
+ {0, 1}, {2, 2}, {3, 3}};
+
+} // namespace libgav1
diff --git a/src/utils/constants.h b/src/utils/constants.h
new file mode 100644
index 0000000..34cf56d
--- /dev/null
+++ b/src/utils/constants.h
@@ -0,0 +1,744 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_CONSTANTS_H_
+#define LIBGAV1_SRC_UTILS_CONSTANTS_H_
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "src/utils/bit_mask_set.h"
+
+namespace libgav1 {
+
+// Returns the number of elements between begin (inclusive) and end (inclusive).
+constexpr int EnumRangeLength(int begin, int end) { return end - begin + 1; }
+
+enum {
+// Maximum number of threads that the library will ever create.
+#if defined(LIBGAV1_MAX_THREADS) && LIBGAV1_MAX_THREADS > 0
+ kMaxThreads = LIBGAV1_MAX_THREADS
+#else
+ kMaxThreads = 128
+#endif
+}; // anonymous enum
+
+enum {
+ kInvalidMvValue = -32768,
+ kCdfMaxProbability = 32768,
+ kBlockWidthCount = 5,
+ kMaxSegments = 8,
+ kMinQuantizer = 0,
+ kMinLossyQuantizer = 1,
+ kMaxQuantizer = 255,
+ // Quantizer matrix is used only when level < 15.
+ kNumQuantizerLevelsForQuantizerMatrix = 15,
+ kFrameLfCount = 4,
+ kMaxLoopFilterValue = 63,
+ kNum4x4In64x64 = 256,
+ kMaxAngleDelta = 3,
+ kDirectionalIntraModes = 8,
+ kMaxSuperBlockSizeLog2 = 7,
+ kMinSuperBlockSizeLog2 = 6,
+ kGlobalMotionReadControl = 3,
+ kSuperResScaleNumerator = 8,
+ kBooleanSymbolCount = 2,
+ kRestorationTypeSymbolCount = 3,
+ kSgrProjParamsBits = 4,
+ kSgrProjPrecisionBits = 7,
+ // Padding on left and right side of a restoration block.
+ // 3 is enough, but padding to 4 is more efficient, and makes the temporary
+ // source buffer 8-pixel aligned.
+ kRestorationHorizontalBorder = 4,
+ // Padding on top and bottom side of a restoration block.
+ kRestorationVerticalBorder = 2,
+ kCdefBorder = 2, // Padding on each side of a cdef block.
+ kConvolveBorderLeftTop = 3, // Left/top padding of a convolve block.
+ // Right/bottom padding of a convolve block. This needs to be 4 at minimum,
+ // but was increased to simplify the SIMD loads in
+ // ConvolveCompoundScale2D_NEON() and ConvolveScale2D_NEON().
+ kConvolveBorderRight = 8,
+ kConvolveBorderBottom = 4,
+ kSubPixelTaps = 8,
+ kWienerFilterBits = 7,
+ kWienerFilterTaps = 7,
+ kMaxPaletteSize = 8,
+ kMinPaletteSize = 2,
+ kMaxPaletteSquare = 64,
+ kBorderPixels = 64,
+ // The final blending process for film grain needs room to overwrite and read
+ // with SIMD instructions. The maximum overwrite is 7 pixels, but the border
+ // is required to be a multiple of 32 by YuvBuffer::Realloc, so that
+ // subsampled chroma borders are 16-aligned.
+ kBorderPixelsFilmGrain = 32,
+ // These constants are the minimum left, right, top, and bottom border sizes
+ // in pixels as an extension of the frame boundary. The minimum border sizes
+ // are derived from the following requirements:
+ // - Warp_C() may read up to 13 pixels before or after a row.
+ // - Warp_NEON() may read up to 13 pixels before a row. It may read up to 14
+ // pixels after a row, but the value of the last read pixel is not used.
+ // - Warp_C() and Warp_NEON() may read up to 13 pixels above the top row and
+ // 13 pixels below the bottom row.
+ kMinLeftBorderPixels = 13,
+ kMinRightBorderPixels = 13,
+ kMinTopBorderPixels = 13,
+ kMinBottomBorderPixels = 13,
+ kWarpedModelPrecisionBits = 16,
+ kMaxRefMvStackSize = 8,
+ kMaxLeastSquaresSamples = 8,
+ kMaxTemporalMvCandidates = 19,
+  // The SIMD implementations of motion vector projection functions always
+ // process 2 or 4 elements together, so we pad the corresponding buffers to
+ // size 20.
+ kMaxTemporalMvCandidatesWithPadding = 20,
+ kMaxSuperBlockSizeInPixels = 128,
+ kMaxScaledSuperBlockSizeInPixels = 128 * 2,
+ kMaxSuperBlockSizeSquareInPixels = 128 * 128,
+ kNum4x4InLoopFilterUnit = 16,
+ kNum4x4InLoopRestorationUnit = 16,
+ kProjectionMvClamp = (1 << 14) - 1, // == 16383
+ kProjectionMvMaxHorizontalOffset = 8,
+ kCdefUnitSize = 64,
+ kCdefUnitSizeWithBorders = kCdefUnitSize + 2 * kCdefBorder,
+ kRestorationUnitOffset = 8,
+ // Loop restoration's processing unit size is fixed as 64x64.
+ kRestorationUnitHeight = 64,
+ kRestorationUnitWidth = 256,
+ kRestorationUnitHeightWithBorders =
+ kRestorationUnitHeight + 2 * kRestorationVerticalBorder,
+ kRestorationUnitWidthWithBorders =
+ kRestorationUnitWidth + 2 * kRestorationHorizontalBorder,
+ kSuperResFilterBits = 6,
+ kSuperResFilterShifts = 1 << kSuperResFilterBits,
+ kSuperResFilterTaps = 8,
+ kSuperResScaleBits = 14,
+ kSuperResExtraBits = kSuperResScaleBits - kSuperResFilterBits,
+ kSuperResScaleMask = (1 << 14) - 1,
+ kSuperResHorizontalBorder = 4,
+ kSuperResVerticalBorder = 1,
+ // The SIMD implementations of superres calculate up to 15 extra upscaled
+  // pixels, which will over-read up to 15 downscaled pixels at the end of each
+ // row. Set the padding to 16 for alignment purposes.
+ kSuperResHorizontalPadding = 16,
+  // TODO(chengchen): consider merging these constants:
+  // kFilterBits, kWienerFilterBits, and kSgrProjPrecisionBits, which are all
+  // 7. They are designed to match AV1 convolution, which increases
+  // coefficient values by up to 7 bits. We could combine them and use
+  // kFilterBits only.
+ kFilterBits = 7,
+  // A sub pixel in AV1 represents a pixel location that is not at an integer
+  // position. Sub pixels are in units of 1/16 (1 << kSubPixelBits) of an
+  // integer pixel. Sub pixel values are interpolated using adjacent integer
+  // pixel values; the interpolation is a filtering process.
+ kSubPixelBits = 4,
+ kSubPixelMask = (1 << kSubPixelBits) - 1,
+ // Precision bits when computing inter prediction locations.
+ kScaleSubPixelBits = 10,
+ kWarpParamRoundingBits = 6,
+ // Number of fractional bits of lookup in divisor lookup table.
+ kDivisorLookupBits = 8,
+ // Number of fractional bits of entries in divisor lookup table.
+ kDivisorLookupPrecisionBits = 14,
+ // Number of phases used in warped filtering.
+ kWarpedPixelPrecisionShifts = 1 << 6,
+ kResidualPaddingVertical = 4,
+ kWedgeMaskMasterSize = 64,
+ kMaxFrameDistance = 31,
+ kReferenceFrameScalePrecision = 14,
+ kNumWienerCoefficients = 3,
+ kLoopFilterMaxModeDeltas = 2,
+ kMaxCdefStrengths = 8,
+ kCdefLargeValue = 0x4000, // Used to indicate where CDEF is not available.
+ kMaxTileColumns = 64,
+ kMaxTileRows = 64,
+ kMaxOperatingPoints = 32,
+ // There can be a maximum of 4 spatial layers and 8 temporal layers.
+ kMaxLayers = 32,
+ // The cache line size should ideally be queried at run time. 64 is a common
+ // cache line size of x86 CPUs. Web searches showed the cache line size of ARM
+  // CPUs is 32 or 64 bytes. So aligning to a 64-byte boundary will work for
+  // all CPUs that we care about, even though it is excessive for some ARM
+  // CPUs.
+ //
+ // On Linux, the cache line size can be looked up with the command:
+ // getconf LEVEL1_DCACHE_LINESIZE
+ kCacheLineSize = 64,
+}; // anonymous enum
+
+enum FrameType : uint8_t {
+ kFrameKey,
+ kFrameInter,
+ kFrameIntraOnly,
+ kFrameSwitch
+};
+
+enum Plane : uint8_t { kPlaneY, kPlaneU, kPlaneV };
+enum : uint8_t { kMaxPlanesMonochrome = kPlaneY + 1, kMaxPlanes = kPlaneV + 1 };
+
+// The plane types, called luma and chroma in the spec.
+enum PlaneType : uint8_t { kPlaneTypeY, kPlaneTypeUV, kNumPlaneTypes };
+
+enum ReferenceFrameType : int8_t {
+ kReferenceFrameNone = -1,
+ kReferenceFrameIntra,
+ kReferenceFrameLast,
+ kReferenceFrameLast2,
+ kReferenceFrameLast3,
+ kReferenceFrameGolden,
+ kReferenceFrameBackward,
+ kReferenceFrameAlternate2,
+ kReferenceFrameAlternate,
+ kNumReferenceFrameTypes,
+ kNumInterReferenceFrameTypes =
+ EnumRangeLength(kReferenceFrameLast, kReferenceFrameAlternate),
+ kNumForwardReferenceTypes =
+ EnumRangeLength(kReferenceFrameLast, kReferenceFrameGolden),
+ kNumBackwardReferenceTypes =
+ EnumRangeLength(kReferenceFrameBackward, kReferenceFrameAlternate)
+};
+
+enum {
+ // Unidirectional compound reference pairs that are signaled explicitly:
+ // {kReferenceFrameLast, kReferenceFrameLast2},
+ // {kReferenceFrameLast, kReferenceFrameLast3},
+ // {kReferenceFrameLast, kReferenceFrameGolden},
+ // {kReferenceFrameBackward, kReferenceFrameAlternate}
+ kExplicitUnidirectionalCompoundReferences = 4,
+ // Other unidirectional compound reference pairs:
+ // {kReferenceFrameLast2, kReferenceFrameLast3},
+ // {kReferenceFrameLast2, kReferenceFrameGolden},
+ // {kReferenceFrameLast3, kReferenceFrameGolden},
+ // {kReferenceFrameBackward, kReferenceFrameAlternate2},
+ // {kReferenceFrameAlternate2, kReferenceFrameAlternate}
+ kUnidirectionalCompoundReferences =
+ kExplicitUnidirectionalCompoundReferences + 5,
+}; // anonymous enum
+
+enum BlockSize : uint8_t {
+ kBlock4x4,
+ kBlock4x8,
+ kBlock4x16,
+ kBlock8x4,
+ kBlock8x8,
+ kBlock8x16,
+ kBlock8x32,
+ kBlock16x4,
+ kBlock16x8,
+ kBlock16x16,
+ kBlock16x32,
+ kBlock16x64,
+ kBlock32x8,
+ kBlock32x16,
+ kBlock32x32,
+ kBlock32x64,
+ kBlock64x16,
+ kBlock64x32,
+ kBlock64x64,
+ kBlock64x128,
+ kBlock128x64,
+ kBlock128x128,
+ kMaxBlockSizes,
+ kBlockInvalid
+};
+
+// Partition types. R: Recursive
+//
+// None Horizontal Vertical Split
+// +-------+ +-------+ +---+---+ +---+---+
+// | | | | | | | | R | R |
+// | | +-------+ | | | +---+---+
+// | | | | | | | | R | R |
+// +-------+ +-------+ +---+---+ +---+---+
+//
+// Horizontal Horizontal Vertical Vertical
+// with top with bottom with left with right
+// split split split split
+// +---+---+ +-------+ +---+---+ +---+---+
+// | | | | | | | | | | |
+// +---+---+ +---+---+ +---+ | | +---+
+// | | | | | | | | | | |
+// +-------+ +---+---+ +---+---+ +---+---+
+//
+// Horizontal4 Vertical4
+// +-----+ +-+-+-+
+// +-----+ | | | |
+// +-----+ | | | |
+// +-----+ +-+-+-+
+enum Partition : uint8_t {
+ kPartitionNone,
+ kPartitionHorizontal,
+ kPartitionVertical,
+ kPartitionSplit,
+ kPartitionHorizontalWithTopSplit,
+ kPartitionHorizontalWithBottomSplit,
+ kPartitionVerticalWithLeftSplit,
+ kPartitionVerticalWithRightSplit,
+ kPartitionHorizontal4,
+ kPartitionVertical4
+};
+enum : uint8_t { kMaxPartitionTypes = kPartitionVertical4 + 1 };
+
+enum PredictionMode : uint8_t {
+ // Intra prediction modes.
+ kPredictionModeDc,
+ kPredictionModeVertical,
+ kPredictionModeHorizontal,
+ kPredictionModeD45,
+ kPredictionModeD135,
+ kPredictionModeD113,
+ kPredictionModeD157,
+ kPredictionModeD203,
+ kPredictionModeD67,
+ kPredictionModeSmooth,
+ kPredictionModeSmoothVertical,
+ kPredictionModeSmoothHorizontal,
+ kPredictionModePaeth,
+ kPredictionModeChromaFromLuma,
+ // Single inter prediction modes.
+ kPredictionModeNearestMv,
+ kPredictionModeNearMv,
+ kPredictionModeGlobalMv,
+ kPredictionModeNewMv,
+ // Compound inter prediction modes.
+ kPredictionModeNearestNearestMv,
+ kPredictionModeNearNearMv,
+ kPredictionModeNearestNewMv,
+ kPredictionModeNewNearestMv,
+ kPredictionModeNearNewMv,
+ kPredictionModeNewNearMv,
+ kPredictionModeGlobalGlobalMv,
+ kPredictionModeNewNewMv,
+ kNumPredictionModes,
+ kNumCompoundInterPredictionModes =
+ EnumRangeLength(kPredictionModeNearestNearestMv, kPredictionModeNewNewMv),
+ kIntraPredictionModesY =
+ EnumRangeLength(kPredictionModeDc, kPredictionModePaeth),
+ kIntraPredictionModesUV =
+ EnumRangeLength(kPredictionModeDc, kPredictionModeChromaFromLuma),
+ kPredictionModeInvalid = 255
+};
+
+enum InterIntraMode : uint8_t {
+ kInterIntraModeDc,
+ kInterIntraModeVertical,
+ kInterIntraModeHorizontal,
+ kInterIntraModeSmooth,
+ kNumInterIntraModes
+};
+
+enum MotionMode : uint8_t {
+ kMotionModeSimple,
+ kMotionModeObmc, // Overlapped block motion compensation.
+ kMotionModeLocalWarp,
+ kNumMotionModes
+};
+
+enum TxMode : uint8_t {
+ kTxModeOnly4x4,
+ kTxModeLargest,
+ kTxModeSelect,
+ kNumTxModes
+};
+
+// These enums are named as kType1Type2 where Type1 is the transform type for
+// the rows and Type2 is the transform type for the columns.
+enum TransformType : uint8_t {
+ kTransformTypeDctDct,
+ kTransformTypeAdstDct,
+ kTransformTypeDctAdst,
+ kTransformTypeAdstAdst,
+ kTransformTypeFlipadstDct,
+ kTransformTypeDctFlipadst,
+ kTransformTypeFlipadstFlipadst,
+ kTransformTypeAdstFlipadst,
+ kTransformTypeFlipadstAdst,
+ kTransformTypeIdentityIdentity,
+ kTransformTypeIdentityDct,
+ kTransformTypeDctIdentity,
+ kTransformTypeIdentityAdst,
+ kTransformTypeAdstIdentity,
+ kTransformTypeIdentityFlipadst,
+ kTransformTypeFlipadstIdentity,
+ kNumTransformTypes
+};
+
+constexpr BitMaskSet kTransformFlipColumnsMask(kTransformTypeFlipadstDct,
+ kTransformTypeFlipadstAdst,
+ kTransformTypeFlipadstIdentity,
+ kTransformTypeFlipadstFlipadst);
+constexpr BitMaskSet kTransformFlipRowsMask(kTransformTypeDctFlipadst,
+ kTransformTypeAdstFlipadst,
+ kTransformTypeIdentityFlipadst,
+ kTransformTypeFlipadstFlipadst);
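+
+// Example usage (illustrative, assuming BitMaskSet's Contains() accessor):
+//   const bool flip_columns = kTransformFlipColumnsMask.Contains(tx_type);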
+
+enum TransformSize : uint8_t {
+ kTransformSize4x4,
+ kTransformSize4x8,
+ kTransformSize4x16,
+ kTransformSize8x4,
+ kTransformSize8x8,
+ kTransformSize8x16,
+ kTransformSize8x32,
+ kTransformSize16x4,
+ kTransformSize16x8,
+ kTransformSize16x16,
+ kTransformSize16x32,
+ kTransformSize16x64,
+ kTransformSize32x8,
+ kTransformSize32x16,
+ kTransformSize32x32,
+ kTransformSize32x64,
+ kTransformSize64x16,
+ kTransformSize64x32,
+ kTransformSize64x64,
+ kNumTransformSizes
+};
+
+enum TransformSet : uint8_t {
+ // DCT Only (1).
+ kTransformSetDctOnly,
+ // 2D-DCT and 2D-ADST without flip (4) + Identity (1) + 1D Horizontal/Vertical
+ // DCT (2) = Total (7).
+ kTransformSetIntra1,
+ // 2D-DCT and 2D-ADST without flip (4) + Identity (1) = Total (5).
+ kTransformSetIntra2,
+ // All transforms = Total (16).
+ kTransformSetInter1,
+ // 2D-DCT and 2D-ADST with flip (9) + Identity (1) + 1D Horizontal/Vertical
+ // DCT (2) = Total (12).
+ kTransformSetInter2,
+ // DCT (1) + Identity (1) = Total (2).
+ kTransformSetInter3,
+ kNumTransformSets
+};
+
+enum TransformClass : uint8_t {
+ kTransformClass2D,
+ kTransformClassHorizontal,
+ kTransformClassVertical,
+ kNumTransformClasses
+};
+
+enum FilterIntraPredictor : uint8_t {
+ kFilterIntraPredictorDc,
+ kFilterIntraPredictorVertical,
+ kFilterIntraPredictorHorizontal,
+ kFilterIntraPredictorD157,
+ kFilterIntraPredictorPaeth,
+ kNumFilterIntraPredictors
+};
+
+enum ObmcDirection : uint8_t {
+ kObmcDirectionVertical,
+ kObmcDirectionHorizontal,
+ kNumObmcDirections
+};
+
+// In AV1 the name of the filter refers to the direction of filter application.
+// Horizontal refers to the column edge and vertical to the row edge.
+enum LoopFilterType : uint8_t {
+ kLoopFilterTypeVertical,
+ kLoopFilterTypeHorizontal,
+ kNumLoopFilterTypes
+};
+
+enum LoopFilterTransformSizeId : uint8_t {
+ kLoopFilterTransformSizeId4x4,
+ kLoopFilterTransformSizeId8x8,
+ kLoopFilterTransformSizeId16x16,
+ kNumLoopFilterTransformSizeIds
+};
+
+enum LoopRestorationType : uint8_t {
+ kLoopRestorationTypeNone,
+ kLoopRestorationTypeSwitchable,
+ kLoopRestorationTypeWiener,
+  kLoopRestorationTypeSgrProj,  // Self-guided projection filter.
+ kNumLoopRestorationTypes
+};
+
+enum CompoundReferenceType : uint8_t {
+ kCompoundReferenceUnidirectional,
+ kCompoundReferenceBidirectional,
+ kNumCompoundReferenceTypes
+};
+
+enum CompoundPredictionType : uint8_t {
+ kCompoundPredictionTypeWedge,
+ kCompoundPredictionTypeDiffWeighted,
+ kCompoundPredictionTypeAverage,
+ kCompoundPredictionTypeIntra,
+ kCompoundPredictionTypeDistance,
+ kNumCompoundPredictionTypes,
+ // Number of compound prediction types that are explicitly signaled in the
+ // bitstream (in the compound_type syntax element).
+ kNumExplicitCompoundPredictionTypes = 2
+};
+
+enum InterpolationFilter : uint8_t {
+ kInterpolationFilterEightTap,
+ kInterpolationFilterEightTapSmooth,
+ kInterpolationFilterEightTapSharp,
+ kInterpolationFilterBilinear,
+ kInterpolationFilterSwitchable,
+ kNumInterpolationFilters,
+ // Number of interpolation filters that can be explicitly signaled in the
+ // compressed headers (when the uncompressed headers allow switchable
+ // interpolation filters) of the bitstream.
+ kNumExplicitInterpolationFilters = EnumRangeLength(
+ kInterpolationFilterEightTap, kInterpolationFilterEightTapSharp)
+};
+
+enum MvJointType : uint8_t {
+ kMvJointTypeZero,
+ kMvJointTypeHorizontalNonZeroVerticalZero,
+ kMvJointTypeHorizontalZeroVerticalNonZero,
+ kMvJointTypeNonZero,
+ kNumMvJointTypes
+};
+
+enum ObuType : int8_t {
+ kObuInvalid = -1,
+ kObuSequenceHeader = 1,
+ kObuTemporalDelimiter = 2,
+ kObuFrameHeader = 3,
+ kObuTileGroup = 4,
+ kObuMetadata = 5,
+ kObuFrame = 6,
+ kObuRedundantFrameHeader = 7,
+ kObuTileList = 8,
+ kObuPadding = 15,
+};
+
+//------------------------------------------------------------------------------
+// ToString()
+//
+// These functions are meant to be used only in debug logging and within tests.
+// They are defined inline to avoid including the strings in the release
+// library when logging is disabled; unreferenced functions will not be added to
+// any object file in that case.
+
+inline const char* ToString(const BlockSize size) {
+ switch (size) {
+ case kBlock4x4:
+ return "kBlock4x4";
+ case kBlock4x8:
+ return "kBlock4x8";
+ case kBlock4x16:
+ return "kBlock4x16";
+ case kBlock8x4:
+ return "kBlock8x4";
+ case kBlock8x8:
+ return "kBlock8x8";
+ case kBlock8x16:
+ return "kBlock8x16";
+ case kBlock8x32:
+ return "kBlock8x32";
+ case kBlock16x4:
+ return "kBlock16x4";
+ case kBlock16x8:
+ return "kBlock16x8";
+ case kBlock16x16:
+ return "kBlock16x16";
+ case kBlock16x32:
+ return "kBlock16x32";
+ case kBlock16x64:
+ return "kBlock16x64";
+ case kBlock32x8:
+ return "kBlock32x8";
+ case kBlock32x16:
+ return "kBlock32x16";
+ case kBlock32x32:
+ return "kBlock32x32";
+ case kBlock32x64:
+ return "kBlock32x64";
+ case kBlock64x16:
+ return "kBlock64x16";
+ case kBlock64x32:
+ return "kBlock64x32";
+ case kBlock64x64:
+ return "kBlock64x64";
+ case kBlock64x128:
+ return "kBlock64x128";
+ case kBlock128x64:
+ return "kBlock128x64";
+ case kBlock128x128:
+ return "kBlock128x128";
+ case kMaxBlockSizes:
+ return "kMaxBlockSizes";
+ case kBlockInvalid:
+ return "kBlockInvalid";
+ }
+ abort();
+}
+
+inline const char* ToString(const InterIntraMode mode) {
+ switch (mode) {
+ case kInterIntraModeDc:
+ return "kInterIntraModeDc";
+ case kInterIntraModeVertical:
+ return "kInterIntraModeVertical";
+ case kInterIntraModeHorizontal:
+ return "kInterIntraModeHorizontal";
+ case kInterIntraModeSmooth:
+ return "kInterIntraModeSmooth";
+ case kNumInterIntraModes:
+ return "kNumInterIntraModes";
+ }
+ abort();
+}
+
+inline const char* ToString(const ObmcDirection direction) {
+ switch (direction) {
+ case kObmcDirectionVertical:
+ return "kObmcDirectionVertical";
+ case kObmcDirectionHorizontal:
+ return "kObmcDirectionHorizontal";
+ case kNumObmcDirections:
+ return "kNumObmcDirections";
+ }
+ abort();
+}
+
+inline const char* ToString(const LoopRestorationType type) {
+ switch (type) {
+ case kLoopRestorationTypeNone:
+ return "kLoopRestorationTypeNone";
+ case kLoopRestorationTypeSwitchable:
+ return "kLoopRestorationTypeSwitchable";
+ case kLoopRestorationTypeWiener:
+ return "kLoopRestorationTypeWiener";
+ case kLoopRestorationTypeSgrProj:
+ return "kLoopRestorationTypeSgrProj";
+ case kNumLoopRestorationTypes:
+ return "kNumLoopRestorationTypes";
+ }
+ abort();
+}
+
+inline const char* ToString(const TransformType type) {
+ switch (type) {
+ case kTransformTypeDctDct:
+ return "kTransformTypeDctDct";
+ case kTransformTypeAdstDct:
+ return "kTransformTypeAdstDct";
+ case kTransformTypeDctAdst:
+ return "kTransformTypeDctAdst";
+ case kTransformTypeAdstAdst:
+ return "kTransformTypeAdstAdst";
+ case kTransformTypeFlipadstDct:
+ return "kTransformTypeFlipadstDct";
+ case kTransformTypeDctFlipadst:
+ return "kTransformTypeDctFlipadst";
+ case kTransformTypeFlipadstFlipadst:
+ return "kTransformTypeFlipadstFlipadst";
+ case kTransformTypeAdstFlipadst:
+ return "kTransformTypeAdstFlipadst";
+ case kTransformTypeFlipadstAdst:
+ return "kTransformTypeFlipadstAdst";
+ case kTransformTypeIdentityIdentity:
+ return "kTransformTypeIdentityIdentity";
+ case kTransformTypeIdentityDct:
+ return "kTransformTypeIdentityDct";
+ case kTransformTypeDctIdentity:
+ return "kTransformTypeDctIdentity";
+ case kTransformTypeIdentityAdst:
+ return "kTransformTypeIdentityAdst";
+ case kTransformTypeAdstIdentity:
+ return "kTransformTypeAdstIdentity";
+ case kTransformTypeIdentityFlipadst:
+ return "kTransformTypeIdentityFlipadst";
+ case kTransformTypeFlipadstIdentity:
+ return "kTransformTypeFlipadstIdentity";
+    // Case included to quiet the compiler.
+ case kNumTransformTypes:
+ return "kNumTransformTypes";
+ }
+ abort();
+}
+
+//------------------------------------------------------------------------------
+
+extern const uint8_t k4x4WidthLog2[kMaxBlockSizes];
+
+extern const uint8_t k4x4HeightLog2[kMaxBlockSizes];
+
+extern const uint8_t kNum4x4BlocksWide[kMaxBlockSizes];
+
+extern const uint8_t kNum4x4BlocksHigh[kMaxBlockSizes];
+
+extern const uint8_t kBlockWidthPixels[kMaxBlockSizes];
+
+extern const uint8_t kBlockHeightPixels[kMaxBlockSizes];
+
+extern const BlockSize kSubSize[kMaxPartitionTypes][kMaxBlockSizes];
+
+extern const BlockSize kPlaneResidualSize[kMaxBlockSizes][2][2];
+
+extern const int16_t kProjectionMvDivisionLookup[kMaxFrameDistance + 1];
+
+extern const uint8_t kTransformWidth[kNumTransformSizes];
+
+extern const uint8_t kTransformHeight[kNumTransformSizes];
+
+extern const uint8_t kTransformWidth4x4[kNumTransformSizes];
+
+extern const uint8_t kTransformHeight4x4[kNumTransformSizes];
+
+extern const uint8_t kTransformWidthLog2[kNumTransformSizes];
+
+extern const uint8_t kTransformHeightLog2[kNumTransformSizes];
+
+extern const TransformSize kSplitTransformSize[kNumTransformSizes];
+
+// Square transform of size min(w,h).
+extern const TransformSize kTransformSizeSquareMin[kNumTransformSizes];
+
+// Square transform of size max(w,h).
+extern const TransformSize kTransformSizeSquareMax[kNumTransformSizes];
+
+extern const uint8_t kNumTransformTypesInSet[kNumTransformSets];
+
+extern const uint8_t kSgrProjParams[1 << kSgrProjParamsBits][4];
+
+extern const int8_t kSgrProjMultiplierMin[2];
+
+extern const int8_t kSgrProjMultiplierMax[2];
+
+extern const int8_t kWienerTapsMin[3];
+
+extern const int8_t kWienerTapsMax[3];
+
+extern const uint8_t kUpscaleFilterUnsigned[kSuperResFilterShifts]
+ [kSuperResFilterTaps];
+
+// An int8_t version of the kWarpedFilters array.
+// Note: The array could be removed with a performance penalty.
+extern const int8_t kWarpedFilters8[3 * kWarpedPixelPrecisionShifts + 1][8];
+
+extern const int16_t kWarpedFilters[3 * kWarpedPixelPrecisionShifts + 1][8];
+
+extern const int8_t kHalfSubPixelFilters[6][16][8];
+
+extern const uint8_t kAbsHalfSubPixelFilters[6][16][8];
+
+extern const int16_t kDirectionalIntraPredictorDerivative[44];
+
+extern const uint8_t kDeblockFilterLevelIndex[kMaxPlanes][kNumLoopFilterTypes];
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_CONSTANTS_H_
diff --git a/src/utils/cpu.cc b/src/utils/cpu.cc
new file mode 100644
index 0000000..a6b7057
--- /dev/null
+++ b/src/utils/cpu.cc
@@ -0,0 +1,84 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/cpu.h"
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#include <cpuid.h>
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#include <immintrin.h> // _xgetbv
+#include <intrin.h>
+#endif
+
+namespace libgav1 {
+
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
+namespace {
+
+#if defined(__GNUC__)
+void CpuId(int leaf, uint32_t info[4]) {
+ __cpuid_count(leaf, 0 /*ecx=subleaf*/, info[0], info[1], info[2], info[3]);
+}
+
+uint64_t Xgetbv() {
+ const uint32_t ecx = 0; // ecx specifies the extended control register
+ uint32_t eax;
+ uint32_t edx;
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(ecx));
+ return (static_cast<uint64_t>(edx) << 32) | eax;
+}
+#else // _MSC_VER
+void CpuId(int leaf, uint32_t info[4]) {
+ __cpuidex(reinterpret_cast<int*>(info), leaf, 0 /*ecx=subleaf*/);
+}
+
+uint64_t Xgetbv() { return _xgetbv(0); }
+#endif // __GNUC__
+
+} // namespace
+
+uint32_t GetCpuInfo() {
+ uint32_t info[4];
+
+ // Get the highest feature value cpuid supports
+ CpuId(0, info);
+ const int max_cpuid_value = info[0];
+ if (max_cpuid_value < 1) return 0;
+
+ CpuId(1, info);
+ uint32_t features = 0;
+ if ((info[3] & (1 << 26)) != 0) features |= kSSE2;
+ if ((info[2] & (1 << 9)) != 0) features |= kSSSE3;
+ if ((info[2] & (1 << 19)) != 0) features |= kSSE4_1;
+
+ // Bits 27 (OSXSAVE) & 28 (256-bit AVX)
+ if ((info[2] & (3 << 27)) == (3 << 27)) {
+    // XMM and YMM state enabled by the OS: XCR0 bits 1 and 2 (mask 0x6).
+ if ((Xgetbv() & 0x6) == 0x6) {
+ features |= kAVX;
+ if (max_cpuid_value >= 7) {
+ CpuId(7, info);
+ if ((info[1] & (1 << 5)) != 0) features |= kAVX2;
+ }
+ }
+ }
+
+ return features;
+}
+#else
+uint32_t GetCpuInfo() { return 0; }
+#endif // x86 || x86_64
+
+} // namespace libgav1
diff --git a/src/utils/cpu.h b/src/utils/cpu.h
new file mode 100644
index 0000000..630b251
--- /dev/null
+++ b/src/utils/cpu.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_CPU_H_
+#define LIBGAV1_SRC_UTILS_CPU_H_
+
+#include <cstdint>
+
+namespace libgav1 {
+
+#if defined(__i386__) || defined(__x86_64__)
+#define LIBGAV1_X86
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#define LIBGAV1_X86
+#define LIBGAV1_X86_MSVC
+#endif
+
+#if defined(LIBGAV1_X86)
+
+#if !defined(LIBGAV1_ENABLE_SSE4_1)
+#define LIBGAV1_ENABLE_SSE4_1 1
+#endif
+
+#if LIBGAV1_ENABLE_SSE4_1
+#if !defined(LIBGAV1_ENABLE_AVX2)
+#define LIBGAV1_ENABLE_AVX2 1
+#endif // !defined(LIBGAV1_ENABLE_AVX2)
+#else // !LIBGAV1_ENABLE_SSE4_1
+// Disable AVX2 when SSE4.1 is disabled as it may rely on shared components.
+#undef LIBGAV1_ENABLE_AVX2
+#define LIBGAV1_ENABLE_AVX2 0
+#endif // LIBGAV1_ENABLE_SSE4_1
+
+#else // !LIBGAV1_X86
+
+#undef LIBGAV1_ENABLE_AVX2
+#define LIBGAV1_ENABLE_AVX2 0
+#undef LIBGAV1_ENABLE_SSE4_1
+#define LIBGAV1_ENABLE_SSE4_1 0
+
+#endif // LIBGAV1_X86
+
+// For x86 LIBGAV1_TARGETING_* indicate the source being built is targeting
+// (at least) that instruction set. This prevents disabling other instruction
+// sets if the current instruction set isn't a global target, e.g., building
+// *_avx2.cc w/-mavx2, but the remaining files without the flag.
+#if LIBGAV1_ENABLE_AVX2 && defined(__AVX2__)
+#define LIBGAV1_TARGETING_AVX2 1
+#else
+#define LIBGAV1_TARGETING_AVX2 0
+#endif
+
+// Note: LIBGAV1_X86_MSVC isn't completely correct for Visual Studio, but there
+// is no equivalent to __SSE4_1__. LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS will be
+// enabled in dsp.h to compensate for this.
+#if LIBGAV1_ENABLE_SSE4_1 && (defined(__SSE4_1__) || defined(LIBGAV1_X86_MSVC))
+#define LIBGAV1_TARGETING_SSE4_1 1
+#else
+#define LIBGAV1_TARGETING_SSE4_1 0
+#endif
+
+#undef LIBGAV1_X86
+
+#if !defined(LIBGAV1_ENABLE_NEON)
+// TODO(jzern): add support for _M_ARM64.
+#if defined(__ARM_NEON__) || defined(__aarch64__) || \
+ (defined(_MSC_VER) && defined(_M_ARM))
+#define LIBGAV1_ENABLE_NEON 1
+#else
+#define LIBGAV1_ENABLE_NEON 0
+#endif
+#endif // !defined(LIBGAV1_ENABLE_NEON)
+
+enum CpuFeatures : uint8_t {
+ kSSE2 = 1 << 0,
+#define LIBGAV1_CPU_SSE2 (1 << 0)
+ kSSSE3 = 1 << 1,
+#define LIBGAV1_CPU_SSSE3 (1 << 1)
+ kSSE4_1 = 1 << 2,
+#define LIBGAV1_CPU_SSE4_1 (1 << 2)
+ kAVX = 1 << 3,
+#define LIBGAV1_CPU_AVX (1 << 3)
+ kAVX2 = 1 << 4,
+#define LIBGAV1_CPU_AVX2 (1 << 4)
+ kNEON = 1 << 5,
+#define LIBGAV1_CPU_NEON (1 << 5)
+};
+
+// Returns a bit-wise OR of CpuFeatures supported by this platform.
+uint32_t GetCpuInfo();
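+
+// Example (illustrative only, not part of this header): runtime dispatch on
+// the detected features might look like:
+//   const uint32_t features = GetCpuInfo();
+//   if ((features & kAVX2) != 0) {
+//     // Use the AVX2 code path.
+//   } else if ((features & kSSE4_1) != 0) {
+//     // Use the SSE4.1 code path.
+//   }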
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_CPU_H_
diff --git a/src/utils/dynamic_buffer.h b/src/utils/dynamic_buffer.h
new file mode 100644
index 0000000..b51345a
--- /dev/null
+++ b/src/utils/dynamic_buffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2020 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
+#define LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
+
+#include <memory>
+#include <new>
+
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+template <typename T>
+class DynamicBuffer {
+ public:
+ T* get() { return buffer_.get(); }
+ const T* get() const { return buffer_.get(); }
+
+ // Resizes the buffer so that it can hold at least |size| elements. Existing
+ // contents will be destroyed when resizing to a larger size.
+ //
+ // Returns true on success. If Resize() returns false, then subsequent calls
+ // to get() will return nullptr.
+ bool Resize(size_t size) {
+ if (size <= size_) return true;
+ buffer_.reset(new (std::nothrow) T[size]);
+ if (buffer_ == nullptr) {
+ size_ = 0;
+ return false;
+ }
+ size_ = size;
+ return true;
+ }
+
+ private:
+ std::unique_ptr<T[]> buffer_;
+ size_t size_ = 0;
+};
+
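+// Usage sketch (illustrative; |num_values| is a hypothetical name): Resize()
+// only grows the buffer, so it can be reused across calls without
+// reallocating:
+//   DynamicBuffer<int16_t> buffer;
+//   if (!buffer.Resize(num_values)) return false;  // Allocation failure.
+//   int16_t* const values = buffer.get();  // Valid for |num_values| entries.
+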
+template <typename T, int alignment>
+class AlignedDynamicBuffer {
+ public:
+ T* get() { return buffer_.get(); }
+
+ // Resizes the buffer so that it can hold at least |size| elements. Existing
+ // contents will be destroyed when resizing to a larger size.
+ //
+ // Returns true on success. If Resize() returns false, then subsequent calls
+ // to get() will return nullptr.
+ bool Resize(size_t size) {
+ if (size <= size_) return true;
+ buffer_ = MakeAlignedUniquePtr<T>(alignment, size);
+ if (buffer_ == nullptr) {
+ size_ = 0;
+ return false;
+ }
+ size_ = size;
+ return true;
+ }
+
+ private:
+ AlignedUniquePtr<T> buffer_;
+ size_t size_ = 0;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
diff --git a/src/utils/entropy_decoder.cc b/src/utils/entropy_decoder.cc
new file mode 100644
index 0000000..bf21199
--- /dev/null
+++ b/src/utils/entropy_decoder.cc
@@ -0,0 +1,1117 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/entropy_decoder.h"
+
+#include <cassert>
+#include <cstring>
+
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/cpu.h"
+
+#if defined(__ARM_NEON__) || defined(__aarch64__) || \
+ (defined(_MSC_VER) && defined(_M_ARM))
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_NEON 1
+#else
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_NEON 0
+#endif
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+#include <arm_neon.h>
+#endif
+
+#if defined(__SSE2__) || defined(LIBGAV1_X86_MSVC)
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2 1
+#else
+#define LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2 0
+#endif
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+#include <emmintrin.h>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr uint32_t kReadBitMask = ~255;
+constexpr int kCdfPrecision = 6;
+constexpr int kMinimumProbabilityPerSymbol = 4;
+
+// This function computes the "cur" variable as specified inside the do-while
+// loop in Section 8.2.6 of the spec. Its return value is monotonically
+// decreasing as the value of |index| increases (note that the |cdf| array is
+// sorted in decreasing order).
+uint32_t ScaleCdf(uint32_t values_in_range_shifted, const uint16_t* const cdf,
+ int index, int symbol_count) {
+ return ((values_in_range_shifted * (cdf[index] >> kCdfPrecision)) >> 1) +
+ (kMinimumProbabilityPerSymbol * (symbol_count - index));
+}
+
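+// Worked example for ScaleCdf (illustrative values): with
+// values_in_range_shifted == 128 (values_in_range_ == 32768),
+// cdf[index] == 16384, and symbol_count - index == 3, the result is
+// ((128 * (16384 >> 6)) >> 1) + 4 * 3 == 16396.
+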
+void UpdateCdf(uint16_t* const cdf, const int symbol_count, const int symbol) {
+ const uint16_t count = cdf[symbol_count];
+ // rate is computed in the spec as:
+ // 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
+ // In this case cdf[N] is |count|.
+ // Min(FloorLog2(N), 2) is 1 for symbol_count == {2, 3} and 2 for all
+ // symbol_count > 3. So the equation becomes:
+ // 4 + (count > 15) + (count > 31) + (symbol_count > 3).
+ // Note that the largest value for count is 32 (it is not incremented beyond
+ // 32). So using that information:
+ // count >> 4 is 0 for count from 0 to 15.
+ // count >> 4 is 1 for count from 16 to 31.
+ // count >> 4 is 2 for count == 32.
+ // Now, the equation becomes:
+ // 4 + (count >> 4) + (symbol_count > 3).
+ // Since (count >> 4) can only be 0 or 1 or 2, the addition could be replaced
+ // with bitwise or:
+ // (4 | (count >> 4)) + (symbol_count > 3).
+ // but using addition will allow the compiler to eliminate an operation when
+ // symbol_count is known and this function is inlined.
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count > 3);
+ // Hints for further optimizations:
+ //
+ // 1. clang can vectorize this for loop with width 4, even though the loop
+ // contains an if-else statement. Therefore, it may be advantageous to use
+ // "i < symbol_count" as the loop condition when symbol_count is 8, 12, or 16
+ // (a multiple of 4 that's not too small).
+ //
+ // 2. The for loop can be rewritten in the following form, which would enable
+ // clang to vectorize the loop with width 8:
+ //
+ // const int rounding = (1 << rate) - 1;
+ // for (int i = 0; i < symbol_count - 1; ++i) {
+ // const uint16_t a = (i < symbol) ? kCdfMaxProbability : rounding;
+ // cdf[i] += static_cast<int16_t>(a - cdf[i]) >> rate;
+ // }
+ //
+ // The subtraction (a - cdf[i]) relies on the overflow semantics of unsigned
+ // integer arithmetic. The result of the unsigned subtraction is cast to a
+ // signed integer and right-shifted. This requires the right shift of a
+ // signed integer be an arithmetic shift, which is true for clang, gcc, and
+ // Visual C++.
+ assert(symbol_count - 1 > 0);
+ int i = 0;
+ do {
+ if (i < symbol) {
+ cdf[i] += (kCdfMaxProbability - cdf[i]) >> rate;
+ } else {
+ cdf[i] -= cdf[i] >> rate;
+ }
+ } while (++i < symbol_count - 1);
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+}
+
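+// Worked example (illustrative values): with symbol_count == 8 and
+// count == 20, rate == (20 >> 4) + 4 + 1 == 6, so each cdf entry moves by
+// roughly 1/64 of its remaining distance toward kCdfMaxProbability (for
+// i < symbol) or toward 0 (for i >= symbol).
+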
+// Define the UpdateCdfN functions. UpdateCdfN is a specialized implementation
+// of UpdateCdf based on the fact that symbol_count == N. UpdateCdfN uses the
+// SIMD instruction sets if available.
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+// The UpdateCdf() method contains the following for loop:
+//
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// if (i < symbol) {
+// cdf[i] += (kCdfMaxProbability - cdf[i]) >> rate;
+// } else {
+// cdf[i] -= cdf[i] >> rate;
+// }
+// }
+//
+// It can be rewritten in the following two forms, which are amenable to SIMD
+// implementations:
+//
+// const int rounding = (1 << rate) - 1;
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// const uint16_t a = (i < symbol) ? kCdfMaxProbability : rounding;
+// cdf[i] += static_cast<int16_t>(a - cdf[i]) >> rate;
+// }
+//
+// or:
+//
+// const int rounding = (1 << rate) - 1;
+// for (int i = 0; i < symbol_count - 1; ++i) {
+// const uint16_t a = (i < symbol) ? (kCdfMaxProbability - rounding) : 0;
+// cdf[i] -= static_cast<int16_t>(cdf[i] - a) >> rate;
+// }
+//
+// The following ARM NEON implementations use a modified version of the first
+// form, using the comparison mask and unsigned rollover to avoid the need to
+// calculate rounding.
+//
+// The cdf array has symbol_count + 1 elements. The first symbol_count elements
+// are the CDF. The last element is a count that is initialized to 0 and may
+// grow up to 32. The for loop in UpdateCdf updates the CDF in the array. Since
+// cdf[symbol_count - 1] is always 0, the for loop does not update
+// cdf[symbol_count - 1]. However, it would be correct to have the for loop
+// update cdf[symbol_count - 1] anyway: since symbol_count - 1 >= symbol, the
+// for loop would take the else branch when i is symbol_count - 1:
+// cdf[i] -= cdf[i] >> rate;
+// Since cdf[symbol_count - 1] is 0, cdf[symbol_count - 1] would still be 0
+// after the update. The ARM NEON implementations take advantage of this in the
+// following two cases:
+// 1. When symbol_count is 8 or 16, the vectorized code updates the first
+// symbol_count elements in the array.
+// 2. When symbol_count is 7, the vectorized code updates all 8 elements of
+// the cdf array. Since an invalid CDF value is written into cdf[7], the
+// count in cdf[7] needs to be fixed up after the vectorized code.
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const uint16_t count = cdf[5];
+ const int rate = (count >> 4) + 5;
+ const uint16x4_t cdf_max_probability = vdup_n_u16(kCdfMaxProbability);
+ const uint16x4_t index = vcreate_u16(0x0003000200010000);
+ const uint16x4_t symbol_vec = vdup_n_u16(symbol);
+ const uint16x4_t mask = vcge_u16(index, symbol_vec);
+ // i < symbol: 32768, i >= symbol: 65535.
+ const uint16x4_t a = vorr_u16(mask, cdf_max_probability);
+ // i < symbol: 32768 - cdf, i >= symbol: 65535 - cdf.
+ const int16x4_t diff = vreinterpret_s16_u16(vsub_u16(a, cdf_vec));
+ // i < symbol: cdf - 0, i >= symbol: cdf - 65535.
+ const uint16x4_t cdf_offset = vsub_u16(cdf_vec, mask);
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ // i < symbol: (32768 - cdf) >> rate, i >= symbol: (65535 (-1) - cdf) >> rate.
+ const uint16x4_t delta = vreinterpret_u16_s16(vshl_s16(diff, negative_rate));
+ // i < symbol: (cdf - 0) + ((32768 - cdf) >> rate).
+ // i >= symbol: (cdf - 65535) + ((65535 - cdf) >> rate).
+ cdf_vec = vadd_u16(cdf_offset, delta);
+ vst1_u16(cdf, cdf_vec);
+ cdf[5] = count + static_cast<uint16_t>(count < 32);
+}
+
+// This version works for |symbol_count| = 7, 8, or 9.
+// See UpdateCdf5 for implementation details.
+template <int symbol_count>
+void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+ static_assert(symbol_count >= 7 && symbol_count <= 9, "");
+ uint16x8_t cdf_vec = vld1q_u16(cdf);
+ const uint16_t count = cdf[symbol_count];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ const uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ const int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ const uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t delta =
+ vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec);
+ cdf[symbol_count] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<7>(cdf, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<8>(cdf, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<9>(cdf, symbol);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec = vld1q_u16(cdf + 2);
+ const uint16_t count = cdf[11];
+ cdf[11] = count + static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ if (symbol > 1) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t index = vcombine_u16(vcreate_u16(0x0005000400030002),
+ vcreate_u16(0x0009000800070006));
+ const uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ const uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ const int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ const uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ const uint16x8_t delta =
+ vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 2, cdf_vec);
+ } else {
+ if (symbol != 0) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+ const uint16x8_t delta = vshlq_u16(cdf_vec, negative_rate);
+ cdf_vec = vsubq_u16(cdf_vec, delta);
+ vst1q_u16(cdf + 2, cdf_vec);
+ }
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec0 = vld1q_u16(cdf);
+ uint16x8_t cdf_vec1 = vld1q_u16(cdf + 4);
+ const uint16_t count = cdf[13];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+
+ uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec0));
+ uint16x8_t cdf_offset = vsubq_u16(cdf_vec0, mask);
+ uint16x8_t delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec0 = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec0);
+
+ index = vcombine_u16(vcreate_u16(0x0007000600050004),
+ vcreate_u16(0x000b000a00090008));
+ mask = vcgeq_u16(index, symbol_vec);
+ a = vorrq_u16(mask, cdf_max_probability);
+ diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec1));
+ cdf_offset = vsubq_u16(cdf_vec1, mask);
+ delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec1 = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 4, cdf_vec1);
+
+ cdf[13] = count + static_cast<uint16_t>(count < 32);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ uint16x8_t cdf_vec = vld1q_u16(cdf);
+ const uint16_t count = cdf[16];
+ const int rate = (count >> 4) + 5;
+ const uint16x8_t cdf_max_probability = vdupq_n_u16(kCdfMaxProbability);
+ const uint16x8_t symbol_vec = vdupq_n_u16(symbol);
+ const int16x8_t negative_rate = vdupq_n_s16(-rate);
+
+ uint16x8_t index = vcombine_u16(vcreate_u16(0x0003000200010000),
+ vcreate_u16(0x0007000600050004));
+ uint16x8_t mask = vcgeq_u16(index, symbol_vec);
+ uint16x8_t a = vorrq_u16(mask, cdf_max_probability);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ uint16x8_t cdf_offset = vsubq_u16(cdf_vec, mask);
+ uint16x8_t delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf, cdf_vec);
+
+ cdf_vec = vld1q_u16(cdf + 8);
+ index = vcombine_u16(vcreate_u16(0x000b000a00090008),
+ vcreate_u16(0x000f000e000d000c));
+ mask = vcgeq_u16(index, symbol_vec);
+ a = vorrq_u16(mask, cdf_max_probability);
+ diff = vreinterpretq_s16_u16(vsubq_u16(a, cdf_vec));
+ cdf_offset = vsubq_u16(cdf_vec, mask);
+ delta = vreinterpretq_u16_s16(vshlq_s16(diff, negative_rate));
+ cdf_vec = vaddq_u16(cdf_offset, delta);
+ vst1q_u16(cdf + 8, cdf_vec);
+
+ cdf[16] = count + static_cast<uint16_t>(count < 32);
+}
+
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+
+inline __m128i LoadLo8(const void* a) {
+ return _mm_loadl_epi64(static_cast<const __m128i*>(a));
+}
+
+inline __m128i LoadUnaligned16(const void* a) {
+ return _mm_loadu_si128(static_cast<const __m128i*>(a));
+}
+
+inline void StoreLo8(void* a, const __m128i v) {
+ _mm_storel_epi64(static_cast<__m128i*>(a), v);
+}
+
+inline void StoreUnaligned16(void* a, const __m128i v) {
+ _mm_storeu_si128(static_cast<__m128i*>(a), v);
+}
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec = LoadLo8(cdf);
+ const uint16_t count = cdf[5];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_shufflelo_epi16(_mm_cvtsi32_si128(kCdfMaxProbability), 0);
+ const __m128i index = _mm_set_epi32(0x0, 0x0, 0x00040003, 0x00020001);
+ const __m128i symbol_vec = _mm_shufflelo_epi16(_mm_cvtsi32_si128(symbol), 0);
+ // i >= symbol.
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ // i < symbol: 32768, i >= symbol: 65535.
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ // i < symbol: 32768 - cdf, i >= symbol: 65535 - cdf.
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ // i < symbol: cdf - 0, i >= symbol: cdf - 65535.
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ // i < symbol: (32768 - cdf) >> rate, i >= symbol: (65535 (-1) - cdf) >> rate.
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ // i < symbol: (cdf - 0) + ((32768 - cdf) >> rate).
+ // i >= symbol: (cdf - 65535) + ((65535 - cdf) >> rate).
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreLo8(cdf, cdf_vec);
+ cdf[5] = count + static_cast<uint16_t>(count < 32);
+}
+
+// This version works for |symbol_count| = 7, 8, or 9.
+// See UpdateCdf5 for implementation details.
+template <int symbol_count>
+void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+ static_assert(symbol_count >= 7 && symbol_count <= 9, "");
+ __m128i cdf_vec = LoadUnaligned16(cdf);
+ const uint16_t count = cdf[symbol_count];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i index =
+ _mm_set_epi32(0x00080007, 0x00060005, 0x00040003, 0x00020001);
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf, cdf_vec);
+ cdf[symbol_count] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<7>(cdf, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<8>(cdf, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf7To9<9>(cdf, symbol);
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec = LoadUnaligned16(cdf + 2);
+ const uint16_t count = cdf[11];
+ cdf[11] = count + static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ if (symbol > 1) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i index =
+ _mm_set_epi32(0x000a0009, 0x00080007, 0x00060005, 0x00040003);
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf + 2, cdf_vec);
+ } else {
+ if (symbol != 0) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ const __m128i delta = _mm_sra_epi16(cdf_vec, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_sub_epi16(cdf_vec, delta);
+ StoreUnaligned16(cdf + 2, cdf_vec);
+ }
+}
+
+// See UpdateCdf5 for implementation details.
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec0 = LoadLo8(cdf);
+ __m128i cdf_vec1 = LoadUnaligned16(cdf + 4);
+ const uint16_t count = cdf[13];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+
+ const __m128i index = _mm_set_epi32(0x0, 0x0, 0x00040003, 0x00020001);
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec0);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec0, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec0 = _mm_add_epi16(cdf_offset, delta);
+ StoreLo8(cdf, cdf_vec0);
+
+ const __m128i index1 =
+ _mm_set_epi32(0x000c000b, 0x000a0009, 0x00080007, 0x00060005);
+ const __m128i mask1 = _mm_cmpgt_epi16(index1, symbol_vec);
+ const __m128i a1 = _mm_or_si128(mask1, cdf_max_probability);
+ const __m128i diff1 = _mm_sub_epi16(a1, cdf_vec1);
+ const __m128i cdf_offset1 = _mm_sub_epi16(cdf_vec1, mask1);
+ const __m128i delta1 = _mm_sra_epi16(diff1, _mm_cvtsi32_si128(rate));
+ cdf_vec1 = _mm_add_epi16(cdf_offset1, delta1);
+ StoreUnaligned16(cdf + 4, cdf_vec1);
+
+ cdf[13] = count + static_cast<uint16_t>(count < 32);
+}
+
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ __m128i cdf_vec0 = LoadUnaligned16(cdf);
+ const uint16_t count = cdf[16];
+ const int rate = (count >> 4) + 5;
+ const __m128i cdf_max_probability =
+ _mm_set1_epi16(static_cast<int16_t>(kCdfMaxProbability));
+ const __m128i symbol_vec = _mm_set1_epi16(static_cast<int16_t>(symbol));
+
+ const __m128i index =
+ _mm_set_epi32(0x00080007, 0x00060005, 0x00040003, 0x00020001);
+ const __m128i mask = _mm_cmpgt_epi16(index, symbol_vec);
+ const __m128i a = _mm_or_si128(mask, cdf_max_probability);
+ const __m128i diff = _mm_sub_epi16(a, cdf_vec0);
+ const __m128i cdf_offset = _mm_sub_epi16(cdf_vec0, mask);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec0 = _mm_add_epi16(cdf_offset, delta);
+ StoreUnaligned16(cdf, cdf_vec0);
+
+ __m128i cdf_vec1 = LoadUnaligned16(cdf + 8);
+ const __m128i index1 =
+ _mm_set_epi32(0x0010000f, 0x000e000d, 0x000c000b, 0x000a0009);
+ const __m128i mask1 = _mm_cmpgt_epi16(index1, symbol_vec);
+ const __m128i a1 = _mm_or_si128(mask1, cdf_max_probability);
+ const __m128i diff1 = _mm_sub_epi16(a1, cdf_vec1);
+ const __m128i cdf_offset1 = _mm_sub_epi16(cdf_vec1, mask1);
+ const __m128i delta1 = _mm_sra_epi16(diff1, _mm_cvtsi32_si128(rate));
+ cdf_vec1 = _mm_add_epi16(cdf_offset1, delta1);
+ StoreUnaligned16(cdf + 8, cdf_vec1);
+
+ cdf[16] = count + static_cast<uint16_t>(count < 32);
+}
+
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+
+void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 5, symbol);
+}
+
+void UpdateCdf7(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 7, symbol);
+}
+
+void UpdateCdf8(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 8, symbol);
+}
+
+void UpdateCdf9(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 9, symbol);
+}
+
+void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 11, symbol);
+}
+
+void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 13, symbol);
+}
+
+void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+ UpdateCdf(cdf, 16, symbol);
+}
+
+#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+
+inline DaalaBitReader::WindowSize HostToBigEndian(
+ const DaalaBitReader::WindowSize x) {
+ static_assert(sizeof(x) == 4 || sizeof(x) == 8, "");
+#if defined(__GNUC__)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return (sizeof(x) == 8) ? __builtin_bswap64(x) : __builtin_bswap32(x);
+#else
+ return x;
+#endif
+#elif defined(_WIN32)
+ // Note Windows targets are assumed to be little endian.
+ return static_cast<DaalaBitReader::WindowSize>(
+ (sizeof(x) == 8) ? _byteswap_uint64(static_cast<unsigned __int64>(x))
+ : _byteswap_ulong(static_cast<unsigned long>(x)));
+#else
+#error Unknown compiler!
+#endif // defined(__GNUC__)
+}
+
+} // namespace
+
+#if !LIBGAV1_CXX17
+constexpr int DaalaBitReader::kWindowSize; // static.
+#endif
+
+DaalaBitReader::DaalaBitReader(const uint8_t* data, size_t size,
+ bool allow_update_cdf)
+ : data_(data),
+ data_end_(data + size),
+ data_memcpy_end_((size >= sizeof(WindowSize))
+ ? data + size - sizeof(WindowSize) + 1
+ : data),
+ allow_update_cdf_(allow_update_cdf),
+ values_in_range_(kCdfMaxProbability) {
+ if (data_ < data_memcpy_end_) {
+ // This is a simplified version of PopulateBits() which loads 8 extra bits
+ // and skips the unnecessary shifts of value and window_diff_.
+ WindowSize value;
+ memcpy(&value, data_, sizeof(value));
+ data_ += sizeof(value);
+ window_diff_ = HostToBigEndian(value) ^ -1;
+ // Note the initial value of bits_ is larger than kMaxCachedBits as it's
+ // used to restore the most significant 0 bit that would be present after
+ // PopulateBits() when we extract the first symbol value.
+ // As shown in Section 8.2.2 Initialization process for symbol decoder,
+ // which uses a fixed offset to read the symbol values, the most
+ // significant bit is always 0:
+ // The variable numBits is set equal to Min( sz * 8, 15).
+ // The variable buf is read using the f(numBits) parsing process.
+ // The variable paddedBuf is set equal to ( buf << (15 - numBits) ).
+ // The variable SymbolValue is set to ((1 << 15) - 1) ^ paddedBuf.
+ bits_ = kWindowSize - 15;
+ return;
+ }
+ window_diff_ = 0;
+ bits_ = -15;
+ PopulateBits();
+}
+
+// This is similar to the ReadSymbol() implementation but it is optimized based
+// on the following facts:
+// * The probability is fixed at half. So some multiplications can be replaced
+// with bit operations.
+// * Symbol count is fixed at 2.
+int DaalaBitReader::ReadBit() {
+ const uint32_t curr =
+ ((values_in_range_ & kReadBitMask) >> 1) + kMinimumProbabilityPerSymbol;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ int bit = 1;
+ if (symbol_value >= curr) {
+ values_in_range_ -= curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ bit = 0;
+ } else {
+ values_in_range_ = curr;
+ }
+ NormalizeRange();
+ return bit;
+}
+
+int64_t DaalaBitReader::ReadLiteral(int num_bits) {
+ assert(num_bits <= 32);
+ assert(num_bits > 0);
+ uint32_t literal = 0;
+ int bit = num_bits - 1;
+ do {
+ // ARM can combine a shift operation with a constant number of bits with
+ // some other operations, such as the OR operation.
+ // Here is an ARM disassembly example:
+ // orr w1, w0, w1, lsl #1
+ // which left shifts register w1 by 1 bit and OR the shift result with
+ // register w0.
+ // The next 2 lines are equivalent to:
+ // literal |= static_cast<uint32_t>(ReadBit()) << bit;
+ literal <<= 1;
+ literal |= static_cast<uint32_t>(ReadBit());
+ } while (--bit >= 0);
+ return literal;
+}
+
+int DaalaBitReader::ReadSymbol(uint16_t* const cdf, int symbol_count) {
+ const int symbol = ReadSymbolImpl(cdf, symbol_count);
+ if (allow_update_cdf_) {
+ UpdateCdf(cdf, symbol_count, symbol);
+ }
+ return symbol;
+}
+
+bool DaalaBitReader::ReadSymbol(uint16_t* cdf) {
+ assert(cdf[1] == 0);
+ const bool symbol = ReadSymbolImpl(cdf[0]) != 0;
+ if (allow_update_cdf_) {
+ const uint16_t count = cdf[2];
+ // rate is computed in the spec as:
+ // 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
+ // In this case N is 2 and cdf[N] is |count|. So the equation becomes:
+ // 4 + (count > 15) + (count > 31)
+ // Note that the largest value for count is 32 (it is not incremented beyond
+ // 32). So using that information:
+ // count >> 4 is 0 for count from 0 to 15.
+ // count >> 4 is 1 for count from 16 to 31.
+ // count >> 4 is 2 for count == 32.
+ // Now, the equation becomes:
+ // 4 + (count >> 4).
+ // Since (count >> 4) can only be 0 or 1 or 2, the addition can be replaced
+ // with bitwise or. So the final equation is:
+ // 4 | (count >> 4).
+ const int rate = 4 | (count >> 4);
+ if (symbol) {
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ } else {
+ cdf[0] -= cdf[0] >> rate;
+ }
+ cdf[2] += static_cast<uint16_t>(count < 32);
+ }
+ return symbol;
+}
+
+bool DaalaBitReader::ReadSymbolWithoutCdfUpdate(uint16_t cdf) {
+ return ReadSymbolImpl(cdf) != 0;
+}
+
+template <int symbol_count>
+int DaalaBitReader::ReadSymbol(uint16_t* const cdf) {
+ static_assert(symbol_count >= 3 && symbol_count <= 16, "");
+ if (symbol_count == 3 || symbol_count == 4) {
+ return ReadSymbol3Or4(cdf, symbol_count);
+ }
+ int symbol;
+ if (symbol_count == 8) {
+ symbol = ReadSymbolImpl8(cdf);
+ } else if (symbol_count <= 13) {
+ symbol = ReadSymbolImpl(cdf, symbol_count);
+ } else {
+ symbol = ReadSymbolImplBinarySearch(cdf, symbol_count);
+ }
+ if (allow_update_cdf_) {
+ if (symbol_count == 5) {
+ UpdateCdf5(cdf, symbol);
+ } else if (symbol_count == 7) {
+ UpdateCdf7(cdf, symbol);
+ } else if (symbol_count == 8) {
+ UpdateCdf8(cdf, symbol);
+ } else if (symbol_count == 9) {
+ UpdateCdf9(cdf, symbol);
+ } else if (symbol_count == 11) {
+ UpdateCdf11(cdf, symbol);
+ } else if (symbol_count == 13) {
+ UpdateCdf13(cdf, symbol);
+ } else if (symbol_count == 16) {
+ UpdateCdf16(cdf, symbol);
+ } else {
+ UpdateCdf(cdf, symbol_count, symbol);
+ }
+ }
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImpl(const uint16_t* const cdf,
+ int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ --symbol_count;
+ uint32_t curr = values_in_range_;
+ int symbol = -1;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * symbol_count;
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over.
+ do {
+ prev = curr;
+ curr = (((values_in_range_ >> 8) * (cdf[++symbol] >> kCdfPrecision)) >> 1) +
+ delta;
+ delta -= kMinimumProbabilityPerSymbol;
+ } while (symbol_value < curr);
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImplBinarySearch(const uint16_t* const cdf,
+ int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ assert(symbol_count > 1 && symbol_count <= 16);
+ --symbol_count;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over. Since the CDFs are sorted, we can use binary
+ // search to do this. Let |symbol| be the index of the first |cdf| array
+ // entry whose scaled cdf value is less than or equal to |symbol_value|. The
+ // binary search maintains the invariant:
+ // low <= symbol <= high + 1
+ // and terminates when low == high + 1.
+ int low = 0;
+ int high = symbol_count - 1;
+ // The binary search maintains the invariants that |prev| is the scaled cdf
+ // value for low - 1 and |curr| is the scaled cdf value for high + 1. (By
+ // convention, the scaled cdf value for -1 is values_in_range_.) When the
+ // binary search terminates, |prev| is the scaled cdf value for symbol - 1
+ // and |curr| is the scaled cdf value for |symbol|.
+ uint32_t prev = values_in_range_;
+ uint32_t curr = 0;
+ const uint32_t values_in_range_shifted = values_in_range_ >> 8;
+ do {
+ const int mid = DivideBy2(low + high);
+ const uint32_t scaled_cdf =
+ ScaleCdf(values_in_range_shifted, cdf, mid, symbol_count);
+ if (symbol_value < scaled_cdf) {
+ low = mid + 1;
+ prev = scaled_cdf;
+ } else {
+ high = mid - 1;
+ curr = scaled_cdf;
+ }
+ } while (low <= high);
+ assert(low == high + 1);
+ // At this point, |low| is the symbol that has been decoded.
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return low;
+}
+
+int DaalaBitReader::ReadSymbolImpl(uint16_t cdf) {
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ const uint32_t curr =
+ (((values_in_range_ >> 8) * (cdf >> kCdfPrecision)) >> 1) +
+ kMinimumProbabilityPerSymbol;
+ const int symbol = static_cast<int>(symbol_value < curr);
+ if (symbol == 1) {
+ values_in_range_ = curr;
+ } else {
+ values_in_range_ -= curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ }
+ NormalizeRange();
+ return symbol;
+}
+
+// Equivalent to ReadSymbol(cdf, [3,4]), with the ReadSymbolImpl and UpdateCdf
+// calls inlined.
+int DaalaBitReader::ReadSymbol3Or4(uint16_t* const cdf,
+ const int symbol_count) {
+ assert(cdf[symbol_count - 1] == 0);
+ uint32_t curr = values_in_range_;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * (symbol_count - 1);
+ const uint32_t values_in_range_shifted = values_in_range_ >> 8;
+
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over. If allow_update_cdf_ is true, update the |cdf|
+ // array.
+ //
+ // The original code is:
+ //
+ // int symbol = -1;
+ // do {
+ // prev = curr;
+ // curr =
+ // ((values_in_range_shifted * (cdf[++symbol] >> kCdfPrecision)) >> 1)
+ // + delta;
+ // delta -= kMinimumProbabilityPerSymbol;
+ // } while (symbol_value < curr);
+ // if (allow_update_cdf_) {
+ // UpdateCdf(cdf, [3,4], symbol);
+ // }
+ //
+ // The do-while loop is unrolled with three or four iterations, and the
+ // UpdateCdf call is inlined and merged into the iterations.
+ int symbol = 0;
+ // Iteration 0.
+ prev = curr;
+ curr =
+ ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) {
+ // symbol == 0.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/0).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count == 4);
+ if (symbol_count == 4) {
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+ // 1. On Motorola Moto G5 Plus (running 32-bit Android 8.1.0), the ARM
+ // NEON code is slower. Consider using the C version if __arm__ is
+ // defined.
+ // 2. The ARM NEON code (compiled for arm64) is slightly slower on
+ // Samsung Galaxy S8+ (SM-G955FD).
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ const uint16x4_t delta = vshl_u16(cdf_vec, negative_rate);
+ cdf_vec = vsub_u16(cdf_vec, delta);
+ vst1_u16(cdf, cdf_vec);
+#elif LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ __m128i cdf_vec = LoadLo8(cdf);
+ const __m128i delta = _mm_sra_epi16(cdf_vec, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_sub_epi16(cdf_vec, delta);
+ StoreLo8(cdf, cdf_vec);
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ cdf[2] -= cdf[2] >> rate;
+#endif
+ } else { // symbol_count == 3.
+ cdf[0] -= cdf[0] >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ }
+ }
+ goto found;
+ }
+ ++symbol;
+ delta -= kMinimumProbabilityPerSymbol;
+ // Iteration 1.
+ prev = curr;
+ curr =
+ ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) {
+ // symbol == 1.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/1).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 4 + static_cast<int>(symbol_count == 4);
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] -= cdf[1] >> rate;
+ if (symbol_count == 4) cdf[2] -= cdf[2] >> rate;
+ }
+ goto found;
+ }
+ ++symbol;
+ if (symbol_count == 4) {
+ delta -= kMinimumProbabilityPerSymbol;
+ // Iteration 2.
+ prev = curr;
+ curr = ((values_in_range_shifted * (cdf[symbol] >> kCdfPrecision)) >> 1) +
+ delta;
+ if (symbol_value >= curr) {
+ // symbol == 2.
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, 4, /*symbol=*/2).
+ const uint16_t count = cdf[4];
+ cdf[4] += static_cast<uint16_t>(count < 32);
+ const int rate = (count >> 4) + 5;
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ cdf[2] -= cdf[2] >> rate;
+ }
+ goto found;
+ }
+ ++symbol;
+ }
+ // |delta| is 0 for the last iteration.
+ // Iteration 2 (symbol_count == 3) or 3 (symbol_count == 4).
+ prev = curr;
+ // Since cdf[symbol_count - 1] is 0 and |delta| is 0, |curr| is also 0.
+ curr = 0;
+ // symbol == [2,3].
+ if (allow_update_cdf_) {
+ // Inlined version of UpdateCdf(cdf, [3,4], /*symbol=*/[2,3]).
+ const uint16_t count = cdf[symbol_count];
+ cdf[symbol_count] += static_cast<uint16_t>(count < 32);
+ const int rate = (4 | (count >> 4)) + static_cast<int>(symbol_count == 4);
+ if (symbol_count == 4) {
+#if LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
+ // On Motorola Moto G5 Plus (running 32-bit Android 8.1.0), the ARM NEON
+ // code is a tiny bit slower. Consider using the C version if __arm__ is
+ // defined.
+ uint16x4_t cdf_vec = vld1_u16(cdf);
+ const uint16x4_t cdf_max_probability = vdup_n_u16(kCdfMaxProbability);
+ const int16x4_t diff =
+ vreinterpret_s16_u16(vsub_u16(cdf_max_probability, cdf_vec));
+ const int16x4_t negative_rate = vdup_n_s16(-rate);
+ const uint16x4_t delta =
+ vreinterpret_u16_s16(vshl_s16(diff, negative_rate));
+ cdf_vec = vadd_u16(cdf_vec, delta);
+ vst1_u16(cdf, cdf_vec);
+ cdf[3] = 0;
+#elif LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ __m128i cdf_vec = LoadLo8(cdf);
+ const __m128i cdf_max_probability =
+ _mm_shufflelo_epi16(_mm_cvtsi32_si128(kCdfMaxProbability), 0);
+ const __m128i diff = _mm_sub_epi16(cdf_max_probability, cdf_vec);
+ const __m128i delta = _mm_sra_epi16(diff, _mm_cvtsi32_si128(rate));
+ cdf_vec = _mm_add_epi16(cdf_vec, delta);
+ StoreLo8(cdf, cdf_vec);
+ cdf[3] = 0;
+#else // !LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ cdf[2] += (kCdfMaxProbability - cdf[2]) >> rate;
+#endif
+ } else { // symbol_count == 3.
+ cdf[0] += (kCdfMaxProbability - cdf[0]) >> rate;
+ cdf[1] += (kCdfMaxProbability - cdf[1]) >> rate;
+ }
+ }
+found:
+ // End of unrolled do-while loop.
+
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+int DaalaBitReader::ReadSymbolImpl8(const uint16_t* const cdf) {
+ assert(cdf[7] == 0);
+ uint32_t curr = values_in_range_;
+ uint32_t prev;
+ const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
+ uint32_t delta = kMinimumProbabilityPerSymbol * 7;
+ // Search through the |cdf| array to determine where the scaled cdf value and
+ // |symbol_value| cross over.
+ //
+ // The original code is:
+ //
+ // int symbol = -1;
+ // do {
+ // prev = curr;
+ // curr =
+ // (((values_in_range_ >> 8) * (cdf[++symbol] >> kCdfPrecision)) >> 1)
+ // + delta;
+ // delta -= kMinimumProbabilityPerSymbol;
+ // } while (symbol_value < curr);
+ //
+ // The do-while loop is unrolled with eight iterations.
+ int symbol = 0;
+
+#define READ_SYMBOL_ITERATION \
+ prev = curr; \
+ curr = (((values_in_range_ >> 8) * (cdf[symbol] >> kCdfPrecision)) >> 1) + \
+ delta; \
+ if (symbol_value >= curr) goto found; \
+ ++symbol; \
+ delta -= kMinimumProbabilityPerSymbol
+
+ READ_SYMBOL_ITERATION; // Iteration 0.
+ READ_SYMBOL_ITERATION; // Iteration 1.
+ READ_SYMBOL_ITERATION; // Iteration 2.
+ READ_SYMBOL_ITERATION; // Iteration 3.
+ READ_SYMBOL_ITERATION; // Iteration 4.
+ READ_SYMBOL_ITERATION; // Iteration 5.
+
+ // The last two iterations can be simplified, so they don't use the
+ // READ_SYMBOL_ITERATION macro.
+#undef READ_SYMBOL_ITERATION
+
+ // Iteration 6.
+ prev = curr;
+ curr =
+ (((values_in_range_ >> 8) * (cdf[symbol] >> kCdfPrecision)) >> 1) + delta;
+ if (symbol_value >= curr) goto found; // symbol == 6.
+ ++symbol;
+ // |delta| is 0 for the last iteration.
+ // Iteration 7.
+ prev = curr;
+ // Since cdf[7] is 0 and |delta| is 0, |curr| is also 0.
+ curr = 0;
+ // symbol == 7.
+found:
+ // End of unrolled do-while loop.
+
+ values_in_range_ = prev - curr;
+ window_diff_ -= static_cast<WindowSize>(curr) << bits_;
+ NormalizeRange();
+ return symbol;
+}
+
+void DaalaBitReader::PopulateBits() {
+ constexpr int kMaxCachedBits = kWindowSize - 16;
+#if defined(__aarch64__)
+ // Fast path: read eight bytes and add the first six bytes to window_diff_.
+ // This fast path makes the following assumptions.
+ // 1. We assume that unaligned load of uint64_t is fast.
+ // 2. When there are enough bytes in data_, the for loop below reads 6 or 7
+ // bytes depending on the value of bits_. This fast path always reads 6
+ // bytes, which results in more calls to PopulateBits(). We assume that
+ // making more calls to a faster PopulateBits() is overall a win.
+ // NOTE: Although this fast path could also be used on x86_64, it hurts
+ // performance (measured on Lenovo ThinkStation P920 running Linux). (The
+ // reason is still unknown.) Therefore this fast path is only used on arm64.
+ static_assert(kWindowSize == 64, "");
+ if (data_ < data_memcpy_end_) {
+ uint64_t value;
+ // arm64 supports unaligned loads, so this memcpy call is compiled to a
+ // single ldr instruction.
+ memcpy(&value, data_, sizeof(value));
+ data_ += kMaxCachedBits >> 3;
+ value = HostToBigEndian(value) ^ -1;
+ value >>= kWindowSize - kMaxCachedBits;
+ window_diff_ = value | (window_diff_ << kMaxCachedBits);
+ bits_ += kMaxCachedBits;
+ return;
+ }
+#endif
+
+ const uint8_t* data = data_;
+ int bits = bits_;
+ WindowSize window_diff = window_diff_;
+
+ int count = kWindowSize - 9 - (bits + 15);
+ // The fast path above, if compiled, would cause clang 8.0.7 to vectorize
+ // this loop. Since -15 <= bits_ <= -1, this loop has at most 6 or 7
+ // iterations when WindowSize is 64 bits. So it is not profitable to
+ // vectorize this loop. Note that clang 8.0.7 does not vectorize this loop if
+ // the fast path above is not compiled.
+
+#ifdef __clang__
+#pragma clang loop vectorize(disable) interleave(disable)
+#endif
+ for (; count >= 0 && data < data_end_; count -= 8) {
+ const uint8_t value = *data++ ^ -1;
+ window_diff = static_cast<WindowSize>(value) | (window_diff << 8);
+ bits += 8;
+ }
+ assert(bits <= kMaxCachedBits);
+ if (data == data_end_) {
+ // Shift in some 1s. This is equivalent to providing fake 0 data bits.
+ window_diff = ((window_diff + 1) << (kMaxCachedBits - bits)) - 1;
+ bits = kMaxCachedBits;
+ }
+
+ data_ = data;
+ bits_ = bits;
+ window_diff_ = window_diff;
+}
+
+void DaalaBitReader::NormalizeRange() {
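+ // values_in_range_ is always less than 2^16, so FloorLog2() returns a value
+ // in [0, 15] and 15 ^ FloorLog2(x) == 15 - FloorLog2(x): the shift needed to
+ // bring values_in_range_ back into [2^15, 2^16).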
+ const int bits_used = 15 ^ FloorLog2(values_in_range_);
+ bits_ -= bits_used;
+ values_in_range_ <<= bits_used;
+ if (bits_ < 0) PopulateBits();
+}
+
+// Explicit instantiations.
+template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
+template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+
+} // namespace libgav1
diff --git a/src/utils/entropy_decoder.h b/src/utils/entropy_decoder.h
new file mode 100644
index 0000000..c066b98
--- /dev/null
+++ b/src/utils/entropy_decoder.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
+#define LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "src/utils/bit_reader.h"
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+class DaalaBitReader : public BitReader {
+ public:
+ // WindowSize must be an unsigned integer type with at least 32 bits. Use the
+ // largest type with fast arithmetic. size_t should meet these requirements.
+ using WindowSize = size_t;
+
+ DaalaBitReader(const uint8_t* data, size_t size, bool allow_update_cdf);
+ ~DaalaBitReader() override = default;
+
+ // Move only.
+ DaalaBitReader(DaalaBitReader&& rhs) noexcept;
+ DaalaBitReader& operator=(DaalaBitReader&& rhs) noexcept;
+
+ int ReadBit() final;
+ int64_t ReadLiteral(int num_bits) override;
+ // ReadSymbol() calls for which the |symbol_count| is only known at runtime
+ // will use this variant.
+ int ReadSymbol(uint16_t* cdf, int symbol_count);
+ // ReadSymbol() calls for which the |symbol_count| is equal to 2 (boolean
+ // symbols) will use this variant.
+ bool ReadSymbol(uint16_t* cdf);
+ bool ReadSymbolWithoutCdfUpdate(uint16_t cdf);
+ // Use either linear search or binary search for decoding the symbol depending
+ // on |symbol_count|. ReadSymbol calls for which the |symbol_count| is known
+ // at compile time will use this variant.
+ template <int symbol_count>
+ int ReadSymbol(uint16_t* cdf);
+
+ private:
+ static constexpr int kWindowSize = static_cast<int>(sizeof(WindowSize)) * 8;
+ static_assert(kWindowSize >= 32, "");
+
+ // Reads a symbol using the |cdf| table which contains the probabilities of
+ // each symbol. On a high level, this function does the following:
+ // 1) Scale the |cdf| values.
+ // 2) Find the index in the |cdf| array where the scaled CDF value crosses
+ // the modified |window_diff_| threshold.
+ // 3) That index is the symbol that has been decoded.
+ // 4) Update |window_diff_| and |values_in_range_| based on the symbol that
+ // has been decoded.
+ inline int ReadSymbolImpl(const uint16_t* cdf, int symbol_count);
+ // Similar to ReadSymbolImpl but it uses binary search to perform step 2 in
+ // the comment above. As of now, this function is called when |symbol_count|
+ // is greater than or equal to 14.
+ inline int ReadSymbolImplBinarySearch(const uint16_t* cdf, int symbol_count);
+ // Specialized implementation of ReadSymbolImpl based on the fact that
+ // symbol_count == 2.
+ inline int ReadSymbolImpl(uint16_t cdf);
+ // ReadSymbolN is a specialization of ReadSymbol for symbol_count == N.
+ LIBGAV1_ALWAYS_INLINE int ReadSymbol3Or4(uint16_t* cdf, int symbol_count);
+ // ReadSymbolImplN is a specialization of ReadSymbolImpl for
+ // symbol_count == N.
+ LIBGAV1_ALWAYS_INLINE int ReadSymbolImpl8(const uint16_t* cdf);
+ inline void PopulateBits();
+ // Normalizes the range so that 32768 <= |values_in_range_| < 65536. Also
+ // calls PopulateBits() if necessary.
+ inline void NormalizeRange();
+
+ const uint8_t* data_;
+ const uint8_t* const data_end_;
+ // If |data_| < |data_memcpy_end_|, then we can read sizeof(WindowSize) bytes
+ // from |data_|. Note with sizeof(WindowSize) == 4 this is only used in the
+ // constructor, not PopulateBits().
+ const uint8_t* const data_memcpy_end_;
+ const bool allow_update_cdf_;
+ // Number of cached bits of data in the current value.
+ int bits_;
+ // Number of values in the current range. Declared as uint32_t for better
+ // performance but only the lower 16 bits are used.
+ uint32_t values_in_range_;
+ // The difference between the high end of the current range and the coded
+ // value minus 1. The 16 bits above |bits_| of this variable are used to
+ // decode the next symbol. It is filled in whenever |bits_| is less than 0.
+ // Note this implementation differs from the spec as it trades the need to
+ // shift in 1s in NormalizeRange() with an extra shift in PopulateBits(),
+ // which occurs less frequently.
+ WindowSize window_diff_;
+};
+
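+// Usage sketch (illustrative; |data|, |size| and the cdf arrays are
+// hypothetical): reading from a tile bitstream might look like:
+//   DaalaBitReader reader(data, size, /*allow_update_cdf=*/true);
+//   const bool flag = reader.ReadSymbol(boolean_cdf);      // 2-symbol CDF.
+//   const int mode = reader.ReadSymbol<4>(four_symbol_cdf);
+//   const int64_t literal = reader.ReadLiteral(/*num_bits=*/3);
+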
+extern template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
+extern template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_ENTROPY_DECODER_H_
diff --git a/src/utils/executor.cc b/src/utils/executor.cc
new file mode 100644
index 0000000..6934057
--- /dev/null
+++ b/src/utils/executor.cc
@@ -0,0 +1,21 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/executor.h"
+
+namespace libgav1 {
+
+Executor::~Executor() = default;
+
+} // namespace libgav1
diff --git a/src/utils/executor.h b/src/utils/executor.h
new file mode 100644
index 0000000..21abdf8
--- /dev/null
+++ b/src/utils/executor.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_EXECUTOR_H_
+#define LIBGAV1_SRC_UTILS_EXECUTOR_H_
+
+#include <functional>
+
+namespace libgav1 {
+
+class Executor {
+ public:
+ virtual ~Executor();
+
+ // Schedules the specified "callback" for execution in this executor.
+ // Depending on the subclass implementation, this may block in some
+ // situations.
+ virtual void Schedule(std::function<void()> callback) = 0;
+};
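+
+// Sketch of a trivial subclass (illustrative only, not part of the library):
+// an executor that runs each callback inline on the calling thread.
+//   class InlineExecutor : public Executor {
+//    public:
+//     void Schedule(std::function<void()> callback) override { callback(); }
+//   };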
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_EXECUTOR_H_
diff --git a/src/utils/libgav1_utils.cmake b/src/utils/libgav1_utils.cmake
new file mode 100644
index 0000000..8b6ec4b
--- /dev/null
+++ b/src/utils/libgav1_utils.cmake
@@ -0,0 +1,72 @@
+# Copyright 2019 The libgav1 Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if(LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_)
+ return()
+endif() # LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_
+set(LIBGAV1_UTILS_LIBGAV1_UTILS_CMAKE_ 1)
+
+list(APPEND libgav1_utils_sources
+ "${libgav1_source}/utils/array_2d.h"
+ "${libgav1_source}/utils/bit_mask_set.h"
+ "${libgav1_source}/utils/bit_reader.cc"
+ "${libgav1_source}/utils/bit_reader.h"
+ "${libgav1_source}/utils/block_parameters_holder.cc"
+ "${libgav1_source}/utils/block_parameters_holder.h"
+ "${libgav1_source}/utils/blocking_counter.h"
+ "${libgav1_source}/utils/common.h"
+ "${libgav1_source}/utils/compiler_attributes.h"
+ "${libgav1_source}/utils/constants.cc"
+ "${libgav1_source}/utils/constants.h"
+ "${libgav1_source}/utils/cpu.cc"
+ "${libgav1_source}/utils/cpu.h"
+ "${libgav1_source}/utils/dynamic_buffer.h"
+ "${libgav1_source}/utils/entropy_decoder.cc"
+ "${libgav1_source}/utils/entropy_decoder.h"
+ "${libgav1_source}/utils/executor.cc"
+ "${libgav1_source}/utils/executor.h"
+ "${libgav1_source}/utils/logging.cc"
+ "${libgav1_source}/utils/logging.h"
+ "${libgav1_source}/utils/memory.h"
+ "${libgav1_source}/utils/parameter_tree.cc"
+ "${libgav1_source}/utils/parameter_tree.h"
+ "${libgav1_source}/utils/queue.h"
+ "${libgav1_source}/utils/raw_bit_reader.cc"
+ "${libgav1_source}/utils/raw_bit_reader.h"
+ "${libgav1_source}/utils/reference_info.h"
+ "${libgav1_source}/utils/segmentation.cc"
+ "${libgav1_source}/utils/segmentation.h"
+ "${libgav1_source}/utils/segmentation_map.cc"
+ "${libgav1_source}/utils/segmentation_map.h"
+ "${libgav1_source}/utils/stack.h"
+ "${libgav1_source}/utils/threadpool.cc"
+ "${libgav1_source}/utils/threadpool.h"
+ "${libgav1_source}/utils/types.h"
+ "${libgav1_source}/utils/unbounded_queue.h"
+ "${libgav1_source}/utils/vector.h")
+
+macro(libgav1_add_utils_targets)
+ libgav1_add_library(NAME
+ libgav1_utils
+ TYPE
+ OBJECT
+ SOURCES
+ ${libgav1_utils_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_include_paths}
+ ${libgav1_gtest_include_paths})
+
+endmacro()
diff --git a/src/utils/logging.cc b/src/utils/logging.cc
new file mode 100644
index 0000000..9a43c22
--- /dev/null
+++ b/src/utils/logging.cc
@@ -0,0 +1,65 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/logging.h"
+
+#include <cstdarg>
+#include <cstdio>
+#include <sstream>
+#include <thread> // NOLINT (unapproved c++11 header)
+
+#if !defined(LIBGAV1_LOG_LEVEL)
+#define LIBGAV1_LOG_LEVEL (1 << 30)
+#endif
+
+namespace libgav1 {
+namespace internal {
+#if LIBGAV1_ENABLE_LOGGING
+namespace {
+
+const char* LogSeverityName(LogSeverity severity) {
+ switch (severity) {
+ case LogSeverity::kInfo:
+ return "INFO";
+ case LogSeverity::kError:
+ return "ERROR";
+ case LogSeverity::kWarning:
+ return "WARNING";
+ }
+ return "UNKNOWN";
+}
+
+} // namespace
+
+void Log(LogSeverity severity, const char* file, int line, const char* format,
+ ...) {
+ if (LIBGAV1_LOG_LEVEL < static_cast<int>(severity)) return;
+ std::ostringstream ss;
+ ss << std::hex << std::this_thread::get_id();
+ fprintf(stderr, "%s %s %s:%d] ", LogSeverityName(severity), ss.str().c_str(),
+ file, line);
+
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+}
+#else // !LIBGAV1_ENABLE_LOGGING
+void Log(LogSeverity /*severity*/, const char* /*file*/, int /*line*/,
+ const char* /*format*/, ...) {}
+#endif // LIBGAV1_ENABLE_LOGGING
+
+} // namespace internal
+} // namespace libgav1
diff --git a/src/utils/logging.h b/src/utils/logging.h
new file mode 100644
index 0000000..48928db
--- /dev/null
+++ b/src/utils/logging.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_LOGGING_H_
+#define LIBGAV1_SRC_UTILS_LOGGING_H_
+
+#include <cstddef>
+
+#include "src/utils/compiler_attributes.h"
+
+#if !defined(LIBGAV1_ENABLE_LOGGING)
+#if defined(NDEBUG) || defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
+#define LIBGAV1_ENABLE_LOGGING 0
+#else
+#define LIBGAV1_ENABLE_LOGGING 1
+#endif
+#endif
+
+#if LIBGAV1_ENABLE_LOGGING
+// LIBGAV1_DLOG(severity, printf-format-string)
+// Debug logging that can optionally be enabled in release builds by explicitly
+// setting LIBGAV1_ENABLE_LOGGING.
+// Severity is given as an all-caps version of enum LogSeverity with the
+// leading 'k' removed: LIBGAV1_DLOG(INFO, "...");
+#define LIBGAV1_DLOG(severity, ...) \
+ do { \
+ constexpr const char* libgav1_logging_internal_basename = \
+ ::libgav1::internal::Basename(__FILE__, sizeof(__FILE__) - 1); \
+ ::libgav1::internal::Log(LIBGAV1_LOGGING_INTERNAL_##severity, \
+ libgav1_logging_internal_basename, __LINE__, \
+ __VA_ARGS__); \
+ } while (0)
+#else
+#define LIBGAV1_DLOG(severity, ...) \
+ do { \
+ } while (0)
+#endif // LIBGAV1_ENABLE_LOGGING
+
+#define LIBGAV1_LOGGING_INTERNAL_ERROR ::libgav1::internal::LogSeverity::kError
+#define LIBGAV1_LOGGING_INTERNAL_WARNING \
+ ::libgav1::internal::LogSeverity::kWarning
+#define LIBGAV1_LOGGING_INTERNAL_INFO ::libgav1::internal::LogSeverity::kInfo
+
+namespace libgav1 {
+namespace internal {
+
+enum class LogSeverity : int {
+ kError,
+ kWarning,
+ kInfo,
+};
+
+// Helper function to implement LIBGAV1_DLOG
+// Logs |format, ...| at |severity| level, reporting it as called from
+// |file|:|line|.
+void Log(libgav1::internal::LogSeverity severity, const char* file, int line,
+ const char* format, ...) LIBGAV1_PRINTF_ATTRIBUTE(4, 5);
+
+// Compile-time function to get the 'base' file_name, that is, the part of
+// a file_name after the last '/' or '\' path separator. The search starts at
+// the end of the string; the second parameter is the length of the string.
+constexpr const char* Basename(const char* file_name, size_t offset) {
+ return (offset == 0 || file_name[offset - 1] == '/' ||
+ file_name[offset - 1] == '\\')
+ ? file_name + offset
+ : Basename(file_name, offset - 1);
+}
+
+} // namespace internal
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_LOGGING_H_
diff --git a/src/utils/memory.h b/src/utils/memory.h
new file mode 100644
index 0000000..219a83f
--- /dev/null
+++ b/src/utils/memory.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_MEMORY_H_
+#define LIBGAV1_SRC_UTILS_MEMORY_H_
+
+#if defined(__ANDROID__) || defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <new>
+
+namespace libgav1 {
+
+enum {
+// The byte alignment required for buffers used with SIMD code to be read or
+// written with aligned operations.
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+ defined(_M_X64)
+ kMaxAlignment = 32, // extended alignment is safe on x86.
+#else
+ kMaxAlignment = alignof(max_align_t),
+#endif
+};
+
+// AlignedAlloc, AlignedFree
+//
+// void* AlignedAlloc(size_t alignment, size_t size);
+// Allocate aligned memory.
+// |alignment| must be a power of 2.
+// Unlike posix_memalign(), |alignment| may be smaller than sizeof(void*).
+// Unlike aligned_alloc(), |size| does not need to be a multiple of
+// |alignment|.
+// The returned pointer should be freed by AlignedFree().
+//
+// void AlignedFree(void* aligned_memory);
+// Free aligned memory.
+
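+// Illustrative usage (a sketch; the buffer name and sizes are hypothetical):
+//   void* buffer = AlignedAlloc(/*alignment=*/32, /*size=*/1024);
+//   if (buffer != nullptr) {
+//     // ... use |buffer| with aligned SIMD loads/stores ...
+//     AlignedFree(buffer);
+//   }
+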
+#if defined(_MSC_VER) // MSVC
+
+inline void* AlignedAlloc(size_t alignment, size_t size) {
+ return _aligned_malloc(size, alignment);
+}
+
+inline void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); }
+
+#else // !defined(_MSC_VER)
+
+inline void* AlignedAlloc(size_t alignment, size_t size) {
+#if defined(__ANDROID__)
+ // Although posix_memalign() was introduced in Android API level 17, it is
+ // more convenient to use memalign(). Unlike glibc, Android does not consider
+ // memalign() an obsolete function.
+ return memalign(alignment, size);
+#else // !defined(__ANDROID__)
+ void* ptr = nullptr;
+ // posix_memalign requires that the requested alignment be at least
+ // sizeof(void*). In this case, fall back on malloc which should return
+ // memory aligned to at least the size of a pointer.
+ const size_t required_alignment = sizeof(void*);
+ if (alignment < required_alignment) return malloc(size);
+ const int error = posix_memalign(&ptr, alignment, size);
+ if (error != 0) {
+ errno = error;
+ return nullptr;
+ }
+ return ptr;
+#endif // defined(__ANDROID__)
+}
+
+inline void AlignedFree(void* aligned_memory) { free(aligned_memory); }
+
+#endif // defined(_MSC_VER)
+
+inline void Memset(uint8_t* const dst, int value, size_t count) {
+ memset(dst, value, count);
+}
+
+inline void Memset(uint16_t* const dst, int value, size_t count) {
+ for (size_t i = 0; i < count; ++i) {
+ dst[i] = static_cast<uint16_t>(value);
+ }
+}
+
+struct MallocDeleter {
+ void operator()(void* ptr) const { free(ptr); }
+};
+
+struct AlignedDeleter {
+ void operator()(void* ptr) const { AlignedFree(ptr); }
+};
+
+template <typename T>
+using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
+
+// Allocates aligned memory for an array of |count| elements of type T.
+template <typename T>
+inline AlignedUniquePtr<T> MakeAlignedUniquePtr(size_t alignment,
+ size_t count) {
+ return AlignedUniquePtr<T>(
+ static_cast<T*>(AlignedAlloc(alignment, count * sizeof(T))));
+}
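+
+// For example (illustrative; the type, alignment, and count are hypothetical),
+// a 16-byte-aligned array of 64 int16_t values:
+//   AlignedUniquePtr<int16_t> buf = MakeAlignedUniquePtr<int16_t>(16, 64);
+//   if (buf == nullptr) { /* allocation failed */ }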
+
+// A base class with custom new and delete operators. The exception-throwing
+// new operators are deleted. The "new (std::nothrow)" form must be used.
+//
+// The new operators return nullptr if the requested size is greater than
+// 0x40000000 bytes (1 GB). TODO(wtc): Make the maximum allocable memory size
+// a compile-time configuration macro.
+//
+// See https://en.cppreference.com/w/cpp/memory/new/operator_new and
+// https://en.cppreference.com/w/cpp/memory/new/operator_delete.
+//
+// NOTE: The allocation and deallocation functions are static member functions
+// whether the keyword 'static' is used or not.
+struct Allocable {
+ // Class-specific allocation functions.
+ static void* operator new(size_t size) = delete;
+ static void* operator new[](size_t size) = delete;
+
+ // Class-specific non-throwing allocation functions
+ static void* operator new(size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+ return ::operator new(size, tag);
+ }
+ static void* operator new[](size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+ return ::operator new[](size, tag);
+ }
+
+ // Class-specific deallocation functions.
+ static void operator delete(void* ptr) noexcept { ::operator delete(ptr); }
+ static void operator delete[](void* ptr) noexcept {
+ ::operator delete[](ptr);
+ }
+
+ // Only called if new (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
+ ::operator delete(ptr, tag);
+ }
+ // Only called if new[] (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
+ ::operator delete[](ptr, tag);
+ }
+};
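+
+// Illustrative use of Allocable (|MyType| is a hypothetical class): derived
+// classes must be allocated with the non-throwing form of new.
+//   class MyType : public Allocable { ... };
+//   std::unique_ptr<MyType> t(new (std::nothrow) MyType());
+//   if (t == nullptr) { /* handle allocation failure */ }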
+
+// A variant of Allocable that forces allocations to be aligned to
+// kMaxAlignment bytes. This is intended for use with classes that use
+// alignas() with this value. C++17 aligned new/delete are used if available,
+// otherwise we use AlignedAlloc/Free.
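+//
+// Illustrative use (|Buffer| is a hypothetical class name):
+//   struct alignas(kMaxAlignment) Buffer : public MaxAlignedAllocable { ... };
+//   std::unique_ptr<Buffer> b(new (std::nothrow) Buffer());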
+struct MaxAlignedAllocable {
+ // Class-specific allocation functions.
+ static void* operator new(size_t size) = delete;
+ static void* operator new[](size_t size) = delete;
+
+ // Class-specific non-throwing allocation functions
+ static void* operator new(size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+#ifdef __cpp_aligned_new
+ return ::operator new(size, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ return AlignedAlloc(kMaxAlignment, size);
+#endif
+ }
+ static void* operator new[](size_t size, const std::nothrow_t& tag) noexcept {
+ if (size > 0x40000000) return nullptr;
+#ifdef __cpp_aligned_new
+ return ::operator new[](size, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ return AlignedAlloc(kMaxAlignment, size);
+#endif
+ }
+
+ // Class-specific deallocation functions.
+ static void operator delete(void* ptr) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete(ptr, std::align_val_t(kMaxAlignment));
+#else
+ AlignedFree(ptr);
+#endif
+ }
+ static void operator delete[](void* ptr) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete[](ptr, std::align_val_t(kMaxAlignment));
+#else
+ AlignedFree(ptr);
+#endif
+ }
+
+ // Only called if new (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete(ptr, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ AlignedFree(ptr);
+#endif
+ }
+ // Only called if new[] (std::nothrow) is used and the constructor throws an
+ // exception.
+ static void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
+#ifdef __cpp_aligned_new
+ ::operator delete[](ptr, std::align_val_t(kMaxAlignment), tag);
+#else
+ static_cast<void>(tag);
+ AlignedFree(ptr);
+#endif
+ }
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_MEMORY_H_
diff --git a/src/utils/parameter_tree.cc b/src/utils/parameter_tree.cc
new file mode 100644
index 0000000..9426ce6
--- /dev/null
+++ b/src/utils/parameter_tree.cc
@@ -0,0 +1,133 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/parameter_tree.h"
+
+#include <cassert>
+#include <memory>
+#include <new>
+
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/logging.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// static
+std::unique_ptr<ParameterTree> ParameterTree::Create(int row4x4, int column4x4,
+ BlockSize block_size,
+ bool is_leaf) {
+ std::unique_ptr<ParameterTree> tree(
+ new (std::nothrow) ParameterTree(row4x4, column4x4, block_size));
+ if (tree != nullptr && is_leaf && !tree->SetPartitionType(kPartitionNone)) {
+ tree = nullptr;
+ }
+ return tree;
+}
+
+bool ParameterTree::SetPartitionType(Partition partition) {
+ assert(!partition_type_set_);
+ partition_ = partition;
+ partition_type_set_ = true;
+ const int block_width4x4 = kNum4x4BlocksWide[block_size_];
+ const int half_block4x4 = block_width4x4 >> 1;
+ const int quarter_block4x4 = half_block4x4 >> 1;
+ const BlockSize sub_size = kSubSize[partition][block_size_];
+ const BlockSize split_size = kSubSize[kPartitionSplit][block_size_];
+ assert(partition == kPartitionNone || sub_size != kBlockInvalid);
+ switch (partition) {
+ case kPartitionNone:
+ parameters_.reset(new (std::nothrow) BlockParameters());
+ return parameters_ != nullptr;
+ case kPartitionHorizontal:
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr;
+ case kPartitionVertical:
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr;
+ case kPartitionSplit:
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, sub_size, false);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, false);
+ children_[2] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, false);
+ children_[3] = ParameterTree::Create(
+ row4x4_ + half_block4x4, column4x4_ + half_block4x4, sub_size, false);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr && children_[3] != nullptr;
+ case kPartitionHorizontalWithTopSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, split_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ split_size, true);
+ children_[2] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionHorizontalWithBottomSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ split_size, true);
+ children_[2] =
+ ParameterTree::Create(row4x4_ + half_block4x4,
+ column4x4_ + half_block4x4, split_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionVerticalWithLeftSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] =
+ ParameterTree::Create(row4x4_, column4x4_, split_size, true);
+ children_[1] = ParameterTree::Create(row4x4_ + half_block4x4, column4x4_,
+ split_size, true);
+ children_[2] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ sub_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionVerticalWithRightSplit:
+ assert(split_size != kBlockInvalid);
+ children_[0] = ParameterTree::Create(row4x4_, column4x4_, sub_size, true);
+ children_[1] = ParameterTree::Create(row4x4_, column4x4_ + half_block4x4,
+ split_size, true);
+ children_[2] =
+ ParameterTree::Create(row4x4_ + half_block4x4,
+ column4x4_ + half_block4x4, split_size, true);
+ return children_[0] != nullptr && children_[1] != nullptr &&
+ children_[2] != nullptr;
+ case kPartitionHorizontal4:
+ for (int i = 0; i < 4; ++i) {
+ children_[i] = ParameterTree::Create(row4x4_ + i * quarter_block4x4,
+ column4x4_, sub_size, true);
+ if (children_[i] == nullptr) return false;
+ }
+ return true;
+ default:
+ assert(partition == kPartitionVertical4);
+ for (int i = 0; i < 4; ++i) {
+ children_[i] = ParameterTree::Create(
+ row4x4_, column4x4_ + i * quarter_block4x4, sub_size, true);
+ if (children_[i] == nullptr) return false;
+ }
+ return true;
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/parameter_tree.h b/src/utils/parameter_tree.h
new file mode 100644
index 0000000..935f3eb
--- /dev/null
+++ b/src/utils/parameter_tree.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
+#define LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
+
+#include <cassert>
+#include <memory>
+
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+class ParameterTree : public Allocable {
+ public:
+ // Creates a parameter tree to store the parameters of a block of size
+ // |block_size| starting at coordinates |row4x4| and |column4x4|. If |is_leaf|
+ // is set to true, the memory will be allocated for the BlockParameters for
+ // this node. Otherwise, no memory will be allocated. If |is_leaf| is set to
+ // false, |block_size| must be a square block, i.e.,
+ // kBlockWidthPixels[block_size] must be equal to
+ // kBlockHeightPixels[block_size].
+ static std::unique_ptr<ParameterTree> Create(int row4x4, int column4x4,
+ BlockSize block_size,
+ bool is_leaf = false);
+
+ // Move only (not Copyable).
+ ParameterTree(ParameterTree&& other) = default;
+ ParameterTree& operator=(ParameterTree&& other) = default;
+ ParameterTree(const ParameterTree&) = delete;
+ ParameterTree& operator=(const ParameterTree&) = delete;
+
+ // Set the partition type of the current node to |partition|.
+ // if (partition == kPartitionNone) {
+ // Memory will be allocated for the BlockParameters for this node.
+ // } else if (partition != kPartitionSplit) {
+ // The appropriate child nodes will be populated and memory will be
+ // allocated for the BlockParameters of the children.
+ // } else {
+ // The appropriate child nodes will be populated but they are considered to
+ // be hanging, i.e., future calls to SetPartitionType() on the child nodes
+ // will have to set them or their descendants to a terminal type.
+ // }
+ // This function must be called only once per node.
+ LIBGAV1_MUST_USE_RESULT bool SetPartitionType(Partition partition);
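+  //
+  // Illustrative usage (the coordinates and block size are hypothetical):
+  //   auto tree = ParameterTree::Create(0, 0, kBlock64x64);
+  //   if (tree != nullptr && tree->SetPartitionType(kPartitionSplit)) {
+  //     // tree->children(0)..children(3) are the four hanging 32x32 nodes.
+  //   }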
+
+ // Basic getters.
+ int row4x4() const { return row4x4_; }
+ int column4x4() const { return column4x4_; }
+ BlockSize block_size() const { return block_size_; }
+ Partition partition() const { return partition_; }
+ ParameterTree* children(int index) const {
+ assert(index < 4);
+ return children_[index].get();
+ }
+ // Returns the BlockParameters object of the current node if one exists.
+ // Otherwise returns nullptr. This function will return a valid
+ // BlockParameters object only for leaf nodes.
+ BlockParameters* parameters() const { return parameters_.get(); }
+
+ private:
+ ParameterTree(int row4x4, int column4x4, BlockSize block_size)
+ : row4x4_(row4x4), column4x4_(column4x4), block_size_(block_size) {}
+
+ Partition partition_ = kPartitionNone;
+ std::unique_ptr<BlockParameters> parameters_ = nullptr;
+ int row4x4_ = -1;
+ int column4x4_ = -1;
+ BlockSize block_size_ = kBlockInvalid;
+ bool partition_type_set_ = false;
+
+ // Child values are defined as follows for various partition types:
+ // * Horizontal: 0 top partition; 1 bottom partition; 2 nullptr; 3 nullptr;
+ // * Vertical: 0 left partition; 1 right partition; 2 nullptr; 3 nullptr;
+  // * Split: 0 top-left partition; 1 top-right partition; 2 bottom-left
+ // partition; 3 bottom-right partition;
+ // * HorizontalWithTopSplit: 0 top-left partition; 1 top-right partition; 2
+ // bottom partition; 3 nullptr;
+ // * HorizontalWithBottomSplit: 0 top partition; 1 bottom-left partition; 2
+ // bottom-right partition; 3 nullptr;
+ // * VerticalWithLeftSplit: 0 top-left partition; 1 bottom-left partition; 2
+ // right partition; 3 nullptr;
+  // * VerticalWithRightSplit: 0 left partition; 1 top-right partition; 2
+ // bottom-right partition; 3 nullptr;
+ // * Horizontal4: 0 top partition; 1 second top partition; 2 third top
+ // partition; 3 bottom partition;
+ // * Vertical4: 0 left partition; 1 second left partition; 2 third left
+ // partition; 3 right partition;
+ std::unique_ptr<ParameterTree> children_[4] = {};
+
+ friend class ParameterTreeTest;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_PARAMETER_TREE_H_
diff --git a/src/utils/queue.h b/src/utils/queue.h
new file mode 100644
index 0000000..cffb9ca
--- /dev/null
+++ b/src/utils/queue.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_QUEUE_H_
+#define LIBGAV1_SRC_UTILS_QUEUE_H_
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <new>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// A FIFO queue of a fixed capacity.
+//
+// WARNING: No error checking is performed.
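+//
+// Illustrative usage (the element type and capacity are hypothetical):
+//   Queue<int> queue;
+//   if (!queue.Init(/*capacity=*/8)) return;  // Allocation may fail.
+//   queue.Push(42);
+//   const int front = queue.Front();  // front == 42
+//   queue.Pop();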
+template <typename T>
+class Queue {
+ public:
+ LIBGAV1_MUST_USE_RESULT bool Init(size_t capacity) {
+ elements_.reset(new (std::nothrow) T[capacity]);
+ if (elements_ == nullptr) return false;
+ capacity_ = capacity;
+ return true;
+ }
+
+ // Pushes the element |value| to the end of the queue. It is an error to call
+ // Push() when the queue is full.
+ void Push(T&& value) {
+ assert(size_ < capacity_);
+ elements_[end_++] = std::move(value);
+ if (end_ == capacity_) end_ = 0;
+ ++size_;
+ }
+
+ // Removes the element at the front of the queue. It is an error to call Pop()
+ // when the queue is empty.
+ void Pop() {
+ assert(size_ != 0);
+ const T element = std::move(elements_[begin_++]);
+ static_cast<void>(element);
+ if (begin_ == capacity_) begin_ = 0;
+ --size_;
+ }
+
+ // Returns a reference to the element at the front of the queue. It is an
+ // error to call Front() when the queue is empty.
+ T& Front() {
+ assert(size_ != 0);
+ return elements_[begin_];
+ }
+
+ // Returns a reference to the element at the back of the queue. It is an error
+ // to call Back() when the queue is empty.
+ T& Back() {
+ assert(size_ != 0);
+ const size_t back = ((end_ == 0) ? capacity_ : end_) - 1;
+ return elements_[back];
+ }
+
+ // Clears the queue.
+ void Clear() {
+ while (!Empty()) {
+ Pop();
+ }
+ }
+
+ // Returns true if the queue is empty.
+ bool Empty() const { return size_ == 0; }
+
+ // Returns true if the queue is full.
+ bool Full() const { return size_ >= capacity_; }
+
+ // Returns the number of elements in the queue.
+ size_t Size() const { return size_; }
+
+ private:
+ // An array of |capacity| elements. Used as a circular array.
+ std::unique_ptr<T[]> elements_;
+ size_t capacity_ = 0;
+ // The index of the element to be removed by Pop().
+ size_t begin_ = 0;
+ // The index where the new element is inserted by Push().
+ size_t end_ = 0;
+ size_t size_ = 0;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_QUEUE_H_
diff --git a/src/utils/raw_bit_reader.cc b/src/utils/raw_bit_reader.cc
new file mode 100644
index 0000000..15e980d
--- /dev/null
+++ b/src/utils/raw_bit_reader.cc
@@ -0,0 +1,224 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/raw_bit_reader.h"
+
+#include <cassert>
+#include <limits>
+
+#include "src/utils/common.h"
+#include "src/utils/logging.h"
+
+// Note <cinttypes> is only needed when logging is enabled (for the PRI*
+// macros). It depends on the definition of LIBGAV1_ENABLE_LOGGING from
+// logging.h, thus the non-standard header ordering.
+#if LIBGAV1_ENABLE_LOGGING
+#include <cinttypes>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr int kMaximumLeb128Size = 8;
+constexpr uint8_t kLeb128ValueByteMask = 0x7f;
+constexpr uint8_t kLeb128TerminationByteMask = 0x80;
+
+uint8_t Mod8(size_t n) {
+  // The last 3 bits of |n| give the value of n mod 8.
+ return n & 0x07;
+}
+
+size_t DivideBy8(size_t n, bool ceil) { return (n + (ceil ? 7 : 0)) >> 3; }
+
+} // namespace
+
+RawBitReader::RawBitReader(const uint8_t* data, size_t size)
+ : data_(data), bit_offset_(0), size_(size) {
+ assert(data_ != nullptr || size_ == 0);
+}
+
+int RawBitReader::ReadBitImpl() {
+ const size_t byte_offset = DivideBy8(bit_offset_, false);
+ const uint8_t byte = data_[byte_offset];
+ const uint8_t shift = 7 - Mod8(bit_offset_);
+ ++bit_offset_;
+ return static_cast<int>((byte >> shift) & 0x01);
+}
+
+int RawBitReader::ReadBit() {
+ if (Finished()) return -1;
+ return ReadBitImpl();
+}
+
+int64_t RawBitReader::ReadLiteral(int num_bits) {
+ assert(num_bits <= 32);
+ if (!CanReadLiteral(num_bits)) return -1;
+ assert(num_bits > 0);
+ uint32_t literal = 0;
+ int bit = num_bits - 1;
+ do {
+ // ARM can combine a shift operation with a constant number of bits with
+ // some other operations, such as the OR operation.
+ // Here is an ARM disassembly example:
+ // orr w1, w0, w1, lsl #1
+ // which left shifts register w1 by 1 bit and OR the shift result with
+    // which left-shifts register w1 by 1 bit and ORs the shifted result with
+    // register w0.
+ // literal |= static_cast<uint32_t>(ReadBitImpl()) << bit;
+ literal <<= 1;
+ literal |= static_cast<uint32_t>(ReadBitImpl());
+ } while (--bit >= 0);
+ return literal;
+}
+
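+// su(1+num_bits): a (num_bits + 1)-bit value whose top bit is the sign. For
+// example (illustrative), with num_bits = 4 the five bits 1 0 0 1 1 read as
+// 19; the sign bit (16) is set, so the result is 19 - 2 * 16 = -13.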
+bool RawBitReader::ReadInverseSignedLiteral(int num_bits, int* const value) {
+ assert(num_bits + 1 < 32);
+ *value = static_cast<int>(ReadLiteral(num_bits + 1));
+ if (*value == -1) return false;
+ const int sign_bit = 1 << num_bits;
+ if ((*value & sign_bit) != 0) {
+ *value -= 2 * sign_bit;
+ }
+ return true;
+}
+
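+// le(n): |num_bytes| bytes assembled least significant byte first. For
+// example (illustrative), the bytes 0x01 0x02 with num_bytes = 2 yield
+// 0x01 | (0x02 << 8) = 0x0201 = 513.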
+bool RawBitReader::ReadLittleEndian(int num_bytes, size_t* const value) {
+ // We must be at a byte boundary.
+ assert(Mod8(bit_offset_) == 0);
+ assert(num_bytes <= 4);
+ static_assert(sizeof(size_t) >= 4, "");
+ if (value == nullptr) return false;
+ size_t byte_offset = DivideBy8(bit_offset_, false);
+ if (Finished() || byte_offset + num_bytes > size_) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read Little Endian value.");
+ return false;
+ }
+ *value = 0;
+ for (int i = 0; i < num_bytes; ++i) {
+ const size_t byte = data_[byte_offset];
+ *value |= (byte << (i * 8));
+ ++byte_offset;
+ }
+ bit_offset_ = byte_offset * 8;
+ return true;
+}
+
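+// leb128(): each byte contributes its low 7 bits, least significant group
+// first, and the high bit of each byte signals continuation. For example
+// (illustrative), the two-byte sequence 0xe5 0x0e decodes to
+// (0xe5 & 0x7f) | ((0x0e & 0x7f) << 7) = 0x65 | 0x700 = 0x765 = 1893.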
+bool RawBitReader::ReadUnsignedLeb128(size_t* const value) {
+ // We must be at a byte boundary.
+ assert(Mod8(bit_offset_) == 0);
+ if (value == nullptr) return false;
+ uint64_t value64 = 0;
+ for (int i = 0; i < kMaximumLeb128Size; ++i) {
+ if (Finished()) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read LEB128 value.");
+ return false;
+ }
+ const size_t byte_offset = DivideBy8(bit_offset_, false);
+ const uint8_t byte = data_[byte_offset];
+ bit_offset_ += 8;
+ value64 |= static_cast<uint64_t>(byte & kLeb128ValueByteMask) << (i * 7);
+ if ((byte & kLeb128TerminationByteMask) == 0) {
+ if (value64 != static_cast<size_t>(value64) ||
+ value64 > std::numeric_limits<uint32_t>::max()) {
+ LIBGAV1_DLOG(
+ ERROR, "LEB128 value (%" PRIu64 ") exceeded uint32_t maximum (%u).",
+ value64, std::numeric_limits<uint32_t>::max());
+ return false;
+ }
+ *value = static_cast<size_t>(value64);
+ return true;
+ }
+ }
+ LIBGAV1_DLOG(
+ ERROR,
+ "Exceeded kMaximumLeb128Size (%d) when trying to read LEB128 value",
+ kMaximumLeb128Size);
+ return false;
+}
+
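+// uvlc(): |leading_zeros| zero bits, a one bit, then |leading_zeros| literal
+// bits; the decoded value is literal + (1 << leading_zeros) - 1. For example
+// (illustrative), the bits 0 0 1 1 0 have two leading zeros and literal
+// 0b10 = 2, so they decode to 2 + 3 = 5.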
+bool RawBitReader::ReadUvlc(uint32_t* const value) {
+ if (value == nullptr) return false;
+ int leading_zeros = 0;
+ while (true) {
+ const int bit = ReadBit();
+ if (bit == -1) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read uvlc value.");
+ return false;
+ }
+ if (bit == 1) break;
+ ++leading_zeros;
+ if (leading_zeros == 32) {
+ LIBGAV1_DLOG(ERROR,
+ "Exceeded maximum size (32) when trying to read uvlc value");
+ return false;
+ }
+ }
+ int literal;
+ if (leading_zeros != 0) {
+ literal = static_cast<int>(ReadLiteral(leading_zeros));
+ if (literal == -1) {
+ LIBGAV1_DLOG(ERROR, "Not enough bits to read uvlc value.");
+ return false;
+ }
+ literal += (1U << leading_zeros) - 1;
+ } else {
+ literal = 0;
+ }
+ *value = literal;
+ return true;
+}
+
+bool RawBitReader::AlignToNextByte() {
+ while ((bit_offset_ & 7) != 0) {
+ if (ReadBit() != 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool RawBitReader::VerifyAndSkipTrailingBits(size_t num_bits) {
+ if (ReadBit() != 1) return false;
+ for (size_t i = 0; i < num_bits - 1; ++i) {
+ if (ReadBit() != 0) return false;
+ }
+ return true;
+}
+
+bool RawBitReader::SkipBytes(size_t num_bytes) {
+ // If we are not at a byte boundary, return false.
+ return ((bit_offset_ & 7) != 0) ? false : SkipBits(num_bytes * 8);
+}
+
+bool RawBitReader::SkipBits(size_t num_bits) {
+ // If the reader is already finished, return false.
+ if (Finished()) return false;
+ // If skipping |num_bits| runs out of buffer, return false.
+ const size_t bit_offset = bit_offset_ + num_bits - 1;
+ if (DivideBy8(bit_offset, false) >= size_) return false;
+ bit_offset_ += num_bits;
+ return true;
+}
+
+bool RawBitReader::CanReadLiteral(size_t num_bits) const {
+ if (Finished()) return false;
+ const size_t bit_offset = bit_offset_ + num_bits - 1;
+ return DivideBy8(bit_offset, false) < size_;
+}
+
+bool RawBitReader::Finished() const {
+ return DivideBy8(bit_offset_, false) >= size_;
+}
+
+} // namespace libgav1
diff --git a/src/utils/raw_bit_reader.h b/src/utils/raw_bit_reader.h
new file mode 100644
index 0000000..76e7bfa
--- /dev/null
+++ b/src/utils/raw_bit_reader.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
+#define LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "src/utils/bit_reader.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+class RawBitReader : public BitReader, public Allocable {
+ public:
+ RawBitReader(const uint8_t* data, size_t size);
+ ~RawBitReader() override = default;
+
+ int ReadBit() override;
+ int64_t ReadLiteral(int num_bits) override; // f(n) in the spec.
+ bool ReadInverseSignedLiteral(int num_bits,
+ int* value); // su(1+num_bits) in the spec.
+ bool ReadLittleEndian(int num_bytes,
+ size_t* value); // le(n) in the spec.
+ bool ReadUnsignedLeb128(size_t* value); // leb128() in the spec.
+ // Reads a variable length unsigned number and stores it in |*value|. On a
+  // successful return, |*value| is in the range of 0 to UINT32_MAX - 1,
+ // inclusive.
+ bool ReadUvlc(uint32_t* value); // uvlc() in the spec.
+ bool Finished() const;
+ size_t bit_offset() const { return bit_offset_; }
+ // Return the bytes consumed so far (rounded up).
+ size_t byte_offset() const { return (bit_offset() + 7) >> 3; }
+ size_t size() const { return size_; }
+ // Move to the next byte boundary if not already at one. Return false if any
+ // of the bits being skipped over is non-zero. Return true otherwise. If this
+ // function returns false, the reader is left in an undefined state and must
+ // not be used further. section 5.3.5.
+ bool AlignToNextByte();
+ // Make sure that the trailing bits structure is as expected and skip over it.
+ // section 5.3.4.
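+  // For example (illustrative), with num_bits = 8 the expected trailing-bits
+  // pattern is a one bit followed by seven zero bits.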
+ bool VerifyAndSkipTrailingBits(size_t num_bits);
+ // Skip |num_bytes| bytes. This only works if the current position is at a
+ // byte boundary. The function returns false if the current position is not at
+ // a byte boundary or if skipping |num_bytes| causes the reader to run out of
+ // buffer. Returns true otherwise.
+ bool SkipBytes(size_t num_bytes);
+ // Skip |num_bits| bits. The function returns false if skipping |num_bits|
+ // causes the reader to run out of buffer. Returns true otherwise.
+ bool SkipBits(size_t num_bits);
+
+ private:
+ // Returns true if it is safe to read a literal of size |num_bits|.
+ bool CanReadLiteral(size_t num_bits) const;
+ int ReadBitImpl();
+
+ const uint8_t* const data_;
+ size_t bit_offset_;
+ const size_t size_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_RAW_BIT_READER_H_
diff --git a/src/utils/reference_info.h b/src/utils/reference_info.h
new file mode 100644
index 0000000..a660791
--- /dev/null
+++ b/src/utils/reference_info.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2020 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
+#define LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
+
+#include <array>
+#include <cstdint>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+// This struct collects some members related to reference frames in one place to
+// make it easier to pass them as parameters to some dsp functions.
+struct ReferenceInfo {
+ // Initialize |motion_field_reference_frame| so that
+ // Tile::StoreMotionFieldMvsIntoCurrentFrame() can skip some updates when
+ // the updates are the same as the initialized value.
+ // Set to kReferenceFrameIntra instead of kReferenceFrameNone to simplify
+ // branch conditions in motion field projection.
+  // Initializing contiguous memory in a single pass is very fast. Splitting
+  // the initialization across threads is not recommended unless the memory
+  // each thread initializes is itself contiguous.
+ LIBGAV1_MUST_USE_RESULT bool Reset(int rows, int columns) {
+ return motion_field_reference_frame.Reset(rows, columns,
+ /*zero_initialize=*/true) &&
+ motion_field_mv.Reset(
+ rows, columns,
+#if LIBGAV1_MSAN
+ // It is set in Tile::StoreMotionFieldMvsIntoCurrentFrame() only
+ // for qualified blocks. In MotionFieldProjectionKernel() dsp
+          // optimizations, it is read whether or not it was set.
+ /*zero_initialize=*/true
+#else
+ /*zero_initialize=*/false
+#endif
+ );
+ }
+
+ // All members are used by inter frames only.
+ // For intra frames, they are not initialized.
+
+ std::array<uint8_t, kNumReferenceFrameTypes> order_hint;
+
+ // An example when |relative_distance_from| does not equal
+ // -|relative_distance_to|:
+ // |relative_distance_from| = GetRelativeDistance(7, 71, 25) = -64
+ // -|relative_distance_to| = -GetRelativeDistance(71, 7, 25) = 64
+ // This is why we need both |relative_distance_from| and
+ // |relative_distance_to|.
+ // |relative_distance_from|: Relative distances from reference frames to this
+ // frame.
+ std::array<int8_t, kNumReferenceFrameTypes> relative_distance_from;
+ // |relative_distance_to|: Relative distances to reference frames.
+ std::array<int8_t, kNumReferenceFrameTypes> relative_distance_to;
+
+ // Skip motion field projection of specific types of frames if their
+ // |relative_distance_to| is negative or too large.
+ std::array<bool, kNumReferenceFrameTypes> skip_references;
+ // Lookup table to get motion field projection division multiplier of specific
+ // types of frames. Derived from kProjectionMvDivisionLookup.
+ std::array<int16_t, kNumReferenceFrameTypes> projection_divisions;
+
+  // The current frame's |motion_field_reference_frame| and |motion_field_mv|
+  // are guaranteed to be allocated only when refresh_frame_flags is not 0.
+ // Array of size (rows4x4 / 2) x (columns4x4 / 2). Entry at i, j corresponds
+ // to MfRefFrames[i * 2 + 1][j * 2 + 1] in the spec.
+ Array2D<ReferenceFrameType> motion_field_reference_frame;
+ // Array of size (rows4x4 / 2) x (columns4x4 / 2). Entry at i, j corresponds
+ // to MfMvs[i * 2 + 1][j * 2 + 1] in the spec.
+ Array2D<MotionVector> motion_field_mv;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_REFERENCE_INFO_H_
diff --git a/src/utils/segmentation.cc b/src/utils/segmentation.cc
new file mode 100644
index 0000000..75fa776
--- /dev/null
+++ b/src/utils/segmentation.cc
@@ -0,0 +1,31 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation.h"
+
+namespace libgav1 {
+
+const int8_t kSegmentationFeatureBits[kSegmentFeatureMax] = {8, 6, 6, 6,
+ 6, 3, 0, 0};
+const int kSegmentationFeatureMaxValues[kSegmentFeatureMax] = {
+ 255,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ kMaxLoopFilterValue,
+ 7,
+ 0,
+ 0};
+
+} // namespace libgav1
diff --git a/src/utils/segmentation.h b/src/utils/segmentation.h
new file mode 100644
index 0000000..67ff74c
--- /dev/null
+++ b/src/utils/segmentation.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_SEGMENTATION_H_
+#define LIBGAV1_SRC_UTILS_SEGMENTATION_H_
+
+#include <cstdint>
+
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+
+extern const int8_t kSegmentationFeatureBits[kSegmentFeatureMax];
+extern const int kSegmentationFeatureMaxValues[kSegmentFeatureMax];
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_SEGMENTATION_H_
diff --git a/src/utils/segmentation_map.cc b/src/utils/segmentation_map.cc
new file mode 100644
index 0000000..4284ca2
--- /dev/null
+++ b/src/utils/segmentation_map.cc
@@ -0,0 +1,49 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation_map.h"
+
+#include <cassert>
+#include <cstring>
+#include <new>
+
+namespace libgav1 {
+
+bool SegmentationMap::Allocate(int32_t rows4x4, int32_t columns4x4) {
+ rows4x4_ = rows4x4;
+ columns4x4_ = columns4x4;
+ segment_id_buffer_.reset(new (std::nothrow) int8_t[rows4x4_ * columns4x4_]);
+ if (segment_id_buffer_ == nullptr) return false;
+ segment_id_.Reset(rows4x4_, columns4x4_, segment_id_buffer_.get());
+ return true;
+}
+
+void SegmentationMap::Clear() {
+ memset(segment_id_buffer_.get(), 0, rows4x4_ * columns4x4_);
+}
+
+void SegmentationMap::CopyFrom(const SegmentationMap& from) {
+ assert(rows4x4_ == from.rows4x4_ && columns4x4_ == from.columns4x4_);
+ memcpy(segment_id_buffer_.get(), from.segment_id_buffer_.get(),
+ rows4x4_ * columns4x4_);
+}
+
+void SegmentationMap::FillBlock(int row4x4, int column4x4, int block_width4x4,
+ int block_height4x4, int8_t segment_id) {
+ for (int y = 0; y < block_height4x4; ++y) {
+ memset(&segment_id_[row4x4 + y][column4x4], segment_id, block_width4x4);
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/segmentation_map.h b/src/utils/segmentation_map.h
new file mode 100644
index 0000000..499be24
--- /dev/null
+++ b/src/utils/segmentation_map.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
+#define LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+
+// SegmentationMap stores the segment id associated with each 4x4 block in the
+// frame.
+class SegmentationMap {
+ public:
+ SegmentationMap() = default;
+
+ // Not copyable or movable
+ SegmentationMap(const SegmentationMap&) = delete;
+ SegmentationMap& operator=(const SegmentationMap&) = delete;
+
+ // Allocates an internal buffer of the given dimensions to hold the
+ // segmentation map. The memory in the buffer is not initialized. Returns
+ // true on success, false on failure (for example, out of memory).
+ LIBGAV1_MUST_USE_RESULT bool Allocate(int32_t rows4x4, int32_t columns4x4);
+
+ int8_t segment_id(int row4x4, int column4x4) const {
+ return segment_id_[row4x4][column4x4];
+ }
+
+ // Sets every element in the segmentation map to 0.
+ void Clear();
+
+ // Copies the entire segmentation map. |from| must be of the same dimensions.
+ void CopyFrom(const SegmentationMap& from);
+
+ // Sets the region of segmentation map covered by the block to |segment_id|.
+ // The block is located at |row4x4|, |column4x4| and has dimensions
+ // |block_width4x4| and |block_height4x4|.
+ void FillBlock(int row4x4, int column4x4, int block_width4x4,
+ int block_height4x4, int8_t segment_id);
+
+ private:
+ int32_t rows4x4_ = 0;
+ int32_t columns4x4_ = 0;
+
+ // segment_id_ is a rows4x4_ by columns4x4_ 2D array. The underlying data
+ // buffer is dynamically allocated and owned by segment_id_buffer_.
+ std::unique_ptr<int8_t[]> segment_id_buffer_;
+ Array2DView<int8_t> segment_id_;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_SEGMENTATION_MAP_H_
diff --git a/src/utils/stack.h b/src/utils/stack.h
new file mode 100644
index 0000000..39133b9
--- /dev/null
+++ b/src/utils/stack.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_STACK_H_
+#define LIBGAV1_SRC_UTILS_STACK_H_
+
+#include <cassert>
+#include <utility>
+
+namespace libgav1 {
+
+// A LIFO stack of a fixed capacity. The elements are moved using std::move, so
+// the element type T has to be movable.
+//
+// WARNING: No error checking is performed.
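+//
+// Illustrative usage (the element type and capacity are hypothetical):
+//   Stack<int, 8> stack;
+//   stack.Push(1);
+//   stack.Push(2);
+//   const int top = stack.Pop();  // top == 2; the stack now holds only 1.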
+template <typename T, int capacity>
+class Stack {
+ public:
+ // Pushes the element |value| to the top of the stack. It is an error to call
+ // Push() when the stack is full.
+ void Push(T value) {
+ ++top_;
+ assert(top_ < capacity);
+ elements_[top_] = std::move(value);
+ }
+
+ // Returns the element at the top of the stack and removes it from the stack.
+ // It is an error to call Pop() when the stack is empty.
+ T Pop() {
+ assert(top_ >= 0);
+ return std::move(elements_[top_--]);
+ }
+
+ // Returns true if the stack is empty.
+ bool Empty() const { return top_ < 0; }
+
+ private:
+ static_assert(capacity > 0, "");
+ T elements_[capacity];
+ // The array index of the top of the stack. The stack is empty if top_ is -1.
+ int top_ = -1;
+};
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_STACK_H_
diff --git a/src/utils/threadpool.cc b/src/utils/threadpool.cc
new file mode 100644
index 0000000..8c8f4fe
--- /dev/null
+++ b/src/utils/threadpool.cc
@@ -0,0 +1,323 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/threadpool.h"
+
+#if defined(_MSC_VER)
+#include <process.h>
+#include <windows.h>
+#else // defined(_MSC_VER)
+#include <pthread.h>
+#endif // defined(_MSC_VER)
+#if defined(__ANDROID__) || defined(__GLIBC__)
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
+
+#if defined(__ANDROID__)
+#include <chrono> // NOLINT (unapproved c++11 header)
+#endif
+
+// The glibc wrapper for the gettid() system call was added in glibc 2.30.
+// Emulate it for older versions of glibc.
+#if defined(__GLIBC_PREREQ)
+#if !__GLIBC_PREREQ(2, 30)
+
+#include <sys/syscall.h>
+
+static pid_t gettid() { return static_cast<pid_t>(syscall(SYS_gettid)); }
+
+#endif
+#endif // defined(__GLIBC_PREREQ)
+
+namespace libgav1 {
+
+#if defined(__ANDROID__)
+namespace {
+
+using Clock = std::chrono::steady_clock;
+using Duration = Clock::duration;
+constexpr Duration kBusyWaitDuration =
+ std::chrono::duration_cast<Duration>(std::chrono::duration<double>(2e-3));
+
+} // namespace
+#endif // defined(__ANDROID__)
+
+// static
+std::unique_ptr<ThreadPool> ThreadPool::Create(int num_threads) {
+ return Create(/*name_prefix=*/"", num_threads);
+}
+
+// static
+std::unique_ptr<ThreadPool> ThreadPool::Create(const char name_prefix[],
+ int num_threads) {
+ if (name_prefix == nullptr || num_threads <= 0) return nullptr;
+ std::unique_ptr<WorkerThread*[]> threads(new (std::nothrow)
+ WorkerThread*[num_threads]);
+ if (threads == nullptr) return nullptr;
+ std::unique_ptr<ThreadPool> pool(new (std::nothrow) ThreadPool(
+ name_prefix, std::move(threads), num_threads));
+ if (pool != nullptr && !pool->StartWorkers()) {
+ pool = nullptr;
+ }
+ return pool;
+}
+
+ThreadPool::ThreadPool(const char name_prefix[],
+ std::unique_ptr<WorkerThread*[]> threads,
+ int num_threads)
+ : threads_(std::move(threads)), num_threads_(num_threads) {
+ threads_[0] = nullptr;
+ assert(name_prefix != nullptr);
+ const size_t name_prefix_len =
+ std::min(strlen(name_prefix), sizeof(name_prefix_) - 1);
+ memcpy(name_prefix_, name_prefix, name_prefix_len);
+ name_prefix_[name_prefix_len] = '\0';
+}
+
+ThreadPool::~ThreadPool() { Shutdown(); }
+
+void ThreadPool::Schedule(std::function<void()> closure) {
+ LockMutex();
+ if (!queue_.GrowIfNeeded()) {
+ // queue_ is full and we can't grow it. Run |closure| directly.
+ UnlockMutex();
+ closure();
+ return;
+ }
+ queue_.Push(std::move(closure));
+ UnlockMutex();
+ SignalOne();
+}
+
+int ThreadPool::num_threads() const { return num_threads_; }
+
+// A simple implementation that mirrors the non-portable Thread. We may
+// choose to expand this in the future as a portable implementation of
+// Thread, or replace it at such a time as one is implemented.
+class ThreadPool::WorkerThread : public Allocable {
+ public:
+ // Creates and starts a thread that runs pool->WorkerFunction().
+ explicit WorkerThread(ThreadPool* pool);
+
+ // Not copyable or movable.
+ WorkerThread(const WorkerThread&) = delete;
+ WorkerThread& operator=(const WorkerThread&) = delete;
+
+ // REQUIRES: Join() must have been called if Start() was called and
+ // succeeded.
+ ~WorkerThread() = default;
+
+ LIBGAV1_MUST_USE_RESULT bool Start();
+
+ // Joins with the running thread.
+ void Join();
+
+ private:
+#if defined(_MSC_VER)
+ static unsigned int __stdcall ThreadBody(void* arg);
+#else
+ static void* ThreadBody(void* arg);
+#endif
+
+ void SetupName();
+ void Run();
+
+ ThreadPool* pool_;
+#if defined(_MSC_VER)
+ HANDLE handle_;
+#else
+ pthread_t thread_;
+#endif
+};
+
+ThreadPool::WorkerThread::WorkerThread(ThreadPool* pool) : pool_(pool) {}
+
+#if defined(_MSC_VER)
+
+bool ThreadPool::WorkerThread::Start() {
+ // Since our code calls the C run-time library (CRT), use _beginthreadex
+ // rather than CreateThread. Microsoft documentation says "If a thread
+ // created using CreateThread calls the CRT, the CRT may terminate the
+ // process in low-memory conditions."
+ uintptr_t handle = _beginthreadex(
+ /*security=*/nullptr, /*stack_size=*/0, ThreadBody, this,
+ /*initflag=*/CREATE_SUSPENDED, /*thrdaddr=*/nullptr);
+ if (handle == 0) return false;
+ handle_ = reinterpret_cast<HANDLE>(handle);
+ ResumeThread(handle_);
+ return true;
+}
+
+void ThreadPool::WorkerThread::Join() {
+ WaitForSingleObject(handle_, INFINITE);
+ CloseHandle(handle_);
+}
+
+unsigned int ThreadPool::WorkerThread::ThreadBody(void* arg) {
+ auto* thread = static_cast<WorkerThread*>(arg);
+ thread->Run();
+ return 0;
+}
+
+void ThreadPool::WorkerThread::SetupName() {
+ // Not currently supported on Windows.
+}
+
+#else // defined(_MSC_VER)
+
+bool ThreadPool::WorkerThread::Start() {
+ return pthread_create(&thread_, nullptr, ThreadBody, this) == 0;
+}
+
+void ThreadPool::WorkerThread::Join() { pthread_join(thread_, nullptr); }
+
+void* ThreadPool::WorkerThread::ThreadBody(void* arg) {
+ auto* thread = static_cast<WorkerThread*>(arg);
+ thread->Run();
+ return nullptr;
+}
+
+void ThreadPool::WorkerThread::SetupName() {
+ if (pool_->name_prefix_[0] != '\0') {
+#if defined(__APPLE__)
+ // Apple's version of pthread_setname_np takes one argument and operates on
+ // the current thread only. Also, pthread_mach_thread_np is Apple-specific.
+ // The maximum size of the |name| buffer was noted in the Chromium source
+ // code and was confirmed by experiments.
+ char name[64];
+ mach_port_t id = pthread_mach_thread_np(pthread_self());
+ int rv = snprintf(name, sizeof(name), "%s/%" PRId64, pool_->name_prefix_,
+ static_cast<int64_t>(id));
+ assert(rv >= 0);
+ rv = pthread_setname_np(name);
+ assert(rv == 0);
+ static_cast<void>(rv);
+#elif defined(__ANDROID__) || defined(__GLIBC__)
+ // If the |name| buffer is longer than 16 bytes, pthread_setname_np fails
+ // with error 34 (ERANGE) on Android.
+ char name[16];
+ pid_t id = gettid();
+ int rv = snprintf(name, sizeof(name), "%s/%" PRId64, pool_->name_prefix_,
+ static_cast<int64_t>(id));
+ assert(rv >= 0);
+ rv = pthread_setname_np(pthread_self(), name);
+ assert(rv == 0);
+ static_cast<void>(rv);
+#endif
+ }
+}
+
+#endif // defined(_MSC_VER)
+
+void ThreadPool::WorkerThread::Run() {
+ SetupName();
+ pool_->WorkerFunction();
+}
+
+bool ThreadPool::StartWorkers() {
+ if (!queue_.Init()) return false;
+ for (int i = 0; i < num_threads_; ++i) {
+ threads_[i] = new (std::nothrow) WorkerThread(this);
+ if (threads_[i] == nullptr) return false;
+ if (!threads_[i]->Start()) {
+ delete threads_[i];
+ threads_[i] = nullptr;
+ return false;
+ }
+ }
+ return true;
+}
+
+void ThreadPool::WorkerFunction() {
+ LockMutex();
+ while (true) {
+ if (queue_.Empty()) {
+ if (exit_threads_) {
+ break; // Queue is empty and exit was requested.
+ }
+#if defined(__ANDROID__)
+      // On Android, if we go to a conditional wait right away, the CPU governor
+ // kicks in and starts shutting the cores down. So we do a very small busy
+ // wait to see if we get our next job within that period. This
+ // significantly improves the performance of common cases of tile parallel
+ // decoding. If we don't receive a job in the busy wait time, we then go
+ // to an actual conditional wait as usual.
+ UnlockMutex();
+ bool found_job = false;
+ const auto wait_start = Clock::now();
+ while (Clock::now() - wait_start < kBusyWaitDuration) {
+ LockMutex();
+ if (!queue_.Empty()) {
+ found_job = true;
+ break;
+ }
+ UnlockMutex();
+ }
+ // If |found_job| is true, we simply continue since we already hold the
+ // mutex and we know for sure that the |queue_| is not empty.
+ if (found_job) continue;
+      // Since |found_job| was false, the mutex is not being held at this
+ // point.
+ LockMutex();
+ // Ensure that the queue is still empty.
+ if (!queue_.Empty()) continue;
+ if (exit_threads_) {
+ break; // Queue is empty and exit was requested.
+ }
+#endif // defined(__ANDROID__)
+ // Queue is still empty, wait for signal or broadcast.
+ Wait();
+ } else {
+ // Take a job from the queue.
+ std::function<void()> job = std::move(queue_.Front());
+ queue_.Pop();
+
+ UnlockMutex();
+ // Note that it is good practice to surround this with a try/catch so
+ // the thread pool doesn't go to hell if the job throws an exception.
+ // This is omitted here because Google3 doesn't like exceptions.
+ std::move(job)();
+ job = nullptr;
+
+ LockMutex();
+ }
+ }
+ UnlockMutex();
+}
+
+void ThreadPool::Shutdown() {
+ // Tell worker threads how to exit.
+ LockMutex();
+ exit_threads_ = true;
+ UnlockMutex();
+ SignalAll();
+
+ // Join all workers. This will block.
+ for (int i = 0; i < num_threads_; ++i) {
+ if (threads_[i] == nullptr) break;
+ threads_[i]->Join();
+ delete threads_[i];
+ }
+}
+
+} // namespace libgav1
diff --git a/src/utils/threadpool.h b/src/utils/threadpool.h
new file mode 100644
index 0000000..fac875e
--- /dev/null
+++ b/src/utils/threadpool.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_THREADPOOL_H_
+#define LIBGAV1_SRC_UTILS_THREADPOOL_H_
+
+#include <functional>
+#include <memory>
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if !defined(LIBGAV1_THREADPOOL_USE_STD_MUTEX)
+#if defined(__ANDROID__) || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE)
+#define LIBGAV1_THREADPOOL_USE_STD_MUTEX 1
+#else
+#define LIBGAV1_THREADPOOL_USE_STD_MUTEX 0
+#endif
+#endif
+
+#if LIBGAV1_THREADPOOL_USE_STD_MUTEX
+#include <condition_variable> // NOLINT (unapproved c++11 header)
+#include <mutex> // NOLINT (unapproved c++11 header)
+#else
+// absl::Mutex & absl::CondVar are significantly faster than the pthread
+// variants on platforms other than Android. iOS may deadlock on Shutdown()
+// using absl, see b/142251739.
+#include "absl/base/thread_annotations.h"
+#include "absl/synchronization/mutex.h"
+#endif
+
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/executor.h"
+#include "src/utils/memory.h"
+#include "src/utils/unbounded_queue.h"
+
+namespace libgav1 {
+
+// An implementation of ThreadPool using POSIX threads (pthreads) or Windows
+// threads.
+//
+// - The pool allocates a fixed number of worker threads on instantiation.
+// - The worker threads will pick up work jobs as they arrive.
+// - If all workers are busy, work jobs are queued for later execution.
+//
+// The thread pool is shut down when the pool is destroyed.
+//
+// Example usage of the thread pool:
+// {
+// std::unique_ptr<ThreadPool> pool = ThreadPool::Create(4);
+// for (int i = 0; i < 100; ++i) { // Dispatch 100 jobs.
+// pool->Schedule([&my_data]() { MyFunction(&my_data); });
+// }
+// } // ThreadPool gets destroyed only when all jobs are done.
+class ThreadPool : public Executor, public Allocable {
+ public:
+ // Creates the thread pool with the specified number of worker threads.
+ // If num_threads is 1, the closures are run in FIFO order.
+ static std::unique_ptr<ThreadPool> Create(int num_threads);
+
+ // Like the above factory method, but also sets the name prefix for threads.
+ static std::unique_ptr<ThreadPool> Create(const char name_prefix[],
+ int num_threads);
+
+ // The destructor will shut down the thread pool; all jobs already in the
+ // queue are executed before destruction completes. Note that after
+ // shutdown, the thread pool does not accept further jobs.
+ ~ThreadPool() override;
+
+ // Adds the specified "closure" to the queue for processing. If worker threads
+ // are available, "closure" will run immediately. Otherwise "closure" is
+ // queued for later execution.
+ //
+ // NOTE: If the internal queue is full and cannot be resized because of an
+ // out-of-memory error, the current thread runs "closure" before returning
+ // from Schedule(). For our use cases, this seems better than the
+ // alternatives:
+ // 1. Return a failure status.
+ // 2. Have the current thread wait until the queue is not full.
+ void Schedule(std::function<void()> closure) override;
+
+ int num_threads() const;
+
+ private:
+ class WorkerThread;
+
+ // Creates the thread pool with the specified number of worker threads.
+ // If num_threads is 1, the closures are run in FIFO order.
+ ThreadPool(const char name_prefix[], std::unique_ptr<WorkerThread*[]> threads,
+ int num_threads);
+
+ // Starts the worker pool.
+ LIBGAV1_MUST_USE_RESULT bool StartWorkers();
+
+ void WorkerFunction();
+
+ // Shuts down the thread pool, i.e. worker threads finish their work and
+ // pick up new jobs until the queue is empty. This call will block until
+ // the shutdown is complete.
+ //
+ // Note: If a worker encounters an empty queue after this call, it will exit.
+ // Other workers might still be running, and if the queue fills up again, the
+ // thread pool will continue to operate with a decreased number of workers.
+ // It is up to the caller to prevent adding new jobs.
+ void Shutdown();
+
+#if LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ void LockMutex() { queue_mutex_.lock(); }
+ void UnlockMutex() { queue_mutex_.unlock(); }
+
+ void Wait() {
+ // Adopt the mutex that LockMutex() already holds; wait() atomically
+ // releases it and reacquires it when woken. release() then drops
+ // ownership so the unique_lock destructor does not unlock it again.
+ std::unique_lock<std::mutex> queue_lock(queue_mutex_, std::adopt_lock);
+ condition_.wait(queue_lock);
+ queue_lock.release();
+ }
+
+ void SignalOne() { condition_.notify_one(); }
+ void SignalAll() { condition_.notify_all(); }
+
+ std::condition_variable condition_;
+ std::mutex queue_mutex_;
+
+#else // !LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ void LockMutex() ABSL_EXCLUSIVE_LOCK_FUNCTION() { queue_mutex_.Lock(); }
+ void UnlockMutex() ABSL_UNLOCK_FUNCTION() { queue_mutex_.Unlock(); }
+ void Wait() { condition_.Wait(&queue_mutex_); }
+ void SignalOne() { condition_.Signal(); }
+ void SignalAll() { condition_.SignalAll(); }
+
+ absl::CondVar condition_;
+ absl::Mutex queue_mutex_;
+
+#endif // LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+ UnboundedQueue<std::function<void()>> queue_ LIBGAV1_GUARDED_BY(queue_mutex_);
+ // If not all the worker threads are created, the first entry after the
+ // created worker threads is a null pointer.
+ const std::unique_ptr<WorkerThread*[]> threads_;
+
+ bool exit_threads_ LIBGAV1_GUARDED_BY(queue_mutex_) = false;
+ const int num_threads_ = 0;
+ // name_prefix_ is a C string, whose length is restricted to 16 characters,
+ // including the terminating null byte ('\0'). This restriction comes from
+ // the Linux pthread_setname_np() function.
+ char name_prefix_[16];
+};
+
+} // namespace libgav1
+
+#undef LIBGAV1_THREADPOOL_USE_STD_MUTEX
+
+#endif // LIBGAV1_SRC_UTILS_THREADPOOL_H_
diff --git a/src/utils/types.h b/src/utils/types.h
new file mode 100644
index 0000000..374f06b
--- /dev/null
+++ b/src/utils/types.h
@@ -0,0 +1,525 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_TYPES_H_
+#define LIBGAV1_SRC_UTILS_TYPES_H_
+
+#include <array>
+#include <cstdint>
+#include <memory>
+
+#include "src/utils/array_2d.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+struct MotionVector : public Allocable {
+ static constexpr int kRow = 0;
+ static constexpr int kColumn = 1;
+
+ MotionVector() = default;
+ MotionVector(const MotionVector& mv) = default;
+
+ MotionVector& operator=(const MotionVector& rhs) {
+ mv32 = rhs.mv32;
+ return *this;
+ }
+
+ bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; }
+
+ union {
+ // Motion vectors will always fit in int16_t, and using int16_t here
+ // instead of int saves significant memory, since some of the frame-sized
+ // structures store motion vectors.
+ int16_t mv[2];
+ // A uint32_t view into the |mv| array. Useful for cases where both motion
+ // vectors have to be copied or compared with a single 32-bit instruction.
+ uint32_t mv32;
+ };
+};
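+
+// For illustration: the |mv32| view lets both components be handled in a
+// single operation, e.g. |mv.mv32 = 0;| zeroes the row and column components
+// at once; operator= and operator== above rely on exactly this.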
+
+union CompoundMotionVector {
+ CompoundMotionVector() = default;
+ CompoundMotionVector(const CompoundMotionVector& mv) = default;
+
+ CompoundMotionVector& operator=(const CompoundMotionVector& rhs) {
+ mv64 = rhs.mv64;
+ return *this;
+ }
+
+ bool operator==(const CompoundMotionVector& rhs) const {
+ return mv64 == rhs.mv64;
+ }
+
+ MotionVector mv[2];
+ // A uint64_t view into the |mv| array. Useful for cases where both motion
+ // vectors have to be copied or compared with a single 64-bit instruction.
+ uint64_t mv64;
+};
+
+// Stores the motion information used for motion field estimation.
+struct TemporalMotionField : public Allocable {
+ Array2D<MotionVector> mv;
+ Array2D<int8_t> reference_offset;
+};
+
+// MvContexts contains the contexts used to decode portions of an inter block
+// mode info to set the y_mode field in BlockParameters.
+//
+// The contexts in the struct correspond to the ZeroMvContext, RefMvContext,
+// and NewMvContext variables in the spec.
+struct MvContexts {
+ int zero_mv;
+ int reference_mv;
+ int new_mv;
+};
+
+struct PaletteModeInfo {
+ uint8_t size[kNumPlaneTypes];
+ uint16_t color[kMaxPlanes][kMaxPaletteSize];
+};
+
+// Stores the parameters used by the prediction process. The members of the
+// struct are filled in when parsing the bitstream and used when the prediction
+// is computed. The information in this struct is associated with a single
+// block.
+// Both BlockParameters and PredictionParameters store information pertaining
+// to a block; the difference is that BlockParameters outlives the block
+// itself (for example, some of the variables in BlockParameters are used to
+// compute the context for reading elements in subsequent blocks).
+struct PredictionParameters : public Allocable {
+ // Restores the index into the unsorted mv stack from the least significant
+ // 3 bits of the sorted |weight_index_stack| entry.
+ const MotionVector& reference_mv(int stack_index) const {
+ return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)];
+ }
+ const MotionVector& reference_mv(int stack_index, int mv_index) const {
+ return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]
+ .mv[mv_index];
+ }
+
+ void IncreaseWeight(ptrdiff_t index, int weight) {
+ weight_index_stack[index] += weight << 3;
+ }
+
+ void SetWeightIndexStackEntry(int index, int weight) {
+ weight_index_stack[index] = (weight << 3) + 7 - index;
+ }
+
+ bool use_filter_intra;
+ FilterIntraPredictor filter_intra_mode;
+ int angle_delta[kNumPlaneTypes];
+ int8_t cfl_alpha_u;
+ int8_t cfl_alpha_v;
+ int max_luma_width;
+ int max_luma_height;
+ Array2D<uint8_t> color_index_map[kNumPlaneTypes];
+ bool use_intra_block_copy;
+ InterIntraMode inter_intra_mode;
+ bool is_wedge_inter_intra;
+ int wedge_index;
+ int wedge_sign;
+ bool mask_is_inverse;
+ MotionMode motion_mode;
+ CompoundPredictionType compound_prediction_type;
+ union {
+ // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after
+ // construction. reference_mv() must be called to get the correct element.
+ MotionVector ref_mv_stack[kMaxRefMvStackSize];
+ CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize];
+ };
+ // The least significant 3 bits of |weight_index_stack| store the index
+ // information, and the remaining bits store the weight. The index
+ // information is actually 7 - index, which makes the descending-order sort
+ // stable (it preserves the original order for elements with the same
+ // weight). Sorting an int16_t array is much faster than sorting a struct
+ // array with weight and index stored separately.
+ int16_t weight_index_stack[kMaxRefMvStackSize];
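+ // Worked example of the encoding: SetWeightIndexStackEntry(2, 5) stores
+ // (5 << 3) + 7 - 2 = 45, and reference_mv() later recovers the original
+ // index as 7 - (45 & 7) = 2, wherever sorting has moved the entry.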
+ // In the spec, the weights of all the nearest mvs are incremented by a bonus
+ // weight which is larger than any natural weight, and later the weights of
+ // the mvs are compared with this bonus weight to determine their contexts. We
+ // replace this procedure by introducing |nearest_mv_count|, which records the
+ // count of the nearest mvs. Since all the nearest mvs are in the beginning of
+ // the mv stack, the index of a mv in the mv stack can be compared with
+ // |nearest_mv_count| to get that mv's context.
+ int nearest_mv_count;
+ int ref_mv_count;
+ int ref_mv_index;
+ MotionVector global_mv[2];
+ int num_warp_samples;
+ int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
+};
+
+// A lot of BlockParameters objects are created, so the smallest type is used
+// for each field. The ranges of some fields are documented to justify why
+// their types are large enough.
+struct BlockParameters : public Allocable {
+ BlockSize size;
+ bool skip;
+ // True means that this block will use some default settings (that
+ // correspond to compound prediction) and so most of the mode info is
+ // skipped. False means that the mode info is not skipped.
+ bool skip_mode;
+ bool is_inter;
+ bool is_explicit_compound_type; // comp_group_idx in the spec.
+ bool is_compound_type_average; // compound_idx in the spec.
+ bool is_global_mv_block;
+ bool use_predicted_segment_id; // only valid with temporal update enabled.
+ int8_t segment_id; // segment_id is in the range [0, 7].
+ PredictionMode y_mode;
+ PredictionMode uv_mode;
+ TransformSize transform_size;
+ TransformSize uv_transform_size;
+ InterpolationFilter interpolation_filter[2];
+ ReferenceFrameType reference_frame[2];
+ // The index of this array is as follows:
+ // 0 - Y plane vertical filtering.
+ // 1 - Y plane horizontal filtering.
+ // 2 - U plane (both directions).
+ // 3 - V plane (both directions).
+ uint8_t deblock_filter_level[kFrameLfCount];
+ CompoundMotionVector mv;
+ PaletteModeInfo palette_mode_info;
+ // When |Tile::split_parse_and_decode_| is true, each block gets its own
+ // instance of |prediction_parameters|. When it is false, all the blocks point
+ // to |Tile::prediction_parameters_|. This field is valid only as long as the
+ // block is *being* decoded. The lifetime and usage of this field can be
+ // better understood by following its flow in tile.cc.
+ std::unique_ptr<PredictionParameters> prediction_parameters;
+};
+
+// A five dimensional array used to store the wedge masks. The dimensions are:
+// - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
+// - flip_sign (0 or 1).
+// - wedge_index (0 to 15).
+// - each entry indexed by the three dimensions above is a 2D array of
+// block_width by block_height.
+using WedgeMaskArray =
+ std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>;
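+// For example, a single mask sample would be addressed as
+// wedge_masks[block_size_index][flip_sign][wedge_index][y][x], with the last
+// two indices resolved by Array2D.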
+
+enum GlobalMotionTransformationType : uint8_t {
+ kGlobalMotionTransformationTypeIdentity,
+ kGlobalMotionTransformationTypeTranslation,
+ kGlobalMotionTransformationTypeRotZoom,
+ kGlobalMotionTransformationTypeAffine,
+ kNumGlobalMotionTransformationTypes
+};
+
+// Global motion and warped motion parameters. See the paper for more info:
+// S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally
+// adaptive warped motion compensation in video compression", Proc. IEEE
+// International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017.
+struct GlobalMotion {
+ GlobalMotionTransformationType type;
+ int32_t params[6];
+
+ // Represent two shearing operations. Computed from |params| by SetupShear().
+ //
+ // The least significant six (= kWarpParamRoundingBits) bits are all zeros.
+ // (This means alpha, beta, gamma, and delta could be represented by a 10-bit
+ // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum
+ // value is 32704 = 0x7fc0, the largest int16_t value whose least significant
+ // six bits are all zeros.
+ //
+ // Valid warp parameters (as validated by SetupShear()) have smaller ranges.
+ // Their absolute values are less than 2^14 (= 16384). (This follows from
+ // the warpValid check at the end of Section 7.11.3.6.)
+ //
+ // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which
+ // is outside the range of int16_t. When cast to int16_t, 32768 becomes
+ // -32768. This potential int16_t overflow does not matter because either
+ // 32768 or -32768 causes SetupShear() to return false.
+ int16_t alpha;
+ int16_t beta;
+ int16_t gamma;
+ int16_t delta;
+};
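+
+// Consistency check for the ranges above: with the least significant six bits
+// zero, the representable values are exactly the 10-bit signed range
+// [-512, 511] scaled by 64, i.e. [-32768, 32704] = [INT16_MIN, 0x7fc0].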
+
+// Loop filter parameters:
+//
+// If level[0] and level[1] are both equal to 0, the loop filter process is
+// not invoked.
+//
+// |sharpness| and |delta_enabled| are only used by the loop filter process.
+//
+// The |ref_deltas| and |mode_deltas| arrays are used not only by the loop
+// filter process but also by the reference frame update and loading
+// processes. The loop filter process uses |ref_deltas| and |mode_deltas| only
+// when |delta_enabled| is true.
+struct LoopFilter {
+ // Contains loop filter strength values in the range of [0, 63].
+ std::array<int8_t, kFrameLfCount> level;
+ // Indicates the sharpness level in the range of [0, 7].
+ int8_t sharpness;
+ // Whether the filter level depends on the mode and reference frame used to
+ // predict a block.
+ bool delta_enabled;
+ // Whether additional syntax elements were read that specify which mode and
+ // reference frame deltas are to be updated. Corresponds to the
+ // loop_filter_delta_update field in Section 5.9.11 of the spec.
+ bool delta_update;
+ // Contains the adjustment needed for the filter level based on the chosen
+ // reference frame, in the range of [-64, 63].
+ std::array<int8_t, kNumReferenceFrameTypes> ref_deltas;
+ // Contains the adjustment needed for the filter level based on the chosen
+ // mode, in the range of [-64, 63].
+ std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas;
+};
+
+struct Delta {
+ bool present;
+ uint8_t scale;
+ bool multi;
+};
+
+struct Cdef {
+ uint8_t damping; // damping value from the spec + (bitdepth - 8).
+ uint8_t bits;
+ // All the strength values are the values from the spec and left shifted by
+ // (bitdepth - 8).
+ uint8_t y_primary_strength[kMaxCdefStrengths];
+ uint8_t y_secondary_strength[kMaxCdefStrengths];
+ uint8_t uv_primary_strength[kMaxCdefStrengths];
+ uint8_t uv_secondary_strength[kMaxCdefStrengths];
+};
+
+struct TileInfo {
+ bool uniform_spacing;
+ int sb_rows;
+ int sb_columns;
+ int tile_count;
+ int tile_columns_log2;
+ int tile_columns;
+ int tile_column_start[kMaxTileColumns + 1];
+ // This field is not used by libgav1, but is populated for use by some
+ // hardware decoders. So it must not be removed.
+ int tile_column_width_in_superblocks[kMaxTileColumns + 1];
+ int tile_rows_log2;
+ int tile_rows;
+ int tile_row_start[kMaxTileRows + 1];
+ // This field is not used by libgav1, but is populated for use by some
+ // hardware decoders. So it must not be removed.
+ int tile_row_height_in_superblocks[kMaxTileRows + 1];
+ int16_t context_update_id;
+ uint8_t tile_size_bytes;
+};
+
+struct LoopRestoration {
+ LoopRestorationType type[kMaxPlanes];
+ int unit_size_log2[kMaxPlanes];
+};
+
+// Stores the quantization parameters of Section 5.9.12.
+struct QuantizerParameters {
+ // base_index is in the range [0, 255].
+ uint8_t base_index;
+ int8_t delta_dc[kMaxPlanes];
+ // delta_ac[kPlaneY] is always 0.
+ int8_t delta_ac[kMaxPlanes];
+ bool use_matrix;
+ // The |matrix_level| array is used only when |use_matrix| is true.
+ // matrix_level[plane] specifies the level in the quantizer matrix that
+ // should be used for decoding |plane|. The quantizer matrix has 15 levels,
+ // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If
+ // matrix_level[plane] is 15, the quantizer matrix is not used.
+ int8_t matrix_level[kMaxPlanes];
+};
+
+// The corresponding segment feature constants in the AV1 spec are named
+// SEG_LVL_xxx.
+enum SegmentFeature : uint8_t {
+ kSegmentFeatureQuantizer,
+ kSegmentFeatureLoopFilterYVertical,
+ kSegmentFeatureLoopFilterYHorizontal,
+ kSegmentFeatureLoopFilterU,
+ kSegmentFeatureLoopFilterV,
+ kSegmentFeatureReferenceFrame,
+ kSegmentFeatureSkip,
+ kSegmentFeatureGlobalMv,
+ kSegmentFeatureMax
+};
+
+struct Segmentation {
+ // 5.11.14.
+ // Returns true if the feature is enabled in the segment.
+ bool FeatureActive(int segment_id, SegmentFeature feature) const {
+ return enabled && segment_id < kMaxSegments &&
+ feature_enabled[segment_id][feature];
+ }
+
+ // Returns true if the feature is signed.
+ static bool FeatureSigned(SegmentFeature feature) {
+ // Only the first five segment features are signed, so this comparison
+ // suffices.
+ return feature <= kSegmentFeatureLoopFilterV;
+ }
+
+ bool enabled;
+ bool update_map;
+ bool update_data;
+ bool temporal_update;
+ // True if the segment id will be read before the skip syntax element. False
+ // if the skip syntax element will be read first.
+ bool segment_id_pre_skip;
+ // The highest numbered segment id that has some enabled feature. Used as
+ // the upper bound for decoding segment ids.
+ int8_t last_active_segment_id;
+
+ bool feature_enabled[kMaxSegments][kSegmentFeatureMax];
+ int16_t feature_data[kMaxSegments][kSegmentFeatureMax];
+ bool lossless[kMaxSegments];
+ // Cached values of get_qindex(1, segmentId), to be consumed by
+ // Tile::ReadTransformType(). The values are in the range [0, 255].
+ uint8_t qindex[kMaxSegments];
+};
+
+// Section 6.8.20.
+// Note: In the spec, the film grain section uses YCbCr in variable names,
+// such as num_cb_points and num_cr_points. To keep it consistent with other
+// parts of the code, we use YUV, i.e., num_u_points, num_v_points, etc.
+struct FilmGrainParams {
+ bool apply_grain;
+ bool update_grain;
+ bool chroma_scaling_from_luma;
+ bool overlap_flag;
+ bool clip_to_restricted_range;
+
+ uint8_t num_y_points; // [0, 14].
+ uint8_t num_u_points; // [0, 10].
+ uint8_t num_v_points; // [0, 10].
+ // Must be in [0, 255] and in increasing order (for 10/12 bit, values are
+ // divided by 4 or 16).
+ uint8_t point_y_value[14];
+ uint8_t point_y_scaling[14];
+ uint8_t point_u_value[10];
+ uint8_t point_u_scaling[10];
+ uint8_t point_v_value[10];
+ uint8_t point_v_scaling[10];
+
+ uint8_t chroma_scaling; // [8, 11].
+ uint8_t auto_regression_coeff_lag; // [0, 3].
+ int8_t auto_regression_coeff_y[24]; // [-128, 127]
+ int8_t auto_regression_coeff_u[25]; // [-128, 127]
+ int8_t auto_regression_coeff_v[25]; // [-128, 127]
+ // Shift value: auto regression coeffs range
+ // 6: [-2, 2)
+ // 7: [-1, 1)
+ // 8: [-0.5, 0.5)
+ // 9: [-0.25, 0.25)
+ uint8_t auto_regression_shift;
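+ // For example, with |auto_regression_shift| equal to 6, a stored
+ // coefficient of 32 represents 32 / 2^6 = 0.5.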
+
+ uint16_t grain_seed;
+ int reference_index;
+ int grain_scale_shift;
+ // These multipliers are encoded as nonnegative values by adding 128 first.
+ // The 128 is subtracted during parsing.
+ int8_t u_multiplier; // [-128, 127]
+ int8_t u_luma_multiplier; // [-128, 127]
+ // These offsets are encoded as nonnegative values by adding 256 first. The
+ // 256 is subtracted during parsing.
+ int16_t u_offset; // [-256, 255]
+ int8_t v_multiplier; // [-128, 127]
+ int8_t v_luma_multiplier; // [-128, 127]
+ int16_t v_offset; // [-256, 255]
+};
+
+struct ObuFrameHeader {
+ uint16_t display_frame_id;
+ uint16_t current_frame_id;
+ int64_t frame_offset;
+ uint16_t expected_frame_id[kNumInterReferenceFrameTypes];
+ int32_t width;
+ int32_t height;
+ int32_t columns4x4;
+ int32_t rows4x4;
+ // The render size (render_width and render_height) is a hint to the
+ // application about the desired display size. It has no effect on the
+ // decoding process.
+ int32_t render_width;
+ int32_t render_height;
+ int32_t upscaled_width;
+ LoopRestoration loop_restoration;
+ uint32_t buffer_removal_time[kMaxOperatingPoints];
+ uint32_t frame_presentation_time;
+ // Note: global_motion[0] (for kReferenceFrameIntra) is not used.
+ std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion;
+ TileInfo tile_info;
+ QuantizerParameters quantizer;
+ Segmentation segmentation;
+ bool show_existing_frame;
+ // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is
+ // true.
+ int8_t frame_to_show;
+ FrameType frame_type;
+ bool show_frame;
+ bool showable_frame;
+ bool error_resilient_mode;
+ bool enable_cdf_update;
+ bool frame_size_override_flag;
+ // The order_hint syntax element in the uncompressed header. If
+ // show_existing_frame is false, the OrderHint variable in the spec is equal
+ // to this field, and so this field can be used in place of OrderHint when
+ // show_existing_frame is known to be false, such as during tile decoding.
+ uint8_t order_hint;
+ int8_t primary_reference_frame;
+ bool render_and_frame_size_different;
+ bool use_superres;
+ uint8_t superres_scale_denominator;
+ bool allow_screen_content_tools;
+ bool allow_intrabc;
+ bool frame_refs_short_signaling;
+ // A bitmask that specifies which reference frame slots will be updated with
+ // the current frame after it is decoded.
+ uint8_t refresh_frame_flags;
+ static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 ==
+ kNumReferenceFrameTypes,
+ "");
+ bool found_reference;
+ int8_t force_integer_mv;
+ bool allow_high_precision_mv;
+ InterpolationFilter interpolation_filter;
+ bool is_motion_mode_switchable;
+ bool use_ref_frame_mvs;
+ bool enable_frame_end_update_cdf;
+ // True if all segments are losslessly encoded at the coded resolution.
+ bool coded_lossless;
+ // True if all segments are losslessly encoded at the upscaled resolution.
+ bool upscaled_lossless;
+ TxMode tx_mode;
+ // True means that the mode info for inter blocks contains the syntax
+ // element comp_mode that indicates whether to use single or compound
+ // prediction. False means that all inter blocks will use single prediction.
+ bool reference_mode_select;
+ // The frames to use for compound prediction when skip_mode is true.
+ ReferenceFrameType skip_mode_frame[2];
+ bool skip_mode_present;
+ bool reduced_tx_set;
+ bool allow_warped_motion;
+ Delta delta_q;
+ Delta delta_lf;
+ // A valid value of reference_frame_index[i] is in the range [0, 7]. -1
+ // indicates an invalid value.
+ int8_t reference_frame_index[kNumInterReferenceFrameTypes];
+ // The ref_order_hint[ i ] syntax element in the uncompressed header.
+ // Specifies the expected output order hint for each reference frame.
+ uint8_t reference_order_hint[kNumReferenceFrameTypes];
+ LoopFilter loop_filter;
+ Cdef cdef;
+ FilmGrainParams film_grain_params;
+};
+
+} // namespace libgav1
+#endif // LIBGAV1_SRC_UTILS_TYPES_H_
diff --git a/src/utils/unbounded_queue.h b/src/utils/unbounded_queue.h
new file mode 100644
index 0000000..fa0d303
--- /dev/null
+++ b/src/utils/unbounded_queue.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
+#define LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <new>
+#include <utility>
+
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/memory.h"
+
+namespace libgav1 {
+
+// A FIFO queue of an unbounded capacity.
+//
+// This implementation uses the general approach used in std::deque
+// implementations. See, for example,
+// https://stackoverflow.com/questions/6292332/what-really-is-a-deque-in-stl
+//
+// It is much simpler because it just needs to support the queue interface.
+// The blocks are chained into a circular list, not managed by a "map". It
+// does not shrink the internal buffer.
+//
+// An alternative implementation approach is a resizable circular array. See,
+// for example, ResizingArrayQueue.java in https://algs4.cs.princeton.edu/code/
+// and base::circular_deque in Chromium's base/containers library.
+template <typename T>
+class UnboundedQueue {
+ public:
+ UnboundedQueue() = default;
+
+ // Move only.
+ UnboundedQueue(UnboundedQueue&& other)
+ : first_block_(other.first_block_),
+ front_(other.front_),
+ last_block_(other.last_block_),
+ back_(other.back_) {
+ other.first_block_ = nullptr;
+ other.front_ = 0;
+ other.last_block_ = nullptr;
+ other.back_ = 0;
+ }
+ UnboundedQueue& operator=(UnboundedQueue&& other) {
+ if (this != &other) {
+ Destroy();
+ first_block_ = other.first_block_;
+ front_ = other.front_;
+ last_block_ = other.last_block_;
+ back_ = other.back_;
+ other.first_block_ = nullptr;
+ other.front_ = 0;
+ other.last_block_ = nullptr;
+ other.back_ = 0;
+ }
+ return *this;
+ }
+
+ ~UnboundedQueue() { Destroy(); }
+
+ // Allocates two Blocks upfront because most access patterns require at
+ // least two Blocks. Returns false if the allocation of the Blocks failed.
+ LIBGAV1_MUST_USE_RESULT bool Init() {
+ std::unique_ptr<Block> new_block0(new (std::nothrow) Block);
+ std::unique_ptr<Block> new_block1(new (std::nothrow) Block);
+ if (new_block0 == nullptr || new_block1 == nullptr) return false;
+ first_block_ = last_block_ = new_block0.release();
+ new_block1->next = first_block_;
+ last_block_->next = new_block1.release();
+ return true;
+ }
+
+ // Checks if the queue has room for a new element. If the queue is full,
+ // tries to grow it. Returns false if the queue is full and the attempt to
+ // grow it failed.
+ //
+ // NOTE: GrowIfNeeded() must be called before each call to Push(). This
+ // inconvenient design is necessary to guarantee a successful Push() call.
+ //
+ // Push(T&& value) is often called with the argument std::move(value). The
+ // moved-from object |value| won't be usable afterwards, so it would be
+ // problematic if Push(T&& value) failed and we lost access to the original
+ // |value| object.
+ LIBGAV1_MUST_USE_RESULT bool GrowIfNeeded() {
+ assert(last_block_ != nullptr);
+ if (back_ == kBlockCapacity) {
+ if (last_block_->next == first_block_) {
+ // All Blocks are in use.
+ std::unique_ptr<Block> new_block(new (std::nothrow) Block);
+ if (new_block == nullptr) return false;
+ new_block->next = first_block_;
+ last_block_->next = new_block.release();
+ }
+ last_block_ = last_block_->next;
+ back_ = 0;
+ }
+ return true;
+ }
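+
+ // A usage sketch of the GrowIfNeeded()/Push() contract (|queue| and |job|
+ // are hypothetical):
+ //   if (!queue.GrowIfNeeded()) return false;  // |job| is still usable.
+ //   queue.Push(std::move(job));               // Guaranteed to succeed.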
+
+ // Pushes the element |value| to the end of the queue. It is an error to call
+ // Push() when the queue is full.
+ void Push(const T& value) {
+ assert(last_block_ != nullptr);
+ assert(back_ < kBlockCapacity);
+ T* elements = reinterpret_cast<T*>(last_block_->buffer);
+ new (&elements[back_++]) T(value);
+ }
+
+ void Push(T&& value) {
+ assert(last_block_ != nullptr);
+ assert(back_ < kBlockCapacity);
+ T* elements = reinterpret_cast<T*>(last_block_->buffer);
+ new (&elements[back_++]) T(std::move(value));
+ }
+
+ // Returns the element at the front of the queue. It is an error to call
+ // Front() when the queue is empty.
+ T& Front() {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ return elements[front_];
+ }
+
+ const T& Front() const {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ return elements[front_];
+ }
+
+ // Removes the element at the front of the queue from the queue. It is an
+ // error to call Pop() when the queue is empty.
+ void Pop() {
+ assert(!Empty());
+ T* elements = reinterpret_cast<T*>(first_block_->buffer);
+ elements[front_++].~T();
+ if (front_ == kBlockCapacity) {
+ // The first block has become empty.
+ front_ = 0;
+ if (first_block_ == last_block_) {
+ // Only one Block is in use. Simply reset back_.
+ back_ = 0;
+ } else {
+ first_block_ = first_block_->next;
+ }
+ }
+ }
+
+ // Returns true if the queue is empty.
+ bool Empty() const { return first_block_ == last_block_ && front_ == back_; }
+
+ private:
+ // kBlockCapacity is the maximum number of elements each Block can hold.
+ // sizeof(void*) is subtracted from 2048 to account for the |next| pointer in
+ // the Block struct.
+ //
+ // In Linux x86_64, sizeof(std::function<void()>) is 32, so each Block can
+ // hold 63 std::function<void()> objects.
+ //
+ // NOTE: The corresponding value in <deque> in libc++ revision
+ // 245b5ba3448b9d3f6de5962066557e253a6bc9a4 is:
+ // template <class _ValueType, class _DiffType>
+ // struct __deque_block_size {
+ // static const _DiffType value =
+ // sizeof(_ValueType) < 256 ? 4096 / sizeof(_ValueType) : 16;
+ // };
+ //
+ // Note that 4096 / 256 = 16, so apparently this expression is intended to
+ // ensure the block size is at least 4096 bytes and each block can hold at
+ // least 16 elements.
+ static constexpr size_t kBlockCapacity =
+ (sizeof(T) < 128) ? (2048 - sizeof(void*)) / sizeof(T) : 16;
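+ // For example, with 8-byte pointers and sizeof(std::function<void()>) == 32
+ // as noted above: (2048 - 8) / 32 = 63 elements per Block.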
+
+ struct Block : public Allocable {
+ alignas(T) char buffer[kBlockCapacity * sizeof(T)];
+ Block* next;
+ };
+
+ void Destroy() {
+ if (first_block_ == nullptr) return; // An uninitialized queue.
+
+ // First free the unused blocks, which are located after last_block_ and
+ // before first_block_.
+ Block* block = last_block_->next;
+ // Cut the circular list open after last_block_.
+ last_block_->next = nullptr;
+ while (block != first_block_) {
+ Block* next = block->next;
+ delete block;
+ block = next;
+ }
+
+ // Then free the used blocks, destructing any elements they still hold.
+ while (block != nullptr) {
+ const size_t begin = (block == first_block_) ? front_ : 0;
+ const size_t end = (block == last_block_) ? back_ : kBlockCapacity;
+ T* elements = reinterpret_cast<T*>(block->buffer);
+ for (size_t i = begin; i < end; ++i) {
+ elements[i].~T();
+ }
+ Block* next = block->next;
+ delete block;
+ block = next;
+ }
+ }
+
+ // Blocks are chained in a circular singly-linked list. If the list of Blocks
+ // is empty, both first_block_ and last_block_ are null pointers. If the list
+ // is nonempty, first_block_ points to the first used Block and last_block_
+ // points to the last used Block.
+ //
+ // Invariant: If Init() is called and succeeds, the list of Blocks is always
+ // nonempty. This allows all methods (except the destructor) to avoid null
+ // pointer checks for first_block_ and last_block_.
+ Block* first_block_ = nullptr;
+ // The index of the element in first_block_ to be removed by Pop().
+ size_t front_ = 0;
+ Block* last_block_ = nullptr;
+ // The index in last_block_ where the new element is inserted by Push().
+ size_t back_ = 0;
+};
+
+#if !LIBGAV1_CXX17
+template <typename T>
+constexpr size_t UnboundedQueue<T>::kBlockCapacity;
+#endif
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_UNBOUNDED_QUEUE_H_
diff --git a/src/utils/vector.h b/src/utils/vector.h
new file mode 100644
index 0000000..e211240
--- /dev/null
+++ b/src/utils/vector.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2019 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// libgav1::Vector implementation
+
+#ifndef LIBGAV1_SRC_UTILS_VECTOR_H_
+#define LIBGAV1_SRC_UTILS_VECTOR_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+#include <utility>
+
+#include "src/utils/compiler_attributes.h"
+
+namespace libgav1 {
+namespace internal {
+
+static constexpr size_t kMinVectorAllocation = 16;
+
+// Returns the smallest power of two greater than or equal to 'value'.
+inline size_t NextPow2(size_t value) {
+ if (value == 0) return 0;
+ --value;
+ for (size_t i = 1; i < sizeof(size_t) * 8; i *= 2) value |= value >> i;
+ return value + 1;
+}
+
+// Returns the smallest capacity greater than or equal to 'value'.
+inline size_t NextCapacity(size_t value) {
+ if (value == 0) return 0;
+ if (value <= kMinVectorAllocation) return kMinVectorAllocation;
+ return NextPow2(value);
+}
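+
+// For example: NextPow2(33) == 64, NextPow2(64) == 64, NextCapacity(1) == 16
+// (the kMinVectorAllocation floor), and NextCapacity(33) == 64.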
+
+//------------------------------------------------------------------------------
+// Data structure equivalent to std::vector, except that on memory allocation
+// failure it returns false and is left in its last valid state.
+// std::vector with a custom allocator cannot fill this need without
+// exceptions.
+
+template <typename T>
+class VectorBase {
+ public:
+ using iterator = T*;
+ using const_iterator = const T*;
+
+ VectorBase() noexcept = default;
+ // Move only.
+ VectorBase(const VectorBase&) = delete;
+ VectorBase& operator=(const VectorBase&) = delete;
+ VectorBase(VectorBase&& other) noexcept
+ : items_(other.items_),
+ capacity_(other.capacity_),
+ num_items_(other.num_items_) {
+ other.items_ = nullptr;
+ other.capacity_ = 0;
+ other.num_items_ = 0;
+ }
+ VectorBase& operator=(VectorBase&& other) noexcept {
+ if (this != &other) {
+ clear();
+ free(items_);
+ items_ = other.items_;
+ capacity_ = other.capacity_;
+ num_items_ = other.num_items_;
+ other.items_ = nullptr;
+ other.capacity_ = 0;
+ other.num_items_ = 0;
+ }
+ return *this;
+ }
+ ~VectorBase() {
+ clear();
+ free(items_);
+ }
+
+ // Reallocates just enough memory if needed so that 'new_cap' items can fit.
+ LIBGAV1_MUST_USE_RESULT bool reserve(size_t new_cap) {
+ if (capacity_ < new_cap) {
+ T* const new_items = static_cast<T*>(malloc(new_cap * sizeof(T)));
+ if (new_items == nullptr) return false;
+ if (num_items_ > 0) {
+ if (std::is_trivial<T>::value) {
+ // Cast |new_items| and |items_| to void* to avoid the GCC
+ // -Wclass-memaccess warning and additionally the
+ // bugprone-undefined-memory-manipulation clang-tidy warning. The
+ // memcpy is safe because T is a trivial type.
+ memcpy(static_cast<void*>(new_items),
+ static_cast<const void*>(items_), num_items_ * sizeof(T));
+ } else {
+ for (size_t i = 0; i < num_items_; ++i) {
+ new (&new_items[i]) T(std::move(items_[i]));
+ items_[i].~T();
+ }
+ }
+ }
+ free(items_);
+ items_ = new_items;
+ capacity_ = new_cap;
+ }
+ return true;
+ }
+
+ // Reallocates less memory so that only the existing items can fit.
+ bool shrink_to_fit() {
+ if (capacity_ == num_items_) return true;
+ if (num_items_ == 0) {
+ free(items_);
+ items_ = nullptr;
+ capacity_ = 0;
+ return true;
+ }
+ const size_t previous_capacity = capacity_;
+ capacity_ = 0; // Force reserve() to allocate and copy.
+ if (reserve(num_items_)) return true;
+ capacity_ = previous_capacity;
+ return false;
+ }
+
+ // Constructs a new item by copy constructor. May reallocate if
+ // 'resize_if_needed' is true.
+ LIBGAV1_MUST_USE_RESULT bool push_back(const T& value,
+ bool resize_if_needed = true) {
+ if (num_items_ >= capacity_ &&
+ (!resize_if_needed ||
+ !reserve(internal::NextCapacity(num_items_ + 1)))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(value);
+ ++num_items_;
+ return true;
+ }
+
+ // Constructs a new item by copy constructor. reserve() must have been called
+ // with a sufficient capacity.
+ //
+ // WARNING: No error checking is performed.
+ void push_back_unchecked(const T& value) {
+ assert(num_items_ < capacity_);
+ new (&items_[num_items_]) T(value);
+ ++num_items_;
+ }
+
+ // Constructs a new item by move constructor. May reallocate if
+ // 'resize_if_needed' is true.
+ LIBGAV1_MUST_USE_RESULT bool push_back(T&& value,
+ bool resize_if_needed = true) {
+ if (num_items_ >= capacity_ &&
+ (!resize_if_needed ||
+ !reserve(internal::NextCapacity(num_items_ + 1)))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(std::move(value));
+ ++num_items_;
+ return true;
+ }
+
+ // Constructs a new item by move constructor. reserve() must have been called
+ // with a sufficient capacity.
+ //
+ // WARNING: No error checking is performed.
+ void push_back_unchecked(T&& value) {
+ assert(num_items_ < capacity_);
+ new (&items_[num_items_]) T(std::move(value));
+ ++num_items_;
+ }
+
+ // Constructs a new item in place by forwarding the arguments args... to the
+ // constructor. May reallocate.
+ template <typename... Args>
+ LIBGAV1_MUST_USE_RESULT bool emplace_back(Args&&... args) {
+ if (num_items_ >= capacity_ &&
+ !reserve(internal::NextCapacity(num_items_ + 1))) {
+ return false;
+ }
+ new (&items_[num_items_]) T(std::forward<Args>(args)...);
+ ++num_items_;
+ return true;
+ }
+
+ // Destructs the last item.
+ void pop_back() {
+ --num_items_;
+ items_[num_items_].~T();
+ }
+
+ // Destructs the item at 'pos'.
+ void erase(iterator pos) { erase(pos, pos + 1); }
+
+ // Destructs the items in [first,last).
+ void erase(iterator first, iterator last) {
+ for (iterator it = first; it != last; ++it) it->~T();
+ if (last != end()) {
+ if (std::is_trivial<T>::value) {
+ // Cast |first| and |last| to void* to avoid the GCC
+ // -Wclass-memaccess warning and additionally the
+ // bugprone-undefined-memory-manipulation clang-tidy warning. The
+ // memmove is safe because T is a trivial type.
+ memmove(static_cast<void*>(first), static_cast<const void*>(last),
+ (end() - last) * sizeof(T));
+ } else {
+ for (iterator it_src = last, it_dst = first; it_src != end();
+ ++it_src, ++it_dst) {
+ new (it_dst) T(std::move(*it_src));
+ it_src->~T();
+ }
+ }
+ }
+ num_items_ -= std::distance(first, last);
+ }
+
+ // Destructs all the items.
+ void clear() { erase(begin(), end()); }
+
+ // Destroys (including deallocating) all the items.
+ void reset() {
+ clear();
+ if (!shrink_to_fit()) assert(false);
+ }
+
+ // Accessors
+ bool empty() const { return (num_items_ == 0); }
+ size_t size() const { return num_items_; }
+ size_t capacity() const { return capacity_; }
+
+ T* data() { return items_; }
+ T& front() { return items_[0]; }
+ T& back() { return items_[num_items_ - 1]; }
+ T& operator[](size_t i) { return items_[i]; }
+ T& at(size_t i) { return items_[i]; }
+ const T* data() const { return items_; }
+ const T& front() const { return items_[0]; }
+ const T& back() const { return items_[num_items_ - 1]; }
+ const T& operator[](size_t i) const { return items_[i]; }
+ const T& at(size_t i) const { return items_[i]; }
+
+ iterator begin() { return &items_[0]; }
+ const_iterator begin() const { return &items_[0]; }
+ iterator end() { return &items_[num_items_]; }
+ const_iterator end() const { return &items_[num_items_]; }
+
+ void swap(VectorBase& b) {
+ // Although not necessary here, adding "using std::swap;" and then calling
+ // swap() without namespace qualification is recommended. See Effective
+ // C++, Item 25.
+ using std::swap;
+ swap(items_, b.items_);
+ swap(capacity_, b.capacity_);
+ swap(num_items_, b.num_items_);
+ }
+
+ protected:
+ T* items_ = nullptr;
+ size_t capacity_ = 0;
+ size_t num_items_ = 0;
+};
+
+} // namespace internal
+
+//------------------------------------------------------------------------------
+
+// Vector class that does *NOT* construct the content on resize().
+// Should be reserved to plain old data.
+template <typename T>
+class VectorNoCtor : public internal::VectorBase<T> {
+ public:
+ // Grows or shrinks the vector so that 'new_num_items' exist. Items added
+ // when growing are *not* constructed (this class is meant for plain old
+ // data); items removed when shrinking are destructed.
+ // Allocated capacity grows in power-of-two steps.
+ LIBGAV1_MUST_USE_RESULT bool resize(size_t new_num_items) {
+ using super = internal::VectorBase<T>;
+ if (super::num_items_ < new_num_items) {
+ if (super::capacity_ < new_num_items) {
+ if (!super::reserve(internal::NextCapacity(new_num_items))) {
+ return false;
+ }
+ }
+ super::num_items_ = new_num_items;
+ } else {
+ while (super::num_items_ > new_num_items) {
+ --super::num_items_;
+ super::items_[super::num_items_].~T();
+ }
+ }
+ return true;
+ }
+};
+
+// This generic vector class will call the constructors.
+template <typename T>
+class Vector : public internal::VectorBase<T> {
+ public:
+ // Constructs or destructs items so that 'new_num_items' exist.
+ // Allocated capacity grows in power-of-two steps.
+ LIBGAV1_MUST_USE_RESULT bool resize(size_t new_num_items) {
+ using super = internal::VectorBase<T>;
+ if (super::num_items_ < new_num_items) {
+ if (super::capacity_ < new_num_items) {
+ if (!super::reserve(internal::NextCapacity(new_num_items))) {
+ return false;
+ }
+ }
+ while (super::num_items_ < new_num_items) {
+ new (&super::items_[super::num_items_]) T();
+ ++super::num_items_;
+ }
+ } else {
+ while (super::num_items_ > new_num_items) {
+ --super::num_items_;
+ super::items_[super::num_items_].~T();
+ }
+ }
+ return true;
+ }
+};
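+
+// A minimal usage sketch (names are illustrative): allocation failure is
+// reported through a false return value rather than an exception, so every
+// fallible call must be checked.
+//
+//   Vector<int> v;
+//   if (!v.reserve(100)) return false;  // Allocation failed.
+//   for (int i = 0; i < 100; ++i) v.push_back_unchecked(i);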
+
+//------------------------------------------------------------------------------
+
+// Define non-member swap() functions in the namespace in which VectorNoCtor
+// and Vector are implemented. See Effective C++, Item 25.
+
+template <typename T>
+void swap(VectorNoCtor<T>& a, VectorNoCtor<T>& b) {
+ a.swap(b);
+}
+
+template <typename T>
+void swap(Vector<T>& a, Vector<T>& b) {
+ a.swap(b);
+}
+
+//------------------------------------------------------------------------------
+
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_UTILS_VECTOR_H_