aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoyuan Yang <byang@debian.org>2021-11-07 08:50:20 -0500
committerBoyuan Yang <byang@debian.org>2021-11-07 08:50:20 -0500
commit513fcf1cd0dca1a6cbef9ff6e38e22237e75ba44 (patch)
tree249280ac94eb2b871de89cd1b166fff4ee00ab09
parent3c21ceac2f6a5adfab07d3d458880561543d0a31 (diff)
parent320ef65362608ee1148c299d8d5d7618af34e470 (diff)
downloadlibgav1-513fcf1cd0dca1a6cbef9ff6e38e22237e75ba44.tar.gz
libgav1-513fcf1cd0dca1a6cbef9ff6e38e22237e75ba44.tar.bz2
libgav1-513fcf1cd0dca1a6cbef9ff6e38e22237e75ba44.zip
Update upstream source from tag 'upstream/0.17.0'
Update to upstream version '0.17.0' with Debian dir 5b612b6a2d67788b0c85bac59e50edc1545bfd7e
-rw-r--r--.cmake-format.py126
-rw-r--r--CMakeLists.txt17
-rw-r--r--README.md15
-rw-r--r--cmake/libgav1_build_definitions.cmake11
-rw-r--r--cmake/libgav1_cpu_detection.cmake3
-rw-r--r--cmake/libgav1_flags.cmake13
-rw-r--r--cmake/libgav1_targets.cmake28
-rw-r--r--cmake/toolchains/aarch64-linux-gnu.cmake9
-rw-r--r--cmake/toolchains/arm-linux-gnueabihf.cmake9
-rw-r--r--examples/file_reader_factory_test.cc114
-rw-r--r--examples/file_reader_test.cc126
-rw-r--r--examples/file_reader_test_common.cc43
-rw-r--r--examples/file_reader_test_common.h171
-rw-r--r--examples/file_writer_test.cc495
-rw-r--r--src/buffer_pool.h7
-rw-r--r--src/buffer_pool_test.cc305
-rw-r--r--src/c_decoder_test.c440
-rw-r--r--src/c_version_test.c102
-rw-r--r--src/decoder_buffer_test.cc38
-rw-r--r--src/decoder_impl.cc44
-rw-r--r--src/decoder_test.cc352
-rw-r--r--src/dsp/arm/average_blend_neon.cc39
-rw-r--r--src/dsp/arm/cdef_neon.cc268
-rw-r--r--src/dsp/arm/cdef_neon.h3
-rw-r--r--src/dsp/arm/common_neon.h385
-rw-r--r--src/dsp/arm/common_neon_test.cc208
-rw-r--r--src/dsp/arm/convolve_10bit_neon.cc3008
-rw-r--r--src/dsp/arm/convolve_neon.cc451
-rw-r--r--src/dsp/arm/convolve_neon.h17
-rw-r--r--src/dsp/arm/distance_weighted_blend_neon.cc38
-rw-r--r--src/dsp/arm/film_grain_neon.cc739
-rw-r--r--src/dsp/arm/film_grain_neon.h6
-rw-r--r--src/dsp/arm/intra_edge_neon.cc3
-rw-r--r--src/dsp/arm/intrapred_cfl_neon.cc48
-rw-r--r--src/dsp/arm/intrapred_directional_neon.cc901
-rw-r--r--src/dsp/arm/intrapred_directional_neon.h4
-rw-r--r--src/dsp/arm/intrapred_filter_neon.cc144
-rw-r--r--src/dsp/arm/intrapred_filter_neon.h2
-rw-r--r--src/dsp/arm/intrapred_neon.cc579
-rw-r--r--src/dsp/arm/intrapred_neon.h19
-rw-r--r--src/dsp/arm/intrapred_smooth_neon.cc741
-rw-r--r--src/dsp/arm/intrapred_smooth_neon.h125
-rw-r--r--src/dsp/arm/inverse_transform_10bit_neon.cc728
-rw-r--r--src/dsp/arm/inverse_transform_neon.cc235
-rw-r--r--src/dsp/arm/inverse_transform_neon.h51
-rw-r--r--src/dsp/arm/loop_filter_neon.cc1322
-rw-r--r--src/dsp/arm/loop_filter_neon.h17
-rw-r--r--src/dsp/arm/loop_restoration_10bit_neon.cc2652
-rw-r--r--src/dsp/arm/loop_restoration_neon.cc687
-rw-r--r--src/dsp/arm/loop_restoration_neon.h4
-rw-r--r--src/dsp/arm/mask_blend_neon.cc352
-rw-r--r--src/dsp/arm/mask_blend_neon.h7
-rw-r--r--src/dsp/arm/motion_field_projection_neon.cc21
-rw-r--r--src/dsp/arm/motion_vector_search_neon.cc81
-rw-r--r--src/dsp/arm/obmc_neon.cc688
-rw-r--r--src/dsp/arm/obmc_neon.h3
-rw-r--r--src/dsp/arm/super_res_neon.cc29
-rw-r--r--src/dsp/arm/warp_neon.cc479
-rw-r--r--src/dsp/arm/warp_neon.h3
-rw-r--r--src/dsp/arm/weight_mask_neon.cc289
-rw-r--r--src/dsp/arm/weight_mask_neon.h18
-rw-r--r--src/dsp/average_blend.cc5
-rw-r--r--src/dsp/average_blend_test.cc148
-rw-r--r--src/dsp/cdef.cc15
-rw-r--r--src/dsp/cdef_test.cc58
-rw-r--r--src/dsp/convolve.cc131
-rw-r--r--src/dsp/convolve.inc3
-rw-r--r--src/dsp/convolve_test.cc1610
-rw-r--r--src/dsp/distance_weighted_blend.cc6
-rw-r--r--src/dsp/distance_weighted_blend_test.cc152
-rw-r--r--src/dsp/dsp.cc2
-rw-r--r--src/dsp/dsp.h169
-rw-r--r--src/dsp/dsp_test.cc40
-rw-r--r--src/dsp/film_grain.cc284
-rw-r--r--src/dsp/film_grain_common.h7
-rw-r--r--src/dsp/intra_edge_test.cc28
-rw-r--r--src/dsp/intrapred.cc26
-rw-r--r--src/dsp/intrapred_cfl.cc5
-rw-r--r--src/dsp/intrapred_cfl_test.cc5
-rw-r--r--src/dsp/intrapred_directional.cc31
-rw-r--r--src/dsp/intrapred_directional_test.cc26
-rw-r--r--src/dsp/intrapred_filter.cc6
-rw-r--r--src/dsp/intrapred_filter_test.cc5
-rw-r--r--src/dsp/intrapred_smooth.cc31
-rw-r--r--src/dsp/inverse_transform.cc413
-rw-r--r--src/dsp/inverse_transform_test.cc129
-rw-r--r--src/dsp/libgav1_dsp.cmake3
-rw-r--r--src/dsp/loop_filter.cc3
-rw-r--r--src/dsp/loop_filter_test.cc47
-rw-r--r--src/dsp/loop_restoration.cc22
-rw-r--r--src/dsp/loop_restoration_test.cc28
-rw-r--r--src/dsp/mask_blend.cc20
-rw-r--r--src/dsp/mask_blend_test.cc408
-rw-r--r--src/dsp/motion_field_projection.cc36
-rw-r--r--src/dsp/motion_vector_search.cc80
-rw-r--r--src/dsp/obmc.cc20
-rw-r--r--src/dsp/obmc_test.cc64
-rw-r--r--src/dsp/smooth_weights.inc35
-rw-r--r--src/dsp/super_res.cc7
-rw-r--r--src/dsp/warp.cc14
-rw-r--r--src/dsp/warp_test.cc5
-rw-r--r--src/dsp/weight_mask.cc5
-rw-r--r--src/dsp/x86/average_blend_sse4.cc36
-rw-r--r--src/dsp/x86/cdef_avx2.cc30
-rw-r--r--src/dsp/x86/cdef_sse4.cc29
-rw-r--r--src/dsp/x86/common_avx2_test.cc67
-rw-r--r--src/dsp/x86/common_sse4_test.cc64
-rw-r--r--src/dsp/x86/convolve_avx2.cc127
-rw-r--r--src/dsp/x86/convolve_sse4.cc284
-rw-r--r--src/dsp/x86/distance_weighted_blend_sse4.cc52
-rw-r--r--src/dsp/x86/film_grain_sse4.cc106
-rw-r--r--src/dsp/x86/intra_edge_sse4.cc9
-rw-r--r--src/dsp/x86/intrapred_cfl_sse4.cc53
-rw-r--r--src/dsp/x86/intrapred_filter_sse4.cc23
-rw-r--r--src/dsp/x86/intrapred_smooth_sse4.cc378
-rw-r--r--src/dsp/x86/intrapred_sse4.cc202
-rw-r--r--src/dsp/x86/inverse_transform_sse4.cc191
-rw-r--r--src/dsp/x86/inverse_transform_sse4.h52
-rw-r--r--src/dsp/x86/loop_restoration_10bit_avx2.cc22
-rw-r--r--src/dsp/x86/loop_restoration_10bit_sse4.cc22
-rw-r--r--src/dsp/x86/loop_restoration_avx2.cc22
-rw-r--r--src/dsp/x86/loop_restoration_sse4.cc22
-rw-r--r--src/dsp/x86/mask_blend_sse4.cc159
-rw-r--r--src/dsp/x86/motion_field_projection_sse4.cc21
-rw-r--r--src/dsp/x86/motion_vector_search_sse4.cc65
-rw-r--r--src/dsp/x86/obmc_sse4.cc142
-rw-r--r--src/dsp/x86/super_res_sse4.cc12
-rw-r--r--src/dsp/x86/warp_sse4.cc66
-rw-r--r--src/dsp/x86/weight_mask_sse4.cc198
-rw-r--r--src/film_grain.cc53
-rw-r--r--src/film_grain.h12
-rw-r--r--src/film_grain_test.cc2360
-rw-r--r--src/frame_scratch_buffer.h22
-rw-r--r--src/gav1/decoder_buffer.h19
-rw-r--r--src/gav1/version.h4
-rw-r--r--src/internal_frame_buffer_list_test.cc158
-rw-r--r--src/loop_restoration_info.cc6
-rw-r--r--src/loop_restoration_info.h8
-rw-r--r--src/motion_vector.cc43
-rw-r--r--src/motion_vector.h6
-rw-r--r--src/obu_parser.cc179
-rw-r--r--src/obu_parser.h1
-rw-r--r--src/obu_parser_test.cc2675
-rw-r--r--src/post_filter.h56
-rw-r--r--src/post_filter/cdef.cc208
-rw-r--r--src/post_filter/deblock.cc114
-rw-r--r--src/post_filter/loop_restoration.cc6
-rw-r--r--src/post_filter/post_filter.cc273
-rw-r--r--src/post_filter/super_res.cc7
-rw-r--r--src/post_filter_test.cc956
-rw-r--r--src/prediction_mask.h3
-rw-r--r--src/prediction_mask_test.cc214
-rw-r--r--src/quantizer.h1
-rw-r--r--src/quantizer_test.cc168
-rw-r--r--src/reconstruction.cc64
-rw-r--r--src/reconstruction_test.cc294
-rw-r--r--src/residual_buffer_pool_test.cc201
-rw-r--r--src/scan_test.cc85
-rw-r--r--src/symbol_decoder_context_test.cc264
-rw-r--r--src/threading_strategy_test.cc281
-rw-r--r--src/tile.h89
-rw-r--r--src/tile/bitstream/mode_info.cc382
-rw-r--r--src/tile/bitstream/palette.cc64
-rw-r--r--src/tile/bitstream/transform_size.cc12
-rw-r--r--src/tile/prediction.cc54
-rw-r--r--src/tile/tile.cc142
-rw-r--r--src/tile_scratch_buffer.h15
-rw-r--r--src/utils/array_2d_test.cc248
-rw-r--r--src/utils/block_parameters_holder_test.cc76
-rw-r--r--src/utils/blocking_counter_test.cc127
-rw-r--r--src/utils/common.h26
-rw-r--r--src/utils/common_test.cc604
-rw-r--r--src/utils/compiler_attributes.h2
-rw-r--r--src/utils/constants.h5
-rw-r--r--src/utils/cpu_test.cc248
-rw-r--r--src/utils/dynamic_buffer.h1
-rw-r--r--src/utils/entropy_decoder.cc89
-rw-r--r--src/utils/entropy_decoder.h38
-rw-r--r--src/utils/entropy_decoder_test.cc1259
-rw-r--r--src/utils/entropy_decoder_test_data.inc8443
-rw-r--r--src/utils/memory.h14
-rw-r--r--src/utils/memory_test.cc184
-rw-r--r--src/utils/queue.h1
-rw-r--r--src/utils/queue_test.cc86
-rw-r--r--src/utils/raw_bit_reader.h2
-rw-r--r--src/utils/raw_bit_reader_test.cc580
-rw-r--r--src/utils/reference_info.h1
-rw-r--r--src/utils/segmentation_map_test.cc120
-rw-r--r--src/utils/segmentation_test.cc40
-rw-r--r--src/utils/stack_test.cc74
-rw-r--r--src/utils/threadpool_test.cc133
-rw-r--r--src/utils/types.h76
-rw-r--r--src/utils/unbounded_queue_test.cc163
-rw-r--r--src/utils/vector.h1
-rw-r--r--src/utils/vector_test.cc234
-rw-r--r--src/version_test.cc66
-rw-r--r--src/warp_prediction.cc20
-rw-r--r--src/warp_prediction_test.cc246
-rw-r--r--src/yuv_buffer.cc55
-rw-r--r--tests/block_utils.cc12
-rw-r--r--tests/data/five-frames.ivfbin0 -> 883 bytes
-rw-r--r--tests/data/ivf-header-and-truncated-frame-headerbin0 -> 36 bytes
-rw-r--r--tests/data/ivf-header-onlybin0 -> 32 bytes
-rw-r--r--tests/data/ivf-signature-only1
-rw-r--r--tests/data/one-frame-large-timestamp.ivfbin0 -> 608 bytes
-rw-r--r--tests/data/one-frame-truncated.ivfbin0 -> 100 bytes
-rw-r--r--tests/data/one-frame.ivfbin0 -> 608 bytes
-rw-r--r--tests/libgav1_tests.cmake755
-rw-r--r--tests/utils.cc77
-rw-r--r--tests/utils.h19
210 files changed, 42013 insertions, 6060 deletions
diff --git a/.cmake-format.py b/.cmake-format.py
new file mode 100644
index 0000000..90499e5
--- /dev/null
+++ b/.cmake-format.py
@@ -0,0 +1,126 @@
+# Generated with cmake-format 0.5.4
+# --------------------------
+# General Formatting Options
+# --------------------------
+# How wide to allow formatted cmake files
+line_width = 80
+
+# How many spaces to tab for indent
+tab_size = 2
+
+# If arglists are longer than this, break them always
+max_subargs_per_line = 10
+
+# If true, separate flow control names from their parentheses with a space
+separate_ctrl_name_with_space = False
+
+# If true, separate function names from parentheses with a space
+separate_fn_name_with_space = False
+
+# If a statement is wrapped to more than one line, than dangle the closing
+# parenthesis on it's own line
+dangle_parens = False
+
+# If the statement spelling length (including space and parenthesis is larger
+# than the tab width by more than this amoung, then force reject un-nested
+# layouts.
+max_prefix_chars = 2
+
+# If a candidate layout is wrapped horizontally but it exceeds this many lines,
+# then reject the layout.
+max_lines_hwrap = 2
+
+# What style line endings to use in the output.
+line_ending = 'unix'
+
+# Format command names consistently as 'lower' or 'upper' case
+command_case = 'lower'
+
+# Format keywords consistently as 'lower' or 'upper' case
+keyword_case = 'unchanged'
+
+# Specify structure for custom cmake functions
+additional_commands = {
+ "foo": {
+ "flags": [
+ "BAR",
+ "BAZ"
+ ],
+ "kwargs": {
+ "HEADERS": "*",
+ "SOURCES": "*",
+ "DEPENDS": "*"
+ }
+ }
+}
+
+# A list of command names which should always be wrapped
+always_wrap = []
+
+# Specify the order of wrapping algorithms during successive reflow attempts
+algorithm_order = [0, 1, 2, 3, 4]
+
+# If true, the argument lists which are known to be sortable will be sorted
+# lexicographicall
+enable_sort = False
+
+# If true, the parsers may infer whether or not an argument list is sortable
+# (without annotation).
+autosort = False
+
+# If a comment line starts with at least this many consecutive hash characters,
+# then don't lstrip() them off. This allows for lazy hash rulers where the first
+# hash char is not separated by space
+hashruler_min_length = 10
+
+# A dictionary containing any per-command configuration overrides. Currently
+# only `command_case` is supported.
+per_command = {}
+
+
+# --------------------------
+# Comment Formatting Options
+# --------------------------
+# What character to use for bulleted lists
+bullet_char = '*'
+
+# What character to use as punctuation after numerals in an enumerated list
+enum_char = '.'
+
+# enable comment markup parsing and reflow
+enable_markup = True
+
+# If comment markup is enabled, don't reflow the first comment block in each
+# listfile. Use this to preserve formatting of your copyright/license
+# statements.
+first_comment_is_literal = True
+
+# If comment markup is enabled, don't reflow any comment block which matches
+# this (regex) pattern. Default is `None` (disabled).
+literal_comment_pattern = None
+
+# Regular expression to match preformat fences in comments
+# default=r'^\s*([`~]{3}[`~]*)(.*)$'
+fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$'
+
+# Regular expression to match rulers in comments
+# default=r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'
+ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$'
+
+# If true, then insert a space between the first hash char and remaining hash
+# chars in a hash ruler, and normalize it's length to fill the column
+canonicalize_hashrulers = True
+
+
+# ---------------------------------
+# Miscellaneous Options
+# ---------------------------------
+# If true, emit the unicode byte-order mark (BOM) at the start of the file
+emit_byteorder_mark = False
+
+# Specify the encoding of the input file. Defaults to utf-8.
+input_encoding = 'utf-8'
+
+# Specify the encoding of the output file. Defaults to utf-8. Note that cmake
+# only claims to support utf-8 so be careful when using anything else
+output_encoding = 'utf-8'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5e9e17a..4029de1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,10 @@ cmake_minimum_required(VERSION 3.7.1 FATAL_ERROR)
# libgav1 requires C++11.
set(CMAKE_CXX_STANDARD 11)
set(ABSL_CXX_STANDARD 11)
+# libgav1 requires C99.
+set(CMAKE_C_STANDARD 99)
-project(libgav1 CXX)
+project(libgav1 CXX C)
set(libgav1_root "${CMAKE_CURRENT_SOURCE_DIR}")
set(libgav1_build "${CMAKE_BINARY_DIR}")
@@ -56,6 +58,12 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
+# Enable generators like Xcode and Visual Studio to place projects in folders.
+get_property(use_folders_is_set GLOBAL PROPERTY USE_FOLDERS SET)
+if(NOT use_folders_is_set)
+ set_property(GLOBAL PROPERTY USE_FOLDERS TRUE)
+endif()
+
include(FindThreads)
include("${libgav1_examples}/libgav1_examples.cmake")
@@ -126,6 +134,7 @@ if(NOT EXISTS "${libgav1_abseil}")
" clone \\\n"
" https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp")
endif()
+set(ABSL_PROPAGATE_CXX_STD ON)
add_subdirectory("${libgav1_abseil}" "${libgav1_abseil_build}" EXCLUDE_FROM_ALL)
libgav1_reset_target_lists()
@@ -136,6 +145,12 @@ libgav1_add_tests_targets()
libgav1_add_utils_targets()
libgav1_setup_install_target()
+if(LIBGAV1_ENABLE_TESTS)
+ # include(CTest) or -DBUILD_TESTING=1 aren't used to avoid enabling abseil
+ # tests.
+ enable_testing()
+endif()
+
if(LIBGAV1_VERBOSE)
libgav1_dump_cmake_flag_variables()
libgav1_dump_tracked_configuration_variables()
diff --git a/README.md b/README.md
index 3155970..6744291 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,21 @@ For additional options see:
options. Note: tools like [FFmpeg](https://ffmpeg.org) can be used to
convert other container formats to IVF.
+* Unit tests are built when `LIBGAV1_ENABLE_TESTS` is set to `1`. The binaries
+ can be invoked directly or with
+ [`ctest`](https://cmake.org/cmake/help/latest/manual/ctest.1.html).
+
+ * The test input location can be given by setting the
+ `LIBGAV1_TEST_DATA_PATH` environment variable; it defaults to
+ `<libgav1_src>/tests/data`, where `<libgav1_src>` is `/data/local/tmp`
+ on Android platforms or the source directory configured with cmake
+ otherwise.
+
+ * Output is written to the value of the `TMPDIR` or `TEMP` environment
+ variables in that order if set, otherwise `/data/local/tmp` on Android
+ platforms, the value of `LIBGAV1_FLAGS_TMPDIR` if defined during
+ compilation or the current directory if not.
+
## Development
### Contributing
diff --git a/cmake/libgav1_build_definitions.cmake b/cmake/libgav1_build_definitions.cmake
index fc83490..0d00bb6 100644
--- a/cmake/libgav1_build_definitions.cmake
+++ b/cmake/libgav1_build_definitions.cmake
@@ -32,7 +32,7 @@ macro(libgav1_set_build_definitions)
#
# We set LIBGAV1_SOVERSION = [c-a].a.r
set(LT_CURRENT 0)
- set(LT_REVISION 0)
+ set(LT_REVISION 1)
set(LT_AGE 0)
math(EXPR LIBGAV1_SOVERSION_MAJOR "${LT_CURRENT} - ${LT_AGE}")
set(LIBGAV1_SOVERSION "${LIBGAV1_SOVERSION_MAJOR}.${LT_AGE}.${LT_REVISION}")
@@ -53,7 +53,8 @@ macro(libgav1_set_build_definitions)
"LIBGAV1_FLAGS_TMPDIR=\"/tmp\"")
if(MSVC OR WIN32)
- list(APPEND libgav1_defines "_CRT_SECURE_NO_DEPRECATE=1" "NOMINMAX=1")
+ list(APPEND libgav1_defines "_CRT_SECURE_NO_WARNINGS" "NOMINMAX"
+ "_SCL_SECURE_NO_WARNINGS")
endif()
if(ANDROID)
@@ -159,7 +160,7 @@ macro(libgav1_set_build_definitions)
# Source file names ending in these suffixes will have the appropriate
# compiler flags added to their compile commands to enable intrinsics.
- set(libgav1_avx2_source_file_suffix "avx2.cc")
- set(libgav1_neon_source_file_suffix "neon.cc")
- set(libgav1_sse4_source_file_suffix "sse4.cc")
+ set(libgav1_avx2_source_file_suffix "avx2(_test)?.cc")
+ set(libgav1_neon_source_file_suffix "neon(_test)?.cc")
+ set(libgav1_sse4_source_file_suffix "sse4(_test)?.cc")
endmacro()
diff --git a/cmake/libgav1_cpu_detection.cmake b/cmake/libgav1_cpu_detection.cmake
index e17e27c..d79b83a 100644
--- a/cmake/libgav1_cpu_detection.cmake
+++ b/cmake/libgav1_cpu_detection.cmake
@@ -33,17 +33,20 @@ macro(libgav1_optimization_detect)
list(APPEND libgav1_defines "LIBGAV1_ENABLE_AVX2=1")
else()
list(APPEND libgav1_defines "LIBGAV1_ENABLE_AVX2=0")
+ set(libgav1_have_avx2 OFF)
endif()
if(libgav1_have_neon AND LIBGAV1_ENABLE_NEON)
list(APPEND libgav1_defines "LIBGAV1_ENABLE_NEON=1")
else()
list(APPEND libgav1_defines "LIBGAV1_ENABLE_NEON=0")
+ set(libgav1_have_neon, OFF)
endif()
if(libgav1_have_sse4 AND LIBGAV1_ENABLE_SSE4_1)
list(APPEND libgav1_defines "LIBGAV1_ENABLE_SSE4_1=1")
else()
list(APPEND libgav1_defines "LIBGAV1_ENABLE_SSE4_1=0")
+ set(libgav1_have_sse4 OFF)
endif()
endmacro()
diff --git a/cmake/libgav1_flags.cmake b/cmake/libgav1_flags.cmake
index a5408e2..4f2c4fd 100644
--- a/cmake/libgav1_flags.cmake
+++ b/cmake/libgav1_flags.cmake
@@ -259,5 +259,18 @@ macro(libgav1_set_test_flags)
if(LIBGAV1_ENABLE_TESTS)
set(LIBGAV1_TEST_CXX_FLAGS ${LIBGAV1_CXX_FLAGS})
list(FILTER LIBGAV1_TEST_CXX_FLAGS EXCLUDE REGEX "-Wframe-larger-than")
+
+ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL CMAKE_C_COMPILER_ID)
+ message(
+ FATAL_ERROR
+ "C/CXX compiler mismatch (${CMAKE_C_COMPILER_ID} vs"
+ " ${CMAKE_CXX_COMPILER_ID})! Compiler flags are only tested using"
+ " CMAKE_CXX_COMPILER, rerun cmake with CMAKE_C_COMPILER set to the"
+ " C compiler from the same package as CMAKE_CXX_COMPILER to ensure"
+ " the build completes successfully.")
+ endif()
+ set(LIBGAV1_TEST_C_FLAGS ${LIBGAV1_TEST_CXX_FLAGS})
+ list(FILTER LIBGAV1_TEST_C_FLAGS EXCLUDE REGEX
+ "-fvisibility-inlines-hidden")
endif()
endmacro()
diff --git a/cmake/libgav1_targets.cmake b/cmake/libgav1_targets.cmake
index 997f8bd..f8326a9 100644
--- a/cmake/libgav1_targets.cmake
+++ b/cmake/libgav1_targets.cmake
@@ -17,6 +17,14 @@ if(LIBGAV1_CMAKE_GAV1_TARGETS_CMAKE_)
endif() # LIBGAV1_CMAKE_GAV1_TARGETS_CMAKE_
set(LIBGAV1_CMAKE_GAV1_TARGETS_CMAKE_ 1)
+if(LIBGAV1_IDE_FOLDER)
+ set(LIBGAV1_EXAMPLES_IDE_FOLDER "${LIBGAV1_IDE_FOLDER}/examples")
+ set(LIBGAV1_TESTS_IDE_FOLDER "${LIBGAV1_IDE_FOLDER}/tests")
+else()
+ set(LIBGAV1_EXAMPLES_IDE_FOLDER "libgav1_examples")
+ set(LIBGAV1_TESTS_IDE_FOLDER "libgav1_tests")
+endif()
+
# Resets list variables used to track libgav1 targets.
macro(libgav1_reset_target_lists)
unset(libgav1_targets)
@@ -100,6 +108,13 @@ macro(libgav1_add_executable)
endif()
add_executable(${exe_NAME} ${exe_SOURCES})
+ if(exe_TEST)
+ add_test(NAME ${exe_NAME} COMMAND ${exe_NAME})
+ set_property(TARGET ${exe_NAME} PROPERTY FOLDER ${LIBGAV1_TESTS_IDE_FOLDER})
+ else()
+ set_property(TARGET ${exe_NAME}
+ PROPERTY FOLDER ${LIBGAV1_EXAMPLES_IDE_FOLDER})
+ endif()
if(exe_OUTPUT_NAME)
set_target_properties(${exe_NAME} PROPERTIES OUTPUT_NAME ${exe_OUTPUT_NAME})
@@ -366,4 +381,17 @@ macro(libgav1_add_library)
libgav1_create_dummy_source_file(TARGET ${lib_NAME} BASENAME ${lib_NAME})
endif()
endif()
+
+ if(lib_TEST)
+ set_property(TARGET ${lib_NAME} PROPERTY FOLDER ${LIBGAV1_TESTS_IDE_FOLDER})
+ else()
+ set(sources_list ${lib_SOURCES})
+ list(FILTER sources_list INCLUDE REGEX examples)
+ if(sources_list)
+ set_property(TARGET ${lib_NAME}
+ PROPERTY FOLDER ${LIBGAV1_EXAMPLES_IDE_FOLDER})
+ else()
+ set_property(TARGET ${lib_NAME} PROPERTY FOLDER ${LIBGAV1_IDE_FOLDER})
+ endif()
+ endif()
endmacro()
diff --git a/cmake/toolchains/aarch64-linux-gnu.cmake b/cmake/toolchains/aarch64-linux-gnu.cmake
index 7ffe397..fdcb012 100644
--- a/cmake/toolchains/aarch64-linux-gnu.cmake
+++ b/cmake/toolchains/aarch64-linux-gnu.cmake
@@ -23,6 +23,13 @@ if("${CROSS}" STREQUAL "")
set(CROSS aarch64-linux-gnu-)
endif()
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
+# For c_decoder_test.c and c_version_test.c.
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+set(CMAKE_C_FLAGS_INIT "-march=armv8-a")
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
set(CMAKE_CXX_FLAGS_INIT "-march=armv8-a")
set(CMAKE_SYSTEM_PROCESSOR "aarch64")
diff --git a/cmake/toolchains/arm-linux-gnueabihf.cmake b/cmake/toolchains/arm-linux-gnueabihf.cmake
index 8051f0d..7448f54 100644
--- a/cmake/toolchains/arm-linux-gnueabihf.cmake
+++ b/cmake/toolchains/arm-linux-gnueabihf.cmake
@@ -23,7 +23,14 @@ if("${CROSS}" STREQUAL "")
set(CROSS arm-linux-gnueabihf-)
endif()
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
+# For c_decoder_test.c and c_version_test.c.
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+set(CMAKE_C_FLAGS_INIT "-march=armv7-a -marm")
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
set(CMAKE_CXX_FLAGS_INIT "-march=armv7-a -marm")
set(CMAKE_SYSTEM_PROCESSOR "armv7")
set(LIBGAV1_NEON_INTRINSICS_FLAG "-mfpu=neon")
diff --git a/examples/file_reader_factory_test.cc b/examples/file_reader_factory_test.cc
new file mode 100644
index 0000000..346f9f8
--- /dev/null
+++ b/examples/file_reader_factory_test.cc
@@ -0,0 +1,114 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "examples/file_reader_factory.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <string>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "examples/file_reader_interface.h"
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+class AlwaysFailFileReader : public FileReaderInterface {
+ public:
+ static std::unique_ptr<FileReaderInterface> Open(
+ const std::string& /*file_name*/, bool /*error_tolerant*/) {
+ return nullptr;
+ }
+
+ AlwaysFailFileReader() = delete;
+ AlwaysFailFileReader(const AlwaysFailFileReader&) = delete;
+ AlwaysFailFileReader& operator=(const AlwaysFailFileReader&) = delete;
+ // Note this isn't overridden as the class can never be instantiated. This
+ // avoids an unused function warning.
+ // ~AlwaysFailFileReader() override = default;
+
+ bool ReadTemporalUnit(std::vector<uint8_t>* /*data*/,
+ int64_t* /*pts*/) override {
+ return false;
+ }
+ bool IsEndOfFile() const override { return false; }
+
+ size_t width() const override { return 0; }
+ size_t height() const override { return 0; }
+ size_t frame_rate() const override { return 0; }
+ size_t time_scale() const override { return 0; }
+
+ static bool is_registered_;
+};
+
+class AlwaysOkFileReader : public FileReaderInterface {
+ public:
+ static std::unique_ptr<FileReaderInterface> Open(
+ const std::string& /*file_name*/, bool /*error_tolerant*/) {
+ auto reader = absl::WrapUnique(new (std::nothrow) AlwaysOkFileReader());
+
+ return reader;
+ }
+
+ AlwaysOkFileReader(const AlwaysOkFileReader&) = delete;
+ AlwaysOkFileReader& operator=(const AlwaysOkFileReader&) = delete;
+ ~AlwaysOkFileReader() override = default;
+
+ bool ReadTemporalUnit(std::vector<uint8_t>* /*data*/,
+ int64_t* /*pts*/) override {
+ return true;
+ }
+ bool IsEndOfFile() const override { return true; }
+
+ size_t width() const override { return 1; }
+ size_t height() const override { return 1; }
+ size_t frame_rate() const override { return 1; }
+ size_t time_scale() const override { return 1; }
+
+ static bool is_registered_;
+
+ private:
+ AlwaysOkFileReader() = default;
+};
+
+bool AlwaysFailFileReader::is_registered_ =
+ FileReaderFactory::RegisterReader(AlwaysFailFileReader::Open);
+
+bool AlwaysOkFileReader::is_registered_ =
+ FileReaderFactory::RegisterReader(AlwaysOkFileReader::Open);
+
+TEST(FileReaderFactoryTest, RegistrationFail) {
+ EXPECT_FALSE(FileReaderFactory::RegisterReader(nullptr));
+}
+
+TEST(FileReaderFactoryTest, OpenReader) {
+ ASSERT_TRUE(AlwaysOkFileReader::is_registered_);
+ ASSERT_TRUE(AlwaysFailFileReader::is_registered_);
+
+ auto reader = FileReaderFactory::OpenReader("fake file");
+ EXPECT_NE(reader, nullptr);
+ EXPECT_TRUE(reader->IsEndOfFile());
+ EXPECT_TRUE(reader->ReadTemporalUnit(nullptr, nullptr));
+ EXPECT_EQ(reader->width(), 1);
+ EXPECT_EQ(reader->height(), 1);
+ EXPECT_EQ(reader->frame_rate(), 1);
+ EXPECT_EQ(reader->time_scale(), 1);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/examples/file_reader_test.cc b/examples/file_reader_test.cc
new file mode 100644
index 0000000..53e27f7
--- /dev/null
+++ b/examples/file_reader_test.cc
@@ -0,0 +1,126 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "examples/file_reader.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <vector>
+
+#include "examples/file_reader_interface.h"
+#include "examples/file_reader_test_common.h"
+#include "gtest/gtest.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace {
+
+// For use with tests that expect Open() failure to distinguish failure due to
+// the file contents versus failure due to a missing file.
+bool FileCanBeRead(const std::string& filename) {
+ FILE* const file = fopen(filename.c_str(), "r");
+ if (file != nullptr) {
+ fclose(file);
+ return true;
+ }
+ return false;
+}
+
+TEST(FileReaderTest, FailOpen) {
+ EXPECT_EQ(FileReader::Open(""), nullptr);
+ const std::string filename =
+ test_utils::GetTestInputFilePath("ivf-signature-only");
+ SCOPED_TRACE("Filename: " + filename);
+ EXPECT_TRUE(FileCanBeRead(filename));
+ EXPECT_EQ(FileReader::Open(filename), nullptr);
+}
+
+TEST(FileReaderTest, Open) {
+ const std::string filenames[] = {
+ test_utils::GetTestInputFilePath("five-frames.ivf"),
+ test_utils::GetTestInputFilePath("ivf-header-and-truncated-frame-header"),
+ test_utils::GetTestInputFilePath("ivf-header-only"),
+ test_utils::GetTestInputFilePath("one-frame-truncated.ivf"),
+ test_utils::GetTestInputFilePath("one-frame.ivf"),
+ };
+ for (const auto& filename : filenames) {
+ EXPECT_NE(FileReader::Open(filename), nullptr) << "Filename: " << filename;
+ }
+}
+
+TEST_P(FileReaderFailTest, FailRead) {
+ ASSERT_FALSE(reader_->ReadTemporalUnit(&tu_data_, nullptr));
+}
+
+TEST_P(FileReaderErrorTolerant, ReadThroughEndOfFile) {
+ while (!reader_->IsEndOfFile()) {
+ tu_data_.clear();
+ ASSERT_TRUE(reader_->ReadTemporalUnit(&tu_data_, nullptr));
+ ASSERT_GT(tu_data_.size(), 0);
+ }
+}
+
+TEST_P(FileReaderTestNoTimeStamps, ReadThroughEndOfFile) {
+ while (!reader_->IsEndOfFile()) {
+ tu_data_.clear();
+ ASSERT_TRUE(reader_->ReadTemporalUnit(&tu_data_, nullptr));
+ }
+}
+
+TEST_P(FileReaderTestWithTimeStamps, ReadThroughEndOfFile) {
+ int64_t timestamp = 0;
+ while (!reader_->IsEndOfFile()) {
+ tu_data_.clear();
+ ASSERT_TRUE(reader_->ReadTemporalUnit(&tu_data_, &timestamp));
+ if (!tu_data_.empty()) {
+ last_timestamp_ = timestamp;
+ }
+ }
+ ASSERT_TRUE(tu_data_.empty());
+ ASSERT_EQ(last_timestamp_, expected_last_timestamp_);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ FailRead, FileReaderFailTest,
+ testing::Values(
+ FileReaderTestParameters(FileReader::Open,
+ "ivf-header-and-truncated-frame-header"),
+ FileReaderTestParameters(FileReader::Open, "one-frame-truncated.ivf")));
+
+INSTANTIATE_TEST_SUITE_P(ReadThroughEndOfFile, FileReaderErrorTolerant,
+ testing::Values(FileReaderTestParameters(
+ FileReader::Open, "one-frame-truncated.ivf")));
+
+INSTANTIATE_TEST_SUITE_P(
+ ReadThroughEndOfFile, FileReaderTestNoTimeStamps,
+ testing::Values(FileReaderTestParameters(FileReader::Open, "one-frame.ivf"),
+ FileReaderTestParameters(FileReader::Open,
+ "one-frame-large-timestamp.ivf"),
+ FileReaderTestParameters(FileReader::Open,
+ "five-frames.ivf")));
+
+INSTANTIATE_TEST_SUITE_P(
+ ReadThroughEndOfFile, FileReaderTestWithTimeStamps,
+ testing::Values(
+ FileReaderTestWithTimeStampsParameters(FileReader::Open,
+ "one-frame.ivf", 0),
+ FileReaderTestWithTimeStampsParameters(FileReader::Open,
+ "one-frame-large-timestamp.ivf",
+ 4294967296),
+ FileReaderTestWithTimeStampsParameters(FileReader::Open,
+ "five-frames.ivf", 4)));
+
+} // namespace
+} // namespace libgav1
diff --git a/examples/file_reader_test_common.cc b/examples/file_reader_test_common.cc
new file mode 100644
index 0000000..735dd9e
--- /dev/null
+++ b/examples/file_reader_test_common.cc
@@ -0,0 +1,43 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "examples/file_reader_test_common.h"
+
+#include <ostream>
+
+#include "examples/file_reader.h"
+
+namespace libgav1 {
+
+std::ostream& operator<<(std::ostream& stream,
+ const FileReaderTestParameters& parameters) {
+ stream << "open_function="
+ << ((parameters.open_function == FileReader::Open) ? "FileReader"
+ : "Unknown")
+ << ", file_name=" << parameters.file_name;
+ return stream;
+}
+
+std::ostream& operator<<(
+ std::ostream& stream,
+ const FileReaderTestWithTimeStampsParameters& parameters) {
+ stream << "open_function="
+ << ((parameters.open_function == FileReader::Open) ? "FileReader"
+ : "Unknown")
+ << ", file_name=" << parameters.file_name
+ << ", expected_last_timestamp=" << parameters.expected_last_timestamp;
+ return stream;
+}
+
+} // namespace libgav1
diff --git a/examples/file_reader_test_common.h b/examples/file_reader_test_common.h
new file mode 100644
index 0000000..187a6ac
--- /dev/null
+++ b/examples/file_reader_test_common.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2021 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBGAV1_EXAMPLES_FILE_READER_TEST_COMMON_H_
+#define LIBGAV1_EXAMPLES_FILE_READER_TEST_COMMON_H_
+
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "examples/file_reader.h"
+#include "examples/file_reader_factory.h"
+#include "examples/file_reader_interface.h"
+#include "gtest/gtest.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+
+struct FileReaderTestParameters {
+ FileReaderTestParameters() = default;
+ FileReaderTestParameters(FileReaderFactory::OpenFunction open_function,
+ const char* file_name)
+ : open_function(open_function), file_name(file_name) {}
+ FileReaderTestParameters(const FileReaderTestParameters&) = default;
+ FileReaderTestParameters& operator=(const FileReaderTestParameters&) = delete;
+ FileReaderTestParameters(FileReaderTestParameters&&) = default;
+ FileReaderTestParameters& operator=(FileReaderTestParameters&&) = default;
+ ~FileReaderTestParameters() = default;
+
+ FileReaderFactory::OpenFunction open_function = nullptr;
+ const char* file_name = nullptr;
+};
+
+class FileReaderTestBase {
+ public:
+ FileReaderTestBase() = default;
+ FileReaderTestBase(const FileReaderTestBase&) = delete;
+ FileReaderTestBase& operator=(const FileReaderTestBase&) = delete;
+ FileReaderTestBase(FileReaderTestBase&&) = default;
+ FileReaderTestBase& operator=(FileReaderTestBase&&) = default;
+ ~FileReaderTestBase() = default;
+
+ protected:
+ void OpenReader(const char* file_name,
+ FileReaderFactory::OpenFunction open_function) {
+ file_name_ = test_utils::GetTestInputFilePath(file_name);
+ reader_ = open_function(file_name_, /*error_tolerant=*/false);
+ ASSERT_NE(reader_, nullptr);
+ }
+
+ std::string file_name_;
+ std::unique_ptr<FileReaderInterface> reader_;
+ std::vector<uint8_t> tu_data_;
+};
+
+class FileReaderFailTest
+ : public FileReaderTestBase,
+ public testing::TestWithParam<FileReaderTestParameters> {
+ public:
+ FileReaderFailTest() = default;
+ FileReaderFailTest(const FileReaderTestBase&) = delete;
+ FileReaderFailTest& operator=(const FileReaderTestBase&) = delete;
+ ~FileReaderFailTest() override = default;
+
+ protected:
+ void SetUp() override {
+ OpenReader(GetParam().file_name, GetParam().open_function);
+ }
+};
+
+class FileReaderTestNoTimeStamps
+ : public FileReaderTestBase,
+ public testing::TestWithParam<FileReaderTestParameters> {
+ public:
+ FileReaderTestNoTimeStamps() = default;
+ FileReaderTestNoTimeStamps(const FileReaderTestNoTimeStamps&) = delete;
+ FileReaderTestNoTimeStamps& operator=(const FileReaderTestNoTimeStamps&) =
+ delete;
+ ~FileReaderTestNoTimeStamps() override = default;
+
+ protected:
+ void SetUp() override {
+ OpenReader(GetParam().file_name, GetParam().open_function);
+ }
+};
+
+class FileReaderErrorTolerant
+ : public FileReaderTestBase,
+ public testing::TestWithParam<FileReaderTestParameters> {
+ public:
+ FileReaderErrorTolerant() = default;
+ FileReaderErrorTolerant(const FileReaderErrorTolerant&) = delete;
+ FileReaderErrorTolerant& operator=(const FileReaderErrorTolerant&) = delete;
+ ~FileReaderErrorTolerant() override = default;
+
+ protected:
+ void SetUp() override {
+ file_name_ = test_utils::GetTestInputFilePath(GetParam().file_name);
+ reader_ = GetParam().open_function(file_name_, /*error_tolerant=*/true);
+ ASSERT_NE(reader_, nullptr);
+ }
+};
+
+struct FileReaderTestWithTimeStampsParameters {
+ FileReaderTestWithTimeStampsParameters() = default;
+ FileReaderTestWithTimeStampsParameters(
+ FileReaderFactory::OpenFunction open_function, const char* file_name,
+ int64_t expected_last_timestamp)
+ : open_function(open_function),
+ file_name(file_name),
+ expected_last_timestamp(expected_last_timestamp) {}
+ FileReaderTestWithTimeStampsParameters(
+ const FileReaderTestWithTimeStampsParameters&) = default;
+ FileReaderTestWithTimeStampsParameters& operator=(
+ const FileReaderTestWithTimeStampsParameters&) = delete;
+ FileReaderTestWithTimeStampsParameters(
+ FileReaderTestWithTimeStampsParameters&&) = default;
+ FileReaderTestWithTimeStampsParameters& operator=(
+ FileReaderTestWithTimeStampsParameters&&) = default;
+ ~FileReaderTestWithTimeStampsParameters() = default;
+
+ FileReaderFactory::OpenFunction open_function = nullptr;
+ const char* file_name = nullptr;
+ int64_t expected_last_timestamp = 0;
+};
+
+std::ostream& operator<<(std::ostream& stream,
+ const FileReaderTestParameters& parameters);
+
+std::ostream& operator<<(
+ std::ostream& stream,
+ const FileReaderTestWithTimeStampsParameters& parameters);
+
+class FileReaderTestWithTimeStamps
+ : public FileReaderTestBase,
+ public testing::TestWithParam<FileReaderTestWithTimeStampsParameters> {
+ public:
+ FileReaderTestWithTimeStamps() = default;
+ FileReaderTestWithTimeStamps(const FileReaderTestWithTimeStamps&) = delete;
+ FileReaderTestWithTimeStamps& operator=(const FileReaderTestWithTimeStamps&) =
+ delete;
+ ~FileReaderTestWithTimeStamps() override = default;
+
+ protected:
+ void SetUp() override {
+ FileReaderTestWithTimeStampsParameters parameters = GetParam();
+ OpenReader(parameters.file_name, parameters.open_function);
+ expected_last_timestamp_ = parameters.expected_last_timestamp;
+ }
+
+ int64_t last_timestamp_ = 0;
+ int64_t expected_last_timestamp_ = 0;
+};
+
+} // namespace libgav1
+#endif // LIBGAV1_EXAMPLES_FILE_READER_TEST_COMMON_H_
diff --git a/examples/file_writer_test.cc b/examples/file_writer_test.cc
new file mode 100644
index 0000000..481808c
--- /dev/null
+++ b/examples/file_writer_test.cc
@@ -0,0 +1,495 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "examples/file_writer.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include "absl/memory/memory.h"
+#include "gav1/decoder_buffer.h"
+#include "gtest/gtest.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace {
+
+const char kExpectedY4mHeader8bit[] = "YUV4MPEG2 W352 H288 F30:1 Ip C420jpeg\n";
+const char kExpectedY4mHeader10bit[] = "YUV4MPEG2 W352 H288 F30:1 Ip C420p10\n";
+const char kExpectedY4mHeader8bitMonochrome[] =
+ "YUV4MPEG2 W352 H288 F30:1 Ip Cmono\n";
+const char kExpectedY4mHeader10bitMonochrome[] =
+ "YUV4MPEG2 W352 H288 F30:1 Ip Cmono10\n";
+
+// Note: These are non-const because DecoderBuffer.plane is non-const.
+char fake_plane0[] = "PLANE0\n";
+char fake_plane1[] = "PLANE1\n";
+char fake_plane2[] = "PLANE2\n";
+
+constexpr size_t kExpectedRawDataBufferCount = 3;
+const char* kExpectedRawData[kExpectedRawDataBufferCount] = {
+ fake_plane0, fake_plane1, fake_plane2};
+
+const char* const kExpectedRawDataMonochrome = fake_plane0;
+
+constexpr size_t kExpectedY4mDataBufferCount = 5;
+const char* const kExpectedY4mFileData8bit[kExpectedY4mDataBufferCount] = {
+ kExpectedY4mHeader8bit, "FRAME\n", fake_plane0, fake_plane1, fake_plane2};
+const char* const kExpectedY4mFileData10bit[kExpectedY4mDataBufferCount] = {
+ kExpectedY4mHeader10bit, "FRAME\n", fake_plane0, fake_plane1, fake_plane2};
+
+constexpr size_t kExpectedY4mDataBufferCountMonochrome = 3;
+const char* const
+ kExpectedY4mFileData8bitMonochrome[kExpectedY4mDataBufferCountMonochrome] =
+ {kExpectedY4mHeader8bitMonochrome, "FRAME\n", fake_plane0};
+const char* const
+ kExpectedY4mFileData10bitMonochrome[kExpectedY4mDataBufferCountMonochrome] =
+ {kExpectedY4mHeader10bitMonochrome, "FRAME\n", fake_plane0};
+
+// TODO(tomfinegan): Add a bitdepth arg, and test writing 10 bit frame buffers.
+std::unique_ptr<DecoderBuffer> GetFakeDecoderBuffer(ImageFormat image_format) {
+ auto buffer = absl::make_unique<DecoderBuffer>();
+ if (buffer == nullptr) return nullptr;
+ buffer->chroma_sample_position = kChromaSamplePositionUnknown;
+ buffer->image_format = image_format;
+ buffer->bitdepth = 8;
+ buffer->displayed_width[0] = static_cast<int>(strlen(fake_plane0));
+ buffer->displayed_width[1] = static_cast<int>(strlen(fake_plane1));
+ buffer->displayed_width[2] = static_cast<int>(strlen(fake_plane2));
+ buffer->displayed_height[0] = 1;
+ buffer->displayed_height[1] = 1;
+ buffer->displayed_height[2] = 1;
+ buffer->stride[0] = static_cast<int>(strlen(fake_plane0));
+ buffer->stride[1] = static_cast<int>(strlen(fake_plane1));
+ buffer->stride[2] = static_cast<int>(strlen(fake_plane2));
+ buffer->plane[0] = reinterpret_cast<uint8_t*>(fake_plane0);
+ buffer->plane[1] = reinterpret_cast<uint8_t*>(fake_plane1);
+ buffer->plane[2] = reinterpret_cast<uint8_t*>(fake_plane2);
+ buffer->user_private_data = 0;
+ buffer->buffer_private_data = nullptr;
+ return buffer;
+}
+
+TEST(FileWriterTest, FailOpen) {
+ EXPECT_EQ(FileWriter::Open(test_utils::GetTestOutputFilePath("fail_open"),
+ static_cast<FileWriter::FileType>(3), nullptr),
+ nullptr);
+ EXPECT_EQ(FileWriter::Open(test_utils::GetTestOutputFilePath("fail_open"),
+ FileWriter::kFileTypeY4m, nullptr),
+ nullptr);
+}
+
+struct FileWriterY4mHeaderTestParameters {
+ FileWriterY4mHeaderTestParameters() = default;
+ FileWriterY4mHeaderTestParameters(const FileWriterY4mHeaderTestParameters&) =
+ default;
+ FileWriterY4mHeaderTestParameters& operator=(
+ const FileWriterY4mHeaderTestParameters&) = default;
+ FileWriterY4mHeaderTestParameters(FileWriterY4mHeaderTestParameters&&) =
+ default;
+ FileWriterY4mHeaderTestParameters& operator=(
+ FileWriterY4mHeaderTestParameters&&) = default;
+ ~FileWriterY4mHeaderTestParameters() = default;
+
+ FileWriterY4mHeaderTestParameters(std::string file_name,
+ ChromaSamplePosition chroma_sample_position,
+ ImageFormat image_format, int bitdepth,
+ const char* expected_header_string)
+ : file_name(std::move(file_name)),
+ chroma_sample_position(chroma_sample_position),
+ image_format(image_format),
+ bitdepth(bitdepth),
+ expected_header_string(expected_header_string) {}
+ std::string file_name;
+ ChromaSamplePosition chroma_sample_position = kChromaSamplePositionUnknown;
+ ImageFormat image_format = kImageFormatMonochrome400;
+ int bitdepth = 8;
+ const char* expected_header_string = nullptr;
+};
+
+std::ostream& operator<<(std::ostream& stream,
+ const FileWriterY4mHeaderTestParameters& parameters) {
+ stream << "file_name=" << parameters.file_name << "\n"
+ << "chroma_sample_position=" << parameters.chroma_sample_position
+ << "\n"
+ << "image_format=" << parameters.image_format << "\n"
+ << "bitdepth=" << parameters.bitdepth << "\n"
+ << "expected_header_string=" << parameters.expected_header_string
+ << "\n";
+ return stream;
+}
+
+class FileWriterY4mHeaderTest
+ : public testing::TestWithParam<FileWriterY4mHeaderTestParameters> {
+ public:
+ FileWriterY4mHeaderTest() {
+ test_parameters_ = GetParam();
+ y4m_parameters_.width = 352;
+ y4m_parameters_.height = 288;
+ y4m_parameters_.frame_rate_numerator = 30;
+ y4m_parameters_.frame_rate_denominator = 1;
+ y4m_parameters_.chroma_sample_position =
+ test_parameters_.chroma_sample_position;
+ y4m_parameters_.image_format = test_parameters_.image_format;
+ y4m_parameters_.bitdepth = test_parameters_.bitdepth;
+ }
+ FileWriterY4mHeaderTest(const FileWriterY4mHeaderTest&) = delete;
+ FileWriterY4mHeaderTest& operator=(const FileWriterY4mHeaderTest&) = delete;
+ ~FileWriterY4mHeaderTest() override = default;
+
+ protected:
+ FileWriterY4mHeaderTestParameters test_parameters_;
+ FileWriter::Y4mParameters y4m_parameters_;
+};
+
+TEST_P(FileWriterY4mHeaderTest, WriteY4mHeader) {
+ const std::string file_name =
+ test_utils::GetTestOutputFilePath(test_parameters_.file_name);
+ EXPECT_NE(
+ FileWriter::Open(file_name, FileWriter::kFileTypeY4m, &y4m_parameters_),
+ nullptr);
+ std::string y4m_header_string;
+ test_utils::GetTestData(test_parameters_.file_name, true, &y4m_header_string);
+ EXPECT_STREQ(y4m_header_string.c_str(),
+ test_parameters_.expected_header_string);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ WriteY4mHeader, FileWriterY4mHeaderTest,
+ testing::Values(
+ FileWriterY4mHeaderTestParameters(
+ "y4m_header_8bit", kChromaSamplePositionUnknown, kImageFormatYuv420,
+ /*bitdepth=*/8, kExpectedY4mHeader8bit),
+ FileWriterY4mHeaderTestParameters("y4m_header_10bit",
+ kChromaSamplePositionUnknown,
+ kImageFormatYuv420, /*bitdepth=*/10,
+ kExpectedY4mHeader10bit),
+ FileWriterY4mHeaderTestParameters("y4m_header_8bit_monochrome",
+ kChromaSamplePositionUnknown,
+ kImageFormatMonochrome400,
+ /*bitdepth=*/8,
+ kExpectedY4mHeader8bitMonochrome),
+ FileWriterY4mHeaderTestParameters("y4m_header_10bit_monochrome",
+ kChromaSamplePositionUnknown,
+ kImageFormatMonochrome400,
+ /*bitdepth=*/10,
+ kExpectedY4mHeader10bitMonochrome)));
+
+struct FileWriterTestParameters {
+ FileWriterTestParameters() = default;
+ FileWriterTestParameters(const FileWriterTestParameters&) = default;
+ FileWriterTestParameters& operator=(const FileWriterTestParameters&) =
+ default;
+ FileWriterTestParameters(FileWriterTestParameters&&) = default;
+ FileWriterTestParameters& operator=(FileWriterTestParameters&&) = default;
+ ~FileWriterTestParameters() = default;
+
+ FileWriterTestParameters(std::string file_name,
+ FileWriter::FileType file_type,
+ const FileWriter::Y4mParameters* y4m_parameters,
+ size_t num_frames)
+ : file_name(std::move(file_name)),
+ file_type(file_type),
+ y4m_parameters(y4m_parameters),
+ num_frames(num_frames) {}
+ std::string file_name;
+ FileWriter::FileType file_type = FileWriter::kFileTypeRaw;
+ const FileWriter::Y4mParameters* y4m_parameters = nullptr;
+ size_t num_frames = 1;
+};
+
+std::ostream& operator<<(std::ostream& stream,
+ const ChromaSamplePosition& position) {
+ switch (position) {
+ case kChromaSamplePositionUnknown:
+ stream << "kCromaSamplePositionUnknown";
+ break;
+ case kChromaSamplePositionVertical:
+ stream << "kChromaSamplePositionVertical";
+ break;
+ case kChromaSamplePositionColocated:
+ stream << "kChromaSamplePositionColocated";
+ break;
+ case kChromaSamplePositionReserved:
+ stream << "kChromaSamplePositionReserved";
+ break;
+ }
+ return stream;
+}
+
+std::ostream& operator<<(std::ostream& stream,
+ const ImageFormat& image_format) {
+ switch (image_format) {
+ case kImageFormatMonochrome400:
+ stream << "kImageFormatMonochrome400";
+ break;
+ case kImageFormatYuv420:
+ stream << "kImageFormatYuv420";
+ break;
+ case kImageFormatYuv422:
+ stream << "kImageFormatYuv422";
+ break;
+ case kImageFormatYuv444:
+ stream << "kImageFormatYuv444";
+ break;
+ }
+ return stream;
+}
+
+std::ostream& operator<<(std::ostream& stream,
+ const FileWriter::Y4mParameters& parameters) {
+ stream << "y4m_parameters:\n"
+ << " width=" << parameters.width << "\n"
+ << " height=" << parameters.height << "\n"
+ << " frame_rate_numerator=" << parameters.frame_rate_numerator << "\n"
+ << " frame_rate_denominator=" << parameters.frame_rate_denominator
+ << "\n"
+ << " chroma_sample_position=" << parameters.chroma_sample_position
+ << "\n"
+ << " image_format=" << parameters.image_format << "\n"
+ << " bitdepth=" << parameters.bitdepth << "\n";
+
+ return stream;
+}
+
+std::ostream& operator<<(std::ostream& stream,
+ const FileWriterTestParameters& parameters) {
+ stream << "file_name=" << parameters.file_name << "\n"
+ << "file_type="
+ << (parameters.file_type == FileWriter::kFileTypeRaw ? "kFileTypeRaw"
+ : "kFileTypeY4m")
+ << "\n";
+ if (parameters.y4m_parameters != nullptr) {
+ stream << *parameters.y4m_parameters;
+ } else {
+ stream << "y4m_parameters: <nullptr>\n";
+ }
+ stream << "num_frames=" << parameters.num_frames << "\n";
+ return stream;
+}
+
+class FileWriterTestBase
+ : public testing::TestWithParam<FileWriterTestParameters> {
+ public:
+ FileWriterTestBase() = default;
+ FileWriterTestBase(const FileWriterTestBase&) = delete;
+ FileWriterTestBase& operator=(const FileWriterTestBase&) = delete;
+ ~FileWriterTestBase() override = default;
+
+ protected:
+ void SetUp() override { OpenWriter(GetParam()); }
+
+ void OpenWriter(const FileWriterTestParameters& parameters) {
+ parameters_ = parameters;
+ parameters_.file_name = parameters.file_name;
+ file_writer_ = FileWriter::Open(
+ test_utils::GetTestOutputFilePath(parameters.file_name),
+ parameters_.file_type, parameters_.y4m_parameters);
+ ASSERT_NE(file_writer_, nullptr);
+ }
+
+ void WriteFramesAndCloseFile() {
+ if (parameters_.y4m_parameters != nullptr) {
+ image_format_ = parameters_.y4m_parameters->image_format;
+ }
+ decoder_buffer_ = GetFakeDecoderBuffer(image_format_);
+ for (size_t frame_num = 0; frame_num < parameters_.num_frames;
+ ++frame_num) {
+ ASSERT_TRUE(file_writer_->WriteFrame(*decoder_buffer_));
+ }
+ file_writer_ = nullptr;
+ }
+
+ ImageFormat image_format_ = kImageFormatYuv420;
+ FileWriterTestParameters parameters_;
+ std::unique_ptr<FileWriter> file_writer_;
+ std::unique_ptr<DecoderBuffer> decoder_buffer_;
+};
+
+class FileWriterTestRaw : public FileWriterTestBase {
+ public:
+ FileWriterTestRaw() = default;
+ FileWriterTestRaw(const FileWriterTestRaw&) = delete;
+ FileWriterTestRaw& operator=(const FileWriterTestRaw&) = delete;
+ ~FileWriterTestRaw() override = default;
+
+ protected:
+ void SetUp() override { FileWriterTestBase::SetUp(); }
+};
+
+class FileWriterTestY4m : public FileWriterTestBase {
+ public:
+ FileWriterTestY4m() = default;
+ FileWriterTestY4m(const FileWriterTestY4m&) = delete;
+ FileWriterTestY4m& operator=(const FileWriterTestY4m&) = delete;
+ ~FileWriterTestY4m() override = default;
+
+ protected:
+ void SetUp() override { FileWriterTestBase::SetUp(); }
+};
+
+TEST_P(FileWriterTestRaw, WriteRawFrames) {
+ WriteFramesAndCloseFile();
+
+ std::string actual_file_data;
+ test_utils::GetTestData(parameters_.file_name, true, &actual_file_data);
+
+ std::string expected_file_data;
+ for (size_t frame_num = 0; frame_num < parameters_.num_frames; ++frame_num) {
+ if (image_format_ == kImageFormatMonochrome400) {
+ expected_file_data += kExpectedRawDataMonochrome;
+ } else {
+ for (const auto& buffer : kExpectedRawData) {
+ expected_file_data += buffer;
+ }
+ }
+ }
+
+ ASSERT_EQ(actual_file_data, expected_file_data);
+}
+
+TEST_P(FileWriterTestY4m, WriteY4mFrames) {
+ WriteFramesAndCloseFile();
+
+ std::string actual_file_data;
+ test_utils::GetTestData(parameters_.file_name, true, &actual_file_data);
+
+ std::string expected_file_data;
+ for (size_t frame_num = 0; frame_num < parameters_.num_frames; ++frame_num) {
+ if (image_format_ == kImageFormatMonochrome400) {
+ const char* const* expected_data_planes =
+ (parameters_.y4m_parameters->bitdepth == 8)
+ ? kExpectedY4mFileData8bitMonochrome
+ : kExpectedY4mFileData10bitMonochrome;
+ // Skip the Y4M file header "plane" after frame 0.
+ for (size_t buffer_num = (frame_num == 0) ? 0 : 1;
+ buffer_num < kExpectedY4mDataBufferCountMonochrome; ++buffer_num) {
+ expected_file_data += expected_data_planes[buffer_num];
+ }
+ } else {
+ const char* const* expected_data_planes =
+ (parameters_.y4m_parameters->bitdepth == 8)
+ ? kExpectedY4mFileData8bit
+ : kExpectedY4mFileData10bit;
+
+ // Skip the Y4M file header "plane" after frame 0.
+ for (size_t buffer_num = (frame_num == 0) ? 0 : 1;
+ buffer_num < kExpectedY4mDataBufferCount; ++buffer_num) {
+ expected_file_data += expected_data_planes[buffer_num];
+ }
+ }
+ }
+
+ ASSERT_EQ(actual_file_data, expected_file_data);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ WriteRawFrames, FileWriterTestRaw,
+ testing::Values(
+ FileWriterTestParameters("raw_frames_test_1frame",
+ FileWriter::kFileTypeRaw,
+ /*y4m_parameters=*/nullptr,
+ /*num_frames=*/1),
+ FileWriterTestParameters("raw_frames_test_5frames",
+ FileWriter::kFileTypeRaw,
+ /*y4m_parameters=*/nullptr,
+ /*num_frames=*/5),
+ FileWriterTestParameters("raw_frames_test_1frame_monochrome",
+ FileWriter::kFileTypeRaw,
+ /*y4m_parameters=*/nullptr,
+ /*num_frames=*/1),
+ FileWriterTestParameters("raw_frames_test_5frames_monochrome",
+ FileWriter::kFileTypeRaw,
+ /*y4m_parameters=*/nullptr,
+ /*num_frames=*/5)));
+
+const FileWriter::Y4mParameters kY4mParameters8Bit = {
+ 352, // width
+ 288, // height
+ 30, // frame_rate_numerator
+ 1, // frame_rate_denominator
+ kChromaSamplePositionUnknown,
+ kImageFormatYuv420,
+ 8 // bitdepth
+};
+
+const FileWriter::Y4mParameters kY4mParameters10Bit = {
+ 352, // width
+ 288, // height
+ 30, // frame_rate_numerator
+ 1, // frame_rate_denominator
+ kChromaSamplePositionUnknown,
+ kImageFormatYuv420,
+ 10 // bitdepth
+};
+
+const FileWriter::Y4mParameters kY4mParameters8BitMonochrome = {
+ 352, // width
+ 288, // height
+ 30, // frame_rate_numerator
+ 1, // frame_rate_denominator
+ kChromaSamplePositionUnknown,
+ kImageFormatMonochrome400,
+ 8 // bitdepth
+};
+
+const FileWriter::Y4mParameters kY4mParameters10BitMonochrome = {
+ 352, // width
+ 288, // height
+ 30, // frame_rate_numerator
+ 1, // frame_rate_denominator
+ kChromaSamplePositionUnknown,
+ kImageFormatMonochrome400,
+ 10 // bitdepth
+};
+
+INSTANTIATE_TEST_SUITE_P(
+ WriteY4mFrames, FileWriterTestY4m,
+ testing::Values(
+ FileWriterTestParameters("y4m_frames_test_8bit_1frame",
+ FileWriter::kFileTypeY4m, &kY4mParameters8Bit,
+ /*num_frames=*/1),
+ FileWriterTestParameters("y4m_frames_test_8bit_5frames",
+ FileWriter::kFileTypeY4m, &kY4mParameters8Bit,
+ /*num_frames=*/5),
+ FileWriterTestParameters("y4m_frames_test_10bit_1frame",
+ FileWriter::kFileTypeY4m, &kY4mParameters10Bit,
+ /*num_frames=*/1),
+ FileWriterTestParameters("y4m_frames_test_10bit_5frames",
+ FileWriter::kFileTypeY4m, &kY4mParameters10Bit,
+ /*num_frames=*/5),
+ FileWriterTestParameters("y4m_frames_test_8bit_1frame_monochrome",
+ FileWriter::kFileTypeY4m,
+ &kY4mParameters8BitMonochrome,
+ /*num_frames=*/1),
+ FileWriterTestParameters("y4m_frames_test_8bit_5frames_monochrome",
+ FileWriter::kFileTypeY4m,
+ &kY4mParameters8BitMonochrome,
+ /*num_frames=*/5),
+ FileWriterTestParameters("y4m_frames_test_10bit_1frame_monochrome",
+ FileWriter::kFileTypeY4m,
+ &kY4mParameters10BitMonochrome,
+ /*num_frames=*/1),
+ FileWriterTestParameters("y4m_frames_test_10bit_5frames_monochrome",
+ FileWriter::kFileTypeY4m,
+ &kY4mParameters10BitMonochrome,
+ /*num_frames=*/5)));
+
+} // namespace
+} // namespace libgav1
diff --git a/src/buffer_pool.h b/src/buffer_pool.h
index f35a633..d9eba6d 100644
--- a/src/buffer_pool.h
+++ b/src/buffer_pool.h
@@ -17,12 +17,13 @@
#ifndef LIBGAV1_SRC_BUFFER_POOL_H_
#define LIBGAV1_SRC_BUFFER_POOL_H_
+#include <algorithm>
#include <array>
#include <cassert>
#include <climits>
#include <condition_variable> // NOLINT (unapproved c++11 header)
#include <cstdint>
-#include <cstring>
+#include <memory>
#include <mutex> // NOLINT (unapproved c++11 header)
#include "src/dsp/common.h"
@@ -52,7 +53,9 @@ enum FrameState : uint8_t {
// A reference-counted frame buffer. Clients should access it via
// RefCountedBufferPtr, which manages reference counting transparently.
-class RefCountedBuffer {
+// The alignment requirement is due to the SymbolDecoderContext member
+// frame_context_.
+class RefCountedBuffer : public MaxAlignedAllocable {
public:
// Not copyable or movable.
RefCountedBuffer(const RefCountedBuffer&) = delete;
diff --git a/src/buffer_pool_test.cc b/src/buffer_pool_test.cc
new file mode 100644
index 0000000..abe681e
--- /dev/null
+++ b/src/buffer_pool_test.cc
@@ -0,0 +1,305 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/buffer_pool.h"
+
+#include <climits>
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <tuple>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "src/frame_buffer_utils.h"
+#include "src/gav1/decoder_buffer.h"
+#include "src/gav1/frame_buffer.h"
+#include "src/internal_frame_buffer_list.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+#include "src/yuv_buffer.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(BufferPoolTest, RefCountedBufferPtr) {
+ InternalFrameBufferList buffer_list;
+ BufferPool buffer_pool(OnInternalFrameBufferSizeChanged,
+ GetInternalFrameBuffer, ReleaseInternalFrameBuffer,
+ &buffer_list);
+ RefCountedBufferPtr buffer_ptr = buffer_pool.GetFreeBuffer();
+ EXPECT_NE(buffer_ptr, nullptr);
+ EXPECT_EQ(buffer_ptr.use_count(), 1);
+
+ RefCountedBufferPtr buffer_ptr2 = buffer_ptr;
+ RefCountedBufferPtr buffer_ptr3 = buffer_ptr;
+ EXPECT_EQ(buffer_ptr.use_count(), 3);
+ EXPECT_EQ(buffer_ptr2.use_count(), 3);
+ EXPECT_EQ(buffer_ptr3.use_count(), 3);
+
+ buffer_ptr2 = nullptr;
+ EXPECT_EQ(buffer_ptr.use_count(), 2);
+ EXPECT_EQ(buffer_ptr2.use_count(), 0);
+ EXPECT_EQ(buffer_ptr3.use_count(), 2);
+
+ RefCountedBufferPtr buffer_ptr4 = std::move(buffer_ptr);
+ EXPECT_EQ(buffer_ptr.use_count(), 0);
+ EXPECT_EQ(buffer_ptr2.use_count(), 0);
+ EXPECT_EQ(buffer_ptr3.use_count(), 2);
+ EXPECT_EQ(buffer_ptr4.use_count(), 2);
+}
+
+TEST(RefCountedBufferTest, SetFrameDimensions) {
+ InternalFrameBufferList buffer_list;
+ BufferPool buffer_pool(OnInternalFrameBufferSizeChanged,
+ GetInternalFrameBuffer, ReleaseInternalFrameBuffer,
+ &buffer_list);
+ RefCountedBufferPtr buffer_ptr = buffer_pool.GetFreeBuffer();
+ EXPECT_NE(buffer_ptr, nullptr);
+
+ // Test the undocumented default values of rows4x4() and columns4x4(). (Not
+ // sure if this is a good idea.)
+ EXPECT_EQ(buffer_ptr->rows4x4(), 0);
+ EXPECT_EQ(buffer_ptr->columns4x4(), 0);
+
+ // Test the side effects of SetFrameDimensions().
+ ObuFrameHeader frame_header = {};
+ frame_header.rows4x4 = 20;
+ frame_header.columns4x4 = 30;
+ EXPECT_TRUE(buffer_ptr->SetFrameDimensions(frame_header));
+ EXPECT_EQ(buffer_ptr->rows4x4(), 20);
+ EXPECT_EQ(buffer_ptr->columns4x4(), 30);
+}
+
+TEST(RefCountedBuffertTest, WaitUntil) {
+ InternalFrameBufferList buffer_list;
+ BufferPool buffer_pool(OnInternalFrameBufferSizeChanged,
+ GetInternalFrameBuffer, ReleaseInternalFrameBuffer,
+ &buffer_list);
+ RefCountedBufferPtr buffer_ptr = buffer_pool.GetFreeBuffer();
+ EXPECT_NE(buffer_ptr, nullptr);
+
+ int progress_row_cache;
+ buffer_ptr->SetProgress(10);
+ EXPECT_TRUE(buffer_ptr->WaitUntil(5, &progress_row_cache));
+ EXPECT_EQ(progress_row_cache, 10);
+
+ buffer_ptr->SetFrameState(kFrameStateDecoded);
+ EXPECT_TRUE(buffer_ptr->WaitUntil(500, &progress_row_cache));
+ EXPECT_EQ(progress_row_cache, INT_MAX);
+
+ buffer_ptr->Abort();
+ EXPECT_FALSE(buffer_ptr->WaitUntil(50, &progress_row_cache));
+}
+
+constexpr struct Params {
+ int width;
+ int height;
+ int8_t subsampling_x;
+ int8_t subsampling_y;
+ int border;
+} kParams[] = {
+ {1920, 1080, 1, 1, 96}, //
+ {1920, 1080, 1, 1, 64}, //
+ {1920, 1080, 1, 1, 32}, //
+ {1920, 1080, 1, 1, 160}, //
+ {1920, 1080, 1, 0, 160}, //
+ {1920, 1080, 0, 0, 160}, //
+};
+
+std::ostream& operator<<(std::ostream& os, const Params& param) {
+ return os << param.width << "x" << param.height
+ << ", subsampling(x/y): " << static_cast<int>(param.subsampling_x)
+ << "/" << static_cast<int>(param.subsampling_y)
+ << ", border: " << param.border;
+}
+
+class RefCountedBufferReallocTest
+ : public testing::TestWithParam<std::tuple<bool, Params>> {
+ protected:
+ const bool use_external_callbacks_ = std::get<0>(GetParam());
+ const Params& param_ = std::get<1>(GetParam());
+};
+
+TEST_P(RefCountedBufferReallocTest, 8Bit) {
+ InternalFrameBufferList buffer_list;
+ FrameBufferSizeChangedCallback on_frame_buffer_size_changed = nullptr;
+ GetFrameBufferCallback get_frame_buffer = nullptr;
+ ReleaseFrameBufferCallback release_frame_buffer = nullptr;
+ void* callback_private_data = nullptr;
+ if (use_external_callbacks_) {
+ on_frame_buffer_size_changed = OnInternalFrameBufferSizeChanged;
+ get_frame_buffer = GetInternalFrameBuffer;
+ release_frame_buffer = ReleaseInternalFrameBuffer;
+ callback_private_data = &buffer_list;
+ }
+
+ BufferPool buffer_pool(on_frame_buffer_size_changed, get_frame_buffer,
+ release_frame_buffer, callback_private_data);
+
+ RefCountedBufferPtr buffer_ptr = buffer_pool.GetFreeBuffer();
+ EXPECT_NE(buffer_ptr, nullptr);
+
+ const Libgav1ImageFormat image_format = ComposeImageFormat(
+ /*is_monochrome=*/false, param_.subsampling_x, param_.subsampling_y);
+ EXPECT_TRUE(buffer_pool.OnFrameBufferSizeChanged(
+ /*bitdepth=*/8, image_format, param_.width, param_.height, param_.border,
+ param_.border, param_.border, param_.border));
+
+ EXPECT_TRUE(buffer_ptr->Realloc(
+ /*bitdepth=*/8, /*is_monochrome=*/false, param_.width, param_.height,
+ param_.subsampling_x, param_.subsampling_y, param_.border, param_.border,
+ param_.border, param_.border));
+
+ // The first row of each plane is aligned at 16-byte boundaries.
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneY)) % 16, 0);
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneU)) % 16, 0);
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneV)) % 16, 0);
+
+ // Subsequent rows are aligned at 16-byte boundaries.
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneY) % 16, 0);
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneU) % 16, 0);
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneV) % 16, 0);
+
+ // Check the borders.
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneU),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneU),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneU),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneU),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneV),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneV),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneV),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneV),
+ param_.border >> param_.subsampling_y);
+
+ // Write to the upper-left corner of the border.
+ uint8_t* y_buffer = buffer_ptr->buffer()->data(kPlaneY);
+ int y_stride = buffer_ptr->buffer()->stride(kPlaneY);
+ y_buffer[-buffer_ptr->buffer()->left_border(kPlaneY) -
+ buffer_ptr->buffer()->top_border(kPlaneY) * y_stride] = 0;
+ // Write to the lower-right corner of the border.
+ uint8_t* v_buffer = buffer_ptr->buffer()->data(kPlaneV);
+ int v_stride = buffer_ptr->buffer()->stride(kPlaneV);
+ v_buffer[(buffer_ptr->buffer()->height(kPlaneV) +
+ buffer_ptr->buffer()->bottom_border(kPlaneV) - 1) *
+ v_stride +
+ buffer_ptr->buffer()->width(kPlaneV) +
+ buffer_ptr->buffer()->right_border(kPlaneV) - 1] = 0;
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+TEST_P(RefCountedBufferReallocTest, 10Bit) {
+ InternalFrameBufferList buffer_list;
+ FrameBufferSizeChangedCallback on_frame_buffer_size_changed = nullptr;
+ GetFrameBufferCallback get_frame_buffer = nullptr;
+ ReleaseFrameBufferCallback release_frame_buffer = nullptr;
+ void* callback_private_data = nullptr;
+ if (use_external_callbacks_) {
+ on_frame_buffer_size_changed = OnInternalFrameBufferSizeChanged;
+ get_frame_buffer = GetInternalFrameBuffer;
+ release_frame_buffer = ReleaseInternalFrameBuffer;
+ callback_private_data = &buffer_list;
+ }
+
+ BufferPool buffer_pool(on_frame_buffer_size_changed, get_frame_buffer,
+ release_frame_buffer, callback_private_data);
+
+ RefCountedBufferPtr buffer_ptr = buffer_pool.GetFreeBuffer();
+ EXPECT_NE(buffer_ptr, nullptr);
+
+ const Libgav1ImageFormat image_format = ComposeImageFormat(
+ /*is_monochrome=*/false, param_.subsampling_x, param_.subsampling_y);
+ EXPECT_TRUE(buffer_pool.OnFrameBufferSizeChanged(
+ /*bitdepth=*/8, image_format, param_.width, param_.height, param_.border,
+ param_.border, param_.border, param_.border));
+
+ EXPECT_TRUE(buffer_ptr->Realloc(
+ /*bitdepth=*/10, /*is_monochrome=*/false, param_.width, param_.height,
+ param_.subsampling_x, param_.subsampling_y, param_.border, param_.border,
+ param_.border, param_.border));
+
+ // The first row of each plane is aligned at 16-byte boundaries.
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneY)) % 16, 0);
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneU)) % 16, 0);
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(buffer_ptr->buffer()->data(kPlaneV)) % 16, 0);
+
+ // Subsequent rows are aligned at 16-byte boundaries.
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneY) % 16, 0);
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneU) % 16, 0);
+ EXPECT_EQ(buffer_ptr->buffer()->stride(kPlaneV) % 16, 0);
+
+ // Check the borders.
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneY), param_.border);
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneU),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneU),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneU),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneU),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->left_border(kPlaneV),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->right_border(kPlaneV),
+ param_.border >> param_.subsampling_x);
+ EXPECT_EQ(buffer_ptr->buffer()->top_border(kPlaneV),
+ param_.border >> param_.subsampling_y);
+ EXPECT_EQ(buffer_ptr->buffer()->bottom_border(kPlaneV),
+ param_.border >> param_.subsampling_y);
+
+ // Write to the upper-left corner of the border.
+ auto* y_buffer =
+ reinterpret_cast<uint16_t*>(buffer_ptr->buffer()->data(kPlaneY));
+ int y_stride = buffer_ptr->buffer()->stride(kPlaneY) / sizeof(uint16_t);
+ y_buffer[-buffer_ptr->buffer()->left_border(kPlaneY) -
+ buffer_ptr->buffer()->top_border(kPlaneY) * y_stride] = 0;
+ // Write to the lower-right corner of the border.
+ auto* v_buffer =
+ reinterpret_cast<uint16_t*>(buffer_ptr->buffer()->data(kPlaneV));
+ int v_stride = buffer_ptr->buffer()->stride(kPlaneV) / sizeof(uint16_t);
+ v_buffer[(buffer_ptr->buffer()->height(kPlaneV) +
+ buffer_ptr->buffer()->bottom_border(kPlaneV) - 1) *
+ v_stride +
+ buffer_ptr->buffer()->width(kPlaneV) +
+ buffer_ptr->buffer()->right_border(kPlaneV) - 1] = 0;
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(
+ Default, RefCountedBufferReallocTest,
+ testing::Combine(testing::Bool(), // use_external_callbacks
+ testing::ValuesIn(kParams)));
+
+} // namespace
+} // namespace libgav1
diff --git a/src/c_decoder_test.c b/src/c_decoder_test.c
new file mode 100644
index 0000000..10ef29f
--- /dev/null
+++ b/src/c_decoder_test.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2021 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __cplusplus
+#error Do not compile this file with a C++ compiler
+#endif
+
+// clang-format off
+#include "src/gav1/decoder.h"
+// clang-format on
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ASSERT_EQ(a, b) \
+ do { \
+ if ((a) != (b)) { \
+ fprintf(stderr, "Assertion failure: (%s) == (%s), at %s:%d\n", #a, #b, \
+ __FILE__, __LINE__); \
+ fprintf(stderr, "C DecoderTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_NE(a, b) \
+ do { \
+ if ((a) == (b)) { \
+ fprintf(stderr, "Assertion failure: (%s) != (%s), at %s:%d\n", #a, #b, \
+ __FILE__, __LINE__); \
+ fprintf(stderr, "C DecoderTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_TRUE(a) \
+ do { \
+ if (!(a)) { \
+ fprintf(stderr, "Assertion failure: %s, at %s:%d\n", #a, __FILE__, \
+ __LINE__); \
+ fprintf(stderr, "C DecoderTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_FALSE(a) \
+ do { \
+ if (a) { \
+ fprintf(stderr, "Assertion failure: !(%s), at %s:%d\n", #a, __FILE__, \
+ __LINE__); \
+ fprintf(stderr, "C DecoderTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+// These two frames come from the libaom test vector av1-1-b8-01-size-32x32.ivf
+static const uint8_t kFrame1[] = {
+ 0x12, 0x0, 0xa, 0xa, 0x0, 0x0, 0x0, 0x2, 0x27, 0xfe, 0xff, 0xfc,
+ 0xc0, 0x20, 0x32, 0x93, 0x2, 0x10, 0x0, 0xa8, 0x80, 0x0, 0x3, 0x0,
+ 0x10, 0x10, 0x30, 0x0, 0xd3, 0xc6, 0xc6, 0x82, 0xaa, 0x5e, 0xbf, 0x82,
+ 0xf2, 0xa4, 0xa4, 0x29, 0xab, 0xda, 0xd7, 0x1, 0x5, 0x0, 0xb3, 0xde,
+ 0xa8, 0x6f, 0x8d, 0xbf, 0x1b, 0xa8, 0x25, 0xc3, 0x84, 0x7c, 0x1a, 0x2b,
+ 0x8b, 0x0, 0xff, 0x19, 0x1f, 0x45, 0x7e, 0xe0, 0xbe, 0xe1, 0x3a, 0x63,
+ 0xc2, 0xc6, 0x6e, 0xf4, 0xc8, 0xce, 0x11, 0xe1, 0x9f, 0x48, 0x64, 0x72,
+ 0xeb, 0xbb, 0x4f, 0xf3, 0x94, 0xb4, 0xb6, 0x9d, 0x4f, 0x4, 0x18, 0x5e,
+ 0x5e, 0x1b, 0x65, 0x49, 0x74, 0x90, 0x13, 0x50, 0xef, 0x8c, 0xb8, 0xe8,
+ 0xd9, 0x8e, 0x9c, 0xc9, 0x4d, 0xda, 0x60, 0x6a, 0xa, 0xf9, 0x75, 0xd0,
+ 0x62, 0x69, 0xd, 0xf5, 0xdc, 0xa9, 0xb9, 0x4c, 0x8, 0x9e, 0x33, 0x15,
+ 0xa3, 0xe1, 0x42, 0x0, 0xe2, 0xb0, 0x46, 0xd0, 0xf7, 0xad, 0x55, 0xbc,
+ 0x75, 0xe9, 0xe3, 0x1f, 0xa3, 0x41, 0x11, 0xba, 0xaa, 0x81, 0xf3, 0xcb,
+ 0x82, 0x87, 0x71, 0x0, 0xe6, 0xb9, 0x8c, 0xe1, 0xe9, 0xd3, 0x21, 0xcc,
+ 0xcd, 0xe7, 0x12, 0xb9, 0xe, 0x43, 0x6a, 0xa3, 0x76, 0x5c, 0x35, 0x90,
+ 0x45, 0x36, 0x52, 0xb4, 0x2d, 0xa3, 0x55, 0xde, 0x20, 0xf8, 0x80, 0xe1,
+ 0x26, 0x46, 0x1b, 0x3f, 0x59, 0xc7, 0x2e, 0x5b, 0x4a, 0x73, 0xf8, 0xb3,
+ 0xf4, 0x62, 0xf4, 0xf5, 0xa4, 0xc2, 0xae, 0x9e, 0xa6, 0x9c, 0x10, 0xbb,
+ 0xe1, 0xd6, 0x88, 0x75, 0xb9, 0x85, 0x48, 0xe5, 0x7, 0x12, 0xf3, 0x11,
+ 0x85, 0x8e, 0xa2, 0x95, 0x9d, 0xed, 0x50, 0xfb, 0x6, 0x5a, 0x1, 0x37,
+ 0xc4, 0x8e, 0x9e, 0x73, 0x9b, 0x96, 0x64, 0xbd, 0x42, 0xb, 0x80, 0xde,
+ 0x57, 0x86, 0xcb, 0x7d, 0xab, 0x12, 0xb2, 0xcc, 0xe6, 0xea, 0xb5, 0x89,
+ 0xeb, 0x91, 0xb3, 0x93, 0xb2, 0x4f, 0x2f, 0x5b, 0xf3, 0x72, 0x12, 0x51,
+ 0x56, 0x75, 0xb3, 0xdd, 0x49, 0xb6, 0x5b, 0x77, 0xbe, 0xc5, 0xd7, 0xd4,
+ 0xaf, 0xd6, 0x6b, 0x38};
+
+static const uint8_t kFrame2[] = {
+ 0x12, 0x0, 0x32, 0x33, 0x30, 0x3, 0xc3, 0x0, 0xa7, 0x2e, 0x46,
+ 0xa8, 0x80, 0x0, 0x3, 0x0, 0x10, 0x1, 0x0, 0xa0, 0x0, 0xed,
+ 0xb1, 0x51, 0x15, 0x58, 0xc7, 0x69, 0x3, 0x26, 0x35, 0xeb, 0x5a,
+ 0x2d, 0x7a, 0x53, 0x24, 0x26, 0x20, 0xa6, 0x11, 0x7, 0x49, 0x76,
+ 0xa3, 0xc7, 0x62, 0xf8, 0x3, 0x32, 0xb0, 0x98, 0x17, 0x3d, 0x80};
+
+typedef struct DecoderTest {
+ Libgav1Decoder* decoder;
+ int frames_in_use;
+ void* buffer_private_data;
+ void* released_input_buffer;
+} DecoderTest;
+
+static void DecoderTestInit(DecoderTest* test) {
+ test->decoder = NULL;
+ test->frames_in_use = 0;
+ test->buffer_private_data = NULL;
+ test->released_input_buffer = NULL;
+}
+
+static void DecoderTestIncrementFramesInUse(DecoderTest* test) {
+ ++test->frames_in_use;
+}
+
+static void DecoderTestDecrementFramesInUse(DecoderTest* test) {
+ --test->frames_in_use;
+}
+
+static void DecoderTestSetReleasedInputBuffer(DecoderTest* test,
+ void* released_input_buffer) {
+ test->released_input_buffer = released_input_buffer;
+}
+
+static void DecoderTestSetBufferPrivateData(DecoderTest* test,
+ void* buffer_private_data) {
+ test->buffer_private_data = buffer_private_data;
+}
+
+typedef struct FrameBufferPrivate {
+ uint8_t* data[3];
+} FrameBufferPrivate;
+
+static Libgav1StatusCode GetFrameBuffer(
+ void* callback_private_data, int bitdepth, Libgav1ImageFormat image_format,
+ int width, int height, int left_border, int right_border, int top_border,
+ int bottom_border, int stride_alignment, Libgav1FrameBuffer* frame_buffer) {
+ Libgav1FrameBufferInfo info;
+ Libgav1StatusCode status = Libgav1ComputeFrameBufferInfo(
+ bitdepth, image_format, width, height, left_border, right_border,
+ top_border, bottom_border, stride_alignment, &info);
+ if (status != kLibgav1StatusOk) return status;
+
+ FrameBufferPrivate* buffer_private =
+ (FrameBufferPrivate*)malloc(sizeof(FrameBufferPrivate));
+ if (buffer_private == NULL) return kLibgav1StatusOutOfMemory;
+
+ for (int i = 0; i < 3; ++i) {
+ const size_t size = (i == 0) ? info.y_buffer_size : info.uv_buffer_size;
+ buffer_private->data[i] = (uint8_t*)malloc(sizeof(uint8_t) * size);
+ if (buffer_private->data[i] == NULL) {
+ for (int j = 0; j < i; j++) {
+ free(buffer_private->data[j]);
+ }
+ free(buffer_private);
+ return kLibgav1StatusOutOfMemory;
+ }
+ }
+
+ uint8_t* const y_buffer = buffer_private->data[0];
+ uint8_t* const u_buffer =
+ (info.uv_buffer_size != 0) ? buffer_private->data[1] : NULL;
+ uint8_t* const v_buffer =
+ (info.uv_buffer_size != 0) ? buffer_private->data[2] : NULL;
+
+ status = Libgav1SetFrameBuffer(&info, y_buffer, u_buffer, v_buffer,
+ buffer_private, frame_buffer);
+ if (status != kLibgav1StatusOk) return status;
+
+ DecoderTest* const decoder_test = (DecoderTest*)callback_private_data;
+ DecoderTestIncrementFramesInUse(decoder_test);
+ DecoderTestSetBufferPrivateData(decoder_test, frame_buffer->private_data);
+ return kLibgav1StatusOk;
+}
+
+static void ReleaseFrameBuffer(void* callback_private_data,
+ void* buffer_private_data) {
+ FrameBufferPrivate* buffer_private = (FrameBufferPrivate*)buffer_private_data;
+ for (int i = 0; i < 3; ++i) {
+ free(buffer_private->data[i]);
+ }
+ free(buffer_private);
+ DecoderTest* const decoder_test = (DecoderTest*)callback_private_data;
+ DecoderTestDecrementFramesInUse(decoder_test);
+}
+
+static void ReleaseInputBuffer(void* private_data, void* input_buffer) {
+ DecoderTestSetReleasedInputBuffer((DecoderTest*)private_data, input_buffer);
+}
+
+static void DecoderTestSetUp(DecoderTest* test) {
+ Libgav1DecoderSettings settings;
+ Libgav1DecoderSettingsInitDefault(&settings);
+ settings.frame_parallel = 0; // false
+ settings.get_frame_buffer = GetFrameBuffer;
+ settings.release_frame_buffer = ReleaseFrameBuffer;
+ settings.callback_private_data = test;
+ settings.release_input_buffer = ReleaseInputBuffer;
+ ASSERT_EQ(test->decoder, NULL);
+ ASSERT_EQ(Libgav1DecoderCreate(&settings, &test->decoder), kLibgav1StatusOk);
+ ASSERT_NE(test->decoder, NULL);
+}
+
+static void DecoderTestAPIFlowForNonFrameParallelMode(void) {
+ DecoderTest test;
+ DecoderTestInit(&test);
+ DecoderTestSetUp(&test);
+
+ Libgav1StatusCode status;
+ const Libgav1DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame1, sizeof(kFrame1), 0,
+ (uint8_t*)&kFrame1);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ // In non-frame-parallel mode, decoding happens only in the DequeueFrame call.
+ // So there should be no frames in use yet.
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame1.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame1);
+
+ // libgav1 has decoded frame1 and is holding a reference to it.
+ ASSERT_EQ(test.frames_in_use, 1);
+ ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data);
+
+ // Enqueue frame2 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame2, sizeof(kFrame2), 0,
+ (uint8_t*)&kFrame2);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+
+ // Dequeue the output of frame2.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame2);
+
+ ASSERT_EQ(test.frames_in_use, 2);
+ ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data);
+
+ // Signal end of stream (method 1). This should ensure that all the references
+ // are released.
+ status = Libgav1DecoderSignalEOS(test.decoder);
+
+ // libgav1 should have released all the reference frames now.
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Now, the decoder is ready to accept a new coded video sequence.
+
+ // Enqueue frame1 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame1, sizeof(kFrame1), 0,
+ (uint8_t*)&kFrame1);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame1.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame1);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+ ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data);
+
+ // Enqueue frame2 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame2, sizeof(kFrame2), 0,
+ (uint8_t*)&kFrame2);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+
+ // Dequeue the output of frame2.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame2);
+
+ ASSERT_EQ(test.frames_in_use, 2);
+ ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data);
+
+ // Signal end of stream (method 2). This should ensure that all the references
+ // are released.
+ Libgav1DecoderDestroy(test.decoder);
+ test.decoder = NULL;
+
+ // libgav1 should have released all the frames now.
+ ASSERT_EQ(test.frames_in_use, 0);
+}
+
+static void
+DecoderTestNonFrameParallelModeEnqueueMultipleFramesWithoutDequeuing(void) {
+ DecoderTest test;
+ DecoderTestInit(&test);
+ DecoderTestSetUp(&test);
+
+ Libgav1StatusCode status;
+ const Libgav1DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame1, sizeof(kFrame1), 0,
+ (uint8_t*)&kFrame1);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ // Until the output of frame1 is dequeued, no other frames can be enqueued.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame2, sizeof(kFrame2), 0,
+ (uint8_t*)&kFrame2);
+ ASSERT_EQ(status, kLibgav1StatusTryAgain);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame1.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame1);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+
+ // Delete the decoder instance.
+ Libgav1DecoderDestroy(test.decoder);
+ test.decoder = NULL;
+
+ ASSERT_EQ(test.frames_in_use, 0);
+}
+
+static void DecoderTestNonFrameParallelModeEOSBeforeDequeuingLastFrame(void) {
+ DecoderTest test;
+ DecoderTestInit(&test);
+ DecoderTestSetUp(&test);
+
+ Libgav1StatusCode status;
+ const Libgav1DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame1, sizeof(kFrame1), 0,
+ (uint8_t*)&kFrame1);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame1.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame1);
+
+ // Enqueue frame2 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame2, sizeof(kFrame2), 0,
+ (uint8_t*)&kFrame2);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+
+ // Signal end of stream before dequeuing the output of frame2.
+ status = Libgav1DecoderSignalEOS(test.decoder);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ // In this case, the output of the last frame that was enqueued is lost (which
+ // is intentional since end of stream was signaled without dequeueing it).
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ Libgav1DecoderDestroy(test.decoder);
+ test.decoder = NULL;
+}
+
+static void DecoderTestNonFrameParallelModeInvalidFrameAfterEOS(void) {
+ DecoderTest test;
+ DecoderTestInit(&test);
+ DecoderTestSetUp(&test);
+
+ Libgav1StatusCode status;
+ const Libgav1DecoderBuffer* buffer = NULL;
+
+ // Enqueue frame1 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame1, sizeof(kFrame1), 0,
+ (uint8_t*)&kFrame1);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame1.
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+ ASSERT_NE(buffer, NULL);
+ ASSERT_EQ(test.released_input_buffer, &kFrame1);
+
+ ASSERT_EQ(test.frames_in_use, 1);
+
+ // Signal end of stream.
+ status = Libgav1DecoderSignalEOS(test.decoder);
+
+ // libgav1 should have released all the reference frames now.
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Now, the decoder is ready to accept a new coded video sequence. But, we
+ // try to enqueue a frame that does not have a sequence header (which is not
+ // allowed).
+
+ // Enqueue frame2 for decoding.
+ status = Libgav1DecoderEnqueueFrame(test.decoder, kFrame2, sizeof(kFrame2), 0,
+ (uint8_t*)&kFrame2);
+ ASSERT_EQ(status, kLibgav1StatusOk);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ // Dequeue the output of frame2 (this will fail since no sequence header has
+ // been seen since the last EOS signal).
+ status = Libgav1DecoderDequeueFrame(test.decoder, &buffer);
+ ASSERT_EQ(status, kLibgav1StatusBitstreamError);
+ ASSERT_EQ(test.released_input_buffer, &kFrame2);
+
+ ASSERT_EQ(test.frames_in_use, 0);
+
+ Libgav1DecoderDestroy(test.decoder);
+ test.decoder = NULL;
+}
+
+int main(void) {
+ fprintf(stderr, "C DecoderTest started\n");
+ DecoderTestAPIFlowForNonFrameParallelMode();
+ DecoderTestNonFrameParallelModeEnqueueMultipleFramesWithoutDequeuing();
+ DecoderTestNonFrameParallelModeEOSBeforeDequeuingLastFrame();
+ DecoderTestNonFrameParallelModeInvalidFrameAfterEOS();
+ fprintf(stderr, "C DecoderTest passed\n");
+ return 0;
+}
diff --git a/src/c_version_test.c b/src/c_version_test.c
new file mode 100644
index 0000000..e198ee7
--- /dev/null
+++ b/src/c_version_test.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2021 The libgav1 Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __cplusplus
+#error Do not compile this file with a C++ compiler
+#endif
+
+// clang-format off
+#include "src/gav1/version.h"
+// clang-format on
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ASSERT_EQ(a, b) \
+ do { \
+ if ((a) != (b)) { \
+ fprintf(stderr, "Assertion failure: (%s) == (%s), at %s:%d\n", #a, #b, \
+ __FILE__, __LINE__); \
+ fprintf(stderr, "C VersionTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_NE(a, b) \
+ do { \
+ if ((a) == (b)) { \
+ fprintf(stderr, "Assertion failure: (%s) != (%s), at %s:%d\n", #a, #b, \
+ __FILE__, __LINE__); \
+ fprintf(stderr, "C VersionTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_TRUE(a) \
+ do { \
+ if (!(a)) { \
+ fprintf(stderr, "Assertion failure: %s, at %s:%d\n", #a, __FILE__, \
+ __LINE__); \
+ fprintf(stderr, "C VersionTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define ASSERT_FALSE(a) \
+ do { \
+ if (a) { \
+ fprintf(stderr, "Assertion failure: !(%s), at %s:%d\n", #a, __FILE__, \
+ __LINE__); \
+ fprintf(stderr, "C VersionTest failed\n"); \
+ exit(1); \
+ } \
+ } while (0)
+
+static void VersionTestGetVersion(void) {
+ const int library_version = Libgav1GetVersion();
+ ASSERT_EQ((library_version >> 24) & 0xff, 0);
+ // Note if we link against a shared object there's potential for a mismatch
+ // if a different library is loaded at runtime.
+ ASSERT_EQ((library_version >> 16) & 0xff, LIBGAV1_MAJOR_VERSION);
+ ASSERT_EQ((library_version >> 8) & 0xff, LIBGAV1_MINOR_VERSION);
+ ASSERT_EQ(library_version & 0xff, LIBGAV1_PATCH_VERSION);
+
+ const int header_version = LIBGAV1_VERSION;
+ ASSERT_EQ((header_version >> 24) & 0xff, 0);
+ ASSERT_EQ((header_version >> 16) & 0xff, LIBGAV1_MAJOR_VERSION);
+ ASSERT_EQ((header_version >> 8) & 0xff, LIBGAV1_MINOR_VERSION);
+ ASSERT_EQ(header_version & 0xff, LIBGAV1_PATCH_VERSION);
+}
+
+static void VersionTestGetVersionString(void) {
+ const char* version = Libgav1GetVersionString();
+ ASSERT_NE(version, NULL);
+}
+
+static void VersionTestGetBuildConfiguration(void) {
+ const char* config = Libgav1GetBuildConfiguration();
+ ASSERT_NE(config, NULL);
+}
+
+int main(void) {
+ fprintf(stderr, "C VersionTest started\n");
+ VersionTestGetVersion();
+ VersionTestGetVersionString();
+ VersionTestGetBuildConfiguration();
+ fprintf(stderr, "C VersionTest passed\n");
+ return 0;
+}
diff --git a/src/decoder_buffer_test.cc b/src/decoder_buffer_test.cc
new file mode 100644
index 0000000..b1d8bb8
--- /dev/null
+++ b/src/decoder_buffer_test.cc
@@ -0,0 +1,38 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/gav1/decoder_buffer.h"
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+// Tests the emulation of C++ enumerators by constexpr constants.
+TEST(DecoderBufferTest, EnumTest) {
+ ColorRange color_range = kLibgav1ColorRangeFull;
+
+ // Verify that we get the -Wswitch warning unless the switch statement
+ // handles both kColorRangeStudio and kColorRangeFull:
+ // enumeration value 'kLibgav1ColorRangeFull' not handled in switch
+ switch (color_range) {
+ case kColorRangeStudio:
+ break;
+ case kColorRangeFull:
+ break;
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/decoder_impl.cc b/src/decoder_impl.cc
index e23903c..dbb9e81 100644
--- a/src/decoder_impl.cc
+++ b/src/decoder_impl.cc
@@ -1232,7 +1232,7 @@ StatusCode DecoderImpl::DecodeTiles(
LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
return kStatusOutOfMemory;
}
- if (sequence_header.enable_cdef) {
+ if (frame_header.cdef.bits > 0) {
if (!frame_scratch_buffer->cdef_index.Reset(
DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
@@ -1241,6 +1241,15 @@ StatusCode DecoderImpl::DecodeTiles(
return kStatusOutOfMemory;
}
}
+ if (do_cdef) {
+ if (!frame_scratch_buffer->cdef_skip.Reset(
+ DivideBy2(frame_header.rows4x4 + kMaxBlockHeight4x4),
+ DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
+ /*zero_initialize=*/true)) {
+ LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef skip.");
+ return kStatusOutOfMemory;
+ }
+ }
if (!frame_scratch_buffer->inter_transform_sizes.Reset(
frame_header.rows4x4 + kMaxBlockHeight4x4,
frame_header.columns4x4 + kMaxBlockWidth4x4,
@@ -1364,23 +1373,39 @@ StatusCode DecoderImpl::DecodeTiles(
const int pixel_size = sequence_header.color_config.bitdepth == 8
? sizeof(uint8_t)
: sizeof(uint16_t);
+ const int coefficients_size = kSuperResFilterTaps *
+ Align(frame_header.upscaled_width, 16) *
+ pixel_size;
if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
- kSuperResFilterTaps * Align(frame_header.upscaled_width, 16) *
- pixel_size)) {
+ coefficients_size)) {
LIBGAV1_DLOG(ERROR,
"Failed to Resize superres_coefficients[kPlaneTypeY].");
return kStatusOutOfMemory;
}
+#if LIBGAV1_MSAN
+ // Quiet SuperRes_NEON() msan warnings.
+ memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(), 0,
+ coefficients_size);
+#endif
+ const int uv_coefficients_size =
+ kSuperResFilterTaps *
+ Align(SubsampledValue(frame_header.upscaled_width, 1), 16) * pixel_size;
if (!sequence_header.color_config.is_monochrome &&
sequence_header.color_config.subsampling_x != 0 &&
!frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
- kSuperResFilterTaps *
- Align(SubsampledValue(frame_header.upscaled_width, 1), 16) *
- pixel_size)) {
+ uv_coefficients_size)) {
LIBGAV1_DLOG(ERROR,
"Failed to Resize superres_coefficients[kPlaneTypeUV].");
return kStatusOutOfMemory;
}
+#if LIBGAV1_MSAN
+ if (!sequence_header.color_config.is_monochrome &&
+ sequence_header.color_config.subsampling_x != 0) {
+ // Quiet SuperRes_NEON() msan warnings.
+ memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].get(), 0,
+ uv_coefficients_size);
+ }
+#endif
}
if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
@@ -1405,10 +1430,6 @@ StatusCode DecoderImpl::DecodeTiles(
}
}
- PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
- current_frame->buffer(), dsp,
- settings_.post_filter_mask);
-
if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
// We can parse the current frame if all the reference frames have been
// parsed.
@@ -1477,6 +1498,9 @@ StatusCode DecoderImpl::DecodeTiles(
}
}
+ PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
+ current_frame->buffer(), dsp,
+ settings_.post_filter_mask);
SymbolDecoderContext saved_symbol_decoder_context;
BlockingCounterWithStatus pending_tiles(tile_count);
for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
diff --git a/src/decoder_test.cc b/src/decoder_test.cc
new file mode 100644
index 0000000..de7d490
--- /dev/null
+++ b/src/decoder_test.cc
@@ -0,0 +1,352 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/gav1/decoder.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+// These two frames come from the libaom test vector av1-1-b8-01-size-32x32.ivf
+constexpr uint8_t kFrame1[] = {
+ 0x12, 0x0, 0xa, 0xa, 0x0, 0x0, 0x0, 0x2, 0x27, 0xfe, 0xff, 0xfc,
+ 0xc0, 0x20, 0x32, 0x93, 0x2, 0x10, 0x0, 0xa8, 0x80, 0x0, 0x3, 0x0,
+ 0x10, 0x10, 0x30, 0x0, 0xd3, 0xc6, 0xc6, 0x82, 0xaa, 0x5e, 0xbf, 0x82,
+ 0xf2, 0xa4, 0xa4, 0x29, 0xab, 0xda, 0xd7, 0x1, 0x5, 0x0, 0xb3, 0xde,
+ 0xa8, 0x6f, 0x8d, 0xbf, 0x1b, 0xa8, 0x25, 0xc3, 0x84, 0x7c, 0x1a, 0x2b,
+ 0x8b, 0x0, 0xff, 0x19, 0x1f, 0x45, 0x7e, 0xe0, 0xbe, 0xe1, 0x3a, 0x63,
+ 0xc2, 0xc6, 0x6e, 0xf4, 0xc8, 0xce, 0x11, 0xe1, 0x9f, 0x48, 0x64, 0x72,
+ 0xeb, 0xbb, 0x4f, 0xf3, 0x94, 0xb4, 0xb6, 0x9d, 0x4f, 0x4, 0x18, 0x5e,
+ 0x5e, 0x1b, 0x65, 0x49, 0x74, 0x90, 0x13, 0x50, 0xef, 0x8c, 0xb8, 0xe8,
+ 0xd9, 0x8e, 0x9c, 0xc9, 0x4d, 0xda, 0x60, 0x6a, 0xa, 0xf9, 0x75, 0xd0,
+ 0x62, 0x69, 0xd, 0xf5, 0xdc, 0xa9, 0xb9, 0x4c, 0x8, 0x9e, 0x33, 0x15,
+ 0xa3, 0xe1, 0x42, 0x0, 0xe2, 0xb0, 0x46, 0xd0, 0xf7, 0xad, 0x55, 0xbc,
+ 0x75, 0xe9, 0xe3, 0x1f, 0xa3, 0x41, 0x11, 0xba, 0xaa, 0x81, 0xf3, 0xcb,
+ 0x82, 0x87, 0x71, 0x0, 0xe6, 0xb9, 0x8c, 0xe1, 0xe9, 0xd3, 0x21, 0xcc,
+ 0xcd, 0xe7, 0x12, 0xb9, 0xe, 0x43, 0x6a, 0xa3, 0x76, 0x5c, 0x35, 0x90,
+ 0x45, 0x36, 0x52, 0xb4, 0x2d, 0xa3, 0x55, 0xde, 0x20, 0xf8, 0x80, 0xe1,
+ 0x26, 0x46, 0x1b, 0x3f, 0x59, 0xc7, 0x2e, 0x5b, 0x4a, 0x73, 0xf8, 0xb3,
+ 0xf4, 0x62, 0xf4, 0xf5, 0xa4, 0xc2, 0xae, 0x9e, 0xa6, 0x9c, 0x10, 0xbb,
+ 0xe1, 0xd6, 0x88, 0x75, 0xb9, 0x85, 0x48, 0xe5, 0x7, 0x12, 0xf3, 0x11,
+ 0x85, 0x8e, 0xa2, 0x95, 0x9d, 0xed, 0x50, 0xfb, 0x6, 0x5a, 0x1, 0x37,
+ 0xc4, 0x8e, 0x9e, 0x73, 0x9b, 0x96, 0x64, 0xbd, 0x42, 0xb, 0x80, 0xde,
+ 0x57, 0x86, 0xcb, 0x7d, 0xab, 0x12, 0xb2, 0xcc, 0xe6, 0xea, 0xb5, 0x89,
+ 0xeb, 0x91, 0xb3, 0x93, 0xb2, 0x4f, 0x2f, 0x5b, 0xf3, 0x72, 0x12, 0x51,
+ 0x56, 0x75, 0xb3, 0xdd, 0x49, 0xb6, 0x5b, 0x77, 0xbe, 0xc5, 0xd7, 0xd4,
+ 0xaf, 0xd6, 0x6b, 0x38};
+
+constexpr uint8_t kFrame2[] = {
+ 0x12, 0x0, 0x32, 0x33, 0x30, 0x3, 0xc3, 0x0, 0xa7, 0x2e, 0x46,
+ 0xa8, 0x80, 0x0, 0x3, 0x0, 0x10, 0x1, 0x0, 0xa0, 0x0, 0xed,
+ 0xb1, 0x51, 0x15, 0x58, 0xc7, 0x69, 0x3, 0x26, 0x35, 0xeb, 0x5a,
+ 0x2d, 0x7a, 0x53, 0x24, 0x26, 0x20, 0xa6, 0x11, 0x7, 0x49, 0x76,
+ 0xa3, 0xc7, 0x62, 0xf8, 0x3, 0x32, 0xb0, 0x98, 0x17, 0x3d, 0x80};
+
+class DecoderTest : public testing::Test {
+ public:
+ void SetUp() override;
+ void IncrementFramesInUse() { ++frames_in_use_; }
+ void DecrementFramesInUse() { --frames_in_use_; }
+ void SetBufferPrivateData(void* buffer_private_data) {
+ buffer_private_data_ = buffer_private_data;
+ }
+ void SetReleasedInputBuffer(void* released_input_buffer) {
+ released_input_buffer_ = released_input_buffer;
+ }
+
+ protected:
+ std::unique_ptr<Decoder> decoder_;
+ int frames_in_use_ = 0;
+ void* buffer_private_data_ = nullptr;
+ void* released_input_buffer_ = nullptr;
+};
+
+struct FrameBufferPrivate {
+ uint8_t* data[3];
+};
+
+extern "C" {
+
+static Libgav1StatusCode GetFrameBuffer(
+ void* callback_private_data, int bitdepth, Libgav1ImageFormat image_format,
+ int width, int height, int left_border, int right_border, int top_border,
+ int bottom_border, int stride_alignment, Libgav1FrameBuffer* frame_buffer) {
+ Libgav1FrameBufferInfo info;
+ Libgav1StatusCode status = Libgav1ComputeFrameBufferInfo(
+ bitdepth, image_format, width, height, left_border, right_border,
+ top_border, bottom_border, stride_alignment, &info);
+ if (status != kLibgav1StatusOk) return status;
+
+ std::unique_ptr<FrameBufferPrivate> buffer_private(new (std::nothrow)
+ FrameBufferPrivate);
+ if (buffer_private == nullptr) return kLibgav1StatusOutOfMemory;
+
+ for (int i = 0; i < 3; ++i) {
+ const size_t size = (i == 0) ? info.y_buffer_size : info.uv_buffer_size;
+ buffer_private->data[i] = new (std::nothrow) uint8_t[size];
+ if (buffer_private->data[i] == nullptr) {
+ return kLibgav1StatusOutOfMemory;
+ }
+ }
+
+ uint8_t* const y_buffer = buffer_private->data[0];
+ uint8_t* const u_buffer =
+ (info.uv_buffer_size != 0) ? buffer_private->data[1] : nullptr;
+ uint8_t* const v_buffer =
+ (info.uv_buffer_size != 0) ? buffer_private->data[2] : nullptr;
+
+ status = Libgav1SetFrameBuffer(&info, y_buffer, u_buffer, v_buffer,
+ buffer_private.release(), frame_buffer);
+ if (status != kLibgav1StatusOk) return status;
+
+ auto* const decoder_test = static_cast<DecoderTest*>(callback_private_data);
+ decoder_test->IncrementFramesInUse();
+ decoder_test->SetBufferPrivateData(frame_buffer->private_data);
+ return kLibgav1StatusOk;
+}
+
+static void ReleaseFrameBuffer(void* callback_private_data,
+ void* buffer_private_data) {
+ auto* buffer_private = static_cast<FrameBufferPrivate*>(buffer_private_data);
+ for (auto& data : buffer_private->data) {
+ delete[] data;
+ }
+ delete buffer_private;
+ auto* const decoder_test = static_cast<DecoderTest*>(callback_private_data);
+ decoder_test->DecrementFramesInUse();
+}
+
+static void ReleaseInputBuffer(void* private_data, void* input_buffer) {
+ auto* const decoder_test = static_cast<DecoderTest*>(private_data);
+ decoder_test->SetReleasedInputBuffer(input_buffer);
+}
+
+} // extern "C"
+
+void DecoderTest::SetUp() {
+ decoder_.reset(new (std::nothrow) Decoder());
+ ASSERT_NE(decoder_, nullptr);
+ DecoderSettings settings = {};
+ settings.frame_parallel = false;
+ settings.get_frame_buffer = GetFrameBuffer;
+ settings.release_frame_buffer = ReleaseFrameBuffer;
+ settings.callback_private_data = this;
+ settings.release_input_buffer = ReleaseInputBuffer;
+ ASSERT_EQ(decoder_->Init(&settings), kStatusOk);
+}
+
+TEST_F(DecoderTest, APIFlowForNonFrameParallelMode) {
+ StatusCode status;
+ const DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = decoder_->EnqueueFrame(kFrame1, sizeof(kFrame1), 0,
+ const_cast<uint8_t*>(kFrame1));
+ ASSERT_EQ(status, kStatusOk);
+
+ // In non-frame-parallel mode, decoding happens only in the DequeueFrame call.
+ // So there should be no frames in use yet.
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame1.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame1);
+
+ // libgav1 has decoded frame1 and is holding a reference to it.
+ EXPECT_EQ(frames_in_use_, 1);
+ EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data);
+
+ // Enqueue frame2 for decoding.
+ status = decoder_->EnqueueFrame(kFrame2, sizeof(kFrame2), 0,
+ const_cast<uint8_t*>(kFrame2));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 1);
+
+ // Dequeue the output of frame2.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame2);
+
+ EXPECT_EQ(frames_in_use_, 2);
+ EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data);
+
+ // Signal end of stream (method 1). This should ensure that all the references
+ // are released.
+ status = decoder_->SignalEOS();
+
+ // libgav1 should have released all the reference frames now.
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Now, the decoder is ready to accept a new coded video sequence.
+
+ // Enqueue frame1 for decoding.
+ status = decoder_->EnqueueFrame(kFrame1, sizeof(kFrame1), 0,
+ const_cast<uint8_t*>(kFrame1));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame1.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame1);
+
+ EXPECT_EQ(frames_in_use_, 1);
+ EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data);
+
+ // Enqueue frame2 for decoding.
+ status = decoder_->EnqueueFrame(kFrame2, sizeof(kFrame2), 0,
+ const_cast<uint8_t*>(kFrame2));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 1);
+
+ // Dequeue the output of frame2.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame2);
+
+ EXPECT_EQ(frames_in_use_, 2);
+ EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data);
+
+ // Signal end of stream (method 2). This should ensure that all the references
+ // are released.
+ decoder_ = nullptr;
+
+ // libgav1 should have released all the frames now.
+ EXPECT_EQ(frames_in_use_, 0);
+}
+
+TEST_F(DecoderTest, NonFrameParallelModeEnqueueMultipleFramesWithoutDequeuing) {
+ StatusCode status;
+ const DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = decoder_->EnqueueFrame(kFrame1, sizeof(kFrame1), 0,
+ const_cast<uint8_t*>(kFrame1));
+ ASSERT_EQ(status, kStatusOk);
+
+ // Until the output of frame1 is dequeued, no other frames can be enqueued.
+ status = decoder_->EnqueueFrame(kFrame2, sizeof(kFrame2), 0,
+ const_cast<uint8_t*>(kFrame2));
+ ASSERT_EQ(status, kStatusTryAgain);
+
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame1.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame1);
+
+ EXPECT_EQ(frames_in_use_, 1);
+
+ // Delete the decoder instance.
+ decoder_ = nullptr;
+
+ EXPECT_EQ(frames_in_use_, 0);
+}
+
+TEST_F(DecoderTest, NonFrameParallelModeEOSBeforeDequeuingLastFrame) {
+ StatusCode status;
+ const DecoderBuffer* buffer;
+
+ // Enqueue frame1 for decoding.
+ status = decoder_->EnqueueFrame(kFrame1, sizeof(kFrame1), 0,
+ const_cast<uint8_t*>(kFrame1));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame1.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame1);
+
+ // Enqueue frame2 for decoding.
+ status = decoder_->EnqueueFrame(kFrame2, sizeof(kFrame2), 0,
+ const_cast<uint8_t*>(kFrame2));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 1);
+
+ // Signal end of stream before dequeuing the output of frame2.
+ status = decoder_->SignalEOS();
+ ASSERT_EQ(status, kStatusOk);
+
+ // In this case, the output of the last frame that was enqueued is lost (which
+ // is intentional since end of stream was signaled without dequeueing it).
+ EXPECT_EQ(frames_in_use_, 0);
+}
+
+TEST_F(DecoderTest, NonFrameParallelModeInvalidFrameAfterEOS) {
+ StatusCode status;
+ const DecoderBuffer* buffer = nullptr;
+
+ // Enqueue frame1 for decoding.
+ status = decoder_->EnqueueFrame(kFrame1, sizeof(kFrame1), 0,
+ const_cast<uint8_t*>(kFrame1));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame1.
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusOk);
+ ASSERT_NE(buffer, nullptr);
+ EXPECT_EQ(released_input_buffer_, &kFrame1);
+
+ EXPECT_EQ(frames_in_use_, 1);
+
+ // Signal end of stream.
+ status = decoder_->SignalEOS();
+
+ // libgav1 should have released all the reference frames now.
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Now, the decoder is ready to accept a new coded video sequence. But, we
+ // try to enqueue a frame that does not have a sequence header (which is not
+ // allowed).
+
+ // Enqueue frame2 for decoding.
+ status = decoder_->EnqueueFrame(kFrame2, sizeof(kFrame2), 0,
+ const_cast<uint8_t*>(kFrame2));
+ ASSERT_EQ(status, kStatusOk);
+
+ EXPECT_EQ(frames_in_use_, 0);
+
+ // Dequeue the output of frame2 (this will fail since no sequence header has
+ // been seen since the last EOS signal).
+ status = decoder_->DequeueFrame(&buffer);
+ ASSERT_EQ(status, kStatusBitstreamError);
+ EXPECT_EQ(released_input_buffer_, &kFrame2);
+
+ EXPECT_EQ(frames_in_use_, 0);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/dsp/arm/average_blend_neon.cc b/src/dsp/arm/average_blend_neon.cc
index 5b4c094..3603750 100644
--- a/src/dsp/arm/average_blend_neon.cc
+++ b/src/dsp/arm/average_blend_neon.cc
@@ -40,17 +40,19 @@ constexpr int kInterPostRoundBit =
namespace low_bitdepth {
namespace {
-inline uint8x8_t AverageBlend8Row(const int16_t* prediction_0,
- const int16_t* prediction_1) {
+inline uint8x8_t AverageBlend8Row(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT
+ prediction_1) {
const int16x8_t pred0 = vld1q_s16(prediction_0);
const int16x8_t pred1 = vld1q_s16(prediction_1);
const int16x8_t res = vaddq_s16(pred0, pred1);
return vqrshrun_n_s16(res, kInterPostRoundBit + 1);
}
-inline void AverageBlendLargeRow(const int16_t* prediction_0,
- const int16_t* prediction_1, const int width,
- uint8_t* dest) {
+inline void AverageBlendLargeRow(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ const int width,
+ uint8_t* LIBGAV1_RESTRICT dest) {
int x = width;
do {
const int16x8_t pred_00 = vld1q_s16(prediction_0);
@@ -71,8 +73,10 @@ inline void AverageBlendLargeRow(const int16_t* prediction_0,
} while (x != 0);
}
-void AverageBlend_NEON(const void* prediction_0, const void* prediction_1,
- const int width, const int height, void* const dest,
+void AverageBlend_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
@@ -139,10 +143,10 @@ void Init8bpp() {
namespace high_bitdepth {
namespace {
-inline uint16x8_t AverageBlend8Row(const uint16_t* prediction_0,
- const uint16_t* prediction_1,
- const int32x4_t compound_offset,
- const uint16x8_t v_bitdepth) {
+inline uint16x8_t AverageBlend8Row(
+ const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1,
+ const int32x4_t compound_offset, const uint16x8_t v_bitdepth) {
const uint16x8_t pred0 = vld1q_u16(prediction_0);
const uint16x8_t pred1 = vld1q_u16(prediction_1);
const uint32x4_t pred_lo =
@@ -158,9 +162,10 @@ inline uint16x8_t AverageBlend8Row(const uint16_t* prediction_0,
return vminq_u16(vcombine_u16(res_lo, res_hi), v_bitdepth);
}
-inline void AverageBlendLargeRow(const uint16_t* prediction_0,
- const uint16_t* prediction_1, const int width,
- uint16_t* dest,
+inline void AverageBlendLargeRow(const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1,
+ const int width,
+ uint16_t* LIBGAV1_RESTRICT dest,
const int32x4_t compound_offset,
const uint16x8_t v_bitdepth) {
int x = width;
@@ -181,8 +186,10 @@ inline void AverageBlendLargeRow(const uint16_t* prediction_0,
} while (x != 0);
}
-void AverageBlend_NEON(const void* prediction_0, const void* prediction_1,
- const int width, const int height, void* const dest,
+void AverageBlend_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
diff --git a/src/dsp/arm/cdef_neon.cc b/src/dsp/arm/cdef_neon.cc
index 60c72d6..da271f2 100644
--- a/src/dsp/arm/cdef_neon.cc
+++ b/src/dsp/arm/cdef_neon.cc
@@ -33,7 +33,6 @@
namespace libgav1 {
namespace dsp {
-namespace low_bitdepth {
namespace {
#include "src/dsp/cdef.inc"
@@ -234,7 +233,8 @@ LIBGAV1_ALWAYS_INLINE void AddPartial_D5_D7(uint8x8_t* v_src,
*partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[3], v_zero, 5));
}
-LIBGAV1_ALWAYS_INLINE void AddPartial(const void* const source,
+template <int bitdepth>
+LIBGAV1_ALWAYS_INLINE void AddPartial(const void* LIBGAV1_RESTRICT const source,
ptrdiff_t stride, uint16x8_t* partial_lo,
uint16x8_t* partial_hi) {
const auto* src = static_cast<const uint8_t*>(source);
@@ -249,11 +249,20 @@ LIBGAV1_ALWAYS_INLINE void AddPartial(const void* const source,
// 60 61 62 63 64 65 66 67
// 70 71 72 73 74 75 76 77
uint8x8_t v_src[8];
- for (int i = 0; i < 8; ++i) {
- v_src[i] = vld1_u8(src);
- src += stride;
+ if (bitdepth == kBitdepth8) {
+ for (auto& v : v_src) {
+ v = vld1_u8(src);
+ src += stride;
+ }
+ } else {
+ // bitdepth - 8
+ constexpr int src_shift = (bitdepth == kBitdepth10) ? 2 : 4;
+ for (auto& v : v_src) {
+ v = vshrn_n_u16(vld1q_u16(reinterpret_cast<const uint16_t*>(src)),
+ src_shift);
+ src += stride;
+ }
}
-
// partial for direction 2
// --------------------------------------------------------------------------
// partial[2][i] += x;
@@ -358,15 +367,19 @@ uint32_t CostOdd(const uint16x8_t a, const uint16x8_t b, const uint32x4_t mask,
return SumVector(c);
}
-void CdefDirection_NEON(const void* const source, ptrdiff_t stride,
- uint8_t* const direction, int* const variance) {
+template <int bitdepth>
+void CdefDirection_NEON(const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT const direction,
+ int* LIBGAV1_RESTRICT const variance) {
assert(direction != nullptr);
assert(variance != nullptr);
const auto* src = static_cast<const uint8_t*>(source);
+
uint32_t cost[8];
uint16x8_t partial_lo[8], partial_hi[8];
- AddPartial(src, stride, partial_lo, partial_hi);
+ AddPartial<bitdepth>(src, stride, partial_lo, partial_hi);
cost[2] = SquareAccumulate(partial_lo[2]);
cost[6] = SquareAccumulate(partial_lo[6]);
@@ -407,8 +420,9 @@ void CdefDirection_NEON(const void* const source, ptrdiff_t stride,
// CdefFilter
// Load 4 vectors based on the given |direction|.
-void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
- uint16x8_t* output, const int direction) {
+void LoadDirection(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, uint16x8_t* output,
+ const int direction) {
// Each |direction| describes a different set of source values. Expand this
// set by negating each set. For |direction| == 0 this gives a diagonal line
// from top right to bottom left. The first value is y, the second x. Negative
@@ -432,8 +446,9 @@ void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
// Load 4 vectors based on the given |direction|. Use when |block_width| == 4 to
// do 2 rows at a time.
-void LoadDirection4(const uint16_t* const src, const ptrdiff_t stride,
- uint16x8_t* output, const int direction) {
+void LoadDirection4(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, uint16x8_t* output,
+ const int direction) {
const int y_0 = kCdefDirections[direction][0][0];
const int x_0 = kCdefDirections[direction][0][1];
const int y_1 = kCdefDirections[direction][1][0];
@@ -469,12 +484,90 @@ int16x8_t Constrain(const uint16x8_t pixel, const uint16x8_t reference,
vsubq_u16(veorq_u16(clamp_abs_diff, sign), sign));
}
-template <int width, bool enable_primary = true, bool enable_secondary = true>
-void CdefFilter_NEON(const uint16_t* src, const ptrdiff_t src_stride,
- const int height, const int primary_strength,
- const int secondary_strength, const int damping,
- const int direction, void* dest,
- const ptrdiff_t dst_stride) {
+template <typename Pixel>
+uint16x8_t GetMaxPrimary(uint16x8_t* primary_val, uint16x8_t max,
+ uint16x8_t cdef_large_value_mask) {
+ if (sizeof(Pixel) == 1) {
+ // The source is 16 bits, however, we only really care about the lower
+ // 8 bits. The upper 8 bits contain the "large" flag. After the final
+ // primary max has been calculated, zero out the upper 8 bits. Use this
+ // to find the "16 bit" max.
+ const uint8x16_t max_p01 = vmaxq_u8(vreinterpretq_u8_u16(primary_val[0]),
+ vreinterpretq_u8_u16(primary_val[1]));
+ const uint8x16_t max_p23 = vmaxq_u8(vreinterpretq_u8_u16(primary_val[2]),
+ vreinterpretq_u8_u16(primary_val[3]));
+ const uint16x8_t max_p = vreinterpretq_u16_u8(vmaxq_u8(max_p01, max_p23));
+ max = vmaxq_u16(max, vandq_u16(max_p, cdef_large_value_mask));
+ } else {
+ // Convert kCdefLargeValue to 0 before calculating max.
+ max = vmaxq_u16(max, vandq_u16(primary_val[0], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(primary_val[1], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(primary_val[2], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(primary_val[3], cdef_large_value_mask));
+ }
+ return max;
+}
+
+template <typename Pixel>
+uint16x8_t GetMaxSecondary(uint16x8_t* secondary_val, uint16x8_t max,
+ uint16x8_t cdef_large_value_mask) {
+ if (sizeof(Pixel) == 1) {
+ const uint8x16_t max_s01 = vmaxq_u8(vreinterpretq_u8_u16(secondary_val[0]),
+ vreinterpretq_u8_u16(secondary_val[1]));
+ const uint8x16_t max_s23 = vmaxq_u8(vreinterpretq_u8_u16(secondary_val[2]),
+ vreinterpretq_u8_u16(secondary_val[3]));
+ const uint8x16_t max_s45 = vmaxq_u8(vreinterpretq_u8_u16(secondary_val[4]),
+ vreinterpretq_u8_u16(secondary_val[5]));
+ const uint8x16_t max_s67 = vmaxq_u8(vreinterpretq_u8_u16(secondary_val[6]),
+ vreinterpretq_u8_u16(secondary_val[7]));
+ const uint16x8_t max_s = vreinterpretq_u16_u8(
+ vmaxq_u8(vmaxq_u8(max_s01, max_s23), vmaxq_u8(max_s45, max_s67)));
+ max = vmaxq_u16(max, vandq_u16(max_s, cdef_large_value_mask));
+ } else {
+ max = vmaxq_u16(max, vandq_u16(secondary_val[0], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[1], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[2], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[3], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[4], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[5], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[6], cdef_large_value_mask));
+ max = vmaxq_u16(max, vandq_u16(secondary_val[7], cdef_large_value_mask));
+ }
+ return max;
+}
+
+template <typename Pixel, int width>
+void StorePixels(void* dest, ptrdiff_t dst_stride, int16x8_t result) {
+ auto* const dst8 = static_cast<uint8_t*>(dest);
+ if (sizeof(Pixel) == 1) {
+ const uint8x8_t dst_pixel = vqmovun_s16(result);
+ if (width == 8) {
+ vst1_u8(dst8, dst_pixel);
+ } else {
+ StoreLo4(dst8, dst_pixel);
+ StoreHi4(dst8 + dst_stride, dst_pixel);
+ }
+ } else {
+ const uint16x8_t dst_pixel = vreinterpretq_u16_s16(result);
+ auto* const dst16 = reinterpret_cast<uint16_t*>(dst8);
+ if (width == 8) {
+ vst1q_u16(dst16, dst_pixel);
+ } else {
+ auto* const dst16_next_row =
+ reinterpret_cast<uint16_t*>(dst8 + dst_stride);
+ vst1_u16(dst16, vget_low_u16(dst_pixel));
+ vst1_u16(dst16_next_row, vget_high_u16(dst_pixel));
+ }
+ }
+}
+
+template <int width, typename Pixel, bool enable_primary = true,
+ bool enable_secondary = true>
+void CdefFilter_NEON(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ const int primary_strength, const int secondary_strength,
+ const int damping, const int direction,
+ void* LIBGAV1_RESTRICT dest, const ptrdiff_t dst_stride) {
static_assert(width == 8 || width == 4, "");
static_assert(enable_primary || enable_secondary, "");
constexpr bool clipping_required = enable_primary && enable_secondary;
@@ -488,22 +581,34 @@ void CdefFilter_NEON(const uint16_t* src, const ptrdiff_t src_stride,
// FloorLog2() requires input to be > 0.
// 8-bit damping range: Y: [3, 6], UV: [2, 5].
+ // 10-bit damping range: Y: [3, 6 + 2], UV: [2, 5 + 2].
if (enable_primary) {
- // primary_strength: [0, 15] -> FloorLog2: [0, 3] so a clamp is necessary
- // for UV filtering.
+ // 8-bit primary_strength: [0, 15] -> FloorLog2: [0, 3] so a clamp is
+ // necessary for UV filtering.
+ // 10-bit primary_strength: [0, 15 << 2].
primary_damping_shift =
vdupq_n_s16(-std::max(0, damping - FloorLog2(primary_strength)));
}
+
if (enable_secondary) {
- // secondary_strength: [0, 4] -> FloorLog2: [0, 2] so no clamp to 0 is
- // necessary.
- assert(damping - FloorLog2(secondary_strength) >= 0);
- secondary_damping_shift =
- vdupq_n_s16(-(damping - FloorLog2(secondary_strength)));
+ if (sizeof(Pixel) == 1) {
+ // secondary_strength: [0, 4] -> FloorLog2: [0, 2] so no clamp to 0 is
+ // necessary.
+ assert(damping - FloorLog2(secondary_strength) >= 0);
+ secondary_damping_shift =
+ vdupq_n_s16(-(damping - FloorLog2(secondary_strength)));
+ } else {
+ // secondary_strength: [0, 4 << 2]
+ secondary_damping_shift =
+ vdupq_n_s16(-std::max(0, damping - FloorLog2(secondary_strength)));
+ }
}
- const int primary_tap_0 = kCdefPrimaryTaps[primary_strength & 1][0];
- const int primary_tap_1 = kCdefPrimaryTaps[primary_strength & 1][1];
+ constexpr int coeff_shift = (sizeof(Pixel) == 1) ? 0 : kBitdepth10 - 8;
+ const int primary_tap_0 =
+ kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][0];
+ const int primary_tap_1 =
+ kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][1];
int y = height;
do {
@@ -533,19 +638,7 @@ void CdefFilter_NEON(const uint16_t* src, const ptrdiff_t src_stride,
min = vminq_u16(min, primary_val[2]);
min = vminq_u16(min, primary_val[3]);
- // The source is 16 bits, however, we only really care about the lower
- // 8 bits. The upper 8 bits contain the "large" flag. After the final
- // primary max has been calculated, zero out the upper 8 bits. Use this
- // to find the "16 bit" max.
- const uint8x16_t max_p01 =
- vmaxq_u8(vreinterpretq_u8_u16(primary_val[0]),
- vreinterpretq_u8_u16(primary_val[1]));
- const uint8x16_t max_p23 =
- vmaxq_u8(vreinterpretq_u8_u16(primary_val[2]),
- vreinterpretq_u8_u16(primary_val[3]));
- const uint16x8_t max_p =
- vreinterpretq_u16_u8(vmaxq_u8(max_p01, max_p23));
- max = vmaxq_u16(max, vandq_u16(max_p, cdef_large_value_mask));
+ max = GetMaxPrimary<Pixel>(primary_val, max, cdef_large_value_mask);
}
sum = Constrain(primary_val[0], pixel, primary_threshold,
@@ -588,21 +681,7 @@ void CdefFilter_NEON(const uint16_t* src, const ptrdiff_t src_stride,
min = vminq_u16(min, secondary_val[6]);
min = vminq_u16(min, secondary_val[7]);
- const uint8x16_t max_s01 =
- vmaxq_u8(vreinterpretq_u8_u16(secondary_val[0]),
- vreinterpretq_u8_u16(secondary_val[1]));
- const uint8x16_t max_s23 =
- vmaxq_u8(vreinterpretq_u8_u16(secondary_val[2]),
- vreinterpretq_u8_u16(secondary_val[3]));
- const uint8x16_t max_s45 =
- vmaxq_u8(vreinterpretq_u8_u16(secondary_val[4]),
- vreinterpretq_u8_u16(secondary_val[5]));
- const uint8x16_t max_s67 =
- vmaxq_u8(vreinterpretq_u8_u16(secondary_val[6]),
- vreinterpretq_u8_u16(secondary_val[7]));
- const uint16x8_t max_s = vreinterpretq_u16_u8(
- vmaxq_u8(vmaxq_u8(max_s01, max_s23), vmaxq_u8(max_s45, max_s67)));
- max = vmaxq_u16(max, vandq_u16(max_s, cdef_large_value_mask));
+ max = GetMaxSecondary<Pixel>(secondary_val, max, cdef_large_value_mask);
}
sum = vmlaq_n_s16(sum,
@@ -647,41 +726,70 @@ void CdefFilter_NEON(const uint16_t* src, const ptrdiff_t src_stride,
result = vmaxq_s16(result, vreinterpretq_s16_u16(min));
}
- const uint8x8_t dst_pixel = vqmovun_s16(result);
- if (width == 8) {
- src += src_stride;
- vst1_u8(dst, dst_pixel);
- dst += dst_stride;
- --y;
- } else {
- src += src_stride << 1;
- StoreLo4(dst, dst_pixel);
- dst += dst_stride;
- StoreHi4(dst, dst_pixel);
- dst += dst_stride;
- y -= 2;
- }
+ StorePixels<Pixel, width>(dst, dst_stride, result);
+
+ src += (width == 8) ? src_stride : src_stride << 1;
+ dst += (width == 8) ? dst_stride : dst_stride << 1;
+ y -= (width == 8) ? 1 : 2;
} while (y != 0);
}
+} // namespace
+
+namespace low_bitdepth {
+namespace {
+
void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
- dsp->cdef_direction = CdefDirection_NEON;
- dsp->cdef_filters[0][0] = CdefFilter_NEON<4>;
- dsp->cdef_filters[0][1] =
- CdefFilter_NEON<4, /*enable_primary=*/true, /*enable_secondary=*/false>;
- dsp->cdef_filters[0][2] = CdefFilter_NEON<4, /*enable_primary=*/false>;
- dsp->cdef_filters[1][0] = CdefFilter_NEON<8>;
- dsp->cdef_filters[1][1] =
- CdefFilter_NEON<8, /*enable_primary=*/true, /*enable_secondary=*/false>;
- dsp->cdef_filters[1][2] = CdefFilter_NEON<8, /*enable_primary=*/false>;
+ dsp->cdef_direction = CdefDirection_NEON<kBitdepth8>;
+ dsp->cdef_filters[0][0] = CdefFilter_NEON<4, uint8_t>;
+ dsp->cdef_filters[0][1] = CdefFilter_NEON<4, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_NEON<4, uint8_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_NEON<8, uint8_t>;
+ dsp->cdef_filters[1][1] = CdefFilter_NEON<8, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_NEON<8, uint8_t, /*enable_primary=*/false>;
}
} // namespace
} // namespace low_bitdepth
-void CdefInit_NEON() { low_bitdepth::Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->cdef_direction = CdefDirection_NEON<kBitdepth10>;
+ dsp->cdef_filters[0][0] = CdefFilter_NEON<4, uint16_t>;
+ dsp->cdef_filters[0][1] =
+ CdefFilter_NEON<4, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_NEON<4, uint16_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_NEON<8, uint16_t>;
+ dsp->cdef_filters[1][1] =
+ CdefFilter_NEON<8, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_NEON<8, uint16_t, /*enable_primary=*/false>;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void CdefInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/cdef_neon.h b/src/dsp/arm/cdef_neon.h
index 53d5f86..ef8ed3c 100644
--- a/src/dsp/arm/cdef_neon.h
+++ b/src/dsp/arm/cdef_neon.h
@@ -33,6 +33,9 @@ void CdefInit_NEON();
#if LIBGAV1_ENABLE_NEON
#define LIBGAV1_Dsp8bpp_CdefDirection LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_CdefFilters LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_CdefDirection LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_CdefFilters LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_CDEF_NEON_H_
diff --git a/src/dsp/arm/common_neon.h b/src/dsp/arm/common_neon.h
index 05e0d05..9c46525 100644
--- a/src/dsp/arm/common_neon.h
+++ b/src/dsp/arm/common_neon.h
@@ -23,9 +23,13 @@
#include <arm_neon.h>
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
#include <cstring>
+#include "src/utils/compiler_attributes.h"
+
#if 0
#include <cstdio>
#include <string>
@@ -183,6 +187,20 @@ inline void PrintHex(const int x, const char* name) {
#define PD(x) PrintReg(x, #x)
#define PX(x) PrintHex(x, #x)
+#if LIBGAV1_MSAN
+#include <sanitizer/msan_interface.h>
+
+inline void PrintShadow(const void* r, const char* const name,
+ const size_t size) {
+ if (kEnablePrintRegs) {
+ fprintf(stderr, "Shadow for %s:\n", name);
+ __msan_print_shadow(r, size);
+ }
+}
+#define PS(var, N) PrintShadow(var, #var, N)
+
+#endif // LIBGAV1_MSAN
+
#endif // 0
namespace libgav1 {
@@ -210,6 +228,14 @@ inline uint8x8_t Load2(const void* const buf, uint8x8_t val) {
vld1_lane_u16(&temp, vreinterpret_u16_u8(val), lane));
}
+template <int lane>
+inline uint16x4_t Load2(const void* const buf, uint16x4_t val) {
+ uint32_t temp;
+ memcpy(&temp, buf, 4);
+ return vreinterpret_u16_u32(
+ vld1_lane_u32(&temp, vreinterpret_u32_u16(val), lane));
+}
+
// Load 4 uint8_t values into the low half of a uint8x8_t register. Zeros the
// register before loading the values. Use caution when using this in loops
// because it will re-zero the register before loading on every iteration.
@@ -229,6 +255,96 @@ inline uint8x8_t Load4(const void* const buf, uint8x8_t val) {
vld1_lane_u32(&temp, vreinterpret_u32_u8(val), lane));
}
+// Convenience functions for 16-bit loads from a uint8_t* source.
+inline uint16x4_t Load4U16(const void* const buf) {
+ return vld1_u16(static_cast<const uint16_t*>(buf));
+}
+
+inline uint16x8_t Load8U16(const void* const buf) {
+ return vld1q_u16(static_cast<const uint16_t*>(buf));
+}
+
+//------------------------------------------------------------------------------
+// Load functions to avoid MemorySanitizer's use-of-uninitialized-value warning.
+
+inline uint8x8_t MaskOverreads(const uint8x8_t source,
+ const ptrdiff_t over_read_in_bytes) {
+ uint8x8_t dst = source;
+#if LIBGAV1_MSAN
+ if (over_read_in_bytes > 0) {
+ uint8x8_t mask = vdup_n_u8(0);
+ uint8x8_t valid_element_mask = vdup_n_u8(-1);
+ const int valid_bytes =
+ std::min(8, 8 - static_cast<int>(over_read_in_bytes));
+ for (int i = 0; i < valid_bytes; ++i) {
+ // Feed ff bytes into |mask| one at a time.
+ mask = vext_u8(valid_element_mask, mask, 7);
+ }
+ dst = vand_u8(dst, mask);
+ }
+#else
+ static_cast<void>(over_read_in_bytes);
+#endif
+ return dst;
+}
+
+inline uint8x16_t MaskOverreadsQ(const uint8x16_t source,
+ const ptrdiff_t over_read_in_bytes) {
+ uint8x16_t dst = source;
+#if LIBGAV1_MSAN
+ if (over_read_in_bytes > 0) {
+ uint8x16_t mask = vdupq_n_u8(0);
+ uint8x16_t valid_element_mask = vdupq_n_u8(-1);
+ const int valid_bytes =
+ std::min(16, 16 - static_cast<int>(over_read_in_bytes));
+ for (int i = 0; i < valid_bytes; ++i) {
+ // Feed ff bytes into |mask| one at a time.
+ mask = vextq_u8(valid_element_mask, mask, 15);
+ }
+ dst = vandq_u8(dst, mask);
+ }
+#else
+ static_cast<void>(over_read_in_bytes);
+#endif
+ return dst;
+}
+
+inline uint8x8_t Load1MsanU8(const uint8_t* const source,
+ const ptrdiff_t over_read_in_bytes) {
+ return MaskOverreads(vld1_u8(source), over_read_in_bytes);
+}
+
+inline uint8x16_t Load1QMsanU8(const uint8_t* const source,
+ const ptrdiff_t over_read_in_bytes) {
+ return MaskOverreadsQ(vld1q_u8(source), over_read_in_bytes);
+}
+
+inline uint16x8_t Load1QMsanU16(const uint16_t* const source,
+ const ptrdiff_t over_read_in_bytes) {
+ return vreinterpretq_u16_u8(MaskOverreadsQ(
+ vreinterpretq_u8_u16(vld1q_u16(source)), over_read_in_bytes));
+}
+
+inline uint16x8x2_t Load2QMsanU16(const uint16_t* const source,
+ const ptrdiff_t over_read_in_bytes) {
+ // Relative source index of elements (2 bytes each):
+ // dst.val[0]: 00 02 04 06 08 10 12 14
+ // dst.val[1]: 01 03 05 07 09 11 13 15
+ uint16x8x2_t dst = vld2q_u16(source);
+ dst.val[0] = vreinterpretq_u16_u8(MaskOverreadsQ(
+ vreinterpretq_u8_u16(dst.val[0]), over_read_in_bytes >> 1));
+ dst.val[1] = vreinterpretq_u16_u8(
+ MaskOverreadsQ(vreinterpretq_u8_u16(dst.val[1]),
+ (over_read_in_bytes >> 1) + (over_read_in_bytes % 4)));
+ return dst;
+}
+
+inline uint32x4_t Load1QMsanU32(const uint32_t* const source,
+ const ptrdiff_t over_read_in_bytes) {
+ return vreinterpretq_u32_u8(MaskOverreadsQ(
+ vreinterpretq_u8_u32(vld1q_u32(source)), over_read_in_bytes));
+}
+
//------------------------------------------------------------------------------
// Store functions.
@@ -272,7 +388,7 @@ inline void Store2(void* const buf, const uint16x8_t val) {
// Store 2 uint16_t values from |lane| * 2 and |lane| * 2 + 1 of a uint16x4_t
// register.
template <int lane>
-inline void Store2(uint16_t* const buf, const uint16x4_t val) {
+inline void Store2(void* const buf, const uint16x4_t val) {
ValueToMem<uint32_t>(buf, vget_lane_u32(vreinterpret_u32_u16(val), lane));
}
@@ -287,6 +403,104 @@ inline void Store8(void* const buf, const uint16x8_t val) {
}
//------------------------------------------------------------------------------
+// Pointer helpers.
+
+// This function adds |stride|, given as a number of bytes, to a pointer to a
+// larger type, using native pointer arithmetic.
+template <typename T>
+inline T* AddByteStride(T* ptr, const ptrdiff_t stride) {
+ return reinterpret_cast<T*>(
+ const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(ptr) + stride));
+}
+
+//------------------------------------------------------------------------------
+// Multiply.
+
+// Shim vmull_high_u16 for armv7.
+inline uint32x4_t VMullHighU16(const uint16x8_t a, const uint16x8_t b) {
+#if defined(__aarch64__)
+ return vmull_high_u16(a, b);
+#else
+ return vmull_u16(vget_high_u16(a), vget_high_u16(b));
+#endif
+}
+
+// Shim vmull_high_s16 for armv7.
+inline int32x4_t VMullHighS16(const int16x8_t a, const int16x8_t b) {
+#if defined(__aarch64__)
+ return vmull_high_s16(a, b);
+#else
+ return vmull_s16(vget_high_s16(a), vget_high_s16(b));
+#endif
+}
+
+// Shim vmlal_high_u16 for armv7.
+inline uint32x4_t VMlalHighU16(const uint32x4_t a, const uint16x8_t b,
+ const uint16x8_t c) {
+#if defined(__aarch64__)
+ return vmlal_high_u16(a, b, c);
+#else
+ return vmlal_u16(a, vget_high_u16(b), vget_high_u16(c));
+#endif
+}
+
+// Shim vmlal_high_s16 for armv7.
+inline int32x4_t VMlalHighS16(const int32x4_t a, const int16x8_t b,
+ const int16x8_t c) {
+#if defined(__aarch64__)
+ return vmlal_high_s16(a, b, c);
+#else
+ return vmlal_s16(a, vget_high_s16(b), vget_high_s16(c));
+#endif
+}
+
+// Shim vmul_laneq_u16 for armv7.
+template <int lane>
+inline uint16x4_t VMulLaneQU16(const uint16x4_t a, const uint16x8_t b) {
+#if defined(__aarch64__)
+ return vmul_laneq_u16(a, b, lane);
+#else
+ if (lane < 4) return vmul_lane_u16(a, vget_low_u16(b), lane & 0x3);
+ return vmul_lane_u16(a, vget_high_u16(b), (lane - 4) & 0x3);
+#endif
+}
+
+// Shim vmulq_laneq_u16 for armv7.
+template <int lane>
+inline uint16x8_t VMulQLaneQU16(const uint16x8_t a, const uint16x8_t b) {
+#if defined(__aarch64__)
+ return vmulq_laneq_u16(a, b, lane);
+#else
+ if (lane < 4) return vmulq_lane_u16(a, vget_low_u16(b), lane & 0x3);
+ return vmulq_lane_u16(a, vget_high_u16(b), (lane - 4) & 0x3);
+#endif
+}
+
+// Shim vmla_laneq_u16 for armv7.
+template <int lane>
+inline uint16x4_t VMlaLaneQU16(const uint16x4_t a, const uint16x4_t b,
+ const uint16x8_t c) {
+#if defined(__aarch64__)
+ return vmla_laneq_u16(a, b, c, lane);
+#else
+ if (lane < 4) return vmla_lane_u16(a, b, vget_low_u16(c), lane & 0x3);
+ return vmla_lane_u16(a, b, vget_high_u16(c), (lane - 4) & 0x3);
+#endif
+}
+
+// Shim vmlaq_laneq_u16 for armv7.
+template <int lane>
+inline uint16x8_t VMlaQLaneQU16(const uint16x8_t a, const uint16x8_t b,
+ const uint16x8_t c) {
+#if defined(__aarch64__)
+ return vmlaq_laneq_u16(a, b, c, lane);
+#else
+ if (lane < 4) return vmlaq_lane_u16(a, b, vget_low_u16(c), lane & 0x3);
+ return vmlaq_lane_u16(a, b, vget_high_u16(c), (lane - 4) & 0x3);
+#endif
+}
+
+//------------------------------------------------------------------------------
// Bit manipulation.
// vshXX_n_XX() requires an immediate.
@@ -315,6 +529,51 @@ inline uint8x8_t VQTbl1U8(const uint8x16_t a, const uint8x8_t index) {
#endif
}
+// Shim vqtbl2_u8 for armv7.
+inline uint8x8_t VQTbl2U8(const uint8x16x2_t a, const uint8x8_t index) {
+#if defined(__aarch64__)
+ return vqtbl2_u8(a, index);
+#else
+ const uint8x8x4_t b = {vget_low_u8(a.val[0]), vget_high_u8(a.val[0]),
+ vget_low_u8(a.val[1]), vget_high_u8(a.val[1])};
+ return vtbl4_u8(b, index);
+#endif
+}
+
+// Shim vqtbl2q_u8 for armv7.
+inline uint8x16_t VQTbl2QU8(const uint8x16x2_t a, const uint8x16_t index) {
+#if defined(__aarch64__)
+ return vqtbl2q_u8(a, index);
+#else
+ return vcombine_u8(VQTbl2U8(a, vget_low_u8(index)),
+ VQTbl2U8(a, vget_high_u8(index)));
+#endif
+}
+
+// Shim vqtbl3q_u8 for armv7.
+inline uint8x8_t VQTbl3U8(const uint8x16x3_t a, const uint8x8_t index) {
+#if defined(__aarch64__)
+ return vqtbl3_u8(a, index);
+#else
+ const uint8x8x4_t b = {vget_low_u8(a.val[0]), vget_high_u8(a.val[0]),
+ vget_low_u8(a.val[1]), vget_high_u8(a.val[1])};
+ const uint8x8x2_t c = {vget_low_u8(a.val[2]), vget_high_u8(a.val[2])};
+ const uint8x8_t index_ext = vsub_u8(index, vdup_n_u8(32));
+ const uint8x8_t partial_lookup = vtbl4_u8(b, index);
+ return vtbx2_u8(partial_lookup, c, index_ext);
+#endif
+}
+
+// Shim vqtbl3q_u8 for armv7.
+inline uint8x16_t VQTbl3QU8(const uint8x16x3_t a, const uint8x16_t index) {
+#if defined(__aarch64__)
+ return vqtbl3q_u8(a, index);
+#else
+ return vcombine_u8(VQTbl3U8(a, vget_low_u8(index)),
+ VQTbl3U8(a, vget_high_u8(index)));
+#endif
+}
+
// Shim vqtbl1_s8 for armv7.
inline int8x8_t VQTbl1S8(const int8x16_t a, const uint8x8_t index) {
#if defined(__aarch64__)
@@ -326,6 +585,25 @@ inline int8x8_t VQTbl1S8(const int8x16_t a, const uint8x8_t index) {
}
//------------------------------------------------------------------------------
+// Saturation helpers.
+
+inline int16x4_t Clip3S16(int16x4_t val, int16x4_t low, int16x4_t high) {
+ return vmin_s16(vmax_s16(val, low), high);
+}
+
+inline int16x8_t Clip3S16(const int16x8_t val, const int16x8_t low,
+ const int16x8_t high) {
+ return vminq_s16(vmaxq_s16(val, low), high);
+}
+
+inline uint16x8_t ConvertToUnsignedPixelU16(int16x8_t val, int bitdepth) {
+ const int16x8_t low = vdupq_n_s16(0);
+ const uint16x8_t high = vdupq_n_u16((1 << bitdepth) - 1);
+
+ return vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(val, low)), high);
+}
+
+//------------------------------------------------------------------------------
// Interleave.
// vzipN is exclusive to A64.
@@ -439,6 +717,9 @@ inline uint8x8_t Transpose32(const uint8x8_t a) {
return vreinterpret_u8_u32(b);
}
+// Swap high and low halves.
+inline uint16x8_t Transpose64(const uint16x8_t a) { return vextq_u16(a, a, 4); }
+
// Implement vtrnq_s64().
// Input:
// a0: 00 01 02 03 04 05 06 07
@@ -512,6 +793,108 @@ inline void Transpose4x4(uint8x8_t* a, uint8x8_t* b) {
*b = e.val[1];
}
+// 4x8 Input:
+// a[0]: 00 01 02 03 04 05 06 07
+// a[1]: 10 11 12 13 14 15 16 17
+// a[2]: 20 21 22 23 24 25 26 27
+// a[3]: 30 31 32 33 34 35 36 37
+// 8x4 Output:
+// a[0]: 00 10 20 30 04 14 24 34
+// a[1]: 01 11 21 31 05 15 25 35
+// a[2]: 02 12 22 32 06 16 26 36
+// a[3]: 03 13 23 33 07 17 27 37
+inline void Transpose4x8(uint16x8_t a[4]) {
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ const uint16x8x2_t b0 = vtrnq_u16(a[0], a[1]);
+ const uint16x8x2_t b1 = vtrnq_u16(a[2], a[3]);
+
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+ const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
+ vreinterpretq_u32_u16(b1.val[0]));
+ const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
+ vreinterpretq_u32_u16(b1.val[1]));
+
+ a[0] = vreinterpretq_u16_u32(c0.val[0]);
+ a[1] = vreinterpretq_u16_u32(c1.val[0]);
+ a[2] = vreinterpretq_u16_u32(c0.val[1]);
+ a[3] = vreinterpretq_u16_u32(c1.val[1]);
+}
+
+// Special transpose for loop filter.
+// 4x8 Input:
+// p_q: p3 p2 p1 p0 q0 q1 q2 q3
+// a[0]: 00 01 02 03 04 05 06 07
+// a[1]: 10 11 12 13 14 15 16 17
+// a[2]: 20 21 22 23 24 25 26 27
+// a[3]: 30 31 32 33 34 35 36 37
+// 8x4 Output:
+// a[0]: 03 13 23 33 04 14 24 34 p0q0
+// a[1]: 02 12 22 32 05 15 25 35 p1q1
+// a[2]: 01 11 21 31 06 16 26 36 p2q2
+// a[3]: 00 10 20 30 07 17 27 37 p3q3
+// Direct reapplication of the function will reset the high halves, but
+// reverse the low halves:
+// p_q: p0 p1 p2 p3 q0 q1 q2 q3
+// a[0]: 33 32 31 30 04 05 06 07
+// a[1]: 23 22 21 20 14 15 16 17
+// a[2]: 13 12 11 10 24 25 26 27
+// a[3]: 03 02 01 00 34 35 36 37
+// Simply reordering the inputs (3, 2, 1, 0) will reset the low halves, but
+// reverse the high halves.
+// The standard Transpose4x8 will produce the same reversals, but with the
+// order of the low halves also restored relative to the high halves. This is
+// preferable because it puts all values from the same source row back together,
+// but some post-processing is inevitable.
+inline void LoopFilterTranspose4x8(uint16x8_t a[4]) {
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ const uint16x8x2_t b0 = vtrnq_u16(a[0], a[1]);
+ const uint16x8x2_t b1 = vtrnq_u16(a[2], a[3]);
+
+ // Reverse odd vectors to bring the appropriate items to the front of zips.
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // r0 : 03 13 01 11 07 17 05 15
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // r1 : 23 33 21 31 27 37 25 35
+ const uint32x4_t r0 = vrev64q_u32(vreinterpretq_u32_u16(b0.val[1]));
+ const uint32x4_t r1 = vrev64q_u32(vreinterpretq_u32_u16(b1.val[1]));
+
+ // Zip to complete the halves.
+ // c0.val[0]: 00 10 20 30 02 12 22 32 p3p1
+ // c0.val[1]: 04 14 24 34 06 16 26 36 q0q2
+ // c1.val[0]: 03 13 23 33 01 11 21 31 p0p2
+ // c1.val[1]: 07 17 27 37 05 15 25 35 q3q1
+ const uint32x4x2_t c0 = vzipq_u32(vreinterpretq_u32_u16(b0.val[0]),
+ vreinterpretq_u32_u16(b1.val[0]));
+ const uint32x4x2_t c1 = vzipq_u32(r0, r1);
+
+ // d0.val[0]: 00 10 20 30 07 17 27 37 p3q3
+ // d0.val[1]: 02 12 22 32 05 15 25 35 p1q1
+ // d1.val[0]: 03 13 23 33 04 14 24 34 p0q0
+ // d1.val[1]: 01 11 21 31 06 16 26 36 p2q2
+ const uint16x8x2_t d0 = VtrnqU64(c0.val[0], c1.val[1]);
+ // The third row of c comes first here to swap p2 with q0.
+ const uint16x8x2_t d1 = VtrnqU64(c1.val[0], c0.val[1]);
+
+ // 8x4 Output:
+ // a[0]: 03 13 23 33 04 14 24 34 p0q0
+ // a[1]: 02 12 22 32 05 15 25 35 p1q1
+ // a[2]: 01 11 21 31 06 16 26 36 p2q2
+ // a[3]: 00 10 20 30 07 17 27 37 p3q3
+ a[0] = d1.val[0]; // p0q0
+ a[1] = d0.val[1]; // p1q1
+ a[2] = d1.val[1]; // p2q2
+ a[3] = d0.val[0]; // p3q3
+}
+
// Reversible if the x4 values are packed next to each other.
// x4 input / x8 output:
// a0: 00 01 02 03 40 41 42 43 44
diff --git a/src/dsp/arm/common_neon_test.cc b/src/dsp/arm/common_neon_test.cc
new file mode 100644
index 0000000..03aed19
--- /dev/null
+++ b/src/dsp/arm/common_neon_test.cc
@@ -0,0 +1,208 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/arm/common_neon.h"
+
+#include "gtest/gtest.h"
+#include "src/utils/cpu.h"
+
+#if LIBGAV1_ENABLE_NEON
+#include <cstdint>
+
+#include "tests/block_utils.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+constexpr int kMaxBlockWidth = 16;
+constexpr int kMaxBlockHeight = 16;
+
+template <typename Pixel>
+class TransposeTest : public testing::Test {
+ public:
+ TransposeTest() {
+ for (int y = 0; y < kMaxBlockHeight; ++y) {
+ for (int x = 0; x < kMaxBlockWidth; ++x) {
+ src_block_[y][x] = y * 16 + x;
+ expected_transpose_[y][x] = x * 16 + y;
+ }
+ }
+ }
+
+ TransposeTest(const TransposeTest&) = delete;
+ TransposeTest& operator=(const TransposeTest&) = delete;
+ ~TransposeTest() override = default;
+
+ protected:
+ Pixel src_block_[kMaxBlockHeight][kMaxBlockWidth];
+ Pixel expected_transpose_[kMaxBlockHeight][kMaxBlockWidth];
+};
+
+using TransposeTestLowBitdepth = TransposeTest<uint8_t>;
+
+TEST_F(TransposeTestLowBitdepth, Transpose4x4Test) {
+ uint8x8_t a = Load4<1>(src_block_[1], Load4(src_block_[0]));
+ uint8x8_t b = Load4<1>(src_block_[3], Load4(src_block_[2]));
+ Transpose4x4(&a, &b);
+ uint8_t output_4x4[4][4];
+ StoreLo4(output_4x4[0], a);
+ StoreLo4(output_4x4[1], b);
+ StoreHi4(output_4x4[2], a);
+ StoreHi4(output_4x4[3], b);
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x4[0],
+ 4, 4, kMaxBlockWidth, 4, false));
+}
+
+TEST_F(TransposeTestLowBitdepth, Transpose8x4Test) {
+ uint8x8_t a0 = Load4<1>(src_block_[4], Load4(src_block_[0]));
+ uint8x8_t a1 = Load4<1>(src_block_[5], Load4(src_block_[1]));
+ uint8x8_t a2 = Load4<1>(src_block_[6], Load4(src_block_[2]));
+ uint8x8_t a3 = Load4<1>(src_block_[7], Load4(src_block_[3]));
+ Transpose8x4(&a0, &a1, &a2, &a3);
+ uint8_t output_8x4[4][8];
+ vst1_u8(output_8x4[0], a0);
+ vst1_u8(output_8x4[1], a1);
+ vst1_u8(output_8x4[2], a2);
+ vst1_u8(output_8x4[3], a3);
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x4[0],
+ 8, 4, kMaxBlockWidth, 8, false));
+}
+
+TEST_F(TransposeTestLowBitdepth, Transpose8x8Test) {
+ uint8x8_t input_8x8[8];
+ for (int i = 0; i < 8; ++i) {
+ input_8x8[i] = vld1_u8(src_block_[i]);
+ }
+ Transpose8x8(input_8x8);
+ uint8_t output_8x8[8][8];
+ for (int i = 0; i < 8; ++i) {
+ vst1_u8(output_8x8[i], input_8x8[i]);
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0],
+ 8, 8, kMaxBlockWidth, 8, false));
+}
+
+TEST_F(TransposeTestLowBitdepth, Transpose8x16Test) {
+ uint8x16_t input_8x16[8];
+ for (int i = 0; i < 8; ++i) {
+ input_8x16[i] =
+ vcombine_u8(vld1_u8(src_block_[i]), vld1_u8(src_block_[i + 8]));
+ }
+ Transpose8x16(input_8x16);
+ uint8_t output_16x8[8][16];
+ for (int i = 0; i < 8; ++i) {
+ vst1q_u8(output_16x8[i], input_8x16[i]);
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_16x8[0],
+ 16, 8, kMaxBlockWidth, 16, false));
+}
+
+using TransposeTestHighBitdepth = TransposeTest<uint16_t>;
+
+TEST_F(TransposeTestHighBitdepth, Transpose4x4Test) {
+ uint16x4_t input_4x4[4];
+ input_4x4[0] = vld1_u16(src_block_[0]);
+ input_4x4[1] = vld1_u16(src_block_[1]);
+ input_4x4[2] = vld1_u16(src_block_[2]);
+ input_4x4[3] = vld1_u16(src_block_[3]);
+ Transpose4x4(input_4x4);
+ uint16_t output_4x4[4][4];
+ for (int i = 0; i < 4; ++i) {
+ vst1_u16(output_4x4[i], input_4x4[i]);
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x4[0],
+ 4, 4, kMaxBlockWidth, 4, false));
+}
+
+TEST_F(TransposeTestHighBitdepth, Transpose4x8Test) {
+ uint16x8_t input_4x8[4];
+ for (int i = 0; i < 4; ++i) {
+ input_4x8[i] = vld1q_u16(src_block_[i]);
+ }
+ Transpose4x8(input_4x8);
+ uint16_t output_4x8[4][8];
+ for (int i = 0; i < 4; ++i) {
+ vst1q_u16(output_4x8[i], input_4x8[i]);
+ memcpy(&expected_transpose_[i][4], &expected_transpose_[i + 4][0],
+ 4 * sizeof(expected_transpose_[0][0]));
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x8[0],
+ 8, 4, kMaxBlockWidth, 8, false));
+}
+
+TEST_F(TransposeTestHighBitdepth, LoopFilterTranspose4x8Test) {
+ uint16x8_t input_4x8[4];
+ for (int i = 0; i < 4; ++i) {
+ input_4x8[i] = vld1q_u16(src_block_[i]);
+ }
+ LoopFilterTranspose4x8(input_4x8);
+ uint16_t output_4x8[4][8];
+ for (int i = 0; i < 4; ++i) {
+ vst1q_u16(output_4x8[i], input_4x8[i]);
+ }
+ // a[0]: 03 13 23 33 04 14 24 34 p0q0
+ // a[1]: 02 12 22 32 05 15 25 35 p1q1
+ // a[2]: 01 11 21 31 06 16 26 36 p2q2
+ // a[3]: 00 10 20 30 07 17 27 37 p3q3
+ static constexpr uint16_t expected_output[4][8] = {
+ {0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34},
+ {0x02, 0x12, 0x22, 0x32, 0x05, 0x15, 0x25, 0x35},
+ {0x01, 0x11, 0x21, 0x31, 0x06, 0x16, 0x26, 0x36},
+ {0x00, 0x10, 0x20, 0x30, 0x07, 0x17, 0x27, 0x37},
+ };
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_output[0], output_4x8[0], 8, 4,
+ 8, 8, false));
+}
+
+TEST_F(TransposeTestHighBitdepth, Transpose8x8Test) {
+ uint16x8_t input_8x8[8];
+ for (int i = 0; i < 8; ++i) {
+ input_8x8[i] = vld1q_u16(src_block_[i]);
+ }
+ Transpose8x8(input_8x8);
+ uint16_t output_8x8[8][8];
+ for (int i = 0; i < 8; ++i) {
+ vst1q_u16(output_8x8[i], input_8x8[i]);
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0],
+ 8, 8, kMaxBlockWidth, 8, false));
+}
+
+TEST_F(TransposeTestHighBitdepth, Transpose8x8SignedTest) {
+ int16x8_t input_8x8[8];
+ for (int i = 0; i < 8; ++i) {
+ input_8x8[i] = vreinterpretq_s16_u16(vld1q_u16(src_block_[i]));
+ }
+ Transpose8x8(input_8x8);
+ uint16_t output_8x8[8][8];
+ for (int i = 0; i < 8; ++i) {
+ vst1q_u16(output_8x8[i], vreinterpretq_u16_s16(input_8x8[i]));
+ }
+ EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0],
+ 8, 8, kMaxBlockWidth, 8, false));
+}
+
+} // namespace
+} // namespace dsp
+} // namespace libgav1
+
+#else // !LIBGAV1_ENABLE_NEON
+
+TEST(CommonDspTest, NEON) {
+ GTEST_SKIP()
+ << "Build this module for Arm with NEON enabled to enable the tests.";
+}
+
+#endif // LIBGAV1_ENABLE_NEON
diff --git a/src/dsp/arm/convolve_10bit_neon.cc b/src/dsp/arm/convolve_10bit_neon.cc
new file mode 100644
index 0000000..b7205df
--- /dev/null
+++ b/src/dsp/arm/convolve_10bit_neon.cc
@@ -0,0 +1,3008 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/convolve.h"
+#include "src/utils/cpu.h"
+
+#if LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10
+#include <arm_neon.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+#include "src/dsp/arm/common_neon.h"
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+// Include the constants and utility functions inside the anonymous namespace.
+#include "src/dsp/convolve.inc"
+
+// Output of ConvolveTest.ShowRange below.
+// Bitdepth: 10 Input range: [ 0, 1023]
+// Horizontal base upscaled range: [ -28644, 94116]
+// Horizontal halved upscaled range: [ -14322, 47085]
+// Horizontal downscaled range: [ -7161, 23529]
+// Vertical upscaled range: [-1317624, 2365176]
+// Pixel output range: [ 0, 1023]
+// Compound output range: [ 3988, 61532]
+
+template <int filter_index>
+int32x4x2_t SumOnePassTaps(const uint16x8_t* const src,
+ const int16x4_t* const taps) {
+ const auto* ssrc = reinterpret_cast<const int16x8_t*>(src);
+ int32x4x2_t sum;
+ if (filter_index < 2) {
+ // 6 taps.
+ sum.val[0] = vmull_s16(vget_low_s16(ssrc[0]), taps[0]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[1]), taps[1]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[2]), taps[2]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[3]), taps[3]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[4]), taps[4]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[5]), taps[5]);
+
+ sum.val[1] = vmull_s16(vget_high_s16(ssrc[0]), taps[0]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[1]), taps[1]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[2]), taps[2]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[3]), taps[3]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[4]), taps[4]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[5]), taps[5]);
+ } else if (filter_index == 2) {
+ // 8 taps.
+ sum.val[0] = vmull_s16(vget_low_s16(ssrc[0]), taps[0]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[1]), taps[1]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[2]), taps[2]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[3]), taps[3]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[4]), taps[4]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[5]), taps[5]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[6]), taps[6]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[7]), taps[7]);
+
+ sum.val[1] = vmull_s16(vget_high_s16(ssrc[0]), taps[0]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[1]), taps[1]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[2]), taps[2]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[3]), taps[3]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[4]), taps[4]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[5]), taps[5]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[6]), taps[6]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[7]), taps[7]);
+ } else if (filter_index == 3) {
+ // 2 taps.
+ sum.val[0] = vmull_s16(vget_low_s16(ssrc[0]), taps[0]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[1]), taps[1]);
+
+ sum.val[1] = vmull_s16(vget_high_s16(ssrc[0]), taps[0]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[1]), taps[1]);
+ } else {
+ // 4 taps.
+ sum.val[0] = vmull_s16(vget_low_s16(ssrc[0]), taps[0]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[1]), taps[1]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[2]), taps[2]);
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(ssrc[3]), taps[3]);
+
+ sum.val[1] = vmull_s16(vget_high_s16(ssrc[0]), taps[0]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[1]), taps[1]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[2]), taps[2]);
+ sum.val[1] = vmlal_s16(sum.val[1], vget_high_s16(ssrc[3]), taps[3]);
+ }
+ return sum;
+}
+
+template <int filter_index>
+int32x4_t SumOnePassTaps(const uint16x4_t* const src,
+ const int16x4_t* const taps) {
+ const auto* ssrc = reinterpret_cast<const int16x4_t*>(src);
+ int32x4_t sum;
+ if (filter_index < 2) {
+ // 6 taps.
+ sum = vmull_s16(ssrc[0], taps[0]);
+ sum = vmlal_s16(sum, ssrc[1], taps[1]);
+ sum = vmlal_s16(sum, ssrc[2], taps[2]);
+ sum = vmlal_s16(sum, ssrc[3], taps[3]);
+ sum = vmlal_s16(sum, ssrc[4], taps[4]);
+ sum = vmlal_s16(sum, ssrc[5], taps[5]);
+ } else if (filter_index == 2) {
+ // 8 taps.
+ sum = vmull_s16(ssrc[0], taps[0]);
+ sum = vmlal_s16(sum, ssrc[1], taps[1]);
+ sum = vmlal_s16(sum, ssrc[2], taps[2]);
+ sum = vmlal_s16(sum, ssrc[3], taps[3]);
+ sum = vmlal_s16(sum, ssrc[4], taps[4]);
+ sum = vmlal_s16(sum, ssrc[5], taps[5]);
+ sum = vmlal_s16(sum, ssrc[6], taps[6]);
+ sum = vmlal_s16(sum, ssrc[7], taps[7]);
+ } else if (filter_index == 3) {
+ // 2 taps.
+ sum = vmull_s16(ssrc[0], taps[0]);
+ sum = vmlal_s16(sum, ssrc[1], taps[1]);
+ } else {
+ // 4 taps.
+ sum = vmull_s16(ssrc[0], taps[0]);
+ sum = vmlal_s16(sum, ssrc[1], taps[1]);
+ sum = vmlal_s16(sum, ssrc[2], taps[2]);
+ sum = vmlal_s16(sum, ssrc[3], taps[3]);
+ }
+ return sum;
+}
+
+template <int filter_index, bool is_compound, bool is_2d>
+void FilterHorizontalWidth8AndUp(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height,
+ const int16x4_t* const v_tap) {
+ auto* dest16 = static_cast<uint16_t*>(dest);
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ if (is_2d) {
+ int x = 0;
+ do {
+ const uint16_t* s = src + x;
+ int y = height;
+ do { // Increasing loop counter x is better.
+ const uint16x8_t src_long = vld1q_u16(s);
+ const uint16x8_t src_long_hi = vld1q_u16(s + 8);
+ uint16x8_t v_src[8];
+ int32x4x2_t v_sum;
+ if (filter_index < 2) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_src[4] = vextq_u16(src_long, src_long_hi, 4);
+ v_src[5] = vextq_u16(src_long, src_long_hi, 5);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 1);
+ } else if (filter_index == 2) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_src[4] = vextq_u16(src_long, src_long_hi, 4);
+ v_src[5] = vextq_u16(src_long, src_long_hi, 5);
+ v_src[6] = vextq_u16(src_long, src_long_hi, 6);
+ v_src[7] = vextq_u16(src_long, src_long_hi, 7);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap);
+ } else if (filter_index == 3) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 3);
+ } else { // filter_index > 3
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 2);
+ }
+
+ const int16x4_t d0 =
+ vqrshrn_n_s32(v_sum.val[0], kInterRoundBitsHorizontal - 1);
+ const int16x4_t d1 =
+ vqrshrn_n_s32(v_sum.val[1], kInterRoundBitsHorizontal - 1);
+ vst1_u16(&dest16[0], vreinterpret_u16_s16(d0));
+ vst1_u16(&dest16[4], vreinterpret_u16_s16(d1));
+ s += src_stride;
+ dest16 += 8;
+ } while (--y != 0);
+ x += 8;
+ } while (x < width);
+ return;
+ }
+ int y = height;
+ do {
+ int x = 0;
+ do {
+ const uint16x8_t src_long = vld1q_u16(src + x);
+ const uint16x8_t src_long_hi = vld1q_u16(src + x + 8);
+ uint16x8_t v_src[8];
+ int32x4x2_t v_sum;
+ if (filter_index < 2) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_src[4] = vextq_u16(src_long, src_long_hi, 4);
+ v_src[5] = vextq_u16(src_long, src_long_hi, 5);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 1);
+ } else if (filter_index == 2) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_src[4] = vextq_u16(src_long, src_long_hi, 4);
+ v_src[5] = vextq_u16(src_long, src_long_hi, 5);
+ v_src[6] = vextq_u16(src_long, src_long_hi, 6);
+ v_src[7] = vextq_u16(src_long, src_long_hi, 7);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap);
+ } else if (filter_index == 3) {
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 3);
+ } else { // filter_index > 3
+ v_src[0] = src_long;
+ v_src[1] = vextq_u16(src_long, src_long_hi, 1);
+ v_src[2] = vextq_u16(src_long, src_long_hi, 2);
+ v_src[3] = vextq_u16(src_long, src_long_hi, 3);
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 2);
+ }
+ if (is_compound) {
+ const int16x4_t v_compound_offset = vdup_n_s16(kCompoundOffset);
+ const int16x4_t d0 =
+ vqrshrn_n_s32(v_sum.val[0], kInterRoundBitsHorizontal - 1);
+ const int16x4_t d1 =
+ vqrshrn_n_s32(v_sum.val[1], kInterRoundBitsHorizontal - 1);
+ vst1_u16(&dest16[x],
+ vreinterpret_u16_s16(vadd_s16(d0, v_compound_offset)));
+ vst1_u16(&dest16[x + 4],
+ vreinterpret_u16_s16(vadd_s16(d1, v_compound_offset)));
+ } else {
+ // Normally the Horizontal pass does the downshift in two passes:
+ // kInterRoundBitsHorizontal - 1 and then (kFilterBits -
+ // kInterRoundBitsHorizontal). Each one uses a rounding shift.
+ // Combining them requires adding the rounding offset from the skipped
+ // shift.
+ const int32x4_t v_first_shift_rounding_bit =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 2));
+ v_sum.val[0] = vaddq_s32(v_sum.val[0], v_first_shift_rounding_bit);
+ v_sum.val[1] = vaddq_s32(v_sum.val[1], v_first_shift_rounding_bit);
+ const uint16x4_t d0 = vmin_u16(
+ vqrshrun_n_s32(v_sum.val[0], kFilterBits - 1), v_max_bitdepth);
+ const uint16x4_t d1 = vmin_u16(
+ vqrshrun_n_s32(v_sum.val[1], kFilterBits - 1), v_max_bitdepth);
+ vst1_u16(&dest16[x], d0);
+ vst1_u16(&dest16[x + 4], d1);
+ }
+ x += 8;
+ } while (x < width);
+ src += src_stride;
+ dest16 += pred_stride;
+ } while (--y != 0);
+}
+
+template <int filter_index, bool is_compound, bool is_2d>
+void FilterHorizontalWidth4(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int height,
+ const int16x4_t* const v_tap) {
+ auto* dest16 = static_cast<uint16_t*>(dest);
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ int y = height;
+ do {
+ const uint16x8_t v_zero = vdupq_n_u16(0);
+ uint16x4_t v_src[4];
+ int32x4_t v_sum;
+ const uint16x8_t src_long = vld1q_u16(src);
+ v_src[0] = vget_low_u16(src_long);
+ if (filter_index == 3) {
+ v_src[1] = vget_low_u16(vextq_u16(src_long, v_zero, 1));
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 3);
+ } else {
+ v_src[1] = vget_low_u16(vextq_u16(src_long, v_zero, 1));
+ v_src[2] = vget_low_u16(vextq_u16(src_long, v_zero, 2));
+ v_src[3] = vget_low_u16(vextq_u16(src_long, v_zero, 3));
+ v_sum = SumOnePassTaps<filter_index>(v_src, v_tap + 2);
+ }
+ if (is_compound || is_2d) {
+ const int16x4_t d0 = vqrshrn_n_s32(v_sum, kInterRoundBitsHorizontal - 1);
+ if (is_compound && !is_2d) {
+ vst1_u16(&dest16[0], vreinterpret_u16_s16(
+ vadd_s16(d0, vdup_n_s16(kCompoundOffset))));
+ } else {
+ vst1_u16(&dest16[0], vreinterpret_u16_s16(d0));
+ }
+ } else {
+ const int32x4_t v_first_shift_rounding_bit =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 2));
+ v_sum = vaddq_s32(v_sum, v_first_shift_rounding_bit);
+ const uint16x4_t d0 =
+ vmin_u16(vqrshrun_n_s32(v_sum, kFilterBits - 1), v_max_bitdepth);
+ vst1_u16(&dest16[0], d0);
+ }
+ src += src_stride;
+ dest16 += pred_stride;
+ } while (--y != 0);
+}
+
+template <int filter_index, bool is_2d>
+void FilterHorizontalWidth2(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int height,
+ const int16x4_t* const v_tap) {
+ auto* dest16 = static_cast<uint16_t*>(dest);
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ int y = height >> 1;
+ do {
+ const int16x8_t v_zero = vdupq_n_s16(0);
+ const int16x8_t input0 = vreinterpretq_s16_u16(vld1q_u16(src));
+ const int16x8_t input1 = vreinterpretq_s16_u16(vld1q_u16(src + src_stride));
+ const int16x8x2_t input = vzipq_s16(input0, input1);
+ int32x4_t v_sum;
+ if (filter_index == 3) {
+ v_sum = vmull_s16(vget_low_s16(input.val[0]), v_tap[3]);
+ v_sum = vmlal_s16(v_sum,
+ vget_low_s16(vextq_s16(input.val[0], input.val[1], 2)),
+ v_tap[4]);
+ } else {
+ v_sum = vmull_s16(vget_low_s16(input.val[0]), v_tap[2]);
+ v_sum = vmlal_s16(v_sum, vget_low_s16(vextq_s16(input.val[0], v_zero, 2)),
+ v_tap[3]);
+ v_sum = vmlal_s16(v_sum, vget_low_s16(vextq_s16(input.val[0], v_zero, 4)),
+ v_tap[4]);
+ v_sum = vmlal_s16(v_sum,
+ vget_low_s16(vextq_s16(input.val[0], input.val[1], 6)),
+ v_tap[5]);
+ }
+ if (is_2d) {
+ const uint16x4_t d0 = vreinterpret_u16_s16(
+ vqrshrn_n_s32(v_sum, kInterRoundBitsHorizontal - 1));
+ dest16[0] = vget_lane_u16(d0, 0);
+ dest16[1] = vget_lane_u16(d0, 2);
+ dest16 += pred_stride;
+ dest16[0] = vget_lane_u16(d0, 1);
+ dest16[1] = vget_lane_u16(d0, 3);
+ dest16 += pred_stride;
+ } else {
+ // Normally the Horizontal pass does the downshift in two passes:
+ // kInterRoundBitsHorizontal - 1 and then (kFilterBits -
+ // kInterRoundBitsHorizontal). Each one uses a rounding shift.
+ // Combining them requires adding the rounding offset from the skipped
+ // shift.
+ const int32x4_t v_first_shift_rounding_bit =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 2));
+ v_sum = vaddq_s32(v_sum, v_first_shift_rounding_bit);
+ const uint16x4_t d0 =
+ vmin_u16(vqrshrun_n_s32(v_sum, kFilterBits - 1), v_max_bitdepth);
+ dest16[0] = vget_lane_u16(d0, 0);
+ dest16[1] = vget_lane_u16(d0, 2);
+ dest16 += pred_stride;
+ dest16[0] = vget_lane_u16(d0, 1);
+ dest16[1] = vget_lane_u16(d0, 3);
+ dest16 += pred_stride;
+ }
+ src += src_stride << 1;
+ } while (--y != 0);
+
+ // The 2d filters have an odd |height| because the horizontal pass
+ // generates context for the vertical pass.
+ if (is_2d) {
+ assert(height % 2 == 1);
+ const int16x8_t input = vreinterpretq_s16_u16(vld1q_u16(src));
+ int32x4_t v_sum;
+ if (filter_index == 3) {
+ v_sum = vmull_s16(vget_low_s16(input), v_tap[3]);
+ v_sum =
+ vmlal_s16(v_sum, vget_low_s16(vextq_s16(input, input, 1)), v_tap[4]);
+ } else {
+ v_sum = vmull_s16(vget_low_s16(input), v_tap[2]);
+ v_sum =
+ vmlal_s16(v_sum, vget_low_s16(vextq_s16(input, input, 1)), v_tap[3]);
+ v_sum =
+ vmlal_s16(v_sum, vget_low_s16(vextq_s16(input, input, 2)), v_tap[4]);
+ v_sum =
+ vmlal_s16(v_sum, vget_low_s16(vextq_s16(input, input, 3)), v_tap[5]);
+ }
+ const uint16x4_t d0 = vreinterpret_u16_s16(
+ vqrshrn_n_s32(v_sum, kInterRoundBitsHorizontal - 1));
+ Store2<0>(dest16, d0);
+ }
+}
+
+template <int filter_index, bool is_compound, bool is_2d>
+void FilterHorizontal(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height, const int16x4_t* const v_tap) {
+ assert(width < 8 || filter_index <= 3);
+ // Don't simplify the redundant if conditions with the template parameters,
+ // which helps the compiler generate compact code.
+ if (width >= 8 && filter_index <= 3) {
+ FilterHorizontalWidth8AndUp<filter_index, is_compound, is_2d>(
+ src, src_stride, dest, pred_stride, width, height, v_tap);
+ return;
+ }
+
+ // Horizontal passes only needs to account for number of taps 2 and 4 when
+ // |width| <= 4.
+ assert(width <= 4);
+ assert(filter_index >= 3 && filter_index <= 5);
+ if (filter_index >= 3 && filter_index <= 5) {
+ if (width == 4) {
+ FilterHorizontalWidth4<filter_index, is_compound, is_2d>(
+ src, src_stride, dest, pred_stride, height, v_tap);
+ return;
+ }
+ assert(width == 2);
+ if (!is_compound) {
+ FilterHorizontalWidth2<filter_index, is_2d>(src, src_stride, dest,
+ pred_stride, height, v_tap);
+ }
+ }
+}
+
+template <bool is_compound = false, bool is_2d = false>
+LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst, const ptrdiff_t dst_stride,
+ const int width, const int height, const int filter_id,
+ const int filter_index) {
+ // Duplicate the absolute value for each tap. Negative taps are corrected
+ // by using the vmlsl_u8 instruction. Positive taps use vmlal_u8.
+ int16x4_t v_tap[kSubPixelTaps];
+ assert(filter_id != 0);
+
+ for (int k = 0; k < kSubPixelTaps; ++k) {
+ v_tap[k] = vdup_n_s16(kHalfSubPixelFilters[filter_index][filter_id][k]);
+ }
+
+ if (filter_index == 2) { // 8 tap.
+ FilterHorizontal<2, is_compound, is_2d>(src, src_stride, dst, dst_stride,
+ width, height, v_tap);
+ } else if (filter_index == 1) { // 6 tap.
+ FilterHorizontal<1, is_compound, is_2d>(src + 1, src_stride, dst,
+ dst_stride, width, height, v_tap);
+ } else if (filter_index == 0) { // 6 tap.
+ FilterHorizontal<0, is_compound, is_2d>(src + 1, src_stride, dst,
+ dst_stride, width, height, v_tap);
+ } else if (filter_index == 4) { // 4 tap.
+ FilterHorizontal<4, is_compound, is_2d>(src + 2, src_stride, dst,
+ dst_stride, width, height, v_tap);
+ } else if (filter_index == 5) { // 4 tap.
+ FilterHorizontal<5, is_compound, is_2d>(src + 2, src_stride, dst,
+ dst_stride, width, height, v_tap);
+ } else { // 2 tap.
+ FilterHorizontal<3, is_compound, is_2d>(src + 3, src_stride, dst,
+ dst_stride, width, height, v_tap);
+ }
+}
+
+void ConvolveHorizontal_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ const int filter_index = GetFilterIndex(horizontal_filter_index, width);
+ // Set |src| to the outermost tap.
+ const auto* const src =
+ static_cast<const uint16_t*>(reference) - kHorizontalOffset;
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const ptrdiff_t dst_stride = pred_stride >> 1;
+
+ DoHorizontalPass(src, src_stride, dest, dst_stride, width, height,
+ horizontal_filter_id, filter_index);
+}
+
+void ConvolveCompoundHorizontal_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
+ const int filter_index = GetFilterIndex(horizontal_filter_index, width);
+ const auto* const src =
+ static_cast<const uint16_t*>(reference) - kHorizontalOffset;
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+
+ DoHorizontalPass</*is_compound=*/true>(src, src_stride, dest, width, width,
+ height, horizontal_filter_id,
+ filter_index);
+}
+
+template <int filter_index, bool is_compound = false>
+void FilterVertical(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int width,
+ const int height, const int16x4_t* const taps) {
+ const int num_taps = GetNumTapsInFilter(filter_index);
+ const int next_row = num_taps - 1;
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ auto* const dst16 = static_cast<uint16_t*>(dst);
+ assert(width >= 8);
+
+ int x = 0;
+ do {
+ const uint16_t* src_x = src + x;
+ uint16x8_t srcs[8];
+ srcs[0] = vld1q_u16(src_x);
+ src_x += src_stride;
+ if (num_taps >= 4) {
+ srcs[1] = vld1q_u16(src_x);
+ src_x += src_stride;
+ srcs[2] = vld1q_u16(src_x);
+ src_x += src_stride;
+ if (num_taps >= 6) {
+ srcs[3] = vld1q_u16(src_x);
+ src_x += src_stride;
+ srcs[4] = vld1q_u16(src_x);
+ src_x += src_stride;
+ if (num_taps == 8) {
+ srcs[5] = vld1q_u16(src_x);
+ src_x += src_stride;
+ srcs[6] = vld1q_u16(src_x);
+ src_x += src_stride;
+ }
+ }
+ }
+
+ // Decreasing the y loop counter produces worse code with clang.
+ // Don't unroll this loop since it generates too much code and the decoder
+ // is even slower.
+ int y = 0;
+ do {
+ srcs[next_row] = vld1q_u16(src_x);
+ src_x += src_stride;
+
+ const int32x4x2_t v_sum = SumOnePassTaps<filter_index>(srcs, taps);
+ if (is_compound) {
+ const int16x4_t v_compound_offset = vdup_n_s16(kCompoundOffset);
+ const int16x4_t d0 =
+ vqrshrn_n_s32(v_sum.val[0], kInterRoundBitsHorizontal - 1);
+ const int16x4_t d1 =
+ vqrshrn_n_s32(v_sum.val[1], kInterRoundBitsHorizontal - 1);
+ vst1_u16(dst16 + x + y * dst_stride,
+ vreinterpret_u16_s16(vadd_s16(d0, v_compound_offset)));
+ vst1_u16(dst16 + x + 4 + y * dst_stride,
+ vreinterpret_u16_s16(vadd_s16(d1, v_compound_offset)));
+ } else {
+ const uint16x4_t d0 = vmin_u16(
+ vqrshrun_n_s32(v_sum.val[0], kFilterBits - 1), v_max_bitdepth);
+ const uint16x4_t d1 = vmin_u16(
+ vqrshrun_n_s32(v_sum.val[1], kFilterBits - 1), v_max_bitdepth);
+ vst1_u16(dst16 + x + y * dst_stride, d0);
+ vst1_u16(dst16 + x + 4 + y * dst_stride, d1);
+ }
+
+ srcs[0] = srcs[1];
+ if (num_taps >= 4) {
+ srcs[1] = srcs[2];
+ srcs[2] = srcs[3];
+ if (num_taps >= 6) {
+ srcs[3] = srcs[4];
+ srcs[4] = srcs[5];
+ if (num_taps == 8) {
+ srcs[5] = srcs[6];
+ srcs[6] = srcs[7];
+ }
+ }
+ }
+ } while (++y < height);
+ x += 8;
+ } while (x < width);
+}
+
+template <int filter_index, bool is_compound = false>
+void FilterVertical4xH(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const int16x4_t* const taps) {
+ const int num_taps = GetNumTapsInFilter(filter_index);
+ const int next_row = num_taps - 1;
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ auto* dst16 = static_cast<uint16_t*>(dst);
+
+ uint16x4_t srcs[9];
+ srcs[0] = vld1_u16(src);
+ src += src_stride;
+ if (num_taps >= 4) {
+ srcs[1] = vld1_u16(src);
+ src += src_stride;
+ srcs[2] = vld1_u16(src);
+ src += src_stride;
+ if (num_taps >= 6) {
+ srcs[3] = vld1_u16(src);
+ src += src_stride;
+ srcs[4] = vld1_u16(src);
+ src += src_stride;
+ if (num_taps == 8) {
+ srcs[5] = vld1_u16(src);
+ src += src_stride;
+ srcs[6] = vld1_u16(src);
+ src += src_stride;
+ }
+ }
+ }
+
+ int y = height;
+ do {
+ srcs[next_row] = vld1_u16(src);
+ src += src_stride;
+ srcs[num_taps] = vld1_u16(src);
+ src += src_stride;
+
+ const int32x4_t v_sum = SumOnePassTaps<filter_index>(srcs, taps);
+ const int32x4_t v_sum_1 = SumOnePassTaps<filter_index>(srcs + 1, taps);
+ if (is_compound) {
+ const int16x4_t d0 = vqrshrn_n_s32(v_sum, kInterRoundBitsHorizontal - 1);
+ const int16x4_t d1 =
+ vqrshrn_n_s32(v_sum_1, kInterRoundBitsHorizontal - 1);
+ vst1_u16(dst16,
+ vreinterpret_u16_s16(vadd_s16(d0, vdup_n_s16(kCompoundOffset))));
+ dst16 += dst_stride;
+ vst1_u16(dst16,
+ vreinterpret_u16_s16(vadd_s16(d1, vdup_n_s16(kCompoundOffset))));
+ dst16 += dst_stride;
+ } else {
+ const uint16x4_t d0 =
+ vmin_u16(vqrshrun_n_s32(v_sum, kFilterBits - 1), v_max_bitdepth);
+ const uint16x4_t d1 =
+ vmin_u16(vqrshrun_n_s32(v_sum_1, kFilterBits - 1), v_max_bitdepth);
+ vst1_u16(dst16, d0);
+ dst16 += dst_stride;
+ vst1_u16(dst16, d1);
+ dst16 += dst_stride;
+ }
+
+ srcs[0] = srcs[2];
+ if (num_taps >= 4) {
+ srcs[1] = srcs[3];
+ srcs[2] = srcs[4];
+ if (num_taps >= 6) {
+ srcs[3] = srcs[5];
+ srcs[4] = srcs[6];
+ if (num_taps == 8) {
+ srcs[5] = srcs[7];
+ srcs[6] = srcs[8];
+ }
+ }
+ }
+ y -= 2;
+ } while (y != 0);
+}
+
+template <int filter_index>
+void FilterVertical2xH(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const int16x4_t* const taps) {
+ const int num_taps = GetNumTapsInFilter(filter_index);
+ const int next_row = num_taps - 1;
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ auto* dst16 = static_cast<uint16_t*>(dst);
+ const uint16x4_t v_zero = vdup_n_u16(0);
+
+ uint16x4_t srcs[9];
+ srcs[0] = Load2<0>(src, v_zero);
+ src += src_stride;
+ if (num_taps >= 4) {
+ srcs[0] = Load2<1>(src, srcs[0]);
+ src += src_stride;
+ srcs[2] = Load2<0>(src, v_zero);
+ src += src_stride;
+ srcs[1] = vext_u16(srcs[0], srcs[2], 2);
+ if (num_taps >= 6) {
+ srcs[2] = Load2<1>(src, srcs[2]);
+ src += src_stride;
+ srcs[4] = Load2<0>(src, v_zero);
+ src += src_stride;
+ srcs[3] = vext_u16(srcs[2], srcs[4], 2);
+ if (num_taps == 8) {
+ srcs[4] = Load2<1>(src, srcs[4]);
+ src += src_stride;
+ srcs[6] = Load2<0>(src, v_zero);
+ src += src_stride;
+ srcs[5] = vext_u16(srcs[4], srcs[6], 2);
+ }
+ }
+ }
+
+ int y = height;
+ do {
+ srcs[next_row - 1] = Load2<1>(src, srcs[next_row - 1]);
+ src += src_stride;
+ srcs[num_taps] = Load2<0>(src, v_zero);
+ src += src_stride;
+ srcs[next_row] = vext_u16(srcs[next_row - 1], srcs[num_taps], 2);
+
+ const int32x4_t v_sum = SumOnePassTaps<filter_index>(srcs, taps);
+ const uint16x4_t d0 =
+ vmin_u16(vqrshrun_n_s32(v_sum, kFilterBits - 1), v_max_bitdepth);
+ Store2<0>(dst16, d0);
+ dst16 += dst_stride;
+ Store2<1>(dst16, d0);
+ dst16 += dst_stride;
+
+ srcs[0] = srcs[2];
+ if (num_taps >= 4) {
+ srcs[1] = srcs[3];
+ srcs[2] = srcs[4];
+ if (num_taps >= 6) {
+ srcs[3] = srcs[5];
+ srcs[4] = srcs[6];
+ if (num_taps == 8) {
+ srcs[5] = srcs[7];
+ srcs[6] = srcs[8];
+ }
+ }
+ }
+ y -= 2;
+ } while (y != 0);
+}
+
+template <int num_taps, bool is_compound>
+int16x8_t SimpleSum2DVerticalTaps(const int16x8_t* const src,
+ const int16x8_t taps) {
+ const int16x4_t taps_lo = vget_low_s16(taps);
+ const int16x4_t taps_hi = vget_high_s16(taps);
+ int32x4_t sum_lo, sum_hi;
+ if (num_taps == 8) {
+ sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 0);
+ sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 0);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[1]), taps_lo, 1);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_lo, 1);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[2]), taps_lo, 2);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[2]), taps_lo, 2);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[3]), taps_lo, 3);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[3]), taps_lo, 3);
+
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[4]), taps_hi, 0);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[4]), taps_hi, 0);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[5]), taps_hi, 1);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[5]), taps_hi, 1);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[6]), taps_hi, 2);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[6]), taps_hi, 2);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[7]), taps_hi, 3);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[7]), taps_hi, 3);
+ } else if (num_taps == 6) {
+ sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 1);
+ sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 1);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[1]), taps_lo, 2);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_lo, 2);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[2]), taps_lo, 3);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[2]), taps_lo, 3);
+
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[3]), taps_hi, 0);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[3]), taps_hi, 0);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[4]), taps_hi, 1);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[4]), taps_hi, 1);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[5]), taps_hi, 2);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[5]), taps_hi, 2);
+ } else if (num_taps == 4) {
+ sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 2);
+ sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 2);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[1]), taps_lo, 3);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_lo, 3);
+
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[2]), taps_hi, 0);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[2]), taps_hi, 0);
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[3]), taps_hi, 1);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[3]), taps_hi, 1);
+ } else if (num_taps == 2) {
+ sum_lo = vmull_lane_s16(vget_low_s16(src[0]), taps_lo, 3);
+ sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 3);
+
+ sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(src[1]), taps_hi, 0);
+ sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_hi, 0);
+ }
+
+ if (is_compound) {
+ // Output is compound, so leave signed and do not saturate. Offset will
+ // accurately bring the value back into positive range.
+ return vcombine_s16(
+ vrshrn_n_s32(sum_lo, kInterRoundBitsCompoundVertical - 1),
+ vrshrn_n_s32(sum_hi, kInterRoundBitsCompoundVertical - 1));
+ }
+
+ // Output is pixel, so saturate to clip at 0.
+ return vreinterpretq_s16_u16(
+ vcombine_u16(vqrshrun_n_s32(sum_lo, kInterRoundBitsVertical - 1),
+ vqrshrun_n_s32(sum_hi, kInterRoundBitsVertical - 1)));
+}
+
+template <int num_taps, bool is_compound = false>
+void Filter2DVerticalWidth8AndUp(const int16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int width,
+ const int height, const int16x8_t taps) {
+ assert(width >= 8);
+ constexpr int next_row = num_taps - 1;
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ auto* const dst16 = static_cast<uint16_t*>(dst);
+
+ int x = 0;
+ do {
+ int16x8_t srcs[9];
+ srcs[0] = vld1q_s16(src);
+ src += 8;
+ if (num_taps >= 4) {
+ srcs[1] = vld1q_s16(src);
+ src += 8;
+ srcs[2] = vld1q_s16(src);
+ src += 8;
+ if (num_taps >= 6) {
+ srcs[3] = vld1q_s16(src);
+ src += 8;
+ srcs[4] = vld1q_s16(src);
+ src += 8;
+ if (num_taps == 8) {
+ srcs[5] = vld1q_s16(src);
+ src += 8;
+ srcs[6] = vld1q_s16(src);
+ src += 8;
+ }
+ }
+ }
+
+ uint16_t* d16 = dst16 + x;
+ int y = height;
+ do {
+ srcs[next_row] = vld1q_s16(src);
+ src += 8;
+ srcs[next_row + 1] = vld1q_s16(src);
+ src += 8;
+ const int16x8_t sum0 =
+ SimpleSum2DVerticalTaps<num_taps, is_compound>(srcs + 0, taps);
+ const int16x8_t sum1 =
+ SimpleSum2DVerticalTaps<num_taps, is_compound>(srcs + 1, taps);
+ if (is_compound) {
+ const int16x8_t v_compound_offset = vdupq_n_s16(kCompoundOffset);
+ vst1q_u16(d16,
+ vreinterpretq_u16_s16(vaddq_s16(sum0, v_compound_offset)));
+ d16 += dst_stride;
+ vst1q_u16(d16,
+ vreinterpretq_u16_s16(vaddq_s16(sum1, v_compound_offset)));
+ d16 += dst_stride;
+ } else {
+ vst1q_u16(d16, vminq_u16(vreinterpretq_u16_s16(sum0), v_max_bitdepth));
+ d16 += dst_stride;
+ vst1q_u16(d16, vminq_u16(vreinterpretq_u16_s16(sum1), v_max_bitdepth));
+ d16 += dst_stride;
+ }
+ srcs[0] = srcs[2];
+ if (num_taps >= 4) {
+ srcs[1] = srcs[3];
+ srcs[2] = srcs[4];
+ if (num_taps >= 6) {
+ srcs[3] = srcs[5];
+ srcs[4] = srcs[6];
+ if (num_taps == 8) {
+ srcs[5] = srcs[7];
+ srcs[6] = srcs[8];
+ }
+ }
+ }
+ y -= 2;
+ } while (y != 0);
+ x += 8;
+ } while (x < width);
+}
+
+// Take advantage of |src_stride| == |width| to process two rows at a time.
+template <int num_taps, bool is_compound = false>
+void Filter2DVerticalWidth4(const int16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const int16x8_t taps) {
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ auto* dst16 = static_cast<uint16_t*>(dst);
+
+ int16x8_t srcs[9];
+ srcs[0] = vld1q_s16(src);
+ src += 8;
+ if (num_taps >= 4) {
+ srcs[2] = vld1q_s16(src);
+ src += 8;
+ srcs[1] = vcombine_s16(vget_high_s16(srcs[0]), vget_low_s16(srcs[2]));
+ if (num_taps >= 6) {
+ srcs[4] = vld1q_s16(src);
+ src += 8;
+ srcs[3] = vcombine_s16(vget_high_s16(srcs[2]), vget_low_s16(srcs[4]));
+ if (num_taps == 8) {
+ srcs[6] = vld1q_s16(src);
+ src += 8;
+ srcs[5] = vcombine_s16(vget_high_s16(srcs[4]), vget_low_s16(srcs[6]));
+ }
+ }
+ }
+
+ int y = height;
+ do {
+ srcs[num_taps] = vld1q_s16(src);
+ src += 8;
+ srcs[num_taps - 1] = vcombine_s16(vget_high_s16(srcs[num_taps - 2]),
+ vget_low_s16(srcs[num_taps]));
+
+ const int16x8_t sum =
+ SimpleSum2DVerticalTaps<num_taps, is_compound>(srcs, taps);
+ if (is_compound) {
+ const int16x8_t v_compound_offset = vdupq_n_s16(kCompoundOffset);
+ vst1q_u16(dst16,
+ vreinterpretq_u16_s16(vaddq_s16(sum, v_compound_offset)));
+ dst16 += 4 << 1;
+ } else {
+ const uint16x8_t d0 =
+ vminq_u16(vreinterpretq_u16_s16(sum), v_max_bitdepth);
+ vst1_u16(dst16, vget_low_u16(d0));
+ dst16 += dst_stride;
+ vst1_u16(dst16, vget_high_u16(d0));
+ dst16 += dst_stride;
+ }
+
+ srcs[0] = srcs[2];
+ if (num_taps >= 4) {
+ srcs[1] = srcs[3];
+ srcs[2] = srcs[4];
+ if (num_taps >= 6) {
+ srcs[3] = srcs[5];
+ srcs[4] = srcs[6];
+ if (num_taps == 8) {
+ srcs[5] = srcs[7];
+ srcs[6] = srcs[8];
+ }
+ }
+ }
+ y -= 2;
+ } while (y != 0);
+}
+
+// Take advantage of |src_stride| == |width| to process four rows at a time.
+template <int num_taps>
+void Filter2DVerticalWidth2(const int16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const int16x8_t taps) {
+ constexpr int next_row = (num_taps < 6) ? 4 : 8;
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ auto* dst16 = static_cast<uint16_t*>(dst);
+
+ int16x8_t srcs[9];
+ srcs[0] = vld1q_s16(src);
+ src += 8;
+ if (num_taps >= 6) {
+ srcs[4] = vld1q_s16(src);
+ src += 8;
+ srcs[1] = vextq_s16(srcs[0], srcs[4], 2);
+ if (num_taps == 8) {
+ srcs[2] = vcombine_s16(vget_high_s16(srcs[0]), vget_low_s16(srcs[4]));
+ srcs[3] = vextq_s16(srcs[0], srcs[4], 6);
+ }
+ }
+
+ int y = height;
+ do {
+ srcs[next_row] = vld1q_s16(src);
+ src += 8;
+ if (num_taps == 2) {
+ srcs[1] = vextq_s16(srcs[0], srcs[4], 2);
+ } else if (num_taps == 4) {
+ srcs[1] = vextq_s16(srcs[0], srcs[4], 2);
+ srcs[2] = vcombine_s16(vget_high_s16(srcs[0]), vget_low_s16(srcs[4]));
+ srcs[3] = vextq_s16(srcs[0], srcs[4], 6);
+ } else if (num_taps == 6) {
+ srcs[2] = vcombine_s16(vget_high_s16(srcs[0]), vget_low_s16(srcs[4]));
+ srcs[3] = vextq_s16(srcs[0], srcs[4], 6);
+ srcs[5] = vextq_s16(srcs[4], srcs[8], 2);
+ } else if (num_taps == 8) {
+ srcs[5] = vextq_s16(srcs[4], srcs[8], 2);
+ srcs[6] = vcombine_s16(vget_high_s16(srcs[4]), vget_low_s16(srcs[8]));
+ srcs[7] = vextq_s16(srcs[4], srcs[8], 6);
+ }
+ const int16x8_t sum =
+ SimpleSum2DVerticalTaps<num_taps, /*is_compound=*/false>(srcs, taps);
+ const uint16x8_t d0 = vminq_u16(vreinterpretq_u16_s16(sum), v_max_bitdepth);
+ Store2<0>(dst16, d0);
+ dst16 += dst_stride;
+ Store2<1>(dst16, d0);
+ // When |height| <= 4 the taps are restricted to 2 and 4 tap variants.
+ // Therefore we don't need to check this condition when |height| > 4.
+ if (num_taps <= 4 && height == 2) return;
+ dst16 += dst_stride;
+ Store2<2>(dst16, d0);
+ dst16 += dst_stride;
+ Store2<3>(dst16, d0);
+ dst16 += dst_stride;
+
+ srcs[0] = srcs[4];
+ if (num_taps == 6) {
+ srcs[1] = srcs[5];
+ srcs[4] = srcs[8];
+ } else if (num_taps == 8) {
+ srcs[1] = srcs[5];
+ srcs[2] = srcs[6];
+ srcs[3] = srcs[7];
+ srcs[4] = srcs[8];
+ }
+
+ y -= 4;
+ } while (y != 0);
+}
+
+template <int vertical_taps>
+void Filter2DVertical(const int16_t* LIBGAV1_RESTRICT const intermediate_result,
+ const int width, const int height, const int16x8_t taps,
+ void* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t pred_stride) {
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ if (width >= 8) {
+ Filter2DVerticalWidth8AndUp<vertical_taps>(
+ intermediate_result, dest, pred_stride, width, height, taps);
+ } else if (width == 4) {
+ Filter2DVerticalWidth4<vertical_taps>(intermediate_result, dest,
+ pred_stride, height, taps);
+ } else {
+ assert(width == 2);
+ Filter2DVerticalWidth2<vertical_taps>(intermediate_result, dest,
+ pred_stride, height, taps);
+ }
+}
+
+void Convolve2D_NEON(const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride,
+ const int horizontal_filter_index,
+ const int vertical_filter_index,
+ const int horizontal_filter_id,
+ const int vertical_filter_id, const int width,
+ const int height, void* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t pred_stride) {
+ const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
+ const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
+ const int vertical_taps = GetNumTapsInFilter(vert_filter_index);
+ // The output of the horizontal filter is guaranteed to fit in 16 bits.
+ int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
+ (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x43, sizeof(intermediate_result));
+#endif
+ const int intermediate_height = height + vertical_taps - 1;
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const auto* const src = static_cast<const uint16_t*>(reference) -
+ (vertical_taps / 2 - 1) * src_stride -
+ kHorizontalOffset;
+ const ptrdiff_t dest_stride = pred_stride >> 1;
+
+ DoHorizontalPass</*is_compound=*/false, /*is_2d=*/true>(
+ src, src_stride, intermediate_result, width, width, intermediate_height,
+ horizontal_filter_id, horiz_filter_index);
+
+ assert(vertical_filter_id != 0);
+ const int16x8_t taps = vmovl_s8(
+ vld1_s8(kHalfSubPixelFilters[vert_filter_index][vertical_filter_id]));
+ if (vertical_taps == 8) {
+ Filter2DVertical<8>(intermediate_result, width, height, taps, prediction,
+ dest_stride);
+ } else if (vertical_taps == 6) {
+ Filter2DVertical<6>(intermediate_result, width, height, taps, prediction,
+ dest_stride);
+ } else if (vertical_taps == 4) {
+ Filter2DVertical<4>(intermediate_result, width, height, taps, prediction,
+ dest_stride);
+ } else { // |vertical_taps| == 2
+ Filter2DVertical<2>(intermediate_result, width, height, taps, prediction,
+ dest_stride);
+ }
+}
+
+template <int vertical_taps>
+void Compound2DVertical(
+ const int16_t* LIBGAV1_RESTRICT const intermediate_result, const int width,
+ const int height, const int16x8_t taps,
+ void* LIBGAV1_RESTRICT const prediction) {
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ if (width == 4) {
+ Filter2DVerticalWidth4<vertical_taps, /*is_compound=*/true>(
+ intermediate_result, dest, width, height, taps);
+ } else {
+ Filter2DVerticalWidth8AndUp<vertical_taps, /*is_compound=*/true>(
+ intermediate_result, dest, width, width, height, taps);
+ }
+}
+
+void ConvolveCompound2D_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int vertical_filter_index, const int horizontal_filter_id,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
+ // The output of the horizontal filter, i.e. the intermediate_result, is
+ // guaranteed to fit in int16_t.
+ int16_t
+ intermediate_result[(kMaxSuperBlockSizeInPixels *
+ (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1))];
+
+ // Horizontal filter.
+ // Filter types used for width <= 4 are different from those for width > 4.
+ // When width > 4, the valid filter index range is always [0, 3].
+ // When width <= 4, the valid filter index range is always [4, 5].
+ // Similarly for height.
+ const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
+ const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
+ const int vertical_taps = GetNumTapsInFilter(vert_filter_index);
+ const int intermediate_height = height + vertical_taps - 1;
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const auto* const src = static_cast<const uint16_t*>(reference) -
+ (vertical_taps / 2 - 1) * src_stride -
+ kHorizontalOffset;
+
+ DoHorizontalPass</*is_2d=*/true, /*is_compound=*/true>(
+ src, src_stride, intermediate_result, width, width, intermediate_height,
+ horizontal_filter_id, horiz_filter_index);
+
+ // Vertical filter.
+ assert(vertical_filter_id != 0);
+ const int16x8_t taps = vmovl_s8(
+ vld1_s8(kHalfSubPixelFilters[vert_filter_index][vertical_filter_id]));
+ if (vertical_taps == 8) {
+ Compound2DVertical<8>(intermediate_result, width, height, taps, prediction);
+ } else if (vertical_taps == 6) {
+ Compound2DVertical<6>(intermediate_result, width, height, taps, prediction);
+ } else if (vertical_taps == 4) {
+ Compound2DVertical<4>(intermediate_result, width, height, taps, prediction);
+ } else { // |vertical_taps| == 2
+ Compound2DVertical<2>(intermediate_result, width, height, taps, prediction);
+ }
+}
+
+void ConvolveVertical_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ const int filter_index = GetFilterIndex(vertical_filter_index, height);
+ const int vertical_taps = GetNumTapsInFilter(filter_index);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const auto* src = static_cast<const uint16_t*>(reference) -
+ (vertical_taps / 2 - 1) * src_stride;
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t dest_stride = pred_stride >> 1;
+ assert(vertical_filter_id != 0);
+
+ int16x4_t taps[8];
+ for (int k = 0; k < kSubPixelTaps; ++k) {
+ taps[k] =
+ vdup_n_s16(kHalfSubPixelFilters[filter_index][vertical_filter_id][k]);
+ }
+
+ if (filter_index == 0) { // 6 tap.
+ if (width == 2) {
+ FilterVertical2xH<0>(src, src_stride, dest, dest_stride, height,
+ taps + 1);
+ } else if (width == 4) {
+ FilterVertical4xH<0>(src, src_stride, dest, dest_stride, height,
+ taps + 1);
+ } else {
+ FilterVertical<0>(src, src_stride, dest, dest_stride, width, height,
+ taps + 1);
+ }
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 1) |
+ static_cast<int>(vertical_filter_id == 7) |
+ static_cast<int>(vertical_filter_id == 8) |
+ static_cast<int>(vertical_filter_id == 9) |
+ static_cast<int>(vertical_filter_id == 15))) != 0) { // 6 tap.
+ if (width == 2) {
+ FilterVertical2xH<1>(src, src_stride, dest, dest_stride, height,
+ taps + 1);
+ } else if (width == 4) {
+ FilterVertical4xH<1>(src, src_stride, dest, dest_stride, height,
+ taps + 1);
+ } else {
+ FilterVertical<1>(src, src_stride, dest, dest_stride, width, height,
+ taps + 1);
+ }
+ } else if (filter_index == 2) { // 8 tap.
+ if (width == 2) {
+ FilterVertical2xH<2>(src, src_stride, dest, dest_stride, height, taps);
+ } else if (width == 4) {
+ FilterVertical4xH<2>(src, src_stride, dest, dest_stride, height, taps);
+ } else {
+ FilterVertical<2>(src, src_stride, dest, dest_stride, width, height,
+ taps);
+ }
+ } else if (filter_index == 3) { // 2 tap.
+ if (width == 2) {
+ FilterVertical2xH<3>(src, src_stride, dest, dest_stride, height,
+ taps + 3);
+ } else if (width == 4) {
+ FilterVertical4xH<3>(src, src_stride, dest, dest_stride, height,
+ taps + 3);
+ } else {
+ FilterVertical<3>(src, src_stride, dest, dest_stride, width, height,
+ taps + 3);
+ }
+ } else {
+ // 4 tap. When |filter_index| == 1 the |vertical_filter_id| values listed
+ // below map to 4 tap filters.
+ assert(filter_index == 5 || filter_index == 4 ||
+ (filter_index == 1 &&
+ (vertical_filter_id == 0 || vertical_filter_id == 2 ||
+ vertical_filter_id == 3 || vertical_filter_id == 4 ||
+ vertical_filter_id == 5 || vertical_filter_id == 6 ||
+ vertical_filter_id == 10 || vertical_filter_id == 11 ||
+ vertical_filter_id == 12 || vertical_filter_id == 13 ||
+ vertical_filter_id == 14)));
+ // According to GetNumTapsInFilter() this has 6 taps but here we are
+ // treating it as though it has 4.
+ if (filter_index == 1) src += src_stride;
+ if (width == 2) {
+ FilterVertical2xH<5>(src, src_stride, dest, dest_stride, height,
+ taps + 2);
+ } else if (width == 4) {
+ FilterVertical4xH<5>(src, src_stride, dest, dest_stride, height,
+ taps + 2);
+ } else {
+ FilterVertical<5>(src, src_stride, dest, dest_stride, width, height,
+ taps + 2);
+ }
+ }
+}
+
+void ConvolveCompoundVertical_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
+ const int filter_index = GetFilterIndex(vertical_filter_index, height);
+ const int vertical_taps = GetNumTapsInFilter(filter_index);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const auto* src = static_cast<const uint16_t*>(reference) -
+ (vertical_taps / 2 - 1) * src_stride;
+ auto* const dest = static_cast<uint16_t*>(prediction);
+ assert(vertical_filter_id != 0);
+
+ int16x4_t taps[8];
+ for (int k = 0; k < kSubPixelTaps; ++k) {
+ taps[k] =
+ vdup_n_s16(kHalfSubPixelFilters[filter_index][vertical_filter_id][k]);
+ }
+
+ if (filter_index == 0) { // 6 tap.
+ if (width == 4) {
+ FilterVertical4xH<0, /*is_compound=*/true>(src, src_stride, dest, 4,
+ height, taps + 1);
+ } else {
+ FilterVertical<0, /*is_compound=*/true>(src, src_stride, dest, width,
+ width, height, taps + 1);
+ }
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 1) |
+ static_cast<int>(vertical_filter_id == 7) |
+ static_cast<int>(vertical_filter_id == 8) |
+ static_cast<int>(vertical_filter_id == 9) |
+ static_cast<int>(vertical_filter_id == 15))) != 0) { // 6 tap.
+ if (width == 4) {
+ FilterVertical4xH<1, /*is_compound=*/true>(src, src_stride, dest, 4,
+ height, taps + 1);
+ } else {
+ FilterVertical<1, /*is_compound=*/true>(src, src_stride, dest, width,
+ width, height, taps + 1);
+ }
+ } else if (filter_index == 2) { // 8 tap.
+ if (width == 4) {
+ FilterVertical4xH<2, /*is_compound=*/true>(src, src_stride, dest, 4,
+ height, taps);
+ } else {
+ FilterVertical<2, /*is_compound=*/true>(src, src_stride, dest, width,
+ width, height, taps);
+ }
+ } else if (filter_index == 3) { // 2 tap.
+ if (width == 4) {
+ FilterVertical4xH<3, /*is_compound=*/true>(src, src_stride, dest, 4,
+ height, taps + 3);
+ } else {
+ FilterVertical<3, /*is_compound=*/true>(src, src_stride, dest, width,
+ width, height, taps + 3);
+ }
+ } else {
+ // 4 tap. When |filter_index| == 1 the |filter_id| values listed below map
+ // to 4 tap filters.
+ assert(filter_index == 5 || filter_index == 4 ||
+ (filter_index == 1 &&
+ (vertical_filter_id == 2 || vertical_filter_id == 3 ||
+ vertical_filter_id == 4 || vertical_filter_id == 5 ||
+ vertical_filter_id == 6 || vertical_filter_id == 10 ||
+ vertical_filter_id == 11 || vertical_filter_id == 12 ||
+ vertical_filter_id == 13 || vertical_filter_id == 14)));
+ // According to GetNumTapsInFilter() this has 6 taps but here we are
+ // treating it as though it has 4.
+ if (filter_index == 1) src += src_stride;
+ if (width == 4) {
+ FilterVertical4xH<5, /*is_compound=*/true>(src, src_stride, dest, 4,
+ height, taps + 2);
+ } else {
+ FilterVertical<5, /*is_compound=*/true>(src, src_stride, dest, width,
+ width, height, taps + 2);
+ }
+ }
+}
+
+void ConvolveCompoundCopy_NEON(
+ const void* const reference, const ptrdiff_t reference_stride,
+ const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
+ const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
+ const int width, const int height, void* const prediction,
+ const ptrdiff_t /*pred_stride*/) {
+ const auto* src = static_cast<const uint16_t*>(reference);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ auto* dest = static_cast<uint16_t*>(prediction);
+ constexpr int final_shift =
+ kInterRoundBitsVertical - kInterRoundBitsCompoundVertical;
+ const uint16x8_t offset =
+ vdupq_n_u16((1 << kBitdepth10) + (1 << (kBitdepth10 - 1)));
+
+ if (width >= 16) {
+ int y = height;
+ do {
+ int x = 0;
+ int w = width;
+ do {
+ const uint16x8_t v_src_lo = vld1q_u16(&src[x]);
+ const uint16x8_t v_src_hi = vld1q_u16(&src[x + 8]);
+ const uint16x8_t v_sum_lo = vaddq_u16(v_src_lo, offset);
+ const uint16x8_t v_sum_hi = vaddq_u16(v_src_hi, offset);
+ const uint16x8_t v_dest_lo = vshlq_n_u16(v_sum_lo, final_shift);
+ const uint16x8_t v_dest_hi = vshlq_n_u16(v_sum_hi, final_shift);
+ vst1q_u16(&dest[x], v_dest_lo);
+ vst1q_u16(&dest[x + 8], v_dest_hi);
+ x += 16;
+ w -= 16;
+ } while (w != 0);
+ src += src_stride;
+ dest += width;
+ } while (--y != 0);
+ } else if (width == 8) {
+ int y = height;
+ do {
+ const uint16x8_t v_src_lo = vld1q_u16(&src[0]);
+ const uint16x8_t v_src_hi = vld1q_u16(&src[src_stride]);
+ const uint16x8_t v_sum_lo = vaddq_u16(v_src_lo, offset);
+ const uint16x8_t v_sum_hi = vaddq_u16(v_src_hi, offset);
+ const uint16x8_t v_dest_lo = vshlq_n_u16(v_sum_lo, final_shift);
+ const uint16x8_t v_dest_hi = vshlq_n_u16(v_sum_hi, final_shift);
+ vst1q_u16(&dest[0], v_dest_lo);
+ vst1q_u16(&dest[8], v_dest_hi);
+ src += src_stride << 1;
+ dest += 16;
+ y -= 2;
+ } while (y != 0);
+ } else { // width == 4
+ int y = height;
+ do {
+ const uint16x4_t v_src_lo = vld1_u16(&src[0]);
+ const uint16x4_t v_src_hi = vld1_u16(&src[src_stride]);
+ const uint16x4_t v_sum_lo = vadd_u16(v_src_lo, vget_low_u16(offset));
+ const uint16x4_t v_sum_hi = vadd_u16(v_src_hi, vget_low_u16(offset));
+ const uint16x4_t v_dest_lo = vshl_n_u16(v_sum_lo, final_shift);
+ const uint16x4_t v_dest_hi = vshl_n_u16(v_sum_hi, final_shift);
+ vst1_u16(&dest[0], v_dest_lo);
+ vst1_u16(&dest[4], v_dest_hi);
+ src += src_stride << 1;
+ dest += 8;
+ y -= 2;
+ } while (y != 0);
+ }
+}
+
+inline void HalfAddHorizontal(const uint16_t* LIBGAV1_RESTRICT const src,
+ uint16_t* LIBGAV1_RESTRICT const dst) {
+ const uint16x8_t left = vld1q_u16(src);
+ const uint16x8_t right = vld1q_u16(src + 1);
+ vst1q_u16(dst, vrhaddq_u16(left, right));
+}
+
+inline void HalfAddHorizontal16(const uint16_t* LIBGAV1_RESTRICT const src,
+ uint16_t* LIBGAV1_RESTRICT const dst) {
+ HalfAddHorizontal(src, dst);
+ HalfAddHorizontal(src + 8, dst + 8);
+}
+
+template <int width>
+inline void IntraBlockCopyHorizontal(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ const int height,
+ uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ const ptrdiff_t src_remainder_stride = src_stride - (width - 16);
+ const ptrdiff_t dst_remainder_stride = dst_stride - (width - 16);
+
+ int y = height;
+ do {
+ HalfAddHorizontal16(src, dst);
+ if (width >= 32) {
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ if (width >= 64) {
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ if (width == 128) {
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ src += 16;
+ dst += 16;
+ HalfAddHorizontal16(src, dst);
+ }
+ }
+ }
+ src += src_remainder_stride;
+ dst += dst_remainder_stride;
+ } while (--y != 0);
+}
+
+void ConvolveIntraBlockCopyHorizontal_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*subpixel_x*/,
+ const int /*subpixel_y*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
+ const auto* src = static_cast<const uint16_t*>(reference);
+ auto* dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const ptrdiff_t dst_stride = pred_stride >> 1;
+
+ if (width == 128) {
+ IntraBlockCopyHorizontal<128>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 64) {
+ IntraBlockCopyHorizontal<64>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 32) {
+ IntraBlockCopyHorizontal<32>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 16) {
+ IntraBlockCopyHorizontal<16>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 8) {
+ int y = height;
+ do {
+ HalfAddHorizontal(src, dest);
+ src += src_stride;
+ dest += dst_stride;
+ } while (--y != 0);
+ } else { // width == 4
+ int y = height;
+ do {
+ uint16x4x2_t left;
+ uint16x4x2_t right;
+ left.val[0] = vld1_u16(src);
+ right.val[0] = vld1_u16(src + 1);
+ src += src_stride;
+ left.val[1] = vld1_u16(src);
+ right.val[1] = vld1_u16(src + 1);
+ src += src_stride;
+
+ vst1_u16(dest, vrhadd_u16(left.val[0], right.val[0]));
+ dest += dst_stride;
+ vst1_u16(dest, vrhadd_u16(left.val[1], right.val[1]));
+ dest += dst_stride;
+ y -= 2;
+ } while (y != 0);
+ }
+}
+
+template <int width>
+inline void IntraBlockCopyVertical(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ const ptrdiff_t src_remainder_stride = src_stride - (width - 8);
+ const ptrdiff_t dst_remainder_stride = dst_stride - (width - 8);
+ uint16x8_t row[8], below[8];
+
+ row[0] = vld1q_u16(src);
+ if (width >= 16) {
+ src += 8;
+ row[1] = vld1q_u16(src);
+ if (width >= 32) {
+ src += 8;
+ row[2] = vld1q_u16(src);
+ src += 8;
+ row[3] = vld1q_u16(src);
+ if (width == 64) {
+ src += 8;
+ row[4] = vld1q_u16(src);
+ src += 8;
+ row[5] = vld1q_u16(src);
+ src += 8;
+ row[6] = vld1q_u16(src);
+ src += 8;
+ row[7] = vld1q_u16(src);
+ }
+ }
+ }
+ src += src_remainder_stride;
+
+ int y = height;
+ do {
+ below[0] = vld1q_u16(src);
+ if (width >= 16) {
+ src += 8;
+ below[1] = vld1q_u16(src);
+ if (width >= 32) {
+ src += 8;
+ below[2] = vld1q_u16(src);
+ src += 8;
+ below[3] = vld1q_u16(src);
+ if (width == 64) {
+ src += 8;
+ below[4] = vld1q_u16(src);
+ src += 8;
+ below[5] = vld1q_u16(src);
+ src += 8;
+ below[6] = vld1q_u16(src);
+ src += 8;
+ below[7] = vld1q_u16(src);
+ }
+ }
+ }
+ src += src_remainder_stride;
+
+ vst1q_u16(dst, vrhaddq_u16(row[0], below[0]));
+ row[0] = below[0];
+ if (width >= 16) {
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[1], below[1]));
+ row[1] = below[1];
+ if (width >= 32) {
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[2], below[2]));
+ row[2] = below[2];
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[3], below[3]));
+ row[3] = below[3];
+ if (width >= 64) {
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[4], below[4]));
+ row[4] = below[4];
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[5], below[5]));
+ row[5] = below[5];
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[6], below[6]));
+ row[6] = below[6];
+ dst += 8;
+ vst1q_u16(dst, vrhaddq_u16(row[7], below[7]));
+ row[7] = below[7];
+ }
+ }
+ }
+ dst += dst_remainder_stride;
+ } while (--y != 0);
+}
+
+void ConvolveIntraBlockCopyVertical_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
+ const auto* src = static_cast<const uint16_t*>(reference);
+ auto* dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const ptrdiff_t dst_stride = pred_stride >> 1;
+
+ if (width == 128) {
+ // Due to register pressure, process two 64xH.
+ for (int i = 0; i < 2; ++i) {
+ IntraBlockCopyVertical<64>(src, src_stride, height, dest, dst_stride);
+ src += 64;
+ dest += 64;
+ }
+ } else if (width == 64) {
+ IntraBlockCopyVertical<64>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 32) {
+ IntraBlockCopyVertical<32>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 16) {
+ IntraBlockCopyVertical<16>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 8) {
+ IntraBlockCopyVertical<8>(src, src_stride, height, dest, dst_stride);
+ } else { // width == 4
+ uint16x4_t row = vld1_u16(src);
+ src += src_stride;
+ int y = height;
+ do {
+ const uint16x4_t below = vld1_u16(src);
+ src += src_stride;
+ vst1_u16(dest, vrhadd_u16(row, below));
+ dest += dst_stride;
+ row = below;
+ } while (--y != 0);
+ }
+}
+
+template <int width>
+inline void IntraBlockCopy2D(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ const ptrdiff_t src_remainder_stride = src_stride - (width - 8);
+ const ptrdiff_t dst_remainder_stride = dst_stride - (width - 8);
+ uint16x8_t row[16];
+ row[0] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ if (width >= 16) {
+ src += 8;
+ row[1] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ if (width >= 32) {
+ src += 8;
+ row[2] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[3] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ if (width >= 64) {
+ src += 8;
+ row[4] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[5] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[6] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[7] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ if (width == 128) {
+ src += 8;
+ row[8] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[9] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[10] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[11] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[12] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[13] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[14] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ src += 8;
+ row[15] = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ }
+ }
+ }
+ }
+ src += src_remainder_stride;
+
+ int y = height;
+ do {
+ const uint16x8_t below_0 = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[0], below_0), 2));
+ row[0] = below_0;
+ if (width >= 16) {
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_1 = vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[1], below_1), 2));
+ row[1] = below_1;
+ if (width >= 32) {
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_2 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[2], below_2), 2));
+ row[2] = below_2;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_3 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[3], below_3), 2));
+ row[3] = below_3;
+ if (width >= 64) {
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_4 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[4], below_4), 2));
+ row[4] = below_4;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_5 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[5], below_5), 2));
+ row[5] = below_5;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_6 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[6], below_6), 2));
+ row[6] = below_6;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_7 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[7], below_7), 2));
+ row[7] = below_7;
+ if (width == 128) {
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_8 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[8], below_8), 2));
+ row[8] = below_8;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_9 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[9], below_9), 2));
+ row[9] = below_9;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_10 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[10], below_10), 2));
+ row[10] = below_10;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_11 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[11], below_11), 2));
+ row[11] = below_11;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_12 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[12], below_12), 2));
+ row[12] = below_12;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_13 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[13], below_13), 2));
+ row[13] = below_13;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_14 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[14], below_14), 2));
+ row[14] = below_14;
+ src += 8;
+ dst += 8;
+
+ const uint16x8_t below_15 =
+ vaddq_u16(vld1q_u16(src), vld1q_u16(src + 1));
+ vst1q_u16(dst, vrshrq_n_u16(vaddq_u16(row[15], below_15), 2));
+ row[15] = below_15;
+ }
+ }
+ }
+ }
+ src += src_remainder_stride;
+ dst += dst_remainder_stride;
+ } while (--y != 0);
+}
+
+void ConvolveIntraBlockCopy2D_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
+ const auto* src = static_cast<const uint16_t*>(reference);
+ auto* dest = static_cast<uint16_t*>(prediction);
+ const ptrdiff_t src_stride = reference_stride >> 1;
+ const ptrdiff_t dst_stride = pred_stride >> 1;
+
+ // Note: allow vertical access to height + 1. Because this function is only
+ // for u/v plane of intra block copy, such access is guaranteed to be within
+ // the prediction block.
+
+ if (width == 128) {
+ IntraBlockCopy2D<128>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 64) {
+ IntraBlockCopy2D<64>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 32) {
+ IntraBlockCopy2D<32>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 16) {
+ IntraBlockCopy2D<16>(src, src_stride, height, dest, dst_stride);
+ } else if (width == 8) {
+ IntraBlockCopy2D<8>(src, src_stride, height, dest, dst_stride);
+ } else { // width == 4
+ uint16x4_t row0 = vadd_u16(vld1_u16(src), vld1_u16(src + 1));
+ src += src_stride;
+
+ int y = height;
+ do {
+ const uint16x4_t row1 = vadd_u16(vld1_u16(src), vld1_u16(src + 1));
+ src += src_stride;
+ const uint16x4_t row2 = vadd_u16(vld1_u16(src), vld1_u16(src + 1));
+ src += src_stride;
+ const uint16x4_t result_01 = vrshr_n_u16(vadd_u16(row0, row1), 2);
+ const uint16x4_t result_12 = vrshr_n_u16(vadd_u16(row1, row2), 2);
+ vst1_u16(dest, result_01);
+ dest += dst_stride;
+ vst1_u16(dest, result_12);
+ dest += dst_stride;
+ row0 = row2;
+ y -= 2;
+ } while (y != 0);
+ }
+}
+
+// -----------------------------------------------------------------------------
+// Scaled Convolve
+
+// There are many opportunities for overreading in scaled convolve, because the
+// range of starting points for filter windows is anywhere from 0 to 16 for 8
+// destination pixels, and the window sizes range from 2 to 8. To accommodate
+// this range concisely, we use |grade_x| to mean the most steps in src that can
+// be traversed in a single |step_x| increment, i.e. 1 or 2. When grade_x is 2,
+// we are guaranteed to exceed 8 whole steps in src for every 8 |step_x|
+// increments. The first load covers the initial elements of src_x, while the
+// final load covers the taps.
+template <int grade_x>
+inline uint8x16x3_t LoadSrcVals(const uint16_t* const src_x) {
+ uint8x16x3_t ret;
+ // When fractional step size is less than or equal to 1, the rightmost
+ // starting value for a filter may be at position 7. For an 8-tap filter, the
+ // rightmost value for the final tap may be at position 14. Therefore we load
+ // 2 vectors of eight 16-bit values.
+ ret.val[0] = vreinterpretq_u8_u16(vld1q_u16(src_x));
+ ret.val[1] = vreinterpretq_u8_u16(vld1q_u16(src_x + 8));
+#if LIBGAV1_MSAN
+ // Initialize to quiet msan warnings when grade_x <= 1.
+ ret.val[2] = vdupq_n_u8(0);
+#endif
+ if (grade_x > 1) {
+ // When fractional step size is greater than 1 (up to 2), the rightmost
+ // starting value for a filter may be at position 15. For an 8-tap filter,
+ // the rightmost value for the final tap may be at position 22. Therefore we
+ // load 3 vectors of eight 16-bit values.
+ ret.val[2] = vreinterpretq_u8_u16(vld1q_u16(src_x + 16));
+ }
+ return ret;
+}
+
+// Assemble 4 values corresponding to one tap position across multiple filters.
+// This is a simple case because maximum offset is 8 and only smaller filters
+// work on 4xH.
+inline uint16x4_t PermuteSrcVals(const uint8x16x3_t src_bytes,
+ const uint8x8_t indices) {
+ const uint8x16x2_t src_bytes2 = {src_bytes.val[0], src_bytes.val[1]};
+ return vreinterpret_u16_u8(VQTbl2U8(src_bytes2, indices));
+}
+
+// Assemble 8 values corresponding to one tap position across multiple filters.
+// This requires a lot of workaround on A32 architectures, so it may be worth
+// using an overall different algorithm for that architecture.
+template <int grade_x>
+inline uint16x8_t PermuteSrcVals(const uint8x16x3_t src_bytes,
+ const uint8x16_t indices) {
+ if (grade_x == 1) {
+ const uint8x16x2_t src_bytes2 = {src_bytes.val[0], src_bytes.val[1]};
+ return vreinterpretq_u16_u8(VQTbl2QU8(src_bytes2, indices));
+ }
+ return vreinterpretq_u16_u8(VQTbl3QU8(src_bytes, indices));
+}
+
+// Pre-transpose the 2 tap filters in |kAbsHalfSubPixelFilters|[3]
+// Although the taps need to be converted to 16-bit values, they must be
+// arranged by table lookup, which is more expensive for larger types than
+// lengthening in-loop. |tap_index| refers to the index within a kernel applied
+// to a single value.
+inline int8x16_t GetPositive2TapFilter(const int tap_index) {
+ assert(tap_index < 2);
+ alignas(
+ 16) static constexpr int8_t kAbsHalfSubPixel2TapFilterColumns[2][16] = {
+ {64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4},
+ {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60}};
+
+ return vld1q_s8(kAbsHalfSubPixel2TapFilterColumns[tap_index]);
+}
+
+template <int grade_x>
+inline void ConvolveKernelHorizontal2Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height, int16_t* LIBGAV1_RESTRICT intermediate) {
+ // Account for the 0-taps that precede the 2 nonzero taps in the spec.
+ const int kernel_offset = 3;
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const int step_x8 = step_x << 3;
+ const int8x16_t filter_taps0 = GetPositive2TapFilter(0);
+ const int8x16_t filter_taps1 = GetPositive2TapFilter(1);
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+
+ int p = subpixel_x;
+ if (width <= 4) {
+ const uint16_t* src_y = src;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, 6), filter_index_mask);
+ // Each lane of lane of taps[k] corresponds to one output value along the
+ // row, containing kSubPixelFilters[filter_index][filter_id][k], where
+ // filter_id depends on x.
+ const int16x4_t taps[2] = {
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps0, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps1, filter_indices)))};
+ // Lower byte of Nth value is at position 2*N.
+ // Narrowing shift is not available here because the maximum shift
+ // parameter is 8.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ // Only 4 values needed.
+ const uint8x8_t src_indices = InterleaveLow8(src_indices0, src_indices1);
+ const uint8x8_t src_lookup[2] = {src_indices,
+ vadd_u8(src_indices, vdup_n_u8(2))};
+
+ int y = intermediate_height;
+ do {
+ const uint16_t* src_x =
+ src_y + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<1>(src_x);
+ // Each lane corresponds to a different filter kernel.
+ const uint16x4_t src[2] = {PermuteSrcVals(src_bytes, src_lookup[0]),
+ PermuteSrcVals(src_bytes, src_lookup[1])};
+
+ vst1_s16(intermediate,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/3>(src, taps),
+ kInterRoundBitsHorizontal - 1));
+ src_y = AddByteStride(src_y, src_stride);
+ intermediate += kIntermediateStride;
+ } while (--y != 0);
+ return;
+ }
+
+ // |width| >= 8
+ int16_t* intermediate_x = intermediate;
+ int x = 0;
+ do {
+ const uint16_t* src_x =
+ src + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, kFilterIndexShift),
+ filter_index_mask);
+ // Each lane of lane of taps[k] corresponds to one output value along the
+ // row, containing kSubPixelFilters[filter_index][filter_id][k], where
+ // filter_id depends on x.
+ const int16x8_t taps[2] = {
+ vmovl_s8(VQTbl1S8(filter_taps0, filter_indices)),
+ vmovl_s8(VQTbl1S8(filter_taps1, filter_indices))};
+ const int16x4_t taps_low[2] = {vget_low_s16(taps[0]),
+ vget_low_s16(taps[1])};
+ const int16x4_t taps_high[2] = {vget_high_s16(taps[0]),
+ vget_high_s16(taps[1])};
+ // Lower byte of Nth value is at position 2*N.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ const uint8x8x2_t src_indices_zip = vzip_u8(src_indices0, src_indices1);
+ const uint8x16_t src_indices =
+ vcombine_u8(src_indices_zip.val[0], src_indices_zip.val[1]);
+ const uint8x16_t src_lookup[2] = {src_indices,
+ vaddq_u8(src_indices, vdupq_n_u8(2))};
+
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<grade_x>(src_x);
+ // Each lane corresponds to a different filter kernel.
+ const uint16x8_t src[2] = {
+ PermuteSrcVals<grade_x>(src_bytes, src_lookup[0]),
+ PermuteSrcVals<grade_x>(src_bytes, src_lookup[1])};
+ const uint16x4_t src_low[2] = {vget_low_u16(src[0]),
+ vget_low_u16(src[1])};
+ const uint16x4_t src_high[2] = {vget_high_u16(src[0]),
+ vget_high_u16(src[1])};
+
+ vst1_s16(intermediate_x, vrshrn_n_s32(SumOnePassTaps</*filter_index=*/3>(
+ src_low, taps_low),
+ kInterRoundBitsHorizontal - 1));
+ vst1_s16(
+ intermediate_x + 4,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/3>(src_high, taps_high),
+ kInterRoundBitsHorizontal - 1));
+ // Avoid right shifting the stride.
+ src_x = AddByteStride(src_x, src_stride);
+ intermediate_x += kIntermediateStride;
+ } while (--y != 0);
+ x += 8;
+ p += step_x8;
+ } while (x < width);
+}
+
+// Pre-transpose the 4 tap filters in |kAbsHalfSubPixelFilters|[5].
+inline int8x16_t GetPositive4TapFilter(const int tap_index) {
+ assert(tap_index < 4);
+ alignas(
+ 16) static constexpr int8_t kSubPixel4TapPositiveFilterColumns[4][16] = {
+ {0, 15, 13, 11, 10, 9, 8, 7, 6, 6, 5, 4, 3, 2, 2, 1},
+ {64, 31, 31, 31, 30, 29, 28, 27, 26, 24, 23, 22, 21, 20, 18, 17},
+ {0, 17, 18, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 31, 31},
+ {0, 1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 15}};
+
+ return vld1q_s8(kSubPixel4TapPositiveFilterColumns[tap_index]);
+}
+
+// This filter is only possible when width <= 4.
+inline void ConvolveKernelHorizontalPositive4Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int subpixel_x, const int step_x, const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT intermediate) {
+ // Account for the 0-taps that precede the 2 nonzero taps in the spec.
+ const int kernel_offset = 2;
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const int8x16_t filter_taps0 = GetPositive4TapFilter(0);
+ const int8x16_t filter_taps1 = GetPositive4TapFilter(1);
+ const int8x16_t filter_taps2 = GetPositive4TapFilter(2);
+ const int8x16_t filter_taps3 = GetPositive4TapFilter(3);
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+
+ int p = subpixel_x;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, 6), filter_index_mask);
+ // Each lane of lane of taps[k] corresponds to one output value along the row,
+ // containing kSubPixelFilters[filter_index][filter_id][k], where filter_id
+ // depends on x.
+ const int16x4_t taps[4] = {
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps0, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps1, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps2, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps3, filter_indices)))};
+ // Lower byte of Nth value is at position 2*N.
+ // Narrowing shift is not available here because the maximum shift
+ // parameter is 8.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ // Only 4 values needed.
+ const uint8x8_t src_indices_base = InterleaveLow8(src_indices0, src_indices1);
+
+ uint8x8_t src_lookup[4];
+ const uint8x8_t two = vdup_n_u8(2);
+ src_lookup[0] = src_indices_base;
+ for (int i = 1; i < 4; ++i) {
+ src_lookup[i] = vadd_u8(src_lookup[i - 1], two);
+ }
+
+ const uint16_t* src_y =
+ src + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<1>(src_y);
+ // Each lane corresponds to a different filter kernel.
+ const uint16x4_t src[4] = {PermuteSrcVals(src_bytes, src_lookup[0]),
+ PermuteSrcVals(src_bytes, src_lookup[1]),
+ PermuteSrcVals(src_bytes, src_lookup[2]),
+ PermuteSrcVals(src_bytes, src_lookup[3])};
+
+ vst1_s16(intermediate,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/5>(src, taps),
+ kInterRoundBitsHorizontal - 1));
+ src_y = AddByteStride(src_y, src_stride);
+ intermediate += kIntermediateStride;
+ } while (--y != 0);
+}
+
+// Pre-transpose the 4 tap filters in |kAbsHalfSubPixelFilters|[4].
+inline int8x16_t GetSigned4TapFilter(const int tap_index) {
+ assert(tap_index < 4);
+ alignas(16) static constexpr int8_t
+ kAbsHalfSubPixel4TapSignedFilterColumns[4][16] = {
+ {-0, -2, -4, -5, -6, -6, -7, -6, -6, -5, -5, -5, -4, -3, -2, -1},
+ {64, 63, 61, 58, 55, 51, 47, 42, 38, 33, 29, 24, 19, 14, 9, 4},
+ {0, 4, 9, 14, 19, 24, 29, 33, 38, 42, 47, 51, 55, 58, 61, 63},
+ {-0, -1, -2, -3, -4, -5, -5, -5, -6, -6, -7, -6, -6, -5, -4, -2}};
+
+ return vld1q_s8(kAbsHalfSubPixel4TapSignedFilterColumns[tap_index]);
+}
+
+// This filter is only possible when width <= 4.
+inline void ConvolveKernelHorizontalSigned4Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int subpixel_x, const int step_x, const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT intermediate) {
+ const int kernel_offset = 2;
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+ const int8x16_t filter_taps0 = GetSigned4TapFilter(0);
+ const int8x16_t filter_taps1 = GetSigned4TapFilter(1);
+ const int8x16_t filter_taps2 = GetSigned4TapFilter(2);
+ const int8x16_t filter_taps3 = GetSigned4TapFilter(3);
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+
+ const int p = subpixel_x;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, 6), filter_index_mask);
+ // Each lane of lane of taps[k] corresponds to one output value along the row,
+ // containing kSubPixelFilters[filter_index][filter_id][k], where filter_id
+ // depends on x.
+ const int16x4_t taps[4] = {
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps0, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps1, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps2, filter_indices))),
+ vget_low_s16(vmovl_s8(VQTbl1S8(filter_taps3, filter_indices)))};
+ // Lower byte of Nth value is at position 2*N.
+ // Narrowing shift is not available here because the maximum shift
+ // parameter is 8.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ // Only 4 values needed.
+ const uint8x8_t src_indices_base = InterleaveLow8(src_indices0, src_indices1);
+
+ uint8x8_t src_lookup[4];
+ const uint8x8_t two = vdup_n_u8(2);
+ src_lookup[0] = src_indices_base;
+ for (int i = 1; i < 4; ++i) {
+ src_lookup[i] = vadd_u8(src_lookup[i - 1], two);
+ }
+
+ const uint16_t* src_y =
+ src + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<1>(src_y);
+ // Each lane corresponds to a different filter kernel.
+ const uint16x4_t src[4] = {PermuteSrcVals(src_bytes, src_lookup[0]),
+ PermuteSrcVals(src_bytes, src_lookup[1]),
+ PermuteSrcVals(src_bytes, src_lookup[2]),
+ PermuteSrcVals(src_bytes, src_lookup[3])};
+
+ vst1_s16(intermediate,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/4>(src, taps),
+ kInterRoundBitsHorizontal - 1));
+ src_y = AddByteStride(src_y, src_stride);
+ intermediate += kIntermediateStride;
+ } while (--y != 0);
+}
+
+// Pre-transpose the 6 tap filters in |kAbsHalfSubPixelFilters|[0].
+inline int8x16_t GetSigned6TapFilter(const int tap_index) {
+ assert(tap_index < 6);
+ alignas(16) static constexpr int8_t
+ kAbsHalfSubPixel6TapSignedFilterColumns[6][16] = {
+ {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0},
+ {-0, -3, -5, -6, -7, -7, -8, -7, -7, -6, -6, -6, -5, -4, -2, -1},
+ {64, 63, 61, 58, 55, 51, 47, 42, 38, 33, 29, 24, 19, 14, 9, 4},
+ {0, 4, 9, 14, 19, 24, 29, 33, 38, 42, 47, 51, 55, 58, 61, 63},
+ {-0, -1, -2, -4, -5, -6, -6, -6, -7, -7, -8, -7, -7, -6, -5, -3},
+ {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+ return vld1q_s8(kAbsHalfSubPixel6TapSignedFilterColumns[tap_index]);
+}
+
+// This filter is only possible when width >= 8.
+template <int grade_x>
+inline void ConvolveKernelHorizontalSigned6Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
+ const int kernel_offset = 1;
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const int step_x8 = step_x << 3;
+ int8x16_t filter_taps[6];
+ for (int i = 0; i < 6; ++i) {
+ filter_taps[i] = GetSigned6TapFilter(i);
+ }
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+
+ int16_t* intermediate_x = intermediate;
+ int x = 0;
+ int p = subpixel_x;
+ do {
+ const uint16_t* src_x =
+ src + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, kFilterIndexShift),
+ filter_index_mask);
+
+ // Each lane of lane of taps_(low|high)[k] corresponds to one output value
+ // along the row, containing kSubPixelFilters[filter_index][filter_id][k],
+ // where filter_id depends on x.
+ int16x4_t taps_low[6];
+ int16x4_t taps_high[6];
+ for (int i = 0; i < 6; ++i) {
+ const int16x8_t taps_i =
+ vmovl_s8(VQTbl1S8(filter_taps[i], filter_indices));
+ taps_low[i] = vget_low_s16(taps_i);
+ taps_high[i] = vget_high_s16(taps_i);
+ }
+
+ // Lower byte of Nth value is at position 2*N.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ const uint8x8x2_t src_indices_zip = vzip_u8(src_indices0, src_indices1);
+ const uint8x16_t src_indices_base =
+ vcombine_u8(src_indices_zip.val[0], src_indices_zip.val[1]);
+
+ uint8x16_t src_lookup[6];
+ const uint8x16_t two = vdupq_n_u8(2);
+ src_lookup[0] = src_indices_base;
+ for (int i = 1; i < 6; ++i) {
+ src_lookup[i] = vaddq_u8(src_lookup[i - 1], two);
+ }
+
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<grade_x>(src_x);
+
+ uint16x4_t src_low[6];
+ uint16x4_t src_high[6];
+ for (int i = 0; i < 6; ++i) {
+ const uint16x8_t src_i =
+ PermuteSrcVals<grade_x>(src_bytes, src_lookup[i]);
+ src_low[i] = vget_low_u16(src_i);
+ src_high[i] = vget_high_u16(src_i);
+ }
+
+ vst1_s16(intermediate_x, vrshrn_n_s32(SumOnePassTaps</*filter_index=*/0>(
+ src_low, taps_low),
+ kInterRoundBitsHorizontal - 1));
+ vst1_s16(
+ intermediate_x + 4,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/0>(src_high, taps_high),
+ kInterRoundBitsHorizontal - 1));
+ // Avoid right shifting the stride.
+ src_x = AddByteStride(src_x, src_stride);
+ intermediate_x += kIntermediateStride;
+ } while (--y != 0);
+ x += 8;
+ p += step_x8;
+ } while (x < width);
+}
+
+// Pre-transpose the 6 tap filters in |kAbsHalfSubPixelFilters|[1]. This filter
+// has mixed positive and negative outer taps depending on the filter id.
+inline int8x16_t GetMixed6TapFilter(const int tap_index) {
+ assert(tap_index < 6);
+ alignas(16) static constexpr int8_t
+ kAbsHalfSubPixel6TapMixedFilterColumns[6][16] = {
+ {0, 1, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0},
+ {0, 14, 13, 11, 10, 9, 8, 8, 7, 6, 5, 4, 3, 2, 2, 1},
+ {64, 31, 31, 31, 30, 29, 28, 27, 26, 24, 23, 22, 21, 20, 18, 17},
+ {0, 17, 18, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 31, 31},
+ {0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 13, 14},
+ {0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, 1}};
+
+ return vld1q_s8(kAbsHalfSubPixel6TapMixedFilterColumns[tap_index]);
+}
+
+// This filter is only possible when width >= 8.
+template <int grade_x>
+inline void ConvolveKernelHorizontalMixed6Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
+ const int kernel_offset = 1;
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const int step_x8 = step_x << 3;
+ int8x16_t filter_taps[6];
+ for (int i = 0; i < 6; ++i) {
+ filter_taps[i] = GetMixed6TapFilter(i);
+ }
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+
+ int16_t* intermediate_x = intermediate;
+ int x = 0;
+ int p = subpixel_x;
+ do {
+ const uint16_t* src_x =
+ src + (p >> kScaleSubPixelBits) - ref_x + kernel_offset;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, kFilterIndexShift),
+ filter_index_mask);
+ // Each lane of lane of taps_(low|high)[k] corresponds to one output value
+ // along the row, containing kSubPixelFilters[filter_index][filter_id][k],
+ // where filter_id depends on x.
+ int16x4_t taps_low[6];
+ int16x4_t taps_high[6];
+ for (int i = 0; i < 6; ++i) {
+ const int16x8_t taps = vmovl_s8(VQTbl1S8(filter_taps[i], filter_indices));
+ taps_low[i] = vget_low_s16(taps);
+ taps_high[i] = vget_high_s16(taps);
+ }
+
+ // Lower byte of Nth value is at position 2*N.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ const uint8x8x2_t src_indices_zip = vzip_u8(src_indices0, src_indices1);
+ const uint8x16_t src_indices_base =
+ vcombine_u8(src_indices_zip.val[0], src_indices_zip.val[1]);
+
+ uint8x16_t src_lookup[6];
+ const uint8x16_t two = vdupq_n_u8(2);
+ src_lookup[0] = src_indices_base;
+ for (int i = 1; i < 6; ++i) {
+ src_lookup[i] = vaddq_u8(src_lookup[i - 1], two);
+ }
+
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<grade_x>(src_x);
+
+ uint16x4_t src_low[6];
+ uint16x4_t src_high[6];
+ for (int i = 0; i < 6; ++i) {
+ const uint16x8_t src_i =
+ PermuteSrcVals<grade_x>(src_bytes, src_lookup[i]);
+ src_low[i] = vget_low_u16(src_i);
+ src_high[i] = vget_high_u16(src_i);
+ }
+
+ vst1_s16(intermediate_x, vrshrn_n_s32(SumOnePassTaps</*filter_index=*/0>(
+ src_low, taps_low),
+ kInterRoundBitsHorizontal - 1));
+ vst1_s16(
+ intermediate_x + 4,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/0>(src_high, taps_high),
+ kInterRoundBitsHorizontal - 1));
+ // Avoid right shifting the stride.
+ src_x = AddByteStride(src_x, src_stride);
+ intermediate_x += kIntermediateStride;
+ } while (--y != 0);
+ x += 8;
+ p += step_x8;
+ } while (x < width);
+}
+
+// Pre-transpose the 8 tap filters in |kAbsHalfSubPixelFilters|[2].
+inline int8x16_t GetSigned8TapFilter(const int tap_index) {
+ assert(tap_index < 8);
+ alignas(16) static constexpr int8_t
+ kAbsHalfSubPixel8TapSignedFilterColumns[8][16] = {
+ {-0, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -0},
+ {0, 1, 3, 4, 5, 5, 5, 5, 6, 5, 4, 4, 3, 3, 2, 1},
+ {-0, -3, -6, -9, -11, -11, -12, -12, -12, -11, -10, -9, -7, -5, -3,
+ -1},
+ {64, 63, 62, 60, 58, 54, 50, 45, 40, 35, 30, 24, 19, 13, 8, 4},
+ {0, 4, 8, 13, 19, 24, 30, 35, 40, 45, 50, 54, 58, 60, 62, 63},
+ {-0, -1, -3, -5, -7, -9, -10, -11, -12, -12, -12, -11, -11, -9, -6,
+ -3},
+ {0, 1, 2, 3, 3, 4, 4, 5, 6, 5, 5, 5, 5, 4, 3, 1},
+ {-0, -0, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1, -1}};
+
+ return vld1q_s8(kAbsHalfSubPixel8TapSignedFilterColumns[tap_index]);
+}
+
+// This filter is only possible when width >= 8.
+template <int grade_x>
+inline void ConvolveKernelHorizontalSigned8Tap(
+ const uint16_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
+ const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
+ const int ref_x = subpixel_x >> kScaleSubPixelBits;
+ const int step_x8 = step_x << 3;
+ int8x16_t filter_taps[8];
+ for (int i = 0; i < 8; ++i) {
+ filter_taps[i] = GetSigned8TapFilter(i);
+ }
+ const uint16x8_t index_steps = vmulq_n_u16(
+ vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+ int16_t* intermediate_x = intermediate;
+ int x = 0;
+ int p = subpixel_x;
+ do {
+ const uint16_t* src_x = src + (p >> kScaleSubPixelBits) - ref_x;
+ // Only add steps to the 10-bit truncated p to avoid overflow.
+ const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
+ const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
+
+ const uint8x8_t filter_indices =
+ vand_u8(vshrn_n_u16(subpel_index_offsets, kFilterIndexShift),
+ filter_index_mask);
+
+ // Lower byte of Nth value is at position 2*N.
+ const uint8x8_t src_indices0 = vshl_n_u8(
+ vmovn_u16(vshrq_n_u16(subpel_index_offsets, kScaleSubPixelBits)), 1);
+ // Upper byte of Nth value is at position 2*N+1.
+ const uint8x8_t src_indices1 = vadd_u8(src_indices0, vdup_n_u8(1));
+ const uint8x8x2_t src_indices_zip = vzip_u8(src_indices0, src_indices1);
+ const uint8x16_t src_indices_base =
+ vcombine_u8(src_indices_zip.val[0], src_indices_zip.val[1]);
+
+ uint8x16_t src_lookup[8];
+ const uint8x16_t two = vdupq_n_u8(2);
+ src_lookup[0] = src_indices_base;
+ for (int i = 1; i < 8; ++i) {
+ src_lookup[i] = vaddq_u8(src_lookup[i - 1], two);
+ }
+ // Each lane of lane of taps_(low|high)[k] corresponds to one output value
+ // along the row, containing kSubPixelFilters[filter_index][filter_id][k],
+ // where filter_id depends on x.
+ int16x4_t taps_low[8];
+ int16x4_t taps_high[8];
+ for (int i = 0; i < 8; ++i) {
+ const int16x8_t taps = vmovl_s8(VQTbl1S8(filter_taps[i], filter_indices));
+ taps_low[i] = vget_low_s16(taps);
+ taps_high[i] = vget_high_s16(taps);
+ }
+
+ int y = intermediate_height;
+ do {
+ // Load a pool of samples to select from using stepped indices.
+ const uint8x16x3_t src_bytes = LoadSrcVals<grade_x>(src_x);
+
+ uint16x4_t src_low[8];
+ uint16x4_t src_high[8];
+ for (int i = 0; i < 8; ++i) {
+ const uint16x8_t src_i =
+ PermuteSrcVals<grade_x>(src_bytes, src_lookup[i]);
+ src_low[i] = vget_low_u16(src_i);
+ src_high[i] = vget_high_u16(src_i);
+ }
+
+ vst1_s16(intermediate_x, vrshrn_n_s32(SumOnePassTaps</*filter_index=*/2>(
+ src_low, taps_low),
+ kInterRoundBitsHorizontal - 1));
+ vst1_s16(
+ intermediate_x + 4,
+ vrshrn_n_s32(SumOnePassTaps</*filter_index=*/2>(src_high, taps_high),
+ kInterRoundBitsHorizontal - 1));
+ // Avoid right shifting the stride.
+ src_x = AddByteStride(src_x, src_stride);
+ intermediate_x += kIntermediateStride;
+ } while (--y != 0);
+ x += 8;
+ p += step_x8;
+ } while (x < width);
+}
+
+// Process 16 bit inputs and output 32 bits.
+template <int num_taps, bool is_compound>
+inline int16x4_t Sum2DVerticalTaps4(const int16x4_t* const src,
+ const int16x8_t taps) {
+ const int16x4_t taps_lo = vget_low_s16(taps);
+ const int16x4_t taps_hi = vget_high_s16(taps);
+ int32x4_t sum;
+ if (num_taps == 8) {
+ sum = vmull_lane_s16(src[0], taps_lo, 0);
+ sum = vmlal_lane_s16(sum, src[1], taps_lo, 1);
+ sum = vmlal_lane_s16(sum, src[2], taps_lo, 2);
+ sum = vmlal_lane_s16(sum, src[3], taps_lo, 3);
+ sum = vmlal_lane_s16(sum, src[4], taps_hi, 0);
+ sum = vmlal_lane_s16(sum, src[5], taps_hi, 1);
+ sum = vmlal_lane_s16(sum, src[6], taps_hi, 2);
+ sum = vmlal_lane_s16(sum, src[7], taps_hi, 3);
+ } else if (num_taps == 6) {
+ sum = vmull_lane_s16(src[0], taps_lo, 1);
+ sum = vmlal_lane_s16(sum, src[1], taps_lo, 2);
+ sum = vmlal_lane_s16(sum, src[2], taps_lo, 3);
+ sum = vmlal_lane_s16(sum, src[3], taps_hi, 0);
+ sum = vmlal_lane_s16(sum, src[4], taps_hi, 1);
+ sum = vmlal_lane_s16(sum, src[5], taps_hi, 2);
+ } else if (num_taps == 4) {
+ sum = vmull_lane_s16(src[0], taps_lo, 2);
+ sum = vmlal_lane_s16(sum, src[1], taps_lo, 3);
+ sum = vmlal_lane_s16(sum, src[2], taps_hi, 0);
+ sum = vmlal_lane_s16(sum, src[3], taps_hi, 1);
+ } else if (num_taps == 2) {
+ sum = vmull_lane_s16(src[0], taps_lo, 3);
+ sum = vmlal_lane_s16(sum, src[1], taps_hi, 0);
+ }
+
+ if (is_compound) {
+ return vrshrn_n_s32(sum, kInterRoundBitsCompoundVertical - 1);
+ }
+
+ return vreinterpret_s16_u16(vqrshrun_n_s32(sum, kInterRoundBitsVertical - 1));
+}
+
+template <int num_taps, int grade_y, int width, bool is_compound>
+void ConvolveVerticalScale2Or4xH(const int16_t* LIBGAV1_RESTRICT const src,
+ const int subpixel_y, const int filter_index,
+ const int step_y, const int height,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
+ static_assert(width == 2 || width == 4, "");
+ // We increment stride with the 8-bit pointer and then reinterpret to avoid
+ // shifting |dest_stride|.
+ auto* dest_y = static_cast<uint16_t*>(dest);
+ // In compound mode, |dest_stride| is based on the size of uint16_t, rather
+ // than bytes.
+ auto* compound_dest_y = static_cast<uint16_t*>(dest);
+ // This stride always corresponds to int16_t.
+ constexpr ptrdiff_t src_stride = kIntermediateStride;
+ const int16_t* src_y = src;
+ int16x4_t s[num_taps + grade_y];
+
+ int p = subpixel_y & 1023;
+ int prev_p = p;
+ int y = height;
+ do {
+ for (int i = 0; i < num_taps; ++i) {
+ s[i] = vld1_s16(src_y + i * src_stride);
+ }
+ int filter_id = (p >> 6) & kSubPixelMask;
+ int16x8_t filter =
+ vmovl_s8(vld1_s8(kHalfSubPixelFilters[filter_index][filter_id]));
+ int16x4_t sums = Sum2DVerticalTaps4<num_taps, is_compound>(s, filter);
+ if (is_compound) {
+ assert(width != 2);
+ // This offset potentially overflows into the sign bit, but should yield
+ // the correct unsigned value.
+ const uint16x4_t result =
+ vreinterpret_u16_s16(vadd_s16(sums, vdup_n_s16(kCompoundOffset)));
+ vst1_u16(compound_dest_y, result);
+ compound_dest_y += dest_stride;
+ } else {
+ const uint16x4_t result = vmin_u16(vreinterpret_u16_s16(sums),
+ vdup_n_u16((1 << kBitdepth10) - 1));
+ if (width == 2) {
+ Store2<0>(dest_y, result);
+ } else {
+ vst1_u16(dest_y, result);
+ }
+ dest_y = AddByteStride(dest_y, dest_stride);
+ }
+ p += step_y;
+ const int p_diff =
+ (p >> kScaleSubPixelBits) - (prev_p >> kScaleSubPixelBits);
+ prev_p = p;
+ // Here we load extra source in case it is needed. If |p_diff| == 0, these
+ // values will be unused, but it's faster to load than to branch.
+ s[num_taps] = vld1_s16(src_y + num_taps * src_stride);
+ if (grade_y > 1) {
+ s[num_taps + 1] = vld1_s16(src_y + (num_taps + 1) * src_stride);
+ }
+
+ filter_id = (p >> 6) & kSubPixelMask;
+ filter = vmovl_s8(vld1_s8(kHalfSubPixelFilters[filter_index][filter_id]));
+ sums = Sum2DVerticalTaps4<num_taps, is_compound>(&s[p_diff], filter);
+ if (is_compound) {
+ assert(width != 2);
+ const uint16x4_t result =
+ vreinterpret_u16_s16(vadd_s16(sums, vdup_n_s16(kCompoundOffset)));
+ vst1_u16(compound_dest_y, result);
+ compound_dest_y += dest_stride;
+ } else {
+ const uint16x4_t result = vmin_u16(vreinterpret_u16_s16(sums),
+ vdup_n_u16((1 << kBitdepth10) - 1));
+ if (width == 2) {
+ Store2<0>(dest_y, result);
+ } else {
+ vst1_u16(dest_y, result);
+ }
+ dest_y = AddByteStride(dest_y, dest_stride);
+ }
+ p += step_y;
+ src_y = src + (p >> kScaleSubPixelBits) * src_stride;
+ prev_p = p;
+ y -= 2;
+ } while (y != 0);
+}
+
+template <int num_taps, int grade_y, bool is_compound>
+void ConvolveVerticalScale(const int16_t* LIBGAV1_RESTRICT const source,
+ const int intermediate_height, const int width,
+ const int subpixel_y, const int filter_index,
+ const int step_y, const int height,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
+ // This stride always corresponds to int16_t.
+ constexpr ptrdiff_t src_stride = kIntermediateStride;
+
+ int16x8_t s[num_taps + 2];
+
+ const int16_t* src = source;
+ int x = 0;
+ do {
+ const int16_t* src_y = src;
+ int p = subpixel_y & 1023;
+ int prev_p = p;
+ // We increment stride with the 8-bit pointer and then reinterpret to avoid
+ // shifting |dest_stride|.
+ auto* dest_y = static_cast<uint16_t*>(dest) + x;
+ // In compound mode, |dest_stride| is based on the size of uint16_t, rather
+ // than bytes.
+ auto* compound_dest_y = static_cast<uint16_t*>(dest) + x;
+ int y = height;
+ do {
+ for (int i = 0; i < num_taps; ++i) {
+ s[i] = vld1q_s16(src_y + i * src_stride);
+ }
+ int filter_id = (p >> 6) & kSubPixelMask;
+ int16x8_t filter =
+ vmovl_s8(vld1_s8(kHalfSubPixelFilters[filter_index][filter_id]));
+ int16x8_t sums =
+ SimpleSum2DVerticalTaps<num_taps, is_compound>(s, filter);
+ if (is_compound) {
+ // This offset potentially overflows int16_t, but should yield the
+ // correct unsigned value.
+ const uint16x8_t result = vreinterpretq_u16_s16(
+ vaddq_s16(sums, vdupq_n_s16(kCompoundOffset)));
+ vst1q_u16(compound_dest_y, result);
+ compound_dest_y += dest_stride;
+ } else {
+ const uint16x8_t result = vminq_u16(
+ vreinterpretq_u16_s16(sums), vdupq_n_u16((1 << kBitdepth10) - 1));
+ vst1q_u16(dest_y, result);
+ dest_y = AddByteStride(dest_y, dest_stride);
+ }
+ p += step_y;
+ const int p_diff =
+ (p >> kScaleSubPixelBits) - (prev_p >> kScaleSubPixelBits);
+ prev_p = p;
+ // Here we load extra source in case it is needed. If |p_diff| == 0, these
+ // values will be unused, but it's faster to load than to branch.
+ s[num_taps] = vld1q_s16(src_y + num_taps * src_stride);
+ if (grade_y > 1) {
+ s[num_taps + 1] = vld1q_s16(src_y + (num_taps + 1) * src_stride);
+ }
+
+ filter_id = (p >> 6) & kSubPixelMask;
+ filter = vmovl_s8(vld1_s8(kHalfSubPixelFilters[filter_index][filter_id]));
+ sums = SimpleSum2DVerticalTaps<num_taps, is_compound>(&s[p_diff], filter);
+ if (is_compound) {
+ assert(width != 2);
+ const uint16x8_t result = vreinterpretq_u16_s16(
+ vaddq_s16(sums, vdupq_n_s16(kCompoundOffset)));
+ vst1q_u16(compound_dest_y, result);
+ compound_dest_y += dest_stride;
+ } else {
+ const uint16x8_t result = vminq_u16(
+ vreinterpretq_u16_s16(sums), vdupq_n_u16((1 << kBitdepth10) - 1));
+ vst1q_u16(dest_y, result);
+ dest_y = AddByteStride(dest_y, dest_stride);
+ }
+ p += step_y;
+ src_y = src + (p >> kScaleSubPixelBits) * src_stride;
+ prev_p = p;
+
+ y -= 2;
+ } while (y != 0);
+ src += kIntermediateStride * intermediate_height;
+ x += 8;
+ } while (x < width);
+}
+
+template <bool is_compound>
+void ConvolveScale2D_NEON(const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride,
+ const int horizontal_filter_index,
+ const int vertical_filter_index, const int subpixel_x,
+ const int subpixel_y, const int step_x,
+ const int step_y, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t pred_stride) {
+ const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
+ const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
+ assert(step_x <= 2048);
+ assert(step_y <= 2048);
+ const int num_vert_taps = GetNumTapsInFilter(vert_filter_index);
+ const int intermediate_height =
+ (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
+ kScaleSubPixelBits) +
+ num_vert_taps;
+ int16_t intermediate_result[kIntermediateAllocWidth *
+ (2 * kIntermediateAllocWidth + 8)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x54, sizeof(intermediate_result));
+#endif
+ // Horizontal filter.
+ // Filter types used for width <= 4 are different from those for width > 4.
+ // When width > 4, the valid filter index range is always [0, 3].
+ // When width <= 4, the valid filter index range is always [3, 5].
+ // The same applies to height and vertical filter index.
+ int filter_index = GetFilterIndex(horizontal_filter_index, width);
+ int16_t* intermediate = intermediate_result;
+ const ptrdiff_t src_stride = reference_stride;
+ const auto* src = static_cast<const uint16_t*>(reference);
+ const int vert_kernel_offset = (8 - num_vert_taps) / 2;
+ src = AddByteStride(src, vert_kernel_offset * src_stride);
+
+ // Derive the maximum value of |step_x| at which all source values fit in one
+ // 16-byte (8-value) load. Final index is src_x + |num_taps| - 1 < 16
+ // step_x*7 is the final base subpel index for the shuffle mask for filter
+ // inputs in each iteration on large blocks. When step_x is large, we need a
+ // larger structure and use a larger table lookup in order to gather all
+ // filter inputs.
+ const int num_horiz_taps = GetNumTapsInFilter(horiz_filter_index);
+ // |num_taps| - 1 is the shuffle index of the final filter input.
+ const int kernel_start_ceiling = 16 - num_horiz_taps;
+ // This truncated quotient |grade_x_threshold| selects |step_x| such that:
+ // (step_x * 7) >> kScaleSubPixelBits < single load limit
+ const int grade_x_threshold =
+ (kernel_start_ceiling << kScaleSubPixelBits) / 7;
+
+ switch (filter_index) {
+ case 0:
+ if (step_x > grade_x_threshold) {
+ ConvolveKernelHorizontalSigned6Tap<2>(
+ src, src_stride, width, subpixel_x, step_x, intermediate_height,
+ intermediate);
+ } else {
+ ConvolveKernelHorizontalSigned6Tap<1>(
+ src, src_stride, width, subpixel_x, step_x, intermediate_height,
+ intermediate);
+ }
+ break;
+ case 1:
+ if (step_x > grade_x_threshold) {
+ ConvolveKernelHorizontalMixed6Tap<2>(src, src_stride, width, subpixel_x,
+ step_x, intermediate_height,
+ intermediate);
+
+ } else {
+ ConvolveKernelHorizontalMixed6Tap<1>(src, src_stride, width, subpixel_x,
+ step_x, intermediate_height,
+ intermediate);
+ }
+ break;
+ case 2:
+ if (step_x > grade_x_threshold) {
+ ConvolveKernelHorizontalSigned8Tap<2>(
+ src, src_stride, width, subpixel_x, step_x, intermediate_height,
+ intermediate);
+ } else {
+ ConvolveKernelHorizontalSigned8Tap<1>(
+ src, src_stride, width, subpixel_x, step_x, intermediate_height,
+ intermediate);
+ }
+ break;
+ case 3:
+ if (step_x > grade_x_threshold) {
+ ConvolveKernelHorizontal2Tap<2>(src, src_stride, width, subpixel_x,
+ step_x, intermediate_height,
+ intermediate);
+ } else {
+ ConvolveKernelHorizontal2Tap<1>(src, src_stride, width, subpixel_x,
+ step_x, intermediate_height,
+ intermediate);
+ }
+ break;
+ case 4:
+ assert(width <= 4);
+ ConvolveKernelHorizontalSigned4Tap(src, src_stride, subpixel_x, step_x,
+ intermediate_height, intermediate);
+ break;
+ default:
+ assert(filter_index == 5);
+ ConvolveKernelHorizontalPositive4Tap(src, src_stride, subpixel_x, step_x,
+ intermediate_height, intermediate);
+ }
+
+ // Vertical filter.
+ filter_index = GetFilterIndex(vertical_filter_index, height);
+ intermediate = intermediate_result;
+ switch (filter_index) {
+ case 0:
+ case 1:
+ if (step_y <= 1024) {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<6, 1, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<6, 1, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<6, 1, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ } else {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<6, 2, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<6, 2, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<6, 2, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ }
+ break;
+ case 2:
+ if (step_y <= 1024) {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<8, 1, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<8, 1, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<8, 1, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ } else {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<8, 2, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<8, 2, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<8, 2, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ }
+ break;
+ case 3:
+ if (step_y <= 1024) {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<2, 1, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<2, 1, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<2, 1, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ } else {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<2, 2, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<2, 2, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<2, 2, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ }
+ break;
+ default:
+ assert(filter_index == 4 || filter_index == 5);
+ assert(height <= 4);
+ if (step_y <= 1024) {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<4, 1, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<4, 1, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<4, 1, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ } else {
+ if (!is_compound && width == 2) {
+ ConvolveVerticalScale2Or4xH<4, 2, 2, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else if (width == 4) {
+ ConvolveVerticalScale2Or4xH<4, 2, 4, is_compound>(
+ intermediate, subpixel_y, filter_index, step_y, height,
+ prediction, pred_stride);
+ } else {
+ ConvolveVerticalScale<4, 2, is_compound>(
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
+ }
+ }
+ }
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->convolve[0][0][0][1] = ConvolveHorizontal_NEON;
+ dsp->convolve[0][0][1][0] = ConvolveVertical_NEON;
+ dsp->convolve[0][0][1][1] = Convolve2D_NEON;
+
+ dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_NEON;
+ dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_NEON;
+ dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_NEON;
+ dsp->convolve[0][1][1][1] = ConvolveCompound2D_NEON;
+
+ dsp->convolve[1][0][0][1] = ConvolveIntraBlockCopyHorizontal_NEON;
+ dsp->convolve[1][0][1][0] = ConvolveIntraBlockCopyVertical_NEON;
+ dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_NEON;
+
+ dsp->convolve_scale[0] = ConvolveScale2D_NEON<false>;
+ dsp->convolve_scale[1] = ConvolveScale2D_NEON<true>;
+}
+
+} // namespace
+
+void ConvolveInit10bpp_NEON() { Init10bpp(); }
+
+} // namespace dsp
+} // namespace libgav1
+
+#else // !(LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10)
+
+namespace libgav1 {
+namespace dsp {
+
+void ConvolveInit10bpp_NEON() {}
+
+} // namespace dsp
+} // namespace libgav1
+#endif // LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/arm/convolve_neon.cc b/src/dsp/arm/convolve_neon.cc
index 331bfe2..5b80da2 100644
--- a/src/dsp/arm/convolve_neon.cc
+++ b/src/dsp/arm/convolve_neon.cc
@@ -103,9 +103,11 @@ int16x8_t SumOnePassTaps(const uint8x8_t* const src,
template <int filter_index, bool negative_outside_taps, bool is_2d,
bool is_compound>
-void FilterHorizontalWidth8AndUp(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int width, const int height,
+void FilterHorizontalWidth8AndUp(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height,
const uint8x8_t* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
@@ -220,9 +222,11 @@ void FilterHorizontalWidth8AndUp(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool is_2d, bool is_compound>
-void FilterHorizontalWidth4(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int height, const uint8x8_t* const v_tap) {
+void FilterHorizontalWidth4(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int height,
+ const uint8x8_t* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
int y = height;
@@ -257,9 +261,11 @@ void FilterHorizontalWidth4(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool is_2d>
-void FilterHorizontalWidth2(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int height, const uint8x8_t* const v_tap) {
+void FilterHorizontalWidth2(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int height,
+ const uint8x8_t* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
int y = height >> 1;
@@ -345,10 +351,11 @@ void FilterHorizontalWidth2(const uint8_t* src, const ptrdiff_t src_stride,
template <int filter_index, bool negative_outside_taps, bool is_2d,
bool is_compound>
-void FilterHorizontal(const uint8_t* const src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int width, const int height,
- const uint8x8_t* const v_tap) {
+void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height, const uint8x8_t* const v_tap) {
assert(width < 8 || filter_index <= 3);
// Don't simplify the redundant if conditions with the template parameters,
// which helps the compiler generate compact code.
@@ -484,7 +491,8 @@ int16x8_t SimpleSum2DVerticalTaps(const int16x8_t* const src,
}
template <int num_taps, bool is_compound = false>
-void Filter2DVerticalWidth8AndUp(const uint16_t* src, void* const dst,
+void Filter2DVerticalWidth8AndUp(const uint16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
const ptrdiff_t dst_stride, const int width,
const int height, const int16x8_t taps) {
assert(width >= 8);
@@ -560,7 +568,8 @@ void Filter2DVerticalWidth8AndUp(const uint16_t* src, void* const dst,
// Take advantage of |src_stride| == |width| to process two rows at a time.
template <int num_taps, bool is_compound = false>
-void Filter2DVerticalWidth4(const uint16_t* src, void* const dst,
+void Filter2DVerticalWidth4(const uint16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
const ptrdiff_t dst_stride, const int height,
const int16x8_t taps) {
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -626,7 +635,8 @@ void Filter2DVerticalWidth4(const uint16_t* src, void* const dst,
// Take advantage of |src_stride| == |width| to process four rows at a time.
template <int num_taps>
-void Filter2DVerticalWidth2(const uint16_t* src, void* const dst,
+void Filter2DVerticalWidth2(const uint16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
const ptrdiff_t dst_stride, const int height,
const int16x8_t taps) {
constexpr int next_row = (num_taps < 6) ? 4 : 8;
@@ -699,9 +709,10 @@ void Filter2DVerticalWidth2(const uint16_t* src, void* const dst,
template <bool is_2d = false, bool is_compound = false>
LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
- const uint8_t* const src, const ptrdiff_t src_stride, void* const dst,
- const ptrdiff_t dst_stride, const int width, const int height,
- const int filter_id, const int filter_index) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst, const ptrdiff_t dst_stride,
+ const int width, const int height, const int filter_id,
+ const int filter_index) {
// Duplicate the absolute value for each tap. Negative taps are corrected
// by using the vmlsl_u8 instruction. Positive taps use vmlal_u8.
uint8x8_t v_tap[kSubPixelTaps];
@@ -739,9 +750,10 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
}
template <int vertical_taps>
-void Filter2DVertical(const uint16_t* const intermediate_result,
- const int width, const int height, const int16x8_t taps,
- void* const prediction, const ptrdiff_t pred_stride) {
+void Filter2DVertical(
+ const uint16_t* LIBGAV1_RESTRICT const intermediate_result, const int width,
+ const int height, const int16x8_t taps,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
auto* const dest = static_cast<uint8_t*>(prediction);
if (width >= 8) {
Filter2DVerticalWidth8AndUp<vertical_taps>(
@@ -756,13 +768,13 @@ void Filter2DVertical(const uint16_t* const intermediate_result,
}
}
-void Convolve2D_NEON(const void* const reference,
+void Convolve2D_NEON(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int horizontal_filter_id,
const int vertical_filter_id, const int width,
- const int height, void* const prediction,
+ const int height, void* LIBGAV1_RESTRICT const prediction,
const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
@@ -772,6 +784,10 @@ void Convolve2D_NEON(const void* const reference,
uint16_t
intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
const int intermediate_height = height + vertical_taps - 1;
const ptrdiff_t src_stride = reference_stride;
const auto* const src = static_cast<const uint8_t*>(reference) -
@@ -815,6 +831,10 @@ inline uint8x8x3_t LoadSrcVals(const uint8_t* const src_x) {
const uint8x16_t src_val = vld1q_u8(src_x);
ret.val[0] = vget_low_u8(src_val);
ret.val[1] = vget_high_u8(src_val);
+#if LIBGAV1_MSAN
+ // Initialize to quiet msan warnings when grade_x <= 1.
+ ret.val[2] = vdup_n_u8(0);
+#endif
if (grade_x > 1) {
ret.val[2] = vld1_u8(src_x + 16);
}
@@ -833,12 +853,10 @@ inline uint8x16_t GetPositive2TapFilter(const int tap_index) {
}
template <int grade_x>
-inline void ConvolveKernelHorizontal2Tap(const uint8_t* const src,
- const ptrdiff_t src_stride,
- const int width, const int subpixel_x,
- const int step_x,
- const int intermediate_height,
- int16_t* intermediate) {
+inline void ConvolveKernelHorizontal2Tap(
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height, int16_t* LIBGAV1_RESTRICT intermediate) {
// Account for the 0-taps that precede the 2 nonzero taps.
const int kernel_offset = 3;
const int ref_x = subpixel_x >> kScaleSubPixelBits;
@@ -891,7 +909,6 @@ inline void ConvolveKernelHorizontal2Tap(const uint8_t* const src,
do {
const uint8_t* src_x =
&src[(p >> kScaleSubPixelBits) - ref_x + kernel_offset];
- int16_t* intermediate_x = intermediate + x;
// Only add steps to the 10-bit truncated p to avoid overflow.
const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
@@ -917,11 +934,11 @@ inline void ConvolveKernelHorizontal2Tap(const uint8_t* const src,
vtbl3_u8(src_vals, src_indices),
vtbl3_u8(src_vals, vadd_u8(src_indices, vdup_n_u8(1)))};
- vst1q_s16(intermediate_x,
+ vst1q_s16(intermediate,
vrshrq_n_s16(SumOnePassTaps</*filter_index=*/3>(src, taps),
kInterRoundBitsHorizontal - 1));
src_x += src_stride;
- intermediate_x += kIntermediateStride;
+ intermediate += kIntermediateStride;
} while (--y != 0);
x += 8;
p += step_x8;
@@ -943,8 +960,9 @@ inline uint8x16_t GetPositive4TapFilter(const int tap_index) {
// This filter is only possible when width <= 4.
void ConvolveKernelHorizontalPositive4Tap(
- const uint8_t* const src, const ptrdiff_t src_stride, const int subpixel_x,
- const int step_x, const int intermediate_height, int16_t* intermediate) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int subpixel_x, const int step_x, const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT intermediate) {
const int kernel_offset = 2;
const int ref_x = subpixel_x >> kScaleSubPixelBits;
const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
@@ -1010,8 +1028,9 @@ inline uint8x16_t GetSigned4TapFilter(const int tap_index) {
// This filter is only possible when width <= 4.
inline void ConvolveKernelHorizontalSigned4Tap(
- const uint8_t* const src, const ptrdiff_t src_stride, const int subpixel_x,
- const int step_x, const int intermediate_height, int16_t* intermediate) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int subpixel_x, const int step_x, const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT intermediate) {
const int kernel_offset = 2;
const int ref_x = subpixel_x >> kScaleSubPixelBits;
const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
@@ -1085,9 +1104,10 @@ inline uint8x16_t GetSigned6TapFilter(const int tap_index) {
// This filter is only possible when width >= 8.
template <int grade_x>
inline void ConvolveKernelHorizontalSigned6Tap(
- const uint8_t* const src, const ptrdiff_t src_stride, const int width,
- const int subpixel_x, const int step_x, const int intermediate_height,
- int16_t* const intermediate) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
const int kernel_offset = 1;
const uint8x8_t one = vdup_n_u8(1);
const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
@@ -1100,6 +1120,7 @@ inline void ConvolveKernelHorizontalSigned6Tap(
const uint16x8_t index_steps = vmulq_n_u16(
vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+ int16_t* intermediate_x = intermediate;
int x = 0;
int p = subpixel_x;
do {
@@ -1107,7 +1128,6 @@ inline void ConvolveKernelHorizontalSigned6Tap(
// |trailing_width| can be up to 24.
const uint8_t* src_x =
&src[(p >> kScaleSubPixelBits) - ref_x + kernel_offset];
- int16_t* intermediate_x = intermediate + x;
// Only add steps to the 10-bit truncated p to avoid overflow.
const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
@@ -1178,9 +1198,10 @@ inline int8x16_t GetMixed6TapFilter(const int tap_index) {
// This filter is only possible when width >= 8.
template <int grade_x>
inline void ConvolveKernelHorizontalMixed6Tap(
- const uint8_t* const src, const ptrdiff_t src_stride, const int width,
- const int subpixel_x, const int step_x, const int intermediate_height,
- int16_t* const intermediate) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
const int kernel_offset = 1;
const uint8x8_t one = vdup_n_u8(1);
const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
@@ -1198,12 +1219,12 @@ inline void ConvolveKernelHorizontalMixed6Tap(
const uint16x8_t index_steps = vmulq_n_u16(
vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+ int16_t* intermediate_x = intermediate;
int x = 0;
int p = subpixel_x;
do {
const uint8_t* src_x =
&src[(p >> kScaleSubPixelBits) - ref_x + kernel_offset];
- int16_t* intermediate_x = intermediate + x;
// Only add steps to the 10-bit truncated p to avoid overflow.
const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
@@ -1272,9 +1293,10 @@ inline uint8x16_t GetSigned8TapFilter(const int tap_index) {
// This filter is only possible when width >= 8.
template <int grade_x>
inline void ConvolveKernelHorizontalSigned8Tap(
- const uint8_t* const src, const ptrdiff_t src_stride, const int width,
- const int subpixel_x, const int step_x, const int intermediate_height,
- int16_t* const intermediate) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ const int width, const int subpixel_x, const int step_x,
+ const int intermediate_height,
+ int16_t* LIBGAV1_RESTRICT const intermediate) {
const uint8x8_t one = vdup_n_u8(1);
const uint8x8_t filter_index_mask = vdup_n_u8(kSubPixelMask);
const int ref_x = subpixel_x >> kScaleSubPixelBits;
@@ -1286,11 +1308,12 @@ inline void ConvolveKernelHorizontalSigned8Tap(
}
const uint16x8_t index_steps = vmulq_n_u16(
vmovl_u8(vcreate_u8(0x0706050403020100)), static_cast<uint16_t>(step_x));
+
+ int16_t* intermediate_x = intermediate;
int x = 0;
int p = subpixel_x;
do {
const uint8_t* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
- int16_t* intermediate_x = intermediate + x;
// Only add steps to the 10-bit truncated p to avoid overflow.
const uint16x8_t p_fraction = vdupq_n_u16(p & 1023);
const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);
@@ -1336,15 +1359,16 @@ inline void ConvolveKernelHorizontalSigned8Tap(
// This function handles blocks of width 2 or 4.
template <int num_taps, int grade_y, int width, bool is_compound>
-void ConvolveVerticalScale4xH(const int16_t* const src, const int subpixel_y,
- const int filter_index, const int step_y,
- const int height, void* const dest,
+void ConvolveVerticalScale4xH(const int16_t* LIBGAV1_RESTRICT const src,
+ const int subpixel_y, const int filter_index,
+ const int step_y, const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
constexpr ptrdiff_t src_stride = kIntermediateStride;
const int16_t* src_y = src;
// |dest| is 16-bit in compound mode, Pixel otherwise.
- uint16_t* dest16_y = static_cast<uint16_t*>(dest);
- uint8_t* dest_y = static_cast<uint8_t*>(dest);
+ auto* dest16_y = static_cast<uint16_t*>(dest);
+ auto* dest_y = static_cast<uint8_t*>(dest);
int16x4_t s[num_taps + grade_y];
int p = subpixel_y & 1023;
@@ -1408,10 +1432,12 @@ void ConvolveVerticalScale4xH(const int16_t* const src, const int subpixel_y,
}
template <int num_taps, int grade_y, bool is_compound>
-inline void ConvolveVerticalScale(const int16_t* const src, const int width,
- const int subpixel_y, const int filter_index,
- const int step_y, const int height,
- void* const dest,
+inline void ConvolveVerticalScale(const int16_t* LIBGAV1_RESTRICT const source,
+ const int intermediate_height,
+ const int width, const int subpixel_y,
+ const int filter_index, const int step_y,
+ const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
constexpr ptrdiff_t src_stride = kIntermediateStride;
// A possible improvement is to use arithmetic to decide how many times to
@@ -1421,11 +1447,11 @@ inline void ConvolveVerticalScale(const int16_t* const src, const int width,
// |dest| is 16-bit in compound mode, Pixel otherwise.
uint16_t* dest16_y;
uint8_t* dest_y;
+ const int16_t* src = source;
int x = 0;
do {
- const int16_t* const src_x = src + x;
- const int16_t* src_y = src_x;
+ const int16_t* src_y = src;
dest16_y = static_cast<uint16_t*>(dest) + x;
dest_y = static_cast<uint8_t*>(dest) + x;
int p = subpixel_y & 1023;
@@ -1466,38 +1492,43 @@ inline void ConvolveVerticalScale(const int16_t* const src, const int width,
vst1_u8(dest_y, vqmovun_s16(sum));
}
p += step_y;
- src_y = src_x + (p >> kScaleSubPixelBits) * src_stride;
+ src_y = src + (p >> kScaleSubPixelBits) * src_stride;
prev_p = p;
dest16_y += dest_stride;
dest_y += dest_stride;
y -= 2;
} while (y != 0);
+ src += kIntermediateStride * intermediate_height;
x += 8;
} while (x < width);
}
template <bool is_compound>
-void ConvolveScale2D_NEON(const void* const reference,
+void ConvolveScale2D_NEON(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index, const int subpixel_x,
const int subpixel_y, const int step_x,
const int step_y, const int width, const int height,
- void* const prediction, const ptrdiff_t pred_stride) {
+ void* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
assert(step_x <= 2048);
+ assert(step_y <= 2048);
const int num_vert_taps = GetNumTapsInFilter(vert_filter_index);
const int intermediate_height =
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
kScaleSubPixelBits) +
num_vert_taps;
- assert(step_x <= 2048);
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
- int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
- (2 * kMaxSuperBlockSizeInPixels + 8)];
-
+ int16_t intermediate_result[kIntermediateAllocWidth *
+ (2 * kIntermediateAllocWidth + 8)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x44, sizeof(intermediate_result));
+#endif
// Horizontal filter.
// Filter types used for width <= 4 are different from those for width > 4.
// When width > 4, the valid filter index range is always [0, 3].
@@ -1597,8 +1628,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<6, 1, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
} else {
if (!is_compound && width == 2) {
@@ -1611,8 +1642,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<6, 2, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
}
break;
@@ -1628,8 +1659,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<8, 1, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
} else {
if (!is_compound && width == 2) {
@@ -1642,8 +1673,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<8, 2, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
}
break;
@@ -1659,8 +1690,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<2, 1, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
} else {
if (!is_compound && width == 2) {
@@ -1673,8 +1704,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<2, 2, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
}
break;
@@ -1693,8 +1724,8 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<4, 1, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
} else {
if (!is_compound && width == 2) {
@@ -1707,21 +1738,19 @@ void ConvolveScale2D_NEON(const void* const reference,
prediction, pred_stride);
} else {
ConvolveVerticalScale<4, 2, is_compound>(
- intermediate, width, subpixel_y, filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ filter_index, step_y, height, prediction, pred_stride);
}
}
}
}
-void ConvolveHorizontal_NEON(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int /*vertical_filter_index*/,
- const int horizontal_filter_id,
- const int /*vertical_filter_id*/, const int width,
- const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveHorizontal_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
// Set |src| to the outermost tap.
const auto* const src =
@@ -1741,10 +1770,11 @@ uint16x8_t Compound1DShift(const int16x8_t sum) {
template <int filter_index, bool is_compound = false,
bool negative_outside_taps = false>
-void FilterVertical(const uint8_t* const src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int width, const int height,
- const uint8x8_t* const taps) {
+void FilterVertical(const uint8_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int width,
+ const int height, const uint8x8_t* const taps) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps - 1;
auto* const dst8 = static_cast<uint8_t*>(dst);
@@ -1814,9 +1844,11 @@ void FilterVertical(const uint8_t* const src, const ptrdiff_t src_stride,
template <int filter_index, bool is_compound = false,
bool negative_outside_taps = false>
-void FilterVertical4xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int height, const uint8x8_t* const taps) {
+void FilterVertical4xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const uint8x8_t* const taps) {
const int num_taps = GetNumTapsInFilter(filter_index);
auto* dst8 = static_cast<uint8_t*>(dst);
auto* dst16 = static_cast<uint16_t*>(dst);
@@ -2001,9 +2033,11 @@ void FilterVertical4xH(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool negative_outside_taps = false>
-void FilterVertical2xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int height, const uint8x8_t* const taps) {
+void FilterVertical2xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int height,
+ const uint8x8_t* const taps) {
const int num_taps = GetNumTapsInFilter(filter_index);
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -2205,14 +2239,12 @@ void FilterVertical2xH(const uint8_t* src, const ptrdiff_t src_stride,
// filtering is required.
// The output is the single prediction of the block, clipped to valid pixel
// range.
-void ConvolveVertical_NEON(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int vertical_filter_index,
- const int /*horizontal_filter_id*/,
- const int vertical_filter_id, const int width,
- const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveVertical_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
const ptrdiff_t src_stride = reference_stride;
@@ -2239,8 +2271,9 @@ void ConvolveVertical_NEON(const void* const reference,
FilterVertical<0>(src, src_stride, dest, dest_stride, width, height,
taps + 1);
}
- } else if ((filter_index == 1) & ((vertical_filter_id == 1) |
- (vertical_filter_id == 15))) { // 5 tap.
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 1) |
+ static_cast<int>(vertical_filter_id == 15))) != 0) { // 5 tap.
if (width == 2) {
FilterVertical2xH<1>(src, src_stride, dest, dest_stride, height,
taps + 1);
@@ -2251,9 +2284,11 @@ void ConvolveVertical_NEON(const void* const reference,
FilterVertical<1>(src, src_stride, dest, dest_stride, width, height,
taps + 1);
}
- } else if ((filter_index == 1) &
- ((vertical_filter_id == 7) | (vertical_filter_id == 8) |
- (vertical_filter_id == 9))) { // 6 tap with weird negative taps.
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 7) |
+ static_cast<int>(vertical_filter_id == 8) |
+ static_cast<int>(vertical_filter_id == 9))) !=
+ 0) { // 6 tap with weird negative taps.
if (width == 2) {
FilterVertical2xH<1,
/*negative_outside_taps=*/true>(
@@ -2325,11 +2360,11 @@ void ConvolveVertical_NEON(const void* const reference,
}
void ConvolveCompoundCopy_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
const auto* src = static_cast<const uint8_t*>(reference);
const ptrdiff_t src_stride = reference_stride;
auto* dest = static_cast<uint16_t*>(prediction);
@@ -2381,11 +2416,11 @@ void ConvolveCompoundCopy_NEON(
}
void ConvolveCompoundVertical_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int vertical_filter_index,
- const int /*horizontal_filter_id*/, const int vertical_filter_id,
- const int width, const int height, void* const prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
const ptrdiff_t src_stride = reference_stride;
@@ -2408,8 +2443,9 @@ void ConvolveCompoundVertical_NEON(
FilterVertical<0, /*is_compound=*/true>(src, src_stride, dest, width,
width, height, taps + 1);
}
- } else if ((filter_index == 1) & ((vertical_filter_id == 1) |
- (vertical_filter_id == 15))) { // 5 tap.
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 1) |
+ static_cast<int>(vertical_filter_id == 15))) != 0) { // 5 tap.
if (width == 4) {
FilterVertical4xH<1, /*is_compound=*/true>(src, src_stride, dest, 4,
height, taps + 1);
@@ -2417,9 +2453,11 @@ void ConvolveCompoundVertical_NEON(
FilterVertical<1, /*is_compound=*/true>(src, src_stride, dest, width,
width, height, taps + 1);
}
- } else if ((filter_index == 1) &
- ((vertical_filter_id == 7) | (vertical_filter_id == 8) |
- (vertical_filter_id == 9))) { // 6 tap with weird negative taps.
+ } else if ((static_cast<int>(filter_index == 1) &
+ (static_cast<int>(vertical_filter_id == 7) |
+ static_cast<int>(vertical_filter_id == 8) |
+ static_cast<int>(vertical_filter_id == 9))) !=
+ 0) { // 6 tap with weird negative taps.
if (width == 4) {
FilterVertical4xH<1, /*is_compound=*/true,
/*negative_outside_taps=*/true>(src, src_stride, dest,
@@ -2476,11 +2514,11 @@ void ConvolveCompoundVertical_NEON(
}
void ConvolveCompoundHorizontal_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int /*vertical_filter_index*/,
- const int horizontal_filter_id, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
const auto* const src =
static_cast<const uint8_t*>(reference) - kHorizontalOffset;
@@ -2492,9 +2530,10 @@ void ConvolveCompoundHorizontal_NEON(
}
template <int vertical_taps>
-void Compound2DVertical(const uint16_t* const intermediate_result,
- const int width, const int height, const int16x8_t taps,
- void* const prediction) {
+void Compound2DVertical(
+ const uint16_t* LIBGAV1_RESTRICT const intermediate_result, const int width,
+ const int height, const int16x8_t taps,
+ void* LIBGAV1_RESTRICT const prediction) {
auto* const dest = static_cast<uint16_t*>(prediction);
if (width == 4) {
Filter2DVerticalWidth4<vertical_taps, /*is_compound=*/true>(
@@ -2505,14 +2544,12 @@ void Compound2DVertical(const uint16_t* const intermediate_result,
}
}
-void ConvolveCompound2D_NEON(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int vertical_filter_index,
- const int horizontal_filter_id,
- const int vertical_filter_id, const int width,
- const int height, void* const prediction,
- const ptrdiff_t /*pred_stride*/) {
+void ConvolveCompound2D_NEON(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int vertical_filter_index, const int horizontal_filter_id,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t /*pred_stride*/) {
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
uint16_t
@@ -2551,16 +2588,18 @@ void ConvolveCompound2D_NEON(const void* const reference,
}
}
-inline void HalfAddHorizontal(const uint8_t* const src, uint8_t* const dst) {
+inline void HalfAddHorizontal(const uint8_t* LIBGAV1_RESTRICT const src,
+ uint8_t* LIBGAV1_RESTRICT const dst) {
const uint8x16_t left = vld1q_u8(src);
const uint8x16_t right = vld1q_u8(src + 1);
vst1q_u8(dst, vrhaddq_u8(left, right));
}
template <int width>
-inline void IntraBlockCopyHorizontal(const uint8_t* src,
+inline void IntraBlockCopyHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
const ptrdiff_t src_stride,
- const int height, uint8_t* dst,
+ const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 16);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 16);
@@ -2601,10 +2640,13 @@ inline void IntraBlockCopyHorizontal(const uint8_t* src,
}
void ConvolveIntraBlockCopyHorizontal_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
- const int height, void* const prediction, const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*subpixel_x*/,
+ const int /*subpixel_y*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -2630,7 +2672,7 @@ void ConvolveIntraBlockCopyHorizontal_NEON(
src += reference_stride;
dest += pred_stride;
} while (--y != 0);
- } else if (width == 4) {
+ } else { // width == 4
uint8x8_t left = vdup_n_u8(0);
uint8x8_t right = vdup_n_u8(0);
int y = height;
@@ -2650,34 +2692,14 @@ void ConvolveIntraBlockCopyHorizontal_NEON(
dest += pred_stride;
y -= 2;
} while (y != 0);
- } else {
- assert(width == 2);
- uint8x8_t left = vdup_n_u8(0);
- uint8x8_t right = vdup_n_u8(0);
- int y = height;
- do {
- left = Load2<0>(src, left);
- right = Load2<0>(src + 1, right);
- src += reference_stride;
- left = Load2<1>(src, left);
- right = Load2<1>(src + 1, right);
- src += reference_stride;
-
- const uint8x8_t result = vrhadd_u8(left, right);
-
- Store2<0>(dest, result);
- dest += pred_stride;
- Store2<1>(dest, result);
- dest += pred_stride;
- y -= 2;
- } while (y != 0);
}
}
template <int width>
-inline void IntraBlockCopyVertical(const uint8_t* src,
+inline void IntraBlockCopyVertical(const uint8_t* LIBGAV1_RESTRICT src,
const ptrdiff_t src_stride, const int height,
- uint8_t* dst, const ptrdiff_t dst_stride) {
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 16);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 16);
uint8x16_t row[8], below[8];
@@ -2764,11 +2786,13 @@ inline void IntraBlockCopyVertical(const uint8_t* src,
}
void ConvolveIntraBlockCopyVertical_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -2799,7 +2823,7 @@ void ConvolveIntraBlockCopyVertical_NEON(
row = below;
} while (--y != 0);
- } else if (width == 4) {
+ } else { // width == 4
uint8x8_t row = Load4(src);
uint8x8_t below = vdup_n_u8(0);
src += reference_stride;
@@ -2814,28 +2838,13 @@ void ConvolveIntraBlockCopyVertical_NEON(
row = below;
} while (--y != 0);
- } else {
- assert(width == 2);
- uint8x8_t row = Load2(src);
- uint8x8_t below = vdup_n_u8(0);
- src += reference_stride;
-
- int y = height;
- do {
- below = Load2<0>(src, below);
- src += reference_stride;
-
- Store2<0>(dest, vrhadd_u8(row, below));
- dest += pred_stride;
-
- row = below;
- } while (--y != 0);
}
}
template <int width>
-inline void IntraBlockCopy2D(const uint8_t* src, const ptrdiff_t src_stride,
- const int height, uint8_t* dst,
+inline void IntraBlockCopy2D(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 8);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 8);
@@ -2996,11 +3005,13 @@ inline void IntraBlockCopy2D(const uint8_t* src, const ptrdiff_t src_stride,
}
void ConvolveIntraBlockCopy2D_NEON(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
+ assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
+ assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
// Note: allow vertical access to height + 1. Because this function is only
@@ -3017,7 +3028,7 @@ void ConvolveIntraBlockCopy2D_NEON(
IntraBlockCopy2D<16>(src, reference_stride, height, dest, pred_stride);
} else if (width == 8) {
IntraBlockCopy2D<8>(src, reference_stride, height, dest, pred_stride);
- } else if (width == 4) {
+ } else { // width == 4
uint8x8_t left = Load4(src);
uint8x8_t right = Load4(src + 1);
src += reference_stride;
@@ -3045,34 +3056,6 @@ void ConvolveIntraBlockCopy2D_NEON(
row = vget_high_u16(below);
y -= 2;
} while (y != 0);
- } else {
- uint8x8_t left = Load2(src);
- uint8x8_t right = Load2(src + 1);
- src += reference_stride;
-
- uint16x4_t row = vget_low_u16(vaddl_u8(left, right));
-
- int y = height;
- do {
- left = Load2<0>(src, left);
- right = Load2<0>(src + 1, right);
- src += reference_stride;
- left = Load2<2>(src, left);
- right = Load2<2>(src + 1, right);
- src += reference_stride;
-
- const uint16x8_t below = vaddl_u8(left, right);
-
- const uint8x8_t result = vrshrn_n_u16(
- vaddq_u16(vcombine_u16(row, vget_low_u16(below)), below), 2);
- Store2<0>(dest, result);
- dest += pred_stride;
- Store2<2>(dest, result);
- dest += pred_stride;
-
- row = vget_high_u16(below);
- y -= 2;
- } while (y != 0);
}
}
diff --git a/src/dsp/arm/convolve_neon.h b/src/dsp/arm/convolve_neon.h
index 948ef4d..9c67bc9 100644
--- a/src/dsp/arm/convolve_neon.h
+++ b/src/dsp/arm/convolve_neon.h
@@ -25,6 +25,7 @@ namespace dsp {
// Initializes Dsp::convolve. This function is not thread-safe.
void ConvolveInit_NEON();
+void ConvolveInit10bpp_NEON();
} // namespace dsp
} // namespace libgav1
@@ -45,6 +46,22 @@ void ConvolveInit_NEON();
#define LIBGAV1_Dsp8bpp_ConvolveScale2D LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_ConvolveHorizontal LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveVertical LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Convolve2D LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_ConvolveCompoundCopy LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveCompoundVertical LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveCompound2D LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyHorizontal LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyVertical LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy2D LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_ConvolveScale2D LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_CONVOLVE_NEON_H_
diff --git a/src/dsp/arm/distance_weighted_blend_neon.cc b/src/dsp/arm/distance_weighted_blend_neon.cc
index a0cd0ac..7d287c8 100644
--- a/src/dsp/arm/distance_weighted_blend_neon.cc
+++ b/src/dsp/arm/distance_weighted_blend_neon.cc
@@ -52,11 +52,10 @@ inline int16x8_t ComputeWeightedAverage8(const int16x8_t pred0,
}
template <int width, int height>
-inline void DistanceWeightedBlendSmall_NEON(const int16_t* prediction_0,
- const int16_t* prediction_1,
- const int16x4_t weights[2],
- void* const dest,
- const ptrdiff_t dest_stride) {
+inline void DistanceWeightedBlendSmall_NEON(
+ const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1, const int16x4_t weights[2],
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
constexpr int step = 16 / width;
@@ -94,12 +93,11 @@ inline void DistanceWeightedBlendSmall_NEON(const int16_t* prediction_0,
}
}
-inline void DistanceWeightedBlendLarge_NEON(const int16_t* prediction_0,
- const int16_t* prediction_1,
- const int16x4_t weights[2],
- const int width, const int height,
- void* const dest,
- const ptrdiff_t dest_stride) {
+inline void DistanceWeightedBlendLarge_NEON(
+ const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1, const int16x4_t weights[2],
+ const int width, const int height, void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
int y = height;
@@ -127,12 +125,11 @@ inline void DistanceWeightedBlendLarge_NEON(const int16_t* prediction_0,
} while (--y != 0);
}
-inline void DistanceWeightedBlend_NEON(const void* prediction_0,
- const void* prediction_1,
- const uint8_t weight_0,
- const uint8_t weight_1, const int width,
- const int height, void* const dest,
- const ptrdiff_t dest_stride) {
+inline void DistanceWeightedBlend_NEON(
+ const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1, const uint8_t weight_0,
+ const uint8_t weight_1, const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t dest_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int16x4_t weights[2] = {vdup_n_s16(weight_0), vdup_n_s16(weight_1)};
@@ -267,11 +264,12 @@ inline uint16x4x4_t LoadU16x4_x4(uint16_t const* ptr) {
return x;
}
-void DistanceWeightedBlend_NEON(const void* prediction_0,
- const void* prediction_1,
+void DistanceWeightedBlend_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const uint8_t weight_0, const uint8_t weight_1,
const int width, const int height,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
auto* dst = static_cast<uint16_t*>(dest);
diff --git a/src/dsp/arm/film_grain_neon.cc b/src/dsp/arm/film_grain_neon.cc
index 8ee3745..0b1b481 100644
--- a/src/dsp/arm/film_grain_neon.cc
+++ b/src/dsp/arm/film_grain_neon.cc
@@ -34,6 +34,7 @@
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/logging.h"
+#include "src/utils/memory.h"
namespace libgav1 {
namespace dsp {
@@ -51,6 +52,12 @@ inline int16x8_t GetSignedSource8(const uint8_t* src) {
return ZeroExtend(vld1_u8(src));
}
+inline int16x8_t GetSignedSource8Msan(const uint8_t* src, int /*valid_range*/) {
+ // TODO(b/194217060): restore |valid_range| usage after correcting call sites
+ // causing test vector failures.
+ return ZeroExtend(Load1MsanU8(src, 0));
+}
+
inline void StoreUnsigned8(uint8_t* dest, const uint16x8_t data) {
vst1_u8(dest, vmovn_u16(data));
}
@@ -62,6 +69,13 @@ inline int16x8_t GetSignedSource8(const uint16_t* src) {
return vreinterpretq_s16_u16(vld1q_u16(src));
}
+inline int16x8_t GetSignedSource8Msan(const uint16_t* src,
+ int /*valid_range*/) {
+ // TODO(b/194217060): restore |valid_range| usage after correcting call sites
+ // causing test vector failures.
+ return vreinterpretq_s16_u16(Load1QMsanU16(src, 0));
+}
+
inline void StoreUnsigned8(uint16_t* dest, const uint16x8_t data) {
vst1q_u16(dest, data);
}
@@ -84,8 +98,10 @@ inline int32x4x2_t AccumulateWeightedGrain(const int16x8_t grain_lo,
// compute pixels that come after in the row, we have to finish the calculations
// one at a time.
template <int bitdepth, int auto_regression_coeff_lag, int lane>
-inline void WriteFinalAutoRegression(int8_t* grain_cursor, int32x4x2_t sum,
- const int8_t* coeffs, int pos, int shift) {
+inline void WriteFinalAutoRegression(int8_t* LIBGAV1_RESTRICT grain_cursor,
+ int32x4x2_t sum,
+ const int8_t* LIBGAV1_RESTRICT coeffs,
+ int pos, int shift) {
int32_t result = vgetq_lane_s32(sum.val[lane >> 2], lane & 3);
for (int delta_col = -auto_regression_coeff_lag; delta_col < 0; ++delta_col) {
@@ -99,8 +115,10 @@ inline void WriteFinalAutoRegression(int8_t* grain_cursor, int32x4x2_t sum,
#if LIBGAV1_MAX_BITDEPTH >= 10
template <int bitdepth, int auto_regression_coeff_lag, int lane>
-inline void WriteFinalAutoRegression(int16_t* grain_cursor, int32x4x2_t sum,
- const int8_t* coeffs, int pos, int shift) {
+inline void WriteFinalAutoRegression(int16_t* LIBGAV1_RESTRICT grain_cursor,
+ int32x4x2_t sum,
+ const int8_t* LIBGAV1_RESTRICT coeffs,
+ int pos, int shift) {
int32_t result = vgetq_lane_s32(sum.val[lane >> 2], lane & 3);
for (int delta_col = -auto_regression_coeff_lag; delta_col < 0; ++delta_col) {
@@ -117,12 +135,11 @@ inline void WriteFinalAutoRegression(int16_t* grain_cursor, int32x4x2_t sum,
// compute pixels that come after in the row, we have to finish the calculations
// one at a time.
template <int bitdepth, int auto_regression_coeff_lag, int lane>
-inline void WriteFinalAutoRegressionChroma(int8_t* u_grain_cursor,
- int8_t* v_grain_cursor,
- int32x4x2_t sum_u, int32x4x2_t sum_v,
- const int8_t* coeffs_u,
- const int8_t* coeffs_v, int pos,
- int shift) {
+inline void WriteFinalAutoRegressionChroma(
+ int8_t* LIBGAV1_RESTRICT u_grain_cursor,
+ int8_t* LIBGAV1_RESTRICT v_grain_cursor, int32x4x2_t sum_u,
+ int32x4x2_t sum_v, const int8_t* LIBGAV1_RESTRICT coeffs_u,
+ const int8_t* LIBGAV1_RESTRICT coeffs_v, int pos, int shift) {
WriteFinalAutoRegression<bitdepth, auto_regression_coeff_lag, lane>(
u_grain_cursor, sum_u, coeffs_u, pos, shift);
WriteFinalAutoRegression<bitdepth, auto_regression_coeff_lag, lane>(
@@ -131,12 +148,11 @@ inline void WriteFinalAutoRegressionChroma(int8_t* u_grain_cursor,
#if LIBGAV1_MAX_BITDEPTH >= 10
template <int bitdepth, int auto_regression_coeff_lag, int lane>
-inline void WriteFinalAutoRegressionChroma(int16_t* u_grain_cursor,
- int16_t* v_grain_cursor,
- int32x4x2_t sum_u, int32x4x2_t sum_v,
- const int8_t* coeffs_u,
- const int8_t* coeffs_v, int pos,
- int shift) {
+inline void WriteFinalAutoRegressionChroma(
+ int16_t* LIBGAV1_RESTRICT u_grain_cursor,
+ int16_t* LIBGAV1_RESTRICT v_grain_cursor, int32x4x2_t sum_u,
+ int32x4x2_t sum_v, const int8_t* LIBGAV1_RESTRICT coeffs_u,
+ const int8_t* LIBGAV1_RESTRICT coeffs_v, int pos, int shift) {
WriteFinalAutoRegression<bitdepth, auto_regression_coeff_lag, lane>(
u_grain_cursor, sum_u, coeffs_u, pos, shift);
WriteFinalAutoRegression<bitdepth, auto_regression_coeff_lag, lane>(
@@ -181,6 +197,20 @@ inline uint16x8_t GetAverageLuma(const uint8_t* const luma, int subsampling_x) {
return vmovl_u8(vld1_u8(luma));
}
+inline uint16x8_t GetAverageLumaMsan(const uint8_t* const luma,
+ int subsampling_x, int /*valid_range*/) {
+ if (subsampling_x != 0) {
+ // TODO(b/194217060): restore |valid_range| usage after correcting call
+ // sites causing test vector failures.
+ const uint8x16_t src = Load1QMsanU8(luma, 0);
+
+ return vrshrq_n_u16(vpaddlq_u8(src), 1);
+ }
+ // TODO(b/194217060): restore |valid_range| usage after correcting call sites
+ // causing test vector failures.
+ return vmovl_u8(Load1MsanU8(luma, 0));
+}
+
#if LIBGAV1_MAX_BITDEPTH >= 10
// Computes subsampled luma for use with chroma, by averaging in the x direction
// or y direction when applicable.
@@ -220,16 +250,28 @@ inline uint16x8_t GetAverageLuma(const uint16_t* const luma,
}
return vld1q_u16(luma);
}
+
+inline uint16x8_t GetAverageLumaMsan(const uint16_t* const luma,
+ int subsampling_x, int /*valid_range*/) {
+ if (subsampling_x != 0) {
+ // TODO(b/194217060): restore |valid_range| usage after correcting call
+ // sites causing test vector failures.
+ const uint16x8x2_t src = Load2QMsanU16(luma, 0);
+ return vrhaddq_u16(src.val[0], src.val[1]);
+ }
+ // TODO(b/194217060): restore |valid_range| usage after correcting call sites
+ // causing test vector failures.
+ return Load1QMsanU16(luma, 0);
+}
#endif // LIBGAV1_MAX_BITDEPTH >= 10
template <int bitdepth, typename GrainType, int auto_regression_coeff_lag,
bool use_luma>
-void ApplyAutoRegressiveFilterToChromaGrains_NEON(const FilmGrainParams& params,
- const void* luma_grain_buffer,
- int subsampling_x,
- int subsampling_y,
- void* u_grain_buffer,
- void* v_grain_buffer) {
+void ApplyAutoRegressiveFilterToChromaGrains_NEON(
+ const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT luma_grain_buffer, int subsampling_x,
+ int subsampling_y, void* LIBGAV1_RESTRICT u_grain_buffer,
+ void* LIBGAV1_RESTRICT v_grain_buffer) {
static_assert(auto_regression_coeff_lag <= 3, "Invalid autoregression lag.");
const auto* luma_grain = static_cast<const GrainType*>(luma_grain_buffer);
auto* u_grain = static_cast<GrainType*>(u_grain_buffer);
@@ -558,49 +600,93 @@ void ApplyAutoRegressiveFilterToLumaGrain_NEON(const FilmGrainParams& params,
#undef ACCUMULATE_WEIGHTED_GRAIN
}
-void InitializeScalingLookupTable_NEON(
- int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
- uint8_t scaling_lut[kScalingLookupTableSize]) {
+template <int bitdepth>
+void InitializeScalingLookupTable_NEON(int num_points,
+ const uint8_t point_value[],
+ const uint8_t point_scaling[],
+ int16_t* scaling_lut,
+ const int scaling_lut_length) {
+ static_assert(bitdepth < kBitdepth12,
+ "NEON Scaling lookup table only supports 8bpp and 10bpp.");
if (num_points == 0) {
- memset(scaling_lut, 0, sizeof(scaling_lut[0]) * kScalingLookupTableSize);
+ memset(scaling_lut, 0, sizeof(scaling_lut[0]) * scaling_lut_length);
return;
}
- static_assert(sizeof(scaling_lut[0]) == 1, "");
- memset(scaling_lut, point_scaling[0], point_value[0]);
- const uint32x4_t steps = vmovl_u16(vcreate_u16(0x0003000200010000));
- const uint32x4_t offset = vdupq_n_u32(32768);
+ static_assert(sizeof(scaling_lut[0]) == 2, "");
+ Memset(scaling_lut, point_scaling[0],
+ std::max(static_cast<int>(point_value[0]), 1)
+ << (bitdepth - kBitdepth8));
+ const int32x4_t steps = vmovl_s16(vcreate_s16(0x0003000200010000));
+ const int32x4_t rounding = vdupq_n_s32(32768);
for (int i = 0; i < num_points - 1; ++i) {
const int delta_y = point_scaling[i + 1] - point_scaling[i];
const int delta_x = point_value[i + 1] - point_value[i];
+ // |delta| corresponds to b, for the function y = a + b*x.
const int delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
const int delta4 = delta << 2;
- const uint8x8_t base_point = vdup_n_u8(point_scaling[i]);
- uint32x4_t upscaled_points0 = vmlaq_n_u32(offset, steps, delta);
- const uint32x4_t line_increment4 = vdupq_n_u32(delta4);
+ // vmull_n_u16 will not work here because |delta| typically exceeds the
+ // range of uint16_t.
+ int32x4_t upscaled_points0 = vmlaq_n_s32(rounding, steps, delta);
+ const int32x4_t line_increment4 = vdupq_n_s32(delta4);
// Get the second set of 4 points by adding 4 steps to the first set.
- uint32x4_t upscaled_points1 = vaddq_u32(upscaled_points0, line_increment4);
+ int32x4_t upscaled_points1 = vaddq_s32(upscaled_points0, line_increment4);
// We obtain the next set of 8 points by adding 8 steps to each of the
// current 8 points.
- const uint32x4_t line_increment8 = vshlq_n_u32(line_increment4, 1);
+ const int32x4_t line_increment8 = vshlq_n_s32(line_increment4, 1);
+ const int16x8_t base_point = vdupq_n_s16(point_scaling[i]);
int x = 0;
+ // Derive and write 8 values (or 32 values, for 10bpp).
do {
- const uint16x4_t interp_points0 = vshrn_n_u32(upscaled_points0, 16);
- const uint16x4_t interp_points1 = vshrn_n_u32(upscaled_points1, 16);
- const uint8x8_t interp_points =
- vmovn_u16(vcombine_u16(interp_points0, interp_points1));
+ const int16x4_t interp_points0 = vshrn_n_s32(upscaled_points0, 16);
+ const int16x4_t interp_points1 = vshrn_n_s32(upscaled_points1, 16);
+ const int16x8_t interp_points =
+ vcombine_s16(interp_points0, interp_points1);
// The spec guarantees that the max value of |point_value[i]| + x is 255.
- // Writing 8 bytes starting at the final table byte, leaves 7 bytes of
+ // Writing 8 values starting at the final table byte, leaves 7 values of
// required padding.
- vst1_u8(&scaling_lut[point_value[i] + x],
- vadd_u8(interp_points, base_point));
- upscaled_points0 = vaddq_u32(upscaled_points0, line_increment8);
- upscaled_points1 = vaddq_u32(upscaled_points1, line_increment8);
+ const int16x8_t full_interp = vaddq_s16(interp_points, base_point);
+ const int x_base = (point_value[i] + x) << (bitdepth - kBitdepth8);
+ if (bitdepth == kBitdepth10) {
+ const int16x8_t next_val = vaddq_s16(
+ base_point,
+ vdupq_n_s16((vgetq_lane_s32(upscaled_points1, 3) + delta) >> 16));
+ const int16x8_t start = full_interp;
+ const int16x8_t end = vextq_s16(full_interp, next_val, 1);
+ // lut[i << 2] = start;
+ // lut[(i << 2) + 1] = start + RightShiftWithRounding(start - end, 2)
+ // lut[(i << 2) + 2] = start +
+ // RightShiftWithRounding(2 * (start - end), 2)
+ // lut[(i << 2) + 3] = start +
+ // RightShiftWithRounding(3 * (start - end), 2)
+ const int16x8_t delta = vsubq_s16(end, start);
+ const int16x8_t double_delta = vshlq_n_s16(delta, 1);
+ const int16x8_t delta2 = vrshrq_n_s16(double_delta, 2);
+ const int16x8_t delta3 =
+ vrshrq_n_s16(vaddq_s16(delta, double_delta), 2);
+ const int16x8x4_t result = {
+ start, vaddq_s16(start, vrshrq_n_s16(delta, 2)),
+ vaddq_s16(start, delta2), vaddq_s16(start, delta3)};
+ vst4q_s16(&scaling_lut[x_base], result);
+ } else {
+ vst1q_s16(&scaling_lut[x_base], full_interp);
+ }
+ upscaled_points0 = vaddq_s32(upscaled_points0, line_increment8);
+ upscaled_points1 = vaddq_s32(upscaled_points1, line_increment8);
x += 8;
} while (x < delta_x);
}
- const uint8_t last_point_value = point_value[num_points - 1];
- memset(&scaling_lut[last_point_value], point_scaling[num_points - 1],
- kScalingLookupTableSize - last_point_value);
+ const int16_t last_point_value = point_value[num_points - 1];
+ const int x_base = last_point_value << (bitdepth - kBitdepth8);
+ Memset(&scaling_lut[x_base], point_scaling[num_points - 1],
+ scaling_lut_length - x_base);
+ if (bitdepth == kBitdepth10 && x_base > 0) {
+ const int start = scaling_lut[x_base - 4];
+ const int end = point_scaling[num_points - 1];
+ const int delta = end - start;
+ scaling_lut[x_base - 3] = start + RightShiftWithRounding(delta, 2);
+ scaling_lut[x_base - 2] = start + RightShiftWithRounding(2 * delta, 2);
+ scaling_lut[x_base - 1] = start + RightShiftWithRounding(3 * delta, 2);
+ }
}
inline int16x8_t Clip3(const int16x8_t value, const int16x8_t low,
@@ -611,86 +697,38 @@ inline int16x8_t Clip3(const int16x8_t value, const int16x8_t low,
template <int bitdepth, typename Pixel>
inline int16x8_t GetScalingFactors(
- const uint8_t scaling_lut[kScalingLookupTableSize], const Pixel* source) {
+ const int16_t scaling_lut[kScalingLookupTableSize], const Pixel* source) {
int16_t start_vals[8];
- if (bitdepth == 8) {
- start_vals[0] = scaling_lut[source[0]];
- start_vals[1] = scaling_lut[source[1]];
- start_vals[2] = scaling_lut[source[2]];
- start_vals[3] = scaling_lut[source[3]];
- start_vals[4] = scaling_lut[source[4]];
- start_vals[5] = scaling_lut[source[5]];
- start_vals[6] = scaling_lut[source[6]];
- start_vals[7] = scaling_lut[source[7]];
- return vld1q_s16(start_vals);
+ static_assert(bitdepth <= kBitdepth10,
+ "NEON Film Grain is not yet implemented for 12bpp.");
+ for (int i = 0; i < 8; ++i) {
+ assert(source[i] < kScalingLookupTableSize << (bitdepth - 2));
+ start_vals[i] = scaling_lut[source[i]];
}
- int16_t end_vals[8];
- // TODO(petersonab): Precompute this into a larger table for direct lookups.
- int index = source[0] >> 2;
- start_vals[0] = scaling_lut[index];
- end_vals[0] = scaling_lut[index + 1];
- index = source[1] >> 2;
- start_vals[1] = scaling_lut[index];
- end_vals[1] = scaling_lut[index + 1];
- index = source[2] >> 2;
- start_vals[2] = scaling_lut[index];
- end_vals[2] = scaling_lut[index + 1];
- index = source[3] >> 2;
- start_vals[3] = scaling_lut[index];
- end_vals[3] = scaling_lut[index + 1];
- index = source[4] >> 2;
- start_vals[4] = scaling_lut[index];
- end_vals[4] = scaling_lut[index + 1];
- index = source[5] >> 2;
- start_vals[5] = scaling_lut[index];
- end_vals[5] = scaling_lut[index + 1];
- index = source[6] >> 2;
- start_vals[6] = scaling_lut[index];
- end_vals[6] = scaling_lut[index + 1];
- index = source[7] >> 2;
- start_vals[7] = scaling_lut[index];
- end_vals[7] = scaling_lut[index + 1];
- const int16x8_t start = vld1q_s16(start_vals);
- const int16x8_t end = vld1q_s16(end_vals);
- int16x8_t remainder = GetSignedSource8(source);
- remainder = vandq_s16(remainder, vdupq_n_s16(3));
- const int16x8_t delta = vmulq_s16(vsubq_s16(end, start), remainder);
- return vaddq_s16(start, vrshrq_n_s16(delta, 2));
+ return vld1q_s16(start_vals);
}
+template <int bitdepth>
inline int16x8_t ScaleNoise(const int16x8_t noise, const int16x8_t scaling,
const int16x8_t scaling_shift_vect) {
- const int16x8_t upscaled_noise = vmulq_s16(noise, scaling);
- return vrshlq_s16(upscaled_noise, scaling_shift_vect);
-}
-
-#if LIBGAV1_MAX_BITDEPTH >= 10
-inline int16x8_t ScaleNoise(const int16x8_t noise, const int16x8_t scaling,
- const int32x4_t scaling_shift_vect) {
- // TODO(petersonab): Try refactoring scaling lookup table to int16_t and
- // upscaling by 7 bits to permit high half multiply. This would eliminate
- // the intermediate 32x4 registers. Also write the averaged values directly
- // into the table so it doesn't have to be done for every pixel in
- // the frame.
- const int32x4_t upscaled_noise_lo =
- vmull_s16(vget_low_s16(noise), vget_low_s16(scaling));
- const int32x4_t upscaled_noise_hi =
- vmull_s16(vget_high_s16(noise), vget_high_s16(scaling));
- const int16x4_t noise_lo =
- vmovn_s32(vrshlq_s32(upscaled_noise_lo, scaling_shift_vect));
- const int16x4_t noise_hi =
- vmovn_s32(vrshlq_s32(upscaled_noise_hi, scaling_shift_vect));
- return vcombine_s16(noise_lo, noise_hi);
+ if (bitdepth == kBitdepth8) {
+ const int16x8_t upscaled_noise = vmulq_s16(noise, scaling);
+ return vrshlq_s16(upscaled_noise, scaling_shift_vect);
+ }
+ // Scaling shift is in the range [8, 11]. The doubling multiply returning high
+ // half is equivalent to a right shift by 15, so |scaling_shift_vect| should
+ // provide a left shift equal to 15 - s, where s is the original shift
+ // parameter.
+ const int16x8_t scaling_up = vshlq_s16(scaling, scaling_shift_vect);
+ return vqrdmulhq_s16(noise, scaling_up);
}
-#endif // LIBGAV1_MAX_BITDEPTH >= 10
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageLuma_NEON(
- const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
- int width, int height, int start_height,
- const uint8_t scaling_lut_y[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
- ptrdiff_t dest_stride_y) {
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_luma,
+ int scaling_shift, int width, int height, int start_height,
+ const int16_t* scaling_lut_y, const void* source_plane_y,
+ ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y) {
const auto* noise_image =
static_cast<const Array2D<GrainType>*>(noise_image_ptr);
const auto* in_y_row = static_cast<const Pixel*>(source_plane_y);
@@ -702,10 +740,8 @@ void BlendNoiseWithImageLuma_NEON(
// In 8bpp, the maximum upscaled noise is 127*255 = 0x7E81, which is safe
// for 16 bit signed integers. In higher bitdepths, however, we have to
// expand to 32 to protect the sign bit.
- const int16x8_t scaling_shift_vect16 = vdupq_n_s16(-scaling_shift);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- const int32x4_t scaling_shift_vect32 = vdupq_n_s32(-scaling_shift);
-#endif // LIBGAV1_MAX_BITDEPTH >= 10
+ const int16x8_t scaling_shift_vect = vdupq_n_s16(
+ (bitdepth == kBitdepth10) ? 15 - scaling_shift : -scaling_shift);
int y = 0;
do {
@@ -713,25 +749,35 @@ void BlendNoiseWithImageLuma_NEON(
do {
// This operation on the unsigned input is safe in 8bpp because the vector
// is widened before it is reinterpreted.
- const int16x8_t orig = GetSignedSource8(&in_y_row[x]);
- const int16x8_t scaling =
+ const int16x8_t orig0 = GetSignedSource8(&in_y_row[x]);
+ const int16x8_t scaling0 =
GetScalingFactors<bitdepth, Pixel>(scaling_lut_y, &in_y_row[x]);
int16x8_t noise =
GetSignedSource8(&(noise_image[kPlaneY][y + start_height][x]));
- if (bitdepth == 8) {
- noise = ScaleNoise(noise, scaling, scaling_shift_vect16);
- } else {
-#if LIBGAV1_MAX_BITDEPTH >= 10
- noise = ScaleNoise(noise, scaling, scaling_shift_vect32);
-#endif // LIBGAV1_MAX_BITDEPTH >= 10
- }
- const int16x8_t combined = vaddq_s16(orig, noise);
+ noise = ScaleNoise<bitdepth>(noise, scaling0, scaling_shift_vect);
+ const int16x8_t combined0 = vaddq_s16(orig0, noise);
+ // In 8bpp, when params_.clip_to_restricted_range == false, we can replace
+ // clipping with vqmovun_s16, but it's not likely to be worth copying the
+ // function for just that case, though the gain would be very small.
+ StoreUnsigned8(&out_y_row[x],
+ vreinterpretq_u16_s16(Clip3(combined0, floor, ceiling)));
+ x += 8;
+
+ // This operation on the unsigned input is safe in 8bpp because the vector
+ // is widened before it is reinterpreted.
+ const int16x8_t orig1 = GetSignedSource8(&in_y_row[x]);
+ const int16x8_t scaling1 = GetScalingFactors<bitdepth, Pixel>(
+ scaling_lut_y, &in_y_row[std::min(x, width)]);
+ noise = GetSignedSource8(&(noise_image[kPlaneY][y + start_height][x]));
+
+ noise = ScaleNoise<bitdepth>(noise, scaling1, scaling_shift_vect);
+ const int16x8_t combined1 = vaddq_s16(orig1, noise);
// In 8bpp, when params_.clip_to_restricted_range == false, we can replace
// clipping with vqmovun_s16, but it's not likely to be worth copying the
// function for just that case, though the gain would be very small.
StoreUnsigned8(&out_y_row[x],
- vreinterpretq_u16_s16(Clip3(combined, floor, ceiling)));
+ vreinterpretq_u16_s16(Clip3(combined1, floor, ceiling)));
x += 8;
} while (x < width);
in_y_row += source_stride_y;
@@ -741,20 +787,16 @@ void BlendNoiseWithImageLuma_NEON(
template <int bitdepth, typename GrainType, typename Pixel>
inline int16x8_t BlendChromaValsWithCfl(
- const Pixel* average_luma_buffer,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const Pixel* chroma_cursor, const GrainType* noise_image_cursor,
- const int16x8_t scaling_shift_vect16,
- const int32x4_t scaling_shift_vect32) {
+ const Pixel* LIBGAV1_RESTRICT average_luma_buffer,
+ const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const Pixel* LIBGAV1_RESTRICT chroma_cursor,
+ const GrainType* LIBGAV1_RESTRICT noise_image_cursor,
+ const int16x8_t scaling_shift_vect) {
const int16x8_t scaling =
GetScalingFactors<bitdepth, Pixel>(scaling_lut, average_luma_buffer);
const int16x8_t orig = GetSignedSource8(chroma_cursor);
int16x8_t noise = GetSignedSource8(noise_image_cursor);
- if (bitdepth == 8) {
- noise = ScaleNoise(noise, scaling, scaling_shift_vect16);
- } else {
- noise = ScaleNoise(noise, scaling, scaling_shift_vect32);
- }
+ noise = ScaleNoise<bitdepth>(noise, scaling, scaling_shift_vect);
return vaddq_s16(orig, noise);
}
@@ -763,10 +805,10 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
const Array2D<GrainType>& noise_image, int min_value, int max_chroma,
int width, int height, int start_height, int subsampling_x,
int subsampling_y, int scaling_shift,
- const uint8_t scaling_lut[kScalingLookupTableSize], const Pixel* in_y_row,
- ptrdiff_t source_stride_y, const Pixel* in_chroma_row,
- ptrdiff_t source_stride_chroma, Pixel* out_chroma_row,
- ptrdiff_t dest_stride) {
+ const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const Pixel* LIBGAV1_RESTRICT in_y_row, ptrdiff_t source_stride_y,
+ const Pixel* in_chroma_row, ptrdiff_t source_stride_chroma,
+ Pixel* out_chroma_row, ptrdiff_t dest_stride) {
const int16x8_t floor = vdupq_n_s16(min_value);
const int16x8_t ceiling = vdupq_n_s16(max_chroma);
Pixel luma_buffer[16];
@@ -774,8 +816,8 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
// In 8bpp, the maximum upscaled noise is 127*255 = 0x7E81, which is safe
// for 16 bit signed integers. In higher bitdepths, however, we have to
// expand to 32 to protect the sign bit.
- const int16x8_t scaling_shift_vect16 = vdupq_n_s16(-scaling_shift);
- const int32x4_t scaling_shift_vect32 = vdupq_n_s32(-scaling_shift);
+ const int16x8_t scaling_shift_vect = vdupq_n_s16(
+ (bitdepth == kBitdepth10) ? 15 - scaling_shift : -scaling_shift);
const int chroma_height = (height + subsampling_y) >> subsampling_y;
const int chroma_width = (width + subsampling_x) >> subsampling_x;
@@ -791,8 +833,6 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
int x = 0;
do {
const int luma_x = x << subsampling_x;
- // TODO(petersonab): Consider specializing by subsampling_x. In the 444
- // case &in_y_row[x] can be passed to GetScalingFactors directly.
const uint16x8_t average_luma =
GetAverageLuma(&in_y_row[luma_x], subsampling_x);
StoreUnsigned8(average_luma_buffer, average_luma);
@@ -800,8 +840,7 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
const int16x8_t blended =
BlendChromaValsWithCfl<bitdepth, GrainType, Pixel>(
average_luma_buffer, scaling_lut, &in_chroma_row[x],
- &(noise_image[y + start_height][x]), scaling_shift_vect16,
- scaling_shift_vect32);
+ &(noise_image[y + start_height][x]), scaling_shift_vect);
// In 8bpp, when params_.clip_to_restricted_range == false, we can replace
// clipping with vqmovun_s16, but it's not likely to be worth copying the
@@ -813,18 +852,19 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
if (x < chroma_width) {
const int luma_x = x << subsampling_x;
- const int valid_range = width - luma_x;
- memcpy(luma_buffer, &in_y_row[luma_x], valid_range * sizeof(in_y_row[0]));
- luma_buffer[valid_range] = in_y_row[width - 1];
- const uint16x8_t average_luma =
- GetAverageLuma(luma_buffer, subsampling_x);
+ const int valid_range_pixels = width - luma_x;
+ const int valid_range_bytes = valid_range_pixels * sizeof(in_y_row[0]);
+ memcpy(luma_buffer, &in_y_row[luma_x], valid_range_bytes);
+ luma_buffer[valid_range_pixels] = in_y_row[width - 1];
+ const uint16x8_t average_luma = GetAverageLumaMsan(
+ luma_buffer, subsampling_x, valid_range_bytes + sizeof(in_y_row[0]));
+
StoreUnsigned8(average_luma_buffer, average_luma);
const int16x8_t blended =
BlendChromaValsWithCfl<bitdepth, GrainType, Pixel>(
average_luma_buffer, scaling_lut, &in_chroma_row[x],
- &(noise_image[y + start_height][x]), scaling_shift_vect16,
- scaling_shift_vect32);
+ &(noise_image[y + start_height][x]), scaling_shift_vect);
// In 8bpp, when params_.clip_to_restricted_range == false, we can replace
// clipping with vqmovun_s16, but it's not likely to be worth copying the
// function for just that case.
@@ -842,11 +882,11 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_NEON(
// This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageChromaWithCfl_NEON(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const void* LIBGAV1_RESTRICT source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
const auto* noise_image =
@@ -872,12 +912,11 @@ namespace low_bitdepth {
namespace {
inline int16x8_t BlendChromaValsNoCfl(
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const uint8_t* chroma_cursor, const int8_t* noise_image_cursor,
+ const int16_t* LIBGAV1_RESTRICT scaling_lut, const int16x8_t orig,
+ const int8_t* LIBGAV1_RESTRICT noise_image_cursor,
const int16x8_t& average_luma, const int16x8_t& scaling_shift_vect,
const int16x8_t& offset, int luma_multiplier, int chroma_multiplier) {
uint8_t merged_buffer[8];
- const int16x8_t orig = GetSignedSource8(chroma_cursor);
const int16x8_t weighted_luma = vmulq_n_s16(average_luma, luma_multiplier);
const int16x8_t weighted_chroma = vmulq_n_s16(orig, chroma_multiplier);
// Maximum value of |combined_u| is 127*255 = 0x7E81.
@@ -887,9 +926,9 @@ inline int16x8_t BlendChromaValsNoCfl(
const uint8x8_t merged = vqshrun_n_s16(vhaddq_s16(offset, combined), 4);
vst1_u8(merged_buffer, merged);
const int16x8_t scaling =
- GetScalingFactors<8, uint8_t>(scaling_lut, merged_buffer);
+ GetScalingFactors<kBitdepth8, uint8_t>(scaling_lut, merged_buffer);
int16x8_t noise = GetSignedSource8(noise_image_cursor);
- noise = ScaleNoise(noise, scaling, scaling_shift_vect);
+ noise = ScaleNoise<kBitdepth8>(noise, scaling, scaling_shift_vect);
return vaddq_s16(orig, noise);
}
@@ -898,10 +937,10 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_NEON(
int width, int height, int start_height, int subsampling_x,
int subsampling_y, int scaling_shift, int chroma_offset,
int chroma_multiplier, int luma_multiplier,
- const uint8_t scaling_lut[kScalingLookupTableSize], const uint8_t* in_y_row,
- ptrdiff_t source_stride_y, const uint8_t* in_chroma_row,
- ptrdiff_t source_stride_chroma, uint8_t* out_chroma_row,
- ptrdiff_t dest_stride) {
+ const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const uint8_t* LIBGAV1_RESTRICT in_y_row, ptrdiff_t source_stride_y,
+ const uint8_t* in_chroma_row, ptrdiff_t source_stride_chroma,
+ uint8_t* out_chroma_row, ptrdiff_t dest_stride) {
const int16x8_t floor = vdupq_n_s16(min_value);
const int16x8_t ceiling = vdupq_n_s16(max_chroma);
// In 8bpp, the maximum upscaled noise is 127*255 = 0x7E81, which is safe
@@ -913,6 +952,10 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_NEON(
const int chroma_width = (width + subsampling_x) >> subsampling_x;
const int safe_chroma_width = chroma_width & ~7;
uint8_t luma_buffer[16];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings.
+ memset(luma_buffer, 0, sizeof(luma_buffer));
+#endif
const int16x8_t offset = vdupq_n_s16(chroma_offset << 5);
start_height >>= subsampling_y;
@@ -921,10 +964,13 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_NEON(
int x = 0;
do {
const int luma_x = x << subsampling_x;
+ const int valid_range = width - luma_x;
+
+ const int16x8_t orig_chroma = GetSignedSource8(&in_chroma_row[x]);
const int16x8_t average_luma = vreinterpretq_s16_u16(
- GetAverageLuma(&in_y_row[luma_x], subsampling_x));
+ GetAverageLumaMsan(&in_y_row[luma_x], subsampling_x, valid_range));
const int16x8_t blended = BlendChromaValsNoCfl(
- scaling_lut, &in_chroma_row[x], &(noise_image[y + start_height][x]),
+ scaling_lut, orig_chroma, &(noise_image[y + start_height][x]),
average_luma, scaling_shift_vect, offset, luma_multiplier,
chroma_multiplier);
// In 8bpp, when params_.clip_to_restricted_range == false, we can
@@ -940,14 +986,19 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_NEON(
// |average_luma| computation requires a duplicated luma value at the
// end.
const int luma_x = x << subsampling_x;
- const int valid_range = width - luma_x;
- memcpy(luma_buffer, &in_y_row[luma_x], valid_range * sizeof(in_y_row[0]));
- luma_buffer[valid_range] = in_y_row[width - 1];
-
- const int16x8_t average_luma =
- vreinterpretq_s16_u16(GetAverageLuma(luma_buffer, subsampling_x));
+ const int valid_range_pixels = width - luma_x;
+ const int valid_range_bytes = valid_range_pixels * sizeof(in_y_row[0]);
+ memcpy(luma_buffer, &in_y_row[luma_x], valid_range_bytes);
+ luma_buffer[valid_range_pixels] = in_y_row[width - 1];
+ const int valid_range_chroma_bytes =
+ (chroma_width - x) * sizeof(in_chroma_row[0]);
+
+ const int16x8_t orig_chroma =
+ GetSignedSource8Msan(&in_chroma_row[x], valid_range_chroma_bytes);
+ const int16x8_t average_luma = vreinterpretq_s16_u16(GetAverageLumaMsan(
+ luma_buffer, subsampling_x, valid_range_bytes + sizeof(in_y_row[0])));
const int16x8_t blended = BlendChromaValsNoCfl(
- scaling_lut, &in_chroma_row[x], &(noise_image[y + start_height][x]),
+ scaling_lut, orig_chroma, &(noise_image[y + start_height][x]),
average_luma, scaling_shift_vect, offset, luma_multiplier,
chroma_multiplier);
StoreUnsigned8(&out_chroma_row[x],
@@ -963,11 +1014,11 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_NEON(
// This function is for the case params_.chroma_scaling_from_luma == false.
void BlendNoiseWithImageChroma8bpp_NEON(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const void* LIBGAV1_RESTRICT source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
assert(plane == kPlaneU || plane == kPlaneV);
@@ -989,12 +1040,11 @@ void BlendNoiseWithImageChroma8bpp_NEON(
in_uv, source_stride_uv, out_uv, dest_stride_uv);
}
-inline void WriteOverlapLine8bpp_NEON(const int8_t* noise_stripe_row,
- const int8_t* noise_stripe_row_prev,
- int plane_width,
- const int8x8_t grain_coeff,
- const int8x8_t old_coeff,
- int8_t* noise_image_row) {
+inline void WriteOverlapLine8bpp_NEON(
+ const int8_t* LIBGAV1_RESTRICT noise_stripe_row,
+ const int8_t* LIBGAV1_RESTRICT noise_stripe_row_prev, int plane_width,
+ const int8x8_t grain_coeff, const int8x8_t old_coeff,
+ int8_t* LIBGAV1_RESTRICT noise_image_row) {
int x = 0;
do {
// Note that these reads may exceed noise_stripe_row's width by up to 7
@@ -1009,10 +1059,10 @@ inline void WriteOverlapLine8bpp_NEON(const int8_t* noise_stripe_row,
} while (x < plane_width);
}
-void ConstructNoiseImageOverlap8bpp_NEON(const void* noise_stripes_buffer,
- int width, int height,
- int subsampling_x, int subsampling_y,
- void* noise_image_buffer) {
+void ConstructNoiseImageOverlap8bpp_NEON(
+ const void* LIBGAV1_RESTRICT noise_stripes_buffer, int width, int height,
+ int subsampling_x, int subsampling_y,
+ void* LIBGAV1_RESTRICT noise_image_buffer) {
const auto* noise_stripes =
static_cast<const Array2DView<int8_t>*>(noise_stripes_buffer);
auto* noise_image = static_cast<Array2D<int8_t>*>(noise_image_buffer);
@@ -1077,41 +1127,45 @@ void Init8bpp() {
// LumaAutoRegressionFunc
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<8, int8_t, 1>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth8, int8_t, 1>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<8, int8_t, 2>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth8, int8_t, 2>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<8, int8_t, 3>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth8, int8_t, 3>;
// ChromaAutoRegressionFunc[use_luma][auto_regression_coeff_lag]
// Chroma autoregression should never be called when lag is 0 and use_luma
// is false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 1,
+ false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 2,
+ false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 3,
+ false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 0, true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 1, true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 2, true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<8, int8_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth8, int8_t, 3, true>;
dsp->film_grain.construct_noise_image_overlap =
ConstructNoiseImageOverlap8bpp_NEON;
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_NEON;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_NEON<kBitdepth8>;
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_NEON<8, int8_t, uint8_t>;
+ BlendNoiseWithImageLuma_NEON<kBitdepth8, int8_t, uint8_t>;
dsp->film_grain.blend_noise_chroma[0] = BlendNoiseWithImageChroma8bpp_NEON;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_NEON<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChromaWithCfl_NEON<kBitdepth8, int8_t, uint8_t>;
}
} // namespace
@@ -1121,43 +1175,280 @@ void Init8bpp() {
namespace high_bitdepth {
namespace {
+inline void WriteOverlapLine10bpp_NEON(
+ const int16_t* LIBGAV1_RESTRICT noise_stripe_row,
+ const int16_t* LIBGAV1_RESTRICT noise_stripe_row_prev, int plane_width,
+ const int16x8_t grain_coeff, const int16x8_t old_coeff,
+ int16_t* LIBGAV1_RESTRICT noise_image_row) {
+ int x = 0;
+ do {
+ // Note that these reads may exceed noise_stripe_row's width by up to 7
+ // values.
+ const int16x8_t source_grain = vld1q_s16(noise_stripe_row + x);
+ const int16x8_t source_old = vld1q_s16(noise_stripe_row_prev + x);
+ // Maximum product is 511 * 27 = 0x35E5.
+ const int16x8_t weighted_grain = vmulq_s16(grain_coeff, source_grain);
+ // Maximum sum is 511 * (22 + 23) = 0x59D3.
+ const int16x8_t grain_sum =
+ vmlaq_s16(weighted_grain, old_coeff, source_old);
+ // Note that this write may exceed noise_image_row's width by up to 7
+ // values.
+ const int16x8_t grain = Clip3S16(vrshrq_n_s16(grain_sum, 5),
+ vdupq_n_s16(GetGrainMin<kBitdepth10>()),
+ vdupq_n_s16(GetGrainMax<kBitdepth10>()));
+ vst1q_s16(noise_image_row + x, grain);
+ x += 8;
+ } while (x < plane_width);
+}
+
+void ConstructNoiseImageOverlap10bpp_NEON(
+ const void* LIBGAV1_RESTRICT noise_stripes_buffer, int width, int height,
+ int subsampling_x, int subsampling_y,
+ void* LIBGAV1_RESTRICT noise_image_buffer) {
+ const auto* noise_stripes =
+ static_cast<const Array2DView<int16_t>*>(noise_stripes_buffer);
+ auto* noise_image = static_cast<Array2D<int16_t>*>(noise_image_buffer);
+ const int plane_width = (width + subsampling_x) >> subsampling_x;
+ const int plane_height = (height + subsampling_y) >> subsampling_y;
+ const int stripe_height = 32 >> subsampling_y;
+ const int stripe_mask = stripe_height - 1;
+ int y = stripe_height;
+ int luma_num = 1;
+ if (subsampling_y == 0) {
+ const int16x8_t first_row_grain_coeff = vdupq_n_s16(17);
+ const int16x8_t first_row_old_coeff = vdupq_n_s16(27);
+ const int16x8_t second_row_grain_coeff = first_row_old_coeff;
+ const int16x8_t second_row_old_coeff = first_row_grain_coeff;
+ for (; y < (plane_height & ~stripe_mask); ++luma_num, y += stripe_height) {
+ const int16_t* noise_stripe = (*noise_stripes)[luma_num];
+ const int16_t* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+ WriteOverlapLine10bpp_NEON(
+ noise_stripe, &noise_stripe_prev[32 * plane_width], plane_width,
+ first_row_grain_coeff, first_row_old_coeff, (*noise_image)[y]);
+
+ WriteOverlapLine10bpp_NEON(&noise_stripe[plane_width],
+ &noise_stripe_prev[(32 + 1) * plane_width],
+ plane_width, second_row_grain_coeff,
+ second_row_old_coeff, (*noise_image)[y + 1]);
+ }
+ // Either one partial stripe remains (remaining_height > 0),
+ // OR image is less than one stripe high (remaining_height < 0),
+ // OR all stripes are completed (remaining_height == 0).
+ const int remaining_height = plane_height - y;
+ if (remaining_height <= 0) {
+ return;
+ }
+ const int16_t* noise_stripe = (*noise_stripes)[luma_num];
+ const int16_t* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+ WriteOverlapLine10bpp_NEON(
+ noise_stripe, &noise_stripe_prev[32 * plane_width], plane_width,
+ first_row_grain_coeff, first_row_old_coeff, (*noise_image)[y]);
+
+ if (remaining_height > 1) {
+ WriteOverlapLine10bpp_NEON(&noise_stripe[plane_width],
+ &noise_stripe_prev[(32 + 1) * plane_width],
+ plane_width, second_row_grain_coeff,
+ second_row_old_coeff, (*noise_image)[y + 1]);
+ }
+ } else { // subsampling_y == 1
+ const int16x8_t first_row_grain_coeff = vdupq_n_s16(22);
+ const int16x8_t first_row_old_coeff = vdupq_n_s16(23);
+ for (; y < plane_height; ++luma_num, y += stripe_height) {
+ const int16_t* noise_stripe = (*noise_stripes)[luma_num];
+ const int16_t* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+ WriteOverlapLine10bpp_NEON(
+ noise_stripe, &noise_stripe_prev[16 * plane_width], plane_width,
+ first_row_grain_coeff, first_row_old_coeff, (*noise_image)[y]);
+ }
+ }
+}
+
+inline int16x8_t BlendChromaValsNoCfl(
+ const int16_t* LIBGAV1_RESTRICT scaling_lut, const int16x8_t orig,
+ const int16_t* LIBGAV1_RESTRICT noise_image_cursor,
+ const int16x8_t& average_luma, const int16x8_t& scaling_shift_vect,
+ const int32x4_t& offset, int luma_multiplier, int chroma_multiplier) {
+ uint16_t merged_buffer[8];
+ const int32x4_t weighted_luma_low =
+ vmull_n_s16(vget_low_s16(average_luma), luma_multiplier);
+ const int32x4_t weighted_luma_high =
+ vmull_n_s16(vget_high_s16(average_luma), luma_multiplier);
+ // Maximum value of combined is 127 * 1023 = 0x1FB81.
+ const int32x4_t combined_low =
+ vmlal_n_s16(weighted_luma_low, vget_low_s16(orig), chroma_multiplier);
+ const int32x4_t combined_high =
+ vmlal_n_s16(weighted_luma_high, vget_high_s16(orig), chroma_multiplier);
+ // Maximum value of offset is (255 << 8) = 0xFF00. Offset may be negative.
+ const uint16x4_t merged_low =
+ vqshrun_n_s32(vaddq_s32(offset, combined_low), 6);
+ const uint16x4_t merged_high =
+ vqshrun_n_s32(vaddq_s32(offset, combined_high), 6);
+ const uint16x8_t max_pixel = vdupq_n_u16((1 << kBitdepth10) - 1);
+ vst1q_u16(merged_buffer,
+ vminq_u16(vcombine_u16(merged_low, merged_high), max_pixel));
+ const int16x8_t scaling =
+ GetScalingFactors<kBitdepth10, uint16_t>(scaling_lut, merged_buffer);
+ const int16x8_t noise = GetSignedSource8(noise_image_cursor);
+ const int16x8_t scaled_noise =
+ ScaleNoise<kBitdepth10>(noise, scaling, scaling_shift_vect);
+ return vaddq_s16(orig, scaled_noise);
+}
+
+LIBGAV1_ALWAYS_INLINE void BlendChromaPlane10bpp_NEON(
+ const Array2D<int16_t>& noise_image, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, int scaling_shift, int chroma_offset,
+ int chroma_multiplier, int luma_multiplier,
+ const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const uint16_t* LIBGAV1_RESTRICT in_y_row, ptrdiff_t source_stride_y,
+ const uint16_t* in_chroma_row, ptrdiff_t source_stride_chroma,
+ uint16_t* out_chroma_row, ptrdiff_t dest_stride) {
+ const int16x8_t floor = vdupq_n_s16(min_value);
+ const int16x8_t ceiling = vdupq_n_s16(max_chroma);
+ const int16x8_t scaling_shift_vect = vdupq_n_s16(15 - scaling_shift);
+
+ const int chroma_height = (height + subsampling_y) >> subsampling_y;
+ const int chroma_width = (width + subsampling_x) >> subsampling_x;
+ const int safe_chroma_width = chroma_width & ~7;
+ uint16_t luma_buffer[16];
+#if LIBGAV1_MSAN
+ // TODO(b/194217060): This can be removed if the range calculations below are
+ // fixed.
+ memset(luma_buffer, 0, sizeof(luma_buffer));
+#endif
+ // Offset is added before downshifting in order to take advantage of
+ // saturation, so it has to be upscaled by 6 bits, plus 2 bits for 10bpp.
+ const int32x4_t offset = vdupq_n_s32(chroma_offset << (6 + 2));
+
+ start_height >>= subsampling_y;
+ int y = 0;
+ do {
+ int x = 0;
+ do {
+ const int luma_x = x << subsampling_x;
+ const int16x8_t average_luma = vreinterpretq_s16_u16(
+ GetAverageLuma(&in_y_row[luma_x], subsampling_x));
+ const int16x8_t orig_chroma = GetSignedSource8(&in_chroma_row[x]);
+ const int16x8_t blended = BlendChromaValsNoCfl(
+ scaling_lut, orig_chroma, &(noise_image[y + start_height][x]),
+ average_luma, scaling_shift_vect, offset, luma_multiplier,
+ chroma_multiplier);
+ StoreUnsigned8(&out_chroma_row[x],
+ vreinterpretq_u16_s16(Clip3(blended, floor, ceiling)));
+
+ x += 8;
+ } while (x < safe_chroma_width);
+
+ if (x < chroma_width) {
+ // Begin right edge iteration. Same as the normal iterations, but the
+ // |average_luma| computation requires a duplicated luma value at the
+ // end.
+ const int luma_x = x << subsampling_x;
+ const int valid_range_pixels = width - luma_x;
+ const int valid_range_bytes = valid_range_pixels * sizeof(in_y_row[0]);
+ memcpy(luma_buffer, &in_y_row[luma_x], valid_range_bytes);
+ luma_buffer[valid_range_pixels] = in_y_row[width - 1];
+ const int valid_range_chroma_bytes =
+ (chroma_width - x) * sizeof(in_chroma_row[0]);
+ const int16x8_t orig_chroma =
+ GetSignedSource8Msan(&in_chroma_row[x], valid_range_chroma_bytes);
+
+ const int16x8_t average_luma = vreinterpretq_s16_u16(GetAverageLumaMsan(
+ luma_buffer, subsampling_x, valid_range_bytes + sizeof(in_y_row[0])));
+ const int16x8_t blended = BlendChromaValsNoCfl(
+ scaling_lut, orig_chroma, &(noise_image[y + start_height][x]),
+ average_luma, scaling_shift_vect, offset, luma_multiplier,
+ chroma_multiplier);
+ StoreUnsigned8(&out_chroma_row[x],
+ vreinterpretq_u16_s16(Clip3(blended, floor, ceiling)));
+ // End of right edge iteration.
+ }
+
+ in_y_row = AddByteStride(in_y_row, source_stride_y << subsampling_y);
+ in_chroma_row = AddByteStride(in_chroma_row, source_stride_chroma);
+ out_chroma_row = AddByteStride(out_chroma_row, dest_stride);
+ } while (++y < chroma_height);
+}
+
+// This function is for the case params_.chroma_scaling_from_luma == false.
+void BlendNoiseWithImageChroma10bpp_NEON(
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* LIBGAV1_RESTRICT scaling_lut,
+ const void* LIBGAV1_RESTRICT source_plane_y, ptrdiff_t source_stride_y,
+ const void* source_plane_uv, ptrdiff_t source_stride_uv,
+ void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
+ assert(plane == kPlaneU || plane == kPlaneV);
+ const auto* noise_image =
+ static_cast<const Array2D<int16_t>*>(noise_image_ptr);
+ const auto* in_y = static_cast<const uint16_t*>(source_plane_y);
+ const auto* in_uv = static_cast<const uint16_t*>(source_plane_uv);
+ auto* out_uv = static_cast<uint16_t*>(dest_plane_uv);
+
+ const int offset = (plane == kPlaneU) ? params.u_offset : params.v_offset;
+ const int luma_multiplier =
+ (plane == kPlaneU) ? params.u_luma_multiplier : params.v_luma_multiplier;
+ const int multiplier =
+ (plane == kPlaneU) ? params.u_multiplier : params.v_multiplier;
+ BlendChromaPlane10bpp_NEON(
+ noise_image[plane], min_value, max_chroma, width, height, start_height,
+ subsampling_x, subsampling_y, params.chroma_scaling, offset, multiplier,
+ luma_multiplier, scaling_lut, in_y, source_stride_y, in_uv,
+ source_stride_uv, out_uv, dest_stride_uv);
+}
+
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
assert(dsp != nullptr);
// LumaAutoRegressionFunc
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<10, int16_t, 1>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth10, int16_t, 1>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<10, int16_t, 2>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth10, int16_t, 2>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_NEON<10, int16_t, 3>;
+ ApplyAutoRegressiveFilterToLumaGrain_NEON<kBitdepth10, int16_t, 3>;
// ChromaAutoRegressionFunc[use_luma][auto_regression_coeff_lag][subsampling]
// Chroma autoregression should never be called when lag is 0 and use_luma
// is false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 1,
+ false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 2,
+ false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 3,
+ false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 0,
+ true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 1,
+ true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 2,
+ true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_NEON<10, int16_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_NEON<kBitdepth10, int16_t, 3,
+ true>;
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_NEON;
+ dsp->film_grain.construct_noise_image_overlap =
+ ConstructNoiseImageOverlap10bpp_NEON;
- dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_NEON<10, int16_t, uint16_t>;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_NEON<kBitdepth10>;
+
+ // TODO(b/194442742): reenable this function after segfault under armv7 ASan
+ // is fixed.
+ // dsp->film_grain.blend_noise_luma =
+ // BlendNoiseWithImageLuma_NEON<kBitdepth10, int16_t, uint16_t>;
+ dsp->film_grain.blend_noise_chroma[0] = BlendNoiseWithImageChroma10bpp_NEON;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_NEON<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChromaWithCfl_NEON<kBitdepth10, int16_t, uint16_t>;
}
} // namespace
diff --git a/src/dsp/arm/film_grain_neon.h b/src/dsp/arm/film_grain_neon.h
index 44b3d1d..3ba2eef 100644
--- a/src/dsp/arm/film_grain_neon.h
+++ b/src/dsp/arm/film_grain_neon.h
@@ -35,11 +35,15 @@ void FilmGrainInit_NEON();
#define LIBGAV1_Dsp8bpp_FilmGrainAutoregressionChroma LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp10bpp_FilmGrainAutoregressionChroma LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseImageOverlap LIBGAV1_DSP_NEON
+#define LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseImageOverlap LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp8bpp_FilmGrainInitializeScalingLutFunc LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp10bpp_FilmGrainInitializeScalingLutFunc LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseLuma LIBGAV1_DSP_NEON
-#define LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma LIBGAV1_DSP_NEON
+// TODO(b/194442742): reenable this function after segfault under armv7 ASan is
+// fixed.
+// #define LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChroma LIBGAV1_DSP_NEON
+#define LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChroma LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChromaWithCfl LIBGAV1_DSP_NEON
#define LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChromaWithCfl LIBGAV1_DSP_NEON
#endif // LIBGAV1_ENABLE_NEON
diff --git a/src/dsp/arm/intra_edge_neon.cc b/src/dsp/arm/intra_edge_neon.cc
index 074283f..9b20e29 100644
--- a/src/dsp/arm/intra_edge_neon.cc
+++ b/src/dsp/arm/intra_edge_neon.cc
@@ -248,7 +248,8 @@ void IntraEdgeUpsampler_NEON(void* buffer, const int size) {
vst1_u8(pixel_buffer - 1, InterleaveLow8(result, src21));
return;
- } else if (size == 8) {
+ }
+ if (size == 8) {
// Likewise, one load + multiple vtbls seems preferred to multiple loads.
const uint8x16_t src = vld1q_u8(pixel_buffer - 1);
const uint8x8_t src0 = VQTbl1U8(src, vcreate_u8(0x0605040302010000));
diff --git a/src/dsp/arm/intrapred_cfl_neon.cc b/src/dsp/arm/intrapred_cfl_neon.cc
index 8d8748f..ad39947 100644
--- a/src/dsp/arm/intrapred_cfl_neon.cc
+++ b/src/dsp/arm/intrapred_cfl_neon.cc
@@ -76,7 +76,7 @@ template <int block_width, int block_height>
void CflSubsampler420_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, const ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride) {
const auto* src = static_cast<const uint8_t*>(source);
uint32_t sum;
if (block_width == 4) {
@@ -140,7 +140,7 @@ void CflSubsampler420_NEON(
const uint8_t a11 = src[max_luma_width - 1 + stride];
// Dup the 2x2 sum at the max luma offset.
const uint16x8_t max_luma_sum =
- vdupq_n_u16((uint16_t)((a00 + a01 + a10 + a11) << 1));
+ vdupq_n_u16(static_cast<uint16_t>((a00 + a01 + a10 + a11) << 1));
uint16x8_t x_index = {0, 2, 4, 6, 8, 10, 12, 14};
ptrdiff_t src_x_offset = 0;
@@ -173,7 +173,7 @@ template <int block_width, int block_height>
void CflSubsampler444_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, const ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride) {
const auto* src = static_cast<const uint8_t*>(source);
uint32_t sum;
if (block_width == 4) {
@@ -276,7 +276,7 @@ inline uint8x8_t Combine8(const int16x8_t luma, const int alpha,
// uint8_t. Saturated int16_t >> 6 outranges uint8_t.
template <int block_height>
inline void CflIntraPredictor4xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint8_t*>(dest);
@@ -295,7 +295,7 @@ inline void CflIntraPredictor4xN_NEON(
template <int block_height>
inline void CflIntraPredictor8xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint8_t*>(dest);
@@ -310,7 +310,7 @@ inline void CflIntraPredictor8xN_NEON(
template <int block_height>
inline void CflIntraPredictor16xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint8_t*>(dest);
@@ -328,7 +328,7 @@ inline void CflIntraPredictor16xN_NEON(
template <int block_height>
inline void CflIntraPredictor32xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint8_t*>(dest);
@@ -507,7 +507,8 @@ inline uint16x8_t StoreLumaResults8_420(const uint16x8_t vertical_sum0,
template <int block_height_log2, bool is_inside>
void CflSubsampler444_4xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
static_assert(block_height_log2 <= 4, "");
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
@@ -568,7 +569,7 @@ template <int block_height_log2>
void CflSubsampler444_4xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_cast<void>(max_luma_width);
static_cast<void>(max_luma_height);
static_assert(block_height_log2 <= 4, "");
@@ -588,7 +589,8 @@ void CflSubsampler444_4xH_NEON(
template <int block_height_log2, bool is_inside>
void CflSubsampler444_8xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
const auto* src = static_cast<const uint16_t*>(source);
@@ -643,7 +645,7 @@ template <int block_height_log2>
void CflSubsampler444_8xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_cast<void>(max_luma_width);
static_cast<void>(max_luma_height);
static_assert(block_height_log2 <= 5, "");
@@ -667,7 +669,7 @@ template <int block_width_log2, int block_height_log2, bool is_inside>
void CflSubsampler444_WxH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
const int block_width = 1 << block_width_log2;
@@ -751,7 +753,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler444_WxH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_width_log2 == 4 || block_width_log2 == 5,
"This function will only work for block_width 16 and 32.");
static_assert(block_height_log2 <= 5, "");
@@ -773,7 +775,7 @@ template <int block_height_log2>
void CflSubsampler420_4xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int /*max_luma_width*/, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
@@ -839,7 +841,8 @@ void CflSubsampler420_4xH_NEON(
template <int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_8xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
@@ -944,7 +947,7 @@ template <int block_height_log2>
void CflSubsampler420_8xH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
if (max_luma_width == 8) {
CflSubsampler420Impl_8xH_NEON<block_height_log2, 8>(luma, max_luma_height,
source, stride);
@@ -957,7 +960,8 @@ void CflSubsampler420_8xH_NEON(
template <int block_width_log2, int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_WxH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
const int block_height = 1 << block_height_log2;
@@ -1062,7 +1066,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler420_WxH_NEON(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
switch (max_luma_width) {
case 8:
CflSubsampler420Impl_WxH_NEON<block_width_log2, block_height_log2, 8>(
@@ -1109,7 +1113,7 @@ inline uint16x8_t Combine8(const int16x8_t luma, const int16x8_t alpha_abs,
template <int block_height, int bitdepth = 10>
inline void CflIntraPredictor4xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint16_t*>(dest);
@@ -1133,7 +1137,7 @@ inline void CflIntraPredictor4xN_NEON(
template <int block_height, int bitdepth = 10>
inline void CflIntraPredictor8xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint16_t*>(dest);
@@ -1153,7 +1157,7 @@ inline void CflIntraPredictor8xN_NEON(
template <int block_height, int bitdepth = 10>
inline void CflIntraPredictor16xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint16_t*>(dest);
@@ -1177,7 +1181,7 @@ inline void CflIntraPredictor16xN_NEON(
template <int block_height, int bitdepth = 10>
inline void CflIntraPredictor32xN_NEON(
- void* const dest, const ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint16_t*>(dest);
diff --git a/src/dsp/arm/intrapred_directional_neon.cc b/src/dsp/arm/intrapred_directional_neon.cc
index 3f5edbd..3cad4a6 100644
--- a/src/dsp/arm/intrapred_directional_neon.cc
+++ b/src/dsp/arm/intrapred_directional_neon.cc
@@ -29,6 +29,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
namespace libgav1 {
namespace dsp {
@@ -40,9 +41,9 @@ inline uint8x8_t WeightedBlend(const uint8x8_t a, const uint8x8_t b,
const uint8x8_t a_weight,
const uint8x8_t b_weight) {
const uint16x8_t a_product = vmull_u8(a, a_weight);
- const uint16x8_t b_product = vmull_u8(b, b_weight);
+ const uint16x8_t sum = vmlal_u8(a_product, b, b_weight);
- return vrshrn_n_u16(vaddq_u16(a_product, b_product), 5 /*log2(32)*/);
+ return vrshrn_n_u16(sum, 5 /*log2(32)*/);
}
// For vertical operations the weights are one constant value.
@@ -52,9 +53,9 @@ inline uint8x8_t WeightedBlend(const uint8x8_t a, const uint8x8_t b,
}
// Fill |left| and |right| with the appropriate values for a given |base_step|.
-inline void LoadStepwise(const uint8_t* const source, const uint8x8_t left_step,
- const uint8x8_t right_step, uint8x8_t* left,
- uint8x8_t* right) {
+inline void LoadStepwise(const uint8_t* LIBGAV1_RESTRICT const source,
+ const uint8x8_t left_step, const uint8x8_t right_step,
+ uint8x8_t* left, uint8x8_t* right) {
const uint8x16_t mixed = vld1q_u8(source);
*left = VQTbl1U8(mixed, left_step);
*right = VQTbl1U8(mixed, right_step);
@@ -62,17 +63,18 @@ inline void LoadStepwise(const uint8_t* const source, const uint8x8_t left_step,
// Handle signed step arguments by ignoring the sign. Negative values are
// considered out of range and overwritten later.
-inline void LoadStepwise(const uint8_t* const source, const int8x8_t left_step,
- const int8x8_t right_step, uint8x8_t* left,
- uint8x8_t* right) {
+inline void LoadStepwise(const uint8_t* LIBGAV1_RESTRICT const source,
+ const int8x8_t left_step, const int8x8_t right_step,
+ uint8x8_t* left, uint8x8_t* right) {
LoadStepwise(source, vreinterpret_u8_s8(left_step),
vreinterpret_u8_s8(right_step), left, right);
}
// Process 4 or 8 |width| by any |height|.
template <int width>
-inline void DirectionalZone1_WxH(uint8_t* dst, const ptrdiff_t stride,
- const int height, const uint8_t* const top,
+inline void DirectionalZone1_WxH(uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const top,
const int xstep, const bool upsampled) {
assert(width == 4 || width == 8);
@@ -142,10 +144,11 @@ inline void DirectionalZone1_WxH(uint8_t* dst, const ptrdiff_t stride,
// Process a multiple of 8 |width| by any |height|. Processes horizontally
// before vertically in the hopes of being a little more cache friendly.
-inline void DirectionalZone1_WxH(uint8_t* dst, const ptrdiff_t stride,
- const int width, const int height,
- const uint8_t* const top, const int xstep,
- const bool upsampled) {
+inline void DirectionalZone1_WxH(uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const int width,
+ const int height,
+ const uint8_t* LIBGAV1_RESTRICT const top,
+ const int xstep, const bool upsampled) {
assert(width % 8 == 0);
const int upsample_shift = static_cast<int>(upsampled);
const int scale_bits = 6 - upsample_shift;
@@ -203,14 +206,12 @@ inline void DirectionalZone1_WxH(uint8_t* dst, const ptrdiff_t stride,
} while (++y < height);
}
-void DirectionalIntraPredictorZone1_NEON(void* const dest,
- const ptrdiff_t stride,
- const void* const top_row,
- const int width, const int height,
- const int xstep,
- const bool upsampled_top) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- uint8_t* dst = static_cast<uint8_t*>(dest);
+void DirectionalIntraPredictorZone1_NEON(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row, const int width,
+ const int height, const int xstep, const bool upsampled_top) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ auto* dst = static_cast<uint8_t*>(dest);
assert(xstep > 0);
@@ -282,11 +283,10 @@ void DirectionalIntraPredictorZone1_NEON(void* const dest,
// Process 4 or 8 |width| by 4 or 8 |height|.
template <int width>
-inline void DirectionalZone3_WxH(uint8_t* dest, const ptrdiff_t stride,
- const int height,
- const uint8_t* const left_column,
- const int base_left_y, const int ystep,
- const int upsample_shift) {
+inline void DirectionalZone3_WxH(
+ uint8_t* LIBGAV1_RESTRICT dest, const ptrdiff_t stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const left_column, const int base_left_y,
+ const int ystep, const int upsample_shift) {
assert(width == 4 || width == 8);
assert(height == 4 || height == 8);
const int scale_bits = 6 - upsample_shift;
@@ -417,12 +417,10 @@ constexpr int kPositiveIndexOffset = 15;
// Process 4 or 8 |width| by any |height|.
template <int width>
-inline void DirectionalZone2FromLeftCol_WxH(uint8_t* dst,
- const ptrdiff_t stride,
- const int height,
- const uint8_t* const left_column,
- const int16x8_t left_y,
- const int upsample_shift) {
+inline void DirectionalZone2FromLeftCol_WxH(
+ uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const left_column, const int16x8_t left_y,
+ const int upsample_shift) {
assert(width == 4 || width == 8);
// The shift argument must be a constant.
@@ -468,12 +466,10 @@ inline void DirectionalZone2FromLeftCol_WxH(uint8_t* dst,
// Process 4 or 8 |width| by any |height|.
template <int width>
-inline void DirectionalZone1Blend_WxH(uint8_t* dest, const ptrdiff_t stride,
- const int height,
- const uint8_t* const top_row,
- int zone_bounds, int top_x,
- const int xstep,
- const int upsample_shift) {
+inline void DirectionalZone1Blend_WxH(
+ uint8_t* LIBGAV1_RESTRICT dest, const ptrdiff_t stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const top_row, int zone_bounds, int top_x,
+ const int xstep, const int upsample_shift) {
assert(width == 4 || width == 8);
const int scale_bits_x = 6 - upsample_shift;
@@ -523,12 +519,12 @@ constexpr int kDirectionalZone2ShuffleInvalidHeight[16] = {
// then handle only blocks that take from |left_ptr|. Additionally, a fast
// index-shuffle approach is used for pred values from |left_column| in sections
// that permit it.
-inline void DirectionalZone2_4xH(uint8_t* dst, const ptrdiff_t stride,
- const uint8_t* const top_row,
- const uint8_t* const left_column,
- const int height, const int xstep,
- const int ystep, const bool upsampled_top,
- const bool upsampled_left) {
+inline void DirectionalZone2_4xH(
+ uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride,
+ const uint8_t* LIBGAV1_RESTRICT const top_row,
+ const uint8_t* LIBGAV1_RESTRICT const left_column, const int height,
+ const int xstep, const int ystep, const bool upsampled_top,
+ const bool upsampled_left) {
const int upsample_left_shift = static_cast<int>(upsampled_left);
const int upsample_top_shift = static_cast<int>(upsampled_top);
@@ -564,8 +560,8 @@ inline void DirectionalZone2_4xH(uint8_t* dst, const ptrdiff_t stride,
// If the 64 scaling is regarded as a decimal point, the first value of the
// left_y vector omits the portion which is covered under the left_column
// offset. The following values need the full ystep as a relative offset.
- int16x8_t left_y = vmulq_n_s16(zero_to_seven, -ystep);
- left_y = vaddq_s16(left_y, vdupq_n_s16(-ystep_remainder));
+ const int16x8_t remainder = vdupq_n_s16(-ystep_remainder);
+ const int16x8_t left_y = vmlaq_n_s16(remainder, zero_to_seven, -ystep);
// This loop treats each set of 4 columns in 3 stages with y-value boundaries.
// The first stage, before the first y-loop, covers blocks that are only
@@ -639,13 +635,12 @@ inline void DirectionalZone2_4xH(uint8_t* dst, const ptrdiff_t stride,
}
// Process a multiple of 8 |width|.
-inline void DirectionalZone2_8(uint8_t* const dst, const ptrdiff_t stride,
- const uint8_t* const top_row,
- const uint8_t* const left_column,
- const int width, const int height,
- const int xstep, const int ystep,
- const bool upsampled_top,
- const bool upsampled_left) {
+inline void DirectionalZone2_8(
+ uint8_t* LIBGAV1_RESTRICT const dst, const ptrdiff_t stride,
+ const uint8_t* LIBGAV1_RESTRICT const top_row,
+ const uint8_t* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int xstep, const int ystep,
+ const bool upsampled_top, const bool upsampled_left) {
const int upsample_left_shift = static_cast<int>(upsampled_left);
const int upsample_top_shift = static_cast<int>(upsampled_top);
@@ -668,12 +663,6 @@ inline void DirectionalZone2_8(uint8_t* const dst, const ptrdiff_t stride,
assert(xstep >= 3);
const int min_top_only_x = std::min((height * xstep) >> 6, width);
- // For steep angles, the source pixels from |left_column| may not fit in a
- // 16-byte load for shuffling.
- // TODO(petersonab): Find a more precise formula for this subject to x.
- const int max_shuffle_height =
- std::min(kDirectionalZone2ShuffleInvalidHeight[ystep >> 6], height);
-
// Offsets the original zone bound value to simplify x < (y+1)*xstep/64 -1
int xstep_bounds_base = (xstep == 64) ? 0 : xstep - 1;
@@ -687,8 +676,8 @@ inline void DirectionalZone2_8(uint8_t* const dst, const ptrdiff_t stride,
// If the 64 scaling is regarded as a decimal point, the first value of the
// left_y vector omits the portion which is covered under the left_column
// offset. Following values need the full ystep as a relative offset.
- int16x8_t left_y = vmulq_n_s16(zero_to_seven, -ystep);
- left_y = vaddq_s16(left_y, vdupq_n_s16(-ystep_remainder));
+ const int16x8_t remainder = vdupq_n_s16(-ystep_remainder);
+ int16x8_t left_y = vmlaq_n_s16(remainder, zero_to_seven, -ystep);
// This loop treats each set of 4 columns in 3 stages with y-value boundaries.
// The first stage, before the first y-loop, covers blocks that are only
@@ -696,12 +685,21 @@ inline void DirectionalZone2_8(uint8_t* const dst, const ptrdiff_t stride,
// blocks that have a mixture of values computed from top or left. The final
// stage covers blocks that are only computed from the left.
int x = 0;
+ // For steep angles, the source pixels from |left_column| may not fit in a
+ // 16-byte load for shuffling. |d| represents the number of pixels that can
+ // fit in one contiguous vector when stepping by |ystep|. For a given x
+ // position, the left column values can be obtained by VTBL as long as the
+ // values at row[x + d] and beyond come from the top row. However, this does
+ // not guarantee that the vector will also contain all of the values needed
+ // from top row.
+ const int d = 16 / ((ystep >> 6) + 1);
for (int left_offset = -left_base_increment; x < min_top_only_x; x += 8,
xstep_bounds_base -= (8 << 6),
left_y = vsubq_s16(left_y, increment_left8),
left_offset -= left_base_increment8) {
uint8_t* dst_x = dst + x;
-
+ const int max_shuffle_height =
+ std::min(((x + d) << 6) / xstep, height) & ~7;
// Round down to the nearest multiple of 8.
const int max_top_only_y = std::min(((x + 1) << 6) / xstep, height) & ~7;
DirectionalZone1_WxH<8>(dst_x, stride, max_top_only_y,
@@ -770,14 +768,20 @@ inline void DirectionalZone2_8(uint8_t* const dst, const ptrdiff_t stride,
}
void DirectionalIntraPredictorZone2_NEON(
- void* const dest, const ptrdiff_t stride, const void* const top_row,
- const void* const left_column, const int width, const int height,
- const int xstep, const int ystep, const bool upsampled_top,
- const bool upsampled_left) {
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int xstep, const int ystep,
+ const bool upsampled_top, const bool upsampled_left) {
// Increasing the negative buffer for this function allows more rows to be
// processed at a time without branching in an inner loop to check the base.
uint8_t top_buffer[288];
uint8_t left_buffer[288];
+#if LIBGAV1_MSAN
+ memset(top_buffer, 0, sizeof(top_buffer));
+ memset(left_buffer, 0, sizeof(left_buffer));
+#endif // LIBGAV1_MSAN
+
memcpy(top_buffer + 128, static_cast<const uint8_t*>(top_row) - 16, 160);
memcpy(left_buffer + 128, static_cast<const uint8_t*>(left_column) - 16, 160);
const uint8_t* top_ptr = top_buffer + 144;
@@ -793,12 +797,10 @@ void DirectionalIntraPredictorZone2_NEON(
}
}
-void DirectionalIntraPredictorZone3_NEON(void* const dest,
- const ptrdiff_t stride,
- const void* const left_column,
- const int width, const int height,
- const int ystep,
- const bool upsampled_left) {
+void DirectionalIntraPredictorZone3_NEON(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int ystep, const bool upsampled_left) {
const auto* const left = static_cast<const uint8_t*>(left_column);
assert(ystep > 0);
@@ -819,7 +821,7 @@ void DirectionalIntraPredictorZone3_NEON(void* const dest,
do {
int x = 0;
do {
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
dst += y * stride + x;
uint8x8_t left_v[4], right_v[4], value_v[4];
const int ystep_base = ystep * x;
@@ -886,7 +888,7 @@ void DirectionalIntraPredictorZone3_NEON(void* const dest,
do {
int x = 0;
do {
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
dst += y * stride + x;
const int ystep_base = ystep * (x + 1);
@@ -934,7 +936,8 @@ inline uint16x8_t WeightedBlend(const uint16x8_t a, const uint16x8_t b,
}
// Each element of |dest| contains values associated with one weight value.
-inline void LoadEdgeVals(uint16x4x2_t* dest, const uint16_t* const source,
+inline void LoadEdgeVals(uint16x4x2_t* dest,
+ const uint16_t* LIBGAV1_RESTRICT const source,
const bool upsampled) {
if (upsampled) {
*dest = vld2_u16(source);
@@ -945,7 +948,8 @@ inline void LoadEdgeVals(uint16x4x2_t* dest, const uint16_t* const source,
}
// Each element of |dest| contains values associated with one weight value.
-inline void LoadEdgeVals(uint16x8x2_t* dest, const uint16_t* const source,
+inline void LoadEdgeVals(uint16x8x2_t* dest,
+ const uint16_t* LIBGAV1_RESTRICT const source,
const bool upsampled) {
if (upsampled) {
*dest = vld2q_u16(source);
@@ -956,8 +960,9 @@ inline void LoadEdgeVals(uint16x8x2_t* dest, const uint16_t* const source,
}
template <bool upsampled>
-inline void DirectionalZone1_4xH(uint16_t* dst, const ptrdiff_t stride,
- const int height, const uint16_t* const top,
+inline void DirectionalZone1_4xH(uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const top,
const int xstep) {
const int upsample_shift = static_cast<int>(upsampled);
const int index_scale_bits = 6 - upsample_shift;
@@ -1007,9 +1012,11 @@ inline void DirectionalZone1_4xH(uint16_t* dst, const ptrdiff_t stride,
// Process a multiple of 8 |width| by any |height|. Processes horizontally
// before vertically in the hopes of being a little more cache friendly.
template <bool upsampled>
-inline void DirectionalZone1_WxH(uint16_t* dst, const ptrdiff_t stride,
- const int width, const int height,
- const uint16_t* const top, const int xstep) {
+inline void DirectionalZone1_WxH(uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const int width,
+ const int height,
+ const uint16_t* LIBGAV1_RESTRICT const top,
+ const int xstep) {
assert(width % 8 == 0);
const int upsample_shift = static_cast<int>(upsampled);
const int index_scale_bits = 6 - upsample_shift;
@@ -1068,10 +1075,11 @@ inline void DirectionalZone1_WxH(uint16_t* dst, const ptrdiff_t stride,
// Process a multiple of 8 |width| by any |height|. Processes horizontally
// before vertically in the hopes of being a little more cache friendly.
-inline void DirectionalZone1_Large(uint16_t* dst, const ptrdiff_t stride,
- const int width, const int height,
- const uint16_t* const top, const int xstep,
- const bool upsampled) {
+inline void DirectionalZone1_Large(uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const int width,
+ const int height,
+ const uint16_t* LIBGAV1_RESTRICT const top,
+ const int xstep, const bool upsampled) {
assert(width % 8 == 0);
const int upsample_shift = static_cast<int>(upsampled);
const int index_scale_bits = 6 - upsample_shift;
@@ -1156,13 +1164,12 @@ inline void DirectionalZone1_Large(uint16_t* dst, const ptrdiff_t stride,
}
}
-void DirectionalIntraPredictorZone1_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const int width, const int height,
- const int xstep,
- const bool upsampled_top) {
- const uint16_t* const top = static_cast<const uint16_t*>(top_row);
- uint16_t* dst = static_cast<uint16_t*>(dest);
+void DirectionalIntraPredictorZone1_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row, const int width,
+ const int height, const int xstep, const bool upsampled_top) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ auto* dst = static_cast<uint16_t*>(dest);
stride /= sizeof(top[0]);
assert(xstep > 0);
@@ -1225,9 +1232,10 @@ void DirectionalIntraPredictorZone1_NEON(void* const dest, ptrdiff_t stride,
// 42 52 62 72 60 61 62 63
// 43 53 63 73 70 71 72 73
template <bool upsampled>
-inline void DirectionalZone3_4x4(uint8_t* dst, const ptrdiff_t stride,
- const uint16_t* const left, const int ystep,
- const int base_left_y = 0) {
+inline void DirectionalZone3_4x4(uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const left,
+ const int ystep, const int base_left_y = 0) {
const int upsample_shift = static_cast<int>(upsampled);
const int index_scale_bits = 6 - upsample_shift;
@@ -1278,8 +1286,9 @@ inline void DirectionalZone3_4x4(uint8_t* dst, const ptrdiff_t stride,
}
template <bool upsampled>
-inline void DirectionalZone3_4xH(uint8_t* dest, const ptrdiff_t stride,
- const int height, const uint16_t* const left,
+inline void DirectionalZone3_4xH(uint8_t* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const left,
const int ystep) {
const int upsample_shift = static_cast<int>(upsampled);
int y = 0;
@@ -1292,8 +1301,9 @@ inline void DirectionalZone3_4xH(uint8_t* dest, const ptrdiff_t stride,
}
template <bool upsampled>
-inline void DirectionalZone3_Wx4(uint8_t* dest, const ptrdiff_t stride,
- const int width, const uint16_t* const left,
+inline void DirectionalZone3_Wx4(uint8_t* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t stride, const int width,
+ const uint16_t* LIBGAV1_RESTRICT const left,
const int ystep) {
int x = 0;
int base_left_y = 0;
@@ -1308,9 +1318,10 @@ inline void DirectionalZone3_Wx4(uint8_t* dest, const ptrdiff_t stride,
}
template <bool upsampled>
-inline void DirectionalZone3_8x8(uint8_t* dest, const ptrdiff_t stride,
- const uint16_t* const left, const int ystep,
- const int base_left_y = 0) {
+inline void DirectionalZone3_8x8(uint8_t* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const left,
+ const int ystep, const int base_left_y = 0) {
const int upsample_shift = static_cast<int>(upsampled);
const int index_scale_bits = 6 - upsample_shift;
@@ -1400,9 +1411,11 @@ inline void DirectionalZone3_8x8(uint8_t* dest, const ptrdiff_t stride,
}
template <bool upsampled>
-inline void DirectionalZone3_WxH(uint8_t* dest, const ptrdiff_t stride,
- const int width, const int height,
- const uint16_t* const left, const int ystep) {
+inline void DirectionalZone3_WxH(uint8_t* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t stride, const int width,
+ const int height,
+ const uint16_t* LIBGAV1_RESTRICT const left,
+ const int ystep) {
const int upsample_shift = static_cast<int>(upsampled);
// Zone3 never runs out of left_column values.
assert((width + height - 1) << upsample_shift > // max_base_y
@@ -1424,14 +1437,12 @@ inline void DirectionalZone3_WxH(uint8_t* dest, const ptrdiff_t stride,
} while (y < height);
}
-void DirectionalIntraPredictorZone3_NEON(void* const dest,
- const ptrdiff_t stride,
- const void* const left_column,
- const int width, const int height,
- const int ystep,
- const bool upsampled_left) {
- const uint16_t* const left = static_cast<const uint16_t*>(left_column);
- uint8_t* dst = static_cast<uint8_t*>(dest);
+void DirectionalIntraPredictorZone3_NEON(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int ystep, const bool upsampled_left) {
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ auto* dst = static_cast<uint8_t*>(dest);
if (ystep == 64) {
assert(!upsampled_left);
@@ -1472,10 +1483,672 @@ void DirectionalIntraPredictorZone3_NEON(void* const dest,
}
}
+// -----------------------------------------------------------------------------
+// Zone2
+// This function deals with cases not found in zone 1 or zone 3. The extreme
+// angles are 93, which makes for sharp ascents along |left_column| with each
+// successive dest row element until reaching |top_row|, and 177, with a shallow
+// ascent up |left_column| until reaching large jumps along |top_row|. In the
+// extremely steep cases, source vectors can only be loaded one lane at a time.
+
+// Fill |left| and |right| with the appropriate values for a given |base_step|.
+inline void LoadStepwise(const void* LIBGAV1_RESTRICT const source,
+ const uint8x8_t left_step, const uint8x8_t right_step,
+ uint16x4_t* left, uint16x4_t* right) {
+ const uint8x16x2_t mixed = {
+ vld1q_u8(static_cast<const uint8_t*>(source)),
+ vld1q_u8(static_cast<const uint8_t*>(source) + 16)};
+ *left = vreinterpret_u16_u8(VQTbl2U8(mixed, left_step));
+ *right = vreinterpret_u16_u8(VQTbl2U8(mixed, right_step));
+}
+
+inline void LoadStepwise(const void* LIBGAV1_RESTRICT const source,
+ const uint8x8_t left_step_0,
+ const uint8x8_t right_step_0,
+ const uint8x8_t left_step_1,
+ const uint8x8_t right_step_1, uint16x8_t* left,
+ uint16x8_t* right) {
+ const uint8x16x2_t mixed = {
+ vld1q_u8(static_cast<const uint8_t*>(source)),
+ vld1q_u8(static_cast<const uint8_t*>(source) + 16)};
+ const uint16x4_t left_low = vreinterpret_u16_u8(VQTbl2U8(mixed, left_step_0));
+ const uint16x4_t left_high =
+ vreinterpret_u16_u8(VQTbl2U8(mixed, left_step_1));
+ *left = vcombine_u16(left_low, left_high);
+ const uint16x4_t right_low =
+ vreinterpret_u16_u8(VQTbl2U8(mixed, right_step_0));
+ const uint16x4_t right_high =
+ vreinterpret_u16_u8(VQTbl2U8(mixed, right_step_1));
+ *right = vcombine_u16(right_low, right_high);
+}
+
+// Blend two values based on weight pairs that each sum to 32.
+inline uint16x4_t WeightedBlend(const uint16x4_t a, const uint16x4_t b,
+ const uint16x4_t a_weight,
+ const uint16x4_t b_weight) {
+ const uint16x4_t a_product = vmul_u16(a, a_weight);
+ const uint16x4_t sum = vmla_u16(a_product, b, b_weight);
+
+ return vrshr_n_u16(sum, 5 /*log2(32)*/);
+}
+
+// Blend two values based on weight pairs that each sum to 32.
+inline uint16x8_t WeightedBlend(const uint16x8_t a, const uint16x8_t b,
+ const uint16x8_t a_weight,
+ const uint16x8_t b_weight) {
+ const uint16x8_t a_product = vmulq_u16(a, a_weight);
+ const uint16x8_t sum = vmlaq_u16(a_product, b, b_weight);
+
+ return vrshrq_n_u16(sum, 5 /*log2(32)*/);
+}
+
+// Because the source values "move backwards" as the row index increases, the
+// indices derived from ystep are generally negative in localized functions.
+// This is accommodated by making sure the relative indices are within [-15, 0]
+// when the function is called, and sliding them into the inclusive range
+// [0, 15], relative to a lower base address. 15 is the Pixel offset, so 30 is
+// the byte offset for table lookups.
+
+constexpr int kPositiveIndexOffsetPixels = 15;
+constexpr int kPositiveIndexOffsetBytes = 30;
+
+inline void DirectionalZone2FromLeftCol_4xH(
+ uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const left_column, const int16x4_t left_y,
+ const bool upsampled) {
+ const int upsample_shift = static_cast<int>(upsampled);
+
+ const int index_scale_bits = 6;
+ // The values in |offset_y| are negative, except for the first element, which
+ // is zero.
+ int16x4_t offset_y;
+ int16x4_t shift_upsampled = left_y;
+ // The shift argument must be a constant, otherwise use upsample_shift
+ // directly.
+ if (upsampled) {
+ offset_y = vshr_n_s16(left_y, index_scale_bits - 1 /*upsample_shift*/);
+ shift_upsampled = vshl_n_s16(shift_upsampled, 1);
+ } else {
+ offset_y = vshr_n_s16(left_y, index_scale_bits);
+ }
+ offset_y = vshl_n_s16(offset_y, 1);
+
+ // Select values to the left of the starting point.
+ // The 15th element (and 16th) will be all the way at the end, to the
+ // right. With a negative ystep everything else will be "left" of them.
+ // This supports cumulative steps up to 15. We could support up to 16 by
+ // doing separate loads for |left_values| and |right_values|. vtbl
+ // supports 2 Q registers as input which would allow for cumulative
+ // offsets of 32.
+ // |sampler_0| indexes the first byte of each 16-bit value.
+ const int16x4_t sampler_0 =
+ vadd_s16(offset_y, vdup_n_s16(kPositiveIndexOffsetBytes));
+ // |sampler_1| indexes the second byte of each 16-bit value.
+ const int16x4_t sampler_1 = vadd_s16(sampler_0, vdup_n_s16(1));
+ const int16x4x2_t sampler = vzip_s16(sampler_0, sampler_1);
+ const uint8x8_t left_indices =
+ vqmovun_s16(vcombine_s16(sampler.val[0], sampler.val[1]));
+ const uint8x8_t right_indices =
+ vadd_u8(left_indices, vdup_n_u8(sizeof(uint16_t)));
+
+ const int16x4_t shift_masked = vand_s16(shift_upsampled, vdup_n_s16(0x3f));
+ const uint16x4_t shift_0 = vreinterpret_u16_s16(vshr_n_s16(shift_masked, 1));
+ const uint16x4_t shift_1 = vsub_u16(vdup_n_u16(32), shift_0);
+
+ int y = 0;
+ do {
+ uint16x4_t src_left, src_right;
+ LoadStepwise(
+ left_column - kPositiveIndexOffsetPixels + (y << upsample_shift),
+ left_indices, right_indices, &src_left, &src_right);
+ const uint16x4_t val = WeightedBlend(src_left, src_right, shift_1, shift_0);
+
+ Store4(dst, val);
+ dst += stride;
+ } while (++y < height);
+}
+
+inline void DirectionalZone2FromLeftCol_8xH(
+ uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const left_column, const int16x8_t left_y,
+ const bool upsampled) {
+ const int upsample_shift = static_cast<int>(upsampled);
+
+ const int index_scale_bits = 6;
+ // The values in |offset_y| are negative, except for the first element, which
+ // is zero.
+ int16x8_t offset_y = left_y;
+ int16x8_t shift_upsampled = left_y;
+ // The shift argument must be a constant, otherwise use upsample_shift
+ // directly.
+ if (upsampled) {
+ offset_y = vshrq_n_s16(left_y, index_scale_bits - 1);
+ shift_upsampled = vshlq_n_s16(shift_upsampled, 1);
+ } else {
+ offset_y = vshrq_n_s16(left_y, index_scale_bits);
+ }
+ offset_y = vshlq_n_s16(offset_y, 1);
+
+ // Select values to the left of the starting point.
+ // The 15th element (and 16th) will be all the way at the end, to the right.
+ // With a negative ystep everything else will be "left" of them.
+ // This supports cumulative steps up to 15. We could support up to 16 by doing
+ // separate loads for |left_values| and |right_values|. vtbl supports 2 Q
+ // registers as input which would allow for cumulative offsets of 32.
+ // |sampler_0| indexes the first byte of each 16-bit value.
+ const int16x8_t sampler_0 =
+ vaddq_s16(offset_y, vdupq_n_s16(kPositiveIndexOffsetBytes));
+ // |sampler_1| indexes the second byte of each 16-bit value.
+ const int16x8_t sampler_1 = vaddq_s16(sampler_0, vdupq_n_s16(1));
+ const int16x8x2_t sampler = vzipq_s16(sampler_0, sampler_1);
+ const uint8x8_t left_values_0 = vqmovun_s16(sampler.val[0]);
+ const uint8x8_t left_values_1 = vqmovun_s16(sampler.val[1]);
+ const uint8x8_t right_values_0 =
+ vadd_u8(left_values_0, vdup_n_u8(sizeof(uint16_t)));
+ const uint8x8_t right_values_1 =
+ vadd_u8(left_values_1, vdup_n_u8(sizeof(uint16_t)));
+
+ const int16x8_t shift_masked = vandq_s16(shift_upsampled, vdupq_n_s16(0x3f));
+ const uint16x8_t shift_0 =
+ vreinterpretq_u16_s16(vshrq_n_s16(shift_masked, 1));
+ const uint16x8_t shift_1 = vsubq_u16(vdupq_n_u16(32), shift_0);
+
+ int y = 0;
+ do {
+ uint16x8_t src_left, src_right;
+ LoadStepwise(
+ left_column - kPositiveIndexOffsetPixels + (y << upsample_shift),
+ left_values_0, right_values_0, left_values_1, right_values_1, &src_left,
+ &src_right);
+ const uint16x8_t val = WeightedBlend(src_left, src_right, shift_1, shift_0);
+
+ Store8(dst, val);
+ dst += stride;
+ } while (++y < height);
+}
+
+template <bool upsampled>
+inline void DirectionalZone1Blend_4xH(
+ uint8_t* LIBGAV1_RESTRICT dest, const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const top_row, int zone_bounds, int top_x,
+ const int xstep) {
+ const int upsample_shift = static_cast<int>(upsampled);
+ const int scale_bits_x = 6 - upsample_shift;
+
+ // Representing positions along the row, which |zone_bounds| will target for
+ // the blending boundary.
+ const int16x4_t indices = {0, 1, 2, 3};
+
+ uint16x4x2_t top_vals;
+ int y = height;
+ do {
+ const uint16_t* const src = top_row + (top_x >> scale_bits_x);
+ LoadEdgeVals(&top_vals, src, upsampled);
+
+ const uint16_t shift_0 = ((top_x << upsample_shift) & 0x3f) >> 1;
+ const uint16_t shift_1 = 32 - shift_0;
+
+ const uint16x4_t val =
+ WeightedBlend(top_vals.val[0], top_vals.val[1], shift_1, shift_0);
+
+ const uint16x4_t dst_blend = Load4U16(dest);
+ // |zone_bounds| values can be negative.
+ const uint16x4_t blend = vcge_s16(indices, vdup_n_s16(zone_bounds >> 6));
+ const uint16x4_t output = vbsl_u16(blend, val, dst_blend);
+
+ Store4(dest, output);
+ dest += stride;
+ zone_bounds += xstep;
+ top_x -= xstep;
+ } while (--y != 0);
+}
+
+template <bool upsampled>
+inline void DirectionalZone1Blend_8xH(
+ uint8_t* LIBGAV1_RESTRICT dest, const ptrdiff_t stride, const int height,
+ const uint16_t* LIBGAV1_RESTRICT const top_row, int zone_bounds, int top_x,
+ const int xstep) {
+ const int upsample_shift = static_cast<int>(upsampled);
+ const int scale_bits_x = 6 - upsample_shift;
+
+ // Representing positions along the row, which |zone_bounds| will target for
+ // the blending boundary.
+ const int16x8_t indices = {0, 1, 2, 3, 4, 5, 6, 7};
+
+ uint16x8x2_t top_vals;
+ int y = height;
+ do {
+ const uint16_t* const src = top_row + (top_x >> scale_bits_x);
+ LoadEdgeVals(&top_vals, src, upsampled);
+
+ const uint16_t shift_0 = ((top_x << upsample_shift) & 0x3f) >> 1;
+ const uint16_t shift_1 = 32 - shift_0;
+
+ const uint16x8_t val =
+ WeightedBlend(top_vals.val[0], top_vals.val[1], shift_1, shift_0);
+
+ const uint16x8_t dst_blend = Load8U16(dest);
+ // |zone_bounds| values can be negative.
+ const uint16x8_t blend = vcgeq_s16(indices, vdupq_n_s16(zone_bounds >> 6));
+ const uint16x8_t output = vbslq_u16(blend, val, dst_blend);
+
+ Store8(dest, output);
+ dest += stride;
+ zone_bounds += xstep;
+ top_x -= xstep;
+ } while (--y != 0);
+}
+
+// The height at which a load of 16 bytes will not contain enough source pixels
+// from |left_column| to supply an accurate row when computing 8 pixels at a
+// time. The values are found by inspection. By coincidence, all angles that
+// satisfy (ystep >> 6) == 2 map to the same value, so it is enough to look up
+// by ystep >> 6. The largest index for this lookup is 1023 >> 6 == 15. Indices
+// that do not correspond to angle derivatives are left at zero.
+// Notably, in cases with upsampling, the shuffle-invalid height is always
+// greater than the prediction height (which is 8 at maximum).
+constexpr int kDirectionalZone2ShuffleInvalidHeight[16] = {
+ 1024, 1024, 16, 16, 16, 16, 0, 0, 18, 0, 0, 0, 0, 0, 0, 40};
+
+// 7.11.2.4 (8) 90 < angle > 180
+// The strategy for these functions (4xH and 8+xH) is to know how many blocks
+// can be processed with just pixels from |top_ptr|, then handle mixed blocks,
+// then handle only blocks that take from |left_ptr|. Additionally, a fast
+// index-shuffle approach is used for pred values from |left_column| in sections
+// that permit it.
+template <bool upsampled_top, bool upsampled_left>
+inline void DirectionalZone2_4xH(
+ uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const top_row,
+ const uint16_t* LIBGAV1_RESTRICT const left_column, const int height,
+ const int xstep, const int ystep) {
+ const int upsample_left_shift = static_cast<int>(upsampled_left);
+
+ // Helper vector for index computation.
+ const int16x4_t zero_to_three = {0, 1, 2, 3};
+
+ // Loop increments for moving by block (4xN). Vertical still steps by 8. If
+ // it's only 4, it will be finished in the first iteration.
+ const ptrdiff_t stride8 = stride << 3;
+ const int xstep8 = xstep << 3;
+
+ const int min_height = (height == 4) ? 4 : 8;
+
+ // All columns from |min_top_only_x| to the right will only need |top_row| to
+ // compute and can therefore call the Zone1 functions. This assumes |xstep| is
+ // at least 3.
+ assert(xstep >= 3);
+
+ // Offsets the original zone bound value to simplify x < (y+1)*xstep/64 -1
+ int xstep_bounds_base = (xstep == 64) ? 0 : xstep - 1;
+
+ const int left_base_increment = ystep >> 6;
+ const int ystep_remainder = ystep & 0x3F;
+
+ // If the 64 scaling is regarded as a decimal point, the first value of the
+ // left_y vector omits the portion which is covered under the left_column
+ // offset. The following values need the full ystep as a relative offset.
+ const int16x4_t left_y =
+ vmla_n_s16(vdup_n_s16(-ystep_remainder), zero_to_three, -ystep);
+
+ // This loop treats the 4 columns in 3 stages with y-value boundaries.
+ // The first stage, before the first y-loop, covers blocks that are only
+ // computed from the top row. The second stage, comprising two y-loops, covers
+ // blocks that have a mixture of values computed from top or left. The final
+ // stage covers blocks that are only computed from the left.
+ // Round down to the nearest multiple of 8.
+ // TODO(petersonab): Check if rounding to the nearest 4 is okay.
+ const int max_top_only_y = std::min((1 << 6) / xstep, height) & ~7;
+ DirectionalZone1_4xH<upsampled_top>(reinterpret_cast<uint16_t*>(dst),
+ stride >> 1, max_top_only_y, top_row,
+ -xstep);
+
+ if (max_top_only_y == height) return;
+
+ int y = max_top_only_y;
+ dst += stride * y;
+ const int xstep_y = xstep * y;
+
+ // All rows from |min_left_only_y| down for this set of columns only need
+ // |left_column| to compute.
+ const int min_left_only_y = std::min((4 /*width*/ << 6) / xstep, height);
+ int xstep_bounds = xstep_bounds_base + xstep_y;
+ int top_x = -xstep - xstep_y;
+
+ // +8 increment is OK because if height is 4 this only runs once.
+ for (; y < min_left_only_y;
+ y += 8, dst += stride8, xstep_bounds += xstep8, top_x -= xstep8) {
+ DirectionalZone2FromLeftCol_4xH(
+ dst, stride, min_height,
+ left_column + ((y - left_base_increment) << upsample_left_shift),
+ left_y, upsampled_left);
+
+ DirectionalZone1Blend_4xH<upsampled_top>(dst, stride, min_height, top_row,
+ xstep_bounds, top_x, xstep);
+ }
+
+ // Loop over y for left-only rows.
+ for (; y < height; y += 8, dst += stride8) {
+ // Angle expected by Zone3 is flipped about the 180 degree vector, which
+ // is the x-axis.
+ DirectionalZone3_4xH<upsampled_left>(
+ dst, stride, min_height, left_column + (y << upsample_left_shift),
+ -ystep);
+ }
+}
+
+// Process 8x4 and 16x4 blocks. This avoids a lot of overhead and simplifies
+// address safety.
+template <bool upsampled_top, bool upsampled_left>
+inline void DirectionalZone2_Wx4(
+ uint8_t* LIBGAV1_RESTRICT const dst, const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const top_row,
+ const uint16_t* LIBGAV1_RESTRICT const left_column, const int width,
+ const int xstep, const int ystep) {
+ const int upsample_top_shift = static_cast<int>(upsampled_top);
+ // Offsets the original zone bound value to simplify x < (y+1)*xstep/64 -1
+ int xstep_bounds_base = (xstep == 64) ? 0 : xstep - 1;
+
+ const int min_top_only_x = std::min((4 * xstep) >> 6, width);
+ int x = 0;
+ for (; x < min_top_only_x; x += 4, xstep_bounds_base -= (4 << 6)) {
+ uint8_t* dst_x = dst + x * sizeof(uint16_t);
+
+ // Round down to the nearest multiple of 4.
+ const int max_top_only_y = (((x + 1) << 6) / xstep) & ~3;
+ if (max_top_only_y != 0) {
+ DirectionalZone1_4xH<upsampled_top>(
+ reinterpret_cast<uint16_t*>(dst_x), stride >> 1, 4,
+ top_row + (x << upsample_top_shift), -xstep);
+ continue;
+ }
+
+ DirectionalZone3_4x4<upsampled_left>(dst_x, stride, left_column, -ystep,
+ -ystep * x);
+
+ const int min_left_only_y = ((x + 4) << 6) / xstep;
+ if (min_left_only_y != 0) {
+ const int top_x = -xstep;
+ DirectionalZone1Blend_4xH<upsampled_top>(
+ dst_x, stride, 4, top_row + (x << upsample_top_shift),
+ xstep_bounds_base, top_x, xstep);
+ }
+ }
+ // Reached |min_top_only_x|.
+ for (; x < width; x += 4) {
+ DirectionalZone1_4xH<upsampled_top>(
+ reinterpret_cast<uint16_t*>(dst) + x, stride >> 1, 4,
+ top_row + (x << upsample_top_shift), -xstep);
+ }
+}
+
+// Process a multiple of 8 |width|.
+template <bool upsampled_top, bool upsampled_left>
+inline void DirectionalZone2_8(
+ uint8_t* LIBGAV1_RESTRICT const dst, const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const top_row,
+ const uint16_t* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int xstep, const int ystep) {
+ if (height == 4) {
+ DirectionalZone2_Wx4<upsampled_top, upsampled_left>(
+ dst, stride, top_row, left_column, width, xstep, ystep);
+ return;
+ }
+ const int upsample_left_shift = static_cast<int>(upsampled_left);
+ const int upsample_top_shift = static_cast<int>(upsampled_top);
+
+ // Helper vector.
+ const int16x8_t zero_to_seven = {0, 1, 2, 3, 4, 5, 6, 7};
+
+ // Loop increments for moving by block (8x8). This function handles blocks
+ // with height 4 as well. They are calculated in one pass so these variables
+ // do not get used.
+ const ptrdiff_t stride8 = stride << 3;
+ const int xstep8 = xstep << 3;
+ const int ystep8 = ystep << 3;
+
+ // All columns from |min_top_only_x| to the right will only need |top_row| to
+ // compute and can therefore call the Zone1 functions. This assumes |xstep| is
+ // at least 3.
+ assert(xstep >= 3);
+ const int min_top_only_x = std::min((height * xstep) >> 6, width);
+
+ // For steep angles, the source pixels from |left_column| may not fit in a
+ // 16-byte load for shuffling.
+ // TODO(petersonab): Find a more precise formula for this subject to x.
+ const int max_shuffle_height =
+ std::min(kDirectionalZone2ShuffleInvalidHeight[ystep >> 6], height);
+
+ // Offsets the original zone bound value to simplify x < (y+1)*xstep/64 -1
+ int xstep_bounds_base = (xstep == 64) ? 0 : xstep - 1;
+
+ const int left_base_increment = ystep >> 6;
+ const int ystep_remainder = ystep & 0x3F;
+
+ const int left_base_increment8 = ystep8 >> 6;
+ const int ystep_remainder8 = ystep8 & 0x3F;
+ const int16x8_t increment_left8 = vdupq_n_s16(ystep_remainder8);
+
+ // If the 64 scaling is regarded as a decimal point, the first value of the
+ // left_y vector omits the portion which is covered under the left_column
+ // offset. Following values need the full ystep as a relative offset.
+ int16x8_t left_y =
+ vmlaq_n_s16(vdupq_n_s16(-ystep_remainder), zero_to_seven, -ystep);
+
+ // This loop treats each set of 4 columns in 3 stages with y-value boundaries.
+ // The first stage, before the first y-loop, covers blocks that are only
+ // computed from the top row. The second stage, comprising two y-loops, covers
+ // blocks that have a mixture of values computed from top or left. The final
+ // stage covers blocks that are only computed from the left.
+ int x = 0;
+ for (int left_offset = -left_base_increment; x < min_top_only_x; x += 8,
+ xstep_bounds_base -= (8 << 6),
+ left_y = vsubq_s16(left_y, increment_left8),
+ left_offset -= left_base_increment8) {
+ uint8_t* dst_x = dst + x * sizeof(uint16_t);
+
+ // Round down to the nearest multiple of 8.
+ const int max_top_only_y = std::min(((x + 1) << 6) / xstep, height) & ~7;
+ DirectionalZone1_WxH<upsampled_top>(
+ reinterpret_cast<uint16_t*>(dst_x), stride >> 1, 8, max_top_only_y,
+ top_row + (x << upsample_top_shift), -xstep);
+
+ if (max_top_only_y == height) continue;
+
+ int y = max_top_only_y;
+ dst_x += stride * y;
+ const int xstep_y = xstep * y;
+
+ // All rows from |min_left_only_y| down for this set of columns only need
+ // |left_column| to compute.
+ const int min_left_only_y = std::min(((x + 8) << 6) / xstep, height);
+ // At high angles such that min_left_only_y < 8, ystep is low and xstep is
+ // high. This means that max_shuffle_height is unbounded and xstep_bounds
+ // will overflow in 16 bits. This is prevented by stopping the first
+ // blending loop at min_left_only_y for such cases, which means we skip over
+ // the second blending loop as well.
+ const int left_shuffle_stop_y =
+ std::min(max_shuffle_height, min_left_only_y);
+ int xstep_bounds = xstep_bounds_base + xstep_y;
+ int top_x = -xstep - xstep_y;
+
+ for (; y < left_shuffle_stop_y;
+ y += 8, dst_x += stride8, xstep_bounds += xstep8, top_x -= xstep8) {
+ DirectionalZone2FromLeftCol_8xH(
+ dst_x, stride, 8,
+ left_column + ((left_offset + y) << upsample_left_shift), left_y,
+ upsample_left_shift);
+
+ DirectionalZone1Blend_8xH<upsampled_top>(
+ dst_x, stride, 8, top_row + (x << upsample_top_shift), xstep_bounds,
+ top_x, xstep);
+ }
+
+ // Pick up from the last y-value, using the slower but secure method for
+ // left prediction.
+ for (; y < min_left_only_y;
+ y += 8, dst_x += stride8, xstep_bounds += xstep8, top_x -= xstep8) {
+ DirectionalZone3_8x8<upsampled_left>(
+ dst_x, stride, left_column + (y << upsample_left_shift), -ystep,
+ -ystep * x);
+
+ DirectionalZone1Blend_8xH<upsampled_top>(
+ dst_x, stride, 8, top_row + (x << upsample_top_shift), xstep_bounds,
+ top_x, xstep);
+ }
+ // Loop over y for left_only rows.
+ for (; y < height; y += 8, dst_x += stride8) {
+ DirectionalZone3_8x8<upsampled_left>(
+ dst_x, stride, left_column + (y << upsample_left_shift), -ystep,
+ -ystep * x);
+ }
+ }
+ // Reached |min_top_only_x|.
+ if (x < width) {
+ DirectionalZone1_WxH<upsampled_top>(
+ reinterpret_cast<uint16_t*>(dst) + x, stride >> 1, width - x, height,
+ top_row + (x << upsample_top_shift), -xstep);
+ }
+}
+
+// At this angle, neither edges are upsampled.
+// |min_width| is either 4 or 8.
+template <int min_width>
+void DirectionalAngle135(uint8_t* LIBGAV1_RESTRICT dst, const ptrdiff_t stride,
+ const uint16_t* LIBGAV1_RESTRICT const top,
+ const uint16_t* LIBGAV1_RESTRICT const left,
+ const int width, const int height) {
+ // y = 0 is more trivial than the other rows.
+ memcpy(dst, top - 1, width * sizeof(top[0]));
+ dst += stride;
+
+ // If |height| > |width|, then there is a point at which top_row is no longer
+ // used in each row.
+ const int min_left_only_y = std::min(width, height);
+
+ int y = 1;
+ do {
+ // Example: If y is 4 (min_width), the dest row starts with left[3],
+ // left[2], left[1], left[0], because the angle points up. Therefore, load
+ // starts at left[0] and is then reversed. If y is 2, the load starts at
+ // left[-2], and is reversed to store left[1], left[0], with negative values
+ // overwritten from |top_row|.
+ const uint16_t* const load_left = left + y - min_width;
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+
+ // Some values will be overwritten when |y| is not a multiple of
+ // |min_width|.
+ if (min_width == 4) {
+ const uint16x4_t left_toward_corner = vrev64_u16(vld1_u16(load_left));
+ vst1_u16(dst16, left_toward_corner);
+ } else {
+ int x = 0;
+ do {
+ const uint16x8_t left_toward_corner =
+ vrev64q_u16(vld1q_u16(load_left - x));
+ vst1_u16(dst16 + x, vget_high_u16(left_toward_corner));
+ vst1_u16(dst16 + x + 4, vget_low_u16(left_toward_corner));
+ x += 8;
+ } while (x < y);
+ }
+ // Entering |top|.
+ memcpy(dst16 + y, top - 1, (width - y) * sizeof(top[0]));
+ dst += stride;
+ } while (++y < min_left_only_y);
+
+ // Left only.
+ for (; y < height; ++y, dst += stride) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint16_t* const load_left = left + y - min_width;
+
+ int x = 0;
+ if (min_width == 4) {
+ const uint16x4_t left_toward_corner = vrev64_u16(vld1_u16(load_left - x));
+ vst1_u16(dst16 + x, left_toward_corner);
+ } else {
+ do {
+ const uint16x8_t left_toward_corner =
+ vrev64q_u16(vld1q_u16(load_left - x));
+ vst1_u16(dst16 + x, vget_high_u16(left_toward_corner));
+ vst1_u16(dst16 + x + 4, vget_low_u16(left_toward_corner));
+ x += 8;
+ } while (x < width);
+ }
+ }
+}
+
+void DirectionalIntraPredictorZone2_NEON(
+ void* LIBGAV1_RESTRICT dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int xstep, const int ystep,
+ const bool upsampled_top, const bool upsampled_left) {
+ // Increasing the negative buffer for this function allows more rows to be
+ // processed at a time without branching in an inner loop to check the base.
+ uint16_t top_buffer[288];
+ uint16_t left_buffer[288];
+#if LIBGAV1_MSAN
+ memset(top_buffer, 0, sizeof(top_buffer));
+ memset(left_buffer, 0, sizeof(left_buffer));
+#endif // LIBGAV1_MSAN
+ memcpy(top_buffer + 128, static_cast<const uint16_t*>(top_row) - 16, 160);
+ memcpy(left_buffer + 128, static_cast<const uint16_t*>(left_column) - 16,
+ 160);
+ const uint16_t* top_ptr = top_buffer + 144;
+ const uint16_t* left_ptr = left_buffer + 144;
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ if (width == 4) {
+ if (xstep == 64) {
+ assert(ystep == 64);
+ DirectionalAngle135<4>(dst, stride, top_ptr, left_ptr, width, height);
+ return;
+ }
+ if (upsampled_top) {
+ if (upsampled_left) {
+ DirectionalZone2_4xH<true, true>(dst, stride, top_ptr, left_ptr, height,
+ xstep, ystep);
+ } else {
+ DirectionalZone2_4xH<true, false>(dst, stride, top_ptr, left_ptr,
+ height, xstep, ystep);
+ }
+ } else if (upsampled_left) {
+ DirectionalZone2_4xH<false, true>(dst, stride, top_ptr, left_ptr, height,
+ xstep, ystep);
+ } else {
+ DirectionalZone2_4xH<false, false>(dst, stride, top_ptr, left_ptr, height,
+ xstep, ystep);
+ }
+ return;
+ }
+
+ if (xstep == 64) {
+ assert(ystep == 64);
+ DirectionalAngle135<8>(dst, stride, top_ptr, left_ptr, width, height);
+ return;
+ }
+ if (upsampled_top) {
+ if (upsampled_left) {
+ DirectionalZone2_8<true, true>(dst, stride, top_ptr, left_ptr, width,
+ height, xstep, ystep);
+ } else {
+ DirectionalZone2_8<true, false>(dst, stride, top_ptr, left_ptr, width,
+ height, xstep, ystep);
+ }
+ } else if (upsampled_left) {
+ DirectionalZone2_8<false, true>(dst, stride, top_ptr, left_ptr, width,
+ height, xstep, ystep);
+ } else {
+ DirectionalZone2_8<false, false>(dst, stride, top_ptr, left_ptr, width,
+ height, xstep, ystep);
+ }
+}
+
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
assert(dsp != nullptr);
dsp->directional_intra_predictor_zone1 = DirectionalIntraPredictorZone1_NEON;
+ dsp->directional_intra_predictor_zone2 = DirectionalIntraPredictorZone2_NEON;
dsp->directional_intra_predictor_zone3 = DirectionalIntraPredictorZone3_NEON;
}
diff --git a/src/dsp/arm/intrapred_directional_neon.h b/src/dsp/arm/intrapred_directional_neon.h
index f7d6235..310d90b 100644
--- a/src/dsp/arm/intrapred_directional_neon.h
+++ b/src/dsp/arm/intrapred_directional_neon.h
@@ -47,6 +47,10 @@ void IntraPredDirectionalInit_NEON();
#define LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone1 LIBGAV1_CPU_NEON
#endif
+#ifndef LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone2
+#define LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone2 LIBGAV1_CPU_NEON
+#endif
+
#ifndef LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone3
#define LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone3 LIBGAV1_CPU_NEON
#endif
diff --git a/src/dsp/arm/intrapred_filter_neon.cc b/src/dsp/arm/intrapred_filter_neon.cc
index bd9f61d..70bd62b 100644
--- a/src/dsp/arm/intrapred_filter_neon.cc
+++ b/src/dsp/arm/intrapred_filter_neon.cc
@@ -85,17 +85,18 @@ alignas(8) constexpr uint8_t kTransposedTaps[kNumFilterIntraPredictors][7][8] =
{14, 12, 11, 10, 0, 0, 1, 1},
{0, 0, 0, 0, 14, 12, 11, 9}}};
-void FilterIntraPredictor_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column,
+void FilterIntraPredictor_NEON(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column,
FilterIntraPredictor pred, int width,
int height) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
assert(width <= 32 && height <= 32);
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
uint8x8_t transposed_taps[7];
for (int i = 0; i < 7; ++i) {
@@ -160,7 +161,136 @@ void Init8bpp() {
} // namespace
} // namespace low_bitdepth
-void IntraPredFilterInit_NEON() { low_bitdepth::Init8bpp(); }
+//------------------------------------------------------------------------------
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+alignas(kMaxAlignment) constexpr int16_t
+ kTransposedTaps[kNumFilterIntraPredictors][7][8] = {
+ {{-6, -5, -3, -3, -4, -3, -3, -3},
+ {10, 2, 1, 1, 6, 2, 2, 1},
+ {0, 10, 1, 1, 0, 6, 2, 2},
+ {0, 0, 10, 2, 0, 0, 6, 2},
+ {0, 0, 0, 10, 0, 0, 0, 6},
+ {12, 9, 7, 5, 2, 2, 2, 3},
+ {0, 0, 0, 0, 12, 9, 7, 5}},
+ {{-10, -6, -4, -2, -10, -6, -4, -2},
+ {16, 0, 0, 0, 16, 0, 0, 0},
+ {0, 16, 0, 0, 0, 16, 0, 0},
+ {0, 0, 16, 0, 0, 0, 16, 0},
+ {0, 0, 0, 16, 0, 0, 0, 16},
+ {10, 6, 4, 2, 0, 0, 0, 0},
+ {0, 0, 0, 0, 10, 6, 4, 2}},
+ {{-8, -8, -8, -8, -4, -4, -4, -4},
+ {8, 0, 0, 0, 4, 0, 0, 0},
+ {0, 8, 0, 0, 0, 4, 0, 0},
+ {0, 0, 8, 0, 0, 0, 4, 0},
+ {0, 0, 0, 8, 0, 0, 0, 4},
+ {16, 16, 16, 16, 0, 0, 0, 0},
+ {0, 0, 0, 0, 16, 16, 16, 16}},
+ {{-2, -1, -1, -0, -1, -1, -1, -1},
+ {8, 3, 2, 1, 4, 3, 2, 2},
+ {0, 8, 3, 2, 0, 4, 3, 2},
+ {0, 0, 8, 3, 0, 0, 4, 3},
+ {0, 0, 0, 8, 0, 0, 0, 4},
+ {10, 6, 4, 2, 3, 4, 4, 3},
+ {0, 0, 0, 0, 10, 6, 4, 3}},
+ {{-12, -10, -9, -8, -10, -9, -8, -7},
+ {14, 0, 0, 0, 12, 1, 0, 0},
+ {0, 14, 0, 0, 0, 12, 0, 0},
+ {0, 0, 14, 0, 0, 0, 12, 1},
+ {0, 0, 0, 14, 0, 0, 0, 12},
+ {14, 12, 11, 10, 0, 0, 1, 1},
+ {0, 0, 0, 0, 14, 12, 11, 9}}};
+
+void FilterIntraPredictor_NEON(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column,
+ FilterIntraPredictor pred, int width,
+ int height) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+
+ assert(width <= 32 && height <= 32);
+
+ auto* dst = static_cast<uint16_t*>(dest);
+
+ stride >>= 1;
+
+ int16x8_t transposed_taps[7];
+ for (int i = 0; i < 7; ++i) {
+ transposed_taps[i] = vld1q_s16(kTransposedTaps[pred][i]);
+ }
+
+ uint16_t relative_top_left = top[-1];
+ const uint16_t* relative_top = top;
+ uint16_t relative_left[2] = {left[0], left[1]};
+
+ int y = 0;
+ do {
+ uint16_t* row_dst = dst;
+ int x = 0;
+ do {
+ int16x8_t sum =
+ vmulq_s16(transposed_taps[0],
+ vreinterpretq_s16_u16(vdupq_n_u16(relative_top_left)));
+ for (int i = 1; i < 5; ++i) {
+ sum =
+ vmlaq_s16(sum, transposed_taps[i],
+ vreinterpretq_s16_u16(vdupq_n_u16(relative_top[i - 1])));
+ }
+ for (int i = 5; i < 7; ++i) {
+ sum =
+ vmlaq_s16(sum, transposed_taps[i],
+ vreinterpretq_s16_u16(vdupq_n_u16(relative_left[i - 5])));
+ }
+
+ const int16x8_t sum_shifted = vrshrq_n_s16(sum, 4);
+ const uint16x8_t sum_saturated = vminq_u16(
+ vreinterpretq_u16_s16(vmaxq_s16(sum_shifted, vdupq_n_s16(0))),
+ vdupq_n_u16((1 << kBitdepth10) - 1));
+
+ vst1_u16(row_dst, vget_low_u16(sum_saturated));
+ vst1_u16(row_dst + stride, vget_high_u16(sum_saturated));
+
+ // Progress across
+ relative_top_left = relative_top[3];
+ relative_top += 4;
+ relative_left[0] = row_dst[3];
+ relative_left[1] = row_dst[3 + stride];
+ row_dst += 4;
+ x += 4;
+ } while (x < width);
+
+ // Progress down.
+ relative_top_left = left[y + 1];
+ relative_top = dst + stride;
+ relative_left[0] = left[y + 2];
+ relative_left[1] = left[y + 3];
+
+ dst += 2 * stride;
+ y += 2;
+ } while (y < height);
+}
+
+void Init10bpp() {
+ Dsp* dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->filter_intra_predictor = FilterIntraPredictor_NEON;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void IntraPredFilterInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/intrapred_filter_neon.h b/src/dsp/arm/intrapred_filter_neon.h
index 283c1b1..d005f4c 100644
--- a/src/dsp/arm/intrapred_filter_neon.h
+++ b/src/dsp/arm/intrapred_filter_neon.h
@@ -32,6 +32,8 @@ void IntraPredFilterInit_NEON();
#if LIBGAV1_ENABLE_NEON
#define LIBGAV1_Dsp8bpp_FilterIntraPredictor LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_FilterIntraPredictor LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_INTRAPRED_FILTER_NEON_H_
diff --git a/src/dsp/arm/intrapred_neon.cc b/src/dsp/arm/intrapred_neon.cc
index c143648..cd47a22 100644
--- a/src/dsp/arm/intrapred_neon.cc
+++ b/src/dsp/arm/intrapred_neon.cc
@@ -26,6 +26,7 @@
#include "src/dsp/arm/common_neon.h"
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
+#include "src/utils/common.h"
#include "src/utils/constants.h"
namespace libgav1 {
@@ -56,10 +57,10 @@ struct DcPredFuncs_NEON {
template <int block_width_log2, int block_height_log2, DcSumFunc sumfn,
DcStoreFunc storefn>
-void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn,
- storefn>::DcTop(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* /*left_column*/) {
+void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn, storefn>::
+ DcTop(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* /*left_column*/) {
const uint32x2_t sum = sumfn(top_row, block_width_log2, false, nullptr, 0);
const uint32x2_t dc = vrshr_n_u32(sum, block_width_log2);
storefn(dest, stride, dc);
@@ -67,10 +68,10 @@ void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn,
template <int block_width_log2, int block_height_log2, DcSumFunc sumfn,
DcStoreFunc storefn>
-void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn,
- storefn>::DcLeft(void* const dest, ptrdiff_t stride,
- const void* /*top_row*/,
- const void* const left_column) {
+void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn, storefn>::
+ DcLeft(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* /*top_row*/,
+ const void* LIBGAV1_RESTRICT const left_column) {
const uint32x2_t sum =
sumfn(left_column, block_height_log2, false, nullptr, 0);
const uint32x2_t dc = vrshr_n_u32(sum, block_height_log2);
@@ -80,8 +81,9 @@ void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn,
template <int block_width_log2, int block_height_log2, DcSumFunc sumfn,
DcStoreFunc storefn>
void DcPredFuncs_NEON<block_width_log2, block_height_log2, sumfn, storefn>::Dc(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const uint32x2_t sum =
sumfn(top_row, block_width_log2, true, left_column, block_height_log2);
if (block_width_log2 == block_height_log2) {
@@ -154,92 +156,116 @@ inline uint16x8_t LoadAndAdd64(const uint8_t* buf) {
// If |use_ref_1| is false then only sum |ref_0|.
// For |ref[01]_size_log2| == 4 this relies on |ref_[01]| being aligned to
// uint32_t.
-inline uint32x2_t DcSum_NEON(const void* ref_0, const int ref_0_size_log2,
- const bool use_ref_1, const void* ref_1,
+inline uint32x2_t DcSum_NEON(const void* LIBGAV1_RESTRICT ref_0,
+ const int ref_0_size_log2, const bool use_ref_1,
+ const void* LIBGAV1_RESTRICT ref_1,
const int ref_1_size_log2) {
const auto* const ref_0_u8 = static_cast<const uint8_t*>(ref_0);
const auto* const ref_1_u8 = static_cast<const uint8_t*>(ref_1);
if (ref_0_size_log2 == 2) {
uint8x8_t val = Load4(ref_0_u8);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 4x4
- val = Load4<1>(ref_1_u8, val);
- return Sum(vpaddl_u8(val));
- } else if (ref_1_size_log2 == 3) { // 4x8
- const uint8x8_t val_1 = vld1_u8(ref_1_u8);
- const uint16x4_t sum_0 = vpaddl_u8(val);
- const uint16x4_t sum_1 = vpaddl_u8(val_1);
- return Sum(vadd_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 4) { // 4x16
- const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
- return Sum(vaddw_u8(vpaddlq_u8(val_1), val));
+ switch (ref_1_size_log2) {
+ case 2: { // 4x4
+ val = Load4<1>(ref_1_u8, val);
+ return Sum(vpaddl_u8(val));
+ }
+ case 3: { // 4x8
+ const uint8x8_t val_1 = vld1_u8(ref_1_u8);
+ const uint16x4_t sum_0 = vpaddl_u8(val);
+ const uint16x4_t sum_1 = vpaddl_u8(val_1);
+ return Sum(vadd_u16(sum_0, sum_1));
+ }
+ case 4: { // 4x16
+ const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
+ return Sum(vaddw_u8(vpaddlq_u8(val_1), val));
+ }
}
}
// 4x1
const uint16x4_t sum = vpaddl_u8(val);
return vpaddl_u16(sum);
- } else if (ref_0_size_log2 == 3) {
+ }
+ if (ref_0_size_log2 == 3) {
const uint8x8_t val_0 = vld1_u8(ref_0_u8);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 8x4
- const uint8x8_t val_1 = Load4(ref_1_u8);
- const uint16x4_t sum_0 = vpaddl_u8(val_0);
- const uint16x4_t sum_1 = vpaddl_u8(val_1);
- return Sum(vadd_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 3) { // 8x8
- const uint8x8_t val_1 = vld1_u8(ref_1_u8);
- const uint16x4_t sum_0 = vpaddl_u8(val_0);
- const uint16x4_t sum_1 = vpaddl_u8(val_1);
- return Sum(vadd_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 4) { // 8x16
- const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
- return Sum(vaddw_u8(vpaddlq_u8(val_1), val_0));
- } else if (ref_1_size_log2 == 5) { // 8x32
- return Sum(vaddw_u8(LoadAndAdd32(ref_1_u8), val_0));
+ switch (ref_1_size_log2) {
+ case 2: { // 8x4
+ const uint8x8_t val_1 = Load4(ref_1_u8);
+ const uint16x4_t sum_0 = vpaddl_u8(val_0);
+ const uint16x4_t sum_1 = vpaddl_u8(val_1);
+ return Sum(vadd_u16(sum_0, sum_1));
+ }
+ case 3: { // 8x8
+ const uint8x8_t val_1 = vld1_u8(ref_1_u8);
+ const uint16x4_t sum_0 = vpaddl_u8(val_0);
+ const uint16x4_t sum_1 = vpaddl_u8(val_1);
+ return Sum(vadd_u16(sum_0, sum_1));
+ }
+ case 4: { // 8x16
+ const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
+ return Sum(vaddw_u8(vpaddlq_u8(val_1), val_0));
+ }
+ case 5: { // 8x32
+ return Sum(vaddw_u8(LoadAndAdd32(ref_1_u8), val_0));
+ }
}
}
// 8x1
return Sum(vpaddl_u8(val_0));
- } else if (ref_0_size_log2 == 4) {
+ }
+ if (ref_0_size_log2 == 4) {
const uint8x16_t val_0 = vld1q_u8(ref_0_u8);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 16x4
- const uint8x8_t val_1 = Load4(ref_1_u8);
- return Sum(vaddw_u8(vpaddlq_u8(val_0), val_1));
- } else if (ref_1_size_log2 == 3) { // 16x8
- const uint8x8_t val_1 = vld1_u8(ref_1_u8);
- return Sum(vaddw_u8(vpaddlq_u8(val_0), val_1));
- } else if (ref_1_size_log2 == 4) { // 16x16
- const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
- return Sum(Add(val_0, val_1));
- } else if (ref_1_size_log2 == 5) { // 16x32
- const uint16x8_t sum_0 = vpaddlq_u8(val_0);
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 16x64
- const uint16x8_t sum_0 = vpaddlq_u8(val_0);
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 2: { // 16x4
+ const uint8x8_t val_1 = Load4(ref_1_u8);
+ return Sum(vaddw_u8(vpaddlq_u8(val_0), val_1));
+ }
+ case 3: { // 16x8
+ const uint8x8_t val_1 = vld1_u8(ref_1_u8);
+ return Sum(vaddw_u8(vpaddlq_u8(val_0), val_1));
+ }
+ case 4: { // 16x16
+ const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
+ return Sum(Add(val_0, val_1));
+ }
+ case 5: { // 16x32
+ const uint16x8_t sum_0 = vpaddlq_u8(val_0);
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 16x64
+ const uint16x8_t sum_0 = vpaddlq_u8(val_0);
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 16x1
return Sum(vpaddlq_u8(val_0));
- } else if (ref_0_size_log2 == 5) {
+ }
+ if (ref_0_size_log2 == 5) {
const uint16x8_t sum_0 = LoadAndAdd32(ref_0_u8);
if (use_ref_1) {
- if (ref_1_size_log2 == 3) { // 32x8
- const uint8x8_t val_1 = vld1_u8(ref_1_u8);
- return Sum(vaddw_u8(sum_0, val_1));
- } else if (ref_1_size_log2 == 4) { // 32x16
- const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
- const uint16x8_t sum_1 = vpaddlq_u8(val_1);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 32x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 32x64
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 3: { // 32x8
+ const uint8x8_t val_1 = vld1_u8(ref_1_u8);
+ return Sum(vaddw_u8(sum_0, val_1));
+ }
+ case 4: { // 32x16
+ const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
+ const uint16x8_t sum_1 = vpaddlq_u8(val_1);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 5: { // 32x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 32x64
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 32x1
@@ -249,16 +275,20 @@ inline uint32x2_t DcSum_NEON(const void* ref_0, const int ref_0_size_log2,
assert(ref_0_size_log2 == 6);
const uint16x8_t sum_0 = LoadAndAdd64(ref_0_u8);
if (use_ref_1) {
- if (ref_1_size_log2 == 4) { // 64x16
- const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
- const uint16x8_t sum_1 = vpaddlq_u8(val_1);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 64x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 64x64
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 4: { // 64x16
+ const uint8x16_t val_1 = vld1q_u8(ref_1_u8);
+ const uint16x8_t sum_1 = vpaddlq_u8(val_1);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 5: { // 64x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 64x64
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u8);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 64x1
@@ -318,9 +348,10 @@ inline void DcStore_NEON(void* const dest, ptrdiff_t stride,
}
template <int width, int height>
-inline void Paeth4Or8xN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+inline void Paeth4Or8xN_NEON(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
auto* dest_u8 = static_cast<uint8_t*>(dest);
const auto* const top_row_u8 = static_cast<const uint8_t*>(top_row);
const auto* const left_col_u8 = static_cast<const uint8_t*>(left_column);
@@ -425,9 +456,10 @@ inline uint8x16_t SelectPaeth(const uint8x16_t top, const uint8x16_t left,
top_dist, top_left_##num##_dist_low, top_left_##num##_dist_high)
template <int width, int height>
-inline void Paeth16PlusxN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+inline void Paeth16PlusxN_NEON(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
auto* dest_u8 = static_cast<uint8_t*>(dest);
const auto* const top_row_u8 = static_cast<const uint8_t*>(top_row);
const auto* const left_col_u8 = static_cast<const uint8_t*>(left_column);
@@ -769,87 +801,111 @@ inline uint16x8_t LoadAndAdd64(const uint16_t* buf) {
// |ref_[01]| each point to 1 << |ref[01]_size_log2| packed uint16_t values.
// If |use_ref_1| is false then only sum |ref_0|.
-inline uint32x2_t DcSum_NEON(const void* ref_0, const int ref_0_size_log2,
- const bool use_ref_1, const void* ref_1,
+inline uint32x2_t DcSum_NEON(const void* LIBGAV1_RESTRICT ref_0,
+ const int ref_0_size_log2, const bool use_ref_1,
+ const void* LIBGAV1_RESTRICT ref_1,
const int ref_1_size_log2) {
const auto* ref_0_u16 = static_cast<const uint16_t*>(ref_0);
const auto* ref_1_u16 = static_cast<const uint16_t*>(ref_1);
if (ref_0_size_log2 == 2) {
const uint16x4_t val_0 = vld1_u16(ref_0_u16);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 4x4
- const uint16x4_t val_1 = vld1_u16(ref_1_u16);
- return Sum(vadd_u16(val_0, val_1));
- } else if (ref_1_size_log2 == 3) { // 4x8
- const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
- const uint16x8_t sum_0 = vcombine_u16(vdup_n_u16(0), val_0);
- return Sum(vaddq_u16(sum_0, val_1));
- } else if (ref_1_size_log2 == 4) { // 4x16
- const uint16x8_t sum_0 = vcombine_u16(vdup_n_u16(0), val_0);
- const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 2: { // 4x4
+ const uint16x4_t val_1 = vld1_u16(ref_1_u16);
+ return Sum(vadd_u16(val_0, val_1));
+ }
+ case 3: { // 4x8
+ const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
+ const uint16x8_t sum_0 = vcombine_u16(vdup_n_u16(0), val_0);
+ return Sum(vaddq_u16(sum_0, val_1));
+ }
+ case 4: { // 4x16
+ const uint16x8_t sum_0 = vcombine_u16(vdup_n_u16(0), val_0);
+ const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 4x1
return Sum(val_0);
- } else if (ref_0_size_log2 == 3) {
+ }
+ if (ref_0_size_log2 == 3) {
const uint16x8_t val_0 = vld1q_u16(ref_0_u16);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 8x4
- const uint16x4_t val_1 = vld1_u16(ref_1_u16);
- const uint16x8_t sum_1 = vcombine_u16(vdup_n_u16(0), val_1);
- return Sum(vaddq_u16(val_0, sum_1));
- } else if (ref_1_size_log2 == 3) { // 8x8
- const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
- return Sum(vaddq_u16(val_0, val_1));
- } else if (ref_1_size_log2 == 4) { // 8x16
- const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
- return Sum(vaddq_u16(val_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 8x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
- return Sum(vaddq_u16(val_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 2: { // 8x4
+ const uint16x4_t val_1 = vld1_u16(ref_1_u16);
+ const uint16x8_t sum_1 = vcombine_u16(vdup_n_u16(0), val_1);
+ return Sum(vaddq_u16(val_0, sum_1));
+ }
+ case 3: { // 8x8
+ const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
+ return Sum(vaddq_u16(val_0, val_1));
+ }
+ case 4: { // 8x16
+ const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
+ return Sum(vaddq_u16(val_0, sum_1));
+ }
+ case 5: { // 8x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
+ return Sum(vaddq_u16(val_0, sum_1));
+ }
}
}
// 8x1
return Sum(val_0);
- } else if (ref_0_size_log2 == 4) {
+ }
+ if (ref_0_size_log2 == 4) {
const uint16x8_t sum_0 = LoadAndAdd16(ref_0_u16);
if (use_ref_1) {
- if (ref_1_size_log2 == 2) { // 16x4
- const uint16x4_t val_1 = vld1_u16(ref_1_u16);
- const uint16x8_t sum_1 = vcombine_u16(vdup_n_u16(0), val_1);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 3) { // 16x8
- const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, val_1));
- } else if (ref_1_size_log2 == 4) { // 16x16
- const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 16x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 16x64
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 2: { // 16x4
+ const uint16x4_t val_1 = vld1_u16(ref_1_u16);
+ const uint16x8_t sum_1 = vcombine_u16(vdup_n_u16(0), val_1);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 3: { // 16x8
+ const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, val_1));
+ }
+ case 4: { // 16x16
+ const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 5: { // 16x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 16x64
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 16x1
return Sum(sum_0);
- } else if (ref_0_size_log2 == 5) {
+ }
+ if (ref_0_size_log2 == 5) {
const uint16x8_t sum_0 = LoadAndAdd32(ref_0_u16);
if (use_ref_1) {
- if (ref_1_size_log2 == 3) { // 32x8
- const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, val_1));
- } else if (ref_1_size_log2 == 4) { // 32x16
- const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 32x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 32x64
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 3: { // 32x8
+ const uint16x8_t val_1 = vld1q_u16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, val_1));
+ }
+ case 4: { // 32x16
+ const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 5: { // 32x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 32x64
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 32x1
@@ -859,15 +915,19 @@ inline uint32x2_t DcSum_NEON(const void* ref_0, const int ref_0_size_log2,
assert(ref_0_size_log2 == 6);
const uint16x8_t sum_0 = LoadAndAdd64(ref_0_u16);
if (use_ref_1) {
- if (ref_1_size_log2 == 4) { // 64x16
- const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 5) { // 64x32
- const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
- } else if (ref_1_size_log2 == 6) { // 64x64
- const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
- return Sum(vaddq_u16(sum_0, sum_1));
+ switch (ref_1_size_log2) {
+ case 4: { // 64x16
+ const uint16x8_t sum_1 = LoadAndAdd16(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 5: { // 64x32
+ const uint16x8_t sum_1 = LoadAndAdd32(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
+ case 6: { // 64x64
+ const uint16x8_t sum_1 = LoadAndAdd64(ref_1_u16);
+ return Sum(vaddq_u16(sum_0, sum_1));
+ }
}
}
// 64x1
@@ -968,9 +1028,9 @@ struct DcDefs {
// IntraPredFuncs_NEON::Horizontal -- duplicate left column across all rows
template <int block_height>
-void Horizontal4xH_NEON(void* const dest, ptrdiff_t stride,
+void Horizontal4xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const void* /*top_row*/,
- const void* const left_column) {
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint16_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
int y = 0;
@@ -983,9 +1043,9 @@ void Horizontal4xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Horizontal8xH_NEON(void* const dest, ptrdiff_t stride,
+void Horizontal8xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const void* /*top_row*/,
- const void* const left_column) {
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint16_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
int y = 0;
@@ -998,9 +1058,9 @@ void Horizontal8xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Horizontal16xH_NEON(void* const dest, ptrdiff_t stride,
+void Horizontal16xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const void* /*top_row*/,
- const void* const left_column) {
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint16_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
int y = 0;
@@ -1020,9 +1080,9 @@ void Horizontal16xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Horizontal32xH_NEON(void* const dest, ptrdiff_t stride,
+void Horizontal32xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const void* /*top_row*/,
- const void* const left_column) {
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint16_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
int y = 0;
@@ -1048,8 +1108,8 @@ void Horizontal32xH_NEON(void* const dest, ptrdiff_t stride,
// IntraPredFuncs_NEON::Vertical -- copy top row to all rows
template <int block_height>
-void Vertical4xH_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
+void Vertical4xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
const void* const /*left_column*/) {
const auto* const top = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1062,8 +1122,8 @@ void Vertical4xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Vertical8xH_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
+void Vertical8xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
const void* const /*left_column*/) {
const auto* const top = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1076,8 +1136,8 @@ void Vertical8xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Vertical16xH_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
+void Vertical16xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
const void* const /*left_column*/) {
const auto* const top = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1096,8 +1156,8 @@ void Vertical16xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Vertical32xH_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
+void Vertical32xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
const void* const /*left_column*/) {
const auto* const top = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1122,8 +1182,8 @@ void Vertical32xH_NEON(void* const dest, ptrdiff_t stride,
}
template <int block_height>
-void Vertical64xH_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
+void Vertical64xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
const void* const /*left_column*/) {
const auto* const top = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1159,6 +1219,145 @@ void Vertical64xH_NEON(void* const dest, ptrdiff_t stride,
} while (y != 0);
}
+template <int height>
+inline void Paeth4xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_ptr,
+ const void* LIBGAV1_RESTRICT const left_ptr) {
+ auto* dst = static_cast<uint8_t*>(dest);
+ const auto* const top_row = static_cast<const uint16_t*>(top_ptr);
+ const auto* const left_col = static_cast<const uint16_t*>(left_ptr);
+
+ const uint16x4_t top_left = vdup_n_u16(top_row[-1]);
+ const uint16x4_t top_left_x2 = vshl_n_u16(top_left, 1);
+ const uint16x4_t top = vld1_u16(top_row);
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint16x4_t left = vdup_n_u16(left_col[y]);
+
+ const uint16x4_t left_dist = vabd_u16(top, top_left);
+ const uint16x4_t top_dist = vabd_u16(left, top_left);
+ const uint16x4_t top_left_dist = vabd_u16(vadd_u16(top, left), top_left_x2);
+
+ const uint16x4_t left_le_top = vcle_u16(left_dist, top_dist);
+ const uint16x4_t left_le_top_left = vcle_u16(left_dist, top_left_dist);
+ const uint16x4_t top_le_top_left = vcle_u16(top_dist, top_left_dist);
+
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint16x4_t left_mask = vand_u16(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint16x4_t result = vbsl_u16(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint16x4_t left_or_top_mask = vorr_u16(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ result = vbsl_u16(left_or_top_mask, result, top_left);
+
+ vst1_u16(dst16, result);
+ dst += stride;
+ }
+}
+
+template <int height>
+inline void Paeth8xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_ptr,
+ const void* LIBGAV1_RESTRICT const left_ptr) {
+ auto* dst = static_cast<uint8_t*>(dest);
+ const auto* const top_row = static_cast<const uint16_t*>(top_ptr);
+ const auto* const left_col = static_cast<const uint16_t*>(left_ptr);
+
+ const uint16x8_t top_left = vdupq_n_u16(top_row[-1]);
+ const uint16x8_t top_left_x2 = vshlq_n_u16(top_left, 1);
+ const uint16x8_t top = vld1q_u16(top_row);
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint16x8_t left = vdupq_n_u16(left_col[y]);
+
+ const uint16x8_t left_dist = vabdq_u16(top, top_left);
+ const uint16x8_t top_dist = vabdq_u16(left, top_left);
+ const uint16x8_t top_left_dist =
+ vabdq_u16(vaddq_u16(top, left), top_left_x2);
+
+ const uint16x8_t left_le_top = vcleq_u16(left_dist, top_dist);
+ const uint16x8_t left_le_top_left = vcleq_u16(left_dist, top_left_dist);
+ const uint16x8_t top_le_top_left = vcleq_u16(top_dist, top_left_dist);
+
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint16x8_t left_mask = vandq_u16(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint16x8_t result = vbslq_u16(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint16x8_t left_or_top_mask = vorrq_u16(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ result = vbslq_u16(left_or_top_mask, result, top_left);
+
+ vst1q_u16(dst16, result);
+ dst += stride;
+ }
+}
+
+// For 16xH and above.
+template <int width, int height>
+inline void PaethWxH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_ptr,
+ const void* LIBGAV1_RESTRICT const left_ptr) {
+ auto* dst = static_cast<uint8_t*>(dest);
+ const auto* const top_row = static_cast<const uint16_t*>(top_ptr);
+ const auto* const left_col = static_cast<const uint16_t*>(left_ptr);
+
+ const uint16x8_t top_left = vdupq_n_u16(top_row[-1]);
+ const uint16x8_t top_left_x2 = vshlq_n_u16(top_left, 1);
+
+ uint16x8_t top[width >> 3];
+ for (int i = 0; i < width >> 3; ++i) {
+ top[i] = vld1q_u16(top_row + (i << 3));
+ }
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst_x = reinterpret_cast<uint16_t*>(dst);
+ const uint16x8_t left = vdupq_n_u16(left_col[y]);
+ const uint16x8_t top_dist = vabdq_u16(left, top_left);
+
+ for (int i = 0; i < (width >> 3); ++i) {
+ const uint16x8_t left_dist = vabdq_u16(top[i], top_left);
+ const uint16x8_t top_left_dist =
+ vabdq_u16(vaddq_u16(top[i], left), top_left_x2);
+
+ const uint16x8_t left_le_top = vcleq_u16(left_dist, top_dist);
+ const uint16x8_t left_le_top_left = vcleq_u16(left_dist, top_left_dist);
+ const uint16x8_t top_le_top_left = vcleq_u16(top_dist, top_left_dist);
+
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint16x8_t left_mask = vandq_u16(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint16x8_t result = vbslq_u16(left_mask, left, top[i]);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint16x8_t left_or_top_mask = vorrq_u16(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ result = vbslq_u16(left_or_top_mask, result, top_left);
+
+ vst1q_u16(dst_x, result);
+ dst_x += 8;
+ }
+ dst += stride;
+ }
+}
+
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
assert(dsp != nullptr);
@@ -1170,6 +1369,8 @@ void Init10bpp() {
DcDefs::_4x4::Dc;
dsp->intra_predictors[kTransformSize4x4][kIntraPredictorVertical] =
Vertical4xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize4x4][kIntraPredictorPaeth] =
+ Paeth4xH_NEON<4>;
// 4x8
dsp->intra_predictors[kTransformSize4x8][kIntraPredictorDcTop] =
@@ -1182,6 +1383,8 @@ void Init10bpp() {
Horizontal4xH_NEON<8>;
dsp->intra_predictors[kTransformSize4x8][kIntraPredictorVertical] =
Vertical4xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize4x8][kIntraPredictorPaeth] =
+ Paeth4xH_NEON<8>;
// 4x16
dsp->intra_predictors[kTransformSize4x16][kIntraPredictorDcTop] =
@@ -1194,6 +1397,8 @@ void Init10bpp() {
Horizontal4xH_NEON<16>;
dsp->intra_predictors[kTransformSize4x16][kIntraPredictorVertical] =
Vertical4xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize4x16][kIntraPredictorPaeth] =
+ Paeth4xH_NEON<16>;
// 8x4
dsp->intra_predictors[kTransformSize8x4][kIntraPredictorDcTop] =
@@ -1204,6 +1409,8 @@ void Init10bpp() {
DcDefs::_8x4::Dc;
dsp->intra_predictors[kTransformSize8x4][kIntraPredictorVertical] =
Vertical8xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize8x4][kIntraPredictorPaeth] =
+ Paeth8xH_NEON<4>;
// 8x8
dsp->intra_predictors[kTransformSize8x8][kIntraPredictorDcTop] =
@@ -1216,6 +1423,8 @@ void Init10bpp() {
Horizontal8xH_NEON<8>;
dsp->intra_predictors[kTransformSize8x8][kIntraPredictorVertical] =
Vertical8xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize8x8][kIntraPredictorPaeth] =
+ Paeth8xH_NEON<8>;
// 8x16
dsp->intra_predictors[kTransformSize8x16][kIntraPredictorDcTop] =
@@ -1226,6 +1435,8 @@ void Init10bpp() {
DcDefs::_8x16::Dc;
dsp->intra_predictors[kTransformSize8x16][kIntraPredictorVertical] =
Vertical8xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize8x16][kIntraPredictorPaeth] =
+ Paeth8xH_NEON<16>;
// 8x32
dsp->intra_predictors[kTransformSize8x32][kIntraPredictorDcTop] =
@@ -1238,6 +1449,8 @@ void Init10bpp() {
Horizontal8xH_NEON<32>;
dsp->intra_predictors[kTransformSize8x32][kIntraPredictorVertical] =
Vertical8xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize8x32][kIntraPredictorPaeth] =
+ Paeth8xH_NEON<32>;
// 16x4
dsp->intra_predictors[kTransformSize16x4][kIntraPredictorDcTop] =
@@ -1248,6 +1461,8 @@ void Init10bpp() {
DcDefs::_16x4::Dc;
dsp->intra_predictors[kTransformSize16x4][kIntraPredictorVertical] =
Vertical16xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize16x4][kIntraPredictorPaeth] =
+ PaethWxH_NEON<16, 4>;
// 16x8
dsp->intra_predictors[kTransformSize16x8][kIntraPredictorDcTop] =
@@ -1260,6 +1475,8 @@ void Init10bpp() {
Horizontal16xH_NEON<8>;
dsp->intra_predictors[kTransformSize16x8][kIntraPredictorVertical] =
Vertical16xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize16x8][kIntraPredictorPaeth] =
+ PaethWxH_NEON<16, 8>;
// 16x16
dsp->intra_predictors[kTransformSize16x16][kIntraPredictorDcTop] =
@@ -1270,6 +1487,8 @@ void Init10bpp() {
DcDefs::_16x16::Dc;
dsp->intra_predictors[kTransformSize16x16][kIntraPredictorVertical] =
Vertical16xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize16x16][kIntraPredictorPaeth] =
+ PaethWxH_NEON<16, 16>;
// 16x32
dsp->intra_predictors[kTransformSize16x32][kIntraPredictorDcTop] =
@@ -1280,6 +1499,8 @@ void Init10bpp() {
DcDefs::_16x32::Dc;
dsp->intra_predictors[kTransformSize16x32][kIntraPredictorVertical] =
Vertical16xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize16x32][kIntraPredictorPaeth] =
+ PaethWxH_NEON<16, 32>;
// 16x64
dsp->intra_predictors[kTransformSize16x64][kIntraPredictorDcTop] =
@@ -1290,6 +1511,8 @@ void Init10bpp() {
DcDefs::_16x64::Dc;
dsp->intra_predictors[kTransformSize16x64][kIntraPredictorVertical] =
Vertical16xH_NEON<64>;
+ dsp->intra_predictors[kTransformSize16x64][kIntraPredictorPaeth] =
+ PaethWxH_NEON<16, 64>;
// 32x8
dsp->intra_predictors[kTransformSize32x8][kIntraPredictorDcTop] =
@@ -1300,6 +1523,8 @@ void Init10bpp() {
DcDefs::_32x8::Dc;
dsp->intra_predictors[kTransformSize32x8][kIntraPredictorVertical] =
Vertical32xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize32x8][kIntraPredictorPaeth] =
+ PaethWxH_NEON<32, 8>;
// 32x16
dsp->intra_predictors[kTransformSize32x16][kIntraPredictorDcTop] =
@@ -1310,6 +1535,8 @@ void Init10bpp() {
DcDefs::_32x16::Dc;
dsp->intra_predictors[kTransformSize32x16][kIntraPredictorVertical] =
Vertical32xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize32x16][kIntraPredictorPaeth] =
+ PaethWxH_NEON<32, 16>;
// 32x32
dsp->intra_predictors[kTransformSize32x32][kIntraPredictorDcTop] =
@@ -1320,6 +1547,8 @@ void Init10bpp() {
DcDefs::_32x32::Dc;
dsp->intra_predictors[kTransformSize32x32][kIntraPredictorVertical] =
Vertical32xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize32x32][kIntraPredictorPaeth] =
+ PaethWxH_NEON<32, 32>;
// 32x64
dsp->intra_predictors[kTransformSize32x64][kIntraPredictorDcTop] =
@@ -1332,6 +1561,8 @@ void Init10bpp() {
Horizontal32xH_NEON<64>;
dsp->intra_predictors[kTransformSize32x64][kIntraPredictorVertical] =
Vertical32xH_NEON<64>;
+ dsp->intra_predictors[kTransformSize32x64][kIntraPredictorPaeth] =
+ PaethWxH_NEON<32, 64>;
// 64x16
dsp->intra_predictors[kTransformSize64x16][kIntraPredictorDcTop] =
@@ -1342,6 +1573,8 @@ void Init10bpp() {
DcDefs::_64x16::Dc;
dsp->intra_predictors[kTransformSize64x16][kIntraPredictorVertical] =
Vertical64xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize64x16][kIntraPredictorPaeth] =
+ PaethWxH_NEON<64, 16>;
// 64x32
dsp->intra_predictors[kTransformSize64x32][kIntraPredictorDcTop] =
@@ -1352,6 +1585,8 @@ void Init10bpp() {
DcDefs::_64x32::Dc;
dsp->intra_predictors[kTransformSize64x32][kIntraPredictorVertical] =
Vertical64xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize64x32][kIntraPredictorPaeth] =
+ PaethWxH_NEON<64, 32>;
// 64x64
dsp->intra_predictors[kTransformSize64x64][kIntraPredictorDcTop] =
@@ -1362,6 +1597,8 @@ void Init10bpp() {
DcDefs::_64x64::Dc;
dsp->intra_predictors[kTransformSize64x64][kIntraPredictorVertical] =
Vertical64xH_NEON<64>;
+ dsp->intra_predictors[kTransformSize64x64][kIntraPredictorPaeth] =
+ PaethWxH_NEON<64, 64>;
}
} // namespace
diff --git a/src/dsp/arm/intrapred_neon.h b/src/dsp/arm/intrapred_neon.h
index b27f29f..5a56924 100644
--- a/src/dsp/arm/intrapred_neon.h
+++ b/src/dsp/arm/intrapred_neon.h
@@ -152,6 +152,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 4x8
#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -161,6 +162,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 4x16
#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -170,6 +172,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 8x4
#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -177,6 +180,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 8x8
#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -186,6 +190,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 8x16
#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -193,6 +198,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 8x32
#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -202,6 +208,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 16x4
#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -209,6 +216,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 16x8
#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -218,6 +226,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 16x16
#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -226,6 +235,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 16x32
#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -234,6 +244,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 16x64
#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -242,6 +253,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 32x8
#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -249,6 +261,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 32x16
#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -257,6 +270,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 32x32
#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -265,6 +279,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 32x64
#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -275,6 +290,7 @@ void IntraPredInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 64x16
#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -283,6 +299,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 64x32
#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -291,6 +308,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorPaeth LIBGAV1_CPU_NEON
// 64x64
#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorDcTop LIBGAV1_CPU_NEON
@@ -299,6 +317,7 @@ void IntraPredInit_NEON();
#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorDc LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorVertical \
LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorPaeth LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_INTRAPRED_NEON_H_
diff --git a/src/dsp/arm/intrapred_smooth_neon.cc b/src/dsp/arm/intrapred_smooth_neon.cc
index c33f333..bcda131 100644
--- a/src/dsp/arm/intrapred_smooth_neon.cc
+++ b/src/dsp/arm/intrapred_smooth_neon.cc
@@ -26,6 +26,7 @@
#include "src/dsp/arm/common_neon.h"
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
+#include "src/utils/common.h"
#include "src/utils/constants.h"
namespace libgav1 {
@@ -38,24 +39,9 @@ namespace {
// to have visibility of the values. This helps reduce loads and in the
// creation of the inverse weights.
constexpr uint8_t kSmoothWeights[] = {
- // block dimension = 4
- 255, 149, 85, 64,
- // block dimension = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // block dimension = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // block dimension = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
- // block dimension = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73,
- 69, 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16,
- 15, 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4};
-
-// TODO(b/150459137): Keeping the intermediate values in uint16_t would allow
-// processing more values at once. At the high end, it could do 4x4 or 8x2 at a
-// time.
+#include "src/dsp/smooth_weights.inc"
+};
+
inline uint16x4_t CalculatePred(const uint16x4_t weighted_top,
const uint16x4_t weighted_left,
const uint16x4_t weighted_bl,
@@ -66,26 +52,74 @@ inline uint16x4_t CalculatePred(const uint16x4_t weighted_top,
return vrshrn_n_u32(pred_2, kSmoothWeightScale + 1);
}
-template <int width, int height>
-inline void Smooth4Or8xN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+template <int height>
+inline void Smooth4xN_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ constexpr int width = 4;
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t top_right = top[width - 1];
const uint8_t bottom_left = left[height - 1];
const uint8_t* const weights_y = kSmoothWeights + height - 4;
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
- uint8x8_t top_v;
- if (width == 4) {
- top_v = Load4(top);
- } else { // width == 8
- top_v = vld1_u8(top);
+ const uint8x8_t top_v = Load4(top);
+ const uint8x8_t top_right_v = vdup_n_u8(top_right);
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);
+ const uint8x8_t weights_x_v = Load4(kSmoothWeights + width - 4);
+ // 256 - weights = vneg_s8(weights)
+ const uint8x8_t scaled_weights_x =
+ vreinterpret_u8_s8(vneg_s8(vreinterpret_s8_u8(weights_x_v)));
+
+ for (int y = 0; y < height; ++y) {
+ const uint8x8_t left_v = vdup_n_u8(left[y]);
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);
+ const uint8x8_t scaled_weights_y =
+ vreinterpret_u8_s8(vneg_s8(vreinterpret_s8_u8(weights_y_v)));
+ const uint16x4_t weighted_bl =
+ vget_low_u16(vmull_u8(scaled_weights_y, bottom_left_v));
+
+ const uint16x4_t weighted_top = vget_low_u16(vmull_u8(weights_y_v, top_v));
+ const uint16x4_t weighted_left =
+ vget_low_u16(vmull_u8(weights_x_v, left_v));
+ const uint16x4_t weighted_tr =
+ vget_low_u16(vmull_u8(scaled_weights_x, top_right_v));
+ const uint16x4_t result =
+ CalculatePred(weighted_top, weighted_left, weighted_bl, weighted_tr);
+
+ StoreLo4(dst, vmovn_u16(vcombine_u16(result, result)));
+ dst += stride;
}
+}
+
+inline uint8x8_t CalculatePred(const uint16x8_t weighted_top,
+ const uint16x8_t weighted_left,
+ const uint16x8_t weighted_bl,
+ const uint16x8_t weighted_tr) {
+ // Maximum value: 0xFF00
+ const uint16x8_t pred_0 = vaddq_u16(weighted_top, weighted_bl);
+ // Maximum value: 0xFF00
+ const uint16x8_t pred_1 = vaddq_u16(weighted_left, weighted_tr);
+ const uint16x8_t pred_2 = vhaddq_u16(pred_0, pred_1);
+ return vrshrn_n_u16(pred_2, kSmoothWeightScale);
+}
+
+template <int height>
+inline void Smooth8xN_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ constexpr int width = 8;
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
+ const uint8_t top_right = top[width - 1];
+ const uint8_t bottom_left = left[height - 1];
+ const uint8_t* const weights_y = kSmoothWeights + height - 4;
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint8x8_t top_v = vld1_u8(top);
const uint8x8_t top_right_v = vdup_n_u8(top_right);
const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);
- // Over-reads for 4xN but still within the array.
const uint8x8_t weights_x_v = vld1_u8(kSmoothWeights + width - 4);
// 256 - weights = vneg_s8(weights)
const uint8x8_t scaled_weights_x =
@@ -100,18 +134,10 @@ inline void Smooth4Or8xN_NEON(void* const dest, ptrdiff_t stride,
const uint16x8_t weighted_top = vmull_u8(weights_y_v, top_v);
const uint16x8_t weighted_left = vmull_u8(weights_x_v, left_v);
const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v);
- const uint16x4_t dest_0 =
- CalculatePred(vget_low_u16(weighted_top), vget_low_u16(weighted_left),
- vget_low_u16(weighted_tr), vget_low_u16(weighted_bl));
+ const uint8x8_t result =
+ CalculatePred(weighted_top, weighted_left, weighted_bl, weighted_tr);
- if (width == 4) {
- StoreLo4(dst, vmovn_u16(vcombine_u16(dest_0, dest_0)));
- } else { // width == 8
- const uint16x4_t dest_1 = CalculatePred(
- vget_high_u16(weighted_top), vget_high_u16(weighted_left),
- vget_high_u16(weighted_tr), vget_high_u16(weighted_bl));
- vst1_u8(dst, vmovn_u16(vcombine_u16(dest_0, dest_1)));
- }
+ vst1_u8(dst, result);
dst += stride;
}
}
@@ -124,39 +150,30 @@ inline uint8x16_t CalculateWeightsAndPred(
const uint16x8_t weighted_left_low = vmull_u8(vget_low_u8(weights_x), left);
const uint16x8_t weighted_tr_low =
vmull_u8(vget_low_u8(scaled_weights_x), top_right);
- const uint16x4_t dest_0 = CalculatePred(
- vget_low_u16(weighted_top_low), vget_low_u16(weighted_left_low),
- vget_low_u16(weighted_tr_low), vget_low_u16(weighted_bl));
- const uint16x4_t dest_1 = CalculatePred(
- vget_high_u16(weighted_top_low), vget_high_u16(weighted_left_low),
- vget_high_u16(weighted_tr_low), vget_high_u16(weighted_bl));
- const uint8x8_t dest_0_u8 = vmovn_u16(vcombine_u16(dest_0, dest_1));
+ const uint8x8_t result_low = CalculatePred(
+ weighted_top_low, weighted_left_low, weighted_bl, weighted_tr_low);
const uint16x8_t weighted_top_high = vmull_u8(weights_y, vget_high_u8(top));
const uint16x8_t weighted_left_high = vmull_u8(vget_high_u8(weights_x), left);
const uint16x8_t weighted_tr_high =
vmull_u8(vget_high_u8(scaled_weights_x), top_right);
- const uint16x4_t dest_2 = CalculatePred(
- vget_low_u16(weighted_top_high), vget_low_u16(weighted_left_high),
- vget_low_u16(weighted_tr_high), vget_low_u16(weighted_bl));
- const uint16x4_t dest_3 = CalculatePred(
- vget_high_u16(weighted_top_high), vget_high_u16(weighted_left_high),
- vget_high_u16(weighted_tr_high), vget_high_u16(weighted_bl));
- const uint8x8_t dest_1_u8 = vmovn_u16(vcombine_u16(dest_2, dest_3));
-
- return vcombine_u8(dest_0_u8, dest_1_u8);
+ const uint8x8_t result_high = CalculatePred(
+ weighted_top_high, weighted_left_high, weighted_bl, weighted_tr_high);
+
+ return vcombine_u8(result_low, result_high);
}
template <int width, int height>
-inline void Smooth16PlusxN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+inline void Smooth16PlusxN_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t top_right = top[width - 1];
const uint8_t bottom_left = left[height - 1];
const uint8_t* const weights_y = kSmoothWeights + height - 4;
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
uint8x16_t top_v[4];
top_v[0] = vld1q_u8(top);
@@ -229,14 +246,15 @@ inline void Smooth16PlusxN_NEON(void* const dest, ptrdiff_t stride,
}
template <int width, int height>
-inline void SmoothVertical4Or8xN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+inline void SmoothVertical4Or8xN_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t bottom_left = left[height - 1];
const uint8_t* const weights_y = kSmoothWeights + height - 4;
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
uint8x8_t top_v;
if (width == 4) {
@@ -279,14 +297,15 @@ inline uint8x16_t CalculateVerticalWeightsAndPred(
}
template <int width, int height>
-inline void SmoothVertical16PlusxN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+inline void SmoothVertical16PlusxN_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t bottom_left = left[height - 1];
const uint8_t* const weights_y = kSmoothWeights + height - 4;
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
uint8x16_t top_v[4];
top_v[0] = vld1q_u8(top);
@@ -330,13 +349,14 @@ inline void SmoothVertical16PlusxN_NEON(void* const dest, ptrdiff_t stride,
}
template <int width, int height>
-inline void SmoothHorizontal4Or8xN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+inline void SmoothHorizontal4Or8xN_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t top_right = top[width - 1];
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
const uint8x8_t top_right_v = vdup_n_u8(top_right);
// Over-reads for 4xN but still within the array.
@@ -382,13 +402,14 @@ inline uint8x16_t CalculateHorizontalWeightsAndPred(
}
template <int width, int height>
-inline void SmoothHorizontal16PlusxN_NEON(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
- const uint8_t* const top = static_cast<const uint8_t*>(top_row);
- const uint8_t* const left = static_cast<const uint8_t*>(left_column);
+inline void SmoothHorizontal16PlusxN_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint8_t*>(top_row);
+ const auto* const left = static_cast<const uint8_t*>(left_column);
const uint8_t top_right = top[width - 1];
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
const uint8x8_t top_right_v = vdup_n_u8(top_right);
@@ -447,7 +468,7 @@ void Init8bpp() {
assert(dsp != nullptr);
// 4x4
dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<4, 4>;
+ Smooth4xN_NEON<4>;
dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<4, 4>;
dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmoothHorizontal] =
@@ -455,7 +476,7 @@ void Init8bpp() {
// 4x8
dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<4, 8>;
+ Smooth4xN_NEON<8>;
dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<4, 8>;
dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmoothHorizontal] =
@@ -463,7 +484,7 @@ void Init8bpp() {
// 4x16
dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<4, 16>;
+ Smooth4xN_NEON<16>;
dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<4, 16>;
dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmoothHorizontal] =
@@ -471,7 +492,7 @@ void Init8bpp() {
// 8x4
dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<8, 4>;
+ Smooth8xN_NEON<4>;
dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<8, 4>;
dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmoothHorizontal] =
@@ -479,7 +500,7 @@ void Init8bpp() {
// 8x8
dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<8, 8>;
+ Smooth8xN_NEON<8>;
dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<8, 8>;
dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmoothHorizontal] =
@@ -487,7 +508,7 @@ void Init8bpp() {
// 8x16
dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<8, 16>;
+ Smooth8xN_NEON<16>;
dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<8, 16>;
dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmoothHorizontal] =
@@ -495,7 +516,7 @@ void Init8bpp() {
// 8x32
dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmooth] =
- Smooth4Or8xN_NEON<8, 32>;
+ Smooth8xN_NEON<32>;
dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmoothVertical] =
SmoothVertical4Or8xN_NEON<8, 32>;
dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmoothHorizontal] =
@@ -601,7 +622,535 @@ void Init8bpp() {
} // namespace
} // namespace low_bitdepth
-void IntraPredSmoothInit_NEON() { low_bitdepth::Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+// Note these constants are duplicated from intrapred.cc to allow the compiler
+// to have visibility of the values. This helps reduce loads and in the
+// creation of the inverse weights.
+constexpr uint16_t kSmoothWeights[] = {
+#include "src/dsp/smooth_weights.inc"
+};
+
+template <int height>
+inline void Smooth4xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[3];
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t top_v = vld1_u16(top);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ const uint16x4_t weights_x_v = vld1_u16(kSmoothWeights);
+ const uint16x4_t scaled_weights_x = vsub_u16(vdup_n_u16(256), weights_x_v);
+
+ // Weighted top right doesn't change with each row.
+ const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
+
+ for (int y = 0; y < height; ++y) {
+ // Each variable in the running summation is named for the last item to be
+ // accumulated.
+ const uint32x4_t weighted_top =
+ vmlal_n_u16(weighted_tr, top_v, weights_y[y]);
+ const uint32x4_t weighted_left =
+ vmlal_n_u16(weighted_top, weights_x_v, left[y]);
+ const uint32x4_t weighted_bl =
+ vmlal_n_u16(weighted_left, bottom_left_v, 256 - weights_y[y]);
+
+ const uint16x4_t pred = vrshrn_n_u32(weighted_bl, kSmoothWeightScale + 1);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst), pred);
+ dst += stride;
+ }
+}
+
+// Common code between 8xH and [16|32|64]xH.
+inline void CalculatePred8(uint16_t* LIBGAV1_RESTRICT dst,
+ const uint32x4_t& weighted_corners_low,
+ const uint32x4_t& weighted_corners_high,
+ const uint16x4x2_t& top_vals,
+ const uint16x4x2_t& weights_x, const uint16_t left_y,
+ const uint16_t weight_y) {
+ // Each variable in the running summation is named for the last item to be
+ // accumulated.
+ const uint32x4_t weighted_top_low =
+ vmlal_n_u16(weighted_corners_low, top_vals.val[0], weight_y);
+ const uint32x4_t weighted_edges_low =
+ vmlal_n_u16(weighted_top_low, weights_x.val[0], left_y);
+
+ const uint16x4_t pred_low =
+ vrshrn_n_u32(weighted_edges_low, kSmoothWeightScale + 1);
+ vst1_u16(dst, pred_low);
+
+ const uint32x4_t weighted_top_high =
+ vmlal_n_u16(weighted_corners_high, top_vals.val[1], weight_y);
+ const uint32x4_t weighted_edges_high =
+ vmlal_n_u16(weighted_top_high, weights_x.val[1], left_y);
+
+ const uint16x4_t pred_high =
+ vrshrn_n_u32(weighted_edges_high, kSmoothWeightScale + 1);
+ vst1_u16(dst + 4, pred_high);
+}
+
+template <int height>
+inline void Smooth8xH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[7];
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4x2_t top_vals = {vld1_u16(top), vld1_u16(top + 4)};
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ const uint16x4x2_t weights_x = {vld1_u16(kSmoothWeights + 4),
+ vld1_u16(kSmoothWeights + 8)};
+ // Weighted top right doesn't change with each row.
+ const uint32x4_t weighted_tr_low =
+ vmull_n_u16(vsub_u16(vdup_n_u16(256), weights_x.val[0]), top_right);
+ const uint32x4_t weighted_tr_high =
+ vmull_n_u16(vsub_u16(vdup_n_u16(256), weights_x.val[1]), top_right);
+
+ for (int y = 0; y < height; ++y) {
+ // |weighted_bl| is invariant across the row.
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+ const uint32x4_t weighted_corners_low =
+ vaddq_u32(weighted_bl, weighted_tr_low);
+ const uint32x4_t weighted_corners_high =
+ vaddq_u32(weighted_bl, weighted_tr_high);
+ CalculatePred8(reinterpret_cast<uint16_t*>(dst), weighted_corners_low,
+ weighted_corners_high, top_vals, weights_x, left[y],
+ weights_y[y]);
+ dst += stride;
+ }
+}
+
+// For width 16 and above.
+template <int width, int height>
+inline void SmoothWxH_NEON(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[width - 1];
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t weight_scaling = vdup_n_u16(256);
+ // Precompute weighted values that don't vary with |y|.
+ uint32x4_t weighted_tr_low[width >> 3];
+ uint32x4_t weighted_tr_high[width >> 3];
+ for (int i = 0; i < width >> 3; ++i) {
+ const int x = i << 3;
+ const uint16x4_t weights_x_low = vld1_u16(kSmoothWeights + width - 4 + x);
+ weighted_tr_low[i] =
+ vmull_n_u16(vsub_u16(weight_scaling, weights_x_low), top_right);
+ const uint16x4_t weights_x_high = vld1_u16(kSmoothWeights + width + x);
+ weighted_tr_high[i] =
+ vmull_n_u16(vsub_u16(weight_scaling, weights_x_high), top_right);
+ }
+
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ for (int y = 0; y < height; ++y) {
+ // |weighted_bl| is invariant across the row.
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+ auto* dst_x = reinterpret_cast<uint16_t*>(dst);
+ for (int i = 0; i < width >> 3; ++i) {
+ const int x = i << 3;
+ const uint16x4x2_t top_vals = {vld1_u16(top + x), vld1_u16(top + x + 4)};
+ const uint32x4_t weighted_corners_low =
+ vaddq_u32(weighted_bl, weighted_tr_low[i]);
+ const uint32x4_t weighted_corners_high =
+ vaddq_u32(weighted_bl, weighted_tr_high[i]);
+ // Accumulate weighted edge values and store.
+ const uint16x4x2_t weights_x = {vld1_u16(kSmoothWeights + width - 4 + x),
+ vld1_u16(kSmoothWeights + width + x)};
+ CalculatePred8(dst_x, weighted_corners_low, weighted_corners_high,
+ top_vals, weights_x, left[y], weights_y[y]);
+ dst_x += 8;
+ }
+ dst += stride;
+ }
+}
+
+template <int height>
+inline void SmoothVertical4xH_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t top_v = vld1_u16(top);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+ const uint32x4_t weighted_top =
+ vmlal_n_u16(weighted_bl, top_v, weights_y[y]);
+ vst1_u16(dst16, vrshrn_n_u32(weighted_top, kSmoothWeightScale));
+
+ dst += stride;
+ }
+}
+
+template <int height>
+inline void SmoothVertical8xH_NEON(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t top_low = vld1_u16(top);
+ const uint16x4_t top_high = vld1_u16(top + 4);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ // |weighted_bl| is invariant across the row.
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+
+ const uint32x4_t weighted_top_low =
+ vmlal_n_u16(weighted_bl, top_low, weights_y[y]);
+ vst1_u16(dst16, vrshrn_n_u32(weighted_top_low, kSmoothWeightScale));
+
+ const uint32x4_t weighted_top_high =
+ vmlal_n_u16(weighted_bl, top_high, weights_y[y]);
+ vst1_u16(dst16 + 4, vrshrn_n_u32(weighted_top_high, kSmoothWeightScale));
+ dst += stride;
+ }
+}
+
+// For width 16 and above.
+template <int width, int height>
+inline void SmoothVerticalWxH_NEON(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t bottom_left = left[height - 1];
+ const uint16_t* const weights_y = kSmoothWeights + height - 4;
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ uint16x4x2_t top_vals[width >> 3];
+ for (int i = 0; i < width >> 3; ++i) {
+ const int x = i << 3;
+ top_vals[i] = {vld1_u16(top + x), vld1_u16(top + x + 4)};
+ }
+
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ for (int y = 0; y < height; ++y) {
+ // |weighted_bl| is invariant across the row.
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+
+ auto* dst_x = reinterpret_cast<uint16_t*>(dst);
+ for (int i = 0; i < width >> 3; ++i) {
+ const uint32x4_t weighted_top_low =
+ vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]);
+ vst1_u16(dst_x, vrshrn_n_u32(weighted_top_low, kSmoothWeightScale));
+
+ const uint32x4_t weighted_top_high =
+ vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]);
+ vst1_u16(dst_x + 4, vrshrn_n_u32(weighted_top_high, kSmoothWeightScale));
+ dst_x += 8;
+ }
+ dst += stride;
+ }
+}
+
+template <int height>
+inline void SmoothHorizontal4xH_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[3];
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t weights_x = vld1_u16(kSmoothWeights);
+ const uint16x4_t scaled_weights_x = vsub_u16(vdup_n_u16(256), weights_x);
+
+ const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint32x4_t weighted_left =
+ vmlal_n_u16(weighted_tr, weights_x, left[y]);
+ vst1_u16(dst16, vrshrn_n_u32(weighted_left, kSmoothWeightScale));
+ dst += stride;
+ }
+}
+
+template <int height>
+inline void SmoothHorizontal8xH_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[7];
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4x2_t weights_x = {vld1_u16(kSmoothWeights + 4),
+ vld1_u16(kSmoothWeights + 8)};
+
+ const uint32x4_t weighted_tr_low =
+ vmull_n_u16(vsub_u16(vdup_n_u16(256), weights_x.val[0]), top_right);
+ const uint32x4_t weighted_tr_high =
+ vmull_n_u16(vsub_u16(vdup_n_u16(256), weights_x.val[1]), top_right);
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst16 = reinterpret_cast<uint16_t*>(dst);
+ const uint16_t left_y = left[y];
+ const uint32x4_t weighted_left_low =
+ vmlal_n_u16(weighted_tr_low, weights_x.val[0], left_y);
+ vst1_u16(dst16, vrshrn_n_u32(weighted_left_low, kSmoothWeightScale));
+
+ const uint32x4_t weighted_left_high =
+ vmlal_n_u16(weighted_tr_high, weights_x.val[1], left_y);
+ vst1_u16(dst16 + 4, vrshrn_n_u32(weighted_left_high, kSmoothWeightScale));
+ dst += stride;
+ }
+}
+
+// For width 16 and above.
+template <int width, int height>
+inline void SmoothHorizontalWxH_NEON(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
+ const auto* const top = static_cast<const uint16_t*>(top_row);
+ const auto* const left = static_cast<const uint16_t*>(left_column);
+ const uint16_t top_right = top[width - 1];
+
+ auto* dst = static_cast<uint8_t*>(dest);
+
+ const uint16x4_t weight_scaling = vdup_n_u16(256);
+
+ uint16x4_t weights_x_low[width >> 3];
+ uint16x4_t weights_x_high[width >> 3];
+ uint32x4_t weighted_tr_low[width >> 3];
+ uint32x4_t weighted_tr_high[width >> 3];
+ for (int i = 0; i < width >> 3; ++i) {
+ const int x = i << 3;
+ weights_x_low[i] = vld1_u16(kSmoothWeights + width - 4 + x);
+ weighted_tr_low[i] =
+ vmull_n_u16(vsub_u16(weight_scaling, weights_x_low[i]), top_right);
+ weights_x_high[i] = vld1_u16(kSmoothWeights + width + x);
+ weighted_tr_high[i] =
+ vmull_n_u16(vsub_u16(weight_scaling, weights_x_high[i]), top_right);
+ }
+
+ for (int y = 0; y < height; ++y) {
+ auto* dst_x = reinterpret_cast<uint16_t*>(dst);
+ const uint16_t left_y = left[y];
+ for (int i = 0; i < width >> 3; ++i) {
+ const uint32x4_t weighted_left_low =
+ vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y);
+ vst1_u16(dst_x, vrshrn_n_u32(weighted_left_low, kSmoothWeightScale));
+
+ const uint32x4_t weighted_left_high =
+ vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y);
+ vst1_u16(dst_x + 4, vrshrn_n_u32(weighted_left_high, kSmoothWeightScale));
+ dst_x += 8;
+ }
+ dst += stride;
+ }
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ // 4x4
+ dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmooth] =
+ Smooth4xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmoothVertical] =
+ SmoothVertical4xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize4x4][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal4xH_NEON<4>;
+
+ // 4x8
+ dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmooth] =
+ Smooth4xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmoothVertical] =
+ SmoothVertical4xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize4x8][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal4xH_NEON<8>;
+
+ // 4x16
+ dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmooth] =
+ Smooth4xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmoothVertical] =
+ SmoothVertical4xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize4x16][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal4xH_NEON<16>;
+
+ // 8x4
+ dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmooth] =
+ Smooth8xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmoothVertical] =
+ SmoothVertical8xH_NEON<4>;
+ dsp->intra_predictors[kTransformSize8x4][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal8xH_NEON<4>;
+
+ // 8x8
+ dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmooth] =
+ Smooth8xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmoothVertical] =
+ SmoothVertical8xH_NEON<8>;
+ dsp->intra_predictors[kTransformSize8x8][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal8xH_NEON<8>;
+
+ // 8x16
+ dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmooth] =
+ Smooth8xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmoothVertical] =
+ SmoothVertical8xH_NEON<16>;
+ dsp->intra_predictors[kTransformSize8x16][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal8xH_NEON<16>;
+
+ // 8x32
+ dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmooth] =
+ Smooth8xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmoothVertical] =
+ SmoothVertical8xH_NEON<32>;
+ dsp->intra_predictors[kTransformSize8x32][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontal8xH_NEON<32>;
+
+ // 16x4
+ dsp->intra_predictors[kTransformSize16x4][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<16, 4>;
+ dsp->intra_predictors[kTransformSize16x4][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<16, 4>;
+ dsp->intra_predictors[kTransformSize16x4][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<16, 4>;
+
+ // 16x8
+ dsp->intra_predictors[kTransformSize16x8][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<16, 8>;
+ dsp->intra_predictors[kTransformSize16x8][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<16, 8>;
+ dsp->intra_predictors[kTransformSize16x8][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<16, 8>;
+
+ // 16x16
+ dsp->intra_predictors[kTransformSize16x16][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<16, 16>;
+ dsp->intra_predictors[kTransformSize16x16][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<16, 16>;
+ dsp->intra_predictors[kTransformSize16x16][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<16, 16>;
+
+ // 16x32
+ dsp->intra_predictors[kTransformSize16x32][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<16, 32>;
+ dsp->intra_predictors[kTransformSize16x32][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<16, 32>;
+ dsp->intra_predictors[kTransformSize16x32][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<16, 32>;
+
+ // 16x64
+ dsp->intra_predictors[kTransformSize16x64][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<16, 64>;
+ dsp->intra_predictors[kTransformSize16x64][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<16, 64>;
+ dsp->intra_predictors[kTransformSize16x64][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<16, 64>;
+
+ // 32x8
+ dsp->intra_predictors[kTransformSize32x8][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<32, 8>;
+ dsp->intra_predictors[kTransformSize32x8][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<32, 8>;
+ dsp->intra_predictors[kTransformSize32x8][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<32, 8>;
+
+ // 32x16
+ dsp->intra_predictors[kTransformSize32x16][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<32, 16>;
+ dsp->intra_predictors[kTransformSize32x16][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<32, 16>;
+ dsp->intra_predictors[kTransformSize32x16][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<32, 16>;
+
+ // 32x32
+ dsp->intra_predictors[kTransformSize32x32][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<32, 32>;
+ dsp->intra_predictors[kTransformSize32x32][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<32, 32>;
+ dsp->intra_predictors[kTransformSize32x32][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<32, 32>;
+
+ // 32x64
+ dsp->intra_predictors[kTransformSize32x64][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<32, 64>;
+ dsp->intra_predictors[kTransformSize32x64][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<32, 64>;
+ dsp->intra_predictors[kTransformSize32x64][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<32, 64>;
+
+ // 64x16
+ dsp->intra_predictors[kTransformSize64x16][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<64, 16>;
+ dsp->intra_predictors[kTransformSize64x16][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<64, 16>;
+ dsp->intra_predictors[kTransformSize64x16][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<64, 16>;
+
+ // 64x32
+ dsp->intra_predictors[kTransformSize64x32][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<64, 32>;
+ dsp->intra_predictors[kTransformSize64x32][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<64, 32>;
+ dsp->intra_predictors[kTransformSize64x32][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<64, 32>;
+
+ // 64x64
+ dsp->intra_predictors[kTransformSize64x64][kIntraPredictorSmooth] =
+ SmoothWxH_NEON<64, 64>;
+ dsp->intra_predictors[kTransformSize64x64][kIntraPredictorSmoothVertical] =
+ SmoothVerticalWxH_NEON<64, 64>;
+ dsp->intra_predictors[kTransformSize64x64][kIntraPredictorSmoothHorizontal] =
+ SmoothHorizontalWxH_NEON<64, 64>;
+}
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void IntraPredSmoothInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/intrapred_smooth_neon.h b/src/dsp/arm/intrapred_smooth_neon.h
index edd01be..28b5bd5 100644
--- a/src/dsp/arm/intrapred_smooth_neon.h
+++ b/src/dsp/arm/intrapred_smooth_neon.h
@@ -144,6 +144,131 @@ void IntraPredSmoothInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_TransformSize64x64_IntraPredictorSmoothHorizontal \
LIBGAV1_CPU_NEON
+
+// 10bpp
+#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x4_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x8_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize4x16_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x4_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x8_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x16_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize8x32_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x4_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x8_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x16_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x32_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize16x64_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorSmooth LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x8_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x16_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x32_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize32x64_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x16_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x32_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorSmooth \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorSmoothVertical \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_TransformSize64x64_IntraPredictorSmoothHorizontal \
+ LIBGAV1_CPU_NEON
+
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_INTRAPRED_SMOOTH_NEON_H_
diff --git a/src/dsp/arm/inverse_transform_10bit_neon.cc b/src/dsp/arm/inverse_transform_10bit_neon.cc
index ff184a1..617accc 100644
--- a/src/dsp/arm/inverse_transform_10bit_neon.cc
+++ b/src/dsp/arm/inverse_transform_10bit_neon.cc
@@ -67,7 +67,8 @@ LIBGAV1_ALWAYS_INLINE void Transpose4x4(const int32x4_t in[4],
//------------------------------------------------------------------------------
template <int store_count>
-LIBGAV1_ALWAYS_INLINE void StoreDst(int32_t* dst, int32_t stride, int32_t idx,
+LIBGAV1_ALWAYS_INLINE void StoreDst(int32_t* LIBGAV1_RESTRICT dst,
+ int32_t stride, int32_t idx,
const int32x4_t* const s) {
assert(store_count % 4 == 0);
for (int i = 0; i < store_count; i += 4) {
@@ -79,8 +80,8 @@ LIBGAV1_ALWAYS_INLINE void StoreDst(int32_t* dst, int32_t stride, int32_t idx,
}
template <int load_count>
-LIBGAV1_ALWAYS_INLINE void LoadSrc(const int32_t* src, int32_t stride,
- int32_t idx, int32x4_t* x) {
+LIBGAV1_ALWAYS_INLINE void LoadSrc(const int32_t* LIBGAV1_RESTRICT src,
+ int32_t stride, int32_t idx, int32x4_t* x) {
assert(load_count % 4 == 0);
for (int i = 0; i < load_count; i += 4) {
x[i] = vld1q_s32(&src[i * stride + idx]);
@@ -168,8 +169,8 @@ LIBGAV1_ALWAYS_INLINE void HadamardRotation(int32x4_t* a, int32x4_t* b,
}
LIBGAV1_ALWAYS_INLINE void HadamardRotation(int32x4_t* a, int32x4_t* b,
- bool flip, const int32x4_t* min,
- const int32x4_t* max) {
+ bool flip, const int32x4_t min,
+ const int32x4_t max) {
int32x4_t x, y;
if (flip) {
y = vqaddq_s32(*b, *a);
@@ -178,8 +179,8 @@ LIBGAV1_ALWAYS_INLINE void HadamardRotation(int32x4_t* a, int32x4_t* b,
x = vqaddq_s32(*a, *b);
y = vqsubq_s32(*a, *b);
}
- *a = vmaxq_s32(vminq_s32(x, *max), *min);
- *b = vmaxq_s32(vminq_s32(y, *max), *min);
+ *a = vmaxq_s32(vminq_s32(x, max), min);
+ *b = vmaxq_s32(vminq_s32(y, max), min);
}
using ButterflyRotationFunc = void (*)(int32x4_t* a, int32x4_t* b, int angle,
@@ -248,8 +249,8 @@ LIBGAV1_ALWAYS_INLINE bool DctDcOnlyColumn(void* dest, int adjusted_tx_height,
template <ButterflyRotationFunc butterfly_rotation,
bool is_fast_butterfly = false>
-LIBGAV1_ALWAYS_INLINE void Dct4Stages(int32x4_t* s, const int32x4_t* min,
- const int32x4_t* max,
+LIBGAV1_ALWAYS_INLINE void Dct4Stages(int32x4_t* s, const int32x4_t min,
+ const int32x4_t max,
const bool is_last_stage) {
// stage 12.
if (is_fast_butterfly) {
@@ -293,12 +294,12 @@ LIBGAV1_ALWAYS_INLINE void Dct4_NEON(void* dest, int32_t step, bool is_row,
s[2] = x[1];
s[3] = x[3];
- Dct4Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/true);
+ Dct4Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/true);
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
- for (int i = 0; i < 4; ++i) {
- s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));
+ for (auto& i : s) {
+ i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
}
Transpose4x4(s, s);
}
@@ -307,8 +308,8 @@ LIBGAV1_ALWAYS_INLINE void Dct4_NEON(void* dest, int32_t step, bool is_row,
template <ButterflyRotationFunc butterfly_rotation,
bool is_fast_butterfly = false>
-LIBGAV1_ALWAYS_INLINE void Dct8Stages(int32x4_t* s, const int32x4_t* min,
- const int32x4_t* max,
+LIBGAV1_ALWAYS_INLINE void Dct8Stages(int32x4_t* s, const int32x4_t min,
+ const int32x4_t max,
const bool is_last_stage) {
// stage 8.
if (is_fast_butterfly) {
@@ -370,13 +371,13 @@ LIBGAV1_ALWAYS_INLINE void Dct8_NEON(void* dest, int32_t step, bool is_row,
s[6] = x[3];
s[7] = x[7];
- Dct4Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/false);
- Dct8Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/true);
+ Dct4Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/false);
+ Dct8Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/true);
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
- for (int i = 0; i < 8; ++i) {
- s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));
+ for (auto& i : s) {
+ i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
}
Transpose4x4(&s[0], &s[0]);
Transpose4x4(&s[4], &s[4]);
@@ -389,8 +390,8 @@ LIBGAV1_ALWAYS_INLINE void Dct8_NEON(void* dest, int32_t step, bool is_row,
template <ButterflyRotationFunc butterfly_rotation,
bool is_fast_butterfly = false>
-LIBGAV1_ALWAYS_INLINE void Dct16Stages(int32x4_t* s, const int32x4_t* min,
- const int32x4_t* max,
+LIBGAV1_ALWAYS_INLINE void Dct16Stages(int32x4_t* s, const int32x4_t min,
+ const int32x4_t max,
const bool is_last_stage) {
// stage 5.
if (is_fast_butterfly) {
@@ -487,14 +488,14 @@ LIBGAV1_ALWAYS_INLINE void Dct16_NEON(void* dest, int32_t step, bool is_row,
s[14] = x[7];
s[15] = x[15];
- Dct4Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/false);
- Dct8Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/false);
- Dct16Stages<butterfly_rotation>(s, &min, &max, /*is_last_stage=*/true);
+ Dct4Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/false);
+ Dct8Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/false);
+ Dct16Stages<butterfly_rotation>(s, min, max, /*is_last_stage=*/true);
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
- for (int i = 0; i < 16; ++i) {
- s[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(s[i], v_row_shift)));
+ for (auto& i : s) {
+ i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
}
for (int idx = 0; idx < 16; idx += 8) {
Transpose4x4(&s[idx], &s[idx]);
@@ -509,8 +510,8 @@ LIBGAV1_ALWAYS_INLINE void Dct16_NEON(void* dest, int32_t step, bool is_row,
template <ButterflyRotationFunc butterfly_rotation,
bool is_fast_butterfly = false>
-LIBGAV1_ALWAYS_INLINE void Dct32Stages(int32x4_t* s, const int32x4_t* min,
- const int32x4_t* max,
+LIBGAV1_ALWAYS_INLINE void Dct32Stages(int32x4_t* s, const int32x4_t min,
+ const int32x4_t max,
const bool is_last_stage) {
// stage 3
if (is_fast_butterfly) {
@@ -677,10 +678,10 @@ LIBGAV1_ALWAYS_INLINE void Dct32_NEON(void* dest, const int32_t step,
s[30] = x[15];
s[31] = x[31];
- Dct4Stages<ButterflyRotation_4>(s, &min, &max, /*is_last_stage=*/false);
- Dct8Stages<ButterflyRotation_4>(s, &min, &max, /*is_last_stage=*/false);
- Dct16Stages<ButterflyRotation_4>(s, &min, &max, /*is_last_stage=*/false);
- Dct32Stages<ButterflyRotation_4>(s, &min, &max, /*is_last_stage=*/true);
+ Dct4Stages<ButterflyRotation_4>(s, min, max, /*is_last_stage=*/false);
+ Dct8Stages<ButterflyRotation_4>(s, min, max, /*is_last_stage=*/false);
+ Dct16Stages<ButterflyRotation_4>(s, min, max, /*is_last_stage=*/false);
+ Dct32Stages<ButterflyRotation_4>(s, min, max, /*is_last_stage=*/true);
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
@@ -688,8 +689,8 @@ LIBGAV1_ALWAYS_INLINE void Dct32_NEON(void* dest, const int32_t step,
int32x4_t output[8];
Transpose4x4(&s[idx], &output[0]);
Transpose4x4(&s[idx + 4], &output[4]);
- for (int i = 0; i < 8; ++i) {
- output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift)));
+ for (auto& o : output) {
+ o = vmovl_s16(vqmovn_s32(vqrshlq_s32(o, v_row_shift)));
}
StoreDst<4>(dst, step, idx, &output[0]);
StoreDst<4>(dst, step, idx + 4, &output[4]);
@@ -764,13 +765,13 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
s[62] = x[31];
Dct4Stages<ButterflyRotation_4, /*is_fast_butterfly=*/true>(
- s, &min, &max, /*is_last_stage=*/false);
+ s, min, max, /*is_last_stage=*/false);
Dct8Stages<ButterflyRotation_4, /*is_fast_butterfly=*/true>(
- s, &min, &max, /*is_last_stage=*/false);
+ s, min, max, /*is_last_stage=*/false);
Dct16Stages<ButterflyRotation_4, /*is_fast_butterfly=*/true>(
- s, &min, &max, /*is_last_stage=*/false);
+ s, min, max, /*is_last_stage=*/false);
Dct32Stages<ButterflyRotation_4, /*is_fast_butterfly=*/true>(
- s, &min, &max, /*is_last_stage=*/false);
+ s, min, max, /*is_last_stage=*/false);
//-- start dct 64 stages
// stage 2.
@@ -792,22 +793,22 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
ButterflyRotation_FirstIsZero(&s[47], &s[48], 63 - 60, false);
// stage 4.
- HadamardRotation(&s[32], &s[33], false, &min, &max);
- HadamardRotation(&s[34], &s[35], true, &min, &max);
- HadamardRotation(&s[36], &s[37], false, &min, &max);
- HadamardRotation(&s[38], &s[39], true, &min, &max);
- HadamardRotation(&s[40], &s[41], false, &min, &max);
- HadamardRotation(&s[42], &s[43], true, &min, &max);
- HadamardRotation(&s[44], &s[45], false, &min, &max);
- HadamardRotation(&s[46], &s[47], true, &min, &max);
- HadamardRotation(&s[48], &s[49], false, &min, &max);
- HadamardRotation(&s[50], &s[51], true, &min, &max);
- HadamardRotation(&s[52], &s[53], false, &min, &max);
- HadamardRotation(&s[54], &s[55], true, &min, &max);
- HadamardRotation(&s[56], &s[57], false, &min, &max);
- HadamardRotation(&s[58], &s[59], true, &min, &max);
- HadamardRotation(&s[60], &s[61], false, &min, &max);
- HadamardRotation(&s[62], &s[63], true, &min, &max);
+ HadamardRotation(&s[32], &s[33], false, min, max);
+ HadamardRotation(&s[34], &s[35], true, min, max);
+ HadamardRotation(&s[36], &s[37], false, min, max);
+ HadamardRotation(&s[38], &s[39], true, min, max);
+ HadamardRotation(&s[40], &s[41], false, min, max);
+ HadamardRotation(&s[42], &s[43], true, min, max);
+ HadamardRotation(&s[44], &s[45], false, min, max);
+ HadamardRotation(&s[46], &s[47], true, min, max);
+ HadamardRotation(&s[48], &s[49], false, min, max);
+ HadamardRotation(&s[50], &s[51], true, min, max);
+ HadamardRotation(&s[52], &s[53], false, min, max);
+ HadamardRotation(&s[54], &s[55], true, min, max);
+ HadamardRotation(&s[56], &s[57], false, min, max);
+ HadamardRotation(&s[58], &s[59], true, min, max);
+ HadamardRotation(&s[60], &s[61], false, min, max);
+ HadamardRotation(&s[62], &s[63], true, min, max);
// stage 7.
ButterflyRotation_4(&s[62], &s[33], 60 - 0, true);
@@ -820,22 +821,22 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
ButterflyRotation_4(&s[49], &s[46], 60 - 48 + 64, true);
// stage 11.
- HadamardRotation(&s[32], &s[35], false, &min, &max);
- HadamardRotation(&s[33], &s[34], false, &min, &max);
- HadamardRotation(&s[36], &s[39], true, &min, &max);
- HadamardRotation(&s[37], &s[38], true, &min, &max);
- HadamardRotation(&s[40], &s[43], false, &min, &max);
- HadamardRotation(&s[41], &s[42], false, &min, &max);
- HadamardRotation(&s[44], &s[47], true, &min, &max);
- HadamardRotation(&s[45], &s[46], true, &min, &max);
- HadamardRotation(&s[48], &s[51], false, &min, &max);
- HadamardRotation(&s[49], &s[50], false, &min, &max);
- HadamardRotation(&s[52], &s[55], true, &min, &max);
- HadamardRotation(&s[53], &s[54], true, &min, &max);
- HadamardRotation(&s[56], &s[59], false, &min, &max);
- HadamardRotation(&s[57], &s[58], false, &min, &max);
- HadamardRotation(&s[60], &s[63], true, &min, &max);
- HadamardRotation(&s[61], &s[62], true, &min, &max);
+ HadamardRotation(&s[32], &s[35], false, min, max);
+ HadamardRotation(&s[33], &s[34], false, min, max);
+ HadamardRotation(&s[36], &s[39], true, min, max);
+ HadamardRotation(&s[37], &s[38], true, min, max);
+ HadamardRotation(&s[40], &s[43], false, min, max);
+ HadamardRotation(&s[41], &s[42], false, min, max);
+ HadamardRotation(&s[44], &s[47], true, min, max);
+ HadamardRotation(&s[45], &s[46], true, min, max);
+ HadamardRotation(&s[48], &s[51], false, min, max);
+ HadamardRotation(&s[49], &s[50], false, min, max);
+ HadamardRotation(&s[52], &s[55], true, min, max);
+ HadamardRotation(&s[53], &s[54], true, min, max);
+ HadamardRotation(&s[56], &s[59], false, min, max);
+ HadamardRotation(&s[57], &s[58], false, min, max);
+ HadamardRotation(&s[60], &s[63], true, min, max);
+ HadamardRotation(&s[61], &s[62], true, min, max);
// stage 16.
ButterflyRotation_4(&s[61], &s[34], 56, true);
@@ -848,22 +849,22 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
ButterflyRotation_4(&s[50], &s[45], 56 - 32 + 64, true);
// stage 21.
- HadamardRotation(&s[32], &s[39], false, &min, &max);
- HadamardRotation(&s[33], &s[38], false, &min, &max);
- HadamardRotation(&s[34], &s[37], false, &min, &max);
- HadamardRotation(&s[35], &s[36], false, &min, &max);
- HadamardRotation(&s[40], &s[47], true, &min, &max);
- HadamardRotation(&s[41], &s[46], true, &min, &max);
- HadamardRotation(&s[42], &s[45], true, &min, &max);
- HadamardRotation(&s[43], &s[44], true, &min, &max);
- HadamardRotation(&s[48], &s[55], false, &min, &max);
- HadamardRotation(&s[49], &s[54], false, &min, &max);
- HadamardRotation(&s[50], &s[53], false, &min, &max);
- HadamardRotation(&s[51], &s[52], false, &min, &max);
- HadamardRotation(&s[56], &s[63], true, &min, &max);
- HadamardRotation(&s[57], &s[62], true, &min, &max);
- HadamardRotation(&s[58], &s[61], true, &min, &max);
- HadamardRotation(&s[59], &s[60], true, &min, &max);
+ HadamardRotation(&s[32], &s[39], false, min, max);
+ HadamardRotation(&s[33], &s[38], false, min, max);
+ HadamardRotation(&s[34], &s[37], false, min, max);
+ HadamardRotation(&s[35], &s[36], false, min, max);
+ HadamardRotation(&s[40], &s[47], true, min, max);
+ HadamardRotation(&s[41], &s[46], true, min, max);
+ HadamardRotation(&s[42], &s[45], true, min, max);
+ HadamardRotation(&s[43], &s[44], true, min, max);
+ HadamardRotation(&s[48], &s[55], false, min, max);
+ HadamardRotation(&s[49], &s[54], false, min, max);
+ HadamardRotation(&s[50], &s[53], false, min, max);
+ HadamardRotation(&s[51], &s[52], false, min, max);
+ HadamardRotation(&s[56], &s[63], true, min, max);
+ HadamardRotation(&s[57], &s[62], true, min, max);
+ HadamardRotation(&s[58], &s[61], true, min, max);
+ HadamardRotation(&s[59], &s[60], true, min, max);
// stage 25.
ButterflyRotation_4(&s[59], &s[36], 48, true);
@@ -876,22 +877,22 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
ButterflyRotation_4(&s[52], &s[43], 112, true);
// stage 28.
- HadamardRotation(&s[32], &s[47], false, &min, &max);
- HadamardRotation(&s[33], &s[46], false, &min, &max);
- HadamardRotation(&s[34], &s[45], false, &min, &max);
- HadamardRotation(&s[35], &s[44], false, &min, &max);
- HadamardRotation(&s[36], &s[43], false, &min, &max);
- HadamardRotation(&s[37], &s[42], false, &min, &max);
- HadamardRotation(&s[38], &s[41], false, &min, &max);
- HadamardRotation(&s[39], &s[40], false, &min, &max);
- HadamardRotation(&s[48], &s[63], true, &min, &max);
- HadamardRotation(&s[49], &s[62], true, &min, &max);
- HadamardRotation(&s[50], &s[61], true, &min, &max);
- HadamardRotation(&s[51], &s[60], true, &min, &max);
- HadamardRotation(&s[52], &s[59], true, &min, &max);
- HadamardRotation(&s[53], &s[58], true, &min, &max);
- HadamardRotation(&s[54], &s[57], true, &min, &max);
- HadamardRotation(&s[55], &s[56], true, &min, &max);
+ HadamardRotation(&s[32], &s[47], false, min, max);
+ HadamardRotation(&s[33], &s[46], false, min, max);
+ HadamardRotation(&s[34], &s[45], false, min, max);
+ HadamardRotation(&s[35], &s[44], false, min, max);
+ HadamardRotation(&s[36], &s[43], false, min, max);
+ HadamardRotation(&s[37], &s[42], false, min, max);
+ HadamardRotation(&s[38], &s[41], false, min, max);
+ HadamardRotation(&s[39], &s[40], false, min, max);
+ HadamardRotation(&s[48], &s[63], true, min, max);
+ HadamardRotation(&s[49], &s[62], true, min, max);
+ HadamardRotation(&s[50], &s[61], true, min, max);
+ HadamardRotation(&s[51], &s[60], true, min, max);
+ HadamardRotation(&s[52], &s[59], true, min, max);
+ HadamardRotation(&s[53], &s[58], true, min, max);
+ HadamardRotation(&s[54], &s[57], true, min, max);
+ HadamardRotation(&s[55], &s[56], true, min, max);
// stage 30.
ButterflyRotation_4(&s[55], &s[40], 32, true);
@@ -905,10 +906,10 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
// stage 31.
for (int i = 0; i < 32; i += 4) {
- HadamardRotation(&s[i], &s[63 - i], false, &min, &max);
- HadamardRotation(&s[i + 1], &s[63 - i - 1], false, &min, &max);
- HadamardRotation(&s[i + 2], &s[63 - i - 2], false, &min, &max);
- HadamardRotation(&s[i + 3], &s[63 - i - 3], false, &min, &max);
+ HadamardRotation(&s[i], &s[63 - i], false, min, max);
+ HadamardRotation(&s[i + 1], &s[63 - i - 1], false, min, max);
+ HadamardRotation(&s[i + 2], &s[63 - i - 2], false, min, max);
+ HadamardRotation(&s[i + 3], &s[63 - i - 3], false, min, max);
}
//-- end dct 64 stages
if (is_row) {
@@ -917,8 +918,8 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
int32x4_t output[8];
Transpose4x4(&s[idx], &output[0]);
Transpose4x4(&s[idx + 4], &output[4]);
- for (int i = 0; i < 8; ++i) {
- output[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(output[i], v_row_shift)));
+ for (auto& o : output) {
+ o = vmovl_s16(vqmovn_s32(vqrshlq_s32(o, v_row_shift)));
}
StoreDst<4>(dst, step, idx, &output[0]);
StoreDst<4>(dst, step, idx + 4, &output[4]);
@@ -1089,20 +1090,20 @@ LIBGAV1_ALWAYS_INLINE void Adst8_NEON(void* dest, int32_t step, bool is_row,
butterfly_rotation(&s[6], &s[7], 60 - 48, true);
// stage 3.
- HadamardRotation(&s[0], &s[4], false, &min, &max);
- HadamardRotation(&s[1], &s[5], false, &min, &max);
- HadamardRotation(&s[2], &s[6], false, &min, &max);
- HadamardRotation(&s[3], &s[7], false, &min, &max);
+ HadamardRotation(&s[0], &s[4], false, min, max);
+ HadamardRotation(&s[1], &s[5], false, min, max);
+ HadamardRotation(&s[2], &s[6], false, min, max);
+ HadamardRotation(&s[3], &s[7], false, min, max);
// stage 4.
butterfly_rotation(&s[4], &s[5], 48 - 0, true);
butterfly_rotation(&s[7], &s[6], 48 - 32, true);
// stage 5.
- HadamardRotation(&s[0], &s[2], false, &min, &max);
- HadamardRotation(&s[4], &s[6], false, &min, &max);
- HadamardRotation(&s[1], &s[3], false, &min, &max);
- HadamardRotation(&s[5], &s[7], false, &min, &max);
+ HadamardRotation(&s[0], &s[2], false, min, max);
+ HadamardRotation(&s[4], &s[6], false, min, max);
+ HadamardRotation(&s[1], &s[3], false, min, max);
+ HadamardRotation(&s[5], &s[7], false, min, max);
// stage 6.
butterfly_rotation(&s[2], &s[3], 32, true);
@@ -1120,8 +1121,8 @@ LIBGAV1_ALWAYS_INLINE void Adst8_NEON(void* dest, int32_t step, bool is_row,
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
- for (int i = 0; i < 8; ++i) {
- x[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[i], v_row_shift)));
+ for (auto& i : x) {
+ i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
}
Transpose4x4(&x[0], &x[0]);
Transpose4x4(&x[4], &x[4]);
@@ -1289,14 +1290,14 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
butterfly_rotation(&s[14], &s[15], 62 - 56, true);
// stage 3.
- HadamardRotation(&s[0], &s[8], false, &min, &max);
- HadamardRotation(&s[1], &s[9], false, &min, &max);
- HadamardRotation(&s[2], &s[10], false, &min, &max);
- HadamardRotation(&s[3], &s[11], false, &min, &max);
- HadamardRotation(&s[4], &s[12], false, &min, &max);
- HadamardRotation(&s[5], &s[13], false, &min, &max);
- HadamardRotation(&s[6], &s[14], false, &min, &max);
- HadamardRotation(&s[7], &s[15], false, &min, &max);
+ HadamardRotation(&s[0], &s[8], false, min, max);
+ HadamardRotation(&s[1], &s[9], false, min, max);
+ HadamardRotation(&s[2], &s[10], false, min, max);
+ HadamardRotation(&s[3], &s[11], false, min, max);
+ HadamardRotation(&s[4], &s[12], false, min, max);
+ HadamardRotation(&s[5], &s[13], false, min, max);
+ HadamardRotation(&s[6], &s[14], false, min, max);
+ HadamardRotation(&s[7], &s[15], false, min, max);
// stage 4.
butterfly_rotation(&s[8], &s[9], 56 - 0, true);
@@ -1305,14 +1306,14 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
butterfly_rotation(&s[15], &s[14], 8 + 32, true);
// stage 5.
- HadamardRotation(&s[0], &s[4], false, &min, &max);
- HadamardRotation(&s[8], &s[12], false, &min, &max);
- HadamardRotation(&s[1], &s[5], false, &min, &max);
- HadamardRotation(&s[9], &s[13], false, &min, &max);
- HadamardRotation(&s[2], &s[6], false, &min, &max);
- HadamardRotation(&s[10], &s[14], false, &min, &max);
- HadamardRotation(&s[3], &s[7], false, &min, &max);
- HadamardRotation(&s[11], &s[15], false, &min, &max);
+ HadamardRotation(&s[0], &s[4], false, min, max);
+ HadamardRotation(&s[8], &s[12], false, min, max);
+ HadamardRotation(&s[1], &s[5], false, min, max);
+ HadamardRotation(&s[9], &s[13], false, min, max);
+ HadamardRotation(&s[2], &s[6], false, min, max);
+ HadamardRotation(&s[10], &s[14], false, min, max);
+ HadamardRotation(&s[3], &s[7], false, min, max);
+ HadamardRotation(&s[11], &s[15], false, min, max);
// stage 6.
butterfly_rotation(&s[4], &s[5], 48 - 0, true);
@@ -1321,14 +1322,14 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
butterfly_rotation(&s[15], &s[14], 48 - 32, true);
// stage 7.
- HadamardRotation(&s[0], &s[2], false, &min, &max);
- HadamardRotation(&s[4], &s[6], false, &min, &max);
- HadamardRotation(&s[8], &s[10], false, &min, &max);
- HadamardRotation(&s[12], &s[14], false, &min, &max);
- HadamardRotation(&s[1], &s[3], false, &min, &max);
- HadamardRotation(&s[5], &s[7], false, &min, &max);
- HadamardRotation(&s[9], &s[11], false, &min, &max);
- HadamardRotation(&s[13], &s[15], false, &min, &max);
+ HadamardRotation(&s[0], &s[2], false, min, max);
+ HadamardRotation(&s[4], &s[6], false, min, max);
+ HadamardRotation(&s[8], &s[10], false, min, max);
+ HadamardRotation(&s[12], &s[14], false, min, max);
+ HadamardRotation(&s[1], &s[3], false, min, max);
+ HadamardRotation(&s[5], &s[7], false, min, max);
+ HadamardRotation(&s[9], &s[11], false, min, max);
+ HadamardRotation(&s[13], &s[15], false, min, max);
// stage 8.
butterfly_rotation(&s[2], &s[3], 32, true);
@@ -1356,8 +1357,8 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
if (is_row) {
const int32x4_t v_row_shift = vdupq_n_s32(-row_shift);
- for (int i = 0; i < 16; ++i) {
- x[i] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[i], v_row_shift)));
+ for (auto& i : x) {
+ i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
}
for (int idx = 0; idx < 16; idx += 8) {
Transpose4x4(&x[idx], &x[idx]);
@@ -1517,59 +1518,23 @@ LIBGAV1_ALWAYS_INLINE bool Identity4DcOnly(void* dest, int adjusted_tx_height,
template <int identity_size>
LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
Array2DView<uint16_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int32_t* source) {
- static_assert(identity_size == 4 || identity_size == 8 || identity_size == 16,
+ const int tx_width, const int tx_height,
+ const int32_t* LIBGAV1_RESTRICT source) {
+ static_assert(identity_size == 4 || identity_size == 8 ||
+ identity_size == 16 || identity_size == 32,
"Invalid identity_size.");
const int stride = frame.columns();
- uint16_t* dst = frame[start_y] + start_x;
+ uint16_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const int32x4_t v_dual_round = vdupq_n_s32((1 + (1 << 4)) << 11);
const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
- if (tx_width == 4) {
- int i = 0;
- do {
- int32x4x2_t v_src, v_dst_i, a, b;
- v_src.val[0] = vld1q_s32(&source[i * 4]);
- v_src.val[1] = vld1q_s32(&source[(i * 4) + 4]);
- if (identity_size == 4) {
- v_dst_i.val[0] =
- vmlaq_n_s32(v_dual_round, v_src.val[0], kIdentity4Multiplier);
- v_dst_i.val[1] =
- vmlaq_n_s32(v_dual_round, v_src.val[1], kIdentity4Multiplier);
- a.val[0] = vshrq_n_s32(v_dst_i.val[0], 4 + 12);
- a.val[1] = vshrq_n_s32(v_dst_i.val[1], 4 + 12);
- } else if (identity_size == 8) {
- v_dst_i.val[0] = vaddq_s32(v_src.val[0], v_src.val[0]);
- v_dst_i.val[1] = vaddq_s32(v_src.val[1], v_src.val[1]);
- a.val[0] = vrshrq_n_s32(v_dst_i.val[0], 4);
- a.val[1] = vrshrq_n_s32(v_dst_i.val[1], 4);
- } else { // identity_size == 16
- v_dst_i.val[0] =
- vmlaq_n_s32(v_dual_round, v_src.val[0], kIdentity16Multiplier);
- v_dst_i.val[1] =
- vmlaq_n_s32(v_dual_round, v_src.val[1], kIdentity16Multiplier);
- a.val[0] = vshrq_n_s32(v_dst_i.val[0], 4 + 12);
- a.val[1] = vshrq_n_s32(v_dst_i.val[1], 4 + 12);
- }
- uint16x4x2_t frame_data;
- frame_data.val[0] = vld1_u16(dst);
- frame_data.val[1] = vld1_u16(dst + stride);
- b.val[0] = vaddw_s16(a.val[0], vreinterpret_s16_u16(frame_data.val[0]));
- b.val[1] = vaddw_s16(a.val[1], vreinterpret_s16_u16(frame_data.val[1]));
- vst1_u16(dst, vmin_u16(vqmovun_s32(b.val[0]), v_max_bitdepth));
- vst1_u16(dst + stride, vmin_u16(vqmovun_s32(b.val[1]), v_max_bitdepth));
- dst += stride << 1;
- i += 2;
- } while (i < tx_height);
- } else {
- int i = 0;
- do {
- const int row = i * tx_width;
- int j = 0;
+ if (identity_size < 32) {
+ if (tx_width == 4) {
+ int i = 0;
do {
int32x4x2_t v_src, v_dst_i, a, b;
- v_src.val[0] = vld1q_s32(&source[row + j]);
- v_src.val[1] = vld1q_s32(&source[row + j + 4]);
+ v_src.val[0] = vld1q_s32(&source[i * 4]);
+ v_src.val[1] = vld1q_s32(&source[(i * 4) + 4]);
if (identity_size == 4) {
v_dst_i.val[0] =
vmlaq_n_s32(v_dual_round, v_src.val[0], kIdentity4Multiplier);
@@ -1591,13 +1556,72 @@ LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
a.val[1] = vshrq_n_s32(v_dst_i.val[1], 4 + 12);
}
uint16x4x2_t frame_data;
- frame_data.val[0] = vld1_u16(dst + j);
- frame_data.val[1] = vld1_u16(dst + j + 4);
+ frame_data.val[0] = vld1_u16(dst);
+ frame_data.val[1] = vld1_u16(dst + stride);
b.val[0] = vaddw_s16(a.val[0], vreinterpret_s16_u16(frame_data.val[0]));
b.val[1] = vaddw_s16(a.val[1], vreinterpret_s16_u16(frame_data.val[1]));
- vst1_u16(dst + j, vmin_u16(vqmovun_s32(b.val[0]), v_max_bitdepth));
- vst1_u16(dst + j + 4, vmin_u16(vqmovun_s32(b.val[1]), v_max_bitdepth));
- j += 8;
+ vst1_u16(dst, vmin_u16(vqmovun_s32(b.val[0]), v_max_bitdepth));
+ vst1_u16(dst + stride, vmin_u16(vqmovun_s32(b.val[1]), v_max_bitdepth));
+ dst += stride << 1;
+ i += 2;
+ } while (i < tx_height);
+ } else {
+ int i = 0;
+ do {
+ const int row = i * tx_width;
+ int j = 0;
+ do {
+ int32x4x2_t v_src, v_dst_i, a, b;
+ v_src.val[0] = vld1q_s32(&source[row + j]);
+ v_src.val[1] = vld1q_s32(&source[row + j + 4]);
+ if (identity_size == 4) {
+ v_dst_i.val[0] =
+ vmlaq_n_s32(v_dual_round, v_src.val[0], kIdentity4Multiplier);
+ v_dst_i.val[1] =
+ vmlaq_n_s32(v_dual_round, v_src.val[1], kIdentity4Multiplier);
+ a.val[0] = vshrq_n_s32(v_dst_i.val[0], 4 + 12);
+ a.val[1] = vshrq_n_s32(v_dst_i.val[1], 4 + 12);
+ } else if (identity_size == 8) {
+ v_dst_i.val[0] = vaddq_s32(v_src.val[0], v_src.val[0]);
+ v_dst_i.val[1] = vaddq_s32(v_src.val[1], v_src.val[1]);
+ a.val[0] = vrshrq_n_s32(v_dst_i.val[0], 4);
+ a.val[1] = vrshrq_n_s32(v_dst_i.val[1], 4);
+ } else { // identity_size == 16
+ v_dst_i.val[0] =
+ vmlaq_n_s32(v_dual_round, v_src.val[0], kIdentity16Multiplier);
+ v_dst_i.val[1] =
+ vmlaq_n_s32(v_dual_round, v_src.val[1], kIdentity16Multiplier);
+ a.val[0] = vshrq_n_s32(v_dst_i.val[0], 4 + 12);
+ a.val[1] = vshrq_n_s32(v_dst_i.val[1], 4 + 12);
+ }
+ uint16x4x2_t frame_data;
+ frame_data.val[0] = vld1_u16(dst + j);
+ frame_data.val[1] = vld1_u16(dst + j + 4);
+ b.val[0] =
+ vaddw_s16(a.val[0], vreinterpret_s16_u16(frame_data.val[0]));
+ b.val[1] =
+ vaddw_s16(a.val[1], vreinterpret_s16_u16(frame_data.val[1]));
+ vst1_u16(dst + j, vmin_u16(vqmovun_s32(b.val[0]), v_max_bitdepth));
+ vst1_u16(dst + j + 4,
+ vmin_u16(vqmovun_s32(b.val[1]), v_max_bitdepth));
+ j += 8;
+ } while (j < tx_width);
+ dst += stride;
+ } while (++i < tx_height);
+ }
+ } else {
+ int i = 0;
+ do {
+ const int row = i * tx_width;
+ int j = 0;
+ do {
+ const int32x4_t v_dst_i = vld1q_s32(&source[row + j]);
+ const uint16x4_t frame_data = vld1_u16(dst + j);
+ const int32x4_t a = vrshrq_n_s32(v_dst_i, 2);
+ const int32x4_t b = vaddw_s16(a, vreinterpret_s16_u16(frame_data));
+ const uint16x4_t d = vmin_u16(vqmovun_s32(b), v_max_bitdepth);
+ vst1_u16(dst + j, d);
+ j += 4;
} while (j < tx_width);
dst += stride;
} while (++i < tx_height);
@@ -1606,9 +1630,10 @@ LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
LIBGAV1_ALWAYS_INLINE void Identity4RowColumnStoreToFrame(
Array2DView<uint16_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int32_t* source) {
+ const int tx_width, const int tx_height,
+ const int32_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint16_t* dst = frame[start_y] + start_x;
+ uint16_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const int32x4_t v_round = vdupq_n_s32((1 + (0)) << 11);
const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
@@ -1747,6 +1772,119 @@ LIBGAV1_ALWAYS_INLINE bool Identity16DcOnly(void* dest, int adjusted_tx_height,
return true;
}
+LIBGAV1_ALWAYS_INLINE void Identity32Row16_NEON(void* dest,
+ const int32_t step) {
+ auto* const dst = static_cast<int32_t*>(dest);
+
+ // When combining the identity32 multiplier with the row shift, the
+ // calculation for tx_height equal to 16 can be simplified from
+ // ((A * 4) + 1) >> 1) to (A * 2).
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 32; j += 4) {
+ const int32x4_t v_src = vld1q_s32(&dst[i * step + j]);
+ const int32x4_t v_dst_i = vqaddq_s32(v_src, v_src);
+ vst1q_s32(&dst[i * step + j], v_dst_i);
+ }
+ }
+}
+
+LIBGAV1_ALWAYS_INLINE bool Identity32DcOnly(void* dest,
+ int adjusted_tx_height) {
+ if (adjusted_tx_height > 1) return false;
+
+ auto* dst = static_cast<int32_t*>(dest);
+ const int32x2_t v_src0 = vdup_n_s32(dst[0]);
+ const int32x2_t v_src =
+ vqrdmulh_n_s32(v_src0, kTransformRowMultiplier << (31 - 12));
+ // When combining the identity32 multiplier with the row shift, the
+ // calculation for tx_height equal to 16 can be simplified from
+ // ((A * 4) + 1) >> 1) to (A * 2).
+ const int32x2_t v_dst_0 = vqadd_s32(v_src, v_src);
+ vst1_lane_s32(dst, v_dst_0, 0);
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// Walsh Hadamard Transform.
+
+// Process 4 wht4 rows and columns.
+LIBGAV1_ALWAYS_INLINE void Wht4_NEON(uint16_t* LIBGAV1_RESTRICT dst,
+ const int dst_stride,
+ const void* LIBGAV1_RESTRICT source,
+ const int adjusted_tx_height) {
+ const auto* const src = static_cast<const int32_t*>(source);
+ int32x4_t s[4];
+
+ if (adjusted_tx_height == 1) {
+ // Special case: only src[0] is nonzero.
+ // src[0] 0 0 0
+ // 0 0 0 0
+ // 0 0 0 0
+ // 0 0 0 0
+ //
+ // After the row and column transforms are applied, we have:
+ // f h h h
+ // g i i i
+ // g i i i
+ // g i i i
+ // where f, g, h, i are computed as follows.
+ int32_t f = (src[0] >> 2) - (src[0] >> 3);
+ const int32_t g = f >> 1;
+ f = f - (f >> 1);
+ const int32_t h = (src[0] >> 3) - (src[0] >> 4);
+ const int32_t i = (src[0] >> 4);
+ s[0] = vdupq_n_s32(h);
+ s[0] = vsetq_lane_s32(f, s[0], 0);
+ s[1] = vdupq_n_s32(i);
+ s[1] = vsetq_lane_s32(g, s[1], 0);
+ s[2] = s[3] = s[1];
+ } else {
+ // Load the 4x4 source in transposed form.
+ int32x4x4_t columns = vld4q_s32(src);
+
+ // Shift right and permute the columns for the WHT.
+ s[0] = vshrq_n_s32(columns.val[0], 2);
+ s[2] = vshrq_n_s32(columns.val[1], 2);
+ s[3] = vshrq_n_s32(columns.val[2], 2);
+ s[1] = vshrq_n_s32(columns.val[3], 2);
+
+ // Row transforms.
+ s[0] = vaddq_s32(s[0], s[2]);
+ s[3] = vsubq_s32(s[3], s[1]);
+ int32x4_t e = vhsubq_s32(s[0], s[3]); // e = (s[0] - s[3]) >> 1
+ s[1] = vsubq_s32(e, s[1]);
+ s[2] = vsubq_s32(e, s[2]);
+ s[0] = vsubq_s32(s[0], s[1]);
+ s[3] = vaddq_s32(s[3], s[2]);
+
+ int32x4_t x[4];
+ Transpose4x4(s, x);
+
+ s[0] = x[0];
+ s[2] = x[1];
+ s[3] = x[2];
+ s[1] = x[3];
+
+ // Column transforms.
+ s[0] = vaddq_s32(s[0], s[2]);
+ s[3] = vsubq_s32(s[3], s[1]);
+ e = vhsubq_s32(s[0], s[3]); // e = (s[0] - s[3]) >> 1
+ s[1] = vsubq_s32(e, s[1]);
+ s[2] = vsubq_s32(e, s[2]);
+ s[0] = vsubq_s32(s[0], s[1]);
+ s[3] = vaddq_s32(s[3], s[2]);
+ }
+
+ // Store to frame.
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ for (int row = 0; row < 4; row += 1) {
+ const uint16x4_t frame_data = vld1_u16(dst);
+ const int32x4_t b = vaddw_s16(s[row], vreinterpret_s16_u16(frame_data));
+ vst1_u16(dst, vmin_u16(vqmovun_s32(b), v_max_bitdepth));
+ dst += dst_stride;
+ }
+}
+
//------------------------------------------------------------------------------
// row/column transform loops
@@ -1837,11 +1975,12 @@ LIBGAV1_ALWAYS_INLINE void RowShift(int32_t* source, int num_rows,
template <int tx_height, bool enable_flip_rows = false>
LIBGAV1_ALWAYS_INLINE void StoreToFrameWithRound(
Array2DView<uint16_t> frame, const int start_x, const int start_y,
- const int tx_width, const int32_t* source, TransformType tx_type) {
+ const int tx_width, const int32_t* LIBGAV1_RESTRICT source,
+ TransformType tx_type) {
const bool flip_rows =
enable_flip_rows ? kTransformFlipRowsMask.Contains(tx_type) : false;
const int stride = frame.columns();
- uint16_t* dst = frame[start_y] + start_x;
+ uint16_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (tx_width == 4) {
for (int i = 0; i < tx_height; ++i) {
@@ -1887,7 +2026,7 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_height = kTransformHeight[tx_size];
const bool should_round = (tx_height == 8);
- const int row_shift = (tx_height == 16);
+ const int row_shift = static_cast<int>(tx_height == 16);
if (DctDcOnly<4>(src, adjusted_tx_height, should_round, row_shift)) {
return;
@@ -1909,8 +2048,10 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -1962,8 +2103,10 @@ void Dct8TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2014,8 +2157,10 @@ void Dct16TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct16TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2066,8 +2211,10 @@ void Dct32TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct32TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2117,8 +2264,10 @@ void Dct64TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct64TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2168,8 +2317,10 @@ void Adst4TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2222,8 +2373,10 @@ void Adst8TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2275,8 +2428,10 @@ void Adst16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Adst16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2335,9 +2490,10 @@ void Identity4TransformLoopRow_NEON(TransformType tx_type,
void Identity4TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint16_t>*>(dst_frame);
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2416,9 +2572,10 @@ void Identity8TransformLoopRow_NEON(TransformType tx_type,
void Identity8TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2457,8 +2614,9 @@ void Identity16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Identity16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int32_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2470,60 +2628,144 @@ void Identity16TransformLoopColumn_NEON(TransformType tx_type,
adjusted_tx_height, src);
}
+void Identity32TransformLoopRow_NEON(TransformType /*tx_type*/,
+ TransformSize tx_size,
+ int adjusted_tx_height, void* src_buffer,
+ int /*start_x*/, int /*start_y*/,
+ void* /*dst_frame*/) {
+ const int tx_height = kTransformHeight[tx_size];
+
+ // When combining the identity32 multiplier with the row shift, the
+ // calculations for tx_height == 8 and tx_height == 32 can be simplified
+ // from ((A * 4) + 2) >> 2) to A.
+ if ((tx_height & 0x28) != 0) {
+ return;
+ }
+
+ // Process kTransformSize32x16. The src is always rounded before the identity
+ // transform and shifted by 1 afterwards.
+ auto* src = static_cast<int32_t*>(src_buffer);
+ if (Identity32DcOnly(src, adjusted_tx_height)) {
+ return;
+ }
+
+ assert(tx_size == kTransformSize32x16);
+ ApplyRounding<32>(src, adjusted_tx_height);
+ int i = adjusted_tx_height;
+ do {
+ Identity32Row16_NEON(src, /*step=*/32);
+ src += 128;
+ i -= 4;
+ } while (i != 0);
+}
+
+void Identity32TransformLoopColumn_NEON(TransformType /*tx_type*/,
+ TransformSize tx_size,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
+ auto& frame = *static_cast<Array2DView<uint16_t>*>(dst_frame);
+ auto* src = static_cast<int32_t*>(src_buffer);
+ const int tx_width = kTransformWidth[tx_size];
+
+ IdentityColumnStoreToFrame<32>(frame, start_x, start_y, tx_width,
+ adjusted_tx_height, src);
+}
+
+void Wht4TransformLoopRow_NEON(TransformType tx_type, TransformSize tx_size,
+ int /*adjusted_tx_height*/, void* /*src_buffer*/,
+ int /*start_x*/, int /*start_y*/,
+ void* /*dst_frame*/) {
+ assert(tx_type == kTransformTypeDctDct);
+ assert(tx_size == kTransformSize4x4);
+ static_cast<void>(tx_type);
+ static_cast<void>(tx_size);
+ // Do both row and column transforms in the column-transform pass.
+}
+
+void Wht4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
+ assert(tx_type == kTransformTypeDctDct);
+ assert(tx_size == kTransformSize4x4);
+ static_cast<void>(tx_type);
+ static_cast<void>(tx_size);
+
+ // Process 4 1d wht4 rows and columns in parallel.
+ const auto* src = static_cast<int32_t*>(src_buffer);
+ auto& frame = *static_cast<Array2DView<uint16_t>*>(dst_frame);
+ uint16_t* dst = frame[start_y] + start_x;
+ const int dst_stride = frame.columns();
+ Wht4_NEON(dst, dst_stride, src, adjusted_tx_height);
+}
+
//------------------------------------------------------------------------------
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
assert(dsp != nullptr);
// Maximum transform size for Dct is 64.
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
Dct4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
Dct4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
Dct8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
Dct8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
Dct16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
Dct16TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
Dct32TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
Dct32TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
Dct64TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
Dct64TransformLoopColumn_NEON;
// Maximum transform size for Adst is 16.
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
Adst4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
Adst4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
Adst8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
Adst8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
Adst16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
Adst16TransformLoopColumn_NEON;
// Maximum transform size for Identity transform is 32.
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
Identity4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
Identity4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
Identity8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
Identity8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
Identity16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
Identity16TransformLoopColumn_NEON;
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
+ Identity32TransformLoopRow_NEON;
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
+ Identity32TransformLoopColumn_NEON;
+
+ // Maximum transform size for Wht is 4.
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
+ Wht4TransformLoopRow_NEON;
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
+ Wht4TransformLoopColumn_NEON;
}
} // namespace
diff --git a/src/dsp/arm/inverse_transform_neon.cc b/src/dsp/arm/inverse_transform_neon.cc
index 315d5e9..1c2e111 100644
--- a/src/dsp/arm/inverse_transform_neon.cc
+++ b/src/dsp/arm/inverse_transform_neon.cc
@@ -273,7 +273,8 @@ LIBGAV1_ALWAYS_INLINE void Transpose8x4To4x8(const int16x8_t in[4],
//------------------------------------------------------------------------------
template <int store_width, int store_count>
-LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
+LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* LIBGAV1_RESTRICT dst,
+ int32_t stride, int32_t idx,
const int16x8_t* const s) {
assert(store_count % 4 == 0);
assert(store_width == 8 || store_width == 16);
@@ -297,8 +298,8 @@ LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
}
template <int load_width, int load_count>
-LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* src, int32_t stride,
- int32_t idx, int16x8_t* x) {
+LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* LIBGAV1_RESTRICT src,
+ int32_t stride, int32_t idx, int16x8_t* x) {
assert(load_count % 4 == 0);
assert(load_width == 8 || load_width == 16);
// NOTE: It is expected that the compiler will unroll these loops.
@@ -388,6 +389,33 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
int16x8_t* b,
const int angle,
const bool flip) {
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
+ defined(__clang__) // ARM v8.1-A
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
+ // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use
+ // vqrdmulhq_n_s16().
+ const int16_t cos128 = Cos128(angle);
+ const int16_t sin128 = Sin128(angle);
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*b), -sin128);
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*b), cos128);
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
+
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*b), -sin128);
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*b), cos128);
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
+
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
+ if (flip) {
+ *a = y;
+ *b = x;
+ } else {
+ *a = x;
+ *b = y;
+ }
+#else
const int16_t cos128 = Cos128(angle);
const int16_t sin128 = Sin128(angle);
// For this function, the max value returned by Sin128() is 4091, which fits
@@ -403,12 +431,40 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
*a = x;
*b = y;
}
+#endif
}
LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
int16x8_t* b,
const int angle,
const bool flip) {
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
+ defined(__clang__) // ARM v8.1-A
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
+ // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use
+ // vqrdmulhq_n_s16().
+ const int16_t cos128 = Cos128(angle);
+ const int16_t sin128 = Sin128(angle);
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*a), cos128);
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*a), sin128);
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
+
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*a), cos128);
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*a), sin128);
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
+
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
+ if (flip) {
+ *a = y;
+ *b = x;
+ } else {
+ *a = x;
+ *b = y;
+ }
+#else
const int16_t cos128 = Cos128(angle);
const int16_t sin128 = Sin128(angle);
const int16x8_t x = vqrdmulhq_n_s16(*a, cos128 << 3);
@@ -420,6 +476,7 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
*a = x;
*b = y;
}
+#endif
}
LIBGAV1_ALWAYS_INLINE void HadamardRotation(int16x8_t* a, int16x8_t* b,
@@ -736,8 +793,8 @@ LIBGAV1_ALWAYS_INLINE void Dct16_NEON(void* dest, int32_t step, bool is_row,
if (is_row) {
const int16x8_t v_row_shift = vdupq_n_s16(-row_shift);
- for (int i = 0; i < 16; ++i) {
- s[i] = vqrshlq_s16(s[i], v_row_shift);
+ for (auto& i : s) {
+ i = vqrshlq_s16(i, v_row_shift);
}
}
@@ -914,8 +971,8 @@ LIBGAV1_ALWAYS_INLINE void Dct32_NEON(void* dest, const int32_t step,
for (int idx = 0; idx < 32; idx += 8) {
int16x8_t output[8];
Transpose8x8(&s[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1135,8 +1192,8 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
for (int idx = 0; idx < 64; idx += 8) {
int16x8_t output[8];
Transpose8x8(&s[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1611,13 +1668,13 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
const int16x8_t v_row_shift = vdupq_n_s16(-row_shift);
int16x8_t output[4];
Transpose4x8To8x4(x, output);
- for (int i = 0; i < 4; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 4>(dst, step, 0, output);
Transpose4x8To8x4(&x[8], output);
- for (int i = 0; i < 4; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 4>(dst, step, 8, output);
} else {
@@ -1629,8 +1686,8 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
for (int idx = 0; idx < 16; idx += 8) {
int16x8_t output[8];
Transpose8x8(&x[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1805,9 +1862,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity4DcOnly(void* dest, int adjusted_tx_height,
template <int identity_size>
LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (identity_size < 32) {
if (tx_width == 4) {
@@ -1891,9 +1949,10 @@ LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
LIBGAV1_ALWAYS_INLINE void Identity4RowColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (tx_width == 4) {
uint8x8_t frame_data = vdup_n_u8(0);
@@ -2106,8 +2165,9 @@ LIBGAV1_ALWAYS_INLINE void TransposeAndPermute4x4WideInput(
}
// Process 4 wht4 rows and columns.
-LIBGAV1_ALWAYS_INLINE void Wht4_NEON(uint8_t* dst, const int dst_stride,
- const void* source,
+LIBGAV1_ALWAYS_INLINE void Wht4_NEON(uint8_t* LIBGAV1_RESTRICT dst,
+ const int dst_stride,
+ const void* LIBGAV1_RESTRICT source,
const int adjusted_tx_height) {
const auto* const src = static_cast<const int16_t*>(source);
int16x4_t s[4];
@@ -2273,11 +2333,12 @@ LIBGAV1_ALWAYS_INLINE void RowShift(int16_t* source, int num_rows,
template <int tx_height, bool enable_flip_rows = false>
LIBGAV1_ALWAYS_INLINE void StoreToFrameWithRound(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int16_t* source, TransformType tx_type) {
+ const int tx_width, const int16_t* LIBGAV1_RESTRICT source,
+ TransformType tx_type) {
const bool flip_rows =
enable_flip_rows ? kTransformFlipRowsMask.Contains(tx_type) : false;
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
// Enable for 4x4, 4x8, 4x16
if (tx_height < 32 && tx_width == 4) {
@@ -2338,7 +2399,7 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_height = kTransformHeight[tx_size];
const bool should_round = (tx_height == 8);
- const int row_shift = (tx_height == 16);
+ const int row_shift = static_cast<int>(tx_height == 16);
if (DctDcOnly<4>(src, adjusted_tx_height, should_round, row_shift)) {
return;
@@ -2368,8 +2429,10 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2435,8 +2498,10 @@ void Dct8TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2497,8 +2562,10 @@ void Dct16TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct16TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2551,8 +2618,10 @@ void Dct32TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct32TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2594,8 +2663,10 @@ void Dct64TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct64TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2645,8 +2716,10 @@ void Adst4TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2707,8 +2780,10 @@ void Adst8TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2771,8 +2846,10 @@ void Adst16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Adst16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2844,9 +2921,10 @@ void Identity4TransformLoopRow_NEON(TransformType tx_type,
void Identity4TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2919,9 +2997,10 @@ void Identity8TransformLoopRow_NEON(TransformType tx_type,
void Identity8TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2960,8 +3039,9 @@ void Identity16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Identity16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -3007,8 +3087,9 @@ void Identity32TransformLoopRow_NEON(TransformType /*tx_type*/,
void Identity32TransformLoopColumn_NEON(TransformType /*tx_type*/,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -3029,8 +3110,10 @@ void Wht4TransformLoopRow_NEON(TransformType tx_type, TransformSize tx_size,
}
void Wht4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
assert(tx_type == kTransformTypeDctDct);
assert(tx_size == kTransformSize4x4);
static_cast<void>(tx_type);
@@ -3050,63 +3133,63 @@ void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
// Maximum transform size for Dct is 64.
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
Dct4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
Dct4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
Dct8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
Dct8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
Dct16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
Dct16TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
Dct32TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
Dct32TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
Dct64TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
Dct64TransformLoopColumn_NEON;
// Maximum transform size for Adst is 16.
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
Adst4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
Adst4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
Adst8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
Adst8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
Adst16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
Adst16TransformLoopColumn_NEON;
// Maximum transform size for Identity transform is 32.
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
Identity4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
Identity4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
Identity8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
Identity8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
Identity16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
Identity16TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
Identity32TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
Identity32TransformLoopColumn_NEON;
// Maximum transform size for Wht is 4.
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
Wht4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
Wht4TransformLoopColumn_NEON;
}
diff --git a/src/dsp/arm/inverse_transform_neon.h b/src/dsp/arm/inverse_transform_neon.h
index 91e0e83..ebd7cf4 100644
--- a/src/dsp/arm/inverse_transform_neon.h
+++ b/src/dsp/arm/inverse_transform_neon.h
@@ -32,36 +32,39 @@ void InverseTransformInit10bpp_NEON();
} // namespace libgav1
#if LIBGAV1_ENABLE_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize64_1DTransformDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformIdentity LIBGAV1_CPU_NEON
-#define LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
diff --git a/src/dsp/arm/loop_filter_neon.cc b/src/dsp/arm/loop_filter_neon.cc
index 8d72892..8c03928 100644
--- a/src/dsp/arm/loop_filter_neon.cc
+++ b/src/dsp/arm/loop_filter_neon.cc
@@ -50,7 +50,7 @@ inline uint8x8_t OuterThreshold(const uint8x8_t p0q0, const uint8x8_t p1q1,
}
// abs(p1 - p0) <= inner_thresh && abs(q1 - q0) <= inner_thresh &&
-// OuterThreshhold()
+// OuterThreshold()
inline uint8x8_t NeedsFilter4(const uint8x8_t abd_p0p1_q0q1,
const uint8x8_t p0q0, const uint8x8_t p1q1,
const uint8_t inner_thresh,
@@ -65,6 +65,7 @@ inline void Filter4Masks(const uint8x8_t p0q0, const uint8x8_t p1q1,
const uint8_t hev_thresh, const uint8_t outer_thresh,
const uint8_t inner_thresh, uint8x8_t* const hev_mask,
uint8x8_t* const needs_filter4_mask) {
+ // First half is |p0 - p1|, second half is |q0 - q1|.
const uint8x8_t p0p1_q0q1 = vabd_u8(p0q0, p1q1);
// This includes cases where NeedsFilter4() is not true and so Filter2() will
// not be applied.
@@ -131,7 +132,7 @@ inline void Filter4(const uint8x8_t q0p1, const uint8x8_t p0q1,
void Horizontal4_NEON(void* const dest, const ptrdiff_t stride,
const int outer_thresh, const int inner_thresh,
const int hev_thresh) {
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
const uint8x8_t p1_v = Load4(dst - 2 * stride);
const uint8x8_t p0_v = Load4(dst - stride);
@@ -180,7 +181,7 @@ void Horizontal4_NEON(void* const dest, const ptrdiff_t stride,
void Vertical4_NEON(void* const dest, const ptrdiff_t stride,
const int outer_thresh, const int inner_thresh,
const int hev_thresh) {
- uint8_t* dst = static_cast<uint8_t*>(dest);
+ auto* dst = static_cast<uint8_t*>(dest);
// Move |dst| to the left side of the filter window.
dst -= 2;
@@ -256,7 +257,7 @@ inline uint8x8_t IsFlat3(const uint8x8_t abd_p0p1_q0q1,
// abs(p2 - p1) <= inner_thresh && abs(p1 - p0) <= inner_thresh &&
// abs(q1 - q0) <= inner_thresh && abs(q2 - q1) <= inner_thresh &&
-// OuterThreshhold()
+// OuterThreshold()
inline uint8x8_t NeedsFilter6(const uint8x8_t abd_p0p1_q0q1,
const uint8x8_t abd_p1p2_q1q2,
const uint8x8_t p0q0, const uint8x8_t p1q1,
@@ -288,26 +289,26 @@ inline void Filter6(const uint8x8_t p2q2, const uint8x8_t p1q1,
// Sum p1 and q1 output from opposite directions
// p1 = (3 * p2) + (2 * p1) + (2 * p0) + q0
// ^^^^^^^^
- // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q3)
+ // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q2)
// ^^^^^^^^
const uint16x8_t p2q2_double = vaddl_u8(p2q2, p2q2);
uint16x8_t sum = vaddw_u8(p2q2_double, p2q2);
// p1 = (3 * p2) + (2 * p1) + (2 * p0) + q0
// ^^^^^^^^
- // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q3)
+ // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q2)
// ^^^^^^^^
sum = vaddq_u16(vaddl_u8(p1q1, p1q1), sum);
// p1 = (3 * p2) + (2 * p1) + (2 * p0) + q0
// ^^^^^^^^
- // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q3)
+ // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q2)
// ^^^^^^^^
sum = vaddq_u16(vaddl_u8(p0q0, p0q0), sum);
// p1 = (3 * p2) + (2 * p1) + (2 * p0) + q0
// ^^
- // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q3)
+ // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q2)
// ^^
const uint8x8_t q0p0 = Transpose32(p0q0);
sum = vaddw_u8(sum, q0p0);
@@ -488,7 +489,7 @@ inline uint8x8_t IsFlat4(const uint8x8_t abd_p0n0_q0n0,
// abs(p3 - p2) <= inner_thresh && abs(p2 - p1) <= inner_thresh &&
// abs(p1 - p0) <= inner_thresh && abs(q1 - q0) <= inner_thresh &&
// abs(q2 - q1) <= inner_thresh && abs(q3 - q2) <= inner_thresh
-// OuterThreshhold()
+// OuterThreshold()
inline uint8x8_t NeedsFilter8(const uint8x8_t abd_p0p1_q0q1,
const uint8x8_t abd_p1p2_q1q2,
const uint8x8_t abd_p2p3_q2q3,
@@ -522,29 +523,35 @@ inline void Filter8(const uint8x8_t p3q3, const uint8x8_t p2q2,
const uint8x8_t p1q1, const uint8x8_t p0q0,
uint8x8_t* const p2q2_output, uint8x8_t* const p1q1_output,
uint8x8_t* const p0q0_output) {
- // Sum p2 and q2 output from opposite directions
+ // Sum p2 and q2 output from opposite directions.
+ // The formula is regrouped to allow 2 doubling operations to be combined.
// p2 = (3 * p3) + (2 * p2) + p1 + p0 + q0
// ^^^^^^^^
// q2 = p0 + q0 + q1 + (2 * q2) + (3 * q3)
// ^^^^^^^^
- uint16x8_t sum = vaddw_u8(vaddl_u8(p3q3, p3q3), p3q3);
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^
+ const uint16x8_t p23q23 = vaddl_u8(p3q3, p2q2);
- // p2 = (3 * p3) + (2 * p2) + p1 + p0 + q0
- // ^^^^^^^^
- // q2 = p0 + q0 + q1 + (2 * q2) + (3 * q3)
- // ^^^^^^^^
- sum = vaddq_u16(vaddl_u8(p2q2, p2q2), sum);
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^
+ uint16x8_t sum = vshlq_n_u16(p23q23, 1);
- // p2 = (3 * p3) + (2 * p2) + p1 + p0 + q0
- // ^^^^^^^
- // q2 = p0 + q0 + q1 + (2 * q2) + (3 * q3)
- // ^^^^^^^
- sum = vaddq_u16(vaddl_u8(p1q1, p0q0), sum);
+ // Add two other terms to make dual issue with shift more likely.
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^
+ const uint16x8_t p01q01 = vaddl_u8(p0q0, p1q1);
- // p2 = (3 * p3) + (2 * p2) + p1 + p0 + q0
- // ^^
- // q2 = p0 + q0 + q1 + (2 * q2) + (3 * q3)
- // ^^
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^^^
+ sum = vaddq_u16(sum, p01q01);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^
+ sum = vaddw_u8(sum, p3q3);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^
const uint8x8_t q0p0 = Transpose32(p0q0);
sum = vaddw_u8(sum, q0p0);
@@ -553,9 +560,9 @@ inline void Filter8(const uint8x8_t p3q3, const uint8x8_t p2q2,
// Convert to p1 and q1 output:
// p1 = p2 - p3 - p2 + p1 + q1
// q1 = q2 - q3 - q2 + q0 + p1
- sum = vsubq_u16(sum, vaddl_u8(p3q3, p2q2));
+ sum = vsubq_u16(sum, p23q23);
const uint8x8_t q1p1 = Transpose32(p1q1);
- sum = vaddq_u16(vaddl_u8(p1q1, q1p1), sum);
+ sum = vaddq_u16(sum, vaddl_u8(p1q1, q1p1));
*p1q1_output = vrshrn_n_u16(sum, 3);
@@ -564,7 +571,7 @@ inline void Filter8(const uint8x8_t p3q3, const uint8x8_t p2q2,
// q0 = q1 - q3 - q1 + q0 + p2
sum = vsubq_u16(sum, vaddl_u8(p3q3, p1q1));
const uint8x8_t q2p2 = Transpose32(p2q2);
- sum = vaddq_u16(vaddl_u8(p0q0, q2p2), sum);
+ sum = vaddq_u16(sum, vaddl_u8(p0q0, q2p2));
*p0q0_output = vrshrn_n_u16(sum, 3);
}
@@ -1174,7 +1181,1264 @@ void Init8bpp() {
} // namespace
} // namespace low_bitdepth
-void LoopFilterInit_NEON() { low_bitdepth::Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+// (abs(p1 - p0) > thresh) || (abs(q1 - q0) > thresh)
+inline uint16x4_t Hev(const uint16x8_t abd_p0p1_q0q1, const uint16_t thresh) {
+ const uint16x8_t a = vcgtq_u16(abd_p0p1_q0q1, vdupq_n_u16(thresh));
+ return vorr_u16(vget_low_u16(a), vget_high_u16(a));
+}
+
+// abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= outer_thresh
+inline uint16x4_t OuterThreshold(const uint16x4_t p1, const uint16x4_t p0,
+ const uint16x4_t q0, const uint16x4_t q1,
+ const uint16_t outer_thresh) {
+ const uint16x4_t abd_p0q0 = vabd_u16(p0, q0);
+ const uint16x4_t abd_p1q1 = vabd_u16(p1, q1);
+ const uint16x4_t p0q0_double = vshl_n_u16(abd_p0q0, 1);
+ const uint16x4_t p1q1_half = vshr_n_u16(abd_p1q1, 1);
+ const uint16x4_t sum = vadd_u16(p0q0_double, p1q1_half);
+ return vcle_u16(sum, vdup_n_u16(outer_thresh));
+}
+
+// abs(p1 - p0) <= inner_thresh && abs(q1 - q0) <= inner_thresh &&
+// OuterThreshold()
+inline uint16x4_t NeedsFilter4(const uint16x8_t abd_p0p1_q0q1,
+ const uint16_t inner_thresh,
+ const uint16x4_t outer_mask) {
+ const uint16x8_t a = vcleq_u16(abd_p0p1_q0q1, vdupq_n_u16(inner_thresh));
+ const uint16x4_t inner_mask = vand_u16(vget_low_u16(a), vget_high_u16(a));
+ return vand_u16(inner_mask, outer_mask);
+}
+
+// abs(p2 - p1) <= inner_thresh && abs(p1 - p0) <= inner_thresh &&
+// abs(q1 - q0) <= inner_thresh && abs(q2 - q1) <= inner_thresh &&
+// OuterThreshold()
+inline uint16x4_t NeedsFilter6(const uint16x8_t abd_p0p1_q0q1,
+ const uint16x8_t abd_p1p2_q1q2,
+ const uint16_t inner_thresh,
+ const uint16x4_t outer_mask) {
+ const uint16x8_t a = vmaxq_u16(abd_p0p1_q0q1, abd_p1p2_q1q2);
+ const uint16x8_t b = vcleq_u16(a, vdupq_n_u16(inner_thresh));
+ const uint16x4_t inner_mask = vand_u16(vget_low_u16(b), vget_high_u16(b));
+ return vand_u16(inner_mask, outer_mask);
+}
+
+// abs(p3 - p2) <= inner_thresh && abs(p2 - p1) <= inner_thresh &&
+// abs(p1 - p0) <= inner_thresh && abs(q1 - q0) <= inner_thresh &&
+// abs(q2 - q1) <= inner_thresh && abs(q3 - q2) <= inner_thresh
+// OuterThreshold()
+inline uint16x4_t NeedsFilter8(const uint16x8_t abd_p0p1_q0q1,
+ const uint16x8_t abd_p1p2_q1q2,
+ const uint16x8_t abd_p2p3_q2q3,
+ const uint16_t inner_thresh,
+ const uint16x4_t outer_mask) {
+ const uint16x8_t a = vmaxq_u16(abd_p0p1_q0q1, abd_p1p2_q1q2);
+ const uint16x8_t b = vmaxq_u16(a, abd_p2p3_q2q3);
+ const uint16x8_t c = vcleq_u16(b, vdupq_n_u16(inner_thresh));
+ const uint16x4_t inner_mask = vand_u16(vget_low_u16(c), vget_high_u16(c));
+ return vand_u16(inner_mask, outer_mask);
+}
+
+// -----------------------------------------------------------------------------
+// FilterNMasks functions.
+
+inline void Filter4Masks(const uint16x8_t p0q0, const uint16x8_t p1q1,
+ const uint16_t hev_thresh, const uint16x4_t outer_mask,
+ const uint16_t inner_thresh,
+ uint16x4_t* const hev_mask,
+ uint16x4_t* const needs_filter4_mask) {
+ const uint16x8_t p0p1_q0q1 = vabdq_u16(p0q0, p1q1);
+ // This includes cases where NeedsFilter4() is not true and so Filter2() will
+ // not be applied.
+ const uint16x4_t hev_tmp_mask = Hev(p0p1_q0q1, hev_thresh);
+
+ *needs_filter4_mask = NeedsFilter4(p0p1_q0q1, inner_thresh, outer_mask);
+
+ // Filter2() will only be applied if both NeedsFilter4() and Hev() are true.
+ *hev_mask = vand_u16(hev_tmp_mask, *needs_filter4_mask);
+}
+
+// abs(p1 - p0) <= flat_thresh && abs(q1 - q0) <= flat_thresh &&
+// abs(p2 - p0) <= flat_thresh && abs(q2 - q0) <= flat_thresh
+// |flat_thresh| == 4 for 10 bit decode.
+inline uint16x4_t IsFlat3(const uint16x8_t abd_p0p1_q0q1,
+ const uint16x8_t abd_p0p2_q0q2) {
+ constexpr int flat_thresh = 1 << 2;
+ const uint16x8_t a = vmaxq_u16(abd_p0p1_q0q1, abd_p0p2_q0q2);
+ const uint16x8_t b = vcleq_u16(a, vdupq_n_u16(flat_thresh));
+ return vand_u16(vget_low_u16(b), vget_high_u16(b));
+}
+
+inline void Filter6Masks(const uint16x8_t p2q2, const uint16x8_t p1q1,
+ const uint16x8_t p0q0, const uint16_t hev_thresh,
+ const uint16x4_t outer_mask,
+ const uint16_t inner_thresh,
+ uint16x4_t* const needs_filter6_mask,
+ uint16x4_t* const is_flat3_mask,
+ uint16x4_t* const hev_mask) {
+ const uint16x8_t abd_p0p1_q0q1 = vabdq_u16(p0q0, p1q1);
+ *hev_mask = Hev(abd_p0p1_q0q1, hev_thresh);
+ *is_flat3_mask = IsFlat3(abd_p0p1_q0q1, vabdq_u16(p0q0, p2q2));
+ *needs_filter6_mask = NeedsFilter6(abd_p0p1_q0q1, vabdq_u16(p1q1, p2q2),
+ inner_thresh, outer_mask);
+}
+
+// IsFlat4 uses N=1, IsFlatOuter4 uses N=4.
+// abs(p[N] - p0) <= flat_thresh && abs(q[N] - q0) <= flat_thresh &&
+// abs(p[N+1] - p0) <= flat_thresh && abs(q[N+1] - q0) <= flat_thresh &&
+// abs(p[N+2] - p0) <= flat_thresh && abs(q[N+1] - q0) <= flat_thresh
+// |flat_thresh| == 4 for 10 bit decode.
+inline uint16x4_t IsFlat4(const uint16x8_t abd_pnp0_qnq0,
+ const uint16x8_t abd_pn1p0_qn1q0,
+ const uint16x8_t abd_pn2p0_qn2q0) {
+ constexpr int flat_thresh = 1 << 2;
+ const uint16x8_t a = vmaxq_u16(abd_pnp0_qnq0, abd_pn1p0_qn1q0);
+ const uint16x8_t b = vmaxq_u16(a, abd_pn2p0_qn2q0);
+ const uint16x8_t c = vcleq_u16(b, vdupq_n_u16(flat_thresh));
+ return vand_u16(vget_low_u16(c), vget_high_u16(c));
+}
+
+inline void Filter8Masks(const uint16x8_t p3q3, const uint16x8_t p2q2,
+ const uint16x8_t p1q1, const uint16x8_t p0q0,
+ const uint16_t hev_thresh, const uint16x4_t outer_mask,
+ const uint16_t inner_thresh,
+ uint16x4_t* const needs_filter8_mask,
+ uint16x4_t* const is_flat4_mask,
+ uint16x4_t* const hev_mask) {
+ const uint16x8_t abd_p0p1_q0q1 = vabdq_u16(p0q0, p1q1);
+ *hev_mask = Hev(abd_p0p1_q0q1, hev_thresh);
+ const uint16x4_t is_flat4 =
+ IsFlat4(abd_p0p1_q0q1, vabdq_u16(p0q0, p2q2), vabdq_u16(p0q0, p3q3));
+ *needs_filter8_mask =
+ NeedsFilter8(abd_p0p1_q0q1, vabdq_u16(p1q1, p2q2), vabdq_u16(p2q2, p3q3),
+ inner_thresh, outer_mask);
+ // |is_flat4_mask| is used to decide where to use the result of Filter8.
+ // In rare cases, |is_flat4| can be true where |needs_filter8_mask| is false,
+ // overriding the question of whether to use Filter8. Because Filter4 doesn't
+ // apply to p2q2, |is_flat4_mask| chooses directly between Filter8 and the
+ // source value. To be correct, the mask must account for this override.
+ *is_flat4_mask = vand_u16(is_flat4, *needs_filter8_mask);
+}
+
+// -----------------------------------------------------------------------------
+// FilterN functions.
+
+// Calculate Filter4() or Filter2() based on |hev_mask|.
+inline void Filter4(const uint16x8_t p0q0, const uint16x8_t p0q1,
+ const uint16x8_t p1q1, const uint16x4_t hev_mask,
+ uint16x8_t* const p1q1_result,
+ uint16x8_t* const p0q0_result) {
+ const uint16x8_t q0p1 = vextq_u16(p0q0, p1q1, 4);
+ // a = 3 * (q0 - p0) + Clip3(p1 - q1, min_signed_val, max_signed_val);
+ // q0mp0 means "q0 minus p0".
+ const int16x8_t q0mp0_p1mq1 = vreinterpretq_s16_u16(vsubq_u16(q0p1, p0q1));
+ const int16x4_t q0mp0_3 = vmul_n_s16(vget_low_s16(q0mp0_p1mq1), 3);
+
+ // If this is for Filter2() then include |p1mq1|. Otherwise zero it.
+ const int16x4_t min_signed_pixel = vdup_n_s16(-(1 << (9 /*bitdepth-1*/)));
+ const int16x4_t max_signed_pixel = vdup_n_s16((1 << (9 /*bitdepth-1*/)) - 1);
+ const int16x4_t p1mq1 = vget_high_s16(q0mp0_p1mq1);
+ const int16x4_t p1mq1_saturated =
+ Clip3S16(p1mq1, min_signed_pixel, max_signed_pixel);
+ const int16x4_t hev_option =
+ vand_s16(vreinterpret_s16_u16(hev_mask), p1mq1_saturated);
+
+ const int16x4_t a = vadd_s16(q0mp0_3, hev_option);
+
+ // Need to figure out what's going on here because there are some unnecessary
+ // tricks to accommodate 8x8 as smallest 8bpp vector
+
+ // We can not shift with rounding because the clamp comes *before* the
+ // shifting. a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3; a2 =
+ // Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
+ const int16x4_t plus_four =
+ Clip3S16(vadd_s16(a, vdup_n_s16(4)), min_signed_pixel, max_signed_pixel);
+ const int16x4_t plus_three =
+ Clip3S16(vadd_s16(a, vdup_n_s16(3)), min_signed_pixel, max_signed_pixel);
+ const int16x4_t a1 = vshr_n_s16(plus_four, 3);
+ const int16x4_t a2 = vshr_n_s16(plus_three, 3);
+
+ // a3 = (a1 + 1) >> 1;
+ const int16x4_t a3 = vrshr_n_s16(a1, 1);
+
+ const int16x8_t a3_ma3 = vcombine_s16(a3, vneg_s16(a3));
+ const int16x8_t p1q1_a3 = vaddq_s16(vreinterpretq_s16_u16(p1q1), a3_ma3);
+
+ // Need to shift the second term or we end up with a2_ma2.
+ const int16x8_t a2_ma1 = vcombine_s16(a2, vneg_s16(a1));
+ const int16x8_t p0q0_a = vaddq_s16(vreinterpretq_s16_u16(p0q0), a2_ma1);
+ *p1q1_result = ConvertToUnsignedPixelU16(p1q1_a3, kBitdepth10);
+ *p0q0_result = ConvertToUnsignedPixelU16(p0q0_a, kBitdepth10);
+}
+
+void Horizontal4_NEON(void* const dest, const ptrdiff_t stride,
+ int outer_thresh, int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest);
+ auto* const dst_p1 = reinterpret_cast<uint16_t*>(dst - 2 * stride);
+ auto* const dst_p0 = reinterpret_cast<uint16_t*>(dst - stride);
+ auto* const dst_q0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_q1 = reinterpret_cast<uint16_t*>(dst + stride);
+
+ const uint16x4_t src[4] = {vld1_u16(dst_p1), vld1_u16(dst_p0),
+ vld1_u16(dst_q0), vld1_u16(dst_q1)};
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[0], src[1], src[2], src[3], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter4_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[1], src[2]);
+ const uint16x8_t p1q1 = vcombine_u16(src[0], src[3]);
+ Filter4Masks(p0q0, p1q1, hev_thresh, outer_mask, inner_thresh, &hev_mask,
+ &needs_filter4_mask);
+
+#if defined(__aarch64__)
+ // This provides a good speedup for the unit test, but may not come up often
+ // enough to warrant it.
+ if (vaddv_u16(needs_filter4_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ const uint64x1_t needs_filter4_mask64 =
+ vreinterpret_u64_u16(needs_filter4_mask);
+ if (vget_lane_u64(needs_filter4_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter4_mask_8 =
+ vcombine_u16(needs_filter4_mask, needs_filter4_mask);
+
+ uint16x8_t f_p1q1;
+ uint16x8_t f_p0q0;
+ const uint16x8_t p0q1 = vcombine_u16(src[1], src[3]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f_p1q1, &f_p0q0);
+
+ // Already integrated the Hev mask when calculating the filtered values.
+ const uint16x8_t p0q0_output = vbslq_u16(needs_filter4_mask_8, f_p0q0, p0q0);
+
+ // p1/q1 are unmodified if only Hev() is true. This works because it was and'd
+ // with |needs_filter4_mask| previously.
+ const uint16x8_t p1q1_mask = veorq_u16(hev_mask_8, needs_filter4_mask_8);
+ const uint16x8_t p1q1_output = vbslq_u16(p1q1_mask, f_p1q1, p1q1);
+
+ vst1_u16(dst_p1, vget_low_u16(p1q1_output));
+ vst1_u16(dst_p0, vget_low_u16(p0q0_output));
+ vst1_u16(dst_q0, vget_high_u16(p0q0_output));
+ vst1_u16(dst_q1, vget_high_u16(p1q1_output));
+}
+
+void Vertical4_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh,
+ int inner_thresh, int hev_thresh) {
+ // Offset by 2 uint16_t values to load from first p1 position.
+ auto* dst = static_cast<uint8_t*>(dest) - 4;
+ auto* dst_p1 = reinterpret_cast<uint16_t*>(dst);
+ auto* dst_p0 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* dst_q0 = reinterpret_cast<uint16_t*>(dst + stride * 2);
+ auto* dst_q1 = reinterpret_cast<uint16_t*>(dst + stride * 3);
+
+ uint16x4_t src[4] = {vld1_u16(dst_p1), vld1_u16(dst_p0), vld1_u16(dst_q0),
+ vld1_u16(dst_q1)};
+ Transpose4x4(src);
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[0], src[1], src[2], src[3], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter4_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[1], src[2]);
+ const uint16x8_t p1q1 = vcombine_u16(src[0], src[3]);
+ Filter4Masks(p0q0, p1q1, hev_thresh, outer_mask, inner_thresh, &hev_mask,
+ &needs_filter4_mask);
+
+#if defined(__aarch64__)
+ // This provides a good speedup for the unit test. Not sure how applicable it
+ // is to valid streams though.
+ // Consider doing this on armv7 if there is a quick way to check if a vector
+ // is zero.
+ if (vaddv_u16(needs_filter4_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ const uint64x1_t needs_filter4_mask64 =
+ vreinterpret_u64_u16(needs_filter4_mask);
+ if (vget_lane_u64(needs_filter4_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter4_mask_8 =
+ vcombine_u16(needs_filter4_mask, needs_filter4_mask);
+
+ uint16x8_t f_p1q1;
+ uint16x8_t f_p0q0;
+ const uint16x8_t p0q1 = vcombine_u16(src[1], src[3]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f_p1q1, &f_p0q0);
+
+ // Already integrated the Hev mask when calculating the filtered values.
+ const uint16x8_t p0q0_output = vbslq_u16(needs_filter4_mask_8, f_p0q0, p0q0);
+
+ // p1/q1 are unmodified if only Hev() is true. This works because it was and'd
+ // with |needs_filter4_mask| previously.
+ const uint16x8_t p1q1_mask = veorq_u16(hev_mask_8, needs_filter4_mask_8);
+ const uint16x8_t p1q1_output = vbslq_u16(p1q1_mask, f_p1q1, p1q1);
+
+ uint16x4_t output[4] = {
+ vget_low_u16(p1q1_output),
+ vget_low_u16(p0q0_output),
+ vget_high_u16(p0q0_output),
+ vget_high_u16(p1q1_output),
+ };
+ Transpose4x4(output);
+
+ vst1_u16(dst_p1, output[0]);
+ vst1_u16(dst_p0, output[1]);
+ vst1_u16(dst_q0, output[2]);
+ vst1_u16(dst_q1, output[3]);
+}
+
+inline void Filter6(const uint16x8_t p2q2, const uint16x8_t p1q1,
+ const uint16x8_t p0q0, uint16x8_t* const p1q1_output,
+ uint16x8_t* const p0q0_output) {
+ // Sum p1 and q1 output from opposite directions.
+ // The formula is regrouped to allow 3 doubling operations to be combined.
+ //
+ // p1 = (3 * p2) + (2 * p1) + (2 * p0) + q0
+ // ^^^^^^^^
+ // q1 = p0 + (2 * q0) + (2 * q1) + (3 * q2)
+ // ^^^^^^^^
+ // p1q1 = p2q2 + 2 * (p2q2 + p1q1 + p0q0) + q0p0
+ // ^^^^^^^^^^^
+ uint16x8_t sum = vaddq_u16(p2q2, p1q1);
+
+ // p1q1 = p2q2 + 2 * (p2q2 + p1q1 + p0q0) + q0p0
+ // ^^^^^^
+ sum = vaddq_u16(sum, p0q0);
+
+ // p1q1 = p2q2 + 2 * (p2q2 + p1q1 + p0q0) + q0p0
+ // ^^^^^
+ sum = vshlq_n_u16(sum, 1);
+
+ // p1q1 = p2q2 + 2 * (p2q2 + p1q1 + p0q0) + q0p0
+ // ^^^^^^ ^^^^^^
+ // Should dual issue with the left shift.
+ const uint16x8_t q0p0 = Transpose64(p0q0);
+ const uint16x8_t outer_sum = vaddq_u16(p2q2, q0p0);
+ sum = vaddq_u16(sum, outer_sum);
+
+ *p1q1_output = vrshrq_n_u16(sum, 3);
+
+ // Convert to p0 and q0 output:
+ // p0 = p1 - (2 * p2) + q0 + q1
+ // q0 = q1 - (2 * q2) + p0 + p1
+ // p0q0 = p1q1 - (2 * p2q2) + q0p0 + q1p1
+ // ^^^^^^^^
+ const uint16x8_t p2q2_double = vshlq_n_u16(p2q2, 1);
+ // p0q0 = p1q1 - (2 * p2q2) + q0p0 + q1p1
+ // ^^^^^^^^
+ sum = vsubq_u16(sum, p2q2_double);
+ const uint16x8_t q1p1 = Transpose64(p1q1);
+ sum = vaddq_u16(sum, vaddq_u16(q0p0, q1p1));
+
+ *p0q0_output = vrshrq_n_u16(sum, 3);
+}
+
+void Horizontal6_NEON(void* const dest, const ptrdiff_t stride,
+ int outer_thresh, int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest);
+ auto* const dst_p2 = reinterpret_cast<uint16_t*>(dst - 3 * stride);
+ auto* const dst_p1 = reinterpret_cast<uint16_t*>(dst - 2 * stride);
+ auto* const dst_p0 = reinterpret_cast<uint16_t*>(dst - stride);
+ auto* const dst_q0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_q1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_q2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+
+ const uint16x4_t src[6] = {vld1_u16(dst_p2), vld1_u16(dst_p1),
+ vld1_u16(dst_p0), vld1_u16(dst_q0),
+ vld1_u16(dst_q1), vld1_u16(dst_q2)};
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[1], src[2], src[3], src[4], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat3_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[2], src[3]);
+ const uint16x8_t p1q1 = vcombine_u16(src[1], src[4]);
+ const uint16x8_t p2q2 = vcombine_u16(src[0], src[5]);
+ Filter6Masks(p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat3_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t is_flat3_mask_8 = vcombine_u16(is_flat3_mask, is_flat3_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ // ZIP1 p0q0, p1q1 may perform better here.
+ const uint16x8_t p0q1 = vcombine_u16(src[2], src[4]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat3_mask| controls whether the needed filter is Filter4 or
+ // Filter6. Therefore if it is false when |needs_filter_mask| is true, Filter6
+ // output is not used.
+ uint16x8_t f6_p1q1, f6_p0q0;
+ const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask);
+ if (vget_lane_u64(need_filter6, 0) == 0) {
+ // Filter6() does not apply, but Filter4() applies to one or more values.
+ p0q0_output = p0q0;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ Filter6(p2q2, p1q1, p0q0, &f6_p1q1, &f6_p0q0);
+ p1q1_output = vbslq_u16(is_flat3_mask_8, f6_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(is_flat3_mask_8, f6_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+
+ vst1_u16(dst_p1, vget_low_u16(p1q1_output));
+ vst1_u16(dst_p0, vget_low_u16(p0q0_output));
+ vst1_u16(dst_q0, vget_high_u16(p0q0_output));
+ vst1_u16(dst_q1, vget_high_u16(p1q1_output));
+}
+
+void Vertical6_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh,
+ int inner_thresh, int hev_thresh) {
+ // Left side of the filter window.
+ auto* const dst = static_cast<uint8_t*>(dest) - 3 * sizeof(uint16_t);
+ auto* const dst_0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+ auto* const dst_3 = reinterpret_cast<uint16_t*>(dst + 3 * stride);
+
+ // Overread by 2 values. These overreads become the high halves of src_raw[2]
+ // and src_raw[3] after transpose.
+ uint16x8_t src_raw[4] = {vld1q_u16(dst_0), vld1q_u16(dst_1), vld1q_u16(dst_2),
+ vld1q_u16(dst_3)};
+ Transpose4x8(src_raw);
+ // p2, p1, p0, q0, q1, q2
+ const uint16x4_t src[6] = {
+ vget_low_u16(src_raw[0]), vget_low_u16(src_raw[1]),
+ vget_low_u16(src_raw[2]), vget_low_u16(src_raw[3]),
+ vget_high_u16(src_raw[0]), vget_high_u16(src_raw[1]),
+ };
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[1], src[2], src[3], src[4], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat3_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[2], src[3]);
+ const uint16x8_t p1q1 = vcombine_u16(src[1], src[4]);
+ const uint16x8_t p2q2 = vcombine_u16(src[0], src[5]);
+ Filter6Masks(p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat3_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t is_flat3_mask_8 = vcombine_u16(is_flat3_mask, is_flat3_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ // ZIP1 p0q0, p1q1 may perform better here.
+ const uint16x8_t p0q1 = vcombine_u16(src[2], src[4]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat3_mask| controls whether the needed filter is Filter4 or
+ // Filter6. Therefore if it is false when |needs_filter_mask| is true, Filter6
+ // output is not used.
+ uint16x8_t f6_p1q1, f6_p0q0;
+ const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask);
+ if (vget_lane_u64(need_filter6, 0) == 0) {
+ // Filter6() does not apply, but Filter4() applies to one or more values.
+ p0q0_output = p0q0;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ Filter6(p2q2, p1q1, p0q0, &f6_p1q1, &f6_p0q0);
+ p1q1_output = vbslq_u16(is_flat3_mask_8, f6_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(is_flat3_mask_8, f6_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+
+ uint16x4_t output[4] = {
+ vget_low_u16(p1q1_output),
+ vget_low_u16(p0q0_output),
+ vget_high_u16(p0q0_output),
+ vget_high_u16(p1q1_output),
+ };
+ Transpose4x4(output);
+
+ // dst_n starts at p2, so adjust to p1.
+ vst1_u16(dst_0 + 1, output[0]);
+ vst1_u16(dst_1 + 1, output[1]);
+ vst1_u16(dst_2 + 1, output[2]);
+ vst1_u16(dst_3 + 1, output[3]);
+}
+
+inline void Filter8(const uint16x8_t p3q3, const uint16x8_t p2q2,
+ const uint16x8_t p1q1, const uint16x8_t p0q0,
+ uint16x8_t* const p2q2_output,
+ uint16x8_t* const p1q1_output,
+ uint16x8_t* const p0q0_output) {
+ // Sum p2 and q2 output from opposite directions.
+ // The formula is regrouped to allow 2 doubling operations to be combined.
+ // p2 = (3 * p3) + (2 * p2) + p1 + p0 + q0
+ // ^^^^^^^^
+ // q2 = p0 + q0 + q1 + (2 * q2) + (3 * q3)
+ // ^^^^^^^^
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^
+ const uint16x8_t p23q23 = vaddq_u16(p3q3, p2q2);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^
+ uint16x8_t sum = vshlq_n_u16(p23q23, 1);
+
+ // Add two other terms to make dual issue with shift more likely.
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^
+ const uint16x8_t p01q01 = vaddq_u16(p0q0, p1q1);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^^^^^^^^
+ sum = vaddq_u16(sum, p01q01);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^
+ sum = vaddq_u16(sum, p3q3);
+
+ // p2q2 = p3q3 + 2 * (p3q3 + p2q2) + p1q1 + p0q0 + q0p0
+ // ^^^^^^
+ const uint16x8_t q0p0 = Transpose64(p0q0);
+ sum = vaddq_u16(sum, q0p0);
+
+ *p2q2_output = vrshrq_n_u16(sum, 3);
+
+ // Convert to p1 and q1 output:
+ // p1 = p2 - p3 - p2 + p1 + q1
+ // q1 = q2 - q3 - q2 + q0 + p1
+ sum = vsubq_u16(sum, p23q23);
+ const uint16x8_t q1p1 = Transpose64(p1q1);
+ sum = vaddq_u16(sum, vaddq_u16(p1q1, q1p1));
+
+ *p1q1_output = vrshrq_n_u16(sum, 3);
+
+ // Convert to p0 and q0 output:
+ // p0 = p1 - p3 - p1 + p0 + q2
+ // q0 = q1 - q3 - q1 + q0 + p2
+ sum = vsubq_u16(sum, vaddq_u16(p3q3, p1q1));
+ const uint16x8_t q2p2 = Transpose64(p2q2);
+ sum = vaddq_u16(sum, vaddq_u16(p0q0, q2p2));
+
+ *p0q0_output = vrshrq_n_u16(sum, 3);
+}
+
+void Horizontal8_NEON(void* const dest, const ptrdiff_t stride,
+ int outer_thresh, int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest);
+ auto* const dst_p3 = reinterpret_cast<uint16_t*>(dst - 4 * stride);
+ auto* const dst_p2 = reinterpret_cast<uint16_t*>(dst - 3 * stride);
+ auto* const dst_p1 = reinterpret_cast<uint16_t*>(dst - 2 * stride);
+ auto* const dst_p0 = reinterpret_cast<uint16_t*>(dst - stride);
+ auto* const dst_q0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_q1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_q2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+ auto* const dst_q3 = reinterpret_cast<uint16_t*>(dst + 3 * stride);
+
+ const uint16x4_t src[8] = {
+ vld1_u16(dst_p3), vld1_u16(dst_p2), vld1_u16(dst_p1), vld1_u16(dst_p0),
+ vld1_u16(dst_q0), vld1_u16(dst_q1), vld1_u16(dst_q2), vld1_u16(dst_q3)};
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[2], src[3], src[4], src[5], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat4_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[3], src[4]);
+ const uint16x8_t p1q1 = vcombine_u16(src[2], src[5]);
+ const uint16x8_t p2q2 = vcombine_u16(src[1], src[6]);
+ const uint16x8_t p3q3 = vcombine_u16(src[0], src[7]);
+ Filter8Masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat4_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ // ZIP1 p0q0, p1q1 may perform better here.
+ const uint16x8_t p0q1 = vcombine_u16(src[3], src[5]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output, p2q2_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat4_mask| controls whether the needed filter is Filter4 or
+ // Filter8. Therefore if it is false when |needs_filter_mask| is true, Filter8
+ // output is not used.
+ uint16x8_t f8_p2q2, f8_p1q1, f8_p0q0;
+ const uint64x1_t need_filter8 = vreinterpret_u64_u16(is_flat4_mask);
+ if (vget_lane_u64(need_filter8, 0) == 0) {
+ // Filter8() does not apply, but Filter4() applies to one or more values.
+ p2q2_output = p2q2;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ const uint16x8_t is_flat4_mask_8 =
+ vcombine_u16(is_flat4_mask, is_flat4_mask);
+ Filter8(p3q3, p2q2, p1q1, p0q0, &f8_p2q2, &f8_p1q1, &f8_p0q0);
+ p2q2_output = vbslq_u16(is_flat4_mask_8, f8_p2q2, p2q2);
+ p1q1_output = vbslq_u16(is_flat4_mask_8, f8_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(is_flat4_mask_8, f8_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+
+ vst1_u16(dst_p2, vget_low_u16(p2q2_output));
+ vst1_u16(dst_p1, vget_low_u16(p1q1_output));
+ vst1_u16(dst_p0, vget_low_u16(p0q0_output));
+ vst1_u16(dst_q0, vget_high_u16(p0q0_output));
+ vst1_u16(dst_q1, vget_high_u16(p1q1_output));
+ vst1_u16(dst_q2, vget_high_u16(p2q2_output));
+}
+
+inline uint16x8_t ReverseLowHalf(const uint16x8_t a) {
+ return vcombine_u16(vrev64_u16(vget_low_u16(a)), vget_high_u16(a));
+}
+
+void Vertical8_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh,
+ int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest) - 4 * sizeof(uint16_t);
+ auto* const dst_0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+ auto* const dst_3 = reinterpret_cast<uint16_t*>(dst + 3 * stride);
+
+ // src_raw[n] contains p3, p2, p1, p0, q0, q1, q2, q3 for row n.
+ // To get desired pairs after transpose, one half should be reversed.
+ uint16x8_t src[4] = {vld1q_u16(dst_0), vld1q_u16(dst_1), vld1q_u16(dst_2),
+ vld1q_u16(dst_3)};
+
+ // src[0] = p0q0
+ // src[1] = p1q1
+ // src[2] = p2q2
+ // src[3] = p3q3
+ LoopFilterTranspose4x8(src);
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask = OuterThreshold(
+ vget_low_u16(src[1]), vget_low_u16(src[0]), vget_high_u16(src[0]),
+ vget_high_u16(src[1]), outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat4_mask;
+ const uint16x8_t p0q0 = src[0];
+ const uint16x8_t p1q1 = src[1];
+ const uint16x8_t p2q2 = src[2];
+ const uint16x8_t p3q3 = src[3];
+ Filter8Masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat4_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ const uint16x8_t p0q1 = vcombine_u16(vget_low_u16(p0q0), vget_high_u16(p1q1));
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output, p2q2_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat4_mask| controls whether the needed filter is Filter4 or
+ // Filter8. Therefore if it is false when |needs_filter_mask| is true, Filter8
+ // output is not used.
+ const uint64x1_t need_filter8 = vreinterpret_u64_u16(is_flat4_mask);
+ if (vget_lane_u64(need_filter8, 0) == 0) {
+ // Filter8() does not apply, but Filter4() applies to one or more values.
+ p2q2_output = p2q2;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ const uint16x8_t is_flat4_mask_8 =
+ vcombine_u16(is_flat4_mask, is_flat4_mask);
+ uint16x8_t f8_p2q2, f8_p1q1, f8_p0q0;
+ Filter8(p3q3, p2q2, p1q1, p0q0, &f8_p2q2, &f8_p1q1, &f8_p0q0);
+ p2q2_output = vbslq_u16(is_flat4_mask_8, f8_p2q2, p2q2);
+ p1q1_output = vbslq_u16(is_flat4_mask_8, f8_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(is_flat4_mask_8, f8_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+
+ uint16x8_t output[4] = {p0q0_output, p1q1_output, p2q2_output, p3q3};
+ // After transpose, |output| will contain rows of the form:
+ // p0 p1 p2 p3 q0 q1 q2 q3
+ Transpose4x8(output);
+
+ // Reverse p values to produce original order:
+ // p3 p2 p1 p0 q0 q1 q2 q3
+ vst1q_u16(dst_0, ReverseLowHalf(output[0]));
+ vst1q_u16(dst_1, ReverseLowHalf(output[1]));
+ vst1q_u16(dst_2, ReverseLowHalf(output[2]));
+ vst1q_u16(dst_3, ReverseLowHalf(output[3]));
+}
+inline void Filter14(const uint16x8_t p6q6, const uint16x8_t p5q5,
+ const uint16x8_t p4q4, const uint16x8_t p3q3,
+ const uint16x8_t p2q2, const uint16x8_t p1q1,
+ const uint16x8_t p0q0, uint16x8_t* const p5q5_output,
+ uint16x8_t* const p4q4_output,
+ uint16x8_t* const p3q3_output,
+ uint16x8_t* const p2q2_output,
+ uint16x8_t* const p1q1_output,
+ uint16x8_t* const p0q0_output) {
+ // Sum p5 and q5 output from opposite directions.
+ // p5 = (7 * p6) + (2 * p5) + (2 * p4) + p3 + p2 + p1 + p0 + q0
+ // ^^^^^^^^
+ // q5 = p0 + q0 + q1 + q2 + q3 + (2 * q4) + (2 * q5) + (7 * q6)
+ // ^^^^^^^^
+ const uint16x8_t p6q6_x7 = vsubq_u16(vshlq_n_u16(p6q6, 3), p6q6);
+
+ // p5 = (7 * p6) + (2 * p5) + (2 * p4) + p3 + p2 + p1 + p0 + q0
+ // ^^^^^^^^^^^^^^^^^^^
+ // q5 = p0 + q0 + q1 + q2 + q3 + (2 * q4) + (2 * q5) + (7 * q6)
+ // ^^^^^^^^^^^^^^^^^^^
+ uint16x8_t sum = vshlq_n_u16(vaddq_u16(p5q5, p4q4), 1);
+ sum = vaddq_u16(sum, p6q6_x7);
+
+ // p5 = (7 * p6) + (2 * p5) + (2 * p4) + p3 + p2 + p1 + p0 + q0
+ // ^^^^^^^
+ // q5 = p0 + q0 + q1 + q2 + q3 + (2 * q4) + (2 * q5) + (7 * q6)
+ // ^^^^^^^
+ sum = vaddq_u16(vaddq_u16(p3q3, p2q2), sum);
+
+ // p5 = (7 * p6) + (2 * p5) + (2 * p4) + p3 + p2 + p1 + p0 + q0
+ // ^^^^^^^
+ // q5 = p0 + q0 + q1 + q2 + q3 + (2 * q4) + (2 * q5) + (7 * q6)
+ // ^^^^^^^
+ sum = vaddq_u16(vaddq_u16(p1q1, p0q0), sum);
+
+ // p5 = (7 * p6) + (2 * p5) + (2 * p4) + p3 + p2 + p1 + p0 + q0
+ // ^^
+ // q5 = p0 + q0 + q1 + q2 + q3 + (2 * q4) + (2 * q5) + (7 * q6)
+ // ^^
+ const uint16x8_t q0p0 = Transpose64(p0q0);
+ sum = vaddq_u16(sum, q0p0);
+
+ *p5q5_output = vrshrq_n_u16(sum, 4);
+
+ // Convert to p4 and q4 output:
+ // p4 = p5 - (2 * p6) + p3 + q1
+ // q4 = q5 - (2 * q6) + q3 + p1
+ sum = vsubq_u16(sum, vshlq_n_u16(p6q6, 1));
+ const uint16x8_t q1p1 = Transpose64(p1q1);
+ sum = vaddq_u16(vaddq_u16(p3q3, q1p1), sum);
+
+ *p4q4_output = vrshrq_n_u16(sum, 4);
+
+ // Convert to p3 and q3 output:
+ // p3 = p4 - p6 - p5 + p2 + q2
+ // q3 = q4 - q6 - q5 + q2 + p2
+ sum = vsubq_u16(sum, vaddq_u16(p6q6, p5q5));
+ const uint16x8_t q2p2 = Transpose64(p2q2);
+ sum = vaddq_u16(vaddq_u16(p2q2, q2p2), sum);
+
+ *p3q3_output = vrshrq_n_u16(sum, 4);
+
+ // Convert to p2 and q2 output:
+ // p2 = p3 - p6 - p4 + p1 + q3
+ // q2 = q3 - q6 - q4 + q1 + p3
+ sum = vsubq_u16(sum, vaddq_u16(p6q6, p4q4));
+ const uint16x8_t q3p3 = Transpose64(p3q3);
+ sum = vaddq_u16(vaddq_u16(p1q1, q3p3), sum);
+
+ *p2q2_output = vrshrq_n_u16(sum, 4);
+
+ // Convert to p1 and q1 output:
+ // p1 = p2 - p6 - p3 + p0 + q4
+ // q1 = q2 - q6 - q3 + q0 + p4
+ sum = vsubq_u16(sum, vaddq_u16(p6q6, p3q3));
+ const uint16x8_t q4p4 = Transpose64(p4q4);
+ sum = vaddq_u16(vaddq_u16(p0q0, q4p4), sum);
+
+ *p1q1_output = vrshrq_n_u16(sum, 4);
+
+ // Convert to p0 and q0 output:
+ // p0 = p1 - p6 - p2 + q0 + q5
+ // q0 = q1 - q6 - q2 + p0 + p5
+ sum = vsubq_u16(sum, vaddq_u16(p6q6, p2q2));
+ const uint16x8_t q5p5 = Transpose64(p5q5);
+ sum = vaddq_u16(vaddq_u16(q0p0, q5p5), sum);
+
+ *p0q0_output = vrshrq_n_u16(sum, 4);
+}
+
+void Horizontal14_NEON(void* const dest, const ptrdiff_t stride,
+ int outer_thresh, int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest);
+ auto* const dst_p6 = reinterpret_cast<uint16_t*>(dst - 7 * stride);
+ auto* const dst_p5 = reinterpret_cast<uint16_t*>(dst - 6 * stride);
+ auto* const dst_p4 = reinterpret_cast<uint16_t*>(dst - 5 * stride);
+ auto* const dst_p3 = reinterpret_cast<uint16_t*>(dst - 4 * stride);
+ auto* const dst_p2 = reinterpret_cast<uint16_t*>(dst - 3 * stride);
+ auto* const dst_p1 = reinterpret_cast<uint16_t*>(dst - 2 * stride);
+ auto* const dst_p0 = reinterpret_cast<uint16_t*>(dst - stride);
+ auto* const dst_q0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_q1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_q2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+ auto* const dst_q3 = reinterpret_cast<uint16_t*>(dst + 3 * stride);
+ auto* const dst_q4 = reinterpret_cast<uint16_t*>(dst + 4 * stride);
+ auto* const dst_q5 = reinterpret_cast<uint16_t*>(dst + 5 * stride);
+ auto* const dst_q6 = reinterpret_cast<uint16_t*>(dst + 6 * stride);
+
+ const uint16x4_t src[14] = {
+ vld1_u16(dst_p6), vld1_u16(dst_p5), vld1_u16(dst_p4), vld1_u16(dst_p3),
+ vld1_u16(dst_p2), vld1_u16(dst_p1), vld1_u16(dst_p0), vld1_u16(dst_q0),
+ vld1_u16(dst_q1), vld1_u16(dst_q2), vld1_u16(dst_q3), vld1_u16(dst_q4),
+ vld1_u16(dst_q5), vld1_u16(dst_q6)};
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask =
+ OuterThreshold(src[5], src[6], src[7], src[8], outer_thresh);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat4_mask;
+ const uint16x8_t p0q0 = vcombine_u16(src[6], src[7]);
+ const uint16x8_t p1q1 = vcombine_u16(src[5], src[8]);
+ const uint16x8_t p2q2 = vcombine_u16(src[4], src[9]);
+ const uint16x8_t p3q3 = vcombine_u16(src[3], src[10]);
+ Filter8Masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat4_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+ const uint16x8_t p4q4 = vcombine_u16(src[2], src[11]);
+ const uint16x8_t p5q5 = vcombine_u16(src[1], src[12]);
+ const uint16x8_t p6q6 = vcombine_u16(src[0], src[13]);
+ // Mask to choose between the outputs of Filter8 and Filter14.
+ // As with the derivation of |is_flat4_mask|, the question of whether to use
+ // Filter14 is only raised where |is_flat4_mask| is true.
+ const uint16x4_t is_flat4_outer_mask = vand_u16(
+ is_flat4_mask, IsFlat4(vabdq_u16(p0q0, p4q4), vabdq_u16(p0q0, p5q5),
+ vabdq_u16(p0q0, p6q6)));
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ // ZIP1 p0q0, p1q1 may perform better here.
+ const uint16x8_t p0q1 = vcombine_u16(src[6], src[8]);
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output, p2q2_output, p3q3_output, p4q4_output,
+ p5q5_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat4_mask| controls whether the needed filter is Filter4 or
+ // Filter8. Therefore if it is false when |needs_filter_mask| is true, Filter8
+ // output is not used.
+ uint16x8_t f8_p2q2, f8_p1q1, f8_p0q0;
+ const uint64x1_t need_filter8 = vreinterpret_u64_u16(is_flat4_mask);
+ if (vget_lane_u64(need_filter8, 0) == 0) {
+ // Filter8() and Filter14() do not apply, but Filter4() applies to one or
+ // more values.
+ p5q5_output = p5q5;
+ p4q4_output = p4q4;
+ p3q3_output = p3q3;
+ p2q2_output = p2q2;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ const uint16x8_t use_filter8_mask =
+ vcombine_u16(is_flat4_mask, is_flat4_mask);
+ Filter8(p3q3, p2q2, p1q1, p0q0, &f8_p2q2, &f8_p1q1, &f8_p0q0);
+ const uint64x1_t need_filter14 = vreinterpret_u64_u16(is_flat4_outer_mask);
+ if (vget_lane_u64(need_filter14, 0) == 0) {
+ // Filter14() does not apply, but Filter8() and Filter4() apply to one or
+ // more values.
+ p5q5_output = p5q5;
+ p4q4_output = p4q4;
+ p3q3_output = p3q3;
+ p2q2_output = vbslq_u16(use_filter8_mask, f8_p2q2, p2q2);
+ p1q1_output = vbslq_u16(use_filter8_mask, f8_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(use_filter8_mask, f8_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ } else {
+ // All filters may contribute values to final outputs.
+ const uint16x8_t use_filter14_mask =
+ vcombine_u16(is_flat4_outer_mask, is_flat4_outer_mask);
+ uint16x8_t f14_p5q5, f14_p4q4, f14_p3q3, f14_p2q2, f14_p1q1, f14_p0q0;
+ Filter14(p6q6, p5q5, p4q4, p3q3, p2q2, p1q1, p0q0, &f14_p5q5, &f14_p4q4,
+ &f14_p3q3, &f14_p2q2, &f14_p1q1, &f14_p0q0);
+ p5q5_output = vbslq_u16(use_filter14_mask, f14_p5q5, p5q5);
+ p4q4_output = vbslq_u16(use_filter14_mask, f14_p4q4, p4q4);
+ p3q3_output = vbslq_u16(use_filter14_mask, f14_p3q3, p3q3);
+ p2q2_output = vbslq_u16(use_filter14_mask, f14_p2q2, f8_p2q2);
+ p2q2_output = vbslq_u16(use_filter8_mask, p2q2_output, p2q2);
+ p2q2_output = vbslq_u16(needs_filter_mask_8, p2q2_output, p2q2);
+ p1q1_output = vbslq_u16(use_filter14_mask, f14_p1q1, f8_p1q1);
+ p1q1_output = vbslq_u16(use_filter8_mask, p1q1_output, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(use_filter14_mask, f14_p0q0, f8_p0q0);
+ p0q0_output = vbslq_u16(use_filter8_mask, p0q0_output, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+ }
+
+ vst1_u16(dst_p5, vget_low_u16(p5q5_output));
+ vst1_u16(dst_p4, vget_low_u16(p4q4_output));
+ vst1_u16(dst_p3, vget_low_u16(p3q3_output));
+ vst1_u16(dst_p2, vget_low_u16(p2q2_output));
+ vst1_u16(dst_p1, vget_low_u16(p1q1_output));
+ vst1_u16(dst_p0, vget_low_u16(p0q0_output));
+ vst1_u16(dst_q0, vget_high_u16(p0q0_output));
+ vst1_u16(dst_q1, vget_high_u16(p1q1_output));
+ vst1_u16(dst_q2, vget_high_u16(p2q2_output));
+ vst1_u16(dst_q3, vget_high_u16(p3q3_output));
+ vst1_u16(dst_q4, vget_high_u16(p4q4_output));
+ vst1_u16(dst_q5, vget_high_u16(p5q5_output));
+}
+
+inline uint16x8x2_t PermuteACDB64(const uint16x8_t ab, const uint16x8_t cd) {
+ uint16x8x2_t acdb;
+#if defined(__aarch64__)
+ // a[b] <- [c]d
+ acdb.val[0] = vreinterpretq_u16_u64(
+ vtrn1q_u64(vreinterpretq_u64_u16(ab), vreinterpretq_u64_u16(cd)));
+ // [a]b <- c[d]
+ acdb.val[1] = vreinterpretq_u16_u64(
+ vtrn2q_u64(vreinterpretq_u64_u16(cd), vreinterpretq_u64_u16(ab)));
+#else
+ // a[b] <- [c]d
+ acdb.val[0] = vreinterpretq_u16_u64(
+ vsetq_lane_u64(vgetq_lane_u64(vreinterpretq_u64_u16(cd), 0),
+ vreinterpretq_u64_u16(ab), 1));
+ // [a]b <- c[d]
+ acdb.val[1] = vreinterpretq_u16_u64(
+ vsetq_lane_u64(vgetq_lane_u64(vreinterpretq_u64_u16(cd), 1),
+ vreinterpretq_u64_u16(ab), 0));
+#endif // defined(__aarch64__)
+ return acdb;
+}
+
+void Vertical14_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh,
+ int inner_thresh, int hev_thresh) {
+ auto* const dst = static_cast<uint8_t*>(dest) - 8 * sizeof(uint16_t);
+ auto* const dst_0 = reinterpret_cast<uint16_t*>(dst);
+ auto* const dst_1 = reinterpret_cast<uint16_t*>(dst + stride);
+ auto* const dst_2 = reinterpret_cast<uint16_t*>(dst + 2 * stride);
+ auto* const dst_3 = reinterpret_cast<uint16_t*>(dst + 3 * stride);
+
+ // Low halves: p7 p6 p5 p4
+ // High halves: p3 p2 p1 p0
+ uint16x8_t src_p[4] = {vld1q_u16(dst_0), vld1q_u16(dst_1), vld1q_u16(dst_2),
+ vld1q_u16(dst_3)};
+ // p7 will be the low half of src_p[0]. Not used until the end.
+ Transpose4x8(src_p);
+
+ // Low halves: q0 q1 q2 q3
+ // High halves: q4 q5 q6 q7
+ uint16x8_t src_q[4] = {vld1q_u16(dst_0 + 8), vld1q_u16(dst_1 + 8),
+ vld1q_u16(dst_2 + 8), vld1q_u16(dst_3 + 8)};
+ // q7 will be the high half of src_q[3]. Not used until the end.
+ Transpose4x8(src_q);
+
+ // Adjust thresholds to bitdepth.
+ outer_thresh <<= 2;
+ inner_thresh <<= 2;
+ hev_thresh <<= 2;
+ const uint16x4_t outer_mask = OuterThreshold(
+ vget_high_u16(src_p[2]), vget_high_u16(src_p[3]), vget_low_u16(src_q[0]),
+ vget_low_u16(src_q[1]), outer_thresh);
+ const uint16x8_t p0q0 = vextq_u16(src_p[3], src_q[0], 4);
+ const uint16x8_t p1q1 = vextq_u16(src_p[2], src_q[1], 4);
+ const uint16x8_t p2q2 = vextq_u16(src_p[1], src_q[2], 4);
+ const uint16x8_t p3q3 = vextq_u16(src_p[0], src_q[3], 4);
+ uint16x4_t hev_mask;
+ uint16x4_t needs_filter_mask;
+ uint16x4_t is_flat4_mask;
+ Filter8Masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
+ &needs_filter_mask, &is_flat4_mask, &hev_mask);
+
+#if defined(__aarch64__)
+ if (vaddv_u16(needs_filter_mask) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#else // !defined(__aarch64__)
+ // This might be faster than vaddv (latency 3) because mov to general register
+ // has latency 2.
+ const uint64x1_t needs_filter_mask64 =
+ vreinterpret_u64_u16(needs_filter_mask);
+ if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
+ // None of the values will be filtered.
+ return;
+ }
+#endif // defined(__aarch64__)
+ const uint16x8_t p4q4 =
+ vcombine_u16(vget_low_u16(src_p[3]), vget_high_u16(src_q[0]));
+ const uint16x8_t p5q5 =
+ vcombine_u16(vget_low_u16(src_p[2]), vget_high_u16(src_q[1]));
+ const uint16x8_t p6q6 =
+ vcombine_u16(vget_low_u16(src_p[1]), vget_high_u16(src_q[2]));
+ const uint16x8_t p7q7 =
+ vcombine_u16(vget_low_u16(src_p[0]), vget_high_u16(src_q[3]));
+ // Mask to choose between the outputs of Filter8 and Filter14.
+ // As with the derivation of |is_flat4_mask|, the question of whether to use
+ // Filter14 is only raised where |is_flat4_mask| is true.
+ const uint16x4_t is_flat4_outer_mask = vand_u16(
+ is_flat4_mask, IsFlat4(vabdq_u16(p0q0, p4q4), vabdq_u16(p0q0, p5q5),
+ vabdq_u16(p0q0, p6q6)));
+ // Copy the masks to the high bits for packed comparisons later.
+ const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
+ const uint16x8_t needs_filter_mask_8 =
+ vcombine_u16(needs_filter_mask, needs_filter_mask);
+
+ uint16x8_t f4_p1q1;
+ uint16x8_t f4_p0q0;
+ const uint16x8_t p0q1 = vcombine_u16(vget_low_u16(p0q0), vget_high_u16(p1q1));
+ Filter4(p0q0, p0q1, p1q1, hev_mask, &f4_p1q1, &f4_p0q0);
+ f4_p1q1 = vbslq_u16(hev_mask_8, p1q1, f4_p1q1);
+
+ uint16x8_t p0q0_output, p1q1_output, p2q2_output, p3q3_output, p4q4_output,
+ p5q5_output;
+ // Because we did not return after testing |needs_filter_mask| we know it is
+ // nonzero. |is_flat4_mask| controls whether the needed filter is Filter4 or
+ // Filter8. Therefore if it is false when |needs_filter_mask| is true, Filter8
+ // output is not used.
+ uint16x8_t f8_p2q2, f8_p1q1, f8_p0q0;
+ const uint64x1_t need_filter8 = vreinterpret_u64_u16(is_flat4_mask);
+ if (vget_lane_u64(need_filter8, 0) == 0) {
+ // Filter8() and Filter14() do not apply, but Filter4() applies to one or
+ // more values.
+ p5q5_output = p5q5;
+ p4q4_output = p4q4;
+ p3q3_output = p3q3;
+ p2q2_output = p2q2;
+ p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
+ } else {
+ const uint16x8_t use_filter8_mask =
+ vcombine_u16(is_flat4_mask, is_flat4_mask);
+ Filter8(p3q3, p2q2, p1q1, p0q0, &f8_p2q2, &f8_p1q1, &f8_p0q0);
+ const uint64x1_t need_filter14 = vreinterpret_u64_u16(is_flat4_outer_mask);
+ if (vget_lane_u64(need_filter14, 0) == 0) {
+ // Filter14() does not apply, but Filter8() and Filter4() apply to one or
+ // more values.
+ p5q5_output = p5q5;
+ p4q4_output = p4q4;
+ p3q3_output = p3q3;
+ p2q2_output = vbslq_u16(use_filter8_mask, f8_p2q2, p2q2);
+ p1q1_output = vbslq_u16(use_filter8_mask, f8_p1q1, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(use_filter8_mask, f8_p0q0, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ } else {
+ // All filters may contribute values to final outputs.
+ const uint16x8_t use_filter14_mask =
+ vcombine_u16(is_flat4_outer_mask, is_flat4_outer_mask);
+ uint16x8_t f14_p5q5, f14_p4q4, f14_p3q3, f14_p2q2, f14_p1q1, f14_p0q0;
+ Filter14(p6q6, p5q5, p4q4, p3q3, p2q2, p1q1, p0q0, &f14_p5q5, &f14_p4q4,
+ &f14_p3q3, &f14_p2q2, &f14_p1q1, &f14_p0q0);
+ p5q5_output = vbslq_u16(use_filter14_mask, f14_p5q5, p5q5);
+ p4q4_output = vbslq_u16(use_filter14_mask, f14_p4q4, p4q4);
+ p3q3_output = vbslq_u16(use_filter14_mask, f14_p3q3, p3q3);
+ p2q2_output = vbslq_u16(use_filter14_mask, f14_p2q2, f8_p2q2);
+ p2q2_output = vbslq_u16(use_filter8_mask, p2q2_output, p2q2);
+ p2q2_output = vbslq_u16(needs_filter_mask_8, p2q2_output, p2q2);
+ p1q1_output = vbslq_u16(use_filter14_mask, f14_p1q1, f8_p1q1);
+ p1q1_output = vbslq_u16(use_filter8_mask, p1q1_output, f4_p1q1);
+ p1q1_output = vbslq_u16(needs_filter_mask_8, p1q1_output, p1q1);
+ p0q0_output = vbslq_u16(use_filter14_mask, f14_p0q0, f8_p0q0);
+ p0q0_output = vbslq_u16(use_filter8_mask, p0q0_output, f4_p0q0);
+ p0q0_output = vbslq_u16(needs_filter_mask_8, p0q0_output, p0q0);
+ }
+ }
+ // To get the correctly ordered rows from the transpose, we need:
+ // p7p3 p6p2 p5p1 p4p0
+ // q0q4 q1q5 q2q6 q3q7
+ const uint16x8x2_t p7p3_q3q7 = PermuteACDB64(p7q7, p3q3_output);
+ const uint16x8x2_t p6p2_q2q6 = PermuteACDB64(p6q6, p2q2_output);
+ const uint16x8x2_t p5p1_q1q5 = PermuteACDB64(p5q5_output, p1q1_output);
+ const uint16x8x2_t p4p0_q0q4 = PermuteACDB64(p4q4_output, p0q0_output);
+ uint16x8_t output_p[4] = {p7p3_q3q7.val[0], p6p2_q2q6.val[0],
+ p5p1_q1q5.val[0], p4p0_q0q4.val[0]};
+ Transpose4x8(output_p);
+ uint16x8_t output_q[4] = {p4p0_q0q4.val[1], p5p1_q1q5.val[1],
+ p6p2_q2q6.val[1], p7p3_q3q7.val[1]};
+ Transpose4x8(output_q);
+
+ // Reverse p values to produce original order:
+ // p3 p2 p1 p0 q0 q1 q2 q3
+ vst1q_u16(dst_0, output_p[0]);
+ vst1q_u16(dst_0 + 8, output_q[0]);
+ vst1q_u16(dst_1, output_p[1]);
+ vst1q_u16(dst_1 + 8, output_q[1]);
+ vst1q_u16(dst_2, output_p[2]);
+ vst1q_u16(dst_2 + 8, output_q[2]);
+ vst1q_u16(dst_3, output_p[3]);
+ vst1q_u16(dst_3 + 8, output_q[3]);
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
+ Horizontal4_NEON;
+ dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] = Vertical4_NEON;
+ dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
+ Horizontal6_NEON;
+ dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] = Vertical6_NEON;
+ dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
+ Horizontal8_NEON;
+ dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] = Vertical8_NEON;
+ dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
+ Horizontal14_NEON;
+ dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
+ Vertical14_NEON;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void LoopFilterInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/loop_filter_neon.h b/src/dsp/arm/loop_filter_neon.h
index 5f79200..540defc 100644
--- a/src/dsp/arm/loop_filter_neon.h
+++ b/src/dsp/arm/loop_filter_neon.h
@@ -48,6 +48,23 @@ void LoopFilterInit_NEON();
LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeVertical LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeHorizontal \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeVertical LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeHorizontal \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeVertical LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeHorizontal \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeVertical LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeHorizontal \
+ LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeVertical \
+ LIBGAV1_CPU_NEON
+
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_LOOP_FILTER_NEON_H_
diff --git a/src/dsp/arm/loop_restoration_10bit_neon.cc b/src/dsp/arm/loop_restoration_10bit_neon.cc
new file mode 100644
index 0000000..410bc20
--- /dev/null
+++ b/src/dsp/arm/loop_restoration_10bit_neon.cc
@@ -0,0 +1,2652 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/loop_restoration.h"
+#include "src/utils/cpu.h"
+
+#if LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10
+#include <arm_neon.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+#include "src/dsp/arm/common_neon.h"
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+//------------------------------------------------------------------------------
+// Wiener
+
+// Must make a local copy of coefficients to help compiler know that they have
+// no overlap with other buffers. Using 'const' keyword is not enough. Actually
+// compiler doesn't make a copy, since there is enough registers in this case.
+inline void PopulateWienerCoefficients(
+ const RestorationUnitInfo& restoration_info, const int direction,
+ int16_t filter[4]) {
+ for (int i = 0; i < 4; ++i) {
+ filter[i] = restoration_info.wiener_info.filter[direction][i];
+ }
+}
+
+inline int32x4x2_t WienerHorizontal2(const uint16x8_t s0, const uint16x8_t s1,
+ const int16_t filter,
+ const int32x4x2_t sum) {
+ const int16x8_t ss = vreinterpretq_s16_u16(vaddq_u16(s0, s1));
+ int32x4x2_t res;
+ res.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(ss), filter);
+ res.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(ss), filter);
+ return res;
+}
+
+inline void WienerHorizontalSum(const uint16x8_t s[3], const int16_t filter[4],
+ int32x4x2_t sum, int16_t* const wiener_buffer) {
+ constexpr int offset =
+ 1 << (kBitdepth10 + kWienerFilterBits - kInterRoundBitsHorizontal - 1);
+ constexpr int limit = (offset << 2) - 1;
+ const int16x8_t s_0_2 = vreinterpretq_s16_u16(vaddq_u16(s[0], s[2]));
+ const int16x8_t s_1 = vreinterpretq_s16_u16(s[1]);
+ int16x4x2_t sum16;
+ sum.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(s_0_2), filter[2]);
+ sum.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(s_1), filter[3]);
+ sum16.val[0] = vqshrn_n_s32(sum.val[0], kInterRoundBitsHorizontal);
+ sum16.val[0] = vmax_s16(sum16.val[0], vdup_n_s16(-offset));
+ sum16.val[0] = vmin_s16(sum16.val[0], vdup_n_s16(limit - offset));
+ vst1_s16(wiener_buffer, sum16.val[0]);
+ sum.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(s_0_2), filter[2]);
+ sum.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(s_1), filter[3]);
+ sum16.val[1] = vqshrn_n_s32(sum.val[1], kInterRoundBitsHorizontal);
+ sum16.val[1] = vmax_s16(sum16.val[1], vdup_n_s16(-offset));
+ sum16.val[1] = vmin_s16(sum16.val[1], vdup_n_s16(limit - offset));
+ vst1_s16(wiener_buffer + 4, sum16.val[1]);
+}
+
+inline void WienerHorizontalTap7(const uint16_t* src,
+ const ptrdiff_t src_stride,
+ const ptrdiff_t wiener_stride,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4],
+ int16_t** const wiener_buffer) {
+ const ptrdiff_t src_width =
+ width + ((kRestorationHorizontalBorder - 1) * sizeof(*src));
+ for (int y = height; y != 0; --y) {
+ const uint16_t* src_ptr = src;
+ uint16x8_t s[8];
+ s[0] = vld1q_u16(src_ptr);
+ ptrdiff_t x = wiener_stride;
+ ptrdiff_t valid_bytes = src_width * 2;
+ do {
+ src_ptr += 8;
+ valid_bytes -= 16;
+ s[7] = Load1QMsanU16(src_ptr, 16 - valid_bytes);
+ s[1] = vextq_u16(s[0], s[7], 1);
+ s[2] = vextq_u16(s[0], s[7], 2);
+ s[3] = vextq_u16(s[0], s[7], 3);
+ s[4] = vextq_u16(s[0], s[7], 4);
+ s[5] = vextq_u16(s[0], s[7], 5);
+ s[6] = vextq_u16(s[0], s[7], 6);
+ int32x4x2_t sum;
+ sum.val[0] = sum.val[1] =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 1));
+ sum = WienerHorizontal2(s[0], s[6], filter[0], sum);
+ sum = WienerHorizontal2(s[1], s[5], filter[1], sum);
+ WienerHorizontalSum(s + 2, filter, sum, *wiener_buffer);
+ s[0] = s[7];
+ *wiener_buffer += 8;
+ x -= 8;
+ } while (x != 0);
+ src += src_stride;
+ }
+}
+
+inline void WienerHorizontalTap5(const uint16_t* src,
+ const ptrdiff_t src_stride,
+ const ptrdiff_t wiener_stride,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4],
+ int16_t** const wiener_buffer) {
+ const ptrdiff_t src_width =
+ width + ((kRestorationHorizontalBorder - 1) * sizeof(*src));
+ for (int y = height; y != 0; --y) {
+ const uint16_t* src_ptr = src;
+ uint16x8_t s[6];
+ s[0] = vld1q_u16(src_ptr);
+ ptrdiff_t x = wiener_stride;
+ ptrdiff_t valid_bytes = src_width * 2;
+ do {
+ src_ptr += 8;
+ valid_bytes -= 16;
+ s[5] = Load1QMsanU16(src_ptr, 16 - valid_bytes);
+ s[1] = vextq_u16(s[0], s[5], 1);
+ s[2] = vextq_u16(s[0], s[5], 2);
+ s[3] = vextq_u16(s[0], s[5], 3);
+ s[4] = vextq_u16(s[0], s[5], 4);
+
+ int32x4x2_t sum;
+ sum.val[0] = sum.val[1] =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 1));
+ sum = WienerHorizontal2(s[0], s[4], filter[1], sum);
+ WienerHorizontalSum(s + 1, filter, sum, *wiener_buffer);
+ s[0] = s[5];
+ *wiener_buffer += 8;
+ x -= 8;
+ } while (x != 0);
+ src += src_stride;
+ }
+}
+
+inline void WienerHorizontalTap3(const uint16_t* src,
+ const ptrdiff_t src_stride,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4],
+ int16_t** const wiener_buffer) {
+ for (int y = height; y != 0; --y) {
+ const uint16_t* src_ptr = src;
+ uint16x8_t s[3];
+ ptrdiff_t x = width;
+ do {
+ s[0] = vld1q_u16(src_ptr);
+ s[1] = vld1q_u16(src_ptr + 1);
+ s[2] = vld1q_u16(src_ptr + 2);
+
+ int32x4x2_t sum;
+ sum.val[0] = sum.val[1] =
+ vdupq_n_s32(1 << (kInterRoundBitsHorizontal - 1));
+ WienerHorizontalSum(s, filter, sum, *wiener_buffer);
+ src_ptr += 8;
+ *wiener_buffer += 8;
+ x -= 8;
+ } while (x != 0);
+ src += src_stride;
+ }
+}
+
+inline void WienerHorizontalTap1(const uint16_t* src,
+ const ptrdiff_t src_stride,
+ const ptrdiff_t width, const int height,
+ int16_t** const wiener_buffer) {
+ for (int y = height; y != 0; --y) {
+ ptrdiff_t x = 0;
+ do {
+ const uint16x8_t s = vld1q_u16(src + x);
+ const int16x8_t d = vreinterpretq_s16_u16(vshlq_n_u16(s, 4));
+ vst1q_s16(*wiener_buffer + x, d);
+ x += 8;
+ } while (x < width);
+ src += src_stride;
+ *wiener_buffer += width;
+ }
+}
+
+inline int32x4x2_t WienerVertical2(const int16x8_t a0, const int16x8_t a1,
+ const int16_t filter,
+ const int32x4x2_t sum) {
+ int32x4x2_t d;
+ d.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(a0), filter);
+ d.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(a0), filter);
+ d.val[0] = vmlal_n_s16(d.val[0], vget_low_s16(a1), filter);
+ d.val[1] = vmlal_n_s16(d.val[1], vget_high_s16(a1), filter);
+ return d;
+}
+
+inline uint16x8_t WienerVertical(const int16x8_t a[3], const int16_t filter[4],
+ const int32x4x2_t sum) {
+ int32x4x2_t d = WienerVertical2(a[0], a[2], filter[2], sum);
+ d.val[0] = vmlal_n_s16(d.val[0], vget_low_s16(a[1]), filter[3]);
+ d.val[1] = vmlal_n_s16(d.val[1], vget_high_s16(a[1]), filter[3]);
+ const uint16x4_t sum_lo_16 = vqrshrun_n_s32(d.val[0], 11);
+ const uint16x4_t sum_hi_16 = vqrshrun_n_s32(d.val[1], 11);
+ return vcombine_u16(sum_lo_16, sum_hi_16);
+}
+
+inline uint16x8_t WienerVerticalTap7Kernel(const int16_t* const wiener_buffer,
+ const ptrdiff_t wiener_stride,
+ const int16_t filter[4],
+ int16x8_t a[7]) {
+ int32x4x2_t sum;
+ a[0] = vld1q_s16(wiener_buffer + 0 * wiener_stride);
+ a[1] = vld1q_s16(wiener_buffer + 1 * wiener_stride);
+ a[5] = vld1q_s16(wiener_buffer + 5 * wiener_stride);
+ a[6] = vld1q_s16(wiener_buffer + 6 * wiener_stride);
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ sum = WienerVertical2(a[0], a[6], filter[0], sum);
+ sum = WienerVertical2(a[1], a[5], filter[1], sum);
+ a[2] = vld1q_s16(wiener_buffer + 2 * wiener_stride);
+ a[3] = vld1q_s16(wiener_buffer + 3 * wiener_stride);
+ a[4] = vld1q_s16(wiener_buffer + 4 * wiener_stride);
+ return WienerVertical(a + 2, filter, sum);
+}
+
+inline uint16x8x2_t WienerVerticalTap7Kernel2(
+ const int16_t* const wiener_buffer, const ptrdiff_t wiener_stride,
+ const int16_t filter[4]) {
+ int16x8_t a[8];
+ int32x4x2_t sum;
+ uint16x8x2_t d;
+ d.val[0] = WienerVerticalTap7Kernel(wiener_buffer, wiener_stride, filter, a);
+ a[7] = vld1q_s16(wiener_buffer + 7 * wiener_stride);
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ sum = WienerVertical2(a[1], a[7], filter[0], sum);
+ sum = WienerVertical2(a[2], a[6], filter[1], sum);
+ d.val[1] = WienerVertical(a + 3, filter, sum);
+ return d;
+}
+
+inline void WienerVerticalTap7(const int16_t* wiener_buffer,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4], uint16_t* dst,
+ const ptrdiff_t dst_stride) {
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ for (int y = height >> 1; y != 0; --y) {
+ uint16_t* dst_ptr = dst;
+ ptrdiff_t x = width;
+ do {
+ uint16x8x2_t d[2];
+ d[0] = WienerVerticalTap7Kernel2(wiener_buffer + 0, width, filter);
+ d[1] = WienerVerticalTap7Kernel2(wiener_buffer + 8, width, filter);
+ vst1q_u16(dst_ptr, vminq_u16(d[0].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8, vminq_u16(d[1].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + dst_stride, vminq_u16(d[0].val[1], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8 + dst_stride,
+ vminq_u16(d[1].val[1], v_max_bitdepth));
+ wiener_buffer += 16;
+ dst_ptr += 16;
+ x -= 16;
+ } while (x != 0);
+ wiener_buffer += width;
+ dst += 2 * dst_stride;
+ }
+
+ if ((height & 1) != 0) {
+ ptrdiff_t x = width;
+ do {
+ int16x8_t a[7];
+ const uint16x8_t d0 =
+ WienerVerticalTap7Kernel(wiener_buffer + 0, width, filter, a);
+ const uint16x8_t d1 =
+ WienerVerticalTap7Kernel(wiener_buffer + 8, width, filter, a);
+ vst1q_u16(dst, vminq_u16(d0, v_max_bitdepth));
+ vst1q_u16(dst + 8, vminq_u16(d1, v_max_bitdepth));
+ wiener_buffer += 16;
+ dst += 16;
+ x -= 16;
+ } while (x != 0);
+ }
+}
+
+inline uint16x8_t WienerVerticalTap5Kernel(const int16_t* const wiener_buffer,
+ const ptrdiff_t wiener_stride,
+ const int16_t filter[4],
+ int16x8_t a[5]) {
+ a[0] = vld1q_s16(wiener_buffer + 0 * wiener_stride);
+ a[1] = vld1q_s16(wiener_buffer + 1 * wiener_stride);
+ a[2] = vld1q_s16(wiener_buffer + 2 * wiener_stride);
+ a[3] = vld1q_s16(wiener_buffer + 3 * wiener_stride);
+ a[4] = vld1q_s16(wiener_buffer + 4 * wiener_stride);
+ int32x4x2_t sum;
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ sum = WienerVertical2(a[0], a[4], filter[1], sum);
+ return WienerVertical(a + 1, filter, sum);
+}
+
+inline uint16x8x2_t WienerVerticalTap5Kernel2(
+ const int16_t* const wiener_buffer, const ptrdiff_t wiener_stride,
+ const int16_t filter[4]) {
+ int16x8_t a[6];
+ int32x4x2_t sum;
+ uint16x8x2_t d;
+ d.val[0] = WienerVerticalTap5Kernel(wiener_buffer, wiener_stride, filter, a);
+ a[5] = vld1q_s16(wiener_buffer + 5 * wiener_stride);
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ sum = WienerVertical2(a[1], a[5], filter[1], sum);
+ d.val[1] = WienerVertical(a + 2, filter, sum);
+ return d;
+}
+
+inline void WienerVerticalTap5(const int16_t* wiener_buffer,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4], uint16_t* dst,
+ const ptrdiff_t dst_stride) {
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ for (int y = height >> 1; y != 0; --y) {
+ uint16_t* dst_ptr = dst;
+ ptrdiff_t x = width;
+ do {
+ uint16x8x2_t d[2];
+ d[0] = WienerVerticalTap5Kernel2(wiener_buffer + 0, width, filter);
+ d[1] = WienerVerticalTap5Kernel2(wiener_buffer + 8, width, filter);
+ vst1q_u16(dst_ptr, vminq_u16(d[0].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8, vminq_u16(d[1].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + dst_stride, vminq_u16(d[0].val[1], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8 + dst_stride,
+ vminq_u16(d[1].val[1], v_max_bitdepth));
+ wiener_buffer += 16;
+ dst_ptr += 16;
+ x -= 16;
+ } while (x != 0);
+ wiener_buffer += width;
+ dst += 2 * dst_stride;
+ }
+
+ if ((height & 1) != 0) {
+ ptrdiff_t x = width;
+ do {
+ int16x8_t a[5];
+ const uint16x8_t d0 =
+ WienerVerticalTap5Kernel(wiener_buffer + 0, width, filter, a);
+ const uint16x8_t d1 =
+ WienerVerticalTap5Kernel(wiener_buffer + 8, width, filter, a);
+ vst1q_u16(dst, vminq_u16(d0, v_max_bitdepth));
+ vst1q_u16(dst + 8, vminq_u16(d1, v_max_bitdepth));
+ wiener_buffer += 16;
+ dst += 16;
+ x -= 16;
+ } while (x != 0);
+ }
+}
+
+inline uint16x8_t WienerVerticalTap3Kernel(const int16_t* const wiener_buffer,
+ const ptrdiff_t wiener_stride,
+ const int16_t filter[4],
+ int16x8_t a[3]) {
+ a[0] = vld1q_s16(wiener_buffer + 0 * wiener_stride);
+ a[1] = vld1q_s16(wiener_buffer + 1 * wiener_stride);
+ a[2] = vld1q_s16(wiener_buffer + 2 * wiener_stride);
+ int32x4x2_t sum;
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ return WienerVertical(a, filter, sum);
+}
+
+inline uint16x8x2_t WienerVerticalTap3Kernel2(
+ const int16_t* const wiener_buffer, const ptrdiff_t wiener_stride,
+ const int16_t filter[4]) {
+ int16x8_t a[4];
+ int32x4x2_t sum;
+ uint16x8x2_t d;
+ d.val[0] = WienerVerticalTap3Kernel(wiener_buffer, wiener_stride, filter, a);
+ a[3] = vld1q_s16(wiener_buffer + 3 * wiener_stride);
+ sum.val[0] = sum.val[1] = vdupq_n_s32(0);
+ d.val[1] = WienerVertical(a + 1, filter, sum);
+ return d;
+}
+
+inline void WienerVerticalTap3(const int16_t* wiener_buffer,
+ const ptrdiff_t width, const int height,
+ const int16_t filter[4], uint16_t* dst,
+ const ptrdiff_t dst_stride) {
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+
+ for (int y = height >> 1; y != 0; --y) {
+ uint16_t* dst_ptr = dst;
+ ptrdiff_t x = width;
+ do {
+ uint16x8x2_t d[2];
+ d[0] = WienerVerticalTap3Kernel2(wiener_buffer + 0, width, filter);
+ d[1] = WienerVerticalTap3Kernel2(wiener_buffer + 8, width, filter);
+
+ vst1q_u16(dst_ptr, vminq_u16(d[0].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8, vminq_u16(d[1].val[0], v_max_bitdepth));
+ vst1q_u16(dst_ptr + dst_stride, vminq_u16(d[0].val[1], v_max_bitdepth));
+ vst1q_u16(dst_ptr + 8 + dst_stride,
+ vminq_u16(d[1].val[1], v_max_bitdepth));
+
+ wiener_buffer += 16;
+ dst_ptr += 16;
+ x -= 16;
+ } while (x != 0);
+ wiener_buffer += width;
+ dst += 2 * dst_stride;
+ }
+
+ if ((height & 1) != 0) {
+ ptrdiff_t x = width;
+ do {
+ int16x8_t a[3];
+ const uint16x8_t d0 =
+ WienerVerticalTap3Kernel(wiener_buffer + 0, width, filter, a);
+ const uint16x8_t d1 =
+ WienerVerticalTap3Kernel(wiener_buffer + 8, width, filter, a);
+ vst1q_u16(dst, vminq_u16(d0, v_max_bitdepth));
+ vst1q_u16(dst + 8, vminq_u16(d1, v_max_bitdepth));
+ wiener_buffer += 16;
+ dst += 16;
+ x -= 16;
+ } while (x != 0);
+ }
+}
+
+inline void WienerVerticalTap1Kernel(const int16_t* const wiener_buffer,
+ uint16_t* const dst) {
+ const uint16x8_t v_max_bitdepth = vdupq_n_u16((1 << kBitdepth10) - 1);
+ const int16x8_t a0 = vld1q_s16(wiener_buffer + 0);
+ const int16x8_t a1 = vld1q_s16(wiener_buffer + 8);
+ const int16x8_t d0 = vrshrq_n_s16(a0, 4);
+ const int16x8_t d1 = vrshrq_n_s16(a1, 4);
+ vst1q_u16(dst, vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(d0, vdupq_n_s16(0))),
+ v_max_bitdepth));
+ vst1q_u16(dst + 8,
+ vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(d1, vdupq_n_s16(0))),
+ v_max_bitdepth));
+}
+
+inline void WienerVerticalTap1(const int16_t* wiener_buffer,
+ const ptrdiff_t width, const int height,
+ uint16_t* dst, const ptrdiff_t dst_stride) {
+ for (int y = height >> 1; y != 0; --y) {
+ uint16_t* dst_ptr = dst;
+ ptrdiff_t x = width;
+ do {
+ WienerVerticalTap1Kernel(wiener_buffer, dst_ptr);
+ WienerVerticalTap1Kernel(wiener_buffer + width, dst_ptr + dst_stride);
+ wiener_buffer += 16;
+ dst_ptr += 16;
+ x -= 16;
+ } while (x != 0);
+ wiener_buffer += width;
+ dst += 2 * dst_stride;
+ }
+
+ if ((height & 1) != 0) {
+ ptrdiff_t x = width;
+ do {
+ WienerVerticalTap1Kernel(wiener_buffer, dst);
+ wiener_buffer += 16;
+ dst += 16;
+ x -= 16;
+ } while (x != 0);
+ }
+}
+
+// For width 16 and up, store the horizontal results, and then do the vertical
+// filter row by row. This is faster than doing it column by column when
+// considering cache issues.
+void WienerFilter_NEON(
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
+ const ptrdiff_t bottom_border_stride, const int width, const int height,
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
+ const int16_t* const number_leading_zero_coefficients =
+ restoration_info.wiener_info.number_leading_zero_coefficients;
+ const int number_rows_to_skip = std::max(
+ static_cast<int>(number_leading_zero_coefficients[WienerInfo::kVertical]),
+ 1);
+ const ptrdiff_t wiener_stride = Align(width, 16);
+ int16_t* const wiener_buffer_vertical = restoration_buffer->wiener_buffer;
+ // The values are saturated to 13 bits before storing.
+ int16_t* wiener_buffer_horizontal =
+ wiener_buffer_vertical + number_rows_to_skip * wiener_stride;
+ int16_t filter_horizontal[(kWienerFilterTaps + 1) / 2];
+ int16_t filter_vertical[(kWienerFilterTaps + 1) / 2];
+ PopulateWienerCoefficients(restoration_info, WienerInfo::kHorizontal,
+ filter_horizontal);
+ PopulateWienerCoefficients(restoration_info, WienerInfo::kVertical,
+ filter_vertical);
+ // horizontal filtering.
+ const int height_horizontal =
+ height + kWienerFilterTaps - 1 - 2 * number_rows_to_skip;
+ const int height_extra = (height_horizontal - height) >> 1;
+ assert(height_extra <= 2);
+ const auto* const src = static_cast<const uint16_t*>(source);
+ const auto* const top = static_cast<const uint16_t*>(top_border);
+ const auto* const bottom = static_cast<const uint16_t*>(bottom_border);
+ if (number_leading_zero_coefficients[WienerInfo::kHorizontal] == 0) {
+ WienerHorizontalTap7(top + (2 - height_extra) * top_border_stride - 3,
+ top_border_stride, wiener_stride, width, height_extra,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap7(src - 3, stride, wiener_stride, width, height,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap7(bottom - 3, bottom_border_stride, wiener_stride, width,
+ height_extra, filter_horizontal,
+ &wiener_buffer_horizontal);
+ } else if (number_leading_zero_coefficients[WienerInfo::kHorizontal] == 1) {
+ WienerHorizontalTap5(top + (2 - height_extra) * top_border_stride - 2,
+ top_border_stride, wiener_stride, width, height_extra,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap5(src - 2, stride, wiener_stride, width, height,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap5(bottom - 2, bottom_border_stride, wiener_stride, width,
+ height_extra, filter_horizontal,
+ &wiener_buffer_horizontal);
+ } else if (number_leading_zero_coefficients[WienerInfo::kHorizontal] == 2) {
+ WienerHorizontalTap3(top + (2 - height_extra) * top_border_stride - 1,
+ top_border_stride, wiener_stride, height_extra,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap3(src - 1, stride, wiener_stride, height,
+ filter_horizontal, &wiener_buffer_horizontal);
+ WienerHorizontalTap3(bottom - 1, bottom_border_stride, wiener_stride,
+ height_extra, filter_horizontal,
+ &wiener_buffer_horizontal);
+ } else {
+ assert(number_leading_zero_coefficients[WienerInfo::kHorizontal] == 3);
+ WienerHorizontalTap1(top + (2 - height_extra) * top_border_stride,
+ top_border_stride, wiener_stride, height_extra,
+ &wiener_buffer_horizontal);
+ WienerHorizontalTap1(src, stride, wiener_stride, height,
+ &wiener_buffer_horizontal);
+ WienerHorizontalTap1(bottom, bottom_border_stride, wiener_stride,
+ height_extra, &wiener_buffer_horizontal);
+ }
+
+ // vertical filtering.
+ auto* dst = static_cast<uint16_t*>(dest);
+ if (number_leading_zero_coefficients[WienerInfo::kVertical] == 0) {
+ // Because the top row of |source| is a duplicate of the second row, and the
+ // bottom row of |source| is a duplicate of its above row, we can duplicate
+ // the top and bottom row of |wiener_buffer| accordingly.
+ memcpy(wiener_buffer_horizontal, wiener_buffer_horizontal - wiener_stride,
+ sizeof(*wiener_buffer_horizontal) * wiener_stride);
+ memcpy(restoration_buffer->wiener_buffer,
+ restoration_buffer->wiener_buffer + wiener_stride,
+ sizeof(*restoration_buffer->wiener_buffer) * wiener_stride);
+ WienerVerticalTap7(wiener_buffer_vertical, wiener_stride, height,
+ filter_vertical, dst, stride);
+ } else if (number_leading_zero_coefficients[WienerInfo::kVertical] == 1) {
+ WienerVerticalTap5(wiener_buffer_vertical + wiener_stride, wiener_stride,
+ height, filter_vertical, dst, stride);
+ } else if (number_leading_zero_coefficients[WienerInfo::kVertical] == 2) {
+ WienerVerticalTap3(wiener_buffer_vertical + 2 * wiener_stride,
+ wiener_stride, height, filter_vertical, dst, stride);
+ } else {
+ assert(number_leading_zero_coefficients[WienerInfo::kVertical] == 3);
+ WienerVerticalTap1(wiener_buffer_vertical + 3 * wiener_stride,
+ wiener_stride, height, dst, stride);
+ }
+}
+
+//------------------------------------------------------------------------------
+// SGR
+
+// SIMD overreads 8 - (width % 8) - 2 * padding pixels, where padding is 3 for
+// Pass 1 and 2 for Pass 2.
+constexpr int kOverreadInBytesPass1 = 4;
+constexpr int kOverreadInBytesPass2 = 8;
+
+inline void LoadAligned16x2U16(const uint16_t* const src[2], const ptrdiff_t x,
+ uint16x8_t dst[2]) {
+ dst[0] = vld1q_u16(src[0] + x);
+ dst[1] = vld1q_u16(src[1] + x);
+}
+
+inline void LoadAligned16x2U16Msan(const uint16_t* const src[2],
+ const ptrdiff_t x, const ptrdiff_t border,
+ uint16x8_t dst[2]) {
+ dst[0] = Load1QMsanU16(src[0] + x, sizeof(**src) * (x + 8 - border));
+ dst[1] = Load1QMsanU16(src[1] + x, sizeof(**src) * (x + 8 - border));
+}
+
+inline void LoadAligned16x3U16(const uint16_t* const src[3], const ptrdiff_t x,
+ uint16x8_t dst[3]) {
+ dst[0] = vld1q_u16(src[0] + x);
+ dst[1] = vld1q_u16(src[1] + x);
+ dst[2] = vld1q_u16(src[2] + x);
+}
+
+inline void LoadAligned16x3U16Msan(const uint16_t* const src[3],
+ const ptrdiff_t x, const ptrdiff_t border,
+ uint16x8_t dst[3]) {
+ dst[0] = Load1QMsanU16(src[0] + x, sizeof(**src) * (x + 8 - border));
+ dst[1] = Load1QMsanU16(src[1] + x, sizeof(**src) * (x + 8 - border));
+ dst[2] = Load1QMsanU16(src[2] + x, sizeof(**src) * (x + 8 - border));
+}
+
+inline void LoadAligned32U32(const uint32_t* const src, uint32x4_t dst[2]) {
+ dst[0] = vld1q_u32(src + 0);
+ dst[1] = vld1q_u32(src + 4);
+}
+
+inline void LoadAligned32U32Msan(const uint32_t* const src, const ptrdiff_t x,
+ const ptrdiff_t border, uint32x4_t dst[2]) {
+ dst[0] = Load1QMsanU32(src + x + 0, sizeof(*src) * (x + 4 - border));
+ dst[1] = Load1QMsanU32(src + x + 4, sizeof(*src) * (x + 8 - border));
+}
+
+inline void LoadAligned32x2U32(const uint32_t* const src[2], const ptrdiff_t x,
+ uint32x4_t dst[2][2]) {
+ LoadAligned32U32(src[0] + x, dst[0]);
+ LoadAligned32U32(src[1] + x, dst[1]);
+}
+
+inline void LoadAligned32x2U32Msan(const uint32_t* const src[2],
+ const ptrdiff_t x, const ptrdiff_t border,
+ uint32x4_t dst[2][2]) {
+ LoadAligned32U32Msan(src[0], x, border, dst[0]);
+ LoadAligned32U32Msan(src[1], x, border, dst[1]);
+}
+
+inline void LoadAligned32x3U32(const uint32_t* const src[3], const ptrdiff_t x,
+ uint32x4_t dst[3][2]) {
+ LoadAligned32U32(src[0] + x, dst[0]);
+ LoadAligned32U32(src[1] + x, dst[1]);
+ LoadAligned32U32(src[2] + x, dst[2]);
+}
+
+inline void LoadAligned32x3U32Msan(const uint32_t* const src[3],
+ const ptrdiff_t x, const ptrdiff_t border,
+ uint32x4_t dst[3][2]) {
+ LoadAligned32U32Msan(src[0], x, border, dst[0]);
+ LoadAligned32U32Msan(src[1], x, border, dst[1]);
+ LoadAligned32U32Msan(src[2], x, border, dst[2]);
+}
+
+inline void StoreAligned32U16(uint16_t* const dst, const uint16x8_t src[2]) {
+ vst1q_u16(dst + 0, src[0]);
+ vst1q_u16(dst + 8, src[1]);
+}
+
+inline void StoreAligned32U32(uint32_t* const dst, const uint32x4_t src[2]) {
+ vst1q_u32(dst + 0, src[0]);
+ vst1q_u32(dst + 4, src[1]);
+}
+
+inline void StoreAligned64U32(uint32_t* const dst, const uint32x4_t src[4]) {
+ StoreAligned32U32(dst + 0, src + 0);
+ StoreAligned32U32(dst + 8, src + 2);
+}
+
+inline uint16x8_t VaddwLo8(const uint16x8_t src0, const uint8x16_t src1) {
+ const uint8x8_t s1 = vget_low_u8(src1);
+ return vaddw_u8(src0, s1);
+}
+
+inline uint16x8_t VaddwHi8(const uint16x8_t src0, const uint8x16_t src1) {
+ const uint8x8_t s1 = vget_high_u8(src1);
+ return vaddw_u8(src0, s1);
+}
+
+inline uint32x4_t VmullLo16(const uint16x8_t src0, const uint16x8_t src1) {
+ return vmull_u16(vget_low_u16(src0), vget_low_u16(src1));
+}
+
+inline uint32x4_t VmullHi16(const uint16x8_t src0, const uint16x8_t src1) {
+ return vmull_u16(vget_high_u16(src0), vget_high_u16(src1));
+}
+
+template <int bytes>
+inline uint8x8_t VshrU128(const uint8x8x2_t src) {
+ return vext_u8(src.val[0], src.val[1], bytes);
+}
+
+template <int bytes>
+inline uint8x8_t VshrU128(const uint8x8_t src[2]) {
+ return vext_u8(src[0], src[1], bytes);
+}
+
+template <int bytes>
+inline uint8x16_t VshrU128(const uint8x16_t src[2]) {
+ return vextq_u8(src[0], src[1], bytes);
+}
+
+template <int bytes>
+inline uint16x8_t VshrU128(const uint16x8x2_t src) {
+ return vextq_u16(src.val[0], src.val[1], bytes / 2);
+}
+
+template <int bytes>
+inline uint16x8_t VshrU128(const uint16x8_t src[2]) {
+ return vextq_u16(src[0], src[1], bytes / 2);
+}
+
+inline uint32x4_t Square(uint16x4_t s) { return vmull_u16(s, s); }
+
+inline void Square(const uint16x8_t src, uint32x4_t dst[2]) {
+ const uint16x4_t s_lo = vget_low_u16(src);
+ const uint16x4_t s_hi = vget_high_u16(src);
+ dst[0] = Square(s_lo);
+ dst[1] = Square(s_hi);
+}
+
+template <int offset>
+inline void Prepare3_8(const uint8x16_t src[2], uint8x16_t dst[3]) {
+ dst[0] = VshrU128<offset + 0>(src);
+ dst[1] = VshrU128<offset + 1>(src);
+ dst[2] = VshrU128<offset + 2>(src);
+}
+
+inline void Prepare3_16(const uint16x8_t src[2], uint16x8_t dst[3]) {
+ dst[0] = src[0];
+ dst[1] = vextq_u16(src[0], src[1], 1);
+ dst[2] = vextq_u16(src[0], src[1], 2);
+}
+
+template <int offset>
+inline void Prepare5_8(const uint8x16_t src[2], uint8x16_t dst[5]) {
+ dst[0] = VshrU128<offset + 0>(src);
+ dst[1] = VshrU128<offset + 1>(src);
+ dst[2] = VshrU128<offset + 2>(src);
+ dst[3] = VshrU128<offset + 3>(src);
+ dst[4] = VshrU128<offset + 4>(src);
+}
+
+inline void Prepare5_16(const uint16x8_t src[2], uint16x8_t dst[5]) {
+ dst[0] = src[0];
+ dst[1] = vextq_u16(src[0], src[1], 1);
+ dst[2] = vextq_u16(src[0], src[1], 2);
+ dst[3] = vextq_u16(src[0], src[1], 3);
+ dst[4] = vextq_u16(src[0], src[1], 4);
+}
+
+inline void Prepare3_32(const uint32x4_t src[2], uint32x4_t dst[3]) {
+ dst[0] = src[0];
+ dst[1] = vextq_u32(src[0], src[1], 1);
+ dst[2] = vextq_u32(src[0], src[1], 2);
+}
+
+inline void Prepare5_32(const uint32x4_t src[2], uint32x4_t dst[5]) {
+ Prepare3_32(src, dst);
+ dst[3] = vextq_u32(src[0], src[1], 3);
+ dst[4] = src[1];
+}
+
+inline uint16x8_t Sum3WLo16(const uint8x16_t src[3]) {
+ const uint16x8_t sum = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[1]));
+ return vaddw_u8(sum, vget_low_u8(src[2]));
+}
+
+inline uint16x8_t Sum3WHi16(const uint8x16_t src[3]) {
+ const uint16x8_t sum = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[1]));
+ return vaddw_u8(sum, vget_high_u8(src[2]));
+}
+
+inline uint16x8_t Sum3_16(const uint16x8_t src0, const uint16x8_t src1,
+ const uint16x8_t src2) {
+ const uint16x8_t sum = vaddq_u16(src0, src1);
+ return vaddq_u16(sum, src2);
+}
+
+inline uint16x8_t Sum3_16(const uint16x8_t src[3]) {
+ return Sum3_16(src[0], src[1], src[2]);
+}
+
+inline uint32x4_t Sum3_32(const uint32x4_t src0, const uint32x4_t src1,
+ const uint32x4_t src2) {
+ const uint32x4_t sum = vaddq_u32(src0, src1);
+ return vaddq_u32(sum, src2);
+}
+
+inline uint32x4_t Sum3_32(const uint32x4_t src[3]) {
+ return Sum3_32(src[0], src[1], src[2]);
+}
+
+inline void Sum3_32(const uint32x4_t src[3][2], uint32x4_t dst[2]) {
+ dst[0] = Sum3_32(src[0][0], src[1][0], src[2][0]);
+ dst[1] = Sum3_32(src[0][1], src[1][1], src[2][1]);
+}
+
+inline uint16x8_t Sum5_16(const uint16x8_t src[5]) {
+ const uint16x8_t sum01 = vaddq_u16(src[0], src[1]);
+ const uint16x8_t sum23 = vaddq_u16(src[2], src[3]);
+ const uint16x8_t sum = vaddq_u16(sum01, sum23);
+ return vaddq_u16(sum, src[4]);
+}
+
+inline uint32x4_t Sum5_32(const uint32x4_t* src0, const uint32x4_t* src1,
+ const uint32x4_t* src2, const uint32x4_t* src3,
+ const uint32x4_t* src4) {
+ const uint32x4_t sum01 = vaddq_u32(*src0, *src1);
+ const uint32x4_t sum23 = vaddq_u32(*src2, *src3);
+ const uint32x4_t sum = vaddq_u32(sum01, sum23);
+ return vaddq_u32(sum, *src4);
+}
+
+inline uint32x4_t Sum5_32(const uint32x4_t src[5]) {
+ return Sum5_32(&src[0], &src[1], &src[2], &src[3], &src[4]);
+}
+
+inline void Sum5_32(const uint32x4_t src[5][2], uint32x4_t dst[2]) {
+ dst[0] = Sum5_32(&src[0][0], &src[1][0], &src[2][0], &src[3][0], &src[4][0]);
+ dst[1] = Sum5_32(&src[0][1], &src[1][1], &src[2][1], &src[3][1], &src[4][1]);
+}
+
+inline uint16x8_t Sum3Horizontal16(const uint16x8_t src[2]) {
+ uint16x8_t s[3];
+ Prepare3_16(src, s);
+ return Sum3_16(s);
+}
+
+inline void Sum3Horizontal32(const uint32x4_t src[3], uint32x4_t dst[2]) {
+ uint32x4_t s[3];
+ Prepare3_32(src + 0, s);
+ dst[0] = Sum3_32(s);
+ Prepare3_32(src + 1, s);
+ dst[1] = Sum3_32(s);
+}
+
+inline uint16x8_t Sum5Horizontal16(const uint16x8_t src[2]) {
+ uint16x8_t s[5];
+ Prepare5_16(src, s);
+ return Sum5_16(s);
+}
+
+inline void Sum5Horizontal32(const uint32x4_t src[3], uint32x4_t dst[2]) {
+ uint32x4_t s[5];
+ Prepare5_32(src + 0, s);
+ dst[0] = Sum5_32(s);
+ Prepare5_32(src + 1, s);
+ dst[1] = Sum5_32(s);
+}
+
+void SumHorizontal16(const uint16x8_t src[2], uint16x8_t* const row3,
+ uint16x8_t* const row5) {
+ uint16x8_t s[5];
+ Prepare5_16(src, s);
+ const uint16x8_t sum04 = vaddq_u16(s[0], s[4]);
+ *row3 = Sum3_16(s + 1);
+ *row5 = vaddq_u16(sum04, *row3);
+}
+
+inline void SumHorizontal16(const uint16x8_t src[3], uint16x8_t* const row3_0,
+ uint16x8_t* const row3_1, uint16x8_t* const row5_0,
+ uint16x8_t* const row5_1) {
+ SumHorizontal16(src + 0, row3_0, row5_0);
+ SumHorizontal16(src + 1, row3_1, row5_1);
+}
+
+void SumHorizontal32(const uint32x4_t src[5], uint32x4_t* const row_sq3,
+ uint32x4_t* const row_sq5) {
+ const uint32x4_t sum04 = vaddq_u32(src[0], src[4]);
+ *row_sq3 = Sum3_32(src + 1);
+ *row_sq5 = vaddq_u32(sum04, *row_sq3);
+}
+
+inline void SumHorizontal32(const uint32x4_t src[3],
+ uint32x4_t* const row_sq3_0,
+ uint32x4_t* const row_sq3_1,
+ uint32x4_t* const row_sq5_0,
+ uint32x4_t* const row_sq5_1) {
+ uint32x4_t s[5];
+ Prepare5_32(src + 0, s);
+ SumHorizontal32(s, row_sq3_0, row_sq5_0);
+ Prepare5_32(src + 1, s);
+ SumHorizontal32(s, row_sq3_1, row_sq5_1);
+}
+
+inline uint16x8_t Sum343Lo(const uint8x16_t ma3[3]) {
+ const uint16x8_t sum = Sum3WLo16(ma3);
+ const uint16x8_t sum3 = Sum3_16(sum, sum, sum);
+ return VaddwLo8(sum3, ma3[1]);
+}
+
+inline uint16x8_t Sum343Hi(const uint8x16_t ma3[3]) {
+ const uint16x8_t sum = Sum3WHi16(ma3);
+ const uint16x8_t sum3 = Sum3_16(sum, sum, sum);
+ return VaddwHi8(sum3, ma3[1]);
+}
+
+inline uint32x4_t Sum343(const uint32x4_t src[3]) {
+ const uint32x4_t sum = Sum3_32(src);
+ const uint32x4_t sum3 = Sum3_32(sum, sum, sum);
+ return vaddq_u32(sum3, src[1]);
+}
+
+inline void Sum343(const uint32x4_t src[3], uint32x4_t dst[2]) {
+ uint32x4_t s[3];
+ Prepare3_32(src + 0, s);
+ dst[0] = Sum343(s);
+ Prepare3_32(src + 1, s);
+ dst[1] = Sum343(s);
+}
+
+inline uint16x8_t Sum565Lo(const uint8x16_t src[3]) {
+ const uint16x8_t sum = Sum3WLo16(src);
+ const uint16x8_t sum4 = vshlq_n_u16(sum, 2);
+ const uint16x8_t sum5 = vaddq_u16(sum4, sum);
+ return VaddwLo8(sum5, src[1]);
+}
+
+inline uint16x8_t Sum565Hi(const uint8x16_t src[3]) {
+ const uint16x8_t sum = Sum3WHi16(src);
+ const uint16x8_t sum4 = vshlq_n_u16(sum, 2);
+ const uint16x8_t sum5 = vaddq_u16(sum4, sum);
+ return VaddwHi8(sum5, src[1]);
+}
+
+inline uint32x4_t Sum565(const uint32x4_t src[3]) {
+ const uint32x4_t sum = Sum3_32(src);
+ const uint32x4_t sum4 = vshlq_n_u32(sum, 2);
+ const uint32x4_t sum5 = vaddq_u32(sum4, sum);
+ return vaddq_u32(sum5, src[1]);
+}
+
+inline void Sum565(const uint32x4_t src[3], uint32x4_t dst[2]) {
+ uint32x4_t s[3];
+ Prepare3_32(src + 0, s);
+ dst[0] = Sum565(s);
+ Prepare3_32(src + 1, s);
+ dst[1] = Sum565(s);
+}
+
+inline void BoxSum(const uint16_t* src, const ptrdiff_t src_stride,
+ const ptrdiff_t width, const ptrdiff_t sum_stride,
+ const ptrdiff_t sum_width, uint16_t* sum3, uint16_t* sum5,
+ uint32_t* square_sum3, uint32_t* square_sum5) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src) * width;
+ int y = 2;
+ do {
+ uint16x8_t s[3];
+ uint32x4_t sq[6];
+ s[0] = Load1QMsanU16(src, overread_in_bytes);
+ Square(s[0], sq);
+ ptrdiff_t x = sum_width;
+ do {
+ uint16x8_t row3[2], row5[2];
+ uint32x4_t row_sq3[2], row_sq5[2];
+ s[1] = Load1QMsanU16(
+ src + 8, overread_in_bytes + sizeof(*src) * (sum_width - x + 8));
+ x -= 16;
+ src += 16;
+ s[2] = Load1QMsanU16(src,
+ overread_in_bytes + sizeof(*src) * (sum_width - x));
+ Square(s[1], sq + 2);
+ Square(s[2], sq + 4);
+ SumHorizontal16(s, &row3[0], &row3[1], &row5[0], &row5[1]);
+ StoreAligned32U16(sum3, row3);
+ StoreAligned32U16(sum5, row5);
+ SumHorizontal32(sq + 0, &row_sq3[0], &row_sq3[1], &row_sq5[0],
+ &row_sq5[1]);
+ StoreAligned32U32(square_sum3 + 0, row_sq3);
+ StoreAligned32U32(square_sum5 + 0, row_sq5);
+ SumHorizontal32(sq + 2, &row_sq3[0], &row_sq3[1], &row_sq5[0],
+ &row_sq5[1]);
+ StoreAligned32U32(square_sum3 + 8, row_sq3);
+ StoreAligned32U32(square_sum5 + 8, row_sq5);
+ s[0] = s[2];
+ sq[0] = sq[4];
+ sq[1] = sq[5];
+ sum3 += 16;
+ sum5 += 16;
+ square_sum3 += 16;
+ square_sum5 += 16;
+ } while (x != 0);
+ src += src_stride - sum_width;
+ sum3 += sum_stride - sum_width;
+ sum5 += sum_stride - sum_width;
+ square_sum3 += sum_stride - sum_width;
+ square_sum5 += sum_stride - sum_width;
+ } while (--y != 0);
+}
+
+template <int size>
+inline void BoxSum(const uint16_t* src, const ptrdiff_t src_stride,
+ const ptrdiff_t width, const ptrdiff_t sum_stride,
+ const ptrdiff_t sum_width, uint16_t* sums,
+ uint32_t* square_sums) {
+ static_assert(size == 3 || size == 5, "");
+ const ptrdiff_t overread_in_bytes =
+ ((size == 5) ? kOverreadInBytesPass1 : kOverreadInBytesPass2) -
+ sizeof(*src) * width;
+ int y = 2;
+ do {
+ uint16x8_t s[3];
+ uint32x4_t sq[6];
+ s[0] = Load1QMsanU16(src, overread_in_bytes);
+ Square(s[0], sq);
+ ptrdiff_t x = sum_width;
+ do {
+ uint16x8_t row[2];
+ uint32x4_t row_sq[4];
+ s[1] = Load1QMsanU16(
+ src + 8, overread_in_bytes + sizeof(*src) * (sum_width - x + 8));
+ x -= 16;
+ src += 16;
+ s[2] = Load1QMsanU16(src,
+ overread_in_bytes + sizeof(*src) * (sum_width - x));
+ Square(s[1], sq + 2);
+ Square(s[2], sq + 4);
+ if (size == 3) {
+ row[0] = Sum3Horizontal16(s + 0);
+ row[1] = Sum3Horizontal16(s + 1);
+ Sum3Horizontal32(sq + 0, row_sq + 0);
+ Sum3Horizontal32(sq + 2, row_sq + 2);
+ } else {
+ row[0] = Sum5Horizontal16(s + 0);
+ row[1] = Sum5Horizontal16(s + 1);
+ Sum5Horizontal32(sq + 0, row_sq + 0);
+ Sum5Horizontal32(sq + 2, row_sq + 2);
+ }
+ StoreAligned32U16(sums, row);
+ StoreAligned64U32(square_sums, row_sq);
+ s[0] = s[2];
+ sq[0] = sq[4];
+ sq[1] = sq[5];
+ sums += 16;
+ square_sums += 16;
+ } while (x != 0);
+ src += src_stride - sum_width;
+ sums += sum_stride - sum_width;
+ square_sums += sum_stride - sum_width;
+ } while (--y != 0);
+}
+
+template <int n>
+inline uint16x4_t CalculateMa(const uint16x4_t sum, const uint32x4_t sum_sq,
+ const uint32_t scale) {
+ // a = |sum_sq|
+ // d = |sum|
+ // p = (a * n < d * d) ? 0 : a * n - d * d;
+ const uint32x4_t dxd = vmull_u16(sum, sum);
+ const uint32x4_t axn = vmulq_n_u32(sum_sq, n);
+ // Ensure |p| does not underflow by using saturating subtraction.
+ const uint32x4_t p = vqsubq_u32(axn, dxd);
+ const uint32x4_t pxs = vmulq_n_u32(p, scale);
+ // vrshrn_n_u32() (narrowing shift) can only shift by 16 and kSgrProjScaleBits
+ // is 20.
+ const uint32x4_t shifted = vrshrq_n_u32(pxs, kSgrProjScaleBits);
+ return vmovn_u32(shifted);
+}
+
+template <int n>
+inline uint16x8_t CalculateMa(const uint16x8_t sum, const uint32x4_t sum_sq[2],
+ const uint32_t scale) {
+ static_assert(n == 9 || n == 25, "");
+ const uint16x8_t b = vrshrq_n_u16(sum, 2);
+ const uint16x4_t sum_lo = vget_low_u16(b);
+ const uint16x4_t sum_hi = vget_high_u16(b);
+ const uint16x4_t z0 =
+ CalculateMa<n>(sum_lo, vrshrq_n_u32(sum_sq[0], 4), scale);
+ const uint16x4_t z1 =
+ CalculateMa<n>(sum_hi, vrshrq_n_u32(sum_sq[1], 4), scale);
+ return vcombine_u16(z0, z1);
+}
+
+inline void CalculateB5(const uint16x8_t sum, const uint16x8_t ma,
+ uint32x4_t b[2]) {
+ // one_over_n == 164.
+ constexpr uint32_t one_over_n =
+ ((1 << kSgrProjReciprocalBits) + (25 >> 1)) / 25;
+ // one_over_n_quarter == 41.
+ constexpr uint32_t one_over_n_quarter = one_over_n >> 2;
+ static_assert(one_over_n == one_over_n_quarter << 2, "");
+ // |ma| is in range [0, 255].
+ const uint32x4_t m2 = VmullLo16(ma, sum);
+ const uint32x4_t m3 = VmullHi16(ma, sum);
+ const uint32x4_t m0 = vmulq_n_u32(m2, one_over_n_quarter);
+ const uint32x4_t m1 = vmulq_n_u32(m3, one_over_n_quarter);
+ b[0] = vrshrq_n_u32(m0, kSgrProjReciprocalBits - 2);
+ b[1] = vrshrq_n_u32(m1, kSgrProjReciprocalBits - 2);
+}
+
+inline void CalculateB3(const uint16x8_t sum, const uint16x8_t ma,
+ uint32x4_t b[2]) {
+ // one_over_n == 455.
+ constexpr uint32_t one_over_n =
+ ((1 << kSgrProjReciprocalBits) + (9 >> 1)) / 9;
+ const uint32x4_t m0 = VmullLo16(ma, sum);
+ const uint32x4_t m1 = VmullHi16(ma, sum);
+ const uint32x4_t m2 = vmulq_n_u32(m0, one_over_n);
+ const uint32x4_t m3 = vmulq_n_u32(m1, one_over_n);
+ b[0] = vrshrq_n_u32(m2, kSgrProjReciprocalBits);
+ b[1] = vrshrq_n_u32(m3, kSgrProjReciprocalBits);
+}
+
+inline void CalculateSumAndIndex3(const uint16x8_t s3[3],
+ const uint32x4_t sq3[3][2],
+ const uint32_t scale, uint16x8_t* const sum,
+ uint16x8_t* const index) {
+ uint32x4_t sum_sq[2];
+ *sum = Sum3_16(s3);
+ Sum3_32(sq3, sum_sq);
+ *index = CalculateMa<9>(*sum, sum_sq, scale);
+}
+
+inline void CalculateSumAndIndex5(const uint16x8_t s5[5],
+ const uint32x4_t sq5[5][2],
+ const uint32_t scale, uint16x8_t* const sum,
+ uint16x8_t* const index) {
+ uint32x4_t sum_sq[2];
+ *sum = Sum5_16(s5);
+ Sum5_32(sq5, sum_sq);
+ *index = CalculateMa<25>(*sum, sum_sq, scale);
+}
+
+template <int n, int offset>
+inline void LookupIntermediate(const uint16x8_t sum, const uint16x8_t index,
+ uint8x16_t* const ma, uint32x4_t b[2]) {
+ static_assert(n == 9 || n == 25, "");
+ static_assert(offset == 0 || offset == 8, "");
+
+ const uint8x8_t idx = vqmovn_u16(index);
+ uint8_t temp[8];
+ vst1_u8(temp, idx);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[0]], *ma, offset + 0);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[1]], *ma, offset + 1);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[2]], *ma, offset + 2);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[3]], *ma, offset + 3);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[4]], *ma, offset + 4);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[5]], *ma, offset + 5);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[6]], *ma, offset + 6);
+ *ma = vsetq_lane_u8(kSgrMaLookup[temp[7]], *ma, offset + 7);
+ // b = ma * b * one_over_n
+ // |ma| = [0, 255]
+ // |sum| is a box sum with radius 1 or 2.
+ // For the first pass radius is 2. Maximum value is 5x5x255 = 6375.
+ // For the second pass radius is 1. Maximum value is 3x3x255 = 2295.
+ // |one_over_n| = ((1 << kSgrProjReciprocalBits) + (n >> 1)) / n
+ // When radius is 2 |n| is 25. |one_over_n| is 164.
+ // When radius is 1 |n| is 9. |one_over_n| is 455.
+ // |kSgrProjReciprocalBits| is 12.
+ // Radius 2: 255 * 6375 * 164 >> 12 = 65088 (16 bits).
+ // Radius 1: 255 * 2295 * 455 >> 12 = 65009 (16 bits).
+ const uint16x8_t maq =
+ vmovl_u8((offset == 0) ? vget_low_u8(*ma) : vget_high_u8(*ma));
+ if (n == 9) {
+ CalculateB3(sum, maq, b);
+ } else {
+ CalculateB5(sum, maq, b);
+ }
+}
+
+inline uint8x8_t AdjustValue(const uint8x8_t value, const uint8x8_t index,
+ const int threshold) {
+ const uint8x8_t thresholds = vdup_n_u8(threshold);
+ const uint8x8_t offset = vcgt_u8(index, thresholds);
+ // Adding 255 is equivalent to subtracting 1 for 8-bit data.
+ return vadd_u8(value, offset);
+}
+
+inline uint8x8_t MaLookupAndAdjust(const uint8x8x4_t table0,
+ const uint8x8x2_t table1,
+ const uint16x8_t index) {
+ const uint8x8_t idx = vqmovn_u16(index);
+ // All elements whose indices are out of range [0, 47] are set to 0.
+ uint8x8_t val = vtbl4_u8(table0, idx); // Range [0, 31].
+ // Subtract 8 to shuffle the next index range.
+ const uint8x8_t sub_idx = vsub_u8(idx, vdup_n_u8(32));
+ const uint8x8_t res = vtbl2_u8(table1, sub_idx); // Range [32, 47].
+ // Use OR instruction to combine shuffle results together.
+ val = vorr_u8(val, res);
+
+ // For elements whose indices are larger than 47, since they seldom change
+ // values with the increase of the index, we use comparison and arithmetic
+ // operations to calculate their values.
+ // Elements whose indices are larger than 47 (with value 0) are set to 5.
+ val = vmax_u8(val, vdup_n_u8(5));
+ val = AdjustValue(val, idx, 55); // 55 is the last index which value is 5.
+ val = AdjustValue(val, idx, 72); // 72 is the last index which value is 4.
+ val = AdjustValue(val, idx, 101); // 101 is the last index which value is 3.
+ val = AdjustValue(val, idx, 169); // 169 is the last index which value is 2.
+ val = AdjustValue(val, idx, 254); // 254 is the last index which value is 1.
+ return val;
+}
+
+inline void CalculateIntermediate(const uint16x8_t sum[2],
+ const uint16x8_t index[2],
+ uint8x16_t* const ma, uint32x4_t b0[2],
+ uint32x4_t b1[2]) {
+ // Use table lookup to read elements whose indices are less than 48.
+ // Using one uint8x8x4_t vector and one uint8x8x2_t vector is faster than
+ // using two uint8x8x3_t vectors.
+ uint8x8x4_t table0;
+ uint8x8x2_t table1;
+ table0.val[0] = vld1_u8(kSgrMaLookup + 0 * 8);
+ table0.val[1] = vld1_u8(kSgrMaLookup + 1 * 8);
+ table0.val[2] = vld1_u8(kSgrMaLookup + 2 * 8);
+ table0.val[3] = vld1_u8(kSgrMaLookup + 3 * 8);
+ table1.val[0] = vld1_u8(kSgrMaLookup + 4 * 8);
+ table1.val[1] = vld1_u8(kSgrMaLookup + 5 * 8);
+ const uint8x8_t ma_lo = MaLookupAndAdjust(table0, table1, index[0]);
+ const uint8x8_t ma_hi = MaLookupAndAdjust(table0, table1, index[1]);
+ *ma = vcombine_u8(ma_lo, ma_hi);
+ // b = ma * b * one_over_n
+ // |ma| = [0, 255]
+ // |sum| is a box sum with radius 1 or 2.
+ // For the first pass radius is 2. Maximum value is 5x5x255 = 6375.
+ // For the second pass radius is 1. Maximum value is 3x3x255 = 2295.
+ // |one_over_n| = ((1 << kSgrProjReciprocalBits) + (n >> 1)) / n
+ // When radius is 2 |n| is 25. |one_over_n| is 164.
+ // When radius is 1 |n| is 9. |one_over_n| is 455.
+ // |kSgrProjReciprocalBits| is 12.
+ // Radius 2: 255 * 6375 * 164 >> 12 = 65088 (16 bits).
+ // Radius 1: 255 * 2295 * 455 >> 12 = 65009 (16 bits).
+ const uint16x8_t maq0 = vmovl_u8(vget_low_u8(*ma));
+ CalculateB3(sum[0], maq0, b0);
+ const uint16x8_t maq1 = vmovl_u8(vget_high_u8(*ma));
+ CalculateB3(sum[1], maq1, b1);
+}
+
+inline void CalculateIntermediate(const uint16x8_t sum[2],
+ const uint16x8_t index[2], uint8x16_t ma[2],
+ uint32x4_t b[4]) {
+ uint8x16_t mas;
+ CalculateIntermediate(sum, index, &mas, b + 0, b + 2);
+ ma[0] = vcombine_u8(vget_low_u8(ma[0]), vget_low_u8(mas));
+ ma[1] = vextq_u8(mas, vdupq_n_u8(0), 8);
+}
+
+template <int offset>
+inline void CalculateIntermediate5(const uint16x8_t s5[5],
+ const uint32x4_t sq5[5][2],
+ const uint32_t scale, uint8x16_t* const ma,
+ uint32x4_t b[2]) {
+ static_assert(offset == 0 || offset == 8, "");
+ uint16x8_t sum, index;
+ CalculateSumAndIndex5(s5, sq5, scale, &sum, &index);
+ LookupIntermediate<25, offset>(sum, index, ma, b);
+}
+
+inline void CalculateIntermediate3(const uint16x8_t s3[3],
+ const uint32x4_t sq3[3][2],
+ const uint32_t scale, uint8x16_t* const ma,
+ uint32x4_t b[2]) {
+ uint16x8_t sum, index;
+ CalculateSumAndIndex3(s3, sq3, scale, &sum, &index);
+ LookupIntermediate<9, 0>(sum, index, ma, b);
+}
+
+inline void Store343_444(const uint32x4_t b3[3], const ptrdiff_t x,
+ uint32x4_t sum_b343[2], uint32x4_t sum_b444[2],
+ uint32_t* const b343, uint32_t* const b444) {
+ uint32x4_t b[3], sum_b111[2];
+ Prepare3_32(b3 + 0, b);
+ sum_b111[0] = Sum3_32(b);
+ sum_b444[0] = vshlq_n_u32(sum_b111[0], 2);
+ sum_b343[0] = vsubq_u32(sum_b444[0], sum_b111[0]);
+ sum_b343[0] = vaddq_u32(sum_b343[0], b[1]);
+ Prepare3_32(b3 + 1, b);
+ sum_b111[1] = Sum3_32(b);
+ sum_b444[1] = vshlq_n_u32(sum_b111[1], 2);
+ sum_b343[1] = vsubq_u32(sum_b444[1], sum_b111[1]);
+ sum_b343[1] = vaddq_u32(sum_b343[1], b[1]);
+ StoreAligned32U32(b444 + x, sum_b444);
+ StoreAligned32U32(b343 + x, sum_b343);
+}
+
+inline void Store343_444Lo(const uint8x16_t ma3[3], const uint32x4_t b3[3],
+ const ptrdiff_t x, uint16x8_t* const sum_ma343,
+ uint16x8_t* const sum_ma444, uint32x4_t sum_b343[2],
+ uint32x4_t sum_b444[2], uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ const uint16x8_t sum_ma111 = Sum3WLo16(ma3);
+ *sum_ma444 = vshlq_n_u16(sum_ma111, 2);
+ vst1q_u16(ma444 + x, *sum_ma444);
+ const uint16x8_t sum333 = vsubq_u16(*sum_ma444, sum_ma111);
+ *sum_ma343 = VaddwLo8(sum333, ma3[1]);
+ vst1q_u16(ma343 + x, *sum_ma343);
+ Store343_444(b3, x, sum_b343, sum_b444, b343, b444);
+}
+
+inline void Store343_444Hi(const uint8x16_t ma3[3], const uint32x4_t b3[2],
+ const ptrdiff_t x, uint16x8_t* const sum_ma343,
+ uint16x8_t* const sum_ma444, uint32x4_t sum_b343[2],
+ uint32x4_t sum_b444[2], uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ const uint16x8_t sum_ma111 = Sum3WHi16(ma3);
+ *sum_ma444 = vshlq_n_u16(sum_ma111, 2);
+ vst1q_u16(ma444 + x, *sum_ma444);
+ const uint16x8_t sum333 = vsubq_u16(*sum_ma444, sum_ma111);
+ *sum_ma343 = VaddwHi8(sum333, ma3[1]);
+ vst1q_u16(ma343 + x, *sum_ma343);
+ Store343_444(b3, x, sum_b343, sum_b444, b343, b444);
+}
+
+inline void Store343_444Lo(const uint8x16_t ma3[3], const uint32x4_t b3[2],
+ const ptrdiff_t x, uint16x8_t* const sum_ma343,
+ uint32x4_t sum_b343[2], uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ uint16x8_t sum_ma444;
+ uint32x4_t sum_b444[2];
+ Store343_444Lo(ma3, b3, x, sum_ma343, &sum_ma444, sum_b343, sum_b444, ma343,
+ ma444, b343, b444);
+}
+
+inline void Store343_444Hi(const uint8x16_t ma3[3], const uint32x4_t b3[2],
+ const ptrdiff_t x, uint16x8_t* const sum_ma343,
+ uint32x4_t sum_b343[2], uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ uint16x8_t sum_ma444;
+ uint32x4_t sum_b444[2];
+ Store343_444Hi(ma3, b3, x, sum_ma343, &sum_ma444, sum_b343, sum_b444, ma343,
+ ma444, b343, b444);
+}
+
+inline void Store343_444Lo(const uint8x16_t ma3[3], const uint32x4_t b3[2],
+ const ptrdiff_t x, uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ uint16x8_t sum_ma343;
+ uint32x4_t sum_b343[2];
+ Store343_444Lo(ma3, b3, x, &sum_ma343, sum_b343, ma343, ma444, b343, b444);
+}
+
+inline void Store343_444Hi(const uint8x16_t ma3[3], const uint32x4_t b3[2],
+ const ptrdiff_t x, uint16_t* const ma343,
+ uint16_t* const ma444, uint32_t* const b343,
+ uint32_t* const b444) {
+ uint16x8_t sum_ma343;
+ uint32x4_t sum_b343[2];
+ Store343_444Hi(ma3, b3, x, &sum_ma343, sum_b343, ma343, ma444, b343, b444);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5Lo(
+ const uint16x8_t s[2][4], const uint32_t scale, uint16_t* const sum5[5],
+ uint32_t* const square_sum5[5], uint32x4_t sq[2][8], uint8x16_t* const ma,
+ uint32x4_t b[2]) {
+ uint16x8_t s5[2][5];
+ uint32x4_t sq5[5][2];
+ Square(s[0][1], sq[0] + 2);
+ Square(s[1][1], sq[1] + 2);
+ s5[0][3] = Sum5Horizontal16(s[0]);
+ vst1q_u16(sum5[3], s5[0][3]);
+ s5[0][4] = Sum5Horizontal16(s[1]);
+ vst1q_u16(sum5[4], s5[0][4]);
+ Sum5Horizontal32(sq[0], sq5[3]);
+ StoreAligned32U32(square_sum5[3], sq5[3]);
+ Sum5Horizontal32(sq[1], sq5[4]);
+ StoreAligned32U32(square_sum5[4], sq5[4]);
+ LoadAligned16x3U16(sum5, 0, s5[0]);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
+ CalculateIntermediate5<0>(s5[0], sq5, scale, ma, b);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5(
+ const uint16x8_t s[2][4], const ptrdiff_t sum_width, const ptrdiff_t x,
+ const uint32_t scale, uint16_t* const sum5[5],
+ uint32_t* const square_sum5[5], uint32x4_t sq[2][8], uint8x16_t ma[2],
+ uint32x4_t b[6]) {
+ uint16x8_t s5[2][5];
+ uint32x4_t sq5[5][2];
+ Square(s[0][2], sq[0] + 4);
+ Square(s[1][2], sq[1] + 4);
+ s5[0][3] = Sum5Horizontal16(s[0] + 1);
+ s5[1][3] = Sum5Horizontal16(s[0] + 2);
+ vst1q_u16(sum5[3] + x + 0, s5[0][3]);
+ vst1q_u16(sum5[3] + x + 8, s5[1][3]);
+ s5[0][4] = Sum5Horizontal16(s[1] + 1);
+ s5[1][4] = Sum5Horizontal16(s[1] + 2);
+ vst1q_u16(sum5[4] + x + 0, s5[0][4]);
+ vst1q_u16(sum5[4] + x + 8, s5[1][4]);
+ Sum5Horizontal32(sq[0] + 2, sq5[3]);
+ StoreAligned32U32(square_sum5[3] + x, sq5[3]);
+ Sum5Horizontal32(sq[1] + 2, sq5[4]);
+ StoreAligned32U32(square_sum5[4] + x, sq5[4]);
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ LoadAligned32x3U32(square_sum5, x, sq5);
+ CalculateIntermediate5<8>(s5[0], sq5, scale, &ma[0], b + 2);
+
+ Square(s[0][3], sq[0] + 6);
+ Square(s[1][3], sq[1] + 6);
+ Sum5Horizontal32(sq[0] + 4, sq5[3]);
+ StoreAligned32U32(square_sum5[3] + x + 8, sq5[3]);
+ Sum5Horizontal32(sq[1] + 4, sq5[4]);
+ StoreAligned32U32(square_sum5[4] + x + 8, sq5[4]);
+ LoadAligned16x3U16Msan(sum5, x + 8, sum_width, s5[1]);
+ LoadAligned32x3U32Msan(square_sum5, x + 8, sum_width, sq5);
+ CalculateIntermediate5<0>(s5[1], sq5, scale, &ma[1], b + 4);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5LastRowLo(
+ const uint16x8_t s[2], const uint32_t scale, const uint16_t* const sum5[5],
+ const uint32_t* const square_sum5[5], uint32x4_t sq[4],
+ uint8x16_t* const ma, uint32x4_t b[2]) {
+ uint16x8_t s5[5];
+ uint32x4_t sq5[5][2];
+ Square(s[1], sq + 2);
+ s5[3] = s5[4] = Sum5Horizontal16(s);
+ Sum5Horizontal32(sq, sq5[3]);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ LoadAligned16x3U16(sum5, 0, s5);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
+ CalculateIntermediate5<0>(s5, sq5, scale, ma, b);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5LastRow(
+ const uint16x8_t s[4], const ptrdiff_t sum_width, const ptrdiff_t x,
+ const uint32_t scale, const uint16_t* const sum5[5],
+ const uint32_t* const square_sum5[5], uint32x4_t sq[8], uint8x16_t ma[2],
+ uint32x4_t b[6]) {
+ uint16x8_t s5[2][5];
+ uint32x4_t sq5[5][2];
+ Square(s[2], sq + 4);
+ s5[0][3] = Sum5Horizontal16(s + 1);
+ s5[1][3] = Sum5Horizontal16(s + 2);
+ s5[0][4] = s5[0][3];
+ s5[1][4] = s5[1][3];
+ Sum5Horizontal32(sq + 2, sq5[3]);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ LoadAligned32x3U32(square_sum5, x, sq5);
+ CalculateIntermediate5<8>(s5[0], sq5, scale, &ma[0], b + 2);
+
+ Square(s[3], sq + 6);
+ Sum5Horizontal32(sq + 4, sq5[3]);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ LoadAligned16x3U16Msan(sum5, x + 8, sum_width, s5[1]);
+ LoadAligned32x3U32Msan(square_sum5, x + 8, sum_width, sq5);
+ CalculateIntermediate5<0>(s5[1], sq5, scale, &ma[1], b + 4);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess3Lo(
+ const uint16x8_t s[2], const uint32_t scale, uint16_t* const sum3[3],
+ uint32_t* const square_sum3[3], uint32x4_t sq[4], uint8x16_t* const ma,
+ uint32x4_t b[2]) {
+ uint16x8_t s3[3];
+ uint32x4_t sq3[3][2];
+ Square(s[1], sq + 2);
+ s3[2] = Sum3Horizontal16(s);
+ vst1q_u16(sum3[2], s3[2]);
+ Sum3Horizontal32(sq, sq3[2]);
+ StoreAligned32U32(square_sum3[2], sq3[2]);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
+ CalculateIntermediate3(s3, sq3, scale, ma, b);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess3(
+ const uint16x8_t s[4], const ptrdiff_t x, const ptrdiff_t sum_width,
+ const uint32_t scale, uint16_t* const sum3[3],
+ uint32_t* const square_sum3[3], uint32x4_t sq[8], uint8x16_t ma[2],
+ uint32x4_t b[6]) {
+ uint16x8_t s3[4], sum[2], index[2];
+ uint32x4_t sq3[3][2];
+
+ Square(s[2], sq + 4);
+ s3[2] = Sum3Horizontal16(s + 1);
+ s3[3] = Sum3Horizontal16(s + 2);
+ StoreAligned32U16(sum3[2] + x, s3 + 2);
+ Sum3Horizontal32(sq + 2, sq3[2]);
+ StoreAligned32U32(square_sum3[2] + x + 0, sq3[2]);
+ LoadAligned16x2U16(sum3, x, s3);
+ LoadAligned32x2U32(square_sum3, x, sq3);
+ CalculateSumAndIndex3(s3, sq3, scale, &sum[0], &index[0]);
+
+ Square(s[3], sq + 6);
+ Sum3Horizontal32(sq + 4, sq3[2]);
+ StoreAligned32U32(square_sum3[2] + x + 8, sq3[2]);
+ LoadAligned16x2U16Msan(sum3, x + 8, sum_width, s3 + 1);
+ LoadAligned32x2U32Msan(square_sum3, x + 8, sum_width, sq3);
+ CalculateSumAndIndex3(s3 + 1, sq3, scale, &sum[1], &index[1]);
+ CalculateIntermediate(sum, index, ma, b + 2);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLo(
+ const uint16x8_t s[2][4], const uint16_t scales[2], uint16_t* const sum3[4],
+ uint16_t* const sum5[5], uint32_t* const square_sum3[4],
+ uint32_t* const square_sum5[5], uint32x4_t sq[2][8], uint8x16_t ma3[2][2],
+ uint32x4_t b3[2][6], uint8x16_t* const ma5, uint32x4_t b5[2]) {
+ uint16x8_t s3[4], s5[5], sum[2], index[2];
+ uint32x4_t sq3[4][2], sq5[5][2];
+
+ Square(s[0][1], sq[0] + 2);
+ Square(s[1][1], sq[1] + 2);
+ SumHorizontal16(s[0], &s3[2], &s5[3]);
+ SumHorizontal16(s[1], &s3[3], &s5[4]);
+ vst1q_u16(sum3[2], s3[2]);
+ vst1q_u16(sum3[3], s3[3]);
+ vst1q_u16(sum5[3], s5[3]);
+ vst1q_u16(sum5[4], s5[4]);
+ SumHorizontal32(sq[0], &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ StoreAligned32U32(square_sum3[2], sq3[2]);
+ StoreAligned32U32(square_sum5[3], sq5[3]);
+ SumHorizontal32(sq[1], &sq3[3][0], &sq3[3][1], &sq5[4][0], &sq5[4][1]);
+ StoreAligned32U32(square_sum3[3], sq3[3]);
+ StoreAligned32U32(square_sum5[4], sq5[4]);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
+ LoadAligned16x3U16(sum5, 0, s5);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
+ CalculateSumAndIndex3(s3 + 0, sq3 + 0, scales[1], &sum[0], &index[0]);
+ CalculateSumAndIndex3(s3 + 1, sq3 + 1, scales[1], &sum[1], &index[1]);
+ CalculateIntermediate(sum, index, &ma3[0][0], b3[0], b3[1]);
+ ma3[1][0] = vextq_u8(ma3[0][0], vdupq_n_u8(0), 8);
+ CalculateIntermediate5<0>(s5, sq5, scales[0], ma5, b5);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess(
+ const uint16x8_t s[2][4], const ptrdiff_t x, const uint16_t scales[2],
+ uint16_t* const sum3[4], uint16_t* const sum5[5],
+ uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
+ const ptrdiff_t sum_width, uint32x4_t sq[2][8], uint8x16_t ma3[2][2],
+ uint32x4_t b3[2][6], uint8x16_t ma5[2], uint32x4_t b5[6]) {
+ uint16x8_t s3[2][4], s5[2][5], sum[2][2], index[2][2];
+ uint32x4_t sq3[4][2], sq5[5][2];
+
+ SumHorizontal16(s[0] + 1, &s3[0][2], &s3[1][2], &s5[0][3], &s5[1][3]);
+ vst1q_u16(sum3[2] + x + 0, s3[0][2]);
+ vst1q_u16(sum3[2] + x + 8, s3[1][2]);
+ vst1q_u16(sum5[3] + x + 0, s5[0][3]);
+ vst1q_u16(sum5[3] + x + 8, s5[1][3]);
+ SumHorizontal16(s[1] + 1, &s3[0][3], &s3[1][3], &s5[0][4], &s5[1][4]);
+ vst1q_u16(sum3[3] + x + 0, s3[0][3]);
+ vst1q_u16(sum3[3] + x + 8, s3[1][3]);
+ vst1q_u16(sum5[4] + x + 0, s5[0][4]);
+ vst1q_u16(sum5[4] + x + 8, s5[1][4]);
+ Square(s[0][2], sq[0] + 4);
+ Square(s[1][2], sq[1] + 4);
+ SumHorizontal32(sq[0] + 2, &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ StoreAligned32U32(square_sum3[2] + x, sq3[2]);
+ StoreAligned32U32(square_sum5[3] + x, sq5[3]);
+ SumHorizontal32(sq[1] + 2, &sq3[3][0], &sq3[3][1], &sq5[4][0], &sq5[4][1]);
+ StoreAligned32U32(square_sum3[3] + x, sq3[3]);
+ StoreAligned32U32(square_sum5[4] + x, sq5[4]);
+ LoadAligned16x2U16(sum3, x, s3[0]);
+ LoadAligned32x2U32(square_sum3, x, sq3);
+ CalculateSumAndIndex3(s3[0], sq3, scales[1], &sum[0][0], &index[0][0]);
+ CalculateSumAndIndex3(s3[0] + 1, sq3 + 1, scales[1], &sum[1][0],
+ &index[1][0]);
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ LoadAligned32x3U32(square_sum5, x, sq5);
+ CalculateIntermediate5<8>(s5[0], sq5, scales[0], &ma5[0], b5 + 2);
+
+ Square(s[0][3], sq[0] + 6);
+ Square(s[1][3], sq[1] + 6);
+ SumHorizontal32(sq[0] + 4, &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ StoreAligned32U32(square_sum3[2] + x + 8, sq3[2]);
+ StoreAligned32U32(square_sum5[3] + x + 8, sq5[3]);
+ SumHorizontal32(sq[1] + 4, &sq3[3][0], &sq3[3][1], &sq5[4][0], &sq5[4][1]);
+ StoreAligned32U32(square_sum3[3] + x + 8, sq3[3]);
+ StoreAligned32U32(square_sum5[4] + x + 8, sq5[4]);
+ LoadAligned16x2U16Msan(sum3, x + 8, sum_width, s3[1]);
+ LoadAligned32x2U32Msan(square_sum3, x + 8, sum_width, sq3);
+ CalculateSumAndIndex3(s3[1], sq3, scales[1], &sum[0][1], &index[0][1]);
+ CalculateSumAndIndex3(s3[1] + 1, sq3 + 1, scales[1], &sum[1][1],
+ &index[1][1]);
+ CalculateIntermediate(sum[0], index[0], ma3[0], b3[0] + 2);
+ CalculateIntermediate(sum[1], index[1], ma3[1], b3[1] + 2);
+ LoadAligned16x3U16Msan(sum5, x + 8, sum_width, s5[1]);
+ LoadAligned32x3U32Msan(square_sum5, x + 8, sum_width, sq5);
+ CalculateIntermediate5<0>(s5[1], sq5, scales[0], &ma5[1], b5 + 4);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLastRowLo(
+ const uint16x8_t s[2], const uint16_t scales[2],
+ const uint16_t* const sum3[4], const uint16_t* const sum5[5],
+ const uint32_t* const square_sum3[4], const uint32_t* const square_sum5[5],
+ uint32x4_t sq[4], uint8x16_t* const ma3, uint8x16_t* const ma5,
+ uint32x4_t b3[2], uint32x4_t b5[2]) {
+ uint16x8_t s3[3], s5[5];
+ uint32x4_t sq3[3][2], sq5[5][2];
+
+ Square(s[1], sq + 2);
+ SumHorizontal16(s, &s3[2], &s5[3]);
+ SumHorizontal32(sq, &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ LoadAligned16x3U16(sum5, 0, s5);
+ s5[4] = s5[3];
+ LoadAligned32x3U32(square_sum5, 0, sq5);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ CalculateIntermediate5<0>(s5, sq5, scales[0], ma5, b5);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
+ CalculateIntermediate3(s3, sq3, scales[1], ma3, b3);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLastRow(
+ const uint16x8_t s[4], const ptrdiff_t sum_width, const ptrdiff_t x,
+ const uint16_t scales[2], const uint16_t* const sum3[4],
+ const uint16_t* const sum5[5], const uint32_t* const square_sum3[4],
+ const uint32_t* const square_sum5[5], uint32x4_t sq[8], uint8x16_t ma3[2],
+ uint8x16_t ma5[2], uint32x4_t b3[6], uint32x4_t b5[6]) {
+ uint16x8_t s3[2][3], s5[2][5], sum[2], index[2];
+ uint32x4_t sq3[3][2], sq5[5][2];
+
+ Square(s[2], sq + 4);
+ SumHorizontal16(s + 1, &s3[0][2], &s3[1][2], &s5[0][3], &s5[1][3]);
+ SumHorizontal32(sq + 2, &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ s5[0][4] = s5[0][3];
+ LoadAligned32x3U32(square_sum5, x, sq5);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ CalculateIntermediate5<8>(s5[0], sq5, scales[0], ma5, b5 + 2);
+ LoadAligned16x2U16(sum3, x, s3[0]);
+ LoadAligned32x2U32(square_sum3, x, sq3);
+ CalculateSumAndIndex3(s3[0], sq3, scales[1], &sum[0], &index[0]);
+
+ Square(s[3], sq + 6);
+ SumHorizontal32(sq + 4, &sq3[2][0], &sq3[2][1], &sq5[3][0], &sq5[3][1]);
+ LoadAligned16x3U16Msan(sum5, x + 8, sum_width, s5[1]);
+ s5[1][4] = s5[1][3];
+ LoadAligned32x3U32Msan(square_sum5, x + 8, sum_width, sq5);
+ sq5[4][0] = sq5[3][0];
+ sq5[4][1] = sq5[3][1];
+ CalculateIntermediate5<0>(s5[1], sq5, scales[0], ma5 + 1, b5 + 4);
+ LoadAligned16x2U16Msan(sum3, x + 8, sum_width, s3[1]);
+ LoadAligned32x2U32Msan(square_sum3, x + 8, sum_width, sq3);
+ CalculateSumAndIndex3(s3[1], sq3, scales[1], &sum[1], &index[1]);
+ CalculateIntermediate(sum, index, ma3, b3 + 2);
+}
+
+inline void BoxSumFilterPreProcess5(const uint16_t* const src0,
+ const uint16_t* const src1, const int width,
+ const uint32_t scale,
+ uint16_t* const sum5[5],
+ uint32_t* const square_sum5[5],
+ const ptrdiff_t sum_width, uint16_t* ma565,
+ uint32_t* b565) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[2][4];
+ uint8x16_t mas[2];
+ uint32x4_t sq[2][8], bs[6];
+
+ s[0][0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[0][1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ s[1][0] = Load1QMsanU16(src1 + 0, overread_in_bytes + 0);
+ s[1][1] = Load1QMsanU16(src1 + 8, overread_in_bytes + 16);
+ Square(s[0][0], sq[0]);
+ Square(s[1][0], sq[1]);
+ BoxFilterPreProcess5Lo(s, scale, sum5, square_sum5, sq, &mas[0], bs);
+
+ int x = 0;
+ do {
+ uint8x16_t ma5[3];
+ uint16x8_t ma[2];
+ uint32x4_t b[4];
+
+ s[0][2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[0][3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ s[1][2] = Load1QMsanU16(src1 + x + 16,
+ overread_in_bytes + sizeof(*src1) * (x + 16));
+ s[1][3] = Load1QMsanU16(src1 + x + 24,
+ overread_in_bytes + sizeof(*src1) * (x + 24));
+
+ BoxFilterPreProcess5(s, sum_width, x + 8, scale, sum5, square_sum5, sq, mas,
+ bs);
+ Prepare3_8<0>(mas, ma5);
+ ma[0] = Sum565Lo(ma5);
+ ma[1] = Sum565Hi(ma5);
+ StoreAligned32U16(ma565, ma);
+ Sum565(bs + 0, b + 0);
+ Sum565(bs + 2, b + 2);
+ StoreAligned64U32(b565, b);
+ s[0][0] = s[0][2];
+ s[0][1] = s[0][3];
+ s[1][0] = s[1][2];
+ s[1][1] = s[1][3];
+ sq[0][2] = sq[0][6];
+ sq[0][3] = sq[0][7];
+ sq[1][2] = sq[1][6];
+ sq[1][3] = sq[1][7];
+ mas[0] = mas[1];
+ bs[0] = bs[4];
+ bs[1] = bs[5];
+ ma565 += 16;
+ b565 += 16;
+ x += 16;
+ } while (x < width);
+}
+
+template <bool calculate444>
+LIBGAV1_ALWAYS_INLINE void BoxSumFilterPreProcess3(
+ const uint16_t* const src, const int width, const uint32_t scale,
+ uint16_t* const sum3[3], uint32_t* const square_sum3[3],
+ const ptrdiff_t sum_width, uint16_t* ma343, uint16_t* ma444, uint32_t* b343,
+ uint32_t* b444) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass2 - sizeof(*src) * width;
+ uint16x8_t s[4];
+ uint8x16_t mas[2];
+ uint32x4_t sq[8], bs[6];
+
+ s[0] = Load1QMsanU16(src + 0, overread_in_bytes + 0);
+ s[1] = Load1QMsanU16(src + 8, overread_in_bytes + 16);
+ Square(s[0], sq);
+ // Quiet "may be used uninitialized" warning.
+ mas[0] = mas[1] = vdupq_n_u8(0);
+ BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs);
+
+ int x = 0;
+ do {
+ s[2] = Load1QMsanU16(src + x + 16,
+ overread_in_bytes + sizeof(*src) * (x + 16));
+ s[3] = Load1QMsanU16(src + x + 24,
+ overread_in_bytes + sizeof(*src) * (x + 24));
+ BoxFilterPreProcess3(s, x + 8, sum_width, scale, sum3, square_sum3, sq, mas,
+ bs);
+ uint8x16_t ma3[3];
+ Prepare3_8<0>(mas, ma3);
+ if (calculate444) { // NOLINT(readability-simplify-boolean-expr)
+ Store343_444Lo(ma3, bs + 0, 0, ma343, ma444, b343, b444);
+ Store343_444Hi(ma3, bs + 2, 8, ma343, ma444, b343, b444);
+ ma444 += 16;
+ b444 += 16;
+ } else {
+ uint16x8_t ma[2];
+ uint32x4_t b[4];
+ ma[0] = Sum343Lo(ma3);
+ ma[1] = Sum343Hi(ma3);
+ StoreAligned32U16(ma343, ma);
+ Sum343(bs + 0, b + 0);
+ Sum343(bs + 2, b + 2);
+ StoreAligned64U32(b343, b);
+ }
+ s[1] = s[3];
+ sq[2] = sq[6];
+ sq[3] = sq[7];
+ mas[0] = mas[1];
+ bs[0] = bs[4];
+ bs[1] = bs[5];
+ ma343 += 16;
+ b343 += 16;
+ x += 16;
+ } while (x < width);
+}
+
+inline void BoxSumFilterPreProcess(
+ const uint16_t* const src0, const uint16_t* const src1, const int width,
+ const uint16_t scales[2], uint16_t* const sum3[4], uint16_t* const sum5[5],
+ uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
+ const ptrdiff_t sum_width, uint16_t* const ma343[4], uint16_t* const ma444,
+ uint16_t* ma565, uint32_t* const b343[4], uint32_t* const b444,
+ uint32_t* b565) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[2][4];
+ uint8x16_t ma3[2][2], ma5[2];
+ uint32x4_t sq[2][8], b3[2][6], b5[6];
+
+ s[0][0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[0][1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ s[1][0] = Load1QMsanU16(src1 + 0, overread_in_bytes + 0);
+ s[1][1] = Load1QMsanU16(src1 + 8, overread_in_bytes + 16);
+ Square(s[0][0], sq[0]);
+ Square(s[1][0], sq[1]);
+ BoxFilterPreProcessLo(s, scales, sum3, sum5, square_sum3, square_sum5, sq,
+ ma3, b3, &ma5[0], b5);
+
+ int x = 0;
+ do {
+ uint16x8_t ma[2];
+ uint32x4_t b[4];
+ uint8x16_t ma3x[3], ma5x[3];
+
+ s[0][2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[0][3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ s[1][2] = Load1QMsanU16(src1 + x + 16,
+ overread_in_bytes + sizeof(*src1) * (x + 16));
+ s[1][3] = Load1QMsanU16(src1 + x + 24,
+ overread_in_bytes + sizeof(*src1) * (x + 24));
+ BoxFilterPreProcess(s, x + 8, scales, sum3, sum5, square_sum3, square_sum5,
+ sum_width, sq, ma3, b3, ma5, b5);
+
+ Prepare3_8<0>(ma3[0], ma3x);
+ ma[0] = Sum343Lo(ma3x);
+ ma[1] = Sum343Hi(ma3x);
+ StoreAligned32U16(ma343[0] + x, ma);
+ Sum343(b3[0] + 0, b + 0);
+ Sum343(b3[0] + 2, b + 2);
+ StoreAligned64U32(b343[0] + x, b);
+ Sum565(b5 + 0, b + 0);
+ Sum565(b5 + 2, b + 2);
+ StoreAligned64U32(b565, b);
+ Prepare3_8<0>(ma3[1], ma3x);
+ Store343_444Lo(ma3x, b3[1], x, ma343[1], ma444, b343[1], b444);
+ Store343_444Hi(ma3x, b3[1] + 2, x + 8, ma343[1], ma444, b343[1], b444);
+ Prepare3_8<0>(ma5, ma5x);
+ ma[0] = Sum565Lo(ma5x);
+ ma[1] = Sum565Hi(ma5x);
+ StoreAligned32U16(ma565, ma);
+ s[0][0] = s[0][2];
+ s[0][1] = s[0][3];
+ s[1][0] = s[1][2];
+ s[1][1] = s[1][3];
+ sq[0][2] = sq[0][6];
+ sq[0][3] = sq[0][7];
+ sq[1][2] = sq[1][6];
+ sq[1][3] = sq[1][7];
+ ma3[0][0] = ma3[0][1];
+ ma3[1][0] = ma3[1][1];
+ ma5[0] = ma5[1];
+ b3[0][0] = b3[0][4];
+ b3[0][1] = b3[0][5];
+ b3[1][0] = b3[1][4];
+ b3[1][1] = b3[1][5];
+ b5[0] = b5[4];
+ b5[1] = b5[5];
+ ma565 += 16;
+ b565 += 16;
+ x += 16;
+ } while (x < width);
+}
+
+template <int shift>
+inline int16x4_t FilterOutput(const uint32x4_t ma_x_src, const uint32x4_t b) {
+ // ma: 255 * 32 = 8160 (13 bits)
+ // b: 65088 * 32 = 2082816 (21 bits)
+ // v: b - ma * 255 (22 bits)
+ const int32x4_t v = vreinterpretq_s32_u32(vsubq_u32(b, ma_x_src));
+ // kSgrProjSgrBits = 8
+ // kSgrProjRestoreBits = 4
+ // shift = 4 or 5
+ // v >> 8 or 9 (13 bits)
+ return vqrshrn_n_s32(v, kSgrProjSgrBits + shift - kSgrProjRestoreBits);
+}
+
+template <int shift>
+inline int16x8_t CalculateFilteredOutput(const uint16x8_t src,
+ const uint16x8_t ma,
+ const uint32x4_t b[2]) {
+ const uint32x4_t ma_x_src_lo = VmullLo16(ma, src);
+ const uint32x4_t ma_x_src_hi = VmullHi16(ma, src);
+ const int16x4_t dst_lo = FilterOutput<shift>(ma_x_src_lo, b[0]);
+ const int16x4_t dst_hi = FilterOutput<shift>(ma_x_src_hi, b[1]);
+ return vcombine_s16(dst_lo, dst_hi); // 13 bits
+}
+
+inline int16x8_t CalculateFilteredOutputPass1(const uint16x8_t src,
+ const uint16x8_t ma[2],
+ const uint32x4_t b[2][2]) {
+ const uint16x8_t ma_sum = vaddq_u16(ma[0], ma[1]);
+ uint32x4_t b_sum[2];
+ b_sum[0] = vaddq_u32(b[0][0], b[1][0]);
+ b_sum[1] = vaddq_u32(b[0][1], b[1][1]);
+ return CalculateFilteredOutput<5>(src, ma_sum, b_sum);
+}
+
+inline int16x8_t CalculateFilteredOutputPass2(const uint16x8_t src,
+ const uint16x8_t ma[3],
+ const uint32x4_t b[3][2]) {
+ const uint16x8_t ma_sum = Sum3_16(ma);
+ uint32x4_t b_sum[2];
+ Sum3_32(b, b_sum);
+ return CalculateFilteredOutput<5>(src, ma_sum, b_sum);
+}
+
+inline int16x8_t SelfGuidedFinal(const uint16x8_t src, const int32x4_t v[2]) {
+ const int16x4_t v_lo =
+ vqrshrn_n_s32(v[0], kSgrProjRestoreBits + kSgrProjPrecisionBits);
+ const int16x4_t v_hi =
+ vqrshrn_n_s32(v[1], kSgrProjRestoreBits + kSgrProjPrecisionBits);
+ const int16x8_t vv = vcombine_s16(v_lo, v_hi);
+ return vaddq_s16(vreinterpretq_s16_u16(src), vv);
+}
+
+inline int16x8_t SelfGuidedDoubleMultiplier(const uint16x8_t src,
+ const int16x8_t filter[2],
+ const int w0, const int w2) {
+ int32x4_t v[2];
+ v[0] = vmull_n_s16(vget_low_s16(filter[0]), w0);
+ v[1] = vmull_n_s16(vget_high_s16(filter[0]), w0);
+ v[0] = vmlal_n_s16(v[0], vget_low_s16(filter[1]), w2);
+ v[1] = vmlal_n_s16(v[1], vget_high_s16(filter[1]), w2);
+ return SelfGuidedFinal(src, v);
+}
+
+inline int16x8_t SelfGuidedSingleMultiplier(const uint16x8_t src,
+ const int16x8_t filter,
+ const int w0) {
+ // weight: -96 to 96 (Sgrproj_Xqd_Min/Max)
+ int32x4_t v[2];
+ v[0] = vmull_n_s16(vget_low_s16(filter), w0);
+ v[1] = vmull_n_s16(vget_high_s16(filter), w0);
+ return SelfGuidedFinal(src, v);
+}
+
+inline void ClipAndStore(uint16_t* const dst, const int16x8_t val) {
+ const uint16x8_t val0 = vreinterpretq_u16_s16(vmaxq_s16(val, vdupq_n_s16(0)));
+ const uint16x8_t val1 = vminq_u16(val0, vdupq_n_u16((1 << kBitdepth10) - 1));
+ vst1q_u16(dst, val1);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPass1(
+ const uint16_t* const src, const uint16_t* const src0,
+ const uint16_t* const src1, const ptrdiff_t stride, uint16_t* const sum5[5],
+ uint32_t* const square_sum5[5], const int width, const ptrdiff_t sum_width,
+ const uint32_t scale, const int16_t w0, uint16_t* const ma565[2],
+ uint32_t* const b565[2], uint16_t* const dst) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[2][4];
+ uint8x16_t mas[2];
+ uint32x4_t sq[2][8], bs[6];
+
+ s[0][0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[0][1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ s[1][0] = Load1QMsanU16(src1 + 0, overread_in_bytes + 0);
+ s[1][1] = Load1QMsanU16(src1 + 8, overread_in_bytes + 16);
+
+ Square(s[0][0], sq[0]);
+ Square(s[1][0], sq[1]);
+ BoxFilterPreProcess5Lo(s, scale, sum5, square_sum5, sq, &mas[0], bs);
+
+ int x = 0;
+ do {
+ uint16x8_t ma[2];
+ uint32x4_t b[2][2];
+ uint8x16_t ma5[3];
+ int16x8_t p[2];
+
+ s[0][2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[0][3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ s[1][2] = Load1QMsanU16(src1 + x + 16,
+ overread_in_bytes + sizeof(*src1) * (x + 16));
+ s[1][3] = Load1QMsanU16(src1 + x + 24,
+ overread_in_bytes + sizeof(*src1) * (x + 24));
+ BoxFilterPreProcess5(s, sum_width, x + 8, scale, sum5, square_sum5, sq, mas,
+ bs);
+ Prepare3_8<0>(mas, ma5);
+ ma[1] = Sum565Lo(ma5);
+ vst1q_u16(ma565[1] + x, ma[1]);
+ Sum565(bs, b[1]);
+ StoreAligned32U32(b565[1] + x, b[1]);
+ const uint16x8_t sr0_lo = vld1q_u16(src + x + 0);
+ const uint16x8_t sr1_lo = vld1q_u16(src + stride + x + 0);
+ ma[0] = vld1q_u16(ma565[0] + x);
+ LoadAligned32U32(b565[0] + x, b[0]);
+ p[0] = CalculateFilteredOutputPass1(sr0_lo, ma, b);
+ p[1] = CalculateFilteredOutput<4>(sr1_lo, ma[1], b[1]);
+ const int16x8_t d00 = SelfGuidedSingleMultiplier(sr0_lo, p[0], w0);
+ const int16x8_t d10 = SelfGuidedSingleMultiplier(sr1_lo, p[1], w0);
+
+ ma[1] = Sum565Hi(ma5);
+ vst1q_u16(ma565[1] + x + 8, ma[1]);
+ Sum565(bs + 2, b[1]);
+ StoreAligned32U32(b565[1] + x + 8, b[1]);
+ const uint16x8_t sr0_hi = vld1q_u16(src + x + 8);
+ const uint16x8_t sr1_hi = vld1q_u16(src + stride + x + 8);
+ ma[0] = vld1q_u16(ma565[0] + x + 8);
+ LoadAligned32U32(b565[0] + x + 8, b[0]);
+ p[0] = CalculateFilteredOutputPass1(sr0_hi, ma, b);
+ p[1] = CalculateFilteredOutput<4>(sr1_hi, ma[1], b[1]);
+ const int16x8_t d01 = SelfGuidedSingleMultiplier(sr0_hi, p[0], w0);
+ ClipAndStore(dst + x + 0, d00);
+ ClipAndStore(dst + x + 8, d01);
+ const int16x8_t d11 = SelfGuidedSingleMultiplier(sr1_hi, p[1], w0);
+ ClipAndStore(dst + stride + x + 0, d10);
+ ClipAndStore(dst + stride + x + 8, d11);
+ s[0][0] = s[0][2];
+ s[0][1] = s[0][3];
+ s[1][0] = s[1][2];
+ s[1][1] = s[1][3];
+ sq[0][2] = sq[0][6];
+ sq[0][3] = sq[0][7];
+ sq[1][2] = sq[1][6];
+ sq[1][3] = sq[1][7];
+ mas[0] = mas[1];
+ bs[0] = bs[4];
+ bs[1] = bs[5];
+ x += 16;
+ } while (x < width);
+}
+
+inline void BoxFilterPass1LastRow(
+ const uint16_t* const src, const uint16_t* const src0, const int width,
+ const ptrdiff_t sum_width, const uint32_t scale, const int16_t w0,
+ uint16_t* const sum5[5], uint32_t* const square_sum5[5], uint16_t* ma565,
+ uint32_t* b565, uint16_t* const dst) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[4];
+ uint8x16_t mas[2];
+ uint32x4_t sq[8], bs[6];
+
+ s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ Square(s[0], sq);
+ BoxFilterPreProcess5LastRowLo(s, scale, sum5, square_sum5, sq, &mas[0], bs);
+
+ int x = 0;
+ do {
+ uint16x8_t ma[2];
+ uint32x4_t b[2][2];
+ uint8x16_t ma5[3];
+
+ s[2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ BoxFilterPreProcess5LastRow(s, sum_width, x + 8, scale, sum5, square_sum5,
+ sq, mas, bs);
+ Prepare3_8<0>(mas, ma5);
+ ma[1] = Sum565Lo(ma5);
+ Sum565(bs, b[1]);
+ ma[0] = vld1q_u16(ma565);
+ LoadAligned32U32(b565, b[0]);
+ const uint16x8_t sr_lo = vld1q_u16(src + x + 0);
+ int16x8_t p = CalculateFilteredOutputPass1(sr_lo, ma, b);
+ const int16x8_t d0 = SelfGuidedSingleMultiplier(sr_lo, p, w0);
+
+ ma[1] = Sum565Hi(ma5);
+ Sum565(bs + 2, b[1]);
+ ma[0] = vld1q_u16(ma565 + 8);
+ LoadAligned32U32(b565 + 8, b[0]);
+ const uint16x8_t sr_hi = vld1q_u16(src + x + 8);
+ p = CalculateFilteredOutputPass1(sr_hi, ma, b);
+ const int16x8_t d1 = SelfGuidedSingleMultiplier(sr_hi, p, w0);
+ ClipAndStore(dst + x + 0, d0);
+ ClipAndStore(dst + x + 8, d1);
+ s[1] = s[3];
+ sq[2] = sq[6];
+ sq[3] = sq[7];
+ mas[0] = mas[1];
+ bs[0] = bs[4];
+ bs[1] = bs[5];
+ ma565 += 16;
+ b565 += 16;
+ x += 16;
+ } while (x < width);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterPass2(
+ const uint16_t* const src, const uint16_t* const src0, const int width,
+ const ptrdiff_t sum_width, const uint32_t scale, const int16_t w0,
+ uint16_t* const sum3[3], uint32_t* const square_sum3[3],
+ uint16_t* const ma343[3], uint16_t* const ma444[2], uint32_t* const b343[3],
+ uint32_t* const b444[2], uint16_t* const dst) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass2 - sizeof(*src0) * width;
+ uint16x8_t s[4];
+ uint8x16_t mas[2];
+ uint32x4_t sq[8], bs[6];
+
+ s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ Square(s[0], sq);
+ // Quiet "may be used uninitialized" warning.
+ mas[0] = mas[1] = vdupq_n_u8(0);
+ BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs);
+
+ int x = 0;
+ do {
+ s[2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ BoxFilterPreProcess3(s, x + 8, sum_width, scale, sum3, square_sum3, sq, mas,
+ bs);
+ uint16x8_t ma[3];
+ uint32x4_t b[3][2];
+ uint8x16_t ma3[3];
+
+ Prepare3_8<0>(mas, ma3);
+ Store343_444Lo(ma3, bs + 0, x, &ma[2], b[2], ma343[2], ma444[1], b343[2],
+ b444[1]);
+ const uint16x8_t sr_lo = vld1q_u16(src + x + 0);
+ ma[0] = vld1q_u16(ma343[0] + x);
+ ma[1] = vld1q_u16(ma444[0] + x);
+ LoadAligned32U32(b343[0] + x, b[0]);
+ LoadAligned32U32(b444[0] + x, b[1]);
+ const int16x8_t p0 = CalculateFilteredOutputPass2(sr_lo, ma, b);
+
+ Store343_444Hi(ma3, bs + 2, x + 8, &ma[2], b[2], ma343[2], ma444[1],
+ b343[2], b444[1]);
+ const uint16x8_t sr_hi = vld1q_u16(src + x + 8);
+ ma[0] = vld1q_u16(ma343[0] + x + 8);
+ ma[1] = vld1q_u16(ma444[0] + x + 8);
+ LoadAligned32U32(b343[0] + x + 8, b[0]);
+ LoadAligned32U32(b444[0] + x + 8, b[1]);
+ const int16x8_t p1 = CalculateFilteredOutputPass2(sr_hi, ma, b);
+ const int16x8_t d0 = SelfGuidedSingleMultiplier(sr_lo, p0, w0);
+ const int16x8_t d1 = SelfGuidedSingleMultiplier(sr_hi, p1, w0);
+ ClipAndStore(dst + x + 0, d0);
+ ClipAndStore(dst + x + 8, d1);
+ s[1] = s[3];
+ sq[2] = sq[6];
+ sq[3] = sq[7];
+ mas[0] = mas[1];
+ bs[0] = bs[4];
+ bs[1] = bs[5];
+ x += 16;
+ } while (x < width);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilter(
+ const uint16_t* const src, const uint16_t* const src0,
+ const uint16_t* const src1, const ptrdiff_t stride, const int width,
+ const uint16_t scales[2], const int16_t w0, const int16_t w2,
+ uint16_t* const sum3[4], uint16_t* const sum5[5],
+ uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
+ const ptrdiff_t sum_width, uint16_t* const ma343[4],
+ uint16_t* const ma444[3], uint16_t* const ma565[2], uint32_t* const b343[4],
+ uint32_t* const b444[3], uint32_t* const b565[2], uint16_t* const dst) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[2][4];
+ uint8x16_t ma3[2][2], ma5[2];
+ uint32x4_t sq[2][8], b3[2][6], b5[6];
+
+ s[0][0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[0][1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ s[1][0] = Load1QMsanU16(src1 + 0, overread_in_bytes + 0);
+ s[1][1] = Load1QMsanU16(src1 + 8, overread_in_bytes + 16);
+ Square(s[0][0], sq[0]);
+ Square(s[1][0], sq[1]);
+ BoxFilterPreProcessLo(s, scales, sum3, sum5, square_sum3, square_sum5, sq,
+ ma3, b3, &ma5[0], b5);
+
+ int x = 0;
+ do {
+ uint16x8_t ma[3][3];
+ uint32x4_t b[3][3][2];
+ uint8x16_t ma3x[2][3], ma5x[3];
+ int16x8_t p[2][2];
+
+ s[0][2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[0][3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ s[1][2] = Load1QMsanU16(src1 + x + 16,
+ overread_in_bytes + sizeof(*src1) * (x + 16));
+ s[1][3] = Load1QMsanU16(src1 + x + 24,
+ overread_in_bytes + sizeof(*src1) * (x + 24));
+
+ BoxFilterPreProcess(s, x + 8, scales, sum3, sum5, square_sum3, square_sum5,
+ sum_width, sq, ma3, b3, ma5, b5);
+ Prepare3_8<0>(ma3[0], ma3x[0]);
+ Prepare3_8<0>(ma3[1], ma3x[1]);
+ Prepare3_8<0>(ma5, ma5x);
+ Store343_444Lo(ma3x[0], b3[0], x, &ma[1][2], &ma[2][1], b[1][2], b[2][1],
+ ma343[2], ma444[1], b343[2], b444[1]);
+ Store343_444Lo(ma3x[1], b3[1], x, &ma[2][2], b[2][2], ma343[3], ma444[2],
+ b343[3], b444[2]);
+ ma[0][1] = Sum565Lo(ma5x);
+ vst1q_u16(ma565[1] + x, ma[0][1]);
+ Sum565(b5, b[0][1]);
+ StoreAligned32U32(b565[1] + x, b[0][1]);
+ const uint16x8_t sr0_lo = vld1q_u16(src + x);
+ const uint16x8_t sr1_lo = vld1q_u16(src + stride + x);
+ ma[0][0] = vld1q_u16(ma565[0] + x);
+ LoadAligned32U32(b565[0] + x, b[0][0]);
+ p[0][0] = CalculateFilteredOutputPass1(sr0_lo, ma[0], b[0]);
+ p[1][0] = CalculateFilteredOutput<4>(sr1_lo, ma[0][1], b[0][1]);
+ ma[1][0] = vld1q_u16(ma343[0] + x);
+ ma[1][1] = vld1q_u16(ma444[0] + x);
+ LoadAligned32U32(b343[0] + x, b[1][0]);
+ LoadAligned32U32(b444[0] + x, b[1][1]);
+ p[0][1] = CalculateFilteredOutputPass2(sr0_lo, ma[1], b[1]);
+ const int16x8_t d00 = SelfGuidedDoubleMultiplier(sr0_lo, p[0], w0, w2);
+ ma[2][0] = vld1q_u16(ma343[1] + x);
+ LoadAligned32U32(b343[1] + x, b[2][0]);
+ p[1][1] = CalculateFilteredOutputPass2(sr1_lo, ma[2], b[2]);
+ const int16x8_t d10 = SelfGuidedDoubleMultiplier(sr1_lo, p[1], w0, w2);
+
+ Store343_444Hi(ma3x[0], b3[0] + 2, x + 8, &ma[1][2], &ma[2][1], b[1][2],
+ b[2][1], ma343[2], ma444[1], b343[2], b444[1]);
+ Store343_444Hi(ma3x[1], b3[1] + 2, x + 8, &ma[2][2], b[2][2], ma343[3],
+ ma444[2], b343[3], b444[2]);
+ ma[0][1] = Sum565Hi(ma5x);
+ vst1q_u16(ma565[1] + x + 8, ma[0][1]);
+ Sum565(b5 + 2, b[0][1]);
+ StoreAligned32U32(b565[1] + x + 8, b[0][1]);
+ const uint16x8_t sr0_hi = Load1QMsanU16(
+ src + x + 8, overread_in_bytes + 4 + sizeof(*src) * (x + 8));
+ const uint16x8_t sr1_hi = Load1QMsanU16(
+ src + stride + x + 8, overread_in_bytes + 4 + sizeof(*src) * (x + 8));
+ ma[0][0] = vld1q_u16(ma565[0] + x + 8);
+ LoadAligned32U32(b565[0] + x + 8, b[0][0]);
+ p[0][0] = CalculateFilteredOutputPass1(sr0_hi, ma[0], b[0]);
+ p[1][0] = CalculateFilteredOutput<4>(sr1_hi, ma[0][1], b[0][1]);
+ ma[1][0] = vld1q_u16(ma343[0] + x + 8);
+ ma[1][1] = vld1q_u16(ma444[0] + x + 8);
+ LoadAligned32U32(b343[0] + x + 8, b[1][0]);
+ LoadAligned32U32(b444[0] + x + 8, b[1][1]);
+ p[0][1] = CalculateFilteredOutputPass2(sr0_hi, ma[1], b[1]);
+ const int16x8_t d01 = SelfGuidedDoubleMultiplier(sr0_hi, p[0], w0, w2);
+ ClipAndStore(dst + x + 0, d00);
+ ClipAndStore(dst + x + 8, d01);
+ ma[2][0] = vld1q_u16(ma343[1] + x + 8);
+ LoadAligned32U32(b343[1] + x + 8, b[2][0]);
+ p[1][1] = CalculateFilteredOutputPass2(sr1_hi, ma[2], b[2]);
+ const int16x8_t d11 = SelfGuidedDoubleMultiplier(sr1_hi, p[1], w0, w2);
+ ClipAndStore(dst + stride + x + 0, d10);
+ ClipAndStore(dst + stride + x + 8, d11);
+ s[0][0] = s[0][2];
+ s[0][1] = s[0][3];
+ s[1][0] = s[1][2];
+ s[1][1] = s[1][3];
+ sq[0][2] = sq[0][6];
+ sq[0][3] = sq[0][7];
+ sq[1][2] = sq[1][6];
+ sq[1][3] = sq[1][7];
+ ma3[0][0] = ma3[0][1];
+ ma3[1][0] = ma3[1][1];
+ ma5[0] = ma5[1];
+ b3[0][0] = b3[0][4];
+ b3[0][1] = b3[0][5];
+ b3[1][0] = b3[1][4];
+ b3[1][1] = b3[1][5];
+ b5[0] = b5[4];
+ b5[1] = b5[5];
+ x += 16;
+ } while (x < width);
+}
+
+inline void BoxFilterLastRow(
+ const uint16_t* const src, const uint16_t* const src0, const int width,
+ const ptrdiff_t sum_width, const uint16_t scales[2], const int16_t w0,
+ const int16_t w2, uint16_t* const sum3[4], uint16_t* const sum5[5],
+ uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
+ uint16_t* const ma343, uint16_t* const ma444, uint16_t* const ma565,
+ uint32_t* const b343, uint32_t* const b444, uint32_t* const b565,
+ uint16_t* const dst) {
+ const ptrdiff_t overread_in_bytes =
+ kOverreadInBytesPass1 - sizeof(*src0) * width;
+ uint16x8_t s[4];
+ uint8x16_t ma3[2], ma5[2];
+ uint32x4_t sq[8], b3[6], b5[6];
+ uint16x8_t ma[3];
+ uint32x4_t b[3][2];
+
+ s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
+ s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
+ Square(s[0], sq);
+ // Quiet "may be used uninitialized" warning.
+ ma3[0] = ma3[1] = vdupq_n_u8(0);
+ BoxFilterPreProcessLastRowLo(s, scales, sum3, sum5, square_sum3, square_sum5,
+ sq, &ma3[0], &ma5[0], b3, b5);
+
+ int x = 0;
+ do {
+ uint8x16_t ma3x[3], ma5x[3];
+ int16x8_t p[2];
+
+ s[2] = Load1QMsanU16(src0 + x + 16,
+ overread_in_bytes + sizeof(*src0) * (x + 16));
+ s[3] = Load1QMsanU16(src0 + x + 24,
+ overread_in_bytes + sizeof(*src0) * (x + 24));
+ BoxFilterPreProcessLastRow(s, sum_width, x + 8, scales, sum3, sum5,
+ square_sum3, square_sum5, sq, ma3, ma5, b3, b5);
+ Prepare3_8<0>(ma3, ma3x);
+ Prepare3_8<0>(ma5, ma5x);
+ ma[1] = Sum565Lo(ma5x);
+ Sum565(b5, b[1]);
+ ma[2] = Sum343Lo(ma3x);
+ Sum343(b3, b[2]);
+ const uint16x8_t sr_lo = vld1q_u16(src + x + 0);
+ ma[0] = vld1q_u16(ma565 + x);
+ LoadAligned32U32(b565 + x, b[0]);
+ p[0] = CalculateFilteredOutputPass1(sr_lo, ma, b);
+ ma[0] = vld1q_u16(ma343 + x);
+ ma[1] = vld1q_u16(ma444 + x);
+ LoadAligned32U32(b343 + x, b[0]);
+ LoadAligned32U32(b444 + x, b[1]);
+ p[1] = CalculateFilteredOutputPass2(sr_lo, ma, b);
+ const int16x8_t d0 = SelfGuidedDoubleMultiplier(sr_lo, p, w0, w2);
+
+ ma[1] = Sum565Hi(ma5x);
+ Sum565(b5 + 2, b[1]);
+ ma[2] = Sum343Hi(ma3x);
+ Sum343(b3 + 2, b[2]);
+ const uint16x8_t sr_hi = Load1QMsanU16(
+ src + x + 8, overread_in_bytes + 4 + sizeof(*src) * (x + 8));
+ ma[0] = vld1q_u16(ma565 + x + 8);
+ LoadAligned32U32(b565 + x + 8, b[0]);
+ p[0] = CalculateFilteredOutputPass1(sr_hi, ma, b);
+ ma[0] = vld1q_u16(ma343 + x + 8);
+ ma[1] = vld1q_u16(ma444 + x + 8);
+ LoadAligned32U32(b343 + x + 8, b[0]);
+ LoadAligned32U32(b444 + x + 8, b[1]);
+ p[1] = CalculateFilteredOutputPass2(sr_hi, ma, b);
+ const int16x8_t d1 = SelfGuidedDoubleMultiplier(sr_hi, p, w0, w2);
+ ClipAndStore(dst + x + 0, d0);
+ ClipAndStore(dst + x + 8, d1);
+ s[1] = s[3];
+ sq[2] = sq[6];
+ sq[3] = sq[7];
+ ma3[0] = ma3[1];
+ ma5[0] = ma5[1];
+ b3[0] = b3[4];
+ b3[1] = b3[5];
+ b5[0] = b5[4];
+ b5[1] = b5[5];
+ x += 16;
+ } while (x < width);
+}
+
+LIBGAV1_ALWAYS_INLINE void BoxFilterProcess(
+ const RestorationUnitInfo& restoration_info, const uint16_t* src,
+ const ptrdiff_t stride, const uint16_t* const top_border,
+ const ptrdiff_t top_border_stride, const uint16_t* bottom_border,
+ const ptrdiff_t bottom_border_stride, const int width, const int height,
+ SgrBuffer* const sgr_buffer, uint16_t* dst) {
+ const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
+ const auto sum_stride = temp_stride + 16;
+ const int sgr_proj_index = restoration_info.sgr_proj_info.index;
+ const uint16_t* const scales = kSgrScaleParameter[sgr_proj_index]; // < 2^12.
+ const int16_t w0 = restoration_info.sgr_proj_info.multiplier[0];
+ const int16_t w1 = restoration_info.sgr_proj_info.multiplier[1];
+ const int16_t w2 = (1 << kSgrProjPrecisionBits) - w0 - w1;
+ uint16_t *sum3[4], *sum5[5], *ma343[4], *ma444[3], *ma565[2];
+ uint32_t *square_sum3[4], *square_sum5[5], *b343[4], *b444[3], *b565[2];
+ sum3[0] = sgr_buffer->sum3;
+ square_sum3[0] = sgr_buffer->square_sum3;
+ ma343[0] = sgr_buffer->ma343;
+ b343[0] = sgr_buffer->b343;
+ for (int i = 1; i <= 3; ++i) {
+ sum3[i] = sum3[i - 1] + sum_stride;
+ square_sum3[i] = square_sum3[i - 1] + sum_stride;
+ ma343[i] = ma343[i - 1] + temp_stride;
+ b343[i] = b343[i - 1] + temp_stride;
+ }
+ sum5[0] = sgr_buffer->sum5;
+ square_sum5[0] = sgr_buffer->square_sum5;
+ for (int i = 1; i <= 4; ++i) {
+ sum5[i] = sum5[i - 1] + sum_stride;
+ square_sum5[i] = square_sum5[i - 1] + sum_stride;
+ }
+ ma444[0] = sgr_buffer->ma444;
+ b444[0] = sgr_buffer->b444;
+ for (int i = 1; i <= 2; ++i) {
+ ma444[i] = ma444[i - 1] + temp_stride;
+ b444[i] = b444[i - 1] + temp_stride;
+ }
+ ma565[0] = sgr_buffer->ma565;
+ ma565[1] = ma565[0] + temp_stride;
+ b565[0] = sgr_buffer->b565;
+ b565[1] = b565[0] + temp_stride;
+ assert(scales[0] != 0);
+ assert(scales[1] != 0);
+ BoxSum(top_border, top_border_stride, width, sum_stride, sum_width, sum3[0],
+ sum5[1], square_sum3[0], square_sum5[1]);
+ sum5[0] = sum5[1];
+ square_sum5[0] = square_sum5[1];
+ const uint16_t* const s = (height > 1) ? src + stride : bottom_border;
+ BoxSumFilterPreProcess(src, s, width, scales, sum3, sum5, square_sum3,
+ square_sum5, sum_width, ma343, ma444[0], ma565[0],
+ b343, b444[0], b565[0]);
+ sum5[0] = sgr_buffer->sum5;
+ square_sum5[0] = sgr_buffer->square_sum5;
+
+ for (int y = (height >> 1) - 1; y > 0; --y) {
+ Circulate4PointersBy2<uint16_t>(sum3);
+ Circulate4PointersBy2<uint32_t>(square_sum3);
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ BoxFilter(src + 3, src + 2 * stride, src + 3 * stride, stride, width,
+ scales, w0, w2, sum3, sum5, square_sum3, square_sum5, sum_width,
+ ma343, ma444, ma565, b343, b444, b565, dst);
+ src += 2 * stride;
+ dst += 2 * stride;
+ Circulate4PointersBy2<uint16_t>(ma343);
+ Circulate4PointersBy2<uint32_t>(b343);
+ std::swap(ma444[0], ma444[2]);
+ std::swap(b444[0], b444[2]);
+ std::swap(ma565[0], ma565[1]);
+ std::swap(b565[0], b565[1]);
+ }
+
+ Circulate4PointersBy2<uint16_t>(sum3);
+ Circulate4PointersBy2<uint32_t>(square_sum3);
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ if ((height & 1) == 0 || height > 1) {
+ const uint16_t* sr[2];
+ if ((height & 1) == 0) {
+ sr[0] = bottom_border;
+ sr[1] = bottom_border + bottom_border_stride;
+ } else {
+ sr[0] = src + 2 * stride;
+ sr[1] = bottom_border;
+ }
+ BoxFilter(src + 3, sr[0], sr[1], stride, width, scales, w0, w2, sum3, sum5,
+ square_sum3, square_sum5, sum_width, ma343, ma444, ma565, b343,
+ b444, b565, dst);
+ }
+ if ((height & 1) != 0) {
+ if (height > 1) {
+ src += 2 * stride;
+ dst += 2 * stride;
+ Circulate4PointersBy2<uint16_t>(sum3);
+ Circulate4PointersBy2<uint32_t>(square_sum3);
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ Circulate4PointersBy2<uint16_t>(ma343);
+ Circulate4PointersBy2<uint32_t>(b343);
+ std::swap(ma444[0], ma444[2]);
+ std::swap(b444[0], b444[2]);
+ std::swap(ma565[0], ma565[1]);
+ std::swap(b565[0], b565[1]);
+ }
+ BoxFilterLastRow(src + 3, bottom_border + bottom_border_stride, width,
+ sum_width, scales, w0, w2, sum3, sum5, square_sum3,
+ square_sum5, ma343[0], ma444[0], ma565[0], b343[0],
+ b444[0], b565[0], dst);
+ }
+}
+
+inline void BoxFilterProcessPass1(const RestorationUnitInfo& restoration_info,
+ const uint16_t* src, const ptrdiff_t stride,
+ const uint16_t* const top_border,
+ const ptrdiff_t top_border_stride,
+ const uint16_t* bottom_border,
+ const ptrdiff_t bottom_border_stride,
+ const int width, const int height,
+ SgrBuffer* const sgr_buffer, uint16_t* dst) {
+ const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
+ const auto sum_stride = temp_stride + 16;
+ const int sgr_proj_index = restoration_info.sgr_proj_info.index;
+ const uint32_t scale = kSgrScaleParameter[sgr_proj_index][0]; // < 2^12.
+ const int16_t w0 = restoration_info.sgr_proj_info.multiplier[0];
+ uint16_t *sum5[5], *ma565[2];
+ uint32_t *square_sum5[5], *b565[2];
+ sum5[0] = sgr_buffer->sum5;
+ square_sum5[0] = sgr_buffer->square_sum5;
+ for (int i = 1; i <= 4; ++i) {
+ sum5[i] = sum5[i - 1] + sum_stride;
+ square_sum5[i] = square_sum5[i - 1] + sum_stride;
+ }
+ ma565[0] = sgr_buffer->ma565;
+ ma565[1] = ma565[0] + temp_stride;
+ b565[0] = sgr_buffer->b565;
+ b565[1] = b565[0] + temp_stride;
+ assert(scale != 0);
+
+ BoxSum<5>(top_border, top_border_stride, width, sum_stride, sum_width,
+ sum5[1], square_sum5[1]);
+ sum5[0] = sum5[1];
+ square_sum5[0] = square_sum5[1];
+ const uint16_t* const s = (height > 1) ? src + stride : bottom_border;
+ BoxSumFilterPreProcess5(src, s, width, scale, sum5, square_sum5, sum_width,
+ ma565[0], b565[0]);
+ sum5[0] = sgr_buffer->sum5;
+ square_sum5[0] = sgr_buffer->square_sum5;
+
+ for (int y = (height >> 1) - 1; y > 0; --y) {
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ BoxFilterPass1(src + 3, src + 2 * stride, src + 3 * stride, stride, sum5,
+ square_sum5, width, sum_width, scale, w0, ma565, b565, dst);
+ src += 2 * stride;
+ dst += 2 * stride;
+ std::swap(ma565[0], ma565[1]);
+ std::swap(b565[0], b565[1]);
+ }
+
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ if ((height & 1) == 0 || height > 1) {
+ const uint16_t* sr[2];
+ if ((height & 1) == 0) {
+ sr[0] = bottom_border;
+ sr[1] = bottom_border + bottom_border_stride;
+ } else {
+ sr[0] = src + 2 * stride;
+ sr[1] = bottom_border;
+ }
+ BoxFilterPass1(src + 3, sr[0], sr[1], stride, sum5, square_sum5, width,
+ sum_width, scale, w0, ma565, b565, dst);
+ }
+ if ((height & 1) != 0) {
+ src += 3;
+ if (height > 1) {
+ src += 2 * stride;
+ dst += 2 * stride;
+ std::swap(ma565[0], ma565[1]);
+ std::swap(b565[0], b565[1]);
+ Circulate5PointersBy2<uint16_t>(sum5);
+ Circulate5PointersBy2<uint32_t>(square_sum5);
+ }
+ BoxFilterPass1LastRow(src, bottom_border + bottom_border_stride, width,
+ sum_width, scale, w0, sum5, square_sum5, ma565[0],
+ b565[0], dst);
+ }
+}
+
+inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
+ const uint16_t* src, const ptrdiff_t stride,
+ const uint16_t* const top_border,
+ const ptrdiff_t top_border_stride,
+ const uint16_t* bottom_border,
+ const ptrdiff_t bottom_border_stride,
+ const int width, const int height,
+ SgrBuffer* const sgr_buffer, uint16_t* dst) {
+ assert(restoration_info.sgr_proj_info.multiplier[0] == 0);
+ const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
+ const auto sum_stride = temp_stride + 16;
+ const int16_t w1 = restoration_info.sgr_proj_info.multiplier[1];
+ const int16_t w0 = (1 << kSgrProjPrecisionBits) - w1;
+ const int sgr_proj_index = restoration_info.sgr_proj_info.index;
+ const uint32_t scale = kSgrScaleParameter[sgr_proj_index][1]; // < 2^12.
+ uint16_t *sum3[3], *ma343[3], *ma444[2];
+ uint32_t *square_sum3[3], *b343[3], *b444[2];
+ sum3[0] = sgr_buffer->sum3;
+ square_sum3[0] = sgr_buffer->square_sum3;
+ ma343[0] = sgr_buffer->ma343;
+ b343[0] = sgr_buffer->b343;
+ for (int i = 1; i <= 2; ++i) {
+ sum3[i] = sum3[i - 1] + sum_stride;
+ square_sum3[i] = square_sum3[i - 1] + sum_stride;
+ ma343[i] = ma343[i - 1] + temp_stride;
+ b343[i] = b343[i - 1] + temp_stride;
+ }
+ ma444[0] = sgr_buffer->ma444;
+ ma444[1] = ma444[0] + temp_stride;
+ b444[0] = sgr_buffer->b444;
+ b444[1] = b444[0] + temp_stride;
+ assert(scale != 0);
+ BoxSum<3>(top_border, top_border_stride, width, sum_stride, sum_width,
+ sum3[0], square_sum3[0]);
+ BoxSumFilterPreProcess3<false>(src, width, scale, sum3, square_sum3,
+ sum_width, ma343[0], nullptr, b343[0],
+ nullptr);
+ Circulate3PointersBy1<uint16_t>(sum3);
+ Circulate3PointersBy1<uint32_t>(square_sum3);
+ const uint16_t* s;
+ if (height > 1) {
+ s = src + stride;
+ } else {
+ s = bottom_border;
+ bottom_border += bottom_border_stride;
+ }
+ BoxSumFilterPreProcess3<true>(s, width, scale, sum3, square_sum3, sum_width,
+ ma343[1], ma444[0], b343[1], b444[0]);
+
+ for (int y = height - 2; y > 0; --y) {
+ Circulate3PointersBy1<uint16_t>(sum3);
+ Circulate3PointersBy1<uint32_t>(square_sum3);
+ BoxFilterPass2(src + 2, src + 2 * stride, width, sum_width, scale, w0, sum3,
+ square_sum3, ma343, ma444, b343, b444, dst);
+ src += stride;
+ dst += stride;
+ Circulate3PointersBy1<uint16_t>(ma343);
+ Circulate3PointersBy1<uint32_t>(b343);
+ std::swap(ma444[0], ma444[1]);
+ std::swap(b444[0], b444[1]);
+ }
+
+ int y = std::min(height, 2);
+ src += 2;
+ do {
+ Circulate3PointersBy1<uint16_t>(sum3);
+ Circulate3PointersBy1<uint32_t>(square_sum3);
+ BoxFilterPass2(src, bottom_border, width, sum_width, scale, w0, sum3,
+ square_sum3, ma343, ma444, b343, b444, dst);
+ src += stride;
+ dst += stride;
+ bottom_border += bottom_border_stride;
+ Circulate3PointersBy1<uint16_t>(ma343);
+ Circulate3PointersBy1<uint32_t>(b343);
+ std::swap(ma444[0], ma444[1]);
+ std::swap(b444[0], b444[1]);
+ } while (--y != 0);
+}
+
+// If |width| is non-multiple of 8, up to 7 more pixels are written to |dest| in
+// the end of each row. It is safe to overwrite the output as it will not be
+// part of the visible frame.
+void SelfGuidedFilter_NEON(
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
+ const ptrdiff_t bottom_border_stride, const int width, const int height,
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
+ const int index = restoration_info.sgr_proj_info.index;
+ const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
+ const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
+ const auto* const src = static_cast<const uint16_t*>(source);
+ const auto* top = static_cast<const uint16_t*>(top_border);
+ const auto* bottom = static_cast<const uint16_t*>(bottom_border);
+ auto* const dst = static_cast<uint16_t*>(dest);
+ SgrBuffer* const sgr_buffer = &restoration_buffer->sgr_buffer;
+ if (radius_pass_1 == 0) {
+ // |radius_pass_0| and |radius_pass_1| cannot both be 0, so we have the
+ // following assertion.
+ assert(radius_pass_0 != 0);
+ BoxFilterProcessPass1(restoration_info, src - 3, stride, top - 3,
+ top_border_stride, bottom - 3, bottom_border_stride,
+ width, height, sgr_buffer, dst);
+ } else if (radius_pass_0 == 0) {
+ BoxFilterProcessPass2(restoration_info, src - 2, stride, top - 2,
+ top_border_stride, bottom - 2, bottom_border_stride,
+ width, height, sgr_buffer, dst);
+ } else {
+ BoxFilterProcess(restoration_info, src - 3, stride, top - 3,
+ top_border_stride, bottom - 3, bottom_border_stride, width,
+ height, sgr_buffer, dst);
+ }
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->loop_restorations[0] = WienerFilter_NEON;
+ dsp->loop_restorations[1] = SelfGuidedFilter_NEON;
+}
+
+} // namespace
+
+void LoopRestorationInit10bpp_NEON() { Init10bpp(); }
+
+} // namespace dsp
+} // namespace libgav1
+
+#else // !(LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10)
+namespace libgav1 {
+namespace dsp {
+
+void LoopRestorationInit10bpp_NEON() {}
+
+} // namespace dsp
+} // namespace libgav1
+#endif // LIBGAV1_ENABLE_NEON && LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/arm/loop_restoration_neon.cc b/src/dsp/arm/loop_restoration_neon.cc
index e6ceb66..2db137f 100644
--- a/src/dsp/arm/loop_restoration_neon.cc
+++ b/src/dsp/arm/loop_restoration_neon.cc
@@ -28,6 +28,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
namespace libgav1 {
@@ -491,11 +492,14 @@ inline void WienerVerticalTap1(const int16_t* wiener_buffer,
// filter row by row. This is faster than doing it column by column when
// considering cache issues.
void WienerFilter_NEON(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
const int number_rows_to_skip = std::max(
@@ -591,6 +595,74 @@ void WienerFilter_NEON(
//------------------------------------------------------------------------------
// SGR
+// SIMD overreads 8 - (width % 8) - 2 * padding pixels, where padding is 3 for
+// Pass 1 and 2 for Pass 2.
+constexpr int kOverreadInBytesPass1 = 2;
+constexpr int kOverreadInBytesPass2 = 4;
+
+// SIMD overreads 16 - (width % 16) - 2 * padding pixels, where padding is 3 for
+// Pass 1 and 2 for Pass 2.
+constexpr int kWideOverreadInBytesPass1 = 10;
+constexpr int kWideOverreadInBytesPass2 = 12;
+
+inline void LoadAligned16x2U16(const uint16_t* const src[2], const ptrdiff_t x,
+ uint16x8_t dst[2]) {
+ dst[0] = vld1q_u16(src[0] + x);
+ dst[1] = vld1q_u16(src[1] + x);
+}
+
+inline void LoadAligned16x3U16(const uint16_t* const src[3], const ptrdiff_t x,
+ uint16x8_t dst[3]) {
+ dst[0] = vld1q_u16(src[0] + x);
+ dst[1] = vld1q_u16(src[1] + x);
+ dst[2] = vld1q_u16(src[2] + x);
+}
+
+inline void LoadAligned32U32(const uint32_t* const src, uint32x4x2_t* dst) {
+ (*dst).val[0] = vld1q_u32(src + 0);
+ (*dst).val[1] = vld1q_u32(src + 4);
+}
+
+inline void LoadAligned32x2U32(const uint32_t* const src[2], const ptrdiff_t x,
+ uint32x4x2_t dst[2]) {
+ LoadAligned32U32(src[0] + x, &dst[0]);
+ LoadAligned32U32(src[1] + x, &dst[1]);
+}
+
+inline void LoadAligned32x3U32(const uint32_t* const src[3], const ptrdiff_t x,
+ uint32x4x2_t dst[3]) {
+ LoadAligned32U32(src[0] + x, &dst[0]);
+ LoadAligned32U32(src[1] + x, &dst[1]);
+ LoadAligned32U32(src[2] + x, &dst[2]);
+}
+
+inline void StoreAligned32U16(uint16_t* const dst, const uint16x8_t src[2]) {
+ vst1q_u16(dst + 0, src[0]);
+ vst1q_u16(dst + 8, src[1]);
+}
+
+inline void StoreAligned32U32(uint32_t* const dst, const uint32x4x2_t src) {
+ vst1q_u32(dst + 0, src.val[0]);
+ vst1q_u32(dst + 4, src.val[1]);
+}
+
+inline void StoreAligned64U32(uint32_t* const dst, const uint32x4x2_t src[2]) {
+ vst1q_u32(dst + 0, src[0].val[0]);
+ vst1q_u32(dst + 4, src[0].val[1]);
+ vst1q_u32(dst + 8, src[1].val[0]);
+ vst1q_u32(dst + 12, src[1].val[1]);
+}
+
+inline uint16x8_t SquareLo8(const uint8x8_t src) { return vmull_u8(src, src); }
+
+inline uint16x8_t SquareLo8(const uint8x16_t src) {
+ return vmull_u8(vget_low_u8(src), vget_low_u8(src));
+}
+
+inline uint16x8_t SquareHi8(const uint8x16_t src) {
+ return vmull_u8(vget_high_u8(src), vget_high_u8(src));
+}
+
inline void Prepare3_8(const uint8x8_t src[2], uint8x8_t dst[3]) {
dst[0] = VshrU128<0>(src);
dst[1] = VshrU128<1>(src);
@@ -904,58 +976,69 @@ inline uint32x4x2_t Sum565W(const uint16x8_t src[2]) {
}
inline void BoxSum(const uint8_t* src, const ptrdiff_t src_stride,
- const ptrdiff_t sum_stride, uint16_t* sum3, uint16_t* sum5,
+ const ptrdiff_t width, const ptrdiff_t sum_stride,
+ const ptrdiff_t sum_width, uint16_t* sum3, uint16_t* sum5,
uint32_t* square_sum3, uint32_t* square_sum5) {
+ const ptrdiff_t overread_in_bytes = kOverreadInBytesPass1 - width;
int y = 2;
// Don't change loop width to 16, which is even slower.
do {
uint8x8_t s[2];
uint16x8_t sq[2];
- s[0] = vld1_u8(src);
- sq[0] = vmull_u8(s[0], s[0]);
- ptrdiff_t x = 0;
+ s[0] = Load1MsanU8(src, overread_in_bytes);
+ sq[0] = SquareLo8(s[0]);
+ ptrdiff_t x = sum_width;
do {
uint16x8_t row3, row5;
uint32x4x2_t row_sq3, row_sq5;
- s[1] = vld1_u8(src + x + 8);
- sq[1] = vmull_u8(s[1], s[1]);
+ x -= 8;
+ src += 8;
+ s[1] = Load1MsanU8(src, sum_width - x + overread_in_bytes);
+ sq[1] = SquareLo8(s[1]);
SumHorizontal(s, sq, &row3, &row5, &row_sq3, &row_sq5);
vst1q_u16(sum3, row3);
vst1q_u16(sum5, row5);
- vst1q_u32(square_sum3 + 0, row_sq3.val[0]);
- vst1q_u32(square_sum3 + 4, row_sq3.val[1]);
- vst1q_u32(square_sum5 + 0, row_sq5.val[0]);
- vst1q_u32(square_sum5 + 4, row_sq5.val[1]);
+ StoreAligned32U32(square_sum3 + 0, row_sq3);
+ StoreAligned32U32(square_sum5 + 0, row_sq5);
s[0] = s[1];
sq[0] = sq[1];
sum3 += 8;
sum5 += 8;
square_sum3 += 8;
square_sum5 += 8;
- x += 8;
- } while (x < sum_stride);
- src += src_stride;
+ } while (x != 0);
+ src += src_stride - sum_width;
+ sum3 += sum_stride - sum_width;
+ sum5 += sum_stride - sum_width;
+ square_sum3 += sum_stride - sum_width;
+ square_sum5 += sum_stride - sum_width;
} while (--y != 0);
}
template <int size>
inline void BoxSum(const uint8_t* src, const ptrdiff_t src_stride,
- const ptrdiff_t sum_stride, uint16_t* sums,
+ const ptrdiff_t width, const ptrdiff_t sum_stride,
+ const ptrdiff_t sum_width, uint16_t* sums,
uint32_t* square_sums) {
static_assert(size == 3 || size == 5, "");
+ const ptrdiff_t overread_in_bytes =
+ ((size == 5) ? kOverreadInBytesPass1 : kOverreadInBytesPass2) -
+ sizeof(*src) * width;
int y = 2;
// Don't change loop width to 16, which is even slower.
do {
uint8x8_t s[2];
uint16x8_t sq[2];
- s[0] = vld1_u8(src);
- sq[0] = vmull_u8(s[0], s[0]);
- ptrdiff_t x = 0;
+ s[0] = Load1MsanU8(src, overread_in_bytes);
+ sq[0] = SquareLo8(s[0]);
+ ptrdiff_t x = sum_width;
do {
uint16x8_t row;
uint32x4x2_t row_sq;
- s[1] = vld1_u8(src + x + 8);
- sq[1] = vmull_u8(s[1], s[1]);
+ x -= 8;
+ src += 8;
+ s[1] = Load1MsanU8(src, sum_width - x + overread_in_bytes);
+ sq[1] = SquareLo8(s[1]);
if (size == 3) {
row = Sum3Horizontal(s);
row_sq = Sum3WHorizontal(sq);
@@ -964,15 +1047,15 @@ inline void BoxSum(const uint8_t* src, const ptrdiff_t src_stride,
row_sq = Sum5WHorizontal(sq);
}
vst1q_u16(sums, row);
- vst1q_u32(square_sums + 0, row_sq.val[0]);
- vst1q_u32(square_sums + 4, row_sq.val[1]);
+ StoreAligned32U32(square_sums, row_sq);
s[0] = s[1];
sq[0] = sq[1];
sums += 8;
square_sums += 8;
- x += 8;
- } while (x < sum_stride);
- src += src_stride;
+ } while (x != 0);
+ src += src_stride - sum_width;
+ sums += sum_stride - sum_width;
+ square_sums += sum_stride - sum_width;
} while (--y != 0);
}
@@ -1143,339 +1226,216 @@ inline void Store343_444(const uint8x16_t ma3[3], const uint16x8_t b3[2],
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5Lo(
- const uint8_t* const src0, const uint8_t* const src1, const uint32_t scale,
- uint8x16_t s[2][2], uint16_t* const sum5[5], uint32_t* const square_sum5[5],
- uint16x8_t sq[2][4], uint8x16_t* const ma, uint16x8_t* const b) {
+ uint8x16_t s[2][2], const uint32_t scale, uint16_t* const sum5[5],
+ uint32_t* const square_sum5[5], uint16x8_t sq[2][4], uint8x16_t* const ma,
+ uint16x8_t* const b) {
uint16x8_t s5[5];
uint32x4x2_t sq5[5];
- s[0][0] = vld1q_u8(src0);
- s[1][0] = vld1q_u8(src1);
- sq[0][0] = vmull_u8(vget_low_u8(s[0][0]), vget_low_u8(s[0][0]));
- sq[1][0] = vmull_u8(vget_low_u8(s[1][0]), vget_low_u8(s[1][0]));
- sq[0][1] = vmull_u8(vget_high_u8(s[0][0]), vget_high_u8(s[0][0]));
- sq[1][1] = vmull_u8(vget_high_u8(s[1][0]), vget_high_u8(s[1][0]));
+ sq[0][0] = SquareLo8(s[0][0]);
+ sq[1][0] = SquareLo8(s[1][0]);
+ sq[0][1] = SquareHi8(s[0][0]);
+ sq[1][1] = SquareHi8(s[1][0]);
s5[3] = Sum5Horizontal(s[0][0]);
s5[4] = Sum5Horizontal(s[1][0]);
sq5[3] = Sum5WHorizontal(sq[0]);
sq5[4] = Sum5WHorizontal(sq[1]);
vst1q_u16(sum5[3], s5[3]);
vst1q_u16(sum5[4], s5[4]);
- vst1q_u32(square_sum5[3] + 0, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + 4, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + 0, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + 4, sq5[4].val[1]);
- s5[0] = vld1q_u16(sum5[0]);
- s5[1] = vld1q_u16(sum5[1]);
- s5[2] = vld1q_u16(sum5[2]);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + 4);
+ StoreAligned32U32(square_sum5[3], sq5[3]);
+ StoreAligned32U32(square_sum5[4], sq5[4]);
+ LoadAligned16x3U16(sum5, 0, s5);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
CalculateIntermediate5<0>(s5, sq5, scale, ma, b);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5(
- const uint8_t* const src0, const uint8_t* const src1, const ptrdiff_t x,
- const uint32_t scale, uint8x16_t s[2][2], uint16_t* const sum5[5],
- uint32_t* const square_sum5[5], uint16x8_t sq[2][4], uint8x16_t ma[2],
- uint16x8_t b[2]) {
+ uint8x16_t s[2][2], const ptrdiff_t x, const uint32_t scale,
+ uint16_t* const sum5[5], uint32_t* const square_sum5[5],
+ uint16x8_t sq[2][4], uint8x16_t ma[2], uint16x8_t b[2]) {
uint16x8_t s5[2][5];
uint32x4x2_t sq5[5];
- s[0][1] = vld1q_u8(src0 + x + 8);
- s[1][1] = vld1q_u8(src1 + x + 8);
- sq[0][2] = vmull_u8(vget_low_u8(s[0][1]), vget_low_u8(s[0][1]));
- sq[1][2] = vmull_u8(vget_low_u8(s[1][1]), vget_low_u8(s[1][1]));
+ sq[0][2] = SquareLo8(s[0][1]);
+ sq[1][2] = SquareLo8(s[1][1]);
Sum5Horizontal<8>(s[0], &s5[0][3], &s5[1][3]);
Sum5Horizontal<8>(s[1], &s5[0][4], &s5[1][4]);
sq5[3] = Sum5WHorizontal(sq[0] + 1);
sq5[4] = Sum5WHorizontal(sq[1] + 1);
vst1q_u16(sum5[3] + x, s5[0][3]);
vst1q_u16(sum5[4] + x, s5[0][4]);
- vst1q_u32(square_sum5[3] + x + 0, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + x + 4, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + x + 0, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + x + 4, sq5[4].val[1]);
- s5[0][0] = vld1q_u16(sum5[0] + x);
- s5[0][1] = vld1q_u16(sum5[1] + x);
- s5[0][2] = vld1q_u16(sum5[2] + x);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 4);
+ StoreAligned32U32(square_sum5[3] + x, sq5[3]);
+ StoreAligned32U32(square_sum5[4] + x, sq5[4]);
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ LoadAligned32x3U32(square_sum5, x, sq5);
CalculateIntermediate5<8>(s5[0], sq5, scale, &ma[0], &b[0]);
- sq[0][3] = vmull_u8(vget_high_u8(s[0][1]), vget_high_u8(s[0][1]));
- sq[1][3] = vmull_u8(vget_high_u8(s[1][1]), vget_high_u8(s[1][1]));
+ sq[0][3] = SquareHi8(s[0][1]);
+ sq[1][3] = SquareHi8(s[1][1]);
sq5[3] = Sum5WHorizontal(sq[0] + 2);
sq5[4] = Sum5WHorizontal(sq[1] + 2);
vst1q_u16(sum5[3] + x + 8, s5[1][3]);
vst1q_u16(sum5[4] + x + 8, s5[1][4]);
- vst1q_u32(square_sum5[3] + x + 8, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + x + 12, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + x + 8, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + x + 12, sq5[4].val[1]);
- s5[1][0] = vld1q_u16(sum5[0] + x + 8);
- s5[1][1] = vld1q_u16(sum5[1] + x + 8);
- s5[1][2] = vld1q_u16(sum5[2] + x + 8);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 8);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 12);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 8);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 12);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 8);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 12);
+ StoreAligned32U32(square_sum5[3] + x + 8, sq5[3]);
+ StoreAligned32U32(square_sum5[4] + x + 8, sq5[4]);
+ LoadAligned16x3U16(sum5, x + 8, s5[1]);
+ LoadAligned32x3U32(square_sum5, x + 8, sq5);
CalculateIntermediate5<0>(s5[1], sq5, scale, &ma[1], &b[1]);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5LastRowLo(
- const uint8_t* const src, const uint32_t scale, uint8x16_t* const s,
- const uint16_t* const sum5[5], const uint32_t* const square_sum5[5],
- uint16x8_t sq[2], uint8x16_t* const ma, uint16x8_t* const b) {
+ uint8x16_t* const s, const uint32_t scale, const uint16_t* const sum5[5],
+ const uint32_t* const square_sum5[5], uint16x8_t sq[2],
+ uint8x16_t* const ma, uint16x8_t* const b) {
uint16x8_t s5[5];
uint32x4x2_t sq5[5];
- *s = vld1q_u8(src);
- sq[0] = vmull_u8(vget_low_u8(*s), vget_low_u8(*s));
- sq[1] = vmull_u8(vget_high_u8(*s), vget_high_u8(*s));
+ sq[0] = SquareLo8(s[0]);
+ sq[1] = SquareHi8(s[0]);
s5[3] = s5[4] = Sum5Horizontal(*s);
sq5[3] = sq5[4] = Sum5WHorizontal(sq);
- s5[0] = vld1q_u16(sum5[0]);
- s5[1] = vld1q_u16(sum5[1]);
- s5[2] = vld1q_u16(sum5[2]);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + 4);
+ LoadAligned16x3U16(sum5, 0, s5);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
CalculateIntermediate5<0>(s5, sq5, scale, ma, b);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess5LastRow(
- const uint8_t* const src, const ptrdiff_t x, const uint32_t scale,
- uint8x16_t s[2], const uint16_t* const sum5[5],
- const uint32_t* const square_sum5[5], uint16x8_t sq[3], uint8x16_t ma[2],
- uint16x8_t b[2]) {
+ uint8x16_t s[2], const ptrdiff_t x, const uint32_t scale,
+ const uint16_t* const sum5[5], const uint32_t* const square_sum5[5],
+ uint16x8_t sq[3], uint8x16_t ma[2], uint16x8_t b[2]) {
uint16x8_t s5[2][5];
uint32x4x2_t sq5[5];
- s[1] = vld1q_u8(src + x + 8);
- sq[1] = vmull_u8(vget_low_u8(s[1]), vget_low_u8(s[1]));
+ sq[1] = SquareLo8(s[1]);
Sum5Horizontal<8>(s, &s5[0][3], &s5[1][3]);
sq5[3] = sq5[4] = Sum5WHorizontal(sq);
- s5[0][0] = vld1q_u16(sum5[0] + x);
- s5[0][1] = vld1q_u16(sum5[1] + x);
- s5[0][2] = vld1q_u16(sum5[2] + x);
+ LoadAligned16x3U16(sum5, x, s5[0]);
s5[0][4] = s5[0][3];
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 4);
+ LoadAligned32x3U32(square_sum5, x, sq5);
CalculateIntermediate5<8>(s5[0], sq5, scale, &ma[0], &b[0]);
- sq[2] = vmull_u8(vget_high_u8(s[1]), vget_high_u8(s[1]));
+ sq[2] = SquareHi8(s[1]);
sq5[3] = sq5[4] = Sum5WHorizontal(sq + 1);
- s5[1][0] = vld1q_u16(sum5[0] + x + 8);
- s5[1][1] = vld1q_u16(sum5[1] + x + 8);
- s5[1][2] = vld1q_u16(sum5[2] + x + 8);
+ LoadAligned16x3U16(sum5, x + 8, s5[1]);
s5[1][4] = s5[1][3];
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 8);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 12);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 8);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 12);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 8);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 12);
+ LoadAligned32x3U32(square_sum5, x + 8, sq5);
CalculateIntermediate5<0>(s5[1], sq5, scale, &ma[1], &b[1]);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess3Lo(
- const uint8_t* const src, const uint32_t scale, uint8x16_t* const s,
- uint16_t* const sum3[3], uint32_t* const square_sum3[3], uint16x8_t sq[2],
- uint8x16_t* const ma, uint16x8_t* const b) {
+ uint8x16_t* const s, const uint32_t scale, uint16_t* const sum3[3],
+ uint32_t* const square_sum3[3], uint16x8_t sq[2], uint8x16_t* const ma,
+ uint16x8_t* const b) {
uint16x8_t s3[3];
uint32x4x2_t sq3[3];
- *s = vld1q_u8(src);
- sq[0] = vmull_u8(vget_low_u8(*s), vget_low_u8(*s));
- sq[1] = vmull_u8(vget_high_u8(*s), vget_high_u8(*s));
+ sq[0] = SquareLo8(*s);
+ sq[1] = SquareHi8(*s);
s3[2] = Sum3Horizontal(*s);
sq3[2] = Sum3WHorizontal(sq);
vst1q_u16(sum3[2], s3[2]);
- vst1q_u32(square_sum3[2] + 0, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + 4, sq3[2].val[1]);
- s3[0] = vld1q_u16(sum3[0]);
- s3[1] = vld1q_u16(sum3[1]);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + 4);
+ StoreAligned32U32(square_sum3[2], sq3[2]);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
CalculateIntermediate3<0>(s3, sq3, scale, ma, b);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess3(
- const uint8_t* const src, const ptrdiff_t x, const uint32_t scale,
- uint16_t* const sum3[3], uint32_t* const square_sum3[3], uint8x16_t s[2],
- uint16x8_t sq[3], uint8x16_t ma[2], uint16x8_t b[2]) {
+ uint8x16_t s[2], const ptrdiff_t x, const uint32_t scale,
+ uint16_t* const sum3[3], uint32_t* const square_sum3[3], uint16x8_t sq[3],
+ uint8x16_t ma[2], uint16x8_t b[2]) {
uint16x8_t s3[4];
uint32x4x2_t sq3[3];
- s[1] = vld1q_u8(src + x + 8);
- sq[1] = vmull_u8(vget_low_u8(s[1]), vget_low_u8(s[1]));
+ sq[1] = SquareLo8(s[1]);
Sum3Horizontal<8>(s, s3 + 2);
sq3[2] = Sum3WHorizontal(sq);
vst1q_u16(sum3[2] + x, s3[2]);
- vst1q_u32(square_sum3[2] + x + 0, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + x + 4, sq3[2].val[1]);
- s3[0] = vld1q_u16(sum3[0] + x);
- s3[1] = vld1q_u16(sum3[1] + x);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 4);
+ StoreAligned32U32(square_sum3[2] + x, sq3[2]);
+ LoadAligned16x2U16(sum3, x, s3);
+ LoadAligned32x2U32(square_sum3, x, sq3);
CalculateIntermediate3<8>(s3, sq3, scale, &ma[0], &b[0]);
- sq[2] = vmull_u8(vget_high_u8(s[1]), vget_high_u8(s[1]));
+ sq[2] = SquareHi8(s[1]);
sq3[2] = Sum3WHorizontal(sq + 1);
vst1q_u16(sum3[2] + x + 8, s3[3]);
- vst1q_u32(square_sum3[2] + x + 8, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + x + 12, sq3[2].val[1]);
- s3[1] = vld1q_u16(sum3[0] + x + 8);
- s3[2] = vld1q_u16(sum3[1] + x + 8);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 8);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 12);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 8);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 12);
+ StoreAligned32U32(square_sum3[2] + x + 8, sq3[2]);
+ LoadAligned16x2U16(sum3, x + 8, s3 + 1);
+ LoadAligned32x2U32(square_sum3, x + 8, sq3);
CalculateIntermediate3<0>(s3 + 1, sq3, scale, &ma[1], &b[1]);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLo(
- const uint8_t* const src0, const uint8_t* const src1,
- const uint16_t scales[2], uint8x16_t s[2][2], uint16_t* const sum3[4],
+ uint8x16_t s[2][2], const uint16_t scales[2], uint16_t* const sum3[4],
uint16_t* const sum5[5], uint32_t* const square_sum3[4],
uint32_t* const square_sum5[5], uint16x8_t sq[2][4], uint8x16_t ma3[2][2],
uint16x8_t b3[2][3], uint8x16_t* const ma5, uint16x8_t* const b5) {
uint16x8_t s3[4], s5[5];
uint32x4x2_t sq3[4], sq5[5];
- s[0][0] = vld1q_u8(src0);
- s[1][0] = vld1q_u8(src1);
- sq[0][0] = vmull_u8(vget_low_u8(s[0][0]), vget_low_u8(s[0][0]));
- sq[1][0] = vmull_u8(vget_low_u8(s[1][0]), vget_low_u8(s[1][0]));
- sq[0][1] = vmull_u8(vget_high_u8(s[0][0]), vget_high_u8(s[0][0]));
- sq[1][1] = vmull_u8(vget_high_u8(s[1][0]), vget_high_u8(s[1][0]));
+ sq[0][0] = SquareLo8(s[0][0]);
+ sq[1][0] = SquareLo8(s[1][0]);
+ sq[0][1] = SquareHi8(s[0][0]);
+ sq[1][1] = SquareHi8(s[1][0]);
SumHorizontal(s[0][0], sq[0], &s3[2], &s5[3], &sq3[2], &sq5[3]);
SumHorizontal(s[1][0], sq[1], &s3[3], &s5[4], &sq3[3], &sq5[4]);
vst1q_u16(sum3[2], s3[2]);
vst1q_u16(sum3[3], s3[3]);
- vst1q_u32(square_sum3[2] + 0, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + 4, sq3[2].val[1]);
- vst1q_u32(square_sum3[3] + 0, sq3[3].val[0]);
- vst1q_u32(square_sum3[3] + 4, sq3[3].val[1]);
+ StoreAligned32U32(square_sum3[2], sq3[2]);
+ StoreAligned32U32(square_sum3[3], sq3[3]);
vst1q_u16(sum5[3], s5[3]);
vst1q_u16(sum5[4], s5[4]);
- vst1q_u32(square_sum5[3] + 0, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + 4, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + 0, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + 4, sq5[4].val[1]);
- s3[0] = vld1q_u16(sum3[0]);
- s3[1] = vld1q_u16(sum3[1]);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + 4);
- s5[0] = vld1q_u16(sum5[0]);
- s5[1] = vld1q_u16(sum5[1]);
- s5[2] = vld1q_u16(sum5[2]);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + 4);
+ StoreAligned32U32(square_sum5[3], sq5[3]);
+ StoreAligned32U32(square_sum5[4], sq5[4]);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
+ LoadAligned16x3U16(sum5, 0, s5);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
CalculateIntermediate3<0>(s3, sq3, scales[1], ma3[0], b3[0]);
CalculateIntermediate3<0>(s3 + 1, sq3 + 1, scales[1], ma3[1], b3[1]);
CalculateIntermediate5<0>(s5, sq5, scales[0], ma5, b5);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess(
- const uint8_t* const src0, const uint8_t* const src1, const ptrdiff_t x,
- const uint16_t scales[2], uint8x16_t s[2][2], uint16_t* const sum3[4],
- uint16_t* const sum5[5], uint32_t* const square_sum3[4],
- uint32_t* const square_sum5[5], uint16x8_t sq[2][4], uint8x16_t ma3[2][2],
- uint16x8_t b3[2][3], uint8x16_t ma5[2], uint16x8_t b5[2]) {
+ const uint8x16_t s[2][2], const ptrdiff_t x, const uint16_t scales[2],
+ uint16_t* const sum3[4], uint16_t* const sum5[5],
+ uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
+ uint16x8_t sq[2][4], uint8x16_t ma3[2][2], uint16x8_t b3[2][3],
+ uint8x16_t ma5[2], uint16x8_t b5[2]) {
uint16x8_t s3[2][4], s5[2][5];
uint32x4x2_t sq3[4], sq5[5];
- s[0][1] = vld1q_u8(src0 + x + 8);
- s[1][1] = vld1q_u8(src1 + x + 8);
- sq[0][2] = vmull_u8(vget_low_u8(s[0][1]), vget_low_u8(s[0][1]));
- sq[1][2] = vmull_u8(vget_low_u8(s[1][1]), vget_low_u8(s[1][1]));
+ sq[0][2] = SquareLo8(s[0][1]);
+ sq[1][2] = SquareLo8(s[1][1]);
SumHorizontal<8>(s[0], &s3[0][2], &s3[1][2], &s5[0][3], &s5[1][3]);
SumHorizontal<8>(s[1], &s3[0][3], &s3[1][3], &s5[0][4], &s5[1][4]);
SumHorizontal(sq[0] + 1, &sq3[2], &sq5[3]);
SumHorizontal(sq[1] + 1, &sq3[3], &sq5[4]);
vst1q_u16(sum3[2] + x, s3[0][2]);
vst1q_u16(sum3[3] + x, s3[0][3]);
- vst1q_u32(square_sum3[2] + x + 0, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + x + 4, sq3[2].val[1]);
- vst1q_u32(square_sum3[3] + x + 0, sq3[3].val[0]);
- vst1q_u32(square_sum3[3] + x + 4, sq3[3].val[1]);
+ StoreAligned32U32(square_sum3[2] + x, sq3[2]);
+ StoreAligned32U32(square_sum3[3] + x, sq3[3]);
vst1q_u16(sum5[3] + x, s5[0][3]);
vst1q_u16(sum5[4] + x, s5[0][4]);
- vst1q_u32(square_sum5[3] + x + 0, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + x + 4, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + x + 0, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + x + 4, sq5[4].val[1]);
- s3[0][0] = vld1q_u16(sum3[0] + x);
- s3[0][1] = vld1q_u16(sum3[1] + x);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 4);
- s5[0][0] = vld1q_u16(sum5[0] + x);
- s5[0][1] = vld1q_u16(sum5[1] + x);
- s5[0][2] = vld1q_u16(sum5[2] + x);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 4);
+ StoreAligned32U32(square_sum5[3] + x, sq5[3]);
+ StoreAligned32U32(square_sum5[4] + x, sq5[4]);
+ LoadAligned16x2U16(sum3, x, s3[0]);
+ LoadAligned32x2U32(square_sum3, x, sq3);
+ LoadAligned16x3U16(sum5, x, s5[0]);
+ LoadAligned32x3U32(square_sum5, x, sq5);
CalculateIntermediate3<8>(s3[0], sq3, scales[1], &ma3[0][0], &b3[0][1]);
CalculateIntermediate3<8>(s3[0] + 1, sq3 + 1, scales[1], &ma3[1][0],
&b3[1][1]);
CalculateIntermediate5<8>(s5[0], sq5, scales[0], &ma5[0], &b5[0]);
- sq[0][3] = vmull_u8(vget_high_u8(s[0][1]), vget_high_u8(s[0][1]));
- sq[1][3] = vmull_u8(vget_high_u8(s[1][1]), vget_high_u8(s[1][1]));
+ sq[0][3] = SquareHi8(s[0][1]);
+ sq[1][3] = SquareHi8(s[1][1]);
SumHorizontal(sq[0] + 2, &sq3[2], &sq5[3]);
SumHorizontal(sq[1] + 2, &sq3[3], &sq5[4]);
vst1q_u16(sum3[2] + x + 8, s3[1][2]);
vst1q_u16(sum3[3] + x + 8, s3[1][3]);
- vst1q_u32(square_sum3[2] + x + 8, sq3[2].val[0]);
- vst1q_u32(square_sum3[2] + x + 12, sq3[2].val[1]);
- vst1q_u32(square_sum3[3] + x + 8, sq3[3].val[0]);
- vst1q_u32(square_sum3[3] + x + 12, sq3[3].val[1]);
+ StoreAligned32U32(square_sum3[2] + x + 8, sq3[2]);
+ StoreAligned32U32(square_sum3[3] + x + 8, sq3[3]);
vst1q_u16(sum5[3] + x + 8, s5[1][3]);
vst1q_u16(sum5[4] + x + 8, s5[1][4]);
- vst1q_u32(square_sum5[3] + x + 8, sq5[3].val[0]);
- vst1q_u32(square_sum5[3] + x + 12, sq5[3].val[1]);
- vst1q_u32(square_sum5[4] + x + 8, sq5[4].val[0]);
- vst1q_u32(square_sum5[4] + x + 12, sq5[4].val[1]);
- s3[1][0] = vld1q_u16(sum3[0] + x + 8);
- s3[1][1] = vld1q_u16(sum3[1] + x + 8);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 8);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 12);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 8);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 12);
- s5[1][0] = vld1q_u16(sum5[0] + x + 8);
- s5[1][1] = vld1q_u16(sum5[1] + x + 8);
- s5[1][2] = vld1q_u16(sum5[2] + x + 8);
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 8);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 12);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 8);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 12);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 8);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 12);
+ StoreAligned32U32(square_sum5[3] + x + 8, sq5[3]);
+ StoreAligned32U32(square_sum5[4] + x + 8, sq5[4]);
+ LoadAligned16x2U16(sum3, x + 8, s3[1]);
+ LoadAligned32x2U32(square_sum3, x + 8, sq3);
+ LoadAligned16x3U16(sum5, x + 8, s5[1]);
+ LoadAligned32x3U32(square_sum5, x + 8, sq5);
CalculateIntermediate3<0>(s3[1], sq3, scales[1], &ma3[0][1], &b3[0][2]);
CalculateIntermediate3<0>(s3[1] + 1, sq3 + 1, scales[1], &ma3[1][1],
&b3[1][2]);
@@ -1483,90 +1443,55 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcess(
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLastRowLo(
- const uint8_t* const src, const uint16_t scales[2],
+ uint8x16_t* const s, const uint16_t scales[2],
const uint16_t* const sum3[4], const uint16_t* const sum5[5],
const uint32_t* const square_sum3[4], const uint32_t* const square_sum5[5],
- uint8x16_t* const s, uint16x8_t sq[2], uint8x16_t* const ma3,
- uint8x16_t* const ma5, uint16x8_t* const b3, uint16x8_t* const b5) {
+ uint16x8_t sq[2], uint8x16_t* const ma3, uint8x16_t* const ma5,
+ uint16x8_t* const b3, uint16x8_t* const b5) {
uint16x8_t s3[3], s5[5];
uint32x4x2_t sq3[3], sq5[5];
- *s = vld1q_u8(src);
- sq[0] = vmull_u8(vget_low_u8(*s), vget_low_u8(*s));
- sq[1] = vmull_u8(vget_high_u8(*s), vget_high_u8(*s));
+ sq[0] = SquareLo8(s[0]);
+ sq[1] = SquareHi8(s[0]);
SumHorizontal(*s, sq, &s3[2], &s5[3], &sq3[2], &sq5[3]);
- s5[0] = vld1q_u16(sum5[0]);
- s5[1] = vld1q_u16(sum5[1]);
- s5[2] = vld1q_u16(sum5[2]);
+ LoadAligned16x3U16(sum5, 0, s5);
s5[4] = s5[3];
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + 4);
+ LoadAligned32x3U32(square_sum5, 0, sq5);
sq5[4] = sq5[3];
CalculateIntermediate5<0>(s5, sq5, scales[0], ma5, b5);
- s3[0] = vld1q_u16(sum3[0]);
- s3[1] = vld1q_u16(sum3[1]);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + 4);
+ LoadAligned16x2U16(sum3, 0, s3);
+ LoadAligned32x2U32(square_sum3, 0, sq3);
CalculateIntermediate3<0>(s3, sq3, scales[1], ma3, b3);
}
LIBGAV1_ALWAYS_INLINE void BoxFilterPreProcessLastRow(
- const uint8_t* const src, const ptrdiff_t x, const uint16_t scales[2],
+ uint8x16_t s[2], const ptrdiff_t x, const uint16_t scales[2],
const uint16_t* const sum3[4], const uint16_t* const sum5[5],
const uint32_t* const square_sum3[4], const uint32_t* const square_sum5[5],
- uint8x16_t s[2], uint16x8_t sq[3], uint8x16_t ma3[2], uint8x16_t ma5[2],
- uint16x8_t b3[2], uint16x8_t b5[2]) {
+ uint16x8_t sq[3], uint8x16_t ma3[2], uint8x16_t ma5[2], uint16x8_t b3[2],
+ uint16x8_t b5[2]) {
uint16x8_t s3[2][3], s5[2][5];
uint32x4x2_t sq3[3], sq5[5];
- s[1] = vld1q_u8(src + x + 8);
- sq[1] = vmull_u8(vget_low_u8(s[1]), vget_low_u8(s[1]));
+ sq[1] = SquareLo8(s[1]);
SumHorizontal<8>(s, &s3[0][2], &s3[1][2], &s5[0][3], &s5[1][3]);
SumHorizontal(sq, &sq3[2], &sq5[3]);
- s5[0][0] = vld1q_u16(sum5[0] + x);
- s5[0][1] = vld1q_u16(sum5[1] + x);
- s5[0][2] = vld1q_u16(sum5[2] + x);
+ LoadAligned16x3U16(sum5, x, s5[0]);
s5[0][4] = s5[0][3];
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 0);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 4);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 0);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 4);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 0);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 4);
+ LoadAligned32x3U32(square_sum5, x, sq5);
sq5[4] = sq5[3];
CalculateIntermediate5<8>(s5[0], sq5, scales[0], &ma5[0], &b5[0]);
- s3[0][0] = vld1q_u16(sum3[0] + x);
- s3[0][1] = vld1q_u16(sum3[1] + x);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 0);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 4);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 0);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 4);
+ LoadAligned16x2U16(sum3, x, s3[0]);
+ LoadAligned32x2U32(square_sum3, x, sq3);
CalculateIntermediate3<8>(s3[0], sq3, scales[1], &ma3[0], &b3[0]);
- sq[2] = vmull_u8(vget_high_u8(s[1]), vget_high_u8(s[1]));
+ sq[2] = SquareHi8(s[1]);
SumHorizontal(sq + 1, &sq3[2], &sq5[3]);
- s5[1][0] = vld1q_u16(sum5[0] + x + 8);
- s5[1][1] = vld1q_u16(sum5[1] + x + 8);
- s5[1][2] = vld1q_u16(sum5[2] + x + 8);
+ LoadAligned16x3U16(sum5, x + 8, s5[1]);
s5[1][4] = s5[1][3];
- sq5[0].val[0] = vld1q_u32(square_sum5[0] + x + 8);
- sq5[0].val[1] = vld1q_u32(square_sum5[0] + x + 12);
- sq5[1].val[0] = vld1q_u32(square_sum5[1] + x + 8);
- sq5[1].val[1] = vld1q_u32(square_sum5[1] + x + 12);
- sq5[2].val[0] = vld1q_u32(square_sum5[2] + x + 8);
- sq5[2].val[1] = vld1q_u32(square_sum5[2] + x + 12);
+ LoadAligned32x3U32(square_sum5, x + 8, sq5);
sq5[4] = sq5[3];
CalculateIntermediate5<0>(s5[1], sq5, scales[0], &ma5[1], &b5[1]);
- s3[1][0] = vld1q_u16(sum3[0] + x + 8);
- s3[1][1] = vld1q_u16(sum3[1] + x + 8);
- sq3[0].val[0] = vld1q_u32(square_sum3[0] + x + 8);
- sq3[0].val[1] = vld1q_u32(square_sum3[0] + x + 12);
- sq3[1].val[0] = vld1q_u32(square_sum3[1] + x + 8);
- sq3[1].val[1] = vld1q_u32(square_sum3[1] + x + 12);
+ LoadAligned16x2U16(sum3, x + 8, s3[1]);
+ LoadAligned32x2U32(square_sum3, x + 8, sq3);
CalculateIntermediate3<0>(s3[1], sq3, scales[1], &ma3[1], &b3[1]);
}
@@ -1576,18 +1501,23 @@ inline void BoxSumFilterPreProcess5(const uint8_t* const src0,
uint16_t* const sum5[5],
uint32_t* const square_sum5[5],
uint16_t* ma565, uint32_t* b565) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass1 - width;
uint8x16_t s[2][2], mas[2];
uint16x8_t sq[2][4], bs[3];
- BoxFilterPreProcess5Lo(src0, src1, scale, s, sum5, square_sum5, sq, &mas[0],
- &bs[0]);
+ // TODO(b/194217060): Future msan load.
+ s[0][0] = vld1q_u8(src0);
+ s[1][0] = vld1q_u8(src1);
+
+ BoxFilterPreProcess5Lo(s, scale, sum5, square_sum5, sq, &mas[0], &bs[0]);
int x = 0;
do {
uint16x8_t ma[2];
uint8x16_t masx[3];
uint32x4x2_t b[2];
- BoxFilterPreProcess5(src0, src1, x + 8, scale, s, sum5, square_sum5, sq,
- mas, bs + 1);
+ s[0][1] = Load1QMsanU8(src0 + x + 16, x + 16 + overread_in_bytes);
+ s[1][1] = Load1QMsanU8(src1 + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess5(s, x + 8, scale, sum5, square_sum5, sq, mas, bs + 1);
Prepare3_8<0>(mas, masx);
ma[0] = Sum565<0>(masx);
b[0] = Sum565W(bs);
@@ -1617,15 +1547,17 @@ LIBGAV1_ALWAYS_INLINE void BoxSumFilterPreProcess3(
const uint8_t* const src, const int width, const uint32_t scale,
uint16_t* const sum3[3], uint32_t* const square_sum3[3], uint16_t* ma343,
uint16_t* ma444, uint32_t* b343, uint32_t* b444) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass2 - width;
uint8x16_t s[2], mas[2];
uint16x8_t sq[4], bs[3];
- BoxFilterPreProcess3Lo(src, scale, &s[0], sum3, square_sum3, sq, &mas[0],
- &bs[0]);
+ s[0] = Load1QMsanU8(src, overread_in_bytes);
+ BoxFilterPreProcess3Lo(&s[0], scale, sum3, square_sum3, sq, &mas[0], &bs[0]);
int x = 0;
do {
uint8x16_t ma3x[3];
- BoxFilterPreProcess3(src, x + 8, scale, sum3, square_sum3, s, sq + 1, mas,
+ s[1] = Load1QMsanU8(src + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess3(s, x + 8, scale, sum3, square_sum3, sq + 1, mas,
bs + 1);
Prepare3_8<0>(mas, ma3x);
if (calculate444) {
@@ -1664,43 +1596,43 @@ inline void BoxSumFilterPreProcess(
uint32_t* const square_sum3[4], uint32_t* const square_sum5[5],
uint16_t* const ma343[4], uint16_t* const ma444, uint16_t* ma565,
uint32_t* const b343[4], uint32_t* const b444, uint32_t* b565) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass1 - width;
uint8x16_t s[2][2], ma3[2][2], ma5[2];
uint16x8_t sq[2][4], b3[2][3], b5[3];
- BoxFilterPreProcessLo(src0, src1, scales, s, sum3, sum5, square_sum3,
- square_sum5, sq, ma3, b3, &ma5[0], &b5[0]);
+ // TODO(b/194217060): Future msan load.
+ s[0][0] = vld1q_u8(src0);
+ s[1][0] = vld1q_u8(src1);
+
+ BoxFilterPreProcessLo(s, scales, sum3, sum5, square_sum3, square_sum5, sq,
+ ma3, b3, &ma5[0], &b5[0]);
int x = 0;
do {
uint16x8_t ma[2];
uint8x16_t ma3x[3], ma5x[3];
uint32x4x2_t b[2];
- BoxFilterPreProcess(src0, src1, x + 8, scales, s, sum3, sum5, square_sum3,
- square_sum5, sq, ma3, b3, ma5, b5 + 1);
+
+ s[0][1] = Load1QMsanU8(src0 + x + 16, x + 16 + overread_in_bytes);
+ s[1][1] = Load1QMsanU8(src1 + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess(s, x + 8, scales, sum3, sum5, square_sum3, square_sum5,
+ sq, ma3, b3, ma5, b5 + 1);
Prepare3_8<0>(ma3[0], ma3x);
ma[0] = Sum343<0>(ma3x);
ma[1] = Sum343<8>(ma3x);
+ StoreAligned32U16(ma343[0] + x, ma);
b[0] = Sum343W(b3[0] + 0);
b[1] = Sum343W(b3[0] + 1);
- vst1q_u16(ma343[0] + x, ma[0]);
- vst1q_u16(ma343[0] + x + 8, ma[1]);
- vst1q_u32(b343[0] + x, b[0].val[0]);
- vst1q_u32(b343[0] + x + 4, b[0].val[1]);
- vst1q_u32(b343[0] + x + 8, b[1].val[0]);
- vst1q_u32(b343[0] + x + 12, b[1].val[1]);
+ StoreAligned64U32(b343[0] + x, b);
Prepare3_8<0>(ma3[1], ma3x);
Store343_444<0>(ma3x, b3[1], x, ma343[1], ma444, b343[1], b444);
Store343_444<8>(ma3x, b3[1] + 1, x + 8, ma343[1], ma444, b343[1], b444);
Prepare3_8<0>(ma5, ma5x);
ma[0] = Sum565<0>(ma5x);
ma[1] = Sum565<8>(ma5x);
+ StoreAligned32U16(ma565, ma);
b[0] = Sum565W(b5);
b[1] = Sum565W(b5 + 1);
- vst1q_u16(ma565, ma[0]);
- vst1q_u16(ma565 + 8, ma[1]);
- vst1q_u32(b565 + 0, b[0].val[0]);
- vst1q_u32(b565 + 4, b[0].val[1]);
- vst1q_u32(b565 + 8, b[1].val[0]);
- vst1q_u32(b565 + 12, b[1].val[1]);
+ StoreAligned64U32(b565, b);
s[0][0] = s[0][1];
s[1][0] = s[1][1];
sq[0][1] = sq[0][3];
@@ -1799,10 +1731,13 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPass1(
uint32_t* const square_sum5[5], const int width, const uint32_t scale,
const int16_t w0, uint16_t* const ma565[2], uint32_t* const b565[2],
uint8_t* const dst) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass1 - width;
uint8x16_t s[2][2], mas[2];
uint16x8_t sq[2][4], bs[3];
- BoxFilterPreProcess5Lo(src0, src1, scale, s, sum5, square_sum5, sq, &mas[0],
- &bs[0]);
+ s[0][0] = Load1QMsanU8(src0, overread_in_bytes);
+ s[1][0] = Load1QMsanU8(src1, overread_in_bytes);
+
+ BoxFilterPreProcess5Lo(s, scale, sum5, square_sum5, sq, &mas[0], &bs[0]);
int x = 0;
do {
@@ -1810,8 +1745,9 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPass1(
uint8x16_t masx[3];
uint32x4x2_t b[2];
int16x8_t p0, p1;
- BoxFilterPreProcess5(src0, src1, x + 8, scale, s, sum5, square_sum5, sq,
- mas, bs + 1);
+ s[0][1] = Load1QMsanU8(src0 + x + 16, x + 16 + overread_in_bytes);
+ s[1][1] = Load1QMsanU8(src1 + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess5(s, x + 8, scale, sum5, square_sum5, sq, mas, bs + 1);
Prepare3_8<0>(mas, masx);
ma[1] = Sum565<0>(masx);
b[1] = Sum565W(bs);
@@ -1865,7 +1801,10 @@ inline void BoxFilterPass1LastRow(const uint8_t* const src,
uint8_t* const dst) {
uint8x16_t s[2], mas[2];
uint16x8_t sq[4], bs[4];
- BoxFilterPreProcess5LastRowLo(src0, scale, s, sum5, square_sum5, sq, &mas[0],
+ // TODO(b/194217060): Future msan load.
+ s[0] = vld1q_u8(src0);
+
+ BoxFilterPreProcess5LastRowLo(s, scale, sum5, square_sum5, sq, &mas[0],
&bs[0]);
int x = 0;
@@ -1873,8 +1812,11 @@ inline void BoxFilterPass1LastRow(const uint8_t* const src,
uint16x8_t ma[2];
uint8x16_t masx[3];
uint32x4x2_t b[2];
- BoxFilterPreProcess5LastRow(src0, x + 8, scale, s, sum5, square_sum5,
- sq + 1, mas, bs + 1);
+ // TODO(b/194217060): Future msan load.
+ s[1] = vld1q_u8(src0 + x + 16);
+
+ BoxFilterPreProcess5LastRow(s, x + 8, scale, sum5, square_sum5, sq + 1, mas,
+ bs + 1);
Prepare3_8<0>(mas, masx);
ma[1] = Sum565<0>(masx);
b[1] = Sum565W(bs);
@@ -1911,17 +1853,21 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPass2(
uint32_t* const square_sum3[3], uint16_t* const ma343[3],
uint16_t* const ma444[2], uint32_t* const b343[3], uint32_t* const b444[2],
uint8_t* const dst) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass2 - width;
uint8x16_t s[2], mas[2];
uint16x8_t sq[4], bs[3];
- BoxFilterPreProcess3Lo(src0, scale, &s[0], sum3, square_sum3, sq, &mas[0],
- &bs[0]);
+ // TODO(b/194217060): Future msan load.
+ s[0] = vld1q_u8(src0);
+
+ BoxFilterPreProcess3Lo(&s[0], scale, sum3, square_sum3, sq, &mas[0], &bs[0]);
int x = 0;
do {
uint16x8_t ma[3];
uint8x16_t ma3x[3];
uint32x4x2_t b[3];
- BoxFilterPreProcess3(src0, x + 8, scale, sum3, square_sum3, s, sq + 1, mas,
+ s[1] = Load1QMsanU8(src0 + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess3(s, x + 8, scale, sum3, square_sum3, sq + 1, mas,
bs + 1);
Prepare3_8<0>(mas, ma3x);
Store343_444<0>(ma3x, bs, x, &ma[2], &b[2], ma343[2], ma444[1], b343[2],
@@ -1966,10 +1912,15 @@ LIBGAV1_ALWAYS_INLINE void BoxFilter(
uint16_t* const ma343[4], uint16_t* const ma444[3],
uint16_t* const ma565[2], uint32_t* const b343[4], uint32_t* const b444[3],
uint32_t* const b565[2], uint8_t* const dst) {
+ const ptrdiff_t overread_in_bytes = kWideOverreadInBytesPass1 - width;
uint8x16_t s[2][2], ma3[2][2], ma5[2];
uint16x8_t sq[2][4], b3[2][3], b5[3];
- BoxFilterPreProcessLo(src0, src1, scales, s, sum3, sum5, square_sum3,
- square_sum5, sq, ma3, b3, &ma5[0], &b5[0]);
+ // TODO(b/194217060): Future msan load.
+ s[0][0] = vld1q_u8(src0);
+ s[1][0] = vld1q_u8(src1);
+
+ BoxFilterPreProcessLo(s, scales, sum3, sum5, square_sum3, square_sum5, sq,
+ ma3, b3, &ma5[0], &b5[0]);
int x = 0;
do {
@@ -1977,8 +1928,10 @@ LIBGAV1_ALWAYS_INLINE void BoxFilter(
uint8x16_t ma3x[2][3], ma5x[3];
uint32x4x2_t b[3][3];
int16x8_t p[2][2];
- BoxFilterPreProcess(src0, src1, x + 8, scales, s, sum3, sum5, square_sum3,
- square_sum5, sq, ma3, b3, ma5, b5 + 1);
+ s[0][1] = Load1QMsanU8(src0 + x + 16, x + 16 + overread_in_bytes);
+ s[1][1] = Load1QMsanU8(src1 + x + 16, x + 16 + overread_in_bytes);
+ BoxFilterPreProcess(s, x + 8, scales, sum3, sum5, square_sum3, square_sum5,
+ sq, ma3, b3, ma5, b5 + 1);
Prepare3_8<0>(ma3[0], ma3x[0]);
Prepare3_8<0>(ma3[1], ma3x[1]);
Store343_444<0>(ma3x[0], b3[0], x, &ma[1][2], &ma[2][1], &b[1][2], &b[2][1],
@@ -2070,17 +2023,21 @@ inline void BoxFilterLastRow(
uint8x16_t s[2], ma3[2], ma5[2];
uint16x8_t sq[4], ma[3], b3[3], b5[3];
uint32x4x2_t b[3];
- BoxFilterPreProcessLastRowLo(src0, scales, sum3, sum5, square_sum3,
- square_sum5, &s[0], sq, &ma3[0], &ma5[0], &b3[0],
- &b5[0]);
+ // TODO(b/194217060): Future msan load.
+ s[0] = vld1q_u8(src0);
+
+ BoxFilterPreProcessLastRowLo(s, scales, sum3, sum5, square_sum3, square_sum5,
+ sq, &ma3[0], &ma5[0], &b3[0], &b5[0]);
int x = 0;
do {
uint8x16_t ma3x[3], ma5x[3];
int16x8_t p[2];
- BoxFilterPreProcessLastRow(src0, x + 8, scales, sum3, sum5, square_sum3,
- square_sum5, s, sq + 1, ma3, ma5, &b3[1],
- &b5[1]);
+ // TODO(b/194217060): Future msan load.
+ s[1] = vld1q_u8(src0 + x + 16);
+
+ BoxFilterPreProcessLastRow(s, x + 8, scales, sum3, sum5, square_sum3,
+ square_sum5, sq + 1, ma3, ma5, &b3[1], &b5[1]);
Prepare3_8<0>(ma5, ma5x);
ma[1] = Sum565<0>(ma5x);
b[1] = Sum565W(b5);
@@ -2137,6 +2094,7 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterProcess(
const ptrdiff_t bottom_border_stride, const int width, const int height,
SgrBuffer* const sgr_buffer, uint8_t* dst) {
const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
const ptrdiff_t sum_stride = temp_stride + 8;
const int sgr_proj_index = restoration_info.sgr_proj_info.index;
const uint16_t* const scales = kSgrScaleParameter[sgr_proj_index]; // < 2^12.
@@ -2173,8 +2131,8 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterProcess(
b565[1] = b565[0] + temp_stride;
assert(scales[0] != 0);
assert(scales[1] != 0);
- BoxSum(top_border, top_border_stride, sum_stride, sum3[0], sum5[1],
- square_sum3[0], square_sum5[1]);
+ BoxSum(top_border, top_border_stride, width, sum_stride, sum_width, sum3[0],
+ sum5[1], square_sum3[0], square_sum5[1]);
sum5[0] = sum5[1];
square_sum5[0] = square_sum5[1];
const uint8_t* const s = (height > 1) ? src + stride : bottom_border;
@@ -2250,6 +2208,7 @@ inline void BoxFilterProcessPass1(const RestorationUnitInfo& restoration_info,
const int width, const int height,
SgrBuffer* const sgr_buffer, uint8_t* dst) {
const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
const ptrdiff_t sum_stride = temp_stride + 8;
const int sgr_proj_index = restoration_info.sgr_proj_info.index;
const uint32_t scale = kSgrScaleParameter[sgr_proj_index][0]; // < 2^12.
@@ -2267,7 +2226,8 @@ inline void BoxFilterProcessPass1(const RestorationUnitInfo& restoration_info,
b565[0] = sgr_buffer->b565;
b565[1] = b565[0] + temp_stride;
assert(scale != 0);
- BoxSum<5>(top_border, top_border_stride, sum_stride, sum5[1], square_sum5[1]);
+ BoxSum<5>(top_border, top_border_stride, width, sum_stride, sum_width,
+ sum5[1], square_sum5[1]);
sum5[0] = sum5[1];
square_sum5[0] = square_sum5[1];
const uint8_t* const s = (height > 1) ? src + stride : bottom_border;
@@ -2325,6 +2285,7 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
SgrBuffer* const sgr_buffer, uint8_t* dst) {
assert(restoration_info.sgr_proj_info.multiplier[0] == 0);
const auto temp_stride = Align<ptrdiff_t>(width, 16);
+ const auto sum_width = Align<ptrdiff_t>(width + 8, 16);
const ptrdiff_t sum_stride = temp_stride + 8;
const int16_t w1 = restoration_info.sgr_proj_info.multiplier[1];
const int16_t w0 = (1 << kSgrProjPrecisionBits) - w1;
@@ -2347,7 +2308,8 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
b444[0] = sgr_buffer->b444;
b444[1] = b444[0] + temp_stride;
assert(scale != 0);
- BoxSum<3>(top_border, top_border_stride, sum_stride, sum3[0], square_sum3[0]);
+ BoxSum<3>(top_border, top_border_stride, width, sum_stride, sum_width,
+ sum3[0], square_sum3[0]);
BoxSumFilterPreProcess3<false>(src, width, scale, sum3, square_sum3, ma343[0],
nullptr, b343[0], nullptr);
Circulate3PointersBy1<uint16_t>(sum3);
@@ -2396,11 +2358,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
// the end of each row. It is safe to overwrite the output as it will not be
// part of the visible frame.
void SelfGuidedFilter_NEON(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
@@ -2409,6 +2374,12 @@ void SelfGuidedFilter_NEON(
const auto* bottom = static_cast<const uint8_t*>(bottom_border);
auto* const dst = static_cast<uint8_t*>(dest);
SgrBuffer* const sgr_buffer = &restoration_buffer->sgr_buffer;
+
+#if LIBGAV1_MSAN
+ // Initialize to prevent msan warnings when intermediate overreads occur.
+ memset(sgr_buffer, 0, sizeof(SgrBuffer));
+#endif
+
if (radius_pass_1 == 0) {
// |radius_pass_0| and |radius_pass_1| cannot both be 0, so we have the
// following assertion.
diff --git a/src/dsp/arm/loop_restoration_neon.h b/src/dsp/arm/loop_restoration_neon.h
index b551610..b9a4803 100644
--- a/src/dsp/arm/loop_restoration_neon.h
+++ b/src/dsp/arm/loop_restoration_neon.h
@@ -26,6 +26,7 @@ namespace dsp {
// Initializes Dsp::loop_restorations, see the defines below for specifics.
// This function is not thread-safe.
void LoopRestorationInit_NEON();
+void LoopRestorationInit10bpp_NEON();
} // namespace dsp
} // namespace libgav1
@@ -35,6 +36,9 @@ void LoopRestorationInit_NEON();
#define LIBGAV1_Dsp8bpp_WienerFilter LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_SelfGuidedFilter LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WienerFilter LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_SelfGuidedFilter LIBGAV1_CPU_NEON
+
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_LOOP_RESTORATION_NEON_H_
diff --git a/src/dsp/arm/mask_blend_neon.cc b/src/dsp/arm/mask_blend_neon.cc
index ee50923..853f949 100644
--- a/src/dsp/arm/mask_blend_neon.cc
+++ b/src/dsp/arm/mask_blend_neon.cc
@@ -79,10 +79,11 @@ inline int16x8_t GetMask8(const uint8_t* mask, ptrdiff_t mask_stride) {
return vreinterpretq_s16_u16(vmovl_u8(mask_val));
}
-inline void WriteMaskBlendLine4x2(const int16_t* const pred_0,
- const int16_t* const pred_1,
+inline void WriteMaskBlendLine4x2(const int16_t* LIBGAV1_RESTRICT const pred_0,
+ const int16_t* LIBGAV1_RESTRICT const pred_1,
const int16x8_t pred_mask_0,
- const int16x8_t pred_mask_1, uint8_t* dst,
+ const int16x8_t pred_mask_1,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const int16x8_t pred_val_0 = vld1q_s16(pred_0);
const int16x8_t pred_val_1 = vld1q_s16(pred_1);
@@ -109,9 +110,11 @@ inline void WriteMaskBlendLine4x2(const int16_t* const pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlending4x4_NEON(const int16_t* pred_0, const int16_t* pred_1,
- const uint8_t* mask,
- const ptrdiff_t mask_stride, uint8_t* dst,
+inline void MaskBlending4x4_NEON(const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const int16x8_t mask_inverter = vdupq_n_s16(64);
int16x8_t pred_mask_0 =
@@ -133,10 +136,12 @@ inline void MaskBlending4x4_NEON(const int16_t* pred_0, const int16_t* pred_1,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlending4xH_NEON(const int16_t* pred_0, const int16_t* pred_1,
- const uint8_t* const mask_ptr,
+inline void MaskBlending4xH_NEON(const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
const ptrdiff_t mask_stride, const int height,
- uint8_t* dst, const ptrdiff_t dst_stride) {
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const uint8_t* mask = mask_ptr;
if (height == 4) {
MaskBlending4x4_NEON<subsampling_x, subsampling_y>(
@@ -188,11 +193,12 @@ inline void MaskBlending4xH_NEON(const int16_t* pred_0, const int16_t* pred_1,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlend_NEON(const void* prediction_0, const void* prediction_1,
+inline void MaskBlend_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const ptrdiff_t /*prediction_stride_1*/,
- const uint8_t* const mask_ptr,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
const ptrdiff_t mask_stride, const int width,
- const int height, void* dest,
+ const int height, void* LIBGAV1_RESTRICT dest,
const ptrdiff_t dst_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
@@ -302,11 +308,10 @@ inline uint8x8_t GetInterIntraMask8(const uint8_t* mask,
return vld1_u8(mask);
}
-inline void InterIntraWriteMaskBlendLine8bpp4x2(const uint8_t* const pred_0,
- uint8_t* const pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8x8_t pred_mask_0,
- const uint8x8_t pred_mask_1) {
+inline void InterIntraWriteMaskBlendLine8bpp4x2(
+ const uint8_t* LIBGAV1_RESTRICT const pred_0,
+ uint8_t* LIBGAV1_RESTRICT const pred_1, const ptrdiff_t pred_stride_1,
+ const uint8x8_t pred_mask_0, const uint8x8_t pred_mask_1) {
const uint8x8_t pred_val_0 = vld1_u8(pred_0);
uint8x8_t pred_val_1 = Load4(pred_1);
pred_val_1 = Load4<1>(pred_1 + pred_stride_1, pred_val_1);
@@ -320,11 +325,10 @@ inline void InterIntraWriteMaskBlendLine8bpp4x2(const uint8_t* const pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void InterIntraMaskBlending8bpp4x4_NEON(const uint8_t* pred_0,
- uint8_t* pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8_t* mask,
- const ptrdiff_t mask_stride) {
+inline void InterIntraMaskBlending8bpp4x4_NEON(
+ const uint8_t* LIBGAV1_RESTRICT pred_0, uint8_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1, const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride) {
const uint8x8_t mask_inverter = vdup_n_u8(64);
uint8x8_t pred_mask_1 =
GetInterIntraMask4x2<subsampling_x, subsampling_y>(mask, mask_stride);
@@ -344,8 +348,9 @@ inline void InterIntraMaskBlending8bpp4x4_NEON(const uint8_t* pred_0,
template <int subsampling_x, int subsampling_y>
inline void InterIntraMaskBlending8bpp4xH_NEON(
- const uint8_t* pred_0, uint8_t* pred_1, const ptrdiff_t pred_stride_1,
- const uint8_t* mask, const ptrdiff_t mask_stride, const int height) {
+ const uint8_t* LIBGAV1_RESTRICT pred_0, uint8_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1, const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride, const int height) {
if (height == 4) {
InterIntraMaskBlending8bpp4x4_NEON<subsampling_x, subsampling_y>(
pred_0, pred_1, pred_stride_1, mask, mask_stride);
@@ -369,12 +374,11 @@ inline void InterIntraMaskBlending8bpp4xH_NEON(
}
template <int subsampling_x, int subsampling_y>
-inline void InterIntraMaskBlend8bpp_NEON(const uint8_t* prediction_0,
- uint8_t* prediction_1,
- const ptrdiff_t prediction_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride,
- const int width, const int height) {
+inline void InterIntraMaskBlend8bpp_NEON(
+ const uint8_t* LIBGAV1_RESTRICT prediction_0,
+ uint8_t* LIBGAV1_RESTRICT prediction_1, const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int width, const int height) {
if (width == 4) {
InterIntraMaskBlending8bpp4xH_NEON<subsampling_x, subsampling_y>(
prediction_0, prediction_1, prediction_stride_1, mask_ptr, mask_stride,
@@ -427,7 +431,293 @@ void Init8bpp() {
} // namespace
} // namespace low_bitdepth
-void MaskBlendInit_NEON() { low_bitdepth::Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+template <int subsampling_x, int subsampling_y>
+inline uint16x8_t GetMask4x2(const uint8_t* mask, ptrdiff_t mask_stride) {
+ if (subsampling_x == 1) {
+ const uint8x8_t mask_val0 = vld1_u8(mask);
+ const uint8x8_t mask_val1 = vld1_u8(mask + (mask_stride << subsampling_y));
+ uint16x8_t final_val = vpaddlq_u8(vcombine_u8(mask_val0, mask_val1));
+ if (subsampling_y == 1) {
+ const uint8x8_t next_mask_val0 = vld1_u8(mask + mask_stride);
+ const uint8x8_t next_mask_val1 = vld1_u8(mask + mask_stride * 3);
+ final_val = vaddq_u16(
+ final_val, vpaddlq_u8(vcombine_u8(next_mask_val0, next_mask_val1)));
+ }
+ return vrshrq_n_u16(final_val, subsampling_y + 1);
+ }
+ assert(subsampling_y == 0 && subsampling_x == 0);
+ const uint8x8_t mask_val0 = Load4(mask);
+ const uint8x8_t mask_val = Load4<1>(mask + mask_stride, mask_val0);
+ return vmovl_u8(mask_val);
+}
+
+template <int subsampling_x, int subsampling_y>
+inline uint16x8_t GetMask8(const uint8_t* mask, ptrdiff_t mask_stride) {
+ if (subsampling_x == 1) {
+ uint16x8_t mask_val = vpaddlq_u8(vld1q_u8(mask));
+ if (subsampling_y == 1) {
+ const uint16x8_t next_mask_val = vpaddlq_u8(vld1q_u8(mask + mask_stride));
+ mask_val = vaddq_u16(mask_val, next_mask_val);
+ }
+ return vrshrq_n_u16(mask_val, 1 + subsampling_y);
+ }
+ assert(subsampling_y == 0 && subsampling_x == 0);
+ const uint8x8_t mask_val = vld1_u8(mask);
+ return vmovl_u8(mask_val);
+}
+
+template <bool is_inter_intra>
+uint16x8_t SumWeightedPred(const uint16x8_t pred_mask_0,
+ const uint16x8_t pred_mask_1,
+ const uint16x8_t pred_val_0,
+ const uint16x8_t pred_val_1) {
+ if (is_inter_intra) {
+ // dst[x] = static_cast<Pixel>(RightShiftWithRounding(
+ // mask_value * pred_1[x] + (64 - mask_value) * pred_0[x], 6));
+ uint16x8_t sum = vmulq_u16(pred_mask_1, pred_val_0);
+ sum = vmlaq_u16(sum, pred_mask_0, pred_val_1);
+ return vrshrq_n_u16(sum, 6);
+ } else {
+ // int res = (mask_value * prediction_0[x] +
+ // (64 - mask_value) * prediction_1[x]) >> 6;
+ const uint32x4_t weighted_pred_0_lo =
+ vmull_u16(vget_low_u16(pred_mask_0), vget_low_u16(pred_val_0));
+ const uint32x4_t weighted_pred_0_hi = VMullHighU16(pred_mask_0, pred_val_0);
+ uint32x4x2_t sum;
+ sum.val[0] = vmlal_u16(weighted_pred_0_lo, vget_low_u16(pred_mask_1),
+ vget_low_u16(pred_val_1));
+ sum.val[1] = VMlalHighU16(weighted_pred_0_hi, pred_mask_1, pred_val_1);
+ return vcombine_u16(vshrn_n_u32(sum.val[0], 6), vshrn_n_u32(sum.val[1], 6));
+ }
+}
+
+template <bool is_inter_intra, int width, int bitdepth = 10>
+inline void StoreShiftedResult(uint8_t* dst, const uint16x8_t result,
+ const ptrdiff_t dst_stride = 0) {
+ if (is_inter_intra) {
+ if (width == 4) {
+ // Store 2 lines of width 4.
+ assert(dst_stride != 0);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst), vget_low_u16(result));
+ vst1_u16(reinterpret_cast<uint16_t*>(dst + dst_stride),
+ vget_high_u16(result));
+ } else {
+ // Store 1 line of width 8.
+ vst1q_u16(reinterpret_cast<uint16_t*>(dst), result);
+ }
+ } else {
+ // res -= (bitdepth == 8) ? 0 : kCompoundOffset;
+ // dst[x] = static_cast<Pixel>(
+ // Clip3(RightShiftWithRounding(res, inter_post_round_bits), 0,
+ // (1 << kBitdepth8) - 1));
+ constexpr int inter_post_round_bits = (bitdepth == 12) ? 2 : 4;
+ const uint16x8_t compound_result =
+ vminq_u16(vrshrq_n_u16(vqsubq_u16(result, vdupq_n_u16(kCompoundOffset)),
+ inter_post_round_bits),
+ vdupq_n_u16((1 << bitdepth) - 1));
+ if (width == 4) {
+ // Store 2 lines of width 4.
+ assert(dst_stride != 0);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst), vget_low_u16(compound_result));
+ vst1_u16(reinterpret_cast<uint16_t*>(dst + dst_stride),
+ vget_high_u16(compound_result));
+ } else {
+ // Store 1 line of width 8.
+ vst1q_u16(reinterpret_cast<uint16_t*>(dst), compound_result);
+ }
+ }
+}
+
+template <int subsampling_x, int subsampling_y, bool is_inter_intra>
+inline void MaskBlend4x2_NEON(const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const uint16x8_t mask_inverter,
+ const ptrdiff_t mask_stride,
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ // This works because stride == width == 4.
+ const uint16x8_t pred_val_0 = vld1q_u16(pred_0);
+ const uint16x8_t pred_val_1 =
+ is_inter_intra
+ ? vcombine_u16(vld1_u16(pred_1), vld1_u16(pred_1 + pred_stride_1))
+ : vld1q_u16(pred_1);
+ const uint16x8_t pred_mask_0 =
+ GetMask4x2<subsampling_x, subsampling_y>(mask, mask_stride);
+ const uint16x8_t pred_mask_1 = vsubq_u16(mask_inverter, pred_mask_0);
+ const uint16x8_t weighted_pred_sum = SumWeightedPred<is_inter_intra>(
+ pred_mask_0, pred_mask_1, pred_val_0, pred_val_1);
+
+ StoreShiftedResult<is_inter_intra, 4>(dst, weighted_pred_sum, dst_stride);
+}
+
+template <int subsampling_x, int subsampling_y, bool is_inter_intra>
+inline void MaskBlending4x4_NEON(const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride,
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ // Double stride because the function works on 2 lines at a time.
+ const ptrdiff_t mask_stride_y = mask_stride << (subsampling_y + 1);
+ const ptrdiff_t dst_stride_y = dst_stride << 1;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+
+ pred_0 += 4 << 1;
+ pred_1 += pred_stride_1 << 1;
+ mask += mask_stride_y;
+ dst += dst_stride_y;
+
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+}
+
+template <int subsampling_x, int subsampling_y, bool is_inter_intra>
+inline void MaskBlending4xH_NEON(const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
+ const ptrdiff_t mask_stride, const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
+ const uint8_t* mask = mask_ptr;
+ if (height == 4) {
+ MaskBlending4x4_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_stride, dst, dst_stride);
+ return;
+ }
+ // Double stride because the function works on 2 lines at a time.
+ const ptrdiff_t mask_stride_y = mask_stride << (subsampling_y + 1);
+ const ptrdiff_t dst_stride_y = dst_stride << 1;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ int y = 0;
+ do {
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+ pred_0 += 4 << 1;
+ pred_1 += pred_stride_1 << 1;
+ mask += mask_stride_y;
+ dst += dst_stride_y;
+
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+ pred_0 += 4 << 1;
+ pred_1 += pred_stride_1 << 1;
+ mask += mask_stride_y;
+ dst += dst_stride_y;
+
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+ pred_0 += 4 << 1;
+ pred_1 += pred_stride_1 << 1;
+ mask += mask_stride_y;
+ dst += dst_stride_y;
+
+ MaskBlend4x2_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, pred_stride_1, mask, mask_inverter, mask_stride, dst,
+ dst_stride);
+ pred_0 += 4 << 1;
+ pred_1 += pred_stride_1 << 1;
+ mask += mask_stride_y;
+ dst += dst_stride_y;
+ y += 8;
+ } while (y < height);
+}
+
+template <int subsampling_x, int subsampling_y, bool is_inter_intra>
+void MaskBlend8_NEON(const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const uint16x8_t mask_inverter,
+ const ptrdiff_t mask_stride,
+ uint8_t* LIBGAV1_RESTRICT dst) {
+ const uint16x8_t pred_val_0 = vld1q_u16(pred_0);
+ const uint16x8_t pred_val_1 = vld1q_u16(pred_1);
+ const uint16x8_t pred_mask_0 =
+ GetMask8<subsampling_x, subsampling_y>(mask, mask_stride);
+ const uint16x8_t pred_mask_1 = vsubq_u16(mask_inverter, pred_mask_0);
+ const uint16x8_t weighted_pred_sum = SumWeightedPred<is_inter_intra>(
+ pred_mask_0, pred_mask_1, pred_val_0, pred_val_1);
+
+ StoreShiftedResult<is_inter_intra, 8>(dst, weighted_pred_sum);
+}
+
+template <int subsampling_x, int subsampling_y, bool is_inter_intra>
+inline void MaskBlend_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
+ const ptrdiff_t mask_stride, const int width,
+ const int height, void* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t dst_stride) {
+ if (!is_inter_intra) {
+ assert(prediction_stride_1 == width);
+ }
+ auto* dst = static_cast<uint8_t*>(dest);
+ const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
+ const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
+ if (width == 4) {
+ MaskBlending4xH_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0, pred_1, prediction_stride_1, mask_ptr, mask_stride, height, dst,
+ dst_stride);
+ return;
+ }
+ const ptrdiff_t mask_stride_y = mask_stride << subsampling_y;
+ const uint8_t* mask = mask_ptr;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ int y = 0;
+ do {
+ int x = 0;
+ do {
+ MaskBlend8_NEON<subsampling_x, subsampling_y, is_inter_intra>(
+ pred_0 + x, pred_1 + x, mask + (x << subsampling_x), mask_inverter,
+ mask_stride,
+ reinterpret_cast<uint8_t*>(reinterpret_cast<uint16_t*>(dst) + x));
+ x += 8;
+ } while (x < width);
+ dst += dst_stride;
+ pred_0 += width;
+ pred_1 += prediction_stride_1;
+ mask += mask_stride_y;
+ } while (++y < height);
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->mask_blend[0][0] = MaskBlend_NEON<0, 0, false>;
+ dsp->mask_blend[1][0] = MaskBlend_NEON<1, 0, false>;
+ dsp->mask_blend[2][0] = MaskBlend_NEON<1, 1, false>;
+
+ dsp->mask_blend[0][1] = MaskBlend_NEON<0, 0, true>;
+ dsp->mask_blend[1][1] = MaskBlend_NEON<1, 0, true>;
+ dsp->mask_blend[2][1] = MaskBlend_NEON<1, 1, true>;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void MaskBlendInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/mask_blend_neon.h b/src/dsp/arm/mask_blend_neon.h
index 3829274..c24f2f8 100644
--- a/src/dsp/arm/mask_blend_neon.h
+++ b/src/dsp/arm/mask_blend_neon.h
@@ -36,6 +36,13 @@ void MaskBlendInit_NEON();
#define LIBGAV1_Dsp8bpp_InterIntraMaskBlend8bpp444 LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_InterIntraMaskBlend8bpp422 LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_InterIntraMaskBlend8bpp420 LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_MaskBlend444 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_MaskBlend422 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_MaskBlend420 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_MaskBlendInterIntra444 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_MaskBlendInterIntra422 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_MaskBlendInterIntra420 LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_MASK_BLEND_NEON_H_
diff --git a/src/dsp/arm/motion_field_projection_neon.cc b/src/dsp/arm/motion_field_projection_neon.cc
index 3e731b2..144adf7 100644
--- a/src/dsp/arm/motion_field_projection_neon.cc
+++ b/src/dsp/arm/motion_field_projection_neon.cc
@@ -356,27 +356,12 @@ void MotionFieldProjectionKernel_NEON(const ReferenceInfo& reference_info,
} while (++y8 < y8_end);
}
-void Init8bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
- assert(dsp != nullptr);
- dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_NEON;
-}
-
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
- assert(dsp != nullptr);
- dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_NEON;
-}
-#endif
-
} // namespace
void MotionFieldProjectionInit_NEON() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
+ assert(dsp != nullptr);
+ dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_NEON;
}
} // namespace dsp
diff --git a/src/dsp/arm/motion_vector_search_neon.cc b/src/dsp/arm/motion_vector_search_neon.cc
index da3ba17..4720879 100644
--- a/src/dsp/arm/motion_vector_search_neon.cc
+++ b/src/dsp/arm/motion_vector_search_neon.cc
@@ -61,8 +61,8 @@ inline int16x8_t ProjectionClip(const int16x4_t mv0, const int16x4_t mv1) {
}
inline int16x8_t MvProjectionCompoundClip(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offsets[2]) {
const auto* const tmvs = reinterpret_cast<const int32_t*>(temporal_mvs);
const int32x2_t temporal_mv = vld1_s32(tmvs);
@@ -76,9 +76,9 @@ inline int16x8_t MvProjectionCompoundClip(
}
inline int16x8_t MvProjectionSingleClip(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets, const int reference_offset,
- int16x4_t* const lookup) {
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
+ const int reference_offset, int16x4_t* const lookup) {
const auto* const tmvs = reinterpret_cast<const int16_t*>(temporal_mvs);
const int16x8_t temporal_mv = vld1q_s16(tmvs);
*lookup = vld1_lane_s16(
@@ -116,9 +116,10 @@ inline void ForceInteger(const int16x8_t mv, void* const candidate_mvs) {
}
void MvProjectionCompoundLowPrecision_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -131,13 +132,14 @@ void MvProjectionCompoundLowPrecision_NEON(
temporal_mvs += 2;
temporal_reference_offsets += 2;
candidate_mvs += 2;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
void MvProjectionCompoundForceInteger_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -150,13 +152,14 @@ void MvProjectionCompoundForceInteger_NEON(
temporal_mvs += 2;
temporal_reference_offsets += 2;
candidate_mvs += 2;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
void MvProjectionCompoundHighPrecision_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -169,12 +172,14 @@ void MvProjectionCompoundHighPrecision_NEON(
temporal_mvs += 2;
temporal_reference_offsets += 2;
candidate_mvs += 2;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
void MvProjectionSingleLowPrecision_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int loop_count = (count + 3) >> 2;
int16x4_t lookup = vdup_n_s16(0);
@@ -185,12 +190,14 @@ void MvProjectionSingleLowPrecision_NEON(
temporal_mvs += 4;
temporal_reference_offsets += 4;
candidate_mvs += 4;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
void MvProjectionSingleForceInteger_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int loop_count = (count + 3) >> 2;
int16x4_t lookup = vdup_n_s16(0);
@@ -201,12 +208,14 @@ void MvProjectionSingleForceInteger_NEON(
temporal_mvs += 4;
temporal_reference_offsets += 4;
candidate_mvs += 4;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
void MvProjectionSingleHighPrecision_NEON(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int loop_count = (count + 3) >> 2;
int16x4_t lookup = vdup_n_s16(0);
@@ -217,23 +226,13 @@ void MvProjectionSingleHighPrecision_NEON(
temporal_mvs += 4;
temporal_reference_offsets += 4;
candidate_mvs += 4;
- } while (--loop_count);
+ } while (--loop_count != 0);
}
-void Init8bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
- assert(dsp != nullptr);
- dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_NEON;
- dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_NEON;
- dsp->mv_projection_compound[2] = MvProjectionCompoundHighPrecision_NEON;
- dsp->mv_projection_single[0] = MvProjectionSingleLowPrecision_NEON;
- dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_NEON;
- dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_NEON;
-}
+} // namespace
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+void MotionVectorSearchInit_NEON() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_NEON;
dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_NEON;
@@ -242,16 +241,6 @@ void Init10bpp() {
dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_NEON;
dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_NEON;
}
-#endif
-
-} // namespace
-
-void MotionVectorSearchInit_NEON() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
-}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/obmc_neon.cc b/src/dsp/arm/obmc_neon.cc
index 1111a90..659ed8e 100644
--- a/src/dsp/arm/obmc_neon.cc
+++ b/src/dsp/arm/obmc_neon.cc
@@ -33,10 +33,15 @@
namespace libgav1 {
namespace dsp {
namespace {
-
#include "src/dsp/obmc.inc"
-inline void WriteObmcLine4(uint8_t* const pred, const uint8_t* const obmc_pred,
+} // namespace
+
+namespace low_bitdepth {
+namespace {
+
+inline void WriteObmcLine4(uint8_t* LIBGAV1_RESTRICT const pred,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
const uint8x8_t pred_mask,
const uint8x8_t obmc_pred_mask) {
const uint8x8_t pred_val = Load4(pred);
@@ -47,35 +52,17 @@ inline void WriteObmcLine4(uint8_t* const pred, const uint8_t* const obmc_pred,
StoreLo4(pred, result);
}
-template <bool from_left>
-inline void OverlapBlend2xH_NEON(uint8_t* const prediction,
- const ptrdiff_t prediction_stride,
- const int height,
- const uint8_t* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
- uint8_t* pred = prediction;
+inline void OverlapBlendFromLeft2xH_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride) {
const uint8x8_t mask_inverter = vdup_n_u8(64);
- const uint8_t* obmc_pred = obmc_prediction;
- uint8x8_t pred_mask;
- uint8x8_t obmc_pred_mask;
- int compute_height;
- const int mask_offset = height - 2;
- if (from_left) {
- pred_mask = Load2(kObmcMask);
- obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
- compute_height = height;
- } else {
- // Weights for the last line are all 64, which is a no-op.
- compute_height = height - 1;
- }
+ const uint8x8_t pred_mask = Load2(kObmcMask);
+ const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
uint8x8_t pred_val = vdup_n_u8(0);
uint8x8_t obmc_pred_val = vdup_n_u8(0);
int y = 0;
do {
- if (!from_left) {
- pred_mask = vdup_n_u8(kObmcMask[mask_offset + y]);
- obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
- }
pred_val = Load2<0>(pred, pred_val);
const uint16x8_t weighted_pred = vmull_u8(pred_mask, pred_val);
obmc_pred_val = Load2<0>(obmc_pred, obmc_pred_val);
@@ -85,16 +72,13 @@ inline void OverlapBlend2xH_NEON(uint8_t* const prediction,
pred += prediction_stride;
obmc_pred += obmc_prediction_stride;
- } while (++y != compute_height);
+ } while (++y != height);
}
inline void OverlapBlendFromLeft4xH_NEON(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
const ptrdiff_t obmc_prediction_stride) {
- uint8_t* pred = prediction;
- const uint8_t* obmc_pred = obmc_prediction;
-
const uint8x8_t mask_inverter = vdup_n_u8(64);
const uint8x8_t pred_mask = Load4(kObmcMask + 2);
// 64 - mask
@@ -114,11 +98,9 @@ inline void OverlapBlendFromLeft4xH_NEON(
}
inline void OverlapBlendFromLeft8xH_NEON(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
const ptrdiff_t obmc_prediction_stride) {
- uint8_t* pred = prediction;
- const uint8_t* obmc_pred = obmc_prediction;
const uint8x8_t mask_inverter = vdup_n_u8(64);
const uint8x8_t pred_mask = vld1_u8(kObmcMask + 6);
// 64 - mask
@@ -137,17 +119,19 @@ inline void OverlapBlendFromLeft8xH_NEON(
} while (++y != height);
}
-void OverlapBlendFromLeft_NEON(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromLeft_NEON(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint8_t*>(prediction);
const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 2);
+ assert(height >= 4);
if (width == 2) {
- OverlapBlend2xH_NEON<true>(pred, prediction_stride, height, obmc_pred,
- obmc_prediction_stride);
+ OverlapBlendFromLeft2xH_NEON(pred, prediction_stride, height, obmc_pred,
+ obmc_prediction_stride);
return;
}
if (width == 4) {
@@ -194,13 +178,10 @@ void OverlapBlendFromLeft_NEON(void* const prediction,
} while (x < width);
}
-inline void OverlapBlendFromTop4x4_NEON(uint8_t* const prediction,
- const ptrdiff_t prediction_stride,
- const uint8_t* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride,
- const int height) {
- uint8_t* pred = prediction;
- const uint8_t* obmc_pred = obmc_prediction;
+inline void OverlapBlendFromTop4x4_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride, const int height) {
uint8x8_t pred_mask = vdup_n_u8(kObmcMask[height - 2]);
const uint8x8_t mask_inverter = vdup_n_u8(64);
uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
@@ -224,16 +205,14 @@ inline void OverlapBlendFromTop4x4_NEON(uint8_t* const prediction,
}
inline void OverlapBlendFromTop4xH_NEON(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
const ptrdiff_t obmc_prediction_stride) {
if (height < 8) {
- OverlapBlendFromTop4x4_NEON(prediction, prediction_stride, obmc_prediction,
+ OverlapBlendFromTop4x4_NEON(pred, prediction_stride, obmc_pred,
obmc_prediction_stride, height);
return;
}
- uint8_t* pred = prediction;
- const uint8_t* obmc_pred = obmc_prediction;
const uint8_t* mask = kObmcMask + height - 2;
const uint8x8_t mask_inverter = vdup_n_u8(64);
int y = 0;
@@ -282,11 +261,9 @@ inline void OverlapBlendFromTop4xH_NEON(
}
inline void OverlapBlendFromTop8xH_NEON(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
const ptrdiff_t obmc_prediction_stride) {
- uint8_t* pred = prediction;
- const uint8_t* obmc_pred = obmc_prediction;
const uint8x8_t mask_inverter = vdup_n_u8(64);
const uint8_t* mask = kObmcMask + height - 2;
const int compute_height = height - (height >> 2);
@@ -307,19 +284,16 @@ inline void OverlapBlendFromTop8xH_NEON(
} while (++y != compute_height);
}
-void OverlapBlendFromTop_NEON(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromTop_NEON(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint8_t*>(prediction);
const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 4);
+ assert(height >= 2);
- if (width == 2) {
- OverlapBlend2xH_NEON<false>(pred, prediction_stride, height, obmc_pred,
- obmc_prediction_stride);
- return;
- }
if (width == 4) {
OverlapBlendFromTop4xH_NEON(pred, prediction_stride, height, obmc_pred,
obmc_prediction_stride);
@@ -374,8 +348,582 @@ void Init8bpp() {
}
} // namespace
+} // namespace low_bitdepth
-void ObmcInit_NEON() { Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+// This is a flat array of masks for each block dimension from 2 to 32. The
+// starting index for each length is length-2. The value 64 leaves the result
+// equal to |pred| and may be ignored if convenient. Vector loads may overrread
+// values meant for larger sizes, but these values will be unused.
+constexpr uint16_t kObmcMask[62] = {
+ // Obmc Mask 2
+ 45, 64,
+ // Obmc Mask 4
+ 39, 50, 59, 64,
+ // Obmc Mask 8
+ 36, 42, 48, 53, 57, 61, 64, 64,
+ // Obmc Mask 16
+ 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 64, 64, 64, 64,
+ // Obmc Mask 32
+ 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, 56, 57, 58,
+ 59, 60, 60, 61, 62, 64, 64, 64, 64, 64, 64, 64, 64};
+
+inline uint16x4_t BlendObmc2Or4(uint8_t* LIBGAV1_RESTRICT const pred,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
+ const uint16x4_t pred_mask,
+ const uint16x4_t obmc_pred_mask) {
+ const uint16x4_t pred_val = vld1_u16(reinterpret_cast<uint16_t*>(pred));
+ const uint16x4_t obmc_pred_val =
+ vld1_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
+ const uint16x4_t weighted_pred = vmul_u16(pred_mask, pred_val);
+ const uint16x4_t result =
+ vrshr_n_u16(vmla_u16(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
+ return result;
+}
+
+inline uint16x8_t BlendObmc8(uint8_t* LIBGAV1_RESTRICT const pred,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
+ const uint16x8_t pred_mask,
+ const uint16x8_t obmc_pred_mask) {
+ const uint16x8_t pred_val = vld1q_u16(reinterpret_cast<uint16_t*>(pred));
+ const uint16x8_t obmc_pred_val =
+ vld1q_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
+ const uint16x8_t weighted_pred = vmulq_u16(pred_mask, pred_val);
+ const uint16x8_t result =
+ vrshrq_n_u16(vmlaq_u16(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
+ return result;
+}
+
+inline void OverlapBlendFromLeft2xH_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride) {
+ const uint16x4_t mask_inverter = vdup_n_u16(64);
+ // Second two lanes unused.
+ const uint16x4_t pred_mask = vld1_u16(kObmcMask);
+ const uint16x4_t obmc_pred_mask = vsub_u16(mask_inverter, pred_mask);
+ int y = 0;
+ do {
+ const uint16x4_t result_0 =
+ BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ Store2<0>(reinterpret_cast<uint16_t*>(pred), result_0);
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ const uint16x4_t result_1 =
+ BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ Store2<0>(reinterpret_cast<uint16_t*>(pred), result_1);
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ y += 2;
+ } while (y != height);
+}
+
+inline void OverlapBlendFromLeft4xH_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride) {
+ const uint16x4_t mask_inverter = vdup_n_u16(64);
+ const uint16x4_t pred_mask = vld1_u16(kObmcMask + 2);
+ // 64 - mask
+ const uint16x4_t obmc_pred_mask = vsub_u16(mask_inverter, pred_mask);
+ int y = 0;
+ do {
+ const uint16x4_t result_0 =
+ BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result_0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ const uint16x4_t result_1 =
+ BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result_1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ y += 2;
+ } while (y != height);
+}
+
+void OverlapBlendFromLeft_NEON(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
+ auto* pred = static_cast<uint8_t*>(prediction);
+ const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 2);
+ assert(height >= 4);
+
+ if (width == 2) {
+ OverlapBlendFromLeft2xH_NEON(pred, prediction_stride, height, obmc_pred,
+ obmc_prediction_stride);
+ return;
+ }
+ if (width == 4) {
+ OverlapBlendFromLeft4xH_NEON(pred, prediction_stride, height, obmc_pred,
+ obmc_prediction_stride);
+ return;
+ }
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ const uint16_t* mask = kObmcMask + width - 2;
+ int x = 0;
+ do {
+ pred = reinterpret_cast<uint8_t*>(static_cast<uint16_t*>(prediction) + x);
+ obmc_pred = reinterpret_cast<const uint8_t*>(
+ static_cast<const uint16_t*>(obmc_prediction) + x);
+ const uint16x8_t pred_mask = vld1q_u16(mask + x);
+ // 64 - mask
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ int y = 0;
+ do {
+ const uint16x8_t result =
+ BlendObmc8(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ } while (++y < height);
+ x += 8;
+ } while (x < width);
+}
+
+template <int lane>
+inline uint16x4_t BlendObmcFromTop4(
+ uint8_t* LIBGAV1_RESTRICT const pred,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_pred, const uint16x8_t pred_mask,
+ const uint16x8_t obmc_pred_mask) {
+ const uint16x4_t pred_val = vld1_u16(reinterpret_cast<uint16_t*>(pred));
+ const uint16x4_t obmc_pred_val =
+ vld1_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
+ const uint16x4_t weighted_pred = VMulLaneQU16<lane>(pred_val, pred_mask);
+ const uint16x4_t result = vrshr_n_u16(
+ VMlaLaneQU16<lane>(weighted_pred, obmc_pred_val, obmc_pred_mask), 6);
+ return result;
+}
+
+template <int lane>
+inline uint16x8_t BlendObmcFromTop8(
+ uint8_t* LIBGAV1_RESTRICT const pred,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_pred, const uint16x8_t pred_mask,
+ const uint16x8_t obmc_pred_mask) {
+ const uint16x8_t pred_val = vld1q_u16(reinterpret_cast<uint16_t*>(pred));
+ const uint16x8_t obmc_pred_val =
+ vld1q_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
+ const uint16x8_t weighted_pred = VMulQLaneQU16<lane>(pred_val, pred_mask);
+ const uint16x8_t result = vrshrq_n_u16(
+ VMlaQLaneQU16<lane>(weighted_pred, obmc_pred_val, obmc_pred_mask), 6);
+ return result;
+}
+
+inline void OverlapBlendFromTop4x2Or4_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride, const int height) {
+ const uint16x8_t pred_mask = vld1q_u16(&kObmcMask[height - 2]);
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ uint16x4_t result =
+ BlendObmcFromTop4<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ if (height == 2) {
+ // Mask value is 64, meaning |pred| is unchanged.
+ return;
+ }
+
+ result = BlendObmcFromTop4<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+}
+
+inline void OverlapBlendFromTop4xH_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride) {
+ if (height < 8) {
+ OverlapBlendFromTop4x2Or4_NEON(pred, prediction_stride, obmc_pred,
+ obmc_prediction_stride, height);
+ return;
+ }
+ const uint16_t* mask = kObmcMask + height - 2;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ int y = 0;
+ // Compute 6 lines for height 8, or 12 lines for height 16. The remaining
+ // lines are unchanged as the corresponding mask value is 64.
+ do {
+ const uint16x8_t pred_mask = vld1q_u16(&mask[y]);
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ uint16x4_t result =
+ BlendObmcFromTop4<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop4<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ // Increment for the right mask index.
+ y += 6;
+ } while (y < height - 4);
+}
+
+inline void OverlapBlendFromTop8xH_NEON(
+ uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
+ const uint8_t* LIBGAV1_RESTRICT obmc_pred,
+ const ptrdiff_t obmc_prediction_stride, const int height) {
+ const uint16_t* mask = kObmcMask + height - 2;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ uint16x8_t pred_mask = vld1q_u16(mask);
+ uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ uint16x8_t result =
+ BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ if (height == 2) return;
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ if (height == 4) return;
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+
+ if (height == 8) return;
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ pred_mask = vld1q_u16(&mask[8]);
+ obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+
+ result = BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+
+ if (height == 16) return;
+
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ pred_mask = vld1q_u16(&mask[16]);
+ obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+
+ result = BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
+}
+
+void OverlapBlendFromTop_NEON(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
+ auto* pred = static_cast<uint8_t*>(prediction);
+ const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 4);
+ assert(height >= 2);
+
+ if (width == 4) {
+ OverlapBlendFromTop4xH_NEON(pred, prediction_stride, height, obmc_pred,
+ obmc_prediction_stride);
+ return;
+ }
+
+ if (width == 8) {
+ OverlapBlendFromTop8xH_NEON(pred, prediction_stride, obmc_pred,
+ obmc_prediction_stride, height);
+ return;
+ }
+
+ const uint16_t* mask = kObmcMask + height - 2;
+ const uint16x8_t mask_inverter = vdupq_n_u16(64);
+ const uint16x8_t pred_mask = vld1q_u16(mask);
+ // 64 - mask
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+#define OBMC_ROW_FROM_TOP(n) \
+ do { \
+ int x = 0; \
+ do { \
+ const uint16x8_t result = BlendObmcFromTop8<n>( \
+ reinterpret_cast<uint8_t*>(reinterpret_cast<uint16_t*>(pred) + x), \
+ reinterpret_cast<const uint8_t*>( \
+ reinterpret_cast<const uint16_t*>(obmc_pred) + x), \
+ pred_mask, obmc_pred_mask); \
+ vst1q_u16(reinterpret_cast<uint16_t*>(pred) + x, result); \
+ \
+ x += 8; \
+ } while (x < width); \
+ } while (false)
+
+ // Compute 1 row.
+ if (height == 2) {
+ OBMC_ROW_FROM_TOP(0);
+ return;
+ }
+
+ // Compute 3 rows.
+ if (height == 4) {
+ OBMC_ROW_FROM_TOP(0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(2);
+ return;
+ }
+
+ // Compute 6 rows.
+ if (height == 8) {
+ OBMC_ROW_FROM_TOP(0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(2);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(3);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(4);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(5);
+ return;
+ }
+
+ // Compute 12 rows.
+ if (height == 16) {
+ OBMC_ROW_FROM_TOP(0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(2);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(3);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(4);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(5);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(6);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(7);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ const uint16x8_t pred_mask = vld1q_u16(&mask[8]);
+ // 64 - mask
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ OBMC_ROW_FROM_TOP(0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(2);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(3);
+ return;
+ }
+
+ // Stop when mask value becomes 64. This is a multiple of 8 for height 32
+ // and 64.
+ const int compute_height = height - (height >> 2);
+ int y = 0;
+ do {
+ const uint16x8_t pred_mask = vld1q_u16(&mask[y]);
+ // 64 - mask
+ const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
+ OBMC_ROW_FROM_TOP(0);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(1);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(2);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(3);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(4);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(5);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(6);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+ OBMC_ROW_FROM_TOP(7);
+ pred += prediction_stride;
+ obmc_pred += obmc_prediction_stride;
+
+ y += 8;
+ } while (y < compute_height);
+}
+
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->obmc_blend[kObmcDirectionVertical] = OverlapBlendFromTop_NEON;
+ dsp->obmc_blend[kObmcDirectionHorizontal] = OverlapBlendFromLeft_NEON;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void ObmcInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/obmc_neon.h b/src/dsp/arm/obmc_neon.h
index d5c9d9c..788017e 100644
--- a/src/dsp/arm/obmc_neon.h
+++ b/src/dsp/arm/obmc_neon.h
@@ -33,6 +33,9 @@ void ObmcInit_NEON();
#if LIBGAV1_ENABLE_NEON
#define LIBGAV1_Dsp8bpp_ObmcVertical LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_ObmcHorizontal LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_ObmcVertical LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_ObmcHorizontal LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_OBMC_NEON_H_
diff --git a/src/dsp/arm/super_res_neon.cc b/src/dsp/arm/super_res_neon.cc
index 91537c4..2f8dde6 100644
--- a/src/dsp/arm/super_res_neon.cc
+++ b/src/dsp/arm/super_res_neon.cc
@@ -23,6 +23,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
namespace libgav1 {
@@ -81,19 +82,27 @@ inline uint8x8_t SuperRes(const uint8x8_t src[kSuperResFilterTaps],
return vqrshrn_n_u16(res, kFilterBits);
}
-void SuperRes_NEON(const void* const coefficients, void* const source,
+void SuperRes_NEON(const void* LIBGAV1_RESTRICT const coefficients,
+ void* LIBGAV1_RESTRICT const source,
const ptrdiff_t source_stride, const int height,
const int downscaled_width, const int upscaled_width,
const int initial_subpixel_x, const int step,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* src = static_cast<uint8_t*>(source) - DivideBy2(kSuperResFilterTaps);
auto* dst = static_cast<uint8_t*>(dest);
int y = height;
do {
const auto* filter = static_cast<const uint8_t*>(coefficients);
uint8_t* dst_ptr = dst;
+#if LIBGAV1_MSAN
+ // Initialize the padding area to prevent msan warnings.
+ const int super_res_right_border = kSuperResHorizontalPadding;
+#else
+ const int super_res_right_border = kSuperResHorizontalBorder;
+#endif
ExtendLine<uint8_t>(src + DivideBy2(kSuperResFilterTaps), downscaled_width,
- kSuperResHorizontalBorder, kSuperResHorizontalBorder);
+ kSuperResHorizontalBorder, super_res_right_border);
int subpixel_x = initial_subpixel_x;
uint8x8_t sr[8];
uint8x16_t s[8];
@@ -234,19 +243,27 @@ inline uint16x8_t SuperRes(const uint16x8_t src[kSuperResFilterTaps],
}
template <int bitdepth>
-void SuperRes_NEON(const void* const coefficients, void* const source,
+void SuperRes_NEON(const void* LIBGAV1_RESTRICT const coefficients,
+ void* LIBGAV1_RESTRICT const source,
const ptrdiff_t source_stride, const int height,
const int downscaled_width, const int upscaled_width,
const int initial_subpixel_x, const int step,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* src = static_cast<uint16_t*>(source) - DivideBy2(kSuperResFilterTaps);
auto* dst = static_cast<uint16_t*>(dest);
int y = height;
do {
const auto* filter = static_cast<const uint16_t*>(coefficients);
uint16_t* dst_ptr = dst;
+#if LIBGAV1_MSAN
+ // Initialize the padding area to prevent msan warnings.
+ const int super_res_right_border = kSuperResHorizontalPadding;
+#else
+ const int super_res_right_border = kSuperResHorizontalBorder;
+#endif
ExtendLine<uint16_t>(src + DivideBy2(kSuperResFilterTaps), downscaled_width,
- kSuperResHorizontalBorder, kSuperResHorizontalBorder);
+ kSuperResHorizontalBorder, super_res_right_border);
int subpixel_x = initial_subpixel_x;
uint16x8_t sr[8];
int x = RightShiftWithCeiling(upscaled_width, 3);
diff --git a/src/dsp/arm/warp_neon.cc b/src/dsp/arm/warp_neon.cc
index c7fb739..71e0a43 100644
--- a/src/dsp/arm/warp_neon.cc
+++ b/src/dsp/arm/warp_neon.cc
@@ -34,11 +34,16 @@
namespace libgav1 {
namespace dsp {
-namespace low_bitdepth {
namespace {
// Number of extra bits of precision in warped filtering.
constexpr int kWarpedDiffPrecisionBits = 10;
+
+} // namespace
+
+namespace low_bitdepth {
+namespace {
+
constexpr int kFirstPassOffset = 1 << 14;
constexpr int kOffsetRemoval =
(kFirstPassOffset >> kInterRoundBitsHorizontal) * 128;
@@ -54,10 +59,10 @@ void HorizontalFilter(const int sx4, const int16_t alpha,
int16_t intermediate_result_row[8]) {
int sx = sx4 - MultiplyBy4(alpha);
int8x8_t filter[8];
- for (int x = 0; x < 8; ++x) {
+ for (auto& f : filter) {
const int offset = RightShiftWithRounding(sx, kWarpedDiffPrecisionBits) +
kWarpedPixelPrecisionShifts;
- filter[x] = vld1_s8(kWarpedFilters8[offset]);
+ f = vld1_s8(kWarpedFilters8[offset]);
sx += alpha;
}
Transpose8x8(filter);
@@ -103,13 +108,15 @@ void HorizontalFilter(const int sx4, const int16_t alpha,
}
template <bool is_compound>
-void Warp_NEON(const void* const source, const ptrdiff_t source_stride,
- const int source_width, const int source_height,
- const int* const warp_params, const int subsampling_x,
- const int subsampling_y, const int block_start_x,
- const int block_start_y, const int block_width,
- const int block_height, const int16_t alpha, const int16_t beta,
- const int16_t gamma, const int16_t delta, void* dest,
+void Warp_NEON(const void* LIBGAV1_RESTRICT const source,
+ const ptrdiff_t source_stride, const int source_width,
+ const int source_height,
+ const int* LIBGAV1_RESTRICT const warp_params,
+ const int subsampling_x, const int subsampling_y,
+ const int block_start_x, const int block_start_y,
+ const int block_width, const int block_height,
+ const int16_t alpha, const int16_t beta, const int16_t gamma,
+ const int16_t delta, void* LIBGAV1_RESTRICT dest,
const ptrdiff_t dest_stride) {
constexpr int kRoundBitsVertical =
is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
@@ -393,11 +400,11 @@ void Warp_NEON(const void* const source, const ptrdiff_t source_stride,
for (int y = 0; y < 8; ++y) {
int sy = sy4 - MultiplyBy4(gamma);
int16x8_t filter[8];
- for (int x = 0; x < 8; ++x) {
+ for (auto& f : filter) {
const int offset =
RightShiftWithRounding(sy, kWarpedDiffPrecisionBits) +
kWarpedPixelPrecisionShifts;
- filter[x] = vld1q_s16(kWarpedFilters[offset]);
+ f = vld1q_s16(kWarpedFilters[offset]);
sy += gamma;
}
Transpose8x8(filter);
@@ -438,7 +445,453 @@ void Init8bpp() {
} // namespace
} // namespace low_bitdepth
-void WarpInit_NEON() { low_bitdepth::Init8bpp(); }
+//------------------------------------------------------------------------------
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+LIBGAV1_ALWAYS_INLINE uint16x8x2_t LoadSrcRow(uint16_t const* ptr) {
+ uint16x8x2_t x;
+ // Clang/gcc uses ldp here.
+ x.val[0] = vld1q_u16(ptr);
+ x.val[1] = vld1q_u16(ptr + 8);
+ return x;
+}
+
+LIBGAV1_ALWAYS_INLINE void HorizontalFilter(
+ const int sx4, const int16_t alpha, const uint16x8x2_t src_row,
+ int16_t intermediate_result_row[8]) {
+ int sx = sx4 - MultiplyBy4(alpha);
+ int8x8_t filter8[8];
+ for (auto& f : filter8) {
+ const int offset = RightShiftWithRounding(sx, kWarpedDiffPrecisionBits) +
+ kWarpedPixelPrecisionShifts;
+ f = vld1_s8(kWarpedFilters8[offset]);
+ sx += alpha;
+ }
+
+ Transpose8x8(filter8);
+
+ int16x8_t filter[8];
+ for (int i = 0; i < 8; ++i) {
+ filter[i] = vmovl_s8(filter8[i]);
+ }
+
+ int32x4x2_t sum;
+ int16x8_t src_row_window;
+ // k = 0.
+ src_row_window = vreinterpretq_s16_u16(src_row.val[0]);
+ sum.val[0] = vmull_s16(vget_low_s16(filter[0]), vget_low_s16(src_row_window));
+ sum.val[1] = VMullHighS16(filter[0], src_row_window);
+ // k = 1.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 1));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[1]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[1], src_row_window);
+ // k = 2.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 2));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[2]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[2], src_row_window);
+ // k = 3.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 3));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[3]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[3], src_row_window);
+ // k = 4.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 4));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[4]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[4], src_row_window);
+ // k = 5.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 5));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[5]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[5], src_row_window);
+ // k = 6.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 6));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[6]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[6], src_row_window);
+ // k = 7.
+ src_row_window =
+ vreinterpretq_s16_u16(vextq_u16(src_row.val[0], src_row.val[1], 7));
+ sum.val[0] = vmlal_s16(sum.val[0], vget_low_s16(filter[7]),
+ vget_low_s16(src_row_window));
+ sum.val[1] = VMlalHighS16(sum.val[1], filter[7], src_row_window);
+ // End of unrolled k = 0..7 loop.
+
+ vst1_s16(intermediate_result_row,
+ vrshrn_n_s32(sum.val[0], kInterRoundBitsHorizontal));
+ vst1_s16(intermediate_result_row + 4,
+ vrshrn_n_s32(sum.val[1], kInterRoundBitsHorizontal));
+}
+
+template <bool is_compound>
+void Warp_NEON(const void* LIBGAV1_RESTRICT const source,
+ const ptrdiff_t source_stride, const int source_width,
+ const int source_height,
+ const int* LIBGAV1_RESTRICT const warp_params,
+ const int subsampling_x, const int subsampling_y,
+ const int block_start_x, const int block_start_y,
+ const int block_width, const int block_height,
+ const int16_t alpha, const int16_t beta, const int16_t gamma,
+ const int16_t delta, void* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t dest_stride) {
+ constexpr int kRoundBitsVertical =
+ is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
+ union {
+ // Intermediate_result is the output of the horizontal filtering and
+ // rounding. The range is within 13 (= bitdepth + kFilterBits + 1 -
+ // kInterRoundBitsHorizontal) bits (unsigned). We use the signed int16_t
+ // type so that we can multiply it by kWarpedFilters (which has signed
+ // values) using vmlal_s16().
+ int16_t intermediate_result[15][8]; // 15 rows, 8 columns.
+ // In the simple special cases where the samples in each row are all the
+ // same, store one sample per row in a column vector.
+ int16_t intermediate_result_column[15];
+ };
+
+ const auto* const src = static_cast<const uint16_t*>(source);
+ const ptrdiff_t src_stride = source_stride >> 1;
+ using DestType =
+ typename std::conditional<is_compound, int16_t, uint16_t>::type;
+ auto* dst = static_cast<DestType*>(dest);
+ const ptrdiff_t dst_stride = is_compound ? dest_stride : dest_stride >> 1;
+ assert(block_width >= 8);
+ assert(block_height >= 8);
+
+ // Warp process applies for each 8x8 block.
+ int start_y = block_start_y;
+ do {
+ int start_x = block_start_x;
+ do {
+ const int src_x = (start_x + 4) << subsampling_x;
+ const int src_y = (start_y + 4) << subsampling_y;
+ const int dst_x =
+ src_x * warp_params[2] + src_y * warp_params[3] + warp_params[0];
+ const int dst_y =
+ src_x * warp_params[4] + src_y * warp_params[5] + warp_params[1];
+ const int x4 = dst_x >> subsampling_x;
+ const int y4 = dst_y >> subsampling_y;
+ const int ix4 = x4 >> kWarpedModelPrecisionBits;
+ const int iy4 = y4 >> kWarpedModelPrecisionBits;
+ // A prediction block may fall outside the frame's boundaries. If a
+ // prediction block is calculated using only samples outside the frame's
+ // boundary, the filtering can be simplified. We can divide the plane
+ // into several regions and handle them differently.
+ //
+ // | |
+ // 1 | 3 | 1
+ // | |
+ // -------+-----------+-------
+ // |***********|
+ // 2 |*****4*****| 2
+ // |***********|
+ // -------+-----------+-------
+ // | |
+ // 1 | 3 | 1
+ // | |
+ //
+ // At the center, region 4 represents the frame and is the general case.
+ //
+ // In regions 1 and 2, the prediction block is outside the frame's
+ // boundary horizontally. Therefore the horizontal filtering can be
+ // simplified. Furthermore, in the region 1 (at the four corners), the
+ // prediction is outside the frame's boundary both horizontally and
+ // vertically, so we get a constant prediction block.
+ //
+ // In region 3, the prediction block is outside the frame's boundary
+ // vertically. Unfortunately because we apply the horizontal filters
+ // first, by the time we apply the vertical filters, they no longer see
+ // simple inputs. So the only simplification is that all the rows are
+ // the same, but we still need to apply all the horizontal and vertical
+ // filters.
+
+ // Check for two simple special cases, where the horizontal filter can
+ // be significantly simplified.
+ //
+ // In general, for each row, the horizontal filter is calculated as
+ // follows:
+ // for (int x = -4; x < 4; ++x) {
+ // const int offset = ...;
+ // int sum = first_pass_offset;
+ // for (int k = 0; k < 8; ++k) {
+ // const int column = Clip3(ix4 + x + k - 3, 0, source_width - 1);
+ // sum += kWarpedFilters[offset][k] * src_row[column];
+ // }
+ // ...
+ // }
+ // The column index before clipping, ix4 + x + k - 3, varies in the range
+ // ix4 - 7 <= ix4 + x + k - 3 <= ix4 + 7. If ix4 - 7 >= source_width - 1
+ // or ix4 + 7 <= 0, then all the column indexes are clipped to the same
+ // border index (source_width - 1 or 0, respectively). Then for each x,
+ // the inner for loop of the horizontal filter is reduced to multiplying
+ // the border pixel by the sum of the filter coefficients.
+ if (ix4 - 7 >= source_width - 1 || ix4 + 7 <= 0) {
+ // Regions 1 and 2.
+ // Points to the left or right border of the first row of |src|.
+ const uint16_t* first_row_border =
+ (ix4 + 7 <= 0) ? src : src + source_width - 1;
+ // In general, for y in [-7, 8), the row number iy4 + y is clipped:
+ // const int row = Clip3(iy4 + y, 0, source_height - 1);
+ // In two special cases, iy4 + y is clipped to either 0 or
+ // source_height - 1 for all y. In the rest of the cases, iy4 + y is
+ // bounded and we can avoid clipping iy4 + y by relying on a reference
+ // frame's boundary extension on the top and bottom.
+ if (iy4 - 7 >= source_height - 1 || iy4 + 7 <= 0) {
+ // Region 1.
+ // Every sample used to calculate the prediction block has the same
+ // value. So the whole prediction block has the same value.
+ const int row = (iy4 + 7 <= 0) ? 0 : source_height - 1;
+ const uint16_t row_border_pixel = first_row_border[row * src_stride];
+
+ DestType* dst_row = dst + start_x - block_start_x;
+ for (int y = 0; y < 8; ++y) {
+ if (is_compound) {
+ const int16x8_t sum =
+ vdupq_n_s16(row_border_pixel << (kInterRoundBitsVertical -
+ kRoundBitsVertical));
+ vst1q_s16(reinterpret_cast<int16_t*>(dst_row),
+ vaddq_s16(sum, vdupq_n_s16(kCompoundOffset)));
+ } else {
+ vst1q_u16(reinterpret_cast<uint16_t*>(dst_row),
+ vdupq_n_u16(row_border_pixel));
+ }
+ dst_row += dst_stride;
+ }
+ // End of region 1. Continue the |start_x| do-while loop.
+ start_x += 8;
+ continue;
+ }
+
+ // Region 2.
+ // Horizontal filter.
+ // The input values in this region are generated by extending the border
+ // which makes them identical in the horizontal direction. This
+ // computation could be inlined in the vertical pass but most
+ // implementations will need a transpose of some sort.
+ // It is not necessary to use the offset values here because the
+ // horizontal pass is a simple shift and the vertical pass will always
+ // require using 32 bits.
+ for (int y = -7; y < 8; ++y) {
+ // We may over-read up to 13 pixels above the top source row, or up
+ // to 13 pixels below the bottom source row. This is proved in
+ // warp.cc.
+ const int row = iy4 + y;
+ int sum = first_row_border[row * src_stride];
+ sum <<= (kFilterBits - kInterRoundBitsHorizontal);
+ intermediate_result_column[y + 7] = sum;
+ }
+ // Vertical filter.
+ DestType* dst_row = dst + start_x - block_start_x;
+ int sy4 =
+ (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
+ for (int y = 0; y < 8; ++y) {
+ int sy = sy4 - MultiplyBy4(gamma);
+#if defined(__aarch64__)
+ const int16x8_t intermediate =
+ vld1q_s16(&intermediate_result_column[y]);
+ int16_t tmp[8];
+ for (int x = 0; x < 8; ++x) {
+ const int offset =
+ RightShiftWithRounding(sy, kWarpedDiffPrecisionBits) +
+ kWarpedPixelPrecisionShifts;
+ const int16x8_t filter = vld1q_s16(kWarpedFilters[offset]);
+ const int32x4_t product_low =
+ vmull_s16(vget_low_s16(filter), vget_low_s16(intermediate));
+ const int32x4_t product_high =
+ vmull_s16(vget_high_s16(filter), vget_high_s16(intermediate));
+ // vaddvq_s32 is only available on __aarch64__.
+ const int32_t sum =
+ vaddvq_s32(product_low) + vaddvq_s32(product_high);
+ const int16_t sum_descale =
+ RightShiftWithRounding(sum, kRoundBitsVertical);
+ if (is_compound) {
+ dst_row[x] = sum_descale + kCompoundOffset;
+ } else {
+ tmp[x] = sum_descale;
+ }
+ sy += gamma;
+ }
+ if (!is_compound) {
+ const uint16x8_t v_max_bitdepth =
+ vdupq_n_u16((1 << kBitdepth10) - 1);
+ const int16x8_t sum = vld1q_s16(tmp);
+ const uint16x8_t d0 =
+ vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(sum, vdupq_n_s16(0))),
+ v_max_bitdepth);
+ vst1q_u16(reinterpret_cast<uint16_t*>(dst_row), d0);
+ }
+#else // !defined(__aarch64__)
+ int16x8_t filter[8];
+ for (int x = 0; x < 8; ++x) {
+ const int offset =
+ RightShiftWithRounding(sy, kWarpedDiffPrecisionBits) +
+ kWarpedPixelPrecisionShifts;
+ filter[x] = vld1q_s16(kWarpedFilters[offset]);
+ sy += gamma;
+ }
+ Transpose8x8(filter);
+ int32x4_t sum_low = vdupq_n_s32(0);
+ int32x4_t sum_high = sum_low;
+ for (int k = 0; k < 8; ++k) {
+ const int16_t intermediate = intermediate_result_column[y + k];
+ sum_low =
+ vmlal_n_s16(sum_low, vget_low_s16(filter[k]), intermediate);
+ sum_high =
+ vmlal_n_s16(sum_high, vget_high_s16(filter[k]), intermediate);
+ }
+ if (is_compound) {
+ const int16x8_t sum =
+ vcombine_s16(vrshrn_n_s32(sum_low, kRoundBitsVertical),
+ vrshrn_n_s32(sum_high, kRoundBitsVertical));
+ vst1q_s16(reinterpret_cast<int16_t*>(dst_row),
+ vaddq_s16(sum, vdupq_n_s16(kCompoundOffset)));
+ } else {
+ const uint16x4_t v_max_bitdepth =
+ vdup_n_u16((1 << kBitdepth10) - 1);
+ const uint16x4_t d0 = vmin_u16(
+ vqrshrun_n_s32(sum_low, kRoundBitsVertical), v_max_bitdepth);
+ const uint16x4_t d1 = vmin_u16(
+ vqrshrun_n_s32(sum_high, kRoundBitsVertical), v_max_bitdepth);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst_row), d0);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst_row + 4), d1);
+ }
+#endif // defined(__aarch64__)
+ dst_row += dst_stride;
+ sy4 += delta;
+ }
+ // End of region 2. Continue the |start_x| do-while loop.
+ start_x += 8;
+ continue;
+ }
+
+ // Regions 3 and 4.
+ // At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0.
+
+ // In general, for y in [-7, 8), the row number iy4 + y is clipped:
+ // const int row = Clip3(iy4 + y, 0, source_height - 1);
+ // In two special cases, iy4 + y is clipped to either 0 or
+ // source_height - 1 for all y. In the rest of the cases, iy4 + y is
+ // bounded and we can avoid clipping iy4 + y by relying on a reference
+ // frame's boundary extension on the top and bottom.
+ if (iy4 - 7 >= source_height - 1 || iy4 + 7 <= 0) {
+ // Region 3.
+ // Horizontal filter.
+ const int row = (iy4 + 7 <= 0) ? 0 : source_height - 1;
+ const uint16_t* const src_row = src + row * src_stride;
+ // Read 15 samples from &src_row[ix4 - 7]. The 16th sample is also
+ // read but is ignored.
+ //
+ // NOTE: This may read up to 13 pixels before src_row[0] or up to 14
+ // pixels after src_row[source_width - 1]. We assume the source frame
+ // has left and right borders of at least 13 pixels that extend the
+ // frame boundary pixels. We also assume there is at least one extra
+ // padding pixel after the right border of the last source row.
+ const uint16x8x2_t src_row_v = LoadSrcRow(&src_row[ix4 - 7]);
+ int sx4 = (x4 & ((1 << kWarpedModelPrecisionBits) - 1)) - beta * 7;
+ for (int y = -7; y < 8; ++y) {
+ HorizontalFilter(sx4, alpha, src_row_v, intermediate_result[y + 7]);
+ sx4 += beta;
+ }
+ } else {
+ // Region 4.
+ // Horizontal filter.
+ int sx4 = (x4 & ((1 << kWarpedModelPrecisionBits) - 1)) - beta * 7;
+ for (int y = -7; y < 8; ++y) {
+ // We may over-read up to 13 pixels above the top source row, or up
+ // to 13 pixels below the bottom source row. This is proved in
+ // warp.cc.
+ const int row = iy4 + y;
+ const uint16_t* const src_row = src + row * src_stride;
+ // Read 15 samples from &src_row[ix4 - 7]. The 16th sample is also
+ // read but is ignored.
+ //
+ // NOTE: This may read up to pixels bytes before src_row[0] or up to
+ // 14 pixels after src_row[source_width - 1]. We assume the source
+ // frame has left and right borders of at least 13 pixels that extend
+ // the frame boundary pixels. We also assume there is at least one
+ // extra padding pixel after the right border of the last source row.
+ const uint16x8x2_t src_row_v = LoadSrcRow(&src_row[ix4 - 7]);
+ HorizontalFilter(sx4, alpha, src_row_v, intermediate_result[y + 7]);
+ sx4 += beta;
+ }
+ }
+
+ // Regions 3 and 4.
+ // Vertical filter.
+ DestType* dst_row = dst + start_x - block_start_x;
+ int sy4 =
+ (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
+ for (int y = 0; y < 8; ++y) {
+ int sy = sy4 - MultiplyBy4(gamma);
+ int16x8_t filter[8];
+ for (auto& f : filter) {
+ const int offset =
+ RightShiftWithRounding(sy, kWarpedDiffPrecisionBits) +
+ kWarpedPixelPrecisionShifts;
+ f = vld1q_s16(kWarpedFilters[offset]);
+ sy += gamma;
+ }
+ Transpose8x8(filter);
+ int32x4_t sum_low = vdupq_n_s32(0);
+ int32x4_t sum_high = sum_low;
+ for (int k = 0; k < 8; ++k) {
+ const int16x8_t intermediate = vld1q_s16(intermediate_result[y + k]);
+ sum_low = vmlal_s16(sum_low, vget_low_s16(filter[k]),
+ vget_low_s16(intermediate));
+ sum_high = vmlal_s16(sum_high, vget_high_s16(filter[k]),
+ vget_high_s16(intermediate));
+ }
+ if (is_compound) {
+ const int16x8_t sum =
+ vcombine_s16(vrshrn_n_s32(sum_low, kRoundBitsVertical),
+ vrshrn_n_s32(sum_high, kRoundBitsVertical));
+ vst1q_s16(reinterpret_cast<int16_t*>(dst_row),
+ vaddq_s16(sum, vdupq_n_s16(kCompoundOffset)));
+ } else {
+ const uint16x4_t v_max_bitdepth = vdup_n_u16((1 << kBitdepth10) - 1);
+ const uint16x4_t d0 = vmin_u16(
+ vqrshrun_n_s32(sum_low, kRoundBitsVertical), v_max_bitdepth);
+ const uint16x4_t d1 = vmin_u16(
+ vqrshrun_n_s32(sum_high, kRoundBitsVertical), v_max_bitdepth);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst_row), d0);
+ vst1_u16(reinterpret_cast<uint16_t*>(dst_row + 4), d1);
+ }
+ dst_row += dst_stride;
+ sy4 += delta;
+ }
+ start_x += 8;
+ } while (start_x < block_start_x + block_width);
+ dst += 8 * dst_stride;
+ start_y += 8;
+ } while (start_y < block_start_y + block_height);
+}
+
+void Init10bpp() {
+ Dsp* dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ dsp->warp = Warp_NEON</*is_compound=*/false>;
+ dsp->warp_compound = Warp_NEON</*is_compound=*/true>;
+}
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+void WarpInit_NEON() {
+ low_bitdepth::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/warp_neon.h b/src/dsp/arm/warp_neon.h
index dbcaa23..cd60602 100644
--- a/src/dsp/arm/warp_neon.h
+++ b/src/dsp/arm/warp_neon.h
@@ -32,6 +32,9 @@ void WarpInit_NEON();
#if LIBGAV1_ENABLE_NEON
#define LIBGAV1_Dsp8bpp_Warp LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_WarpCompound LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_Warp LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WarpCompound LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_WARP_NEON_H_
diff --git a/src/dsp/arm/weight_mask_neon.cc b/src/dsp/arm/weight_mask_neon.cc
index 7e5bff0..5ad6b97 100644
--- a/src/dsp/arm/weight_mask_neon.cc
+++ b/src/dsp/arm/weight_mask_neon.cc
@@ -32,20 +32,51 @@
namespace libgav1 {
namespace dsp {
-namespace low_bitdepth {
namespace {
-constexpr int kRoundingBits8bpp = 4;
+inline int16x8x2_t LoadPred(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1) {
+ const int16x8x2_t pred = {vld1q_s16(prediction_0), vld1q_s16(prediction_1)};
+ return pred;
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+inline uint16x8x2_t LoadPred(const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1) {
+ const uint16x8x2_t pred = {vld1q_u16(prediction_0), vld1q_u16(prediction_1)};
+ return pred;
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+template <int bitdepth>
+inline uint16x8_t AbsolutePredDifference(const int16x8x2_t pred) {
+ static_assert(bitdepth == 8, "");
+ constexpr int rounding_bits = bitdepth - 8 + ((bitdepth == 12) ? 2 : 4);
+ return vrshrq_n_u16(
+ vreinterpretq_u16_s16(vabdq_s16(pred.val[0], pred.val[1])),
+ rounding_bits);
+}
-template <bool mask_is_inverse>
-inline void WeightMask8_NEON(const int16_t* prediction_0,
- const int16_t* prediction_1, uint8_t* mask) {
- const int16x8_t pred_0 = vld1q_s16(prediction_0);
- const int16x8_t pred_1 = vld1q_s16(prediction_1);
+template <int bitdepth>
+inline uint16x8_t AbsolutePredDifference(const uint16x8x2_t pred) {
+ constexpr int rounding_bits = bitdepth - 8 + ((bitdepth == 12) ? 2 : 4);
+ return vrshrq_n_u16(vabdq_u16(pred.val[0], pred.val[1]), rounding_bits);
+}
+
+template <bool mask_is_inverse, int bitdepth>
+inline void WeightMask8_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask) {
+ using PredType =
+ typename std::conditional<bitdepth == 8, int16_t, uint16_t>::type;
+ using PredTypeVecx2 =
+ typename std::conditional<bitdepth == 8, int16x8x2_t, uint16x8x2_t>::type;
+ const PredTypeVecx2 pred =
+ LoadPred(static_cast<const PredType*>(prediction_0),
+ static_cast<const PredType*>(prediction_1));
+ const uint16x8_t difference = AbsolutePredDifference<bitdepth>(pred);
const uint8x8_t difference_offset = vdup_n_u8(38);
const uint8x8_t mask_ceiling = vdup_n_u8(64);
- const uint16x8_t difference = vrshrq_n_u16(
- vreinterpretq_u16_s16(vabdq_s16(pred_0, pred_1)), kRoundingBits8bpp);
const uint8x8_t adjusted_difference =
vqadd_u8(vqshrn_n_u16(difference, 4), difference_offset);
const uint8x8_t mask_value = vmin_u8(adjusted_difference, mask_ceiling);
@@ -58,7 +89,7 @@ inline void WeightMask8_NEON(const int16_t* prediction_0,
}
#define WEIGHT8_WITHOUT_STRIDE \
- WeightMask8_NEON<mask_is_inverse>(pred_0, pred_1, mask)
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0, pred_1, mask)
#define WEIGHT8_AND_STRIDE \
WEIGHT8_WITHOUT_STRIDE; \
@@ -66,9 +97,12 @@ inline void WeightMask8_NEON(const int16_t* prediction_0,
pred_1 += 8; \
mask += mask_stride
-template <bool mask_is_inverse>
-void WeightMask8x8_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+// |pred_0| and |pred_1| are cast as int16_t* for the sake of pointer math. They
+// are uint16_t* for 10bpp and 12bpp, and this is handled in WeightMask8_NEON.
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask8x8_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y = 0;
@@ -78,9 +112,11 @@ void WeightMask8x8_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT8_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask8x16_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask8x16_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -92,9 +128,11 @@ void WeightMask8x16_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT8_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask8x32_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask8x32_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -109,9 +147,9 @@ void WeightMask8x32_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT8_WITHOUT_STRIDE;
}
-#define WEIGHT16_WITHOUT_STRIDE \
- WeightMask8_NEON<mask_is_inverse>(pred_0, pred_1, mask); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 8, pred_1 + 8, mask + 8)
+#define WEIGHT16_WITHOUT_STRIDE \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0, pred_1, mask); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 8, pred_1 + 8, mask + 8)
#define WEIGHT16_AND_STRIDE \
WEIGHT16_WITHOUT_STRIDE; \
@@ -119,9 +157,11 @@ void WeightMask8x32_NEON(const void* prediction_0, const void* prediction_1,
pred_1 += 16; \
mask += mask_stride
-template <bool mask_is_inverse>
-void WeightMask16x8_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask16x8_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y = 0;
@@ -131,9 +171,11 @@ void WeightMask16x8_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT16_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask16x16_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask16x16_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -145,9 +187,11 @@ void WeightMask16x16_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT16_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask16x32_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask16x32_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -162,9 +206,11 @@ void WeightMask16x32_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT16_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask16x64_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask16x64_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -176,11 +222,14 @@ void WeightMask16x64_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT16_WITHOUT_STRIDE;
}
-#define WEIGHT32_WITHOUT_STRIDE \
- WeightMask8_NEON<mask_is_inverse>(pred_0, pred_1, mask); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 8, pred_1 + 8, mask + 8); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 16, pred_1 + 16, mask + 16); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 24, pred_1 + 24, mask + 24)
+#define WEIGHT32_WITHOUT_STRIDE \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0, pred_1, mask); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 8, pred_1 + 8, \
+ mask + 8); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 16, pred_1 + 16, \
+ mask + 16); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 24, pred_1 + 24, \
+ mask + 24)
#define WEIGHT32_AND_STRIDE \
WEIGHT32_WITHOUT_STRIDE; \
@@ -188,9 +237,11 @@ void WeightMask16x64_NEON(const void* prediction_0, const void* prediction_1,
pred_1 += 32; \
mask += mask_stride
-template <bool mask_is_inverse>
-void WeightMask32x8_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask32x8_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
WEIGHT32_AND_STRIDE;
@@ -203,9 +254,11 @@ void WeightMask32x8_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT32_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask32x16_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask32x16_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -217,9 +270,11 @@ void WeightMask32x16_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT32_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask32x32_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask32x32_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -234,9 +289,11 @@ void WeightMask32x32_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT32_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask32x64_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask32x64_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -248,15 +305,22 @@ void WeightMask32x64_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT32_WITHOUT_STRIDE;
}
-#define WEIGHT64_WITHOUT_STRIDE \
- WeightMask8_NEON<mask_is_inverse>(pred_0, pred_1, mask); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 8, pred_1 + 8, mask + 8); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 16, pred_1 + 16, mask + 16); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 24, pred_1 + 24, mask + 24); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 32, pred_1 + 32, mask + 32); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 40, pred_1 + 40, mask + 40); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 48, pred_1 + 48, mask + 48); \
- WeightMask8_NEON<mask_is_inverse>(pred_0 + 56, pred_1 + 56, mask + 56)
+#define WEIGHT64_WITHOUT_STRIDE \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0, pred_1, mask); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 8, pred_1 + 8, \
+ mask + 8); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 16, pred_1 + 16, \
+ mask + 16); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 24, pred_1 + 24, \
+ mask + 24); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 32, pred_1 + 32, \
+ mask + 32); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 40, pred_1 + 40, \
+ mask + 40); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 48, pred_1 + 48, \
+ mask + 48); \
+ WeightMask8_NEON<mask_is_inverse, bitdepth>(pred_0 + 56, pred_1 + 56, \
+ mask + 56)
#define WEIGHT64_AND_STRIDE \
WEIGHT64_WITHOUT_STRIDE; \
@@ -264,9 +328,11 @@ void WeightMask32x64_NEON(const void* prediction_0, const void* prediction_1,
pred_1 += 64; \
mask += mask_stride
-template <bool mask_is_inverse>
-void WeightMask64x16_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask64x16_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -278,9 +344,11 @@ void WeightMask64x16_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT64_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask64x32_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask64x32_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -295,9 +363,11 @@ void WeightMask64x32_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT64_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask64x64_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask64x64_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -309,9 +379,11 @@ void WeightMask64x64_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT64_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask64x128_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask64x128_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -324,9 +396,11 @@ void WeightMask64x128_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT64_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask128x64_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask128x64_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -366,9 +440,11 @@ void WeightMask128x64_NEON(const void* prediction_0, const void* prediction_1,
WEIGHT64_WITHOUT_STRIDE;
}
-template <bool mask_is_inverse>
-void WeightMask128x128_NEON(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+template <bool mask_is_inverse, int bitdepth>
+void WeightMask128x128_NEON(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -416,11 +492,20 @@ void WeightMask128x128_NEON(const void* prediction_0, const void* prediction_1,
mask += 64;
WEIGHT64_WITHOUT_STRIDE;
}
+#undef WEIGHT8_WITHOUT_STRIDE
+#undef WEIGHT8_AND_STRIDE
+#undef WEIGHT16_WITHOUT_STRIDE
+#undef WEIGHT16_AND_STRIDE
+#undef WEIGHT32_WITHOUT_STRIDE
+#undef WEIGHT32_AND_STRIDE
+#undef WEIGHT64_WITHOUT_STRIDE
+#undef WEIGHT64_AND_STRIDE
#define INIT_WEIGHT_MASK_8BPP(width, height, w_index, h_index) \
dsp->weight_mask[w_index][h_index][0] = \
- WeightMask##width##x##height##_NEON<0>; \
- dsp->weight_mask[w_index][h_index][1] = WeightMask##width##x##height##_NEON<1>
+ WeightMask##width##x##height##_NEON<0, 8>; \
+ dsp->weight_mask[w_index][h_index][1] = \
+ WeightMask##width##x##height##_NEON<1, 8>
void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
@@ -442,11 +527,51 @@ void Init8bpp() {
INIT_WEIGHT_MASK_8BPP(128, 64, 4, 3);
INIT_WEIGHT_MASK_8BPP(128, 128, 4, 4);
}
+#undef INIT_WEIGHT_MASK_8BPP
} // namespace
-} // namespace low_bitdepth
-void WeightMaskInit_NEON() { low_bitdepth::Init8bpp(); }
+#if LIBGAV1_MAX_BITDEPTH >= 10
+namespace high_bitdepth {
+namespace {
+
+#define INIT_WEIGHT_MASK_10BPP(width, height, w_index, h_index) \
+ dsp->weight_mask[w_index][h_index][0] = \
+ WeightMask##width##x##height##_NEON<0, 10>; \
+ dsp->weight_mask[w_index][h_index][1] = \
+ WeightMask##width##x##height##_NEON<1, 10>
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+ assert(dsp != nullptr);
+ INIT_WEIGHT_MASK_10BPP(8, 8, 0, 0);
+ INIT_WEIGHT_MASK_10BPP(8, 16, 0, 1);
+ INIT_WEIGHT_MASK_10BPP(8, 32, 0, 2);
+ INIT_WEIGHT_MASK_10BPP(16, 8, 1, 0);
+ INIT_WEIGHT_MASK_10BPP(16, 16, 1, 1);
+ INIT_WEIGHT_MASK_10BPP(16, 32, 1, 2);
+ INIT_WEIGHT_MASK_10BPP(16, 64, 1, 3);
+ INIT_WEIGHT_MASK_10BPP(32, 8, 2, 0);
+ INIT_WEIGHT_MASK_10BPP(32, 16, 2, 1);
+ INIT_WEIGHT_MASK_10BPP(32, 32, 2, 2);
+ INIT_WEIGHT_MASK_10BPP(32, 64, 2, 3);
+ INIT_WEIGHT_MASK_10BPP(64, 16, 3, 1);
+ INIT_WEIGHT_MASK_10BPP(64, 32, 3, 2);
+ INIT_WEIGHT_MASK_10BPP(64, 64, 3, 3);
+ INIT_WEIGHT_MASK_10BPP(64, 128, 3, 4);
+ INIT_WEIGHT_MASK_10BPP(128, 64, 4, 3);
+ INIT_WEIGHT_MASK_10BPP(128, 128, 4, 4);
+}
+#undef INIT_WEIGHT_MASK_10BPP
+
+} // namespace
+} // namespace high_bitdepth
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+void WeightMaskInit_NEON() {
+ Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ high_bitdepth::Init10bpp();
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/arm/weight_mask_neon.h b/src/dsp/arm/weight_mask_neon.h
index b4749ec..573f7de 100644
--- a/src/dsp/arm/weight_mask_neon.h
+++ b/src/dsp/arm/weight_mask_neon.h
@@ -47,6 +47,24 @@ void WeightMaskInit_NEON();
#define LIBGAV1_Dsp8bpp_WeightMask_64x128 LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_WeightMask_128x64 LIBGAV1_CPU_NEON
#define LIBGAV1_Dsp8bpp_WeightMask_128x128 LIBGAV1_CPU_NEON
+
+#define LIBGAV1_Dsp10bpp_WeightMask_8x8 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_8x16 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_8x32 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_16x8 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_16x16 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_16x32 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_16x64 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_32x8 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_32x16 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_32x32 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_32x64 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_64x16 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_64x32 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_64x64 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_64x128 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_128x64 LIBGAV1_CPU_NEON
+#define LIBGAV1_Dsp10bpp_WeightMask_128x128 LIBGAV1_CPU_NEON
#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_SRC_DSP_ARM_WEIGHT_MASK_NEON_H_
diff --git a/src/dsp/average_blend.cc b/src/dsp/average_blend.cc
index d3ec21f..273b355 100644
--- a/src/dsp/average_blend.cc
+++ b/src/dsp/average_blend.cc
@@ -27,8 +27,9 @@ namespace dsp {
namespace {
template <int bitdepth, typename Pixel>
-void AverageBlend_C(const void* prediction_0, const void* prediction_1,
- const int width, const int height, void* const dest,
+void AverageBlend_C(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1, const int width,
+ const int height, void* const dest,
const ptrdiff_t dest_stride) {
// 7.11.3.2 Rounding variables derivation process
// 2 * FILTER_BITS(7) - (InterRound0(3|5) + InterRound1(7))
diff --git a/src/dsp/average_blend_test.cc b/src/dsp/average_blend_test.cc
index fe8a9d6..04e24e5 100644
--- a/src/dsp/average_blend_test.cc
+++ b/src/dsp/average_blend_test.cc
@@ -14,13 +14,13 @@
#include "src/dsp/average_blend.h"
+#include <cassert>
#include <cstdint>
#include <ostream>
#include <string>
#include <type_traits>
#include "absl/strings/match.h"
-#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
@@ -55,18 +55,8 @@ constexpr int kCompoundPredictionRange[3][2] = {
{3974, 61559},
};
-struct TestParam {
- TestParam(int width, int height) : width(width), height(height) {}
- int width;
- int height;
-};
-
-std::ostream& operator<<(std::ostream& os, const TestParam& param) {
- return os << "BlockSize" << param.width << "x" << param.height;
-}
-
template <int bitdepth, typename Pixel>
-class AverageBlendTest : public testing::TestWithParam<TestParam>,
+class AverageBlendTest : public testing::TestWithParam<BlockSize>,
public test_utils::MaxAlignedAllocable {
public:
AverageBlendTest() = default;
@@ -105,8 +95,8 @@ class AverageBlendTest : public testing::TestWithParam<TestParam>,
using PredType =
typename std::conditional<bitdepth == 8, int16_t, uint16_t>::type;
static constexpr int kDestStride = kMaxSuperBlockSizeInPixels;
- const int width_ = GetParam().width;
- const int height_ = GetParam().height;
+ const int width_ = kBlockWidthPixels[GetParam()];
+ const int height_ = kBlockHeightPixels[GetParam()];
alignas(kMaxAlignment) PredType
source1_[kMaxSuperBlockSizeInPixels * kMaxSuperBlockSizeInPixels];
alignas(kMaxAlignment) PredType
@@ -158,69 +148,54 @@ void AverageBlendTest<bitdepth, Pixel>::Test(const char* digest, int num_tests,
kDestStride, kDestStride, false));
}
- test_utils::CheckMd5Digest(
- kAverageBlend, absl::StrFormat("%dx%d", width_, height_).c_str(), digest,
- dest_, sizeof(dest_[0]) * kDestStride * height_, elapsed_time);
+ test_utils::CheckMd5Digest(kAverageBlend, ToString(GetParam()), digest, dest_,
+ sizeof(dest_[0]) * kDestStride * height_,
+ elapsed_time);
}
-const TestParam kTestParam[] = {
- TestParam(4, 4), TestParam(4, 8), TestParam(8, 8),
- TestParam(8, 16), TestParam(16, 8), TestParam(16, 16),
- TestParam(16, 32), TestParam(32, 16), TestParam(32, 32),
- TestParam(32, 64), TestParam(64, 32), TestParam(64, 64),
- TestParam(64, 128), TestParam(128, 64), TestParam(128, 128),
+const BlockSize kTestParam[] = {
+ kBlock4x4, kBlock4x8, kBlock4x16, kBlock8x4, kBlock8x8,
+ kBlock8x16, kBlock8x32, kBlock16x4, kBlock16x8, kBlock16x16,
+ kBlock16x32, kBlock16x64, kBlock32x8, kBlock32x16, kBlock32x32,
+ kBlock32x64, kBlock64x16, kBlock64x32, kBlock64x64, kBlock64x128,
+ kBlock128x64, kBlock128x128,
};
using AverageBlendTest8bpp = AverageBlendTest<8, uint8_t>;
-const char* GetAverageBlendDigest8bpp(const TestParam block_size) {
- static const char* const kDigestsWidth4[] = {
+const char* GetAverageBlendDigest8bpp(const BlockSize block_size) {
+ static const char* const kDigests[kMaxBlockSizes] = {
+ // 4xN
"152bcc35946900b1ed16369b3e7a81b7",
"c23e9b5698f7384eaae30a3908118b77",
- };
- static const char* const kDigestsWidth8[] = {
+ "f2da31d940f62490c368c03d32d3ede8",
+ // 8xN
+ "73c95485ef956e1d9ab914e88e6a202b",
"d90d3abd368e58c513070a88b34649ba",
"77f7d53d0edeffb3537afffd9ff33a4a",
- };
- static const char* const kDigestsWidth16[] = {
+ "460b9b1e6b83f65f013cfcaf67ec0122",
+ // 16xN
+ "96454a56de940174ff92e9bb686d6d38",
"a50e268e93b48ae39cc2a47d377410e2",
"65c8502ff6d78065d466f9911ed6bb3e",
"bc2c873b9f5d74b396e1df705e87f699",
- };
- static const char* const kDigestsWidth32[] = {
+ "b4dae656484b2d255d1e09b7f34e12c1",
+ // 32xN
+ "7e1e5db92b22a96e5226a23de883d766",
"ca40d46d89773e7f858b15fcecd43cc0",
"bfdc894707323f4dc43d1326309f8368",
"f4733417621719b7feba3166ec0da5b9",
- };
- static const char* const kDigestsWidth64[] = {
+ // 64xN
+ "378fa0594d22f01c8e8931c2a908d7c4",
"db38fe2e082bd4a09acb3bb1d52ee11e",
"3ad44401cc731215c46c9b7d96f7e4ae",
"6c43267be5ed03d204a05fe36090f870",
- };
- static const char* const kDigestsWidth128[] = {
+ // 128xN
"c8cfe46ebf166c1cbf08e8804206aadb",
"b0557b5156d2334c8ce4a7ee12f9d6b4",
};
- // height < width implies 0.
- // height == width implies 1.
- // height > width implies 2.
- const int height_index = block_size.height / block_size.width;
- switch (block_size.width) {
- case 4:
- return kDigestsWidth4[height_index - 1];
- case 8:
- return kDigestsWidth8[height_index - 1];
- case 16:
- return kDigestsWidth16[height_index];
- case 32:
- return kDigestsWidth32[height_index];
- case 64:
- return kDigestsWidth64[height_index];
- default:
- EXPECT_EQ(block_size.width, 128)
- << "Unknown width parameter: " << block_size.width;
- return kDigestsWidth128[height_index];
- }
+ assert(block_size < kMaxBlockSizes);
+ return kDigests[block_size];
}
TEST_P(AverageBlendTest8bpp, Blending) {
@@ -229,7 +204,9 @@ TEST_P(AverageBlendTest8bpp, Blending) {
TEST_P(AverageBlendTest8bpp, DISABLED_Speed) {
Test(GetAverageBlendDigest8bpp(GetParam()),
- kNumSpeedTests / (GetParam().height * GetParam().width), false);
+ kNumSpeedTests /
+ (kBlockHeightPixels[GetParam()] * kBlockWidthPixels[GetParam()]),
+ false);
}
INSTANTIATE_TEST_SUITE_P(C, AverageBlendTest8bpp,
@@ -246,54 +223,39 @@ INSTANTIATE_TEST_SUITE_P(NEON, AverageBlendTest8bpp,
#if LIBGAV1_MAX_BITDEPTH >= 10
using AverageBlendTest10bpp = AverageBlendTest<10, uint16_t>;
-const char* GetAverageBlendDigest10bpp(const TestParam block_size) {
- static const char* const kDigestsWidth4[] = {
+const char* GetAverageBlendDigest10bpp(const BlockSize block_size) {
+ static const char* const kDigests[kMaxBlockSizes] = {
+ // 4xN
"98c0671c092b4288adcaaa17362cc4a3",
"7083f3def8bfb63ab3a985ef5616a923",
- };
- static const char* const kDigestsWidth8[] = {
+ "a7211ee2eaa6f88e08875b377d17b0f1",
+ // 8xN
+ "11f9ab881700f2ef0f82d8d4662868c6",
"3bee144b9ea6f4288b860c24f88a22f3",
"27113bd17bf95034f100e9046c7b59d2",
- };
- static const char* const kDigestsWidth16[] = {
+ "c42886a5e16e23a81e43833d34467558",
+ // 16xN
+ "b0ac2eb0a7a6596d6d1339074c7f8771",
"24c9e079b9a8647a6ee03f5441f2cdd9",
"dd05777751ccdb4356856c90e1176e53",
"27b1d69d035b1525c013b7373cfe3875",
- };
- static const char* const kDigestsWidth32[] = {
+ "08c46403afe19e6b008ccc8f56633da9",
+ // 32xN
+ "36d434db11298aba76166df06e9b8125",
"efd24dd7b555786bff1a482e51170ea3",
"3b37ddac87de443cd18784f02c2d1dd5",
"80d8070939a743a20689a65bf5dc0a68",
- };
- static const char* const kDigestsWidth64[] = {
+ // 64xN
+ "88e747246237c6408d0bd4cc3ecc8396",
"af1fe8c52487c9f2951c3ea516828abb",
"ea6f18ff56b053748c18032b7e048e83",
"af0cb87fe27d24c2e0afd2c90a8533a6",
- };
- static const char* const kDigestsWidth128[] = {
+ // 128xN
"16a83b19911d6dc7278a694b8baa9901",
"bd22e77ce6fa727267ff63eeb4dcb19c",
};
- // (height < width) -> 0
- // (height == width) -> 1
- // (height > width) -> 2
- const int height_index = block_size.height / block_size.width;
- switch (block_size.width) {
- case 4:
- return kDigestsWidth4[height_index - 1];
- case 8:
- return kDigestsWidth8[height_index - 1];
- case 16:
- return kDigestsWidth16[height_index];
- case 32:
- return kDigestsWidth32[height_index];
- case 64:
- return kDigestsWidth64[height_index];
- default:
- EXPECT_EQ(block_size.width, 128)
- << "Unknown width parameter: " << block_size.width;
- return kDigestsWidth128[height_index];
- }
+ assert(block_size < kMaxBlockSizes);
+ return kDigests[block_size];
}
TEST_P(AverageBlendTest10bpp, Blending) {
@@ -302,7 +264,10 @@ TEST_P(AverageBlendTest10bpp, Blending) {
TEST_P(AverageBlendTest10bpp, DISABLED_Speed) {
Test(GetAverageBlendDigest10bpp(GetParam()),
- kNumSpeedTests / (GetParam().height * GetParam().width) / 2, false);
+ kNumSpeedTests /
+ (kBlockHeightPixels[GetParam()] * kBlockHeightPixels[GetParam()]) /
+ 2,
+ false);
}
INSTANTIATE_TEST_SUITE_P(C, AverageBlendTest10bpp,
@@ -319,4 +284,9 @@ INSTANTIATE_TEST_SUITE_P(NEON, AverageBlendTest10bpp,
} // namespace
} // namespace dsp
+
+static std::ostream& operator<<(std::ostream& os, const BlockSize param) {
+ return os << ToString(param);
+}
+
} // namespace libgav1
diff --git a/src/dsp/cdef.cc b/src/dsp/cdef.cc
index 0b50517..ca2adfd 100644
--- a/src/dsp/cdef.cc
+++ b/src/dsp/cdef.cc
@@ -40,8 +40,10 @@ constexpr int16_t kDivisionTable[] = {840, 420, 280, 210, 168, 140, 120, 105};
int32_t Square(int32_t x) { return x * x; }
template <int bitdepth, typename Pixel>
-void CdefDirection_C(const void* const source, ptrdiff_t stride,
- uint8_t* const direction, int* const variance) {
+void CdefDirection_C(const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT const direction,
+ int* LIBGAV1_RESTRICT const variance) {
assert(direction != nullptr);
assert(variance != nullptr);
const auto* src = static_cast<const Pixel*>(source);
@@ -121,10 +123,11 @@ int Constrain(int diff, int threshold, int damping) {
// constant large value (kCdefLargeValue) if at the boundary.
template <int block_width, int bitdepth, typename Pixel,
bool enable_primary = true, bool enable_secondary = true>
-void CdefFilter_C(const uint16_t* src, const ptrdiff_t src_stride,
- const int block_height, const int primary_strength,
- const int secondary_strength, const int damping,
- const int direction, void* const dest,
+void CdefFilter_C(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int block_height,
+ const int primary_strength, const int secondary_strength,
+ const int damping, const int direction,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
static_assert(enable_primary || enable_secondary, "");
diff --git a/src/dsp/cdef_test.cc b/src/dsp/cdef_test.cc
index fd64593..c10a8d7 100644
--- a/src/dsp/cdef_test.cc
+++ b/src/dsp/cdef_test.cc
@@ -54,6 +54,8 @@ const char* GetDirectionDigest(const int bitdepth, const int num_runs) {
return kDigest[bitdepth_index][run_index];
}
+// The 'int' parameter is unused but required to allow for instantiations of C,
+// NEON, etc.
template <int bitdepth, typename Pixel>
class CdefDirectionTest : public testing::TestWithParam<int> {
public:
@@ -159,6 +161,10 @@ TEST_P(CdefDirectionTest10bpp, DISABLED_Speed) {
}
INSTANTIATE_TEST_SUITE_P(C, CdefDirectionTest10bpp, testing::Values(0));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, CdefDirectionTest10bpp, testing::Values(0));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
const char* GetDigest8bpp(int id) {
@@ -166,21 +172,12 @@ const char* GetDigest8bpp(int id) {
"b6fe1a1f5bbb23e35197160ce57d90bd", "8aed39871b19184f1d381b145779bc33",
"82653dd66072e8ebd967083a0413ab03", "421c048396bc66ffaa6aafa016c7bc54",
"1f70ba51091e8c6034c3f0974af241c3", "8f700997452a24091136ca58890a5be4",
- "9deaaf07db25ca1d96ea8762925372d3", "7edadd9ad058be518430e64f78fe34a2",
- "862362a654edb2562609895395eb69cd", "3b4dae4d353b75f652ce67f96b2fd718",
- "65c51f49e4fd848d9fef23a346702b17", "f93b3fa86764e53e4c206ef01d5ee9db",
- "202e36551bc147c30b76ae359d5f7646", "3de677a2b6fe4aa6fc29a5e5f2d63063",
- "ab860362809e878f7b47dacc6087bce3", "c0d991affc8aeb45d91ae36e7b3d77d8",
- "27f19fffabfb79104b4be3c272723f62", "a54b981f562e2cf10a4fb037d0181e2d",
- "9a65933d02867a1e8fc1f29097d4d0db", "c068b21d232145c61db8ef9298447bfa",
- "8db1948c23648372509e4f3577e8eaa0", "c08a3b192ab0a47abe22f7f0ae78a5d7",
- "4ff9bd4ae06f2cc2d2660df41cf1baca", "a0a634e48c55a2ca340cf5cac7f74cb6",
- "f9f631985b42214f8b059c8f119d4401", "5fb136073300a45d74145649473970da",
- "33624aab8ba0264657fa9304dbdcf72c", "e6a15775d451a3c4803a7c0604deb0ea",
- "4c28b63022cdc5ea0e49b492c187d53d", "c5fa9792ee292d29c5a864e376ddacc0",
- "fcdf7319978b64f03ca3b9d4d83a0c2a", "394931c89bd5065308b0633d12370b19",
- "9e702d68000c1b02759001e9a8876df2", "c844919f0114e83960dd329b1aa7146f",
- "499248c675884db3ef57018d0a0868b5", "4a9041ed183f9add717e5ddcdb280799",
+ "9e3dea21ee4246172121f0420eccd899", "0848bdeffa74145758ef47992e1035c4",
+ "0bb55818de986e9d988b0c1cc6883887", "9b558a7eefc934f90cd09ca26b998bfd",
+ "3a38670f8c5f0c61cc47c9c79da728d2", "ed18fe91180e78008ccb98e9019bed69",
+ "2aa4bbcb6fb088ad42bde76be014dff0", "88f746f0d6c079ab8e9ecc7ff67524c7",
+ "7cffa948f5ddbccc7c6b07d15ca9eb69", "5e22c1c89735965dda935d1249129548",
+ "e765133d133b94e1578c8c5616248a96", "da95d47cad74eb4a075893ca98e658ab",
};
return kDigest[id];
}
@@ -191,21 +188,12 @@ const char* GetDigest10bpp(int id) {
"0a9630b39974850998db653b07e09ab4", "97a924661d931b23ee57893da617ae70",
"0d79516b9a491ce5112eb00bbae5eb80", "d5801fd96029a7509cf66dde61e8e2d8",
"5bf5c0ea5a85e9b6c1e6991619c34ebc", "e2f1c08a8b3cd93b3a85511493a0ee31",
- "18910f422e386c71ffde8680176d61c0", "3255afe8b3db5be4c17299420ae9b4b3",
- "ccac34de92891d4ef25820737e7a4f06", "5c2109c4142867c15bc6bb81e19b8058",
- "86e8300e2ad292bfce95185530ef06c8", "21c06ed6d62b8fbef1363cd177386cd0",
- "fd6687987dbff6f15210c2cc61570daa", "7cb246cb65a9cf9b2f829ab086f7c45a",
- "3a38dc3c89f7e400383b1b7ce3e73008", "7b23b520e41ad510b9608b47f9c5f87e",
- "f9ca24b57fc06d7b8dc4151bbc4d2840", "070ef8fa64dcdc45701428ee6ef0ca79",
- "0e7e3ca3cf8546972d01fc262b2b9cfb", "9ac81b7cf93173f33d195927b0a3685a",
- "1f964b6959774651a79d961e5a2a6a56", "64d5f88995a918a85df317d4240f0862",
- "55c94ec09facda30fac677d205beb708", "2c010b256f4dabf42ef78bf5a3851b2c",
- "c7d18d0e287fa8658b94131603e378db", "4f7696fe2c8dbedd0c8e8a53b9dec0fc",
- "b3483dc32665a4bb0606d78dfb3d285c", "0bcb4acd4090f5798c2d260df73b2c46",
- "4f574c782f3b28fb9c85cdb70dfcb46a", "14bd700a88be0107e9ef2fe54f75cee6",
- "5d3b2698c9ffa4a6aed45a9adbddb8bf", "eff870414f80897cf8958ebeea84f0a6",
- "e042843275f82271a9f540bc3e4ef35c", "26e3ff3d661dac25861a0f5bab522340",
- "239844e66b07796003f9315166b9e29e", "44b8e6884215a1793cc7f8f7ce40bcee",
+ "45c047d2be5e2dcf6094937780a3f88a", "346caf437c1ad85862de72a622e29845",
+ "0e9cb69d24d9badbe956da779d912b05", "81803dcb00971237b3fe6372564a842f",
+ "17681ad2ed4a2456d70760852af6c6fd", "5312f8049a08a5f9b1708fda936f7a55",
+ "3f0f522f3a33e4ff2a97bdc1e614c5c4", "3818a50be7fe16aa0c636a7392d1eceb",
+ "c6849b8cd77a076dc7e3c26e8cd55b9e", "223c0dd685bbc74aec1d088356708433",
+ "90992957cb8103222aa2fb43c6cd2fc4", "a4ba6edcefe4130851c4c2607b147f95",
};
return kDigest[id];
}
@@ -282,8 +270,8 @@ class CdefFilteringTest : public testing::TestWithParam<CdefTestParam> {
template <int bitdepth, typename Pixel>
void CdefFilteringTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
- const int id = ((param_.rows4x4 < 4) + (param_.rows4x4 < 2)) * 3 +
- param_.subsampling_x * 9 + param_.subsampling_y * 18;
+ const int id = static_cast<int>(param_.rows4x4 < 4) * 3 +
+ (param_.subsampling_x + param_.subsampling_y) * 6;
absl::Duration elapsed_time;
for (int num_tests = 0; num_tests < num_runs; ++num_tests) {
for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
@@ -361,7 +349,6 @@ void CdefFilteringTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
const CdefTestParam cdef_test_param[] = {
CdefTestParam(0, 0, 4, 4), CdefTestParam(0, 0, 2, 2),
CdefTestParam(1, 0, 4, 4), CdefTestParam(1, 0, 2, 2),
- CdefTestParam(0, 1, 4, 4), CdefTestParam(0, 1, 2, 2),
CdefTestParam(1, 1, 4, 4), CdefTestParam(1, 1, 2, 2),
};
@@ -402,6 +389,11 @@ TEST_P(CdefFilteringTest10bpp, DISABLED_Speed) {
INSTANTIATE_TEST_SUITE_P(C, CdefFilteringTest10bpp,
testing::ValuesIn(cdef_test_param));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, CdefFilteringTest10bpp,
+ testing::ValuesIn(cdef_test_param));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
} // namespace
diff --git a/src/dsp/convolve.cc b/src/dsp/convolve.cc
index 727b4af..f11b45e 100644
--- a/src/dsp/convolve.cc
+++ b/src/dsp/convolve.cc
@@ -33,34 +33,39 @@ constexpr int kHorizontalOffset = 3;
constexpr int kVerticalOffset = 3;
// Compound prediction output ranges from ConvolveTest.ShowRange.
+// In some cases, the horizontal or vertical filter will be omitted. This table
+// shows the general case, where the downscaled horizontal output is input to
+// the vertical filter via the |intermediate_result| array. The final output is
+// either Pixel or compound values, depending on the |is_compound| variable.
// Bitdepth: 8 Input range: [ 0, 255]
-// intermediate range: [ -7140, 23460]
-// first pass output range: [ -1785, 5865]
-// intermediate range: [ -328440, 589560]
-// second pass output range: [ 0, 255]
-// compound second pass output range: [ -5132, 9212]
+// Horizontal upscaled range: [ -7140, 23460]
+// Horizontal downscaled range: [ -1785, 5865]
+// Vertical upscaled range: [ -328440, 589560]
+// Pixel output range: [ 0, 255]
+// Compound output range: [ -5132, 9212]
//
// Bitdepth: 10 Input range: [ 0, 1023]
-// intermediate range: [ -28644, 94116]
-// first pass output range: [ -7161, 23529]
-// intermediate range: [-1317624, 2365176]
-// second pass output range: [ 0, 1023]
-// compound second pass output range: [ 3988, 61532]
+// Horizontal upscaled range: [ -28644, 94116]
+// Horizontal downscaled range: [ -7161, 23529]
+// Vertical upscaled range: [-1317624, 2365176]
+// Pixel output range: [ 0, 1023]
+// Compound output range: [ 3988, 61532]
//
// Bitdepth: 12 Input range: [ 0, 4095]
-// intermediate range: [ -114660, 376740]
-// first pass output range: [ -7166, 23546]
-// intermediate range: [-1318560, 2366880]
-// second pass output range: [ 0, 4095]
-// compound second pass output range: [ 3974, 61559]
+// Horizontal upscaled range: [ -114660, 376740]
+// Horizontal downscaled range: [ -7166, 23546]
+// Vertical upscaled range: [-1318560, 2366880]
+// Pixel output range: [ 0, 4095]
+// Compound output range: [ 3974, 61559]
template <int bitdepth, typename Pixel>
-void ConvolveScale2D_C(const void* const reference,
+void ConvolveScale2D_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index, const int subpixel_x,
const int subpixel_y, const int step_x, const int step_y,
- const int width, const int height, void* prediction,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
@@ -137,14 +142,12 @@ void ConvolveScale2D_C(const void* const reference,
}
template <int bitdepth, typename Pixel>
-void ConvolveCompoundScale2D_C(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int vertical_filter_index,
- const int subpixel_x, const int subpixel_y,
- const int step_x, const int step_y,
- const int width, const int height,
- void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveCompoundScale2D_C(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int vertical_filter_index, const int subpixel_x, const int subpixel_y,
+ const int step_x, const int step_y, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
// All compound functions output to the predictor buffer with |pred_stride|
// equal to |width|.
assert(pred_stride == width);
@@ -223,13 +226,13 @@ void ConvolveCompoundScale2D_C(const void* const reference,
}
template <int bitdepth, typename Pixel>
-void ConvolveCompound2D_C(const void* const reference,
+void ConvolveCompound2D_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int horizontal_filter_id,
const int vertical_filter_id, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
// All compound functions output to the predictor buffer with |pred_stride|
// equal to |width|.
@@ -307,11 +310,13 @@ void ConvolveCompound2D_C(const void* const reference,
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
-void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
+void Convolve2D_C(const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int horizontal_filter_id, const int vertical_filter_id,
- const int width, const int height, void* prediction,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
@@ -385,13 +390,13 @@ void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
-void ConvolveHorizontal_C(const void* const reference,
+void ConvolveHorizontal_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int /*vertical_filter_index*/,
const int horizontal_filter_id,
const int /*vertical_filter_id*/, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
@@ -427,13 +432,13 @@ void ConvolveHorizontal_C(const void* const reference,
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
-void ConvolveVertical_C(const void* const reference,
+void ConvolveVertical_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int vertical_filter_index,
const int /*horizontal_filter_id*/,
const int vertical_filter_id, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
@@ -464,13 +469,13 @@ void ConvolveVertical_C(const void* const reference,
}
template <int bitdepth, typename Pixel>
-void ConvolveCopy_C(const void* const reference,
+void ConvolveCopy_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int /*vertical_filter_index*/,
const int /*horizontal_filter_id*/,
const int /*vertical_filter_id*/, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -483,13 +488,13 @@ void ConvolveCopy_C(const void* const reference,
}
template <int bitdepth, typename Pixel>
-void ConvolveCompoundCopy_C(const void* const reference,
+void ConvolveCompoundCopy_C(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int /*vertical_filter_index*/,
const int /*horizontal_filter_id*/,
const int /*vertical_filter_id*/, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
// All compound functions output to the predictor buffer with |pred_stride|
// equal to |width|.
@@ -523,11 +528,11 @@ void ConvolveCompoundCopy_C(const void* const reference,
// blended with another predictor to generate the final prediction of the block.
template <int bitdepth, typename Pixel>
void ConvolveCompoundHorizontal_C(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int /*vertical_filter_index*/,
- const int horizontal_filter_id, const int /*vertical_filter_id*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
// All compound functions output to the predictor buffer with |pred_stride|
// equal to |width|.
assert(pred_stride == width);
@@ -567,14 +572,12 @@ void ConvolveCompoundHorizontal_C(
// The output is not clipped to valid pixel range. Its output will be
// blended with another predictor to generate the final prediction of the block.
template <int bitdepth, typename Pixel>
-void ConvolveCompoundVertical_C(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int vertical_filter_index,
- const int /*horizontal_filter_id*/,
- const int vertical_filter_id, const int width,
- const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveCompoundVertical_C(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
// All compound functions output to the predictor buffer with |pred_stride|
// equal to |width|.
assert(pred_stride == width);
@@ -615,14 +618,12 @@ void ConvolveCompoundVertical_C(const void* const reference,
// The output is the single prediction of the block, clipped to valid pixel
// range.
template <int bitdepth, typename Pixel>
-void ConvolveIntraBlockCopy2D_C(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/,
- const int /*vertical_filter_id*/,
- const int width, const int height,
- void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveIntraBlockCopy2D_C(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
const auto* src = static_cast<const Pixel*>(reference);
@@ -670,14 +671,12 @@ void ConvolveIntraBlockCopy2D_C(const void* const reference,
// The filtering of intra block copy is simply the average of current and
// the next pixel.
template <int bitdepth, typename Pixel, bool is_horizontal>
-void ConvolveIntraBlockCopy1D_C(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/,
- const int /*vertical_filter_id*/,
- const int width, const int height,
- void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveIntraBlockCopy1D_C(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
const auto* src = static_cast<const Pixel*>(reference);
diff --git a/src/dsp/convolve.inc b/src/dsp/convolve.inc
index 140648b..e0f755e 100644
--- a/src/dsp/convolve.inc
+++ b/src/dsp/convolve.inc
@@ -45,6 +45,7 @@ int GetNumTapsInFilter(const int filter_index) {
return 4;
}
-constexpr int kIntermediateStride = kMaxSuperBlockSizeInPixels;
+constexpr int kIntermediateAllocWidth = kMaxSuperBlockSizeInPixels;
+constexpr int kIntermediateStride = 8;
constexpr int kHorizontalOffset = 3;
constexpr int kFilterIndexShift = 6;
diff --git a/src/dsp/convolve_test.cc b/src/dsp/convolve_test.cc
index 4a2a9f1..295c814 100644
--- a/src/dsp/convolve_test.cc
+++ b/src/dsp/convolve_test.cc
@@ -16,7 +16,6 @@
#include <algorithm>
#include <cassert>
-#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
@@ -34,6 +33,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/cpu.h"
#include "src/utils/memory.h"
@@ -55,669 +55,368 @@ constexpr int kMaxBlockHeight = kMaxSuperBlockSizeInPixels + kSubPixelTaps;
// filters [4] and [5] are only reached through GetFilterIndex().
constexpr int kMinimumViableRuns = 4 * 16;
-// When is_scaled_convolve_ is true, we don't test every combination of
-// type_param_, so some digests in ths array are redudant, marked as
-// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".
-// We keep it so that the logic of calculation id in GetDigestId() is clearer.
-const char* GetDigest8bpp(int id) {
- static const char* const kDigest[] = {
- "ae5977a4ceffbac0cde72a04a43a9d57", "fab093b917d36f6b69fb4f50a6b5c822",
- "1168251e6261e2ff1fa69a93226dbd76", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d2f5ca2b7958c332a3fb771f66da01f0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6bbcc075f8b768a02cdc9149f150326d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c4e90cd202f9867517433b550afdc644", "43d6df191744f6c5d489c0673714a714",
- "bfe8197057b0f3f096344251047f481f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1681719b0f8905d99382f4132fe1472a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8d24b59c0f3942079ba4945ed6686269", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ae5977a4ceffbac0cde72a04a43a9d57", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "995318eff1fe62822366490192ad8b5e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0ef1c5beb3228c6d9ecf3ced584c4aa8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "fc02228efb85c665bd27a3dab72a9037", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6cf5f791fe0d8dcd3526be3c6b814035", "eaa0942097fd2b2dd621b77e0a659896",
- "4821befdf63f8c6da6440afeb57f320f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7aec92c3b65e456b64ae285c12b03b0d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4ae70d9db2ec36885394db7d59bdd4f7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "911212ae2492690de06d12bfaf71c7d4", "cb284b0ae039582039563638f682db26",
- "6b4393b2d7387dd291d3a7bd3aabcae4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0804d93136549388b6cd7fdcd187a578", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b25f037602efdb4eaacb3ade1dc5c28f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6cf5f791fe0d8dcd3526be3c6b814035", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "704b0bb4128aa163ef5899e6d8ad9664", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "abf3f31ec4daff000e80f7ab9628688b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "09e12a389cd454e10f750062102ea1b2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d905dfcad930aded7718587c05b48aaf", "fe85aaee8007d2130d56919242e01163",
- "c30fc44d83821141e84cc4793e127301", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f72a99ad63f6a88c23724e898b705d21", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5fee162fe52c11c823db4d5ede370654", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a9210113ff6873e5b50d5d3ad67e440f", "b7633a78f959b20ca27ffb700b44b45c",
- "6d1c5145be9fd636ababd64c64d23a10", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d55d8012ddddb55e6c3e51dafab92980", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b1948cb353fa308f0d5592b0ad338997", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d905dfcad930aded7718587c05b48aaf", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "04e3b7f46e748431c76cf6125057601c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "71362b65cffd008d1ca4a20adc8cc15f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "987f7a6a8bef47acbd1e49bb39f51ac4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6baf153feff04cc5b7e87c0bb60a905d", "fa1ad095bf696745599079fb73975b75",
- "a8293b933d9f2e5d7f922ea40111d643", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "07a1f07f114c4a38ba08d2f44e1e1132", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9365186c59ef66d9def40f437022ad93", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a7305087fae23de53d21a6909009ff69", "bd44440b5757b74bcc3e2f7f32ef42af",
- "a5a1ac658d7ce4a846a32b9fcfaa3475", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3b1ceebf0579fcbbfd6136938c595b91", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3bfad931bce82335219e0e29c15f2b21", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6baf153feff04cc5b7e87c0bb60a905d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4cfad2c437084a93ea76913e21c2dd89", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1a0bdfc96a3b9fd904e658f238ab1076", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b8a710baa6a9fc784909671d450ecd99", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "871ed5a69ca31e6444faa720895949bf", "e55d0c54fd28355d32e29d411488b571",
- "354a54861a94e8b027afd9931e61f997", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "26b9de95edb45b31ac5aa19825831c7a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0f95fb0276c9c7910937fbdf75f2811d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8dcce009395264379c1a51239f4bb22c", "06925f05ea49811e3efc2a44b111b32b",
- "2370f4e4a83edf91b7f504bbe4b00e90", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ecafabcad1045f15d31ce2f3b13132f2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "68a701313d2247d2b32636ebc1f2a008", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "871ed5a69ca31e6444faa720895949bf", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d372f0c17bce98855d6d59fbee814c3d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "56d16e54afe205e97527902770e71c71", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f9e6a56382d8d12da676d6631bb6ef75", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "68e2f90eaa0ab5da7e6f5776993f7eea", "8718965c4831a363a321a25f4aada7ba",
- "eeeb8589c1b31cbb565154736ca939ec", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c1b836a6ce023663b90db0e320389414", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b355dab2dbb6f5869018563eece22862", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8dcce009395264379c1a51239f4bb22c", "e7c2bfd356c860c36053dea19f8d0705",
- "ae5464066a049622a7a264cdf9394b55", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5f211eba020e256a5781b203c5aa1d2e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "05afe1f40d37a45a97a5e0aadd5066fb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "68e2f90eaa0ab5da7e6f5776993f7eea", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d99ffd2579eb781c30bc0df7b76ad61e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1f7b5b8282ff3cf4d8e8c52d80ef5b4d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3bf8e11e18527b16f0d7c0361d74a52d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f1f8282fb33c30eb68c0c315b7a4bc01", "4c718ddbe8b5aa7118c8bc1c2f5ea158",
- "f49dab626ddd977ed171f79295c24935", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5befcf222152ebc8d779fcc10b95320a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cf6ff8c43d8059cea6090a23ab66a0ef", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d90a69e7bae8aa46ed0e1e5f911d7a07", "1d7113d705fa0edeef49e5c50a91151d",
- "45368b6db3d1fee739a64b0bc823ea9c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3b04497634364dd2cd3f2482b5d4b32f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9e1f0e0bddb58d15d0925eeaede9b84c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f1f8282fb33c30eb68c0c315b7a4bc01", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4e139e57cbb049a0f4ef816adc48d026", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "79e9e260a2028c5fe320005c272064b9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b9ff54c6f1e3b41fc7fc0f3fa0e75cf2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9412064b0eebf8123f23d74147d04dff", "0dee657827cd48c4ce4a7657f6f92233",
- "78d2f27e0d4708cb16856d7d40dc16fb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "62adf407fc27d8682ced4dd7b55af14e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a336f8b7bcf188840ca65c0d0e66518a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6ab4dc87be03be1dcc5d956ca819d938", "78cef82670ff99b1e4a279de3538c233",
- "8dff0f28192d9f8c0bf7fb5405719dd8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a8ac7b5dc65ffb758b0643508a0e744e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "03313cdaa593a1a7b4869010dcc7b241", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9412064b0eebf8123f23d74147d04dff", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "be53b2507048e7ff50226d15c0b28865", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2418ebcdf85551b9ae6e3725f04aae6d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "06ef1504f31af5f173d3317866ca57cb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cc08936effe309ab9a4fa1bf7e28e24e", "a81bcdeb021d3a23477c40c47548df52",
- "9d2393ea156a1c2083f5b4207793064b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "35be0786a072bf2f1286989261bf6580", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "de953f03895923359c6a719e6a537b89", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6ab4dc87be03be1dcc5d956ca819d938", "e053321d7c75951d5ff3dce85762acd3",
- "632738ef3ff3021cff45045c41978849", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "561ed8be43c221a561f8885a0d74c7ef", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "88a50d2b4107ee5b5074b2520183f8ac", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cc08936effe309ab9a4fa1bf7e28e24e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b73f3c1a10405de89d1f9e812ff73b5a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "98bdf907ebacacb734c9eef1ee727c6e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "635e8ee11cf04d73598549234ad732a0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "36cbef36fa21b98df03536c918bf752a", "b7a4d080e2f24040eebb785f437de66a",
- "a9c62745b95c66fa497a524886af57e2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "90562fc42dc5d879ae74c4909c1dec30", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8463ade9347ed602663e2cec5c4c3fe6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8f2afdb2f03cd04ffacd421b958caaa0", "2e15a26905467e5ad9f8da04b94e60b6",
- "f7ec43384037e8d6c618e0df826ec029", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8159619fc234598c8c75154d80021fd4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ac50ea9f7306da95a5092709442989cf", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "36cbef36fa21b98df03536c918bf752a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c7d51b1f2df49ab83962257e8a5934e5", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4dd5672d53c8f359e8f80badaa843dfc", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "fab693410d59ee88aa2895527efc31ac", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9d0da6321cf5311ea0bdd41271763030", "22ff7819c55ce6b2e0ce5431eb8c309c",
- "2c614ec4463386ec075a0f1dbb587933", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a1427352f9e413975a0949e2b300c657", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "392de11ffcd5c2ecf3db3480ee135340", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "710ccecc103033088d898a2b924551fb", "160c29a91e372d66b12e171e4d81bc18",
- "a6bc648197781a2dc99c487e66464320", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8f43645dce92cf7594aa4822aa53b17d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "739b17591437edffd36799237b962658", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9d0da6321cf5311ea0bdd41271763030", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "159e443d79cc59b11ca4a80aa7aa09be", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a1bef519bbf07138e2eec5a91694de46", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3041eb26c23a63a587fbec623919e2d2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "55a10165ee8a660d7dddacf7de558cdd", "355b691a656e6a287e4504ef2fbb8034",
- "7a8856480d752153370240b066b90f6a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bcbc418bc2beb243e463851cd95335a9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bddd31e3e852712e6244b616622af83d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "710ccecc103033088d898a2b924551fb", "f6cb80c4d5683553929b1e41f99e487e",
- "1112ebd509007154c72c5a485b220b62", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b6ccddb7dfa4eddc87b4eff08b5a3195", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b8a7eb7dd9c216e240517edfc6489397", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "55a10165ee8a660d7dddacf7de558cdd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6ef14b14882e1465b0482b0e0b16d8ce", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "df1cb51fe1a937cd7834e973dc5cb814", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c61d99d5daf575664fb7ad64976f4b03", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ac7fc9f9ea7213743fae5a023faaaf08", "a6307a981600c3fb5b9d3e89ddf55069",
- "beaef1dbffadc701fccb7c18a03e3a41", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cb8fedcbecee3947358dc61f95e56530", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "30a36245c40d978fc8976b442a8600c3", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a4093e3e5902dd659407ce6471635a4e", "658f0f51eb2f965f7490053852653fc0",
- "9714c4ce636b6fb0ad05cba246d48c76", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b4e605327b28db573d88844a1a09db8d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "75b755f199dbf4a0e5ebbb86c2bd871d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ac7fc9f9ea7213743fae5a023faaaf08", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "22a8d287b425c870f40c64a50f91ce54", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "317fe65abf81ef3ea07976ef8667baeb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "822f6c4eb5db760468d822b21f48d94d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "077e1b7b355c7ab3ca40230ee8efd8ea", "628229ce2484d67e72c51b2f4ad124a6",
- "72b1e700c949d06eaf62d664dafdb5b6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0d0154a7d573685285a83a4cf201ac57", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "93aa662b988b8502e5ea95659eafde59", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "375d7f5358d7a088a498b8b3aaecc0d5", "b726ef75b641c21519ecc2f802bbaf39",
- "2c93dde8884f09fb5bb5ad6d95cde86d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "15b00a15d1cc6cc96ca85d00b167e4dd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "31b0017ba1110e3d70b020901bc15564", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "077e1b7b355c7ab3ca40230ee8efd8ea", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f1d96db5a2e0a2160df38bd96d28d19b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2da29da97806ae0ee300c5e69c35a4aa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3f6fcb9fae3666e085b9e29002a802fc", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7a3e8de2a1caae206cf3e51a86dfd15a", "c266a1b65599686c771fad8a822e7a49",
- "684f5c3a25a080edaf79add6e9137a8e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b14bd8068f108905682b83cc15778065", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "70440ba9ee7f9d16d297dbb49e54a56e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "375d7f5358d7a088a498b8b3aaecc0d5", "4dca696cc0552c1d684c4fc963adc336",
- "a49e6160b5d1b56bc2046963101cd606", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7bf911888c11a9fefd604b8b9c82e9a1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0a1aa8f5ecfd11ddba080af0051c576a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7a3e8de2a1caae206cf3e51a86dfd15a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "637d1e5221422dfe9a6dbcfd7f62ebdd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "555475f5d1685638169ab904447e4f13", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d9b9fecd195736a6049c528d4cb886b5", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1ddf9020f18fa7883355cf8c0881186a", "e681b35b1fe02e2a6698525040015cd0",
- "3be970f49e4288988818b087201d54da", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c96c867d998473197dde9b587be14e3a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1eb2be4c05b50e427e29c72fa566bff5", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "08867ea5cc38c705ec52af821bc4736a", "c51c8bb294f4fa20bdab355ad1e7df37",
- "7f084953976111e9f65b57876e7552b1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bfb69b4d7d4aed73cfa75a0f55b66440", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "536181ee90de883cc383787aec089221", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1ddf9020f18fa7883355cf8c0881186a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f275af4f1f350ffaaf650310cb5dddec", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b3e3a6234e8045e6182cf90a09f767b2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "fed17fc391e6c3db4aa14ea1d6596c87", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2377dd167ef2707978bed6f10ffd4e76", "b1f6c0cd490b584b1883222a4c281e0f",
- "d2b9dba2968894a414756bb510ac389a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f596c63c7b14cada0174e17124c83942", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "52c0980bae63e8459e82eee7d8af2334", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2afb540e8063f58d1b03896486c5e89b", "b929f7956cf35dd6225ca6cf45eacb23",
- "0846ec82555b66197c5c45b08240fbcc", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "034d1d62581bd0d840c4cf1e28227931", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "29f82b0f3e4113944bd28aacd9b8489a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2377dd167ef2707978bed6f10ffd4e76", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f81c4d6b001a14584528880fa6988a87", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "849dfeca59074525dea59681a7f88ab4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d0d3482d981989e117cbb32fc4550267", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f918e0e4422967c6a7e47298135c7ae9", "fc8718e6f9e6663c2b6bf9710f835bfc",
- "9a3215eb97aedbbddd76c7440837d040", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "eb2822ad8204ed4ecbf0f30fcb210498", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "75e57104d6058cd2bce1d3d8142d273d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2afb540e8063f58d1b03896486c5e89b", "d9d9f3c699cd03ab9d698e6b235ddcc6",
- "ca7471c126ccd22189e874f0a6e41960", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8cba849640e9e2859d509bc81ca94acd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ee3e76371240d1f1ff811cea6a7d4f63", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f918e0e4422967c6a7e47298135c7ae9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a5a2f9c2e7759d8a3dec1bc4b56be587", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "39a68af80be11e1682b6f3c4ede33530", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "39561688bf6680054edbfae6035316ce", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b2264e129636368b5496760b39e64b7a", "4dbb4ce94d4948c990a51b15959d2fa6",
- "4e317feac6da46addf0e8b9d8d54304b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "538ce869ffd23b6963e61badfab7712b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b4c735269ade44419169adbd852d5ddc", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6ce47b11d2e60c5d183c84ce9f2e46cc", "3ac8d5b68ebb29fd1a41c5fa9d5f4382",
- "0802b6318fbd0969a33de8fdfcd07f10", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bc79acf2a0fe419194cdb4529bc7dcc8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "17a20dbbf09feae557d40aa5818fbe76", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b2264e129636368b5496760b39e64b7a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2317c57ab69a36eb3bf278cf8a8795a3", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b22d765af176d87e7d3048b4b89b86ad", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "087c5992ca6f829e1ba4ba5332d67947", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9cf1deba08dac5972b3b0a43eff8f98", "84777bdeb84e2530a1c8c1ee432ec934",
- "b384e9e3d81f9f4f9024028fbe451d8b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4e4677a0623d44237eb8d6a622cdc526", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "356d4003477283e157c8d2b5a79d913c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9cf1deba08dac5972b3b0a43eff8f98", "1e58b76ca365b0bd4fd3c4519ec4a500",
- "24accebe2e795b13fcb56dd3abacf53f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "98f584ceaf2d65af997f85d71ceeda1b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9cf1deba08dac5972b3b0a43eff8f98", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1e58b76ca365b0bd4fd3c4519ec4a500", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "24accebe2e795b13fcb56dd3abacf53f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "98f584ceaf2d65af997f85d71ceeda1b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+struct ConvolveTestParam {
+ enum BlockSize {
+ kBlockSize2x2,
+ kBlockSize2x4,
+ kBlockSize4x2,
+ kBlockSize4x4,
+ kBlockSize4x8,
+ kBlockSize8x2,
+ kBlockSize8x4,
+ kBlockSize8x8,
+ kBlockSize8x16,
+ kBlockSize16x8,
+ kBlockSize16x16,
+ kBlockSize16x32,
+ kBlockSize32x16,
+ kBlockSize32x32,
+ kBlockSize32x64,
+ kBlockSize64x32,
+ kBlockSize64x64,
+ kBlockSize64x128,
+ kBlockSize128x64,
+ kBlockSize128x128,
+ kNumBlockSizes
+ };
+
+ static constexpr int kBlockWidth[kNumBlockSizes] = {
+ 2, 2, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128};
+ static constexpr int kBlockHeight[kNumBlockSizes] = {
+ 2, 4, 2, 4, 8, 2, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64, 128, 64, 128};
+
+ explicit ConvolveTestParam(BlockSize block_size)
+ : block_size(block_size),
+ width(kBlockWidth[block_size]),
+ height(kBlockHeight[block_size]) {}
+
+ BlockSize block_size;
+ int width;
+ int height;
+};
+
+#if !LIBGAV1_CXX17
+constexpr int ConvolveTestParam::kBlockWidth[kNumBlockSizes]; // static.
+constexpr int ConvolveTestParam::kBlockHeight[kNumBlockSizes]; // static.
+#endif
+
+const char* GetConvolveDigest8bpp(int id) {
+ // Entries containing 'XXXXX...' are skipped. See the test for details.
+ static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
+ "ae5977a4ceffbac0cde72a04a43a9d57", "6cf5f791fe0d8dcd3526be3c6b814035",
+ "d905dfcad930aded7718587c05b48aaf", "6baf153feff04cc5b7e87c0bb60a905d",
+ "871ed5a69ca31e6444faa720895949bf", "c9cf1deba08dac5972b3b0a43eff8f98",
+ "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
+ "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
+ "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
+ "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
+ "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
+ "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
+ "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
+ "1168251e6261e2ff1fa69a93226dbd76", "4821befdf63f8c6da6440afeb57f320f",
+ "c30fc44d83821141e84cc4793e127301", "a8293b933d9f2e5d7f922ea40111d643",
+ "354a54861a94e8b027afd9931e61f997", "b384e9e3d81f9f4f9024028fbe451d8b",
+ "eeeb8589c1b31cbb565154736ca939ec", "f49dab626ddd977ed171f79295c24935",
+ "78d2f27e0d4708cb16856d7d40dc16fb", "9d2393ea156a1c2083f5b4207793064b",
+ "a9c62745b95c66fa497a524886af57e2", "2c614ec4463386ec075a0f1dbb587933",
+ "7a8856480d752153370240b066b90f6a", "beaef1dbffadc701fccb7c18a03e3a41",
+ "72b1e700c949d06eaf62d664dafdb5b6", "684f5c3a25a080edaf79add6e9137a8e",
+ "3be970f49e4288988818b087201d54da", "d2b9dba2968894a414756bb510ac389a",
+ "9a3215eb97aedbbddd76c7440837d040", "4e317feac6da46addf0e8b9d8d54304b",
+ "d2f5ca2b7958c332a3fb771f66da01f0", "7aec92c3b65e456b64ae285c12b03b0d",
+ "f72a99ad63f6a88c23724e898b705d21", "07a1f07f114c4a38ba08d2f44e1e1132",
+ "26b9de95edb45b31ac5aa19825831c7a", "4e4677a0623d44237eb8d6a622cdc526",
+ "c1b836a6ce023663b90db0e320389414", "5befcf222152ebc8d779fcc10b95320a",
+ "62adf407fc27d8682ced4dd7b55af14e", "35be0786a072bf2f1286989261bf6580",
+ "90562fc42dc5d879ae74c4909c1dec30", "a1427352f9e413975a0949e2b300c657",
+ "bcbc418bc2beb243e463851cd95335a9", "cb8fedcbecee3947358dc61f95e56530",
+ "0d0154a7d573685285a83a4cf201ac57", "b14bd8068f108905682b83cc15778065",
+ "c96c867d998473197dde9b587be14e3a", "f596c63c7b14cada0174e17124c83942",
+ "eb2822ad8204ed4ecbf0f30fcb210498", "538ce869ffd23b6963e61badfab7712b",
+ "6bbcc075f8b768a02cdc9149f150326d", "4ae70d9db2ec36885394db7d59bdd4f7",
+ "5fee162fe52c11c823db4d5ede370654", "9365186c59ef66d9def40f437022ad93",
+ "0f95fb0276c9c7910937fbdf75f2811d", "356d4003477283e157c8d2b5a79d913c",
+ "b355dab2dbb6f5869018563eece22862", "cf6ff8c43d8059cea6090a23ab66a0ef",
+ "a336f8b7bcf188840ca65c0d0e66518a", "de953f03895923359c6a719e6a537b89",
+ "8463ade9347ed602663e2cec5c4c3fe6", "392de11ffcd5c2ecf3db3480ee135340",
+ "bddd31e3e852712e6244b616622af83d", "30a36245c40d978fc8976b442a8600c3",
+ "93aa662b988b8502e5ea95659eafde59", "70440ba9ee7f9d16d297dbb49e54a56e",
+ "1eb2be4c05b50e427e29c72fa566bff5", "52c0980bae63e8459e82eee7d8af2334",
+ "75e57104d6058cd2bce1d3d8142d273d", "b4c735269ade44419169adbd852d5ddc",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a7305087fae23de53d21a6909009ff69",
+ "8dcce009395264379c1a51239f4bb22c", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "8dcce009395264379c1a51239f4bb22c", "d90a69e7bae8aa46ed0e1e5f911d7a07",
+ "6ab4dc87be03be1dcc5d956ca819d938", "6ab4dc87be03be1dcc5d956ca819d938",
+ "8f2afdb2f03cd04ffacd421b958caaa0", "710ccecc103033088d898a2b924551fb",
+ "710ccecc103033088d898a2b924551fb", "a4093e3e5902dd659407ce6471635a4e",
+ "375d7f5358d7a088a498b8b3aaecc0d5", "375d7f5358d7a088a498b8b3aaecc0d5",
+ "08867ea5cc38c705ec52af821bc4736a", "2afb540e8063f58d1b03896486c5e89b",
+ "2afb540e8063f58d1b03896486c5e89b", "6ce47b11d2e60c5d183c84ce9f2e46cc",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a5a1ac658d7ce4a846a32b9fcfaa3475",
+ "2370f4e4a83edf91b7f504bbe4b00e90", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "ae5464066a049622a7a264cdf9394b55", "45368b6db3d1fee739a64b0bc823ea9c",
+ "8dff0f28192d9f8c0bf7fb5405719dd8", "632738ef3ff3021cff45045c41978849",
+ "f7ec43384037e8d6c618e0df826ec029", "a6bc648197781a2dc99c487e66464320",
+ "1112ebd509007154c72c5a485b220b62", "9714c4ce636b6fb0ad05cba246d48c76",
+ "2c93dde8884f09fb5bb5ad6d95cde86d", "a49e6160b5d1b56bc2046963101cd606",
+ "7f084953976111e9f65b57876e7552b1", "0846ec82555b66197c5c45b08240fbcc",
+ "ca7471c126ccd22189e874f0a6e41960", "0802b6318fbd0969a33de8fdfcd07f10",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3b1ceebf0579fcbbfd6136938c595b91",
+ "ecafabcad1045f15d31ce2f3b13132f2", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "5f211eba020e256a5781b203c5aa1d2e", "3b04497634364dd2cd3f2482b5d4b32f",
+ "a8ac7b5dc65ffb758b0643508a0e744e", "561ed8be43c221a561f8885a0d74c7ef",
+ "8159619fc234598c8c75154d80021fd4", "8f43645dce92cf7594aa4822aa53b17d",
+ "b6ccddb7dfa4eddc87b4eff08b5a3195", "b4e605327b28db573d88844a1a09db8d",
+ "15b00a15d1cc6cc96ca85d00b167e4dd", "7bf911888c11a9fefd604b8b9c82e9a1",
+ "bfb69b4d7d4aed73cfa75a0f55b66440", "034d1d62581bd0d840c4cf1e28227931",
+ "8cba849640e9e2859d509bc81ca94acd", "bc79acf2a0fe419194cdb4529bc7dcc8",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3bfad931bce82335219e0e29c15f2b21",
+ "68a701313d2247d2b32636ebc1f2a008", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "05afe1f40d37a45a97a5e0aadd5066fb", "9e1f0e0bddb58d15d0925eeaede9b84c",
+ "03313cdaa593a1a7b4869010dcc7b241", "88a50d2b4107ee5b5074b2520183f8ac",
+ "ac50ea9f7306da95a5092709442989cf", "739b17591437edffd36799237b962658",
+ "b8a7eb7dd9c216e240517edfc6489397", "75b755f199dbf4a0e5ebbb86c2bd871d",
+ "31b0017ba1110e3d70b020901bc15564", "0a1aa8f5ecfd11ddba080af0051c576a",
+ "536181ee90de883cc383787aec089221", "29f82b0f3e4113944bd28aacd9b8489a",
+ "ee3e76371240d1f1ff811cea6a7d4f63", "17a20dbbf09feae557d40aa5818fbe76",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "6baf153feff04cc5b7e87c0bb60a905d",
+ "871ed5a69ca31e6444faa720895949bf", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
+ "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
+ "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
+ "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
+ "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
+ "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
+ "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "4cfad2c437084a93ea76913e21c2dd89",
+ "d372f0c17bce98855d6d59fbee814c3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "d99ffd2579eb781c30bc0df7b76ad61e", "4e139e57cbb049a0f4ef816adc48d026",
+ "be53b2507048e7ff50226d15c0b28865", "b73f3c1a10405de89d1f9e812ff73b5a",
+ "c7d51b1f2df49ab83962257e8a5934e5", "159e443d79cc59b11ca4a80aa7aa09be",
+ "6ef14b14882e1465b0482b0e0b16d8ce", "22a8d287b425c870f40c64a50f91ce54",
+ "f1d96db5a2e0a2160df38bd96d28d19b", "637d1e5221422dfe9a6dbcfd7f62ebdd",
+ "f275af4f1f350ffaaf650310cb5dddec", "f81c4d6b001a14584528880fa6988a87",
+ "a5a2f9c2e7759d8a3dec1bc4b56be587", "2317c57ab69a36eb3bf278cf8a8795a3",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "1a0bdfc96a3b9fd904e658f238ab1076",
+ "56d16e54afe205e97527902770e71c71", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "1f7b5b8282ff3cf4d8e8c52d80ef5b4d", "79e9e260a2028c5fe320005c272064b9",
+ "2418ebcdf85551b9ae6e3725f04aae6d", "98bdf907ebacacb734c9eef1ee727c6e",
+ "4dd5672d53c8f359e8f80badaa843dfc", "a1bef519bbf07138e2eec5a91694de46",
+ "df1cb51fe1a937cd7834e973dc5cb814", "317fe65abf81ef3ea07976ef8667baeb",
+ "2da29da97806ae0ee300c5e69c35a4aa", "555475f5d1685638169ab904447e4f13",
+ "b3e3a6234e8045e6182cf90a09f767b2", "849dfeca59074525dea59681a7f88ab4",
+ "39a68af80be11e1682b6f3c4ede33530", "b22d765af176d87e7d3048b4b89b86ad",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b8a710baa6a9fc784909671d450ecd99",
+ "f9e6a56382d8d12da676d6631bb6ef75", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "3bf8e11e18527b16f0d7c0361d74a52d", "b9ff54c6f1e3b41fc7fc0f3fa0e75cf2",
+ "06ef1504f31af5f173d3317866ca57cb", "635e8ee11cf04d73598549234ad732a0",
+ "fab693410d59ee88aa2895527efc31ac", "3041eb26c23a63a587fbec623919e2d2",
+ "c61d99d5daf575664fb7ad64976f4b03", "822f6c4eb5db760468d822b21f48d94d",
+ "3f6fcb9fae3666e085b9e29002a802fc", "d9b9fecd195736a6049c528d4cb886b5",
+ "fed17fc391e6c3db4aa14ea1d6596c87", "d0d3482d981989e117cbb32fc4550267",
+ "39561688bf6680054edbfae6035316ce", "087c5992ca6f829e1ba4ba5332d67947",
+ };
+ assert(id >= 0);
+ assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
+ return kDigest[id];
+}
+
+const char* GetConvolveScaleDigest8bpp(int id) {
+ // Entries containing 'XXXXX...' are skipped. See the test for details.
+ static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
+ "0291a23f2ac4c40b5d8e957e63769904", "1d48447857472d6455af10d5526f6827",
+ "409b2278d6d372248f1891ca0dd12760", "9e416606a3f82fe5bb3f7182e4f42c2d",
+ "e126563f859ddd5c5ffde6f641168fad", "9bad4f1b7e1865f814b6fd5620816ebd",
+ "50e5e5a57185477cb2af83490c33b47c", "3d2fb301c61d7fbd0e21ac263f7ac552",
+ "5920032c6432c80c6e5e61b684018d13", "07ada64d24339488cdce492e6e0c6b0d",
+ "aaf1589aff6d062a87c627ab9ba20e3e", "91adf91bb24d2c4ea3f882bdf7396e33",
+ "1d17a932a68bb1f199f709e7725fe44b", "07716c63afda034cb386511ea25a63b5",
+ "cca17ef3324c41d189e674a059ef1255", "37d17e70619823a606c0b5f74bf2e33b",
+ "ba8ed5474c187c8e8d7f82a6a29ee860", "27663f037973ebe82ec10252a4d91299",
+ "24c27e187e8d5a2bbfa0fef9046d3eb0", "9854fdc91a48e3bd4639edcc940e5c09",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a71907c60a9f1f81972a2859ae54a805",
+ "817bc3bf0c77abc4186eac39f2320184", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "4e7182a8b226982e2678abcf5f83325d", "50cef7c6e57544135f102226bb95bed9",
+ "225e054dbcfff05b1c8b0792c731449e", "16eb63f03839159f3af0e08be857170f",
+ "c8e5d111a2e3f4487330a8bd893cb894", "4fd99eaf9c160442aab35b9bdc5d275b",
+ "8b0f61bfb30747d4c9215618ac42557c", "1df78022da202cefb9a8100b114152d9",
+ "378466e1eda63dbc03565b78af8e723f", "28ea721411fbf5fc805035be9a384140",
+ "4fed5d4163a3bfcc6726a42f20410b0a", "55abfca0c820771bd926e4b94f66a499",
+ "6c8b8ef0a78859c768e629e1decc0019", "d0ead286b5ba3841d24dd114efbfef0a",
};
+ assert(id >= 0);
+ assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
return kDigest[id];
}
#if LIBGAV1_MAX_BITDEPTH >= 10
-const char* GetDigest10bpp(int id) {
- static const char* const kDigest[] = {
- "b1b6903d60501c7bc11e5285beb26a52", "3fa4ebd556ea33cfa7f0129ddfda0c5b",
- "a693b4bd0334a3b98d45e67d3985bb63", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3e787534dff83c22b3033750e448865a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "fd1da8d197cb385f7917cd296d67afb9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d9941769b66d012c68f70accc1a3b664", "98728677401560d7c29ba8bec59c6a00",
- "2924788891caa175bb0725b57de6cbd2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "915a60e7bb2c38ad5a556098230d6092", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a25de86fd8d389c1c75405aac8049b58", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b1b6903d60501c7bc11e5285beb26a52", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cf792b94b1f3f321fa0c1d6362d89c90", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5f1622fde194bd04560b04f13dc47a7c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d935e0ec1d933d0c48fa529be4f998eb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a7855ed75772d7fa815978a202bbcd9f", "cd3e8b96ff6796650e138f5d106d70d4",
- "156de3172d9acf3c7f251cd7a18ad461", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4c91f676a054d582bcae1ca9adb87a31", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a984202c527b757337c605443f376915", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "20a390cc7e06a265ecc1e118f776c25a", "ab0da36b88021ed0efd806a1a4cd4fa0",
- "fc57a318fbf0c0f29c24edbc84e35ec6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "568055866caf274d67e984307cda2742", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3ff2b19730d6bb8b97f4d72085d2d5b8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a7855ed75772d7fa815978a202bbcd9f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "acc8588292b326f15076dd3a3d260072", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f990a13f7a062665d7f18a40bd5da2ae", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "931df73c3d50c4b2e4ec3502bc8774de", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bde291a4e8087c085fe8b3632f4d7351", "555eead3b67766f56b0e3714d431506f",
- "e545b8a3ff958f8363c7968cbae96732", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "eab5894046a99ad0a1a12c91b0f37bd7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c347f4a58fd784c5e88c1a23e4ff15d2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9272ee0820b09bfdc252a97b2e103862", "be8dd418158226a00d5e01ccc3e4f66b",
- "34b37b59ee49108276be28a2e4585c2d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f4deb462014249d4ab02db7f7f62308e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6ae557169928f3be15c7aad8d67205b1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bde291a4e8087c085fe8b3632f4d7351", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "14be0f12550c814f75655b4e1e22ddde", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "af4cadb78ee54aacebac76c8ad275375", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c0c4ebfd6dbbddd88114c36e8c9085da", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "238980eebc9e63ae3eea2771c7a70f12", "661c69a7b49984fa1e92cf8485ab28b6",
- "7842b2047356c1417d9d88219707f1a1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "765b4cfbfc1a4988878c412d53bcb597", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "29cbaadbff9adf4a3d49bd9900a9dd0b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7e3fa9c03bc3dfbdeb67f24c5d9a49cd", "a65e13b534b32fdff3f48d09389daaf1",
- "da1a6ff2be03ec8acde4cb1cd519a6f0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d54206c34785cc3d8a06c2ceac46378c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b1f26ee13df2e14a757416ba8a682278", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "238980eebc9e63ae3eea2771c7a70f12", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "e552466a4e7ff187251b8914b084d404", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aba5d5ef5e96fe418e65d20e506ea834", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "972aeba65e8a6d20dd0f95279be2aa75", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0eac13431bd7d8a573318408a72246d5", "71c57b774e4c3d9b965b060e2a895448",
- "1a487c658d684314d91bb6d961a94672", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bc63b29ec78c1efec5543885a45bb822", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c5997b802a6ba1cf5ba1057ddc5baa7e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f3454ca93cbb0c8c09b0695d90a0df3d", "d259b9c0d0e3322114b2bcce04ae35dd",
- "a4ca37cb869a0dbd1c4a2dcc449a8f31", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "85a11892ed884e3e74968435f6b16e64", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "996b6c166f9ed25bd07ea6acdf7597ff", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0eac13431bd7d8a573318408a72246d5", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "981b7c44b6f7b7ac2acf0cc4096e6bf4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d70bf16e2a31e90b7b3cdeaef1494cf9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "34165457282e2af2e9b3f5840e4dec5d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "73438155feb62595e3e406921102d748", "86d00d2e3dd4a198343f37e3dc4461c9",
- "0635a296be01b7e641de98ee27c33cd2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cecd57396a0033456408f3f3554c6912", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "59f33727e5beeb783a057770bec7b4cd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f3454ca93cbb0c8c09b0695d90a0df3d", "b11f98b5bb864413952d47a67b4add79",
- "1b5d1d4c7be8d5ec00a42a49eecf918f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "16434230d24b9522ae2680e8c37e1b95", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "34895d4c69a6c3303693e6f431bcd5d8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "73438155feb62595e3e406921102d748", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a4c75372af36162831cb872e24e1088c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6df80bb7f264f4f285d09a4d61533fae", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b8c5582b9bbb789c45471f93be83b41f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5871e0e88a776840d619670fbf107858", "57dd2cde826c50e0b0ec504396cb3ceb",
- "82dc120bf8c2043bc5eee81007309ebf", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5b37f94ef136c1eb9a6181c19491459c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0654d72f22306b28d9ae42515845240c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1a77d2af4d2b6cf8737cfbcacacdc4e4", "7123d4aa8083da90ec6986dda0e126ce",
- "98b77e88b0784baaea64c98c8707fe46", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "963dea92f3efbb99137d1de9c56728d3", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9497b00cb1bc3363dd126ffdddadc8e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5871e0e88a776840d619670fbf107858", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "497271227a70a72f9ad25b415d41563f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c8831118d1004a7cca015a4fca140018", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "257bf5467db570974d7cf2356bacf116", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1c6376ce55c9ee9e35d432edb1ffb3b7", "6fff9189c1d11f183f7c42d4ce5febdb",
- "58c826cad3c14cdf26a649265758c58b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "716ba3a25b454e44b46caa42622c128c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6c9d7d9e6ef81d76e775a85c53abe209", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "89bec831efea2f88129dedcad06bb3fa", "e1ef4ae726d864b36a9b64b1e43ede7e",
- "8148788044522edc3c497e1017efe2ce", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b72fb6a9a073c2fe65013af1842dc9b0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1e461869bb2ee9b6069c5e52cf817291", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1c6376ce55c9ee9e35d432edb1ffb3b7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c48bd7e11ec44ba7b2bc8b6a04592439", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b7f82c140369067c105c7967c75b6f9e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5255dded79f56b0078543b5a1814a668", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d675e0195c9feca956e637f3f1959f40", "670fa8c31c82fced9a810b64c03e87ee",
- "f166254037c0dfb140f54cd7b08bddfe", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9076f58c4ab20f2f06d701a6b53b1c4f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a35f435ccc67717a49251a07e62ae204", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "89bec831efea2f88129dedcad06bb3fa", "7c3a79a90f3f4b460540e796f3197ef1",
- "acf60abeda98bbea161139b915317423", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "86fa0c299737eb499cbcdce94abe2d33", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8d7f1d7ea6a0dcc922ad5d2e77bc74dd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d675e0195c9feca956e637f3f1959f40", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0960a9af91250e9faa1eaac32227bf6f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "130f47aae365aabfec4360fa5b5ff554", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ef745100f5f34c8ff841b2b0b57eb33f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b5681673903ade13d69e295f82fdd009", "9ccd4cc6216eab35ddcb66a76b55dd2f",
- "74ab206f14ac5f62653cd3dd71a7916d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d3212ab3922f147c3cf126c3b1aa17f6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c5325015cb0b7c42839ac4aa21803fa0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "dead0fe4030085c22e92d16bb110de9d", "3c6d97f25d6bc647c843850be007f512",
- "262c96b1f2c4f85c86c0e9c77fedff1e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6b80af04470b83673d98f46925e678a5", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "138855d9bf0ccd0c62ac14c7bff4fd37", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b5681673903ade13d69e295f82fdd009", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "746c2e0f96ae2246d534d67102be068c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "92483ed631de21b685ffe6ccadbbec8f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "edae8ed67286ca6a31573a541b3deb6f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3c43020105ae93a301404b4cd6238654", "cef7cfdcb8ca8d2612f31a1fe95ce371",
- "5621caef7cc1d6522903290ccc5c2cb8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "b55fea77f0e14a8bf8b6562b766fe91f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f81f31f1585c0f70438c09e829416f20", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "306a2f5dfd675df4ed9af44fd5cac8c0", "1dfda318021a05a7e72fd815ddb0dfc8",
- "f35a3d13516440f9168076d9b07c9e98", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "65baca6167fe5249f7a839ce5b2fd591", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "64035142864914d05a48ef8e013631d0", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3c43020105ae93a301404b4cd6238654", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d6f6db079da9b8909a153c07cc9d0e63", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cbb6ab31547df6b91cfb48630fdffb48", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "01adcd8bf15fbf70df47fbf3a953aa14", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "dd2c5880a94ed3758bfea0b0e8c78286", "5f6c1725f4c7c73a8d8f0d9468106624",
- "78ec6cf42cce4b1feb65e076c78ca241", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "59b578268ff26a1e21c5b4273f73f852", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ab10b22fb8dd8199040745565b28595d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "306a2f5dfd675df4ed9af44fd5cac8c0", "9209f83153ef6f09b5262536a2dc1671",
- "13782526fc2726100cb3cf375b3150ed", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "e47ded6c0eec1d5baadd02aff172f2b1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "205904fa3c644433b46e01c11dd2fe40", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "dd2c5880a94ed3758bfea0b0e8c78286", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7c8928a0d769f4264d195f39cb68a772", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1eea5e8a24d6aa11778eb3e5e5e9c9f2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ba539808a8501609ce052a1562a62b25", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4ebb1a7b25a39d8b9868ec8a1243103f", "c2732a08997e1f5176dfb297d2e89235",
- "42188e2dbb4e02cd353552ea147ad03f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "16761e7c8ba2645718153bed83ae78f6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0d928d6111f86c60ccefc6c6604d5659", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9d01c946a12f5ef9d9cebd9816e06014", "d738eb9f3f4f0b412b93687b55b6e45a",
- "13c07441b47b0c1ed80f015ac302d220", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c0950e609f278efb7050d319a9756bb3", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "291425aaf8206b20e88db8ebf3cf7e7f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4ebb1a7b25a39d8b9868ec8a1243103f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "db645c96fc8be04015e0eb538afec9ae", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9e193b6b28ce798c44c744efde19eee9", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ac8e6391200cec2abdebb00744a2ba82", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d34ec07845cd8523651e5f5112984a14", "745c794b557d4a0d734e45d720a7f7ad",
- "f9813870fc27941a7c00a0443d7c2fe7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a9e9805769fe1baf5c7933793ccca0d8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4ed1a6200912995d4f571bdb7822aa83", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "768f63912e43148c13688d7f23281531", "43fb786fd2e79610d6a6d912b95f4509",
- "02880fde51ac991ad18d8986f4e5145c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9051290279237f9fb1389989b142d2dd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "cb6238b8eb6b72980958e6fcceb2f2eb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d34ec07845cd8523651e5f5112984a14", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "946af3a8f5362def5f4e27cb0fd4e754", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "885c384d90aaa34acd8303958033c252", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "54b17120f7d71ddb4d70590ecd231cc1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2ce55308d873f4cd244f16da2b06e06e", "af7b76d3471cfbdc97d1e57bc2876ce7",
- "20b14a6b5af7aa356963bcaaf23d230d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "553a2c24939dff18ec5833c77f556cfb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "92e31a45513582f386dc9c22a57bbbbd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "768f63912e43148c13688d7f23281531", "4e255554dab9dfa1064e20a905538308",
- "aa25073115bad49432953254e7dce0bc", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "34cdc1be291c95981c98812c5c343a15", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "626321a6dfac542d0fc70321fac13ff3", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2ce55308d873f4cd244f16da2b06e06e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7ad78dfe7bbedf696dd58d9ad01bcfba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "8110ed10e7234851dff3c7e4a51108a2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f6e36446a97611a4db4425df926974b2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a4bb5d5ff4b25f391265b5231049a09a", "cf4867c6b1b8be86a7e0bee708c28d83",
- "9c9c41435697f75fa118b6d6464ee7cb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5c1ec75a160c444fa90abf106fa1140e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6dbf310a9c8d85f76306d6a35545f8af", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2e7927158e7b8e40e7269fc909fb584b", "8b72feff8bb0901229a2bd7da2857c4b",
- "69e3361b7199e10e75685b90fb0df623", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "5b64a6911cb7c3d60bb8f961ed9782a2", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "1c6fda7501e0f8bdad972f7857cd9354", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a4bb5d5ff4b25f391265b5231049a09a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "f0fd9c09d454e4ce918faa97e9ac10be", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "6fb9383302eb7e7a13387464d2634e03", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "a82f4080699300b659bbe1b5c4463147", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9106e0c820b03bcdde3aa94efc11a3e", "0408e10e51a31ac756a57d5149a2b409",
- "38816245ed832ba313fefafcbed1e5c8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2266840f11ac4c066d941ec473b1a54f", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "80fce29dc82d5857c1ed5ef2aea16835", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "123028e18c2bfb334e34adb5a4f67de4", "1670eb8ed876e609ed81236a683b4a3d",
- "2f8ab35f6e7030e82ca922a68b29af4a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7133de9d03a4b07716a12226b5e493e8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4fd485dadcb570e5a0a5addaf9ba84da", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c9106e0c820b03bcdde3aa94efc11a3e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "af6ae5c0eb28417bd251184baf2eaba7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "864d51fcc737bc73a3f588b67515039a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "ecedb178f7cad3dc1b921eca67f9efb6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7ec2eae9e118506da8b33440b399511a", "108a4a6530a6b9c933ccf14edbd896be",
- "5d34137cc8ddba75347b0fa1d0a91791", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "9e194755b2a37b615a517d5f8746dfbb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "14f2c5b9d2cd621c178a39f1ec0c38eb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "123028e18c2bfb334e34adb5a4f67de4", "2fdc713ba418780d0be33a3ebbcb323c",
- "452f91b01833c57db4e909575a029ff6", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "3594eff52d5ed875bd9655ddbf106fae", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d3f140aea9e8eabf4e1e5190e0148288", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "7ec2eae9e118506da8b33440b399511a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "866f8df540dd3b58ab1339314d139cbd", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2ecb7890f00234bcb28c1d969f489012", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "0609ca0ff3ca90069e8b48829b4b0891", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "78de867c8ee947ed6d29055747f26949", "0a7cb4f51f1acf0940b59295b2327465",
- "465dcb046a0449b9dfb3e0b297aa3863", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "bbf86f8174334f0b8d869fd8d58bf92d", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "da54cfb4530841bda29966cfa05f4879", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "2c979c2bddef79a760e72a802f83cc76", "545426be3436073ba63790aa3c4a5598",
- "1fabf0655bedb671e4d7287fec8119ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "90d7e13aa2f9a064493ff2b3b5b12109", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "e4938219593bbed5ae638a93f2f4a580", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "78de867c8ee947ed6d29055747f26949", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "72803589b453a29501540aeddc23e6f4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "c4793d431dbf2d88826bb440bf027512", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "839e86c681e97359f7819c766000dd1c", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d05a237ed7a9ca877256b71555b1b8e4", "3052776d186fca6dd8011f4fe908a212",
- "94b3e5bcd6b849b66a4571ec3d23f9be", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "91d6bdbc62d4bb80c9b371d9704e3c9e", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "4f750f6375524311d260306deb233861", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d05a237ed7a9ca877256b71555b1b8e4", "03ce2d07cac044d6b68604d398571844",
- "68ece92dcbe70a2ae9776d72972740a7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "380d296d0d55a49dd86ee562b053a9d8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "d05a237ed7a9ca877256b71555b1b8e4", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "03ce2d07cac044d6b68604d398571844", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "68ece92dcbe70a2ae9776d72972740a7", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "380d296d0d55a49dd86ee562b053a9d8", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+const char* GetConvolveDigest10bpp(int id) {
+ // Entries containing 'XXXXX...' are skipped. See the test for details.
+ static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
+ "b1b6903d60501c7bc11e5285beb26a52", "a7855ed75772d7fa815978a202bbcd9f",
+ "bde291a4e8087c085fe8b3632f4d7351", "238980eebc9e63ae3eea2771c7a70f12",
+ "0eac13431bd7d8a573318408a72246d5", "d05a237ed7a9ca877256b71555b1b8e4",
+ "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
+ "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
+ "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
+ "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
+ "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
+ "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
+ "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
+ "a693b4bd0334a3b98d45e67d3985bb63", "156de3172d9acf3c7f251cd7a18ad461",
+ "e545b8a3ff958f8363c7968cbae96732", "7842b2047356c1417d9d88219707f1a1",
+ "1a487c658d684314d91bb6d961a94672", "94b3e5bcd6b849b66a4571ec3d23f9be",
+ "0635a296be01b7e641de98ee27c33cd2", "82dc120bf8c2043bc5eee81007309ebf",
+ "58c826cad3c14cdf26a649265758c58b", "f166254037c0dfb140f54cd7b08bddfe",
+ "74ab206f14ac5f62653cd3dd71a7916d", "5621caef7cc1d6522903290ccc5c2cb8",
+ "78ec6cf42cce4b1feb65e076c78ca241", "42188e2dbb4e02cd353552ea147ad03f",
+ "f9813870fc27941a7c00a0443d7c2fe7", "20b14a6b5af7aa356963bcaaf23d230d",
+ "9c9c41435697f75fa118b6d6464ee7cb", "38816245ed832ba313fefafcbed1e5c8",
+ "5d34137cc8ddba75347b0fa1d0a91791", "465dcb046a0449b9dfb3e0b297aa3863",
+ "3e787534dff83c22b3033750e448865a", "4c91f676a054d582bcae1ca9adb87a31",
+ "eab5894046a99ad0a1a12c91b0f37bd7", "765b4cfbfc1a4988878c412d53bcb597",
+ "bc63b29ec78c1efec5543885a45bb822", "91d6bdbc62d4bb80c9b371d9704e3c9e",
+ "cecd57396a0033456408f3f3554c6912", "5b37f94ef136c1eb9a6181c19491459c",
+ "716ba3a25b454e44b46caa42622c128c", "9076f58c4ab20f2f06d701a6b53b1c4f",
+ "d3212ab3922f147c3cf126c3b1aa17f6", "b55fea77f0e14a8bf8b6562b766fe91f",
+ "59b578268ff26a1e21c5b4273f73f852", "16761e7c8ba2645718153bed83ae78f6",
+ "a9e9805769fe1baf5c7933793ccca0d8", "553a2c24939dff18ec5833c77f556cfb",
+ "5c1ec75a160c444fa90abf106fa1140e", "2266840f11ac4c066d941ec473b1a54f",
+ "9e194755b2a37b615a517d5f8746dfbb", "bbf86f8174334f0b8d869fd8d58bf92d",
+ "fd1da8d197cb385f7917cd296d67afb9", "a984202c527b757337c605443f376915",
+ "c347f4a58fd784c5e88c1a23e4ff15d2", "29cbaadbff9adf4a3d49bd9900a9dd0b",
+ "c5997b802a6ba1cf5ba1057ddc5baa7e", "4f750f6375524311d260306deb233861",
+ "59f33727e5beeb783a057770bec7b4cd", "0654d72f22306b28d9ae42515845240c",
+ "6c9d7d9e6ef81d76e775a85c53abe209", "a35f435ccc67717a49251a07e62ae204",
+ "c5325015cb0b7c42839ac4aa21803fa0", "f81f31f1585c0f70438c09e829416f20",
+ "ab10b22fb8dd8199040745565b28595d", "0d928d6111f86c60ccefc6c6604d5659",
+ "4ed1a6200912995d4f571bdb7822aa83", "92e31a45513582f386dc9c22a57bbbbd",
+ "6dbf310a9c8d85f76306d6a35545f8af", "80fce29dc82d5857c1ed5ef2aea16835",
+ "14f2c5b9d2cd621c178a39f1ec0c38eb", "da54cfb4530841bda29966cfa05f4879",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "7e3fa9c03bc3dfbdeb67f24c5d9a49cd",
+ "f3454ca93cbb0c8c09b0695d90a0df3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "f3454ca93cbb0c8c09b0695d90a0df3d", "1a77d2af4d2b6cf8737cfbcacacdc4e4",
+ "89bec831efea2f88129dedcad06bb3fa", "89bec831efea2f88129dedcad06bb3fa",
+ "dead0fe4030085c22e92d16bb110de9d", "306a2f5dfd675df4ed9af44fd5cac8c0",
+ "306a2f5dfd675df4ed9af44fd5cac8c0", "9d01c946a12f5ef9d9cebd9816e06014",
+ "768f63912e43148c13688d7f23281531", "768f63912e43148c13688d7f23281531",
+ "2e7927158e7b8e40e7269fc909fb584b", "123028e18c2bfb334e34adb5a4f67de4",
+ "123028e18c2bfb334e34adb5a4f67de4", "2c979c2bddef79a760e72a802f83cc76",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "da1a6ff2be03ec8acde4cb1cd519a6f0",
+ "a4ca37cb869a0dbd1c4a2dcc449a8f31", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "1b5d1d4c7be8d5ec00a42a49eecf918f", "98b77e88b0784baaea64c98c8707fe46",
+ "8148788044522edc3c497e1017efe2ce", "acf60abeda98bbea161139b915317423",
+ "262c96b1f2c4f85c86c0e9c77fedff1e", "f35a3d13516440f9168076d9b07c9e98",
+ "13782526fc2726100cb3cf375b3150ed", "13c07441b47b0c1ed80f015ac302d220",
+ "02880fde51ac991ad18d8986f4e5145c", "aa25073115bad49432953254e7dce0bc",
+ "69e3361b7199e10e75685b90fb0df623", "2f8ab35f6e7030e82ca922a68b29af4a",
+ "452f91b01833c57db4e909575a029ff6", "1fabf0655bedb671e4d7287fec8119ba",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "d54206c34785cc3d8a06c2ceac46378c",
+ "85a11892ed884e3e74968435f6b16e64", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "16434230d24b9522ae2680e8c37e1b95", "963dea92f3efbb99137d1de9c56728d3",
+ "b72fb6a9a073c2fe65013af1842dc9b0", "86fa0c299737eb499cbcdce94abe2d33",
+ "6b80af04470b83673d98f46925e678a5", "65baca6167fe5249f7a839ce5b2fd591",
+ "e47ded6c0eec1d5baadd02aff172f2b1", "c0950e609f278efb7050d319a9756bb3",
+ "9051290279237f9fb1389989b142d2dd", "34cdc1be291c95981c98812c5c343a15",
+ "5b64a6911cb7c3d60bb8f961ed9782a2", "7133de9d03a4b07716a12226b5e493e8",
+ "3594eff52d5ed875bd9655ddbf106fae", "90d7e13aa2f9a064493ff2b3b5b12109",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b1f26ee13df2e14a757416ba8a682278",
+ "996b6c166f9ed25bd07ea6acdf7597ff", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "34895d4c69a6c3303693e6f431bcd5d8", "c9497b00cb1bc3363dd126ffdddadc8e",
+ "1e461869bb2ee9b6069c5e52cf817291", "8d7f1d7ea6a0dcc922ad5d2e77bc74dd",
+ "138855d9bf0ccd0c62ac14c7bff4fd37", "64035142864914d05a48ef8e013631d0",
+ "205904fa3c644433b46e01c11dd2fe40", "291425aaf8206b20e88db8ebf3cf7e7f",
+ "cb6238b8eb6b72980958e6fcceb2f2eb", "626321a6dfac542d0fc70321fac13ff3",
+ "1c6fda7501e0f8bdad972f7857cd9354", "4fd485dadcb570e5a0a5addaf9ba84da",
+ "d3f140aea9e8eabf4e1e5190e0148288", "e4938219593bbed5ae638a93f2f4a580",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "238980eebc9e63ae3eea2771c7a70f12",
+ "0eac13431bd7d8a573318408a72246d5", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
+ "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
+ "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
+ "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
+ "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
+ "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
+ "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "e552466a4e7ff187251b8914b084d404",
+ "981b7c44b6f7b7ac2acf0cc4096e6bf4", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "a4c75372af36162831cb872e24e1088c", "497271227a70a72f9ad25b415d41563f",
+ "c48bd7e11ec44ba7b2bc8b6a04592439", "0960a9af91250e9faa1eaac32227bf6f",
+ "746c2e0f96ae2246d534d67102be068c", "d6f6db079da9b8909a153c07cc9d0e63",
+ "7c8928a0d769f4264d195f39cb68a772", "db645c96fc8be04015e0eb538afec9ae",
+ "946af3a8f5362def5f4e27cb0fd4e754", "7ad78dfe7bbedf696dd58d9ad01bcfba",
+ "f0fd9c09d454e4ce918faa97e9ac10be", "af6ae5c0eb28417bd251184baf2eaba7",
+ "866f8df540dd3b58ab1339314d139cbd", "72803589b453a29501540aeddc23e6f4",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "aba5d5ef5e96fe418e65d20e506ea834",
+ "d70bf16e2a31e90b7b3cdeaef1494cf9", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "6df80bb7f264f4f285d09a4d61533fae", "c8831118d1004a7cca015a4fca140018",
+ "b7f82c140369067c105c7967c75b6f9e", "130f47aae365aabfec4360fa5b5ff554",
+ "92483ed631de21b685ffe6ccadbbec8f", "cbb6ab31547df6b91cfb48630fdffb48",
+ "1eea5e8a24d6aa11778eb3e5e5e9c9f2", "9e193b6b28ce798c44c744efde19eee9",
+ "885c384d90aaa34acd8303958033c252", "8110ed10e7234851dff3c7e4a51108a2",
+ "6fb9383302eb7e7a13387464d2634e03", "864d51fcc737bc73a3f588b67515039a",
+ "2ecb7890f00234bcb28c1d969f489012", "c4793d431dbf2d88826bb440bf027512",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "972aeba65e8a6d20dd0f95279be2aa75",
+ "34165457282e2af2e9b3f5840e4dec5d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "b8c5582b9bbb789c45471f93be83b41f", "257bf5467db570974d7cf2356bacf116",
+ "5255dded79f56b0078543b5a1814a668", "ef745100f5f34c8ff841b2b0b57eb33f",
+ "edae8ed67286ca6a31573a541b3deb6f", "01adcd8bf15fbf70df47fbf3a953aa14",
+ "ba539808a8501609ce052a1562a62b25", "ac8e6391200cec2abdebb00744a2ba82",
+ "54b17120f7d71ddb4d70590ecd231cc1", "f6e36446a97611a4db4425df926974b2",
+ "a82f4080699300b659bbe1b5c4463147", "ecedb178f7cad3dc1b921eca67f9efb6",
+ "0609ca0ff3ca90069e8b48829b4b0891", "839e86c681e97359f7819c766000dd1c",
};
+ assert(id >= 0);
+ assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
return kDigest[id];
}
-#endif
-struct ConvolveTestParam {
- ConvolveTestParam(int width, int height) : width(width), height(height) {}
- int width;
- int height;
-};
+const char* GetConvolveScaleDigest10bpp(int id) {
+ // Entries containing 'XXXXX...' are skipped. See the test for details.
+ static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
+ "27e21eb31687f9fbd0a66865fa8d7c8a", "9bff726c8e1d0998451a3b9cf2b3d8c8",
+ "661d74cfef36f12ed8d9b4c3ccb7fe0d", "5fc365fd1fcc9599dd97a885ba0c2eec",
+ "acdba2c82a6268e3c0ae8fc32be1b41f", "a5db60bbeaf56ab030ed21c42d553cf3",
+ "1228bb633f9fd63fdb998b775ca79e98", "07812c97f9f43a2a8ae07329dc488699",
+ "903525fb782119c4dfaf61b98a310c9f", "f38b51cef38b929e317861ccbc73ecd8",
+ "b78b05138e1d5fbf089144c42ce03058", "f2e227664cbf2d821b242a34fcbc9835",
+ "cb992dac70591e7d3663588ae13b9adc", "f2292d33657d939fa85ea5bacdfe39a3",
+ "7049dc742d6d8ad6f5d4309968ff281c", "e4beebde1ac335a4d92e4af94653a2ce",
+ "cc77875f98f54b9b26b5f7d9fcbc828d", "fb623f7b9e1ffcf2ae361599728a5589",
+ "c33847e47a7eda214734084640818df9", "ab3e1aec3d720c0c89c46a8d5b161b44",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "efe4de861dcf0f7458b6208cae7e3584",
+ "814751c55fa84f0fed94ff15fc30fc24", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ "31a63fe47297102937acbe7a328588b7", "b804a0a24633243f7da48d7a5f51c0bf",
+ "cb492672b005fc378cccc8c03003cd4a", "1d18732bcf2ea487e84579489cc59a22",
+ "457c4b3ec38a8d6c210584ade1a9fae2", "a3afdd468e6a5238a3dbd2cc21c11c9e",
+ "6ff8a16f21d6e8a9741dacf0734ae563", "3ffa29ef7e54e51f6849c9a3d3c79d03",
+ "af89899b083cf269ac1bd988aeb15b15", "3365d8411c11081fb228436238b9a671",
+ "3ba56d30f5f81d7098f356635a58b9af", "b3013776900c6520bd30f868e8c963b6",
+ "81febaa7342692483040f500ba2e5e2b", "4a51ff1d9a4a68687d590b41aa7835a3",
+ };
+ assert(id >= 0);
+ assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
+ return kDigest[id];
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
struct ConvolveTypeParam {
ConvolveTypeParam(bool is_intra_block_copy, bool is_compound,
@@ -743,12 +442,10 @@ std::ostream& operator<<(std::ostream& os, const ConvolveTypeParam& param) {
<< param.has_vertical_filter << "/" << param.has_horizontal_filter;
}
-// TODO(b/146062680): split this to ConvolveTest and ConvolveScaleTest to
-// simplify the members and test logic.
+//------------------------------------------------------------------------------
template <int bitdepth, typename Pixel>
-class ConvolveTest
- : public testing::TestWithParam<
- std::tuple<ConvolveTestParam, ConvolveTypeParam, bool>> {
+class ConvolveTest : public testing::TestWithParam<
+ std::tuple<ConvolveTypeParam, ConvolveTestParam>> {
public:
ConvolveTest() = default;
~ConvolveTest() override = default;
@@ -758,14 +455,13 @@ class ConvolveTest
const Dsp* const dsp = GetDspTable(bitdepth);
ASSERT_NE(dsp, nullptr);
- GetConvolveFuncs(dsp, &base_convolve_func_, &base_convolve_scale_func_);
+ GetConvolveFunc(dsp, &base_convolve_func_);
const testing::TestInfo* const test_info =
testing::UnitTest::GetInstance()->current_test_info();
const absl::string_view test_case = test_info->test_suite_name();
if (absl::StartsWith(test_case, "C/")) {
base_convolve_func_ = nullptr;
- base_convolve_scale_func_ = nullptr;
} else if (absl::StartsWith(test_case, "SSE41/")) {
if ((GetCpuInfo() & kSSE4_1) != 0) {
ConvolveInit_SSE4_1();
@@ -776,59 +472,37 @@ class ConvolveTest
}
} else if (absl::StartsWith(test_case, "NEON/")) {
ConvolveInit_NEON();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ ConvolveInit10bpp_NEON();
+#endif
} else {
FAIL() << "Unrecognized architecture prefix in test case name: "
<< test_case;
}
- GetConvolveFuncs(dsp, &cur_convolve_func_, &cur_convolve_scale_func_);
+ GetConvolveFunc(dsp, &cur_convolve_func_);
// Skip functions that have not been specialized for this particular
// architecture.
if (cur_convolve_func_ == base_convolve_func_) {
cur_convolve_func_ = nullptr;
}
- if (cur_convolve_scale_func_ == base_convolve_scale_func_) {
- cur_convolve_scale_func_ = nullptr;
- }
}
protected:
int GetDigestId() const {
- // id is the combination of the 3-dimension array:
- // (param_, type_param_, is_scaled_convolve_)
- // The number of each array is 20, 16, 2.
- // The range of id is from 0 to 20x16x2 - 1.
- // is_scaled_convolve_: false, id += 0; true, id += 1;
- // type_param_: (0, 0, 0, 0), id += 0 * 2.
- // (0, 0, 0, 1), id += 1 * 2; (0, 0, 1, 0), id += 2 * 2;
- // ...
- // param_: (2, 2), id += 0 * 32; (2, 4), id += 1 * 32;
- // (4, 2), id += 2 * 32; (4, 4), id += 3 * 32;
- // ...
- int id = static_cast<int>(is_scaled_convolve_);
- id += 2 * static_cast<int>(type_param_.has_horizontal_filter);
- id += 2 * 2 * static_cast<int>(type_param_.has_vertical_filter);
- id += 2 * 4 * static_cast<int>(type_param_.is_compound);
- id += 2 * 8 * static_cast<int>(type_param_.is_intra_block_copy);
- if (param_.width == param_.height) {
- id += 32 * 3 * static_cast<int>(std::log2(param_.width) - 1);
- } else if (param_.width < param_.height) {
- id += 32 * (1 + 3 * static_cast<int>(std::log2(param_.width) - 1));
- } else {
- // param_.width > param_.height
- if (param_.width == 8 && param_.height == 2) {
- // Special case is at the end of the array.
- id += 32 * 19;
- } else {
- id += 32 * (2 + 3 * static_cast<int>(std::log2(param_.height) - 1));
- }
- }
+ int id = param_.block_size;
+ id += param_.kNumBlockSizes *
+ static_cast<int>(type_param_.has_horizontal_filter);
+ id += 2 * param_.kNumBlockSizes *
+ static_cast<int>(type_param_.has_vertical_filter);
+ id += 4 * param_.kNumBlockSizes * static_cast<int>(type_param_.is_compound);
+ id += 8 * param_.kNumBlockSizes *
+ static_cast<int>(type_param_.is_intra_block_copy);
return id;
}
- void GetConvolveFuncs(const Dsp* dsp, ConvolveFunc* func,
- ConvolveScaleFunc* scale_func);
+ void GetConvolveFunc(const Dsp* dsp, ConvolveFunc* func);
void SetInputData(bool use_fixed_values, int value);
void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
libvpx_test::MD5* md5_digest);
@@ -839,44 +513,30 @@ class ConvolveTest
void Test(bool use_fixed_values, int value,
int num_runs = kMinimumViableRuns);
- const ConvolveTestParam param_ = std::get<0>(GetParam());
- const ConvolveTypeParam type_param_ = std::get<1>(GetParam());
- const bool is_scaled_convolve_ = std::get<2>(GetParam());
+ const ConvolveTypeParam type_param_ = std::get<0>(GetParam());
+ const ConvolveTestParam param_ = std::get<1>(GetParam());
private:
ConvolveFunc base_convolve_func_;
ConvolveFunc cur_convolve_func_;
- ConvolveScaleFunc base_convolve_scale_func_;
- ConvolveScaleFunc cur_convolve_scale_func_;
- // Convolve filters are 7-tap, which needs 3 pixels (kRestorationBoder)
- // padding.
- // When is_scaled_convolve_ is true, the source can be at most 2 times of
- // max width/height. So we allocate a larger memory for it and setup the
- // extra memory when is_scaled_convolve_ is true.
- Pixel source_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
- uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
+ // Convolve filters are 7-tap, which need 3 pixels
+ // (kRestorationHorizontalBorder) padding.
+ Pixel source_[kMaxBlockHeight * kMaxBlockWidth] = {};
+ uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
- const int source_stride_ =
- is_scaled_convolve_ ? kMaxBlockWidth * 2 : kMaxBlockWidth;
- const int source_height_ =
- is_scaled_convolve_ ? kMaxBlockHeight * 2 : kMaxBlockHeight;
+ const int source_stride_ = kMaxBlockWidth;
+ const int source_height_ = kMaxBlockHeight;
};
template <int bitdepth, typename Pixel>
-void ConvolveTest<bitdepth, Pixel>::GetConvolveFuncs(
- const Dsp* const dsp, ConvolveFunc* func, ConvolveScaleFunc* scale_func) {
- if (is_scaled_convolve_) {
- *func = nullptr;
- *scale_func = dsp->convolve_scale[type_param_.is_compound];
- } else {
- *scale_func = nullptr;
- *func =
- dsp->convolve[type_param_.is_intra_block_copy][type_param_.is_compound]
- [type_param_.has_vertical_filter]
- [type_param_.has_horizontal_filter];
- }
+void ConvolveTest<bitdepth, Pixel>::GetConvolveFunc(const Dsp* const dsp,
+ ConvolveFunc* func) {
+ *func =
+ dsp->convolve[type_param_.is_intra_block_copy][type_param_.is_compound]
+ [type_param_.has_vertical_filter]
+ [type_param_.has_horizontal_filter];
}
template <int bitdepth, typename Pixel>
@@ -889,8 +549,8 @@ void ConvolveTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
const int mask = (1 << bitdepth) - 1;
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
- const int height = is_scaled_convolve_ ? param_.height * 2 : param_.height;
- const int width = is_scaled_convolve_ ? param_.width * 2 : param_.width;
+ const int height = param_.height;
+ const int width = param_.width;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
@@ -956,18 +616,10 @@ void ConvolveTest<bitdepth, Pixel>::Check16Bit(bool use_fixed_values,
}
template <int bitdepth, typename Pixel>
-void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
- int num_runs /*= 16 * 6*/) {
+void ConvolveTest<bitdepth, Pixel>::Test(
+ bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
// There's no meaning testing fixed input in compound convolve.
- if (type_param_.is_compound && use_fixed_values) GTEST_SKIP();
-
- // Scaled convolve does not behave differently under most params. Only need to
- // test the enabled compound implementation.
- if (is_scaled_convolve_ &&
- (type_param_.is_intra_block_copy || type_param_.has_vertical_filter ||
- type_param_.has_horizontal_filter)) {
- GTEST_SKIP();
- }
+ if (type_param_.is_compound && use_fixed_values) return;
// There should not be any function set for this combination.
if (type_param_.is_intra_block_copy && type_param_.is_compound) {
@@ -984,26 +636,18 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
}
// Skip unspecialized functions.
- if (cur_convolve_func_ == nullptr && cur_convolve_scale_func_ == nullptr) {
+ if (cur_convolve_func_ == nullptr) {
GTEST_SKIP();
}
SetInputData(use_fixed_values, value);
- libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed() +
- GetDigestId());
- // [1,2048] for |step_[xy]|. This covers a scaling range of 1/1024 to 2x.
- const int step_x = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
- const int step_y = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
int subpixel_x = 0;
int subpixel_y = 0;
int vertical_index = 0;
int horizontal_index = 0;
const int offset =
kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
- const int offset_scale =
- kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
const Pixel* const src = source_ + offset;
- const Pixel* const src_scale = source_ + offset_scale;
const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
const ptrdiff_t src_stride_16 = source_stride_;
const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
@@ -1041,22 +685,7 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
horizontal_index += static_cast<int>(i % 16 == 0);
horizontal_index %= 4;
- if (is_scaled_convolve_) {
- ASSERT_EQ(cur_convolve_func_, nullptr);
- // Output type is uint16_t.
- const absl::Time start = absl::Now();
- if (type_param_.is_compound) {
- cur_convolve_scale_func_(
- source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
- step_y, param_.width, param_.height, dst_16, dst_stride_compound);
- } else {
- cur_convolve_scale_func_(
- source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
- step_y, param_.width, param_.height, dst_pixel, dst_stride);
- }
- elapsed_time += absl::Now() - start;
- } else if (type_param_.is_compound) {
- ASSERT_EQ(cur_convolve_scale_func_, nullptr);
+ if (type_param_.is_compound) {
// Output type is uint16_t.
const absl::Time start = absl::Now();
cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
@@ -1064,7 +693,6 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
param_.height, dst_16, dst_stride_compound);
elapsed_time += absl::Now() - start;
} else {
- ASSERT_EQ(cur_convolve_scale_func_, nullptr);
// Output type is Pixel.
const absl::Time start = absl::Now();
cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
@@ -1077,30 +705,7 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
// runs for speed timing.
if (i >= kMinimumViableRuns) continue;
- if (is_scaled_convolve_) {
- // Convolve function does not clip the output. The clipping is applied
- // later. But libaom clips the output. So we apply clipping to match
- // libaom in tests.
- if (type_param_.is_compound) {
- const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
- Pixel* dest_row = dest_clipped_;
- for (int y = 0; y < kMaxBlockHeight; ++y) {
- for (int x = 0; x < kMaxBlockWidth; ++x) {
- dest_row[x] = static_cast<Pixel>(Clip3(
- dest_16bit_[y * dst_stride_compound + x] - single_round_offset,
- 0, (1 << bitdepth) - 1));
- }
- dest_row += kMaxBlockWidth;
- }
- }
-
- if (type_param_.is_compound) {
- Check16Bit(use_fixed_values, source_16bit_ + offset_scale, dst_16,
- &md5_digest);
- } else {
- Check(use_fixed_values, src_scale, dst_pixel, &md5_digest);
- }
- } else if (type_param_.is_compound) {
+ if (type_param_.is_compound) {
// Need to copy source to a uint16_t buffer for comparison.
Pixel* src_ptr = source_;
uint16_t* src_ptr_16 = source_16bit_;
@@ -1122,15 +727,14 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
// md5 sums are only calculated for random input.
const char* ref_digest;
if (bitdepth == 8) {
- ref_digest = GetDigest8bpp(GetDigestId());
+ ref_digest = GetConvolveDigest8bpp(GetDigestId());
} else {
#if LIBGAV1_MAX_BITDEPTH >= 10
- ref_digest = GetDigest10bpp(GetDigestId());
+ ref_digest = GetConvolveDigest10bpp(GetDigestId());
#endif // LIBGAV1_MAX_BITDEPTH >= 10
}
const char* direction;
- if (is_scaled_convolve_ || (type_param_.has_vertical_filter &&
- type_param_.has_horizontal_filter)) {
+ if (type_param_.has_vertical_filter && type_param_.has_horizontal_filter) {
direction = "2D";
} else if (type_param_.has_vertical_filter) {
direction = "Vertical";
@@ -1141,10 +745,9 @@ void ConvolveTest<bitdepth, Pixel>::Test(bool use_fixed_values, int value,
}
const auto elapsed_time_us =
static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
- printf("Mode Convolve%s%s%s%s[%25s]: %5d us MD5: %s\n",
+ printf("Mode Convolve%s%s%s[%25s]: %5d us MD5: %s\n",
type_param_.is_compound ? "Compound" : "",
- type_param_.is_intra_block_copy ? "IntraBlockCopy" : "",
- is_scaled_convolve_ ? "Scale" : "", direction,
+ type_param_.is_intra_block_copy ? "IntraBlockCopy" : "", direction,
absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
elapsed_time_us, md5_digest.Get());
EXPECT_STREQ(ref_digest, md5_digest.Get());
@@ -1210,7 +813,7 @@ void ShowRange() {
assert(max > INT16_MAX && max < INT32_MAX);
}
- printf(" intermediate range: [%8d, %8d]\n", min, max);
+ printf(" Horizontal upscaled range: [%8d, %8d]\n", min, max);
const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);
@@ -1219,7 +822,7 @@ void ShowRange() {
assert(first_pass_min > INT16_MIN);
assert(first_pass_max < INT16_MAX);
- printf(" first pass output range: [%8d, %8d]\n", first_pass_min,
+ printf(" Horizontal downscaled range: [%8d, %8d]\n", first_pass_min,
first_pass_max);
// Second pass.
@@ -1230,14 +833,14 @@ void ShowRange() {
assert(min < INT16_MIN && min > INT32_MIN);
assert(max > INT16_MAX && max < INT32_MAX);
- printf(" intermediate range: [%8d, %8d]\n", min, max);
+ printf(" Vertical upscaled range: [%8d, %8d]\n", min, max);
// Second pass non-compound output is clipped to Pixel values.
const int second_pass_min =
Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
const int second_pass_max =
Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
- printf(" second pass output range: [%8d, %8d]\n", second_pass_min,
+ printf(" Pixel output range: [%8d, %8d]\n", second_pass_min,
second_pass_max);
// Output is Pixel so matches Pixel values.
@@ -1249,7 +852,7 @@ void ShowRange() {
const int compound_second_pass_max =
RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;
- printf(" compound second pass output range: [%8d, %8d]\n",
+ printf(" Compound output range: [%8d, %8d]\n",
compound_second_pass_min, compound_second_pass_max);
if (bitdepth == 8) {
@@ -1287,17 +890,333 @@ TEST_P(ConvolveTest8bpp, DISABLED_Speed) {
Test(false, 0, num_runs);
}
+//------------------------------------------------------------------------------
+template <int bitdepth, typename Pixel>
+class ConvolveScaleTest
+ : public testing::TestWithParam<
+ std::tuple<bool /*is_compound*/, ConvolveTestParam>> {
+ public:
+ ConvolveScaleTest() = default;
+ ~ConvolveScaleTest() override = default;
+
+ void SetUp() override {
+ ConvolveInit_C();
+
+ const Dsp* const dsp = GetDspTable(bitdepth);
+ ASSERT_NE(dsp, nullptr);
+ base_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const absl::string_view test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ base_convolve_scale_func_ = nullptr;
+ } else if (absl::StartsWith(test_case, "SSE41/")) {
+ if ((GetCpuInfo() & kSSE4_1) != 0) {
+ ConvolveInit_SSE4_1();
+ }
+ } else if (absl::StartsWith(test_case, "AVX2/")) {
+ if ((GetCpuInfo() & kAVX2) != 0) {
+ ConvolveInit_AVX2();
+ }
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+ ConvolveInit_NEON();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ ConvolveInit10bpp_NEON();
+#endif
+ } else {
+ FAIL() << "Unrecognized architecture prefix in test case name: "
+ << test_case;
+ }
+
+ cur_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
+
+ // Skip functions that have not been specialized for this particular
+ // architecture.
+ if (cur_convolve_scale_func_ == base_convolve_scale_func_) {
+ cur_convolve_scale_func_ = nullptr;
+ }
+ }
+
+ protected:
+ int GetDigestId() const {
+ return param_.block_size +
+ param_.kNumBlockSizes * static_cast<int>(is_compound_);
+ }
+
+ void SetInputData(bool use_fixed_values, int value);
+ void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
+ libvpx_test::MD5* md5_digest);
+ void Check16Bit(bool use_fixed_values, const uint16_t* src,
+ const uint16_t* dest, libvpx_test::MD5* md5_digest);
+ // |num_runs| covers the categories of filters (6) and the number of filters
+ // under each category (16).
+ void Test(bool use_fixed_values, int value,
+ int num_runs = kMinimumViableRuns);
+
+ const bool is_compound_ = std::get<0>(GetParam());
+ const ConvolveTestParam param_ = std::get<1>(GetParam());
+
+ private:
+ ConvolveScaleFunc base_convolve_scale_func_;
+ ConvolveScaleFunc cur_convolve_scale_func_;
+ // Convolve filters are 7-tap, which need 3 pixels
+ // (kRestorationHorizontalBorder) padding.
+ // The source can be at most 2 times of max width/height.
+ Pixel source_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
+ uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
+ uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
+ Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
+
+ const int source_stride_ = kMaxBlockWidth * 2;
+ const int source_height_ = kMaxBlockHeight * 2;
+};
+
+template <int bitdepth, typename Pixel>
+void ConvolveScaleTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
+ int value) {
+ if (use_fixed_values) {
+ std::fill(source_, source_ + source_height_ * source_stride_, value);
+ } else {
+ const int offset =
+ kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
+ const int mask = (1 << bitdepth) - 1;
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ const int height = param_.height * 2;
+ const int width = param_.width * 2;
+ for (int y = 0; y < height; ++y) {
+ for (int x = 0; x < width; ++x) {
+ source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
+ }
+ }
+ // Copy border pixels to the left and right borders.
+ for (int y = 0; y < height; ++y) {
+ Memset(&source_[(y + kConvolveBorderLeftTop) * source_stride_],
+ source_[y * source_stride_ + offset], kConvolveBorderLeftTop);
+ Memset(&source_[y * source_stride_ + offset + width],
+ source_[y * source_stride_ + offset + width - 1],
+ kConvolveBorderLeftTop);
+ }
+ // Copy border pixels to the top and bottom borders.
+ for (int y = 0; y < kConvolveBorderLeftTop; ++y) {
+ memcpy(&source_[y * source_stride_],
+ &source_[kConvolveBorderLeftTop * source_stride_],
+ source_stride_ * sizeof(Pixel));
+ memcpy(&source_[(y + kConvolveBorderLeftTop + height) * source_stride_],
+ &source_[(kConvolveBorderLeftTop + height - 1) * source_stride_],
+ source_stride_ * sizeof(Pixel));
+ }
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void ConvolveScaleTest<bitdepth, Pixel>::Check(bool use_fixed_values,
+ const Pixel* src,
+ const Pixel* dest,
+ libvpx_test::MD5* md5_digest) {
+ if (use_fixed_values) {
+ // For fixed values, input and output are identical.
+ const bool success =
+ test_utils::CompareBlocks(src, dest, param_.width, param_.height,
+ kMaxBlockWidth, kMaxBlockWidth, false, false);
+ EXPECT_TRUE(success);
+ } else {
+ // For random input, compare md5.
+ const int offset =
+ kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
+ const size_t size = sizeof(dest_clipped_) - offset * sizeof(Pixel);
+ md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void ConvolveScaleTest<bitdepth, Pixel>::Check16Bit(
+ bool use_fixed_values, const uint16_t* src, const uint16_t* dest,
+ libvpx_test::MD5* md5_digest) {
+ if (use_fixed_values) {
+ // For fixed values, input and output are identical.
+ const bool success =
+ test_utils::CompareBlocks(src, dest, param_.width, param_.height,
+ kMaxBlockWidth, kMaxBlockWidth, false);
+ EXPECT_TRUE(success);
+ } else {
+ // For random input, compare md5.
+ const int offset =
+ kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
+ const size_t size = sizeof(dest_16bit_) - offset * sizeof(uint16_t);
+ md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void ConvolveScaleTest<bitdepth, Pixel>::Test(
+ bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
+ // There's no meaning testing fixed input in compound convolve.
+ if (is_compound_ && use_fixed_values) return;
+
+ // The compound function is only used for blocks 4x4 or greater.
+ if (is_compound_) {
+ if (param_.width < 4 || param_.height < 4) {
+ GTEST_SKIP();
+ }
+ }
+
+ // Skip unspecialized functions.
+ if (cur_convolve_scale_func_ == nullptr) {
+ GTEST_SKIP();
+ }
+
+ SetInputData(use_fixed_values, value);
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed() +
+ GetDigestId());
+ // [1,2048] for |step_[xy]|. This covers a scaling range of 1/1024 to 2x.
+ const int step_x = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
+ const int step_y = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
+ int subpixel_x = 0;
+ int subpixel_y = 0;
+ int vertical_index = 0;
+ int horizontal_index = 0;
+ const int offset =
+ kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
+ const int offset_scale =
+ kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
+ const Pixel* const src_scale = source_ + offset_scale;
+ const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
+ const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
+ // Pack Compound output since we control the predictor buffer.
+ const ptrdiff_t dst_stride_compound = param_.width;
+
+ // Output is always 16 bits regardless of |bitdepth|.
+ uint16_t* dst_16 = dest_16bit_ + offset;
+ // Output depends on |bitdepth|.
+ Pixel* dst_pixel = dest_clipped_ + offset;
+
+ // Collect the first |kMinimumViableRuns| into one md5 buffer.
+ libvpx_test::MD5 md5_digest;
+
+ absl::Duration elapsed_time;
+ for (int i = 0; i < num_runs; ++i) {
+ // Test every filter.
+ // Because of masking |subpixel_{x,y}| values roll over every 16 iterations.
+ subpixel_x += 1 << 6;
+ subpixel_y += 1 << 6;
+
+ const int horizontal_filter_id = (subpixel_x >> 6) & 0xF;
+ const int vertical_filter_id = (subpixel_y >> 6) & 0xF;
+
+ // |filter_id| == 0 (copy) must be handled by the appropriate 1D or copy
+ // function.
+ if (horizontal_filter_id == 0 || vertical_filter_id == 0) {
+ continue;
+ }
+
+ // For focused speed testing these can be set to the desired filter. Want
+ // only 8 tap filters? Set |{vertical,horizontal}_index| to 2.
+ vertical_index += static_cast<int>(i % 16 == 0);
+ vertical_index %= 4;
+ horizontal_index += static_cast<int>(i % 16 == 0);
+ horizontal_index %= 4;
+
+ // Output type is uint16_t.
+ const absl::Time start = absl::Now();
+ if (is_compound_) {
+ cur_convolve_scale_func_(
+ source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
+ step_y, param_.width, param_.height, dst_16, dst_stride_compound);
+ } else {
+ cur_convolve_scale_func_(
+ source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
+ step_y, param_.width, param_.height, dst_pixel, dst_stride);
+ }
+ elapsed_time += absl::Now() - start;
+
+ // Only check the output for the first set. After that it's just repeated
+ // runs for speed timing.
+ if (i >= kMinimumViableRuns) continue;
+
+ // Convolve function does not clip the output. The clipping is applied
+ // later, but libaom clips the output. So we apply clipping to match
+ // libaom in tests.
+ if (is_compound_) {
+ const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
+ Pixel* dest_row = dest_clipped_;
+ for (int y = 0; y < kMaxBlockHeight; ++y) {
+ for (int x = 0; x < kMaxBlockWidth; ++x) {
+ dest_row[x] = static_cast<Pixel>(Clip3(
+ dest_16bit_[y * dst_stride_compound + x] - single_round_offset, 0,
+ (1 << bitdepth) - 1));
+ }
+ dest_row += kMaxBlockWidth;
+ }
+ }
+
+ if (is_compound_) {
+ Check16Bit(use_fixed_values, source_16bit_ + offset_scale, dst_16,
+ &md5_digest);
+ } else {
+ Check(use_fixed_values, src_scale, dst_pixel, &md5_digest);
+ }
+ }
+
+ if (!use_fixed_values) {
+ // md5 sums are only calculated for random input.
+ const char* ref_digest;
+ if (bitdepth == 8) {
+ ref_digest = GetConvolveScaleDigest8bpp(GetDigestId());
+ } else {
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ ref_digest = GetConvolveScaleDigest10bpp(GetDigestId());
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+ }
+
+ const auto elapsed_time_us =
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
+ printf("Mode Convolve%sScale2D[%25s]: %5d us MD5: %s\n",
+ is_compound_ ? "Compound" : "",
+ absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
+ elapsed_time_us, md5_digest.Get());
+ EXPECT_STREQ(ref_digest, md5_digest.Get());
+ }
+}
+
+using ConvolveScaleTest8bpp = ConvolveScaleTest<8, uint8_t>;
+
+TEST_P(ConvolveScaleTest8bpp, FixedValues) {
+ Test(true, 0);
+ Test(true, 1);
+ Test(true, 128);
+ Test(true, 255);
+}
+
+TEST_P(ConvolveScaleTest8bpp, RandomValues) { Test(false, 0); }
+
+TEST_P(ConvolveScaleTest8bpp, DISABLED_Speed) {
+ const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
+ Test(false, 0, num_runs);
+}
+
+//------------------------------------------------------------------------------
const ConvolveTestParam kConvolveParam[] = {
- ConvolveTestParam(2, 2), ConvolveTestParam(2, 4),
- ConvolveTestParam(4, 2), ConvolveTestParam(4, 4),
- ConvolveTestParam(4, 8), ConvolveTestParam(8, 2),
- ConvolveTestParam(8, 4), ConvolveTestParam(8, 8),
- ConvolveTestParam(8, 16), ConvolveTestParam(16, 8),
- ConvolveTestParam(16, 16), ConvolveTestParam(16, 32),
- ConvolveTestParam(32, 16), ConvolveTestParam(32, 32),
- ConvolveTestParam(32, 64), ConvolveTestParam(64, 32),
- ConvolveTestParam(64, 64), ConvolveTestParam(64, 128),
- ConvolveTestParam(128, 64), ConvolveTestParam(128, 128),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize2x2),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize2x4),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize4x2),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize4x4),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize4x8),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize8x2),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize8x4),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize8x8),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize8x16),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize16x8),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize16x16),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize16x32),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize32x16),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize32x32),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize32x64),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize64x32),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize64x64),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize64x128),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize128x64),
+ ConvolveTestParam(ConvolveTestParam::kBlockSize128x128),
};
const ConvolveTypeParam kConvolveTypeParam[] = {
@@ -1313,36 +1232,43 @@ const ConvolveTypeParam kConvolveTypeParam[] = {
ConvolveTypeParam(true, false, false, true),
ConvolveTypeParam(true, false, true, false),
ConvolveTypeParam(true, false, true, true),
+ // This is left to ensure no function exists for |intra_block_copy| when
+ // |is_compound| is true; all combinations aren't necessary.
ConvolveTypeParam(true, true, false, false),
- ConvolveTypeParam(true, true, false, true),
- ConvolveTypeParam(true, true, true, false),
- ConvolveTypeParam(true, true, true, true),
};
INSTANTIATE_TEST_SUITE_P(C, ConvolveTest8bpp,
- testing::Combine(testing::ValuesIn(kConvolveParam),
- testing::ValuesIn(kConvolveTypeParam),
- testing::Bool()));
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest8bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest8bpp,
- testing::Combine(testing::ValuesIn(kConvolveParam),
- testing::ValuesIn(kConvolveTypeParam),
- testing::Bool()));
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest8bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
#endif // LIBGAV1_ENABLE_NEON
#if LIBGAV1_ENABLE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveTest8bpp,
- testing::Combine(testing::ValuesIn(kConvolveParam),
- testing::ValuesIn(kConvolveTypeParam),
- testing::Bool()));
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveScaleTest8bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
#endif // LIBGAV1_ENABLE_SSE4_1
#if LIBGAV1_ENABLE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveTest8bpp,
- testing::Combine(testing::ValuesIn(kConvolveParam),
- testing::ValuesIn(kConvolveTypeParam),
- testing::Bool()));
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveScaleTest8bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
#endif // LIBGAV1_ENABLE_AVX2
#if LIBGAV1_MAX_BITDEPTH >= 10
@@ -1362,10 +1288,38 @@ TEST_P(ConvolveTest10bpp, DISABLED_Speed) {
Test(false, 0, num_runs);
}
+using ConvolveScaleTest10bpp = ConvolveScaleTest<10, uint16_t>;
+
+TEST_P(ConvolveScaleTest10bpp, FixedValues) {
+ Test(true, 0);
+ Test(true, 1);
+ Test(true, 128);
+ Test(true, (1 << 10) - 1);
+}
+
+TEST_P(ConvolveScaleTest10bpp, RandomValues) { Test(false, 0); }
+
+TEST_P(ConvolveScaleTest10bpp, DISABLED_Speed) {
+ const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
+ Test(false, 0, num_runs);
+}
+
INSTANTIATE_TEST_SUITE_P(C, ConvolveTest10bpp,
- testing::Combine(testing::ValuesIn(kConvolveParam),
- testing::ValuesIn(kConvolveTypeParam),
- testing::Bool()));
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest10bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest10bpp,
+ testing::Combine(testing::ValuesIn(kConvolveTypeParam),
+ testing::ValuesIn(kConvolveParam)));
+INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest10bpp,
+ testing::Combine(testing::Bool(),
+ testing::ValuesIn(kConvolveParam)));
+#endif // LIBGAV1_ENABLE_NEON
+
#endif // LIBGAV1_MAX_BITDEPTH >= 10
} // namespace
diff --git a/src/dsp/distance_weighted_blend.cc b/src/dsp/distance_weighted_blend.cc
index a035fbe..34d10fc 100644
--- a/src/dsp/distance_weighted_blend.cc
+++ b/src/dsp/distance_weighted_blend.cc
@@ -27,10 +27,12 @@ namespace dsp {
namespace {
template <int bitdepth, typename Pixel>
-void DistanceWeightedBlend_C(const void* prediction_0, const void* prediction_1,
+void DistanceWeightedBlend_C(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const uint8_t weight_0, const uint8_t weight_1,
const int width, const int height,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
// 7.11.3.2 Rounding variables derivation process
// 2 * FILTER_BITS(7) - (InterRound0(3|5) + InterRound1(7))
constexpr int inter_post_round_bits = (bitdepth == 12) ? 2 : 4;
diff --git a/src/dsp/distance_weighted_blend_test.cc b/src/dsp/distance_weighted_blend_test.cc
index b3f3a2e..fdf058e 100644
--- a/src/dsp/distance_weighted_blend_test.cc
+++ b/src/dsp/distance_weighted_blend_test.cc
@@ -14,13 +14,13 @@
#include "src/dsp/distance_weighted_blend.h"
+#include <cassert>
#include <cstdint>
#include <ostream>
#include <string>
#include <type_traits>
#include "absl/strings/match.h"
-#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
@@ -43,18 +43,8 @@ constexpr int kNumSpeedTests = 500000;
constexpr int kQuantizedDistanceLookup[4][2] = {
{9, 7}, {11, 5}, {12, 4}, {13, 3}};
-struct TestParam {
- TestParam(int width, int height) : width(width), height(height) {}
- int width;
- int height;
-};
-
-std::ostream& operator<<(std::ostream& os, const TestParam& param) {
- return os << "BlockSize" << param.width << "x" << param.height;
-}
-
template <int bitdepth, typename Pixel>
-class DistanceWeightedBlendTest : public testing::TestWithParam<TestParam>,
+class DistanceWeightedBlendTest : public testing::TestWithParam<BlockSize>,
public test_utils::MaxAlignedAllocable {
public:
DistanceWeightedBlendTest() = default;
@@ -91,8 +81,8 @@ class DistanceWeightedBlendTest : public testing::TestWithParam<TestParam>,
using PredType =
typename std::conditional<bitdepth == 8, int16_t, uint16_t>::type;
static constexpr int kDestStride = kMaxSuperBlockSizeInPixels;
- const int width_ = GetParam().width;
- const int height_ = GetParam().height;
+ const int width_ = kBlockWidthPixels[GetParam()];
+ const int height_ = kBlockHeightPixels[GetParam()];
alignas(kMaxAlignment) PredType
source1_[kMaxSuperBlockSizeInPixels * kMaxSuperBlockSizeInPixels];
alignas(kMaxAlignment) PredType
@@ -149,74 +139,51 @@ void DistanceWeightedBlendTest<bitdepth, Pixel>::Test(const char* digest,
elapsed_time += absl::Now() - start;
}
- test_utils::CheckMd5Digest(
- "DistanceWeightedBlend",
- absl::StrFormat("BlockSize%dx%d", width_, height_).c_str(), digest, dest_,
- sizeof(dest_), elapsed_time);
+ test_utils::CheckMd5Digest("DistanceWeightedBlend", ToString(GetParam()),
+ digest, dest_, sizeof(dest_), elapsed_time);
}
-const TestParam kTestParam[] = {
- TestParam(4, 4), TestParam(4, 8), TestParam(4, 16),
- TestParam(8, 4), TestParam(8, 8), TestParam(8, 16),
- TestParam(8, 32), TestParam(16, 4), TestParam(16, 8),
- TestParam(16, 16), TestParam(16, 32), TestParam(16, 64),
- TestParam(32, 8), TestParam(32, 16), TestParam(32, 32),
- TestParam(32, 64), TestParam(32, 128), TestParam(64, 16),
- TestParam(64, 32), TestParam(64, 64), TestParam(64, 128),
- TestParam(128, 32), TestParam(128, 64), TestParam(128, 128),
+const BlockSize kTestParam[] = {
+ kBlock4x4, kBlock4x8, kBlock4x16, kBlock8x4, kBlock8x8,
+ kBlock8x16, kBlock8x32, kBlock16x4, kBlock16x8, kBlock16x16,
+ kBlock16x32, kBlock16x64, kBlock32x8, kBlock32x16, kBlock32x32,
+ kBlock32x64, kBlock64x16, kBlock64x32, kBlock64x64, kBlock64x128,
+ kBlock128x64, kBlock128x128,
};
-const char* GetDistanceWeightedBlendDigest8bpp(const TestParam block_size) {
- static const char* const kDigestsWidth4[] = {
+const char* GetDistanceWeightedBlendDigest8bpp(const BlockSize block_size) {
+ static const char* const kDigests[kMaxBlockSizes] = {
+ // 4xN
"ebf389f724f8ab46a2cac895e4e073ca",
"09acd567b6b12c8cf8eb51d8b86eb4bf",
"57bb4d65695d8ec6752f2bd8686b64fd",
- };
- static const char* const kDigestsWidth8[] = {
+ // 8xN
"270905ac76f9a2cba8a552eb0bf7c8c1",
"f0801c8574d2c271ef2bbea77a1d7352",
"e761b580e3312be33a227492a233ce72",
"ff214dab1a7e98e2285961d6421720c6",
- };
- static const char* const kDigestsWidth16[] = {
- "4f712609a36e817f9752326d58562ff8", "14243f5c5f7c7104160c1f2cef0a0fbc",
- "3ac3f3161b7c8dd8436b02abfdde104a", "81a00b704e0e41a5dbe6436ac70c098d",
+ // 16xN
+ "4f712609a36e817f9752326d58562ff8",
+ "14243f5c5f7c7104160c1f2cef0a0fbc",
+ "3ac3f3161b7c8dd8436b02abfdde104a",
+ "81a00b704e0e41a5dbe6436ac70c098d",
"af8fd02017c7acdff788be742d700baa",
- };
- static const char* const kDigestsWidth32[] = {
- "ee34332c66a6d6ed8ce64031aafe776c", "b5e3d22bd2dbdb624c8b86a1afb5ce6d",
- "607ffc22098d81b7e37a7bf62f4af5d3", "3823dbf043b4682f56d5ca698e755ea5",
- "57f7e8d1e67645269ce760a2c8da4afc",
- };
- static const char* const kDigestsWidth64[] = {
+ // 32xN
+ "ee34332c66a6d6ed8ce64031aafe776c",
+ "b5e3d22bd2dbdb624c8b86a1afb5ce6d",
+ "607ffc22098d81b7e37a7bf62f4af5d3",
+ "3823dbf043b4682f56d5ca698e755ea5",
+ // 64xN
"4acf556b921956c2bc24659cd5128401",
"a298c544c9c3b27924b4c23cc687ea5a",
"539e2df267782ce61c70103b23b7d922",
"3b0cb2a0b5d384efee4d81401025bec1",
- };
- static const char* const kDigestsWidth128[] = {
- "d71ee689a40ff5f390d07717df4b7233",
+ // 128xN
"8b56b636dd712c2f8d138badb7219991",
"8cfc8836908902b8f915639b7bff45b3",
};
- const int height_index =
- FloorLog2(block_size.height) - FloorLog2(block_size.width) + 2;
- switch (block_size.width) {
- case 4:
- return kDigestsWidth4[height_index - 2];
- case 8:
- return kDigestsWidth8[height_index - 1];
- case 16:
- return kDigestsWidth16[height_index];
- case 32:
- return kDigestsWidth32[height_index];
- case 64:
- return kDigestsWidth64[height_index];
- default:
- EXPECT_EQ(block_size.width, 128)
- << "Unknown width parameter: " << block_size.width;
- return kDigestsWidth128[height_index];
- }
+ assert(block_size < kMaxBlockSizes);
+ return kDigests[block_size];
}
using DistanceWeightedBlendTest8bpp = DistanceWeightedBlendTest<8, uint8_t>;
@@ -243,57 +210,39 @@ INSTANTIATE_TEST_SUITE_P(SSE41, DistanceWeightedBlendTest8bpp,
#endif
#if LIBGAV1_MAX_BITDEPTH >= 10
-const char* GetDistanceWeightedBlendDigest10bpp(const TestParam block_size) {
- static const char* const kDigestsWidth4[] = {
+const char* GetDistanceWeightedBlendDigest10bpp(const BlockSize block_size) {
+ static const char* const kDigests[] = {
+ // 4xN
"55f594b56e16d5c401274affebbcc3d3",
"69df14da4bb33a8f7d7087921008e919",
"1b61f33604c54015794198a13bfebf46",
- };
- static const char* const kDigestsWidth8[] = {
+ // 8xN
"825a938185b152f7cf09bf1c0723ce2b",
"85ea315c51d979bc9b45834d6b40ec6f",
"92ebde208e8c39f7ec6de2de82182dbb",
"520f84716db5b43684dbb703806383fe",
- };
- static const char* const kDigestsWidth16[] = {
- "12ca23e3e2930005a0511646e8c83da4", "6208694a6744f4a3906f58c1add670e3",
- "a33d63889df989a3bbf84ff236614267", "34830846ecb0572a98bbd192fed02b16",
+ // 16xN
+ "12ca23e3e2930005a0511646e8c83da4",
+ "6208694a6744f4a3906f58c1add670e3",
+ "a33d63889df989a3bbf84ff236614267",
+ "34830846ecb0572a98bbd192fed02b16",
"34bb2f79c0bd7f9a80691b8af597f2a8",
- };
- static const char* const kDigestsWidth32[] = {
- "fa97f2d0e3143f1f44d3ac018b0d696d", "3df4a22456c9ab6ed346ab1b9750ae7d",
- "6276a058b35c6131bc0c94a4b4a37ebc", "9ca42da5d2d5eb339df03ae2c7a26914",
- "2ff0dc010a7b40830fb47423a9beb894",
- };
- static const char* const kDigestsWidth64[] = {
+ // 32xN
+ "fa97f2d0e3143f1f44d3ac018b0d696d",
+ "3df4a22456c9ab6ed346ab1b9750ae7d",
+ "6276a058b35c6131bc0c94a4b4a37ebc",
+ "9ca42da5d2d5eb339df03ae2c7a26914",
+ // 64xN
"800e692c520f99223bc24c1ac95a0166",
"818b6d20426585ef7fe844015a03aaf5",
"fb48691ccfff083e01d74826e88e613f",
"0bd350bc5bc604a224d77a5f5a422698",
- };
- static const char* const kDigestsWidth128[] = {
- "02aac5d5669c1245da876c5440c4d829",
+ // 128xN
"a130840813cd6bd69d09bcf5f8d0180f",
"6ece1846bea55e8f8f2ed7fbf73718de",
};
- const int height_index =
- FloorLog2(block_size.height) - FloorLog2(block_size.width) + 2;
- switch (block_size.width) {
- case 4:
- return kDigestsWidth4[height_index - 2];
- case 8:
- return kDigestsWidth8[height_index - 1];
- case 16:
- return kDigestsWidth16[height_index];
- case 32:
- return kDigestsWidth32[height_index];
- case 64:
- return kDigestsWidth64[height_index];
- default:
- EXPECT_EQ(block_size.width, 128)
- << "Unknown width parameter: " << block_size.width;
- return kDigestsWidth128[height_index];
- }
+ assert(block_size < kMaxBlockSizes);
+ return kDigests[block_size];
}
using DistanceWeightedBlendTest10bpp = DistanceWeightedBlendTest<10, uint16_t>;
@@ -321,4 +270,9 @@ INSTANTIATE_TEST_SUITE_P(NEON, DistanceWeightedBlendTest10bpp,
} // namespace
} // namespace dsp
+
+static std::ostream& operator<<(std::ostream& os, const BlockSize param) {
+ return os << ToString(param);
+}
+
} // namespace libgav1
diff --git a/src/dsp/dsp.cc b/src/dsp/dsp.cc
index a3d7701..aac0ca0 100644
--- a/src/dsp/dsp.cc
+++ b/src/dsp/dsp.cc
@@ -155,7 +155,9 @@ void DspInit() {
WarpInit_NEON();
WeightMaskInit_NEON();
#if LIBGAV1_MAX_BITDEPTH >= 10
+ ConvolveInit10bpp_NEON();
InverseTransformInit10bpp_NEON();
+ LoopRestorationInit10bpp_NEON();
#endif // LIBGAV1_MAX_BITDEPTH >= 10
#endif // LIBGAV1_ENABLE_NEON
});
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index 153db7f..f9e6b22 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -50,23 +50,23 @@ enum IntraPredictor : uint8_t {
};
// List of valid 1D transforms.
-enum Transform1D : uint8_t {
- k1DTransformDct, // Discrete Cosine Transform.
- k1DTransformAdst, // Asymmetric Discrete Sine Transform.
- k1DTransformIdentity,
- k1DTransformWht, // Walsh Hadamard Transform.
- kNum1DTransforms
+enum Transform1d : uint8_t {
+ kTransform1dDct, // Discrete Cosine Transform.
+ kTransform1dAdst, // Asymmetric Discrete Sine Transform.
+ kTransform1dIdentity,
+ kTransform1dWht, // Walsh Hadamard Transform.
+ kNumTransform1ds
};
// List of valid 1D transform sizes. Not all transforms may be available for all
// the sizes.
-enum TransformSize1D : uint8_t {
- k1DTransformSize4,
- k1DTransformSize8,
- k1DTransformSize16,
- k1DTransformSize32,
- k1DTransformSize64,
- kNum1DTransformSizes
+enum Transform1dSize : uint8_t {
+ kTransform1dSize4,
+ kTransform1dSize8,
+ kTransform1dSize16,
+ kTransform1dSize32,
+ kTransform1dSize64,
+ kNumTransform1dSizes
};
// The maximum width of the loop filter, fewer pixels may be filtered depending
@@ -120,36 +120,36 @@ inline const char* ToString(const IntraPredictor predictor) {
abort();
}
-inline const char* ToString(const Transform1D transform) {
+inline const char* ToString(const Transform1d transform) {
switch (transform) {
- case k1DTransformDct:
- return "k1DTransformDct";
- case k1DTransformAdst:
- return "k1DTransformAdst";
- case k1DTransformIdentity:
- return "k1DTransformIdentity";
- case k1DTransformWht:
- return "k1DTransformWht";
- case kNum1DTransforms:
- return "kNum1DTransforms";
+ case kTransform1dDct:
+ return "kTransform1dDct";
+ case kTransform1dAdst:
+ return "kTransform1dAdst";
+ case kTransform1dIdentity:
+ return "kTransform1dIdentity";
+ case kTransform1dWht:
+ return "kTransform1dWht";
+ case kNumTransform1ds:
+ return "kNumTransform1ds";
}
abort();
}
-inline const char* ToString(const TransformSize1D transform_size) {
+inline const char* ToString(const Transform1dSize transform_size) {
switch (transform_size) {
- case k1DTransformSize4:
- return "k1DTransformSize4";
- case k1DTransformSize8:
- return "k1DTransformSize8";
- case k1DTransformSize16:
- return "k1DTransformSize16";
- case k1DTransformSize32:
- return "k1DTransformSize32";
- case k1DTransformSize64:
- return "k1DTransformSize64";
- case kNum1DTransformSizes:
- return "kNum1DTransformSizes";
+ case kTransform1dSize4:
+ return "kTransform1dSize4";
+ case kTransform1dSize8:
+ return "kTransform1dSize8";
+ case kTransform1dSize16:
+ return "kTransform1dSize16";
+ case kTransform1dSize32:
+ return "kTransform1dSize32";
+ case kTransform1dSize64:
+ return "kTransform1dSize64";
+ case kNumTransform1dSizes:
+ return "kNumTransform1dSizes";
}
abort();
}
@@ -194,6 +194,7 @@ inline const char* ToString(const LoopFilterType filter_type) {
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left
// of |dst|. top-left and bottom-left may be accessed.
+// The pointer arguments do not alias one another.
using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
const void* top, const void* left);
using IntraPredictorFuncs =
@@ -209,6 +210,7 @@ using IntraPredictorFuncs =
// |top| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. top-right
// is accessed.
+// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride,
const void* top, int width,
int height, int xstep,
@@ -226,6 +228,7 @@ using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride,
// described in '7.11.2.11. Intra edge upsample process'. This can occur in
// cases with |width| + |height| <= 16. top-left and upper-left are accessed,
// up to [-2] in each if |upsampled_top/left| are set.
+// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone2Func = void (*)(
void* dst, ptrdiff_t stride, const void* top, const void* left, int width,
int height, int xstep, int ystep, bool upsampled_top, bool upsampled_left);
@@ -240,6 +243,7 @@ using DirectionalIntraPredictorZone2Func = void (*)(
// |left| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. bottom-left
// is accessed.
+// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride,
const void* left, int width,
int height, int ystep,
@@ -250,6 +254,7 @@ using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride,
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left
// of |dst|. |width| and |height| are the size of the block in pixels.
+// The pointer arguments do not alias one another.
using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
const void* top, const void* left,
FilterIntraPredictor pred, int width,
@@ -303,11 +308,14 @@ using IntraEdgeUpsamplerFunc = void (*)(void* buffer, int size);
// 7.13.3).
// Apply the inverse transforms and add the residual to the destination frame
// for the transform type and block size |tx_size| starting at position
-// |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D.
-// |adjusted_tx_height| is the number of rows to process based on the non-zero
-// coefficient count in the block. It will be 1 (non-zero coefficient count ==
-// 1), 4 or a multiple of 8 up to 32 or the original transform height,
-// whichever is less.
+// |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D of Pixel
+// values. |adjusted_tx_height| is the number of rows to process based on the
+// non-zero coefficient count in the block. It will be 1 (non-zero coefficient
+// count == 1), 4 or a multiple of 8 up to 32 or the original transform height,
+// whichever is less. |src_buffer| is a pointer to an Array2D of Residual
+// values. On input |src_buffer| contains the dequantized values, on output it
+// contains the residual.
+// The pointer arguments do not alias one another.
using InverseTransformAddFunc = void (*)(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
@@ -316,7 +324,7 @@ using InverseTransformAddFunc = void (*)(TransformType tx_type,
// The final dimension holds row and column transforms indexed with kRow and
// kColumn.
using InverseTransformAddFuncs =
- InverseTransformAddFunc[kNum1DTransforms][kNum1DTransformSizes][2];
+ InverseTransformAddFunc[kNumTransform1ds][kNumTransform1dSizes][2];
//------------------------------------------------------------------------------
// Post processing.
@@ -324,6 +332,13 @@ using InverseTransformAddFuncs =
// Loop filter function signature. Section 7.14.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes.
+// <threshold param> <spec name> <range>
+// |outer_thresh| blimit [7, 193]
+// |inner_thresh| limit [1, 63]
+// |hev_thresh| thresh [0, 63]
+// These are scaled by the implementation by 'bitdepth - 8' to produce
+// the spec variables blimitBd, limitBd and threshBd.
+// Note these functions are not called when the loop filter level is 0.
using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh,
int inner_thresh, int hev_thresh);
using LoopFilterFuncs =
@@ -333,6 +348,7 @@ using LoopFilterFuncs =
// |src| is a pointer to the source block. Pixel size is determined by bitdepth
// with |stride| given in bytes. |direction| and |variance| are output
// parameters and must not be nullptr.
+// The pointer arguments do not alias one another.
using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride,
uint8_t* direction, int* variance);
@@ -344,6 +360,7 @@ using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride,
// parameters.
// |direction| is the filtering direction.
// |dest| is the output buffer. |dest_stride| is given in bytes.
+// The pointer arguments do not alias one another.
using CdefFilteringFunc = void (*)(const uint16_t* source,
ptrdiff_t source_stride, int block_height,
int primary_strength, int secondary_strength,
@@ -381,6 +398,7 @@ using SuperResCoefficientsFunc = void (*)(int upscaled_width,
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |initial_subpixel_x| is a base offset from which |step| increments.
+// The pointer arguments do not alias one another.
using SuperResFunc = void (*)(const void* coefficients, void* source,
ptrdiff_t source_stride, int height,
int downscaled_width, int upscaled_width,
@@ -397,6 +415,7 @@ using SuperResFunc = void (*)(const void* coefficients, void* source,
// |top_border_stride| and |bottom_border_stride| are given in pixels.
// |restoration_buffer| contains buffers required for self guided filter and
// wiener filter. They must be initialized before calling.
+// The pointer arguments do not alias one another.
using LoopRestorationFunc = void (*)(
const RestorationUnitInfo& restoration_info, const void* source,
ptrdiff_t stride, const void* top_border, ptrdiff_t top_border_stride,
@@ -425,6 +444,7 @@ using LoopRestorationFuncs = LoopRestorationFunc[2];
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
+// The pointer arguments do not alias one another.
using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride,
int horizontal_filter_index,
int vertical_filter_index,
@@ -462,6 +482,7 @@ using ConvolveFuncs = ConvolveFunc[2][2][2][2];
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
+// The pointer arguments do not alias one another.
using ConvolveScaleFunc = void (*)(const void* reference,
ptrdiff_t reference_stride,
int horizontal_filter_index,
@@ -482,6 +503,7 @@ using ConvolveScaleFuncs = ConvolveScaleFunc[2];
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |mask| is the output buffer. |mask_stride| is the output buffer stride.
+// The pointer arguments do not alias one another.
using WeightMaskFunc = void (*)(const void* prediction_0,
const void* prediction_1, uint8_t* mask,
ptrdiff_t mask_stride);
@@ -504,6 +526,7 @@ using WeightMaskFuncs = WeightMaskFunc[6][6][2];
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
+// The pointer arguments do not alias one another.
using AverageBlendFunc = void (*)(const void* prediction_0,
const void* prediction_1, int width,
int height, void* dest,
@@ -525,6 +548,7 @@ using AverageBlendFunc = void (*)(const void* prediction_0,
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
+// The pointer arguments do not alias one another.
using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
const void* prediction_1,
uint8_t weight_0, uint8_t weight_1,
@@ -550,17 +574,18 @@ using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
// |mask_stride| is corresponding stride.
// |width|, |height| are the same for both input blocks.
// If it's inter_intra (or wedge_inter_intra), the valid range of block size is
-// [8x8, 32x32]. Otherwise (including difference weighted prediction and
-// compound average prediction), the valid range is [8x8, 128x128].
+// [8x8, 32x32], no 4:1/1:4 blocks (Section 5.11.28). Otherwise (including
+// difference weighted prediction and compound average prediction), the valid
+// range is [8x8, 128x128].
// If there's subsampling, the corresponding width and height are halved for
// chroma planes.
-// |subsampling_x|, |subsampling_y| are the subsampling factors.
// |is_inter_intra| stands for the prediction mode. If it is true, one of the
// prediction blocks is from intra prediction of current frame. Otherwise, two
// prediction blocks are both inter frame predictions.
// |is_wedge_inter_intra| indicates if the mask is for the wedge prediction.
// |dest| is the output block.
// |dest_stride| is the corresponding stride for dest.
+// The pointer arguments do not alias one another.
using MaskBlendFunc = void (*)(const void* prediction_0,
const void* prediction_1,
ptrdiff_t prediction_stride_1,
@@ -577,6 +602,7 @@ using MaskBlendFuncs = MaskBlendFunc[3][2];
// |is_inter_intra| is true and |bitdepth| == 8.
// |prediction_[01]| are Pixel values (uint8_t).
// |prediction_1| is also the output buffer.
+// The pointer arguments do not alias one another.
using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0,
uint8_t* prediction_1,
ptrdiff_t prediction_stride_1,
@@ -600,9 +626,12 @@ using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3];
// clipped. Therefore obmc blending process doesn't need to clip the output.
// |prediction| is the first input block, which will be overwritten.
// |prediction_stride| is the stride, given in bytes.
-// |width|, |height| are the same for both input blocks.
+// |width|, |height| are the same for both input blocks. The range is [4x2,
+// 32x32] for kObmcDirectionVertical and [2x4, 32x32] for
+// kObmcDirectionHorizontal, see Section 7.11.3.9.
// |obmc_prediction| is the second input block.
// |obmc_prediction_stride| is its stride, given in bytes.
+// The pointer arguments do not alias one another.
using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride,
int width, int height,
const void* obmc_prediction,
@@ -645,6 +674,7 @@ using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections];
// Therefore, there must be at least one extra padding byte after the right
// border of the last row in the source buffer.
// * The top and bottom borders must be at least 13 pixels high.
+// The pointer arguments do not alias one another.
using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride,
int source_width, int source_height,
const int* warp_params, int subsampling_x,
@@ -686,6 +716,7 @@ using LumaAutoRegressionFuncs =
// from frame header, mainly providing auto_regression_coeff_u and
// auto_regression_coeff_v for each chroma plane's filter, and
// auto_regression_shift to right shift the filter sums by.
+// The pointer arguments do not alias one another.
using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
const void* luma_grain_buffer,
int subsampling_x, int subsampling_y,
@@ -704,6 +735,7 @@ using ChromaAutoRegressionFuncs =
// Because this function treats all planes identically and independently, it is
// simplified to take one grain buffer at a time. This means duplicating some
// random number generations, but that work can be reduced in other ways.
+// The pointer arguments do not alias one another.
using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer,
int grain_seed, int width,
int height, int subsampling_x,
@@ -720,6 +752,7 @@ using ConstructNoiseStripesFuncs =
// Array2D containing the allocated plane for this frame. Because this function
// treats all planes identically and independently, it is simplified to take one
// grain buffer at a time.
+// The pointer arguments do not alias one another.
using ConstructNoiseImageOverlapFunc =
void (*)(const void* noise_stripes_buffer, int width, int height,
int subsampling_x, int subsampling_y, void* noise_image_buffer);
@@ -730,9 +763,12 @@ using ConstructNoiseImageOverlapFunc =
// |num_points| can be between 0 and 15. When 0, the lookup table is set to
// zero.
// |point_value| and |point_scaling| have |num_points| valid elements.
-using InitializeScalingLutFunc = void (*)(
- int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
- uint8_t scaling_lut[kScalingLookupTableSize]);
+// The pointer arguments do not alias one another.
+using InitializeScalingLutFunc = void (*)(int num_points,
+ const uint8_t point_value[],
+ const uint8_t point_scaling[],
+ int16_t* scaling_lut,
+ const int scaling_lut_length);
// Blend noise with image. Section 7.18.3.5, third code block.
// |width| is the width of each row, while |height| is how many rows to compute.
@@ -749,18 +785,19 @@ using InitializeScalingLutFunc = void (*)(
// |scaling_shift| is applied as a right shift after scaling, so that scaling
// down is possible. It is found in FilmGrainParams, but supplied directly to
// BlendNoiseWithImageLumaFunc because it's the only member used.
-using BlendNoiseWithImageLumaFunc =
- void (*)(const void* noise_image_ptr, int min_value, int max_value,
- int scaling_shift, int width, int height, int start_height,
- const uint8_t scaling_lut_y[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
- void* dest_plane_y, ptrdiff_t dest_stride_y);
+// The dest plane may point to the source plane, depending on the value of
+// frame_header.show_existing_frame. |noise_image_ptr| and scaling_lut.* do not
+// alias other arguments.
+using BlendNoiseWithImageLumaFunc = void (*)(
+ const void* noise_image_ptr, int min_value, int max_value,
+ int scaling_shift, int width, int height, int start_height,
+ const int16_t* scaling_lut_y, const void* source_plane_y,
+ ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y);
using BlendNoiseWithImageChromaFunc = void (*)(
Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
int min_value, int max_value, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
+ int subsampling_x, int subsampling_y, const int16_t* scaling_lut,
const void* source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv);
@@ -790,6 +827,8 @@ struct FilmGrainFuncs {
// tile.
// |motion_field| is the output which saves the projected motion field
// information.
+// Note: Only the entry from the 8-bit Dsp table is used as this function is
+// bitdepth agnostic.
using MotionFieldProjectionKernelFunc = void (*)(
const ReferenceInfo& reference_info, int reference_to_current_with_sign,
int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end,
@@ -797,13 +836,16 @@ using MotionFieldProjectionKernelFunc = void (*)(
// Compound temporal motion vector projection function signature.
// Section 7.9.3 and 7.10.2.10.
-// |temporal_mvs| is the set of temporal reference motion vectors.
+// |temporal_mvs| is the aligned set of temporal reference motion vectors.
// |temporal_reference_offsets| specifies the number of frames covered by the
// original motion vector.
// |reference_offsets| specifies the number of frames to be covered by the
// projected motion vector.
// |count| is the number of the temporal motion vectors.
-// |candidate_mvs| is the set of projected motion vectors.
+// |candidate_mvs| is the aligned set of projected motion vectors.
+// The pointer arguments do not alias one another.
+// Note: Only the entry from the 8-bit Dsp table is used as this function is
+// bitdepth agnostic.
using MvProjectionCompoundFunc = void (*)(
const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
const int reference_offsets[2], int count,
@@ -811,13 +853,16 @@ using MvProjectionCompoundFunc = void (*)(
// Single temporal motion vector projection function signature.
// Section 7.9.3 and 7.10.2.10.
-// |temporal_mvs| is the set of temporal reference motion vectors.
+// |temporal_mvs| is the aligned set of temporal reference motion vectors.
// |temporal_reference_offsets| specifies the number of frames covered by the
// original motion vector.
// |reference_offset| specifies the number of frames to be covered by the
// projected motion vector.
// |count| is the number of the temporal motion vectors.
-// |candidate_mvs| is the set of projected motion vectors.
+// |candidate_mvs| is the aligned set of projected motion vectors.
+// The pointer arguments do not alias one another.
+// Note: Only the entry from the 8-bit Dsp table is used as this function is
+// bitdepth agnostic.
using MvProjectionSingleFunc = void (*)(
const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
int reference_offset, int count, MotionVector* candidate_mvs);
diff --git a/src/dsp/dsp_test.cc b/src/dsp/dsp_test.cc
index bf7b9f3..5c2a3aa 100644
--- a/src/dsp/dsp_test.cc
+++ b/src/dsp/dsp_test.cc
@@ -33,11 +33,11 @@ namespace dsp {
namespace {
// Maps 1D transform to the maximum valid size for the corresponding transform.
-constexpr int kMax1DTransformSize[kNum1DTransforms] = {
- k1DTransformSize64, // Dct.
- k1DTransformSize16, // Adst.
- k1DTransformSize32, // Identity.
- k1DTransformSize4, // Wht.
+constexpr int kMaxTransform1dSize[kNumTransform1ds] = {
+ kTransform1dSize64, // Dct.
+ kTransform1dSize16, // Adst.
+ kTransform1dSize32, // Identity.
+ kTransform1dSize4, // Wht.
};
void CheckTables(bool c_only) {
@@ -80,10 +80,10 @@ void CheckTables(bool c_only) {
}
EXPECT_NE(dsp->intra_edge_filter, nullptr);
EXPECT_NE(dsp->intra_edge_upsampler, nullptr);
- for (int i = 0; i < kNum1DTransforms; ++i) {
- for (int j = 0; j < kNum1DTransformSizes; ++j) {
+ for (int i = 0; i < kNumTransform1ds; ++i) {
+ for (int j = 0; j < kNumTransform1dSizes; ++j) {
for (int k = 0; k < 2; ++k) {
- if (j <= kMax1DTransformSize[i]) {
+ if (j <= kMaxTransform1dSize[i]) {
EXPECT_NE(dsp->inverse_transforms[i][j][k], nullptr)
<< "index [" << i << "][" << j << "][" << k << "]";
} else {
@@ -203,13 +203,23 @@ void CheckTables(bool c_only) {
EXPECT_NE(dsp->film_grain.initialize_scaling_lut, nullptr);
EXPECT_NE(dsp->film_grain.blend_noise_luma, nullptr);
- EXPECT_NE(dsp->motion_field_projection_kernel, nullptr);
- EXPECT_NE(dsp->mv_projection_compound[0], nullptr);
- EXPECT_NE(dsp->mv_projection_compound[1], nullptr);
- EXPECT_NE(dsp->mv_projection_compound[2], nullptr);
- EXPECT_NE(dsp->mv_projection_single[0], nullptr);
- EXPECT_NE(dsp->mv_projection_single[1], nullptr);
- EXPECT_NE(dsp->mv_projection_single[2], nullptr);
+ if (bitdepth == 8) {
+ EXPECT_NE(dsp->motion_field_projection_kernel, nullptr);
+ EXPECT_NE(dsp->mv_projection_compound[0], nullptr);
+ EXPECT_NE(dsp->mv_projection_compound[1], nullptr);
+ EXPECT_NE(dsp->mv_projection_compound[2], nullptr);
+ EXPECT_NE(dsp->mv_projection_single[0], nullptr);
+ EXPECT_NE(dsp->mv_projection_single[1], nullptr);
+ EXPECT_NE(dsp->mv_projection_single[2], nullptr);
+ } else {
+ EXPECT_EQ(dsp->motion_field_projection_kernel, nullptr);
+ EXPECT_EQ(dsp->mv_projection_compound[0], nullptr);
+ EXPECT_EQ(dsp->mv_projection_compound[1], nullptr);
+ EXPECT_EQ(dsp->mv_projection_compound[2], nullptr);
+ EXPECT_EQ(dsp->mv_projection_single[0], nullptr);
+ EXPECT_EQ(dsp->mv_projection_single[1], nullptr);
+ EXPECT_EQ(dsp->mv_projection_single[2], nullptr);
+ }
}
}
diff --git a/src/dsp/film_grain.cc b/src/dsp/film_grain.cc
index 41d1dd0..fa12b69 100644
--- a/src/dsp/film_grain.cc
+++ b/src/dsp/film_grain.cc
@@ -29,29 +29,26 @@
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/logging.h"
+#include "src/utils/memory.h"
namespace libgav1 {
namespace dsp {
namespace film_grain {
namespace {
-// Making this a template function prevents it from adding to code size when it
-// is not placed in the DSP table. Most functions in the dsp directory change
-// behavior by bitdepth, but because this one doesn't, it receives a dummy
-// parameter with one enforced value, ensuring only one copy is made.
-template <int singleton>
-void InitializeScalingLookupTable_C(
- int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
- uint8_t scaling_lut[kScalingLookupTableSize]) {
- static_assert(singleton == 0,
- "Improper instantiation of InitializeScalingLookupTable_C. "
- "There should be only one copy of this function.");
+template <int bitdepth>
+void InitializeScalingLookupTable_C(int num_points, const uint8_t point_value[],
+ const uint8_t point_scaling[],
+ int16_t* scaling_lut,
+ const int scaling_lut_length) {
if (num_points == 0) {
- memset(scaling_lut, 0, sizeof(scaling_lut[0]) * kScalingLookupTableSize);
+ memset(scaling_lut, 0, sizeof(scaling_lut[0]) * scaling_lut_length);
return;
}
- static_assert(sizeof(scaling_lut[0]) == 1, "");
- memset(scaling_lut, point_scaling[0], point_value[0]);
+ constexpr int index_shift = bitdepth - kBitdepth8;
+ static_assert(sizeof(scaling_lut[0]) == 2, "");
+ Memset(scaling_lut, point_scaling[0],
+ std::max(static_cast<int>(point_value[0]), 1) << index_shift);
for (int i = 0; i < num_points - 1; ++i) {
const int delta_y = point_scaling[i + 1] - point_scaling[i];
const int delta_x = point_value[i + 1] - point_value[i];
@@ -59,25 +56,38 @@ void InitializeScalingLookupTable_C(
for (int x = 0; x < delta_x; ++x) {
const int v = point_scaling[i] + ((x * delta + 32768) >> 16);
assert(v >= 0 && v <= UINT8_MAX);
- scaling_lut[point_value[i] + x] = v;
+ const int lut_index = (point_value[i] + x) << index_shift;
+ scaling_lut[lut_index] = v;
+ }
+ }
+ const int16_t last_point_value = point_value[num_points - 1];
+ const int x_base = last_point_value << index_shift;
+ Memset(&scaling_lut[x_base], point_scaling[num_points - 1],
+ scaling_lut_length - x_base);
+ // Fill in the gaps.
+ if (bitdepth == kBitdepth10) {
+ for (int x = 4; x < x_base + 4; x += 4) {
+ const int start = scaling_lut[x - 4];
+ const int end = scaling_lut[x];
+ const int delta = end - start;
+ scaling_lut[x - 3] = start + RightShiftWithRounding(delta, 2);
+ scaling_lut[x - 2] = start + RightShiftWithRounding(2 * delta, 2);
+ scaling_lut[x - 1] = start + RightShiftWithRounding(3 * delta, 2);
}
}
- const uint8_t last_point_value = point_value[num_points - 1];
- memset(&scaling_lut[last_point_value], point_scaling[num_points - 1],
- kScalingLookupTableSize - last_point_value);
}
// Section 7.18.3.5.
-// Performs a piecewise linear interpolation into the scaling table.
template <int bitdepth>
-int ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize], int index) {
- const int shift = bitdepth - 8;
+int ScaleLut(const int16_t* scaling_lut, int index) {
+ if (bitdepth <= kBitdepth10) {
+ assert(index < kScalingLookupTableSize << (bitdepth - 2));
+ return scaling_lut[index];
+ }
+ // Performs a piecewise linear interpolation into the scaling table.
+ const int shift = bitdepth - kBitdepth8;
const int quotient = index >> shift;
const int remainder = index - (quotient << shift);
- if (bitdepth == 8) {
- assert(quotient < kScalingLookupTableSize);
- return scaling_lut[quotient];
- }
assert(quotient + 1 < kScalingLookupTableSize);
const int start = scaling_lut[quotient];
const int end = scaling_lut[quotient + 1];
@@ -153,12 +163,11 @@ void ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams& params,
template <int bitdepth, typename GrainType, int auto_regression_coeff_lag,
bool use_luma>
-void ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams& params,
- const void* luma_grain_buffer,
- int subsampling_x,
- int subsampling_y,
- void* u_grain_buffer,
- void* v_grain_buffer) {
+void ApplyAutoRegressiveFilterToChromaGrains_C(
+ const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT luma_grain_buffer, int subsampling_x,
+ int subsampling_y, void* LIBGAV1_RESTRICT u_grain_buffer,
+ void* LIBGAV1_RESTRICT v_grain_buffer) {
static_assert(
auto_regression_coeff_lag >= 0 && auto_regression_coeff_lag <= 3,
"Unsupported autoregression lag for chroma.");
@@ -227,9 +236,10 @@ void ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams& params,
// This implementation is for the condition overlap_flag == false.
template <int bitdepth, typename GrainType>
-void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
- int width, int height, int subsampling_x,
- int subsampling_y, void* noise_stripes_buffer) {
+void ConstructNoiseStripes_C(const void* LIBGAV1_RESTRICT grain_buffer,
+ int grain_seed, int width, int height,
+ int subsampling_x, int subsampling_y,
+ void* LIBGAV1_RESTRICT noise_stripes_buffer) {
auto* noise_stripes =
static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
const auto* grain = static_cast<const GrainType*>(grain_buffer);
@@ -272,8 +282,6 @@ void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
// Writes beyond the width of each row could happen below. To
// prevent those writes, we clip the number of pixels to copy against
// the remaining width.
- // TODO(petersonab): Allocate aligned stripes with extra width to cover
- // the size of the final stripe block, then remove this call to min.
const int copy_size =
std::min(kNoiseStripeHeight >> subsampling_x,
plane_width - (x << (1 - subsampling_x)));
@@ -291,10 +299,10 @@ void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
// This implementation is for the condition overlap_flag == true.
template <int bitdepth, typename GrainType>
-void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
- int grain_seed, int width, int height,
- int subsampling_x, int subsampling_y,
- void* noise_stripes_buffer) {
+void ConstructNoiseStripesWithOverlap_C(
+ const void* LIBGAV1_RESTRICT grain_buffer, int grain_seed, int width,
+ int height, int subsampling_x, int subsampling_y,
+ void* LIBGAV1_RESTRICT noise_stripes_buffer) {
auto* noise_stripes =
static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
const auto* grain = static_cast<const GrainType*>(grain_buffer);
@@ -326,8 +334,6 @@ void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
// The overlap computation only occurs when x > 0, so it is omitted here.
int i = 0;
do {
- // TODO(petersonab): Allocate aligned stripes with extra width to cover
- // the size of the final stripe block, then remove this call to min.
const int copy_size =
std::min(kNoiseStripeHeight >> subsampling_x, plane_width);
memcpy(&noise_stripe[i * plane_width],
@@ -399,8 +405,6 @@ void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
// Writes beyond the width of each row could happen below. To
// prevent those writes, we clip the number of pixels to copy against
// the remaining width.
- // TODO(petersonab): Allocate aligned stripes with extra width to cover
- // the size of the final stripe block, then remove this call to min.
const int copy_size =
std::min(kNoiseStripeHeight >> subsampling_x,
plane_width - (x << (1 - subsampling_x))) -
@@ -417,10 +421,11 @@ void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
}
template <int bitdepth, typename GrainType>
-inline void WriteOverlapLine_C(const GrainType* noise_stripe_row,
- const GrainType* noise_stripe_row_prev,
- int plane_width, int grain_coeff, int old_coeff,
- GrainType* noise_image_row) {
+inline void WriteOverlapLine_C(
+ const GrainType* LIBGAV1_RESTRICT noise_stripe_row,
+ const GrainType* LIBGAV1_RESTRICT noise_stripe_row_prev, int plane_width,
+ int grain_coeff, int old_coeff,
+ GrainType* LIBGAV1_RESTRICT noise_image_row) {
int x = 0;
do {
int grain = noise_stripe_row[x];
@@ -433,9 +438,10 @@ inline void WriteOverlapLine_C(const GrainType* noise_stripe_row,
}
template <int bitdepth, typename GrainType>
-void ConstructNoiseImageOverlap_C(const void* noise_stripes_buffer, int width,
- int height, int subsampling_x,
- int subsampling_y, void* noise_image_buffer) {
+void ConstructNoiseImageOverlap_C(
+ const void* LIBGAV1_RESTRICT noise_stripes_buffer, int width, int height,
+ int subsampling_x, int subsampling_y,
+ void* LIBGAV1_RESTRICT noise_image_buffer) {
const auto* noise_stripes =
static_cast<const Array2DView<GrainType>*>(noise_stripes_buffer);
auto* noise_image = static_cast<Array2D<GrainType>*>(noise_image_buffer);
@@ -495,12 +501,13 @@ void ConstructNoiseImageOverlap_C(const void* noise_stripes_buffer, int width,
}
template <int bitdepth, typename GrainType, typename Pixel>
-void BlendNoiseWithImageLuma_C(
- const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
- int width, int height, int start_height,
- const uint8_t scaling_lut_y[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
- ptrdiff_t dest_stride_y) {
+void BlendNoiseWithImageLuma_C(const void* LIBGAV1_RESTRICT noise_image_ptr,
+ int min_value, int max_luma, int scaling_shift,
+ int width, int height, int start_height,
+ const int16_t* scaling_lut_y,
+ const void* source_plane_y,
+ ptrdiff_t source_stride_y, void* dest_plane_y,
+ ptrdiff_t dest_stride_y) {
const auto* noise_image =
static_cast<const Array2D<GrainType>*>(noise_image_ptr);
const auto* in_y = static_cast<const Pixel*>(source_plane_y);
@@ -524,10 +531,10 @@ void BlendNoiseWithImageLuma_C(
// This function is for the case params_.chroma_scaling_from_luma == false.
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageChroma_C(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut_uv[kScalingLookupTableSize],
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* scaling_lut_uv,
const void* source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
@@ -571,7 +578,7 @@ void BlendNoiseWithImageChroma_C(
const int orig = in_uv[y * source_stride_uv + x];
const int combined = average_luma * luma_multiplier + orig * multiplier;
const int merged =
- Clip3((combined >> 6) + LeftShift(offset, bitdepth - 8), 0,
+ Clip3((combined >> 6) + LeftShift(offset, bitdepth - kBitdepth8), 0,
(1 << bitdepth) - 1);
int noise = noise_image[plane][y + start_height][x];
noise = RightShiftWithRounding(
@@ -586,13 +593,12 @@ void BlendNoiseWithImageChroma_C(
// This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageChromaWithCfl_C(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
- const void* source_plane_uv, ptrdiff_t source_stride_uv,
- void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* scaling_lut, const void* source_plane_y,
+ ptrdiff_t source_stride_y, const void* source_plane_uv,
+ ptrdiff_t source_stride_uv, void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
const auto* noise_image =
static_cast<const Array2D<GrainType>*>(noise_image_ptr);
const auto* in_y = static_cast<const Pixel*>(source_plane_y);
@@ -639,106 +645,108 @@ void Init8bpp() {
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// LumaAutoRegressionFunc
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
// ChromaAutoRegressionFunc
// Chroma autoregression should never be called when lag is 0 and use_luma is
// false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 1, false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 2, false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 3, false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 0, true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 1, true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 2, true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 3, true>;
// ConstructNoiseStripesFunc
dsp->film_grain.construct_noise_stripes[0] =
- ConstructNoiseStripes_C<8, int8_t>;
+ ConstructNoiseStripes_C<kBitdepth8, int8_t>;
dsp->film_grain.construct_noise_stripes[1] =
- ConstructNoiseStripesWithOverlap_C<8, int8_t>;
+ ConstructNoiseStripesWithOverlap_C<kBitdepth8, int8_t>;
// ConstructNoiseImageOverlapFunc
dsp->film_grain.construct_noise_image_overlap =
- ConstructNoiseImageOverlap_C<8, int8_t>;
+ ConstructNoiseImageOverlap_C<kBitdepth8, int8_t>;
// InitializeScalingLutFunc
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_C<kBitdepth8>;
// BlendNoiseWithImageLumaFunc
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageLuma_C<kBitdepth8, int8_t, uint8_t>;
// BlendNoiseWithImageChromaFunc
dsp->film_grain.blend_noise_chroma[0] =
- BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChroma_C<kBitdepth8, int8_t, uint8_t>;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChromaWithCfl_C<kBitdepth8, int8_t, uint8_t>;
#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
static_cast<void>(dsp);
#ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionLuma
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth8, int8_t>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionChroma
// Chroma autoregression should never be called when lag is 0 and use_luma is
// false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 1, false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 2, false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 3, false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 0, true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 1, true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 2, true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth8, int8_t, 3, true>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseStripes
dsp->film_grain.construct_noise_stripes[0] =
- ConstructNoiseStripes_C<8, int8_t>;
+ ConstructNoiseStripes_C<kBitdepth8, int8_t>;
dsp->film_grain.construct_noise_stripes[1] =
- ConstructNoiseStripesWithOverlap_C<8, int8_t>;
+ ConstructNoiseStripesWithOverlap_C<kBitdepth8, int8_t>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseImageOverlap
dsp->film_grain.construct_noise_image_overlap =
- ConstructNoiseImageOverlap_C<8, int8_t>;
+ ConstructNoiseImageOverlap_C<kBitdepth8, int8_t>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainInitializeScalingLutFunc
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_C<kBitdepth8>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseLuma
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageLuma_C<kBitdepth8, int8_t, uint8_t>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChroma
dsp->film_grain.blend_noise_chroma[0] =
- BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChroma_C<kBitdepth8, int8_t, uint8_t>;
#endif
#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChromaWithCfl
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChromaWithCfl_C<kBitdepth8, int8_t, uint8_t>;
#endif
#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
}
@@ -751,106 +759,108 @@ void Init10bpp() {
// LumaAutoRegressionFunc
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
// ChromaAutoRegressionFunc
// Chroma autoregression should never be called when lag is 0 and use_luma is
// false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 1, false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 2, false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 3, false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 0, true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 1, true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 2, true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 3, true>;
// ConstructNoiseStripesFunc
dsp->film_grain.construct_noise_stripes[0] =
- ConstructNoiseStripes_C<10, int16_t>;
+ ConstructNoiseStripes_C<kBitdepth10, int16_t>;
dsp->film_grain.construct_noise_stripes[1] =
- ConstructNoiseStripesWithOverlap_C<10, int16_t>;
+ ConstructNoiseStripesWithOverlap_C<kBitdepth10, int16_t>;
// ConstructNoiseImageOverlapFunc
dsp->film_grain.construct_noise_image_overlap =
- ConstructNoiseImageOverlap_C<10, int16_t>;
+ ConstructNoiseImageOverlap_C<kBitdepth10, int16_t>;
// InitializeScalingLutFunc
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_C<kBitdepth10>;
// BlendNoiseWithImageLumaFunc
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageLuma_C<kBitdepth10, int16_t, uint16_t>;
// BlendNoiseWithImageChromaFunc
dsp->film_grain.blend_noise_chroma[0] =
- BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChroma_C<kBitdepth10, int16_t, uint16_t>;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChromaWithCfl_C<kBitdepth10, int16_t, uint16_t>;
#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
static_cast<void>(dsp);
#ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionLuma
dsp->film_grain.luma_auto_regression[0] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
dsp->film_grain.luma_auto_regression[1] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
dsp->film_grain.luma_auto_regression[2] =
- ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+ ApplyAutoRegressiveFilterToLumaGrain_C<kBitdepth10, int16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionChroma
// Chroma autoregression should never be called when lag is 0 and use_luma is
// false.
dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
dsp->film_grain.chroma_auto_regression[0][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 1, false>;
dsp->film_grain.chroma_auto_regression[0][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 2, false>;
dsp->film_grain.chroma_auto_regression[0][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 3, false>;
dsp->film_grain.chroma_auto_regression[1][0] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 0, true>;
dsp->film_grain.chroma_auto_regression[1][1] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 1, true>;
dsp->film_grain.chroma_auto_regression[1][2] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 2, true>;
dsp->film_grain.chroma_auto_regression[1][3] =
- ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
+ ApplyAutoRegressiveFilterToChromaGrains_C<kBitdepth10, int16_t, 3, true>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseStripes
dsp->film_grain.construct_noise_stripes[0] =
- ConstructNoiseStripes_C<10, int16_t>;
+ ConstructNoiseStripes_C<kBitdepth10, int16_t>;
dsp->film_grain.construct_noise_stripes[1] =
- ConstructNoiseStripesWithOverlap_C<10, int16_t>;
+ ConstructNoiseStripesWithOverlap_C<kBitdepth10, int16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseImageOverlap
dsp->film_grain.construct_noise_image_overlap =
- ConstructNoiseImageOverlap_C<10, int16_t>;
+ ConstructNoiseImageOverlap_C<kBitdepth10, int16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainInitializeScalingLutFunc
- dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+ dsp->film_grain.initialize_scaling_lut =
+ InitializeScalingLookupTable_C<kBitdepth10>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageLuma_C<kBitdepth10, int16_t, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChroma
dsp->film_grain.blend_noise_chroma[0] =
- BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChroma_C<kBitdepth10, int16_t, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChromaWithCfl
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChromaWithCfl_C<kBitdepth10, int16_t, uint16_t>;
#endif
#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
}
diff --git a/src/dsp/film_grain_common.h b/src/dsp/film_grain_common.h
index 64e3e8e..2e6ad45 100644
--- a/src/dsp/film_grain_common.h
+++ b/src/dsp/film_grain_common.h
@@ -59,15 +59,16 @@ enum {
// The two possible heights of the chroma noise array.
kMinChromaHeight = 38,
kMaxChromaHeight = 73,
- // The scaling lookup table maps bytes to bytes, so only uses 256 elements,
- // plus one for overflow in 10bit lookups.
+ // The standard scaling lookup table maps bytes to bytes, so only uses 256
+ // elements, plus one for overflow in 12bpp lookups. The size is scaled up for
+ // 10bpp.
kScalingLookupTableSize = 257,
// Padding is added to the scaling lookup table to permit overwrites by
// InitializeScalingLookupTable_NEON.
kScalingLookupTablePadding = 6,
// Padding is added to each row of the noise image to permit overreads by
// BlendNoiseWithImageLuma_NEON and overwrites by WriteOverlapLine8bpp_NEON.
- kNoiseImagePadding = 7,
+ kNoiseImagePadding = 15,
// Padding is added to the end of the |noise_stripes_| buffer to permit
// overreads by WriteOverlapLine8bpp_NEON.
kNoiseStripePadding = 7,
diff --git a/src/dsp/intra_edge_test.cc b/src/dsp/intra_edge_test.cc
index 90960c6..aca6f9e 100644
--- a/src/dsp/intra_edge_test.cc
+++ b/src/dsp/intra_edge_test.cc
@@ -24,6 +24,7 @@
#include "absl/time/time.h"
#include "gtest/gtest.h"
#include "src/dsp/dsp.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/cpu.h"
#include "tests/third_party/libvpx/acm_random.h"
@@ -37,6 +38,7 @@ const char kIntraEdge[] = "IntraEdge";
const char kIntraEdgeFilterName[] = "Intra Edge Filter";
const char kIntraEdgeUpsamplerName[] = "Intra Edge Upsampler";
+constexpr int kIntraEdgeBufferSize = 144; // see Tile::IntraPrediction.
constexpr int kIntraEdgeFilterTestMaxSize = 129;
constexpr int kIntraEdgeFilterTestFixedInput[kIntraEdgeFilterTestMaxSize] = {
159, 208, 54, 136, 205, 124, 125, 165, 164, 63, 171, 143, 210, 236, 253,
@@ -104,14 +106,19 @@ class IntraEdgeFilterTest : public testing::TestWithParam<EdgeFilterParams> {
<< test_case;
}
+#if LIBGAV1_MSAN
+ // Match the behavior of Tile::IntraPrediction to prevent warnings due to
+ // assembly code (safely) overreading to fill a register.
+ memset(buffer_, 0, sizeof(buffer_));
+#endif // LIBGAV1_MSAN
cur_intra_edge_filter_ = dsp->intra_edge_filter;
}
void TestFixedValues(const char* digest);
void TestRandomValues(int num_runs);
- Pixel buffer_[kIntraEdgeFilterTestMaxSize];
- Pixel base_buffer_[kIntraEdgeFilterTestMaxSize];
+ Pixel buffer_[kIntraEdgeBufferSize];
+ Pixel base_buffer_[kIntraEdgeBufferSize];
int strength_ = GetParam().strength;
int size_ = GetParam().size;
@@ -141,9 +148,11 @@ void IntraEdgeFilterTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
absl::Duration elapsed_time;
absl::Duration base_elapsed_time;
+ memset(base_buffer_, 0, sizeof(base_buffer_));
+ memset(buffer_, 0, sizeof(buffer_));
for (int num_tests = 0; num_tests < num_runs; ++num_tests) {
- for (int i = 0; i < kIntraEdgeFilterTestMaxSize; ++i) {
- const Pixel val = rnd(bitdepth);
+ for (int i = 0; i < size_; ++i) {
+ const Pixel val = rnd(1 << bitdepth);
buffer_[i] = val;
base_buffer_[i] = val;
}
@@ -236,7 +245,7 @@ TEST_P(IntraEdgeFilterTest8bpp, Correctness) {
TestRandomValues(1);
}
-TEST_P(IntraEdgeFilterTest8bpp, DISABLED_Speed) { TestRandomValues(5e7); }
+TEST_P(IntraEdgeFilterTest8bpp, DISABLED_Speed) { TestRandomValues(1e7); }
#if LIBGAV1_MAX_BITDEPTH >= 10
using IntraEdgeFilterTest10bpp = IntraEdgeFilterTest<10, uint16_t>;
@@ -305,7 +314,7 @@ TEST_P(IntraEdgeFilterTest10bpp, FixedInput) {
TestRandomValues(1);
}
-TEST_P(IntraEdgeFilterTest10bpp, DISABLED_Speed) { TestRandomValues(5e7); }
+TEST_P(IntraEdgeFilterTest10bpp, DISABLED_Speed) { TestRandomValues(1e7); }
#endif
template <int bitdepth, typename Pixel>
@@ -340,6 +349,11 @@ class IntraEdgeUpsamplerTest : public testing::TestWithParam<int> {
<< test_case;
}
cur_intra_edge_upsampler_ = dsp->intra_edge_upsampler;
+#if LIBGAV1_MSAN
+ // Match the behavior of Tile::IntraPrediction to prevent warnings due to
+ // assembly code (safely) overreading to fill a register.
+ memset(buffer_, 0, sizeof(buffer_));
+#endif
}
void TestFixedValues(const char* digest);
@@ -382,7 +396,7 @@ void IntraEdgeUpsamplerTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
buffer_[0] = 0;
base_buffer_[0] = 0;
for (int i = 1; i < size_ + 2; ++i) {
- const Pixel val = rnd(bitdepth);
+ const Pixel val = rnd(1 << bitdepth);
buffer_[i] = val;
base_buffer_[i] = val;
}
diff --git a/src/dsp/intrapred.cc b/src/dsp/intrapred.cc
index 4520c2c..75af279 100644
--- a/src/dsp/intrapred.cc
+++ b/src/dsp/intrapred.cc
@@ -63,8 +63,8 @@ struct IntraPredBppFuncs_C {
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::DcTop(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* /*left_column*/) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row, const void* /*left_column*/) {
int sum = block_width >> 1; // rounder
const auto* const top = static_cast<const Pixel*>(top_row);
for (int x = 0; x < block_width; ++x) sum += top[x];
@@ -80,8 +80,8 @@ void IntraPredFuncs_C<block_width, block_height, Pixel>::DcTop(
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::DcLeft(
- void* const dest, ptrdiff_t stride, const void* /*top_row*/,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* /*top_row*/, const void* LIBGAV1_RESTRICT const left_column) {
int sum = block_height >> 1; // rounder
const auto* const left = static_cast<const Pixel*>(left_column);
for (int y = 0; y < block_height; ++y) sum += left[y];
@@ -132,8 +132,9 @@ void IntraPredFuncs_C<block_width, block_height, Pixel>::DcLeft(
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::Dc(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const int divisor = block_width + block_height;
int sum = divisor >> 1; // rounder
@@ -158,8 +159,8 @@ void IntraPredFuncs_C<block_width, block_height, Pixel>::Dc(
// IntraPredFuncs_C::Vertical -- apply top row vertically
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::Vertical(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* /*left_column*/) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row, const void* /*left_column*/) {
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < block_height; ++y) {
memcpy(dst, top_row, block_width * sizeof(Pixel));
@@ -170,8 +171,8 @@ void IntraPredFuncs_C<block_width, block_height, Pixel>::Vertical(
// IntraPredFuncs_C::Horizontal -- apply left column horizontally
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::Horizontal(
- void* const dest, ptrdiff_t stride, const void* /*top_row*/,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* /*top_row*/, const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const Pixel*>(left_column);
auto* dst = static_cast<Pixel*>(dest);
stride /= sizeof(Pixel);
@@ -184,8 +185,9 @@ void IntraPredFuncs_C<block_width, block_height, Pixel>::Horizontal(
// IntraPredFuncs_C::Paeth
template <int block_width, int block_height, typename Pixel>
void IntraPredFuncs_C<block_width, block_height, Pixel>::Paeth(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const Pixel*>(top_row);
const auto* const left = static_cast<const Pixel*>(left_column);
const Pixel top_left = top[-1];
diff --git a/src/dsp/intrapred_cfl.cc b/src/dsp/intrapred_cfl.cc
index 948c0c0..0f7f4f2 100644
--- a/src/dsp/intrapred_cfl.cc
+++ b/src/dsp/intrapred_cfl.cc
@@ -41,7 +41,7 @@ constexpr TransformSize kTransformSizesLargerThan32x32[] = {
// |alpha| can be -16 to 16 (inclusive).
template <int block_width, int block_height, int bitdepth, typename Pixel>
void CflIntraPredictor_C(
- void* const dest, ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<Pixel*>(dest);
@@ -66,7 +66,8 @@ template <int block_width, int block_height, int bitdepth, typename Pixel,
int subsampling_x, int subsampling_y>
void CflSubsampler_C(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
assert(max_luma_width >= 4);
assert(max_luma_height >= 4);
const auto* src = static_cast<const Pixel*>(source);
diff --git a/src/dsp/intrapred_cfl_test.cc b/src/dsp/intrapred_cfl_test.cc
index e700a5b..82f1d2f 100644
--- a/src/dsp/intrapred_cfl_test.cc
+++ b/src/dsp/intrapred_cfl_test.cc
@@ -28,6 +28,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/cpu.h"
#include "src/utils/memory.h"
@@ -352,6 +353,10 @@ void CflSubsamplerTest<bitdepth, Pixel, subsampling_type>::TestSpeed(
const int width = GetLumaWidth(block_width_, subsampling_type);
const int height = GetLumaHeight(block_height_, subsampling_type);
Pixel* src = intra_pred_mem_.ref_src;
+#if LIBGAV1_MSAN
+ // Quiet 10bpp CflSubsampler420_NEON() msan warning.
+ memset(src, 0, sizeof(intra_pred_mem_.ref_src));
+#endif
for (int i = 0; i < height; ++i) {
for (int j = 0; j < width; ++j) {
src[j] = rnd.RandRange(1 << bitdepth);
diff --git a/src/dsp/intrapred_directional.cc b/src/dsp/intrapred_directional.cc
index e670769..21a40b5 100644
--- a/src/dsp/intrapred_directional.cc
+++ b/src/dsp/intrapred_directional.cc
@@ -33,11 +33,10 @@ namespace {
// 7.11.2.4. Directional intra prediction process
template <typename Pixel>
-void DirectionalIntraPredictorZone1_C(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const int width, const int height,
- const int xstep,
- const bool upsampled_top) {
+void DirectionalIntraPredictorZone1_C(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row, const int width,
+ const int height, const int xstep, const bool upsampled_top) {
const auto* const top = static_cast<const Pixel*>(top_row);
auto* dst = static_cast<Pixel*>(dest);
stride /= sizeof(Pixel);
@@ -96,13 +95,12 @@ void DirectionalIntraPredictorZone1_C(void* const dest, ptrdiff_t stride,
}
template <typename Pixel>
-void DirectionalIntraPredictorZone2_C(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column,
- const int width, const int height,
- const int xstep, const int ystep,
- const bool upsampled_top,
- const bool upsampled_left) {
+void DirectionalIntraPredictorZone2_C(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int xstep, const int ystep,
+ const bool upsampled_top, const bool upsampled_left) {
const auto* const top = static_cast<const Pixel*>(top_row);
const auto* const left = static_cast<const Pixel*>(left_column);
auto* dst = static_cast<Pixel*>(dest);
@@ -146,11 +144,10 @@ void DirectionalIntraPredictorZone2_C(void* const dest, ptrdiff_t stride,
}
template <typename Pixel>
-void DirectionalIntraPredictorZone3_C(void* const dest, ptrdiff_t stride,
- const void* const left_column,
- const int width, const int height,
- const int ystep,
- const bool upsampled_left) {
+void DirectionalIntraPredictorZone3_C(
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const left_column, const int width,
+ const int height, const int ystep, const bool upsampled_left) {
const auto* const left = static_cast<const Pixel*>(left_column);
stride /= sizeof(Pixel);
diff --git a/src/dsp/intrapred_directional_test.cc b/src/dsp/intrapred_directional_test.cc
index ebf9da0..9e98242 100644
--- a/src/dsp/intrapred_directional_test.cc
+++ b/src/dsp/intrapred_directional_test.cc
@@ -28,6 +28,7 @@
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/cpu.h"
#include "src/utils/memory.h"
@@ -79,6 +80,12 @@ class IntraPredTestBase : public testing::TestWithParam<TransformSize>,
struct IntraPredMem {
void Reset(libvpx_test::ACMRandom* rnd) {
ASSERT_NE(rnd, nullptr);
+#if LIBGAV1_MSAN
+ // Match the behavior of Tile::IntraPrediction to prevent warnings due to
+ // assembly code (safely) overreading to fill a register.
+ memset(left_mem, 0, sizeof(left_mem));
+ memset(top_mem, 0, sizeof(top_mem));
+#endif // LIBGAV1_MSAN
Pixel* const left = left_mem + 16;
Pixel* const top = top_mem + 16;
const int mask = (1 << bitdepth) - 1;
@@ -105,6 +112,12 @@ class IntraPredTestBase : public testing::TestWithParam<TransformSize>,
// Set ref_src, top-left, top and left to |pixel|.
void Set(const Pixel pixel) {
+#if LIBGAV1_MSAN
+ // Match the behavior of Tile::IntraPrediction to prevent warnings due to
+ // assembly code (safely) overreading to fill a register.
+ memset(left_mem, 0, sizeof(left_mem));
+ memset(top_mem, 0, sizeof(top_mem));
+#endif // LIBGAV1_MSAN
Pixel* const left = left_mem + 16;
Pixel* const top = top_mem + 16;
for (auto& r : ref_src) r = pixel;
@@ -702,7 +715,11 @@ const char* const* GetDirectionalIntraPredDigests8bpp(TransformSize tx_size) {
}
TEST_P(DirectionalIntraPredTest8bpp, DISABLED_Speed) {
- const auto num_runs = static_cast<int>(5e7 / (block_width_ * block_height_));
+#if LIBGAV1_ENABLE_NEON
+ const auto num_runs = static_cast<int>(2e7 / (block_width_ * block_height_));
+#else
+ const int num_runs = static_cast<int>(4e7 / (block_width_ * block_height_));
+#endif
for (int i = kZone1; i < kNumZones; ++i) {
TestSpeed(GetDirectionalIntraPredDigests8bpp(tx_size_),
static_cast<Zone>(i), num_runs);
@@ -867,7 +884,11 @@ const char* const* GetDirectionalIntraPredDigests10bpp(TransformSize tx_size) {
}
TEST_P(DirectionalIntraPredTest10bpp, DISABLED_Speed) {
- const auto num_runs = static_cast<int>(5e7 / (block_width_ * block_height_));
+#if LIBGAV1_ENABLE_NEON
+ const int num_runs = static_cast<int>(2e7 / (block_width_ * block_height_));
+#else
+ const int num_runs = static_cast<int>(4e7 / (block_width_ * block_height_));
+#endif
for (int i = kZone1; i < kNumZones; ++i) {
TestSpeed(GetDirectionalIntraPredDigests10bpp(tx_size_),
static_cast<Zone>(i), num_runs);
@@ -882,6 +903,7 @@ TEST_P(DirectionalIntraPredTest10bpp, FixedInput) {
}
TEST_P(DirectionalIntraPredTest10bpp, Overflow) { TestSaturatedValues(); }
+TEST_P(DirectionalIntraPredTest10bpp, Random) { TestRandomValues(); }
#endif // LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/intrapred_filter.cc b/src/dsp/intrapred_filter.cc
index f4bd296..9a45eff 100644
--- a/src/dsp/intrapred_filter.cc
+++ b/src/dsp/intrapred_filter.cc
@@ -40,9 +40,9 @@ namespace {
// adjacent to the |top_row| or |left_column|. The set of 8 filters is selected
// according to |pred|.
template <int bitdepth, typename Pixel>
-void FilterIntraPredictor_C(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column,
+void FilterIntraPredictor_C(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column,
const FilterIntraPredictor pred, const int width,
const int height) {
const int kMaxPixel = (1 << bitdepth) - 1;
diff --git a/src/dsp/intrapred_filter_test.cc b/src/dsp/intrapred_filter_test.cc
index c420f0a..fe1efdc 100644
--- a/src/dsp/intrapred_filter_test.cc
+++ b/src/dsp/intrapred_filter_test.cc
@@ -542,6 +542,11 @@ INSTANTIATE_TEST_SUITE_P(NEON, FilterIntraPredTest8bpp,
#if LIBGAV1_MAX_BITDEPTH >= 10
INSTANTIATE_TEST_SUITE_P(C, FilterIntraPredTest10bpp,
testing::ValuesIn(kTransformSizesSmallerThan32x32));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, FilterIntraPredTest10bpp,
+ testing::ValuesIn(kTransformSizesSmallerThan32x32));
+#endif // LIBGAV1_ENABLE_NEON
#endif // LIBGAV1_MAX_BITDEPTH >= 10
} // namespace
diff --git a/src/dsp/intrapred_smooth.cc b/src/dsp/intrapred_smooth.cc
index 83c005e..0c7f272 100644
--- a/src/dsp/intrapred_smooth.cc
+++ b/src/dsp/intrapred_smooth.cc
@@ -42,26 +42,15 @@ struct SmoothFuncs_C {
};
constexpr uint8_t kSmoothWeights[] = {
- // block dimension = 4
- 255, 149, 85, 64,
- // block dimension = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // block dimension = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // block dimension = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
- // block dimension = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73,
- 69, 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16,
- 15, 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4};
+#include "src/dsp/smooth_weights.inc"
+};
// SmoothFuncs_C::Smooth
template <int block_width, int block_height, typename Pixel>
void SmoothFuncs_C<block_width, block_height, Pixel>::Smooth(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const Pixel*>(top_row);
const auto* const left = static_cast<const Pixel*>(left_column);
const Pixel top_right = top[block_width - 1];
@@ -94,8 +83,9 @@ void SmoothFuncs_C<block_width, block_height, Pixel>::Smooth(
// SmoothFuncs_C::SmoothVertical
template <int block_width, int block_height, typename Pixel>
void SmoothFuncs_C<block_width, block_height, Pixel>::SmoothVertical(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const Pixel*>(top_row);
const auto* const left = static_cast<const Pixel*>(left_column);
const Pixel bottom_left = left[block_height - 1];
@@ -121,8 +111,9 @@ void SmoothFuncs_C<block_width, block_height, Pixel>::SmoothVertical(
// SmoothFuncs_C::SmoothHorizontal
template <int block_width, int block_height, typename Pixel>
void SmoothFuncs_C<block_width, block_height, Pixel>::SmoothHorizontal(
- void* const dest, ptrdiff_t stride, const void* const top_row,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const Pixel*>(top_row);
const auto* const left = static_cast<const Pixel*>(left_column);
const Pixel top_right = top[block_width - 1];
diff --git a/src/dsp/inverse_transform.cc b/src/dsp/inverse_transform.cc
index ed984d8..1b0064f 100644
--- a/src/dsp/inverse_transform.cc
+++ b/src/dsp/inverse_transform.cc
@@ -42,8 +42,8 @@ int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
assert(range <= 32);
- const int32_t min = -(1 << (range - 1));
- const int32_t max = (1 << (range - 1)) - 1;
+ const auto min = static_cast<int32_t>(-(uint32_t{1} << (range - 1)));
+ const auto max = static_cast<int32_t>((uint32_t{1} << (range - 1)) - 1);
if (min > value || value > max) {
LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
value, range);
@@ -140,7 +140,7 @@ void ClampIntermediate(Residual* const dst, int size) {
// For e.g. index (2, 3) will be computed as follows:
// * bitreverse(3) = bitreverse(..000011) = 110000...
// * interpreting that as an integer with bit-length 2+2 = 4 will be 1100 = 12
-constexpr uint8_t kBitReverseLookup[kNum1DTransformSizes][64] = {
+constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
{0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
@@ -532,8 +532,8 @@ void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
}
template <typename Residual>
-void AdstInputPermutation(int32_t* const dst, const Residual* const src,
- int n) {
+void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
+ const Residual* LIBGAV1_RESTRICT const src, int n) {
assert(n == 8 || n == 16);
for (int i = 0; i < n; ++i) {
dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
@@ -544,8 +544,8 @@ constexpr int8_t kAdstOutputPermutationLookup[16] = {
0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};
template <typename Residual>
-void AdstOutputPermutation(Residual* const dst, const int32_t* const src,
- int n) {
+void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
+ const int32_t* LIBGAV1_RESTRICT const src, int n) {
assert(n == 8 || n == 16);
const auto shift = static_cast<int8_t>(n == 8);
for (int i = 0; i < n; ++i) {
@@ -1096,20 +1096,21 @@ void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
//------------------------------------------------------------------------------
// row/column transform loop
-using InverseTransform1DFunc = void (*)(void* dst, int8_t range);
+using InverseTransform1dFunc = void (*)(void* dst, int8_t range);
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
bool should_round, int row_shift,
bool is_row);
template <int bitdepth, typename Residual, typename Pixel,
- Transform1D transform1d_type,
+ Transform1d transform1d_type,
InverseTransformDcOnlyFunc dconly_transform1d,
- InverseTransform1DFunc transform1d_func, bool is_row>
+ InverseTransform1dFunc transform1d_func, bool is_row>
void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
- constexpr bool lossless = transform1d_type == k1DTransformWht;
- constexpr bool is_identity = transform1d_type == k1DTransformIdentity;
+ int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
+ constexpr bool lossless = transform1d_type == kTransform1dWht;
+ constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
// The transform size of the WHT is always 4x4. Setting tx_width and
// tx_height to the constant 4 for the WHT speeds the code up.
assert(!lossless || tx_size == kTransformSize4x4);
@@ -1127,7 +1128,7 @@ void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
if (is_row) {
// Row transform.
const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
- // This is the |range| parameter of the InverseTransform1DFunc. For lossy
+ // This is the |range| parameter of the InverseTransform1dFunc. For lossy
// transforms, this will be equal to the clamping range.
const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
// If the width:height ratio of the transform size is 2:1 or 1:2, multiply
@@ -1170,10 +1171,10 @@ void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
assert(!is_row);
constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
- // This is the |range| parameter of the InverseTransform1DFunc. For lossy
+ // This is the |range| parameter of the InverseTransform1dFunc. For lossy
// transforms, this will be equal to the clamping range.
const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
- const bool flip_rows = transform1d_type == k1DTransformAdst &&
+ const bool flip_rows = transform1d_type == kTransform1dAdst &&
kTransformFlipRowsMask.Contains(tx_type);
const bool flip_columns =
!lossless && kTransformFlipColumnsMask.Contains(tx_type);
@@ -1216,114 +1217,114 @@ void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
template <int bitdepth, typename Residual, typename Pixel>
void InitAll(Dsp* const dsp) {
// Maximum transform size for Dct is 64.
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
/*is_row=*/false>;
// Maximum transform size for Adst is 16.
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
/*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
/*is_row=*/false>;
// Maximum transform size for Identity transform is 32.
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity4DcOnly_C<bitdepth, Residual>,
Identity4Row_C<Residual>, /*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity4DcOnly_C<bitdepth, Residual>,
Identity4Column_C<Residual>, /*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity8DcOnly_C<bitdepth, Residual>,
Identity8Row_C<Residual>, /*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity8DcOnly_C<bitdepth, Residual>,
Identity8Column_C<Residual>, /*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity16DcOnly_C<bitdepth, Residual>,
Identity16Row_C<Residual>, /*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity16DcOnly_C<bitdepth, Residual>,
Identity16Column_C<Residual>, /*is_row=*/false>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity32DcOnly_C<bitdepth, Residual>,
Identity32Row_C<Residual>, /*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
Identity32DcOnly_C<bitdepth, Residual>,
Identity32Column_C<Residual>, /*is_row=*/false>;
// Maximum transform size for Wht is 4.
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
- TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
+ TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
/*is_row=*/false>;
}
@@ -1332,142 +1333,137 @@ void InitAll(Dsp* const dsp) {
void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
assert(dsp != nullptr);
- for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
- for (auto& inverse_transform : inverse_transform_by_size) {
- inverse_transform[kRow] = nullptr;
- inverse_transform[kColumn] = nullptr;
- }
- }
+ static_cast<void>(dsp);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
InitAll<8, int16_t, uint8_t>(dsp);
#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity16DcOnly_C<8, int16_t>,
Identity16Column_C<int16_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
Identity32DcOnly_C<8, int16_t>,
Identity32Column_C<int16_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
- TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
+ TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
/*is_row=*/false>;
#endif
@@ -1478,142 +1474,137 @@ void Init8bpp() {
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
assert(dsp != nullptr);
- for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
- for (auto& inverse_transform : inverse_transform_by_size) {
- inverse_transform[kRow] = nullptr;
- inverse_transform[kColumn] = nullptr;
- }
- }
+ static_cast<void>(dsp);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
InitAll<10, int32_t, uint16_t>(dsp);
#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize64_1DTransformDct
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformAdst
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
/*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity4DcOnly_C<10, int32_t>,
Identity4Column_C<int32_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity8DcOnly_C<10, int32_t>,
Identity8Column_C<int32_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity16DcOnly_C<10, int32_t>,
Identity16Column_C<int32_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformIdentity
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
Identity32DcOnly_C<10, int32_t>,
Identity32Column_C<int32_t>, /*is_row=*/false>;
#endif
-#ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformWht
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
+#ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
/*is_row=*/true>;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
- TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
+ TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
/*is_row=*/false>;
#endif
diff --git a/src/dsp/inverse_transform_test.cc b/src/dsp/inverse_transform_test.cc
index 623e203..0ae23df 100644
--- a/src/dsp/inverse_transform_test.cc
+++ b/src/dsp/inverse_transform_test.cc
@@ -43,27 +43,27 @@ namespace {
constexpr int kMaxBlockSize = 64;
constexpr int kTotalPixels = kMaxBlockSize * kMaxBlockSize;
-const char* const kTransformSize1DNames[kNum1DTransformSizes] = {
- "k1DTransformSize4", "k1DTransformSize8", "k1DTransformSize16",
- "k1DTransformSize32", "k1DTransformSize64"};
-
-constexpr TransformSize1D kRow1DTransformSizes[] = {
- k1DTransformSize4, k1DTransformSize4, k1DTransformSize4,
- k1DTransformSize8, k1DTransformSize8, k1DTransformSize8,
- k1DTransformSize8, k1DTransformSize16, k1DTransformSize16,
- k1DTransformSize16, k1DTransformSize16, k1DTransformSize16,
- k1DTransformSize32, k1DTransformSize32, k1DTransformSize32,
- k1DTransformSize32, k1DTransformSize64, k1DTransformSize64,
- k1DTransformSize64};
-
-constexpr TransformSize1D kCol1DTransformSizes[] = {
- k1DTransformSize4, k1DTransformSize8, k1DTransformSize16,
- k1DTransformSize4, k1DTransformSize8, k1DTransformSize16,
- k1DTransformSize32, k1DTransformSize4, k1DTransformSize8,
- k1DTransformSize16, k1DTransformSize32, k1DTransformSize64,
- k1DTransformSize8, k1DTransformSize16, k1DTransformSize32,
- k1DTransformSize64, k1DTransformSize16, k1DTransformSize32,
- k1DTransformSize64};
+const char* const kTransform1dSizeNames[kNumTransform1dSizes] = {
+ "kTransform1dSize4", "kTransform1dSize8", "kTransform1dSize16",
+ "kTransform1dSize32", "kTransform1dSize64"};
+
+constexpr Transform1dSize kRowTransform1dSizes[] = {
+ kTransform1dSize4, kTransform1dSize4, kTransform1dSize4,
+ kTransform1dSize8, kTransform1dSize8, kTransform1dSize8,
+ kTransform1dSize8, kTransform1dSize16, kTransform1dSize16,
+ kTransform1dSize16, kTransform1dSize16, kTransform1dSize16,
+ kTransform1dSize32, kTransform1dSize32, kTransform1dSize32,
+ kTransform1dSize32, kTransform1dSize64, kTransform1dSize64,
+ kTransform1dSize64};
+
+constexpr Transform1dSize kColTransform1dSizes[] = {
+ kTransform1dSize4, kTransform1dSize8, kTransform1dSize16,
+ kTransform1dSize4, kTransform1dSize8, kTransform1dSize16,
+ kTransform1dSize32, kTransform1dSize4, kTransform1dSize8,
+ kTransform1dSize16, kTransform1dSize32, kTransform1dSize64,
+ kTransform1dSize8, kTransform1dSize16, kTransform1dSize32,
+ kTransform1dSize64, kTransform1dSize16, kTransform1dSize32,
+ kTransform1dSize64};
template <int bitdepth, typename SrcPixel, typename DstPixel>
class InverseTransformTestBase : public testing::TestWithParam<TransformSize>,
@@ -167,8 +167,8 @@ class InverseTransformTest
const Dsp* const dsp = GetDspTable(bitdepth);
ASSERT_NE(dsp, nullptr);
- tx_size_1d_row_ = kRow1DTransformSizes[tx_size_];
- tx_size_1d_column_ = kCol1DTransformSizes[tx_size_];
+ tx_size_1d_row_ = kRowTransform1dSizes[tx_size_];
+ tx_size_1d_column_ = kColTransform1dSizes[tx_size_];
memcpy(base_inverse_transforms_, dsp->inverse_transforms,
sizeof(base_inverse_transforms_));
@@ -193,7 +193,7 @@ class InverseTransformTest
memcpy(cur_inverse_transforms_, dsp->inverse_transforms,
sizeof(cur_inverse_transforms_));
- for (int i = 0; i < kNum1DTransforms; ++i) {
+ for (int i = 0; i < kNumTransform1ds; ++i) {
// skip functions that haven't been specialized for this particular
// architecture.
if (cur_inverse_transforms_[i][tx_size_1d_row_][kRow] ==
@@ -220,8 +220,8 @@ class InverseTransformTest
Array2DView<DstPixel> base_frame_buffer_;
Array2DView<DstPixel> cur_frame_buffer_;
- TransformSize1D tx_size_1d_row_ = k1DTransformSize4;
- TransformSize1D tx_size_1d_column_ = k1DTransformSize4;
+ Transform1dSize tx_size_1d_row_ = kTransform1dSize4;
+ Transform1dSize tx_size_1d_column_ = kTransform1dSize4;
InverseTransformAddFuncs base_inverse_transforms_;
InverseTransformAddFuncs cur_inverse_transforms_;
@@ -237,23 +237,23 @@ constexpr TransformType kLibgav1TxType[kNumTransformTypes] = {
kTransformTypeIdentityAdst, kTransformTypeAdstIdentity,
kTransformTypeIdentityFlipadst, kTransformTypeFlipadstIdentity};
-// Maps TransformType to dsp::Transform1D for the row transforms.
-constexpr Transform1D kRowTransform[kNumTransformTypes] = {
- k1DTransformDct, k1DTransformAdst, k1DTransformDct,
- k1DTransformAdst, k1DTransformAdst, k1DTransformDct,
- k1DTransformAdst, k1DTransformAdst, k1DTransformAdst,
- k1DTransformIdentity, k1DTransformIdentity, k1DTransformDct,
- k1DTransformIdentity, k1DTransformAdst, k1DTransformIdentity,
- k1DTransformAdst};
-
-// Maps TransformType to dsp::Transform1D for the column transforms.
-constexpr Transform1D kColumnTransform[kNumTransformTypes] = {
- k1DTransformDct, k1DTransformDct, k1DTransformAdst,
- k1DTransformAdst, k1DTransformDct, k1DTransformAdst,
- k1DTransformAdst, k1DTransformAdst, k1DTransformAdst,
- k1DTransformIdentity, k1DTransformDct, k1DTransformIdentity,
- k1DTransformAdst, k1DTransformIdentity, k1DTransformAdst,
- k1DTransformIdentity};
+// Maps TransformType to dsp::Transform1d for the row transforms.
+constexpr Transform1d kRowTransform[kNumTransformTypes] = {
+ kTransform1dDct, kTransform1dAdst, kTransform1dDct,
+ kTransform1dAdst, kTransform1dAdst, kTransform1dDct,
+ kTransform1dAdst, kTransform1dAdst, kTransform1dAdst,
+ kTransform1dIdentity, kTransform1dIdentity, kTransform1dDct,
+ kTransform1dIdentity, kTransform1dAdst, kTransform1dIdentity,
+ kTransform1dAdst};
+
+// Maps TransformType to dsp::Transform1d for the column transforms.
+constexpr Transform1d kColumnTransform[kNumTransformTypes] = {
+ kTransform1dDct, kTransform1dDct, kTransform1dAdst,
+ kTransform1dAdst, kTransform1dDct, kTransform1dAdst,
+ kTransform1dAdst, kTransform1dAdst, kTransform1dAdst,
+ kTransform1dIdentity, kTransform1dDct, kTransform1dIdentity,
+ kTransform1dAdst, kTransform1dIdentity, kTransform1dAdst,
+ kTransform1dIdentity};
// Mask indicating whether the transform sets contain a particular transform
// type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set.
@@ -281,10 +281,14 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestRandomValues(
int num_tests) {
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
- for (int tx_type_idx = 0; tx_type_idx < kNumTransformTypes; ++tx_type_idx) {
- const TransformType tx_type = kLibgav1TxType[tx_type_idx];
- const Transform1D row_transform = kRowTransform[tx_type];
- const Transform1D column_transform = kColumnTransform[tx_type];
+ for (int tx_type_idx = -1; tx_type_idx < kNumTransformTypes; ++tx_type_idx) {
+ const TransformType tx_type = (tx_type_idx == -1)
+ ? kTransformTypeDctDct
+ : kLibgav1TxType[tx_type_idx];
+ const Transform1d row_transform =
+ (tx_type_idx == -1) ? kTransform1dWht : kRowTransform[tx_type];
+ const Transform1d column_transform =
+ (tx_type_idx == -1) ? kTransform1dWht : kColumnTransform[tx_type];
// Skip the 'C' test case as this is used as the reference.
if (base_inverse_transforms_[row_transform][tx_size_1d_row_][kRow] ==
@@ -347,9 +351,9 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestRandomValues(
kMaxBlockSize, false)) {
ADD_FAILURE() << "Result from optimized version of "
<< ToString(
- static_cast<TransformSize1D>(tx_size_1d_column_))
+ static_cast<Transform1dSize>(tx_size_1d_column_))
<< " differs from reference in iteration #" << n
- << "tx_type_idx:" << tx_type_idx;
+ << " tx_type_idx:" << tx_type_idx;
break;
}
}
@@ -360,19 +364,22 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestRandomValues(
const auto cur_row_elapsed_time_us =
static_cast<int>(absl::ToInt64Microseconds(cur_elapsed_time[kRow]));
printf("TxType %30s[%19s]:: base_row: %5d us cur_row: %5d us %2.2fx \n",
- ToString(tx_type), kTransformSize1DNames[tx_size_1d_row_],
- base_row_elapsed_time_us, cur_row_elapsed_time_us,
+ (tx_type_idx == -1) ? ToString(row_transform) : ToString(tx_type),
+ kTransform1dSizeNames[tx_size_1d_row_], base_row_elapsed_time_us,
+ cur_row_elapsed_time_us,
static_cast<float>(base_row_elapsed_time_us) /
static_cast<float>(cur_row_elapsed_time_us));
const auto base_column_elapsed_time_us = static_cast<int>(
absl::ToInt64Microseconds(base_elapsed_time[kColumn]));
const auto cur_column_elapsed_time_us = static_cast<int>(
absl::ToInt64Microseconds(cur_elapsed_time[kColumn]));
- printf("TxType %30s[%19s]:: base_col: %5d us cur_col: %5d us %2.2fx \n",
- ToString(tx_type), kTransformSize1DNames[tx_size_1d_column_],
- base_column_elapsed_time_us, cur_column_elapsed_time_us,
- static_cast<float>(base_column_elapsed_time_us) /
- static_cast<float>(cur_column_elapsed_time_us));
+ printf(
+ "TxType %30s[%19s]:: base_col: %5d us cur_col: %5d us %2.2fx \n",
+ (tx_type_idx == -1) ? ToString(column_transform) : ToString(tx_type),
+ kTransform1dSizeNames[tx_size_1d_column_],
+ base_column_elapsed_time_us, cur_column_elapsed_time_us,
+ static_cast<float>(base_column_elapsed_time_us) /
+ static_cast<float>(cur_column_elapsed_time_us));
}
}
}
@@ -384,8 +391,8 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestDcOnlyRandomValue(
for (int tx_type_idx = 0; tx_type_idx < kNumTransformTypes; ++tx_type_idx) {
const TransformType tx_type = kLibgav1TxType[tx_type_idx];
- const Transform1D row_transform = kRowTransform[tx_type];
- const Transform1D column_transform = kColumnTransform[tx_type];
+ const Transform1d row_transform = kRowTransform[tx_type];
+ const Transform1d column_transform = kColumnTransform[tx_type];
if (cur_inverse_transforms_[row_transform][tx_size_1d_row_][kRow] ==
nullptr ||
@@ -450,7 +457,7 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestDcOnlyRandomValue(
kMaxBlockSize, false)) {
ADD_FAILURE() << "Result from dc only version of "
<< ToString(
- static_cast<TransformSize1D>(tx_size_1d_column_))
+ static_cast<Transform1dSize>(tx_size_1d_column_))
<< " differs from reference in iteration #" << n
<< "tx_type_idx:" << tx_type_idx;
break;
@@ -463,7 +470,7 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestDcOnlyRandomValue(
const auto cur_row_elapsed_time_us =
static_cast<int>(absl::ToInt64Microseconds(cur_elapsed_time[kRow]));
printf("TxType %30s[%19s]:: base_row: %5d us cur_row: %5d us %2.2fx \n",
- ToString(tx_type), kTransformSize1DNames[tx_size_1d_row_],
+ ToString(tx_type), kTransform1dSizeNames[tx_size_1d_row_],
base_row_elapsed_time_us, cur_row_elapsed_time_us,
static_cast<float>(base_row_elapsed_time_us) /
static_cast<float>(cur_row_elapsed_time_us));
@@ -472,7 +479,7 @@ void InverseTransformTest<bitdepth, Pixel, DstPixel>::TestDcOnlyRandomValue(
const auto cur_column_elapsed_time_us = static_cast<int>(
absl::ToInt64Microseconds(cur_elapsed_time[kColumn]));
printf("TxType %30s[%19s]:: base_col: %5d us cur_col: %5d us %2.2fx \n",
- ToString(tx_type), kTransformSize1DNames[tx_size_1d_column_],
+ ToString(tx_type), kTransform1dSizeNames[tx_size_1d_column_],
base_column_elapsed_time_us, cur_column_elapsed_time_us,
static_cast<float>(base_column_elapsed_time_us) /
static_cast<float>(cur_column_elapsed_time_us));
diff --git a/src/dsp/libgav1_dsp.cmake b/src/dsp/libgav1_dsp.cmake
index a28334d..4bd1443 100644
--- a/src/dsp/libgav1_dsp.cmake
+++ b/src/dsp/libgav1_dsp.cmake
@@ -66,6 +66,7 @@ list(APPEND libgav1_dsp_sources
"${libgav1_source}/dsp/obmc.cc"
"${libgav1_source}/dsp/obmc.h"
"${libgav1_source}/dsp/obmc.inc"
+ "${libgav1_source}/dsp/smooth_weights.inc"
"${libgav1_source}/dsp/super_res.cc"
"${libgav1_source}/dsp/super_res.h"
"${libgav1_source}/dsp/warp.cc"
@@ -90,6 +91,7 @@ list(APPEND libgav1_dsp_sources_neon
"${libgav1_source}/dsp/arm/cdef_neon.cc"
"${libgav1_source}/dsp/arm/cdef_neon.h"
"${libgav1_source}/dsp/arm/common_neon.h"
+ "${libgav1_source}/dsp/arm/convolve_10bit_neon.cc"
"${libgav1_source}/dsp/arm/convolve_neon.cc"
"${libgav1_source}/dsp/arm/convolve_neon.h"
"${libgav1_source}/dsp/arm/distance_weighted_blend_neon.cc"
@@ -113,6 +115,7 @@ list(APPEND libgav1_dsp_sources_neon
"${libgav1_source}/dsp/arm/inverse_transform_neon.h"
"${libgav1_source}/dsp/arm/loop_filter_neon.cc"
"${libgav1_source}/dsp/arm/loop_filter_neon.h"
+ "${libgav1_source}/dsp/arm/loop_restoration_10bit_neon.cc"
"${libgav1_source}/dsp/arm/loop_restoration_neon.cc"
"${libgav1_source}/dsp/arm/loop_restoration_neon.h"
"${libgav1_source}/dsp/arm/mask_blend_neon.cc"
diff --git a/src/dsp/loop_filter.cc b/src/dsp/loop_filter.cc
index 6cad97d..14d47bf 100644
--- a/src/dsp/loop_filter.cc
+++ b/src/dsp/loop_filter.cc
@@ -56,6 +56,9 @@ struct LoopFilterFuncs_C {
inline void AdjustThresholds(const int bitdepth, int* const outer_thresh,
int* const inner_thresh, int* const hev_thresh) {
+ assert(*outer_thresh >= 7 && *outer_thresh <= 3 * kMaxLoopFilterValue + 4);
+ assert(*inner_thresh >= 1 && *inner_thresh <= kMaxLoopFilterValue);
+ assert(*hev_thresh >= 0 && *hev_thresh <= 3);
*outer_thresh <<= bitdepth - 8;
*inner_thresh <<= bitdepth - 8;
*hev_thresh <<= bitdepth - 8;
diff --git a/src/dsp/loop_filter_test.cc b/src/dsp/loop_filter_test.cc
index ca5107a..d013a1b 100644
--- a/src/dsp/loop_filter_test.cc
+++ b/src/dsp/loop_filter_test.cc
@@ -46,8 +46,6 @@ constexpr int kBlockStride = 32;
constexpr int kNumTests = 50000;
constexpr int kNumSpeedTests = 500000;
-constexpr int kMaxLoopFilter = 63;
-
template <typename Pixel>
void InitInput(Pixel* dst, const int stride, const int bitdepth,
libvpx_test::ACMRandom& rnd, const uint8_t inner_thresh,
@@ -172,11 +170,12 @@ void LoopFilterTest<bitdepth, Pixel>::TestRandomValues(
absl::Duration elapsed_time;
for (int n = 0; n < num_runs; ++n) {
Pixel dst[kNumPixels];
- const auto outer_thresh =
- static_cast<uint8_t>(rnd(3 * kMaxLoopFilter + 5));
- const auto inner_thresh = static_cast<uint8_t>(rnd(kMaxLoopFilter + 1));
+ const auto outer_thresh = static_cast<uint8_t>(
+ rnd(3 * kMaxLoopFilterValue - 2) + 7); // [7, 193].
+ const auto inner_thresh =
+ static_cast<uint8_t>(rnd(kMaxLoopFilterValue) + 1); // [1, 63].
const auto hev_thresh =
- static_cast<uint8_t>(rnd(kMaxLoopFilter + 1) >> 4);
+ static_cast<uint8_t>(rnd(kMaxLoopFilterValue + 1) >> 4); // [0, 3].
InitInput(dst, kBlockStride, bitdepth, rnd, inner_thresh, (n & 1) == 0);
const absl::Time start = absl::Now();
@@ -228,20 +227,20 @@ using LoopFilterTest8bpp = LoopFilterTest<8, uint8_t>;
const char* const* GetDigests8bpp(LoopFilterSize size) {
static const char* const kDigestsSize4[kNumLoopFilterTypes] = {
- "2e07bdb04b363d4ce69c7d738b1ee01a",
- "7ff41f2ffa809a2016d342d92afa7f89",
+ "6ba725d697d6209cb36dd199b8ffb47a",
+ "7dbb20e456ed0501fb4e7954f49f5e18",
};
static const char* const kDigestsSize6[kNumLoopFilterTypes] = {
- "2cd4d9ee7497ed67e38fad9cbeb7e278",
- "75c57a30a927d1aca1ac5c4f175712ca",
+ "89bb757faa44298b7f6e9c1a67f455a5",
+ "be75d5a2fcd83709ff0845f7d83f7006",
};
static const char* const kDigestsSize8[kNumLoopFilterTypes] = {
- "854860a272d58ace223454ea727a6fe4",
- "4129ee49b047777583c0e9b2006c87bf",
+ "b09137d68c7b4f8a8a15e33b4b69828f",
+ "ef8a7f1aa073805516d3518a82a5cfa4",
};
static const char* const kDigestsSize14[kNumLoopFilterTypes] = {
- "6eb768620b7ccc84b6f88b9193b02ad2",
- "56e034d9edbe0d5a3cae69b2d9b3486e",
+ "6a7bc061ace0888275af88093f82ca08",
+ "a957ddae005839aa41ba7691788b01e4",
};
switch (size) {
@@ -290,20 +289,20 @@ using LoopFilterTest10bpp = LoopFilterTest<10, uint16_t>;
const char* const* GetDigests10bpp(LoopFilterSize size) {
static const char* const kDigestsSize4[kNumLoopFilterTypes] = {
- "657dd0f612734c9c1fb50a2313567af4",
- "b1c0a0a0b35bad1589badf3c291c0461",
+ "72e75c478bb130ff1ebfa75f3a70b1a2",
+ "f32d67b611080e0bf1a9d162ff47c133",
};
static const char* const kDigestsSize6[kNumLoopFilterTypes] = {
- "d41906d4830157052d5bde417d9df9fc",
- "451490def78bd649d16d64db4e665a62",
+ "8aec73c60c87ac7cc6bc9cc5157a2795",
+ "0e4385d3a0cbb2b1551e05ad2b0f07fb",
};
static const char* const kDigestsSize8[kNumLoopFilterTypes] = {
- "a763127680f31db7184f2a63ee140268",
- "1f413bebacaa2435f0e07963a9095243",
+ "85cb2928fae43e1a27b2fe1b78ba7534",
+ "d044fad9d7c64b93ecb60c88ac48e55f",
};
static const char* const kDigestsSize14[kNumLoopFilterTypes] = {
- "f0e61add3e5856657c4055751a6dd6e2",
- "44da25d613ea601bf5f6e2a42d329cf0",
+ "ebca95ec0db6efbac7ff7cbeabc0e6d0",
+ "754ffaf0ac26a5953a029653bb5dd275",
};
switch (size) {
@@ -336,6 +335,10 @@ INSTANTIATE_TEST_SUITE_P(C, LoopFilterTest10bpp,
INSTANTIATE_TEST_SUITE_P(SSE41, LoopFilterTest10bpp,
testing::ValuesIn(kLoopFilterSizes));
#endif
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, LoopFilterTest10bpp,
+ testing::ValuesIn(kLoopFilterSizes));
+#endif
#endif
} // namespace
diff --git a/src/dsp/loop_restoration.cc b/src/dsp/loop_restoration.cc
index 1a15d90..2301a3e 100644
--- a/src/dsp/loop_restoration.cc
+++ b/src/dsp/loop_restoration.cc
@@ -144,11 +144,14 @@ inline void WienerVertical(const int16_t* wiener_buffer, const int width,
// Thus in libaom's computation, an offset of 128 is needed for filter[3].
template <int bitdepth, typename Pixel>
void WienerFilter_C(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
constexpr int kCenterTap = kWienerFilterTaps / 2;
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
@@ -867,11 +870,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
template <int bitdepth, typename Pixel>
void SelfGuidedFilter_C(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
diff --git a/src/dsp/loop_restoration_test.cc b/src/dsp/loop_restoration_test.cc
index 97a05d4..4c54bc6 100644
--- a/src/dsp/loop_restoration_test.cc
+++ b/src/dsp/loop_restoration_test.cc
@@ -83,6 +83,9 @@ class SelfGuidedFilterTest : public testing::TestWithParam<int>,
}
} else if (absl::StartsWith(test_case, "NEON/")) {
LoopRestorationInit_NEON();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ LoopRestorationInit10bpp_NEON();
+#endif
} else {
FAIL() << "Unrecognized architecture prefix in test case name: "
<< test_case;
@@ -228,7 +231,11 @@ void SelfGuidedFilterTest<bitdepth, Pixel>::TestRandomValues(bool speed) {
if (target_self_guided_filter_func_ == nullptr) return;
constexpr int bd_index = (bitdepth == 8) ? 0 : 1;
const int num_inputs = speed ? 1 : 5;
- const int num_tests = speed ? 20000 : 1;
+#if LIBGAV1_ENABLE_NEON
+ const int num_tests = speed ? 4000 : 1;
+#else
+ const int num_tests = speed ? 10000 : 1;
+#endif
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
const Pixel* const src = src_ + kOffset;
Pixel* const dst = dst_ + kOffset;
@@ -310,6 +317,10 @@ INSTANTIATE_TEST_SUITE_P(AVX2, SelfGuidedFilterTest10bpp,
INSTANTIATE_TEST_SUITE_P(SSE41, SelfGuidedFilterTest10bpp,
testing::ValuesIn(kUnitWidths));
#endif
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, SelfGuidedFilterTest10bpp,
+ testing::ValuesIn(kUnitWidths));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
@@ -348,6 +359,9 @@ class WienerFilterTest : public testing::TestWithParam<int>,
}
} else if (absl::StartsWith(test_case, "NEON/")) {
LoopRestorationInit_NEON();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ LoopRestorationInit10bpp_NEON();
+#endif
} else {
FAIL() << "Unrecognized architecture prefix in test case name: "
<< test_case;
@@ -477,7 +491,11 @@ void WienerFilterTest<bitdepth, Pixel>::TestRandomValues(bool speed) {
"3c91bf1a34672cd40bf261c5820d3ec3"}}};
if (target_wiener_filter_func_ == nullptr) return;
constexpr int bd_index = (bitdepth == 8) ? 0 : 1;
- const int num_tests = speed ? 100000 : 1;
+#if LIBGAV1_ENABLE_NEON
+ const int num_tests = speed ? 5000 : 1;
+#else
+ const int num_tests = speed ? 10000 : 1;
+#endif
const Pixel* const src = src_ + kOffset;
Pixel* const dst = dst_ + kOffset;
for (const auto vertical_order : kWienerOrders) {
@@ -545,7 +563,7 @@ void WienerFilterTest<bitdepth, Pixel>::TestCompare2C() {
kStride, unit_width_, unit_height_,
&restoration_buffer_, tmp);
if (!test_utils::CompareBlocks(dst, tmp, unit_width_, unit_height_,
- kStride, kStride, false, false)) {
+ kStride, kStride, false, true)) {
ADD_FAILURE() << "Mismatch -- wiener taps min/max";
}
}
@@ -608,6 +626,10 @@ INSTANTIATE_TEST_SUITE_P(AVX2, WienerFilterTest10bpp,
INSTANTIATE_TEST_SUITE_P(SSE41, WienerFilterTest10bpp,
testing::ValuesIn(kUnitWidths));
#endif
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, WienerFilterTest10bpp,
+ testing::ValuesIn(kUnitWidths));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/mask_blend.cc b/src/dsp/mask_blend.cc
index 15ef821..207fde0 100644
--- a/src/dsp/mask_blend.cc
+++ b/src/dsp/mask_blend.cc
@@ -25,7 +25,8 @@ namespace libgav1 {
namespace dsp {
namespace {
-uint8_t GetMaskValue(const uint8_t* mask, const uint8_t* mask_next_row, int x,
+uint8_t GetMaskValue(const uint8_t* LIBGAV1_RESTRICT mask,
+ const uint8_t* LIBGAV1_RESTRICT mask_next_row, int x,
int subsampling_x, int subsampling_y) {
if ((subsampling_x | subsampling_y) == 0) {
return mask[x];
@@ -43,10 +44,12 @@ uint8_t GetMaskValue(const uint8_t* mask, const uint8_t* mask_next_row, int x,
template <int bitdepth, typename Pixel, bool is_inter_intra, int subsampling_x,
int subsampling_y>
-void MaskBlend_C(const void* prediction_0, const void* prediction_1,
- const ptrdiff_t prediction_stride_1, const uint8_t* mask,
+void MaskBlend_C(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
const ptrdiff_t mask_stride, const int width, const int height,
- void* dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT dest, const ptrdiff_t dest_stride) {
static_assert(!(bitdepth == 8 && is_inter_intra), "");
assert(mask != nullptr);
using PredType =
@@ -85,11 +88,12 @@ void MaskBlend_C(const void* prediction_0, const void* prediction_1,
}
template <int subsampling_x, int subsampling_y>
-void InterIntraMaskBlend8bpp_C(const uint8_t* prediction_0,
- uint8_t* prediction_1,
+void InterIntraMaskBlend8bpp_C(const uint8_t* LIBGAV1_RESTRICT prediction_0,
+ uint8_t* LIBGAV1_RESTRICT prediction_1,
const ptrdiff_t prediction_stride_1,
- const uint8_t* mask, const ptrdiff_t mask_stride,
- const int width, const int height) {
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride, const int width,
+ const int height) {
assert(mask != nullptr);
constexpr int step_y = subsampling_y ? 2 : 1;
const uint8_t* mask_next_row = mask + mask_stride;
diff --git a/src/dsp/mask_blend_test.cc b/src/dsp/mask_blend_test.cc
index b5e7e60..be80b11 100644
--- a/src/dsp/mask_blend_test.cc
+++ b/src/dsp/mask_blend_test.cc
@@ -22,7 +22,6 @@
#include <type_traits>
#include "absl/strings/match.h"
-#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
@@ -57,46 +56,52 @@ constexpr int kCompoundPredictionRange[3][2] = {
const char* GetDigest8bpp(int id) {
static const char* const kDigest[] = {
"4b70d5ef5ac7554b4b2660a4abe14a41", "64adb36f07e4a2c4ea4f05cfd715ff58",
- "c490478208374a43765900ef7115c264", "b98f222eb70ef8589da2d6c839ca22b8",
- "54752ca05f67b5af571bc311aa4e3de3", "344b2dab7accd8bd0a255bee16207336",
- "0b2f6f755d1547eea7e0172f8133ea01", "310dc6364fdacba186c01f0e8ac4fcb7",
+ "2cd162cebf99724a3fc22d501bd8c8e4", "c490478208374a43765900ef7115c264",
+ "b98f222eb70ef8589da2d6c839ca22b8", "54752ca05f67b5af571bc311aa4e3de3",
+ "5ae48814dd285bfca4f5ee8e339dca99", "383f3f4f47563f065d1b6068e5931a24",
+ "344b2dab7accd8bd0a255bee16207336", "0b2f6f755d1547eea7e0172f8133ea01",
+ "310dc6364fdacba186c01f0e8ac4fcb7", "c2ee4673078d34971319c77ca77b23d1",
"b0c9f08b73d9e5c16eaf5abdbca1fdc0", "eaad805999d949fa1e1bbbb63b4b7827",
"6eb2a80d212df89403efb50db7a81b08", "c30730aa799dba78a2ebd3f729af82c7",
- "4346c2860b23f0072b6b288f14c1df36", "8f8dd3eeed74ef115ca8a2f82ebff0ba",
- "42e8872a81647767636f4c75609e0e2f", "1ff2526547d59557f7bb458249e34527",
- "cd303d685268aebd2919dd468928d0ba", "254fb3ad990f9d408d252c70dd682e27",
- "ba8d99c62853d14855f5d93e9574c97b", "e8ab744348681d6aa1043080efa86fc9",
- "2fa919ca1f54b4336de878ff4015c352", "18e47c9809b909c2bfad08e00dffc635",
- "9a90c843f06f0b662c509c26f5dd5054", "f89c608f884f37b064fc2b49eb2690a9",
- "2448734d948ca6ddeb0ce8038a4ab2cf", "a3e0f86b7a5cb49716a424709c00b5a4",
- "eb84dba768b54da10cded2f932f0aab7", "d6e8fdeb6875b70488f25d7f7ed9423f",
- "1ca0822febce19c02ddc42a7b3331257", "a9259bb9b87ad002619eb47b907d7226",
- "6408c5f327f1a9a390fb0046d4bc112b", "dba612489f87d00a82f2735fbcb98dcc",
- "e8626a97699fbd247d6358ad5f766bee", "5e638a6897d7a2950f3512f871fa19e6",
- "45a58708939779413f8e0e1de2ee5e6f", "079ae4682d398f0a7e4b66059589586d",
- "6a06e617308409f9181b59bdd4f63d83", "b05ade2c1a572fc5fcca92b4163d9afb",
- "30e955c3f86111207d5922575602e90a", "af5e6c65ed48a0eb7d509f7036398728",
- "f9da3310d7dc75910483dfdd2af6ee62", "a9423b4d67bee5e7c7bc3baa7a9c017a",
- "6b90a04333407013dd011c1af582e79f", "e658088a74bfb7cc57a2faa74a6f8689",
- "6eedf27126eba6915035f9f701a1b992", "89116a7c6ad3f70a5b3f3105d04ad1a8",
- "f41e5e166b049d0006d8b2cab56523b3", "3bed57a684075bbe3c25fd0c3e5520c3",
- "85c0b21af2afb18ce948abfe3e23c85b", "bd8aaa3602d6b42438f8449f8adb52cb",
- "1266bad904caad2c6d4047abefc2393d", "6573f2fe2a14c9ab7d5e192742388489",
- "6b9b443f6306059fa3fe18df9de6dc48", "c9a91ee6ae8b653f552866e4073dd097",
- "fa58938384198f7709d4871d155ba100", "033d121fc782e83ff94c31e73407d2a8",
- "7ea268d79f7b8c75a4feeb24e892471a", "73a376bb3e07172d1e094ab8e01a7d42",
- "13c366e0da1663fac126ea3d3876c110", "2f5eb5fcdf953c63fee2b8c75a6e5568",
- "2054b197f002223f2d75699884279511", "67ce53e6991657a922d77cc8a23f1e07",
- "f48e6d666435e7a917d6f90539b0d557", "21d03669d8d255e43552f8fb90724717",
- "43dbaa1a7aaf2a01764e78e041b6763b", "a8173347ea861ecee6da54f81df73951",
- "6b97ec4e4647a8de026d693059b855b7", "a85bf4c4b48791ac4971339877e4bc8a",
- "04cf84d020a60ce3ce53845255ca8ec9", "ddd87035b960499b883d0aefcf96b6b2",
- "278c5dd102474d598bf788cd66977ba9", "78b3790785811516142d417a49177c8c",
- "7883ea9c2df0b4f5797cba31f4352678", "727004811025ac97b04940e2eaf68f94",
- "7ffa3f97ec13dc8b6225550133a392bc", "6f5f2cb7a44aa0daea5c6b3315110591",
- "88a59d68875fb44ec3be9d3fa293bccb", "0516e71f76b9d998794d3d63e480fa2f",
- "193793d42f0964b4a958a68d9d7eb4ba", "4d259c7c6a95744e4ebaaa5361befb11",
- "c090155b997dc103203bcb5a9dcc6282",
+ "4346c2860b23f0072b6b288f14c1df36", "1cdace53543063e129a125c4084ca5d7",
+ "1ae5328e0c0f4f2bec640d1af03b2978", "3860e040fbee0c5f68f0b4af769209b3",
+ "e9480ded15d9c38ee19bf5fa816dd296", "4e17c222b64f428df29938a8120ca256",
+ "2a943bc6de9b29c8bcae189ad3bec276", "b5a6bc02c76fa61040678fb2c6c112d2",
+ "2c11bb9bd29c5577194edb77cfd1c614", "31ed1832810ae385f4ad8f57795dde1e",
+ "eb87d647839c33984dfb25bac0e7cdb3", "f652ec2b1478e35acb19cf28042ee849",
+ "0cfb18ac0cb94af1447bcac32ac20c36", "e152bbbf5ee4b40b7b41ec1f2e901aaa",
+ "f17f78fd485f7beafa8126c1cda801d7", "9f9fbee0cc9d99435efd3dff644be273",
+ "9b498843d66440c1e68dc7ab04f57d42", "2f2b0beceb31b79ccb9179991629e4b8",
+ "e06a6ebb6791529bb23fe5b0a9914220", "2b3d1ff19812a17c17b1be1f1727815e",
+ "d0bbdecec414950ed63a8a35c2bae397", "8e53906c6513058d7f17013fe0d32bf1",
+ "be0690efd31f0bf3c2adcd27ca011ed5", "c2b26243c5f147fdeadf52735aa68fb5",
+ "94bb83e774d9189c5ee04fb361855e19", "dad6441e723791a91f31a56b2136fd33",
+ "10ccac76a2debb842a0685a527b6a659", "346fb0a4914b64dda3ca0f521412b999",
+ "d7e400b855502bbb4f2b8294e207bb96", "3487503f2d73ec52f25b5e8d06c81da4",
+ "3f49c096acfcf46d44ce18b48debca7c", "8ed6a745a2b5457ac7f3ac145ce57e72",
+ "21f9dda5ef934a5ee6274b22cc22f93b", "507b60611afeb373384d9b7606f7ea46",
+ "ac766fadcdb85a47ad14a6846b9e5c36", "fde149bc2162e02bbc5fa85cc41641a5",
+ "f5f094b5742d0a920ba734b017452d24", "c90d06b0c76a0983bd1428df2a1b64b3",
+ "3649e6a6ed9f69e3f78e0b75160fb82a", "1d44b7649497e651216db50d325e3073",
+ "948fa112e90e3ca4d15f3d2f2acfab9a", "9bb54c0f7d07c0b44c44ba09379a04ff",
+ "228261ab6f098f489a8968cff1e1f7ae", "5e128db7462164f7327d1d8feeb2e4c7",
+ "9e8b97f6d9d482d5770b138bd1077747", "81563d505a4e8dd779a089abf2a28b77",
+ "b7157451de7cfa161dff1afd7f9b8622", "6a25cc0a4aaf8a315d1158dbb0ec2966",
+ "303867ee010ba51da485ee10149c6f9b", "63b64b7527d2476e9ae5139b8166e8c9",
+ "cfa93c2aeeb27a1190a445a6fee61e15", "804bcff8709665eed6830e24346101be",
+ "829947ed3e90776cda4ae82918461497", "1df10a1cb80c1a81f521e7e0f80b4f99",
+ "3c9593e42ac574f3555bb8511d438a54", "eecef71492c0626685815e646f728f79",
+ "0c43d59f456ddca2449e016ae4e34be7", "207d4ac2579f1271fc9eca8d743917b3",
+ "3c472bb0b1c891ffda19077ebb659e48", "a4ae7a0d25113bc0238fa27409f9c0dd",
+ "e8ad037ca81f46774bb01d20f46671ce", "b22741e4fe0e4062e40a2decec102ffd",
+ "c72f9e7bc0170163cb94da0faa0d3ffb", "accaf5d475d155cbd3a8c113f90718bc",
+ "2fd31e72444ea258380c16881580de81", "8a6a2a253f6f5b0ff75ba39488e6b082",
+ "c5e8159c0f3ebb7536e84ab3dadac1b3", "ef7ec20b46c7dcf16591835642bd68ef",
+ "0c3425399dc64870d726c2837666a55e", "0365029ffbfc4cedf3bf2d757ea5b9df",
+ "836aa403254af2e04d4b7a7c4db8bfc5", "7f2f3f9c91677b233795169f9a88b2b2",
+ "9fc8bbe787244dac638c367b9c611d13", "f66ef45fae8e163ab0f0f393531dad26",
+ "beb984e88b6f9b96ae6efe5da23ad16b", "1083b829ea766b1d4eb0bb96e9fb3bff",
+ "be8abad1da69e4d238a45fc02a0061cf",
};
return kDigest[id];
}
@@ -105,61 +110,68 @@ const char* GetDigest8bpp(int id) {
const char* GetDigest10bpp(int id) {
static const char* const kDigest[] = {
"1af3cbd1616941b59e6a3f6a417b6312", "1d8b3f4b9d5d2f4ff5be8e81b7243121",
- "53a3a76bf2bcd5761cd15fc739a4f4e1", "7597f69dc19a584280be0d67911db6a6",
- "e1221c172843dc6c1b345bcd370771cc", "2ccbe012ca167114b14c3ba70befa960",
- "0f68632d7e5faddb4554ca430d1df822", "8caa0061a26e142b783951d5abd7bf5d",
+ "e767350f150a84ac5a06dc348e815d62", "53a3a76bf2bcd5761cd15fc739a4f4e1",
+ "7597f69dc19a584280be0d67911db6a6", "e1221c172843dc6c1b345bcd370771cc",
+ "1a640c71ff9bb45505d89761f19efa8f", "e192f64322e0edb250b52f63aaa4de97",
+ "2ccbe012ca167114b14c3ba70befa960", "0f68632d7e5faddb4554ca430d1df822",
+ "8caa0061a26e142b783951d5abd7bf5d", "b01eeed3ec549e4a593100d9c5ba587a",
"1cce6acdbd8ca8d2546ba937584730bf", "022913e87a3c1a86aaefe2c2d4f89882",
"48f8ab636ba15a06731d869b603cbe58", "ba1616c990d224c20de123c3ccf19952",
- "346a797b7cb4de10759e329f8b49e077", "8f4aa102e9b1ac430bdb9ebd4ec4cfca",
- "5886397456b15e504ad55d8e0ce71e0e", "2a78b52ce43dc28606e83521963c00fa",
- "8d3ef5280063337b0df97f91251bb8fc", "81f0ceada000ce40586be828a2045430",
- "edb7b70a473392148bc419a44385326b", "97abe2eecaf9158a0529b234a241a57a",
- "65729d750aa1258e4a7eccef247ac8c2", "78cc995e81188b9e8b29fa58796a3313",
- "a1eb6a8c2f7c77e30e739a1b3b07cc74", "805b0f2f4b9d80f118d800b5ab4f603e",
- "12610c83533f7170149390ba581f70b2", "cba20deed43b49ada3f626c91510995d",
- "ba7ea35410b746fcbcf56c24ccb56d59", "933b2235b9b943984607d87f0bce1067",
- "7ae59015295db8983bc8472429076464", "c18cce63327b367c0a260e9cbf4222b9",
- "7c9672a7dfa964cb3ed3f2b4b443d2b6", "b29bcf1cc5369702e0179db1198db531",
- "412326aff6c89116240b5d3ef63fa5cc", "3d854589fd171e42d118be4627ec5330",
- "9a157e51e39ed314031224f074193791", "c645cdc63d3112f27b90cc9080c6d071",
- "3f360cc336a4ee9a9bd78bde1a6e9eb3", "37b40fa8674d03a7cd66afdee939b9bf",
- "cd6c7b98fe71b533c6a06d6d9122a6d0", "c26e0a0e90a969d762edcab770bed3b7",
- "e517967d2cf4f1b0fff09d334475e2ae", "bc760a328a0a4b2d75593667adfa2a0e",
- "b6239fdeeccc462640047cb2e2c2be96", "bc01f6a232ef9f0d9e57301779edd67f",
- "cf6e8c1823c5498fa5589db40406a6ad", "2a9a4bd0bd84f0b85225a5b30f5eaa16",
- "56f7bb2265dbd8a563bb269aa527c8a3", "fcbed0f0350be5a1384f95f8090d262e",
- "f3ecf2e5747ebff65ac78ecbe7cc5e6a", "1d57d1371ad2f5f320cc4de789665f7c",
- "e9f400fee64673b0f6313400fe449135", "5dfdc4a8376740011c777df46418b5d2",
- "a4eb2c077300c0d8eeda028c9db3a63a", "90551259280c2b2150f018304204f072",
- "4cbcd76496fc5b841cd164b6067b9c0b", "895964acc7b7e7d084de2266421c351b",
- "af2e05159d369d0e3b72707f242b2845", "c7d393cef751950df3b9ed8056a9ffce",
- "788541c0807aed47b863d47e5912555d", "163a06512f48c1b0f2535c8c50815bcc",
- "dc5e723bab9fbfd7074a62e05b6b3c2b", "bf91200ce1bf97b4642a601adc13d700",
- "d93fcefa6b9004baaab76d436e7ac931", "e89a2111caecc6bcf5f2b42ea0167ab4",
- "e04a058df9b87878ca97edc1c42e76e1", "5d1f60876147edd6ed29d1fb50172464",
- "655fb228aa410fd244c58c87fe510bec", "639a8a0a8f62d628136f5a97b3728b69",
- "5b60f2428b092a502d6471fa09befd7f", "40601555ac945b4d37d3434b6e5619be",
- "02be23bf1f89d5f5af02a39b98f96142", "9347a45bd54d28d8105f8183996b3505",
- "d8429cc7b0b388981861a0fdd40289f0", "c4b7fab3b044486f663e160c07805e0a",
- "f5f5d513b1f1c13d0abc70fc18afea48", "f236795ea30f1b8761b268734a245ba1",
- "c7b7452ea8247a3a40248278d08953d5", "ddd6ba3c5ec56cc7a0b0161ae67001fa",
- "94675749f2db46a8ade6f2f211db9a32", "3d165364ff96a5ef39e67a53fe3ed3be",
- "3d1d66a9401fd7e78050724ca1fa0419",
+ "346a797b7cb4de10759e329f8b49e077", "d4929154275255f2d786d6fc42c7c5d3",
+ "18a6af6f36ca1ea4ab6f5a76505de040", "0c43e68414bfc02f9b20e796506f643b",
+ "9f483f543f6b1d58e23abf9337ed6fe6", "e114860c2538b63f1be4a23560420cdc",
+ "da8680798f96572c46155c7838b452c3", "20b47a27617297231843c0f2ed7b559b",
+ "16fa4a4f33a32e28c79da83dca63fd41", "76e2c1d3c323777a3c478e11e1ba6bf2",
+ "dccdfd52a71855cc4da18af52bda4c03", "121befbd6c246e85a34225241b8bcaf1",
+ "5780757555fd87ca1ff3f1b498a1d6e9", "6b0be2256285694b1edc0201608e1326",
+ "b7ef338c58d17f69426b5a99170c7295", "b92b84b5b3d01afac02fb9c092e84b06",
+ "e6ef7fea8b183f871c4306c4f49370c5", "c1bf95c05774d8471504e57a3efa66e4",
+ "bbacdbdafc625a139361ec22fe2cf003", "5fbbb2d6ca8fc6d07ca8d4105fda4a01",
+ "c1cbb295d9f00aa865d91a95e96f99b2", "1490e4f2c874a76ecc2bbf35dce446c3",
+ "c3bd73daaeec39895a8b64812773c93c", "6d385068ef3afbd821183d36851f709b",
+ "a34c52ef7f2fd04d1cd420238641ef48", "45d10029358c6835cf968a30605659ea",
+ "a72c1bb18cf9312c5713ce0de370743d", "df7368db2a7515a1c06a4c9dd9e32ebf",
+ "52782632271caccfa9a35ed7533e2052", "6f0ef9b62d2b9956a6464694b7a86b79",
+ "814dbc176f7201725a1cfd1cf668b4b9", "065ffbee984f4b9343c8acb0eb04fcbe",
+ "0915d76ce458d5164e3c90c1ce150795", "bf2b431d9bfa7a9925ea6f6509267ae9",
+ "d3df8c0c940a01b7bf3c3afb80b6dcd4", "15ab86216c9856a8427a51fe599258a3",
+ "2cb078484472c88e26b7401c9f11cf51", "7c5f68cc098c8adabc9e26f9cd549151",
+ "a8e47da1fcc91c2bc74d030892621576", "71af422ba2d86a401f8278591c0ef540",
+ "964c902bb4698ce82f4aa0a1edc80cd6", "78271c37d62af86576dab72ed59746b3",
+ "7247c3a7534a41137027e7d3f255f5ef", "8e529ab964f5f9d0f7c3ced98239cfc8",
+ "2481ed50bff6b36a3cac6dca2aca5ae5", "78a1ff18bf217d45f5170675dee26948",
+ "00fc534119c13aa7af4b818cad9218a2", "67501a83c93f2f9debfa86955bdffde5",
+ "2a512ef738e33a4d8476f72654deffb4", "f4eef28078bbc12de9cfb5bc2fef6238",
+ "b7ac3a35205a978bed587356155bae0e", "51ea101f09c4de2f754b61ab5aff1526",
+ "2bd689d7ec964ee8c8f6f0682f93f5ca", "eecac8dbdaa73b8b3c2234892c444147",
+ "cb7086f44ef70ef919086a3d200d8c13", "0abe35e3c796c2de1e550426b2b19441",
+ "0eb140561e1ea3843464a5247d8ecb18", "d908f7317f00daacbe3dd43495db64ad",
+ "d4d677c4b347de0a13ccab7bc16b8e6e", "26523c2c2df7f31896a3ae5aa24d5ada",
+ "0ebb9f816684769816b2ae0b1f94e3a4", "fd938d0577e3687b0a810e199f69f0bb",
+ "eb8fb832e72030e2aa214936ae0effe4", "56631887763f7daf6e1e73783e5ff656",
+ "590a25cc722c2aa4d885eede5ef09f20", "80944a218ed9b9b0374cde72914449eb",
+ "d9cbc2f1e0e56cdd6722310932db1981", "a88eb213b7a6767bbe639cda120a4ab6",
+ "9972ecbadfdf3ed0b3fedf435c5a804f", "01fdf7e22405a1b17a8d275b7451094f",
+ "6a7824e10406fade0d032e886bbc76b6", "76fefadd793ec3928e915d92782bc7e1",
+ "0fbd6b076752c9f5c926ca5c1df892ac", "aac9457239f07ad633fcd45c1465af2a",
+ "56823ef9a8e21c9c7441cc9ed870d648", "52f4c7a0b7177175302652cbc482f442",
+ "f4a4f4d7c8b93c0486cf3cbaa26fbc19",
};
return kDigest[id];
}
#endif // LIBGAV1_MAX_BITDEPTH >= 10
struct MaskBlendTestParam {
- MaskBlendTestParam(int width, int height, int subsampling_x,
- int subsampling_y, bool is_inter_intra,
- bool is_wedge_inter_intra)
- : width(width),
- height(height),
+ MaskBlendTestParam(BlockSize block_size, int subsampling_x, int subsampling_y,
+ bool is_inter_intra, bool is_wedge_inter_intra)
+ : block_size(block_size),
+ width(kBlockWidthPixels[block_size]),
+ height(kBlockHeightPixels[block_size]),
subsampling_x(subsampling_x),
subsampling_y(subsampling_y),
is_inter_intra(is_inter_intra),
is_wedge_inter_intra(is_wedge_inter_intra) {}
+ BlockSize block_size;
int width;
int height;
int subsampling_x;
@@ -169,7 +181,7 @@ struct MaskBlendTestParam {
};
std::ostream& operator<<(std::ostream& os, const MaskBlendTestParam& param) {
- return os << "BlockSize" << param.width << "x" << param.height
+ return os << ToString(param.block_size)
<< ", subsampling(x/y): " << param.subsampling_x << "/"
<< param.subsampling_y
<< ", is_inter_intra: " << param.is_inter_intra
@@ -215,40 +227,44 @@ class MaskBlendTest : public testing::TestWithParam<MaskBlendTestParam>,
protected:
int GetDigestIdOffset() const {
// id is for retrieving the corresponding digest from the lookup table given
- // the set of input parameters. id can be figured out by its width, height
- // and an offset (id_offset).
+ // the set of input parameters. id can be figured out by the block size and
+ // an offset (id_offset).
// For example, in kMaskBlendTestParam, this set of parameters
// (8, 8, 0, 0, false, false) corresponds to the first entry in the
// digest lookup table, where id == 0.
- // (8, 8, 1, 0, false, false) corresponds to id == 13.
- // (8, 8, 1, 1, false, false) corresponds to id == 26.
- // (8, 8, 0, 0, true, false) corresponds to id == 39.
+ // (8, 8, 1, 0, false, false) corresponds to id == 17.
+ // (8, 8, 1, 1, false, false) corresponds to id == 34.
+ // (8, 8, 0, 0, true, false) corresponds to id == 51.
// Id_offset denotes offset for different modes (is_inter_intra,
- // is_wedge_inter_intra). Width and height help to figure out id:
- // width = 8, height = 8, id = id_offset + log2(8) - 3.
- // width = 8, height = 16, id = id_offset + log2(min(width, height) - 3 + 1.
+ // is_wedge_inter_intra).
// ...
if (!param_.is_inter_intra && !param_.is_wedge_inter_intra) {
- return param_.subsampling_x * 13 + param_.subsampling_y * 13;
+ return param_.subsampling_x * 17 + param_.subsampling_y * 17;
}
if (param_.is_inter_intra && !param_.is_wedge_inter_intra) {
- return 39 + param_.subsampling_x * 7 + param_.subsampling_y * 7;
+ return 51 + param_.subsampling_x * 7 + param_.subsampling_y * 7;
}
if (param_.is_inter_intra && param_.is_wedge_inter_intra) {
- return 60 + param_.subsampling_x * 7 + param_.subsampling_y * 7;
+ return 72 + param_.subsampling_x * 7 + param_.subsampling_y * 7;
}
return 0;
}
int GetDigestId() const {
- int id = GetDigestIdOffset();
- if (param_.width == param_.height) {
- return id + 3 * (FloorLog2(param_.width) - 3);
+ // Only 8x8 and larger blocks are tested.
+ int block_size_adjustment =
+ static_cast<int>(param_.block_size > kBlock16x4);
+ if (param_.is_inter_intra || param_.is_wedge_inter_intra) {
+ // 4:1/1:4 blocks are invalid for these modes.
+ block_size_adjustment += static_cast<int>(param_.block_size > kBlock8x32);
+ block_size_adjustment +=
+ static_cast<int>(param_.block_size > kBlock16x64);
+ block_size_adjustment += static_cast<int>(param_.block_size > kBlock32x8);
+ block_size_adjustment +=
+ static_cast<int>(param_.block_size > kBlock64x16);
}
- if (param_.width < param_.height) {
- return id + 1 + 3 * (FloorLog2(param_.width) - 3);
- }
- return id + 2 + 3 * (FloorLog2(param_.height) - 3);
+ return GetDigestIdOffset() + param_.block_size - kBlock8x8 -
+ block_size_adjustment;
}
void Test(const char* digest, int num_runs);
@@ -353,100 +369,112 @@ void MaskBlendTest<bitdepth, Pixel>::Test(const char* const digest,
elapsed_time += absl::Now() - start;
}
- test_utils::CheckMd5Digest(
- "MaskBlend",
- absl::StrFormat("%dx%d", param_.width, param_.height).c_str(), digest,
- dest_, sizeof(dest_), elapsed_time);
+ test_utils::CheckMd5Digest("MaskBlend", ToString(param_.block_size), digest,
+ dest_, sizeof(dest_), elapsed_time);
}
const MaskBlendTestParam kMaskBlendTestParam[] = {
// is_inter_intra = false, is_wedge_inter_intra = false.
// block size range is from 8x8 to 128x128.
- MaskBlendTestParam(8, 8, 0, 0, false, false),
- MaskBlendTestParam(8, 16, 0, 0, false, false),
- MaskBlendTestParam(16, 8, 0, 0, false, false),
- MaskBlendTestParam(16, 16, 0, 0, false, false),
- MaskBlendTestParam(16, 32, 0, 0, false, false),
- MaskBlendTestParam(32, 16, 0, 0, false, false),
- MaskBlendTestParam(32, 32, 0, 0, false, false),
- MaskBlendTestParam(32, 64, 0, 0, false, false),
- MaskBlendTestParam(64, 32, 0, 0, false, false),
- MaskBlendTestParam(64, 64, 0, 0, false, false),
- MaskBlendTestParam(64, 128, 0, 0, false, false),
- MaskBlendTestParam(128, 64, 0, 0, false, false),
- MaskBlendTestParam(128, 128, 0, 0, false, false),
- MaskBlendTestParam(8, 8, 1, 0, false, false),
- MaskBlendTestParam(8, 16, 1, 0, false, false),
- MaskBlendTestParam(16, 8, 1, 0, false, false),
- MaskBlendTestParam(16, 16, 1, 0, false, false),
- MaskBlendTestParam(16, 32, 1, 0, false, false),
- MaskBlendTestParam(32, 16, 1, 0, false, false),
- MaskBlendTestParam(32, 32, 1, 0, false, false),
- MaskBlendTestParam(32, 64, 1, 0, false, false),
- MaskBlendTestParam(64, 32, 1, 0, false, false),
- MaskBlendTestParam(64, 64, 1, 0, false, false),
- MaskBlendTestParam(64, 128, 1, 0, false, false),
- MaskBlendTestParam(128, 64, 1, 0, false, false),
- MaskBlendTestParam(128, 128, 1, 0, false, false),
- MaskBlendTestParam(8, 8, 1, 1, false, false),
- MaskBlendTestParam(8, 16, 1, 1, false, false),
- MaskBlendTestParam(16, 8, 1, 1, false, false),
- MaskBlendTestParam(16, 16, 1, 1, false, false),
- MaskBlendTestParam(16, 32, 1, 1, false, false),
- MaskBlendTestParam(32, 16, 1, 1, false, false),
- MaskBlendTestParam(32, 32, 1, 1, false, false),
- MaskBlendTestParam(32, 64, 1, 1, false, false),
- MaskBlendTestParam(64, 32, 1, 1, false, false),
- MaskBlendTestParam(64, 64, 1, 1, false, false),
- MaskBlendTestParam(64, 128, 1, 1, false, false),
- MaskBlendTestParam(128, 64, 1, 1, false, false),
- MaskBlendTestParam(128, 128, 1, 1, false, false),
+ MaskBlendTestParam(kBlock8x8, 0, 0, false, false),
+ MaskBlendTestParam(kBlock8x16, 0, 0, false, false),
+ MaskBlendTestParam(kBlock8x32, 0, 0, false, false),
+ MaskBlendTestParam(kBlock16x8, 0, 0, false, false),
+ MaskBlendTestParam(kBlock16x16, 0, 0, false, false),
+ MaskBlendTestParam(kBlock16x32, 0, 0, false, false),
+ MaskBlendTestParam(kBlock16x64, 0, 0, false, false),
+ MaskBlendTestParam(kBlock32x8, 0, 0, false, false),
+ MaskBlendTestParam(kBlock32x16, 0, 0, false, false),
+ MaskBlendTestParam(kBlock32x32, 0, 0, false, false),
+ MaskBlendTestParam(kBlock32x64, 0, 0, false, false),
+ MaskBlendTestParam(kBlock64x16, 0, 0, false, false),
+ MaskBlendTestParam(kBlock64x32, 0, 0, false, false),
+ MaskBlendTestParam(kBlock64x64, 0, 0, false, false),
+ MaskBlendTestParam(kBlock64x128, 0, 0, false, false),
+ MaskBlendTestParam(kBlock128x64, 0, 0, false, false),
+ MaskBlendTestParam(kBlock128x128, 0, 0, false, false),
+ MaskBlendTestParam(kBlock8x8, 1, 0, false, false),
+ MaskBlendTestParam(kBlock8x16, 1, 0, false, false),
+ MaskBlendTestParam(kBlock8x32, 1, 0, false, false),
+ MaskBlendTestParam(kBlock16x8, 1, 0, false, false),
+ MaskBlendTestParam(kBlock16x16, 1, 0, false, false),
+ MaskBlendTestParam(kBlock16x32, 1, 0, false, false),
+ MaskBlendTestParam(kBlock16x64, 1, 0, false, false),
+ MaskBlendTestParam(kBlock32x8, 1, 0, false, false),
+ MaskBlendTestParam(kBlock32x16, 1, 0, false, false),
+ MaskBlendTestParam(kBlock32x32, 1, 0, false, false),
+ MaskBlendTestParam(kBlock32x64, 1, 0, false, false),
+ MaskBlendTestParam(kBlock64x16, 1, 0, false, false),
+ MaskBlendTestParam(kBlock64x32, 1, 0, false, false),
+ MaskBlendTestParam(kBlock64x64, 1, 0, false, false),
+ MaskBlendTestParam(kBlock64x128, 1, 0, false, false),
+ MaskBlendTestParam(kBlock128x64, 1, 0, false, false),
+ MaskBlendTestParam(kBlock128x128, 1, 0, false, false),
+ MaskBlendTestParam(kBlock8x8, 1, 1, false, false),
+ MaskBlendTestParam(kBlock8x16, 1, 1, false, false),
+ MaskBlendTestParam(kBlock8x32, 1, 1, false, false),
+ MaskBlendTestParam(kBlock16x8, 1, 1, false, false),
+ MaskBlendTestParam(kBlock16x16, 1, 1, false, false),
+ MaskBlendTestParam(kBlock16x32, 1, 1, false, false),
+ MaskBlendTestParam(kBlock16x64, 1, 1, false, false),
+ MaskBlendTestParam(kBlock32x8, 1, 1, false, false),
+ MaskBlendTestParam(kBlock32x16, 1, 1, false, false),
+ MaskBlendTestParam(kBlock32x32, 1, 1, false, false),
+ MaskBlendTestParam(kBlock32x64, 1, 1, false, false),
+ MaskBlendTestParam(kBlock64x16, 1, 1, false, false),
+ MaskBlendTestParam(kBlock64x32, 1, 1, false, false),
+ MaskBlendTestParam(kBlock64x64, 1, 1, false, false),
+ MaskBlendTestParam(kBlock64x128, 1, 1, false, false),
+ MaskBlendTestParam(kBlock128x64, 1, 1, false, false),
+ MaskBlendTestParam(kBlock128x128, 1, 1, false, false),
// is_inter_intra = true, is_wedge_inter_intra = false.
- // block size range is from 8x8 to 32x32.
- MaskBlendTestParam(8, 8, 0, 0, true, false),
- MaskBlendTestParam(8, 16, 0, 0, true, false),
- MaskBlendTestParam(16, 8, 0, 0, true, false),
- MaskBlendTestParam(16, 16, 0, 0, true, false),
- MaskBlendTestParam(16, 32, 0, 0, true, false),
- MaskBlendTestParam(32, 16, 0, 0, true, false),
- MaskBlendTestParam(32, 32, 0, 0, true, false),
- MaskBlendTestParam(8, 8, 1, 0, true, false),
- MaskBlendTestParam(8, 16, 1, 0, true, false),
- MaskBlendTestParam(16, 8, 1, 0, true, false),
- MaskBlendTestParam(16, 16, 1, 0, true, false),
- MaskBlendTestParam(16, 32, 1, 0, true, false),
- MaskBlendTestParam(32, 16, 1, 0, true, false),
- MaskBlendTestParam(32, 32, 1, 0, true, false),
- MaskBlendTestParam(8, 8, 1, 1, true, false),
- MaskBlendTestParam(8, 16, 1, 1, true, false),
- MaskBlendTestParam(16, 8, 1, 1, true, false),
- MaskBlendTestParam(16, 16, 1, 1, true, false),
- MaskBlendTestParam(16, 32, 1, 1, true, false),
- MaskBlendTestParam(32, 16, 1, 1, true, false),
- MaskBlendTestParam(32, 32, 1, 1, true, false),
+ // block size range is from 8x8 to 32x32 (no 4:1/1:4 blocks, Section 5.11.28
+ // Read inter intra syntax).
+ MaskBlendTestParam(kBlock8x8, 0, 0, true, false),
+ MaskBlendTestParam(kBlock8x16, 0, 0, true, false),
+ MaskBlendTestParam(kBlock16x8, 0, 0, true, false),
+ MaskBlendTestParam(kBlock16x16, 0, 0, true, false),
+ MaskBlendTestParam(kBlock16x32, 0, 0, true, false),
+ MaskBlendTestParam(kBlock32x16, 0, 0, true, false),
+ MaskBlendTestParam(kBlock32x32, 0, 0, true, false),
+ MaskBlendTestParam(kBlock8x8, 1, 0, true, false),
+ MaskBlendTestParam(kBlock8x16, 1, 0, true, false),
+ MaskBlendTestParam(kBlock16x8, 1, 0, true, false),
+ MaskBlendTestParam(kBlock16x16, 1, 0, true, false),
+ MaskBlendTestParam(kBlock16x32, 1, 0, true, false),
+ MaskBlendTestParam(kBlock32x16, 1, 0, true, false),
+ MaskBlendTestParam(kBlock32x32, 1, 0, true, false),
+ MaskBlendTestParam(kBlock8x8, 1, 1, true, false),
+ MaskBlendTestParam(kBlock8x16, 1, 1, true, false),
+ MaskBlendTestParam(kBlock16x8, 1, 1, true, false),
+ MaskBlendTestParam(kBlock16x16, 1, 1, true, false),
+ MaskBlendTestParam(kBlock16x32, 1, 1, true, false),
+ MaskBlendTestParam(kBlock32x16, 1, 1, true, false),
+ MaskBlendTestParam(kBlock32x32, 1, 1, true, false),
// is_inter_intra = true, is_wedge_inter_intra = true.
- // block size range is from 8x8 to 32x32.
- MaskBlendTestParam(8, 8, 0, 0, true, true),
- MaskBlendTestParam(8, 16, 0, 0, true, true),
- MaskBlendTestParam(16, 8, 0, 0, true, true),
- MaskBlendTestParam(16, 16, 0, 0, true, true),
- MaskBlendTestParam(16, 32, 0, 0, true, true),
- MaskBlendTestParam(32, 16, 0, 0, true, true),
- MaskBlendTestParam(32, 32, 0, 0, true, true),
- MaskBlendTestParam(8, 8, 1, 0, true, true),
- MaskBlendTestParam(8, 16, 1, 0, true, true),
- MaskBlendTestParam(16, 8, 1, 0, true, true),
- MaskBlendTestParam(16, 16, 1, 0, true, true),
- MaskBlendTestParam(16, 32, 1, 0, true, true),
- MaskBlendTestParam(32, 16, 1, 0, true, true),
- MaskBlendTestParam(32, 32, 1, 0, true, true),
- MaskBlendTestParam(8, 8, 1, 1, true, true),
- MaskBlendTestParam(8, 16, 1, 1, true, true),
- MaskBlendTestParam(16, 8, 1, 1, true, true),
- MaskBlendTestParam(16, 16, 1, 1, true, true),
- MaskBlendTestParam(16, 32, 1, 1, true, true),
- MaskBlendTestParam(32, 16, 1, 1, true, true),
- MaskBlendTestParam(32, 32, 1, 1, true, true),
+ // block size range is from 8x8 to 32x32 (no 4:1/1:4 blocks, Section 5.11.28
+ // Read inter intra syntax).
+ MaskBlendTestParam(kBlock8x8, 0, 0, true, true),
+ MaskBlendTestParam(kBlock8x16, 0, 0, true, true),
+ MaskBlendTestParam(kBlock16x8, 0, 0, true, true),
+ MaskBlendTestParam(kBlock16x16, 0, 0, true, true),
+ MaskBlendTestParam(kBlock16x32, 0, 0, true, true),
+ MaskBlendTestParam(kBlock32x16, 0, 0, true, true),
+ MaskBlendTestParam(kBlock32x32, 0, 0, true, true),
+ MaskBlendTestParam(kBlock8x8, 1, 0, true, true),
+ MaskBlendTestParam(kBlock8x16, 1, 0, true, true),
+ MaskBlendTestParam(kBlock16x8, 1, 0, true, true),
+ MaskBlendTestParam(kBlock16x16, 1, 0, true, true),
+ MaskBlendTestParam(kBlock16x32, 1, 0, true, true),
+ MaskBlendTestParam(kBlock32x16, 1, 0, true, true),
+ MaskBlendTestParam(kBlock32x32, 1, 0, true, true),
+ MaskBlendTestParam(kBlock8x8, 1, 1, true, true),
+ MaskBlendTestParam(kBlock8x16, 1, 1, true, true),
+ MaskBlendTestParam(kBlock16x8, 1, 1, true, true),
+ MaskBlendTestParam(kBlock16x16, 1, 1, true, true),
+ MaskBlendTestParam(kBlock16x32, 1, 1, true, true),
+ MaskBlendTestParam(kBlock32x16, 1, 1, true, true),
+ MaskBlendTestParam(kBlock32x32, 1, 1, true, true),
};
using MaskBlendTest8bpp = MaskBlendTest<8, uint8_t>;
@@ -486,6 +514,10 @@ INSTANTIATE_TEST_SUITE_P(C, MaskBlendTest10bpp,
INSTANTIATE_TEST_SUITE_P(SSE41, MaskBlendTest10bpp,
testing::ValuesIn(kMaskBlendTestParam));
#endif
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, MaskBlendTest10bpp,
+ testing::ValuesIn(kMaskBlendTestParam));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
} // namespace
diff --git a/src/dsp/motion_field_projection.cc b/src/dsp/motion_field_projection.cc
index b51ec8f..7c17b8e 100644
--- a/src/dsp/motion_field_projection.cc
+++ b/src/dsp/motion_field_projection.cc
@@ -31,10 +31,8 @@ namespace {
// Silence unused function warnings when MotionFieldProjectionKernel_C is
// not used.
-#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel) || \
- (LIBGAV1_MAX_BITDEPTH >= 10 && \
- !defined(LIBGAV1_Dsp10bpp_MotionFieldProjectionKernel))
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
+ !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel)
// 7.9.2.
void MotionFieldProjectionKernel_C(const ReferenceInfo& reference_info,
@@ -101,38 +99,18 @@ void MotionFieldProjectionKernel_C(const ReferenceInfo& reference_info,
}
#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
- // !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel) ||
- // (LIBGAV1_MAX_BITDEPTH >= 10 &&
- // !defined(LIBGAV1_Dsp10bpp_MotionFieldProjectionKernel))
+ // !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel)
-void Init8bpp() {
-#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel)
- Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
- assert(dsp != nullptr);
- dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_C;
-#endif
-}
+} // namespace
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
+void MotionFieldProjectionInit_C() {
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp10bpp_MotionFieldProjectionKernel)
- Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
+ !defined(LIBGAV1_Dsp8bpp_MotionFieldProjectionKernel)
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_C;
#endif
}
-#endif
-
-} // namespace
-
-void MotionFieldProjectionInit_C() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
-}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/motion_vector_search.cc b/src/dsp/motion_vector_search.cc
index 9402302..205a1b6 100644
--- a/src/dsp/motion_vector_search.cc
+++ b/src/dsp/motion_vector_search.cc
@@ -29,16 +29,14 @@ namespace dsp {
namespace {
// Silence unused function warnings when the C functions are not used.
-#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch) || \
- (LIBGAV1_MAX_BITDEPTH >= 10 && \
- !defined(LIBGAV1_Dsp10bpp_MotionVectorSearch))
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
+ !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch)
void MvProjectionCompoundLowPrecision_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* const candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
int index = 0;
@@ -62,10 +60,10 @@ void MvProjectionCompoundLowPrecision_C(
}
void MvProjectionCompoundForceInteger_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* const candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
int index = 0;
@@ -91,10 +89,10 @@ void MvProjectionCompoundForceInteger_C(
}
void MvProjectionCompoundHighPrecision_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* const candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
int index = 0;
@@ -113,9 +111,10 @@ void MvProjectionCompoundHighPrecision_C(
}
void MvProjectionSingleLowPrecision_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets, const int reference_offset,
- const int count, MotionVector* const candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
int index = 0;
do {
GetMvProjection(
@@ -131,9 +130,10 @@ void MvProjectionSingleLowPrecision_C(
}
void MvProjectionSingleForceInteger_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets, const int reference_offset,
- const int count, MotionVector* const candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
int index = 0;
do {
GetMvProjection(
@@ -151,9 +151,10 @@ void MvProjectionSingleForceInteger_C(
}
void MvProjectionSingleHighPrecision_C(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets, const int reference_offset,
- const int count, MotionVector* const candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT const candidate_mvs) {
int index = 0;
do {
GetMvProjection(
@@ -164,29 +165,14 @@ void MvProjectionSingleHighPrecision_C(
}
#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
- // !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch) ||
- // (LIBGAV1_MAX_BITDEPTH >= 10 &&
- // !defined(LIBGAV1_Dsp10bpp_MotionVectorSearch))
+ // !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch)
-void Init8bpp() {
-#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch)
- Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
- assert(dsp != nullptr);
- dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_C;
- dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_C;
- dsp->mv_projection_compound[2] = MvProjectionCompoundHighPrecision_C;
- dsp->mv_projection_single[0] = MvProjectionSingleLowPrecision_C;
- dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_C;
- dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_C;
-#endif
-}
+} // namespace
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
+void MotionVectorSearchInit_C() {
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
- !defined(LIBGAV1_Dsp10bpp_MotionVectorSearch)
- Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
+ !defined(LIBGAV1_Dsp8bpp_MotionVectorSearch)
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_C;
dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_C;
@@ -196,16 +182,6 @@ void Init10bpp() {
dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_C;
#endif
}
-#endif
-
-} // namespace
-
-void MotionVectorSearchInit_C() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
-}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/obmc.cc b/src/dsp/obmc.cc
index 46d1b5b..6b5c6e3 100644
--- a/src/dsp/obmc.cc
+++ b/src/dsp/obmc.cc
@@ -30,15 +30,18 @@ namespace {
// 7.11.3.10 (from top samples).
template <typename Pixel>
-void OverlapBlendVertical_C(void* const prediction,
+void OverlapBlendVertical_C(void* LIBGAV1_RESTRICT const prediction,
const ptrdiff_t prediction_stride, const int width,
- const int height, const void* const obmc_prediction,
+ const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<Pixel*>(prediction);
const ptrdiff_t pred_stride = prediction_stride / sizeof(Pixel);
const auto* obmc_pred = static_cast<const Pixel*>(obmc_prediction);
const ptrdiff_t obmc_pred_stride = obmc_prediction_stride / sizeof(Pixel);
const uint8_t* const mask = kObmcMask + height - 2;
+ assert(width >= 4);
+ assert(height >= 2);
for (int y = 0; y < height; ++y) {
const uint8_t mask_value = mask[y];
@@ -53,16 +56,19 @@ void OverlapBlendVertical_C(void* const prediction,
// 7.11.3.10 (from left samples).
template <typename Pixel>
-void OverlapBlendHorizontal_C(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendHorizontal_C(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<Pixel*>(prediction);
const ptrdiff_t pred_stride = prediction_stride / sizeof(Pixel);
const auto* obmc_pred = static_cast<const Pixel*>(obmc_prediction);
const ptrdiff_t obmc_pred_stride = obmc_prediction_stride / sizeof(Pixel);
const uint8_t* const mask = kObmcMask + width - 2;
+ assert(width >= 2);
+ assert(height >= 4);
+
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
const uint8_t mask_value = mask[x];
diff --git a/src/dsp/obmc_test.cc b/src/dsp/obmc_test.cc
index 60b10c6..3672e12 100644
--- a/src/dsp/obmc_test.cc
+++ b/src/dsp/obmc_test.cc
@@ -42,18 +42,16 @@ namespace {
#include "src/dsp/obmc.inc"
constexpr int kMaxBlendingBlockSize = 64;
-constexpr int kNumSpeedTests = 1000000;
+constexpr int kNumSpeedTests = 2e8;
const char* GetDigest8bpp(int id) {
static const char* const kDigest[] = {
- "76906f87892c30c7059a5c97e4838c42", "0b8670d937217c66425f2662b51eebbe",
"c8659acd1e8ecdab06be73f0954fa1ae", "e785f31f2723a193fefd534bd6f6c18f",
"751fcd8a345fef1c38a25293c9b528c0", "69af412dfa5e96ad43b79c178cb1c58b",
"2766a64622e183bb4614f2018f14fa85", "8d98589a5cef6e68ee8fadf19d420e3c",
"19eccf31dd8cf1abcee9414128fe4141", "35019f98e30bcbc6ab624682a0628519",
"199c551164e73c100045d7ab033ffdcc", "ad5a5eb2906265690c22741b0715f37b",
- "e2152dea159249149ff4151111b73ed6", "6b44c0052789ce2fa4df882f35618e7d",
- "1edd570bec7e63780d83588f6aacda25", "b04b81c9e52c58885907dc7f1ef2c11c",
+ "e2152dea159249149ff4151111b73ed6", "1edd570bec7e63780d83588f6aacda25",
"b24ad192e151b1e0f74d1493004cb1b6", "6c1ce7ed3463cc60870e336f990d4f14",
"2e6b7a06da21512dfdd9a517d2988655", "971ba1c41ab13bb341c04f936760f546",
"55b803239d9f12888c666c5320450937", "3d0838963f8c95dafbfb8e5e25c865d2",
@@ -65,14 +63,12 @@ const char* GetDigest8bpp(int id) {
const char* GetDigestSpeed8bpp(int id) {
static const char* const kDigest[] = {
- "c5b532f5960477bdd50684ab25fae0f4", "bf76ed404bc5674e0a4ff238efceb62b",
"5ea519b616cd2998fbb9b25b4c2660cb", "f23d18197a96de48901738d130a147d9",
"07b4140c693947a63865f835089766c4", "62547d29bc4dfb2e201e9d907c09e345",
"c3988da521be50aeb9944564001b282b", "d5a8ff9ca1bd49f4260bb497c489b06c",
"b3e94f1e33c316759ebf47620327168c", "c5e64a34ca7e55f4daed19cbe4c27049",
"3b234eb729e8e79db8692c4cbe1b6667", "f9f3060a44c3a575470f9700b3c3a75b",
- "e3a1960b0a7238db1184a3f9d8e9a4b2", "721c7e8ec3aa0608b64f10f7ff5427db",
- "ba9938553703d520bc0ade427c397140", "8b6e15e8ecd234363f70f51c64b0aea1",
+ "e3a1960b0a7238db1184a3f9d8e9a4b2", "ba9938553703d520bc0ade427c397140",
"31bf64a6ed1e8002d488c0b9dcffb80a", "9ab1f3ae2e7f70cd27452f30cecfd18e",
"eaf25ac79ad70fc17ca96d8fcdf0f939", "9aaa88cb5e6b8757e37c3430bd664e70",
"8293874b2794df8fd22f5a35c3de7bee", "e9d6ee9106227c2c67ea9e6a4652e4ad",
@@ -85,14 +81,12 @@ const char* GetDigestSpeed8bpp(int id) {
#if LIBGAV1_MAX_BITDEPTH >= 10
const char* GetDigest10bpp(int id) {
static const char* const kDigest[] = {
- "6ab8f28e8fb3c4b10b23efee38d4154e", "d4374005d34e43e06c1b0c906289dadd",
"6f922e4142b644ca3f1eb0f363a1c34e", "84e7c098a9335b36082fec0bc7203075",
"40f00ea6884fea23a3b7fae59e3b02c3", "70cb92d08b4fdb6dd9c7d418cb1455d3",
"ed550798b56e70439a93cb48c359e873", "55e0d927b984e78cd51a1961e58a431d",
"482a6856b87265a82e4ea3fdadb2d95b", "0be46226ff87d74ff2ce68a83eaf9cca",
"bb4461f0131a1693a0a76f21d92a480b", "ea24f78d74c7864fb247c9a98c9b97b6",
- "d2e70b81882aeb3d9fccef89e7552a9d", "4a692ddf91905727bc524d91735cf93c",
- "f5d882ee6d9ae6f7dfa467ca99301424", "58821b87e7d9d4388d6003ffcb3723d1",
+ "d2e70b81882aeb3d9fccef89e7552a9d", "f5d882ee6d9ae6f7dfa467ca99301424",
"824ddb98eb4129b3d254c0bc7a64cd73", "5eaaafa8ef9b7ba5e2856a947e5b33df",
"071de1494e0f1b2f99266b90bdc43ddd", "c33227a96dad506adc32dacfb371ab78",
"e8a632f9fff240c439d4ae6e86795046", "26b90d74f18f9df4427b6180d48db1fc",
@@ -104,14 +98,12 @@ const char* GetDigest10bpp(int id) {
const char* GetDigestSpeed10bpp(int id) {
static const char* const kDigest[] = {
- "df59e5fd6e0237a56381f3a516806eb8", "f478bdf43e0b91b8dc9b2661eb207e49",
"80557576299708005111029cef04da53", "24f84f07f53f61cd46bdcfe1e05ff9b5",
"4dd6bc62145baa5357a4cbf6d7a6ef15", "0b7aa27cee43b8ae0c02d07887eaa225",
"9e28cdae73ca97433499c31ca79e1d07", "1cacd6466a143f88e736fffaf21e2246",
"9c7699626660d8965e06a54282a408f3", "eef893efef62b2eb4aaad06fc462819c",
"4965d0a3ff750813df85c0082b21bd4b", "ec10fd79fbf552abc595def392e9a863",
- "a148bbafdc4466fbb700b31acccca8ac", "ff0566921ff2d5145f79fbf409508fb2",
- "5da9d960988549f53b817003b93e4d01", "fa9028b2ed049ad71b5fd15f2daacbe5",
+ "a148bbafdc4466fbb700b31acccca8ac", "5da9d960988549f53b817003b93e4d01",
"b4c4f88d1fb54869ce7ff452ca7786a6", "d607f785fce62bad85102054539e7089",
"b441761ea2817e4618c594aaa11d670a", "1cc5e08e6d5f9315dbc0369b97af941d",
"568cc1a3a67ba4e6e77f54602d0ed3e3", "522f14c068f788bc284a7d1e47d623ed",
@@ -165,16 +157,19 @@ class ObmcBlendTest : public testing::TestWithParam<ObmcTestParam> {
protected:
int GetDigestId() const {
- // blending_direction_ == 0:
+ // blending_direction_ == kObmcDirectionVertical:
// (width, height):
- // (2, 2), id = 0. (2, 4), id = 1. (4, 2), id = 2.
- // (4, 4), id = 3. (4, 8), id = 4. (8, 4), id = 5.
+ // (4, 2), id = 0. (4, 4), id = 1. (4, 8), id = 2. (8, 4), id = 3.
// ...
- // blending_direction_ == 1: id starts from 13.
- const int id = (blending_direction_ == kObmcDirectionVertical) ? 0 : 13;
- if (width_ == height_) return id + 3 * (FloorLog2(width_) - 1);
- if (width_ < height_) return id + 1 + 3 * (FloorLog2(width_) - 1);
- return id + 2 + 3 * (FloorLog2(height_) - 1);
+ // blending_direction_ == kObmcDirectionHorizontal: id starts from 11.
+ // Vertical skips (2, 4) while horizontal skips (4, 2) creating a gap after
+ // (2, 4).
+ const int id = (blending_direction_ == kObmcDirectionVertical) ? 0
+ : (width_ == 2) ? 12
+ : 11;
+ if (width_ == height_) return id + 3 * (FloorLog2(width_) - 1) - 2;
+ if (width_ < height_) return id + 3 * (FloorLog2(width_) - 1) - 1;
+ return id + 3 * (FloorLog2(height_) - 1);
}
// Note |digest| is only used when |use_fixed_values| is false.
@@ -184,7 +179,7 @@ class ObmcBlendTest : public testing::TestWithParam<ObmcTestParam> {
private:
const int width_ = GetParam().width;
const int height_ = GetParam().height;
- const int blending_direction_ = GetParam().blending_direction;
+ const ObmcDirection blending_direction_ = GetParam().blending_direction;
Pixel source1_[kMaxBlendingBlockSize * kMaxBlendingBlockSize] = {};
Pixel source2_[kMaxBlendingBlockSize * kMaxBlendingBlockSize] = {};
dsp::ObmcBlendFunc func_;
@@ -223,8 +218,9 @@ void ObmcBlendTest<bitdepth, Pixel>::Test(const char* const digest,
EXPECT_TRUE(success);
} else {
test_utils::CheckMd5Digest(
- "Obmc", absl::StrFormat("%dx%d", width_, height_).c_str(), digest,
- source1_, sizeof(source1_), absl::Duration());
+ ToString(blending_direction_),
+ absl::StrFormat("%dx%d", width_, height_).c_str(), digest, source1_,
+ sizeof(source1_), absl::Duration());
}
}
@@ -256,14 +252,12 @@ void ObmcBlendTest<bitdepth, Pixel>::TestSpeed(const char* const digest,
}
memcpy(source1_, dest,
sizeof(Pixel) * kMaxBlendingBlockSize * kMaxBlendingBlockSize);
- test_utils::CheckMd5Digest("Obmc",
+ test_utils::CheckMd5Digest(ToString(blending_direction_),
absl::StrFormat("%dx%d", width_, height_).c_str(),
digest, source1_, sizeof(source1_), elapsed_time);
}
const ObmcTestParam kObmcTestParam[] = {
- ObmcTestParam(2, 2, kObmcDirectionVertical),
- ObmcTestParam(2, 4, kObmcDirectionVertical),
ObmcTestParam(4, 2, kObmcDirectionVertical),
ObmcTestParam(4, 4, kObmcDirectionVertical),
ObmcTestParam(4, 8, kObmcDirectionVertical),
@@ -275,9 +269,7 @@ const ObmcTestParam kObmcTestParam[] = {
ObmcTestParam(16, 32, kObmcDirectionVertical),
ObmcTestParam(32, 16, kObmcDirectionVertical),
ObmcTestParam(32, 32, kObmcDirectionVertical),
- ObmcTestParam(2, 2, kObmcDirectionHorizontal),
ObmcTestParam(2, 4, kObmcDirectionHorizontal),
- ObmcTestParam(4, 2, kObmcDirectionHorizontal),
ObmcTestParam(4, 4, kObmcDirectionHorizontal),
ObmcTestParam(4, 8, kObmcDirectionHorizontal),
ObmcTestParam(8, 4, kObmcDirectionHorizontal),
@@ -301,9 +293,8 @@ TEST_P(ObmcBlendTest8bpp, Blending) {
}
TEST_P(ObmcBlendTest8bpp, DISABLED_Speed) {
- TestSpeed(
- GetDigestSpeed8bpp(GetDigestId()),
- (kNumSpeedTests * 32 * 32) / (GetParam().height * GetParam().width));
+ TestSpeed(GetDigestSpeed8bpp(GetDigestId()),
+ kNumSpeedTests / (GetParam().height * GetParam().width));
}
INSTANTIATE_TEST_SUITE_P(C, ObmcBlendTest8bpp,
@@ -331,9 +322,8 @@ TEST_P(ObmcBlendTest10bpp, Blending) {
}
TEST_P(ObmcBlendTest10bpp, DISABLED_Speed) {
- TestSpeed(
- GetDigestSpeed10bpp(GetDigestId()),
- (kNumSpeedTests * 32 * 32) / (GetParam().height * GetParam().width));
+ TestSpeed(GetDigestSpeed10bpp(GetDigestId()),
+ kNumSpeedTests / (GetParam().height * GetParam().width));
}
INSTANTIATE_TEST_SUITE_P(C, ObmcBlendTest10bpp,
@@ -342,6 +332,10 @@ INSTANTIATE_TEST_SUITE_P(C, ObmcBlendTest10bpp,
INSTANTIATE_TEST_SUITE_P(SSE41, ObmcBlendTest10bpp,
testing::ValuesIn(kObmcTestParam));
#endif
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ObmcBlendTest10bpp,
+ testing::ValuesIn(kObmcTestParam));
+#endif
#endif // LIBGAV1_MAX_BITDEPTH >= 10
} // namespace
diff --git a/src/dsp/smooth_weights.inc b/src/dsp/smooth_weights.inc
new file mode 100644
index 0000000..d4ee8a6
--- /dev/null
+++ b/src/dsp/smooth_weights.inc
@@ -0,0 +1,35 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Each row below contains weights used for a corresponding block size. Because
+// they are adjacent powers of 2, the index of each row is the sum of the sizes
+// of preceding rows, minus 4.
+// The weights need to be declared as uint8_t or uint16_t, depending on the
+// bitdepth, so the values are held in a single canonical place.
+// clang-format off
+ // block dimension = 4
+ 255, 149, 85, 64,
+ // block dimension = 8
+ 255, 197, 146, 105, 73, 50, 37, 32,
+ // block dimension = 16
+ 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
+ // block dimension = 32
+ 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
+ 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
+ // block dimension = 64
+ 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
+ 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73,
+ 69, 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16,
+ 15, 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4
+ // clang-format on
diff --git a/src/dsp/super_res.cc b/src/dsp/super_res.cc
index abb01a1..570ba73 100644
--- a/src/dsp/super_res.cc
+++ b/src/dsp/super_res.cc
@@ -25,11 +25,12 @@ namespace dsp {
namespace {
template <int bitdepth, typename Pixel>
-void SuperRes_C(const void* /*coefficients*/, void* const source,
+void SuperRes_C(const void* /*coefficients*/,
+ void* LIBGAV1_RESTRICT const source,
const ptrdiff_t source_stride, const int height,
const int downscaled_width, const int upscaled_width,
- const int initial_subpixel_x, const int step, void* const dest,
- ptrdiff_t dest_stride) {
+ const int initial_subpixel_x, const int step,
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t dest_stride) {
assert(step <= 1 << kSuperResScaleBits);
auto* src = static_cast<Pixel*>(source) - DivideBy2(kSuperResFilterTaps);
auto* dst = static_cast<Pixel*>(dest);
diff --git a/src/dsp/warp.cc b/src/dsp/warp.cc
index fbde65a..dd467ea 100644
--- a/src/dsp/warp.cc
+++ b/src/dsp/warp.cc
@@ -59,14 +59,14 @@ constexpr int kWarpedDiffPrecisionBits = 10;
// compound second pass output range: [ 8129, 57403]
template <bool is_compound, int bitdepth, typename Pixel>
-void Warp_C(const void* const source, ptrdiff_t source_stride,
+void Warp_C(const void* LIBGAV1_RESTRICT const source, ptrdiff_t source_stride,
const int source_width, const int source_height,
- const int* const warp_params, const int subsampling_x,
- const int subsampling_y, const int block_start_x,
- const int block_start_y, const int block_width,
- const int block_height, const int16_t alpha, const int16_t beta,
- const int16_t gamma, const int16_t delta, void* dest,
- ptrdiff_t dest_stride) {
+ const int* LIBGAV1_RESTRICT const warp_params,
+ const int subsampling_x, const int subsampling_y,
+ const int block_start_x, const int block_start_y,
+ const int block_width, const int block_height, const int16_t alpha,
+ const int16_t beta, const int16_t gamma, const int16_t delta,
+ void* LIBGAV1_RESTRICT dest, ptrdiff_t dest_stride) {
assert(block_width >= 8 && block_height >= 8);
if (is_compound) {
assert(dest_stride == block_width);
diff --git a/src/dsp/warp_test.cc b/src/dsp/warp_test.cc
index e7384f4..4d13051 100644
--- a/src/dsp/warp_test.cc
+++ b/src/dsp/warp_test.cc
@@ -638,6 +638,11 @@ TEST_P(WarpTest10bpp, RandomValues) { TestRandomValues(); }
TEST_P(WarpTest10bpp, DISABLED_Speed) { TestSpeed(); }
INSTANTIATE_TEST_SUITE_P(C, WarpTest10bpp, testing::ValuesIn(warp_test_param));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, WarpTest10bpp,
+ testing::ValuesIn(warp_test_param));
+#endif
#endif
std::ostream& operator<<(std::ostream& os, const WarpTestParam& warp_param) {
diff --git a/src/dsp/weight_mask.cc b/src/dsp/weight_mask.cc
index 15d6bc6..41f4c70 100644
--- a/src/dsp/weight_mask.cc
+++ b/src/dsp/weight_mask.cc
@@ -29,8 +29,9 @@ namespace dsp {
namespace {
template <int width, int height, int bitdepth, bool mask_is_inverse>
-void WeightMask_C(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask_C(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
using PredType =
typename std::conditional<bitdepth == 8, int16_t, uint16_t>::type;
const auto* pred_0 = static_cast<const PredType*>(prediction_0);
diff --git a/src/dsp/x86/average_blend_sse4.cc b/src/dsp/x86/average_blend_sse4.cc
index ec9f589..911c5a9 100644
--- a/src/dsp/x86/average_blend_sse4.cc
+++ b/src/dsp/x86/average_blend_sse4.cc
@@ -35,8 +35,9 @@ namespace {
constexpr int kInterPostRoundBit = 4;
-inline void AverageBlend4Row(const int16_t* prediction_0,
- const int16_t* prediction_1, uint8_t* dest) {
+inline void AverageBlend4Row(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT dest) {
const __m128i pred_0 = LoadLo8(prediction_0);
const __m128i pred_1 = LoadLo8(prediction_1);
__m128i res = _mm_add_epi16(pred_0, pred_1);
@@ -44,8 +45,9 @@ inline void AverageBlend4Row(const int16_t* prediction_0,
Store4(dest, _mm_packus_epi16(res, res));
}
-inline void AverageBlend8Row(const int16_t* prediction_0,
- const int16_t* prediction_1, uint8_t* dest) {
+inline void AverageBlend8Row(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT dest) {
const __m128i pred_0 = LoadAligned16(prediction_0);
const __m128i pred_1 = LoadAligned16(prediction_1);
__m128i res = _mm_add_epi16(pred_0, pred_1);
@@ -53,9 +55,10 @@ inline void AverageBlend8Row(const int16_t* prediction_0,
StoreLo8(dest, _mm_packus_epi16(res, res));
}
-inline void AverageBlendLargeRow(const int16_t* prediction_0,
- const int16_t* prediction_1, const int width,
- uint8_t* dest) {
+inline void AverageBlendLargeRow(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ const int width,
+ uint8_t* LIBGAV1_RESTRICT dest) {
int x = 0;
do {
const __m128i pred_00 = LoadAligned16(&prediction_0[x]);
@@ -71,8 +74,10 @@ inline void AverageBlendLargeRow(const int16_t* prediction_0,
} while (x < width);
}
-void AverageBlend_SSE4_1(const void* prediction_0, const void* prediction_1,
- const int width, const int height, void* const dest,
+void AverageBlend_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
@@ -148,11 +153,11 @@ namespace {
constexpr int kInterPostRoundBitPlusOne = 5;
template <const int width, const int offset>
-inline void AverageBlendRow(const uint16_t* prediction_0,
- const uint16_t* prediction_1,
+inline void AverageBlendRow(const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1,
const __m128i& compound_offset,
const __m128i& round_offset, const __m128i& max,
- const __m128i& zero, uint16_t* dst,
+ const __m128i& zero, uint16_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dest_stride) {
// pred_0/1 max range is 16b.
const __m128i pred_0 = LoadUnaligned16(prediction_0 + offset);
@@ -182,9 +187,10 @@ inline void AverageBlendRow(const uint16_t* prediction_0,
StoreHi8(dst + dest_stride, result);
}
-void AverageBlend10bpp_SSE4_1(const void* prediction_0,
- const void* prediction_1, const int width,
- const int height, void* const dest,
+void AverageBlend10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dst_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const ptrdiff_t dest_stride = dst_stride / sizeof(dst[0]);
diff --git a/src/dsp/x86/cdef_avx2.cc b/src/dsp/x86/cdef_avx2.cc
index d41dc38..01a2b9f 100644
--- a/src/dsp/x86/cdef_avx2.cc
+++ b/src/dsp/x86/cdef_avx2.cc
@@ -269,8 +269,8 @@ LIBGAV1_ALWAYS_INLINE void AddPartial_D7_D5(__m256i* v_src, __m256i* partial_lo,
_mm256_add_epi16(*partial_hi, _mm256_srli_si256(v_pair_add[3], 10));
}
-LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* src, ptrdiff_t stride,
- __m256i* partial) {
+LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t stride, __m256i* partial) {
// 8x8 input
// 00 01 02 03 04 05 06 07
// 10 11 12 13 14 15 16 17
@@ -451,8 +451,10 @@ inline void Cost2And6_Pair(uint32_t* cost, const __m256i partial_a,
cost[6] = _mm_cvtsi128_si32(_mm_srli_si128(sums, 8));
}
-void CdefDirection_AVX2(const void* const source, ptrdiff_t stride,
- uint8_t* const direction, int* const variance) {
+void CdefDirection_AVX2(const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT const direction,
+ int* LIBGAV1_RESTRICT const variance) {
assert(direction != nullptr);
assert(variance != nullptr);
const auto* src = static_cast<const uint8_t*>(source);
@@ -500,8 +502,9 @@ void CdefDirection_AVX2(const void* const source, ptrdiff_t stride,
// CdefFilter
// Load 4 vectors based on the given |direction|.
-inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+inline void LoadDirection(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
// Each |direction| describes a different set of source values. Expand this
// set by negating each set. For |direction| == 0 this gives a diagonal line
// from top right to bottom left. The first value is y, the second x. Negative
@@ -525,8 +528,9 @@ inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
// Load 4 vectors based on the given |direction|. Use when |block_width| == 4 to
// do 2 rows at a time.
-void LoadDirection4(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+void LoadDirection4(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
const int y_0 = kCdefDirections[direction][0][0];
const int x_0 = kCdefDirections[direction][0][1];
const int y_1 = kCdefDirections[direction][1][0];
@@ -569,11 +573,11 @@ inline __m256i ApplyConstrainAndTap(const __m256i& pixel, const __m256i& val,
}
template <int width, bool enable_primary = true, bool enable_secondary = true>
-void CdefFilter_AVX2(const uint16_t* src, const ptrdiff_t src_stride,
- const int height, const int primary_strength,
- const int secondary_strength, const int damping,
- const int direction, void* dest,
- const ptrdiff_t dst_stride) {
+void CdefFilter_AVX2(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ const int primary_strength, const int secondary_strength,
+ const int damping, const int direction,
+ void* LIBGAV1_RESTRICT dest, const ptrdiff_t dst_stride) {
static_assert(width == 8 || width == 4, "Invalid CDEF width.");
static_assert(enable_primary || enable_secondary, "");
constexpr bool clipping_required = enable_primary && enable_secondary;
diff --git a/src/dsp/x86/cdef_sse4.cc b/src/dsp/x86/cdef_sse4.cc
index 6ede778..6c48844 100644
--- a/src/dsp/x86/cdef_sse4.cc
+++ b/src/dsp/x86/cdef_sse4.cc
@@ -241,8 +241,8 @@ LIBGAV1_ALWAYS_INLINE void AddPartial_D5_D7(__m128i* v_src, __m128i* partial_lo,
*partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_pair_add[3], 10));
}
-LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* src, ptrdiff_t stride,
- __m128i* partial_lo,
+LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t stride, __m128i* partial_lo,
__m128i* partial_hi) {
// 8x8 input
// 00 01 02 03 04 05 06 07
@@ -395,8 +395,10 @@ inline uint32_t SquareSum_S16(const __m128i a) {
return SumVector_S32(square);
}
-void CdefDirection_SSE4_1(const void* const source, ptrdiff_t stride,
- uint8_t* const direction, int* const variance) {
+void CdefDirection_SSE4_1(const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT const direction,
+ int* LIBGAV1_RESTRICT const variance) {
assert(direction != nullptr);
assert(variance != nullptr);
const auto* src = static_cast<const uint8_t*>(source);
@@ -438,8 +440,9 @@ void CdefDirection_SSE4_1(const void* const source, ptrdiff_t stride,
// CdefFilter
// Load 4 vectors based on the given |direction|.
-inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+inline void LoadDirection(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
// Each |direction| describes a different set of source values. Expand this
// set by negating each set. For |direction| == 0 this gives a diagonal line
// from top right to bottom left. The first value is y, the second x. Negative
@@ -463,8 +466,9 @@ inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
// Load 4 vectors based on the given |direction|. Use when |block_width| == 4 to
// do 2 rows at a time.
-void LoadDirection4(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+void LoadDirection4(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
const int y_0 = kCdefDirections[direction][0][0];
const int x_0 = kCdefDirections[direction][0][1];
const int y_1 = kCdefDirections[direction][1][0];
@@ -507,10 +511,11 @@ inline __m128i ApplyConstrainAndTap(const __m128i& pixel, const __m128i& val,
}
template <int width, bool enable_primary = true, bool enable_secondary = true>
-void CdefFilter_SSE4_1(const uint16_t* src, const ptrdiff_t src_stride,
- const int height, const int primary_strength,
- const int secondary_strength, const int damping,
- const int direction, void* dest,
+void CdefFilter_SSE4_1(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ const int primary_strength, const int secondary_strength,
+ const int damping, const int direction,
+ void* LIBGAV1_RESTRICT dest,
const ptrdiff_t dst_stride) {
static_assert(width == 8 || width == 4, "Invalid CDEF width.");
static_assert(enable_primary || enable_secondary, "");
diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc
new file mode 100644
index 0000000..2062683
--- /dev/null
+++ b/src/dsp/x86/common_avx2_test.cc
@@ -0,0 +1,67 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/x86/common_avx2.h"
+
+#include "gtest/gtest.h"
+
+#if LIBGAV1_TARGETING_AVX2
+
+#include <cstdint>
+
+#include "src/utils/common.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+// Show that RightShiftWithRounding_S16() is equal to
+// RightShiftWithRounding() only for values less than or equal to
+// INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
+// RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
+// negative values.
+TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
+ for (int bits = 0; bits < 16; ++bits) {
+ const int bias = (1 << bits) >> 1;
+ for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
+ const __m256i v_val_d = _mm256_set1_epi16(value);
+ const __m256i v_result_d = RightShiftWithRounding_S16(v_val_d, bits);
+ // Note _mm256_extract_epi16 is avoided for compatibility with Visual
+ // Studio < 2017.
+ const int16_t result =
+ _mm_extract_epi16(_mm256_extracti128_si256(v_result_d, 0), 0);
+ const int32_t expected = RightShiftWithRounding(value, bits);
+ if (value <= INT16_MAX - bias) {
+ EXPECT_EQ(result, expected) << "value: " << value << ", bits: " << bits;
+ } else {
+ EXPECT_EQ(expected, 1 << (15 - bits));
+ EXPECT_EQ(result, -expected)
+ << "value: " << value << ", bits: " << bits;
+ }
+ }
+ }
+}
+
+} // namespace
+} // namespace dsp
+} // namespace libgav1
+
+#else // !LIBGAV1_TARGETING_AVX2
+
+TEST(CommonDspTest, AVX2) {
+ GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable "
+ "the tests.";
+}
+
+#endif // LIBGAV1_TARGETING_AVX2
diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc
new file mode 100644
index 0000000..4ea811a
--- /dev/null
+++ b/src/dsp/x86/common_sse4_test.cc
@@ -0,0 +1,64 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/x86/common_sse4.h"
+
+#include "gtest/gtest.h"
+
+#if LIBGAV1_TARGETING_SSE4_1
+
+#include <cstdint>
+
+#include "src/utils/common.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+// Show that RightShiftWithRounding_S16() is equal to
+// RightShiftWithRounding() only for values less than or equal to
+// INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
+// RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
+// negative values.
+TEST(CommonDspTest, SSE4RightShiftWithRoundingS16) {
+ for (int bits = 0; bits < 16; ++bits) {
+ const int bias = (1 << bits) >> 1;
+ for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
+ const __m128i v_val_d = _mm_set1_epi16(value);
+ const __m128i v_result_d = RightShiftWithRounding_S16(v_val_d, bits);
+ const int16_t result = _mm_extract_epi16(v_result_d, 0);
+ const int32_t expected = RightShiftWithRounding(value, bits);
+ if (value <= INT16_MAX - bias) {
+ EXPECT_EQ(result, expected) << "value: " << value << ", bits: " << bits;
+ } else {
+ EXPECT_EQ(expected, 1 << (15 - bits));
+ EXPECT_EQ(result, -expected)
+ << "value: " << value << ", bits: " << bits;
+ }
+ }
+ }
+}
+
+} // namespace
+} // namespace dsp
+} // namespace libgav1
+
+#else // !LIBGAV1_TARGETING_SSE4_1
+
+TEST(CommonDspTest, SSE4) {
+ GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable "
+ "the tests.";
+}
+
+#endif // LIBGAV1_TARGETING_SSE4_1
diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc
index 2ecb77c..4126ca9 100644
--- a/src/dsp/x86/convolve_avx2.cc
+++ b/src/dsp/x86/convolve_avx2.cc
@@ -127,10 +127,11 @@ __m256i HorizontalTaps8To16(const __m256i* const src,
// Filter 2xh sizes.
template <int num_taps, int filter_index, bool is_2d = false,
bool is_compound = false>
-void FilterHorizontal(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int /*width*/, const int height,
- const __m128i* const v_tap) {
+void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int /*width*/,
+ const int height, const __m128i* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
@@ -195,10 +196,11 @@ void FilterHorizontal(const uint8_t* src, const ptrdiff_t src_stride,
// Filter widths >= 4.
template <int num_taps, int filter_index, bool is_2d = false,
bool is_compound = false>
-void FilterHorizontal(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int width, const int height,
- const __m256i* const v_tap) {
+void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height, const __m256i* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
@@ -467,7 +469,8 @@ __m256i SimpleSum2DVerticalTaps(const __m256i* const src,
}
template <int num_taps, bool is_compound = false>
-void Filter2DVertical16xH(const uint16_t* src, void* const dst,
+void Filter2DVertical16xH(const uint16_t* LIBGAV1_RESTRICT src,
+ void* LIBGAV1_RESTRICT const dst,
const ptrdiff_t dst_stride, const int width,
const int height, const __m256i* const taps) {
assert(width >= 8);
@@ -542,9 +545,10 @@ void Filter2DVertical16xH(const uint16_t* src, void* const dst,
template <bool is_2d = false, bool is_compound = false>
LIBGAV1_ALWAYS_INLINE void DoHorizontalPass2xH(
- const uint8_t* const src, const ptrdiff_t src_stride, void* const dst,
- const ptrdiff_t dst_stride, const int width, const int height,
- const int filter_id, const int filter_index) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst, const ptrdiff_t dst_stride,
+ const int width, const int height, const int filter_id,
+ const int filter_index) {
assert(filter_id != 0);
__m128i v_tap[4];
const __m128i v_horizontal_filter =
@@ -567,9 +571,10 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass2xH(
template <bool is_2d = false, bool is_compound = false>
LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
- const uint8_t* const src, const ptrdiff_t src_stride, void* const dst,
- const ptrdiff_t dst_stride, const int width, const int height,
- const int filter_id, const int filter_index) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst, const ptrdiff_t dst_stride,
+ const int width, const int height, const int filter_id,
+ const int filter_index) {
assert(filter_id != 0);
__m256i v_tap[4];
const __m128i v_horizontal_filter =
@@ -602,13 +607,13 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
}
}
-void Convolve2D_AVX2(const void* const reference,
+void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int horizontal_filter_id,
const int vertical_filter_id, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
@@ -774,10 +779,11 @@ __m256i SumVerticalTaps(const __m256i* const srcs, const __m256i* const v_tap) {
}
template <int filter_index, bool is_compound = false>
-void FilterVertical32xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int width, const int height,
- const __m256i* const v_tap) {
+void FilterVertical32xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int width,
+ const int height, const __m256i* const v_tap) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps - 1;
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -856,10 +862,11 @@ void FilterVertical32xH(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool is_compound = false>
-void FilterVertical16xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int /*width*/, const int height,
- const __m256i* const v_tap) {
+void FilterVertical16xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int /*width*/,
+ const int height, const __m256i* const v_tap) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps;
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -958,10 +965,11 @@ void FilterVertical16xH(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool is_compound = false>
-void FilterVertical8xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int /*width*/, const int height,
- const __m256i* const v_tap) {
+void FilterVertical8xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int /*width*/,
+ const int height, const __m256i* const v_tap) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps;
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -1055,10 +1063,11 @@ void FilterVertical8xH(const uint8_t* src, const ptrdiff_t src_stride,
}
template <int filter_index, bool is_compound = false>
-void FilterVertical8xH(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int /*width*/, const int height,
- const __m128i* const v_tap) {
+void FilterVertical8xH(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int /*width*/,
+ const int height, const __m128i* const v_tap) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps - 1;
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -1119,13 +1128,13 @@ void FilterVertical8xH(const uint8_t* src, const ptrdiff_t src_stride,
} while (--y != 0);
}
-void ConvolveVertical_AVX2(const void* const reference,
+void ConvolveVertical_AVX2(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int vertical_filter_index,
const int /*horizontal_filter_id*/,
const int vertical_filter_id, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
@@ -1257,11 +1266,11 @@ void ConvolveVertical_AVX2(const void* const reference,
}
void ConvolveCompoundVertical_AVX2(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int vertical_filter_index,
- const int /*horizontal_filter_id*/, const int vertical_filter_id,
- const int width, const int height, void* prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t /*pred_stride*/) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
const ptrdiff_t src_stride = reference_stride;
@@ -1366,14 +1375,12 @@ void ConvolveCompoundVertical_AVX2(
}
}
-void ConvolveHorizontal_AVX2(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int /*vertical_filter_index*/,
- const int horizontal_filter_id,
- const int /*vertical_filter_id*/, const int width,
- const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveHorizontal_AVX2(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
// Set |src| to the outermost tap.
const auto* src = static_cast<const uint8_t*>(reference) - kHorizontalOffset;
@@ -1390,11 +1397,11 @@ void ConvolveHorizontal_AVX2(const void* const reference,
}
void ConvolveCompoundHorizontal_AVX2(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int /*vertical_filter_index*/,
- const int horizontal_filter_id, const int /*vertical_filter_id*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
// Set |src| to the outermost tap.
const auto* src = static_cast<const uint8_t*>(reference) - kHorizontalOffset;
@@ -1415,14 +1422,12 @@ void ConvolveCompoundHorizontal_AVX2(
filter_index);
}
-void ConvolveCompound2D_AVX2(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int vertical_filter_index,
- const int horizontal_filter_id,
- const int vertical_filter_id, const int width,
- const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveCompound2D_AVX2(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int vertical_filter_index, const int horizontal_filter_id,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(vert_filter_index);
diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc
index 9b72fe4..f7e5a71 100644
--- a/src/dsp/x86/convolve_sse4.cc
+++ b/src/dsp/x86/convolve_sse4.cc
@@ -37,7 +37,7 @@ namespace {
#include "src/dsp/x86/convolve_sse4.inc"
template <int filter_index>
-__m128i SumHorizontalTaps(const uint8_t* const src,
+__m128i SumHorizontalTaps(const uint8_t* LIBGAV1_RESTRICT const src,
const __m128i* const v_tap) {
__m128i v_src[4];
const __m128i src_long = LoadUnaligned16(src);
@@ -68,7 +68,7 @@ __m128i SumHorizontalTaps(const uint8_t* const src,
}
template <int filter_index>
-__m128i SimpleHorizontalTaps(const uint8_t* const src,
+__m128i SimpleHorizontalTaps(const uint8_t* LIBGAV1_RESTRICT const src,
const __m128i* const v_tap) {
__m128i sum = SumHorizontalTaps<filter_index>(src, v_tap);
@@ -84,7 +84,7 @@ __m128i SimpleHorizontalTaps(const uint8_t* const src,
}
template <int filter_index>
-__m128i HorizontalTaps8To16(const uint8_t* const src,
+__m128i HorizontalTaps8To16(const uint8_t* LIBGAV1_RESTRICT const src,
const __m128i* const v_tap) {
const __m128i sum = SumHorizontalTaps<filter_index>(src, v_tap);
@@ -93,10 +93,11 @@ __m128i HorizontalTaps8To16(const uint8_t* const src,
template <int num_taps, int filter_index, bool is_2d = false,
bool is_compound = false>
-void FilterHorizontal(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dest, const ptrdiff_t pred_stride,
- const int width, const int height,
- const __m128i* const v_tap) {
+void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t pred_stride, const int width,
+ const int height, const __m128i* const v_tap) {
auto* dest8 = static_cast<uint8_t*>(dest);
auto* dest16 = static_cast<uint16_t*>(dest);
@@ -206,9 +207,10 @@ void FilterHorizontal(const uint8_t* src, const ptrdiff_t src_stride,
template <bool is_2d = false, bool is_compound = false>
LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
- const uint8_t* const src, const ptrdiff_t src_stride, void* const dst,
- const ptrdiff_t dst_stride, const int width, const int height,
- const int filter_id, const int filter_index) {
+ const uint8_t* LIBGAV1_RESTRICT const src, const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst, const ptrdiff_t dst_stride,
+ const int width, const int height, const int filter_id,
+ const int filter_index) {
assert(filter_id != 0);
__m128i v_tap[4];
const __m128i v_horizontal_filter =
@@ -241,13 +243,13 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
}
}
-void Convolve2D_SSE4_1(const void* const reference,
+void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int horizontal_filter_id,
const int vertical_filter_id, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
@@ -328,10 +330,11 @@ void Convolve2D_SSE4_1(const void* const reference,
}
template <int filter_index, bool is_compound = false>
-void FilterVertical(const uint8_t* src, const ptrdiff_t src_stride,
- void* const dst, const ptrdiff_t dst_stride,
- const int width, const int height,
- const __m128i* const v_tap) {
+void FilterVertical(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride,
+ void* LIBGAV1_RESTRICT const dst,
+ const ptrdiff_t dst_stride, const int width,
+ const int height, const __m128i* const v_tap) {
const int num_taps = GetNumTapsInFilter(filter_index);
const int next_row = num_taps - 1;
auto* dst8 = static_cast<uint8_t*>(dst);
@@ -400,14 +403,12 @@ void FilterVertical(const uint8_t* src, const ptrdiff_t src_stride,
} while (x < width);
}
-void ConvolveVertical_SSE4_1(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int vertical_filter_index,
- const int /*horizontal_filter_id*/,
- const int vertical_filter_id, const int width,
- const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+void ConvolveVertical_SSE4_1(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
const ptrdiff_t src_stride = reference_stride;
@@ -477,14 +478,12 @@ void ConvolveVertical_SSE4_1(const void* const reference,
}
}
-void ConvolveCompoundCopy_SSE4(const void* const reference,
- const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/,
- const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/,
- const int /*vertical_filter_id*/,
- const int width, const int height,
- void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveCompoundCopy_SSE4(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
const ptrdiff_t src_stride = reference_stride;
auto* dest = static_cast<uint16_t*>(prediction);
@@ -539,11 +538,11 @@ void ConvolveCompoundCopy_SSE4(const void* const reference,
}
void ConvolveCompoundVertical_SSE4_1(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int vertical_filter_index,
- const int /*horizontal_filter_id*/, const int vertical_filter_id,
- const int width, const int height, void* prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int vertical_filter_index, const int /*horizontal_filter_id*/,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t /*pred_stride*/) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
const int vertical_taps = GetNumTapsInFilter(filter_index);
const ptrdiff_t src_stride = reference_stride;
@@ -608,14 +607,12 @@ void ConvolveCompoundVertical_SSE4_1(
}
}
-void ConvolveHorizontal_SSE4_1(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int /*vertical_filter_index*/,
- const int horizontal_filter_id,
- const int /*vertical_filter_id*/,
- const int width, const int height,
- void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveHorizontal_SSE4_1(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
// Set |src| to the outermost tap.
const auto* src = static_cast<const uint8_t*>(reference) - kHorizontalOffset;
@@ -626,11 +623,11 @@ void ConvolveHorizontal_SSE4_1(const void* const reference,
}
void ConvolveCompoundHorizontal_SSE4_1(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int /*vertical_filter_index*/,
- const int horizontal_filter_id, const int /*vertical_filter_id*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t /*pred_stride*/) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int /*vertical_filter_index*/, const int horizontal_filter_id,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t /*pred_stride*/) {
const int filter_index = GetFilterIndex(horizontal_filter_index, width);
const auto* src = static_cast<const uint8_t*>(reference) - kHorizontalOffset;
auto* dest = static_cast<uint16_t*>(prediction);
@@ -640,14 +637,12 @@ void ConvolveCompoundHorizontal_SSE4_1(
filter_index);
}
-void ConvolveCompound2D_SSE4_1(const void* const reference,
- const ptrdiff_t reference_stride,
- const int horizontal_filter_index,
- const int vertical_filter_index,
- const int horizontal_filter_id,
- const int vertical_filter_id, const int width,
- const int height, void* prediction,
- const ptrdiff_t /*pred_stride*/) {
+void ConvolveCompound2D_SSE4_1(
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int horizontal_filter_index,
+ const int vertical_filter_index, const int horizontal_filter_id,
+ const int vertical_filter_id, const int width, const int height,
+ void* LIBGAV1_RESTRICT prediction, const ptrdiff_t /*pred_stride*/) {
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
alignas(16) uint16_t
@@ -835,7 +830,8 @@ inline void GetHalfSubPixelFilter(__m128i* output) {
// exceed 4 when width <= 4, |grade_x| is set to 1 regardless of the value of
// |step_x|.
template <int num_taps, int grade_x>
-inline void PrepareSourceVectors(const uint8_t* src, const __m128i src_indices,
+inline void PrepareSourceVectors(const uint8_t* LIBGAV1_RESTRICT src,
+ const __m128i src_indices,
__m128i* const source /*[num_taps >> 1]*/) {
// |used_bytes| is only computed in msan builds. Mask away unused bytes for
// msan because it incorrectly models the outcome of the shuffles in some
@@ -900,10 +896,11 @@ inline __m128i HorizontalScaleIndices(const __m128i subpel_indices) {
}
template <int grade_x, int filter_index, int num_taps>
-inline void ConvolveHorizontalScale(const uint8_t* src, ptrdiff_t src_stride,
- int width, int subpixel_x, int step_x,
+inline void ConvolveHorizontalScale(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t src_stride, int width,
+ int subpixel_x, int step_x,
int intermediate_height,
- int16_t* intermediate) {
+ int16_t* LIBGAV1_RESTRICT intermediate) {
// Account for the 0-taps that precede the 2 nonzero taps.
const int kernel_offset = (8 - num_taps) >> 1;
const int ref_x = subpixel_x >> kScaleSubPixelBits;
@@ -946,11 +943,11 @@ inline void ConvolveHorizontalScale(const uint8_t* src, ptrdiff_t src_stride,
}
// |width| >= 8
+ int16_t* intermediate_x = intermediate;
int x = 0;
do {
const uint8_t* src_x =
&src[(p >> kScaleSubPixelBits) - ref_x + kernel_offset];
- int16_t* intermediate_x = intermediate + x;
// Only add steps to the 10-bit truncated p to avoid overflow.
const __m128i p_fraction = _mm_set1_epi16(p & 1023);
const __m128i subpel_indices = _mm_add_epi16(index_steps, p_fraction);
@@ -976,7 +973,8 @@ inline void ConvolveHorizontalScale(const uint8_t* src, ptrdiff_t src_stride,
}
template <int num_taps>
-inline void PrepareVerticalTaps(const int8_t* taps, __m128i* output) {
+inline void PrepareVerticalTaps(const int8_t* LIBGAV1_RESTRICT taps,
+ __m128i* output) {
// Avoid overreading the filter due to starting at kernel_offset.
// The only danger of overread is in the final filter, which has 4 taps.
const __m128i filter =
@@ -1072,10 +1070,12 @@ __m128i Sum2DVerticalTaps4x2(const __m128i* const src, const __m128i* taps_lo,
// |width_class| is 2, 4, or 8, according to the Store function that should be
// used.
template <int num_taps, int width_class, bool is_compound>
-inline void ConvolveVerticalScale(const int16_t* src, const int width,
- const int subpixel_y, const int filter_index,
- const int step_y, const int height,
- void* dest, const ptrdiff_t dest_stride) {
+inline void ConvolveVerticalScale(const int16_t* LIBGAV1_RESTRICT src,
+ const int intermediate_height,
+ const int width, const int subpixel_y,
+ const int filter_index, const int step_y,
+ const int height, void* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t dest_stride) {
constexpr ptrdiff_t src_stride = kIntermediateStride;
constexpr int kernel_offset = (8 - num_taps) / 2;
const int16_t* src_y = src;
@@ -1138,15 +1138,19 @@ inline void ConvolveVerticalScale(const int16_t* src, const int width,
// |width_class| >= 8
__m128i filter_taps[num_taps >> 1];
- do { // y > 0
- src_y = src + (p >> kScaleSubPixelBits) * src_stride;
- const int filter_id = (p >> 6) & kSubPixelMask;
- const int8_t* filter =
- kHalfSubPixelFilters[filter_index][filter_id] + kernel_offset;
- PrepareVerticalTaps<num_taps>(filter, filter_taps);
-
- int x = 0;
- do { // x < width
+ int x = 0;
+ do { // x < width
+ auto* dest_y = static_cast<uint8_t*>(dest) + x;
+ auto* dest16_y = static_cast<uint16_t*>(dest) + x;
+ int p = subpixel_y & 1023;
+ int y = height;
+ do { // y > 0
+ const int filter_id = (p >> 6) & kSubPixelMask;
+ const int8_t* filter =
+ kHalfSubPixelFilters[filter_index][filter_id] + kernel_offset;
+ PrepareVerticalTaps<num_taps>(filter, filter_taps);
+
+ src_y = src + (p >> kScaleSubPixelBits) * src_stride;
for (int i = 0; i < num_taps; ++i) {
s[i] = LoadUnaligned16(src_y + i * src_stride);
}
@@ -1154,38 +1158,36 @@ inline void ConvolveVerticalScale(const int16_t* src, const int width,
const __m128i sums =
Sum2DVerticalTaps<num_taps, is_compound>(s, filter_taps);
if (is_compound) {
- StoreUnaligned16(dest16_y + x, sums);
+ StoreUnaligned16(dest16_y, sums);
} else {
- StoreLo8(dest_y + x, _mm_packus_epi16(sums, sums));
+ StoreLo8(dest_y, _mm_packus_epi16(sums, sums));
}
- x += 8;
- src_y += 8;
- } while (x < width);
- p += step_y;
- dest_y += dest_stride;
- dest16_y += dest_stride;
- } while (--y != 0);
+ p += step_y;
+ dest_y += dest_stride;
+ dest16_y += dest_stride;
+ } while (--y != 0);
+ src += kIntermediateStride * intermediate_height;
+ x += 8;
+ } while (x < width);
}
template <bool is_compound>
-void ConvolveScale2D_SSE4_1(const void* const reference,
+void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int vertical_filter_index,
const int subpixel_x, const int subpixel_y,
const int step_x, const int step_y, const int width,
- const int height, void* prediction,
+ const int height, void* LIBGAV1_RESTRICT prediction,
const ptrdiff_t pred_stride) {
const int horiz_filter_index = GetFilterIndex(horizontal_filter_index, width);
const int vert_filter_index = GetFilterIndex(vertical_filter_index, height);
assert(step_x <= 2048);
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
- // TODO(petersonab): Reduce intermediate block stride to width to make smaller
- // blocks faster.
alignas(16) int16_t
- intermediate_result[kMaxSuperBlockSizeInPixels *
- (2 * kMaxSuperBlockSizeInPixels + kSubPixelTaps)];
+ intermediate_result[kIntermediateAllocWidth *
+ (2 * kIntermediateAllocWidth + kSubPixelTaps)];
const int num_vert_taps = GetNumTapsInFilter(vert_filter_index);
const int intermediate_height =
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
@@ -1282,76 +1284,78 @@ void ConvolveScale2D_SSE4_1(const void* const reference,
case 1:
if (!is_compound && width == 2) {
ConvolveVerticalScale<6, 2, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else if (width == 4) {
ConvolveVerticalScale<6, 4, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else {
ConvolveVerticalScale<6, 8, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
}
break;
case 2:
if (!is_compound && width == 2) {
ConvolveVerticalScale<8, 2, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else if (width == 4) {
ConvolveVerticalScale<8, 4, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else {
ConvolveVerticalScale<8, 8, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
}
break;
case 3:
if (!is_compound && width == 2) {
ConvolveVerticalScale<2, 2, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else if (width == 4) {
ConvolveVerticalScale<2, 4, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else {
ConvolveVerticalScale<2, 8, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
}
break;
default:
assert(vert_filter_index == 4 || vert_filter_index == 5);
if (!is_compound && width == 2) {
ConvolveVerticalScale<4, 2, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else if (width == 4) {
ConvolveVerticalScale<4, 4, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
} else {
ConvolveVerticalScale<4, 8, is_compound>(
- intermediate, width, subpixel_y, vert_filter_index, step_y, height,
- prediction, pred_stride);
+ intermediate, intermediate_height, width, subpixel_y,
+ vert_filter_index, step_y, height, prediction, pred_stride);
}
}
}
-inline void HalfAddHorizontal(const uint8_t* src, uint8_t* dst) {
+inline void HalfAddHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
+ uint8_t* LIBGAV1_RESTRICT dst) {
const __m128i left = LoadUnaligned16(src);
const __m128i right = LoadUnaligned16(src + 1);
StoreUnaligned16(dst, _mm_avg_epu8(left, right));
}
template <int width>
-inline void IntraBlockCopyHorizontal(const uint8_t* src,
+inline void IntraBlockCopyHorizontal(const uint8_t* LIBGAV1_RESTRICT src,
const ptrdiff_t src_stride,
- const int height, uint8_t* dst,
+ const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 16);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 16);
@@ -1392,10 +1396,11 @@ inline void IntraBlockCopyHorizontal(const uint8_t* src,
}
void ConvolveIntraBlockCopyHorizontal_SSE4_1(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
- const int height, void* const prediction, const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*subpixel_x*/,
+ const int /*subpixel_y*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -1464,9 +1469,10 @@ void ConvolveIntraBlockCopyHorizontal_SSE4_1(
}
template <int width>
-inline void IntraBlockCopyVertical(const uint8_t* src,
+inline void IntraBlockCopyVertical(const uint8_t* LIBGAV1_RESTRICT src,
const ptrdiff_t src_stride, const int height,
- uint8_t* dst, const ptrdiff_t dst_stride) {
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 16);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 16);
__m128i row[8], below[8];
@@ -1553,11 +1559,11 @@ inline void IntraBlockCopyVertical(const uint8_t* src,
}
void ConvolveIntraBlockCopyVertical_SSE4_1(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -1622,7 +1628,8 @@ void ConvolveIntraBlockCopyVertical_SSE4_1(
}
// Load then add two uint8_t vectors. Return the uint16_t vector result.
-inline __m128i LoadU8AndAddLong(const uint8_t* src, const uint8_t* src1) {
+inline __m128i LoadU8AndAddLong(const uint8_t* LIBGAV1_RESTRICT src,
+ const uint8_t* LIBGAV1_RESTRICT src1) {
const __m128i a = _mm_cvtepu8_epi16(LoadLo8(src));
const __m128i b = _mm_cvtepu8_epi16(LoadLo8(src1));
return _mm_add_epi16(a, b);
@@ -1637,8 +1644,9 @@ inline __m128i AddU16RightShift2AndPack(__m128i v0, __m128i v1) {
}
template <int width>
-inline void IntraBlockCopy2D(const uint8_t* src, const ptrdiff_t src_stride,
- const int height, uint8_t* dst,
+inline void IntraBlockCopy2D(const uint8_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const ptrdiff_t src_remainder_stride = src_stride - (width - 8);
const ptrdiff_t dst_remainder_stride = dst_stride - (width - 8);
@@ -1793,11 +1801,11 @@ inline void IntraBlockCopy2D(const uint8_t* src, const ptrdiff_t src_stride,
}
void ConvolveIntraBlockCopy2D_SSE4_1(
- const void* const reference, const ptrdiff_t reference_stride,
- const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*horizontal_filter_id*/, const int /*vertical_filter_id*/,
- const int width, const int height, void* const prediction,
- const ptrdiff_t pred_stride) {
+ const void* LIBGAV1_RESTRICT const reference,
+ const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
+ const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
+ const int /*vertical_filter_id*/, const int width, const int height,
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
// Note: allow vertical access to height + 1. Because this function is only
diff --git a/src/dsp/x86/distance_weighted_blend_sse4.cc b/src/dsp/x86/distance_weighted_blend_sse4.cc
index 3c29b19..c813df4 100644
--- a/src/dsp/x86/distance_weighted_blend_sse4.cc
+++ b/src/dsp/x86/distance_weighted_blend_sse4.cc
@@ -54,8 +54,10 @@ inline __m128i ComputeWeightedAverage8(const __m128i& pred0,
template <int height>
inline void DistanceWeightedBlend4xH_SSE4_1(
- const int16_t* pred_0, const int16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, void* const dest, const ptrdiff_t dest_stride) {
+ const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const __m128i weights = _mm_set1_epi32(weight_0 | (weight_1 << 16));
@@ -98,8 +100,10 @@ inline void DistanceWeightedBlend4xH_SSE4_1(
template <int height>
inline void DistanceWeightedBlend8xH_SSE4_1(
- const int16_t* pred_0, const int16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, void* const dest, const ptrdiff_t dest_stride) {
+ const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const __m128i weights = _mm_set1_epi32(weight_0 | (weight_1 << 16));
@@ -125,9 +129,10 @@ inline void DistanceWeightedBlend8xH_SSE4_1(
}
inline void DistanceWeightedBlendLarge_SSE4_1(
- const int16_t* pred_0, const int16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, const int width, const int height, void* const dest,
- const ptrdiff_t dest_stride) {
+ const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const __m128i weights = _mm_set1_epi32(weight_0 | (weight_1 << 16));
@@ -154,11 +159,12 @@ inline void DistanceWeightedBlendLarge_SSE4_1(
} while (--y != 0);
}
-void DistanceWeightedBlend_SSE4_1(const void* prediction_0,
- const void* prediction_1,
+void DistanceWeightedBlend_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const uint8_t weight_0,
const uint8_t weight_1, const int width,
- const int height, void* const dest,
+ const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
@@ -257,8 +263,10 @@ inline __m128i ComputeWeightedAverage8(const __m128i& pred0,
template <int height>
inline void DistanceWeightedBlend4xH_SSE4_1(
- const uint16_t* pred_0, const uint16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, void* const dest, const ptrdiff_t dest_stride) {
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const __m128i weight0 = _mm_set1_epi32(weight_0);
const __m128i weight1 = _mm_set1_epi32(weight_1);
@@ -301,8 +309,10 @@ inline void DistanceWeightedBlend4xH_SSE4_1(
template <int height>
inline void DistanceWeightedBlend8xH_SSE4_1(
- const uint16_t* pred_0, const uint16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, void* const dest, const ptrdiff_t dest_stride) {
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const __m128i weight0 = _mm_set1_epi32(weight_0);
const __m128i weight1 = _mm_set1_epi32(weight_1);
@@ -332,9 +342,10 @@ inline void DistanceWeightedBlend8xH_SSE4_1(
}
inline void DistanceWeightedBlendLarge_SSE4_1(
- const uint16_t* pred_0, const uint16_t* pred_1, const uint8_t weight_0,
- const uint8_t weight_1, const int width, const int height, void* const dest,
- const ptrdiff_t dest_stride) {
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const uint8_t weight_0,
+ const uint8_t weight_1, const int width, const int height,
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const __m128i weight0 = _mm_set1_epi32(weight_0);
const __m128i weight1 = _mm_set1_epi32(weight_1);
@@ -364,11 +375,12 @@ inline void DistanceWeightedBlendLarge_SSE4_1(
} while (--y != 0);
}
-void DistanceWeightedBlend_SSE4_1(const void* prediction_0,
- const void* prediction_1,
+void DistanceWeightedBlend_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const uint8_t weight_0,
const uint8_t weight_1, const int width,
- const int height, void* const dest,
+ const int height,
+ void* LIBGAV1_RESTRICT const dest,
const ptrdiff_t dest_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
diff --git a/src/dsp/x86/film_grain_sse4.cc b/src/dsp/x86/film_grain_sse4.cc
index 745c1ca..9ece947 100644
--- a/src/dsp/x86/film_grain_sse4.cc
+++ b/src/dsp/x86/film_grain_sse4.cc
@@ -126,30 +126,16 @@ inline __m128i Clip3(const __m128i value, const __m128i low,
}
template <int bitdepth, typename Pixel>
-inline __m128i GetScalingFactors(
- const uint8_t scaling_lut[kScalingLookupTableSize], const Pixel* source) {
+inline __m128i GetScalingFactors(const int16_t* scaling_lut,
+ const Pixel* source) {
alignas(16) int16_t start_vals[8];
- if (bitdepth == 8) {
- // TODO(petersonab): Speed this up by creating a uint16_t scaling_lut.
- // Currently this code results in a series of movzbl.
- for (int i = 0; i < 8; ++i) {
- start_vals[i] = scaling_lut[source[i]];
- }
- return LoadAligned16(start_vals);
- }
- alignas(16) int16_t end_vals[8];
- // TODO(petersonab): Precompute this into a larger table for direct lookups.
+ static_assert(bitdepth <= kBitdepth10,
+ "SSE4 Film Grain is not yet implemented for 12bpp.");
for (int i = 0; i < 8; ++i) {
- const int index = source[i] >> 2;
- start_vals[i] = scaling_lut[index];
- end_vals[i] = scaling_lut[index + 1];
+ assert(source[i] < kScalingLookupTableSize << (bitdepth - 2));
+ start_vals[i] = scaling_lut[source[i]];
}
- const __m128i start = LoadAligned16(start_vals);
- const __m128i end = LoadAligned16(end_vals);
- __m128i remainder = LoadSource(source);
- remainder = _mm_srli_epi16(_mm_slli_epi16(remainder, 14), 1);
- const __m128i delta = _mm_mulhrs_epi16(_mm_sub_epi16(end, start), remainder);
- return _mm_add_epi16(start, delta);
+ return LoadAligned16(start_vals);
}
// |scaling_shift| is in range [8,11].
@@ -162,11 +148,10 @@ inline __m128i ScaleNoise(const __m128i noise, const __m128i scaling,
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageLuma_SSE4_1(
- const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
- int width, int height, int start_height,
- const uint8_t scaling_lut_y[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
- ptrdiff_t dest_stride_y) {
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_luma,
+ int scaling_shift, int width, int height, int start_height,
+ const int16_t* scaling_lut_y, const void* source_plane_y,
+ ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y) {
const auto* noise_image =
static_cast<const Array2D<GrainType>*>(noise_image_ptr);
const auto* in_y_row = static_cast<const Pixel*>(source_plane_y);
@@ -181,7 +166,6 @@ void BlendNoiseWithImageLuma_SSE4_1(
do {
int x = 0;
for (; x < safe_width; x += 8) {
- // TODO(b/133525232): Make 16-pixel version of loop body.
const __m128i orig = LoadSource(&in_y_row[x]);
const __m128i scaling =
GetScalingFactors<bitdepth, Pixel>(scaling_lut_y, &in_y_row[x]);
@@ -216,9 +200,9 @@ void BlendNoiseWithImageLuma_SSE4_1(
template <int bitdepth, typename GrainType, typename Pixel>
inline __m128i BlendChromaValsWithCfl(
- const Pixel* average_luma_buffer,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const Pixel* chroma_cursor, const GrainType* noise_image_cursor,
+ const Pixel* LIBGAV1_RESTRICT average_luma_buffer,
+ const int16_t* scaling_lut, const Pixel* LIBGAV1_RESTRICT chroma_cursor,
+ const GrainType* LIBGAV1_RESTRICT noise_image_cursor,
const __m128i scaling_shift) {
const __m128i scaling =
GetScalingFactors<bitdepth, Pixel>(scaling_lut, average_luma_buffer);
@@ -232,11 +216,10 @@ template <int bitdepth, typename GrainType, typename Pixel>
LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_SSE4_1(
const Array2D<GrainType>& noise_image, int min_value, int max_chroma,
int width, int height, int start_height, int subsampling_x,
- int subsampling_y, int scaling_shift,
- const uint8_t scaling_lut[kScalingLookupTableSize], const Pixel* in_y_row,
- ptrdiff_t source_stride_y, const Pixel* in_chroma_row,
- ptrdiff_t source_stride_chroma, Pixel* out_chroma_row,
- ptrdiff_t dest_stride) {
+ int subsampling_y, int scaling_shift, const int16_t* scaling_lut,
+ const Pixel* LIBGAV1_RESTRICT in_y_row, ptrdiff_t source_stride_y,
+ const Pixel* in_chroma_row, ptrdiff_t source_stride_chroma,
+ Pixel* out_chroma_row, ptrdiff_t dest_stride) {
const __m128i floor = _mm_set1_epi16(min_value);
const __m128i ceiling = _mm_set1_epi16(max_chroma);
alignas(16) Pixel luma_buffer[16];
@@ -258,8 +241,6 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_SSE4_1(
int x = 0;
for (; x < safe_chroma_width; x += 8) {
const int luma_x = x << subsampling_x;
- // TODO(petersonab): Consider specializing by subsampling_x. In the 444
- // case &in_y_row[x] can be passed to GetScalingFactors directly.
const __m128i average_luma =
GetAverageLuma(&in_y_row[luma_x], subsampling_x);
StoreUnsigned(average_luma_buffer, average_luma);
@@ -277,7 +258,7 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_SSE4_1(
// Prevent huge indices from entering GetScalingFactors due to
// uninitialized values. This is not a problem in 8bpp because the table
// is made larger than 255 values.
- if (bitdepth > 8) {
+ if (bitdepth > kBitdepth8) {
memset(luma_buffer, 0, sizeof(luma_buffer));
}
const int luma_x = x << subsampling_x;
@@ -306,11 +287,11 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlaneWithCfl_SSE4_1(
// This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
template <int bitdepth, typename GrainType, typename Pixel>
void BlendNoiseWithImageChromaWithCfl_SSE4_1(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* scaling_lut,
+ const void* LIBGAV1_RESTRICT source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
const auto* noise_image =
@@ -335,10 +316,10 @@ namespace {
// |offset| is 32x4 packed to add with the result of _mm_madd_epi16.
inline __m128i BlendChromaValsNoCfl8bpp(
- const uint8_t scaling_lut[kScalingLookupTableSize], const __m128i& orig,
- const int8_t* noise_image_cursor, const __m128i& average_luma,
- const __m128i& scaling_shift, const __m128i& offset,
- const __m128i& weights) {
+ const int16_t* scaling_lut, const __m128i& orig,
+ const int8_t* LIBGAV1_RESTRICT noise_image_cursor,
+ const __m128i& average_luma, const __m128i& scaling_shift,
+ const __m128i& offset, const __m128i& weights) {
uint8_t merged_buffer[8];
const __m128i combined_lo =
_mm_madd_epi16(_mm_unpacklo_epi16(average_luma, orig), weights);
@@ -351,9 +332,9 @@ inline __m128i BlendChromaValsNoCfl8bpp(
StoreLo8(merged_buffer, _mm_packus_epi16(merged, merged));
const __m128i scaling =
- GetScalingFactors<8, uint8_t>(scaling_lut, merged_buffer);
+ GetScalingFactors<kBitdepth8, uint8_t>(scaling_lut, merged_buffer);
__m128i noise = LoadSource(noise_image_cursor);
- noise = ScaleNoise<8>(noise, scaling, scaling_shift);
+ noise = ScaleNoise<kBitdepth8>(noise, scaling, scaling_shift);
return _mm_add_epi16(orig, noise);
}
@@ -361,11 +342,10 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_SSE4_1(
const Array2D<int8_t>& noise_image, int min_value, int max_chroma,
int width, int height, int start_height, int subsampling_x,
int subsampling_y, int scaling_shift, int chroma_offset,
- int chroma_multiplier, int luma_multiplier,
- const uint8_t scaling_lut[kScalingLookupTableSize], const uint8_t* in_y_row,
- ptrdiff_t source_stride_y, const uint8_t* in_chroma_row,
- ptrdiff_t source_stride_chroma, uint8_t* out_chroma_row,
- ptrdiff_t dest_stride) {
+ int chroma_multiplier, int luma_multiplier, const int16_t* scaling_lut,
+ const uint8_t* LIBGAV1_RESTRICT in_y_row, ptrdiff_t source_stride_y,
+ const uint8_t* in_chroma_row, ptrdiff_t source_stride_chroma,
+ uint8_t* out_chroma_row, ptrdiff_t dest_stride) {
const __m128i floor = _mm_set1_epi16(min_value);
const __m128i ceiling = _mm_set1_epi16(max_chroma);
@@ -432,11 +412,11 @@ LIBGAV1_ALWAYS_INLINE void BlendChromaPlane8bpp_SSE4_1(
// This function is for the case params_.chroma_scaling_from_luma == false.
void BlendNoiseWithImageChroma8bpp_SSE4_1(
- Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
- int min_value, int max_chroma, int width, int height, int start_height,
- int subsampling_x, int subsampling_y,
- const uint8_t scaling_lut[kScalingLookupTableSize],
- const void* source_plane_y, ptrdiff_t source_stride_y,
+ Plane plane, const FilmGrainParams& params,
+ const void* LIBGAV1_RESTRICT noise_image_ptr, int min_value, int max_chroma,
+ int width, int height, int start_height, int subsampling_x,
+ int subsampling_y, const int16_t* scaling_lut,
+ const void* LIBGAV1_RESTRICT source_plane_y, ptrdiff_t source_stride_y,
const void* source_plane_uv, ptrdiff_t source_stride_uv,
void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
assert(plane == kPlaneU || plane == kPlaneV);
@@ -463,10 +443,10 @@ void Init8bpp() {
assert(dsp != nullptr);
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_SSE4_1<8, int8_t, uint8_t>;
+ BlendNoiseWithImageLuma_SSE4_1<kBitdepth8, int8_t, uint8_t>;
dsp->film_grain.blend_noise_chroma[0] = BlendNoiseWithImageChroma8bpp_SSE4_1;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_SSE4_1<8, int8_t, uint8_t>;
+ BlendNoiseWithImageChromaWithCfl_SSE4_1<kBitdepth8, int8_t, uint8_t>;
}
} // namespace
@@ -481,9 +461,9 @@ void Init10bpp() {
assert(dsp != nullptr);
dsp->film_grain.blend_noise_luma =
- BlendNoiseWithImageLuma_SSE4_1<10, int16_t, uint16_t>;
+ BlendNoiseWithImageLuma_SSE4_1<kBitdepth10, int16_t, uint16_t>;
dsp->film_grain.blend_noise_chroma[1] =
- BlendNoiseWithImageChromaWithCfl_SSE4_1<10, int16_t, uint16_t>;
+ BlendNoiseWithImageChromaWithCfl_SSE4_1<kBitdepth10, int16_t, uint16_t>;
}
} // namespace
diff --git a/src/dsp/x86/intra_edge_sse4.cc b/src/dsp/x86/intra_edge_sse4.cc
index d6af907..967be06 100644
--- a/src/dsp/x86/intra_edge_sse4.cc
+++ b/src/dsp/x86/intra_edge_sse4.cc
@@ -41,7 +41,8 @@ constexpr int kMaxEdgeBufferSize = 129;
// This function applies the kernel [0, 4, 8, 4, 0] to 12 values.
// Assumes |edge| has 16 packed byte values. Produces 12 filter outputs to
// write as overlapping sets of 8-bytes.
-inline void ComputeKernel1Store12(uint8_t* dest, const uint8_t* source) {
+inline void ComputeKernel1Store12(uint8_t* LIBGAV1_RESTRICT dest,
+ const uint8_t* LIBGAV1_RESTRICT source) {
const __m128i edge_lo = LoadUnaligned16(source);
const __m128i edge_hi = _mm_srli_si128(edge_lo, 6);
// Samples matched with the '4' tap, expanded to 16-bit.
@@ -77,7 +78,8 @@ inline void ComputeKernel1Store12(uint8_t* dest, const uint8_t* source) {
// This function applies the kernel [0, 5, 6, 5, 0] to 12 values.
// Assumes |edge| has 8 packed byte values, and that the 2 invalid values will
// be overwritten or safely discarded.
-inline void ComputeKernel2Store12(uint8_t* dest, const uint8_t* source) {
+inline void ComputeKernel2Store12(uint8_t* LIBGAV1_RESTRICT dest,
+ const uint8_t* LIBGAV1_RESTRICT source) {
const __m128i edge_lo = LoadUnaligned16(source);
const __m128i edge_hi = _mm_srli_si128(edge_lo, 6);
const __m128i outers_lo = _mm_cvtepu8_epi16(edge_lo);
@@ -115,7 +117,8 @@ inline void ComputeKernel2Store12(uint8_t* dest, const uint8_t* source) {
}
// This function applies the kernel [2, 4, 4, 4, 2] to 8 values.
-inline void ComputeKernel3Store8(uint8_t* dest, const uint8_t* source) {
+inline void ComputeKernel3Store8(uint8_t* LIBGAV1_RESTRICT dest,
+ const uint8_t* LIBGAV1_RESTRICT source) {
const __m128i edge_lo = LoadUnaligned16(source);
const __m128i edge_hi = _mm_srli_si128(edge_lo, 4);
// Finish |edge_lo| life cycle quickly.
diff --git a/src/dsp/x86/intrapred_cfl_sse4.cc b/src/dsp/x86/intrapred_cfl_sse4.cc
index f2dcfdb..eb7e466 100644
--- a/src/dsp/x86/intrapred_cfl_sse4.cc
+++ b/src/dsp/x86/intrapred_cfl_sse4.cc
@@ -88,7 +88,7 @@ inline __m128i CflPredictUnclipped(const __m128i* input, __m128i alpha_q12,
template <int width, int height>
void CflIntraPredictor_SSE4_1(
- void* const dest, ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
auto* dst = static_cast<uint8_t*>(dest);
@@ -127,7 +127,8 @@ void CflIntraPredictor_SSE4_1(
template <int block_height_log2, bool is_inside>
void CflSubsampler444_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
static_assert(block_height_log2 <= 4, "");
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
@@ -189,7 +190,7 @@ template <int block_height_log2>
void CflSubsampler444_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_height_log2 <= 4, "");
assert(max_luma_width >= 4);
assert(max_luma_height >= 4);
@@ -209,7 +210,7 @@ template <int block_height_log2, bool inside>
void CflSubsampler444_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_height_log2 <= 5, "");
const int block_height = 1 << block_height_log2, block_width = 8;
const int visible_height = max_luma_height;
@@ -292,7 +293,7 @@ template <int block_height_log2>
void CflSubsampler444_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_height_log2 <= 5, "");
assert(max_luma_width >= 4);
assert(max_luma_height >= 4);
@@ -315,7 +316,7 @@ template <int block_width_log2, int block_height_log2, bool inside>
void CflSubsampler444_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_width_log2 == 4 || block_width_log2 == 5, "");
static_assert(block_height_log2 <= 5, "");
assert(max_luma_width >= 4);
@@ -418,7 +419,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler444_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_width_log2 == 4 || block_width_log2 == 5, "");
static_assert(block_height_log2 <= 5, "");
assert(max_luma_width >= 4);
@@ -441,7 +442,7 @@ template <int block_height_log2>
void CflSubsampler420_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int /*max_luma_width*/, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint8_t*>(source);
int16_t* luma_ptr = luma[0];
@@ -511,7 +512,7 @@ template <int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int /*max_luma_width*/, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint8_t*>(source);
const __m128i zero = _mm_setzero_si128();
@@ -620,7 +621,7 @@ template <int block_height_log2>
void CflSubsampler420_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
if (max_luma_width == 8) {
CflSubsampler420Impl_8xH_SSE4_1<block_height_log2, 8>(
luma, max_luma_width, max_luma_height, source, stride);
@@ -634,7 +635,7 @@ template <int block_width_log2, int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int /*max_luma_width*/, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const auto* src = static_cast<const uint8_t*>(source);
const __m128i zero = _mm_setzero_si128();
__m128i final_sum = zero;
@@ -751,7 +752,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler420_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
switch (max_luma_width) {
case 8:
CflSubsampler420Impl_WxH_SSE4_1<block_width_log2, block_height_log2, 8>(
@@ -968,7 +969,7 @@ inline __m128i ClipEpi16(__m128i x, __m128i min, __m128i max) {
template <int width, int height>
void CflIntraPredictor_10bpp_SSE4_1(
- void* const dest, ptrdiff_t stride,
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int alpha) {
constexpr int kCflLumaBufferStrideLog2_16i = 5;
@@ -1018,7 +1019,8 @@ void CflIntraPredictor_10bpp_SSE4_1(
template <int block_height_log2, bool is_inside>
void CflSubsampler444_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
static_assert(block_height_log2 <= 4, "");
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
@@ -1079,7 +1081,7 @@ template <int block_height_log2>
void CflSubsampler444_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_cast<void>(max_luma_width);
static_cast<void>(max_luma_height);
static_assert(block_height_log2 <= 4, "");
@@ -1099,7 +1101,8 @@ void CflSubsampler444_4xH_SSE4_1(
template <int block_height_log2, bool is_inside>
void CflSubsampler444_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
const __m128i dup16 = _mm_set1_epi32(0x01000100);
@@ -1158,7 +1161,7 @@ template <int block_height_log2>
void CflSubsampler444_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_cast<void>(max_luma_width);
static_cast<void>(max_luma_height);
static_assert(block_height_log2 <= 5, "");
@@ -1182,7 +1185,7 @@ template <int block_width_log2, int block_height_log2, bool is_inside>
void CflSubsampler444_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const int visible_height = max_luma_height;
const int block_width = 1 << block_width_log2;
@@ -1278,7 +1281,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler444_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
static_assert(block_width_log2 == 4 || block_width_log2 == 5,
"This function will only work for block_width 16 and 32.");
static_assert(block_height_log2 <= 5, "");
@@ -1300,7 +1303,7 @@ template <int block_height_log2>
void CflSubsampler420_4xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int /*max_luma_width*/, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
@@ -1371,7 +1374,8 @@ void CflSubsampler420_4xH_SSE4_1(
template <int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const int block_height = 1 << block_height_log2;
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
@@ -1483,7 +1487,7 @@ template <int block_height_log2>
void CflSubsampler420_8xH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
if (max_luma_width == 8) {
CflSubsampler420Impl_8xH_SSE4_1<block_height_log2, 8>(luma, max_luma_height,
source, stride);
@@ -1496,7 +1500,8 @@ void CflSubsampler420_8xH_SSE4_1(
template <int block_width_log2, int block_height_log2, int max_luma_width>
inline void CflSubsampler420Impl_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
- const int max_luma_height, const void* const source, ptrdiff_t stride) {
+ const int max_luma_height, const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride) {
const auto* src = static_cast<const uint16_t*>(source);
const ptrdiff_t src_stride = stride / sizeof(src[0]);
const __m128i zero = _mm_setzero_si128();
@@ -1615,7 +1620,7 @@ template <int block_width_log2, int block_height_log2>
void CflSubsampler420_WxH_SSE4_1(
int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
const int max_luma_width, const int max_luma_height,
- const void* const source, ptrdiff_t stride) {
+ const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) {
switch (max_luma_width) {
case 8:
CflSubsampler420Impl_WxH_SSE4_1<block_width_log2, block_height_log2, 8>(
diff --git a/src/dsp/x86/intrapred_filter_sse4.cc b/src/dsp/x86/intrapred_filter_sse4.cc
index 022af8d..a43a5cf 100644
--- a/src/dsp/x86/intrapred_filter_sse4.cc
+++ b/src/dsp/x86/intrapred_filter_sse4.cc
@@ -64,10 +64,10 @@ constexpr int kDuplicateFirstHalf = 0x44;
// at zero to preserve the sum.
// |pixels| contains p0-p7 in order as shown above.
// |taps_0_1| contains the filter kernels used to predict f0 and f1, and so on.
-inline void Filter4x2_SSE4_1(uint8_t* dst, const ptrdiff_t stride,
- const __m128i& pixels, const __m128i& taps_0_1,
- const __m128i& taps_2_3, const __m128i& taps_4_5,
- const __m128i& taps_6_7) {
+inline void Filter4x2_SSE4_1(uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride, const __m128i& pixels,
+ const __m128i& taps_0_1, const __m128i& taps_2_3,
+ const __m128i& taps_4_5, const __m128i& taps_6_7) {
const __m128i mul_0_01 = _mm_maddubs_epi16(pixels, taps_0_1);
const __m128i mul_0_23 = _mm_maddubs_epi16(pixels, taps_2_3);
// |output_half| contains 8 partial sums for f0-f7.
@@ -93,10 +93,10 @@ inline void Filter4x2_SSE4_1(uint8_t* dst, const ptrdiff_t stride,
// for successive blocks. This implementation takes advantage of the fact
// that the p5 and p6 for each sub-block come solely from the |left_ptr| buffer,
// using shifts to arrange things to fit reusable shuffle vectors.
-inline void Filter4xH(uint8_t* dest, ptrdiff_t stride,
- const uint8_t* const top_ptr,
- const uint8_t* const left_ptr, FilterIntraPredictor pred,
- const int height) {
+inline void Filter4xH(uint8_t* LIBGAV1_RESTRICT dest, ptrdiff_t stride,
+ const uint8_t* LIBGAV1_RESTRICT const top_ptr,
+ const uint8_t* LIBGAV1_RESTRICT const left_ptr,
+ FilterIntraPredictor pred, const int height) {
// Two filter kernels per vector.
const __m128i taps_0_1 = LoadAligned16(kFilterIntraTaps[pred][0]);
const __m128i taps_2_3 = LoadAligned16(kFilterIntraTaps[pred][2]);
@@ -271,9 +271,10 @@ inline void Filter4xH(uint8_t* dest, ptrdiff_t stride,
}
}
-void FilterIntraPredictor_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column,
+void FilterIntraPredictor_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column,
FilterIntraPredictor pred, const int width,
const int height) {
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
diff --git a/src/dsp/x86/intrapred_smooth_sse4.cc b/src/dsp/x86/intrapred_smooth_sse4.cc
index de9f551..b53ee8c 100644
--- a/src/dsp/x86/intrapred_smooth_sse4.cc
+++ b/src/dsp/x86/intrapred_smooth_sse4.cc
@@ -38,23 +38,12 @@ namespace {
// to have visibility of the values. This helps reduce loads and in the
// creation of the inverse weights.
constexpr uint8_t kSmoothWeights[] = {
- // block dimension = 4
- 255, 149, 85, 64,
- // block dimension = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // block dimension = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // block dimension = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
- // block dimension = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73,
- 69, 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16,
- 15, 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4};
+#include "src/dsp/smooth_weights.inc"
+};
template <int y_mask>
-inline void WriteSmoothHorizontalSum4(void* const dest, const __m128i& left,
+inline void WriteSmoothHorizontalSum4(void* LIBGAV1_RESTRICT const dest,
+ const __m128i& left,
const __m128i& weights,
const __m128i& scaled_top_right,
const __m128i& round) {
@@ -77,7 +66,8 @@ inline __m128i SmoothDirectionalSum8(const __m128i& pixels,
return _mm_add_epi16(scaled_corner, weighted_px);
}
-inline void WriteSmoothDirectionalSum8(uint8_t* dest, const __m128i& pixels,
+inline void WriteSmoothDirectionalSum8(uint8_t* LIBGAV1_RESTRICT dest,
+ const __m128i& pixels,
const __m128i& weights,
const __m128i& scaled_corner,
const __m128i& round) {
@@ -91,13 +81,11 @@ inline void WriteSmoothDirectionalSum8(uint8_t* dest, const __m128i& pixels,
// For Horizontal, pixels1 and pixels2 are the same repeated value. For
// Vertical, weights1 and weights2 are the same, and scaled_corner1 and
// scaled_corner2 are the same.
-inline void WriteSmoothDirectionalSum16(uint8_t* dest, const __m128i& pixels1,
- const __m128i& pixels2,
- const __m128i& weights1,
- const __m128i& weights2,
- const __m128i& scaled_corner1,
- const __m128i& scaled_corner2,
- const __m128i& round) {
+inline void WriteSmoothDirectionalSum16(
+ uint8_t* LIBGAV1_RESTRICT dest, const __m128i& pixels1,
+ const __m128i& pixels2, const __m128i& weights1, const __m128i& weights2,
+ const __m128i& scaled_corner1, const __m128i& scaled_corner2,
+ const __m128i& round) {
const __m128i weighted_px1 = _mm_mullo_epi16(pixels1, weights1);
const __m128i weighted_px2 = _mm_mullo_epi16(pixels2, weights2);
const __m128i pred_sum1 = _mm_add_epi16(scaled_corner1, weighted_px1);
@@ -109,8 +97,9 @@ inline void WriteSmoothDirectionalSum16(uint8_t* dest, const __m128i& pixels1,
}
template <int y_mask>
-inline void WriteSmoothPredSum4(uint8_t* const dest, const __m128i& top,
- const __m128i& left, const __m128i& weights_x,
+inline void WriteSmoothPredSum4(uint8_t* LIBGAV1_RESTRICT const dest,
+ const __m128i& top, const __m128i& left,
+ const __m128i& weights_x,
const __m128i& weights_y,
const __m128i& scaled_bottom_left,
const __m128i& scaled_top_right,
@@ -135,7 +124,8 @@ inline void WriteSmoothPredSum4(uint8_t* const dest, const __m128i& top,
// pixels[0]: above and below_pred interleave vector
// pixels[1]: left vector
// pixels[2]: right_pred vector
-inline void LoadSmoothPixels4(const uint8_t* above, const uint8_t* left,
+inline void LoadSmoothPixels4(const uint8_t* LIBGAV1_RESTRICT above,
+ const uint8_t* LIBGAV1_RESTRICT left,
const int height, __m128i* pixels) {
if (height == 4) {
pixels[1] = Load4(left);
@@ -156,8 +146,9 @@ inline void LoadSmoothPixels4(const uint8_t* above, const uint8_t* left,
// weight_h[2]: same as [0], second half for height = 16 only
// weight_h[3]: same as [1], second half for height = 16 only
// weight_w[0]: weights_w and scale - weights_w interleave vector
-inline void LoadSmoothWeights4(const uint8_t* weight_array, const int height,
- __m128i* weight_h, __m128i* weight_w) {
+inline void LoadSmoothWeights4(const uint8_t* LIBGAV1_RESTRICT weight_array,
+ const int height, __m128i* weight_h,
+ __m128i* weight_w) {
const __m128i scale = _mm_set1_epi16(256);
const __m128i x_weights = Load4(weight_array);
weight_h[0] = _mm_cvtepu8_epi16(x_weights);
@@ -179,7 +170,8 @@ inline void LoadSmoothWeights4(const uint8_t* weight_array, const int height,
}
inline void WriteSmoothPred4x8(const __m128i* pixel, const __m128i* weights_y,
- const __m128i* weight_x, uint8_t* dst,
+ const __m128i* weight_x,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t stride,
const bool use_second_half) {
const __m128i round = _mm_set1_epi32(256);
@@ -215,8 +207,9 @@ inline void WriteSmoothPred4x8(const __m128i* pixel, const __m128i* weights_y,
// The interleaving approach has some overhead that causes it to underperform in
// the 4x4 case.
-void Smooth4x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth4x4_SSE4_1(void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const __m128i top = _mm_cvtepu8_epi32(Load4(top_row));
const __m128i left = _mm_cvtepu8_epi32(Load4(left_column));
const __m128i weights = _mm_cvtepu8_epi32(Load4(kSmoothWeights));
@@ -247,8 +240,9 @@ void Smooth4x4_SSE4_1(void* const dest, const ptrdiff_t stride,
scaled_bottom_left, scaled_top_right, scale);
}
-void Smooth4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth4x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
__m128i weights_x[1];
@@ -260,8 +254,10 @@ void Smooth4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothPred4x8(pixels, weights_y, weights_x, dst, stride, false);
}
-void Smooth4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth4x16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
__m128i weights_x[1];
@@ -283,7 +279,8 @@ void Smooth4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
// pixels[5]: above and below_pred interleave vector, second half
// pixels[6]: left vector + 16
// pixels[7]: right_pred vector
-inline void LoadSmoothPixels8(const uint8_t* above, const uint8_t* left,
+inline void LoadSmoothPixels8(const uint8_t* LIBGAV1_RESTRICT above,
+ const uint8_t* LIBGAV1_RESTRICT left,
const int height, __m128i* pixels) {
const __m128i bottom_left = _mm_set1_epi16(left[height - 1]);
__m128i top_row = _mm_cvtepu8_epi16(LoadLo8(above));
@@ -317,8 +314,9 @@ inline void LoadSmoothPixels8(const uint8_t* above, const uint8_t* left,
// weight_h[7]: same as [1], offset 24
// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-inline void LoadSmoothWeights8(const uint8_t* weight_array, const int height,
- __m128i* weight_w, __m128i* weight_h) {
+inline void LoadSmoothWeights8(const uint8_t* LIBGAV1_RESTRICT weight_array,
+ const int height, __m128i* weight_w,
+ __m128i* weight_h) {
const int offset = (height < 8) ? 0 : 4;
__m128i loaded_weights = LoadUnaligned16(&weight_array[offset]);
weight_h[0] = _mm_cvtepu8_epi16(loaded_weights);
@@ -360,7 +358,8 @@ inline void LoadSmoothWeights8(const uint8_t* weight_array, const int height,
inline void WriteSmoothPred8xH(const __m128i* pixels, const __m128i* weights_x,
const __m128i* weights_y, const int height,
- uint8_t* dst, const ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t stride,
const bool use_second_half) {
const __m128i round = _mm_set1_epi32(256);
const __m128i mask_increment = _mm_set1_epi16(0x0202);
@@ -405,8 +404,9 @@ inline void WriteSmoothPred8xH(const __m128i* pixels, const __m128i* weights_x,
}
}
-void Smooth8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth8x4_SSE4_1(void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
__m128i pixels[4];
@@ -419,8 +419,9 @@ void Smooth8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothPred8xH(pixels, weights_x, weights_y, 4, dst, stride, false);
}
-void Smooth8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth8x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -434,8 +435,10 @@ void Smooth8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothPred8xH(pixels, weights_x, weights_y, 8, dst, stride, false);
}
-void Smooth8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth8x16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
__m128i pixels[4];
@@ -450,8 +453,10 @@ void Smooth8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothPred8xH(pixels, weights_x, &weights_y[2], 8, dst, stride, true);
}
-void Smooth8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void Smooth8x32_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
__m128i pixels[8];
@@ -473,8 +478,9 @@ void Smooth8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
template <int width, int height>
-void SmoothWxH(void* const dest, const ptrdiff_t stride,
- const void* const top_row, const void* const left_column) {
+void SmoothWxH(void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
const uint8_t* const sm_weights_h = kSmoothWeights + height - 4;
@@ -532,8 +538,10 @@ void SmoothWxH(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal4x4_SSE4_1(void* dest, const ptrdiff_t stride,
- const void* top_row, const void* left_column) {
+void SmoothHorizontal4x4_SSE4_1(void* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT top_row,
+ const void* LIBGAV1_RESTRICT left_column) {
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi32(top_ptr[3]);
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
@@ -553,9 +561,10 @@ void SmoothHorizontal4x4_SSE4_1(void* dest, const ptrdiff_t stride,
WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale);
}
-void SmoothHorizontal4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal4x8_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi32(top[3]);
const __m128i weights = _mm_cvtepu8_epi32(Load4(kSmoothWeights));
@@ -585,9 +594,10 @@ void SmoothHorizontal4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale);
}
-void SmoothHorizontal4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal4x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi32(top[3]);
const __m128i weights = _mm_cvtepu8_epi32(Load4(kSmoothWeights));
@@ -637,9 +647,10 @@ void SmoothHorizontal4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothHorizontalSum4<0xFF>(dst, left, weights, scaled_top_right, scale);
}
-void SmoothHorizontal8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal8x4_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[7]);
const __m128i left = _mm_cvtepu8_epi16(Load4(left_column));
@@ -666,9 +677,10 @@ void SmoothHorizontal8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothDirectionalSum8(dst, left_y, weights, scaled_top_right, scale);
}
-void SmoothHorizontal8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal8x8_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[7]);
const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -686,9 +698,10 @@ void SmoothHorizontal8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal8x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[7]);
const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4));
@@ -714,9 +727,10 @@ void SmoothHorizontal8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal8x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[7]);
const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4));
@@ -756,9 +770,10 @@ void SmoothHorizontal8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal16x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal16x4_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[15]);
const __m128i left = _mm_cvtepu8_epi16(Load4(left_column));
@@ -795,9 +810,10 @@ void SmoothHorizontal16x4_SSE4_1(void* const dest, const ptrdiff_t stride,
scaled_top_right1, scaled_top_right2, scale);
}
-void SmoothHorizontal16x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal16x8_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[15]);
const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -822,9 +838,10 @@ void SmoothHorizontal16x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal16x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal16x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[15]);
const __m128i weights = LoadUnaligned16(kSmoothWeights + 12);
@@ -858,9 +875,10 @@ void SmoothHorizontal16x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal16x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal16x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[15]);
const __m128i weights = LoadUnaligned16(kSmoothWeights + 12);
@@ -910,9 +928,10 @@ void SmoothHorizontal16x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal16x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal16x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[15]);
const __m128i weights = LoadUnaligned16(kSmoothWeights + 12);
@@ -940,9 +959,10 @@ void SmoothHorizontal16x64_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal32x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal32x8_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[31]);
const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -978,9 +998,10 @@ void SmoothHorizontal32x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal32x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal32x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[31]);
const __m128i left1 = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -1027,9 +1048,10 @@ void SmoothHorizontal32x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal32x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal32x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[31]);
const __m128i weights_lo = LoadUnaligned16(kSmoothWeights + 28);
@@ -1096,9 +1118,10 @@ void SmoothHorizontal32x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal32x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal32x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[31]);
const __m128i weights_lo = LoadUnaligned16(kSmoothWeights + 28);
@@ -1137,9 +1160,10 @@ void SmoothHorizontal32x64_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal64x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal64x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[63]);
const __m128i left1 = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -1212,9 +1236,10 @@ void SmoothHorizontal64x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal64x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal64x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[63]);
const __m128i left1 = _mm_cvtepu8_epi16(LoadLo8(left_column));
@@ -1315,9 +1340,10 @@ void SmoothHorizontal64x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothHorizontal64x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothHorizontal64x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const top = static_cast<const uint8_t*>(top_row);
const __m128i top_right = _mm_set1_epi16(top[63]);
const __m128i weights_lolo = LoadUnaligned16(kSmoothWeights + 60);
@@ -1378,7 +1404,8 @@ void SmoothHorizontal64x64_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-inline void LoadSmoothVerticalPixels4(const uint8_t* above, const uint8_t* left,
+inline void LoadSmoothVerticalPixels4(const uint8_t* LIBGAV1_RESTRICT above,
+ const uint8_t* LIBGAV1_RESTRICT left,
const int height, __m128i* pixels) {
__m128i top = Load4(above);
const __m128i bottom_left = _mm_set1_epi16(left[height - 1]);
@@ -1390,7 +1417,8 @@ inline void LoadSmoothVerticalPixels4(const uint8_t* above, const uint8_t* left,
// (256-w) counterparts. This is precomputed by the compiler when the weights
// table is visible to this module. Removing this visibility can cut speed by up
// to half in both 4xH and 8xH transforms.
-inline void LoadSmoothVerticalWeights4(const uint8_t* weight_array,
+inline void LoadSmoothVerticalWeights4(const uint8_t* LIBGAV1_RESTRICT
+ weight_array,
const int height, __m128i* weights) {
const __m128i inverter = _mm_set1_epi16(256);
@@ -1413,7 +1441,8 @@ inline void LoadSmoothVerticalWeights4(const uint8_t* weight_array,
}
inline void WriteSmoothVertical4xH(const __m128i* pixel, const __m128i* weight,
- const int height, uint8_t* dst,
+ const int height,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t stride) {
const __m128i pred_round = _mm_set1_epi32(128);
const __m128i mask_increment = _mm_set1_epi16(0x0202);
@@ -1438,9 +1467,10 @@ inline void WriteSmoothVertical4xH(const __m128i* pixel, const __m128i* weight,
}
}
-void SmoothVertical4x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical4x4_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint8_t*>(left_column);
const auto* const above = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1453,9 +1483,10 @@ void SmoothVertical4x4_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothVertical4xH(&pixels, weights, 4, dst, stride);
}
-void SmoothVertical4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical4x8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint8_t*>(left_column);
const auto* const above = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1468,9 +1499,10 @@ void SmoothVertical4x8_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothVertical4xH(&pixels, weights, 8, dst, stride);
}
-void SmoothVertical4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical4x16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left = static_cast<const uint8_t*>(left_column);
const auto* const above = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1485,9 +1517,10 @@ void SmoothVertical4x16_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothVertical4xH(&pixels, &weights[2], 8, dst, stride);
}
-void SmoothVertical8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical8x4_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[3]);
const __m128i weights = _mm_cvtepu8_epi16(Load4(kSmoothWeights));
@@ -1520,9 +1553,10 @@ void SmoothVertical8x4_SSE4_1(void* const dest, const ptrdiff_t stride,
WriteSmoothDirectionalSum8(dst, top, weights_y, scaled_bottom_left_y, scale);
}
-void SmoothVertical8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical8x8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[7]);
const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4));
@@ -1544,9 +1578,10 @@ void SmoothVertical8x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical8x16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[15]);
const __m128i weights = LoadUnaligned16(kSmoothWeights + 12);
@@ -1583,9 +1618,10 @@ void SmoothVertical8x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical8x32_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i zero = _mm_setzero_si128();
const __m128i bottom_left = _mm_set1_epi16(left_ptr[31]);
@@ -1649,9 +1685,10 @@ void SmoothVertical8x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical16x4_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical16x4_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[3]);
@@ -1694,9 +1731,10 @@ void SmoothVertical16x4_SSE4_1(void* const dest, const ptrdiff_t stride,
scale);
}
-void SmoothVertical16x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical16x8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[7]);
@@ -1722,9 +1760,10 @@ void SmoothVertical16x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical16x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical16x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[15]);
@@ -1766,9 +1805,10 @@ void SmoothVertical16x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical16x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical16x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[31]);
@@ -1839,9 +1879,10 @@ void SmoothVertical16x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical16x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical16x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const __m128i bottom_left = _mm_set1_epi16(left_ptr[63]);
@@ -1887,9 +1928,10 @@ void SmoothVertical16x64_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical32x8_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical32x8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1922,9 +1964,10 @@ void SmoothVertical32x8_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical32x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical32x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
auto* dst = static_cast<uint8_t*>(dest);
@@ -1975,9 +2018,10 @@ void SmoothVertical32x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical32x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical32x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -2063,9 +2107,10 @@ void SmoothVertical32x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical32x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical32x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -2120,9 +2165,10 @@ void SmoothVertical32x64_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical64x16_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical64x16_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -2192,9 +2238,10 @@ void SmoothVertical64x16_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical64x32_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical64x32_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -2311,9 +2358,10 @@ void SmoothVertical64x32_SSE4_1(void* const dest, const ptrdiff_t stride,
}
}
-void SmoothVertical64x64_SSE4_1(void* const dest, const ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void SmoothVertical64x64_SSE4_1(
+ void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* dst = static_cast<uint8_t*>(dest);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
diff --git a/src/dsp/x86/intrapred_sse4.cc b/src/dsp/x86/intrapred_sse4.cc
index 063929d..556afed 100644
--- a/src/dsp/x86/intrapred_sse4.cc
+++ b/src/dsp/x86/intrapred_sse4.cc
@@ -90,11 +90,11 @@ struct DirectionalPredFuncs_SSE4_1 {
template <int width_log2, int height_log2, DcSumFunc top_sumfn,
DcSumFunc left_sumfn, DcStoreFunc storefn, int shiftk, int dc_mult>
-void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
- shiftk, dc_mult>::DcTop(void* const dest,
- ptrdiff_t stride,
- const void* const top_row,
- const void* /*left_column*/) {
+void DcPredFuncs_SSE4_1<
+ width_log2, height_log2, top_sumfn, left_sumfn, storefn, shiftk,
+ dc_mult>::DcTop(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* /*left_column*/) {
const __m128i rounder = _mm_set1_epi32(1 << (width_log2 - 1));
const __m128i sum = top_sumfn(top_row);
const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, rounder), width_log2);
@@ -103,11 +103,11 @@ void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
template <int width_log2, int height_log2, DcSumFunc top_sumfn,
DcSumFunc left_sumfn, DcStoreFunc storefn, int shiftk, int dc_mult>
-void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
- shiftk,
- dc_mult>::DcLeft(void* const dest, ptrdiff_t stride,
- const void* /*top_row*/,
- const void* const left_column) {
+void DcPredFuncs_SSE4_1<
+ width_log2, height_log2, top_sumfn, left_sumfn, storefn, shiftk,
+ dc_mult>::DcLeft(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* /*top_row*/,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i rounder = _mm_set1_epi32(1 << (height_log2 - 1));
const __m128i sum = left_sumfn(left_column);
const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, rounder), height_log2);
@@ -116,10 +116,11 @@ void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
template <int width_log2, int height_log2, DcSumFunc top_sumfn,
DcSumFunc left_sumfn, DcStoreFunc storefn, int shiftk, int dc_mult>
-void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
- shiftk, dc_mult>::Dc(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void DcPredFuncs_SSE4_1<
+ width_log2, height_log2, top_sumfn, left_sumfn, storefn, shiftk,
+ dc_mult>::Dc(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i rounder =
_mm_set1_epi32((1 << (width_log2 - 1)) + (1 << (height_log2 - 1)));
const __m128i sum_top = top_sumfn(top_row);
@@ -141,8 +142,8 @@ void DcPredFuncs_SSE4_1<width_log2, height_log2, top_sumfn, left_sumfn, storefn,
template <ColumnStoreFunc col_storefn>
void DirectionalPredFuncs_SSE4_1<col_storefn>::Horizontal(
- void* const dest, ptrdiff_t stride, const void* /*top_row*/,
- const void* const left_column) {
+ void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* /*top_row*/, const void* LIBGAV1_RESTRICT const left_column) {
col_storefn(dest, stride, left_column);
}
@@ -384,8 +385,9 @@ inline void WriteDuplicate64x4(void* const dest, ptrdiff_t stride,
// ColStoreN<height> copies each of the |height| values in |column| across its
// corresponding in dest.
template <WriteDuplicateFunc writefn>
-inline void ColStore4_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore4_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const __m128i col_data = Load4(column);
const __m128i col_dup16 = _mm_unpacklo_epi8(col_data, col_data);
const __m128i col_dup32 = _mm_unpacklo_epi16(col_dup16, col_dup16);
@@ -393,8 +395,9 @@ inline void ColStore4_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
const __m128i col_data = LoadLo8(column);
const __m128i col_dup16 = _mm_unpacklo_epi8(col_data, col_data);
@@ -407,8 +410,9 @@ inline void ColStore8_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
const __m128i col_data = _mm_loadu_si128(static_cast<const __m128i*>(column));
const __m128i col_dup16_lo = _mm_unpacklo_epi8(col_data, col_data);
@@ -428,8 +432,9 @@ inline void ColStore16_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore32_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < 32; y += 16) {
@@ -457,8 +462,9 @@ inline void ColStore32_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore64_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore64_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < 64; y += 16) {
@@ -574,7 +580,7 @@ struct DirDefs {
};
template <int y_mask>
-inline void WritePaethLine4(uint8_t* dst, const __m128i& top,
+inline void WritePaethLine4(uint8_t* LIBGAV1_RESTRICT dst, const __m128i& top,
const __m128i& left, const __m128i& top_lefts,
const __m128i& top_dists, const __m128i& left_dists,
const __m128i& top_left_diffs) {
@@ -614,7 +620,7 @@ inline void WritePaethLine4(uint8_t* dst, const __m128i& top,
// could pay off to accommodate top_left_dists for cmpgt, and repack into epi8
// for the blends.
template <int y_mask>
-inline void WritePaethLine8(uint8_t* dst, const __m128i& top,
+inline void WritePaethLine8(uint8_t* LIBGAV1_RESTRICT dst, const __m128i& top,
const __m128i& left, const __m128i& top_lefts,
const __m128i& top_dists, const __m128i& left_dists,
const __m128i& top_left_diffs) {
@@ -658,7 +664,7 @@ inline void WritePaethLine8(uint8_t* dst, const __m128i& top,
// |left_dists| is provided alongside its spread out version because it doesn't
// change between calls and interacts with both kinds of packing.
template <int y_mask>
-inline void WritePaethLine16(uint8_t* dst, const __m128i& top,
+inline void WritePaethLine16(uint8_t* LIBGAV1_RESTRICT dst, const __m128i& top,
const __m128i& left, const __m128i& top_lefts,
const __m128i& top_dists,
const __m128i& left_dists,
@@ -712,8 +718,9 @@ inline void WritePaethLine16(uint8_t* dst, const __m128i& top,
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), pred);
}
-void Paeth4x4_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row, const void* const left_column) {
+void Paeth4x4_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = _mm_cvtepu8_epi32(Load4(left_column));
const __m128i top = _mm_cvtepu8_epi32(Load4(top_row));
@@ -742,8 +749,9 @@ void Paeth4x4_SSE4_1(void* const dest, ptrdiff_t stride,
top_left_diff);
}
-void Paeth4x8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row, const void* const left_column) {
+void Paeth4x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadLo8(left_column);
const __m128i left_lo = _mm_cvtepu8_epi32(left);
const __m128i left_hi = _mm_cvtepu8_epi32(_mm_srli_si128(left, 4));
@@ -787,9 +795,9 @@ void Paeth4x8_SSE4_1(void* const dest, ptrdiff_t stride,
top_left_diff);
}
-void Paeth4x16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth4x16_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadUnaligned16(left_column);
const __m128i left_0 = _mm_cvtepu8_epi32(left);
const __m128i left_1 = _mm_cvtepu8_epi32(_mm_srli_si128(left, 4));
@@ -862,8 +870,9 @@ void Paeth4x16_SSE4_1(void* const dest, ptrdiff_t stride,
top_left_diff);
}
-void Paeth8x4_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row, const void* const left_column) {
+void Paeth8x4_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = _mm_cvtepu8_epi16(Load4(left_column));
const __m128i top = _mm_cvtepu8_epi16(LoadLo8(top_row));
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -891,8 +900,9 @@ void Paeth8x4_SSE4_1(void* const dest, ptrdiff_t stride,
top_left_diff);
}
-void Paeth8x8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row, const void* const left_column) {
+void Paeth8x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column));
const __m128i top = _mm_cvtepu8_epi16(LoadLo8(top_row));
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -932,9 +942,9 @@ void Paeth8x8_SSE4_1(void* const dest, ptrdiff_t stride,
top_left_diff);
}
-void Paeth8x16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth8x16_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadUnaligned16(left_column);
const __m128i left_lo = _mm_cvtepu8_epi16(left);
const __m128i left_hi = _mm_cvtepu8_epi16(_mm_srli_si128(left, 8));
@@ -1001,18 +1011,18 @@ void Paeth8x16_SSE4_1(void* const dest, ptrdiff_t stride,
left_dists, top_left_diff);
}
-void Paeth8x32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth8x32_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
auto* const dst = static_cast<uint8_t*>(dest);
Paeth8x16_SSE4_1(dst, stride, top_row, left_column);
Paeth8x16_SSE4_1(dst + (stride << 4), stride, top_row, left_ptr + 16);
}
-void Paeth16x4_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth16x4_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = Load4(left_column);
const __m128i top = LoadUnaligned16(top_row);
const __m128i top_lo = _mm_cvtepu8_epi16(top);
@@ -1057,7 +1067,7 @@ void Paeth16x4_SSE4_1(void* const dest, ptrdiff_t stride,
// Inlined for calling with offsets in larger transform sizes, mainly to
// preserve top_left.
-inline void WritePaeth16x8(void* const dest, ptrdiff_t stride,
+inline void WritePaeth16x8(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
const uint8_t top_left, const __m128i top,
const __m128i left) {
const __m128i top_lo = _mm_cvtepu8_epi16(top);
@@ -1115,9 +1125,9 @@ inline void WritePaeth16x8(void* const dest, ptrdiff_t stride,
top_left_diff_lo, top_left_diff_hi);
}
-void Paeth16x8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth16x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i top = LoadUnaligned16(top_row);
const __m128i left = LoadLo8(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -1213,18 +1223,18 @@ void WritePaeth16x16(void* const dest, ptrdiff_t stride, const uint8_t top_left,
top_left_diff_lo, top_left_diff_hi);
}
-void Paeth16x16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth16x16_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadUnaligned16(left_column);
const __m128i top = LoadUnaligned16(top_row);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
WritePaeth16x16(static_cast<uint8_t*>(dest), stride, top_ptr[-1], top, left);
}
-void Paeth16x32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth16x32_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left_0 = LoadUnaligned16(left_column);
const __m128i top = LoadUnaligned16(top_row);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -1236,9 +1246,9 @@ void Paeth16x32_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + (stride << 4), stride, top_left, top, left_1);
}
-void Paeth16x64_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth16x64_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const ptrdiff_t stride16 = stride << 4;
const __m128i left_0 = LoadUnaligned16(left_column);
const __m128i top = LoadUnaligned16(top_row);
@@ -1258,9 +1268,9 @@ void Paeth16x64_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst, stride, top_left, top, left_3);
}
-void Paeth32x8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth32x8_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadLo8(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
const __m128i top_0 = LoadUnaligned16(top_row);
@@ -1271,9 +1281,9 @@ void Paeth32x8_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x8(dst + 16, stride, top_left, top_1, left);
}
-void Paeth32x16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth32x16_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadUnaligned16(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
const __m128i top_0 = LoadUnaligned16(top_row);
@@ -1284,9 +1294,9 @@ void Paeth32x16_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + 16, stride, top_left, top_1, left);
}
-void Paeth32x32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth32x32_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i left_0 = LoadUnaligned16(left_ptr);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -1302,9 +1312,9 @@ void Paeth32x32_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + 16, stride, top_left, top_1, left_1);
}
-void Paeth32x64_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth32x64_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i left_0 = LoadUnaligned16(left_ptr);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
@@ -1328,9 +1338,9 @@ void Paeth32x64_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + 16, stride, top_left, top_1, left_3);
}
-void Paeth64x16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth64x16_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const __m128i left = LoadUnaligned16(left_column);
const auto* const top_ptr = static_cast<const uint8_t*>(top_row);
const __m128i top_0 = LoadUnaligned16(top_ptr);
@@ -1345,9 +1355,9 @@ void Paeth64x16_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + 48, stride, top_left, top_3, left);
}
-void Paeth64x32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth64x32_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i left_0 = LoadUnaligned16(left_ptr);
const __m128i left_1 = LoadUnaligned16(left_ptr + 16);
@@ -1369,9 +1379,9 @@ void Paeth64x32_SSE4_1(void* const dest, ptrdiff_t stride,
WritePaeth16x16(dst + 48, stride, top_left, top_3, left_1);
}
-void Paeth64x64_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const top_row,
- const void* const left_column) {
+void Paeth64x64_SSE4_1(void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_row,
+ const void* LIBGAV1_RESTRICT const left_column) {
const auto* const left_ptr = static_cast<const uint8_t*>(left_column);
const __m128i left_0 = LoadUnaligned16(left_ptr);
const __m128i left_1 = LoadUnaligned16(left_ptr + 16);
@@ -1793,7 +1803,6 @@ void Init8bpp() {
DirDefs::_64x64::Horizontal;
#endif
} // NOLINT(readability/fn_size)
-// TODO(petersonab): Split Init8bpp function into family-specific files.
} // namespace
} // namespace low_bitdepth
@@ -1937,16 +1946,18 @@ inline void WriteDuplicate64x4(void* const dest, ptrdiff_t stride,
// ColStoreN<height> copies each of the |height| values in |column| across its
// corresponding row in dest.
template <WriteDuplicateFunc writefn>
-inline void ColStore4_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore4_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const __m128i col_data = LoadLo8(column);
const __m128i col_dup32 = _mm_unpacklo_epi16(col_data, col_data);
writefn(dest, stride, col_dup32);
}
template <WriteDuplicateFunc writefn>
-inline void ColStore8_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore8_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const __m128i col_data = LoadUnaligned16(column);
const __m128i col_dup32_lo = _mm_unpacklo_epi16(col_data, col_data);
const __m128i col_dup32_hi = _mm_unpackhi_epi16(col_data, col_data);
@@ -1958,8 +1969,9 @@ inline void ColStore8_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore16_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore16_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < 32; y += 16) {
@@ -1975,8 +1987,9 @@ inline void ColStore16_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore32_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore32_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < 64; y += 16) {
@@ -1992,8 +2005,9 @@ inline void ColStore32_SSE4_1(void* const dest, ptrdiff_t stride,
}
template <WriteDuplicateFunc writefn>
-inline void ColStore64_SSE4_1(void* const dest, ptrdiff_t stride,
- const void* const column) {
+inline void ColStore64_SSE4_1(void* LIBGAV1_RESTRICT const dest,
+ ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const column) {
const ptrdiff_t stride4 = stride << 2;
auto* dst = static_cast<uint8_t*>(dest);
for (int y = 0; y < 128; y += 16) {
diff --git a/src/dsp/x86/inverse_transform_sse4.cc b/src/dsp/x86/inverse_transform_sse4.cc
index 12c008f..e9ceb87 100644
--- a/src/dsp/x86/inverse_transform_sse4.cc
+++ b/src/dsp/x86/inverse_transform_sse4.cc
@@ -41,7 +41,8 @@ namespace {
#include "src/dsp/inverse_transform.inc"
template <int store_width, int store_count>
-LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
+LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* LIBGAV1_RESTRICT dst,
+ int32_t stride, int32_t idx,
const __m128i* s) {
// NOTE: It is expected that the compiler will unroll these loops.
if (store_width == 16) {
@@ -63,8 +64,8 @@ LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
}
template <int load_width, int load_count>
-LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* src, int32_t stride,
- int32_t idx, __m128i* x) {
+LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* LIBGAV1_RESTRICT src,
+ int32_t stride, int32_t idx, __m128i* x) {
// NOTE: It is expected that the compiler will unroll these loops.
if (load_width == 16) {
for (int i = 0; i < load_count; i += 4) {
@@ -1638,9 +1639,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity4DcOnly(void* dest, int adjusted_tx_height,
LIBGAV1_ALWAYS_INLINE void Identity4ColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const __m128i v_multiplier_fraction =
_mm_set1_epi16(static_cast<int16_t>(kIdentity4MultiplierFraction << 3));
@@ -1685,9 +1687,10 @@ LIBGAV1_ALWAYS_INLINE void Identity4ColumnStoreToFrame(
LIBGAV1_ALWAYS_INLINE void Identity4RowColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const __m128i v_multiplier_fraction =
_mm_set1_epi16(static_cast<int16_t>(kIdentity4MultiplierFraction << 3));
@@ -1789,9 +1792,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity8DcOnly(void* dest, int adjusted_tx_height,
LIBGAV1_ALWAYS_INLINE void Identity8ColumnStoreToFrame_SSE4_1(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const __m128i v_eight = _mm_set1_epi16(8);
if (tx_width == 4) {
int i = 0;
@@ -1883,9 +1887,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity16DcOnly(void* dest, int adjusted_tx_height,
LIBGAV1_ALWAYS_INLINE void Identity16ColumnStoreToFrame_SSE4_1(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const __m128i v_eight = _mm_set1_epi16(8);
const __m128i v_multiplier =
_mm_set1_epi16(static_cast<int16_t>(kIdentity4MultiplierFraction << 4));
@@ -1966,9 +1971,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity32DcOnly(void* dest,
LIBGAV1_ALWAYS_INLINE void Identity32ColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
const __m128i v_two = _mm_set1_epi16(2);
int i = 0;
@@ -1995,7 +2001,7 @@ LIBGAV1_ALWAYS_INLINE void Identity32ColumnStoreToFrame(
// Process 4 wht4 rows and columns.
LIBGAV1_ALWAYS_INLINE void Wht4_SSE4_1(Array2DView<uint8_t> frame,
const int start_x, const int start_y,
- const void* source,
+ const void* LIBGAV1_RESTRICT source,
const int adjusted_tx_height) {
const auto* const src = static_cast<const int16_t*>(source);
__m128i s[4], x[4];
@@ -2058,12 +2064,11 @@ LIBGAV1_ALWAYS_INLINE void Wht4_SSE4_1(Array2DView<uint8_t> frame,
// Store to frame.
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
for (int row = 0; row < 4; ++row) {
const __m128i frame_data = Load4(dst);
const __m128i a = _mm_cvtepu8_epi16(frame_data);
- // Saturate to prevent overflowing int16_t
- const __m128i b = _mm_adds_epi16(a, s[row]);
+ const __m128i b = _mm_add_epi16(a, s[row]);
Store4(dst, _mm_packus_epi16(b, b));
dst += stride;
}
@@ -2075,13 +2080,13 @@ LIBGAV1_ALWAYS_INLINE void Wht4_SSE4_1(Array2DView<uint8_t> frame,
template <bool enable_flip_rows = false>
LIBGAV1_ALWAYS_INLINE void StoreToFrameWithRound(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source,
- TransformType tx_type) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source, TransformType tx_type) {
const bool flip_rows =
enable_flip_rows ? kTransformFlipRowsMask.Contains(tx_type) : false;
const __m128i v_eight = _mm_set1_epi16(8);
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (tx_width == 4) {
for (int i = 0; i < tx_height; ++i) {
const int row = flip_rows ? (tx_height - i - 1) * 4 : i * 4;
@@ -2262,8 +2267,10 @@ void Dct4TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Dct4TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2325,8 +2332,10 @@ void Dct8TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Dct8TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2386,9 +2395,10 @@ void Dct16TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Dct16TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2441,9 +2451,10 @@ void Dct32TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Dct32TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2486,9 +2497,10 @@ void Dct64TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Dct64TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2535,9 +2547,10 @@ void Adst4TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Adst4TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2594,9 +2607,10 @@ void Adst8TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Adst8TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2658,9 +2672,10 @@ void Adst16TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Adst16TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2727,8 +2742,9 @@ void Identity4TransformLoopRow_SSE4_1(TransformType tx_type,
void Identity4TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2799,8 +2815,9 @@ void Identity8TransformLoopRow_SSE4_1(TransformType tx_type,
void Identity8TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2839,8 +2856,9 @@ void Identity16TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Identity16TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2884,8 +2902,9 @@ void Identity32TransformLoopRow_SSE4_1(TransformType /*tx_type*/,
void Identity32TransformLoopColumn_SSE4_1(TransformType /*tx_type*/,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2907,8 +2926,10 @@ void Wht4TransformLoopRow_SSE4_1(TransformType tx_type, TransformSize tx_size,
void Wht4TransformLoopColumn_SSE4_1(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
assert(tx_type == kTransformTypeDctDct);
assert(tx_size == kTransformSize4x4);
static_cast<void>(tx_type);
@@ -2928,88 +2949,88 @@ void Init8bpp() {
assert(dsp != nullptr);
// Maximum transform size for Dct is 64.
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize4_1DTransformDct)
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize4_Transform1dDct)
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
Dct4TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
Dct4TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize8_1DTransformDct)
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize8_Transform1dDct)
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
Dct8TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
Dct8TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize16_1DTransformDct)
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize16_Transform1dDct)
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
Dct16TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
Dct16TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize32_1DTransformDct)
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize32_Transform1dDct)
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
Dct32TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
Dct32TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize64_1DTransformDct)
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize64_Transform1dDct)
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
Dct64TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
Dct64TransformLoopColumn_SSE4_1;
#endif
// Maximum transform size for Adst is 16.
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize4_1DTransformAdst)
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize4_Transform1dAdst)
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
Adst4TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
Adst4TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize8_1DTransformAdst)
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize8_Transform1dAdst)
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
Adst8TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
Adst8TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize16_1DTransformAdst)
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize16_Transform1dAdst)
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
Adst16TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
Adst16TransformLoopColumn_SSE4_1;
#endif
// Maximum transform size for Identity transform is 32.
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize4_1DTransformIdentity)
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize4_Transform1dIdentity)
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
Identity4TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
Identity4TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize8_1DTransformIdentity)
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize8_Transform1dIdentity)
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
Identity8TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
Identity8TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize16_1DTransformIdentity)
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize16_Transform1dIdentity)
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
Identity16TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
Identity16TransformLoopColumn_SSE4_1;
#endif
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize32_1DTransformIdentity)
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize32_Transform1dIdentity)
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
Identity32TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
Identity32TransformLoopColumn_SSE4_1;
#endif
// Maximum transform size for Wht is 4.
-#if DSP_ENABLED_8BPP_SSE4_1(1DTransformSize4_1DTransformWht)
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
+#if DSP_ENABLED_8BPP_SSE4_1(Transform1dSize4_Transform1dWht)
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
Wht4TransformLoopRow_SSE4_1;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
Wht4TransformLoopColumn_SSE4_1;
#endif
}
diff --git a/src/dsp/x86/inverse_transform_sse4.h b/src/dsp/x86/inverse_transform_sse4.h
index 106084b..c31e88b 100644
--- a/src/dsp/x86/inverse_transform_sse4.h
+++ b/src/dsp/x86/inverse_transform_sse4.h
@@ -34,56 +34,56 @@ void InverseTransformInit_SSE4_1();
// optimization being enabled, signal the sse4 implementation should be used.
#if LIBGAV1_TARGETING_SSE4_1
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct
-#define LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
+#define LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct
-#define LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
+#define LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity
-#define LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
+#define LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity
-#define LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
+#define LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity
-#define LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
+#define LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity LIBGAV1_CPU_SSE4_1
#endif
-#ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht
-#define LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht LIBGAV1_CPU_SSE4_1
+#ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
+#define LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht LIBGAV1_CPU_SSE4_1
#endif
#endif // LIBGAV1_TARGETING_SSE4_1
#endif // LIBGAV1_SRC_DSP_X86_INVERSE_TRANSFORM_SSE4_H_
diff --git a/src/dsp/x86/loop_restoration_10bit_avx2.cc b/src/dsp/x86/loop_restoration_10bit_avx2.cc
index b38f322..daf5c42 100644
--- a/src/dsp/x86/loop_restoration_10bit_avx2.cc
+++ b/src/dsp/x86/loop_restoration_10bit_avx2.cc
@@ -472,11 +472,14 @@ inline void WienerVerticalTap1(const int16_t* wiener_buffer,
}
void WienerFilter_AVX2(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
const int number_rows_to_skip = std::max(
@@ -3097,11 +3100,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
// in the end of each row. It is safe to overwrite the output as it will not be
// part of the visible frame.
void SelfGuidedFilter_AVX2(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc
index 96380e3..6625d51 100644
--- a/src/dsp/x86/loop_restoration_10bit_sse4.cc
+++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc
@@ -429,11 +429,14 @@ inline void WienerVerticalTap1(const int16_t* wiener_buffer,
}
void WienerFilter_SSE4_1(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
const int number_rows_to_skip = std::max(
@@ -2465,11 +2468,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
// in the end of each row. It is safe to overwrite the output as it will not be
// part of the visible frame.
void SelfGuidedFilter_SSE4_1(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
diff --git a/src/dsp/x86/loop_restoration_avx2.cc b/src/dsp/x86/loop_restoration_avx2.cc
index 351a324..30e8a22 100644
--- a/src/dsp/x86/loop_restoration_avx2.cc
+++ b/src/dsp/x86/loop_restoration_avx2.cc
@@ -483,11 +483,14 @@ inline void WienerVerticalTap1(const int16_t* wiener_buffer,
}
void WienerFilter_AVX2(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
const int number_rows_to_skip = std::max(
@@ -2880,11 +2883,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
// in the end of each row. It is safe to overwrite the output as it will not be
// part of the visible frame.
void SelfGuidedFilter_AVX2(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc
index 273bcc8..3363f0e 100644
--- a/src/dsp/x86/loop_restoration_sse4.cc
+++ b/src/dsp/x86/loop_restoration_sse4.cc
@@ -482,11 +482,14 @@ inline void WienerVerticalTap1(const int16_t* wiener_buffer,
}
void WienerFilter_SSE4_1(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int16_t* const number_leading_zero_coefficients =
restoration_info.wiener_info.number_leading_zero_coefficients;
const int number_rows_to_skip = std::max(
@@ -2510,11 +2513,14 @@ inline void BoxFilterProcessPass2(const RestorationUnitInfo& restoration_info,
// in the end of each row. It is safe to overwrite the output as it will not be
// part of the visible frame.
void SelfGuidedFilter_SSE4_1(
- const RestorationUnitInfo& restoration_info, const void* const source,
- const ptrdiff_t stride, const void* const top_border,
- const ptrdiff_t top_border_stride, const void* const bottom_border,
+ const RestorationUnitInfo& LIBGAV1_RESTRICT restoration_info,
+ const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride,
+ const void* LIBGAV1_RESTRICT const top_border,
+ const ptrdiff_t top_border_stride,
+ const void* LIBGAV1_RESTRICT const bottom_border,
const ptrdiff_t bottom_border_stride, const int width, const int height,
- RestorationBuffer* const restoration_buffer, void* const dest) {
+ RestorationBuffer* LIBGAV1_RESTRICT const restoration_buffer,
+ void* LIBGAV1_RESTRICT const dest) {
const int index = restoration_info.sgr_proj_info.index;
const int radius_pass_0 = kSgrProjParams[index][0]; // 2 or 0
const int radius_pass_1 = kSgrProjParams[index][2]; // 1 or 0
diff --git a/src/dsp/x86/mask_blend_sse4.cc b/src/dsp/x86/mask_blend_sse4.cc
index 2e836af..a18444b 100644
--- a/src/dsp/x86/mask_blend_sse4.cc
+++ b/src/dsp/x86/mask_blend_sse4.cc
@@ -36,7 +36,8 @@ namespace {
// Width can only be 4 when it is subsampled from a block of width 8, hence
// subsampling_x is always 1 when this function is called.
template <int subsampling_x, int subsampling_y>
-inline __m128i GetMask4x2(const uint8_t* mask, ptrdiff_t mask_stride) {
+inline __m128i GetMask4x2(const uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
if (subsampling_x == 1) {
const __m128i mask_val_0 = _mm_cvtepu8_epi16(LoadLo8(mask));
const __m128i mask_val_1 =
@@ -62,7 +63,8 @@ inline __m128i GetMask4x2(const uint8_t* mask, ptrdiff_t mask_stride) {
// 16-bit is also the lowest packing for hadd, but without subsampling there is
// an unfortunate conversion required.
template <int subsampling_x, int subsampling_y>
-inline __m128i GetMask8(const uint8_t* mask, ptrdiff_t stride) {
+inline __m128i GetMask8(const uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t stride) {
if (subsampling_x == 1) {
const __m128i row_vals = LoadUnaligned16(mask);
@@ -89,7 +91,8 @@ inline __m128i GetMask8(const uint8_t* mask, ptrdiff_t stride) {
// when is_inter_intra is true, the prediction values are brought to 8-bit
// packing as well.
template <int subsampling_x, int subsampling_y>
-inline __m128i GetInterIntraMask8(const uint8_t* mask, ptrdiff_t stride) {
+inline __m128i GetInterIntraMask8(const uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t stride) {
if (subsampling_x == 1) {
const __m128i row_vals = LoadUnaligned16(mask);
@@ -116,10 +119,11 @@ inline __m128i GetInterIntraMask8(const uint8_t* mask, ptrdiff_t stride) {
return mask_val;
}
-inline void WriteMaskBlendLine4x2(const int16_t* const pred_0,
- const int16_t* const pred_1,
+inline void WriteMaskBlendLine4x2(const int16_t* LIBGAV1_RESTRICT const pred_0,
+ const int16_t* LIBGAV1_RESTRICT const pred_1,
const __m128i pred_mask_0,
- const __m128i pred_mask_1, uint8_t* dst,
+ const __m128i pred_mask_1,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const __m128i pred_val_0 = LoadAligned16(pred_0);
const __m128i pred_val_1 = LoadAligned16(pred_1);
@@ -145,9 +149,11 @@ inline void WriteMaskBlendLine4x2(const int16_t* const pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlending4x4_SSE4(const int16_t* pred_0, const int16_t* pred_1,
- const uint8_t* mask,
- const ptrdiff_t mask_stride, uint8_t* dst,
+inline void MaskBlending4x4_SSE4(const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride,
+ uint8_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const __m128i mask_inverter = _mm_set1_epi16(64);
__m128i pred_mask_0 =
@@ -167,10 +173,12 @@ inline void MaskBlending4x4_SSE4(const int16_t* pred_0, const int16_t* pred_1,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlending4xH_SSE4(const int16_t* pred_0, const int16_t* pred_1,
- const uint8_t* const mask_ptr,
+inline void MaskBlending4xH_SSE4(const int16_t* LIBGAV1_RESTRICT pred_0,
+ const int16_t* LIBGAV1_RESTRICT pred_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
const ptrdiff_t mask_stride, const int height,
- uint8_t* dst, const ptrdiff_t dst_stride) {
+ uint8_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const uint8_t* mask = mask_ptr;
if (height == 4) {
MaskBlending4x4_SSE4<subsampling_x, subsampling_y>(
@@ -222,11 +230,12 @@ inline void MaskBlending4xH_SSE4(const int16_t* pred_0, const int16_t* pred_1,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlend_SSE4(const void* prediction_0, const void* prediction_1,
+inline void MaskBlend_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
const ptrdiff_t /*prediction_stride_1*/,
- const uint8_t* const mask_ptr,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr,
const ptrdiff_t mask_stride, const int width,
- const int height, void* dest,
+ const int height, void* LIBGAV1_RESTRICT dest,
const ptrdiff_t dst_stride) {
auto* dst = static_cast<uint8_t*>(dest);
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
@@ -277,11 +286,10 @@ inline void MaskBlend_SSE4(const void* prediction_0, const void* prediction_1,
} while (++y < height);
}
-inline void InterIntraWriteMaskBlendLine8bpp4x2(const uint8_t* const pred_0,
- uint8_t* const pred_1,
- const ptrdiff_t pred_stride_1,
- const __m128i pred_mask_0,
- const __m128i pred_mask_1) {
+inline void InterIntraWriteMaskBlendLine8bpp4x2(
+ const uint8_t* LIBGAV1_RESTRICT const pred_0,
+ uint8_t* LIBGAV1_RESTRICT const pred_1, const ptrdiff_t pred_stride_1,
+ const __m128i pred_mask_0, const __m128i pred_mask_1) {
const __m128i pred_mask = _mm_unpacklo_epi8(pred_mask_0, pred_mask_1);
const __m128i pred_val_0 = LoadLo8(pred_0);
@@ -301,11 +309,10 @@ inline void InterIntraWriteMaskBlendLine8bpp4x2(const uint8_t* const pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void InterIntraMaskBlending8bpp4x4_SSE4(const uint8_t* pred_0,
- uint8_t* pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8_t* mask,
- const ptrdiff_t mask_stride) {
+inline void InterIntraMaskBlending8bpp4x4_SSE4(
+ const uint8_t* LIBGAV1_RESTRICT pred_0, uint8_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1, const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride) {
const __m128i mask_inverter = _mm_set1_epi8(64);
const __m128i pred_mask_u16_first =
GetMask4x2<subsampling_x, subsampling_y>(mask, mask_stride);
@@ -328,12 +335,11 @@ inline void InterIntraMaskBlending8bpp4x4_SSE4(const uint8_t* pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void InterIntraMaskBlending8bpp4xH_SSE4(const uint8_t* pred_0,
- uint8_t* pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride,
- const int height) {
+inline void InterIntraMaskBlending8bpp4xH_SSE4(
+ const uint8_t* LIBGAV1_RESTRICT pred_0, uint8_t* LIBGAV1_RESTRICT pred_1,
+ const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int height) {
const uint8_t* mask = mask_ptr;
if (height == 4) {
InterIntraMaskBlending8bpp4x4_SSE4<subsampling_x, subsampling_y>(
@@ -358,12 +364,11 @@ inline void InterIntraMaskBlending8bpp4xH_SSE4(const uint8_t* pred_0,
}
template <int subsampling_x, int subsampling_y>
-void InterIntraMaskBlend8bpp_SSE4(const uint8_t* prediction_0,
- uint8_t* prediction_1,
- const ptrdiff_t prediction_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride, const int width,
- const int height) {
+void InterIntraMaskBlend8bpp_SSE4(
+ const uint8_t* LIBGAV1_RESTRICT prediction_0,
+ uint8_t* LIBGAV1_RESTRICT prediction_1, const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int width, const int height) {
if (width == 4) {
InterIntraMaskBlending8bpp4xH_SSE4<subsampling_x, subsampling_y>(
prediction_0, prediction_1, prediction_stride_1, mask_ptr, mask_stride,
@@ -503,10 +508,11 @@ inline __m128i GetMask8(const uint8_t* mask, const ptrdiff_t stride,
}
inline void WriteMaskBlendLine10bpp4x2_SSE4_1(
- const uint16_t* pred_0, const uint16_t* pred_1,
- const ptrdiff_t pred_stride_1, const __m128i& pred_mask_0,
- const __m128i& pred_mask_1, const __m128i& offset, const __m128i& max,
- const __m128i& shift4, uint16_t* dst, const ptrdiff_t dst_stride) {
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const ptrdiff_t pred_stride_1,
+ const __m128i& pred_mask_0, const __m128i& pred_mask_1,
+ const __m128i& offset, const __m128i& max, const __m128i& shift4,
+ uint16_t* LIBGAV1_RESTRICT dst, const ptrdiff_t dst_stride) {
const __m128i pred_val_0 = LoadUnaligned16(pred_0);
const __m128i pred_val_1 = LoadHi8(LoadLo8(pred_1), pred_1 + pred_stride_1);
@@ -544,11 +550,12 @@ inline void WriteMaskBlendLine10bpp4x2_SSE4_1(
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlend10bpp4x4_SSE4_1(const uint16_t* pred_0,
- const uint16_t* pred_1,
+inline void MaskBlend10bpp4x4_SSE4_1(const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1,
const ptrdiff_t pred_stride_1,
- const uint8_t* mask,
- const ptrdiff_t mask_stride, uint16_t* dst,
+ const uint8_t* LIBGAV1_RESTRICT mask,
+ const ptrdiff_t mask_stride,
+ uint16_t* LIBGAV1_RESTRICT dst,
const ptrdiff_t dst_stride) {
const __m128i mask_inverter = _mm_set1_epi16(kMaskInverse);
const __m128i zero = _mm_setzero_si128();
@@ -575,13 +582,12 @@ inline void MaskBlend10bpp4x4_SSE4_1(const uint16_t* pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlend10bpp4xH_SSE4_1(const uint16_t* pred_0,
- const uint16_t* pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride,
- const int height, uint16_t* dst,
- const ptrdiff_t dst_stride) {
+inline void MaskBlend10bpp4xH_SSE4_1(
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int height, uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const uint8_t* mask = mask_ptr;
if (height == 4) {
MaskBlend10bpp4x4_SSE4_1<subsampling_x, subsampling_y>(
@@ -648,13 +654,13 @@ inline void MaskBlend10bpp4xH_SSE4_1(const uint16_t* pred_0,
}
template <int subsampling_x, int subsampling_y>
-inline void MaskBlend10bpp_SSE4_1(const void* prediction_0,
- const void* prediction_1,
- const ptrdiff_t prediction_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride, const int width,
- const int height, void* dest,
- const ptrdiff_t dest_stride) {
+inline void MaskBlend10bpp_SSE4_1(
+ const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int width, const int height, void* LIBGAV1_RESTRICT dest,
+ const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const ptrdiff_t dst_stride = dest_stride / sizeof(dst[0]);
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
@@ -725,10 +731,11 @@ inline void MaskBlend10bpp_SSE4_1(const void* prediction_0,
}
inline void InterIntraWriteMaskBlendLine10bpp4x2_SSE4_1(
- const uint16_t* prediction_0, const uint16_t* prediction_1,
+ const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1,
const ptrdiff_t pred_stride_1, const __m128i& pred_mask_0,
- const __m128i& pred_mask_1, const __m128i& shift6, uint16_t* dst,
- const ptrdiff_t dst_stride) {
+ const __m128i& pred_mask_1, const __m128i& shift6,
+ uint16_t* LIBGAV1_RESTRICT dst, const ptrdiff_t dst_stride) {
const __m128i pred_val_0 = LoadUnaligned16(prediction_0);
const __m128i pred_val_1 =
LoadHi8(LoadLo8(prediction_1), prediction_1 + pred_stride_1);
@@ -751,9 +758,10 @@ inline void InterIntraWriteMaskBlendLine10bpp4x2_SSE4_1(
template <int subsampling_x, int subsampling_y>
inline void InterIntraMaskBlend10bpp4x4_SSE4_1(
- const uint16_t* pred_0, const uint16_t* pred_1,
- const ptrdiff_t pred_stride_1, const uint8_t* mask,
- const ptrdiff_t mask_stride, uint16_t* dst, const ptrdiff_t dst_stride) {
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT mask, const ptrdiff_t mask_stride,
+ uint16_t* LIBGAV1_RESTRICT dst, const ptrdiff_t dst_stride) {
const __m128i mask_inverter = _mm_set1_epi16(kMaskInverse);
const __m128i shift6 = _mm_set1_epi32((1 << 6) >> 1);
const __m128i zero = _mm_setzero_si128();
@@ -777,13 +785,12 @@ inline void InterIntraMaskBlend10bpp4x4_SSE4_1(
}
template <int subsampling_x, int subsampling_y>
-inline void InterIntraMaskBlend10bpp4xH_SSE4_1(const uint16_t* pred_0,
- const uint16_t* pred_1,
- const ptrdiff_t pred_stride_1,
- const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride,
- const int height, uint16_t* dst,
- const ptrdiff_t dst_stride) {
+inline void InterIntraMaskBlend10bpp4xH_SSE4_1(
+ const uint16_t* LIBGAV1_RESTRICT pred_0,
+ const uint16_t* LIBGAV1_RESTRICT pred_1, const ptrdiff_t pred_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int height, uint16_t* LIBGAV1_RESTRICT dst,
+ const ptrdiff_t dst_stride) {
const uint8_t* mask = mask_ptr;
if (height == 4) {
InterIntraMaskBlend10bpp4x4_SSE4_1<subsampling_x, subsampling_y>(
@@ -848,9 +855,11 @@ inline void InterIntraMaskBlend10bpp4xH_SSE4_1(const uint16_t* pred_0,
template <int subsampling_x, int subsampling_y>
inline void InterIntraMaskBlend10bpp_SSE4_1(
- const void* prediction_0, const void* prediction_1,
- const ptrdiff_t prediction_stride_1, const uint8_t* const mask_ptr,
- const ptrdiff_t mask_stride, const int width, const int height, void* dest,
+ const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ const ptrdiff_t prediction_stride_1,
+ const uint8_t* LIBGAV1_RESTRICT const mask_ptr, const ptrdiff_t mask_stride,
+ const int width, const int height, void* LIBGAV1_RESTRICT dest,
const ptrdiff_t dest_stride) {
auto* dst = static_cast<uint16_t*>(dest);
const ptrdiff_t dst_stride = dest_stride / sizeof(dst[0]);
diff --git a/src/dsp/x86/motion_field_projection_sse4.cc b/src/dsp/x86/motion_field_projection_sse4.cc
index e3f2cce..5641531 100644
--- a/src/dsp/x86/motion_field_projection_sse4.cc
+++ b/src/dsp/x86/motion_field_projection_sse4.cc
@@ -360,27 +360,12 @@ void MotionFieldProjectionKernel_SSE4_1(
} while (++y8 < y8_end);
}
-void Init8bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
- assert(dsp != nullptr);
- dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_SSE4_1;
-}
-
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
- assert(dsp != nullptr);
- dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_SSE4_1;
-}
-#endif
-
} // namespace
void MotionFieldProjectionInit_SSE4_1() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
+ assert(dsp != nullptr);
+ dsp->motion_field_projection_kernel = MotionFieldProjectionKernel_SSE4_1;
}
} // namespace dsp
diff --git a/src/dsp/x86/motion_vector_search_sse4.cc b/src/dsp/x86/motion_vector_search_sse4.cc
index 7f5f035..dacc6ec 100644
--- a/src/dsp/x86/motion_vector_search_sse4.cc
+++ b/src/dsp/x86/motion_vector_search_sse4.cc
@@ -64,7 +64,7 @@ inline __m128i MvProjectionClip(const __m128i mvs[2],
}
inline __m128i MvProjectionCompoundClip(
- const MotionVector* const temporal_mvs,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
const int8_t temporal_reference_offsets[2],
const int reference_offsets[2]) {
const auto* const tmvs = reinterpret_cast<const int32_t*>(temporal_mvs);
@@ -83,8 +83,8 @@ inline __m128i MvProjectionCompoundClip(
}
inline __m128i MvProjectionSingleClip(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offset) {
const auto* const tmvs = reinterpret_cast<const int16_t*>(temporal_mvs);
const __m128i temporal_mv = LoadAligned16(tmvs);
@@ -126,9 +126,10 @@ inline void ForceInteger(const __m128i mv, void* const candidate_mvs) {
}
void MvProjectionCompoundLowPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -143,9 +144,10 @@ void MvProjectionCompoundLowPrecision_SSE4_1(
}
void MvProjectionCompoundForceInteger_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -160,9 +162,10 @@ void MvProjectionCompoundForceInteger_SSE4_1(
}
void MvProjectionCompoundHighPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -177,8 +180,10 @@ void MvProjectionCompoundHighPrecision_SSE4_1(
}
void MvProjectionSingleLowPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -190,8 +195,10 @@ void MvProjectionSingleLowPrecision_SSE4_1(
}
void MvProjectionSingleForceInteger_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -203,8 +210,10 @@ void MvProjectionSingleForceInteger_SSE4_1(
}
void MvProjectionSingleHighPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -215,20 +224,10 @@ void MvProjectionSingleHighPrecision_SSE4_1(
} while (i < count);
}
-void Init8bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
- assert(dsp != nullptr);
- dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1;
- dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1;
- dsp->mv_projection_compound[2] = MvProjectionCompoundHighPrecision_SSE4_1;
- dsp->mv_projection_single[0] = MvProjectionSingleLowPrecision_SSE4_1;
- dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1;
- dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1;
-}
+} // namespace
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+void MotionVectorSearchInit_SSE4_1() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1;
dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1;
@@ -237,16 +236,6 @@ void Init10bpp() {
dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1;
dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1;
}
-#endif
-
-} // namespace
-
-void MotionVectorSearchInit_SSE4_1() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
-}
} // namespace dsp
} // namespace libgav1
diff --git a/src/dsp/x86/obmc_sse4.cc b/src/dsp/x86/obmc_sse4.cc
index c34a7f7..8ce23b4 100644
--- a/src/dsp/x86/obmc_sse4.cc
+++ b/src/dsp/x86/obmc_sse4.cc
@@ -37,8 +37,9 @@ namespace {
#include "src/dsp/obmc.inc"
inline void OverlapBlendFromLeft2xH_SSE4_1(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t prediction_stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
uint8_t* pred = prediction;
const uint8_t* obmc_pred = obmc_prediction;
@@ -68,8 +69,9 @@ inline void OverlapBlendFromLeft2xH_SSE4_1(
}
inline void OverlapBlendFromLeft4xH_SSE4_1(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t prediction_stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
uint8_t* pred = prediction;
const uint8_t* obmc_pred = obmc_prediction;
@@ -106,8 +108,9 @@ inline void OverlapBlendFromLeft4xH_SSE4_1(
}
inline void OverlapBlendFromLeft8xH_SSE4_1(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t prediction_stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
uint8_t* pred = prediction;
const uint8_t* obmc_pred = obmc_prediction;
@@ -130,13 +133,15 @@ inline void OverlapBlendFromLeft8xH_SSE4_1(
} while (--y != 0);
}
-void OverlapBlendFromLeft_SSE4_1(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromLeft_SSE4_1(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint8_t*>(prediction);
const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 2);
+ assert(height >= 4);
if (width == 2) {
OverlapBlendFromLeft2xH_SSE4_1(pred, prediction_stride, height, obmc_pred,
@@ -185,8 +190,9 @@ void OverlapBlendFromLeft_SSE4_1(void* const prediction,
}
inline void OverlapBlendFromTop4xH_SSE4_1(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t prediction_stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
uint8_t* pred = prediction;
const uint8_t* obmc_pred = obmc_prediction;
@@ -227,8 +233,9 @@ inline void OverlapBlendFromTop4xH_SSE4_1(
}
inline void OverlapBlendFromTop8xH_SSE4_1(
- uint8_t* const prediction, const ptrdiff_t prediction_stride,
- const int height, const uint8_t* const obmc_prediction,
+ uint8_t* LIBGAV1_RESTRICT const prediction,
+ const ptrdiff_t prediction_stride, const int height,
+ const uint8_t* LIBGAV1_RESTRICT const obmc_prediction,
const ptrdiff_t obmc_prediction_stride) {
uint8_t* pred = prediction;
const uint8_t* obmc_pred = obmc_prediction;
@@ -253,15 +260,17 @@ inline void OverlapBlendFromTop8xH_SSE4_1(
} while (--y != 0);
}
-void OverlapBlendFromTop_SSE4_1(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromTop_SSE4_1(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint8_t*>(prediction);
const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
+ assert(width >= 4);
+ assert(height >= 2);
- if (width <= 4) {
+ if (width == 4) {
OverlapBlendFromTop4xH_SSE4_1(pred, prediction_stride, height, obmc_pred,
obmc_prediction_stride);
return;
@@ -323,8 +332,9 @@ namespace {
constexpr int kRoundBitsObmcBlend = 6;
inline void OverlapBlendFromLeft2xH_SSE4_1(
- uint16_t* const prediction, const ptrdiff_t pred_stride, const int height,
- const uint16_t* const obmc_prediction, const ptrdiff_t obmc_pred_stride) {
+ uint16_t* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride,
+ const int height, const uint16_t* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_pred_stride) {
uint16_t* pred = prediction;
const uint16_t* obmc_pred = obmc_prediction;
const ptrdiff_t pred_stride2 = pred_stride << 1;
@@ -353,8 +363,9 @@ inline void OverlapBlendFromLeft2xH_SSE4_1(
}
inline void OverlapBlendFromLeft4xH_SSE4_1(
- uint16_t* const prediction, const ptrdiff_t pred_stride, const int height,
- const uint16_t* const obmc_prediction, const ptrdiff_t obmc_pred_stride) {
+ uint16_t* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride,
+ const int height, const uint16_t* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_pred_stride) {
uint16_t* pred = prediction;
const uint16_t* obmc_pred = obmc_prediction;
const ptrdiff_t pred_stride2 = pred_stride << 1;
@@ -385,16 +396,18 @@ inline void OverlapBlendFromLeft4xH_SSE4_1(
} while (y != 0);
}
-void OverlapBlendFromLeft10bpp_SSE4_1(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromLeft10bpp_SSE4_1(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint16_t*>(prediction);
const auto* obmc_pred = static_cast<const uint16_t*>(obmc_prediction);
const ptrdiff_t pred_stride = prediction_stride / sizeof(pred[0]);
const ptrdiff_t obmc_pred_stride =
obmc_prediction_stride / sizeof(obmc_pred[0]);
+ assert(width >= 2);
+ assert(height >= 4);
if (width == 2) {
OverlapBlendFromLeft2xH_SSE4_1(pred, pred_stride, height, obmc_pred,
@@ -437,54 +450,10 @@ void OverlapBlendFromLeft10bpp_SSE4_1(void* const prediction,
} while (x < width);
}
-inline void OverlapBlendFromTop2xH_SSE4_1(uint16_t* const prediction,
- const ptrdiff_t pred_stride,
- const int height,
- const uint16_t* const obmc_prediction,
- const ptrdiff_t obmc_pred_stride) {
- uint16_t* pred = prediction;
- const uint16_t* obmc_pred = obmc_prediction;
- const __m128i mask_inverter = _mm_set1_epi16(64);
- const __m128i mask_shuffler = _mm_set_epi32(0x01010101, 0x01010101, 0, 0);
- const __m128i mask_preinverter = _mm_set1_epi16(-256 | 1);
- const uint8_t* mask = kObmcMask + height - 2;
- const int compute_height =
- height - (height >> 2); // compute_height based on 8-bit opt
- const ptrdiff_t pred_stride2 = pred_stride << 1;
- const ptrdiff_t obmc_pred_stride2 = obmc_pred_stride << 1;
- int y = 0;
- do {
- // First mask in the first half, second mask in the second half.
- const __m128i mask_val = _mm_shuffle_epi8(Load4(mask + y), mask_shuffler);
- const __m128i masks =
- _mm_sub_epi8(mask_inverter, _mm_sign_epi8(mask_val, mask_preinverter));
- const __m128i masks_lo = _mm_cvtepi8_epi16(masks);
- const __m128i masks_hi = _mm_cvtepi8_epi16(_mm_srli_si128(masks, 8));
-
- const __m128i pred_val = LoadHi8(LoadLo8(pred), pred + pred_stride);
- const __m128i obmc_pred_val =
- LoadHi8(LoadLo8(obmc_pred), obmc_pred + obmc_pred_stride);
- const __m128i terms_lo = _mm_unpacklo_epi16(obmc_pred_val, pred_val);
- const __m128i terms_hi = _mm_unpackhi_epi16(obmc_pred_val, pred_val);
- const __m128i result_lo = RightShiftWithRounding_U32(
- _mm_madd_epi16(terms_lo, masks_lo), kRoundBitsObmcBlend);
- const __m128i result_hi = RightShiftWithRounding_U32(
- _mm_madd_epi16(terms_hi, masks_hi), kRoundBitsObmcBlend);
- const __m128i packed_result = _mm_packus_epi32(result_lo, result_hi);
-
- Store4(pred, packed_result);
- Store4(pred + pred_stride, _mm_srli_si128(packed_result, 8));
- pred += pred_stride2;
- obmc_pred += obmc_pred_stride2;
- y += 2;
- } while (y < compute_height);
-}
-
-inline void OverlapBlendFromTop4xH_SSE4_1(uint16_t* const prediction,
- const ptrdiff_t pred_stride,
- const int height,
- const uint16_t* const obmc_prediction,
- const ptrdiff_t obmc_pred_stride) {
+inline void OverlapBlendFromTop4xH_SSE4_1(
+ uint16_t* LIBGAV1_RESTRICT const prediction, const ptrdiff_t pred_stride,
+ const int height, const uint16_t* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_pred_stride) {
uint16_t* pred = prediction;
const uint16_t* obmc_pred = obmc_prediction;
const __m128i mask_inverter = _mm_set1_epi16(64);
@@ -522,22 +491,19 @@ inline void OverlapBlendFromTop4xH_SSE4_1(uint16_t* const prediction,
} while (y < compute_height);
}
-void OverlapBlendFromTop10bpp_SSE4_1(void* const prediction,
- const ptrdiff_t prediction_stride,
- const int width, const int height,
- const void* const obmc_prediction,
- const ptrdiff_t obmc_prediction_stride) {
+void OverlapBlendFromTop10bpp_SSE4_1(
+ void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
+ const int width, const int height,
+ const void* LIBGAV1_RESTRICT const obmc_prediction,
+ const ptrdiff_t obmc_prediction_stride) {
auto* pred = static_cast<uint16_t*>(prediction);
const auto* obmc_pred = static_cast<const uint16_t*>(obmc_prediction);
const ptrdiff_t pred_stride = prediction_stride / sizeof(pred[0]);
const ptrdiff_t obmc_pred_stride =
obmc_prediction_stride / sizeof(obmc_pred[0]);
+ assert(width >= 4);
+ assert(height >= 2);
- if (width == 2) {
- OverlapBlendFromTop2xH_SSE4_1(pred, pred_stride, height, obmc_pred,
- obmc_pred_stride);
- return;
- }
if (width == 4) {
OverlapBlendFromTop4xH_SSE4_1(pred, pred_stride, height, obmc_pred,
obmc_pred_stride);
diff --git a/src/dsp/x86/super_res_sse4.cc b/src/dsp/x86/super_res_sse4.cc
index 85d05bc..458d94e 100644
--- a/src/dsp/x86/super_res_sse4.cc
+++ b/src/dsp/x86/super_res_sse4.cc
@@ -90,11 +90,13 @@ void SuperResCoefficients_SSE4_1(const int upscaled_width,
} while (--x != 0);
}
-void SuperRes_SSE4_1(const void* const coefficients, void* const source,
+void SuperRes_SSE4_1(const void* LIBGAV1_RESTRICT const coefficients,
+ void* LIBGAV1_RESTRICT const source,
const ptrdiff_t source_stride, const int height,
const int downscaled_width, const int upscaled_width,
const int initial_subpixel_x, const int step,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* src = static_cast<uint8_t*>(source) - DivideBy2(kSuperResFilterTaps);
auto* dst = static_cast<uint8_t*>(dest);
int y = height;
@@ -227,11 +229,13 @@ void SuperResCoefficients_SSE4_1(const int upscaled_width,
}
template <int bitdepth>
-void SuperRes_SSE4_1(const void* const coefficients, void* const source,
+void SuperRes_SSE4_1(const void* LIBGAV1_RESTRICT const coefficients,
+ void* LIBGAV1_RESTRICT const source,
const ptrdiff_t source_stride, const int height,
const int downscaled_width, const int upscaled_width,
const int initial_subpixel_x, const int step,
- void* const dest, const ptrdiff_t dest_stride) {
+ void* LIBGAV1_RESTRICT const dest,
+ const ptrdiff_t dest_stride) {
auto* src = static_cast<uint16_t*>(source) - DivideBy2(kSuperResFilterTaps);
auto* dst = static_cast<uint16_t*>(dest);
int y = height;
diff --git a/src/dsp/x86/warp_sse4.cc b/src/dsp/x86/warp_sse4.cc
index 9ddfeac..5830894 100644
--- a/src/dsp/x86/warp_sse4.cc
+++ b/src/dsp/x86/warp_sse4.cc
@@ -101,7 +101,7 @@ inline void HorizontalFilter(const int sx4, const int16_t alpha,
template <bool is_compound>
inline void WriteVerticalFilter(const __m128i filter[8],
const int16_t intermediate_result[15][8], int y,
- void* dst_row) {
+ void* LIBGAV1_RESTRICT dst_row) {
constexpr int kRoundBitsVertical =
is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
__m128i sum_low = _mm_set1_epi32(kOffsetRemoval);
@@ -136,8 +136,9 @@ inline void WriteVerticalFilter(const __m128i filter[8],
template <bool is_compound>
inline void WriteVerticalFilter(const __m128i filter[8],
- const int16_t* intermediate_result_column,
- void* dst_row) {
+ const int16_t* LIBGAV1_RESTRICT
+ intermediate_result_column,
+ void* LIBGAV1_RESTRICT dst_row) {
constexpr int kRoundBitsVertical =
is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
__m128i sum_low = _mm_setzero_si128();
@@ -167,7 +168,7 @@ inline void WriteVerticalFilter(const __m128i filter[8],
template <bool is_compound, typename DestType>
inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma,
- int delta, DestType* dest_row,
+ int delta, DestType* LIBGAV1_RESTRICT dest_row,
ptrdiff_t dest_stride) {
int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
for (int y = 0; y < 8; ++y) {
@@ -187,8 +188,9 @@ inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma,
}
template <bool is_compound, typename DestType>
-inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma,
- int delta, DestType* dest_row,
+inline void VerticalFilter(const int16_t* LIBGAV1_RESTRICT source_cols, int y4,
+ int gamma, int delta,
+ DestType* LIBGAV1_RESTRICT dest_row,
ptrdiff_t dest_stride) {
int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
for (int y = 0; y < 8; ++y) {
@@ -208,9 +210,11 @@ inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int source_height, int ix4, int iy4,
- DestType* dst_row, ptrdiff_t dest_stride) {
+inline void WarpRegion1(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width,
+ int source_height, int ix4, int iy4,
+ DestType* LIBGAV1_RESTRICT dst_row,
+ ptrdiff_t dest_stride) {
// Region 1
// Points to the left or right border of the first row of |src|.
const uint8_t* first_row_border =
@@ -244,10 +248,12 @@ inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int y4, int ix4, int iy4, int gamma,
- int delta, int16_t intermediate_result_column[15],
- DestType* dst_row, ptrdiff_t dest_stride) {
+inline void WarpRegion2(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width, int y4,
+ int ix4, int iy4, int gamma, int delta,
+ int16_t intermediate_result_column[15],
+ DestType* LIBGAV1_RESTRICT dst_row,
+ ptrdiff_t dest_stride) {
// Region 2.
// Points to the left or right border of the first row of |src|.
const uint8_t* first_row_border =
@@ -283,9 +289,10 @@ inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride,
- int source_height, int alpha, int beta, int x4, int ix4,
- int iy4, int16_t intermediate_result[15][8]) {
+inline void WarpRegion3(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_height, int alpha,
+ int beta, int x4, int ix4, int iy4,
+ int16_t intermediate_result[15][8]) {
// Region 3
// At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0.
@@ -315,9 +322,9 @@ inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha,
- int beta, int x4, int ix4, int iy4,
- int16_t intermediate_result[15][8]) {
+inline void WarpRegion4(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int alpha, int beta, int x4,
+ int ix4, int iy4, int16_t intermediate_result[15][8]) {
// Region 4.
// At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0.
@@ -351,12 +358,14 @@ inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha,
}
template <bool is_compound, typename DestType>
-inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int source_height,
- const int* warp_params, int subsampling_x,
- int subsampling_y, int src_x, int src_y,
- int16_t alpha, int16_t beta, int16_t gamma,
- int16_t delta, DestType* dst_row,
+inline void HandleWarpBlock(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width,
+ int source_height,
+ const int* LIBGAV1_RESTRICT warp_params,
+ int subsampling_x, int subsampling_y, int src_x,
+ int src_y, int16_t alpha, int16_t beta,
+ int16_t gamma, int16_t delta,
+ DestType* LIBGAV1_RESTRICT dst_row,
ptrdiff_t dest_stride) {
union {
// Intermediate_result is the output of the horizontal filtering and
@@ -460,11 +469,12 @@ inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound>
-void Warp_SSE4_1(const void* source, ptrdiff_t source_stride, int source_width,
- int source_height, const int* warp_params, int subsampling_x,
+void Warp_SSE4_1(const void* LIBGAV1_RESTRICT source, ptrdiff_t source_stride,
+ int source_width, int source_height,
+ const int* LIBGAV1_RESTRICT warp_params, int subsampling_x,
int subsampling_y, int block_start_x, int block_start_y,
int block_width, int block_height, int16_t alpha, int16_t beta,
- int16_t gamma, int16_t delta, void* dest,
+ int16_t gamma, int16_t delta, void* LIBGAV1_RESTRICT dest,
ptrdiff_t dest_stride) {
const auto* const src = static_cast<const uint8_t*>(source);
using DestType =
diff --git a/src/dsp/x86/weight_mask_sse4.cc b/src/dsp/x86/weight_mask_sse4.cc
index 08a1739..69cb784 100644
--- a/src/dsp/x86/weight_mask_sse4.cc
+++ b/src/dsp/x86/weight_mask_sse4.cc
@@ -37,8 +37,9 @@ namespace {
constexpr int kRoundingBits8bpp = 4;
template <bool mask_is_inverse, bool is_store_16>
-inline void WeightMask16_SSE4(const int16_t* prediction_0,
- const int16_t* prediction_1, uint8_t* mask,
+inline void WeightMask16_SSE4(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const __m128i pred_00 = LoadAligned16(prediction_0);
const __m128i pred_10 = LoadAligned16(prediction_1);
@@ -86,8 +87,9 @@ inline void WeightMask16_SSE4(const int16_t* prediction_0,
mask += mask_stride << 1
template <bool mask_is_inverse>
-void WeightMask8x8_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask8x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
@@ -98,8 +100,10 @@ void WeightMask8x8_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask8x16_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask8x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 3;
@@ -112,8 +116,10 @@ void WeightMask8x16_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask8x32_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask8x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 5;
@@ -135,8 +141,10 @@ void WeightMask8x32_SSE4(const void* prediction_0, const void* prediction_1,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask16x8_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask16x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y = 7;
@@ -147,8 +155,10 @@ void WeightMask16x8_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask16x16_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask16x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 5;
@@ -161,8 +171,10 @@ void WeightMask16x16_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask16x32_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask16x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 6;
@@ -178,8 +190,10 @@ void WeightMask16x32_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask16x64_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask16x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 21;
@@ -203,8 +217,10 @@ void WeightMask16x64_SSE4(const void* prediction_0, const void* prediction_1,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask32x8_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask32x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
WEIGHT32_AND_STRIDE;
@@ -218,8 +234,10 @@ void WeightMask32x8_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask32x16_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask32x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 5;
@@ -232,8 +250,10 @@ void WeightMask32x16_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask32x32_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask32x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 6;
@@ -249,8 +269,10 @@ void WeightMask32x32_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask32x64_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask32x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 21;
@@ -278,8 +300,10 @@ void WeightMask32x64_SSE4(const void* prediction_0, const void* prediction_1,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask64x16_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask64x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -292,8 +316,10 @@ void WeightMask64x16_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask64x32_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask64x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -309,8 +335,10 @@ void WeightMask64x32_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask64x64_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask64x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -323,8 +351,10 @@ void WeightMask64x64_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask64x128_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask64x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -338,8 +368,10 @@ void WeightMask64x128_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask128x64_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask128x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -380,8 +412,10 @@ void WeightMask128x64_SSE4(const void* prediction_0, const void* prediction_1,
}
template <bool mask_is_inverse>
-void WeightMask128x128_SSE4(const void* prediction_0, const void* prediction_1,
- uint8_t* mask, ptrdiff_t mask_stride) {
+void WeightMask128x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -467,9 +501,10 @@ constexpr int kRoundingBits10bpp = 6;
constexpr int kScaledDiffShift = 4;
template <bool mask_is_inverse, bool is_store_16>
-inline void WeightMask16_10bpp_SSE4(const uint16_t* prediction_0,
- const uint16_t* prediction_1, uint8_t* mask,
- ptrdiff_t mask_stride) {
+inline void WeightMask16_10bpp_SSE4(
+ const uint16_t* LIBGAV1_RESTRICT prediction_0,
+ const uint16_t* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
const __m128i diff_offset = _mm_set1_epi8(38);
const __m128i mask_ceiling = _mm_set1_epi8(64);
const __m128i zero = _mm_setzero_si128();
@@ -538,8 +573,9 @@ inline void WeightMask16_10bpp_SSE4(const uint16_t* prediction_0,
mask += mask_stride << 1
template <bool mask_is_inverse>
-void WeightMask8x8_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask8x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -551,8 +587,9 @@ void WeightMask8x8_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x16_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask8x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -566,8 +603,9 @@ void WeightMask8x16_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x32_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask8x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -591,8 +629,9 @@ void WeightMask8x32_10bpp_SSE4(const void* prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask16x8_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask16x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -604,8 +643,9 @@ void WeightMask16x8_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x16_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask16x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -619,8 +659,9 @@ void WeightMask16x16_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x32_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask16x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -637,8 +678,9 @@ void WeightMask16x32_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x64_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask16x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -664,8 +706,9 @@ void WeightMask16x64_10bpp_SSE4(const void* prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask32x8_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask32x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -680,8 +723,9 @@ void WeightMask32x8_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x16_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask32x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -695,8 +739,9 @@ void WeightMask32x16_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x32_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask32x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -713,8 +758,9 @@ void WeightMask32x32_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x64_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask32x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -744,8 +790,9 @@ void WeightMask32x64_10bpp_SSE4(const void* prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask64x16_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask64x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -759,8 +806,9 @@ void WeightMask64x16_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x32_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask64x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -777,8 +825,9 @@ void WeightMask64x32_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x64_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask64x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -792,8 +841,9 @@ void WeightMask64x64_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x128_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask64x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -808,8 +858,9 @@ void WeightMask64x128_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x64_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask128x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -851,8 +902,9 @@ void WeightMask128x64_10bpp_SSE4(const void* prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x128_10bpp_SSE4(const void* prediction_0,
- const void* prediction_1, uint8_t* mask,
+void WeightMask128x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
diff --git a/src/film_grain.cc b/src/film_grain.cc
index dac37b5..5c64ff2 100644
--- a/src/film_grain.cc
+++ b/src/film_grain.cc
@@ -24,6 +24,7 @@
#include "src/dsp/common.h"
#include "src/dsp/constants.h"
#include "src/dsp/dsp.h"
+#include "src/dsp/film_grain_common.h"
#include "src/utils/array_2d.h"
#include "src/utils/blocking_counter.h"
#include "src/utils/common.h"
@@ -318,10 +319,14 @@ bool FilmGrain<bitdepth>::Init() {
//
// Note: Although it does not seem to make sense, there are test vectors
// with chroma_scaling_from_luma=true and params_.num_y_points=0.
+#if LIBGAV1_MSAN
+ // Quiet film grain / md5 msan warnings.
+ memset(scaling_lut_y_, 0, sizeof(scaling_lut_y_));
+#endif
if (use_luma || params_.chroma_scaling_from_luma) {
dsp.film_grain.initialize_scaling_lut(
params_.num_y_points, params_.point_y_value, params_.point_y_scaling,
- scaling_lut_y_);
+ scaling_lut_y_, kScalingLutLength);
} else {
ASAN_POISON_MEMORY_REGION(scaling_lut_y_, sizeof(scaling_lut_y_));
}
@@ -331,25 +336,28 @@ bool FilmGrain<bitdepth>::Init() {
scaling_lut_v_ = scaling_lut_y_;
} else if (params_.num_u_points > 0 || params_.num_v_points > 0) {
const size_t buffer_size =
- (kScalingLookupTableSize + kScalingLookupTablePadding) *
- (static_cast<int>(params_.num_u_points > 0) +
- static_cast<int>(params_.num_v_points > 0));
- scaling_lut_chroma_buffer_.reset(new (std::nothrow) uint8_t[buffer_size]);
+ kScalingLutLength * (static_cast<int>(params_.num_u_points > 0) +
+ static_cast<int>(params_.num_v_points > 0));
+ scaling_lut_chroma_buffer_.reset(new (std::nothrow) int16_t[buffer_size]);
if (scaling_lut_chroma_buffer_ == nullptr) return false;
- uint8_t* buffer = scaling_lut_chroma_buffer_.get();
+ int16_t* buffer = scaling_lut_chroma_buffer_.get();
+#if LIBGAV1_MSAN
+ // Quiet film grain / md5 msan warnings.
+ memset(buffer, 0, buffer_size * 2);
+#endif
if (params_.num_u_points > 0) {
scaling_lut_u_ = buffer;
dsp.film_grain.initialize_scaling_lut(
params_.num_u_points, params_.point_u_value,
- params_.point_u_scaling, scaling_lut_u_);
- buffer += kScalingLookupTableSize + kScalingLookupTablePadding;
+ params_.point_u_scaling, scaling_lut_u_, kScalingLutLength);
+ buffer += kScalingLutLength;
}
if (params_.num_v_points > 0) {
scaling_lut_v_ = buffer;
dsp.film_grain.initialize_scaling_lut(
params_.num_v_points, params_.point_v_value,
- params_.point_v_scaling, scaling_lut_v_);
+ params_.point_v_scaling, scaling_lut_v_, kScalingLutLength);
}
}
}
@@ -364,7 +372,7 @@ void FilmGrain<bitdepth>::GenerateLumaGrain(const FilmGrainParams& params,
// 7.18.3.3 says luma_grain "will never be read in this case". So we don't
// call GenerateLumaGrain if params.num_y_points is equal to 0.
assert(params.num_y_points > 0);
- const int shift = 12 - bitdepth + params.grain_scale_shift;
+ const int shift = kBitdepth12 - bitdepth + params.grain_scale_shift;
uint16_t seed = params.grain_seed;
GrainType* luma_grain_row = luma_grain;
for (int y = 0; y < kLumaHeight; ++y) {
@@ -382,7 +390,7 @@ void FilmGrain<bitdepth>::GenerateChromaGrains(const FilmGrainParams& params,
int chroma_height,
GrainType* u_grain,
GrainType* v_grain) {
- const int shift = 12 - bitdepth + params.grain_scale_shift;
+ const int shift = kBitdepth12 - bitdepth + params.grain_scale_shift;
if (params.num_u_points == 0 && !params.chroma_scaling_from_luma) {
memset(u_grain, 0, chroma_height * chroma_width * sizeof(*u_grain));
} else {
@@ -460,22 +468,25 @@ bool FilmGrain<bitdepth>::AllocateNoiseStripes() {
template <int bitdepth>
bool FilmGrain<bitdepth>::AllocateNoiseImage() {
+ // When LIBGAV1_MSAN is enabled, zero initialize to quiet optimized film grain
+ // msan warnings.
+ constexpr bool zero_initialize = LIBGAV1_MSAN == 1;
if (params_.num_y_points > 0 &&
!noise_image_[kPlaneY].Reset(height_, width_ + kNoiseImagePadding,
- /*zero_initialize=*/false)) {
+ zero_initialize)) {
return false;
}
if (!is_monochrome_) {
if (!noise_image_[kPlaneU].Reset(
(height_ + subsampling_y_) >> subsampling_y_,
((width_ + subsampling_x_) >> subsampling_x_) + kNoiseImagePadding,
- /*zero_initialize=*/false)) {
+ zero_initialize)) {
return false;
}
if (!noise_image_[kPlaneV].Reset(
(height_ + subsampling_y_) >> subsampling_y_,
((width_ + subsampling_x_) >> subsampling_x_) + kNoiseImagePadding,
- /*zero_initialize=*/false)) {
+ zero_initialize)) {
return false;
}
}
@@ -556,7 +567,7 @@ void FilmGrain<bitdepth>::BlendNoiseChromaWorker(
const auto* source_cursor_y = reinterpret_cast<const Pixel*>(
source_plane_y + start_height * source_stride_y);
- const uint8_t* scaling_lut_uv;
+ const int16_t* scaling_lut_uv;
const uint8_t* source_plane_uv;
uint8_t* dest_plane_uv;
@@ -689,16 +700,16 @@ bool FilmGrain<bitdepth>::AddNoise(
int max_luma;
int max_chroma;
if (params_.clip_to_restricted_range) {
- min_value = 16 << (bitdepth - 8);
- max_luma = 235 << (bitdepth - 8);
+ min_value = 16 << (bitdepth - kBitdepth8);
+ max_luma = 235 << (bitdepth - kBitdepth8);
if (color_matrix_is_identity_) {
max_chroma = max_luma;
} else {
- max_chroma = 240 << (bitdepth - 8);
+ max_chroma = 240 << (bitdepth - kBitdepth8);
}
} else {
min_value = 0;
- max_luma = (256 << (bitdepth - 8)) - 1;
+ max_luma = (256 << (bitdepth - kBitdepth8)) - 1;
max_chroma = max_luma;
}
@@ -809,9 +820,9 @@ bool FilmGrain<bitdepth>::AddNoise(
}
// Explicit instantiations.
-template class FilmGrain<8>;
+template class FilmGrain<kBitdepth8>;
#if LIBGAV1_MAX_BITDEPTH >= 10
-template class FilmGrain<10>;
+template class FilmGrain<kBitdepth10>;
#endif
} // namespace libgav1
diff --git a/src/film_grain.h b/src/film_grain.h
index b588f6d..f2c1e93 100644
--- a/src/film_grain.h
+++ b/src/film_grain.h
@@ -103,6 +103,8 @@ class FilmGrain {
private:
using Pixel =
typename std::conditional<bitdepth == 8, uint8_t, uint16_t>::type;
+ static constexpr int kScalingLutLength =
+ (kScalingLookupTableSize + kScalingLookupTablePadding) << (bitdepth - 8);
bool Init();
@@ -156,13 +158,13 @@ class FilmGrain {
GrainType u_grain_[kMaxChromaHeight * kMaxChromaWidth];
GrainType v_grain_[kMaxChromaHeight * kMaxChromaWidth];
// Scaling lookup tables.
- uint8_t scaling_lut_y_[kScalingLookupTableSize + kScalingLookupTablePadding];
- uint8_t* scaling_lut_u_ = nullptr;
- uint8_t* scaling_lut_v_ = nullptr;
- // If allocated, this buffer is 256 * 2 bytes long and scaling_lut_u_ and
+ int16_t scaling_lut_y_[kScalingLutLength];
+ int16_t* scaling_lut_u_ = nullptr;
+ int16_t* scaling_lut_v_ = nullptr;
+ // If allocated, this buffer is 256 * 2 values long and scaling_lut_u_ and
// scaling_lut_v_ point into this buffer. Otherwise, scaling_lut_u_ and
// scaling_lut_v_ point to scaling_lut_y_.
- std::unique_ptr<uint8_t[]> scaling_lut_chroma_buffer_;
+ std::unique_ptr<int16_t[]> scaling_lut_chroma_buffer_;
// A two-dimensional array of noise data for each plane. Generated for each 32
// luma sample high stripe of the image. The first dimension is called
diff --git a/src/film_grain_test.cc b/src/film_grain_test.cc
new file mode 100644
index 0000000..bf37299
--- /dev/null
+++ b/src/film_grain_test.cc
@@ -0,0 +1,2360 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/film_grain.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <string>
+#include <tuple>
+#include <type_traits>
+
+#include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "gtest/gtest.h"
+#include "src/dsp/common.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/film_grain_common.h"
+#include "src/film_grain.h"
+#include "src/utils/array_2d.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/cpu.h"
+#include "src/utils/memory.h"
+#include "src/utils/threadpool.h"
+#include "src/utils/types.h"
+#include "tests/block_utils.h"
+#include "tests/third_party/libvpx/acm_random.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace film_grain {
+namespace {
+
+constexpr int kNumSpeedTests = 50;
+constexpr int kNumFilmGrainTestParams = 10;
+constexpr size_t kLumaBlockSize = kLumaWidth * kLumaHeight;
+constexpr size_t kChromaBlockSize = kMaxChromaWidth * kMaxChromaHeight;
+// Dimensions for unit tests concerning applying grain to the whole frame.
+constexpr size_t kNumTestStripes = 64;
+constexpr int kNoiseStripeHeight = 34;
+constexpr size_t kFrameWidth = 1921;
+constexpr size_t kFrameHeight = (kNumTestStripes - 1) * 32 + 1;
+
+/*
+ The film grain parameters for 10 frames were generated with the following
+ command line:
+ aomenc --end-usage=q --cq-level=20 --cpu-used=8 -w 1920 -h 1080 \
+ --denoise-noise-level=50 --ivf breaking_bad_21m23s_10frames.1920_1080.yuv \
+ -o breaking_bad_21m23s_10frames.1920_1080.noise50.ivf
+*/
+constexpr FilmGrainParams kFilmGrainParams[10] = {
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/8,
+ /*num_v_points=*/8,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 121, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{71, 71, 91, 99, 98, 100, 100, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 27, 40, 54, 67, 94, 255, 0, 0},
+ /*point_u_scaling=*/{37, 37, 43, 48, 48, 50, 51, 51, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 107, 255, 0, 0},
+ /*point_v_scaling=*/{48, 48, 43, 33, 32, 33, 34, 34, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 10, 3, -2, 1, -4,
+ 5, -1, -25, -13, 3, -1, 0, 7,
+ -20, 103, 26, -2, 1, 14, -49, 117},
+ /*auto_regression_coeff_u=*/{-2, 1, -3, 4, -4, 0, 3, 5, -5,
+ -17, 17, 0, -10, -5, -3, -30, 14, 70,
+ 29, 9, -2, -10, 50, 71, -11},
+ /*auto_regression_coeff_v=*/{3, -2, -7, 6, -7, -8, 3, 1, -12,
+ -15, 28, 5, -11, -2, -7, -27, 32, 62,
+ 31, 18, -2, -6, 61, 43, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/7391,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/8,
+ /*num_u_points=*/7,
+ /*num_v_points=*/8,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 94, 134, 255, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 91, 99, 97, 100, 102, 102, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 134, 255, 0, 0, 0},
+ /*point_u_scaling=*/{38, 38, 50, 49, 51, 53, 53, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 255, 0, 0},
+ /*point_v_scaling=*/{50, 50, 45, 34, 33, 35, 37, 37, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 10, 3, -1, 1, -3,
+ 3, 1, -27, -12, 2, -1, 1, 7,
+ -17, 100, 27, 0, -1, 13, -50, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 2, 5, -3,
+ -16, 16, -2, -10, -2, -1, -31, 14, 70,
+ 29, 9, -1, -10, 47, 70, -11},
+ /*auto_regression_coeff_v=*/{1, 0, -5, 5, -6, -6, 2, 1, -10,
+ -14, 26, 4, -10, -3, -5, -26, 29, 63,
+ 31, 17, -1, -6, 55, 47, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/10772,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/8,
+ /*num_u_points=*/7,
+ /*num_v_points=*/8,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 94, 134, 255, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{71, 71, 91, 99, 98, 101, 103, 103, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 81, 107, 255, 0, 0, 0},
+ /*point_u_scaling=*/{37, 37, 49, 48, 51, 52, 52, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 255, 0, 0},
+ /*point_v_scaling=*/{49, 49, 44, 34, 32, 34, 36, 36, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{1, -2, -2, 10, 3, -1, 1, -4,
+ 4, 1, -26, -12, 2, -1, 1, 7,
+ -18, 101, 26, -1, 0, 13, -49, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -3, 4, -3, -1, 2, 5, -4,
+ -16, 17, -2, -10, -3, -2, -31, 15, 70,
+ 28, 9, -1, -10, 48, 70, -11},
+ /*auto_regression_coeff_v=*/{1, -1, -6, 5, -6, -7, 2, 2, -11,
+ -14, 27, 5, -11, -3, -6, -26, 30, 62,
+ 30, 18, -2, -6, 58, 45, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/14153,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/5,
+ /*num_v_points=*/7,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 121, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{71, 71, 90, 99, 98, 100, 100, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 107, 255, 0, 0, 0, 0, 0},
+ /*point_u_scaling=*/{37, 37, 48, 51, 51, 0, 0, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 94, 255, 0, 0, 0},
+ /*point_v_scaling=*/{49, 49, 43, 33, 32, 34, 34, 0, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 10, 3, -1, 1, -4,
+ 6, 0, -26, -13, 3, -1, 1, 6,
+ -20, 103, 26, -2, 1, 13, -48, 117},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 4, -4, -1, 2, 5, -5,
+ -16, 18, -1, -10, -3, -2, -30, 16, 69,
+ 28, 9, -2, -10, 50, 68, -11},
+ /*auto_regression_coeff_v=*/{2, -1, -6, 5, -6, -7, 2, 2, -11,
+ -15, 29, 4, -10, -3, -6, -26, 30, 62,
+ 31, 18, -3, -6, 59, 45, 3},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/17534,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/8,
+ /*num_u_points=*/7,
+ /*num_v_points=*/7,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 94, 134, 255, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{71, 71, 91, 99, 98, 101, 103, 103, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 81, 107, 255, 0, 0, 0},
+ /*point_u_scaling=*/{37, 37, 49, 49, 52, 53, 53, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 94, 255, 0, 0, 0},
+ /*point_v_scaling=*/{50, 50, 44, 34, 33, 36, 37, 0, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 10, 3, -1, 1, -4,
+ 3, 1, -26, -12, 2, -1, 1, 7,
+ -17, 101, 26, 0, 0, 13, -50, 116},
+ /*auto_regression_coeff_u=*/{-2, 1, -2, 3, -3, -1, 2, 5, -4,
+ -16, 16, -2, -10, -3, -1, -31, 14, 70,
+ 28, 9, -1, -10, 48, 70, -11},
+ /*auto_regression_coeff_v=*/{1, 0, -5, 5, -6, -6, 2, 2, -10,
+ -14, 26, 4, -10, -3, -5, -26, 29, 63,
+ 30, 17, -1, -6, 56, 47, 3},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/20915,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/7,
+ /*num_v_points=*/7,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 134, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 91, 99, 97, 101, 101, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 107, 255, 0, 0, 0},
+ /*point_u_scaling=*/{38, 38, 51, 50, 52, 53, 54, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 94, 255, 0, 0, 0},
+ /*point_v_scaling=*/{51, 51, 45, 35, 33, 36, 36, 0, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 9, 3, -1, 1, -3,
+ 2, 2, -27, -12, 2, 0, 1, 7,
+ -16, 100, 27, 0, -1, 13, -51, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 1, 4, -2,
+ -17, 14, -3, -10, -2, 0, -31, 14, 71,
+ 29, 8, -2, -10, 45, 71, -11},
+ /*auto_regression_coeff_v=*/{0, -1, -5, 4, -6, -5, 2, 1, -9,
+ -14, 24, 3, -10, -3, -4, -25, 29, 63,
+ 31, 16, -1, -7, 54, 48, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/24296,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/7,
+ /*num_v_points=*/8,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 134, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 91, 99, 97, 101, 101, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 134, 255, 0, 0, 0},
+ /*point_u_scaling=*/{38, 38, 50, 50, 51, 53, 53, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 255, 0, 0},
+ /*point_v_scaling=*/{50, 50, 45, 34, 33, 35, 36, 36, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{2, -2, -2, 10, 3, -1, 1, -3,
+ 3, 2, -27, -12, 2, 0, 1, 7,
+ -17, 100, 27, 0, -1, 13, -51, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 1, 5, -3,
+ -16, 15, -2, -10, -2, -1, -31, 14, 70,
+ 29, 8, -1, -10, 46, 71, -11},
+ /*auto_regression_coeff_v=*/{1, 0, -5, 5, -6, -5, 2, 1, -9,
+ -14, 25, 4, -10, -3, -5, -25, 29, 63,
+ 31, 17, -1, -7, 55, 47, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/27677,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/7,
+ /*num_v_points=*/8,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 121, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 92, 99, 97, 101, 101, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 174, 255, 0, 0, 0},
+ /*point_u_scaling=*/{38, 38, 51, 50, 52, 54, 54, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 255, 0, 0},
+ /*point_v_scaling=*/{51, 51, 46, 35, 33, 35, 37, 37, 0, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{1, -1, -2, 9, 3, -1, 1, -3,
+ 2, 2, -28, -12, 2, 0, 1, 8,
+ -16, 99, 27, 0, -1, 13, -51, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 2, 4, -2,
+ -16, 14, -3, -10, -2, 0, -31, 13, 71,
+ 29, 8, -2, -11, 44, 72, -11},
+ /*auto_regression_coeff_v=*/{0, -1, -5, 4, -6, -4, 2, 1, -9,
+ -13, 23, 3, -10, -3, -4, -25, 28, 63,
+ 32, 16, -1, -7, 54, 49, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/31058,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/7,
+ /*num_v_points=*/9,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 121, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 92, 99, 98, 100, 98, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 228, 255, 0, 0, 0},
+ /*point_u_scaling=*/{38, 38, 51, 51, 52, 54, 54, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 201, 255, 0},
+ /*point_v_scaling=*/{51, 51, 46, 35, 34, 35, 37, 37, 37, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{1, -1, -2, 9, 3, -1, 1, -3,
+ 2, 2, -28, -12, 2, 0, 1, 8,
+ -16, 99, 27, 0, -1, 13, -52, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 1, 4, -2,
+ -16, 13, -3, -10, -2, 0, -31, 13, 71,
+ 29, 8, -2, -11, 44, 72, -11},
+ /*auto_regression_coeff_v=*/{0, -1, -5, 4, -6, -4, 2, 2, -8,
+ -13, 23, 3, -10, -3, -4, -25, 28, 63,
+ 32, 16, -1, -7, 54, 49, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/34439,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0},
+ {/*apply_grain=*/true,
+ /*update_grain=*/true,
+ /*chroma_scaling_from_luma=*/false,
+ /*overlap_flag=*/true,
+ /*clip_to_restricted_range=*/false,
+ /*num_y_points=*/7,
+ /*num_u_points=*/7,
+ /*num_v_points=*/9,
+ /*point_y_value=*/{0, 13, 27, 40, 54, 121, 255, 0, 0, 0, 0, 0, 0, 0},
+ /*point_y_scaling=*/{72, 72, 92, 99, 98, 99, 95, 0, 0, 0, 0, 0, 0, 0},
+ /*point_u_value=*/{0, 13, 40, 54, 67, 228, 255, 0, 0, 0},
+ /*point_u_scaling=*/{39, 39, 51, 51, 52, 54, 54, 0, 0, 0},
+ /*point_v_value=*/{0, 13, 27, 40, 54, 67, 121, 201, 255, 0},
+ /*point_v_scaling=*/{51, 51, 46, 35, 34, 35, 36, 35, 35, 0},
+ /*chroma_scaling=*/11,
+ /*auto_regression_coeff_lag=*/3,
+ /*auto_regression_coeff_y=*/{1, -1, -2, 9, 3, -1, 1, -3,
+ 2, 2, -28, -11, 2, 0, 1, 8,
+ -16, 99, 27, 0, -1, 13, -52, 116},
+ /*auto_regression_coeff_u=*/{-3, 1, -2, 3, -3, -1, 1, 4, -2,
+ -16, 13, -3, -10, -2, 0, -30, 13, 71,
+ 29, 8, -2, -10, 43, 72, -11},
+ /*auto_regression_coeff_v=*/{0, -1, -5, 3, -6, -4, 2, 2, -8,
+ -13, 23, 3, -10, -3, -4, -25, 28, 64,
+ 32, 16, -1, -7, 53, 49, 2},
+ /*auto_regression_shift=*/8,
+ /*grain_seed=*/37820,
+ /*reference_index=*/0,
+ /*grain_scale_shift=*/0,
+ /*u_multiplier=*/0,
+ /*u_luma_multiplier=*/64,
+ /*u_offset=*/0,
+ /*v_multiplier=*/0,
+ /*v_luma_multiplier=*/64,
+ /*v_offset=*/0}};
+
+const char* GetTestDigestLuma(int bitdepth, int param_index) {
+ static const char* const kTestDigestsLuma8bpp[10] = {
+ "80da8e849110a10c0a73f9dec0d9a2fb", "54352f02aeda541e17a4c2d208897e2b",
+ "2ad9021124c82aca3e7c9517d00d1236", "f6c5f64513925b09ceba31e92511f8a1",
+ "46c6006578c68c3c8619f7a389c7de45", "fcddbd27545254dc50f1c333c8b7e313",
+ "c6d4dc181bf7f2f93ae099b836685151", "2949ef836748271195914fef9acf4e46",
+ "524e79bb87ed550e123d00a61df94381", "182222470d7b7a80017521d0261e4474",
+ };
+ static const char* const kTestDigestsLuma10bpp[10] = {
+ "27a49a2131fb6d4dd4b8c34da1b7642e", "4ea9134f6831dd398545c85b2a68e31f",
+ "4e12232a18a2b06e958d7ab6b953faad", "0ede12864ddaced2d8062ffa4225ce24",
+ "5fee492c4a430b2417a64aa4920b69e9", "39af842a3f9370d796e8ef047c0c42a8",
+ "0efbad5f9dc07391ad243232b8df1787", "2bd41882cd82960019aa2b87d5fb1fbc",
+ "1c66629c0c4e7b6f9b0a7a6944fbad50", "2c633a50ead62f8e844a409545f46244",
+ };
+
+ if (bitdepth == 8) {
+ return kTestDigestsLuma8bpp[param_index];
+ }
+ return kTestDigestsLuma10bpp[param_index];
+}
+
+const char* GetTestDigestChromaU(int bitdepth, int param_index) {
+ static const char* const kTestDigestsChromaU8bpp[10] = {
+ "e56b7bbe9f39bf987770b18aeca59514", "d0b3fd3cf2901dae31b73f20c510d83e",
+ "800c01d58d9fb72136d21ec2bb07899a", "4cd0badba679e8edbcd60a931fce49a1",
+ "cabec236cc17f91f3f08d8cde867aa72", "380a2205cf2d40c6a27152585f61a3b0",
+ "3813526234dc7f90f80f6684772c729a", "97a43a73066d88f9cbd915d56fc9c196",
+ "5b70b27a43dd63b03e23aecd3a935071", "d5cc98685582ffd47a41a97d2e377ac8",
+ };
+ static const char* const kTestDigestsChromaU10bpp[10] = {
+ "9a6d0369ba86317598e65913276dae6d", "2512bdc4c88f21f8185b040b7752d1db",
+ "1e86b779ce6555fcf5bd0ade2af67e73", "5ad463a354ffce522c52b616fb122024",
+ "290d53c22c2143b0882acb887da3fdf1", "54622407d865371d7e70bbf29fdda626",
+ "be306c6a94c55dbd9ef514f0ad4a0011", "904602329b0dec352b3b177b0a2554d2",
+ "58afc9497d968c67fdf2c0cf23b33aa3", "74fee7be6f62724bf901fdd04a733b46",
+ };
+ if (bitdepth == 8) {
+ return kTestDigestsChromaU8bpp[param_index];
+ }
+ return kTestDigestsChromaU10bpp[param_index];
+}
+
+const char* GetTestDigestChromaV(int bitdepth, int param_index) {
+ static const char* const kTestDigestsChromaV8bpp[10] = {
+ "7205ed6c07ed27b7b52d871e0559b8fa", "fad033b1482dba0ed2d450b461fa310e",
+ "6bb39798ec6a0f7bda0b0fcb0a555734", "08c19856e10123ae520ccfc63e2fbe7b",
+ "a7695a6b69fba740a50310dfa6cf1c00", "ac2eac2d13fc5b21c4f2995d5abe14b9",
+ "be35cb30062db628a9e1304fca8b75dc", "f5bfc7a910c76bcd5b32c40772170879",
+ "aca07b37d63f978d76df5cd75d0cea5e", "107c7c56d4ec21f346a1a02206301b0d",
+ };
+ static const char* const kTestDigestsChromaV10bpp[10] = {
+ "910724a77710996c90e272f1c1e9ff8e", "d293f861580770a89f1e266931a012ad",
+ "9e4f0c85fb533e51238586f9c3e68b6e", "a5ff4478d9eeb2168262c2e955e17a4f",
+ "fba6b1e8f28e4e90c836d41f28a0c154", "50b9a93f9a1f3845e6903bff9270a3e6",
+ "7b1624c3543badf5fadaee4d1e602e6b", "3be074e4ca0eec5770748b15661aaadd",
+ "639197401032f272d6c30666a2d08f43", "28075dd34246bf9d5e6197b1944f646a",
+ };
+ if (bitdepth == 8) {
+ return kTestDigestsChromaV8bpp[param_index];
+ }
+ return kTestDigestsChromaV10bpp[param_index];
+}
+
+const char* GetARTestDigestLuma(int bitdepth, int coeff_lag, int param_index) {
+ static const char* const kTestDigestsLuma8bpp[3][kNumFilmGrainTestParams] = {
+ {
+ "a835127918f93478b45f1ba4d20d81bd",
+ "a835127918f93478b45f1ba4d20d81bd",
+ "e5db4da626e214bb17bcc7ecffa76303",
+ "a835127918f93478b45f1ba4d20d81bd",
+ "a835127918f93478b45f1ba4d20d81bd",
+ "e5db4da626e214bb17bcc7ecffa76303",
+ "a835127918f93478b45f1ba4d20d81bd",
+ "1da62b7233de502123a18546b6c97da2",
+ "1da62b7233de502123a18546b6c97da2",
+ "1da62b7233de502123a18546b6c97da2",
+ },
+ {
+ "11464b880de3ecd6e6189c5c4e7f9b28",
+ "dfe411762e283b5f49bece02ec200951",
+ "5c534d92afdf0a5b53dbe4fe7271929c",
+ "2e1a68a18aca96c31320ba7ceab59be9",
+ "584c0323e6b276cb9acb1a294d462d58",
+ "9571eb8f1cbaa96ea3bf64a820a8d9f0",
+ "305285ff0df87aba3c59e3fc0818697d",
+ "0066d35c8818cf20230114dcd3765a4d",
+ "0066d35c8818cf20230114dcd3765a4d",
+ "16d61b046084ef2636eedc5a737cb6f6",
+ },
+ {
+ "0c9e2cf1b6c3cad0f7668026e8ea0516",
+ "7d094855292d0eded9e0d1b5bab1990b",
+ "fbf28860a5f1285dcc6725a45256a86a",
+ "dccb906904160ccabbd2c9a7797a4bf9",
+ "46f645e17f08a3260b1ae70284e5c5b8",
+ "124fdc90bed11a7320a0cbdee8b94400",
+ "8d2978651dddeaef6282191fa146f0a0",
+ "28b4d5aa33f05b3fb7f9323a11936bdc",
+ "6a8ea684f6736a069e3612d1af6391a8",
+ "2781ea40a63704dbfeb3a1ac5db6f2fc",
+ },
+ };
+
+ static const char* const kTestDigestsLuma10bpp[3][kNumFilmGrainTestParams] = {
+ {
+ "5e6bc8444ece2d38420f51d82238d812",
+ "5e6bc8444ece2d38420f51d82238d812",
+ "2bfaec768794af33d60a9771f971f68d",
+ "5e6bc8444ece2d38420f51d82238d812",
+ "5e6bc8444ece2d38420f51d82238d812",
+ "c880807a368c4e82c23bea6f035ad23f",
+ "5e6bc8444ece2d38420f51d82238d812",
+ "c576667da5286183ec3aab9a76f53a2e",
+ "c576667da5286183ec3aab9a76f53a2e",
+ "c576667da5286183ec3aab9a76f53a2e",
+ },
+ {
+ "095c2dd4d4d52aff9696df9bfdb70062",
+ "983d14afa497060792d472a449a380c7",
+ "c5fdc0f7c594b2b36132cec6f45a79bd",
+ "acff232ac5597c1712213150552281d1",
+ "4dd7341923b1d260092853553b6b6246",
+ "0ca8afd71a4f564ea1ce69c4af14e9ab",
+ "9bc7565e5359d09194fcee28e4bf7b94",
+ "6fea7805458b9d149f238a30e2dc3f13",
+ "6fea7805458b9d149f238a30e2dc3f13",
+ "681dff5fc7a7244ba4e4a582ca7ecb14",
+ },
+ {
+ "cb99352c9c6300e7e825188bb4adaee0",
+ "7e40674de0209bd72f8e9c6e39ee6f7c",
+ "3e475572f6b4ecbb2730fd16751ad7ed",
+ "e6e4c63abc9cb112d9d1f23886cd1415",
+ "1a1c953b175c105c604902877e2bab18",
+ "380a53072530223d4ee622e014ee4bdb",
+ "6137394ea1172fb7ea0cbac237ff1703",
+ "85ab0c813e46f97cb9f42542f44c01ad",
+ "68c8ac462f0e28cb35402c538bee32f1",
+ "0038502ffa4760c8feb6f9abd4de7250",
+ },
+ };
+
+ if (bitdepth == 8) {
+ return kTestDigestsLuma8bpp[coeff_lag - 1][param_index];
+ }
+ return kTestDigestsLuma10bpp[coeff_lag - 1][param_index];
+}
+
+const char* GetARTestDigestChromaU(int bitdepth, int coeff_lag,
+ int subsampling_x, int subsampling_y) {
+ static const char* const kTestDigestsChromaU8bpp[12] = {
+ "11ced66de0eaf55c1ff9bad18d7b8ed7", "0c3b77345dd4ab0915ef53693ab93ce4",
+ "b0645044ba080b3ceb8f299e269377d6", "50590ad5d895f0b4bc6694d878e9cd32",
+ "85e1bf3741100135062f5b4abfe7639b", "76955b70dde61ca5c7d079c501b90906",
+ "3f0995e1397fd9efd9fc46b67f7796b3", "0a0d6c3e4e1649eb101395bc97943a07",
+ "1878855ed8db600ccae1d39abac52ec6", "13ab2b28320ed3ac2b820f08fdfd424d",
+ "f3e95544a86ead5387e3dc4e043fd0f0", "ff8f5d2d97a6689e16a7e4f482f69f0b",
+ };
+
+ static const char* const kTestDigestsChromaU10bpp[12] = {
+ "707f2aa5aa7e77bc6e83ab08287d748d", "0bcf40c7fead9ac3a5d71b4cc1e21549",
+ "0c1df27053e5da7cf1276a122a8f4e8b", "782962f7425eb38923a4f87e7ab319d9",
+ "b4a709ae5967afef55530b9ea8ef0062", "70a971a0b9bf06212d510b396f0f9095",
+ "d033b89d6e31f8b13c83d94c840b7d54", "40bbe804bf3f90cee667d3b275e3c964",
+ "90bb2b9d518b945adcfd1b1807f7d170", "4bc34aa157fe5ad4270c611afa75e878",
+ "e2688d7286cd43fe0a3ea734d2ad0f77", "853193c4981bd882912171061327bdf2",
+ };
+
+ assert(!(subsampling_x == 0 && subsampling_y == 1));
+ const int base_index = 3 * coeff_lag + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigestsChromaU8bpp[base_index];
+ }
+ return kTestDigestsChromaU10bpp[base_index];
+}
+
+const char* GetARTestDigestChromaV(int bitdepth, int coeff_lag,
+ int subsampling_x, int subsampling_y) {
+ static const char* const kTestDigestsChromaV8bpp[12] = {
+ "5c2179f3d93be0a0da75d2bb90347c2f", "79b883847d7eaa7890e1d633b8e34353",
+ "90ade818e55808e8cf58c11debb5ddd1", "1d0f2a14bc4df2b2a1abaf8137029f92",
+ "ac753a57ade140dccb50c14f941ae1fc", "d24ab497558f6896f08dc17bcc3c50c1",
+ "3d74436c63920022a95c85b234db4e33", "061c2d53ed84c830f454e395c362cb16",
+ "05d24869d7fb952e332457a114c8b9b7", "fcee31b87a2ada8028c2a975e094856a",
+ "c019e2c475737abcf9c2b2a52845c646", "9cd994baa7021f8bdf1d1c468c1c8e9c",
+ };
+
+ static const char* const kTestDigestsChromaV10bpp[12] = {
+ "bc9e44454a05cac8571c15af5b720e79", "f0374436698d94e879c03331b1f30df4",
+ "4580dd009abd6eeed59485057c55f63e", "7d1f7aecd45302bb461f4467f2770f72",
+ "1f0d003fce6c5fedc147c6112813f43b", "4771a45c2c1a04c375400619d5536035",
+ "df9cf619a78907c0f6e58bc13d7d5546", "dd3715ce65d905f30070a36977c818e0",
+ "32de5800f76e34c128a1d89146b4010b", "db9d7c70c3f69feb68fae04398efc773",
+ "d3d0912e3fdb956fef416a010bd7b4c2", "a2fca8abd9fd38d2eef3c4495d9eff78",
+ };
+
+ assert(!(subsampling_x == 0 && subsampling_y == 1));
+ const int base_index = 3 * coeff_lag + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigestsChromaV8bpp[base_index];
+ }
+ return kTestDigestsChromaV10bpp[base_index];
+}
+
+const char* GetGrainGenerationTestDigestLuma(int bitdepth, int param_index) {
+ static const char* const kTestDigestsLuma8bpp[kNumFilmGrainTestParams] = {
+ "c48babd99e5cfcbaa13d8b6e0c12e644", "da4b971d2de19b709e2bc98d2e50caf3",
+ "96c72faac19a79c138afeea8b8ae8c7a", "90a2b9c8304a44d14e83ca51bfd2fe8a",
+ "72bd3aa85c17850acb430afb4183bf1a", "a0acf76349b9efbc9181fc31153d9ef6",
+ "6da74dd631a4ec8b9372c0bbec22e246", "6e11fa230f0e5fbb13084255c22cabf9",
+ "be1d257b762f9880d81680e9325932a2", "37e302075af8130b371de4430e8a22cf",
+ };
+
+ static const char* const kTestDigestsLuma10bpp[kNumFilmGrainTestParams] = {
+ "0a40fd2f261095a6154584a531328142", "9d0c8173a94a0514c769e94b6f254030",
+ "7894e959fdd5545895412e1512c9352d", "6802cad2748cf6db7f66f53807ee46ab",
+ "ea24e962b98351c3d929a8ae41e320e2", "b333dc944274a3a094073889ca6e11d6",
+ "7211d7ac0ff7d11b5ef1538c0d98f43d", "ef9f9cbc101a07da7bfa62637130e331",
+ "85a122e32648fde84b883a1f98947c60", "dee656e3791138285bc5b71e3491a177",
+ };
+
+ if (bitdepth == 8) {
+ return kTestDigestsLuma8bpp[param_index];
+ }
+ return kTestDigestsLuma10bpp[param_index];
+}
+
+const char* GetConstructStripesTestDigest(int bitdepth, int overlap_flag,
+ int subsampling_x,
+ int subsampling_y) {
+ static const char* const kTestDigests8bpp[6] = {
+ "cd14aaa6fc1728290fa75772730a2155", "13ad4551feadccc3a3a9bd5e25878d2a",
+ "ed6ad9532c96ef0d79ff3228c89a429f", "82f307a7f5fc3308c3ebe268b5169e70",
+ "aed793d525b85349a8c2eb6d40e93969", "311c3deb727621a7d4f18e8defb65de7",
+ };
+
+ static const char* const kTestDigests10bpp[6] = {
+ "4fe2fa1e428737de3595be3a097d0203", "80568c3c3b53bdbbd03b820179092dcd",
+ "bc7b73099961a0739c36e027d6d09ea1", "e5331364e5146a6327fd94e1467f59a3",
+ "125bf18b7787e8f0792ea12f9210de0d", "21cf98cbce17eca77dc150cc9be0e0a0",
+ };
+
+ const int base_index = 3 * overlap_flag + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigests8bpp[base_index];
+ }
+ return kTestDigests10bpp[base_index];
+}
+
+const char* GetConstructImageTestDigest(int bitdepth, int overlap_flag,
+ int subsampling_x, int subsampling_y) {
+ static const char* const kTestDigests8bpp[6] = {
+ "17030fc692e685557a3717f9334af7e8", "d16ea46147183cd7bc36bcfc2f936a5b",
+ "68152958540dbec885f71e3bcd7aa088", "bb43b420f05a122eb4780aca06055ab1",
+ "87567b04fbdf64f391258c0742de266b", "ce87d556048b3de32570faf6729f4010",
+ };
+
+ static const char* const kTestDigests10bpp[6] = {
+ "5b31b29a5e22126a9bf8cd6a01645777", "2bb94a25164117f2ab18dae18e2c6577",
+ "27e57a4ed6f0c9fe0a763a03f44805e8", "481642ab0b07437b76b169aa4eb82123",
+ "656a9ef056b04565bec9ca7e0873c408", "a70fff81ab28d02d99dd4f142699ba39",
+ };
+
+ const int base_index = 3 * overlap_flag + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigests8bpp[base_index];
+ }
+ return kTestDigests10bpp[base_index];
+}
+
+const char* GetScalingInitTestDigest(int param_index, int bitdepth) {
+ static const char* const kTestDigests8bpp[kNumFilmGrainTestParams] = {
+ "315202ca3bf9c46eac8605e89baffd2a", "640f6408702b07ab7e832e7326cce56f",
+ "f75ee83e3912a3f25949e852d67326cf", "211223f5d6a4b42a8e3c662f921b71c0",
+ "f75ee83e3912a3f25949e852d67326cf", "e7a1de8c5a2cac2145c586ecf1f9051c",
+ "e7a1de8c5a2cac2145c586ecf1f9051c", "276fe5e3b30b2db2a9ff798eb6cb8e00",
+ "ac67f1c3aff2f50ed4b1975bde67ffe3", "8db6145a60d506cc94f07cef8b27c681",
+ };
+
+ static const char* const kTestDigests10bpp[kNumFilmGrainTestParams] = {
+ "c50be59c62b634ff45ddfbe5b978adfc", "7626286109a2a1eaf0a26f6b2bbab9aa",
+ "f2302988140c47a0724fc55ff523b6ec", "5318e33d8a59a526347ffa6a72ba6ebd",
+ "f2302988140c47a0724fc55ff523b6ec", "f435b5fe98e9d8b6c61fa6f457601c2c",
+ "f435b5fe98e9d8b6c61fa6f457601c2c", "ff07a2944dbe094d01e199098764941c",
+ "11b3e256c74cee2b5679f7457793869a", "89fab5c1db09e242d0494d1c696a774a",
+ };
+
+ if (bitdepth == 8) {
+ return kTestDigests8bpp[param_index];
+ }
+ assert(bitdepth == 10);
+ return kTestDigests10bpp[param_index];
+}
+
+const char* GetBlendLumaTestDigest(int bitdepth) {
+ static const char* const kTestDigest8bpp = "de35b16c702690b1d311cdd0973835d7";
+
+ static const char* const kTestDigest10bpp =
+ "60e9f24dcaaa0207a8db5ab5f3c66608";
+
+ if (bitdepth == 8) {
+ return kTestDigest8bpp;
+ }
+ return kTestDigest10bpp;
+}
+
+const char* GetBlendChromaUTestDigest(int bitdepth,
+ int chroma_scaling_from_luma,
+ int subsampling_x, int subsampling_y) {
+ static const char* const kTestDigests8bpp[6] = {
+ "36ca194734d45e75079baba1f3ec9e9e", "182b388061f59fd3e24ef4581c536e67",
+ "2e7843b4c624f03316c3cbe1cc835859", "39e6d9606915da6a41168fbb006b55e4",
+ "3f44a4e252d4823544ac66a900dc7983", "1860f0831841f262d66b23f6a6b5833b",
+ };
+
+ static const char* const kTestDigests10bpp[6] = {
+ "2054665564f55750c9588b505eb01ac0", "4d8b0e248f8a6bfc72516aa164e76b0b",
+ "7e549800a4f9fff6833bb7738e272baf", "8de6f30dcda99a37b359fd815e62d2f7",
+ "9b7958a2278a16bce2b7bc31fdd811f5", "c5c3c8cccf6a2b4e40b4a412a5bf4f08",
+ };
+
+ const int base_index =
+ 3 * chroma_scaling_from_luma + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigests8bpp[base_index];
+ }
+ return kTestDigests10bpp[base_index];
+}
+
+const char* GetBlendChromaVTestDigest(int bitdepth,
+ int chroma_scaling_from_luma,
+ int subsampling_x, int subsampling_y) {
+ static const char* const kTestDigests8bpp[6] = {
+ "9a353e4f86d7ebaa980f7f6cfc0995ad", "17589b4039ed49ba16f32db9fae724b7",
+ "76ae8bed48a173b548993b6e1824ff67", "c1458ac9bdfbf0b4d6a175343b17b27b",
+ "fa76d1c8e48957537f26af6a5b54ec14", "313fe3c34568b7f9c5ecb09d419d4ba4",
+ };
+
+ static const char* const kTestDigests10bpp[6] = {
+ "8ab5a8e03f07547260033d6a0b689e3c", "275ede58d311e2f5fd76f222f45a64fc",
+ "ce13916e0f7b02087fd0356534d32770", "165bfc8cda0266936a67fa4ec9b215cb",
+ "ed4382caa936acf1158ff8049d18ffac", "942bdd1344c9182dd7572099fb9372db",
+ };
+
+ const int base_index =
+ 3 * chroma_scaling_from_luma + subsampling_x + subsampling_y;
+ if (bitdepth == 8) {
+ return kTestDigests8bpp[base_index];
+ }
+ return kTestDigests10bpp[base_index];
+}
+
+// GetFilmGrainRandomNumber() is only invoked with |bits| equal to 11 or 8. Test
+// both values of |bits|.
+TEST(FilmGrainTest, GetFilmGrainRandomNumber) {
+ uint16_t seed = 51968;
+ const struct {
+ int rand;
+ uint16_t seed;
+ } kExpected11[5] = {
+ {812, 25984}, {406, 12992}, {1227, 39264}, {1637, 52400}, {818, 26200},
+ };
+ for (int i = 0; i < 5; ++i) {
+ int rand = GetFilmGrainRandomNumber(11, &seed);
+ EXPECT_EQ(rand, kExpected11[i].rand) << "i = " << i;
+ EXPECT_EQ(seed, kExpected11[i].seed) << "i = " << i;
+ }
+ const struct {
+ int rand;
+ uint16_t seed;
+ } kExpected8[5] = {
+ {179, 45868}, {89, 22934}, {44, 11467}, {150, 38501}, {75, 19250},
+ };
+ for (int i = 0; i < 5; ++i) {
+ int rand = GetFilmGrainRandomNumber(8, &seed);
+ EXPECT_EQ(rand, kExpected8[i].rand) << "i = " << i;
+ EXPECT_EQ(seed, kExpected8[i].seed) << "i = " << i;
+ }
+}
+
+// In FilmGrainParams, if num_u_points and num_v_points are both 0 and
+// chroma_scaling_from_luma is false, GenerateChromaGrains() should set both
+// the u_grain and v_grain arrays to all zeros.
+TEST(FilmGrainTest, GenerateZeroChromaGrains) {
+ FilmGrainParams film_grain_params = {};
+ film_grain_params.apply_grain = true;
+ film_grain_params.update_grain = true;
+ film_grain_params.chroma_scaling = 8;
+ film_grain_params.auto_regression_shift = 6;
+ film_grain_params.grain_seed = 51968;
+
+ int8_t u_grain[73 * 82];
+ int8_t v_grain[73 * 82];
+ const int chroma_width = 44;
+ const int chroma_height = 38;
+
+ // Initialize u_grain and v_grain with arbitrary nonzero values.
+ memset(u_grain, 1, sizeof(u_grain));
+ memset(v_grain, 2, sizeof(v_grain));
+ for (int y = 0; y < chroma_height; ++y) {
+ for (int x = 0; x < chroma_width; ++x) {
+ EXPECT_NE(u_grain[y * chroma_width + x], 0);
+ EXPECT_NE(v_grain[y * chroma_width + x], 0);
+ }
+ }
+
+ FilmGrain<8>::GenerateChromaGrains(film_grain_params, chroma_width,
+ chroma_height, u_grain, v_grain);
+
+ for (int y = 0; y < chroma_height; ++y) {
+ for (int x = 0; x < chroma_width; ++x) {
+ EXPECT_EQ(u_grain[y * chroma_width + x], 0);
+ EXPECT_EQ(v_grain[y * chroma_width + x], 0);
+ }
+ }
+}
+
+// First parameter is coefficient lag. Second parameter is the index into
+// |kFilmGrainParams|.
+template <int bitdepth>
+class AutoRegressionTestLuma
+ : public testing::TestWithParam<std::tuple<int, int>> {
+ public:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ AutoRegressionTestLuma() {
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ const int index = std::get<0>(GetParam()) - 1;
+ base_luma_auto_regression_func_ =
+ dsp->film_grain.luma_auto_regression[index];
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ base_luma_auto_regression_func_ = nullptr;
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ }
+ luma_auto_regression_func_ = dsp->film_grain.luma_auto_regression[index];
+ }
+
+ protected:
+ // |compare| determines whether to compare the output blocks from the SIMD
+ // implementation, if used, and the C implementation.
+ // |saturate| determines whether to set the inputs to maximum values. This is
+ // intended primarily as a way to simplify differences in output when
+ // debugging.
+ void TestAutoRegressiveFilterLuma(int coeff_lag, int param_index,
+ int num_runs, bool saturate, bool compare);
+ LumaAutoRegressionFunc luma_auto_regression_func_;
+ LumaAutoRegressionFunc base_luma_auto_regression_func_;
+ GrainType luma_block_buffer_[kLumaBlockSize];
+ GrainType base_luma_block_buffer_[kLumaBlockSize];
+};
+
+// First parameter is coefficient lag. Second parameter is the index into
+// |kFilmGrainParams|.
+template <int bitdepth>
+void AutoRegressionTestLuma<bitdepth>::TestAutoRegressiveFilterLuma(
+ int coeff_lag, int param_index, int num_runs, bool saturate, bool compare) {
+ if (luma_auto_regression_func_ == nullptr) return;
+ // Compare is only needed for NEON tests to compare with C output.
+ if (base_luma_auto_regression_func_ == nullptr && compare) return;
+ FilmGrainParams params = kFilmGrainParams[param_index];
+ params.auto_regression_coeff_lag = coeff_lag;
+ const int grain_max = GetGrainMax<bitdepth>();
+ for (int y = 0; y < kLumaHeight; ++y) {
+ for (int x = 0; x < kLumaWidth; ++x) {
+ if (saturate) {
+ luma_block_buffer_[y * kLumaWidth + x] = grain_max;
+ } else {
+ luma_block_buffer_[y * kLumaWidth + x] =
+ std::min(x - (kLumaWidth >> 1), y - (kLumaHeight >> 1)) *
+ (1 << (bitdepth - 8));
+ }
+ }
+ }
+
+ if (saturate) {
+ memset(params.auto_regression_coeff_y, 127,
+ sizeof(params.auto_regression_coeff_y));
+ }
+ if (compare) {
+ memcpy(base_luma_block_buffer_, luma_block_buffer_,
+ sizeof(luma_block_buffer_));
+ }
+
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ luma_auto_regression_func_(params, luma_block_buffer_);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs > 1) {
+ printf("AutoRegressionLuma lag=%d, param_index=%d: %d us\n", coeff_lag,
+ param_index,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ return;
+ }
+ if (compare) {
+ base_luma_auto_regression_func_(params, base_luma_block_buffer_);
+ EXPECT_TRUE(test_utils::CompareBlocks(
+ luma_block_buffer_, base_luma_block_buffer_, kLumaWidth, kLumaHeight,
+ kLumaWidth, kLumaWidth, false));
+ } else {
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("AutoRegressionLuma lag=%d, param_index=%d", coeff_lag,
+ param_index)
+ .c_str(),
+ GetARTestDigestLuma(bitdepth, coeff_lag, param_index),
+ luma_block_buffer_, sizeof(luma_block_buffer_), elapsed_time);
+ }
+}
+
+using AutoRegressionTestLuma8bpp = AutoRegressionTestLuma<8>;
+
+TEST_P(AutoRegressionTestLuma8bpp, AutoRegressiveFilterLuma) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1, /*saturate=*/false,
+ /*compare=*/false);
+}
+
+TEST_P(AutoRegressionTestLuma8bpp, AutoRegressiveFilterLumaSaturated) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1, /*saturate=*/true,
+ /*compare=*/true);
+}
+
+TEST_P(AutoRegressionTestLuma8bpp, DISABLED_Speed) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1e5,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using AutoRegressionTestLuma10bpp = AutoRegressionTestLuma<10>;
+
+TEST_P(AutoRegressionTestLuma10bpp, AutoRegressiveFilterLuma) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1, /*saturate=*/false,
+ /*compare=*/false);
+}
+
+TEST_P(AutoRegressionTestLuma10bpp, AutoRegressiveFilterLumaSaturated) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1, /*saturate=*/true,
+ /*compare=*/true);
+}
+
+TEST_P(AutoRegressionTestLuma10bpp, DISABLED_Speed) {
+ TestAutoRegressiveFilterLuma(std::get<0>(GetParam()), std::get<1>(GetParam()),
+ 1e5,
+ /*saturate=*/false, /*compare=*/false);
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(
+ C, AutoRegressionTestLuma8bpp,
+ testing::Combine(testing::Range(1, 4) /* coeff_lag */,
+ testing::Range(0, 10) /* param_index */));
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(
+ NEON, AutoRegressionTestLuma8bpp,
+ testing::Combine(testing::Range(1, 4) /* coeff_lag */,
+ testing::Range(0, 10) /* param_index */));
+#endif
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(
+ C, AutoRegressionTestLuma10bpp,
+ testing::Combine(testing::Range(1, 4) /* coeff_lag */,
+ testing::Range(0, 10) /* param_index */));
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(
+ NEON, AutoRegressionTestLuma10bpp,
+ testing::Combine(testing::Range(1, 4) /* coeff_lag */,
+ testing::Range(0, 10) /* param_index */));
+#endif
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+struct AutoRegressionChromaTestParam {
+ explicit AutoRegressionChromaTestParam(const std::tuple<int, int>& in)
+ : coeff_lag(std::get<0>(in)) {
+ switch (std::get<1>(in)) {
+ case 0:
+ subsampling_x = 0;
+ subsampling_y = 0;
+ break;
+ case 1:
+ subsampling_x = 1;
+ subsampling_y = 0;
+ break;
+ default:
+ assert(std::get<1>(in) == 2);
+ subsampling_x = 1;
+ subsampling_y = 1;
+ }
+ }
+ const int coeff_lag;
+ int subsampling_x;
+ int subsampling_y;
+};
+
+template <int bitdepth>
+class AutoRegressionTestChroma
+ : public testing::TestWithParam<std::tuple<int, int>> {
+ public:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ AutoRegressionTestChroma() {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ // This test suite does not cover num_y_points == 0. This should be covered
+ // in the test of the full synthesis process.
+ base_chroma_auto_regression_func_ =
+ dsp->film_grain.chroma_auto_regression[1][test_param.coeff_lag];
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ base_chroma_auto_regression_func_ = nullptr;
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ }
+ chroma_auto_regression_func_ =
+ dsp->film_grain.chroma_auto_regression[1][test_param.coeff_lag];
+ }
+
+ ~AutoRegressionTestChroma() override = default;
+
+ protected:
+ // |compare| determines whether to compare the output blocks from the SIMD
+ // implementation, if used, and the C implementation.
+ // |saturate| determines whether to set the inputs to maximum values. This is
+ // intended primarily as a way to simplify differences in output when
+ // debugging.
+ void TestAutoRegressiveFilterChroma(int coeff_lag, int subsampling_x,
+ int subsampling_y, int num_runs,
+ bool saturate, bool compare);
+ ChromaAutoRegressionFunc chroma_auto_regression_func_;
+ ChromaAutoRegressionFunc base_chroma_auto_regression_func_;
+ GrainType luma_block_buffer_[kLumaBlockSize];
+ GrainType u_block_buffer_[kChromaBlockSize];
+ GrainType v_block_buffer_[kChromaBlockSize];
+ GrainType base_u_block_buffer_[kChromaBlockSize];
+ GrainType base_v_block_buffer_[kChromaBlockSize];
+};
+
+template <int bitdepth>
+void AutoRegressionTestChroma<bitdepth>::TestAutoRegressiveFilterChroma(
+ int coeff_lag, int subsampling_x, int subsampling_y, int num_runs,
+ bool saturate, bool compare) {
+ if (chroma_auto_regression_func_ == nullptr) return;
+ // Compare is only needed for NEON tests to compare with C output.
+ if (base_chroma_auto_regression_func_ == nullptr && compare) return;
+
+ // This function relies on the first set of sampled params for basics. The
+ // test param generators are used for coverage.
+ FilmGrainParams params = kFilmGrainParams[0];
+ params.auto_regression_coeff_lag = coeff_lag;
+ const int grain_max = GetGrainMax<bitdepth>();
+ const int grain_min = GetGrainMin<bitdepth>();
+ const int chroma_width =
+ (subsampling_x != 0) ? kMinChromaWidth : kMaxChromaWidth;
+ const int chroma_height =
+ (subsampling_y != 0) ? kMinChromaHeight : kMaxChromaHeight;
+ if (saturate) {
+ memset(params.auto_regression_coeff_u, 127,
+ sizeof(params.auto_regression_coeff_u));
+ memset(params.auto_regression_coeff_v, 127,
+ sizeof(params.auto_regression_coeff_v));
+ for (int y = 0; y < kLumaHeight; ++y) {
+ for (int x = 0; x < kLumaWidth; ++x) {
+ // This loop relies on the fact that kMaxChromaWidth == kLumaWidth.
+ luma_block_buffer_[y * kLumaWidth + x] = grain_max;
+ u_block_buffer_[y * kLumaWidth + x] = grain_max;
+ v_block_buffer_[y * kLumaWidth + x] = grain_max;
+ }
+ }
+ } else {
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ // Allow any valid grain values.
+ const int random_range = grain_max - grain_min + 1;
+ for (int y = 0; y < kLumaHeight; ++y) {
+ for (int x = 0; x < kLumaWidth; ++x) {
+ // This loop relies on the fact that kMaxChromaWidth == kLumaWidth.
+ const int random_y = rnd(random_range);
+ luma_block_buffer_[y * kLumaWidth + x] = random_y + grain_min;
+ const int random_u = rnd(random_range);
+ u_block_buffer_[y * kLumaWidth + x] = random_u + grain_min;
+ const int random_v = rnd(random_range);
+ v_block_buffer_[y * kLumaWidth + x] = random_v + grain_min;
+ }
+ }
+ }
+ if (compare) {
+ memcpy(base_u_block_buffer_, u_block_buffer_, sizeof(u_block_buffer_));
+ memcpy(base_v_block_buffer_, v_block_buffer_, sizeof(v_block_buffer_));
+ }
+
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ chroma_auto_regression_func_(params, luma_block_buffer_, subsampling_x,
+ subsampling_y, u_block_buffer_,
+ v_block_buffer_);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs > 1) {
+ printf("AutoRegressionChroma lag=%d, sub_x=%d, sub_y=%d: %d us\n",
+ coeff_lag, subsampling_x, subsampling_y,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ return;
+ }
+ if (compare) {
+ base_chroma_auto_regression_func_(params, luma_block_buffer_, subsampling_x,
+ subsampling_y, base_u_block_buffer_,
+ base_v_block_buffer_);
+ EXPECT_TRUE(test_utils::CompareBlocks(u_block_buffer_, base_u_block_buffer_,
+ chroma_width, chroma_height,
+ chroma_width, chroma_width, false));
+ EXPECT_TRUE(test_utils::CompareBlocks(v_block_buffer_, base_v_block_buffer_,
+ chroma_width, chroma_height,
+ chroma_width, chroma_width, false));
+ } else {
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("AutoRegressionChromaU lag=%d, sub_x=%d, sub_y=%d",
+ coeff_lag, subsampling_x, subsampling_y)
+ .c_str(),
+ GetARTestDigestChromaU(bitdepth, coeff_lag, subsampling_x,
+ subsampling_y),
+ u_block_buffer_, sizeof(u_block_buffer_), elapsed_time);
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("AutoRegressionChromaV lag=%d, sub_x=%d, sub_y=%d",
+ coeff_lag, subsampling_x, subsampling_y)
+ .c_str(),
+ GetARTestDigestChromaV(bitdepth, coeff_lag, subsampling_x,
+ subsampling_y),
+ v_block_buffer_, sizeof(v_block_buffer_), elapsed_time);
+ }
+}
+
+using AutoRegressionTestChroma8bpp = AutoRegressionTestChroma<8>;
+
+TEST_P(AutoRegressionTestChroma8bpp, AutoRegressiveFilterChroma) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(test_param.coeff_lag, test_param.subsampling_x,
+ test_param.subsampling_y, 1,
+ /*saturate=*/false,
+ /*compare=*/false);
+}
+
+TEST_P(AutoRegressionTestChroma8bpp, AutoRegressiveFilterChromaSaturated) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(test_param.coeff_lag, test_param.subsampling_x,
+ test_param.subsampling_y, 1, /*saturate=*/true,
+ /*compare=*/true);
+}
+
+TEST_P(AutoRegressionTestChroma8bpp, DISABLED_Speed) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(
+ test_param.coeff_lag, test_param.subsampling_x, test_param.subsampling_y,
+ // Subsampling cuts each dimension of the chroma blocks in half, so run
+ // twice as many times to compensate.
+ 1e5 * (1 << (test_param.subsampling_y + test_param.subsampling_x)),
+ /*saturate=*/false, /*compare=*/false);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using AutoRegressionTestChroma10bpp = AutoRegressionTestChroma<10>;
+
+TEST_P(AutoRegressionTestChroma10bpp, AutoRegressiveFilterChroma) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(test_param.coeff_lag, test_param.subsampling_x,
+ test_param.subsampling_y, 1,
+ /*saturate=*/false,
+ /*compare=*/false);
+}
+
+TEST_P(AutoRegressionTestChroma10bpp, AutoRegressiveFilterChromaSaturated) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(test_param.coeff_lag, test_param.subsampling_x,
+ test_param.subsampling_y, 1, /*saturate=*/true,
+ /*compare=*/true);
+}
+
+TEST_P(AutoRegressionTestChroma10bpp, DISABLED_Speed) {
+ AutoRegressionChromaTestParam test_param(GetParam());
+ TestAutoRegressiveFilterChroma(
+ test_param.coeff_lag, test_param.subsampling_x, test_param.subsampling_y,
+ // Subsampling cuts each dimension of the chroma blocks in half, so run
+ // twice as many times to compensate.
+ 1e5 * (1 << (test_param.subsampling_y + test_param.subsampling_x)),
+ /*saturate=*/false, /*compare=*/false);
+}
+
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(C, AutoRegressionTestChroma8bpp,
+ testing::Combine(testing::Range(0, 4) /* coeff_lag */,
+ testing::Range(0,
+ 3) /* subsampling */));
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(C, AutoRegressionTestChroma10bpp,
+ testing::Combine(testing::Range(0, 4) /* coeff_lag */,
+ testing::Range(0,
+ 3) /* subsampling */));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, AutoRegressionTestChroma8bpp,
+ testing::Combine(testing::Range(0, 4) /* coeff_lag */,
+ testing::Range(0,
+ 3) /* subsampling */));
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(NEON, AutoRegressionTestChroma10bpp,
+ testing::Combine(testing::Range(0, 4) /* coeff_lag */,
+ testing::Range(0,
+ 3) /* subsampling */));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+#endif // LIBGAV1_ENABLE_NEON
+
+template <int bitdepth>
+class GrainGenerationTest : public testing::TestWithParam<int> {
+ protected:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ void TestGenerateGrainLuma(int param_index, int num_runs);
+
+ GrainType luma_block_buffer_[kLumaBlockSize];
+};
+
+template <int bitdepth>
+void GrainGenerationTest<bitdepth>::TestGenerateGrainLuma(int param_index,
+ int num_runs) {
+ FilmGrainParams params = kFilmGrainParams[param_index];
+
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ FilmGrain<bitdepth>::GenerateLumaGrain(params, luma_block_buffer_);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs == 1) {
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("GenerateGrainLuma param_index=%d", param_index)
+ .c_str(),
+ GetGrainGenerationTestDigestLuma(bitdepth, param_index),
+ luma_block_buffer_, sizeof(luma_block_buffer_), elapsed_time);
+ } else {
+ printf("GenerateGrainLuma param_index=%d: %d us\n", param_index,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+}
+
+using GrainGenerationTest8bpp = GrainGenerationTest<8>;
+
+TEST_P(GrainGenerationTest8bpp, GenerateGrainLuma) {
+ TestGenerateGrainLuma(GetParam(), 1);
+}
+
+TEST_P(GrainGenerationTest8bpp, DISABLED_LumaSpeed) {
+ TestGenerateGrainLuma(GetParam(), 1e5);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using GrainGenerationTest10bpp = GrainGenerationTest<10>;
+
+TEST_P(GrainGenerationTest10bpp, GenerateGrainLuma) {
+ TestGenerateGrainLuma(GetParam(), 1);
+}
+
+TEST_P(GrainGenerationTest10bpp, DISABLED_LumaSpeed) {
+ TestGenerateGrainLuma(GetParam(), 1e5);
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(C, GrainGenerationTest8bpp,
+ testing::Range(0, 10) /* param_index */);
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(C, GrainGenerationTest10bpp,
+ testing::Range(0, 10) /* param_index */);
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+// This param type is used for both ConstructStripesTest and
+// ConstructImageTest.
+struct ConstructNoiseTestParam {
+ explicit ConstructNoiseTestParam(const std::tuple<int, int>& in)
+ : overlap_flag(std::get<0>(in)) {
+ switch (std::get<1>(in)) {
+ case 0:
+ subsampling_x = 0;
+ subsampling_y = 0;
+ break;
+ case 1:
+ subsampling_x = 1;
+ subsampling_y = 0;
+ break;
+ default:
+ assert(std::get<1>(in) == 2);
+ subsampling_x = 1;
+ subsampling_y = 1;
+ }
+ }
+ const int overlap_flag;
+ int subsampling_x;
+ int subsampling_y;
+};
+
+template <int bitdepth>
+class ConstructStripesTest
+ : public testing::TestWithParam<std::tuple<int, int>> {
+ public:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ ConstructStripesTest() {
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ base_construct_noise_stripes_func_ =
+ dsp->film_grain.construct_noise_stripes[std::get<0>(GetParam())];
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ base_construct_noise_stripes_func_ = nullptr;
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ }
+ construct_noise_stripes_func_ =
+ dsp->film_grain.construct_noise_stripes[std::get<0>(GetParam())];
+ }
+
+ ~ConstructStripesTest() override = default;
+
+ protected:
+ // |compare| determines whether to compare the output blocks from the SIMD
+ // implementation, if used, and the C implementation.
+ // |saturate| determines whether to set the inputs to maximum values. This is
+ // intended primarily as a way to simplify differences in output when
+ // debugging.
+ void TestConstructNoiseStripes(int overlap_flag, int subsampling_x,
+ int subsampling_y, int num_runs, bool saturate,
+ bool compare);
+ ConstructNoiseStripesFunc construct_noise_stripes_func_;
+ ConstructNoiseStripesFunc base_construct_noise_stripes_func_;
+ GrainType grain_buffer_[kLumaBlockSize];
+ Array2DView<GrainType> noise_stripes_;
+ // Owns the memory that noise_stripes_ points to.
+ std::unique_ptr<GrainType[]> stripe_buffer_;
+ Array2DView<GrainType> base_noise_stripes_;
+ // Owns the memory that base_stripe_buffer_ points to.
+ std::unique_ptr<GrainType[]> base_stripe_buffer_;
+};
+
+template <int bitdepth>
+void ConstructStripesTest<bitdepth>::TestConstructNoiseStripes(
+ int overlap_flag, int subsampling_x, int subsampling_y, int num_runs,
+ bool saturate, bool compare) {
+ if (construct_noise_stripes_func_ == nullptr) return;
+ // Compare is only needed for NEON tests to compare with C output.
+ if (base_construct_noise_stripes_func_ == nullptr && compare) return;
+
+ const int stripe_width = ((kFrameWidth + subsampling_x) >> subsampling_x);
+ const int stripe_height = kNoiseStripeHeight;
+ const int stripe_size = stripe_height * stripe_width;
+ const int stripe_buffer_size = stripe_size * kNumTestStripes;
+ if (compare) {
+ base_stripe_buffer_.reset(new (
+ std::nothrow) GrainType[stripe_buffer_size + kNoiseStripePadding]());
+ ASSERT_NE(base_stripe_buffer_, nullptr);
+ base_noise_stripes_.Reset(kNumTestStripes, stripe_size,
+ base_stripe_buffer_.get());
+ }
+ stripe_buffer_.reset(
+ new (std::nothrow) GrainType[stripe_buffer_size + kNoiseStripePadding]());
+ ASSERT_NE(stripe_buffer_, nullptr);
+ noise_stripes_.Reset(kNumTestStripes, stripe_size, stripe_buffer_.get());
+
+ const int grain_max = GetGrainMax<bitdepth>();
+ const int grain_min = GetGrainMin<bitdepth>();
+ if (saturate) {
+ for (int y = 0; y < kLumaHeight; ++y) {
+ for (int x = 0; x < kLumaWidth; ++x) {
+ grain_buffer_[y * kLumaWidth + x] = grain_max;
+ }
+ }
+ } else {
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ // Allow any valid grain values.
+ const int random_range = grain_max - grain_min + 1;
+ for (int y = 0; y < kLumaHeight; ++y) {
+ for (int x = 0; x < kLumaWidth; ++x) {
+ grain_buffer_[y * kLumaWidth + x] = grain_min + rnd(random_range);
+ }
+ }
+ }
+
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ construct_noise_stripes_func_(grain_buffer_, 68, kFrameWidth, kFrameHeight,
+ subsampling_x, subsampling_y,
+ &noise_stripes_);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs > 1) {
+ printf(
+ "ConstructNoiseStripes Speed Test for overlap=%d, sub_x=%d, "
+ "sub_y=%d: %d us\n",
+ overlap_flag, subsampling_x, subsampling_y,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ return;
+ }
+ if (compare) {
+ base_construct_noise_stripes_func_(grain_buffer_, 68, kFrameWidth,
+ kFrameHeight, subsampling_x,
+ subsampling_y, &base_noise_stripes_);
+
+ constexpr int kCompareWidth = 64;
+ for (int stripe = 0; stripe < kNumTestStripes;) {
+ EXPECT_TRUE(test_utils::CompareBlocks(
+ noise_stripes_[stripe], base_noise_stripes_[stripe], kCompareWidth,
+ stripe_height, stripe_width, stripe_width, /*check_padding=*/false,
+ /*print_diff=*/false));
+ }
+ } else {
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("ConstructNoiseStripes overlap=%d, sub_x=%d, sub_y=%d",
+ overlap_flag, subsampling_x, subsampling_y)
+ .c_str(),
+ GetConstructStripesTestDigest(bitdepth, overlap_flag, subsampling_x,
+ subsampling_y),
+ noise_stripes_[0], stripe_buffer_size, elapsed_time);
+ }
+}
+
+using ConstructStripesTest8bpp = ConstructStripesTest<8>;
+
+TEST_P(ConstructStripesTest8bpp, RandomValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+TEST_P(ConstructStripesTest8bpp, SaturatedValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/true, /*compare=*/true);
+}
+TEST_P(ConstructStripesTest8bpp, DISABLED_Speed) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/500,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using ConstructStripesTest10bpp = ConstructStripesTest<10>;
+
+TEST_P(ConstructStripesTest10bpp, RandomValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/false, /*compare=*/false);
+}
+TEST_P(ConstructStripesTest10bpp, SaturatedValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/true, /*compare=*/true);
+}
+
+TEST_P(ConstructStripesTest10bpp, DISABLED_Speed) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseStripes(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/500,
+ /*saturate=*/false, /*compare=*/false);
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(C, ConstructStripesTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(C, ConstructStripesTest10bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+template <int bitdepth>
+class ConstructImageTest : public testing::TestWithParam<std::tuple<int, int>> {
+ public:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ ConstructImageTest() {
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ base_construct_noise_image_overlap_func_ =
+ dsp->film_grain.construct_noise_image_overlap;
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ base_construct_noise_image_overlap_func_ = nullptr;
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ }
+ construct_noise_image_overlap_func_ =
+ dsp->film_grain.construct_noise_image_overlap;
+ }
+
+ ~ConstructImageTest() override = default;
+
+ protected:
+ // |compare| determines whether to compare the output blocks from the SIMD
+ // implementation, if used, and the C implementation.
+ // |saturate| determines whether to set the inputs to maximum values. This is
+ // intended primarily as a way to simplify differences in output when
+ // debugging.
+ void TestConstructNoiseImage(int overlap_flag, int subsampling_x,
+ int subsampling_y, int num_runs, bool saturate,
+ bool compare);
+ ConstructNoiseImageOverlapFunc construct_noise_image_overlap_func_;
+ ConstructNoiseImageOverlapFunc base_construct_noise_image_overlap_func_;
+ Array2DView<GrainType> noise_stripes_;
+ // Owns the memory that noise_stripes_ points to.
+ std::unique_ptr<GrainType[]> stripe_buffer_;
+ Array2D<GrainType> noise_image_;
+ Array2D<GrainType> base_noise_image_;
+};
+
+template <int bitdepth>
+void ConstructImageTest<bitdepth>::TestConstructNoiseImage(
+ int overlap_flag, int subsampling_x, int subsampling_y, int num_runs,
+ bool saturate, bool compare) {
+ if (construct_noise_image_overlap_func_ == nullptr) return;
+ // Compare is only needed for NEON tests to compare with C output.
+ if (base_construct_noise_image_overlap_func_ == nullptr && compare) return;
+
+ const int image_width = ((kFrameWidth + subsampling_x) >> subsampling_x);
+ const int image_height = ((kFrameHeight + subsampling_y) >> subsampling_y);
+ const int stripe_height =
+ ((kNoiseStripeHeight + subsampling_y) >> subsampling_y);
+ const int image_stride = image_width + kNoiseImagePadding;
+ const int stripe_size = stripe_height * image_width;
+ if (compare) {
+ ASSERT_TRUE(base_noise_image_.Reset(image_height, image_stride,
+ /*zero_initialize=*/false));
+ }
+ ASSERT_TRUE(noise_image_.Reset(image_height, image_stride,
+ /*zero_initialize=*/false));
+ // Stride between stripe rows is |image_width|. Padding is only at the
+ // end of the final row of the final stripe to protect from overreads.
+ stripe_buffer_.reset(
+ new (std::nothrow)
+ GrainType[kNumTestStripes * stripe_size + kNoiseStripePadding]);
+ ASSERT_NE(stripe_buffer_, nullptr);
+ noise_stripes_.Reset(kNumTestStripes, stripe_size, stripe_buffer_.get());
+
+ const int grain_max = GetGrainMax<bitdepth>();
+ const int grain_min = GetGrainMin<bitdepth>();
+ if (saturate) {
+ for (int i = 0; i < stripe_size; ++i) {
+ noise_stripes_[0][i] = grain_max;
+ }
+ for (int stripe = 1; stripe < kNumTestStripes; ++stripe) {
+ memcpy(noise_stripes_[stripe], noise_stripes_[0],
+ stripe_size * sizeof(noise_stripes_[0][0]));
+ }
+ } else {
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ // Allow any valid grain values.
+ const int random_range = grain_max - grain_min + 1;
+ for (int stripe = 0; stripe < kNumTestStripes; ++stripe) {
+ // Assign all allocated memory for this stripe.
+ for (int i = 0; i < stripe_height; ++i) {
+ for (int x = 0; x < image_width; ++x) {
+ noise_stripes_[stripe][i * image_width + x] =
+ grain_min + rnd(random_range);
+ }
+ }
+ }
+ }
+
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ FilmGrain<bitdepth>::ConstructNoiseImage(
+ &noise_stripes_, kFrameWidth, kFrameHeight, subsampling_x,
+ subsampling_y, overlap_flag << (1 - subsampling_y), &noise_image_);
+ if (overlap_flag == 1) {
+ construct_noise_image_overlap_func_(&noise_stripes_, kFrameWidth,
+ kFrameHeight, subsampling_x,
+ subsampling_y, &noise_image_);
+ }
+ }
+
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs > 1) {
+ printf(
+ "ConstructNoiseImage Speed Test for overlap=%d, sub_x=%d, "
+ "sub_y=%d: %d us\n",
+ overlap_flag, subsampling_x, subsampling_y,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ return;
+ }
+ if (compare) {
+ FilmGrain<bitdepth>::ConstructNoiseImage(
+ &noise_stripes_, kFrameWidth, kFrameHeight, subsampling_x,
+ subsampling_y, overlap_flag << (1 - subsampling_y), &base_noise_image_);
+ if (overlap_flag == 1) {
+ base_construct_noise_image_overlap_func_(
+ &noise_stripes_, kFrameWidth, kFrameHeight, subsampling_x,
+ subsampling_y, &base_noise_image_);
+ }
+ constexpr int kCompareWidth = 72;
+ constexpr int kCompareHeight = 72;
+ EXPECT_TRUE(test_utils::CompareBlocks(
+ noise_image_[0], base_noise_image_[0], kCompareWidth, kCompareHeight,
+ image_stride, image_stride, /*check_padding=*/false,
+ /*print_diff=*/false));
+ } else {
+ printf("BD%d \"%s\",\n", bitdepth,
+ test_utils::GetMd5Sum(noise_image_[0], image_width, image_height,
+ image_stride)
+ .c_str());
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("ConstructNoiseImage overlap=%d, sub_x=%d, sub_y=%d",
+ overlap_flag, subsampling_x, subsampling_y)
+ .c_str(),
+ GetConstructImageTestDigest(bitdepth, overlap_flag, subsampling_x,
+ subsampling_y),
+ noise_image_[0], image_width, image_height, image_stride, elapsed_time);
+ }
+}
+
+using ConstructImageTest8bpp = ConstructImageTest<8>;
+
+TEST_P(ConstructImageTest8bpp, RandomValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+TEST_P(ConstructImageTest8bpp, SaturatedValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/true, /*compare=*/true);
+}
+
+TEST_P(ConstructImageTest8bpp, DISABLED_Speed) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/500,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using ConstructImageTest10bpp = ConstructImageTest<10>;
+
+TEST_P(ConstructImageTest10bpp, RandomValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/false, /*compare=*/false);
+}
+
+TEST_P(ConstructImageTest10bpp, SaturatedValues) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/1,
+ /*saturate=*/true, /*compare=*/true);
+}
+
+TEST_P(ConstructImageTest10bpp, DISABLED_Speed) {
+ ConstructNoiseTestParam test_params(GetParam());
+ TestConstructNoiseImage(test_params.overlap_flag, test_params.subsampling_x,
+ test_params.subsampling_y, /*num_runs=*/500,
+ /*saturate=*/false, /*compare=*/false);
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(C, ConstructImageTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ConstructImageTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif // LIBGAV1_ENABLE_NEON
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(C, ConstructImageTest10bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+template <int bitdepth>
+class ScalingLookupTableTest : public testing::TestWithParam<int> {
+ public:
+ ScalingLookupTableTest() {
+ test_utils::ResetDspTable(bitdepth);
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ }
+ initialize_func_ = dsp->film_grain.initialize_scaling_lut;
+ }
+ ~ScalingLookupTableTest() override = default;
+
+ protected:
+ void TestSpeed(int num_runs);
+ void ZeroPoints();
+
+ private:
+ static constexpr int kScalingLutBufferLength =
+ (kScalingLookupTableSize + kScalingLookupTablePadding) << (bitdepth - 8);
+ dsp::InitializeScalingLutFunc initialize_func_;
+ int16_t scaling_lut_[kScalingLutBufferLength];
+};
+
+template <int bitdepth>
+void ScalingLookupTableTest<bitdepth>::TestSpeed(int num_runs) {
+ if (initialize_func_ == nullptr) return;
+ const int param_index = GetParam();
+ const FilmGrainParams& params = kFilmGrainParams[param_index];
+ const absl::Time start = absl::Now();
+ Memset(scaling_lut_, 0, kScalingLutBufferLength);
+ for (int i = 0; i < num_runs; ++i) {
+ initialize_func_(params.num_y_points, params.point_y_value,
+ params.point_y_scaling, scaling_lut_,
+ kScalingLutBufferLength);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ if (num_runs > 1) {
+ printf("InitializeScalingLut: %d us\n",
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ return;
+ }
+ test_utils::CheckMd5Digest(
+ "FilmGrain",
+ absl::StrFormat("InitializeScalingLut for param set: %d", param_index)
+ .c_str(),
+ GetScalingInitTestDigest(param_index, bitdepth), scaling_lut_,
+ (sizeof(scaling_lut_[0]) * kScalingLookupTableSize) << (bitdepth - 8),
+ elapsed_time);
+}
+
+template <int bitdepth>
+void ScalingLookupTableTest<bitdepth>::ZeroPoints() {
+ if (initialize_func_ == nullptr) return;
+ const int param_index = GetParam();
+ const FilmGrainParams& params = kFilmGrainParams[param_index];
+ initialize_func_(0, params.point_y_value, params.point_y_scaling,
+ scaling_lut_, kScalingLookupTableSize);
+ for (int i = 0; i < kScalingLookupTableSize; ++i) {
+ ASSERT_EQ(scaling_lut_[i], 0);
+ }
+}
+
+using ScalingLookupTableTest8bpp = ScalingLookupTableTest<8>;
+
+TEST_P(ScalingLookupTableTest8bpp, ZeroPoints) { ZeroPoints(); }
+
+TEST_P(ScalingLookupTableTest8bpp, Correctness) { TestSpeed(/*num_runs=*/1); }
+
+TEST_P(ScalingLookupTableTest8bpp, DISABLED_Speed) {
+ TestSpeed(/*num_runs=*/1e5);
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using ScalingLookupTableTest10bpp = ScalingLookupTableTest<10>;
+
+TEST_P(ScalingLookupTableTest10bpp, ZeroPoints) { ZeroPoints(); }
+
+TEST_P(ScalingLookupTableTest10bpp, Correctness) { TestSpeed(/*num_runs=*/1); }
+
+TEST_P(ScalingLookupTableTest10bpp, DISABLED_Speed) {
+ TestSpeed(/*num_runs=*/1e5);
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+INSTANTIATE_TEST_SUITE_P(C, ScalingLookupTableTest8bpp,
+ testing::Range(0, kNumFilmGrainTestParams));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ScalingLookupTableTest8bpp,
+ testing::Range(0, kNumFilmGrainTestParams));
+#endif
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+INSTANTIATE_TEST_SUITE_P(C, ScalingLookupTableTest10bpp,
+ testing::Range(0, kNumFilmGrainTestParams));
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ScalingLookupTableTest10bpp,
+ testing::Range(0, kNumFilmGrainTestParams));
+#endif
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+struct BlendNoiseTestParam {
+ explicit BlendNoiseTestParam(const std::tuple<int, int>& in)
+ : chroma_scaling_from_luma(std::get<0>(in)) {
+ switch (std::get<1>(in)) {
+ case 0:
+ subsampling_x = 0;
+ subsampling_y = 0;
+ break;
+ case 1:
+ subsampling_x = 1;
+ subsampling_y = 0;
+ break;
+ default:
+ assert(std::get<1>(in) == 2);
+ subsampling_x = 1;
+ subsampling_y = 1;
+ }
+ }
+ const int chroma_scaling_from_luma;
+ int subsampling_x;
+ int subsampling_y;
+};
+
+template <int bitdepth, typename Pixel>
+class BlendNoiseTest : public testing::TestWithParam<std::tuple<int, int>> {
+ public:
+ using GrainType =
+ typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+
+ BlendNoiseTest() {
+ test_utils::ResetDspTable(bitdepth);
+ FilmGrainInit_C();
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ } else if (absl::StartsWith(test_case, "SSE41/")) {
+ FilmGrainInit_SSE4_1();
+ }
+ const BlendNoiseTestParam test_param(GetParam());
+ chroma_scaling_from_luma_ = test_param.chroma_scaling_from_luma;
+ blend_luma_func_ = dsp->film_grain.blend_noise_luma;
+ blend_chroma_func_ =
+ dsp->film_grain.blend_noise_chroma[chroma_scaling_from_luma_];
+ subsampling_x_ = test_param.subsampling_x;
+ subsampling_y_ = test_param.subsampling_y;
+
+ uv_width_ = (width_ + subsampling_x_) >> subsampling_x_;
+ uv_height_ = (height_ + subsampling_y_) >> subsampling_y_;
+ uv_stride_ = uv_width_ * sizeof(Pixel);
+ y_stride_ = width_ * sizeof(Pixel);
+ const size_t buffer_size =
+ sizeof(Pixel) * (width_ * height_ + 2 * uv_width_ * uv_height_ +
+ 3 * kBorderPixelsFilmGrain);
+ source_buffer_.reset(new (std::nothrow) uint8_t[buffer_size]);
+ memset(source_buffer_.get(), 0, sizeof(source_buffer_[0]) * buffer_size);
+ dest_buffer_.reset(new (std::nothrow) uint8_t[buffer_size]);
+ memset(dest_buffer_.get(), 0, sizeof(dest_buffer_[0]) * buffer_size);
+ source_plane_y_ = source_buffer_.get();
+ source_plane_u_ =
+ source_plane_y_ + y_stride_ * height_ + kBorderPixelsFilmGrain;
+ source_plane_v_ =
+ source_plane_u_ + uv_stride_ * uv_height_ + kBorderPixelsFilmGrain;
+ dest_plane_y_ = dest_buffer_.get();
+ dest_plane_u_ =
+ dest_plane_y_ + y_stride_ * height_ + kBorderPixelsFilmGrain;
+ dest_plane_v_ =
+ dest_plane_u_ + uv_stride_ * uv_height_ + kBorderPixelsFilmGrain;
+ }
+ ~BlendNoiseTest() override = default;
+
+ protected:
+ void TestSpeed(int num_runs);
+
+ private:
+ static constexpr int kScalingLutBufferLength =
+ (kScalingLookupTableSize + kScalingLookupTablePadding) << 2;
+
+ void ConvertScalingLut10bpp(int16_t* scaling_lut_10bpp,
+ const int16_t* src_scaling_lut);
+ dsp::BlendNoiseWithImageLumaFunc blend_luma_func_;
+ dsp::BlendNoiseWithImageChromaFunc blend_chroma_func_;
+
+ const int width_ = 1921;
+ const int height_ = 1081;
+ int chroma_scaling_from_luma_ = 0;
+ int subsampling_x_ = 0;
+ int subsampling_y_ = 0;
+ int uv_width_ = 0;
+ int uv_height_ = 0;
+ int uv_stride_ = 0;
+ int y_stride_ = 0;
+ // This holds the data that |source_plane_y_|, |source_plane_u_|, and
+ // |source_plane_v_| point to.
+ std::unique_ptr<uint8_t[]> source_buffer_;
+ // This holds the data that |dest_plane_y_|, |dest_plane_u_|, and
+ // |dest_plane_v_| point to.
+ std::unique_ptr<uint8_t[]> dest_buffer_;
+ uint8_t* source_plane_y_ = nullptr;
+ uint8_t* source_plane_u_ = nullptr;
+ uint8_t* source_plane_v_ = nullptr;
+ uint8_t* dest_plane_y_ = nullptr;
+ uint8_t* dest_plane_u_ = nullptr;
+ uint8_t* dest_plane_v_ = nullptr;
+ Array2D<GrainType> noise_image_[kMaxPlanes];
+ int16_t scaling_lut_10bpp_y_[kScalingLutBufferLength];
+ int16_t scaling_lut_10bpp_u_[kScalingLutBufferLength];
+ int16_t scaling_lut_10bpp_v_[kScalingLutBufferLength];
+};
+
+template <int bitdepth, typename Pixel>
+void BlendNoiseTest<bitdepth, Pixel>::ConvertScalingLut10bpp(
+ int16_t* scaling_lut_10bpp, const int16_t* src_scaling_lut) {
+ for (int i = 0; i < kScalingLookupTableSize - 1; ++i) {
+ const int x_base = i << 2;
+ const int start = src_scaling_lut[i];
+ const int end_index = std::min(i + 1, kScalingLookupTableSize - 1);
+ const int end = src_scaling_lut[end_index];
+ const int delta = end - start;
+ scaling_lut_10bpp[x_base] = start;
+ scaling_lut_10bpp[x_base + 1] = start + RightShiftWithRounding(delta, 2);
+ scaling_lut_10bpp[x_base + 2] =
+ start + RightShiftWithRounding(2 * delta, 2);
+ scaling_lut_10bpp[x_base + 3] =
+ start + RightShiftWithRounding(3 * delta, 2);
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void BlendNoiseTest<bitdepth, Pixel>::TestSpeed(const int num_runs) {
+ if (blend_chroma_func_ == nullptr || blend_luma_func_ == nullptr) return;
+ ASSERT_TRUE(noise_image_[kPlaneY].Reset(height_,
+ width_ + kBorderPixelsFilmGrain,
+ /*zero_initialize=*/false));
+ ASSERT_TRUE(noise_image_[kPlaneU].Reset(uv_height_,
+ uv_width_ + kBorderPixelsFilmGrain,
+ /*zero_initialize=*/false));
+ ASSERT_TRUE(noise_image_[kPlaneV].Reset(uv_height_,
+ uv_width_ + kBorderPixelsFilmGrain,
+ /*zero_initialize=*/false));
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ // Allow any valid grain values.
+ const int grain_max = GetGrainMax<bitdepth>();
+ const int grain_min = GetGrainMin<bitdepth>();
+ const int random_range = grain_max - grain_min + 1;
+ auto* src_y = reinterpret_cast<Pixel*>(source_plane_y_);
+ auto* src_u = reinterpret_cast<Pixel*>(source_plane_u_);
+ auto* src_v = reinterpret_cast<Pixel*>(source_plane_v_);
+ for (int y = 0; y < height_; ++y) {
+ for (int x = 0; x < width_; ++x) {
+ const int random_source_y = rnd(random_range);
+ // Populating the luma source ensures the lookup table is tested. Chroma
+ // planes are given identical values. Giving them different values would
+ // artificially differentiate the outputs. It's important that the test
+ // expect that different outputs are caused by the different scaling
+ // lookup tables, rather than by different inputs.
+ const int uv_y_pos = y >> subsampling_y_;
+ const int uv_x_pos = x >> subsampling_x_;
+ src_y[y * width_ + x] = random_source_y;
+ src_u[uv_y_pos * uv_width_ + uv_x_pos] = random_source_y;
+ src_v[uv_y_pos * uv_width_ + uv_x_pos] = random_source_y;
+ const int random_y = rnd(random_range);
+ noise_image_[kPlaneY][y][x] = random_y + grain_min;
+ const int random_u = rnd(random_range);
+ noise_image_[kPlaneU][uv_y_pos][uv_x_pos] = random_u + grain_min;
+ const int random_v = rnd(random_range);
+ noise_image_[kPlaneV][uv_y_pos][uv_x_pos] = random_v + grain_min;
+ }
+ }
+ static constexpr int16_t kTestScalingLutY[kScalingLookupTableSize] = {
+ 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73,
+ 75, 76, 77, 79, 80, 81, 83, 84, 86, 87, 88, 90, 91, 92, 92,
+ 93, 93, 94, 95, 95, 96, 97, 97, 98, 98, 99, 99, 99, 99, 98,
+ 98, 98, 98, 98, 98, 98, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
+ 102, 102,
+ };
+ static constexpr int16_t kTestScalingLutU[kScalingLookupTableSize] = {
+ 30, 42, 53, 65, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+ 75, 76, 78, 79, 81, 82, 83, 85, 86, 88, 89, 91, 92, 93, 93,
+ 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 99, 99, 99,
+ 99, 99, 99, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+ 98, 98, 98, 98, 98, 98, 98, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 95,
+ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
+ 95, 95,
+ };
+ static constexpr int16_t kTestScalingLutV[kScalingLookupTableSize] = {
+ 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74,
+ 75, 75, 78, 79, 81, 82, 83, 85, 86, 88, 89, 91, 92, 93, 93,
+ 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 99, 99, 98,
+ 98, 98, 98, 98, 98, 98, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+ 97, 97, 97, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
+ 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
+ 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180,
+ 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
+ 255, 255,
+ };
+
+ if (bitdepth == 10) {
+ for (int i = 0; i < kScalingLutBufferLength; ++i) {
+ ConvertScalingLut10bpp(scaling_lut_10bpp_y_, kTestScalingLutY);
+ ConvertScalingLut10bpp(scaling_lut_10bpp_u_, kTestScalingLutU);
+ ConvertScalingLut10bpp(scaling_lut_10bpp_v_, kTestScalingLutV);
+ }
+ }
+ const FilmGrainParams& params = kFilmGrainParams[0];
+ const int min_value = 16 << (bitdepth - 8);
+ const int max_value = 235 << (bitdepth - 8);
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ if (chroma_scaling_from_luma_) {
+ blend_chroma_func_(
+ kPlaneU, params, noise_image_, min_value, max_value, width_, height_,
+ /*start_height=*/0, subsampling_x_, subsampling_y_,
+ (bitdepth == 10) ? scaling_lut_10bpp_y_ : kTestScalingLutY,
+ source_plane_y_, y_stride_, source_plane_u_, uv_stride_,
+ dest_plane_u_, uv_stride_);
+ blend_chroma_func_(
+ kPlaneV, params, noise_image_, min_value, max_value, width_, height_,
+ /*start_height=*/0, subsampling_x_, subsampling_y_,
+ (bitdepth == 10) ? scaling_lut_10bpp_y_ : kTestScalingLutY,
+ source_plane_y_, y_stride_, source_plane_v_, uv_stride_,
+ dest_plane_v_, uv_stride_);
+ } else {
+ blend_chroma_func_(
+ kPlaneU, params, noise_image_, min_value, max_value, width_, height_,
+ /*start_height=*/0, subsampling_x_, subsampling_y_,
+ (bitdepth == 10) ? scaling_lut_10bpp_u_ : kTestScalingLutU,
+ source_plane_y_, y_stride_, source_plane_u_, uv_stride_,
+ dest_plane_u_, uv_stride_);
+ blend_chroma_func_(
+ kPlaneV, params, noise_image_, min_value, max_value, width_, height_,
+ /*start_height=*/0, subsampling_x_, subsampling_y_,
+ (bitdepth == 10) ? scaling_lut_10bpp_v_ : kTestScalingLutV,
+ source_plane_y_, y_stride_, source_plane_v_, uv_stride_,
+ dest_plane_v_, uv_stride_);
+ }
+ blend_luma_func_(noise_image_, min_value, max_value, params.chroma_scaling,
+ width_, height_, /*start_height=*/0,
+ (bitdepth == 10) ? scaling_lut_10bpp_y_ : kTestScalingLutY,
+ source_plane_y_, y_stride_, dest_plane_y_, y_stride_);
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ const char* digest_luma = GetBlendLumaTestDigest(bitdepth);
+ printf("YBD%d \"%s\",\n", bitdepth,
+ test_utils::GetMd5Sum(dest_plane_y_, y_stride_ * height_).c_str());
+ printf("UBD%d \"%s\",\n", bitdepth,
+ test_utils::GetMd5Sum(dest_plane_u_, uv_stride_ * uv_height_).c_str());
+ printf("VBD%d \"%s\",\n", bitdepth,
+ test_utils::GetMd5Sum(dest_plane_v_, uv_stride_ * uv_height_).c_str());
+ test_utils::CheckMd5Digest(
+ "BlendNoiseWithImage",
+ absl::StrFormat("Luma cfl=%d, sub_x=%d, sub_y=%d",
+ chroma_scaling_from_luma_, subsampling_x_, subsampling_y_)
+ .c_str(),
+ digest_luma, dest_plane_y_, y_stride_ * height_, elapsed_time);
+ const char* digest_chroma_u = GetBlendChromaUTestDigest(
+ bitdepth, chroma_scaling_from_luma_, subsampling_x_, subsampling_y_);
+ test_utils::CheckMd5Digest(
+ "BlendNoiseWithImage",
+ absl::StrFormat("ChromaU cfl=%d, sub_x=%d, sub_y=%d",
+ chroma_scaling_from_luma_, subsampling_x_, subsampling_y_)
+ .c_str(),
+ digest_chroma_u, dest_plane_u_, uv_stride_ * uv_height_, elapsed_time);
+ const char* digest_chroma_v = GetBlendChromaVTestDigest(
+ bitdepth, chroma_scaling_from_luma_, subsampling_x_, subsampling_y_);
+ test_utils::CheckMd5Digest(
+ "BlendNoiseWithImage",
+ absl::StrFormat("ChromaV cfl=%d, sub_x=%d, sub_y=%d",
+ chroma_scaling_from_luma_, subsampling_x_, subsampling_y_)
+ .c_str(),
+ digest_chroma_v, dest_plane_v_, uv_stride_ * uv_height_, elapsed_time);
+}
+
+using BlendNoiseTest8bpp = BlendNoiseTest<8, uint8_t>;
+
+TEST_P(BlendNoiseTest8bpp, MatchesOriginalOutput) { TestSpeed(1); }
+
+TEST_P(BlendNoiseTest8bpp, DISABLED_Speed) { TestSpeed(kNumSpeedTests); }
+
+INSTANTIATE_TEST_SUITE_P(C, BlendNoiseTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#if LIBGAV1_ENABLE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE41, BlendNoiseTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BlendNoiseTest8bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using BlendNoiseTest10bpp = BlendNoiseTest<10, uint16_t>;
+
+TEST_P(BlendNoiseTest10bpp, MatchesOriginalOutput) { TestSpeed(1); }
+
+TEST_P(BlendNoiseTest10bpp, DISABLED_Speed) { TestSpeed(kNumSpeedTests); }
+
+INSTANTIATE_TEST_SUITE_P(C, BlendNoiseTest10bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#if LIBGAV1_ENABLE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE41, BlendNoiseTest10bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BlendNoiseTest10bpp,
+ testing::Combine(testing::Range(0, 2),
+ testing::Range(0, 3)));
+#endif
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+template <int bitdepth, typename Pixel>
+class FilmGrainSpeedTest : public testing::TestWithParam<int> {
+ public:
+ FilmGrainSpeedTest() {
+ test_utils::ResetDspTable(bitdepth);
+ FilmGrainInit_C();
+
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "NEON/")) {
+#if LIBGAV1_ENABLE_NEON
+ FilmGrainInit_NEON();
+#endif
+ } else if (absl::StartsWith(test_case, "SSE41/")) {
+ FilmGrainInit_SSE4_1();
+ }
+ uv_width_ = (width_ + subsampling_x_) >> subsampling_x_;
+ uv_height_ = (height_ + subsampling_y_) >> subsampling_y_;
+ uv_stride_ = uv_width_ * sizeof(Pixel);
+ y_stride_ = width_ * sizeof(Pixel);
+ const size_t buffer_size =
+ sizeof(Pixel) * (width_ * height_ + 2 * uv_width_ * uv_height_);
+ source_buffer_.reset(new (std::nothrow) uint8_t[buffer_size]);
+ memset(source_buffer_.get(), 0, sizeof(source_buffer_[0]) * buffer_size);
+ dest_buffer_.reset(new (std::nothrow) uint8_t[buffer_size]);
+ memset(dest_buffer_.get(), 0, sizeof(dest_buffer_[0]) * buffer_size);
+ source_plane_y_ = source_buffer_.get();
+ source_plane_u_ = source_plane_y_ + y_stride_ * height_;
+ source_plane_v_ = source_plane_u_ + uv_stride_ * uv_height_;
+ dest_plane_y_ = dest_buffer_.get();
+ dest_plane_u_ = dest_plane_y_ + y_stride_ * height_;
+ dest_plane_v_ = dest_plane_u_ + uv_stride_ * uv_height_;
+ const int num_threads = GetParam();
+ thread_pool_ = ThreadPool::Create(num_threads);
+ }
+ ~FilmGrainSpeedTest() override = default;
+
+ protected:
+ void TestSpeed(int num_runs);
+
+ private:
+ const int width_ = 1920;
+ const int height_ = 1080;
+ const int subsampling_x_ = 1;
+ const int subsampling_y_ = 1;
+ int uv_width_ = 0;
+ int uv_height_ = 0;
+ int uv_stride_ = 0;
+ int y_stride_ = 0;
+ std::unique_ptr<uint8_t[]> source_buffer_;
+ std::unique_ptr<uint8_t[]> dest_buffer_;
+ const uint8_t* source_plane_y_ = nullptr;
+ const uint8_t* source_plane_u_ = nullptr;
+ const uint8_t* source_plane_v_ = nullptr;
+ uint8_t* dest_plane_y_ = nullptr;
+ uint8_t* dest_plane_u_ = nullptr;
+ uint8_t* dest_plane_v_ = nullptr;
+ std::unique_ptr<ThreadPool> thread_pool_;
+};
+
+// Each run of the speed test adds film grain noise to 10 dummy frames. The
+// film grain parameters for the 10 frames were generated with aomenc.
+template <int bitdepth, typename Pixel>
+void FilmGrainSpeedTest<bitdepth, Pixel>::TestSpeed(const int num_runs) {
+ const dsp::Dsp* dsp = GetDspTable(bitdepth);
+ if (dsp->film_grain.blend_noise_chroma[0] == nullptr ||
+ dsp->film_grain.blend_noise_luma == nullptr) {
+ return;
+ }
+ for (int k = 0; k < kNumFilmGrainTestParams; ++k) {
+ const FilmGrainParams& params = kFilmGrainParams[k];
+ const absl::Time start = absl::Now();
+ for (int i = 0; i < num_runs; ++i) {
+ FilmGrain<bitdepth> film_grain(params, /*is_monochrome=*/false,
+ /*color_matrix_is_identity=*/false,
+ subsampling_x_, subsampling_y_, width_,
+ height_, thread_pool_.get());
+ EXPECT_TRUE(film_grain.AddNoise(
+ source_plane_y_, y_stride_, source_plane_u_, source_plane_v_,
+ uv_stride_, dest_plane_y_, y_stride_, dest_plane_u_, dest_plane_v_,
+ uv_stride_));
+ }
+ const absl::Duration elapsed_time = absl::Now() - start;
+ const char* digest_luma = GetTestDigestLuma(bitdepth, k);
+ test_utils::CheckMd5Digest(
+ "FilmGrainSynthesisLuma",
+ absl::StrFormat("kFilmGrainParams[%d]", k).c_str(), digest_luma,
+ dest_plane_y_, y_stride_ * height_, elapsed_time);
+ const char* digest_chroma_u = GetTestDigestChromaU(bitdepth, k);
+ test_utils::CheckMd5Digest(
+ "FilmGrainSynthesisChromaU",
+ absl::StrFormat("kFilmGrainParams[%d]", k).c_str(), digest_chroma_u,
+ dest_plane_u_, uv_stride_ * uv_height_, elapsed_time);
+ const char* digest_chroma_v = GetTestDigestChromaV(bitdepth, k);
+ test_utils::CheckMd5Digest(
+ "FilmGrainSynthesisChromaV",
+ absl::StrFormat("kFilmGrainParams[%d]", k).c_str(), digest_chroma_v,
+ dest_plane_v_, uv_stride_ * uv_height_, elapsed_time);
+ }
+}
+
+using FilmGrainSpeedTest8bpp = FilmGrainSpeedTest<8, uint8_t>;
+
+TEST_P(FilmGrainSpeedTest8bpp, MatchesOriginalOutput) { TestSpeed(1); }
+
+TEST_P(FilmGrainSpeedTest8bpp, DISABLED_Speed) { TestSpeed(kNumSpeedTests); }
+
+INSTANTIATE_TEST_SUITE_P(C, FilmGrainSpeedTest8bpp, testing::Values(0, 3, 8));
+
+#if LIBGAV1_ENABLE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE41, FilmGrainSpeedTest8bpp,
+ testing::Values(0, 3, 8));
+#endif
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, FilmGrainSpeedTest8bpp,
+ testing::Values(0, 3, 8));
+#endif
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using FilmGrainSpeedTest10bpp = FilmGrainSpeedTest<10, uint16_t>;
+
+TEST_P(FilmGrainSpeedTest10bpp, MatchesOriginalOutput) { TestSpeed(1); }
+
+TEST_P(FilmGrainSpeedTest10bpp, DISABLED_Speed) { TestSpeed(kNumSpeedTests); }
+
+INSTANTIATE_TEST_SUITE_P(C, FilmGrainSpeedTest10bpp, testing::Values(0, 3, 8));
+
+#if LIBGAV1_ENABLE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE41, FilmGrainSpeedTest10bpp,
+ testing::Values(0, 3, 8));
+#endif
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, FilmGrainSpeedTest10bpp,
+ testing::Values(0, 3, 8));
+#endif
+
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+} // namespace
+} // namespace film_grain
+} // namespace dsp
+} // namespace libgav1
diff --git a/src/frame_scratch_buffer.h b/src/frame_scratch_buffer.h
index 90c3bb8..1b0d2e0 100644
--- a/src/frame_scratch_buffer.h
+++ b/src/frame_scratch_buffer.h
@@ -17,10 +17,13 @@
#ifndef LIBGAV1_SRC_FRAME_SCRATCH_BUFFER_H_
#define LIBGAV1_SRC_FRAME_SCRATCH_BUFFER_H_
+#include <array>
#include <condition_variable> // NOLINT (unapproved c++11 header)
#include <cstdint>
#include <memory>
#include <mutex> // NOLINT (unapproved c++11 header)
+#include <new>
+#include <utility>
#include "src/loop_restoration_info.h"
#include "src/residual_buffer_pool.h"
@@ -46,9 +49,24 @@ using IntraPredictionBuffer =
// Buffer to facilitate decoding a frame. This struct is used only within
// DecoderImpl::DecodeTiles().
-struct FrameScratchBuffer {
+// The alignment requirement is due to the SymbolDecoderContext member
+// symbol_decoder_context and the TileScratchBufferPool member
+// tile_scratch_buffer_pool.
+struct FrameScratchBuffer : public MaxAlignedAllocable {
LoopRestorationInfo loop_restoration_info;
- Array2D<int16_t> cdef_index;
+ Array2D<int8_t> cdef_index;
+ // Encodes the block skip information as a bitmask for the entire frame which
+ // will be used by the cdef process.
+ //
+ // * The size of this array is rows4x4 / 2 * column4x4 / 16.
+ // * Each row of the bitmasks array (cdef_skip) stores the bitmask for 2 rows
+ // of 4x4 blocks.
+ // * Each entry in the row will store the skip information for 16 4x4 blocks
+ // (8 bits).
+ // * If any of the four 4x4 blocks in the 8x8 block is not a skip block, then
+ // the corresponding bit (as described below) will be set to 1.
+ // * For the 4x4 block at column4x4 the bit index is (column4x4 >> 1).
+ Array2D<uint8_t> cdef_skip;
Array2D<TransformSize> inter_transform_sizes;
BlockParametersHolder block_parameters_holder;
TemporalMotionField motion_field;
diff --git a/src/gav1/decoder_buffer.h b/src/gav1/decoder_buffer.h
index 37bcb29..880c320 100644
--- a/src/gav1/decoder_buffer.h
+++ b/src/gav1/decoder_buffer.h
@@ -129,24 +129,17 @@ typedef struct Libgav1DecoderBuffer {
Libgav1TransferCharacteristics transfer_characteristics;
Libgav1MatrixCoefficients matrix_coefficients;
- // Image storage dimensions.
- // NOTE: These fields are named w and h in vpx_image_t and aom_image_t.
- // uint32_t width; // Stored image width.
- // uint32_t height; // Stored image height.
int bitdepth; // Stored image bitdepth.
- // Image display dimensions.
- // NOTES:
- // 1. These fields are named d_w and d_h in vpx_image_t and aom_image_t.
- // 2. libvpx and libaom clients use d_w and d_h much more often than w and h.
- // 3. These fields can just be stored for the Y plane and the clients can
- // calculate the values for the U and V planes if the image format or
- // subsampling is exposed.
+ // Image display dimensions in Y/U/V order.
int displayed_width[3]; // Displayed image width.
int displayed_height[3]; // Displayed image height.
- int stride[3];
- uint8_t* plane[3];
+ // Values are given in Y/U/V order.
+ int stride[3]; // The width in bytes of one row of the |plane| buffer.
+ // This may include padding bytes for alignment or
+ // internal use by the decoder.
+ uint8_t* plane[3]; // The reconstructed image plane(s).
// Spatial id of this frame.
int spatial_id;
diff --git a/src/gav1/version.h b/src/gav1/version.h
index c018928..9bdc630 100644
--- a/src/gav1/version.h
+++ b/src/gav1/version.h
@@ -23,8 +23,8 @@
// (https://semver.org).
#define LIBGAV1_MAJOR_VERSION 0
-#define LIBGAV1_MINOR_VERSION 16
-#define LIBGAV1_PATCH_VERSION 3
+#define LIBGAV1_MINOR_VERSION 17
+#define LIBGAV1_PATCH_VERSION 0
#define LIBGAV1_VERSION \
((LIBGAV1_MAJOR_VERSION << 16) | (LIBGAV1_MINOR_VERSION << 8) | \
diff --git a/src/internal_frame_buffer_list_test.cc b/src/internal_frame_buffer_list_test.cc
new file mode 100644
index 0000000..21f1162
--- /dev/null
+++ b/src/internal_frame_buffer_list_test.cc
@@ -0,0 +1,158 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/internal_frame_buffer_list.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "src/gav1/decoder_buffer.h"
+#include "src/gav1/frame_buffer.h"
+
+namespace libgav1 {
+namespace {
+
+class InternalFrameBufferListTest : public testing::Test {
+ protected:
+ static constexpr int kBufferListSize = 10;
+
+ InternalFrameBufferListTest() {
+ on_frame_buffer_size_changed_ = OnInternalFrameBufferSizeChanged;
+ get_frame_buffer_ = GetInternalFrameBuffer;
+ release_frame_buffer_ = ReleaseInternalFrameBuffer;
+ callback_private_data_ = &buffer_list_;
+ }
+
+ // Frame buffer callbacks.
+ FrameBufferSizeChangedCallback on_frame_buffer_size_changed_;
+ GetFrameBufferCallback get_frame_buffer_;
+ ReleaseFrameBufferCallback release_frame_buffer_;
+ // Private data associated with the frame buffer callbacks.
+ void* callback_private_data_;
+
+ private:
+ InternalFrameBufferList buffer_list_;
+};
+
+TEST_F(InternalFrameBufferListTest, ReleaseInRandomOrder) {
+ const int bitdepth = 8;
+ const Libgav1ImageFormat image_format = kLibgav1ImageFormatYuv420;
+ const int width = 100;
+ const int height = 50;
+ const int left_border = 0;
+ const int right_border = 0;
+ const int top_border = 0;
+ const int bottom_border = 0;
+ const int stride_alignment = 16;
+
+ EXPECT_EQ(on_frame_buffer_size_changed_(callback_private_data_, bitdepth,
+ image_format, width, height,
+ left_border, right_border, top_border,
+ bottom_border, stride_alignment),
+ 0);
+
+ FrameBuffer frame_buffers[kBufferListSize];
+ for (auto& frame_buffer : frame_buffers) {
+ EXPECT_EQ(
+ get_frame_buffer_(callback_private_data_, bitdepth, image_format, width,
+ height, left_border, right_border, top_border,
+ bottom_border, stride_alignment, &frame_buffer),
+ 0);
+ EXPECT_NE(frame_buffer.plane[0], nullptr);
+ EXPECT_GE(frame_buffer.stride[0], 112);
+ EXPECT_NE(frame_buffer.plane[1], nullptr);
+ EXPECT_GE(frame_buffer.stride[1], 64);
+ EXPECT_NE(frame_buffer.plane[2], nullptr);
+ EXPECT_GE(frame_buffer.stride[2], 64);
+ }
+
+ // Release and get a few buffers at indexes <= 5 in random order.
+ static_assert(5 < kBufferListSize, "");
+ static constexpr int indexes[] = {1, 4, 5, 5, 4, 3, 2, 3, 5, 0};
+ for (int index : indexes) {
+ release_frame_buffer_(callback_private_data_,
+ frame_buffers[index].private_data);
+
+ EXPECT_EQ(get_frame_buffer_(callback_private_data_, bitdepth, image_format,
+ width, height, left_border, right_border,
+ top_border, bottom_border, stride_alignment,
+ &frame_buffers[index]),
+ 0);
+ EXPECT_NE(frame_buffers[index].plane[0], nullptr);
+ EXPECT_GE(frame_buffers[index].stride[0], 112);
+ EXPECT_NE(frame_buffers[index].plane[1], nullptr);
+ EXPECT_GE(frame_buffers[index].stride[1], 64);
+ EXPECT_NE(frame_buffers[index].plane[2], nullptr);
+ EXPECT_GE(frame_buffers[index].stride[2], 64);
+ }
+
+ for (auto& frame_buffer : frame_buffers) {
+ release_frame_buffer_(callback_private_data_, frame_buffer.private_data);
+ }
+}
+
+TEST_F(InternalFrameBufferListTest, VaryingBufferSizes) {
+ const int bitdepth = 8;
+ const Libgav1ImageFormat image_format = kLibgav1ImageFormatYuv420;
+ const int width = 64;
+ const int height = 48;
+ const int left_border = 16;
+ const int right_border = 16;
+ const int top_border = 16;
+ const int bottom_border = 16;
+ const int stride_alignment = 16;
+
+ EXPECT_EQ(on_frame_buffer_size_changed_(callback_private_data_, bitdepth,
+ image_format, 16 * width, 16 * height,
+ left_border, right_border, top_border,
+ bottom_border, stride_alignment),
+ 0);
+
+ FrameBuffer frame_buffer;
+ for (int i = 1; i <= 16; ++i) {
+ EXPECT_EQ(get_frame_buffer_(callback_private_data_, bitdepth, image_format,
+ i * width, i * height, left_border,
+ right_border, top_border, bottom_border,
+ stride_alignment, &frame_buffer),
+ 0);
+ EXPECT_NE(frame_buffer.plane[0], nullptr);
+ EXPECT_GE(frame_buffer.stride[0], i * width + left_border + right_border);
+ EXPECT_NE(frame_buffer.plane[1], nullptr);
+ EXPECT_GE(frame_buffer.stride[1],
+ (i * width + left_border + right_border) >> 1);
+ EXPECT_NE(frame_buffer.plane[2], nullptr);
+ EXPECT_GE(frame_buffer.stride[2],
+ (i * width + left_border + right_border) >> 1);
+ release_frame_buffer_(callback_private_data_, frame_buffer.private_data);
+ }
+ for (int i = 16; i >= 1; --i) {
+ EXPECT_EQ(get_frame_buffer_(callback_private_data_, bitdepth, image_format,
+ i * width, i * height, left_border,
+ right_border, top_border, bottom_border,
+ stride_alignment, &frame_buffer),
+ 0);
+ EXPECT_NE(frame_buffer.plane[0], nullptr);
+ EXPECT_GE(frame_buffer.stride[0], i * width + left_border + right_border);
+ EXPECT_NE(frame_buffer.plane[1], nullptr);
+ EXPECT_GE(frame_buffer.stride[1],
+ (i * width + left_border + right_border) >> 1);
+ EXPECT_NE(frame_buffer.plane[2], nullptr);
+ EXPECT_GE(frame_buffer.stride[2],
+ (i * width + left_border + right_border) >> 1);
+ release_frame_buffer_(callback_private_data_, frame_buffer.private_data);
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/loop_restoration_info.cc b/src/loop_restoration_info.cc
index 2dba57d..8c17711 100644
--- a/src/loop_restoration_info.cc
+++ b/src/loop_restoration_info.cc
@@ -133,7 +133,7 @@ bool LoopRestorationInfo::PopulateUnitInfoForSuperBlock(
}
void LoopRestorationInfo::ReadUnitCoefficients(
- DaalaBitReader* const reader,
+ EntropyDecoder* const reader,
SymbolDecoderContext* const symbol_decoder_context, Plane plane,
int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>* const reference_unit_info) {
@@ -161,7 +161,7 @@ void LoopRestorationInfo::ReadUnitCoefficients(
}
void LoopRestorationInfo::ReadWienerInfo(
- DaalaBitReader* const reader, Plane plane, int unit_id,
+ EntropyDecoder* const reader, Plane plane, int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>* const reference_unit_info) {
for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) {
if (plane != kPlaneY) {
@@ -198,7 +198,7 @@ void LoopRestorationInfo::ReadWienerInfo(
}
void LoopRestorationInfo::ReadSgrProjInfo(
- DaalaBitReader* const reader, Plane plane, int unit_id,
+ EntropyDecoder* const reader, Plane plane, int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>* const reference_unit_info) {
const int sgr_proj_index =
static_cast<int>(reader->ReadLiteral(kSgrProjParamsBits));
diff --git a/src/loop_restoration_info.h b/src/loop_restoration_info.h
index f174b89..bff6746 100644
--- a/src/loop_restoration_info.h
+++ b/src/loop_restoration_info.h
@@ -19,8 +19,6 @@
#include <array>
#include <cstdint>
-#include <memory>
-#include <vector>
#include "src/dsp/common.h"
#include "src/symbol_decoder_context.h"
@@ -58,16 +56,16 @@ class LoopRestorationInfo {
uint8_t superres_scale_denominator,
int row4x4, int column4x4,
LoopRestorationUnitInfo* unit_info) const;
- void ReadUnitCoefficients(DaalaBitReader* reader,
+ void ReadUnitCoefficients(EntropyDecoder* reader,
SymbolDecoderContext* symbol_decoder_context,
Plane plane, int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>*
reference_unit_info); // 5.11.58.
void ReadWienerInfo(
- DaalaBitReader* reader, Plane plane, int unit_id,
+ EntropyDecoder* reader, Plane plane, int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>* reference_unit_info);
void ReadSgrProjInfo(
- DaalaBitReader* reader, Plane plane, int unit_id,
+ EntropyDecoder* reader, Plane plane, int unit_id,
std::array<RestorationUnitInfo, kMaxPlanes>* reference_unit_info);
// Getters.
diff --git a/src/motion_vector.cc b/src/motion_vector.cc
index fdb1875..36018ab 100644
--- a/src/motion_vector.cc
+++ b/src/motion_vector.cc
@@ -83,14 +83,12 @@ void SetupGlobalMv(const Tile::Block& block, int index,
(gm.params[5] - (1 << kWarpedModelPrecisionBits)) * y +
gm.params[1];
if (frame_header.allow_high_precision_mv) {
- mv->mv[MotionVector::kRow] =
- RightShiftWithRoundingSigned(yc, kWarpedModelPrecisionBits - 3);
- mv->mv[MotionVector::kColumn] =
- RightShiftWithRoundingSigned(xc, kWarpedModelPrecisionBits - 3);
+ mv->mv[0] = RightShiftWithRoundingSigned(yc, kWarpedModelPrecisionBits - 3);
+ mv->mv[1] = RightShiftWithRoundingSigned(xc, kWarpedModelPrecisionBits - 3);
} else {
- mv->mv[MotionVector::kRow] = MultiplyBy2(
+ mv->mv[0] = MultiplyBy2(
RightShiftWithRoundingSigned(yc, kWarpedModelPrecisionBits - 2));
- mv->mv[MotionVector::kColumn] = MultiplyBy2(
+ mv->mv[1] = MultiplyBy2(
RightShiftWithRoundingSigned(xc, kWarpedModelPrecisionBits - 2));
LowerMvPrecision(frame_header, mv);
}
@@ -115,7 +113,7 @@ void SearchStack(const Tile::Block& block, const BlockParameters& mv_bp,
// LowerMvPrecision() is not necessary, since the values in
// |prediction_parameters.global_mv| and |mv_bp.mv| were generated by it.
const auto global_motion_type = global_motion[bp.reference_frame[0]].type;
- if (IsGlobalMvBlock(mv_bp.is_global_mv_block, global_motion_type)) {
+ if (IsGlobalMvBlock(mv_bp, global_motion_type)) {
candidate_mv = prediction_parameters.global_mv[0];
} else {
candidate_mv = mv_bp.mv.mv[index];
@@ -126,7 +124,7 @@ void SearchStack(const Tile::Block& block, const BlockParameters& mv_bp,
const int num_found = *num_mv_found;
const auto result = std::find_if(ref_mv_stack, ref_mv_stack + num_found,
[&candidate_mv](const MotionVector& ref_mv) {
- return ref_mv == candidate_mv;
+ return ref_mv.mv32 == candidate_mv.mv32;
});
if (result != ref_mv_stack + num_found) {
prediction_parameters.IncreaseWeight(std::distance(ref_mv_stack, result),
@@ -152,7 +150,7 @@ void CompoundSearchStack(const Tile::Block& block, const BlockParameters& mv_bp,
CompoundMotionVector candidate_mv = mv_bp.mv;
for (int i = 0; i < 2; ++i) {
const auto global_motion_type = global_motion[bp.reference_frame[i]].type;
- if (IsGlobalMvBlock(mv_bp.is_global_mv_block, global_motion_type)) {
+ if (IsGlobalMvBlock(mv_bp, global_motion_type)) {
candidate_mv.mv[i] = prediction_parameters.global_mv[i];
}
}
@@ -164,7 +162,7 @@ void CompoundSearchStack(const Tile::Block& block, const BlockParameters& mv_bp,
const auto result =
std::find_if(compound_ref_mv_stack, compound_ref_mv_stack + num_found,
[&candidate_mv](const CompoundMotionVector& ref_mv) {
- return ref_mv == candidate_mv;
+ return ref_mv.mv64 == candidate_mv.mv64;
});
if (result != compound_ref_mv_stack + num_found) {
prediction_parameters.IncreaseWeight(
@@ -172,7 +170,7 @@ void CompoundSearchStack(const Tile::Block& block, const BlockParameters& mv_bp,
return;
}
if (num_found >= kMaxRefMvStackSize) return;
- compound_ref_mv_stack[num_found] = candidate_mv;
+ compound_ref_mv_stack[num_found].mv64 = candidate_mv.mv64;
prediction_parameters.SetWeightIndexStackEntry(num_found, weight);
++*num_mv_found;
}
@@ -284,7 +282,8 @@ void AddTemporalReferenceMvCandidate(
frame_header.allow_high_precision_mv ? 2 : frame_header.force_integer_mv;
const MotionVector* const global_mv = prediction_parameters->global_mv;
if (is_compound) {
- CompoundMotionVector candidate_mvs[kMaxTemporalMvCandidatesWithPadding];
+ alignas(kMaxAlignment)
+ CompoundMotionVector candidate_mvs[kMaxTemporalMvCandidatesWithPadding];
const dsp::Dsp& dsp = *dsp::GetDspTable(8);
dsp.mv_projection_compound[mv_projection_function_index](
temporal_mvs, temporal_reference_offsets, reference_offsets, count,
@@ -310,7 +309,7 @@ void AddTemporalReferenceMvCandidate(
const auto result =
std::find_if(compound_ref_mv_stack, compound_ref_mv_stack + num_found,
[&candidate_mv](const CompoundMotionVector& ref_mv) {
- return ref_mv == candidate_mv;
+ return ref_mv.mv64 == candidate_mv.mv64;
});
if (result != compound_ref_mv_stack + num_found) {
prediction_parameters->IncreaseWeight(
@@ -318,7 +317,7 @@ void AddTemporalReferenceMvCandidate(
continue;
}
if (num_found >= kMaxRefMvStackSize) continue;
- compound_ref_mv_stack[num_found] = candidate_mv;
+ compound_ref_mv_stack[num_found].mv64 = candidate_mv.mv64;
prediction_parameters->SetWeightIndexStackEntry(num_found, 2);
++num_found;
} while (++index < count);
@@ -337,7 +336,7 @@ void AddTemporalReferenceMvCandidate(
const auto result =
std::find_if(ref_mv_stack, ref_mv_stack + num_found,
[&candidate_mv](const MotionVector& ref_mv) {
- return ref_mv == candidate_mv;
+ return ref_mv.mv32 == candidate_mv.mv32;
});
if (result != ref_mv_stack + num_found) {
prediction_parameters->IncreaseWeight(std::distance(ref_mv_stack, result),
@@ -369,7 +368,7 @@ void AddTemporalReferenceMvCandidate(
const auto result =
std::find_if(ref_mv_stack, ref_mv_stack + num_found,
[&candidate_mv](const MotionVector& ref_mv) {
- return ref_mv == candidate_mv;
+ return ref_mv.mv32 == candidate_mv.mv32;
});
if (result != ref_mv_stack + num_found) {
prediction_parameters->IncreaseWeight(std::distance(ref_mv_stack, result),
@@ -563,8 +562,8 @@ void AddExtraSingleMvCandidate(const Tile::Block& block, int mv_row,
candidate_mv.mv[1] *= -1;
}
assert(num_found <= 2);
- if ((num_found != 0 && ref_mv_stack[0] == candidate_mv) ||
- (num_found == 2 && ref_mv_stack[1] == candidate_mv)) {
+ if ((num_found != 0 && ref_mv_stack[0].mv32 == candidate_mv.mv32) ||
+ (num_found == 2 && ref_mv_stack[1].mv32 == candidate_mv.mv32)) {
continue;
}
ref_mv_stack[num_found] = candidate_mv;
@@ -624,16 +623,16 @@ void ExtraSearch(const Tile::Block& block, bool is_compound,
}
}
if (*num_mv_found == 1) {
- if (combined_mvs[0] == compound_ref_mv_stack[0]) {
- compound_ref_mv_stack[1] = combined_mvs[1];
+ if (combined_mvs[0].mv64 == compound_ref_mv_stack[0].mv64) {
+ compound_ref_mv_stack[1].mv64 = combined_mvs[1].mv64;
} else {
- compound_ref_mv_stack[1] = combined_mvs[0];
+ compound_ref_mv_stack[1].mv64 = combined_mvs[0].mv64;
}
prediction_parameters.SetWeightIndexStackEntry(1, 0);
} else {
assert(*num_mv_found == 0);
for (int i = 0; i < 2; ++i) {
- compound_ref_mv_stack[i] = combined_mvs[i];
+ compound_ref_mv_stack[i].mv64 = combined_mvs[i].mv64;
prediction_parameters.SetWeightIndexStackEntry(i, 0);
}
}
diff --git a/src/motion_vector.h b/src/motion_vector.h
index d739e80..68d14fe 100644
--- a/src/motion_vector.h
+++ b/src/motion_vector.h
@@ -30,9 +30,11 @@
namespace libgav1 {
-constexpr bool IsGlobalMvBlock(bool is_global_mv_block,
+constexpr bool IsGlobalMvBlock(const BlockParameters& bp,
GlobalMotionTransformationType type) {
- return is_global_mv_block &&
+ return (bp.y_mode == kPredictionModeGlobalMv ||
+ bp.y_mode == kPredictionModeGlobalGlobalMv) &&
+ !IsBlockDimension4(bp.size) &&
type > kGlobalMotionTransformationTypeTranslation;
}
diff --git a/src/obu_parser.cc b/src/obu_parser.cc
index 69480d7..445450b 100644
--- a/src/obu_parser.cc
+++ b/src/obu_parser.cc
@@ -140,10 +140,10 @@ bool ObuParser::ParseColorConfig(ObuSequenceHeader* sequence_header) {
int64_t scratch;
ColorConfig* const color_config = &sequence_header->color_config;
OBU_READ_BIT_OR_FAIL;
- const auto high_bitdepth = static_cast<bool>(scratch);
+ const bool high_bitdepth = scratch != 0;
if (sequence_header->profile == kProfile2 && high_bitdepth) {
OBU_READ_BIT_OR_FAIL;
- const auto is_twelve_bit = static_cast<bool>(scratch);
+ const bool is_twelve_bit = scratch != 0;
color_config->bitdepth = is_twelve_bit ? 12 : 10;
} else {
color_config->bitdepth = high_bitdepth ? 10 : 8;
@@ -152,10 +152,10 @@ bool ObuParser::ParseColorConfig(ObuSequenceHeader* sequence_header) {
color_config->is_monochrome = false;
} else {
OBU_READ_BIT_OR_FAIL;
- color_config->is_monochrome = static_cast<bool>(scratch);
+ color_config->is_monochrome = scratch != 0;
}
OBU_READ_BIT_OR_FAIL;
- const auto color_description_present_flag = static_cast<bool>(scratch);
+ const bool color_description_present_flag = scratch != 0;
if (color_description_present_flag) {
OBU_READ_LITERAL_OR_FAIL(8);
color_config->color_primary = static_cast<ColorPrimary>(scratch);
@@ -230,7 +230,7 @@ bool ObuParser::ParseColorConfig(ObuSequenceHeader* sequence_header) {
}
}
OBU_READ_BIT_OR_FAIL;
- color_config->separate_uv_delta_q = static_cast<bool>(scratch);
+ color_config->separate_uv_delta_q = scratch != 0;
}
if (color_config->matrix_coefficients == kMatrixCoefficientsIdentity &&
(color_config->subsampling_x != 0 || color_config->subsampling_y != 0)) {
@@ -246,7 +246,7 @@ bool ObuParser::ParseColorConfig(ObuSequenceHeader* sequence_header) {
bool ObuParser::ParseTimingInfo(ObuSequenceHeader* sequence_header) {
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- sequence_header->timing_info_present_flag = static_cast<bool>(scratch);
+ sequence_header->timing_info_present_flag = scratch != 0;
if (!sequence_header->timing_info_present_flag) return true;
TimingInfo* const info = &sequence_header->timing_info;
OBU_READ_LITERAL_OR_FAIL(32);
@@ -262,7 +262,7 @@ bool ObuParser::ParseTimingInfo(ObuSequenceHeader* sequence_header) {
return false;
}
OBU_READ_BIT_OR_FAIL;
- info->equal_picture_interval = static_cast<bool>(scratch);
+ info->equal_picture_interval = scratch != 0;
if (info->equal_picture_interval) {
OBU_READ_UVLC_OR_FAIL(info->num_ticks_per_picture);
++info->num_ticks_per_picture;
@@ -274,7 +274,7 @@ bool ObuParser::ParseDecoderModelInfo(ObuSequenceHeader* sequence_header) {
if (!sequence_header->timing_info_present_flag) return true;
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- sequence_header->decoder_model_info_present_flag = static_cast<bool>(scratch);
+ sequence_header->decoder_model_info_present_flag = scratch != 0;
if (!sequence_header->decoder_model_info_present_flag) return true;
DecoderModelInfo* const info = &sequence_header->decoder_model_info;
OBU_READ_LITERAL_OR_FAIL(5);
@@ -293,7 +293,7 @@ bool ObuParser::ParseOperatingParameters(ObuSequenceHeader* sequence_header,
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
sequence_header->decoder_model_present_for_operating_point[index] =
- static_cast<bool>(scratch);
+ scratch != 0;
if (!sequence_header->decoder_model_present_for_operating_point[index]) {
return true;
}
@@ -305,7 +305,7 @@ bool ObuParser::ParseOperatingParameters(ObuSequenceHeader* sequence_header,
sequence_header->decoder_model_info.encoder_decoder_buffer_delay_length);
params->encoder_buffer_delay[index] = static_cast<uint32_t>(scratch);
OBU_READ_BIT_OR_FAIL;
- params->low_delay_mode_flag[index] = static_cast<bool>(scratch);
+ params->low_delay_mode_flag[index] = scratch != 0;
return true;
}
@@ -319,9 +319,9 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
}
sequence_header.profile = static_cast<BitstreamProfile>(scratch);
OBU_READ_BIT_OR_FAIL;
- sequence_header.still_picture = static_cast<bool>(scratch);
+ sequence_header.still_picture = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.reduced_still_picture_header = static_cast<bool>(scratch);
+ sequence_header.reduced_still_picture_header = scratch != 0;
if (sequence_header.reduced_still_picture_header) {
if (!sequence_header.still_picture) {
LIBGAV1_DLOG(
@@ -338,7 +338,7 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
return false;
}
OBU_READ_BIT_OR_FAIL;
- const auto initial_display_delay_present_flag = static_cast<bool>(scratch);
+ const bool initial_display_delay_present_flag = scratch != 0;
OBU_READ_LITERAL_OR_FAIL(5);
sequence_header.operating_points = static_cast<int>(1 + scratch);
if (operating_point_ >= sequence_header.operating_points) {
@@ -374,7 +374,7 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
}
if (initial_display_delay_present_flag) {
OBU_READ_BIT_OR_FAIL;
- if (static_cast<bool>(scratch)) {
+ if (scratch != 0) {
OBU_READ_LITERAL_OR_FAIL(4);
sequence_header.initial_display_delay[i] = 1 + scratch;
}
@@ -391,7 +391,7 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
sequence_header.max_frame_height = static_cast<int32_t>(1 + scratch);
if (!sequence_header.reduced_still_picture_header) {
OBU_READ_BIT_OR_FAIL;
- sequence_header.frame_id_numbers_present = static_cast<bool>(scratch);
+ sequence_header.frame_id_numbers_present = scratch != 0;
}
if (sequence_header.frame_id_numbers_present) {
OBU_READ_LITERAL_OR_FAIL(4);
@@ -409,33 +409,33 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
}
}
OBU_READ_BIT_OR_FAIL;
- sequence_header.use_128x128_superblock = static_cast<bool>(scratch);
+ sequence_header.use_128x128_superblock = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_filter_intra = static_cast<bool>(scratch);
+ sequence_header.enable_filter_intra = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_intra_edge_filter = static_cast<bool>(scratch);
+ sequence_header.enable_intra_edge_filter = scratch != 0;
if (sequence_header.reduced_still_picture_header) {
sequence_header.force_screen_content_tools = kSelectScreenContentTools;
sequence_header.force_integer_mv = kSelectIntegerMv;
} else {
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_interintra_compound = static_cast<bool>(scratch);
+ sequence_header.enable_interintra_compound = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_masked_compound = static_cast<bool>(scratch);
+ sequence_header.enable_masked_compound = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_warped_motion = static_cast<bool>(scratch);
+ sequence_header.enable_warped_motion = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_dual_filter = static_cast<bool>(scratch);
+ sequence_header.enable_dual_filter = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_order_hint = static_cast<bool>(scratch);
+ sequence_header.enable_order_hint = scratch != 0;
if (sequence_header.enable_order_hint) {
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_jnt_comp = static_cast<bool>(scratch);
+ sequence_header.enable_jnt_comp = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_ref_frame_mvs = static_cast<bool>(scratch);
+ sequence_header.enable_ref_frame_mvs = scratch != 0;
}
OBU_READ_BIT_OR_FAIL;
- sequence_header.choose_screen_content_tools = static_cast<bool>(scratch);
+ sequence_header.choose_screen_content_tools = scratch != 0;
if (sequence_header.choose_screen_content_tools) {
sequence_header.force_screen_content_tools = kSelectScreenContentTools;
} else {
@@ -444,7 +444,7 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
}
if (sequence_header.force_screen_content_tools > 0) {
OBU_READ_BIT_OR_FAIL;
- sequence_header.choose_integer_mv = static_cast<bool>(scratch);
+ sequence_header.choose_integer_mv = scratch != 0;
if (sequence_header.choose_integer_mv) {
sequence_header.force_integer_mv = kSelectIntegerMv;
} else {
@@ -462,14 +462,14 @@ bool ObuParser::ParseSequenceHeader(bool seen_frame_header) {
}
}
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_superres = static_cast<bool>(scratch);
+ sequence_header.enable_superres = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_cdef = static_cast<bool>(scratch);
+ sequence_header.enable_cdef = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- sequence_header.enable_restoration = static_cast<bool>(scratch);
+ sequence_header.enable_restoration = scratch != 0;
if (!ParseColorConfig(&sequence_header)) return false;
OBU_READ_BIT_OR_FAIL;
- sequence_header.film_grain_params_present = static_cast<bool>(scratch);
+ sequence_header.film_grain_params_present = scratch != 0;
// Compare new sequence header with old sequence header.
if (has_sequence_header_ &&
sequence_header.ParametersChanged(sequence_header_)) {
@@ -546,7 +546,7 @@ bool ObuParser::ParseFrameSizeAndRenderSize() {
// Render Size.
OBU_READ_BIT_OR_FAIL;
- frame_header_.render_and_frame_size_different = static_cast<bool>(scratch);
+ frame_header_.render_and_frame_size_different = scratch != 0;
if (frame_header_.render_and_frame_size_different) {
OBU_READ_LITERAL_OR_FAIL(16);
frame_header_.render_width = static_cast<int32_t>(1 + scratch);
@@ -567,7 +567,7 @@ bool ObuParser::ParseSuperResParametersAndComputeImageSize() {
frame_header_.use_superres = false;
if (sequence_header_.enable_superres) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.use_superres = static_cast<bool>(scratch);
+ frame_header_.use_superres = scratch != 0;
}
if (frame_header_.use_superres) {
OBU_READ_LITERAL_OR_FAIL(3);
@@ -878,14 +878,14 @@ bool ObuParser::ParseLoopFilterParameters() {
OBU_READ_LITERAL_OR_FAIL(3);
loop_filter->sharpness = scratch;
OBU_READ_BIT_OR_FAIL;
- loop_filter->delta_enabled = static_cast<bool>(scratch);
+ loop_filter->delta_enabled = scratch != 0;
if (loop_filter->delta_enabled) {
OBU_READ_BIT_OR_FAIL;
- loop_filter->delta_update = static_cast<bool>(scratch);
+ loop_filter->delta_update = scratch != 0;
if (loop_filter->delta_update) {
for (auto& ref_delta : loop_filter->ref_deltas) {
OBU_READ_BIT_OR_FAIL;
- const auto update_ref_delta = static_cast<bool>(scratch);
+ const bool update_ref_delta = scratch != 0;
if (update_ref_delta) {
int scratch_int;
if (!bit_reader_->ReadInverseSignedLiteral(6, &scratch_int)) {
@@ -897,7 +897,7 @@ bool ObuParser::ParseLoopFilterParameters() {
}
for (auto& mode_delta : loop_filter->mode_deltas) {
OBU_READ_BIT_OR_FAIL;
- const auto update_mode_delta = static_cast<bool>(scratch);
+ const bool update_mode_delta = scratch != 0;
if (update_mode_delta) {
int scratch_int;
if (!bit_reader_->ReadInverseSignedLiteral(6, &scratch_int)) {
@@ -918,7 +918,7 @@ bool ObuParser::ParseDeltaQuantizer(int8_t* const delta) {
int64_t scratch;
*delta = 0;
OBU_READ_BIT_OR_FAIL;
- const auto delta_coded = static_cast<bool>(scratch);
+ const bool delta_coded = scratch != 0;
if (delta_coded) {
int scratch_int;
if (!bit_reader_->ReadInverseSignedLiteral(6, &scratch_int)) {
@@ -940,7 +940,7 @@ bool ObuParser::ParseQuantizerParameters() {
bool diff_uv_delta = false;
if (sequence_header_.color_config.separate_uv_delta_q) {
OBU_READ_BIT_OR_FAIL;
- diff_uv_delta = static_cast<bool>(scratch);
+ diff_uv_delta = scratch != 0;
}
if (!ParseDeltaQuantizer(&quantizer->delta_dc[kPlaneU]) ||
!ParseDeltaQuantizer(&quantizer->delta_ac[kPlaneU])) {
@@ -957,7 +957,7 @@ bool ObuParser::ParseQuantizerParameters() {
}
}
OBU_READ_BIT_OR_FAIL;
- quantizer->use_matrix = static_cast<bool>(scratch);
+ quantizer->use_matrix = scratch != 0;
if (quantizer->use_matrix) {
OBU_READ_LITERAL_OR_FAIL(4);
quantizer->matrix_level[kPlaneY] = scratch;
@@ -987,20 +987,20 @@ bool ObuParser::ParseSegmentationParameters() {
int64_t scratch;
Segmentation* const segmentation = &frame_header_.segmentation;
OBU_READ_BIT_OR_FAIL;
- segmentation->enabled = static_cast<bool>(scratch);
+ segmentation->enabled = scratch != 0;
if (!segmentation->enabled) return true;
if (frame_header_.primary_reference_frame == kPrimaryReferenceNone) {
segmentation->update_map = true;
segmentation->update_data = true;
} else {
OBU_READ_BIT_OR_FAIL;
- segmentation->update_map = static_cast<bool>(scratch);
+ segmentation->update_map = scratch != 0;
if (segmentation->update_map) {
OBU_READ_BIT_OR_FAIL;
- segmentation->temporal_update = static_cast<bool>(scratch);
+ segmentation->temporal_update = scratch != 0;
}
OBU_READ_BIT_OR_FAIL;
- segmentation->update_data = static_cast<bool>(scratch);
+ segmentation->update_data = scratch != 0;
if (!segmentation->update_data) {
// Part of the load_previous() function in the spec.
const int prev_frame_index =
@@ -1014,7 +1014,7 @@ bool ObuParser::ParseSegmentationParameters() {
for (int8_t i = 0; i < kMaxSegments; ++i) {
for (int8_t j = 0; j < kSegmentFeatureMax; ++j) {
OBU_READ_BIT_OR_FAIL;
- segmentation->feature_enabled[i][j] = static_cast<bool>(scratch);
+ segmentation->feature_enabled[i][j] = scratch != 0;
if (segmentation->feature_enabled[i][j]) {
if (Segmentation::FeatureSigned(static_cast<SegmentFeature>(j))) {
int scratch_int;
@@ -1049,7 +1049,7 @@ bool ObuParser::ParseQuantizerIndexDeltaParameters() {
int64_t scratch;
if (frame_header_.quantizer.base_index > 0) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.delta_q.present = static_cast<bool>(scratch);
+ frame_header_.delta_q.present = scratch != 0;
if (frame_header_.delta_q.present) {
OBU_READ_LITERAL_OR_FAIL(2);
frame_header_.delta_q.scale = scratch;
@@ -1063,13 +1063,13 @@ bool ObuParser::ParseLoopFilterDeltaParameters() {
if (frame_header_.delta_q.present) {
if (!frame_header_.allow_intrabc) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.delta_lf.present = static_cast<bool>(scratch);
+ frame_header_.delta_lf.present = scratch != 0;
}
if (frame_header_.delta_lf.present) {
OBU_READ_LITERAL_OR_FAIL(2);
frame_header_.delta_lf.scale = scratch;
OBU_READ_BIT_OR_FAIL;
- frame_header_.delta_lf.multi = static_cast<bool>(scratch);
+ frame_header_.delta_lf.multi = scratch != 0;
}
}
return true;
@@ -1193,7 +1193,7 @@ bool ObuParser::ParseFrameReferenceModeSyntax() {
int64_t scratch;
if (!IsIntraFrame(frame_header_.frame_type)) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.reference_mode_select = static_cast<bool>(scratch);
+ frame_header_.reference_mode_select = scratch != 0;
}
return true;
}
@@ -1276,7 +1276,7 @@ bool ObuParser::ParseSkipModeParameters() {
if (!IsSkipModeAllowed()) return true;
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- frame_header_.skip_mode_present = static_cast<bool>(scratch);
+ frame_header_.skip_mode_present = scratch != 0;
return true;
}
@@ -1348,15 +1348,15 @@ bool ObuParser::ParseGlobalMotionParameters() {
GlobalMotion* const global_motion = &frame_header_.global_motion[ref];
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- const auto is_global = static_cast<bool>(scratch);
+ const bool is_global = scratch != 0;
if (is_global) {
OBU_READ_BIT_OR_FAIL;
- const auto is_rot_zoom = static_cast<bool>(scratch);
+ const bool is_rot_zoom = scratch != 0;
if (is_rot_zoom) {
global_motion->type = kGlobalMotionTransformationTypeRotZoom;
} else {
OBU_READ_BIT_OR_FAIL;
- const auto is_translation = static_cast<bool>(scratch);
+ const bool is_translation = scratch != 0;
global_motion->type = is_translation
? kGlobalMotionTransformationTypeTranslation
: kGlobalMotionTransformationTypeAffine;
@@ -1399,7 +1399,7 @@ bool ObuParser::ParseFilmGrainParameters() {
FilmGrainParams& film_grain_params = frame_header_.film_grain_params;
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- film_grain_params.apply_grain = static_cast<bool>(scratch);
+ film_grain_params.apply_grain = scratch != 0;
if (!film_grain_params.apply_grain) {
// film_grain_params is already zero-initialized.
return true;
@@ -1410,7 +1410,7 @@ bool ObuParser::ParseFilmGrainParameters() {
film_grain_params.update_grain = true;
if (frame_header_.frame_type == kFrameInter) {
OBU_READ_BIT_OR_FAIL;
- film_grain_params.update_grain = static_cast<bool>(scratch);
+ film_grain_params.update_grain = scratch != 0;
}
if (!film_grain_params.update_grain) {
OBU_READ_LITERAL_OR_FAIL(3);
@@ -1481,7 +1481,7 @@ bool ObuParser::ParseFilmGrainParameters() {
film_grain_params.chroma_scaling_from_luma = false;
} else {
OBU_READ_BIT_OR_FAIL;
- film_grain_params.chroma_scaling_from_luma = static_cast<bool>(scratch);
+ film_grain_params.chroma_scaling_from_luma = scratch != 0;
}
if (sequence_header_.color_config.is_monochrome ||
film_grain_params.chroma_scaling_from_luma ||
@@ -1597,9 +1597,9 @@ bool ObuParser::ParseFilmGrainParameters() {
film_grain_params.v_offset = static_cast<int16_t>(scratch - 256);
}
OBU_READ_BIT_OR_FAIL;
- film_grain_params.overlap_flag = static_cast<bool>(scratch);
+ film_grain_params.overlap_flag = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- film_grain_params.clip_to_restricted_range = static_cast<bool>(scratch);
+ film_grain_params.clip_to_restricted_range = scratch != 0;
return true;
}
@@ -1626,7 +1626,7 @@ bool ObuParser::ParseTileInfoSyntax() {
minlog2_tile_columns, TileLog2(sb_max_tile_area, sb_rows * sb_columns));
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- tile_info->uniform_spacing = static_cast<bool>(scratch);
+ tile_info->uniform_spacing = scratch != 0;
if (tile_info->uniform_spacing) {
// Read tile columns.
tile_info->tile_columns_log2 = minlog2_tile_columns;
@@ -1759,7 +1759,7 @@ bool ObuParser::ReadAllowWarpedMotion() {
}
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- frame_header_.allow_warped_motion = static_cast<bool>(scratch);
+ frame_header_.allow_warped_motion = scratch != 0;
return true;
}
@@ -1774,7 +1774,7 @@ bool ObuParser::ParseFrameParameters() {
}
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.show_existing_frame = static_cast<bool>(scratch);
+ frame_header_.show_existing_frame = scratch != 0;
if (frame_header_.show_existing_frame) {
OBU_READ_LITERAL_OR_FAIL(3);
frame_header_.frame_to_show = scratch;
@@ -1849,7 +1849,7 @@ bool ObuParser::ParseFrameParameters() {
frame_header_.frame_type = static_cast<FrameType>(scratch);
current_frame_->set_frame_type(frame_header_.frame_type);
OBU_READ_BIT_OR_FAIL;
- frame_header_.show_frame = static_cast<bool>(scratch);
+ frame_header_.show_frame = scratch != 0;
if (frame_header_.show_frame &&
sequence_header_.decoder_model_info_present_flag &&
!sequence_header_.timing_info.equal_picture_interval) {
@@ -1861,7 +1861,7 @@ bool ObuParser::ParseFrameParameters() {
frame_header_.showable_frame = (frame_header_.frame_type != kFrameKey);
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.showable_frame = static_cast<bool>(scratch);
+ frame_header_.showable_frame = scratch != 0;
}
current_frame_->set_showable_frame(frame_header_.showable_frame);
if (frame_header_.frame_type == kFrameSwitch ||
@@ -1869,7 +1869,7 @@ bool ObuParser::ParseFrameParameters() {
frame_header_.error_resilient_mode = true;
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.error_resilient_mode = static_cast<bool>(scratch);
+ frame_header_.error_resilient_mode = scratch != 0;
}
}
if (frame_header_.frame_type == kFrameKey && frame_header_.show_frame) {
@@ -1877,14 +1877,14 @@ bool ObuParser::ParseFrameParameters() {
decoder_state_.reference_frame.fill(nullptr);
}
OBU_READ_BIT_OR_FAIL;
- frame_header_.enable_cdf_update = !static_cast<bool>(scratch);
+ frame_header_.enable_cdf_update = scratch == 0;
if (sequence_header_.force_screen_content_tools ==
kSelectScreenContentTools) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.allow_screen_content_tools = static_cast<bool>(scratch);
+ frame_header_.allow_screen_content_tools = scratch != 0;
} else {
frame_header_.allow_screen_content_tools =
- static_cast<bool>(sequence_header_.force_screen_content_tools);
+ sequence_header_.force_screen_content_tools != 0;
}
if (frame_header_.allow_screen_content_tools) {
if (sequence_header_.force_integer_mv == kSelectIntegerMv) {
@@ -1934,7 +1934,7 @@ bool ObuParser::ParseFrameParameters() {
frame_header_.frame_size_override_flag = true;
} else if (!sequence_header_.reduced_still_picture_header) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.frame_size_override_flag = static_cast<bool>(scratch);
+ frame_header_.frame_size_override_flag = scratch != 0;
}
if (sequence_header_.order_hint_bits > 0) {
OBU_READ_LITERAL_OR_FAIL(sequence_header_.order_hint_bits);
@@ -1950,7 +1950,7 @@ bool ObuParser::ParseFrameParameters() {
}
if (sequence_header_.decoder_model_info_present_flag) {
OBU_READ_BIT_OR_FAIL;
- const auto buffer_removal_time_present = static_cast<bool>(scratch);
+ const bool buffer_removal_time_present = scratch != 0;
if (buffer_removal_time_present) {
for (int i = 0; i < sequence_header_.operating_points; ++i) {
if (!sequence_header_.decoder_model_present_for_operating_point[i]) {
@@ -1992,14 +1992,14 @@ bool ObuParser::ParseFrameParameters() {
if (frame_header_.allow_screen_content_tools &&
frame_header_.width == frame_header_.upscaled_width) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.allow_intrabc = static_cast<bool>(scratch);
+ frame_header_.allow_intrabc = scratch != 0;
}
} else {
if (!sequence_header_.enable_order_hint) {
frame_header_.frame_refs_short_signaling = false;
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.frame_refs_short_signaling = static_cast<bool>(scratch);
+ frame_header_.frame_refs_short_signaling = scratch != 0;
if (frame_header_.frame_refs_short_signaling) {
OBU_READ_LITERAL_OR_FAIL(3);
const int8_t last_frame_idx = scratch;
@@ -2054,7 +2054,7 @@ bool ObuParser::ParseFrameParameters() {
// Section 5.9.7.
for (int index : frame_header_.reference_frame_index) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.found_reference = static_cast<bool>(scratch);
+ frame_header_.found_reference = scratch != 0;
if (frame_header_.found_reference) {
const RefCountedBuffer* reference_frame =
decoder_state_.reference_frame[index].get();
@@ -2079,10 +2079,10 @@ bool ObuParser::ParseFrameParameters() {
frame_header_.allow_high_precision_mv = false;
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.allow_high_precision_mv = static_cast<bool>(scratch);
+ frame_header_.allow_high_precision_mv = scratch != 0;
}
OBU_READ_BIT_OR_FAIL;
- const auto is_filter_switchable = static_cast<bool>(scratch);
+ const bool is_filter_switchable = scratch != 0;
if (is_filter_switchable) {
frame_header_.interpolation_filter = kInterpolationFilterSwitchable;
} else {
@@ -2091,13 +2091,13 @@ bool ObuParser::ParseFrameParameters() {
static_cast<InterpolationFilter>(scratch);
}
OBU_READ_BIT_OR_FAIL;
- frame_header_.is_motion_mode_switchable = static_cast<bool>(scratch);
+ frame_header_.is_motion_mode_switchable = scratch != 0;
if (frame_header_.error_resilient_mode ||
!sequence_header_.enable_ref_frame_mvs) {
frame_header_.use_ref_frame_mvs = false;
} else {
OBU_READ_BIT_OR_FAIL;
- frame_header_.use_ref_frame_mvs = static_cast<bool>(scratch);
+ frame_header_.use_ref_frame_mvs = scratch != 0;
}
}
// At this point, we have parsed the frame and render sizes and computed
@@ -2151,7 +2151,7 @@ bool ObuParser::ParseFrameParameters() {
if (frame_header_.enable_cdf_update &&
!sequence_header_.reduced_still_picture_header) {
OBU_READ_BIT_OR_FAIL;
- frame_header_.enable_frame_end_update_cdf = !static_cast<bool>(scratch);
+ frame_header_.enable_frame_end_update_cdf = scratch == 0;
} else {
frame_header_.enable_frame_end_update_cdf = false;
}
@@ -2189,7 +2189,7 @@ bool ObuParser::ParseFrameHeader() {
if (!status) return false;
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- frame_header_.reduced_tx_set = static_cast<bool>(scratch);
+ frame_header_.reduced_tx_set = scratch != 0;
status = ParseGlobalMotionParameters();
if (!status) return false;
current_frame_->SetGlobalMotions(frame_header_.global_motion);
@@ -2236,16 +2236,13 @@ bool ObuParser::ParseMetadataScalability() {
const auto spatial_layers_count = static_cast<int>(scratch) + 1;
// spatial_layer_dimensions_present_flag
OBU_READ_BIT_OR_FAIL;
- const auto spatial_layer_dimensions_present_flag =
- static_cast<bool>(scratch);
+ const auto spatial_layer_dimensions_present_flag = scratch != 0;
// spatial_layer_description_present_flag
OBU_READ_BIT_OR_FAIL;
- const auto spatial_layer_description_present_flag =
- static_cast<bool>(scratch);
+ const auto spatial_layer_description_present_flag = scratch != 0;
// temporal_group_description_present_flag
OBU_READ_BIT_OR_FAIL;
- const auto temporal_group_description_present_flag =
- static_cast<bool>(scratch);
+ const auto temporal_group_description_present_flag = scratch != 0;
// scalability_structure_reserved_3bits
OBU_READ_LITERAL_OR_FAIL(3);
if (scratch != 0) {
@@ -2297,7 +2294,7 @@ bool ObuParser::ParseMetadataTimecode() {
OBU_READ_LITERAL_OR_FAIL(5);
// full_timestamp_flag
OBU_READ_BIT_OR_FAIL;
- const auto full_timestamp_flag = static_cast<bool>(scratch);
+ const bool full_timestamp_flag = scratch != 0;
// discontinuity_flag
OBU_READ_BIT_OR_FAIL;
// cnt_dropped_flag
@@ -2329,7 +2326,7 @@ bool ObuParser::ParseMetadataTimecode() {
} else {
// seconds_flag
OBU_READ_BIT_OR_FAIL;
- const auto seconds_flag = static_cast<bool>(scratch);
+ const bool seconds_flag = scratch != 0;
if (seconds_flag) {
// seconds_value
OBU_READ_LITERAL_OR_FAIL(6);
@@ -2340,7 +2337,7 @@ bool ObuParser::ParseMetadataTimecode() {
}
// minutes_flag
OBU_READ_BIT_OR_FAIL;
- const auto minutes_flag = static_cast<bool>(scratch);
+ const bool minutes_flag = scratch != 0;
if (minutes_flag) {
// minutes_value
OBU_READ_LITERAL_OR_FAIL(6);
@@ -2351,7 +2348,7 @@ bool ObuParser::ParseMetadataTimecode() {
}
// hours_flag
OBU_READ_BIT_OR_FAIL;
- const auto hours_flag = static_cast<bool>(scratch);
+ const bool hours_flag = scratch != 0;
if (hours_flag) {
// hours_value
OBU_READ_LITERAL_OR_FAIL(5);
@@ -2560,7 +2557,7 @@ bool ObuParser::ParseTileGroup(size_t size, size_t bytes_consumed_so_far) {
}
int64_t scratch;
OBU_READ_BIT_OR_FAIL;
- const auto tile_start_and_end_present_flag = static_cast<bool>(scratch);
+ const bool tile_start_and_end_present_flag = scratch != 0;
if (!tile_start_and_end_present_flag) {
if (!bit_reader_->AlignToNextByte()) {
LIBGAV1_DLOG(ERROR, "Byte alignment has non zero bits.");
@@ -2600,9 +2597,9 @@ bool ObuParser::ParseHeader() {
OBU_READ_LITERAL_OR_FAIL(4);
obu_header.type = static_cast<libgav1::ObuType>(scratch);
OBU_READ_BIT_OR_FAIL;
- const auto extension_flag = static_cast<bool>(scratch);
+ const bool extension_flag = scratch != 0;
OBU_READ_BIT_OR_FAIL;
- obu_header.has_size_field = static_cast<bool>(scratch);
+ obu_header.has_size_field = scratch != 0;
OBU_READ_BIT_OR_FAIL; // reserved.
if (scratch != 0) {
LIBGAV1_DLOG(WARNING, "obu_reserved_1bit is not zero.");
diff --git a/src/obu_parser.h b/src/obu_parser.h
index c4619ed..3f452ef 100644
--- a/src/obu_parser.h
+++ b/src/obu_parser.h
@@ -22,6 +22,7 @@
#include <cstdint>
#include <memory>
#include <type_traits>
+#include <utility>
#include "src/buffer_pool.h"
#include "src/decoder_state.h"
diff --git a/src/obu_parser_test.cc b/src/obu_parser_test.cc
new file mode 100644
index 0000000..6397ad0
--- /dev/null
+++ b/src/obu_parser_test.cc
@@ -0,0 +1,2675 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/obu_parser.h"
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "src/buffer_pool.h"
+#include "src/decoder_impl.h"
+#include "src/decoder_state.h"
+#include "src/gav1/decoder_buffer.h"
+#include "src/gav1/status_code.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/segmentation.h"
+#include "src/utils/types.h"
+#include "src/utils/vector.h"
+#include "tests/third_party/libvpx/acm_random.h"
+
+// Note the following test classes access private functions/members of
+// ObuParser. To be declared friends of ObuParser they must not have internal
+// linkage (they must be outside the anonymous namespace).
+namespace libgav1 {
+
+// Helper class to manipulate individual bits and generate a byte string.
+class BytesAndBits {
+ public:
+ // Append a bit to the end.
+ void AppendBit(uint8_t bit) { bits_.push_back(bit != 0); }
+
+ // Append a byte to the end.
+ void AppendByte(uint8_t byte) {
+ for (int i = 0; i < 8; ++i) {
+ AppendBit(GetNthBit(byte, i, 8));
+ }
+ }
+
+ // Append a literal of size |bits| to the end.
+ void AppendLiteral(int bits, int value) {
+ InsertLiteral(static_cast<int>(bits_.size()), bits, value);
+ }
+
+ // Append an inverse signed literal to the end. |bits + 1| bits are appended.
+ void AppendInverseSignedLiteral(int bits, int value) {
+ InsertInverseSignedLiteral(static_cast<int>(bits_.size()), bits, value);
+ }
+
+ // Append a sequence of bytes to the end.
+ void AppendBytes(const std::vector<uint8_t>& bytes) {
+ for (const auto& byte : bytes) {
+ AppendByte(byte);
+ }
+ }
+
+ // Insert |bit| in |offset|. Moves all other bits to the right by 1.
+ void InsertBit(int offset, uint8_t bit) {
+ auto iterator = bits_.begin();
+ bits_.insert(iterator + offset, bit != 0);
+ }
+
+ // Insert |value| of size |bits| at offset |offset|. Moves all other bits to
+ // the right by |bits|.
+ void InsertLiteral(int offset, int bits, int value) {
+ for (int i = 0; i < bits; ++i) {
+ InsertBit(i + offset, GetNthBit(value, i, bits));
+ }
+ }
+
+ // Insert |value| of size |bits| at offset |offset| as an inverse signed
+ // literal. Move all other bits to the right by |bits + 1|.
+ //
+ // Note: This is denoted su(1+bits) in the spec.
+ void InsertInverseSignedLiteral(int offset, int bits, int value) {
+ InsertBit(offset, (value >= 0) ? 0 : 1);
+ InsertLiteral(offset + 1, bits, value);
+ }
+
+ // Insert |value| at |offset| as an unsigned variable length code (uvlc).
+ // Return the number of bits inserted.
+ int InsertUvlc(int offset, int value) {
+ int leading_zeros = 1;
+ int shift_value = ++value;
+ while ((shift_value >>= 1) != 0) leading_zeros += 2;
+ int bits = 0;
+ InsertLiteral(offset, leading_zeros >> 1, 0);
+ bits += leading_zeros >> 1;
+ InsertLiteral(offset + bits, (leading_zeros + 1) >> 1, value);
+ bits += (leading_zeros + 1) >> 1;
+ return bits;
+ }
+
+ // Set the bit at |offset| to |bit|. The bit should already exist.
+ void SetBit(int offset, uint8_t bit) { bits_[offset] = bit != 0; }
+
+ // Set |bits| starting at |offset| to |value|. The bits should already exist.
+ void SetLiteral(int offset, int bits, int value) {
+ for (int i = 0; i < bits; ++i) {
+ SetBit(offset + i, GetNthBit(value, i, bits));
+ }
+ }
+
+ // Remove a bit in |offset|. Moves over all the following bits to the left by
+ // 1.
+ void RemoveBit(int offset) { RemoveLiteral(offset, 1); }
+
+ // Remove a literal of size |bits| from |offset|. Moves over all the
+ // following bits to the left by |bits|.
+ void RemoveLiteral(int offset, int bits) {
+ bits_.erase(bits_.begin() + offset, bits_.begin() + offset + bits);
+ }
+
+ // Remove all bits after offset.
+ void RemoveAllBitsAfter(int offset) {
+ RemoveLiteral(offset, static_cast<int>(bits_.size()) - offset);
+ }
+
+ // Clear all the bits stored.
+ void Clear() { bits_.clear(); }
+
+ // Generate the data vector from the bits. Pads 0 to the end of the last byte
+ // if necessary.
+ const std::vector<uint8_t>& GenerateData() {
+ data_.clear();
+ for (size_t i = 0; i < bits_.size(); i += 8) {
+ uint8_t byte = 0;
+ for (int j = 0; j < 8; ++j) {
+ const uint8_t bit =
+ ((i + j) < bits_.size()) ? static_cast<uint8_t>(bits_[i + j]) : 0;
+ byte |= bit << (7 - j);
+ }
+ data_.push_back(byte);
+ }
+ return data_;
+ }
+
+ private:
+ // Get the |n|th MSB from |value| with the assumption that |value| has |size|
+ // bits.
+ static uint8_t GetNthBit(int value, int n, int size) {
+ return (value >> (size - n - 1)) & 0x01;
+ }
+
+ std::vector<uint8_t> data_;
+ std::vector<bool> bits_;
+};
+
+class ObuParserTest : public testing::Test {
+ protected:
+ // Constants for unit tests.
+ static constexpr int kFrameWidthBits = 9;
+ static constexpr int kFrameHeightBits = 8;
+ static constexpr int kHeight = 240;
+ static constexpr int kWidth = 426;
+ static constexpr int kRows4x4 = 60;
+ static constexpr int kColumns4x4 = 108;
+ static constexpr int kFrameToShow = 2;
+ static constexpr int kDisplayFrameId = 10;
+ static constexpr int kFrameIdLengthBits = 15;
+ static constexpr int kDeltaFrameIdLengthBits = 14;
+
+ // Bit streams for testing. These may contain trailing bits and tests may have
+ // to remove some of the trailing bits to keep the boundary alignment.
+ const std::vector<uint8_t> kDefaultTemporalDelimiter = {0x12, 0x00};
+ // Bits Syntax element Value
+ // 1 obu_forbidden_bit 0
+ // 4 obu_type 2 (OBU_TEMPORAL_DELIMITER)
+ // 1 obu_extension_flag 1
+ // 1 obu_has_size_field 1
+ // 1 obu_reserved_1bit 0
+ // 3 temporal_id 6
+ // 2 spatial_id 2
+ // 3 extension_header_reserved_3bits 0
+ // 8 obu_size 0
+ const std::vector<uint8_t> kDefaultTemporalDelimiterWithExtension = {
+ 0x16, 0xd0, 0x00};
+ const std::vector<uint8_t> kDefaultHeaderWithoutSizeField = {0x10};
+ // Offset Bits Syntax element Value
+ // 0 3 seq_profile 0
+ // 3 1 still_picture 0
+ // 4 1 reduced_still_picture_header 0
+ // 5 1 timing_info_present_flag 0
+ // 6 1 initial_display_delay_present_flag 0
+ // 7 5 operating_points_cnt_minus_1 0
+ // 12 12 operating_point_idc[ 0 ] 0
+ // 24 5 seq_level_idx[ 0 ] 0
+ // 29 4 frame_width_bits_minus_1 8
+ // 33 4 frame_height_bits_minus_1 7
+ // 37 9 max_frame_width_minus_1 425
+ // 46 8 max_frame_height_minus_1 239
+ // 54 1 frame_id_numbers_present_flag 0
+ // 55 1 use_128x128_superblock 1
+ // 56 1 enable_filter_intra 1
+ // 57 1 enable_intra_edge_filter 1
+ // 58 1 enable_interintra_compound 1
+ // 59 1 enable_masked_compound 1
+ // 60 1 enable_warped_motion 0
+ // 61 1 enable_dual_filter 1
+ // 62 1 enable_order_hint 1
+ // 63 1 enable_jnt_comp 1
+ // 64 1 enable_ref_frame_mvs 1
+ // 65 1 seq_choose_screen_content_tools 1
+ // 66 1 seq_choose_integer_mv 1
+ // 67 3 order_hint_bits_minus_1 6
+ // 70 1 enable_superres 0
+ // 71 1 enable_cdef 1
+ // 72 1 enable_restoration 1
+ // ...
+ const std::vector<uint8_t> kDefaultSequenceHeader = {
+ 0x00, 0x00, 0x00, 0x04, 0x3e, 0xa7, 0xbd, 0xf7, 0xf9, 0x80, 0x40};
+ const std::vector<uint8_t> kDefaultFrameHeaderKeyFrame = {0x10, 0x00};
+ // Bits Syntax element Value
+ // 1 show_existing_frame 0
+ // 2 frame_type 2 (kFrameIntraOnly)
+ // 1 show_frame 1
+ // 1 error_resilient_mode 0
+ // 1 disable_cdf_update 0
+ // 1 frame_size_override_flag 0
+ // 8 refresh_frame_flags 4
+ // ...
+ const std::vector<uint8_t> kDefaultFrameHeaderIntraOnlyFrame = {0x50, 0x08,
+ 0x00};
+ // Bits Syntax element Value
+ // 1 show_existing_frame 0
+ // 2 frame_type 1 (kFrameInter)
+ // 1 show_frame 1
+ // 1 error_resilient_mode 0
+ // 1 disable_cdf_update 0
+ // 1 frame_size_override_flag 0
+ // 3 primary_ref_frame 1
+ // 8 refresh_frame_flags 4
+ // 3 ref_frame_idx[0] 0
+ // 3 ref_frame_idx[1] 1
+ // 3 ref_frame_idx[2] 2
+ // 3 ref_frame_idx[3] 3
+ // 3 ref_frame_idx[4] 4
+ // 3 ref_frame_idx[5] 5
+ // 3 ref_frame_idx[6] 6
+ // ...
+ const std::vector<uint8_t> kDefaultFrameHeaderInterFrame = {0x30, 0x41, 0x01,
+ 0x4e, 0x5c, 0x60};
+ const std::vector<uint8_t> kDefaultGlobalMotionParametersRotZoom = {
+ 0xff, 0x50, 0x77, 0x7e, 0x1f, 0xcd};
+ const std::vector<uint8_t> kDefaultGlobalMotionParametersAffine = {
+ 0x3f, 0x50, 0x77, 0x7b, 0xbf, 0xa8, 0x3e, 0x1f, 0xcd};
+
+ void SetUp() override {
+ buffer_pool_.reset(new (std::nothrow)
+ BufferPool(nullptr, nullptr, nullptr, nullptr));
+ ASSERT_NE(buffer_pool_, nullptr);
+ }
+
+ bool Init() {
+ obu_.reset(new (std::nothrow) ObuParser(nullptr, 0, 0, buffer_pool_.get(),
+ &decoder_state_));
+ if (obu_ == nullptr) return false;
+ obu_headers_ = &obu_->obu_headers_;
+ obu_frame_header_ = &obu_->frame_header_;
+ obu_sequence_header_ = &obu_->sequence_header_;
+ return true;
+ }
+
+ bool Init(const std::vector<uint8_t>& data, bool init_bit_reader = true) {
+ obu_.reset(new (std::nothrow) ObuParser(
+ data.data(), data.size(), 0, buffer_pool_.get(), &decoder_state_));
+ if (obu_ == nullptr) return false;
+ obu_headers_ = &obu_->obu_headers_;
+ obu_frame_header_ = &obu_->frame_header_;
+ obu_sequence_header_ = &obu_->sequence_header_;
+ return init_bit_reader ? obu_->InitBitReader(data.data(), data.size())
+ : true;
+ }
+
+ bool Parse(const std::string& input,
+ const ObuSequenceHeader* const sequence_header = nullptr) {
+ std::vector<uint8_t> data(input.begin(), input.end());
+ return Parse(data, sequence_header);
+ }
+
+ bool Parse(const std::vector<uint8_t>& data,
+ const ObuSequenceHeader* const sequence_header = nullptr) {
+ EXPECT_TRUE(Init(data, false));
+ if (sequence_header != nullptr) obu_->set_sequence_header(*sequence_header);
+ return obu_->ParseOneFrame(&current_frame_) == kStatusOk;
+ }
+
+ bool ParseSequenceHeader(const std::vector<uint8_t>& data) {
+ EXPECT_TRUE(Init(data));
+ return obu_->ParseSequenceHeader(/*seen_frame_header=*/false);
+ }
+
+ bool ParseFrameParameters(const std::vector<uint8_t>& data,
+ bool id_bits_present = false,
+ int force_screen_content_tools = 0,
+ int force_integer_mv = 0,
+ bool enable_superres = false) {
+ EXPECT_TRUE(Init(data));
+ if (id_bits_present) {
+ obu_->sequence_header_.frame_id_numbers_present = true;
+ obu_->sequence_header_.frame_id_length_bits = kFrameIdLengthBits;
+ obu_->sequence_header_.delta_frame_id_length_bits =
+ kDeltaFrameIdLengthBits;
+ }
+ obu_->sequence_header_.force_screen_content_tools =
+ force_screen_content_tools;
+ obu_->sequence_header_.force_integer_mv = force_integer_mv;
+ obu_->sequence_header_.enable_superres = enable_superres;
+ obu_->sequence_header_.frame_width_bits = kFrameWidthBits;
+ obu_->sequence_header_.frame_height_bits = kFrameHeightBits;
+ obu_->sequence_header_.max_frame_width = kWidth;
+ obu_->sequence_header_.max_frame_height = kHeight;
+ return obu_->ParseFrameParameters();
+ }
+
+ bool ParseSegmentationParameters(const std::vector<uint8_t>& data,
+ int primary_reference_frame,
+ int prev_frame_index) {
+ EXPECT_TRUE(Init(data));
+ obu_->frame_header_.primary_reference_frame = primary_reference_frame;
+ if (primary_reference_frame != kPrimaryReferenceNone) {
+ obu_->frame_header_.reference_frame_index[primary_reference_frame] =
+ prev_frame_index;
+ }
+ return obu_->ParseSegmentationParameters();
+ }
+
+ bool ParseFrameReferenceModeSyntax(const std::vector<uint8_t>& data,
+ FrameType frame_type) {
+ EXPECT_TRUE(Init(data));
+ obu_->frame_header_.frame_type = frame_type;
+ return obu_->ParseFrameReferenceModeSyntax();
+ }
+
+ bool ParseGlobalMotionParameters(const std::vector<uint8_t>& data,
+ FrameType frame_type) {
+ EXPECT_TRUE(Init(data));
+ obu_->frame_header_.frame_type = frame_type;
+ obu_->frame_header_.primary_reference_frame = kPrimaryReferenceNone;
+ return obu_->ParseGlobalMotionParameters();
+ }
+
+ bool ParseFilmGrainParameters(const std::vector<uint8_t>& data,
+ const ObuSequenceHeader& sequence_header,
+ const ObuFrameHeader& frame_header) {
+ EXPECT_TRUE(Init(data));
+ obu_->set_sequence_header(sequence_header);
+ obu_->frame_header_ = frame_header;
+ return obu_->ParseFilmGrainParameters();
+ }
+
+ bool ParseTileInfoSyntax(const std::vector<uint8_t>& data, int columns4x4,
+ int rows4x4, bool use_128x128_superblock) {
+ EXPECT_TRUE(Init(data));
+ obu_->frame_header_.columns4x4 = columns4x4;
+ obu_->frame_header_.rows4x4 = rows4x4;
+ obu_->sequence_header_.use_128x128_superblock = use_128x128_superblock;
+ return obu_->ParseTileInfoSyntax();
+ }
+
+ bool ParseMetadata(const std::vector<uint8_t>& data) {
+ EXPECT_TRUE(Init(data));
+ return obu_->ParseMetadata(data.data(), data.size());
+ }
+
+ void DefaultSequenceHeader(ObuSequenceHeader* const gold) {
+ memset(gold, 0, sizeof(*gold));
+ gold->profile = kProfile0;
+ gold->level[0].major = kMinimumMajorBitstreamLevel;
+ gold->operating_points = 1;
+ gold->max_frame_width = kWidth;
+ gold->max_frame_height = kHeight;
+ gold->frame_width_bits = kFrameWidthBits;
+ gold->frame_height_bits = kFrameHeightBits;
+ gold->use_128x128_superblock = true;
+ gold->enable_filter_intra = true;
+ gold->enable_intra_edge_filter = true;
+ gold->enable_interintra_compound = true;
+ gold->enable_masked_compound = true;
+ gold->enable_dual_filter = true;
+ gold->enable_order_hint = true;
+ gold->enable_jnt_comp = true;
+ gold->enable_ref_frame_mvs = true;
+ gold->choose_screen_content_tools = true;
+ gold->force_screen_content_tools = 2;
+ gold->choose_integer_mv = true;
+ gold->force_integer_mv = 2;
+ gold->order_hint_bits = 7;
+ gold->enable_cdef = true;
+ gold->enable_restoration = true;
+ gold->color_config.bitdepth = 8;
+ gold->color_config.color_primary = kColorPrimaryUnspecified;
+ gold->color_config.transfer_characteristics =
+ kTransferCharacteristicsUnspecified;
+ gold->color_config.matrix_coefficients = kMatrixCoefficientsUnspecified;
+ gold->color_config.subsampling_x = 1;
+ gold->color_config.subsampling_y = 1;
+ }
+
+ void DefaultFrameHeader(ObuFrameHeader* const gold, FrameType frame_type) {
+ memset(gold, 0, sizeof(*gold));
+ gold->frame_type = frame_type;
+ gold->show_frame = true;
+ gold->showable_frame = (frame_type != kFrameKey);
+ gold->enable_cdf_update = true;
+ gold->width = kWidth;
+ gold->height = kHeight;
+ gold->render_width = kWidth;
+ gold->render_height = kHeight;
+ gold->upscaled_width = kWidth;
+ gold->primary_reference_frame = kPrimaryReferenceNone;
+ gold->enable_frame_end_update_cdf = true;
+ gold->rows4x4 = kRows4x4;
+ gold->columns4x4 = kColumns4x4;
+ if (frame_type == kFrameKey) {
+ gold->refresh_frame_flags = 0xff;
+ gold->error_resilient_mode = true;
+ gold->force_integer_mv = 1;
+ } else if (frame_type == kFrameIntraOnly) {
+ gold->refresh_frame_flags = 4;
+ gold->force_integer_mv = 1;
+ } else if (frame_type == kFrameInter) {
+ gold->refresh_frame_flags = 4;
+ gold->primary_reference_frame = 1;
+ for (int i = 0; i < kNumInterReferenceFrameTypes; ++i) {
+ gold->reference_frame_index[i] = i;
+ }
+ gold->is_motion_mode_switchable = true;
+ }
+ }
+
+ void OverrideFrameSize(BytesAndBits* const data, ObuFrameHeader* const gold,
+ int flag_offset, int size_offset) {
+ data->SetBit(flag_offset, 1); // frame_size_override_flag.
+ data->InsertLiteral(size_offset, kFrameWidthBits,
+ kWidth - 2); // frame_width_minus_1.
+ data->InsertLiteral(size_offset + kFrameWidthBits, kFrameHeightBits,
+ kHeight - 2); // frame_height_minus_1.
+ gold->frame_size_override_flag = true;
+ gold->width = kWidth - 1;
+ gold->height = kHeight - 1;
+ gold->render_width = gold->width;
+ gold->render_height = gold->height;
+ gold->upscaled_width = gold->width;
+ }
+
+ void OverrideRenderSize(BytesAndBits* const data, ObuFrameHeader* const gold,
+ int flag_offset) {
+ data->SetBit(flag_offset, 1); // render_and_frame_size_different.
+ data->InsertLiteral(flag_offset + 1, 16,
+ kWidth - 10); // render_width_minus_1.
+ data->InsertLiteral(flag_offset + 17, 16,
+ kHeight - 10); // render_height_minus_1.
+ gold->render_width = kWidth - 9;
+ gold->render_height = kHeight - 9;
+ gold->render_and_frame_size_different = true;
+ }
+
+ void OverrideSegmentation(BytesAndBits* const data, Segmentation* const gold,
+ int offset) {
+ gold->update_data = true;
+ data->SetBit(offset++, static_cast<uint8_t>(gold->update_data));
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ gold->segment_id_pre_skip = false;
+ gold->last_active_segment_id = 0;
+ for (int i = 0; i < kMaxSegments; ++i) {
+ for (int j = 0; j < kSegmentFeatureMax; ++j) {
+ gold->feature_enabled[i][j] = static_cast<bool>(rnd.Rand8() & 1);
+ data->InsertBit(offset++,
+ static_cast<uint8_t>(gold->feature_enabled[i][j]));
+ if (gold->feature_enabled[i][j]) {
+ gold->feature_data[i][j] = rnd(1 << kSegmentationFeatureBits[j]);
+ if (Segmentation::FeatureSigned(static_cast<SegmentFeature>(j))) {
+ if (static_cast<bool>(rnd.Rand8() & 1)) {
+ gold->feature_data[i][j] *= -1;
+ }
+ data->InsertInverseSignedLiteral(
+ offset, kSegmentationFeatureBits[j], gold->feature_data[i][j]);
+ offset += kSegmentationFeatureBits[j] + 1;
+ } else {
+ data->InsertLiteral(offset, kSegmentationFeatureBits[j],
+ gold->feature_data[i][j]);
+ offset += kSegmentationFeatureBits[j];
+ }
+ gold->last_active_segment_id = i;
+ if (j >= kSegmentFeatureReferenceFrame) {
+ gold->segment_id_pre_skip = true;
+ }
+ }
+ }
+ }
+ }
+
+ void VerifyObuHeader(bool extension) {
+ EXPECT_EQ(obu_->obu_headers().back().temporal_id, extension ? 6 : 0);
+ EXPECT_EQ(obu_->obu_headers().back().spatial_id, extension ? 2 : 0);
+ }
+
+#define OBU_TEST_COMPARE(x) EXPECT_EQ(expected.x, actual.x)
+ void VerifyFrameParameters(const ObuFrameHeader& expected,
+ bool id_bits_present = false) {
+ const ObuFrameHeader& actual = obu_->frame_header();
+ OBU_TEST_COMPARE(show_existing_frame);
+ if (actual.show_existing_frame) {
+ OBU_TEST_COMPARE(frame_to_show);
+ OBU_TEST_COMPARE(frame_presentation_time);
+ if (id_bits_present) {
+ OBU_TEST_COMPARE(display_frame_id);
+ }
+ return;
+ }
+ OBU_TEST_COMPARE(frame_type);
+ OBU_TEST_COMPARE(show_frame);
+ OBU_TEST_COMPARE(frame_presentation_time);
+ OBU_TEST_COMPARE(showable_frame);
+ OBU_TEST_COMPARE(error_resilient_mode);
+ OBU_TEST_COMPARE(enable_cdf_update);
+ OBU_TEST_COMPARE(current_frame_id);
+ OBU_TEST_COMPARE(frame_size_override_flag);
+ OBU_TEST_COMPARE(order_hint);
+ for (int i = 0; i < kNumReferenceFrameTypes; ++i) {
+ OBU_TEST_COMPARE(reference_order_hint[i]);
+ }
+ OBU_TEST_COMPARE(primary_reference_frame);
+ OBU_TEST_COMPARE(width);
+ OBU_TEST_COMPARE(height);
+ OBU_TEST_COMPARE(render_and_frame_size_different);
+ OBU_TEST_COMPARE(render_width);
+ OBU_TEST_COMPARE(render_height);
+ OBU_TEST_COMPARE(upscaled_width);
+ OBU_TEST_COMPARE(coded_lossless);
+ OBU_TEST_COMPARE(upscaled_lossless);
+ OBU_TEST_COMPARE(allow_screen_content_tools);
+ OBU_TEST_COMPARE(is_motion_mode_switchable);
+ OBU_TEST_COMPARE(refresh_frame_flags);
+ OBU_TEST_COMPARE(enable_frame_end_update_cdf);
+ OBU_TEST_COMPARE(force_integer_mv);
+ if (actual.frame_type == kFrameInter) {
+ for (int i = 0; i < kNumInterReferenceFrameTypes; ++i) {
+ OBU_TEST_COMPARE(reference_frame_index[i]);
+ }
+ }
+ OBU_TEST_COMPARE(use_superres);
+ OBU_TEST_COMPARE(rows4x4);
+ OBU_TEST_COMPARE(columns4x4);
+ }
+
+ void VerifyLoopFilterParameters(const LoopFilter& expected) {
+ const LoopFilter& actual = obu_->frame_header().loop_filter;
+ for (int i = 0; i < 4; ++i) {
+ OBU_TEST_COMPARE(level[i]);
+ }
+ OBU_TEST_COMPARE(sharpness);
+ OBU_TEST_COMPARE(delta_enabled);
+ OBU_TEST_COMPARE(delta_update);
+ for (int i = 0; i < kNumReferenceFrameTypes; ++i) {
+ OBU_TEST_COMPARE(ref_deltas[i]);
+ }
+ for (int i = 0; i < kLoopFilterMaxModeDeltas; ++i) {
+ OBU_TEST_COMPARE(mode_deltas[i]);
+ }
+ }
+
+ void VerifyQuantizerParameters(const QuantizerParameters& expected) {
+ const QuantizerParameters& actual = obu_->frame_header().quantizer;
+ OBU_TEST_COMPARE(base_index);
+ OBU_TEST_COMPARE(delta_dc[kPlaneY]);
+ OBU_TEST_COMPARE(delta_dc[kPlaneU]);
+ OBU_TEST_COMPARE(delta_dc[kPlaneV]);
+ EXPECT_EQ(0, actual.delta_ac[kPlaneY]);
+ OBU_TEST_COMPARE(delta_ac[kPlaneY]);
+ OBU_TEST_COMPARE(delta_ac[kPlaneU]);
+ OBU_TEST_COMPARE(delta_ac[kPlaneV]);
+ OBU_TEST_COMPARE(use_matrix);
+ OBU_TEST_COMPARE(matrix_level[kPlaneY]);
+ OBU_TEST_COMPARE(matrix_level[kPlaneU]);
+ OBU_TEST_COMPARE(matrix_level[kPlaneV]);
+ }
+
+ void VerifySegmentationParameters(const Segmentation& expected) {
+ const Segmentation& actual = obu_->frame_header().segmentation;
+ OBU_TEST_COMPARE(enabled);
+ OBU_TEST_COMPARE(update_map);
+ OBU_TEST_COMPARE(update_data);
+ OBU_TEST_COMPARE(temporal_update);
+ OBU_TEST_COMPARE(segment_id_pre_skip);
+ OBU_TEST_COMPARE(last_active_segment_id);
+ for (int i = 0; i < kMaxSegments; ++i) {
+ for (int j = 0; j < kSegmentFeatureMax; ++j) {
+ OBU_TEST_COMPARE(feature_enabled[i][j]);
+ OBU_TEST_COMPARE(feature_data[i][j]);
+ }
+ }
+ }
+
+ void VerifyDeltaParameters(const Delta& expected, const Delta& actual) {
+ OBU_TEST_COMPARE(present);
+ OBU_TEST_COMPARE(scale);
+ OBU_TEST_COMPARE(multi);
+ }
+
+ void VerifyCdefParameters(const Cdef& expected) {
+ const Cdef& actual = obu_->frame_header().cdef;
+ OBU_TEST_COMPARE(damping);
+ OBU_TEST_COMPARE(bits);
+ for (int i = 0; i < (1 << actual.bits); ++i) {
+ OBU_TEST_COMPARE(y_primary_strength[i]);
+ OBU_TEST_COMPARE(y_secondary_strength[i]);
+ OBU_TEST_COMPARE(uv_primary_strength[i]);
+ OBU_TEST_COMPARE(uv_secondary_strength[i]);
+ }
+ }
+
+ void VerifyLoopRestorationParameters(const LoopRestoration& expected) {
+ const LoopRestoration& actual = obu_->frame_header().loop_restoration;
+ for (int i = 0; i < kMaxPlanes; ++i) {
+ OBU_TEST_COMPARE(type[i]);
+ OBU_TEST_COMPARE(unit_size_log2[i]);
+ }
+ }
+
+ void VerifyGlobalMotionParameters(
+ const std::array<GlobalMotion, kNumReferenceFrameTypes>& gold) {
+ for (int i = kReferenceFrameLast; i <= kReferenceFrameAlternate; ++i) {
+ const GlobalMotion& expected = gold[i];
+ const GlobalMotion& actual = obu_->frame_header().global_motion[i];
+ OBU_TEST_COMPARE(type) << " i: " << i;
+ for (int j = 0; j < 6; ++j) {
+ OBU_TEST_COMPARE(params[j]) << " i: " << i << " j: " << j;
+ }
+ }
+ }
+
+ void VerifyFilmGrainParameters(const FilmGrainParams& expected) {
+ const FilmGrainParams& actual = obu_->frame_header().film_grain_params;
+ OBU_TEST_COMPARE(apply_grain);
+ OBU_TEST_COMPARE(update_grain);
+ OBU_TEST_COMPARE(chroma_scaling_from_luma);
+ OBU_TEST_COMPARE(overlap_flag);
+ OBU_TEST_COMPARE(clip_to_restricted_range);
+ OBU_TEST_COMPARE(num_y_points);
+ OBU_TEST_COMPARE(num_u_points);
+ OBU_TEST_COMPARE(num_v_points);
+ for (int i = 0; i < 14; ++i) {
+ OBU_TEST_COMPARE(point_y_value[i]);
+ OBU_TEST_COMPARE(point_y_scaling[i]);
+ }
+ for (int i = 0; i < 10; ++i) {
+ OBU_TEST_COMPARE(point_u_value[i]);
+ OBU_TEST_COMPARE(point_u_scaling[i]);
+ }
+ for (int i = 0; i < 10; ++i) {
+ OBU_TEST_COMPARE(point_v_value[i]);
+ OBU_TEST_COMPARE(point_v_scaling[i]);
+ }
+ OBU_TEST_COMPARE(chroma_scaling);
+ OBU_TEST_COMPARE(auto_regression_coeff_lag);
+ for (int i = 0; i < 24; ++i) {
+ OBU_TEST_COMPARE(auto_regression_coeff_y[i]);
+ }
+ for (int i = 0; i < 25; ++i) {
+ OBU_TEST_COMPARE(auto_regression_coeff_u[i]);
+ }
+ for (int i = 0; i < 25; ++i) {
+ OBU_TEST_COMPARE(auto_regression_coeff_v[i]);
+ }
+ OBU_TEST_COMPARE(auto_regression_shift);
+ OBU_TEST_COMPARE(grain_seed);
+ OBU_TEST_COMPARE(reference_index);
+ OBU_TEST_COMPARE(grain_scale_shift);
+ OBU_TEST_COMPARE(u_multiplier);
+ OBU_TEST_COMPARE(u_luma_multiplier);
+ OBU_TEST_COMPARE(u_offset);
+ OBU_TEST_COMPARE(v_multiplier);
+ OBU_TEST_COMPARE(v_luma_multiplier);
+ OBU_TEST_COMPARE(v_offset);
+ }
+
+ void VerifyTileInfoParameters(const TileInfo& expected) {
+ const TileInfo& actual = obu_->frame_header().tile_info;
+ OBU_TEST_COMPARE(uniform_spacing);
+ OBU_TEST_COMPARE(tile_columns_log2);
+ OBU_TEST_COMPARE(tile_columns);
+ for (int i = 0; i < kMaxTileColumns + 1; ++i) {
+ OBU_TEST_COMPARE(tile_column_start[i]) << "tile_column: " << i;
+ OBU_TEST_COMPARE(tile_column_width_in_superblocks[i])
+ << "tile_column: " << i;
+ }
+ OBU_TEST_COMPARE(tile_rows_log2);
+ OBU_TEST_COMPARE(tile_rows);
+ for (int i = 0; i < kMaxTileRows + 1; ++i) {
+ OBU_TEST_COMPARE(tile_row_start[i]) << "tile_row: " << i;
+ OBU_TEST_COMPARE(tile_row_height_in_superblocks[i]) << "tile_rows: " << i;
+ }
+ OBU_TEST_COMPARE(tile_count);
+ OBU_TEST_COMPARE(context_update_id);
+ OBU_TEST_COMPARE(tile_size_bytes);
+ }
+
+ void VerifySequenceHeader(const ObuSequenceHeader& expected) {
+ EXPECT_TRUE(obu_->sequence_header_changed());
+ const ObuSequenceHeader& actual = obu_->sequence_header();
+ OBU_TEST_COMPARE(profile);
+ OBU_TEST_COMPARE(still_picture);
+ OBU_TEST_COMPARE(reduced_still_picture_header);
+ OBU_TEST_COMPARE(operating_points);
+ for (int i = 0; i < actual.operating_points; ++i) {
+ OBU_TEST_COMPARE(operating_point_idc[i]) << "i: " << i;
+ OBU_TEST_COMPARE(level[i].major) << "i: " << i;
+ OBU_TEST_COMPARE(level[i].minor) << "i: " << i;
+ OBU_TEST_COMPARE(tier[i]) << "i: " << i;
+ }
+ OBU_TEST_COMPARE(frame_width_bits);
+ OBU_TEST_COMPARE(frame_height_bits);
+ OBU_TEST_COMPARE(max_frame_width);
+ OBU_TEST_COMPARE(max_frame_height);
+ OBU_TEST_COMPARE(frame_id_numbers_present);
+ if (actual.frame_id_numbers_present) {
+ OBU_TEST_COMPARE(frame_id_length_bits);
+ OBU_TEST_COMPARE(delta_frame_id_length_bits);
+ }
+ OBU_TEST_COMPARE(use_128x128_superblock);
+ OBU_TEST_COMPARE(enable_filter_intra);
+ OBU_TEST_COMPARE(enable_intra_edge_filter);
+ OBU_TEST_COMPARE(enable_interintra_compound);
+ OBU_TEST_COMPARE(enable_masked_compound);
+ OBU_TEST_COMPARE(enable_warped_motion);
+ OBU_TEST_COMPARE(enable_dual_filter);
+ OBU_TEST_COMPARE(enable_order_hint);
+ OBU_TEST_COMPARE(enable_jnt_comp);
+ OBU_TEST_COMPARE(enable_ref_frame_mvs);
+ OBU_TEST_COMPARE(choose_screen_content_tools);
+ OBU_TEST_COMPARE(force_screen_content_tools);
+ OBU_TEST_COMPARE(choose_integer_mv);
+ OBU_TEST_COMPARE(force_integer_mv);
+ OBU_TEST_COMPARE(order_hint_bits);
+ OBU_TEST_COMPARE(enable_superres);
+ OBU_TEST_COMPARE(enable_cdef);
+ OBU_TEST_COMPARE(enable_restoration);
+ OBU_TEST_COMPARE(color_config.bitdepth);
+ OBU_TEST_COMPARE(color_config.is_monochrome);
+ OBU_TEST_COMPARE(color_config.color_range);
+ OBU_TEST_COMPARE(color_config.subsampling_x);
+ OBU_TEST_COMPARE(color_config.subsampling_y);
+ OBU_TEST_COMPARE(color_config.chroma_sample_position);
+ OBU_TEST_COMPARE(timing_info_present_flag);
+ OBU_TEST_COMPARE(timing_info.num_units_in_tick);
+ OBU_TEST_COMPARE(timing_info.time_scale);
+ OBU_TEST_COMPARE(timing_info.equal_picture_interval);
+ OBU_TEST_COMPARE(timing_info.num_ticks_per_picture);
+ OBU_TEST_COMPARE(decoder_model_info_present_flag);
+ OBU_TEST_COMPARE(decoder_model_info.encoder_decoder_buffer_delay_length);
+ OBU_TEST_COMPARE(decoder_model_info.num_units_in_decoding_tick);
+ OBU_TEST_COMPARE(decoder_model_info.buffer_removal_time_length);
+ OBU_TEST_COMPARE(decoder_model_info.frame_presentation_time_length);
+ for (int i = 0; i < actual.operating_points; ++i) {
+ SCOPED_TRACE("i: " + std::to_string(i));
+ OBU_TEST_COMPARE(operating_parameters.decoder_buffer_delay[i]);
+ OBU_TEST_COMPARE(operating_parameters.encoder_buffer_delay[i]);
+ OBU_TEST_COMPARE(operating_parameters.low_delay_mode_flag[i]);
+ OBU_TEST_COMPARE(initial_display_delay[i]);
+ }
+ OBU_TEST_COMPARE(film_grain_params_present);
+ }
+
+ void VerifyMetadata(MetadataType type, const ObuMetadata& expected) {
+ const ObuMetadata& actual = obu_->metadata();
+ switch (type) {
+ case kMetadataTypeHdrContentLightLevel:
+ OBU_TEST_COMPARE(max_cll);
+ OBU_TEST_COMPARE(max_fall);
+ break;
+ case kMetadataTypeHdrMasteringDisplayColorVolume:
+ for (int i = 0; i < 3; ++i) {
+ OBU_TEST_COMPARE(primary_chromaticity_x[i]);
+ OBU_TEST_COMPARE(primary_chromaticity_y[i]);
+ }
+ OBU_TEST_COMPARE(white_point_chromaticity_x);
+ OBU_TEST_COMPARE(white_point_chromaticity_y);
+ OBU_TEST_COMPARE(luminance_max);
+ OBU_TEST_COMPARE(luminance_min);
+ break;
+ case kMetadataTypeScalability:
+ break;
+ case kMetadataTypeItutT35:
+ OBU_TEST_COMPARE(itu_t_t35_country_code);
+ OBU_TEST_COMPARE(itu_t_t35_country_code_extension_byte);
+ ASSERT_EQ(expected.itu_t_t35_payload_size,
+ actual.itu_t_t35_payload_size);
+ if (actual.itu_t_t35_payload_size != 0) {
+ EXPECT_EQ(memcmp(expected.itu_t_t35_payload_bytes.get(),
+ actual.itu_t_t35_payload_bytes.get(),
+ actual.itu_t_t35_payload_size),
+ 0);
+ }
+ break;
+ case kMetadataTypeTimecode:
+ break;
+ }
+ }
+
+#undef OBU_TEST_COMPARE
+
+ // Accessors to private members of ObuParser. This avoids the need for a
+ // dependency on a googletest header in the main library for FRIEND_TEST()
+ // (or the need to duplicate the implementation).
+ bool ObuParseFrameParameters() { return obu_->ParseFrameParameters(); }
+ bool ObuParseLoopFilterParameters() {
+ return obu_->ParseLoopFilterParameters();
+ }
+ bool ObuParseLoopFilterDeltaParameters() {
+ return obu_->ParseLoopFilterDeltaParameters();
+ }
+ bool ObuParseQuantizerParameters() {
+ return obu_->ParseQuantizerParameters();
+ }
+ bool ObuParseQuantizerIndexDeltaParameters() {
+ return obu_->ParseQuantizerIndexDeltaParameters();
+ }
+ void ObuComputeSegmentLosslessAndQIndex() {
+ obu_->ComputeSegmentLosslessAndQIndex();
+ }
+ bool ObuParseCdefParameters() { return obu_->ParseCdefParameters(); }
+ bool ObuParseLoopRestorationParameters() {
+ return obu_->ParseLoopRestorationParameters();
+ }
+ bool ObuParseTxModeSyntax() { return obu_->ParseTxModeSyntax(); }
+ bool ObuIsSkipModeAllowed() { return obu_->IsSkipModeAllowed(); }
+ bool ObuParseSkipModeParameters() { return obu_->ParseSkipModeParameters(); }
+ bool ObuReadAllowWarpedMotion() { return obu_->ReadAllowWarpedMotion(); }
+ bool ObuSetFrameReferences(int8_t last_frame_idx, int8_t gold_frame_idx) {
+ return obu_->SetFrameReferences(last_frame_idx, gold_frame_idx);
+ }
+
+ std::unique_ptr<BufferPool> buffer_pool_;
+ DecoderState decoder_state_;
+ std::unique_ptr<ObuParser> obu_;
+ // The following members are reset with each Init().
+ Vector<ObuHeader>* obu_headers_;
+ ObuFrameHeader* obu_frame_header_;
+ ObuSequenceHeader* obu_sequence_header_;
+ RefCountedBufferPtr current_frame_;
+};
+
+TEST_F(ObuParserTest, InvalidInputs) {
+ obu_.reset(new (std::nothrow)
+ ObuParser(nullptr, 0, 0, buffer_pool_.get(), &decoder_state_));
+ EXPECT_EQ(obu_->ParseOneFrame(&current_frame_), kStatusInvalidArgument);
+ obu_.reset(new (std::nothrow) ObuParser(nullptr, 10, 0, buffer_pool_.get(),
+ &decoder_state_));
+ EXPECT_EQ(obu_->ParseOneFrame(&current_frame_), kStatusInvalidArgument);
+ obu_.reset(new (std::nothrow)
+ ObuParser(kDefaultTemporalDelimiter.data(), 0, 0,
+ buffer_pool_.get(), &decoder_state_));
+ EXPECT_EQ(obu_->ParseOneFrame(&current_frame_), kStatusInvalidArgument);
+}
+
+TEST_F(ObuParserTest, TemporalDelimiter) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultTemporalDelimiter);
+
+ ASSERT_TRUE(Parse(data.GenerateData()));
+ EXPECT_EQ(obu_->obu_headers().size(), 1);
+ EXPECT_EQ(obu_->obu_headers().back().type, kObuTemporalDelimiter);
+ VerifyObuHeader(false);
+
+ // forbidden_bit is not zero.
+ data.SetBit(0, 1);
+ EXPECT_FALSE(Parse(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, HeaderExtensions) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultTemporalDelimiterWithExtension);
+
+ ASSERT_TRUE(Parse(data.GenerateData()));
+ EXPECT_EQ(obu_->obu_headers().size(), 1);
+ EXPECT_EQ(obu_->obu_headers().back().type, kObuTemporalDelimiter);
+ VerifyObuHeader(true);
+
+ // extension flag is set but no extensions found.
+ data.Clear();
+ data.AppendByte(kDefaultTemporalDelimiterWithExtension[0]);
+ EXPECT_FALSE(Parse(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, HeaderHasSizeFieldNotSet) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultHeaderWithoutSizeField);
+
+ EXPECT_FALSE(Parse(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, SequenceHeader) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderLevel) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ // Set level to 1.
+ gold.level[0].major = 2;
+ gold.level[0].minor = 1;
+ data.SetLiteral(24, 5, 1); // level.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ // Set operating_point_idc of operating point 1 to 0x101 (temporal layer 0
+ // and spatial layer 0 should be decoded). Set level of operating point 1 to
+ // 8 (4.0) and tier to 1.
+ gold.operating_points = 2;
+ gold.operating_point_idc[1] = (1 << 0) | (1 << (0 + 8));
+ gold.level[1].major = 4;
+ gold.level[1].minor = 0;
+ gold.tier[1] = 1;
+ data.SetLiteral(7, 5, gold.operating_points - 1);
+ data.InsertLiteral(29, 12, 0x101); // operating_point_idc.
+ data.InsertLiteral(41, 5, 8); // level.
+ data.InsertBit(46, gold.tier[1]);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderProfile) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.still_picture = true;
+ data.SetBit(3, static_cast<uint8_t>(gold.still_picture));
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ // profile 2; bitdepth 8;
+ gold.profile = kProfile2;
+ gold.color_config.bitdepth = 8;
+ gold.color_config.subsampling_x = 1;
+ gold.color_config.subsampling_y = 0;
+ data.SetLiteral(0, 3, gold.profile);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ // profile 2; bitdepth 10;
+ gold.color_config.bitdepth = 10;
+ data.SetBit(73, 1); // high_bitdepth.
+ data.InsertBit(74, 0); // twelve_bit.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ // profile 2; bitdepth 12;
+ gold.color_config.bitdepth = 12;
+ gold.color_config.subsampling_y = 1;
+ data.SetBit(74, 1); // twelve_bit.
+ data.InsertBit(78, 1); // subsampling_x.
+ data.InsertBit(79, 1); // subsampling_y.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderIdLength) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.frame_id_numbers_present = true;
+ gold.delta_frame_id_length_bits = kDeltaFrameIdLengthBits;
+ gold.frame_id_length_bits = kFrameIdLengthBits;
+ data.SetBit(54, 1); // frame_id_numbers_present.
+ data.InsertLiteral(55, 4, kDeltaFrameIdLengthBits - 2);
+ data.InsertLiteral(59, 3, kFrameIdLengthBits - kDeltaFrameIdLengthBits - 1);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+// An idLen greater than 16 is invalid.
+TEST_F(ObuParserTest, SequenceHeaderIdLengthInvalid) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+
+ data.SetBit(54, 1); // frame_id_numbers_present.
+ data.InsertLiteral(55, 4, kDeltaFrameIdLengthBits - 2);
+ data.InsertLiteral(59, 3, 17 - kDeltaFrameIdLengthBits - 1); // idLen = 17.
+
+ ASSERT_FALSE(ParseSequenceHeader(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, SequenceHeaderFlags) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.enable_warped_motion = true;
+ gold.enable_superres = true;
+ data.SetBit(60, 1); // enable_warped_motion.
+ data.SetBit(70, 1); // enable_superres.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderForceScreenContentToolsEqualTo0) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.choose_screen_content_tools = false;
+ gold.force_screen_content_tools = 0;
+ gold.choose_integer_mv = false;
+ gold.force_integer_mv = 2;
+ data.SetBit(65, 0); // choose_screen_content_tools.
+ data.SetBit(66, 0); // force_screen_content_tools.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderMonochrome) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.color_config.is_monochrome = true;
+ gold.color_config.color_range = kColorRangeFull;
+ data.SetBit(74, 1); // monochrome.
+ data.InsertBit(76, 1); // color_range.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+// This tests TimingInfo, DecoderModelInfo and OperatingParameters. The test is
+// kind of long but it is the simplest way to test all three since they are
+// dependent on one another.
+TEST_F(ObuParserTest, SequenceHeaderTimingInfo) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.timing_info_present_flag = true;
+ gold.timing_info.num_units_in_tick = 100;
+ gold.timing_info.time_scale = 1000;
+ gold.timing_info.equal_picture_interval = false;
+ gold.decoder_model_info_present_flag = false;
+ data.SetBit(5, static_cast<uint8_t>(gold.timing_info_present_flag));
+ data.InsertLiteral(6, 32, gold.timing_info.num_units_in_tick);
+ data.InsertLiteral(38, 32, gold.timing_info.time_scale);
+ data.InsertBit(70,
+ static_cast<uint8_t>(gold.timing_info.equal_picture_interval));
+ data.InsertBit(71,
+ static_cast<uint8_t>(gold.decoder_model_info_present_flag));
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ gold.timing_info.equal_picture_interval = true;
+ gold.timing_info.num_ticks_per_picture = 7;
+ data.SetBit(70,
+ static_cast<uint8_t>(gold.timing_info.equal_picture_interval));
+ EXPECT_EQ(data.InsertUvlc(71, gold.timing_info.num_ticks_per_picture - 1), 5);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ gold.decoder_model_info_present_flag = true;
+ gold.decoder_model_info.encoder_decoder_buffer_delay_length = 5;
+ gold.decoder_model_info.num_units_in_decoding_tick = 1000;
+ gold.decoder_model_info.buffer_removal_time_length = 18;
+ gold.decoder_model_info.frame_presentation_time_length = 20;
+
+ data.SetBit(76, static_cast<uint8_t>(gold.decoder_model_info_present_flag));
+ data.InsertLiteral(
+ 77, 5, gold.decoder_model_info.encoder_decoder_buffer_delay_length - 1);
+ data.InsertLiteral(82, 32,
+ gold.decoder_model_info.num_units_in_decoding_tick);
+ data.InsertLiteral(114, 5,
+ gold.decoder_model_info.buffer_removal_time_length - 1);
+ data.InsertLiteral(
+ 119, 5, gold.decoder_model_info.frame_presentation_time_length - 1);
+ data.InsertBit(147, 0); // decoder_model_present_for_this_op.
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+
+ gold.operating_parameters.decoder_buffer_delay[0] = 10;
+ gold.operating_parameters.encoder_buffer_delay[0] = 20;
+ gold.operating_parameters.low_delay_mode_flag[0] = true;
+
+ data.SetBit(147, 1); // decoder_model_present_for_this_op.
+ data.InsertLiteral(
+ 148, gold.decoder_model_info.encoder_decoder_buffer_delay_length,
+ gold.operating_parameters.decoder_buffer_delay[0]);
+ data.InsertLiteral(
+ 153, gold.decoder_model_info.encoder_decoder_buffer_delay_length,
+ gold.operating_parameters.encoder_buffer_delay[0]);
+ data.InsertBit(158, static_cast<uint8_t>(
+ gold.operating_parameters.low_delay_mode_flag[0]));
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+TEST_F(ObuParserTest, SequenceHeaderInitialDisplayDelay) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultSequenceHeader);
+ ObuSequenceHeader gold;
+ DefaultSequenceHeader(&gold);
+
+ gold.initial_display_delay[0] = 8;
+
+ data.SetBit(6, 1); // initial_display_delay_present_flag.
+ data.InsertBit(29, 1); // initial_display_delay_present_for_this_op.
+ data.InsertLiteral(30, 4, gold.initial_display_delay[0] - 1);
+
+ ASSERT_TRUE(ParseSequenceHeader(data.GenerateData()));
+ VerifySequenceHeader(gold);
+}
+
+// Parsing of a frame header should fail if no sequence header has been
+// received.
+TEST_F(ObuParserTest, FrameHeaderWithoutSequenceHeader) {
+ // The aom-test-data test vector av1-1-b8-01-size-16x16.ivf has two temporal
+ // units. The first temporal unit has a presentation timestamp of 0 and
+ // consists of three OBUs: a temporal delimiter OBU, a sequence header OBU,
+ // and a frame OBU.
+ const std::vector<uint8_t> kTemporalDelimiter = {0x12, 0x00};
+ const std::vector<uint8_t> kSequenceHeader = {
+ 0x0a, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x9f, 0xfb, 0xff, 0xf3, 0x00, 0x80};
+ const std::vector<uint8_t> kFrame = {
+ 0x32, 0xa6, 0x01, 0x10, 0x00, 0x87, 0x80, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x9e, 0x86, 0x5b, 0xb2, 0x22, 0xb5, 0x58, 0x4d, 0x68, 0xe6,
+ 0x37, 0x54, 0x42, 0x7b, 0x84, 0xce, 0xdf, 0x9f, 0xec, 0xab, 0x07, 0x4d,
+ 0xf6, 0xe1, 0x5e, 0x9e, 0x27, 0xbf, 0x93, 0x2f, 0x47, 0x0d, 0x7b, 0x7c,
+ 0x45, 0x8d, 0xcf, 0x26, 0xf7, 0x6c, 0x06, 0xd7, 0x8c, 0x2e, 0xf5, 0x2c,
+ 0xb0, 0x8a, 0x31, 0xac, 0x69, 0xf5, 0xcd, 0xd8, 0x71, 0x5d, 0xaf, 0xf8,
+ 0x96, 0x43, 0x8c, 0x9c, 0x23, 0x6f, 0xab, 0xd0, 0x35, 0x43, 0xdf, 0x81,
+ 0x12, 0xe3, 0x7d, 0xec, 0x22, 0xb0, 0x30, 0x54, 0x32, 0x9f, 0x90, 0xc0,
+ 0x5d, 0x64, 0x9b, 0x0f, 0x75, 0x31, 0x84, 0x3a, 0x57, 0xd7, 0x5f, 0x03,
+ 0x6e, 0x7f, 0x43, 0x17, 0x6d, 0x08, 0xc3, 0x81, 0x8a, 0xae, 0x73, 0x1c,
+ 0xa8, 0xa7, 0xe4, 0x9c, 0xa9, 0x5b, 0x3f, 0xd1, 0xeb, 0x75, 0x3a, 0x7f,
+ 0x22, 0x77, 0x38, 0x64, 0x1c, 0x77, 0xdb, 0xcd, 0xef, 0xb7, 0x08, 0x45,
+ 0x8e, 0x7f, 0xea, 0xa3, 0xd0, 0x81, 0xc9, 0xc1, 0xbc, 0x93, 0x9b, 0x41,
+ 0xb1, 0xa1, 0x42, 0x17, 0x98, 0x3f, 0x1e, 0x95, 0xdf, 0x68, 0x7c, 0xb7,
+ 0x98};
+
+ BytesAndBits data;
+ data.AppendBytes(kTemporalDelimiter);
+ // Skip the sequence header OBU.
+ data.AppendBytes(kFrame);
+ ASSERT_FALSE(Parse(data.GenerateData()));
+
+ // Now verify that all three OBUs are correct, by adding them to |data|
+ // successively.
+ data.Clear();
+ data.AppendBytes(kTemporalDelimiter);
+ ASSERT_TRUE(Parse(data.GenerateData()));
+ data.Clear();
+ data.AppendBytes(kTemporalDelimiter);
+ data.AppendBytes(kSequenceHeader);
+ ASSERT_TRUE(Parse(data.GenerateData()));
+ data.Clear();
+ data.AppendBytes(kTemporalDelimiter);
+ data.AppendBytes(kSequenceHeader);
+ data.AppendBytes(kFrame);
+ ASSERT_TRUE(Parse(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, FrameParameterShowExistingFrame) {
+ BytesAndBits data;
+ data.AppendBit(1); // show_existing_frame.
+ data.AppendLiteral(3, kFrameToShow); // frame_to_show.
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+ gold.show_existing_frame = true;
+ gold.frame_to_show = kFrameToShow;
+
+ // kFrameToShow'th frame is not yet decoded.
+ ASSERT_FALSE(ParseFrameParameters(data.GenerateData()));
+
+ decoder_state_.reference_frame[kFrameToShow] = buffer_pool_->GetFreeBuffer();
+ // kFrameToShow'th frame is not a showable frame.
+ ASSERT_FALSE(ParseFrameParameters(data.GenerateData()));
+
+ decoder_state_.reference_frame[kFrameToShow]->set_showable_frame(true);
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParametersShowExistingFrameWithDisplayFrameId) {
+ BytesAndBits data;
+ data.AppendBit(1); // show_existing_frame.
+ data.AppendLiteral(3, kFrameToShow); // frame_to_show.
+ data.AppendLiteral(15, kDisplayFrameId); // display_frame_id.
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+ gold.show_existing_frame = true;
+ gold.frame_to_show = kFrameToShow;
+ gold.display_frame_id = kDisplayFrameId;
+
+ // kFrameToShow'th frame is not yet decoded.
+ ASSERT_FALSE(ParseFrameParameters(data.GenerateData(), true));
+
+ decoder_state_.reference_frame_id[kFrameToShow] = kDisplayFrameId;
+ decoder_state_.reference_frame[kFrameToShow] = buffer_pool_->GetFreeBuffer();
+ // kFrameToShow'th frame is not a showable frame.
+ ASSERT_FALSE(ParseFrameParameters(data.GenerateData(), true));
+
+ decoder_state_.reference_frame[kFrameToShow]->set_showable_frame(true);
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData(), true));
+ VerifyFrameParameters(gold, true);
+}
+
+TEST_F(ObuParserTest, FrameParameterShowExistingFrameTemporalPointInfo) {
+ BytesAndBits data;
+ data.AppendBit(1); // show_existing_frame.
+ data.AppendLiteral(3, kFrameToShow); // frame_to_show.
+ data.AppendLiteral(20, 38); // frame_presentation_time.
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+ gold.show_existing_frame = true;
+ gold.frame_to_show = kFrameToShow;
+ gold.frame_presentation_time = 38;
+
+ EXPECT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->frame_width_bits = kFrameWidthBits;
+ obu_sequence_header_->frame_height_bits = kFrameHeightBits;
+ obu_sequence_header_->max_frame_width = kWidth;
+ obu_sequence_header_->max_frame_height = kHeight;
+
+ obu_sequence_header_->decoder_model_info_present_flag = true;
+ obu_sequence_header_->decoder_model_info.frame_presentation_time_length = 20;
+
+ decoder_state_.reference_frame[kFrameToShow] = buffer_pool_->GetFreeBuffer();
+ decoder_state_.reference_frame[kFrameToShow]->set_showable_frame(true);
+
+ ASSERT_TRUE(ObuParseFrameParameters());
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterErrorResilientMode) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderIntraOnlyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameIntraOnly);
+
+ gold.error_resilient_mode = true;
+ data.SetBit(4, static_cast<uint8_t>(gold.error_resilient_mode));
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterKeyFrame) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderKeyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterKeyFrameTemporalPointInfo) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderKeyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+
+ data.InsertLiteral(4, 20, 38); // frame_presentation_time.
+ gold.frame_presentation_time = 38;
+
+ EXPECT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->frame_width_bits = kFrameWidthBits;
+ obu_sequence_header_->frame_height_bits = kFrameHeightBits;
+ obu_sequence_header_->max_frame_width = kWidth;
+ obu_sequence_header_->max_frame_height = kHeight;
+
+ obu_sequence_header_->decoder_model_info_present_flag = true;
+ obu_sequence_header_->decoder_model_info.frame_presentation_time_length = 20;
+
+ ASSERT_TRUE(ObuParseFrameParameters());
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterKeyFrameOverrideSize) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderKeyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+
+ OverrideFrameSize(&data, &gold, 5, 6);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+
+ OverrideRenderSize(&data, &gold, 23);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterKeyFrameSuperRes) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderKeyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+ gold.use_superres = true;
+ gold.superres_scale_denominator = 15;
+ gold.width = kWidth * 8 / 15;
+ gold.columns4x4 = 58;
+
+ data.SetBit(6, static_cast<int>(gold.use_superres));
+ data.SetLiteral(7, 3, gold.superres_scale_denominator - 9);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData(), false, 0, 0, true));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterKeyFrameAllowScreenContentTools) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderKeyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameKey);
+
+ data.InsertBit(5, 1); // allow_screen_content_tools.
+ data.InsertBit(8, 1); // allow_intrabc.
+ gold.allow_screen_content_tools = true;
+ gold.allow_intrabc = true;
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData(), false, 2));
+ VerifyFrameParameters(gold);
+
+ data.InsertBit(6, 1); // force_integer_mv.
+ gold.force_integer_mv = 1;
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData(), false, 2, 2));
+ VerifyFrameParameters(gold);
+
+ data.SetBit(6, 0); // force_integer_mv.
+
+ // Gold need not be updated, because force_integer_mv is always 1 for
+ // keyframes.
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData(), false, 2, 2));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterIntraOnlyFrame) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderIntraOnlyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameIntraOnly);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterIntraOnlyFrameOverrideSize) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderIntraOnlyFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameIntraOnly);
+
+ OverrideFrameSize(&data, &gold, 6, 15);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+
+ OverrideRenderSize(&data, &gold, 32);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+// An INTRA_ONLY_FRAME cannot set refresh_frame_flags to 0xff.
+TEST_F(ObuParserTest, FrameParameterIntraOnlyFrameRefreshAllFrames) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderIntraOnlyFrame);
+ data.SetLiteral(7, 8, 0xFF); // refresh_frame_flags.
+
+ ASSERT_FALSE(ParseFrameParameters(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, FrameParameterInterFrame) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderInterFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameInter);
+ ObuFrameHeader reference_frame_header;
+ reference_frame_header.width = kWidth;
+ reference_frame_header.height = kHeight;
+ reference_frame_header.render_width = kWidth;
+ reference_frame_header.render_height = kHeight;
+ reference_frame_header.upscaled_width = kWidth;
+ reference_frame_header.rows4x4 = kRows4x4;
+ reference_frame_header.columns4x4 = kColumns4x4;
+ reference_frame_header.refresh_frame_flags = 0;
+ for (auto& reference_frame : decoder_state_.reference_frame) {
+ reference_frame = buffer_pool_->GetFreeBuffer();
+ EXPECT_TRUE(reference_frame->SetFrameDimensions(reference_frame_header));
+ }
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+TEST_F(ObuParserTest, FrameParameterInterFrameOverrideSize) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderInterFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameInter);
+ ObuFrameHeader reference_frame_header;
+ reference_frame_header.width = kWidth;
+ reference_frame_header.height = kHeight;
+ reference_frame_header.render_width = kWidth;
+ reference_frame_header.render_height = kHeight;
+ reference_frame_header.upscaled_width = kWidth;
+ reference_frame_header.rows4x4 = kRows4x4;
+ reference_frame_header.columns4x4 = kColumns4x4;
+ reference_frame_header.refresh_frame_flags = 0;
+ for (auto& reference_frame : decoder_state_.reference_frame) {
+ reference_frame = buffer_pool_->GetFreeBuffer();
+ EXPECT_TRUE(reference_frame->SetFrameDimensions(reference_frame_header));
+ }
+
+ data.InsertLiteral(39, kNumInterReferenceFrameTypes, 0); // found_ref.
+ OverrideFrameSize(&data, &gold, 6, 46);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+
+ OverrideRenderSize(&data, &gold, 63);
+
+ ASSERT_TRUE(ParseFrameParameters(data.GenerateData()));
+ VerifyFrameParameters(gold);
+}
+
+// This test verifies we check the following requirement at the end of Section
+// 6.8.4:
+// If FrameIsIntra is equal to 0 (indicating that this frame may use inter
+// prediction), the requirements described in the frame size with refs
+// semantics of section 6.8.6 must also be satisfied.
+TEST_F(ObuParserTest, FrameParameterInterFrameInvalidSize) {
+ BytesAndBits data;
+ data.AppendBytes(kDefaultFrameHeaderInterFrame);
+ ObuFrameHeader gold;
+ DefaultFrameHeader(&gold, kFrameInter);
+ ObuFrameHeader reference_frame_header;
+ reference_frame_header.width = kWidth;
+ reference_frame_header.height = 2 * kHeight + 8;
+ reference_frame_header.render_width = kWidth;
+ reference_frame_header.render_height = 2 * kHeight + 8;
+ reference_frame_header.upscaled_width = kWidth;
+ reference_frame_header.rows4x4 = 2 * kRows4x4 + 2;
+ reference_frame_header.columns4x4 = kColumns4x4;
+ reference_frame_header.refresh_frame_flags = 0;
+ for (auto& reference_frame : decoder_state_.reference_frame) {
+ reference_frame = buffer_pool_->GetFreeBuffer();
+ EXPECT_TRUE(reference_frame->SetFrameDimensions(reference_frame_header));
+ }
+
+ EXPECT_FALSE(ParseFrameParameters(data.GenerateData()));
+}
+
+// Tests the ObuParser::SetFrameReferences() method.
+//
+// This method uses the following data members as input:
+// decoder_state_.reference_order_hint
+// sequence_header_.enable_order_hint
+// sequence_header_.order_hint_bits
+// frame_header_.order_hint
+// So we need to set up these data members before calling
+// ObuParser::SetFrameReferences().
+//
+// The output is in frame_header_.reference_frame_index.
+TEST_F(ObuParserTest, SetFrameReferences) {
+ // All reference frames are forward references (because 9 < 17).
+ for (int i = 0; i < kNumReferenceFrameTypes; ++i) {
+ decoder_state_.reference_order_hint[i] = 9;
+ }
+
+ ASSERT_TRUE(Init());
+ obu_sequence_header_->enable_order_hint = true;
+ obu_sequence_header_->order_hint_bits = 5;
+ obu_sequence_header_->order_hint_shift_bits =
+ Mod32(32 - obu_sequence_header_->order_hint_bits);
+ obu_frame_header_->order_hint = 17;
+
+ const int8_t last_frame_idx = 0;
+ const int8_t gold_frame_idx = 1;
+
+ // Since all reference frames are forward references, we set the remaining
+ // five references in reverse chronological order. So Last2, Last3, Backward,
+ // Alternate2, and Alternate are set to 7, 6, 5, 4, and 3, respectively.
+
+ EXPECT_TRUE(ObuSetFrameReferences(last_frame_idx, gold_frame_idx));
+
+ EXPECT_EQ(
+ obu_frame_header_
+ ->reference_frame_index[kReferenceFrameLast - kReferenceFrameLast],
+ 0);
+ EXPECT_EQ(
+ obu_frame_header_
+ ->reference_frame_index[kReferenceFrameLast2 - kReferenceFrameLast],
+ 7);
+ EXPECT_EQ(
+ obu_frame_header_
+ ->reference_frame_index[kReferenceFrameLast3 - kReferenceFrameLast],
+ 6);
+ EXPECT_EQ(
+ obu_frame_header_
+ ->reference_frame_index[kReferenceFrameGolden - kReferenceFrameLast],
+ 1);
+ EXPECT_EQ(obu_frame_header_->reference_frame_index[kReferenceFrameBackward -
+ kReferenceFrameLast],
+ 5);
+ EXPECT_EQ(obu_frame_header_->reference_frame_index[kReferenceFrameAlternate2 -
+ kReferenceFrameLast],
+ 4);
+ EXPECT_EQ(obu_frame_header_->reference_frame_index[kReferenceFrameAlternate -
+ kReferenceFrameLast],
+ 3);
+}
+
+TEST_F(ObuParserTest, LoopFilterParameters) {
+ LoopFilter gold;
+ memset(&gold, 0, sizeof(gold));
+
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->primary_reference_frame = kPrimaryReferenceNone;
+ obu_frame_header_->coded_lossless = true;
+ gold.ref_deltas[kReferenceFrameIntra] = 1;
+ gold.ref_deltas[kReferenceFrameGolden] = -1;
+ gold.ref_deltas[kReferenceFrameAlternate] = -1;
+ gold.ref_deltas[kReferenceFrameAlternate2] = -1;
+ ASSERT_TRUE(ObuParseLoopFilterParameters());
+ VerifyLoopFilterParameters(gold);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->primary_reference_frame = kPrimaryReferenceNone;
+ obu_frame_header_->allow_intrabc = true;
+ ASSERT_TRUE(ObuParseLoopFilterParameters());
+ VerifyLoopFilterParameters(gold);
+
+ gold.level[0] = 32;
+ gold.level[3] = 48;
+ gold.sharpness = 4;
+ data.Clear();
+ for (const auto& level : gold.level) {
+ data.AppendLiteral(6, level);
+ }
+ data.AppendLiteral(3, gold.sharpness);
+ data.AppendBit(0); // delta_enabled.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->primary_reference_frame = kPrimaryReferenceNone;
+ ASSERT_TRUE(ObuParseLoopFilterParameters());
+ VerifyLoopFilterParameters(gold);
+
+ gold.delta_enabled = true;
+ gold.delta_update = true;
+ gold.ref_deltas[0] = 20;
+ gold.mode_deltas[0] = -20;
+ data.SetBit(27, 1); // delta_enabled.
+ data.AppendBit(1); // delta_update.
+ for (int i = 0; i < kNumReferenceFrameTypes; ++i) {
+ if (i == 0) {
+ data.AppendBit(1); // update_ref_delta.
+ data.AppendInverseSignedLiteral(6, gold.ref_deltas[0]); // ref_delta.
+ } else {
+ data.AppendBit(0); // update_ref_delta.
+ }
+ }
+ for (int i = 0; i < kLoopFilterMaxModeDeltas; ++i) {
+ if (i == 0) {
+ data.AppendBit(1); // update_mode_delta.
+ data.AppendInverseSignedLiteral(6, gold.mode_deltas[0]); // mode_delta.
+ } else {
+ data.AppendBit(0); // update_mode_delta.
+ }
+ }
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->primary_reference_frame = kPrimaryReferenceNone;
+ ASSERT_TRUE(ObuParseLoopFilterParameters());
+ VerifyLoopFilterParameters(gold);
+}
+
+TEST_F(ObuParserTest, QuantizerParameters) {
+ QuantizerParameters gold = {};
+ gold.base_index = 48;
+
+ BytesAndBits data;
+ data.AppendLiteral(8, gold.base_index);
+ data.AppendLiteral(3, 0); // delta_coded.
+ data.AppendBit(0); // use_matrix.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+}
+
+TEST_F(ObuParserTest, QuantizerParametersMonochrome) {
+ QuantizerParameters gold = {};
+ gold.base_index = 48;
+
+ BytesAndBits data;
+ data.AppendLiteral(8, gold.base_index);
+ data.AppendBit(0); // delta_coded.
+ data.AppendBit(0); // use_matrix.
+ // The quantizer parameters end here. Add a 1 bit. It should not be parsed.
+ data.AppendBit(1); // Would be segmentation_enabled in a bitstream.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.is_monochrome = true;
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+}
+
+TEST_F(ObuParserTest, QuantizerParametersDeltaCoded) {
+ QuantizerParameters gold = {};
+ gold.base_index = 48;
+ gold.delta_dc[kPlaneY] = -30;
+
+ BytesAndBits data;
+ data.AppendLiteral(8, gold.base_index);
+ data.AppendBit(1); // delta_coded.
+ data.AppendInverseSignedLiteral(6, gold.delta_dc[kPlaneY]);
+ data.AppendLiteral(2, 0); // delta_coded u dc/ac.
+ data.AppendBit(0); // use_matrix.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ gold.delta_dc[kPlaneU] = -40;
+ gold.delta_dc[kPlaneV] = gold.delta_dc[kPlaneU];
+ data.SetBit(16, 1); // delta_coded.
+ data.InsertInverseSignedLiteral(17, 6, gold.delta_dc[kPlaneU]);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ gold.delta_ac[kPlaneU] = 50;
+ gold.delta_ac[kPlaneV] = gold.delta_ac[kPlaneU];
+ data.SetBit(24, 1); // delta_coded.
+ data.InsertInverseSignedLiteral(25, 6, gold.delta_ac[kPlaneU]);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ gold.delta_dc[kPlaneV] = 60;
+ gold.delta_ac[kPlaneV] = 0;
+ data.InsertBit(16, 1); // diff_uv_delta.
+ data.InsertBit(33, 1); // delta_coded.
+ data.InsertInverseSignedLiteral(34, 6, gold.delta_dc[kPlaneV]);
+ data.InsertBit(41, 0); // delta_coded.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.separate_uv_delta_q = true;
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ gold.delta_ac[kPlaneV] = -20;
+ data.SetBit(41, 1); // delta_coded.
+ data.InsertInverseSignedLiteral(42, 6, gold.delta_ac[kPlaneV]);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.separate_uv_delta_q = true;
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+}
+
+TEST_F(ObuParserTest, QuantizerParametersUseQmatrix) {
+ QuantizerParameters gold = {};
+ gold.base_index = 48;
+ gold.use_matrix = true;
+ gold.matrix_level[kPlaneY] = 3;
+ gold.matrix_level[kPlaneU] = 6;
+ gold.matrix_level[kPlaneV] = gold.matrix_level[kPlaneU];
+
+ // Test three cases.
+ // 1. separate_uv_delta_q = false (which implies diff_uv_delta = false).
+ BytesAndBits data;
+ data.AppendLiteral(8, gold.base_index);
+ data.AppendLiteral(3, 0); // delta_coded.
+ data.AppendBit(static_cast<uint8_t>(gold.use_matrix));
+ data.AppendLiteral(4, gold.matrix_level[kPlaneY]);
+ data.AppendLiteral(4, gold.matrix_level[kPlaneU]);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ // 2. separate_uv_delta_q = true and diff_uv_delta = false.
+ gold.matrix_level[kPlaneV] = 5;
+ data.InsertBit(9, 0); // diff_uv_delta.
+ data.AppendLiteral(4, gold.matrix_level[kPlaneV]);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.separate_uv_delta_q = true;
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+
+ // 3. separate_uv_delta_q = true and diff_uv_delta = true.
+ data.SetBit(9, 1); // diff_uv_delta.
+ data.InsertLiteral(12, 2, 0); // delta_coded.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.separate_uv_delta_q = true;
+ ASSERT_TRUE(ObuParseQuantizerParameters());
+ VerifyQuantizerParameters(gold);
+}
+
+TEST_F(ObuParserTest, SegmentationParameters) {
+ const int kPrimaryReferenceNotNone = 1;
+ const int kPrevFrameIndexNotNone = 2;
+
+ // Set up decoder_state_ with a previous frame containing saved segmentation
+ // parameters.
+ decoder_state_.reference_frame[kPrevFrameIndexNotNone] =
+ buffer_pool_->GetFreeBuffer();
+ ASSERT_NE(decoder_state_.reference_frame[kPrevFrameIndexNotNone], nullptr);
+ Segmentation prev_segmentation = {};
+ prev_segmentation.feature_enabled[2][0] = true;
+ prev_segmentation.feature_enabled[5][0] = true;
+ prev_segmentation.last_active_segment_id = 5;
+ decoder_state_.reference_frame[kPrevFrameIndexNotNone]
+ ->SetSegmentationParameters(prev_segmentation);
+
+ Segmentation gold;
+ memset(&gold, 0, sizeof(gold));
+
+ BytesAndBits data;
+ data.AppendBit(0); // segmentation_enabled.
+
+ // Since segmentation_enabled is false, we expect the parameters to be all
+ // zero/false.
+ ASSERT_TRUE(ParseSegmentationParameters(
+ data.GenerateData(), kPrimaryReferenceNotNone, kPrevFrameIndexNotNone));
+ VerifySegmentationParameters(gold);
+
+ gold.enabled = true;
+ gold.update_map = true;
+ gold.temporal_update = true;
+ data.SetBit(0, static_cast<uint8_t>(gold.enabled));
+ data.AppendBit(static_cast<uint8_t>(gold.update_map));
+ data.AppendBit(static_cast<uint8_t>(gold.temporal_update));
+ data.AppendBit(static_cast<uint8_t>(gold.update_data));
+
+ // Since update_data is false, we expect the parameters to be loaded from the
+ // previous frame in |decoder_state_|. So change |gold| accordingly.
+ gold.feature_enabled[2][0] = true;
+ gold.feature_enabled[5][0] = true;
+ gold.last_active_segment_id = 5;
+
+ ASSERT_TRUE(ParseSegmentationParameters(
+ data.GenerateData(), kPrimaryReferenceNotNone, kPrevFrameIndexNotNone));
+ VerifySegmentationParameters(gold);
+
+ OverrideSegmentation(&data, &gold, 3);
+
+ ASSERT_TRUE(ParseSegmentationParameters(
+ data.GenerateData(), kPrimaryReferenceNotNone, kPrevFrameIndexNotNone));
+ VerifySegmentationParameters(gold);
+
+ // If primary_ref_frame is kPrimaryReferenceNone, these three fields are
+ // implied.
+ data.RemoveBit(1); // segmentation_update_map.
+ data.RemoveBit(1); // segmentation_temporal_update.
+ data.RemoveBit(1); // segmentation_update_data.
+ gold.update_map = true;
+ gold.temporal_update = false;
+ gold.update_data = true;
+
+ // Since update_data is true, we expect the parameters to be read from
+ // |data|.
+ ASSERT_TRUE(ParseSegmentationParameters(data.GenerateData(),
+ kPrimaryReferenceNone, 0));
+ VerifySegmentationParameters(gold);
+}
+
+TEST_F(ObuParserTest, QuantizerIndexDeltaParameters) {
+ BytesAndBits data;
+ data.AppendBit(1); // delta_q_present.
+ data.AppendLiteral(2, 2); // delta_q_res.
+
+ Delta gold;
+ memset(&gold, 0, sizeof(gold));
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseQuantizerIndexDeltaParameters());
+ VerifyDeltaParameters(gold, obu_->frame_header().delta_q);
+
+ gold.present = true;
+ gold.scale = 2;
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->quantizer.base_index = 40;
+ ASSERT_TRUE(ObuParseQuantizerIndexDeltaParameters());
+ VerifyDeltaParameters(gold, obu_->frame_header().delta_q);
+}
+
+TEST_F(ObuParserTest, LoopFilterDeltaParameters) {
+ BytesAndBits data;
+ data.AppendBit(1); // delta_lf_present.
+ data.AppendLiteral(2, 2); // delta_lf_res.
+ data.AppendBit(1); // delta_lf_multi.
+
+ Delta gold;
+ memset(&gold, 0, sizeof(gold));
+
+ // delta_q_present is false, so loop filter delta will not be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseLoopFilterDeltaParameters());
+ VerifyDeltaParameters(gold, obu_->frame_header().delta_lf);
+
+ // allow_intrabc is true, so loop filter delta will not be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->delta_q.present = true;
+ obu_frame_header_->allow_intrabc = true;
+ ASSERT_TRUE(ObuParseLoopFilterDeltaParameters());
+ VerifyDeltaParameters(gold, obu_->frame_header().delta_lf);
+
+ gold.present = true;
+ gold.scale = 2;
+ gold.multi = true;
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->delta_q.present = true;
+ ASSERT_TRUE(ObuParseLoopFilterDeltaParameters());
+ VerifyDeltaParameters(gold, obu_->frame_header().delta_lf);
+}
+
+TEST_F(ObuParserTest, ComputeSegmentLosslessAndQIndex) {
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+
+ // Segmentation is disabled. All quantizers are 0.
+ ObuComputeSegmentLosslessAndQIndex();
+ EXPECT_TRUE(obu_->frame_header().coded_lossless);
+ EXPECT_TRUE(obu_->frame_header().upscaled_lossless);
+ for (const auto& qindex : obu_->frame_header().segmentation.qindex) {
+ EXPECT_EQ(qindex, 0);
+ }
+
+ // Segmentation is enabled. All quantizers are zero.
+ obu_frame_header_->segmentation.enabled = true;
+ ObuComputeSegmentLosslessAndQIndex();
+ EXPECT_TRUE(obu_->frame_header().coded_lossless);
+ EXPECT_TRUE(obu_->frame_header().upscaled_lossless);
+ for (const auto& qindex : obu_->frame_header().segmentation.qindex) {
+ EXPECT_EQ(qindex, 0);
+ }
+
+ // Segmentation is enabled. All quantizers are zero. upscaled_width != width.
+ obu_frame_header_->segmentation.enabled = true;
+ obu_frame_header_->upscaled_width = 100;
+ ObuComputeSegmentLosslessAndQIndex();
+ EXPECT_TRUE(obu_->frame_header().coded_lossless);
+ EXPECT_FALSE(obu_->frame_header().upscaled_lossless);
+ for (const auto& qindex : obu_->frame_header().segmentation.qindex) {
+ EXPECT_EQ(qindex, 0);
+ }
+
+ // Segmentation in disabled. Some quantizer deltas are non zero.
+ obu_frame_header_->segmentation.enabled = false;
+ obu_frame_header_->quantizer.delta_dc[kPlaneY] = 40;
+ ObuComputeSegmentLosslessAndQIndex();
+ EXPECT_FALSE(obu_->frame_header().coded_lossless);
+ EXPECT_FALSE(obu_->frame_header().upscaled_lossless);
+ for (const auto& qindex : obu_->frame_header().segmentation.qindex) {
+ EXPECT_EQ(qindex, 0);
+ }
+
+ // Segmentation is disabled. Quantizer base index is non zero.
+ obu_frame_header_->segmentation.enabled = true;
+ obu_frame_header_->quantizer.delta_dc[kPlaneY] = 0;
+ obu_frame_header_->quantizer.base_index = 40;
+ ObuComputeSegmentLosslessAndQIndex();
+ EXPECT_FALSE(obu_->frame_header().coded_lossless);
+ EXPECT_FALSE(obu_->frame_header().upscaled_lossless);
+ for (const auto& qindex : obu_->frame_header().segmentation.qindex) {
+ EXPECT_EQ(qindex, 40);
+ }
+}
+
+TEST_F(ObuParserTest, CdefParameters) {
+ Cdef gold;
+ memset(&gold, 0, sizeof(gold));
+ const int coeff_shift = 2; // bitdepth - 8.
+ gold.damping = 3 + coeff_shift;
+
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->color_config.bitdepth = 10;
+ ASSERT_TRUE(ObuParseCdefParameters());
+ // Cdef will be {0} except for damping because enable_cdef is false.
+ VerifyCdefParameters(gold);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_cdef = true;
+ obu_sequence_header_->color_config.bitdepth = 10;
+ obu_frame_header_->coded_lossless = true;
+ ASSERT_TRUE(ObuParseCdefParameters());
+ // Cdef will be {0} except for damping because coded_lossless is true.
+ VerifyCdefParameters(gold);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_cdef = true;
+ obu_sequence_header_->color_config.bitdepth = 10;
+ obu_frame_header_->allow_intrabc = true;
+ ASSERT_TRUE(ObuParseCdefParameters());
+ // Cdef will be {0} except for damping because allow_intrabc is true.
+ VerifyCdefParameters(gold);
+
+ gold.damping = 5;
+ gold.bits = 1;
+ data.Clear();
+ data.AppendLiteral(2, gold.damping - 3); // cdef_damping_minus3.
+ gold.damping += coeff_shift;
+ data.AppendLiteral(2, gold.bits); // cdef_bits.
+ for (int i = 0; i < 2; ++i) {
+ gold.y_primary_strength[i] = 10;
+ gold.y_secondary_strength[i] = (i == 0) ? 2 : 3;
+ gold.uv_primary_strength[i] = 12;
+ gold.uv_secondary_strength[i] = (i == 1) ? 2 : 3;
+ data.AppendLiteral(4, gold.y_primary_strength[i]);
+ data.AppendLiteral(2, gold.y_secondary_strength[i]);
+ data.AppendLiteral(4, gold.uv_primary_strength[i]);
+ data.AppendLiteral(2, gold.uv_secondary_strength[i]);
+ if (gold.y_secondary_strength[i] == 3) ++gold.y_secondary_strength[i];
+ if (gold.uv_secondary_strength[i] == 3) ++gold.uv_secondary_strength[i];
+ gold.y_primary_strength[i] <<= coeff_shift;
+ gold.uv_primary_strength[i] <<= coeff_shift;
+ gold.y_secondary_strength[i] <<= coeff_shift;
+ gold.uv_secondary_strength[i] <<= coeff_shift;
+ }
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_cdef = true;
+ obu_sequence_header_->color_config.bitdepth = 10;
+ ASSERT_TRUE(ObuParseCdefParameters());
+ VerifyCdefParameters(gold);
+}
+
+TEST_F(ObuParserTest, LoopRestorationParameters) {
+ for (bool use_128x128_superblock : testing::Bool()) {
+ SCOPED_TRACE("use_128x128_superblock: " +
+ std::to_string(use_128x128_superblock));
+ LoopRestoration gold;
+ memset(&gold, 0, sizeof(gold));
+
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ // enable_restoration is false. nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->allow_intrabc = true;
+ obu_frame_header_->coded_lossless = true;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+
+ // allow_intrabc is true. nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->allow_intrabc = true;
+ obu_sequence_header_->enable_restoration = true;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+
+ // coded_lossless is true. nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->coded_lossless = true;
+ obu_sequence_header_->enable_restoration = true;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+
+ data.Clear();
+ for (int i = 0; i < kMaxPlanes; ++i) {
+ data.AppendLiteral(2, kLoopRestorationTypeNone); // lr_type.
+ }
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_restoration = true;
+ obu_sequence_header_->use_128x128_superblock = use_128x128_superblock;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+
+ gold.type[0] = gold.type[1] = kLoopRestorationTypeWiener;
+ gold.unit_size_log2[0] = gold.unit_size_log2[1] = gold.unit_size_log2[2] =
+ use_128x128_superblock ? 8 : 7;
+ data.SetLiteral(0, 2, gold.type[0]); // lr_type.
+ data.SetLiteral(2, 2, gold.type[0]); // lr_type.
+ data.AppendBit(1); // lr_unit_shift.
+ if (!use_128x128_superblock) {
+ data.AppendBit(0); // lr_unit_extra_shift.
+ }
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_restoration = true;
+ obu_sequence_header_->use_128x128_superblock = use_128x128_superblock;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+
+ if (!use_128x128_superblock) {
+ gold.unit_size_log2[0] = gold.unit_size_log2[1] = gold.unit_size_log2[2] =
+ 8;
+ data.SetBit(7, 1); // lr_unit_extra_shift.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_restoration = true;
+ obu_sequence_header_->use_128x128_superblock = use_128x128_superblock;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+ }
+
+ gold.unit_size_log2[1] = gold.unit_size_log2[2] = 7;
+ data.AppendBit(1); // lr_uv_shift.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_sequence_header_->enable_restoration = true;
+ obu_sequence_header_->use_128x128_superblock = use_128x128_superblock;
+ obu_sequence_header_->color_config.subsampling_x = 1;
+ obu_sequence_header_->color_config.subsampling_y = 1;
+ ASSERT_TRUE(ObuParseLoopRestorationParameters());
+ VerifyLoopRestorationParameters(gold);
+ }
+}
+
+TEST_F(ObuParserTest, TxModeSyntax) {
+ BytesAndBits data;
+ data.AppendBit(1); // tx_mode_select.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseTxModeSyntax());
+ EXPECT_EQ(kTxModeSelect, obu_->frame_header().tx_mode);
+
+ data.SetBit(0, 0); // tx_mode_select.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ ASSERT_TRUE(ObuParseTxModeSyntax());
+ EXPECT_EQ(kTxModeLargest, obu_->frame_header().tx_mode);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->coded_lossless = true;
+ ASSERT_TRUE(ObuParseTxModeSyntax());
+ EXPECT_EQ(kTxModeOnly4x4, obu_->frame_header().tx_mode);
+}
+
+TEST_F(ObuParserTest, FrameReferenceModeSyntax) {
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ ASSERT_TRUE(ParseFrameReferenceModeSyntax(data.GenerateData(), kFrameKey));
+ EXPECT_FALSE(obu_->frame_header().reference_mode_select);
+
+ data.SetBit(0, 1); // reference_mode_select.
+
+ ASSERT_TRUE(ParseFrameReferenceModeSyntax(data.GenerateData(), kFrameInter));
+ EXPECT_TRUE(obu_->frame_header().reference_mode_select);
+}
+
+TEST_F(ObuParserTest, SkipModeParameters) {
+ BytesAndBits data;
+ data.AppendBit(1); // skip_mode_present.
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameKey;
+ ASSERT_FALSE(ObuIsSkipModeAllowed());
+ ASSERT_TRUE(ObuParseSkipModeParameters());
+ EXPECT_FALSE(obu_->frame_header().skip_mode_present);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->reference_mode_select = true;
+ ASSERT_FALSE(ObuIsSkipModeAllowed());
+ ASSERT_TRUE(ObuParseSkipModeParameters());
+ EXPECT_FALSE(obu_->frame_header().skip_mode_present);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->reference_mode_select = true;
+ obu_sequence_header_->enable_order_hint = true;
+ obu_sequence_header_->order_hint_bits = 7;
+ obu_sequence_header_->order_hint_shift_bits =
+ Mod32(32 - obu_sequence_header_->order_hint_bits);
+ ASSERT_FALSE(ObuIsSkipModeAllowed());
+ ASSERT_TRUE(ObuParseSkipModeParameters());
+ EXPECT_FALSE(obu_->frame_header().skip_mode_present);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->reference_mode_select = true;
+ obu_frame_header_->order_hint = 1;
+ decoder_state_.order_hint = 1;
+ obu_sequence_header_->enable_order_hint = true;
+ obu_sequence_header_->order_hint_bits = 7;
+ obu_sequence_header_->order_hint_shift_bits =
+ Mod32(32 - obu_sequence_header_->order_hint_bits);
+ ASSERT_FALSE(ObuIsSkipModeAllowed());
+ ASSERT_TRUE(ObuParseSkipModeParameters());
+ EXPECT_FALSE(obu_->frame_header().skip_mode_present);
+
+ ASSERT_TRUE(Init(data.GenerateData()));
+ for (int i = 0; i < kNumInterReferenceFrameTypes; ++i) {
+ obu_frame_header_->reference_frame_index[i] = i;
+ decoder_state_.reference_order_hint[i] = i;
+ }
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->reference_mode_select = true;
+ obu_frame_header_->order_hint = 1;
+ decoder_state_.order_hint = 1;
+ obu_sequence_header_->enable_order_hint = true;
+ obu_sequence_header_->order_hint_bits = 7;
+ obu_sequence_header_->order_hint_shift_bits =
+ Mod32(32 - obu_sequence_header_->order_hint_bits);
+ ASSERT_TRUE(ObuIsSkipModeAllowed());
+ ASSERT_TRUE(ObuParseSkipModeParameters());
+ EXPECT_TRUE(obu_->frame_header().skip_mode_present);
+}
+
+TEST_F(ObuParserTest, AllowWarpedMotion) {
+ BytesAndBits data;
+ data.AppendBit(0xff); // dummy.
+
+ // IsIntraFrame is true, so nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameKey;
+ obu_frame_header_->error_resilient_mode = false;
+ obu_sequence_header_->enable_warped_motion = true;
+ ASSERT_TRUE(ObuReadAllowWarpedMotion());
+ EXPECT_FALSE(obu_->frame_header().allow_warped_motion);
+
+ // error_resilient_mode is true, so nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->error_resilient_mode = true;
+ obu_sequence_header_->enable_warped_motion = true;
+ ASSERT_TRUE(ObuReadAllowWarpedMotion());
+ EXPECT_FALSE(obu_->frame_header().allow_warped_motion);
+
+ // enable_warped_motion is false, so nothing will be read.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->error_resilient_mode = false;
+ obu_sequence_header_->enable_warped_motion = false;
+ ASSERT_TRUE(ObuReadAllowWarpedMotion());
+ EXPECT_FALSE(obu_->frame_header().allow_warped_motion);
+
+ // allow_warped_motion will be read and equal to true.
+ ASSERT_TRUE(Init(data.GenerateData()));
+ obu_frame_header_->frame_type = kFrameInter;
+ obu_frame_header_->error_resilient_mode = false;
+ obu_sequence_header_->enable_warped_motion = true;
+ ASSERT_TRUE(ObuReadAllowWarpedMotion());
+ EXPECT_TRUE(obu_->frame_header().allow_warped_motion);
+}
+
+TEST_F(ObuParserTest, GlobalMotionParameters) {
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+ std::array<GlobalMotion, kNumReferenceFrameTypes> gold;
+ for (int i = kReferenceFrameLast; i <= kReferenceFrameAlternate; ++i) {
+ gold[i].type = kGlobalMotionTransformationTypeIdentity;
+ for (int j = 0; j < 6; ++j) {
+ gold[i].params[j] = (j % 3 == 2) ? 1 << kWarpedModelPrecisionBits : 0;
+ }
+ }
+
+ ASSERT_TRUE(ParseGlobalMotionParameters(data.GenerateData(), kFrameKey));
+ VerifyGlobalMotionParameters(gold);
+
+ data.Clear();
+ for (int i = kReferenceFrameLast; i <= kReferenceFrameAlternate; ++i) {
+ // is_global=1; is_rot_zoom=1; parameter_values;
+ data.AppendBytes(kDefaultGlobalMotionParametersRotZoom);
+
+ // Magic numbers based on kDefaultGlobalMotionParametersRotZoom.
+ gold[i].type = kGlobalMotionTransformationTypeRotZoom;
+ gold[i].params[0] = -73728;
+ gold[i].params[1] = -23552;
+ gold[i].params[2] = 65952;
+ gold[i].params[3] = -62;
+ gold[i].params[4] = 62;
+ gold[i].params[5] = 65952;
+ }
+
+ ASSERT_TRUE(ParseGlobalMotionParameters(data.GenerateData(), kFrameInter));
+ VerifyGlobalMotionParameters(gold);
+
+ data.Clear();
+ for (int i = kReferenceFrameLast; i <= kReferenceFrameAlternate; ++i) {
+ // This bit is not part of the hex string because it would make the whole
+ // string not align to 8 bits. Appending this separately so that we can keep
+ // the rest of them a magic hex string.
+ data.AppendBit(1); // is_global.
+ // is_rot_zoom=0; is_translation=0; parameter_values;
+ data.AppendBytes(kDefaultGlobalMotionParametersAffine);
+
+ // Magic numbers based on kDefaultGlobalMotionParametersAffine.
+ gold[i].type = kGlobalMotionTransformationTypeAffine;
+ gold[i].params[4] = -62;
+ }
+
+ ASSERT_TRUE(ParseGlobalMotionParameters(data.GenerateData(), kFrameInter));
+ VerifyGlobalMotionParameters(gold);
+}
+
+TEST_F(ObuParserTest, FilmGrainParameters) {
+ BytesAndBits data;
+ data.AppendBit(0); // dummy.
+
+ // Test film grain not present.
+ FilmGrainParams gold = {};
+ ObuSequenceHeader sequence_header = {};
+ sequence_header.film_grain_params_present = false;
+ ObuFrameHeader frame_header = {};
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ VerifyFilmGrainParameters(gold);
+
+ // Test if show_frame = false and showable_frame = false.
+ data.Clear();
+ gold = {};
+ sequence_header.film_grain_params_present = true;
+ frame_header.show_frame = false;
+ frame_header.showable_frame = false;
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ VerifyFilmGrainParameters(gold);
+
+ // Test if apply_grain = false.
+ data.Clear();
+ gold = {};
+ sequence_header.film_grain_params_present = true;
+ frame_header.show_frame = true;
+ frame_header.showable_frame = true;
+ data.AppendBit(0);
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ VerifyFilmGrainParameters(gold);
+
+ // Test if update_grain = false.
+ data.Clear();
+ gold = {};
+ sequence_header.film_grain_params_present = true;
+ frame_header.show_frame = true;
+ frame_header.showable_frame = true;
+ frame_header.frame_type = kFrameInter;
+ for (auto& index : frame_header.reference_frame_index) {
+ index = 1;
+ }
+ data.AppendBit(1);
+ gold.apply_grain = true;
+ data.AppendLiteral(16, 8);
+ gold.grain_seed = 8;
+ data.AppendBit(0);
+ gold.update_grain = false;
+ data.AppendLiteral(3, 1);
+ gold.reference_index = 1;
+ // Set up decoder_state_ with a previous frame containing saved film grain
+ // parameters.
+ decoder_state_.reference_frame[1] = buffer_pool_->GetFreeBuffer();
+ EXPECT_NE(decoder_state_.reference_frame[1], nullptr);
+ FilmGrainParams prev_grain_params = {};
+ prev_grain_params.apply_grain = true;
+ prev_grain_params.grain_seed = 11;
+ prev_grain_params.update_grain = true;
+ decoder_state_.reference_frame[1]->set_film_grain_params(prev_grain_params);
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ VerifyFilmGrainParameters(gold);
+
+ // Test if update_grain = true, is_monochrome = true;
+ data.Clear();
+ gold = {};
+ frame_header.frame_type = kFrameKey;
+ for (auto& index : frame_header.reference_frame_index) {
+ index = 0;
+ }
+ data.AppendBit(1);
+ gold.apply_grain = true;
+ data.AppendLiteral(16, 8);
+ gold.grain_seed = 8;
+ gold.update_grain = true;
+ data.AppendLiteral(4, 10);
+ gold.num_y_points = 10;
+ for (int i = 0; i < gold.num_y_points; ++i) {
+ data.AppendLiteral(8, 2 * i);
+ gold.point_y_value[i] = 2 * i;
+ data.AppendLiteral(8, i);
+ gold.point_y_scaling[i] = i;
+ }
+ sequence_header.color_config.is_monochrome = true;
+ gold.chroma_scaling_from_luma = false;
+ gold.num_u_points = 0;
+ gold.num_v_points = 0;
+ data.AppendLiteral(2, 3);
+ gold.chroma_scaling = 11;
+ data.AppendLiteral(2, 1);
+ gold.auto_regression_coeff_lag = 1;
+ const int num_pos_luma =
+ 2 * gold.auto_regression_coeff_lag * (gold.auto_regression_coeff_lag + 1);
+ for (int i = 0; i < num_pos_luma; ++i) {
+ data.AppendLiteral(8, i + 128);
+ gold.auto_regression_coeff_y[i] = i;
+ }
+ data.AppendLiteral(2, 0);
+ gold.auto_regression_shift = 6;
+ data.AppendLiteral(2, 1);
+ gold.grain_scale_shift = 1;
+ data.AppendBit(1);
+ gold.overlap_flag = true;
+ data.AppendBit(0);
+ gold.clip_to_restricted_range = false;
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ ASSERT_TRUE(
+ obu_->frame_header().frame_type == kFrameInter ||
+ obu_->frame_header().film_grain_params.update_grain); // a implies b.
+ VerifyFilmGrainParameters(gold);
+
+ // Test if update_grain = true, is_monochrome = false;
+ data.Clear();
+ gold = {};
+ frame_header.frame_type = kFrameKey;
+ data.AppendBit(1);
+ gold.apply_grain = true;
+ data.AppendLiteral(16, 8);
+ gold.grain_seed = 8;
+ gold.update_grain = true;
+ data.AppendLiteral(4, 10);
+ gold.num_y_points = 10;
+ for (int i = 0; i < gold.num_y_points; ++i) {
+ data.AppendLiteral(8, 2 * i);
+ gold.point_y_value[i] = 2 * i;
+ data.AppendLiteral(8, i);
+ gold.point_y_scaling[i] = i;
+ }
+ sequence_header.color_config.is_monochrome = false;
+ data.AppendBit(0);
+ gold.chroma_scaling_from_luma = false;
+ data.AppendLiteral(4, 5);
+ gold.num_u_points = 5;
+ for (int i = 0; i < gold.num_u_points; ++i) {
+ data.AppendLiteral(8, 2 * i + 1);
+ gold.point_u_value[i] = 2 * i + 1;
+ data.AppendLiteral(8, i);
+ gold.point_u_scaling[i] = i;
+ }
+ data.AppendLiteral(4, 3);
+ gold.num_v_points = 3;
+ for (int i = 0; i < gold.num_v_points; ++i) {
+ data.AppendLiteral(8, i);
+ gold.point_v_value[i] = i;
+ data.AppendLiteral(8, i + 1);
+ gold.point_v_scaling[i] = i + 1;
+ }
+ data.AppendLiteral(2, 3);
+ gold.chroma_scaling = 11;
+ data.AppendLiteral(2, 1);
+ gold.auto_regression_coeff_lag = 1;
+ const int num_pos_luma2 =
+ 2 * gold.auto_regression_coeff_lag * (gold.auto_regression_coeff_lag + 1);
+ for (int i = 0; i < num_pos_luma2; ++i) {
+ data.AppendLiteral(8, i + 128);
+ gold.auto_regression_coeff_y[i] = i;
+ }
+ for (int i = 0; i < num_pos_luma2 + 1; ++i) {
+ data.AppendLiteral(8, i);
+ gold.auto_regression_coeff_u[i] = i - 128;
+ }
+ for (int i = 0; i < num_pos_luma2 + 1; ++i) {
+ data.AppendLiteral(8, i);
+ gold.auto_regression_coeff_v[i] = i - 128;
+ }
+ data.AppendLiteral(2, 0);
+ gold.auto_regression_shift = 6;
+ data.AppendLiteral(2, 1);
+ gold.grain_scale_shift = 1;
+ data.AppendLiteral(8, 2);
+ gold.u_multiplier = -126;
+ data.AppendLiteral(8, 1);
+ gold.u_luma_multiplier = -127;
+ data.AppendLiteral(9, 3);
+ gold.u_offset = -253;
+ data.AppendLiteral(8, 3);
+ gold.v_multiplier = -125;
+ data.AppendLiteral(8, 2);
+ gold.v_luma_multiplier = -126;
+ data.AppendLiteral(9, 1);
+ gold.v_offset = -255;
+ data.AppendBit(1);
+ gold.overlap_flag = true;
+ data.AppendBit(0);
+ gold.clip_to_restricted_range = false;
+ ASSERT_TRUE(ParseFilmGrainParameters(data.GenerateData(), sequence_header,
+ frame_header));
+ ASSERT_TRUE(
+ obu_->frame_header().frame_type == kFrameInter ||
+ obu_->frame_header().film_grain_params.update_grain); // a implies b.
+ VerifyFilmGrainParameters(gold);
+}
+
+TEST_F(ObuParserTest, TileInfoSyntax) {
+ BytesAndBits data;
+ TileInfo gold;
+ memset(&gold, 0, sizeof(gold));
+
+ gold.uniform_spacing = true;
+ gold.tile_columns_log2 = 1;
+ gold.tile_columns = 2;
+ gold.tile_rows_log2 = 1;
+ gold.tile_rows = 2;
+ gold.tile_count = 4;
+ gold.tile_column_start[1] = 64;
+ gold.tile_column_start[2] = 88;
+ gold.tile_row_start[1] = 64;
+ gold.tile_row_start[2] = 72;
+ gold.context_update_id = 3;
+ gold.tile_size_bytes = 4;
+ data.AppendBit(static_cast<uint8_t>(gold.uniform_spacing));
+ data.AppendBit(1); // increment_tile_cols_log2.
+ data.AppendBit(0); // increment_tile_cols_log2.
+ data.AppendBit(1); // increment_tile_rows_log2.
+ data.AppendBit(0); // increment_tile_rows_log2.
+ data.AppendBit(1); // context update id, columns_log2+rows_log2 bits
+ data.AppendBit(1);
+ data.AppendLiteral(2, gold.tile_size_bytes - 1);
+
+ ASSERT_TRUE(ParseTileInfoSyntax(data.GenerateData(), 88, 72, true));
+ VerifyTileInfoParameters(gold);
+
+ gold.uniform_spacing = false;
+ gold.tile_column_width_in_superblocks[0] = 2;
+ gold.tile_column_width_in_superblocks[1] = 1;
+ gold.tile_row_height_in_superblocks[0] = 2;
+ gold.tile_row_height_in_superblocks[1] = 1;
+
+ data.SetBit(0, static_cast<uint8_t>(gold.uniform_spacing));
+ // The next 4 bits remain the same except now they represent f(w - 1) and
+ // extra_bit in DecodeUniform. All the subsequent bits are unchanged the
+ // represent the same thing as above.
+
+ ASSERT_TRUE(ParseTileInfoSyntax(data.GenerateData(), 88, 72, true));
+ VerifyTileInfoParameters(gold);
+
+ // No tiles.
+ memset(&gold, 0, sizeof(gold));
+ gold.uniform_spacing = true;
+ gold.tile_columns = 1;
+ gold.tile_rows = 1;
+ gold.tile_count = 1;
+ gold.tile_column_start[1] = 88;
+ gold.tile_row_start[1] = 72;
+ data.Clear();
+ data.AppendBit(static_cast<uint8_t>(gold.uniform_spacing));
+ data.AppendBit(0); // tile_cols_log2.
+ data.AppendBit(0); // tile_rows_log2.
+
+ ASSERT_TRUE(ParseTileInfoSyntax(data.GenerateData(), 88, 72, true));
+ VerifyTileInfoParameters(gold);
+
+ // 64x64 superblocks. No tiles.
+ gold.tile_column_start[1] = 640;
+ gold.tile_row_start[1] = 360;
+
+ ASSERT_TRUE(ParseTileInfoSyntax(data.GenerateData(), 640, 360, false));
+ VerifyTileInfoParameters(gold);
+}
+
+TEST_F(ObuParserTest, MetadataUnknownType) {
+ BytesAndBits data;
+ // The metadata_type 10 is a user private value (6-31).
+ data.AppendLiteral(8, 10); // metadata_type.
+ // The Note in Section 5.8.1 says "Decoders should ignore the entire OBU if
+ // they do not understand the metadata_type."
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, MetadataCll) {
+ BytesAndBits data;
+ ObuMetadata gold;
+ gold.max_cll = 25;
+ gold.max_fall = 100;
+
+ data.AppendLiteral(8, kMetadataTypeHdrContentLightLevel);
+ data.AppendLiteral(16, gold.max_cll);
+ data.AppendLiteral(16, gold.max_fall);
+
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+ VerifyMetadata(kMetadataTypeHdrContentLightLevel, gold);
+}
+
+TEST_F(ObuParserTest, MetadataMdcv) {
+ BytesAndBits data;
+ ObuMetadata gold;
+ for (int i = 0; i < 3; ++i) {
+ gold.primary_chromaticity_x[i] = 0;
+ gold.primary_chromaticity_y[i] = 0;
+ }
+ gold.white_point_chromaticity_x = 250;
+ gold.white_point_chromaticity_y = 2500;
+ gold.luminance_max = 6000;
+ gold.luminance_min = 3000;
+
+ data.AppendLiteral(8, kMetadataTypeHdrMasteringDisplayColorVolume);
+ for (int i = 0; i < 3; ++i) {
+ data.AppendLiteral(16, gold.primary_chromaticity_x[i]);
+ data.AppendLiteral(16, gold.primary_chromaticity_y[i]);
+ }
+ data.AppendLiteral(16, gold.white_point_chromaticity_x);
+ data.AppendLiteral(16, gold.white_point_chromaticity_y);
+ data.AppendLiteral(32, gold.luminance_max);
+ data.AppendLiteral(32, gold.luminance_min);
+
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+ VerifyMetadata(kMetadataTypeHdrMasteringDisplayColorVolume, gold);
+}
+
+TEST_F(ObuParserTest, MetadataScalability) {
+ BytesAndBits data;
+ ObuMetadata gold;
+
+ data.AppendLiteral(8, kMetadataTypeScalability);
+ data.AppendLiteral(8, 0); // scalability_mode_idc
+
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+ VerifyMetadata(kMetadataTypeScalability, gold);
+}
+
+TEST_F(ObuParserTest, MetadataItutT35) {
+ BytesAndBits data;
+ ObuMetadata gold;
+ gold.itu_t_t35_country_code = 0xA6; // 1 0 1 0 0 1 1 0 Switzerland
+ gold.itu_t_t35_country_code_extension_byte = 0;
+ gold.itu_t_t35_payload_bytes.reset(new (std::nothrow) uint8_t[10]);
+ ASSERT_NE(gold.itu_t_t35_payload_bytes, nullptr);
+ for (int i = 0; i < 10; ++i) {
+ gold.itu_t_t35_payload_bytes[i] = 9 - i;
+ }
+ gold.itu_t_t35_payload_size = 10;
+
+ data.AppendLiteral(8, kMetadataTypeItutT35);
+ data.AppendLiteral(8, gold.itu_t_t35_country_code);
+ for (int i = 0; i < 10; ++i) {
+ data.AppendLiteral(8, 9 - i);
+ }
+ // For the kMetadataTypeItutT35 metadata type, we must include the trailing
+ // bit so that the end of the itu_t_t35_payload_bytes can be identified.
+ data.AppendLiteral(8, 0x80);
+ data.AppendLiteral(8, 0x00);
+ data.AppendLiteral(8, 0x00);
+
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+ VerifyMetadata(kMetadataTypeItutT35, gold);
+}
+
+TEST_F(ObuParserTest, MetadataTimecode) {
+ BytesAndBits data;
+ ObuMetadata gold;
+
+ data.AppendLiteral(8, kMetadataTypeTimecode);
+ data.AppendLiteral(5, 0); // counting_type
+ data.AppendBit(1); // full_timestamp_flag
+ data.AppendBit(0); // discontinuity_flag
+ data.AppendBit(0); // cnt_dropped_flag
+ data.AppendLiteral(9, 8); // n_frames
+ data.AppendLiteral(6, 59); // seconds_value
+ data.AppendLiteral(6, 59); // minutes_value
+ data.AppendLiteral(5, 23); // hours_value
+ data.AppendLiteral(5, 0); // time_offset_length
+
+ ASSERT_TRUE(ParseMetadata(data.GenerateData()));
+ VerifyMetadata(kMetadataTypeTimecode, gold);
+}
+
+TEST_F(ObuParserTest, MetadataTimecodeInvalidSecondsValue) {
+ BytesAndBits data;
+ ObuMetadata gold;
+
+ data.AppendLiteral(8, kMetadataTypeTimecode);
+ data.AppendLiteral(5, 0); // counting_type
+ data.AppendBit(1); // full_timestamp_flag
+ data.AppendBit(0); // discontinuity_flag
+ data.AppendBit(0); // cnt_dropped_flag
+ data.AppendLiteral(9, 8); // n_frames
+ data.AppendLiteral(6, 60); // seconds_value
+ data.AppendLiteral(6, 59); // minutes_value
+ data.AppendLiteral(5, 23); // hours_value
+ data.AppendLiteral(5, 0); // time_offset_length
+
+ EXPECT_FALSE(ParseMetadata(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, MetadataTimecodeInvalidMinutesValue) {
+ BytesAndBits data;
+ ObuMetadata gold;
+
+ data.AppendLiteral(8, kMetadataTypeTimecode);
+ data.AppendLiteral(5, 0); // counting_type
+ data.AppendBit(1); // full_timestamp_flag
+ data.AppendBit(0); // discontinuity_flag
+ data.AppendBit(0); // cnt_dropped_flag
+ data.AppendLiteral(9, 8); // n_frames
+ data.AppendLiteral(6, 59); // seconds_value
+ data.AppendLiteral(6, 60); // minutes_value
+ data.AppendLiteral(5, 23); // hours_value
+ data.AppendLiteral(5, 0); // time_offset_length
+
+ EXPECT_FALSE(ParseMetadata(data.GenerateData()));
+}
+
+TEST_F(ObuParserTest, MetadataTimecodeInvalidHoursValue) {
+ BytesAndBits data;
+ ObuMetadata gold;
+
+ data.AppendLiteral(8, kMetadataTypeTimecode);
+ data.AppendLiteral(5, 0); // counting_type
+ data.AppendBit(1); // full_timestamp_flag
+ data.AppendBit(0); // discontinuity_flag
+ data.AppendBit(0); // cnt_dropped_flag
+ data.AppendLiteral(9, 8); // n_frames
+ data.AppendLiteral(6, 59); // seconds_value
+ data.AppendLiteral(6, 59); // minutes_value
+ data.AppendLiteral(5, 24); // hours_value
+ data.AppendLiteral(5, 0); // time_offset_length
+
+ EXPECT_FALSE(ParseMetadata(data.GenerateData()));
+}
+
+} // namespace libgav1
diff --git a/src/post_filter.h b/src/post_filter.h
index dfcd08e..a247075 100644
--- a/src/post_filter.h
+++ b/src/post_filter.h
@@ -160,7 +160,7 @@ class PostFilter {
frame_header.cdef.uv_secondary_strength[0] > 0) &&
(do_post_filter_mask & 0x02) != 0;
}
- bool DoCdef() const { return DoCdef(frame_header_, do_post_filter_mask_); }
+ bool DoCdef() const { return do_cdef_; }
// If filter levels for Y plane (0 for vertical, 1 for horizontal),
// are all zero, deblock filter will not be applied.
static bool DoDeblock(const ObuFrameHeader& frame_header,
@@ -169,9 +169,7 @@ class PostFilter {
frame_header.loop_filter.level[1] > 0) &&
(do_post_filter_mask & 0x01) != 0;
}
- bool DoDeblock() const {
- return DoDeblock(frame_header_, do_post_filter_mask_);
- }
+ bool DoDeblock() const { return do_deblock_; }
uint8_t GetZeroDeltaDeblockFilterLevel(int segment_id, int level_index,
ReferenceFrameType type,
@@ -197,9 +195,7 @@ class PostFilter {
loop_restoration.type[kPlaneV] != kLoopRestorationTypeNone) &&
(do_post_filter_mask & 0x08) != 0;
}
- bool DoRestoration() const {
- return DoRestoration(loop_restoration_, do_post_filter_mask_, planes_);
- }
+ bool DoRestoration() const { return do_restoration_; }
// Returns a pointer to the unfiltered buffer. This is used by the Tile class
// to determine where to write the output of the tile decoding process taking
@@ -214,9 +210,7 @@ class PostFilter {
return frame_header.width != frame_header.upscaled_width &&
(do_post_filter_mask & 0x04) != 0;
}
- bool DoSuperRes() const {
- return DoSuperRes(frame_header_, do_post_filter_mask_);
- }
+ bool DoSuperRes() const { return do_superres_; }
LoopRestorationInfo* restoration_info() const { return restoration_info_; }
uint8_t* GetBufferOffset(uint8_t* base_buffer, int stride, Plane plane,
int row, int column) const {
@@ -244,13 +238,9 @@ class PostFilter {
private:
// The type of the HorizontalDeblockFilter and VerticalDeblockFilter member
// functions.
- using DeblockFilter = void (PostFilter::*)(int row4x4_start,
- int column4x4_start);
- // The lookup table for picking the deblock filter, according to deblock
- // filter type.
- const DeblockFilter deblock_filter_func_[2] = {
- &PostFilter::VerticalDeblockFilter, &PostFilter::HorizontalDeblockFilter};
-
+ using DeblockFilter = void (PostFilter::*)(int row4x4_start, int row4x4_end,
+ int column4x4_start,
+ int column4x4_end);
// Functions common to all post filters.
// Extends the frame by setting the border pixel values to the one from its
@@ -308,13 +298,6 @@ class PostFilter {
// Functions for the Deblocking filter.
- static int GetIndex(int row4x4) { return DivideBy4(row4x4); }
- static int GetShift(int row4x4, int column4x4) {
- return ((row4x4 & 3) << 4) | column4x4;
- }
- int GetDeblockUnitId(int row_unit, int column_unit) const {
- return row_unit * num_64x64_blocks_per_row_ + column_unit;
- }
bool GetHorizontalDeblockFilterEdgeInfo(int row4x4, int column4x4,
uint8_t* level, int* step,
int* filter_length) const;
@@ -330,8 +313,10 @@ class PostFilter {
BlockParameters* const* bp_ptr,
uint8_t* level_u, uint8_t* level_v,
int* step, int* filter_length) const;
- void HorizontalDeblockFilter(int row4x4_start, int column4x4_start);
- void VerticalDeblockFilter(int row4x4_start, int column4x4_start);
+ void HorizontalDeblockFilter(int row4x4_start, int row4x4_end,
+ int column4x4_start, int column4x4_end);
+ void VerticalDeblockFilter(int row4x4_start, int row4x4_end,
+ int column4x4_start, int column4x4_end);
// HorizontalDeblockFilter and VerticalDeblockFilter must have the correct
// signature.
static_assert(std::is_same<decltype(&PostFilter::HorizontalDeblockFilter),
@@ -340,9 +325,6 @@ class PostFilter {
static_assert(std::is_same<decltype(&PostFilter::VerticalDeblockFilter),
DeblockFilter>::value,
"");
- // Applies deblock filtering for the superblock row starting at |row4x4| with
- // a height of 4*|sb4x4|.
- void ApplyDeblockFilterForOneSuperBlockRow(int row4x4, int sb4x4);
// Worker function used for multi-threaded deblocking.
template <LoopFilterType loop_filter_type>
void DeblockFilterWorker(std::atomic<int>* row4x4_atomic);
@@ -465,13 +447,13 @@ class PostFilter {
WorkerFunction>::value,
"");
+ // The lookup table for picking the deblock filter, according to deblock
+ // filter type.
+ const DeblockFilter deblock_filter_func_[2] = {
+ &PostFilter::VerticalDeblockFilter, &PostFilter::HorizontalDeblockFilter};
const ObuFrameHeader& frame_header_;
const LoopRestoration& loop_restoration_;
const dsp::Dsp& dsp_;
- const int num_64x64_blocks_per_row_;
- const int upscaled_width_;
- const int width_;
- const int height_;
const int8_t bitdepth_;
const int8_t subsampling_x_[kMaxPlanes];
const int8_t subsampling_y_[kMaxPlanes];
@@ -480,6 +462,10 @@ class PostFilter {
const uint8_t* const inner_thresh_;
const uint8_t* const outer_thresh_;
const bool needs_chroma_deblock_;
+ const bool do_cdef_;
+ const bool do_deblock_;
+ const bool do_restoration_;
+ const bool do_superres_;
// This stores the deblocking filter levels assuming that the delta is zero.
// This will be used by all superblocks whose delta is zero (without having to
// recompute them). The dimensions (in order) are: segment_id, level_index
@@ -492,7 +478,8 @@ class PostFilter {
int initial_subpixel_x;
int step;
} super_res_info_[kMaxPlanes];
- const Array2D<int16_t>& cdef_index_;
+ const Array2D<int8_t>& cdef_index_;
+ const Array2D<uint8_t>& cdef_skip_;
const Array2D<TransformSize>& inter_transform_sizes_;
LoopRestorationInfo* const restoration_info_;
uint8_t* const superres_coefficients_[kNumPlaneTypes];
@@ -528,7 +515,6 @@ class PostFilter {
// (1). Loop Restoration is on.
// (2). Cdef is on, or multi-threading is enabled for post filter.
YuvBuffer& loop_restoration_border_;
- const uint8_t do_post_filter_mask_;
ThreadPool* const thread_pool_;
// Tracks the progress of the post filters.
diff --git a/src/post_filter/cdef.cc b/src/post_filter/cdef.cc
index f32b0a0..037fc17 100644
--- a/src/post_filter/cdef.cc
+++ b/src/post_filter/cdef.cc
@@ -126,8 +126,8 @@ void PostFilter::PrepareCdefBlock(int block_width4x4, int block_height4x4,
const int8_t subsampling_y = y_plane ? 0 : subsampling_y_[kPlaneU];
const int start_x = MultiplyBy4(column4x4) >> subsampling_x;
const int start_y = MultiplyBy4(row4x4) >> subsampling_y;
- const int plane_width = SubsampledValue(width_, subsampling_x);
- const int plane_height = SubsampledValue(height_, subsampling_y);
+ const int plane_width = SubsampledValue(frame_header_.width, subsampling_x);
+ const int plane_height = SubsampledValue(frame_header_.height, subsampling_y);
const int block_width = MultiplyBy4(block_width4x4) >> subsampling_x;
const int block_height = MultiplyBy4(block_height4x4) >> subsampling_y;
// unit_width, unit_height are the same as block_width, block_height unless
@@ -319,7 +319,7 @@ void PostFilter::ApplyCdefForOneUnit(uint16_t* cdef_block, const int index,
}
const bool is_frame_right =
- MultiplyBy4(column4x4_start) + MultiplyBy4(block_width4x4) >= width_;
+ MultiplyBy4(column4x4_start + block_width4x4) >= frame_header_.width;
if (!is_frame_right && thread_pool_ != nullptr) {
// Backup the last 2 columns for use in the next iteration.
use_border_columns[border_columns_dst_index][0] = true;
@@ -356,104 +356,111 @@ void PostFilter::ApplyCdefForOneUnit(uint16_t* cdef_block, const int index,
const bool compute_direction_and_variance =
(y_primary_strength | frame_header_.cdef.uv_primary_strength[index]) != 0;
- BlockParameters* const* bp_row0_base =
- block_parameters_.Address(row4x4_start, column4x4_start);
- BlockParameters* const* bp_row1_base =
- bp_row0_base + block_parameters_.columns4x4();
- const int bp_stride = MultiplyBy2(block_parameters_.columns4x4());
+ const uint8_t* skip_row =
+ &cdef_skip_[row4x4_start >> 1][column4x4_start >> 4];
+ const int skip_stride = cdef_skip_.columns();
int row4x4 = row4x4_start;
do {
uint8_t* cdef_buffer_base = cdef_buffer_row_base[kPlaneY];
const uint8_t* src_buffer_base = src_buffer_row_base[kPlaneY];
const uint16_t* cdef_src_base = cdef_src_row_base[kPlaneY];
- BlockParameters* const* bp0 = bp_row0_base;
- BlockParameters* const* bp1 = bp_row1_base;
int column4x4 = column4x4_start;
- do {
- const int block_width = kStep;
- const int block_height = kStep;
- const int cdef_stride = frame_buffer_.stride(kPlaneY);
- uint8_t* const cdef_buffer = cdef_buffer_base;
- const uint16_t* const cdef_src = cdef_src_base;
- const int src_stride = frame_buffer_.stride(kPlaneY);
- const uint8_t* const src_buffer = src_buffer_base;
-
- const bool skip = (*bp0)->skip && (*(bp0 + 1))->skip && (*bp1)->skip &&
- (*(bp1 + 1))->skip;
-
- if (skip) { // No cdef filtering.
+
+ if (*skip_row == 0) {
+ for (int i = 0; i < DivideBy2(block_width4x4); ++i, ++y_index) {
direction_y[y_index] = kCdefSkip;
- if (thread_pool_ == nullptr) {
- CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride,
- block_width, block_height, sizeof(Pixel));
- }
- } else {
- // Zero out residual skip flag.
- direction_y[y_index] = 0;
-
- int variance = 0;
- if (compute_direction_and_variance) {
- if (thread_pool_ == nullptr ||
- row4x4 + kStep4x4 < row4x4_start + block_height4x4) {
- dsp_.cdef_direction(src_buffer, src_stride, &direction_y[y_index],
- &variance);
- } else if (sizeof(Pixel) == 2) {
- dsp_.cdef_direction(cdef_src, kCdefUnitSizeWithBorders * 2,
- &direction_y[y_index], &variance);
- } else {
- // If we are in the last row4x4 for this unit, then the last two
- // input rows have to come from |cdef_border_|. Since we already
- // have |cdef_src| populated correctly, use that as the input
- // for the direction process.
- uint8_t direction_src[8][8];
- const uint16_t* cdef_src_line = cdef_src;
- for (auto& direction_src_line : direction_src) {
- for (int i = 0; i < 8; ++i) {
- direction_src_line[i] = cdef_src_line[i];
- }
- cdef_src_line += kCdefUnitSizeWithBorders;
- }
- dsp_.cdef_direction(direction_src, 8, &direction_y[y_index],
- &variance);
- }
- }
- const int direction =
- (y_primary_strength == 0) ? 0 : direction_y[y_index];
- const int variance_strength =
- ((variance >> 6) != 0) ? std::min(FloorLog2(variance >> 6), 12) : 0;
- const uint8_t primary_strength =
- (variance != 0)
- ? (y_primary_strength * (4 + variance_strength) + 8) >> 4
- : 0;
- if ((primary_strength | y_secondary_strength) == 0) {
+ }
+ if (thread_pool_ == nullptr) {
+ CopyPixels(src_buffer_base, frame_buffer_.stride(kPlaneY),
+ cdef_buffer_base, frame_buffer_.stride(kPlaneY), 64, kStep,
+ sizeof(Pixel));
+ }
+ } else {
+ do {
+ const int block_width = kStep;
+ const int block_height = kStep;
+ const int cdef_stride = frame_buffer_.stride(kPlaneY);
+ uint8_t* const cdef_buffer = cdef_buffer_base;
+ const uint16_t* const cdef_src = cdef_src_base;
+ const int src_stride = frame_buffer_.stride(kPlaneY);
+ const uint8_t* const src_buffer = src_buffer_base;
+
+ const uint8_t skip_shift = (column4x4 >> 1) & 0x7;
+ const bool skip = ((*skip_row >> skip_shift) & 1) == 0;
+ if (skip) { // No cdef filtering.
+ direction_y[y_index] = kCdefSkip;
if (thread_pool_ == nullptr) {
CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride,
block_width, block_height, sizeof(Pixel));
}
} else {
- const int strength_index =
- y_strength_index | (static_cast<int>(primary_strength == 0) << 1);
- dsp_.cdef_filters[1][strength_index](
- cdef_src, kCdefUnitSizeWithBorders, block_height,
- primary_strength, y_secondary_strength,
- frame_header_.cdef.damping, direction, cdef_buffer, cdef_stride);
+ // Zero out residual skip flag.
+ direction_y[y_index] = 0;
+
+ int variance = 0;
+ if (compute_direction_and_variance) {
+ if (thread_pool_ == nullptr ||
+ row4x4 + kStep4x4 < row4x4_start + block_height4x4) {
+ dsp_.cdef_direction(src_buffer, src_stride, &direction_y[y_index],
+ &variance);
+ } else if (sizeof(Pixel) == 2) {
+ dsp_.cdef_direction(cdef_src, kCdefUnitSizeWithBorders * 2,
+ &direction_y[y_index], &variance);
+ } else {
+ // If we are in the last row4x4 for this unit, then the last two
+ // input rows have to come from |cdef_border_|. Since we already
+ // have |cdef_src| populated correctly, use that as the input
+ // for the direction process.
+ uint8_t direction_src[8][8];
+ const uint16_t* cdef_src_line = cdef_src;
+ for (auto& direction_src_line : direction_src) {
+ for (int i = 0; i < 8; ++i) {
+ direction_src_line[i] = cdef_src_line[i];
+ }
+ cdef_src_line += kCdefUnitSizeWithBorders;
+ }
+ dsp_.cdef_direction(direction_src, 8, &direction_y[y_index],
+ &variance);
+ }
+ }
+ const int direction =
+ (y_primary_strength == 0) ? 0 : direction_y[y_index];
+ const int variance_strength =
+ ((variance >> 6) != 0) ? std::min(FloorLog2(variance >> 6), 12)
+ : 0;
+ const uint8_t primary_strength =
+ (variance != 0)
+ ? (y_primary_strength * (4 + variance_strength) + 8) >> 4
+ : 0;
+ if ((primary_strength | y_secondary_strength) == 0) {
+ if (thread_pool_ == nullptr) {
+ CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride,
+ block_width, block_height, sizeof(Pixel));
+ }
+ } else {
+ const int strength_index =
+ y_strength_index |
+ (static_cast<int>(primary_strength == 0) << 1);
+ dsp_.cdef_filters[1][strength_index](
+ cdef_src, kCdefUnitSizeWithBorders, block_height,
+ primary_strength, y_secondary_strength,
+ frame_header_.cdef.damping, direction, cdef_buffer,
+ cdef_stride);
+ }
}
- }
- cdef_buffer_base += column_step[kPlaneY];
- src_buffer_base += column_step[kPlaneY];
- cdef_src_base += column_step[kPlaneY] / sizeof(Pixel);
+ cdef_buffer_base += column_step[kPlaneY];
+ src_buffer_base += column_step[kPlaneY];
+ cdef_src_base += column_step[kPlaneY] / sizeof(Pixel);
- bp0 += kStep4x4;
- bp1 += kStep4x4;
- column4x4 += kStep4x4;
- y_index++;
- } while (column4x4 < column4x4_start + block_width4x4);
+ column4x4 += kStep4x4;
+ y_index++;
+ } while (column4x4 < column4x4_start + block_width4x4);
+ }
cdef_buffer_row_base[kPlaneY] += cdef_buffer_row_base_stride[kPlaneY];
src_buffer_row_base[kPlaneY] += src_buffer_row_base_stride[kPlaneY];
cdef_src_row_base[kPlaneY] += cdef_src_row_base_stride[kPlaneY];
- bp_row0_base += bp_stride;
- bp_row1_base += bp_stride;
+ skip_row += skip_stride;
row4x4 += kStep4x4;
} while (row4x4 < row4x4_start + block_height4x4);
@@ -591,9 +598,12 @@ void PostFilter::ApplyCdefForOneSuperBlockRowHelper(
uint16_t* cdef_block, uint8_t border_columns[2][kMaxPlanes][256],
int row4x4, int block_height4x4) {
bool use_border_columns[2][2] = {};
- for (int column4x4 = 0; column4x4 < frame_header_.columns4x4;
- column4x4 += kStep64x64) {
- const int index = cdef_index_[DivideBy16(row4x4)][DivideBy16(column4x4)];
+ const bool non_zero_index = frame_header_.cdef.bits > 0;
+ const int8_t* cdef_index =
+ non_zero_index ? cdef_index_[DivideBy16(row4x4)] : nullptr;
+ int column4x4 = 0;
+ do {
+ const int index = non_zero_index ? *cdef_index++ : 0;
const int block_width4x4 =
std::min(kStep64x64, frame_header_.columns4x4 - column4x4);
@@ -602,29 +612,32 @@ void PostFilter::ApplyCdefForOneSuperBlockRowHelper(
ApplyCdefForOneUnit<uint16_t>(cdef_block, index, block_width4x4,
block_height4x4, row4x4, column4x4,
border_columns, use_border_columns);
- continue;
+ } else // NOLINT
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+ {
+ ApplyCdefForOneUnit<uint8_t>(cdef_block, index, block_width4x4,
+ block_height4x4, row4x4, column4x4,
+ border_columns, use_border_columns);
}
-#endif // LIBGAV1_MAX_BITDEPTH >= 10
- ApplyCdefForOneUnit<uint8_t>(cdef_block, index, block_width4x4,
- block_height4x4, row4x4, column4x4,
- border_columns, use_border_columns);
- }
+ column4x4 += kStep64x64;
+ } while (column4x4 < frame_header_.columns4x4);
}
void PostFilter::ApplyCdefForOneSuperBlockRow(int row4x4_start, int sb4x4,
bool is_last_row) {
assert(row4x4_start >= 0);
assert(DoCdef());
- for (int y = 0; y < sb4x4; y += kStep64x64) {
- const int row4x4 = row4x4_start + y;
+ int row4x4 = row4x4_start;
+ const int row4x4_limit = row4x4_start + sb4x4;
+ do {
if (row4x4 >= frame_header_.rows4x4) return;
// Apply cdef for the last 8 rows of the previous superblock row.
// One exception: If the superblock size is 128x128 and is_last_row is true,
// then we simply apply cdef for the entire superblock row without any lag.
// In that case, apply cdef for the previous superblock row only during the
- // first iteration (y == 0).
- if (row4x4 > 0 && (!is_last_row || y == 0)) {
+ // first iteration (row4x4 == row4x4_start).
+ if (row4x4 > 0 && (!is_last_row || row4x4 == row4x4_start)) {
assert(row4x4 >= 16);
ApplyCdefForOneSuperBlockRowHelper(cdef_block_, nullptr, row4x4 - 2, 2);
}
@@ -639,7 +652,8 @@ void PostFilter::ApplyCdefForOneSuperBlockRow(int row4x4_start, int sb4x4,
ApplyCdefForOneSuperBlockRowHelper(cdef_block_, nullptr, row4x4,
height4x4);
}
- }
+ row4x4 += kStep64x64;
+ } while (row4x4 < row4x4_limit);
}
void PostFilter::ApplyCdefWorker(std::atomic<int>* row4x4_atomic) {
diff --git a/src/post_filter/deblock.cc b/src/post_filter/deblock.cc
index 9b5ed0f..48ad823 100644
--- a/src/post_filter/deblock.cc
+++ b/src/post_filter/deblock.cc
@@ -101,9 +101,9 @@ void PostFilter::ComputeDeblockFilterLevels(
uint8_t deblock_filter_levels[kMaxSegments][kFrameLfCount]
[kNumReferenceFrameTypes][2]) const {
if (!DoDeblock()) return;
- for (int segment_id = 0;
- segment_id < (frame_header_.segmentation.enabled ? kMaxSegments : 1);
- ++segment_id) {
+ const int num_segments =
+ frame_header_.segmentation.enabled ? kMaxSegments : 1;
+ for (int segment_id = 0; segment_id < num_segments; ++segment_id) {
int level_index = 0;
for (; level_index < 2; ++level_index) {
ComputeDeblockFilterLevelsHelper(
@@ -295,8 +295,13 @@ void PostFilter::GetVerticalDeblockFilterEdgeInfoUV(
*filter_length = std::min(*step, step_prev);
}
-void PostFilter::HorizontalDeblockFilter(int row4x4_start,
- int column4x4_start) {
+void PostFilter::HorizontalDeblockFilter(int row4x4_start, int row4x4_end,
+ int column4x4_start,
+ int column4x4_end) {
+ const int height4x4 = row4x4_end - row4x4_start;
+ const int width4x4 = column4x4_end - column4x4_start;
+ if (height4x4 <= 0 || width4x4 <= 0) return;
+
const int column_step = 1;
const int src_step = 4 << pixel_size_log2_;
const ptrdiff_t src_stride = frame_buffer_.stride(kPlaneY);
@@ -305,17 +310,20 @@ void PostFilter::HorizontalDeblockFilter(int row4x4_start,
uint8_t level;
int filter_length;
- for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(column4x4_start + column4x4) < width_;
+ const int width = frame_header_.width;
+ const int height = frame_header_.height;
+ for (int column4x4 = 0;
+ column4x4 < width4x4 && MultiplyBy4(column4x4_start + column4x4) < width;
column4x4 += column_step, src += src_step) {
uint8_t* src_row = src;
- for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(row4x4_start + row4x4) < height_;
+ for (int row4x4 = 0;
+ row4x4 < height4x4 && MultiplyBy4(row4x4_start + row4x4) < height;
row4x4 += row_step) {
const bool need_filter = GetHorizontalDeblockFilterEdgeInfo(
row4x4_start + row4x4, column4x4_start + column4x4, &level, &row_step,
&filter_length);
if (need_filter) {
+ assert(level > 0 && level <= kMaxLoopFilterValue);
const dsp::LoopFilterSize size = GetLoopFilterSizeY(filter_length);
dsp_.loop_filters[size][kLoopFilterTypeHorizontal](
src_row, src_stride, outer_thresh_[level], inner_thresh_[level],
@@ -340,13 +348,13 @@ void PostFilter::HorizontalDeblockFilter(int row4x4_start,
uint8_t level_v;
int filter_length;
- for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(column4x4_start + column4x4) < width_;
+ for (int column4x4 = 0; column4x4 < width4x4 &&
+ MultiplyBy4(column4x4_start + column4x4) < width;
column4x4 += column_step, src_u += src_step, src_v += src_step) {
uint8_t* src_row_u = src_u;
uint8_t* src_row_v = src_v;
- for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(row4x4_start + row4x4) < height_;
+ for (int row4x4 = 0;
+ row4x4 < height4x4 && MultiplyBy4(row4x4_start + row4x4) < height;
row4x4 += row_step) {
GetHorizontalDeblockFilterEdgeInfoUV(
row4x4_start + row4x4, column4x4_start + column4x4, &level_u,
@@ -371,7 +379,12 @@ void PostFilter::HorizontalDeblockFilter(int row4x4_start,
}
}
-void PostFilter::VerticalDeblockFilter(int row4x4_start, int column4x4_start) {
+void PostFilter::VerticalDeblockFilter(int row4x4_start, int row4x4_end,
+ int column4x4_start, int column4x4_end) {
+ const int height4x4 = row4x4_end - row4x4_start;
+ const int width4x4 = column4x4_end - column4x4_start;
+ if (height4x4 <= 0 || width4x4 <= 0) return;
+
const ptrdiff_t row_stride = MultiplyBy4(frame_buffer_.stride(kPlaneY));
const ptrdiff_t src_stride = frame_buffer_.stride(kPlaneY);
uint8_t* src = GetSourceBuffer(kPlaneY, row4x4_start, column4x4_start);
@@ -383,18 +396,21 @@ void PostFilter::VerticalDeblockFilter(int row4x4_start, int column4x4_start) {
block_parameters_.Address(row4x4_start, column4x4_start);
const int bp_stride = block_parameters_.columns4x4();
const int column_step_shift = pixel_size_log2_;
- for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(row4x4_start + row4x4) < height_;
+ const int width = frame_header_.width;
+ const int height = frame_header_.height;
+ for (int row4x4 = 0;
+ row4x4 < height4x4 && MultiplyBy4(row4x4_start + row4x4) < height;
++row4x4, src += row_stride, bp_row_base += bp_stride) {
uint8_t* src_row = src;
BlockParameters* const* bp = bp_row_base;
- for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(column4x4_start + column4x4) < width_;
+ for (int column4x4 = 0; column4x4 < width4x4 &&
+ MultiplyBy4(column4x4_start + column4x4) < width;
column4x4 += column_step, bp += column_step) {
const bool need_filter = GetVerticalDeblockFilterEdgeInfo(
row4x4_start + row4x4, column4x4_start + column4x4, bp, &level,
&column_step, &filter_length);
if (need_filter) {
+ assert(level > 0 && level <= kMaxLoopFilterValue);
const dsp::LoopFilterSize size = GetLoopFilterSizeY(filter_length);
dsp_.loop_filters[size][kLoopFilterTypeVertical](
src_row, src_stride, outer_thresh_[level], inner_thresh_[level],
@@ -425,15 +441,15 @@ void PostFilter::VerticalDeblockFilter(int row4x4_start, int column4x4_start) {
GetDeblockPosition(row4x4_start, subsampling_y),
GetDeblockPosition(column4x4_start, subsampling_x));
const int bp_stride = block_parameters_.columns4x4() << subsampling_y;
- for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(row4x4_start + row4x4) < height_;
+ for (int row4x4 = 0;
+ row4x4 < height4x4 && MultiplyBy4(row4x4_start + row4x4) < height;
row4x4 += row_step, src_u += row_stride_u, src_v += row_stride_v,
bp_row_base += bp_stride) {
uint8_t* src_row_u = src_u;
uint8_t* src_row_v = src_v;
BlockParameters* const* bp = bp_row_base;
- for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
- MultiplyBy4(column4x4_start + column4x4) < width_;
+ for (int column4x4 = 0; column4x4 < width4x4 &&
+ MultiplyBy4(column4x4_start + column4x4) < width;
column4x4 += column_step, bp += column_step) {
GetVerticalDeblockFilterEdgeInfoUV(column4x4_start + column4x4, bp,
&level_u, &level_v, &column_step,
@@ -458,39 +474,15 @@ void PostFilter::VerticalDeblockFilter(int row4x4_start, int column4x4_start) {
}
}
-void PostFilter::ApplyDeblockFilterForOneSuperBlockRow(int row4x4_start,
- int sb4x4) {
- assert(row4x4_start >= 0);
- assert(DoDeblock());
- for (int y = 0; y < sb4x4; y += 16) {
- const int row4x4 = row4x4_start + y;
- if (row4x4 >= frame_header_.rows4x4) break;
- int column4x4;
- for (column4x4 = 0; column4x4 < frame_header_.columns4x4;
- column4x4 += kNum4x4InLoopFilterUnit) {
- // First apply vertical filtering
- VerticalDeblockFilter(row4x4, column4x4);
-
- // Delay one superblock to apply horizontal filtering.
- if (column4x4 != 0) {
- HorizontalDeblockFilter(row4x4, column4x4 - kNum4x4InLoopFilterUnit);
- }
- }
- // Horizontal filtering for the last 64x64 block.
- HorizontalDeblockFilter(row4x4, column4x4 - kNum4x4InLoopFilterUnit);
- }
-}
-
template <LoopFilterType loop_filter_type>
void PostFilter::DeblockFilterWorker(std::atomic<int>* row4x4_atomic) {
+ const int rows4x4 = frame_header_.rows4x4;
+ const int columns4x4 = frame_header_.columns4x4;
int row4x4;
- while ((row4x4 = row4x4_atomic->fetch_add(kNum4x4InLoopFilterUnit,
- std::memory_order_relaxed)) <
- frame_header_.rows4x4) {
- for (int column4x4 = 0; column4x4 < frame_header_.columns4x4;
- column4x4 += kNum4x4InLoopFilterUnit) {
- (this->*deblock_filter_func_[loop_filter_type])(row4x4, column4x4);
- }
+ while ((row4x4 = row4x4_atomic->fetch_add(
+ kNum4x4InLoopFilterUnit, std::memory_order_relaxed)) < rows4x4) {
+ (this->*deblock_filter_func_[loop_filter_type])(
+ row4x4, row4x4 + kNum4x4InLoopFilterUnit, 0, columns4x4);
}
}
@@ -504,20 +496,12 @@ void PostFilter::ApplyDeblockFilter(LoopFilterType loop_filter_type,
int column4x4_end, int sb4x4) {
assert(row4x4_start >= 0);
assert(DoDeblock());
-
- column4x4_end = std::min(column4x4_end, frame_header_.columns4x4);
+ column4x4_end =
+ std::min(Align(column4x4_end, static_cast<int>(kNum4x4InLoopFilterUnit)),
+ frame_header_.columns4x4);
if (column4x4_start >= column4x4_end) return;
-
- const DeblockFilter deblock_filter = deblock_filter_func_[loop_filter_type];
- const int sb_height4x4 =
- std::min(sb4x4, frame_header_.rows4x4 - row4x4_start);
- for (int y = 0; y < sb_height4x4; y += kNum4x4InLoopFilterUnit) {
- const int row4x4 = row4x4_start + y;
- for (int column4x4 = column4x4_start; column4x4 < column4x4_end;
- column4x4 += kNum4x4InLoopFilterUnit) {
- (this->*deblock_filter)(row4x4, column4x4);
- }
- }
+ (this->*deblock_filter_func_[loop_filter_type])(
+ row4x4_start, row4x4_start + sb4x4, column4x4_start, column4x4_end);
}
} // namespace libgav1
diff --git a/src/post_filter/loop_restoration.cc b/src/post_filter/loop_restoration.cc
index 826ef48..2e6982c 100644
--- a/src/post_filter/loop_restoration.cc
+++ b/src/post_filter/loop_restoration.cc
@@ -101,6 +101,8 @@ void PostFilter::ApplyLoopRestorationForOneSuperBlockRow(const int row4x4_start,
assert(row4x4_start >= 0);
assert(DoRestoration());
int plane = kPlaneY;
+ const int upscaled_width = frame_header_.upscaled_width;
+ const int height = frame_header_.height;
do {
if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
continue;
@@ -108,9 +110,9 @@ void PostFilter::ApplyLoopRestorationForOneSuperBlockRow(const int row4x4_start,
const ptrdiff_t stride = frame_buffer_.stride(plane) / sizeof(Pixel);
const int unit_height_offset =
kRestorationUnitOffset >> subsampling_y_[plane];
- const int plane_height = SubsampledValue(height_, subsampling_y_[plane]);
+ const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
const int plane_width =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
+ SubsampledValue(upscaled_width, subsampling_x_[plane]);
const int plane_unit_size = 1 << loop_restoration_.unit_size_log2[plane];
const int plane_process_unit_height =
kRestorationUnitHeight >> subsampling_y_[plane];
diff --git a/src/post_filter/post_filter.cc b/src/post_filter/post_filter.cc
index 7671f01..bc71410 100644
--- a/src/post_filter/post_filter.cc
+++ b/src/post_filter/post_filter.cc
@@ -26,6 +26,7 @@
#include "src/utils/array_2d.h"
#include "src/utils/blocking_counter.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
#include "src/utils/types.h"
@@ -43,101 +44,6 @@ constexpr int kLoopRestorationBorderRows[2] = {54, 26};
} // namespace
-// The following example illustrates how ExtendFrame() extends a frame.
-// Suppose the frame width is 8 and height is 4, and left, right, top, and
-// bottom are all equal to 3.
-//
-// Before:
-//
-// ABCDEFGH
-// IJKLMNOP
-// QRSTUVWX
-// YZabcdef
-//
-// After:
-//
-// AAA|ABCDEFGH|HHH [3]
-// AAA|ABCDEFGH|HHH
-// AAA|ABCDEFGH|HHH
-// ---+--------+---
-// AAA|ABCDEFGH|HHH [1]
-// III|IJKLMNOP|PPP
-// QQQ|QRSTUVWX|XXX
-// YYY|YZabcdef|fff
-// ---+--------+---
-// YYY|YZabcdef|fff [2]
-// YYY|YZabcdef|fff
-// YYY|YZabcdef|fff
-//
-// ExtendFrame() first extends the rows to the left and to the right[1]. Then
-// it copies the extended last row to the bottom borders[2]. Finally it copies
-// the extended first row to the top borders[3].
-// static
-template <typename Pixel>
-void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
- const int height, const ptrdiff_t stride,
- const int left, const int right, const int top,
- const int bottom) {
- Pixel* src = frame_start;
- // Copy to left and right borders.
- int y = height;
- do {
- ExtendLine<Pixel>(src, width, left, right);
- src += stride;
- } while (--y != 0);
- // Copy to bottom borders. For performance we copy |stride| pixels
- // (including some padding pixels potentially) in each row, ending at the
- // bottom right border pixel. In the diagram the asterisks indicate padding
- // pixels.
- //
- // |<--- stride --->|
- // **YYY|YZabcdef|fff <-- Copy from the extended last row.
- // -----+--------+---
- // **YYY|YZabcdef|fff
- // **YYY|YZabcdef|fff
- // **YYY|YZabcdef|fff <-- bottom right border pixel
- assert(src == frame_start + height * stride);
- Pixel* dst = src - left;
- src = dst - stride;
- for (int y = 0; y < bottom; ++y) {
- memcpy(dst, src, sizeof(Pixel) * stride);
- dst += stride;
- }
- // Copy to top borders. For performance we copy |stride| pixels (including
- // some padding pixels potentially) in each row, starting from the top left
- // border pixel. In the diagram the asterisks indicate padding pixels.
- //
- // +-- top left border pixel
- // |
- // v
- // AAA|ABCDEFGH|HHH**
- // AAA|ABCDEFGH|HHH**
- // AAA|ABCDEFGH|HHH**
- // ---+--------+-----
- // AAA|ABCDEFGH|HHH** <-- Copy from the extended first row.
- // |<--- stride --->|
- src = frame_start - left;
- dst = frame_start - left - top * stride;
- for (int y = 0; y < top; ++y) {
- memcpy(dst, src, sizeof(Pixel) * stride);
- dst += stride;
- }
-}
-
-template void PostFilter::ExtendFrame<uint8_t>(uint8_t* const frame_start,
- const int width,
- const int height,
- const ptrdiff_t stride,
- const int left, const int right,
- const int top, const int bottom);
-
-#if LIBGAV1_MAX_BITDEPTH >= 10
-template void PostFilter::ExtendFrame<uint16_t>(
- uint16_t* const frame_start, const int width, const int height,
- const ptrdiff_t stride, const int left, const int right, const int top,
- const int bottom);
-#endif
-
PostFilter::PostFilter(const ObuFrameHeader& frame_header,
const ObuSequenceHeader& sequence_header,
FrameScratchBuffer* const frame_scratch_buffer,
@@ -146,11 +52,6 @@ PostFilter::PostFilter(const ObuFrameHeader& frame_header,
: frame_header_(frame_header),
loop_restoration_(frame_header.loop_restoration),
dsp_(*dsp),
- // Deblocking filter always uses 64x64 as step size.
- num_64x64_blocks_per_row_(DivideBy64(frame_header.width + 63)),
- upscaled_width_(frame_header.upscaled_width),
- width_(frame_header.width),
- height_(frame_header.height),
bitdepth_(sequence_header.color_config.bitdepth),
subsampling_x_{0, sequence_header.color_config.subsampling_x,
sequence_header.color_config.subsampling_x},
@@ -165,7 +66,13 @@ PostFilter::PostFilter(const ObuFrameHeader& frame_header,
outer_thresh_(kOuterThresh[frame_header.loop_filter.sharpness]),
needs_chroma_deblock_(frame_header.loop_filter.level[kPlaneU + 1] != 0 ||
frame_header.loop_filter.level[kPlaneV + 1] != 0),
+ do_cdef_(DoCdef(frame_header, do_post_filter_mask)),
+ do_deblock_(DoDeblock(frame_header, do_post_filter_mask)),
+ do_restoration_(
+ DoRestoration(loop_restoration_, do_post_filter_mask, planes_)),
+ do_superres_(DoSuperRes(frame_header, do_post_filter_mask)),
cdef_index_(frame_scratch_buffer->cdef_index),
+ cdef_skip_(frame_scratch_buffer->cdef_skip),
inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
restoration_info_(&frame_scratch_buffer->loop_restoration_info),
superres_coefficients_{
@@ -182,18 +89,19 @@ PostFilter::PostFilter(const ObuFrameHeader& frame_header,
frame_buffer_(*frame_buffer),
cdef_border_(frame_scratch_buffer->cdef_border),
loop_restoration_border_(frame_scratch_buffer->loop_restoration_border),
- do_post_filter_mask_(do_post_filter_mask),
thread_pool_(
frame_scratch_buffer->threading_strategy.post_filter_thread_pool()) {
const int8_t zero_delta_lf[kFrameLfCount] = {};
ComputeDeblockFilterLevels(zero_delta_lf, deblock_filter_levels_);
if (DoSuperRes()) {
int plane = kPlaneY;
+ const int width = frame_header_.width;
+ const int upscaled_width_fh = frame_header_.upscaled_width;
do {
const int downscaled_width =
- SubsampledValue(width_, subsampling_x_[plane]);
+ SubsampledValue(width, subsampling_x_[plane]);
const int upscaled_width =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
+ SubsampledValue(upscaled_width_fh, subsampling_x_[plane]);
const int superres_width = downscaled_width << kSuperResScaleBits;
super_res_info_[plane].step =
(superres_width + upscaled_width / 2) / upscaled_width;
@@ -214,10 +122,10 @@ PostFilter::PostFilter(const ObuFrameHeader& frame_header,
? kMaxPlanesMonochrome
: static_cast<int>(kNumPlaneTypes);
do {
- dsp->super_res_coefficients(
- SubsampledValue(upscaled_width_, subsampling_x_[plane]),
- super_res_info_[plane].initial_subpixel_x,
- super_res_info_[plane].step, superres_coefficients_[plane]);
+ dsp->super_res_coefficients(super_res_info_[plane].upscaled_width,
+ super_res_info_[plane].initial_subpixel_x,
+ super_res_info_[plane].step,
+ superres_coefficients_[plane]);
} while (++plane < number_loops);
}
}
@@ -261,6 +169,101 @@ PostFilter::PostFilter(const ObuFrameHeader& frame_header,
}
}
+// The following example illustrates how ExtendFrame() extends a frame.
+// Suppose the frame width is 8 and height is 4, and left, right, top, and
+// bottom are all equal to 3.
+//
+// Before:
+//
+// ABCDEFGH
+// IJKLMNOP
+// QRSTUVWX
+// YZabcdef
+//
+// After:
+//
+// AAA|ABCDEFGH|HHH [3]
+// AAA|ABCDEFGH|HHH
+// AAA|ABCDEFGH|HHH
+// ---+--------+---
+// AAA|ABCDEFGH|HHH [1]
+// III|IJKLMNOP|PPP
+// QQQ|QRSTUVWX|XXX
+// YYY|YZabcdef|fff
+// ---+--------+---
+// YYY|YZabcdef|fff [2]
+// YYY|YZabcdef|fff
+// YYY|YZabcdef|fff
+//
+// ExtendFrame() first extends the rows to the left and to the right[1]. Then
+// it copies the extended last row to the bottom borders[2]. Finally it copies
+// the extended first row to the top borders[3].
+// static
+template <typename Pixel>
+void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
+ const int height, const ptrdiff_t stride,
+ const int left, const int right, const int top,
+ const int bottom) {
+ Pixel* src = frame_start;
+ // Copy to left and right borders.
+ int y = height;
+ do {
+ ExtendLine<Pixel>(src, width, left, right);
+ src += stride;
+ } while (--y != 0);
+ // Copy to bottom borders. For performance we copy |stride| pixels
+ // (including some padding pixels potentially) in each row, ending at the
+ // bottom right border pixel. In the diagram the asterisks indicate padding
+ // pixels.
+ //
+ // |<--- stride --->|
+ // **YYY|YZabcdef|fff <-- Copy from the extended last row.
+ // -----+--------+---
+ // **YYY|YZabcdef|fff
+ // **YYY|YZabcdef|fff
+ // **YYY|YZabcdef|fff <-- bottom right border pixel
+ assert(src == frame_start + height * stride);
+ Pixel* dst = src - left;
+ src = dst - stride;
+ for (int y = 0; y < bottom; ++y) {
+ memcpy(dst, src, sizeof(Pixel) * stride);
+ dst += stride;
+ }
+ // Copy to top borders. For performance we copy |stride| pixels (including
+ // some padding pixels potentially) in each row, starting from the top left
+ // border pixel. In the diagram the asterisks indicate padding pixels.
+ //
+ // +-- top left border pixel
+ // |
+ // v
+ // AAA|ABCDEFGH|HHH**
+ // AAA|ABCDEFGH|HHH**
+ // AAA|ABCDEFGH|HHH**
+ // ---+--------+-----
+ // AAA|ABCDEFGH|HHH** <-- Copy from the extended first row.
+ // |<--- stride --->|
+ src = frame_start - left;
+ dst = frame_start - left - top * stride;
+ for (int y = 0; y < top; ++y) {
+ memcpy(dst, src, sizeof(Pixel) * stride);
+ dst += stride;
+ }
+}
+
+template void PostFilter::ExtendFrame<uint8_t>(uint8_t* const frame_start,
+ const int width,
+ const int height,
+ const ptrdiff_t stride,
+ const int left, const int right,
+ const int top, const int bottom);
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+template void PostFilter::ExtendFrame<uint16_t>(
+ uint16_t* const frame_start, const int width, const int height,
+ const ptrdiff_t stride, const int left, const int right, const int top,
+ const int bottom);
+#endif
+
void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
const int width, const int height,
const ptrdiff_t stride, const int left,
@@ -269,8 +272,7 @@ void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
#if LIBGAV1_MAX_BITDEPTH >= 10
if (bitdepth_ >= 10) {
ExtendFrame<uint16_t>(reinterpret_cast<uint16_t*>(frame_start), width,
- height, stride / sizeof(uint16_t), left, right, top,
- bottom);
+ height, stride >> 1, left, right, top, bottom);
return;
}
#endif
@@ -280,11 +282,13 @@ void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
void PostFilter::ExtendBordersForReferenceFrame() {
if (frame_header_.refresh_frame_flags == 0) return;
+ const int upscaled_width = frame_header_.upscaled_width;
+ const int height = frame_header_.height;
int plane = kPlaneY;
do {
const int plane_width =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
- const int plane_height = SubsampledValue(height_, subsampling_y_[plane]);
+ SubsampledValue(upscaled_width, subsampling_x_[plane]);
+ const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
assert(frame_buffer_.left_border(plane) >= kMinLeftBorderPixels &&
frame_buffer_.right_border(plane) >= kMinRightBorderPixels &&
frame_buffer_.top_border(plane) >= kMinTopBorderPixels &&
@@ -343,11 +347,13 @@ void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
// needs 2 extra rows for the bottom border in each plane.
const int extra_rows =
(for_loop_restoration && thread_pool_ == nullptr && !DoCdef()) ? 2 : 0;
+ const int upscaled_width = frame_header_.upscaled_width;
+ const int height = frame_header_.height;
int plane = kPlaneY;
do {
const int plane_width =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
- const int plane_height = SubsampledValue(height_, subsampling_y_[plane]);
+ SubsampledValue(upscaled_width, subsampling_x_[plane]);
+ const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
const int row = (MultiplyBy4(row4x4) - row_offset) >> subsampling_y_[plane];
assert(row >= 0);
if (row >= plane_height) break;
@@ -362,16 +368,25 @@ void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
progress_row_ = row + num_rows;
}
const bool copy_bottom = row + num_rows == plane_height;
- const int stride = frame_buffer_.stride(plane);
+ const ptrdiff_t stride = frame_buffer_.stride(plane);
uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
: frame_buffer_.data(plane)) +
row * stride;
const int left_border = for_loop_restoration
? kRestorationHorizontalBorder
: frame_buffer_.left_border(plane);
+#if LIBGAV1_MSAN
+ // The optimized loop restoration code will overread the visible frame
+ // buffer into the right border. Extend the right boundary further to
+ // prevent msan warnings.
+ const int right_border = for_loop_restoration
+ ? kRestorationHorizontalBorder + 16
+ : frame_buffer_.right_border(plane);
+#else
const int right_border = for_loop_restoration
? kRestorationHorizontalBorder
: frame_buffer_.right_border(plane);
+#endif
const int top_border =
(row == 0) ? (for_loop_restoration ? kRestorationVerticalBorder
: frame_buffer_.top_border(plane))
@@ -390,6 +405,8 @@ void PostFilter::SetupLoopRestorationBorder(const int row4x4) {
assert(row4x4 >= 0);
assert(!DoCdef());
assert(DoRestoration());
+ const int upscaled_width = frame_header_.upscaled_width;
+ const int height = frame_header_.height;
int plane = kPlaneY;
do {
if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
@@ -397,9 +414,9 @@ void PostFilter::SetupLoopRestorationBorder(const int row4x4) {
}
const int row_offset = DivideBy4(row4x4);
const int num_pixels =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
+ SubsampledValue(upscaled_width, subsampling_x_[plane]);
const int row_width = num_pixels << pixel_size_log2_;
- const int plane_height = SubsampledValue(height_, subsampling_y_[plane]);
+ const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
const int absolute_row =
(MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
@@ -437,30 +454,33 @@ void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
const int row_offset_start = DivideBy4(row4x4);
const std::array<uint8_t*, kMaxPlanes> dst = {
loop_restoration_border_.data(kPlaneY) +
- row_offset_start * loop_restoration_border_.stride(kPlaneY),
+ row_offset_start * static_cast<ptrdiff_t>(
+ loop_restoration_border_.stride(kPlaneY)),
loop_restoration_border_.data(kPlaneU) +
- row_offset_start * loop_restoration_border_.stride(kPlaneU),
+ row_offset_start * static_cast<ptrdiff_t>(
+ loop_restoration_border_.stride(kPlaneU)),
loop_restoration_border_.data(kPlaneV) +
- row_offset_start * loop_restoration_border_.stride(kPlaneV)};
+ row_offset_start * static_cast<ptrdiff_t>(
+ loop_restoration_border_.stride(kPlaneV))};
// If SuperRes is enabled, then we apply SuperRes for the rows to be copied
// directly with |loop_restoration_border_| as the destination. Otherwise,
// we simply copy the rows.
if (DoSuperRes()) {
std::array<uint8_t*, kMaxPlanes> src;
std::array<int, kMaxPlanes> rows;
+ const int height = frame_header_.height;
int plane = kPlaneY;
do {
if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
rows[plane] = 0;
continue;
}
- const int plane_height =
- SubsampledValue(frame_header_.height, subsampling_y_[plane]);
+ const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
const int absolute_row =
(MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
src[plane] = GetSourceBuffer(static_cast<Plane>(plane), row4x4, 0) +
- row * frame_buffer_.stride(plane);
+ row * static_cast<ptrdiff_t>(frame_buffer_.stride(plane));
rows[plane] = Clip3(plane_height - absolute_row, 0, 4);
} while (++plane < planes_);
ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst,
@@ -487,6 +507,7 @@ void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
} while (++plane < planes_);
}
// Extend the left and right boundaries needed for loop restoration.
+ const int upscaled_width = frame_header_.upscaled_width;
int plane = kPlaneY;
do {
if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
@@ -494,7 +515,7 @@ void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
}
uint8_t* dst_line = dst[plane];
const int plane_width =
- SubsampledValue(upscaled_width_, subsampling_x_[plane]);
+ SubsampledValue(upscaled_width, subsampling_x_[plane]);
for (int i = 0; i < 4; ++i) {
#if LIBGAV1_MAX_BITDEPTH >= 10
if (bitdepth_ >= 10) {
@@ -567,7 +588,9 @@ int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
bool do_deblock) {
if (row4x4 < 0) return -1;
if (DoDeblock() && do_deblock) {
- ApplyDeblockFilterForOneSuperBlockRow(row4x4, sb4x4);
+ VerticalDeblockFilter(row4x4, row4x4 + sb4x4, 0, frame_header_.columns4x4);
+ HorizontalDeblockFilter(row4x4, row4x4 + sb4x4, 0,
+ frame_header_.columns4x4);
}
if (DoRestoration() && DoCdef()) {
SetupLoopRestorationBorder(row4x4, sb4x4);
@@ -597,7 +620,7 @@ int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
if (is_last_row && !DoBorderExtensionInLoop()) {
ExtendBordersForReferenceFrame();
}
- return is_last_row ? height_ : progress_row_;
+ return is_last_row ? frame_header_.height : progress_row_;
}
} // namespace libgav1
diff --git a/src/post_filter/super_res.cc b/src/post_filter/super_res.cc
index 554e537..2133a8a 100644
--- a/src/post_filter/super_res.cc
+++ b/src/post_filter/super_res.cc
@@ -149,16 +149,17 @@ void PostFilter::ApplySuperResThreaded() {
int num_threads = thread_pool_->num_threads() + 1;
// The number of rows that will be processed by each thread in the thread pool
// (other than the current thread).
- int thread_pool_rows = height_ / num_threads;
+ int thread_pool_rows = frame_header_.height / num_threads;
thread_pool_rows = std::max(thread_pool_rows, 1);
// Make rows of Y plane even when there is subsampling for the other planes.
if ((thread_pool_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
++thread_pool_rows;
}
// Adjust the number of threads to what we really need.
- num_threads = Clip3(height_ / thread_pool_rows, 1, num_threads);
+ num_threads = Clip3(frame_header_.height / thread_pool_rows, 1, num_threads);
// For the current thread, we round up to process all the remaining rows.
- int current_thread_rows = height_ - thread_pool_rows * (num_threads - 1);
+ int current_thread_rows =
+ frame_header_.height - thread_pool_rows * (num_threads - 1);
// Make rows of Y plane even when there is subsampling for the other planes.
if ((current_thread_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
++current_thread_rows;
diff --git a/src/post_filter_test.cc b/src/post_filter_test.cc
new file mode 100644
index 0000000..db9d0f4
--- /dev/null
+++ b/src/post_filter_test.cc
@@ -0,0 +1,956 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/post_filter.h"
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "gtest/gtest.h"
+#include "src/dsp/cdef.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/super_res.h"
+#include "src/frame_scratch_buffer.h"
+#include "src/obu_parser.h"
+#include "src/threading_strategy.h"
+#include "src/utils/array_2d.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+#include "src/yuv_buffer.h"
+#include "tests/block_utils.h"
+#include "tests/third_party/libvpx/acm_random.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr char kCdef[] = "Cdef";
+constexpr char kApplyCdefName[] = "ApplyCdef";
+constexpr int kMaxBlockWidth4x4 = 32;
+constexpr int kMaxBlockHeight4x4 = 32;
+constexpr int kMaxTestFrameSize = 1920 * 1080;
+
+int GetIdFromInputParam(int subsampling_x, int subsampling_y, int height) {
+ int id = subsampling_x * 8 + subsampling_y * 4;
+ if (height == 288) {
+ id += 0;
+ } else if (height == 480) {
+ id += 1;
+ } else if (height == 1080) {
+ id += 2;
+ } else {
+ id += 3;
+ }
+ return id;
+}
+
+const char* GetSuperResDigest8bpp(int id, int plane) {
+ static const char* const kDigestSuperRes[][kMaxPlanes] = {
+ {
+ // all input is 0.
+ "ff5f7a63d3b1f9176e216eb01a0387ad", // kPlaneY.
+ "38b6551d7ac3e86c8af407d5a1aa36dc", // kPlaneU.
+ "38b6551d7ac3e86c8af407d5a1aa36dc", // kPlaneV.
+ },
+ {
+ // all input is 1.
+ "819f21dcce0e779180bbd613a9e3543c", // kPlaneY.
+ "e784bfa8f517d83b014c3dcd45b780a5", // kPlaneU.
+ "e784bfa8f517d83b014c3dcd45b780a5", // kPlaneV.
+ },
+ {
+ // all input is 128.
+ "2d6ea5b39f9168d56c2e2b8846d208ec", // kPlaneY.
+ "8030b6e70f1544efbc37b902d3f88bd3", // kPlaneU.
+ "8030b6e70f1544efbc37b902d3f88bd3", // kPlaneV.
+ },
+ {
+ // all input is 255.
+ "5c0b4bc50e0980dc6ba7c042d3b50a5e", // kPlaneY.
+ "3c566ef847c45be09ddac297123a3bad", // kPlaneU.
+ "3c566ef847c45be09ddac297123a3bad", // kPlaneV.
+ },
+ {
+ // random input.
+ "50514467dd6a5c3a8268eddaa542c41f", // kPlaneY.
+ "3ce720c2b5b44928e1477b11040e5c00", // kPlaneU.
+ "3ce720c2b5b44928e1477b11040e5c00", // kPlaneV.
+ },
+ };
+ return kDigestSuperRes[id][plane];
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+const char* GetSuperResDigest10bpp(int id, int plane) {
+ // Digests are in Y/U/V order.
+ static const char* const kDigestSuperRes[][kMaxPlanes] = {
+ {
+ // all input is 0.
+ "fccb1f57b252b1a86d335aea929d1d58",
+ "2f244a56091c9705794e92e6bcc38058",
+ "2f244a56091c9705794e92e6bcc38058",
+ },
+ {
+ // all input is 1.
+ "de8556204999d6e4bf74cfdde61a095b",
+ "e7d0f4ce6df81c46de95da7790a67384",
+ "e7d0f4ce6df81c46de95da7790a67384",
+ },
+ {
+ // all input is 512.
+ "d3b6980363eb9b808885537b3485af87",
+ "bcffddb26210da6861e7b31414e58b77",
+ "bcffddb26210da6861e7b31414e58b77",
+ },
+ {
+ // all input is 1023.
+ "ce0762aeee1cdef1db101e4ca39bcbd6",
+ "33aeaa7f5d7c032e3dfda43925c3dcb2",
+ "33aeaa7f5d7c032e3dfda43925c3dcb2",
+ },
+ {
+ // random input.
+ "63c701bceb187ffa535be15ae58f8171",
+ "f570e30e9ea8d2a1e6d99202cd2f8994",
+ "f570e30e9ea8d2a1e6d99202cd2f8994",
+ },
+ };
+ return kDigestSuperRes[id][plane];
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+} // namespace
+
+// This type is used to parameterize the tests so is defined outside the
+// anonymous namespace to avoid the GCC -Wsubobject-linkage warning.
+struct FrameSizeParam {
+ FrameSizeParam(uint32_t width, uint32_t upscaled_width, uint32_t height,
+ int8_t ss_x, int8_t ss_y)
+ : width(width),
+ upscaled_width(upscaled_width),
+ height(height),
+ subsampling_x(ss_x),
+ subsampling_y(ss_y) {}
+ uint32_t width;
+ uint32_t upscaled_width;
+ uint32_t height;
+ int8_t subsampling_x;
+ int8_t subsampling_y;
+};
+
+// Print operators must be defined in the same namespace as the type for the
+// lookup to work correctly.
+static std::ostream& operator<<(std::ostream& os, const FrameSizeParam& param) {
+ return os << param.width << "x" << param.height
+ << ", upscaled_width: " << param.upscaled_width
+ << ", subsampling(x/y): " << static_cast<int>(param.subsampling_x)
+ << "/" << static_cast<int>(param.subsampling_y);
+}
+
+// Note the following test classes access private functions/members of
+// PostFilter. To be declared friends of PostFilter they must not have internal
+// linkage (they must be outside the anonymous namespace).
+template <int bitdepth, typename Pixel>
+class PostFilterTestBase : public testing::TestWithParam<FrameSizeParam> {
+ public:
+ PostFilterTestBase() = default;
+ PostFilterTestBase(const PostFilterTestBase&) = delete;
+ PostFilterTestBase& operator=(const PostFilterTestBase&) = delete;
+ ~PostFilterTestBase() override = default;
+
+ void SetUp() override {
+ // Allocate buffer_ with a border size of kBorderPixels (which is
+ // subsampled for chroma planes). Some tests (for loop restoration) only use
+ // the nearest 2 or 3 pixels (for both luma and chroma planes) in the
+ // border.
+ ASSERT_TRUE(buffer_.Realloc(
+ bitdepth, /*is_monochrome=*/false, frame_size_.upscaled_width,
+ frame_size_.height, frame_size_.subsampling_x,
+ frame_size_.subsampling_y, kBorderPixels, kBorderPixels, kBorderPixels,
+ kBorderPixels, nullptr, nullptr, nullptr));
+
+ ASSERT_TRUE(loop_restoration_border_.Realloc(
+ bitdepth, /*is_monochrome=*/false, frame_size_.upscaled_width,
+ frame_size_.height, frame_size_.subsampling_x,
+ frame_size_.subsampling_y, kBorderPixels, kBorderPixels, kBorderPixels,
+ kBorderPixels, nullptr, nullptr, nullptr));
+
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ const int8_t subsampling_x =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_x;
+ const int8_t subsampling_y =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_y;
+ width_[plane] = frame_size_.width >> subsampling_x;
+ upscaled_width_[plane] = frame_size_.upscaled_width >> subsampling_x;
+ stride_[plane] =
+ (frame_size_.upscaled_width + 2 * kBorderPixels) >> subsampling_x;
+ height_[plane] =
+ (frame_size_.height + 2 * kBorderPixels) >> subsampling_y;
+
+ reference_buffer_[plane].reserve(stride_[plane] * height_[plane]);
+ reference_buffer_[plane].resize(stride_[plane] * height_[plane]);
+ std::fill(reference_buffer_[plane].begin(),
+ reference_buffer_[plane].end(), 0);
+ }
+ }
+
+ protected:
+ YuvBuffer buffer_;
+ YuvBuffer cdef_border_;
+ YuvBuffer loop_restoration_border_;
+ uint32_t width_[kMaxPlanes];
+ uint32_t upscaled_width_[kMaxPlanes];
+ uint32_t stride_[kMaxPlanes];
+ uint32_t height_[kMaxPlanes];
+ std::vector<Pixel> reference_buffer_[kMaxPlanes];
+ const FrameSizeParam frame_size_ = GetParam();
+};
+
+template <int bitdepth, typename Pixel>
+class PostFilterHelperFuncTest : public PostFilterTestBase<bitdepth, Pixel> {
+ public:
+ PostFilterHelperFuncTest() = default;
+ PostFilterHelperFuncTest(const PostFilterHelperFuncTest&) = delete;
+ PostFilterHelperFuncTest& operator=(const PostFilterHelperFuncTest&) = delete;
+ ~PostFilterHelperFuncTest() override = default;
+
+ protected:
+ using PostFilterTestBase<bitdepth, Pixel>::buffer_;
+ using PostFilterTestBase<bitdepth, Pixel>::cdef_border_;
+ using PostFilterTestBase<bitdepth, Pixel>::loop_restoration_border_;
+ using PostFilterTestBase<bitdepth, Pixel>::width_;
+ using PostFilterTestBase<bitdepth, Pixel>::upscaled_width_;
+ using PostFilterTestBase<bitdepth, Pixel>::stride_;
+ using PostFilterTestBase<bitdepth, Pixel>::height_;
+ using PostFilterTestBase<bitdepth, Pixel>::reference_buffer_;
+ using PostFilterTestBase<bitdepth, Pixel>::frame_size_;
+
+ void SetUp() override {
+ PostFilterTestBase<bitdepth, Pixel>::SetUp();
+
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ const int8_t subsampling_x =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_x;
+ const int8_t subsampling_y =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_y;
+ width_[plane] = frame_size_.width >> subsampling_x;
+ upscaled_width_[plane] = frame_size_.upscaled_width >> subsampling_x;
+ stride_[plane] = (frame_size_.upscaled_width >> subsampling_x) +
+ 2 * kRestorationHorizontalBorder;
+ height_[plane] = (frame_size_.height >> subsampling_y) +
+ 2 * kRestorationVerticalBorder;
+ reference_buffer_[plane].reserve(stride_[plane] * height_[plane]);
+ reference_buffer_[plane].resize(stride_[plane] * height_[plane]);
+ std::fill(reference_buffer_[plane].begin(),
+ reference_buffer_[plane].end(), 0);
+ buffer_border_corner_[plane] =
+ reinterpret_cast<Pixel*>(buffer_.data(plane)) -
+ buffer_.stride(plane) / sizeof(Pixel) * kRestorationVerticalBorder -
+ kRestorationHorizontalBorder;
+ loop_restoration_border_corner_[plane] =
+ reinterpret_cast<Pixel*>(loop_restoration_border_.data(plane)) -
+ loop_restoration_border_.stride(plane) / sizeof(Pixel) *
+ kRestorationVerticalBorder -
+ kRestorationHorizontalBorder;
+ }
+ }
+
+ void TestExtendFrame(bool use_fixed_values, Pixel value);
+ void TestAdjustFrameBufferPointer();
+ void TestPrepareLoopRestorationBlock();
+
+ // Fill the frame buffer with either a fixed value, or random values.
+ // If fill in with random values, make special operations at buffer
+ // boundaries. Make the outer most 3 pixel wide borders the same value
+ // as their immediate inner neighbor. For example:
+ // 4 4 4 4 5 6 6 6 6
+ // 4 4 4 4 5 6 6 6 6
+ // 4 4 4 4 5 6 6 6 6
+ // ---------
+ // 4 4 4 | 4 5 6 | 6 6 6
+ // 1 1 1 | 1 0 1 | 1 1 1
+ // 0 0 0 | 0 1 0 | 0 0 0
+ // 1 1 1 | 1 0 1 | 1 1 1
+ // 0 0 0 | 0 1 0 | 0 0 0
+ // 6 6 6 | 6 5 4 | 4 4 4
+ // -------
+ // 6 6 6 6 5 4 4 4 4
+ // 6 6 6 6 5 4 4 4 4
+ // 6 6 6 6 5 4 4 4 4
+ // Pixels within box is the current block. Outside is extended area from it.
+ void FillBuffer(bool use_fixed_values, Pixel value);
+
+ // Points to the upper left corner of the restoration border in buffer_.
+ Pixel* buffer_border_corner_[kMaxPlanes];
+ // Points to the upper left corner of the restoration border in
+ // loop_restoration_border_.
+ Pixel* loop_restoration_border_corner_[kMaxPlanes];
+};
+
+template <int bitdepth, typename Pixel>
+void PostFilterHelperFuncTest<bitdepth, Pixel>::FillBuffer(
+ bool use_fixed_values, Pixel value) {
+ if (use_fixed_values) {
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ // Fill buffer with a fixed value.
+ std::fill(reference_buffer_[plane].begin(),
+ reference_buffer_[plane].end(), value);
+ // Fill frame buffer. Note that the border is not filled.
+ auto* row = reinterpret_cast<Pixel*>(buffer_.data(plane));
+ for (int i = 0; i < buffer_.height(plane); ++i) {
+ std::fill(row, row + width_[plane], value);
+ row += buffer_.stride(plane) / sizeof(Pixel);
+ }
+ }
+ } else { // Random value.
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ const int mask = (1 << bitdepth) - 1;
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ // Fill buffer with random values.
+ std::vector<Pixel> line_buffer(stride_[plane]);
+ std::fill(line_buffer.begin(), line_buffer.end(), 0);
+ for (int i = kRestorationHorizontalBorder;
+ i < stride_[plane] - kRestorationHorizontalBorder; ++i) {
+ line_buffer[i] = rnd.Rand16() & mask;
+ }
+ // Copy boundary values to extended border.
+ for (int i = 0; i < kRestorationHorizontalBorder; ++i) {
+ line_buffer[i] = line_buffer[kRestorationHorizontalBorder];
+ line_buffer[stride_[plane] - i - 1] =
+ line_buffer[stride_[plane] - 1 - kRestorationHorizontalBorder];
+ }
+ // The first three rows are the same as the line_buffer.
+ for (int i = 0; i < kRestorationVerticalBorder + 1; ++i) {
+ std::copy(line_buffer.begin(), line_buffer.end(),
+ reference_buffer_[plane].begin() + i * stride_[plane]);
+ }
+ for (int i = kRestorationVerticalBorder + 1;
+ i < height_[plane] - kRestorationVerticalBorder; ++i) {
+ for (int j = kRestorationHorizontalBorder;
+ j < stride_[plane] - kRestorationHorizontalBorder; ++j) {
+ line_buffer[j] = rnd.Rand16() & mask;
+ }
+ for (int j = 0; j < kRestorationHorizontalBorder; ++j) {
+ line_buffer[j] = line_buffer[kRestorationHorizontalBorder];
+ line_buffer[stride_[plane] - j - 1] =
+ line_buffer[stride_[plane] - 1 - kRestorationHorizontalBorder];
+ }
+ std::copy(line_buffer.begin(), line_buffer.end(),
+ reference_buffer_[plane].begin() + i * stride_[plane]);
+ }
+ // The extended border are the same as the line_buffer.
+ for (int i = 0; i < kRestorationVerticalBorder; ++i) {
+ std::copy(line_buffer.begin(), line_buffer.end(),
+ reference_buffer_[plane].begin() +
+ (height_[plane] - kRestorationVerticalBorder + i) *
+ stride_[plane]);
+ }
+
+ // Fill frame buffer. Note that the border is not filled.
+ for (int i = 0; i < buffer_.height(plane); ++i) {
+ memcpy(buffer_.data(plane) + i * buffer_.stride(plane),
+ reference_buffer_[plane].data() + kRestorationHorizontalBorder +
+ (i + kRestorationVerticalBorder) * stride_[plane],
+ sizeof(Pixel) * width_[plane]);
+ }
+ }
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void PostFilterHelperFuncTest<bitdepth, Pixel>::TestExtendFrame(
+ bool use_fixed_values, Pixel value) {
+ ObuFrameHeader frame_header = {};
+ frame_header.upscaled_width = frame_size_.upscaled_width;
+ frame_header.width = frame_size_.width;
+ frame_header.height = frame_size_.height;
+ ObuSequenceHeader sequence_header;
+ sequence_header.color_config.bitdepth = bitdepth;
+ sequence_header.color_config.is_monochrome = false;
+ sequence_header.color_config.subsampling_x = frame_size_.subsampling_x;
+ sequence_header.color_config.subsampling_y = frame_size_.subsampling_y;
+
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ ASSERT_NE(dsp, nullptr);
+ FrameScratchBuffer frame_scratch_buffer;
+
+ PostFilter post_filter(frame_header, sequence_header, &frame_scratch_buffer,
+ &buffer_, dsp,
+ /*do_post_filter_mask=*/0x00);
+ FillBuffer(use_fixed_values, value);
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ const int plane_width =
+ plane == kPlaneY ? frame_header.upscaled_width
+ : frame_header.upscaled_width >>
+ sequence_header.color_config.subsampling_x;
+ const int plane_height =
+ plane == kPlaneY
+ ? frame_header.height
+ : frame_header.height >> sequence_header.color_config.subsampling_y;
+ PostFilter::ExtendFrame<Pixel>(
+ reinterpret_cast<Pixel*>(buffer_.data(plane)), plane_width,
+ plane_height, buffer_.stride(plane) / sizeof(Pixel),
+ kRestorationHorizontalBorder, kRestorationHorizontalBorder,
+ kRestorationVerticalBorder, kRestorationVerticalBorder);
+ const bool success = test_utils::CompareBlocks<Pixel>(
+ buffer_border_corner_[plane], reference_buffer_[plane].data(),
+ stride_[plane], height_[plane], buffer_.stride(plane) / sizeof(Pixel),
+ stride_[plane], /*check_padding=*/false, /*print_diff=*/false);
+ ASSERT_TRUE(success) << "Failure of extend frame at plane: " << plane;
+ }
+}
+
+template <int bitdepth, typename Pixel>
+class PostFilterSuperResTest : public PostFilterTestBase<bitdepth, Pixel> {
+ public:
+ PostFilterSuperResTest() {
+ test_utils::ResetDspTable(bitdepth);
+ dsp::SuperResInit_C();
+ dsp::SuperResInit_SSE4_1();
+ dsp::SuperResInit_NEON();
+ }
+ PostFilterSuperResTest(const PostFilterSuperResTest&) = delete;
+ PostFilterSuperResTest& operator=(const PostFilterSuperResTest&) = delete;
+ ~PostFilterSuperResTest() override = default;
+
+ protected:
+ using PostFilterTestBase<bitdepth, Pixel>::buffer_;
+ using PostFilterTestBase<bitdepth, Pixel>::width_;
+ using PostFilterTestBase<bitdepth, Pixel>::upscaled_width_;
+ using PostFilterTestBase<bitdepth, Pixel>::stride_;
+ using PostFilterTestBase<bitdepth, Pixel>::height_;
+ using PostFilterTestBase<bitdepth, Pixel>::reference_buffer_;
+ using PostFilterTestBase<bitdepth, Pixel>::frame_size_;
+
+ void TestApplySuperRes(bool use_fixed_values, Pixel value, int id,
+ bool multi_threaded);
+};
+
+// This class must be in namespace libgav1 to access private member function
+// of class PostFilter in src/post_filter.h.
+template <int bitdepth, typename Pixel>
+void PostFilterSuperResTest<bitdepth, Pixel>::TestApplySuperRes(
+ bool use_fixed_values, Pixel value, int id, bool multi_threaded) {
+ ObuFrameHeader frame_header = {};
+ frame_header.width = frame_size_.width;
+ frame_header.upscaled_width = frame_size_.upscaled_width;
+ frame_header.height = frame_size_.height;
+ frame_header.rows4x4 = DivideBy4(frame_size_.height);
+ frame_header.columns4x4 = DivideBy4(frame_size_.width);
+ frame_header.tile_info.tile_count = 1;
+ ObuSequenceHeader sequence_header;
+ sequence_header.color_config.bitdepth = bitdepth;
+ sequence_header.color_config.is_monochrome = false;
+ sequence_header.color_config.subsampling_x = frame_size_.subsampling_x;
+ sequence_header.color_config.subsampling_y = frame_size_.subsampling_y;
+
+ // Apply SuperRes.
+ Array2D<int16_t> cdef_index;
+ Array2D<TransformSize> inter_transform_sizes;
+ const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
+ ASSERT_NE(dsp, nullptr);
+ constexpr int kNumThreads = 4;
+ FrameScratchBuffer frame_scratch_buffer;
+ if (multi_threaded) {
+ ASSERT_TRUE(frame_scratch_buffer.threading_strategy.Reset(frame_header,
+ kNumThreads));
+ }
+ const int pixel_size = sequence_header.color_config.bitdepth == 8
+ ? sizeof(uint8_t)
+ : sizeof(uint16_t);
+ ASSERT_TRUE(frame_scratch_buffer.superres_coefficients[kPlaneTypeY].Resize(
+ kSuperResFilterTaps * Align(frame_header.upscaled_width, 16) *
+ pixel_size));
+ if (!sequence_header.color_config.is_monochrome &&
+ sequence_header.color_config.subsampling_x != 0) {
+ ASSERT_TRUE(frame_scratch_buffer.superres_coefficients[kPlaneTypeUV].Resize(
+ kSuperResFilterTaps *
+ Align(SubsampledValue(frame_header.upscaled_width, 1), 16) *
+ pixel_size));
+ }
+ ASSERT_TRUE(frame_scratch_buffer.superres_line_buffer.Realloc(
+ sequence_header.color_config.bitdepth,
+ sequence_header.color_config.is_monochrome,
+ MultiplyBy4(frame_header.columns4x4), (multi_threaded ? kNumThreads : 1),
+ sequence_header.color_config.subsampling_x,
+ /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
+ 2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
+ nullptr, nullptr, nullptr));
+ PostFilter post_filter(frame_header, sequence_header, &frame_scratch_buffer,
+ &buffer_, dsp,
+ /*do_post_filter_mask=*/0x04);
+
+ const int num_planes = sequence_header.color_config.is_monochrome
+ ? kMaxPlanesMonochrome
+ : kMaxPlanes;
+ int width[kMaxPlanes];
+ int upscaled_width[kMaxPlanes];
+ int height[kMaxPlanes];
+
+ for (int plane = kPlaneY; plane < num_planes; ++plane) {
+ const int8_t subsampling_x =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_x;
+ const int8_t subsampling_y =
+ (plane == kPlaneY) ? 0 : frame_size_.subsampling_y;
+ width[plane] = frame_size_.width >> subsampling_x;
+ upscaled_width[plane] = frame_size_.upscaled_width >> subsampling_x;
+ height[plane] = frame_size_.height >> subsampling_y;
+ if (use_fixed_values) {
+ auto* src = reinterpret_cast<Pixel*>(post_filter.cdef_buffer_[plane]);
+ for (int y = 0; y < height[plane]; ++y) {
+ for (int x = 0; x < width[plane]; ++x) {
+ src[x] = value;
+ }
+ src += buffer_.stride(plane) / sizeof(Pixel);
+ }
+ } else { // Random input.
+ const int mask = (1 << bitdepth) - 1;
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ auto* src = reinterpret_cast<Pixel*>(post_filter.cdef_buffer_[plane]);
+ for (int y = 0; y < height[plane]; ++y) {
+ for (int x = 0; x < width[plane]; ++x) {
+ src[x] = rnd.Rand16() & mask;
+ }
+ src += buffer_.stride(plane) / sizeof(Pixel);
+ }
+ }
+ }
+
+ if (multi_threaded) {
+ post_filter.ApplySuperResThreaded();
+ } else {
+ std::array<uint8_t*, kMaxPlanes> buffers = {
+ post_filter.cdef_buffer_[kPlaneY], post_filter.cdef_buffer_[kPlaneU],
+ post_filter.cdef_buffer_[kPlaneV]};
+ std::array<uint8_t*, kMaxPlanes> dst = {
+ post_filter.GetSuperResBuffer(static_cast<Plane>(kPlaneY), 0, 0),
+ post_filter.GetSuperResBuffer(static_cast<Plane>(kPlaneU), 0, 0),
+ post_filter.GetSuperResBuffer(static_cast<Plane>(kPlaneV), 0, 0)};
+ std::array<int, kMaxPlanes> rows = {
+ frame_header.rows4x4 * 4,
+ (frame_header.rows4x4 * 4) >> frame_size_.subsampling_y,
+ (frame_header.rows4x4 * 4) >> frame_size_.subsampling_y};
+ post_filter.ApplySuperRes(buffers, rows, /*line_buffer_row=*/-1, dst);
+ }
+
+ // Check md5.
+ std::vector<Pixel> output;
+ for (int plane = kPlaneY; plane < num_planes; ++plane) {
+ output.reserve(upscaled_width[plane] * height[plane]);
+ output.resize(upscaled_width[plane] * height[plane]);
+ auto* dst = reinterpret_cast<Pixel*>(
+ post_filter.GetSuperResBuffer(static_cast<Plane>(plane), 0, 0));
+ for (int y = 0; y < height[plane]; ++y) {
+ for (int x = 0; x < upscaled_width[plane]; ++x) {
+ output[y * upscaled_width[plane] + x] = dst[x];
+ }
+ dst += buffer_.stride(plane) / sizeof(Pixel);
+ }
+ const std::string digest = test_utils::GetMd5Sum(
+ output.data(), upscaled_width[plane] * height[plane] * sizeof(Pixel));
+ printf("MD5: %s\n", digest.c_str());
+ const char* expected_digest = nullptr;
+ switch (bitdepth) {
+ case 8:
+ expected_digest = GetSuperResDigest8bpp(id, plane);
+ break;
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ case 10:
+ expected_digest = GetSuperResDigest10bpp(id, plane);
+ break;
+#endif
+ }
+ ASSERT_NE(expected_digest, nullptr);
+ EXPECT_STREQ(digest.c_str(), expected_digest);
+ }
+}
+
+using PostFilterSuperResTest8bpp = PostFilterSuperResTest<8, uint8_t>;
+
+const FrameSizeParam kTestParamSuperRes[] = {
+ FrameSizeParam(176, 352, 288, 1, 1)};
+
+TEST_P(PostFilterSuperResTest8bpp, ApplySuperRes) {
+ TestApplySuperRes(true, 0, 0, false);
+ TestApplySuperRes(true, 1, 1, false);
+ TestApplySuperRes(true, 128, 2, false);
+ TestApplySuperRes(true, 255, 3, false);
+ TestApplySuperRes(false, 0, 4, false);
+}
+
+TEST_P(PostFilterSuperResTest8bpp, ApplySuperResThreaded) {
+ TestApplySuperRes(true, 0, 0, true);
+ TestApplySuperRes(true, 1, 1, true);
+ TestApplySuperRes(true, 128, 2, true);
+ TestApplySuperRes(true, 255, 3, true);
+ TestApplySuperRes(false, 0, 4, true);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterSuperResTestInstance,
+ PostFilterSuperResTest8bpp,
+ testing::ValuesIn(kTestParamSuperRes));
+
+using PostFilterHelperFuncTest8bpp = PostFilterHelperFuncTest<8, uint8_t>;
+
+const FrameSizeParam kTestParamExtendFrame[] = {
+ FrameSizeParam(16, 16, 16, 1, 1),
+ FrameSizeParam(64, 64, 64, 1, 1),
+ FrameSizeParam(128, 128, 64, 1, 1),
+ FrameSizeParam(64, 64, 128, 1, 1),
+ FrameSizeParam(352, 352, 288, 1, 1),
+ FrameSizeParam(720, 720, 480, 1, 1),
+ FrameSizeParam(1080, 1080, 720, 1, 1),
+ FrameSizeParam(16, 16, 16, 0, 0),
+ FrameSizeParam(64, 64, 64, 0, 0),
+ FrameSizeParam(128, 128, 64, 0, 0),
+ FrameSizeParam(64, 64, 128, 0, 0),
+ FrameSizeParam(352, 352, 288, 0, 0),
+ FrameSizeParam(720, 720, 480, 0, 0),
+ FrameSizeParam(1080, 1080, 720, 0, 0)};
+
+TEST_P(PostFilterHelperFuncTest8bpp, ExtendFrame) {
+ TestExtendFrame(true, 0);
+ TestExtendFrame(true, 1);
+ TestExtendFrame(true, 128);
+ TestExtendFrame(true, 255);
+ TestExtendFrame(false, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterHelperFuncTestInstance,
+ PostFilterHelperFuncTest8bpp,
+ testing::ValuesIn(kTestParamExtendFrame));
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using PostFilterSuperResTest10bpp = PostFilterSuperResTest<10, uint16_t>;
+
+TEST_P(PostFilterSuperResTest10bpp, ApplySuperRes) {
+ TestApplySuperRes(true, 0, 0, false);
+ TestApplySuperRes(true, 1, 1, false);
+ TestApplySuperRes(true, 1 << 9, 2, false);
+ TestApplySuperRes(true, (1 << 10) - 1, 3, false);
+ TestApplySuperRes(false, 0, 4, false);
+}
+
+TEST_P(PostFilterSuperResTest10bpp, ApplySuperResThreaded) {
+ TestApplySuperRes(true, 0, 0, true);
+ TestApplySuperRes(true, 1, 1, true);
+ TestApplySuperRes(true, 1 << 9, 2, true);
+ TestApplySuperRes(true, (1 << 10) - 1, 3, true);
+ TestApplySuperRes(false, 0, 4, true);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterSuperResTestInstance,
+ PostFilterSuperResTest10bpp,
+ testing::ValuesIn(kTestParamSuperRes));
+
+using PostFilterHelperFuncTest10bpp = PostFilterHelperFuncTest<10, uint16_t>;
+
+TEST_P(PostFilterHelperFuncTest10bpp, ExtendFrame) {
+ TestExtendFrame(true, 0);
+ TestExtendFrame(true, 1);
+ TestExtendFrame(true, 255);
+ TestExtendFrame(true, (1 << 10) - 1);
+ TestExtendFrame(false, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterHelperFuncTestInstance,
+ PostFilterHelperFuncTest10bpp,
+ testing::ValuesIn(kTestParamExtendFrame));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+namespace {
+
+const char* GetDigestApplyCdef8bpp(int id) {
+ static const char* const kDigest[] = {
+ "9593af24f9c6faecce53437f6e128edf", "ecb633cc2ecd6e7e0cf39d4439f4a6ea",
+ "9ec4cb4124f0a686a7bda72b447f5b8e", "7ebd859a23162bc864a69dbea60bc687",
+ "de7a15fc00664692a794aa68cf695980", "cf3fc8fe041f68d31ab4e34ad3643541",
+ "94c116b191b0268cf7ab4a0e6996e1ec", "1ad60c943a5a914aba7bc26706620a05",
+ "ce33c6f80e3608c4d18c49be2e393c20", "e140586ffc663798b74b8f6fb5b44736",
+ "b7379bba8bcb97f09a74655f4e0eee91", "02ce174061c98babd3987461b3984e47",
+ "64655dd1dfba8317e27d2fdcb211b7b4", "eeb6a61c70c5ee75a4c31dc5099b4dfb",
+ "ee944b31148fa2e30938084f7c046464", "db7b63497750fa4c51cf45c56a2da01c",
+ };
+ return kDigest[id];
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+const char* GetDigestApplyCdef10bpp(int id) {
+ static const char* const kDigest[] = {
+ "53f8d68ac7f3aea65151b2066f8501c9", "021e70d5406fa182dd9713380eb66d1d",
+ "bab1c84e7f06b87d81617d2d0a194b89", "58e302ff0522f64901909fb97535b270",
+ "5ff95a6a798eadc7207793c03d898ce4", "1483d28cc0f1bfffedd1128966719aa0",
+ "6af5a36890b465ae962c2878af874f70", "bd1ed4a2ff09d323ab98190d1805a010",
+ "5ff95a6a798eadc7207793c03d898ce4", "1483d28cc0f1bfffedd1128966719aa0",
+ "6af5a36890b465ae962c2878af874f70", "bd1ed4a2ff09d323ab98190d1805a010",
+ "6f0299645cd6f0655fd26044cd43a37c", "56d7febf5bbebdc82e8f157ab926a0bb",
+ "f54654f11006453f496be5883216a3bb", "9abc6e3230792ba78bcc65504a62075e",
+ };
+ return kDigest[id];
+}
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+} // namespace
+
+template <int bitdepth, typename Pixel>
+class PostFilterApplyCdefTest : public testing::TestWithParam<FrameSizeParam>,
+ public test_utils::MaxAlignedAllocable {
+ public:
+ PostFilterApplyCdefTest() = default;
+ PostFilterApplyCdefTest(const PostFilterApplyCdefTest&) = delete;
+ PostFilterApplyCdefTest& operator=(const PostFilterApplyCdefTest&) = delete;
+ ~PostFilterApplyCdefTest() override = default;
+
+ protected:
+ void SetUp() override {
+ test_utils::ResetDspTable(bitdepth);
+ dsp::CdefInit_C();
+ dsp::CdefInit_SSE4_1();
+ dsp::CdefInit_NEON();
+
+ dsp_ = dsp::GetDspTable(bitdepth);
+ ASSERT_NE(dsp_, nullptr);
+ }
+
+ // Sets sequence_header_, frame_header_, cdef_index_ and cdef_skip_.
+ // Allocates yuv_buffer_ but does not set it.
+ void SetInput(libvpx_test::ACMRandom* rnd);
+ // Sets yuv_buffer_.
+ void SetInputBuffer(libvpx_test::ACMRandom* rnd, PostFilter* post_filter);
+ void CopyFilterOutputToDestBuffer();
+ void TestMultiThread(int num_threads);
+
+ ObuSequenceHeader sequence_header_;
+ ObuFrameHeader frame_header_ = {};
+ FrameScratchBuffer frame_scratch_buffer_;
+ YuvBuffer yuv_buffer_;
+ const dsp::Dsp* dsp_;
+ FrameSizeParam param_ = GetParam();
+ Pixel dest_[kMaxTestFrameSize * kMaxPlanes];
+ const size_t y_size_ = param_.width * param_.height;
+ const size_t uv_size_ = y_size_ >>
+ (param_.subsampling_x + param_.subsampling_y);
+ const size_t size_ = y_size_ + uv_size_ * 2;
+};
+
+template <int bitdepth, typename Pixel>
+void PostFilterApplyCdefTest<bitdepth, Pixel>::SetInput(
+ libvpx_test::ACMRandom* rnd) {
+ sequence_header_.color_config.bitdepth = bitdepth;
+ sequence_header_.color_config.subsampling_x = param_.subsampling_x;
+ sequence_header_.color_config.subsampling_y = param_.subsampling_y;
+ sequence_header_.color_config.is_monochrome = false;
+ sequence_header_.use_128x128_superblock =
+ static_cast<bool>(rnd->Rand16() & 1);
+
+ ASSERT_TRUE(param_.width <= param_.upscaled_width);
+ ASSERT_TRUE(param_.upscaled_width * param_.height <= kMaxTestFrameSize)
+ << "Please adjust the max frame size.";
+
+ frame_header_.width = param_.width;
+ frame_header_.upscaled_width = param_.upscaled_width;
+ frame_header_.height = param_.height;
+ frame_header_.columns4x4 = DivideBy4(Align(frame_header_.width, 8));
+ frame_header_.rows4x4 = DivideBy4(Align(frame_header_.height, 8));
+ frame_header_.tile_info.tile_count = 1;
+ frame_header_.refresh_frame_flags = 0;
+ Cdef* const cdef = &frame_header_.cdef;
+ const int coeff_shift = bitdepth - 8;
+ do {
+ cdef->damping = (rnd->Rand16() & 3) + 3 + coeff_shift;
+ cdef->bits = rnd->Rand16() & 3;
+ } while (cdef->bits <= 0);
+ for (int i = 0; i < (1 << cdef->bits); ++i) {
+ cdef->y_primary_strength[i] = (rnd->Rand16() & 15) << coeff_shift;
+ cdef->y_secondary_strength[i] = rnd->Rand16() & 3;
+ if (cdef->y_secondary_strength[i] == 3) {
+ ++cdef->y_secondary_strength[i];
+ }
+ cdef->y_secondary_strength[i] <<= coeff_shift;
+ cdef->uv_primary_strength[i] = (rnd->Rand16() & 15) << coeff_shift;
+ cdef->uv_secondary_strength[i] = rnd->Rand16() & 3;
+ if (cdef->uv_secondary_strength[i] == 3) {
+ ++cdef->uv_secondary_strength[i];
+ }
+ cdef->uv_secondary_strength[i] <<= coeff_shift;
+ }
+
+ const int rows64x64 = DivideBy16(frame_header_.rows4x4 + kMaxBlockHeight4x4);
+ const int columns64x64 =
+ DivideBy16(frame_header_.columns4x4 + kMaxBlockWidth4x4);
+ ASSERT_TRUE(frame_scratch_buffer_.cdef_index.Reset(rows64x64, columns64x64));
+ for (int row = 0; row < rows64x64; ++row) {
+ for (int column = 0; column < columns64x64; ++column) {
+ frame_scratch_buffer_.cdef_index[row][column] =
+ rnd->Rand16() & ((1 << cdef->bits) - 1);
+ }
+ }
+
+ const int skip_rows = DivideBy2(frame_header_.rows4x4 + kMaxBlockHeight4x4);
+ const int skip_columns =
+ DivideBy16(frame_header_.columns4x4 + kMaxBlockWidth4x4);
+ ASSERT_TRUE(frame_scratch_buffer_.cdef_skip.Reset(skip_rows, skip_columns));
+ for (int row = 0; row < skip_rows; ++row) {
+ memset(frame_scratch_buffer_.cdef_skip[row], 0xFF, skip_columns);
+ }
+
+ ASSERT_TRUE(yuv_buffer_.Realloc(
+ sequence_header_.color_config.bitdepth,
+ sequence_header_.color_config.is_monochrome, frame_header_.upscaled_width,
+ frame_header_.height, sequence_header_.color_config.subsampling_x,
+ sequence_header_.color_config.subsampling_y, kBorderPixels, kBorderPixels,
+ kBorderPixels, kBorderPixels, nullptr, nullptr, nullptr))
+ << "Failed to allocate source buffer.";
+}
+
+template <int bitdepth, typename Pixel>
+void PostFilterApplyCdefTest<bitdepth, Pixel>::SetInputBuffer(
+ libvpx_test::ACMRandom* rnd, PostFilter* post_filter) {
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ const int subsampling_x = (plane == 0) ? 0 : param_.subsampling_x;
+ const int subsampling_y = (plane == 0) ? 0 : param_.subsampling_y;
+ const int plane_width =
+ MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
+ const int plane_height =
+ MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
+ auto* src =
+ reinterpret_cast<Pixel*>(post_filter->GetUnfilteredBuffer(plane));
+ const int src_stride = yuv_buffer_.stride(plane) / sizeof(src[0]);
+ for (int y = 0; y < plane_height; ++y) {
+ for (int x = 0; x < plane_width; ++x) {
+ src[x] = rnd->Rand16() & ((1 << bitdepth) - 1);
+ }
+ src += src_stride;
+ }
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void PostFilterApplyCdefTest<bitdepth, Pixel>::CopyFilterOutputToDestBuffer() {
+ for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
+ const int subsampling_x = (plane == 0) ? 0 : param_.subsampling_x;
+ const int subsampling_y = (plane == 0) ? 0 : param_.subsampling_y;
+ const int plane_width = SubsampledValue(param_.width, subsampling_x);
+ const int plane_height = SubsampledValue(param_.height, subsampling_y);
+ auto* src = reinterpret_cast<Pixel*>(yuv_buffer_.data(plane));
+ const int src_stride = yuv_buffer_.stride(plane) / sizeof(src[0]);
+ Pixel* dest_plane =
+ dest_ +
+ ((plane == 0) ? 0 : ((plane == 1) ? y_size_ : y_size_ + uv_size_));
+ for (int y = 0; y < plane_height; ++y) {
+ for (int x = 0; x < plane_width; ++x) {
+ dest_plane[y * plane_width + x] = src[x];
+ }
+ src += src_stride;
+ }
+ }
+}
+
+template <int bitdepth, typename Pixel>
+void PostFilterApplyCdefTest<bitdepth, Pixel>::TestMultiThread(
+ int num_threads) {
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ SetInput(&rnd);
+
+ ASSERT_TRUE(frame_scratch_buffer_.threading_strategy.Reset(frame_header_,
+ num_threads));
+ if (num_threads > 1) {
+ const int num_units =
+ MultiplyBy4(RightShiftWithCeiling(frame_header_.rows4x4, 4));
+ ASSERT_TRUE(frame_scratch_buffer_.cdef_border.Realloc(
+ bitdepth, /*is_monochrome=*/false,
+ MultiplyBy4(frame_header_.columns4x4), num_units,
+ sequence_header_.color_config.subsampling_x,
+ /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
+ kBorderPixels, nullptr, nullptr, nullptr));
+ }
+
+ PostFilter post_filter(frame_header_, sequence_header_,
+ &frame_scratch_buffer_, &yuv_buffer_, dsp_,
+ /*do_post_filter_mask=*/0x02);
+ SetInputBuffer(&rnd, &post_filter);
+
+ const int id = GetIdFromInputParam(param_.subsampling_x, param_.subsampling_y,
+ param_.height);
+ absl::Duration elapsed_time;
+ const absl::Time start = absl::Now();
+
+ // Only ApplyCdef() and frame copy inside ApplyFilteringThreaded() are
+ // triggered, since we set the filter mask to 0x02.
+ post_filter.ApplyFilteringThreaded();
+ elapsed_time += absl::Now() - start;
+
+ CopyFilterOutputToDestBuffer();
+ if (bitdepth == 8) {
+ test_utils::CheckMd5Digest(kCdef, kApplyCdefName,
+ GetDigestApplyCdef8bpp(id), dest_, size_,
+ elapsed_time);
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ } else {
+ test_utils::CheckMd5Digest(kCdef, kApplyCdefName,
+ GetDigestApplyCdef10bpp(id), dest_, size_,
+ elapsed_time);
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+ }
+}
+
+const FrameSizeParam kTestParamApplyCdef[] = {
+ FrameSizeParam(352, 352, 288, 0, 0), FrameSizeParam(720, 720, 480, 0, 0),
+ FrameSizeParam(1920, 1920, 1080, 0, 0), FrameSizeParam(251, 251, 187, 0, 0),
+ FrameSizeParam(352, 352, 288, 0, 1), FrameSizeParam(720, 720, 480, 0, 1),
+ FrameSizeParam(1920, 1920, 1080, 0, 1), FrameSizeParam(251, 251, 187, 0, 1),
+ FrameSizeParam(352, 352, 288, 1, 0), FrameSizeParam(720, 720, 480, 1, 0),
+ FrameSizeParam(1920, 1920, 1080, 1, 0), FrameSizeParam(251, 251, 187, 1, 0),
+ FrameSizeParam(352, 352, 288, 1, 1), FrameSizeParam(720, 720, 480, 1, 1),
+ FrameSizeParam(1920, 1920, 1080, 1, 1), FrameSizeParam(251, 251, 187, 1, 1),
+};
+
+using PostFilterApplyCdefTest8bpp = PostFilterApplyCdefTest<8, uint8_t>;
+
+TEST_P(PostFilterApplyCdefTest8bpp, ApplyCdef) {
+ TestMultiThread(2);
+ TestMultiThread(4);
+ TestMultiThread(8);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterApplyCdefTestInstance,
+ PostFilterApplyCdefTest8bpp,
+ testing::ValuesIn(kTestParamApplyCdef));
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+using PostFilterApplyCdefTest10bpp = PostFilterApplyCdefTest<10, uint16_t>;
+
+TEST_P(PostFilterApplyCdefTest10bpp, ApplyCdef) {
+ TestMultiThread(2);
+ TestMultiThread(4);
+ TestMultiThread(8);
+}
+
+INSTANTIATE_TEST_SUITE_P(PostFilterApplyCdefTestInstance,
+ PostFilterApplyCdefTest10bpp,
+ testing::ValuesIn(kTestParamApplyCdef));
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+
+} // namespace libgav1
diff --git a/src/prediction_mask.h b/src/prediction_mask.h
index 0134a0d..827a0fa 100644
--- a/src/prediction_mask.h
+++ b/src/prediction_mask.h
@@ -17,9 +17,6 @@
#ifndef LIBGAV1_SRC_PREDICTION_MASK_H_
#define LIBGAV1_SRC_PREDICTION_MASK_H_
-#include <cstddef>
-#include <cstdint>
-
#include "src/utils/bit_mask_set.h"
#include "src/utils/types.h"
diff --git a/src/prediction_mask_test.cc b/src/prediction_mask_test.cc
new file mode 100644
index 0000000..d2a12c2
--- /dev/null
+++ b/src/prediction_mask_test.cc
@@ -0,0 +1,214 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/prediction_mask.h"
+
+#include <array>
+#include <cstdint>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "src/utils/array_2d.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr int kWedgeDirectionTypes = 16;
+
+enum kWedgeDirection : uint8_t {
+ kWedgeHorizontal,
+ kWedgeVertical,
+ kWedgeOblique27,
+ kWedgeOblique63,
+ kWedgeOblique117,
+ kWedgeOblique153,
+};
+
+const char* const kExpectedWedgeMask[] = {
+ "cea09e4bf4227efef749672283f7369b", "2763ab02b70447b2f9d5ed4796ca33bc",
+ "8d83c4315eadda824893c3e79aa866d9", "a733fd7f143c1c6141983c5f816bb3d8",
+ "9a205bfca776ccde57a8031350f2f467", "d78b964719f52f302f4454df14e45e35",
+ "bdc3972cfeb44d0acebb49b2fcb76072", "c8872571833c165be99ada1c552bfd9b",
+ "26d2541e2f8efe48e2f4a1819b3a6896", "783871179337e78e5ef41a66c0c6937c",
+ "253d21c612d732fceedcf610c4ff099c", "c868d177dc2a2378ef362fa482f601e8",
+ "782d75e143d87cc1aeb5d040c48d3c2d", "718cbecf4db45c7d596eba07bd956601",
+ "3b60b9336c2cf699172eb4a3fef18787", "afe72d4bd206f1cb27e3736c3b0068cf",
+ "7b830a1a94bad23a1df1b8d9668708d0", "d3f421ff2b81686fd421f7c02622aac1",
+ "d9ac14dff8e3c415e85e99c3ce0fbd5b", "da493727a08773a950a0375881d912f2",
+ "2f4251fd1b4636a034e22611ea1223b6", "84f84f01900b8a894b19e353605846b0",
+ "bbf5dae73300b6a6789710ffc4fc59fd", "c711941a0889fbed9b926c1eb39a5616",
+ "2fcf270613df57a57e647f37bf9a19ec", "79ed9c2f828b765edf65027f1f0847f5",
+ "e8d3e821f4e7f2f39659071da8f2cc71", "823bb09e2c28f2a81bf8a2d030e8bab6",
+ "d598fb4f70ea6b705674497994aecbfa", "3737c39f058c57650be7e720dcd87aa1",
+ "eb1d9b1d30485d9870ca9380cbdfad43", "a23d3c24f291080fcd62c0a2a2aea181",
+ "968543d91aeae3b1814a5074b6aa9e8c", "6e2444d71a4f3ddfe643e72f9c3cf6c3",
+ "3bf78413aa04830849a3d9c7bfa41a84", "ece8306f9859bcfb042b0bda8f6750b6",
+ "608b29fcedb7fa054a599945b497c78c", "d69d622016872469dfbde4e589bfd679",
+ "38a2307174c27b634323c59da3339dc6", "5e44f0fad99dbe802ffd69c7dc239d56",
+ "a0eeaf3755a724fdf6469f43cb060d75", "7bcf8035c5057619ea8660c32802d6a1",
+ "6054e1c35fe13b9269ab01d1bc0d8848", "e0ec8f7c66ebabff60f5accd3d707788",
+ "0b9fd6e1053a706af5d0cd59dc7e1992", "709648ffab1992d8522b04ca23de577a",
+ "c576e378ed264d6cb00adfd3b4e428f1", "f6f3ae5348e7141775a8a6bc2be22f80",
+ "9289722adb38fa3b2fb775648f0cc3a8", "b7e02fa00b56aeea8e6098a92eac72e1",
+ "db2f6d66ffca8352271f1e3f0116838a", "5858c567b0719daaa364fb0e6d8aa5dc",
+ "db2d300f875d2465adabf4c1322cea6f", "05c66b54c4d32e5b64a7e77e751f0c51",
+ "f2c2a5a3ce510d21ef2e62eedba85afb", "3959d2191a11e800289e21fd283b2837",
+ "cc86023d079a4c5daadce8ad0cdd176f", "e853f3c6814a653a52926488184aae5e",
+ "8568b9d7215bb8dfb1b7ce66ef38e055", "42814ac5ed652afb4734465cca9e038c",
+ "dba6b7d5e93e6a20dac9a514824ad45c", "be77e0dce733b564e96024ea23c9db43",
+ "2aa7bd75a1d8eb1000f0ef9e19aa0d1d", "226d85741e3f35493e971dd13b689ec7",
+ "9e5a0cf4416f8afeaa3ddbe686b5b7db", "18389c77b362f6b4b727b99426251159",
+ "10c5d899de999bbdf35839be3f2d5ee3", "942ae479a36fb4b4d359bebd78a92f03",
+ "f14e4dd174958e16755cd1f456b083e0", "8a036cbd0aaf1bece25a1140109f688b",
+ "2e48eade95f9fa0b7dae147e66d83e13", "4387d723350a011e26b0e91bbeb3d7c2",
+ "5470f977d859232335945efc8bb49ff1", "6780fd81cf2561300c75c930e715c7a6",
+ "9786aca6b1b9abfc3eae51404bc3cbd5", "da65c1440fa370a0237284bf30e56b0b",
+ "8e0d5d83ab3c477fd11ef143a832f7bf", "97489c7a47aa69fef091e7e6e4049a8f",
+ "28787beac9e69001c2999976742764a3", "67760c48ff5f7bc50cd92727694ba271",
+ "57c2b0b7de5de0f40fb739ed095d82a4", "7b2a663ca7da4b73f1adfc7e0ca1eff1",
+ "980869e1795efb63ca623ce2f0043fb3", "575497eb213b05bab24017cc6ea4e56a",
+ "ca3b31382439f0bdd87b61fa10c7863b", "72c65bf29afb288f4d4ff51816429aa7",
+ "1fe8929387be982993cd2309e3eeae7a", "994246e2585179e00f49537713f33796",
+ "82ae324ba01002370e918724ce452738", "fb3bcb4811b8251f0cc5ec40859617e7",
+ "a2e24b21c1d3661412e00411d719210c", "7adc2b60d7d62df1d07e3e4458a46dc2",
+ "e71c1b2f9ccb1af0868c3869dc296506", "3e33e087c7e6f724528abbc658a1b631",
+ "19b80d80f6b83eedac4bab6226865ae1", "7d9293641c4ed3b21c14964ec785cfb9",
+ "5dd0fb9700f30c25bf7b65367c8f098d", "f96b55ec2d012807c972ef4731acd73d",
+ "5fc70808c3fa5b3c511926b434bfba66", "768c3ce37acfcd4e5ba05152e5710bc9",
+ "1271a52682566ebfc01d5c239177ffd4", "52d4fc11a7507695b2548e0424be50ab",
+ "729e7d421aaaf74daa27b0ce1ca0a305", "92d2ff4a9a679cdf0ff765a2d30bced1",
+ "d160ec6f1bd864eb2ac8fabf5af7fedd", "ad323dbcb4a651e96bd5c81bc185385d",
+ "937c1b7106a2e6aef0adf2c858b4df18", "0f9ad42d1c48970f8462921ac79849ee",
+ "32ed1e1a16ddbf816f81caca7cb56c93", "e91aa6389d8255b7744aaa875ba2ceec",
+ "88f9dedf6d565b2f60b511e389cf366a", "d0428fd42ca311cd3680ff4670d4f047",
+ "b9c7eeb7c9733f0220587643952602cb", "65adf32a5e03d161a411815179078ba3",
+ "4984a4e9a5bdf732c071d5b60029daf4", "b9b65a2a9f04b59766d305221e4cda5a",
+ "7b2d372fe33d6db1fcf75820b7523ed5", "9a07593316707f8e59fe09c7647ade15",
+ "33e75e0d2aa73e3410095c2f98c27a14", "f9ddb33b16431ff9cf6ae96dd4acc792",
+ "2df1a8655b2ef23f642b11b76b20f557", "9faba399ccf555c25a33c336cdd54d94",
+ "c94404e263c2dae2e955ead645348c08", "3d16d4be87cd4467c3f7be17287940c8",
+ "99d0fdae81d61680c7a5b1df38dc98fc", "a23b402d699a00c5c349b17e77f73552",
+ "c6f76c81c4050939a6bd5d30ca00b307", "bc3d035bd6e8f55497bfc6d1f81fc8be",
+ "99b10db073e13b49bd90655f7516383b", "ddfd0e434efe076e2706c5669c788566",
+ "e1d836f814e6eca80ef530f8676e0599", "ed3e4c64e9fd1006e0016e460970a423",
+ "0282542e21fa0dea0bf48ec0a2d25b2d", "7482eb8a7bf1417a61c21d82bc7c95f9",
+ "e98e9bb3d5edf7b943d0bbf1eec9bef6", "ad4d313beecf609ff3a7d30da3e54a1d",
+ "b98f8db9fa62fb73d26415f6fa31b330", "0591b3c34bf4750f20a74eee165a54bd",
+ "3054b56fec6968255f21d40f80f5121c", "59ecf60cbb8408e042816e73446fa79c",
+ "8fa8c996209a1ddb8a00c14ca19953f8", "e20d2462bc43a1a1bfbc5efe7a905666",
+ "b5065e40d5d103e21daabcf4d5fea805", "b65aba0f8e307ef08951f1abdb7c8f62",
+ "5fbec6e57c1c651bd7be69fccb0b39a6", "9dfc362f7212d086418b0def54a7c76c",
+ "6644928e9aaac5e5d64f4a2c437c778a", "1bf63c7539ea32489bec222d5bc5305f",
+ "755ec607a5edf116d188353a96a025c3", "bdc4cc354c4f57c38d3be3dbc9380e2d",
+ "7851752b4ae36793ab6f03cd91e7ba6f", "99b9834ea2f6ea8d9168c5c1ba7fe790",
+ "75a155c83b618b28d48f5f343cdfef62", "38821c97e04d2294766699a6846fefaf",
+ "14be7f588461273862c9d9b83d2f6f0a", "8c38ce521671f0eee7e6f6349ef4f981",
+ "043347de994f2fe68c08e7c06a7f6735", "cda15ea2caccbdd8a7342a6144278578",
+ "244d586e88c9d6a9a59059a82c3b8e57", "3712928dd0dd77f027370f22d61366a0",
+ "e4f1cd4785fc331ad6e3100da4a934f3", "3181459434921b5b15b64cfd2ee734c4",
+ "2d588831e98c7178c5370421a6f2fc60", "135cf6a67fc1b51dbcf9fcddb3ae1237",
+ "d701da4e1a890a37bb0e9af4a2f0b048", "02138b5a4882181f248945c3a8262050",
+ "7fbd4d06965b1d152d6c037b0302f307", "7917a20573da241868689ed49c0d5972",
+ "ffdd4257d91fe00e61de4d2668f1ee07", "72999b6d3bf1ee189e9269a27105991f",
+ "1b63d7f25388c9af4adac60d46b7a8ca", "e3ce0977224197ade58aa979f3206d68",
+ "73178ffd388b46891fc4a0440686b554", "f1f99faf52cea98c825470c6edd1d973",
+ "e6fae5d5682862ec3377b714b6b69825", "a4f96cca8da155204b0cc4258b068d3c",
+ "75c7674c2356325dcb14c222266c46f8", "932b23521c9d9d06096879a665a14e28",
+ "8ed48a84a99b4a5bf2ec8a7a2c1f1c79", "4f6f0214857a92ad92eca1c33a762424",
+ "34865190c3e91200a0609a6e770ebc5c", "e793f1f2e46876b1e417da5d59475fda",
+ "e83cd9a228941a152f6878aa939e1290", "d6f5cd74ba386bd98282e1fcb0528dbd",
+ "131b55ec66ffe76f9088f7b35d38c0dd", "2d0ae8ee059cbd8c7816e3c862efdf37",
+ "65baadd2cb85ffbc6480bf8c1f128d1a", "2b8e8af333c464b4213bbd9185a9b751",
+ "951fd5faed77a1ae9bf5ef8f30bd65c3", "41d38d40dfe9da2b9ff2146711bf6ab5",
+ "7430bde28aed5a9429db54ea663a5e26", "46576d59a13756c494793ad4b3a663e5",
+ "21802d0db30caa44cbdba2ac84cc49b5", "591cad82ae106d9e9670acd5b60e4548",
+ "c0484c58c6c009939e7f3ec0c1aa8e2d", "6405c55d0a1830cfdd37950bfd65fd6f",
+ "3bd74c067d2ba027fc004e9bf62254db", "6e920e6dbdbe55a97ff2bf3dfb38a3e0",
+ "e2ed20f89da293516b14be766a624299", "0a613ee53ec38cad995faa17a24fcb8f",
+ "0de937145c030d766c3f9fff09d7e39c", "4a560325b804fcb6643866e971ade8e8",
+ "be82c41d3a0f8bd4032c3e5e45b453da", "b27219f02db167bf5a416831b908b031",
+ "7cf5437e25d362bc373dd53d8fd78186", "39c801e28cc08150c2016083113d1a03",
+ "785a21219d9c42a7c5bd417f365535a3", "008c79298a87837bcb504c4dc39ca628",
+ "af24d1d6f4d3ee94f2af52471a64ca1f", "cd82218aae9815c106336aec7ce18833",
+ "9f405c66d4ce7533213c4ca82feaf252", "7ceda4ea6ddeccd04dbf6d3237fe956a",
+ "ae21b52869b85a64fa4e3a85a2a8bb8d", "a004927cdbf48e0dafcccfb6066cdd0c",
+ "949337a963a8a5c0f46cf774b078a7cd", "24f58b8db17d02f66d04d22ca6c5e026",
+ "2b1315a2e7c5d5309a7621651e741616", "5b317ef820e6c8e7ea7a7d7022e8349d",
+ "debd504650d35d9deca4c2461094949f", "19d0ca33e5b3a0afff1f39f0f42238e0",
+ "df1c6c7582bfa5ceb147a8dd253cfa43", "176647077c5e2d985b3807134aac118f",
+ "dd2850172602688eaaa768f705c1ba67", "6ba1a3929ae9725fc688b8189b16314f",
+ "639189abb754dfa6be3c813ee8342954", "d5d1b8bff370f280fba13827d6bdf0fb",
+ "4b0ad4ea387a952724cab42730f712d2", "8c9c1f09946b61315e9a45c7e39f1992",
+ "50ef75c2b7a17f972586ce053eb62d24", "d5922dd01d8d02ca00ab9648a3db343f",
+ "091f517b18f4438ea9c581b7471f2fc0", "fede855bfb936caaa8fb4a434adac1d3",
+ "081b612f810f38c5ff6dc1cd03bf2eb6", "bd10e764eaf7d7e0ec89de96423d0afe",
+ "3e64cb1355e05b0a4b0237fae3f33bb2", "7cb92e0ecc0dd06d0a5d248efba48630",
+ "ec875f2e155a2e124ef52bf35e9a876c", "15529c83eae41bfa804f2c386f480e90",
+ "ee0e59567874155fb54de63fc901ded7", "4ad160b0d0f5166f9cddf7235725406e",
+ "176b64b3883c33e2aa251159983ccaa1", "d9cca01946d2a47c0114b1f49e4d688f",
+ "73d706a13afa279d9c716b3ba3a2ed68", "dea5a7f010d2f1385fe2b7d1d36aafb0",
+ "b5432fbc22d2f96c1230cc33178da09e", "8b0e7399ce98b68de4048411ab649468",
+ "3d52c986a5a5852a4620fbb38259a109", "eb61882738fefdd105094d4c104cf8b0",
+ "24fbc0d3ee28e937cfa1a3fbbc4e8214", "c69eb0687e477c27ac0d4c5fe54bbe8b",
+ "00a4f498f05b2b348252927ecc82c8a3", "c76471a61250be52e8d5933e582b1e19",
+ "22ebb8812dd795fdc14f20a7f9f89844", "f7c7d5c04bc234545726f4b116b623ec",
+ "9fc323d6619af0101edfacb4e9c2b647", "902d7888215d6aac1cf41f1fb6a916d8",
+ "5817d80a0504a5b08627502aeece4f38", "a1afa4b4065c143bc4857e364cec7f3d",
+ "506d5a6ff434411ea893bb2dc021aa25", "31cd3ca39015ccee1e217e1c83fff2a0",
+ "eb1ed4ef292c7d8fead1f113c9fd998f", "35f3abf3a056b778e3d7885f8df6c07a",
+ "299d71ee557382f5e64f26f1a8e4e156", "12f8c591a4e257bcc26b385424cd8d47",
+ "0b273b03d817af587c8fb23de71f346d", "1d7592fe89c661e9f61d215d235aa2ee",
+ "331dc544956ee14064ab432c85d52828", "a0a4ccbe1c442717ad40b7d40ed81a40",
+ "45009d915bf1d4ab855b5b670d314839", "641dfe93841aaa18888cebb17b8566eb",
+ "2b177c880ce0c2b4e891abc1dc23dfc2", "23984491f7d6c206fb8babafc9aacfdb",
+ "5841b93edb22c702035e31b26c58a728", "9852506766cb47f48783640d14753089",
+ "8a43698d32f63b1e7191482e4b274fc3", "7bdef02623beae507a651ad398422876",
+ "b105138645ad27657a08a3a8e8871a7e", "913e40ebbf1b983ca4956b85364b9459",
+ "5776f97b4f0cfa435a99d5d90822922d", "a0ae92a24c2b20039d996ee2a7d8b107",
+ "a925cc792412e2a7abe89367c9fe28b1", "778183eab5c9e0ee559d828d8347a21c",
+ "c4b4777355a4c8e8858faec37ba23eec", "4cdd41c3648e8d05c3e8f58d08385f8b",
+ "7c1246737874f984feb1b5827a1f95db", "c75d766ff5af8db39d400962d5aba0b4",
+ "964f010f5aa6748461ca5573b013091d", "b003f3eab3b118e5a8a85c1873b3bb55"};
+
+TEST(WedgePredictionMaskTest, GenerateWedgeMask) {
+ WedgeMaskArray wedge_masks;
+ ASSERT_TRUE(GenerateWedgeMask(&wedge_masks));
+
+ // Check wedge masks.
+ int block_size_index = 0;
+ int index = 0;
+ for (int block_size = kBlock8x8; block_size < kMaxBlockSizes; ++block_size) {
+ const int width = kBlockWidthPixels[block_size];
+ const int height = kBlockHeightPixels[block_size];
+ if (width < 8 || height < 8 || width > 32 || height > 32) continue;
+
+ for (int flip_sign = 0; flip_sign <= 1; ++flip_sign) {
+ for (int direction = 0; direction < kWedgeDirectionTypes; ++direction) {
+ uint8_t* const block_wedge_mask =
+ wedge_masks[block_size_index][flip_sign][direction][0];
+ const std::string digest =
+ test_utils::GetMd5Sum(block_wedge_mask, width * height);
+ EXPECT_STREQ(digest.c_str(), kExpectedWedgeMask[index]);
+ index++;
+ }
+ }
+ block_size_index++;
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/quantizer.h b/src/quantizer.h
index 00c53ab..c60756c 100644
--- a/src/quantizer.h
+++ b/src/quantizer.h
@@ -17,6 +17,7 @@
#ifndef LIBGAV1_SRC_QUANTIZER_H_
#define LIBGAV1_SRC_QUANTIZER_H_
+#include <array>
#include <cstdint>
#include "src/utils/constants.h"
diff --git a/src/quantizer_test.cc b/src/quantizer_test.cc
new file mode 100644
index 0000000..618d247
--- /dev/null
+++ b/src/quantizer_test.cc
@@ -0,0 +1,168 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/quantizer.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "src/obu_parser.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(QuantizerTest, GetQIndex) {
+ const int kBaseQIndex = 40;
+ const int kDelta = 10;
+ const int kOutOfRangeIndex = 200;
+ Segmentation segmentation = {};
+
+ EXPECT_EQ(GetQIndex(segmentation, 0, kBaseQIndex), kBaseQIndex);
+ EXPECT_EQ(GetQIndex(segmentation, kOutOfRangeIndex, kBaseQIndex),
+ kBaseQIndex);
+
+ segmentation.enabled = true;
+ EXPECT_EQ(GetQIndex(segmentation, 0, kBaseQIndex), kBaseQIndex);
+ EXPECT_EQ(GetQIndex(segmentation, kOutOfRangeIndex, kBaseQIndex),
+ kBaseQIndex);
+
+ segmentation.feature_enabled[1][kSegmentFeatureQuantizer] = true;
+ segmentation.feature_data[1][kSegmentFeatureQuantizer] = kDelta;
+ EXPECT_EQ(GetQIndex(segmentation, 1, kBaseQIndex), kBaseQIndex + kDelta);
+ EXPECT_EQ(GetQIndex(segmentation, kOutOfRangeIndex, kBaseQIndex),
+ kBaseQIndex);
+
+ segmentation.enabled = false;
+ EXPECT_EQ(GetQIndex(segmentation, 1, kBaseQIndex), kBaseQIndex);
+ EXPECT_EQ(GetQIndex(segmentation, kOutOfRangeIndex, kBaseQIndex),
+ kBaseQIndex);
+}
+
+TEST(QuantizerTest, GetDcValue) {
+ QuantizerParameters params = {};
+ params.delta_dc[kPlaneY] = 1;
+ params.delta_dc[kPlaneU] = 2;
+ params.delta_dc[kPlaneV] = 3;
+
+ // Test lookups of Dc_Qlookup[0][0], Dc_Qlookup[0][11], Dc_Qlookup[0][12],
+ // and Dc_Qlookup[0][255] in the spec, including the clipping of qindex.
+ {
+ Quantizer quantizer(8, &params);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, -2), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, -1), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 10), 16);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 11), 17);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 254), 1336);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 255), 1336);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, -3), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, -2), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 9), 16);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 10), 17);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 253), 1336);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 254), 1336);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, -4), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, -3), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 8), 16);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 9), 17);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 252), 1336);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 253), 1336);
+ }
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ // Test lookups of Dc_Qlookup[1][0], Dc_Qlookup[1][11], Dc_Qlookup[1][12],
+ // and Dc_Qlookup[1][255] in the spec, including the clipping of qindex.
+ {
+ Quantizer quantizer(10, &params);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, -2), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, -1), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 10), 34);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 11), 37);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 254), 5347);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneY, 255), 5347);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, -3), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, -2), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 9), 34);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 10), 37);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 253), 5347);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneU, 254), 5347);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, -4), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, -3), 4);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 8), 34);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 9), 37);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 254), 5347);
+ EXPECT_EQ(quantizer.GetDcValue(kPlaneV, 253), 5347);
+ }
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+}
+
+TEST(QuantizerTest, GetAcValue) {
+ QuantizerParameters params = {};
+ params.delta_ac[kPlaneU] = 1;
+ params.delta_ac[kPlaneV] = 2;
+
+ // Test lookups of Ac_Qlookup[0][0], Ac_Qlookup[0][11], Ac_Qlookup[0][12],
+ // and Ac_Qlookup[0][255] in the spec, including the clipping of qindex.
+ {
+ Quantizer quantizer(8, &params);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, -1), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 0), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 11), 18);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 12), 19);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 255), 1828);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 256), 1828);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, -2), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, -1), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 10), 18);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 11), 19);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 254), 1828);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 255), 1828);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, -3), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, -2), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 9), 18);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 10), 19);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 253), 1828);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 254), 1828);
+ }
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ // Test lookups of Ac_Qlookup[1][0], Ac_Qlookup[1][11], Ac_Qlookup[1][12],
+ // and Ac_Qlookup[1][255] in the spec, including the clipping of qindex.
+ {
+ Quantizer quantizer(10, &params);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, -1), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 0), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 11), 37);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 12), 40);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 255), 7312);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneY, 256), 7312);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, -2), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, -1), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 10), 37);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 11), 40);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 254), 7312);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneU, 255), 7312);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, -3), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, -2), 4);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 9), 37);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 10), 40);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 253), 7312);
+ EXPECT_EQ(quantizer.GetAcValue(kPlaneV, 254), 7312);
+ }
+#endif // LIBGAV1_MAX_BITDEPTH >= 10
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/reconstruction.cc b/src/reconstruction.cc
index 1aa1233..bf48137 100644
--- a/src/reconstruction.cc
+++ b/src/reconstruction.cc
@@ -23,30 +23,30 @@
namespace libgav1 {
namespace {
-// Maps TransformType to dsp::Transform1D for the row transforms.
-constexpr dsp::Transform1D kRowTransform[kNumTransformTypes] = {
- dsp::k1DTransformDct, dsp::k1DTransformAdst,
- dsp::k1DTransformDct, dsp::k1DTransformAdst,
- dsp::k1DTransformAdst, dsp::k1DTransformDct,
- dsp::k1DTransformAdst, dsp::k1DTransformAdst,
- dsp::k1DTransformAdst, dsp::k1DTransformIdentity,
- dsp::k1DTransformIdentity, dsp::k1DTransformDct,
- dsp::k1DTransformIdentity, dsp::k1DTransformAdst,
- dsp::k1DTransformIdentity, dsp::k1DTransformAdst};
-
-// Maps TransformType to dsp::Transform1D for the column transforms.
-constexpr dsp::Transform1D kColumnTransform[kNumTransformTypes] = {
- dsp::k1DTransformDct, dsp::k1DTransformDct,
- dsp::k1DTransformAdst, dsp::k1DTransformAdst,
- dsp::k1DTransformDct, dsp::k1DTransformAdst,
- dsp::k1DTransformAdst, dsp::k1DTransformAdst,
- dsp::k1DTransformAdst, dsp::k1DTransformIdentity,
- dsp::k1DTransformDct, dsp::k1DTransformIdentity,
- dsp::k1DTransformAdst, dsp::k1DTransformIdentity,
- dsp::k1DTransformAdst, dsp::k1DTransformIdentity};
-
-dsp::TransformSize1D Get1DTransformSize(int size_log2) {
- return static_cast<dsp::TransformSize1D>(size_log2 - 2);
+// Maps TransformType to dsp::Transform1d for the row transforms.
+constexpr dsp::Transform1d kRowTransform[kNumTransformTypes] = {
+ dsp::kTransform1dDct, dsp::kTransform1dAdst,
+ dsp::kTransform1dDct, dsp::kTransform1dAdst,
+ dsp::kTransform1dAdst, dsp::kTransform1dDct,
+ dsp::kTransform1dAdst, dsp::kTransform1dAdst,
+ dsp::kTransform1dAdst, dsp::kTransform1dIdentity,
+ dsp::kTransform1dIdentity, dsp::kTransform1dDct,
+ dsp::kTransform1dIdentity, dsp::kTransform1dAdst,
+ dsp::kTransform1dIdentity, dsp::kTransform1dAdst};
+
+// Maps TransformType to dsp::Transform1d for the column transforms.
+constexpr dsp::Transform1d kColumnTransform[kNumTransformTypes] = {
+ dsp::kTransform1dDct, dsp::kTransform1dDct,
+ dsp::kTransform1dAdst, dsp::kTransform1dAdst,
+ dsp::kTransform1dDct, dsp::kTransform1dAdst,
+ dsp::kTransform1dAdst, dsp::kTransform1dAdst,
+ dsp::kTransform1dAdst, dsp::kTransform1dIdentity,
+ dsp::kTransform1dDct, dsp::kTransform1dIdentity,
+ dsp::kTransform1dAdst, dsp::kTransform1dIdentity,
+ dsp::kTransform1dAdst, dsp::kTransform1dIdentity};
+
+dsp::Transform1dSize GetTransform1dSize(int size_log2) {
+ return static_cast<dsp::Transform1dSize>(size_log2 - 2);
}
// Returns the number of rows to process based on |non_zero_coeff_count|. The
@@ -150,10 +150,10 @@ void Reconstruct(const dsp::Dsp& dsp, TransformType tx_type,
assert(tx_height <= 32);
// Row transform.
- const dsp::TransformSize1D row_transform_size =
- Get1DTransformSize(tx_width_log2);
- const dsp::Transform1D row_transform =
- lossless ? dsp::k1DTransformWht : kRowTransform[tx_type];
+ const dsp::Transform1dSize row_transform_size =
+ GetTransform1dSize(tx_width_log2);
+ const dsp::Transform1d row_transform =
+ lossless ? dsp::kTransform1dWht : kRowTransform[tx_type];
const dsp::InverseTransformAddFunc row_transform_func =
dsp.inverse_transforms[row_transform][row_transform_size][dsp::kRow];
assert(row_transform_func != nullptr);
@@ -162,10 +162,10 @@ void Reconstruct(const dsp::Dsp& dsp, TransformType tx_type,
frame);
// Column transform.
- const dsp::TransformSize1D column_transform_size =
- Get1DTransformSize(tx_height_log2);
- const dsp::Transform1D column_transform =
- lossless ? dsp::k1DTransformWht : kColumnTransform[tx_type];
+ const dsp::Transform1dSize column_transform_size =
+ GetTransform1dSize(tx_height_log2);
+ const dsp::Transform1d column_transform =
+ lossless ? dsp::kTransform1dWht : kColumnTransform[tx_type];
const dsp::InverseTransformAddFunc column_transform_func =
dsp.inverse_transforms[column_transform][column_transform_size]
[dsp::kColumn];
diff --git a/src/reconstruction_test.cc b/src/reconstruction_test.cc
new file mode 100644
index 0000000..fd780b3
--- /dev/null
+++ b/src/reconstruction_test.cc
@@ -0,0 +1,294 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/reconstruction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "absl/strings/match.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/inverse_transform.h"
+#include "src/utils/array_2d.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/cpu.h"
+#include "src/utils/memory.h"
+#include "tests/block_utils.h"
+#include "tests/utils.h"
+
+namespace libgav1 {
+namespace {
+
+// Import the scan tables in the anonymous namespace.
+#include "src/scan_tables.inc"
+
+constexpr int kTestTransformSize = 4;
+constexpr int8_t kTestBitdepth = 8;
+
+using testing::ElementsAreArray;
+
+// The 'int' parameter is unused but required to allow for instantiations of C,
+// NEON, etc.
+class ReconstructionTest : public testing::TestWithParam<int> {
+ public:
+ ReconstructionTest() = default;
+ ReconstructionTest(const ReconstructionTest&) = delete;
+ ReconstructionTest& operator=(const ReconstructionTest&) = delete;
+ ~ReconstructionTest() override = default;
+
+ protected:
+ void SetUp() override {
+ test_utils::ResetDspTable(kTestBitdepth);
+ dsp::InverseTransformInit_C();
+ dsp_ = dsp::GetDspTable(kTestBitdepth);
+ ASSERT_NE(dsp_, nullptr);
+ const testing::TestInfo* const test_info =
+ testing::UnitTest::GetInstance()->current_test_info();
+ if (test_info->value_param() != nullptr) {
+ const char* const test_case = test_info->test_suite_name();
+ if (absl::StartsWith(test_case, "C/")) {
+ } else if (absl::StartsWith(test_case, "SSE41/")) {
+ if ((GetCpuInfo() & kSSE4_1) != 0) {
+ dsp::InverseTransformInit_SSE4_1();
+ }
+ } else if (absl::StartsWith(test_case, "NEON/")) {
+ dsp::InverseTransformInit_NEON();
+ } else {
+ FAIL() << "Unrecognized architecture prefix in test case name: "
+ << test_case;
+ }
+ }
+ InitBuffers();
+ }
+
+ void InitBuffers(int width = kTestTransformSize,
+ int height = kTestTransformSize) {
+ const int size = width * height;
+ buffer_.clear();
+ buffer_.resize(size);
+ residual_buffer_.clear();
+ residual_buffer_.resize(size);
+ for (int i = 0; i < size; ++i) {
+ buffer_[i] = residual_buffer_[i] = i % 256;
+ }
+ frame_buffer_.Reset(height, width, buffer_.data());
+ }
+
+ template <int bitdepth>
+ void TestWht();
+
+ std::vector<uint8_t> buffer_;
+ std::vector<int16_t> residual_buffer_;
+ // |frame_buffer_| is just a 2D array view into the |buffer_|.
+ Array2DView<uint8_t> frame_buffer_;
+ const dsp::Dsp* dsp_;
+};
+
+template <int bitdepth>
+void ReconstructionTest::TestWht() {
+ static_assert(bitdepth == kBitdepth8 || bitdepth == kBitdepth10, "");
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dWht][dsp::kTransform1dSize4]) {
+ if (transform == nullptr) {
+ GTEST_SKIP() << "No function available for dsp::kTransform1dWht";
+ }
+ }
+ constexpr int max = 16 << bitdepth;
+ constexpr int min = -max;
+ static constexpr int16_t residual_inputs[][16]{
+ {64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, max - 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, min - 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Note these are unrealistic inputs, but serve to test each position in
+ // the array and match extremes in some commercial test vectors.
+ {max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max},
+ {min, min, min, min, min, min, min, min, min, min, min, min, min, min,
+ min, min}};
+ // Before the Reconstruct() call, the frame buffer is filled with all 127.
+ // After the Reconstruct() call, the frame buffer is expected to have the
+ // following values.
+ static constexpr uint8_t frame_outputs[][16]{
+ {131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
+ 131, 131},
+ {132, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
+ 131, 131},
+ {255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255},
+ {0, 0, 255, 255, 0, 0, 255, 255, 255, 255, 0, 0, 255, 255, 0, 0},
+ {255, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+ 127, 127},
+ {0, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+ 127},
+ };
+
+ const TransformSize tx_size = kTransformSize4x4;
+ const TransformType tx_type = kTransformTypeDctDct;
+ const int tx_width = kTransformWidth[tx_size];
+ const int tx_height = kTransformHeight[tx_size];
+ const uint16_t* const scan = kScan[GetTransformClass(tx_type)][tx_size];
+
+ InitBuffers(tx_width, tx_height);
+
+ const int num_tests = sizeof(residual_inputs) / sizeof(residual_inputs[0]);
+ for (int i = 0; i < num_tests; ++i) {
+ int16_t eob; // Also known as non_zero_coeff_count.
+ for (eob = 15; eob >= 0; --eob) {
+ if (residual_inputs[i][scan[eob]] != 0) break;
+ }
+ ++eob;
+ memcpy(residual_buffer_.data(), residual_inputs[i],
+ sizeof(residual_inputs[i]));
+ memset(buffer_.data(), 127, sizeof(frame_outputs[i]));
+ Reconstruct(*dsp_, tx_type, tx_size, /*lossless=*/true,
+ residual_buffer_.data(), 0, 0, &frame_buffer_, eob);
+
+ EXPECT_TRUE(test_utils::CompareBlocks(buffer_.data(), frame_outputs[i],
+ tx_width, tx_height, tx_width,
+ tx_width, false, true))
+ << "Mismatch WHT test case " << i;
+ }
+}
+
+TEST_P(ReconstructionTest, ReconstructionSimple) {
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dIdentity]
+ [dsp::kTransform1dSize4]) {
+ if (transform == nullptr) GTEST_SKIP();
+ }
+ Reconstruct(*dsp_, kTransformTypeIdentityIdentity, kTransformSize4x4, false,
+ residual_buffer_.data(), 0, 0, &frame_buffer_, 16);
+ // clang-format off
+ static constexpr uint8_t expected_output_buffer[] = {
+ 0, 1, 2, 3,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 14, 15, 16, 17
+ };
+ // clang-format on
+ EXPECT_THAT(buffer_, ElementsAreArray(expected_output_buffer));
+}
+
+TEST_P(ReconstructionTest, ReconstructionFlipY) {
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dIdentity]
+ [dsp::kTransform1dSize4]) {
+ if (transform == nullptr) GTEST_SKIP();
+ }
+ Reconstruct(*dsp_, kTransformTypeIdentityFlipadst, kTransformSize4x4, false,
+ residual_buffer_.data(), 0, 0, &frame_buffer_, 16);
+ // clang-format off
+ static constexpr uint8_t expected_buffer[] = {
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 7, 8, 9, 10,
+ 14, 15, 16, 17
+ };
+ // clang-format on
+ EXPECT_THAT(buffer_, ElementsAreArray(expected_buffer));
+}
+
+TEST_P(ReconstructionTest, ReconstructionFlipX) {
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dIdentity]
+ [dsp::kTransform1dSize4]) {
+ if (transform == nullptr) GTEST_SKIP();
+ }
+ Reconstruct(*dsp_, kTransformTypeFlipadstIdentity, kTransformSize4x4, false,
+ residual_buffer_.data(), 0, 0, &frame_buffer_, 16);
+ // clang-format off
+ static constexpr uint8_t expected_buffer[] = {
+ 0, 1, 2, 3,
+ 4, 5, 6, 8,
+ 8, 10, 10, 13,
+ 12, 14, 14, 18
+ };
+ // clang-format on
+ EXPECT_THAT(buffer_, ElementsAreArray(expected_buffer));
+}
+
+TEST_P(ReconstructionTest, ReconstructionFlipXAndFlipY) {
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dIdentity]
+ [dsp::kTransform1dSize4]) {
+ if (transform == nullptr) GTEST_SKIP();
+ }
+ Reconstruct(*dsp_, kTransformTypeFlipadstFlipadst, kTransformSize4x4, false,
+ residual_buffer_.data(), 0, 0, &frame_buffer_, 16);
+ // clang-format off
+ static constexpr uint8_t expected_buffer[] = {
+ 0, 1, 2, 3,
+ 4, 5, 6, 8,
+ 8, 8, 10, 9,
+ 12, 14, 14, 19
+ };
+ // clang-format on
+ EXPECT_THAT(buffer_, ElementsAreArray(expected_buffer));
+}
+
+TEST_P(ReconstructionTest, ReconstructionNonZeroStart) {
+ uint8_t buffer[64] = {};
+ Array2DView<uint8_t> frame_buffer(8, 8, buffer);
+ int k = 0;
+ for (int i = 0; i < kTestTransformSize; ++i) {
+ for (int j = 0; j < kTestTransformSize; ++j) {
+ frame_buffer[i + 4][j + 4] = k++;
+ }
+ }
+ for (const auto transform :
+ dsp_->inverse_transforms[dsp::kTransform1dIdentity]
+ [dsp::kTransform1dSize4]) {
+ if (transform == nullptr) GTEST_SKIP();
+ }
+ Reconstruct(*dsp_, kTransformTypeIdentityIdentity, kTransformSize4x4, false,
+ residual_buffer_.data(), 4, 4, &frame_buffer, 64);
+ // clang-format off
+ static constexpr uint8_t expected_buffer[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 2, 3,
+ 0, 0, 0, 0, 5, 6, 7, 8,
+ 0, 0, 0, 0, 9, 10, 11, 12,
+ 0, 0, 0, 0, 14, 15, 16, 17
+ };
+ // clang-format on
+ EXPECT_THAT(buffer, ElementsAreArray(expected_buffer));
+}
+
+TEST_P(ReconstructionTest, Wht8bit) { TestWht<kBitdepth8>(); }
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+TEST_P(ReconstructionTest, Wht10bit) { TestWht<kBitdepth10>(); }
+#endif
+
+INSTANTIATE_TEST_SUITE_P(C, ReconstructionTest, testing::Values(0));
+
+#if LIBGAV1_ENABLE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE41, ReconstructionTest, testing::Values(0));
+#endif
+
+#if LIBGAV1_ENABLE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, ReconstructionTest, testing::Values(0));
+#endif
+
+} // namespace
+} // namespace libgav1
diff --git a/src/residual_buffer_pool_test.cc b/src/residual_buffer_pool_test.cc
new file mode 100644
index 0000000..84bc747
--- /dev/null
+++ b/src/residual_buffer_pool_test.cc
@@ -0,0 +1,201 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/residual_buffer_pool.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "src/utils/constants.h"
+#include "src/utils/queue.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(ResidualBufferTest, TestUsage) {
+ ResidualBufferPool pool(true, 1, 1, sizeof(int16_t));
+ EXPECT_EQ(pool.Size(), 0);
+ // Get one buffer.
+ std::unique_ptr<ResidualBuffer> buffer1 = pool.Get();
+ uint8_t* const buffer1_ptr = buffer1->buffer();
+ ASSERT_NE(buffer1_ptr, nullptr);
+ // Get another buffer (while holding on to the first one).
+ std::unique_ptr<ResidualBuffer> buffer2 = pool.Get();
+ uint8_t* const buffer2_ptr = buffer2->buffer();
+ ASSERT_NE(buffer2_ptr, nullptr);
+ EXPECT_NE(buffer1_ptr, buffer2_ptr);
+ // Return the second buffer.
+ pool.Release(std::move(buffer2));
+ EXPECT_EQ(pool.Size(), 1);
+ // Get another buffer (this one should be the same as the buffer2).
+ std::unique_ptr<ResidualBuffer> buffer3 = pool.Get();
+ uint8_t* const buffer3_ptr = buffer3->buffer();
+ ASSERT_NE(buffer3_ptr, nullptr);
+ EXPECT_EQ(buffer3_ptr, buffer2_ptr);
+ EXPECT_EQ(pool.Size(), 0);
+ // Get another buffer (this one will be a new buffer).
+ std::unique_ptr<ResidualBuffer> buffer4 = pool.Get();
+ uint8_t* const buffer4_ptr = buffer4->buffer();
+ ASSERT_NE(buffer4_ptr, nullptr);
+ EXPECT_NE(buffer4_ptr, buffer1_ptr);
+ EXPECT_NE(buffer4_ptr, buffer3_ptr);
+ EXPECT_EQ(pool.Size(), 0);
+ // Return all the buffers.
+ pool.Release(std::move(buffer1));
+ EXPECT_EQ(pool.Size(), 1);
+ pool.Release(std::move(buffer3));
+ EXPECT_EQ(pool.Size(), 2);
+ pool.Release(std::move(buffer4));
+ EXPECT_EQ(pool.Size(), 3);
+ // Reset the buffer with same parameters.
+ pool.Reset(true, 1, 1, sizeof(int16_t));
+ EXPECT_EQ(pool.Size(), 3);
+ // Reset the buffer size with different parameters.
+ pool.Reset(true, 0, 1, sizeof(int32_t));
+ // The existing buffers should now have been invalidated.
+ EXPECT_EQ(pool.Size(), 0);
+ // Get and return a buffer.
+ std::unique_ptr<ResidualBuffer> buffer5 = pool.Get();
+ uint8_t* const buffer5_ptr = buffer5->buffer();
+ ASSERT_NE(buffer5_ptr, nullptr);
+ pool.Release(std::move(buffer5));
+ EXPECT_EQ(pool.Size(), 1);
+ // Reset the buffer with different value for use128x128_superblock.
+ pool.Reset(false, 0, 1, sizeof(int32_t));
+ // The existing buffers should now have been invalidated.
+ EXPECT_EQ(pool.Size(), 0);
+}
+
+TEST(ResidualBufferTest, TestQueue) {
+ ResidualBufferPool pool(true, 1, 1, sizeof(int16_t));
+ EXPECT_EQ(pool.Size(), 0);
+ // Get one buffer.
+ std::unique_ptr<ResidualBuffer> buffer1 = pool.Get();
+ uint8_t* const buffer1_ptr = buffer1->buffer();
+ ASSERT_NE(buffer1_ptr, nullptr);
+ auto* queue1 = buffer1->transform_parameters();
+ queue1->Push(TransformParameters(kTransformTypeAdstAdst, 10));
+ EXPECT_EQ(queue1->Size(), 1);
+ EXPECT_EQ(queue1->Front().type, kTransformTypeAdstAdst);
+ EXPECT_EQ(queue1->Front().non_zero_coeff_count, 10);
+ queue1->Push(TransformParameters(kTransformTypeDctDct, 20));
+ EXPECT_EQ(queue1->Size(), 2);
+ EXPECT_EQ(queue1->Front().type, kTransformTypeAdstAdst);
+ EXPECT_EQ(queue1->Front().non_zero_coeff_count, 10);
+ queue1->Pop();
+ EXPECT_EQ(queue1->Size(), 1);
+ EXPECT_EQ(queue1->Front().type, kTransformTypeDctDct);
+ EXPECT_EQ(queue1->Front().non_zero_coeff_count, 20);
+ // Return the buffer.
+ pool.Release(std::move(buffer1));
+ EXPECT_EQ(pool.Size(), 1);
+ // Get another buffer (should be the same as buffer1).
+ std::unique_ptr<ResidualBuffer> buffer2 = pool.Get();
+ uint8_t* const buffer2_ptr = buffer2->buffer();
+ ASSERT_NE(buffer2_ptr, nullptr);
+ EXPECT_EQ(buffer1_ptr, buffer2_ptr);
+ // Releasing the buffer should've cleared the queue.
+ EXPECT_EQ(buffer2->transform_parameters()->Size(), 0);
+}
+
+TEST(ResidualBufferTest, TestStackPushPop) {
+ ResidualBufferStack buffers;
+ EXPECT_EQ(buffers.Size(), 0);
+ EXPECT_EQ(buffers.Pop(), nullptr);
+
+ std::unique_ptr<ResidualBuffer> buffer0 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer0_ptr = buffer0.get();
+ EXPECT_NE(buffer0_ptr, nullptr);
+ std::unique_ptr<ResidualBuffer> buffer1 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer1_ptr = buffer1.get();
+ EXPECT_NE(buffer1_ptr, nullptr);
+ std::unique_ptr<ResidualBuffer> buffer2 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer2_ptr = buffer2.get();
+ EXPECT_NE(buffer2_ptr, nullptr);
+
+ // Push two buffers onto the stack.
+ buffers.Push(std::move(buffer0));
+ EXPECT_EQ(buffers.Size(), 1);
+ buffers.Push(std::move(buffer1));
+ EXPECT_EQ(buffers.Size(), 2);
+
+ // Pop one buffer off the stack.
+ std::unique_ptr<ResidualBuffer> top = buffers.Pop();
+ EXPECT_EQ(buffers.Size(), 1);
+ EXPECT_EQ(top.get(), buffer1_ptr);
+
+ // Push one buffer onto the stack.
+ buffers.Push(std::move(buffer2));
+ EXPECT_EQ(buffers.Size(), 2);
+
+ // Pop two buffers off the stack
+ top = buffers.Pop();
+ EXPECT_EQ(buffers.Size(), 1);
+ EXPECT_EQ(top.get(), buffer2_ptr);
+ top = buffers.Pop();
+ EXPECT_EQ(buffers.Size(), 0);
+ EXPECT_EQ(top.get(), buffer0_ptr);
+
+ // Try to pop a buffer off an empty stack.
+ top = buffers.Pop();
+ EXPECT_EQ(buffers.Size(), 0);
+ EXPECT_EQ(top, nullptr);
+}
+
+TEST(ResidualBufferTest, TestStackSwap) {
+ ResidualBufferStack buffers;
+ EXPECT_EQ(buffers.Size(), 0);
+ EXPECT_EQ(buffers.Pop(), nullptr);
+
+ std::unique_ptr<ResidualBuffer> buffer0 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer0_ptr = buffer0.get();
+ EXPECT_NE(buffer0_ptr, nullptr);
+ std::unique_ptr<ResidualBuffer> buffer1 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer1_ptr = buffer1.get();
+ EXPECT_NE(buffer1_ptr, nullptr);
+ std::unique_ptr<ResidualBuffer> buffer2 = ResidualBuffer::Create(128, 128);
+ ResidualBuffer* const buffer2_ptr = buffer2.get();
+ EXPECT_NE(buffer2_ptr, nullptr);
+
+ // Push three buffers onto the stack.
+ buffers.Push(std::move(buffer0));
+ EXPECT_EQ(buffers.Size(), 1);
+ buffers.Push(std::move(buffer1));
+ EXPECT_EQ(buffers.Size(), 2);
+ buffers.Push(std::move(buffer2));
+ EXPECT_EQ(buffers.Size(), 3);
+
+ // Swap the contents of the stacks.
+ ResidualBufferStack swapped;
+ swapped.Swap(&buffers);
+ EXPECT_EQ(buffers.Size(), 0);
+ EXPECT_EQ(swapped.Size(), 3);
+
+ // Pop three buffers off the swapped stack.
+ std::unique_ptr<ResidualBuffer> top = swapped.Pop();
+ EXPECT_EQ(swapped.Size(), 2);
+ EXPECT_EQ(top.get(), buffer2_ptr);
+ top = swapped.Pop();
+ EXPECT_EQ(swapped.Size(), 1);
+ EXPECT_EQ(top.get(), buffer1_ptr);
+ top = swapped.Pop();
+ EXPECT_EQ(swapped.Size(), 0);
+ EXPECT_EQ(top.get(), buffer0_ptr);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/scan_test.cc b/src/scan_test.cc
new file mode 100644
index 0000000..065ca03
--- /dev/null
+++ b/src/scan_test.cc
@@ -0,0 +1,85 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+namespace {
+
+// Import all the constants in the anonymous namespace.
+#include "src/scan_tables.inc"
+
+class ScanOrderTest
+ : public testing::TestWithParam<std::tuple<TransformClass, TransformSize>> {
+ public:
+ ScanOrderTest() = default;
+ ScanOrderTest(const ScanOrderTest&) = delete;
+ ScanOrderTest& operator=(const ScanOrderTest&) = delete;
+ ~ScanOrderTest() override = default;
+
+ protected:
+ TransformClass tx_class_ = std::get<0>(GetParam());
+ TransformSize tx_size_ = std::get<1>(GetParam());
+};
+
+TEST_P(ScanOrderTest, AllIndicesAreScannedExactlyOnce) {
+ const int tx_width = kTransformWidth[tx_size_];
+ const int tx_height = kTransformHeight[tx_size_];
+ int num_indices;
+ if (tx_class_ == kTransformClass2D || std::max(tx_width, tx_height) == 64) {
+ const int clamped_tx_width = std::min(32, tx_width);
+ const int clamped_tx_height = std::min(32, tx_height);
+ num_indices = clamped_tx_width * clamped_tx_height;
+ } else {
+ num_indices =
+ (std::max(tx_width, tx_height) > 16) ? 64 : tx_width * tx_height;
+ }
+ const uint16_t* const scan = kScan[tx_class_][tx_size_];
+ ASSERT_NE(scan, nullptr);
+ // Ensure that all the indices are scanned exactly once.
+ std::vector<int> scanned;
+ scanned.resize(num_indices);
+ for (int i = 0; i < num_indices; ++i) {
+ scanned[scan[i]]++;
+ }
+ EXPECT_THAT(scanned, testing::Each(1));
+}
+
+constexpr TransformClass kTestTransformClasses[] = {
+ kTransformClass2D, kTransformClassVertical, kTransformClassHorizontal};
+
+constexpr TransformSize kTestTransformSizes[] = {
+ kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
+ kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
+ kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
+ kTransformSize16x16, kTransformSize16x32, kTransformSize16x64,
+ kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
+ kTransformSize32x64, kTransformSize64x16, kTransformSize64x32,
+ kTransformSize64x64};
+
+INSTANTIATE_TEST_SUITE_P(
+ C, ScanOrderTest,
+ testing::Combine(testing::ValuesIn(kTestTransformClasses),
+ testing::ValuesIn(kTestTransformSizes)));
+
+} // namespace
+} // namespace libgav1
diff --git a/src/symbol_decoder_context_test.cc b/src/symbol_decoder_context_test.cc
new file mode 100644
index 0000000..4a0de86
--- /dev/null
+++ b/src/symbol_decoder_context_test.cc
@@ -0,0 +1,264 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/symbol_decoder_context.h"
+
+#include <cstdint>
+#include <cstring>
+
+#include "gtest/gtest.h"
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(SymbolDecoderContextTest, ResetIntraFrameYModeCdf) {
+ // Note these are zero-initialized separately to avoid differences in padding
+ // values added to tables for alignment purposes when comparing the contexts
+ // with memcmp().
+ libgav1::SymbolDecoderContext gold_context = {};
+ libgav1::SymbolDecoderContext context = {};
+ gold_context.Initialize(0);
+ context.Initialize(0);
+ EXPECT_EQ(memcmp(&gold_context, &context, sizeof(gold_context)), 0);
+ EXPECT_EQ(context.intra_frame_y_mode_cdf[0][0][0], 32768 - 15588);
+ EXPECT_EQ(context.intra_frame_y_mode_cdf[0][0][1], 32768 - 17027);
+ ++context.intra_frame_y_mode_cdf[0][0][0];
+ --context.intra_frame_y_mode_cdf[0][0][1];
+ EXPECT_NE(memcmp(&gold_context, &context, sizeof(gold_context)), 0);
+ context.ResetIntraFrameYModeCdf();
+ EXPECT_EQ(memcmp(&gold_context, &context, sizeof(gold_context)), 0);
+}
+
+void ResetAndVerifyCounters(libgav1::SymbolDecoderContext* const context) {
+ libgav1::SymbolDecoderContext gold_context = {};
+ gold_context.Initialize(0);
+ EXPECT_NE(memcmp(&gold_context, context, sizeof(gold_context)), 0);
+ context->ResetCounters();
+ EXPECT_EQ(memcmp(&gold_context, context, sizeof(gold_context)), 0);
+}
+
+TEST(SymbolDecoderContextTest, ResetCounters1d) {
+ libgav1::SymbolDecoderContext context = {};
+ context.Initialize(0);
+ int value = 0;
+ context.delta_q_cdf[libgav1::kDeltaSymbolCount] = ++value;
+ context.delta_lf_cdf[libgav1::kDeltaSymbolCount] = ++value;
+ context.intra_block_copy_cdf[libgav1::kBooleanSymbolCount] = ++value;
+ context.cfl_alpha_signs_cdf[libgav1::kCflAlphaSignsSymbolCount] = ++value;
+ context.filter_intra_mode_cdf[libgav1::kNumFilterIntraPredictors] = ++value;
+ context.restoration_type_cdf[libgav1::kRestorationTypeSymbolCount] = ++value;
+ context.use_wiener_cdf[libgav1::kBooleanSymbolCount] = ++value;
+ context.use_sgrproj_cdf[libgav1::kBooleanSymbolCount] = ++value;
+ ResetAndVerifyCounters(&context);
+}
+
+void IncreasePartitionCounters(SymbolDecoderContext* symbol_context,
+ int value) {
+ const int min_bsize_log2 = k4x4WidthLog2[kBlock8x8];
+ const int max_bsize_log2 = k4x4WidthLog2[kBlock128x128];
+ for (int block_size_log2 = min_bsize_log2; block_size_log2 <= max_bsize_log2;
+ ++block_size_log2) {
+ for (int context = 0; context < kPartitionContexts; ++context) {
+ const int cdf_size =
+ SymbolDecoderContext::PartitionCdfSize(block_size_log2);
+ symbol_context->partition_cdf[block_size_log2 - min_bsize_log2][context]
+ [cdf_size] += value;
+ }
+ }
+}
+
+void IncreasePaletteColorIndexCounters(SymbolDecoderContext* symbol_context,
+ int value) {
+ for (auto& palette_color_index_cdf_plane :
+ symbol_context->palette_color_index_cdf) {
+ for (int symbol_count = 0; symbol_count < kPaletteSizeSymbolCount;
+ ++symbol_count) {
+ const int cdf_size = symbol_count + kMinPaletteSize;
+ for (int context = 0; context < kPaletteColorIndexContexts; ++context) {
+ palette_color_index_cdf_plane[symbol_count][context][cdf_size] += value;
+ }
+ }
+ }
+}
+
+void IncreaseTxTypeCounters(SymbolDecoderContext* context, int value) {
+ for (int set_idx = kTransformSetIntra1; set_idx <= kTransformSetIntra2;
+ ++set_idx) {
+ auto tx_set = static_cast<TransformSet>(set_idx);
+ for (int tx_size = 0; tx_size < kNumExtendedTransformSizes; ++tx_size) {
+ for (int mode = 0; mode < kIntraPredictionModesY; ++mode) {
+ context->intra_tx_type_cdf[SymbolDecoderContext::TxTypeIndex(
+ tx_set)][tx_size][mode][kNumTransformTypesInSet[tx_set]] += value;
+ }
+ }
+ }
+
+ for (int set_idx = kTransformSetInter1; set_idx <= kTransformSetInter3;
+ ++set_idx) {
+ auto tx_set = static_cast<TransformSet>(set_idx);
+ for (int tx_size = 0; tx_size < kNumExtendedTransformSizes; ++tx_size) {
+ context->inter_tx_type_cdf[SymbolDecoderContext::TxTypeIndex(tx_set)]
+ [tx_size][kNumTransformTypesInSet[tx_set]] +=
+ value;
+ }
+ }
+}
+
+void IncreaseTxDepthCounters(SymbolDecoderContext* symbol_context, int value) {
+ for (int context = 0; context < kTxDepthContexts; ++context) {
+ symbol_context->tx_depth_cdf[0][context][kMaxTxDepthSymbolCount - 1] +=
+ value;
+ }
+
+ for (int plane_category = 1; plane_category < 4; ++plane_category) {
+ for (int context = 0; context < kTxDepthContexts; ++context) {
+ symbol_context
+ ->tx_depth_cdf[plane_category][context][kMaxTxDepthSymbolCount] +=
+ value;
+ }
+ }
+}
+
+void IncreaseUVModeCounters(SymbolDecoderContext* symbol_context, int value) {
+ for (int cfl_allowed = 0; cfl_allowed < kBooleanSymbolCount; ++cfl_allowed) {
+ for (int mode = 0; mode < kIntraPredictionModesY; ++mode) {
+ symbol_context->uv_mode_cdf[cfl_allowed][mode][kIntraPredictionModesUV -
+ (1 - cfl_allowed)] +=
+ value;
+ }
+ }
+}
+
+#define ASSIGN_COUNTER_2D(array, offset) \
+ do { \
+ for (auto& d1 : context.array) { \
+ d1[libgav1::offset] = ++value; \
+ } \
+ } while (false)
+
+TEST(SymbolDecoderContextTest, ResetCounters2d) {
+ libgav1::SymbolDecoderContext context = {};
+ context.Initialize(0);
+ int value = 0;
+ ASSIGN_COUNTER_2D(segment_id_cdf, kMaxSegments);
+ ASSIGN_COUNTER_2D(use_predicted_segment_id_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(skip_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(skip_mode_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(delta_lf_multi_cdf, kDeltaSymbolCount);
+ ASSIGN_COUNTER_2D(y_mode_cdf, kIntraPredictionModesY);
+ ASSIGN_COUNTER_2D(angle_delta_cdf, kAngleDeltaSymbolCount);
+ ASSIGN_COUNTER_2D(cfl_alpha_cdf, kCflAlphaSymbolCount);
+ ASSIGN_COUNTER_2D(use_filter_intra_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(tx_split_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(eob_pt_512_cdf, kEobPt512SymbolCount);
+ ASSIGN_COUNTER_2D(eob_pt_1024_cdf, kEobPt1024SymbolCount);
+ ASSIGN_COUNTER_2D(palette_y_size_cdf, kPaletteSizeSymbolCount);
+ ASSIGN_COUNTER_2D(has_palette_uv_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(palette_uv_size_cdf, kPaletteSizeSymbolCount);
+ ASSIGN_COUNTER_2D(is_inter_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(use_compound_reference_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(compound_reference_type_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(compound_prediction_mode_cdf,
+ kNumCompoundInterPredictionModes);
+ ASSIGN_COUNTER_2D(new_mv_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(zero_mv_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(reference_mv_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(ref_mv_index_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(is_inter_intra_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(inter_intra_mode_cdf, kNumInterIntraModes);
+ ASSIGN_COUNTER_2D(is_wedge_inter_intra_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(wedge_index_cdf, kWedgeIndexSymbolCount);
+ ASSIGN_COUNTER_2D(use_obmc_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(motion_mode_cdf, kNumMotionModes);
+ ASSIGN_COUNTER_2D(is_explicit_compound_type_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(is_compound_type_average_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_2D(compound_type_cdf, kNumExplicitCompoundPredictionTypes);
+ ASSIGN_COUNTER_2D(interpolation_filter_cdf, kNumExplicitInterpolationFilters);
+ ASSIGN_COUNTER_2D(mv_joint_cdf, kNumMvJointTypes);
+ ResetAndVerifyCounters(&context);
+}
+
+#undef ASSIGN_COUNTER_2D
+
+#define ASSIGN_COUNTER_3D(array, offset) \
+ do { \
+ for (auto& d1 : context.array) { \
+ for (auto& d2 : d1) { \
+ d2[libgav1::offset] = ++value; \
+ } \
+ } \
+ } while (false)
+
+TEST(SymbolDecoderContextTest, ResetCounters3d) {
+ libgav1::SymbolDecoderContext context = {};
+ context.Initialize(0);
+ int value = 0;
+ ASSIGN_COUNTER_3D(intra_frame_y_mode_cdf, kIntraPredictionModesY);
+ ASSIGN_COUNTER_3D(all_zero_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(eob_pt_16_cdf, kEobPt16SymbolCount);
+ ASSIGN_COUNTER_3D(eob_pt_32_cdf, kEobPt32SymbolCount);
+ ASSIGN_COUNTER_3D(eob_pt_64_cdf, kEobPt64SymbolCount);
+ ASSIGN_COUNTER_3D(eob_pt_128_cdf, kEobPt128SymbolCount);
+ ASSIGN_COUNTER_3D(eob_pt_256_cdf, kEobPt256SymbolCount);
+ ASSIGN_COUNTER_3D(dc_sign_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(has_palette_y_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(compound_backward_reference_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(single_reference_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(mv_sign_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(mv_class_cdf, kMvClassSymbolCount);
+ ASSIGN_COUNTER_3D(mv_class0_bit_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(mv_class0_high_precision_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_3D(mv_fraction_cdf, kMvFractionSymbolCount);
+ ASSIGN_COUNTER_3D(mv_high_precision_cdf, kBooleanSymbolCount);
+ IncreasePartitionCounters(&context, value);
+ IncreaseTxTypeCounters(&context, value);
+ IncreaseTxDepthCounters(&context, value);
+ IncreaseUVModeCounters(&context, value);
+ ResetAndVerifyCounters(&context);
+}
+
+#undef ASSIGN_COUNTER_3D
+
+#define ASSIGN_COUNTER_4D(array, offset) \
+ do { \
+ for (auto& d1 : context.array) { \
+ for (auto& d2 : d1) { \
+ for (auto& d3 : d2) { \
+ d3[libgav1::offset] = ++value; \
+ } \
+ } \
+ } \
+ } while (false)
+
+TEST(SymbolDecoderContextTest, ResetCounters4d) {
+ libgav1::SymbolDecoderContext context = {};
+ context.Initialize(0);
+ int value = 0;
+ ASSIGN_COUNTER_4D(eob_extra_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_4D(coeff_base_eob_cdf, kCoeffBaseEobSymbolCount);
+ ASSIGN_COUNTER_4D(coeff_base_cdf, kCoeffBaseSymbolCount);
+ ASSIGN_COUNTER_4D(coeff_base_range_cdf, kCoeffBaseRangeSymbolCount);
+ ASSIGN_COUNTER_4D(compound_reference_cdf, kBooleanSymbolCount);
+ ASSIGN_COUNTER_4D(mv_class0_fraction_cdf, kMvFractionSymbolCount);
+ ASSIGN_COUNTER_4D(mv_bit_cdf, kBooleanSymbolCount);
+ IncreasePaletteColorIndexCounters(&context, value);
+ IncreaseTxTypeCounters(&context, value);
+ ResetAndVerifyCounters(&context);
+}
+
+#undef ASSIGN_COUNTER_4D
+
+} // namespace
+} // namespace libgav1
diff --git a/src/threading_strategy_test.cc b/src/threading_strategy_test.cc
new file mode 100644
index 0000000..2a7a781
--- /dev/null
+++ b/src/threading_strategy_test.cc
@@ -0,0 +1,281 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/threading_strategy.h"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "gtest/gtest.h"
+#include "src/frame_scratch_buffer.h"
+#include "src/obu_parser.h"
+#include "src/utils/constants.h"
+#include "src/utils/threadpool.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+class ThreadingStrategyTest : public testing::Test {
+ protected:
+ ThreadingStrategy strategy_;
+ ObuFrameHeader frame_header_ = {};
+};
+
+TEST_F(ThreadingStrategyTest, MaxThreadEnforced) {
+ frame_header_.tile_info.tile_count = 32;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 32));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 32; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+TEST_F(ThreadingStrategyTest, UseAllThreadsForTiles) {
+ frame_header_.tile_info.tile_count = 8;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 8));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+TEST_F(ThreadingStrategyTest, RowThreads) {
+ frame_header_.tile_info.tile_count = 2;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 8));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ // Each tile should get 3 threads each.
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+TEST_F(ThreadingStrategyTest, RowThreadsUnequal) {
+ frame_header_.tile_info.tile_count = 2;
+
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 9));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ EXPECT_NE(strategy_.row_thread_pool(0), nullptr);
+ EXPECT_NE(strategy_.row_thread_pool(1), nullptr);
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+// Test a random combination of tile_count and thread_count.
+TEST_F(ThreadingStrategyTest, MultipleCalls) {
+ frame_header_.tile_info.tile_count = 2;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 8));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 8;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 8));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ // Row threads must have been reset.
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 8;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 16));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 4;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 16));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 4; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ // All the other row threads must be reset.
+ for (int i = 4; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 4;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 6));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ // First two tiles will get 1 thread each.
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ // All the other row threads must be reset.
+ for (int i = 2; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 1));
+ EXPECT_EQ(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_EQ(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+// Tests the following order of calls (with thread count fixed at 4):
+// * 1 Tile - 2 Tiles - 1 Tile.
+TEST_F(ThreadingStrategyTest, MultipleCalls2) {
+ frame_header_.tile_info.tile_count = 1;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 4));
+ // When there is only one tile, tile thread pool must be nullptr.
+ EXPECT_EQ(strategy_.tile_thread_pool(), nullptr);
+ EXPECT_NE(strategy_.row_thread_pool(0), nullptr);
+ for (int i = 1; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 2;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 4));
+ EXPECT_NE(strategy_.tile_thread_pool(), nullptr);
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_NE(strategy_.row_thread_pool(i), nullptr);
+ }
+ for (int i = 2; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+
+ frame_header_.tile_info.tile_count = 1;
+ ASSERT_TRUE(strategy_.Reset(frame_header_, 4));
+ EXPECT_EQ(strategy_.tile_thread_pool(), nullptr);
+ EXPECT_NE(strategy_.row_thread_pool(0), nullptr);
+ for (int i = 1; i < 8; ++i) {
+ EXPECT_EQ(strategy_.row_thread_pool(i), nullptr);
+ }
+ EXPECT_NE(strategy_.post_filter_thread_pool(), nullptr);
+}
+
+void VerifyFrameParallel(int thread_count, int tile_count, int tile_columns,
+ int expected_frame_threads,
+ const std::vector<int>& expected_tile_threads) {
+ ASSERT_EQ(expected_frame_threads, expected_tile_threads.size());
+ ASSERT_GT(thread_count, 1);
+ std::unique_ptr<ThreadPool> frame_thread_pool;
+ FrameScratchBufferPool frame_scratch_buffer_pool;
+ ASSERT_TRUE(InitializeThreadPoolsForFrameParallel(
+ thread_count, tile_count, tile_columns, &frame_thread_pool,
+ &frame_scratch_buffer_pool));
+ if (expected_frame_threads == 0) {
+ EXPECT_EQ(frame_thread_pool, nullptr);
+ return;
+ }
+ EXPECT_NE(frame_thread_pool.get(), nullptr);
+ EXPECT_EQ(frame_thread_pool->num_threads(), expected_frame_threads);
+ std::vector<std::unique_ptr<FrameScratchBuffer>> frame_scratch_buffers;
+ int actual_thread_count = frame_thread_pool->num_threads();
+ for (int i = 0; i < expected_frame_threads; ++i) {
+ SCOPED_TRACE(absl::StrCat("i: ", i));
+ frame_scratch_buffers.push_back(frame_scratch_buffer_pool.Get());
+ ThreadPool* const thread_pool =
+ frame_scratch_buffers.back()->threading_strategy.thread_pool();
+ if (expected_tile_threads[i] > 0) {
+ EXPECT_NE(thread_pool, nullptr);
+ EXPECT_EQ(thread_pool->num_threads(), expected_tile_threads[i]);
+ actual_thread_count += thread_pool->num_threads();
+ } else {
+ EXPECT_EQ(thread_pool, nullptr);
+ }
+ }
+ EXPECT_EQ(thread_count, actual_thread_count);
+ for (auto& frame_scratch_buffer : frame_scratch_buffers) {
+ frame_scratch_buffer_pool.Release(std::move(frame_scratch_buffer));
+ }
+}
+
+TEST(FrameParallelStrategyTest, FrameParallel) {
+ // This loop has thread_count <= 3 * tile count. So there should be no frame
+ // threads irrespective of the number of tile columns.
+ for (int thread_count = 2; thread_count <= 6; ++thread_count) {
+ VerifyFrameParallel(thread_count, /*tile_count=*/2, /*tile_columns=*/1,
+ /*expected_frame_threads=*/0,
+ /*expected_tile_threads=*/{});
+ VerifyFrameParallel(thread_count, /*tile_count=*/2, /*tile_columns=*/2,
+ /*expected_frame_threads=*/0,
+ /*expected_tile_threads=*/{});
+ }
+
+ // Equal number of tile threads for each frame thread.
+ VerifyFrameParallel(
+ /*thread_count=*/8, /*tile_count=*/1, /*tile_columns=*/1,
+ /*expected_frame_threads=*/4, /*expected_tile_threads=*/{1, 1, 1, 1});
+ VerifyFrameParallel(
+ /*thread_count=*/12, /*tile_count=*/2, /*tile_columns=*/2,
+ /*expected_frame_threads=*/4, /*expected_tile_threads=*/{2, 2, 2, 2});
+ VerifyFrameParallel(
+ /*thread_count=*/18, /*tile_count=*/2, /*tile_columns=*/2,
+ /*expected_frame_threads=*/6,
+ /*expected_tile_threads=*/{2, 2, 2, 2, 2, 2});
+ VerifyFrameParallel(
+ /*thread_count=*/16, /*tile_count=*/3, /*tile_columns=*/3,
+ /*expected_frame_threads=*/4, /*expected_tile_threads=*/{3, 3, 3, 3});
+
+ // Unequal number of tile threads for each frame thread.
+ VerifyFrameParallel(
+ /*thread_count=*/7, /*tile_count=*/1, /*tile_columns=*/1,
+ /*expected_frame_threads=*/3, /*expected_tile_threads=*/{2, 1, 1});
+ VerifyFrameParallel(
+ /*thread_count=*/14, /*tile_count=*/2, /*tile_columns=*/2,
+ /*expected_frame_threads=*/4, /*expected_tile_threads=*/{3, 3, 2, 2});
+ VerifyFrameParallel(
+ /*thread_count=*/20, /*tile_count=*/2, /*tile_columns=*/2,
+ /*expected_frame_threads=*/6,
+ /*expected_tile_threads=*/{3, 3, 2, 2, 2, 2});
+ VerifyFrameParallel(
+ /*thread_count=*/17, /*tile_count=*/3, /*tile_columns=*/3,
+ /*expected_frame_threads=*/4, /*expected_tile_threads=*/{4, 3, 3, 3});
+}
+
+TEST(FrameParallelStrategyTest, ThreadCountDoesNotExceedkMaxThreads) {
+ std::unique_ptr<ThreadPool> frame_thread_pool;
+ FrameScratchBufferPool frame_scratch_buffer_pool;
+ ASSERT_TRUE(InitializeThreadPoolsForFrameParallel(
+ /*thread_count=*/kMaxThreads + 10, /*tile_count=*/2, /*tile_columns=*/2,
+ &frame_thread_pool, &frame_scratch_buffer_pool));
+ EXPECT_NE(frame_thread_pool.get(), nullptr);
+ std::vector<std::unique_ptr<FrameScratchBuffer>> frame_scratch_buffers;
+ int actual_thread_count = frame_thread_pool->num_threads();
+ for (int i = 0; i < frame_thread_pool->num_threads(); ++i) {
+ SCOPED_TRACE(absl::StrCat("i: ", i));
+ frame_scratch_buffers.push_back(frame_scratch_buffer_pool.Get());
+ ThreadPool* const thread_pool =
+ frame_scratch_buffers.back()->threading_strategy.thread_pool();
+ if (thread_pool != nullptr) {
+ actual_thread_count += thread_pool->num_threads();
+ }
+ }
+ // In this case, the exact number of frame threads and tile threads depend on
+ // the value of kMaxThreads. So simply ensure that the total number of threads
+ // does not exceed kMaxThreads.
+ EXPECT_LE(actual_thread_count, kMaxThreads);
+ for (auto& frame_scratch_buffer : frame_scratch_buffers) {
+ frame_scratch_buffer_pool.Release(std::move(frame_scratch_buffer));
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/tile.h b/src/tile.h
index 6bae2a0..83c3423 100644
--- a/src/tile.h
+++ b/src/tile.h
@@ -65,7 +65,9 @@ enum ProcessingMode {
kProcessingModeParseAndDecode,
};
-class Tile : public Allocable {
+// The alignment requirement is due to the SymbolDecoderContext member
+// symbol_decoder_context_.
+class Tile : public MaxAlignedAllocable {
public:
static std::unique_ptr<Tile> Create(
int tile_number, const uint8_t* const data, size_t size,
@@ -320,7 +322,7 @@ class Tile : public Allocable {
bool ReadSegmentId(const Block& block); // 5.11.9.
bool ReadIntraSegmentId(const Block& block); // 5.11.8.
void ReadSkip(const Block& block); // 5.11.11.
- void ReadSkipMode(const Block& block); // 5.11.10.
+ bool ReadSkipMode(const Block& block); // 5.11.10.
void ReadCdef(const Block& block); // 5.11.56.
// Returns the new value. |cdf| is an array of size kDeltaSymbolCount + 1.
int ReadAndClipDelta(uint16_t* cdf, int delta_small, int scale, int min_value,
@@ -330,6 +332,7 @@ class Tile : public Allocable {
// Populates |BlockParameters::deblock_filter_level| for the given |block|
// using |deblock_filter_levels_|.
void PopulateDeblockFilterLevel(const Block& block);
+ void PopulateCdefSkip(const Block& block);
void ReadPredictionModeY(const Block& block, bool intra_y_mode);
void ReadIntraAngleInfo(const Block& block,
PlaneType plane_type); // 5.11.42 and 5.11.43.
@@ -346,36 +349,41 @@ class Tile : public Allocable {
bool DecodeIntraModeInfo(const Block& block); // 5.11.7.
int8_t ComputePredictedSegmentId(const Block& block) const; // 5.11.21.
bool ReadInterSegmentId(const Block& block, bool pre_skip); // 5.11.19.
- void ReadIsInter(const Block& block); // 5.11.20.
+ void ReadIsInter(const Block& block, bool skip_mode); // 5.11.20.
bool ReadIntraBlockModeInfo(const Block& block,
bool intra_y_mode); // 5.11.22.
CompoundReferenceType ReadCompoundReferenceType(const Block& block);
template <bool is_single, bool is_backward, int index>
uint16_t* GetReferenceCdf(const Block& block, CompoundReferenceType type =
kNumCompoundReferenceTypes);
- void ReadReferenceFrames(const Block& block); // 5.11.25.
+ void ReadReferenceFrames(const Block& block, bool skip_mode); // 5.11.25.
void ReadInterPredictionModeY(const Block& block,
- const MvContexts& mode_contexts);
+ const MvContexts& mode_contexts,
+ bool skip_mode);
void ReadRefMvIndex(const Block& block);
- void ReadInterIntraMode(const Block& block, bool is_compound); // 5.11.28.
+ void ReadInterIntraMode(const Block& block, bool is_compound,
+ bool skip_mode); // 5.11.28.
bool IsScaled(ReferenceFrameType type) const { // Part of 5.11.27.
const int index =
frame_header_.reference_frame_index[type - kReferenceFrameLast];
return reference_frames_[index]->upscaled_width() != frame_header_.width ||
reference_frames_[index]->frame_height() != frame_header_.height;
}
- void ReadMotionMode(const Block& block, bool is_compound); // 5.11.27.
+ void ReadMotionMode(const Block& block, bool is_compound,
+ bool skip_mode); // 5.11.27.
uint16_t* GetIsExplicitCompoundTypeCdf(const Block& block);
uint16_t* GetIsCompoundTypeAverageCdf(const Block& block);
- void ReadCompoundType(const Block& block, bool is_compound); // 5.11.29.
+ void ReadCompoundType(const Block& block, bool is_compound, bool skip_mode,
+ bool* is_explicit_compound_type,
+ bool* is_compound_type_average); // 5.11.29.
uint16_t* GetInterpolationFilterCdf(const Block& block, int direction);
- void ReadInterpolationFilter(const Block& block);
- bool ReadInterBlockModeInfo(const Block& block); // 5.11.23.
- bool DecodeInterModeInfo(const Block& block); // 5.11.18.
- bool DecodeModeInfo(const Block& block); // 5.11.6.
- bool IsMvValid(const Block& block, bool is_compound) const; // 6.10.25.
- bool AssignInterMv(const Block& block, bool is_compound); // 5.11.26.
- bool AssignIntraMv(const Block& block); // 5.11.26.
+ void ReadInterpolationFilter(const Block& block, bool skip_mode);
+ bool ReadInterBlockModeInfo(const Block& block, bool skip_mode); // 5.11.23.
+ bool DecodeInterModeInfo(const Block& block); // 5.11.18.
+ bool DecodeModeInfo(const Block& block); // 5.11.6.
+ bool IsMvValid(const Block& block, bool is_compound) const; // 6.10.25.
+ bool AssignInterMv(const Block& block, bool is_compound); // 5.11.26.
+ bool AssignIntraMv(const Block& block); // 5.11.26.
int GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
bool ignore_skip);
int GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
@@ -541,7 +549,6 @@ class Tile : public Allocable {
bool has_left, bool has_top, bool has_top_right,
bool has_bottom_left, PredictionMode mode,
TransformSize tx_size);
- bool IsSmoothPrediction(int row, int column, Plane plane) const;
int GetIntraEdgeFilterType(const Block& block,
Plane plane) const; // 7.11.2.8.
template <typename Pixel>
@@ -563,6 +570,17 @@ class Tile : public Allocable {
// for the given |block| and stores them into |current_frame_|.
void StoreMotionFieldMvsIntoCurrentFrame(const Block& block);
+ // SetCdfContext*() functions will populate the |left_context_| and
+ // |top_context_| for the |block|.
+ void SetCdfContextUsePredictedSegmentId(const Block& block,
+ bool use_predicted_segment_id);
+ void SetCdfContextCompoundType(const Block& block,
+ bool is_explicit_compound_type,
+ bool is_compound_type_average);
+ void SetCdfContextSkipMode(const Block& block, bool skip_mode);
+ void SetCdfContextPaletteSize(const Block& block);
+ void SetCdfContextUVMode(const Block& block);
+
// Returns the zero-based index of the super block that contains |row4x4|
// relative to the start of this tile.
int SuperBlockRowIndex(int row4x4) const {
@@ -577,6 +595,16 @@ class Tile : public Allocable {
(sequence_header_.use_128x128_superblock ? 5 : 4);
}
+ // Returns the zero-based index of the block that starts at row4x4 or
+ // column4x4 relative to the start of the superblock that contains the block.
+ // This is used to index into the members of |left_context_| and
+ // |top_context_|.
+ int CdfContextIndex(int row_or_column4x4) const {
+ return row_or_column4x4 -
+ (row_or_column4x4 &
+ (sequence_header_.use_128x128_superblock ? ~31 : ~15));
+ }
+
BlockSize SuperBlockSize() const {
return sequence_header_.use_128x128_superblock ? kBlock128x128
: kBlock64x64;
@@ -600,8 +628,6 @@ class Tile : public Allocable {
bool read_deltas_;
const int8_t subsampling_x_[kMaxPlanes];
const int8_t subsampling_y_[kMaxPlanes];
- int deblock_row_limit_[kMaxPlanes];
- int deblock_column_limit_[kMaxPlanes];
// The dimensions (in order) are: segment_id, level_index (based on plane and
// direction), reference_frame and mode_id.
@@ -649,7 +675,7 @@ class Tile : public Allocable {
const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint_;
const WedgeMaskArray& wedge_masks_;
const QuantizerMatrix& quantizer_matrix_;
- DaalaBitReader reader_;
+ EntropyDecoder reader_;
SymbolDecoderContext symbol_decoder_context_;
SymbolDecoderContext* const saved_symbol_decoder_context_;
const SegmentationMap* prev_segment_ids_;
@@ -712,7 +738,8 @@ class Tile : public Allocable {
Array2DView<uint8_t> buffer_[kMaxPlanes];
RefCountedBuffer& current_frame_;
- Array2D<int16_t>& cdef_index_;
+ Array2D<int8_t>& cdef_index_;
+ Array2D<uint8_t>& cdef_skip_;
Array2D<TransformSize>& inter_transform_sizes_;
std::array<RestorationUnitInfo, kMaxPlanes> reference_unit_info_;
// If |thread_pool_| is nullptr, the calling thread will do the parsing and
@@ -746,12 +773,19 @@ class Tile : public Allocable {
// Stores the progress of the reference frames. This will be used to avoid
// unnecessary calls into RefCountedBuffer::WaitUntil().
std::array<int, kNumReferenceFrameTypes> reference_frame_progress_cache_;
+ // Stores the CDF contexts necessary for the "left" block.
+ BlockCdfContext left_context_;
+ // Stores the CDF contexts necessary for the "top" block. The size of this
+ // buffer is the number of superblock columns in this tile. For each block,
+ // the access index will be the corresponding SuperBlockColumnIndex()'th
+ // entry.
+ DynamicBuffer<BlockCdfContext> top_context_;
};
struct Tile::Block {
- Block(const Tile& tile, BlockSize size, int row4x4, int column4x4,
+ Block(Tile* tile_ptr, BlockSize size, int row4x4, int column4x4,
TileScratchBuffer* const scratch_buffer, ResidualPtr* residual)
- : tile(tile),
+ : tile(*tile_ptr),
size(size),
row4x4(row4x4),
column4x4(column4x4),
@@ -760,7 +794,11 @@ struct Tile::Block {
width4x4(width >> 2),
height4x4(height >> 2),
scratch_buffer(scratch_buffer),
- residual(residual) {
+ residual(residual),
+ top_context(tile.top_context_.get() +
+ tile.SuperBlockColumnIndex(column4x4)),
+ top_context_index(tile.CdfContextIndex(column4x4)),
+ left_context_index(tile.CdfContextIndex(row4x4)) {
assert(size != kBlockInvalid);
residual_size[kPlaneY] = kPlaneResidualSize[size][0][0];
residual_size[kPlaneU] = residual_size[kPlaneV] =
@@ -881,7 +919,7 @@ struct Tile::Block {
return false;
}
- const Tile& tile;
+ Tile& tile;
bool has_chroma;
const BlockSize size;
bool top_available[kMaxPlanes];
@@ -898,6 +936,9 @@ struct Tile::Block {
BlockParameters* bp;
TileScratchBuffer* const scratch_buffer;
ResidualPtr* const residual;
+ BlockCdfContext* const top_context;
+ const int top_context_index;
+ const int left_context_index;
};
extern template bool
diff --git a/src/tile/bitstream/mode_info.cc b/src/tile/bitstream/mode_info.cc
index 0b22eb0..cb7b311 100644
--- a/src/tile/bitstream/mode_info.cc
+++ b/src/tile/bitstream/mode_info.cc
@@ -185,19 +185,22 @@ int GetReferenceContext(const Tile::Block& block,
} // namespace
bool Tile::ReadSegmentId(const Block& block) {
+ // These two asserts ensure that current_frame_.segmentation_map() is not
+ // nullptr.
+ assert(frame_header_.segmentation.enabled);
+ assert(frame_header_.segmentation.update_map);
+ const SegmentationMap& map = *current_frame_.segmentation_map();
int top_left = -1;
if (block.top_available[kPlaneY] && block.left_available[kPlaneY]) {
- top_left =
- block_parameters_holder_.Find(block.row4x4 - 1, block.column4x4 - 1)
- ->segment_id;
+ top_left = map.segment_id(block.row4x4 - 1, block.column4x4 - 1);
}
int top = -1;
if (block.top_available[kPlaneY]) {
- top = block.bp_top->segment_id;
+ top = map.segment_id(block.row4x4 - 1, block.column4x4);
}
int left = -1;
if (block.left_available[kPlaneY]) {
- left = block.bp_left->segment_id;
+ left = map.segment_id(block.row4x4, block.column4x4 - 1);
}
int pred;
if (top == -1) {
@@ -209,7 +212,7 @@ bool Tile::ReadSegmentId(const Block& block) {
}
BlockParameters& bp = *block.bp;
if (bp.skip) {
- bp.segment_id = pred;
+ bp.prediction_parameters->segment_id = pred;
return true;
}
int context = 0;
@@ -224,17 +227,18 @@ bool Tile::ReadSegmentId(const Block& block) {
symbol_decoder_context_.segment_id_cdf[context];
const int encoded_segment_id =
reader_.ReadSymbol<kMaxSegments>(segment_id_cdf);
- bp.segment_id =
+ bp.prediction_parameters->segment_id =
DecodeSegmentId(encoded_segment_id, pred,
frame_header_.segmentation.last_active_segment_id + 1);
// Check the bitstream conformance requirement in Section 6.10.8 of the spec.
- if (bp.segment_id < 0 ||
- bp.segment_id > frame_header_.segmentation.last_active_segment_id) {
+ if (bp.prediction_parameters->segment_id < 0 ||
+ bp.prediction_parameters->segment_id >
+ frame_header_.segmentation.last_active_segment_id) {
LIBGAV1_DLOG(
ERROR,
"Corrupted segment_ids: encoded %d, last active %d, postprocessed %d",
encoded_segment_id, frame_header_.segmentation.last_active_segment_id,
- bp.segment_id);
+ bp.prediction_parameters->segment_id);
return false;
}
return true;
@@ -243,7 +247,7 @@ bool Tile::ReadSegmentId(const Block& block) {
bool Tile::ReadIntraSegmentId(const Block& block) {
BlockParameters& bp = *block.bp;
if (!frame_header_.segmentation.enabled) {
- bp.segment_id = 0;
+ bp.prediction_parameters->segment_id = 0;
return true;
}
return ReadSegmentId(block);
@@ -252,8 +256,8 @@ bool Tile::ReadIntraSegmentId(const Block& block) {
void Tile::ReadSkip(const Block& block) {
BlockParameters& bp = *block.bp;
if (frame_header_.segmentation.segment_id_pre_skip &&
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureSkip)) {
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureSkip)) {
bp.skip = true;
return;
}
@@ -268,51 +272,53 @@ void Tile::ReadSkip(const Block& block) {
bp.skip = reader_.ReadSymbol(skip_cdf);
}
-void Tile::ReadSkipMode(const Block& block) {
+bool Tile::ReadSkipMode(const Block& block) {
BlockParameters& bp = *block.bp;
if (!frame_header_.skip_mode_present ||
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureSkip) ||
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureReferenceFrame) ||
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureGlobalMv) ||
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureSkip) ||
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id,
+ kSegmentFeatureReferenceFrame) ||
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureGlobalMv) ||
IsBlockDimension4(block.size)) {
- bp.skip_mode = false;
- return;
+ return false;
}
const int context =
(block.left_available[kPlaneY]
- ? static_cast<int>(block.bp_left->skip_mode)
+ ? static_cast<int>(left_context_.skip_mode[block.left_context_index])
: 0) +
- (block.top_available[kPlaneY] ? static_cast<int>(block.bp_top->skip_mode)
- : 0);
- bp.skip_mode =
- reader_.ReadSymbol(symbol_decoder_context_.skip_mode_cdf[context]);
+ (block.top_available[kPlaneY]
+ ? static_cast<int>(
+ block.top_context->skip_mode[block.top_context_index])
+ : 0);
+ return reader_.ReadSymbol(symbol_decoder_context_.skip_mode_cdf[context]);
}
void Tile::ReadCdef(const Block& block) {
BlockParameters& bp = *block.bp;
if (bp.skip || frame_header_.coded_lossless ||
- !sequence_header_.enable_cdef || frame_header_.allow_intrabc) {
+ !sequence_header_.enable_cdef || frame_header_.allow_intrabc ||
+ frame_header_.cdef.bits == 0) {
return;
}
- const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64];
- const int cdef_mask4x4 = ~(cdef_size4x4 - 1);
- const int row4x4 = block.row4x4 & cdef_mask4x4;
- const int column4x4 = block.column4x4 & cdef_mask4x4;
- const int row = DivideBy16(row4x4);
- const int column = DivideBy16(column4x4);
- if (cdef_index_[row][column] == -1) {
- cdef_index_[row][column] =
- frame_header_.cdef.bits > 0
- ? static_cast<int16_t>(reader_.ReadLiteral(frame_header_.cdef.bits))
- : 0;
- for (int i = row4x4; i < row4x4 + block.height4x4; i += cdef_size4x4) {
- for (int j = column4x4; j < column4x4 + block.width4x4;
- j += cdef_size4x4) {
- cdef_index_[DivideBy16(i)][DivideBy16(j)] = cdef_index_[row][column];
- }
+ int8_t* const cdef_index =
+ &cdef_index_[DivideBy16(block.row4x4)][DivideBy16(block.column4x4)];
+ int stride = cdef_index_.columns();
+ if (cdef_index[0] == -1) {
+ cdef_index[0] =
+ static_cast<int8_t>(reader_.ReadLiteral(frame_header_.cdef.bits));
+ if (block.size == kBlock128x128) {
+ // This condition is shorthand for block.width4x4 > 16 && block.height4x4
+ // > 16.
+ cdef_index[1] = cdef_index[0];
+ cdef_index[stride] = cdef_index[0];
+ cdef_index[stride + 1] = cdef_index[0];
+ } else if (block.width4x4 > 16) {
+ cdef_index[1] = cdef_index[0];
+ } else if (block.height4x4 > 16) {
+ cdef_index[stride] = cdef_index[0];
}
}
}
@@ -328,7 +334,7 @@ int Tile::ReadAndClipDelta(uint16_t* const cdf, int delta_small, int scale,
abs = abs_remaining_bits + (1 << remaining_bit_count) + 1;
}
if (abs != 0) {
- const bool sign = static_cast<bool>(reader_.ReadBit());
+ const bool sign = reader_.ReadBit() != 0;
const int scaled_abs = abs << scale;
const int reduced_delta = sign ? -scaled_abs : scaled_abs;
value += reduced_delta;
@@ -404,8 +410,9 @@ void Tile::ReadIntraAngleInfo(const Block& block, PlaneType plane_type) {
PredictionParameters& prediction_parameters =
*block.bp->prediction_parameters;
prediction_parameters.angle_delta[plane_type] = 0;
- const PredictionMode mode =
- (plane_type == kPlaneTypeY) ? bp.y_mode : bp.uv_mode;
+ const PredictionMode mode = (plane_type == kPlaneTypeY)
+ ? bp.y_mode
+ : bp.prediction_parameters->uv_mode;
if (IsBlockSmallerThan8x8(block.size) || !IsDirectionalMode(mode)) return;
uint16_t* const cdf =
symbol_decoder_context_.angle_delta_cdf[mode - kPredictionModeVertical];
@@ -445,7 +452,8 @@ void Tile::ReadCflAlpha(const Block& block) {
void Tile::ReadPredictionModeUV(const Block& block) {
BlockParameters& bp = *block.bp;
bool chroma_from_luma_allowed;
- if (frame_header_.segmentation.lossless[bp.segment_id]) {
+ if (frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id]) {
chroma_from_luma_allowed = block.residual_size[kPlaneU] == kBlock4x4;
} else {
chroma_from_luma_allowed = IsBlockDimensionLessThan64(block.size);
@@ -454,10 +462,10 @@ void Tile::ReadPredictionModeUV(const Block& block) {
symbol_decoder_context_
.uv_mode_cdf[static_cast<int>(chroma_from_luma_allowed)][bp.y_mode];
if (chroma_from_luma_allowed) {
- bp.uv_mode = static_cast<PredictionMode>(
+ bp.prediction_parameters->uv_mode = static_cast<PredictionMode>(
reader_.ReadSymbol<kIntraPredictionModesUV>(cdf));
} else {
- bp.uv_mode = static_cast<PredictionMode>(
+ bp.prediction_parameters->uv_mode = static_cast<PredictionMode>(
reader_.ReadSymbol<kIntraPredictionModesUV - 1>(cdf));
}
}
@@ -528,7 +536,7 @@ void Tile::ReadFilterIntraModeInfo(const Block& block) {
*block.bp->prediction_parameters;
prediction_parameters.use_filter_intra = false;
if (!sequence_header_.enable_filter_intra || bp.y_mode != kPredictionModeDc ||
- bp.palette_mode_info.size[kPlaneTypeY] != 0 ||
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] != 0 ||
!IsBlockDimensionLessThan64(block.size)) {
return;
}
@@ -548,7 +556,7 @@ bool Tile::DecodeIntraModeInfo(const Block& block) {
!ReadIntraSegmentId(block)) {
return false;
}
- bp.skip_mode = false;
+ SetCdfContextSkipMode(block, false);
ReadSkip(block);
if (!frame_header_.segmentation.segment_id_pre_skip &&
!ReadIntraSegmentId(block)) {
@@ -572,12 +580,14 @@ bool Tile::DecodeIntraModeInfo(const Block& block) {
bp.reference_frame[0] = kReferenceFrameIntra;
bp.reference_frame[1] = kReferenceFrameNone;
bp.y_mode = kPredictionModeDc;
- bp.uv_mode = kPredictionModeDc;
+ bp.prediction_parameters->uv_mode = kPredictionModeDc;
+ SetCdfContextUVMode(block);
prediction_parameters.motion_mode = kMotionModeSimple;
prediction_parameters.compound_prediction_type =
kCompoundPredictionTypeAverage;
- bp.palette_mode_info.size[kPlaneTypeY] = 0;
- bp.palette_mode_info.size[kPlaneTypeUV] = 0;
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] = 0;
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeUV] = 0;
+ SetCdfContextPaletteSize(block);
bp.interpolation_filter[0] = kInterpolationFilterBilinear;
bp.interpolation_filter[1] = kInterpolationFilterBilinear;
MvContexts dummy_mode_contexts;
@@ -608,59 +618,73 @@ int8_t Tile::ComputePredictedSegmentId(const Block& block) const {
return id;
}
+void Tile::SetCdfContextUsePredictedSegmentId(const Block& block,
+ bool use_predicted_segment_id) {
+ memset(left_context_.use_predicted_segment_id + block.left_context_index,
+ static_cast<int>(use_predicted_segment_id), block.height4x4);
+ memset(block.top_context->use_predicted_segment_id + block.top_context_index,
+ static_cast<int>(use_predicted_segment_id), block.width4x4);
+}
+
bool Tile::ReadInterSegmentId(const Block& block, bool pre_skip) {
BlockParameters& bp = *block.bp;
if (!frame_header_.segmentation.enabled) {
- bp.segment_id = 0;
+ bp.prediction_parameters->segment_id = 0;
return true;
}
if (!frame_header_.segmentation.update_map) {
- bp.segment_id = ComputePredictedSegmentId(block);
+ bp.prediction_parameters->segment_id = ComputePredictedSegmentId(block);
return true;
}
if (pre_skip) {
if (!frame_header_.segmentation.segment_id_pre_skip) {
- bp.segment_id = 0;
+ bp.prediction_parameters->segment_id = 0;
return true;
}
} else if (bp.skip) {
- bp.use_predicted_segment_id = false;
+ SetCdfContextUsePredictedSegmentId(block, false);
return ReadSegmentId(block);
}
if (frame_header_.segmentation.temporal_update) {
const int context =
(block.left_available[kPlaneY]
- ? static_cast<int>(block.bp_left->use_predicted_segment_id)
+ ? static_cast<int>(
+ left_context_
+ .use_predicted_segment_id[block.left_context_index])
: 0) +
(block.top_available[kPlaneY]
- ? static_cast<int>(block.bp_top->use_predicted_segment_id)
+ ? static_cast<int>(
+ block.top_context
+ ->use_predicted_segment_id[block.top_context_index])
: 0);
- bp.use_predicted_segment_id = reader_.ReadSymbol(
+ const bool use_predicted_segment_id = reader_.ReadSymbol(
symbol_decoder_context_.use_predicted_segment_id_cdf[context]);
- if (bp.use_predicted_segment_id) {
- bp.segment_id = ComputePredictedSegmentId(block);
+ SetCdfContextUsePredictedSegmentId(block, use_predicted_segment_id);
+ if (use_predicted_segment_id) {
+ bp.prediction_parameters->segment_id = ComputePredictedSegmentId(block);
return true;
}
}
return ReadSegmentId(block);
}
-void Tile::ReadIsInter(const Block& block) {
+void Tile::ReadIsInter(const Block& block, bool skip_mode) {
BlockParameters& bp = *block.bp;
- if (bp.skip_mode) {
+ if (skip_mode) {
bp.is_inter = true;
return;
}
- if (frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureReferenceFrame)) {
- bp.is_inter =
- frame_header_.segmentation
- .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame] !=
- kReferenceFrameIntra;
+ if (frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id,
+ kSegmentFeatureReferenceFrame)) {
+ bp.is_inter = frame_header_.segmentation
+ .feature_data[bp.prediction_parameters->segment_id]
+ [kSegmentFeatureReferenceFrame] !=
+ kReferenceFrameIntra;
return;
}
- if (frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureGlobalMv)) {
+ if (frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureGlobalMv)) {
bp.is_inter = true;
return;
}
@@ -678,6 +702,49 @@ void Tile::ReadIsInter(const Block& block) {
reader_.ReadSymbol(symbol_decoder_context_.is_inter_cdf[context]);
}
+void Tile::SetCdfContextPaletteSize(const Block& block) {
+ const PaletteModeInfo& palette_mode_info =
+ block.bp->prediction_parameters->palette_mode_info;
+ for (int plane_type = kPlaneTypeY; plane_type <= kPlaneTypeUV; ++plane_type) {
+ memset(left_context_.palette_size[plane_type] + block.left_context_index,
+ palette_mode_info.size[plane_type], block.height4x4);
+ memset(
+ block.top_context->palette_size[plane_type] + block.top_context_index,
+ palette_mode_info.size[plane_type], block.width4x4);
+ if (palette_mode_info.size[plane_type] == 0) continue;
+ for (int i = block.left_context_index;
+ i < block.left_context_index + block.height4x4; ++i) {
+ memcpy(left_context_.palette_color[i][plane_type],
+ palette_mode_info.color[plane_type],
+ kMaxPaletteSize * sizeof(palette_mode_info.color[0][0]));
+ }
+ for (int i = block.top_context_index;
+ i < block.top_context_index + block.width4x4; ++i) {
+ memcpy(block.top_context->palette_color[i][plane_type],
+ palette_mode_info.color[plane_type],
+ kMaxPaletteSize * sizeof(palette_mode_info.color[0][0]));
+ }
+ }
+}
+
+void Tile::SetCdfContextUVMode(const Block& block) {
+ // BlockCdfContext.uv_mode is only used to compute is_smooth_prediction for
+ // the intra edge upsamplers in the subsequent blocks. They have some special
+ // rules for subsampled UV planes. For subsampled UV planes, update left
+ // context only if current block contains the last odd column and update top
+ // context only if current block contains the last odd row.
+ if (subsampling_x_[kPlaneU] == 0 || (block.column4x4 & 1) == 1 ||
+ block.width4x4 > 1) {
+ memset(left_context_.uv_mode + block.left_context_index,
+ block.bp->prediction_parameters->uv_mode, block.height4x4);
+ }
+ if (subsampling_y_[kPlaneU] == 0 || (block.row4x4 & 1) == 1 ||
+ block.height4x4 > 1) {
+ memset(block.top_context->uv_mode + block.top_context_index,
+ block.bp->prediction_parameters->uv_mode, block.width4x4);
+ }
+}
+
bool Tile::ReadIntraBlockModeInfo(const Block& block, bool intra_y_mode) {
BlockParameters& bp = *block.bp;
bp.reference_frame[0] = kReferenceFrameIntra;
@@ -686,12 +753,39 @@ bool Tile::ReadIntraBlockModeInfo(const Block& block, bool intra_y_mode) {
ReadIntraAngleInfo(block, kPlaneTypeY);
if (block.HasChroma()) {
ReadPredictionModeUV(block);
- if (bp.uv_mode == kPredictionModeChromaFromLuma) {
+ if (bp.prediction_parameters->uv_mode == kPredictionModeChromaFromLuma) {
ReadCflAlpha(block);
}
+ if (block.left_available[kPlaneU]) {
+ const int smooth_row =
+ block.row4x4 + (~block.row4x4 & subsampling_y_[kPlaneU]);
+ const int smooth_column =
+ block.column4x4 - 1 - (block.column4x4 & subsampling_x_[kPlaneU]);
+ const BlockParameters& bp_left =
+ *block_parameters_holder_.Find(smooth_row, smooth_column);
+ bp.prediction_parameters->chroma_left_uses_smooth_prediction =
+ (bp_left.reference_frame[0] <= kReferenceFrameIntra) &&
+ kPredictionModeSmoothMask.Contains(
+ left_context_.uv_mode[CdfContextIndex(smooth_row)]);
+ }
+ if (block.top_available[kPlaneU]) {
+ const int smooth_row =
+ block.row4x4 - 1 - (block.row4x4 & subsampling_y_[kPlaneU]);
+ const int smooth_column =
+ block.column4x4 + (~block.column4x4 & subsampling_x_[kPlaneU]);
+ const BlockParameters& bp_top =
+ *block_parameters_holder_.Find(smooth_row, smooth_column);
+ bp.prediction_parameters->chroma_top_uses_smooth_prediction =
+ (bp_top.reference_frame[0] <= kReferenceFrameIntra) &&
+ kPredictionModeSmoothMask.Contains(
+ top_context_.get()[SuperBlockColumnIndex(smooth_column)]
+ .uv_mode[CdfContextIndex(smooth_column)]);
+ }
+ SetCdfContextUVMode(block);
ReadIntraAngleInfo(block, kPlaneTypeUV);
}
ReadPaletteModeInfo(block);
+ SetCdfContextPaletteSize(block);
ReadFilterIntraModeInfo(block);
return true;
}
@@ -808,25 +902,27 @@ uint16_t* Tile::GetReferenceCdf(
return symbol_decoder_context_.compound_reference_cdf[type][context][index];
}
-void Tile::ReadReferenceFrames(const Block& block) {
+void Tile::ReadReferenceFrames(const Block& block, bool skip_mode) {
BlockParameters& bp = *block.bp;
- if (bp.skip_mode) {
+ if (skip_mode) {
bp.reference_frame[0] = frame_header_.skip_mode_frame[0];
bp.reference_frame[1] = frame_header_.skip_mode_frame[1];
return;
}
- if (frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureReferenceFrame)) {
+ if (frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id,
+ kSegmentFeatureReferenceFrame)) {
bp.reference_frame[0] = static_cast<ReferenceFrameType>(
frame_header_.segmentation
- .feature_data[bp.segment_id][kSegmentFeatureReferenceFrame]);
+ .feature_data[bp.prediction_parameters->segment_id]
+ [kSegmentFeatureReferenceFrame]);
bp.reference_frame[1] = kReferenceFrameNone;
return;
}
- if (frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureSkip) ||
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureGlobalMv)) {
+ if (frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureSkip) ||
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureGlobalMv)) {
bp.reference_frame[0] = kReferenceFrameLast;
bp.reference_frame[1] = kReferenceFrameNone;
return;
@@ -927,16 +1023,17 @@ void Tile::ReadReferenceFrames(const Block& block) {
}
void Tile::ReadInterPredictionModeY(const Block& block,
- const MvContexts& mode_contexts) {
+ const MvContexts& mode_contexts,
+ bool skip_mode) {
BlockParameters& bp = *block.bp;
- if (bp.skip_mode) {
+ if (skip_mode) {
bp.y_mode = kPredictionModeNearestNearestMv;
return;
}
- if (frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureSkip) ||
- frame_header_.segmentation.FeatureActive(bp.segment_id,
- kSegmentFeatureGlobalMv)) {
+ if (frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureSkip) ||
+ frame_header_.segmentation.FeatureActive(
+ bp.prediction_parameters->segment_id, kSegmentFeatureGlobalMv)) {
bp.y_mode = kPredictionModeGlobalMv;
return;
}
@@ -995,13 +1092,14 @@ void Tile::ReadRefMvIndex(const Block& block) {
}
}
-void Tile::ReadInterIntraMode(const Block& block, bool is_compound) {
+void Tile::ReadInterIntraMode(const Block& block, bool is_compound,
+ bool skip_mode) {
BlockParameters& bp = *block.bp;
PredictionParameters& prediction_parameters =
*block.bp->prediction_parameters;
prediction_parameters.inter_intra_mode = kNumInterIntraModes;
prediction_parameters.is_wedge_inter_intra = false;
- if (bp.skip_mode || !sequence_header_.enable_interintra_compound ||
+ if (skip_mode || !sequence_header_.enable_interintra_compound ||
is_compound || !kIsInterIntraModeAllowedMask.Contains(block.size)) {
return;
}
@@ -1031,13 +1129,14 @@ void Tile::ReadInterIntraMode(const Block& block, bool is_compound) {
prediction_parameters.wedge_sign = 0;
}
-void Tile::ReadMotionMode(const Block& block, bool is_compound) {
+void Tile::ReadMotionMode(const Block& block, bool is_compound,
+ bool skip_mode) {
BlockParameters& bp = *block.bp;
PredictionParameters& prediction_parameters =
*block.bp->prediction_parameters;
const auto global_motion_type =
frame_header_.global_motion[bp.reference_frame[0]].type;
- if (bp.skip_mode || !frame_header_.is_motion_mode_switchable ||
+ if (skip_mode || !frame_header_.is_motion_mode_switchable ||
IsBlockDimension4(block.size) ||
(frame_header_.force_integer_mv == 0 &&
(bp.y_mode == kPredictionModeGlobalMv ||
@@ -1073,14 +1172,17 @@ uint16_t* Tile::GetIsExplicitCompoundTypeCdf(const Block& block) {
int context = 0;
if (block.top_available[kPlaneY]) {
if (!block.IsTopSingle()) {
- context += static_cast<int>(block.bp_top->is_explicit_compound_type);
+ context += static_cast<int>(
+ block.top_context
+ ->is_explicit_compound_type[block.top_context_index]);
} else if (block.TopReference(0) == kReferenceFrameAlternate) {
context += 3;
}
}
if (block.left_available[kPlaneY]) {
if (!block.IsLeftSingle()) {
- context += static_cast<int>(block.bp_left->is_explicit_compound_type);
+ context += static_cast<int>(
+ left_context_.is_explicit_compound_type[block.left_context_index]);
} else if (block.LeftReference(0) == kReferenceFrameAlternate) {
context += 3;
}
@@ -1099,14 +1201,16 @@ uint16_t* Tile::GetIsCompoundTypeAverageCdf(const Block& block) {
int context = (forward == backward) ? 3 : 0;
if (block.top_available[kPlaneY]) {
if (!block.IsTopSingle()) {
- context += static_cast<int>(block.bp_top->is_compound_type_average);
+ context += static_cast<int>(
+ block.top_context->is_compound_type_average[block.top_context_index]);
} else if (block.TopReference(0) == kReferenceFrameAlternate) {
++context;
}
}
if (block.left_available[kPlaneY]) {
if (!block.IsLeftSingle()) {
- context += static_cast<int>(block.bp_left->is_compound_type_average);
+ context += static_cast<int>(
+ left_context_.is_compound_type_average[block.left_context_index]);
} else if (block.LeftReference(0) == kReferenceFrameAlternate) {
++context;
}
@@ -1114,23 +1218,25 @@ uint16_t* Tile::GetIsCompoundTypeAverageCdf(const Block& block) {
return symbol_decoder_context_.is_compound_type_average_cdf[context];
}
-void Tile::ReadCompoundType(const Block& block, bool is_compound) {
- BlockParameters& bp = *block.bp;
- bp.is_explicit_compound_type = false;
- bp.is_compound_type_average = true;
+void Tile::ReadCompoundType(const Block& block, bool is_compound,
+ bool skip_mode,
+ bool* const is_explicit_compound_type,
+ bool* const is_compound_type_average) {
+ *is_explicit_compound_type = false;
+ *is_compound_type_average = true;
PredictionParameters& prediction_parameters =
*block.bp->prediction_parameters;
- if (bp.skip_mode) {
+ if (skip_mode) {
prediction_parameters.compound_prediction_type =
kCompoundPredictionTypeAverage;
return;
}
if (is_compound) {
if (sequence_header_.enable_masked_compound) {
- bp.is_explicit_compound_type =
+ *is_explicit_compound_type =
reader_.ReadSymbol(GetIsExplicitCompoundTypeCdf(block));
}
- if (bp.is_explicit_compound_type) {
+ if (*is_explicit_compound_type) {
if (kIsWedgeCompoundModeAllowed.Contains(block.size)) {
// Only kCompoundPredictionTypeWedge and
// kCompoundPredictionTypeDiffWeighted are signaled explicitly.
@@ -1143,11 +1249,11 @@ void Tile::ReadCompoundType(const Block& block, bool is_compound) {
}
} else {
if (sequence_header_.enable_jnt_comp) {
- bp.is_compound_type_average =
+ *is_compound_type_average =
reader_.ReadSymbol(GetIsCompoundTypeAverageCdf(block));
prediction_parameters.compound_prediction_type =
- bp.is_compound_type_average ? kCompoundPredictionTypeAverage
- : kCompoundPredictionTypeDistance;
+ *is_compound_type_average ? kCompoundPredictionTypeAverage
+ : kCompoundPredictionTypeDistance;
} else {
prediction_parameters.compound_prediction_type =
kCompoundPredictionTypeAverage;
@@ -1162,8 +1268,7 @@ void Tile::ReadCompoundType(const Block& block, bool is_compound) {
prediction_parameters.wedge_sign = static_cast<int>(reader_.ReadBit());
} else if (prediction_parameters.compound_prediction_type ==
kCompoundPredictionTypeDiffWeighted) {
- prediction_parameters.mask_is_inverse =
- static_cast<bool>(reader_.ReadBit());
+ prediction_parameters.mask_is_inverse = reader_.ReadBit() != 0;
}
return;
}
@@ -1209,7 +1314,7 @@ uint16_t* Tile::GetInterpolationFilterCdf(const Block& block, int direction) {
return symbol_decoder_context_.interpolation_filter_cdf[context];
}
-void Tile::ReadInterpolationFilter(const Block& block) {
+void Tile::ReadInterpolationFilter(const Block& block, bool skip_mode) {
BlockParameters& bp = *block.bp;
if (frame_header_.interpolation_filter != kInterpolationFilterSwitchable) {
static_assert(
@@ -1222,7 +1327,7 @@ void Tile::ReadInterpolationFilter(const Block& block) {
return;
}
bool interpolation_filter_present = true;
- if (bp.skip_mode ||
+ if (skip_mode ||
block.bp->prediction_parameters->motion_mode == kMotionModeLocalWarp) {
interpolation_filter_present = false;
} else if (!IsBlockDimension4(block.size) &&
@@ -1251,31 +1356,58 @@ void Tile::ReadInterpolationFilter(const Block& block) {
}
}
-bool Tile::ReadInterBlockModeInfo(const Block& block) {
+void Tile::SetCdfContextCompoundType(const Block& block,
+ bool is_explicit_compound_type,
+ bool is_compound_type_average) {
+ memset(left_context_.is_explicit_compound_type + block.left_context_index,
+ static_cast<int>(is_explicit_compound_type), block.height4x4);
+ memset(left_context_.is_compound_type_average + block.left_context_index,
+ static_cast<int>(is_compound_type_average), block.height4x4);
+ memset(block.top_context->is_explicit_compound_type + block.top_context_index,
+ static_cast<int>(is_explicit_compound_type), block.width4x4);
+ memset(block.top_context->is_compound_type_average + block.top_context_index,
+ static_cast<int>(is_compound_type_average), block.width4x4);
+}
+
+bool Tile::ReadInterBlockModeInfo(const Block& block, bool skip_mode) {
BlockParameters& bp = *block.bp;
- bp.palette_mode_info.size[kPlaneTypeY] = 0;
- bp.palette_mode_info.size[kPlaneTypeUV] = 0;
- ReadReferenceFrames(block);
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] = 0;
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeUV] = 0;
+ SetCdfContextPaletteSize(block);
+ ReadReferenceFrames(block, skip_mode);
const bool is_compound = bp.reference_frame[1] > kReferenceFrameIntra;
MvContexts mode_contexts;
FindMvStack(block, is_compound, &mode_contexts);
- ReadInterPredictionModeY(block, mode_contexts);
+ ReadInterPredictionModeY(block, mode_contexts, skip_mode);
ReadRefMvIndex(block);
if (!AssignInterMv(block, is_compound)) return false;
- ReadInterIntraMode(block, is_compound);
- ReadMotionMode(block, is_compound);
- ReadCompoundType(block, is_compound);
- ReadInterpolationFilter(block);
+ ReadInterIntraMode(block, is_compound, skip_mode);
+ ReadMotionMode(block, is_compound, skip_mode);
+ bool is_explicit_compound_type;
+ bool is_compound_type_average;
+ ReadCompoundType(block, is_compound, skip_mode, &is_explicit_compound_type,
+ &is_compound_type_average);
+ SetCdfContextCompoundType(block, is_explicit_compound_type,
+ is_compound_type_average);
+ ReadInterpolationFilter(block, skip_mode);
return true;
}
+void Tile::SetCdfContextSkipMode(const Block& block, bool skip_mode) {
+ memset(left_context_.skip_mode + block.left_context_index,
+ static_cast<int>(skip_mode), block.height4x4);
+ memset(block.top_context->skip_mode + block.top_context_index,
+ static_cast<int>(skip_mode), block.width4x4);
+}
+
bool Tile::DecodeInterModeInfo(const Block& block) {
BlockParameters& bp = *block.bp;
block.bp->prediction_parameters->use_intra_block_copy = false;
bp.skip = false;
if (!ReadInterSegmentId(block, /*pre_skip=*/true)) return false;
- ReadSkipMode(block);
- if (bp.skip_mode) {
+ bool skip_mode = ReadSkipMode(block);
+ SetCdfContextSkipMode(block, skip_mode);
+ if (skip_mode) {
bp.skip = true;
} else {
ReadSkip(block);
@@ -1290,8 +1422,8 @@ bool Tile::DecodeInterModeInfo(const Block& block) {
ReadLoopFilterDelta(block);
read_deltas_ = false;
}
- ReadIsInter(block);
- return bp.is_inter ? ReadInterBlockModeInfo(block)
+ ReadIsInter(block, skip_mode);
+ return bp.is_inter ? ReadInterBlockModeInfo(block, skip_mode)
: ReadIntraBlockModeInfo(block, /*intra_y_mode=*/false);
}
diff --git a/src/tile/bitstream/palette.cc b/src/tile/bitstream/palette.cc
index 41b42d6..27e5110 100644
--- a/src/tile/bitstream/palette.cc
+++ b/src/tile/bitstream/palette.cc
@@ -35,20 +35,23 @@ int Tile::GetPaletteCache(const Block& block, PlaneType plane_type,
uint16_t* const cache) {
const int top_size =
(block.top_available[kPlaneY] && Mod64(MultiplyBy4(block.row4x4)) != 0)
- ? block.bp_top->palette_mode_info.size[plane_type]
+ ? block.top_context->palette_size[plane_type][block.top_context_index]
+ : 0;
+ const int left_size =
+ block.left_available[kPlaneY]
+ ? left_context_.palette_size[plane_type][block.left_context_index]
: 0;
- const int left_size = block.left_available[kPlaneY]
- ? block.bp_left->palette_mode_info.size[plane_type]
- : 0;
if (left_size == 0 && top_size == 0) return 0;
// Merge the left and top colors in sorted order and store them in |cache|.
- uint16_t dummy[1];
- const uint16_t* top = (top_size > 0)
- ? block.bp_top->palette_mode_info.color[plane_type]
- : dummy;
+ uint16_t empty_palette[1];
+ const uint16_t* top =
+ (top_size > 0) ? block.top_context
+ ->palette_color[block.top_context_index][plane_type]
+ : empty_palette;
const uint16_t* left =
- (left_size > 0) ? block.bp_left->palette_mode_info.color[plane_type]
- : dummy;
+ (left_size > 0)
+ ? left_context_.palette_color[block.left_context_index][plane_type]
+ : empty_palette;
std::merge(top, top + top_size, left, left + left_size, cache);
// Deduplicate the entries in |cache| and return the number of unique
// entries.
@@ -61,8 +64,10 @@ void Tile::ReadPaletteColors(const Block& block, Plane plane) {
uint16_t cache[2 * kMaxPaletteSize];
const int n = GetPaletteCache(block, plane_type, cache);
BlockParameters& bp = *block.bp;
- const uint8_t palette_size = bp.palette_mode_info.size[plane_type];
- uint16_t* const palette_color = bp.palette_mode_info.color[plane];
+ const uint8_t palette_size =
+ bp.prediction_parameters->palette_mode_info.size[plane_type];
+ uint16_t* const palette_color =
+ bp.prediction_parameters->palette_mode_info.color[plane];
const int8_t bitdepth = sequence_header_.color_config.bitdepth;
int index = 0;
for (int i = 0; i < n && index < palette_size; ++i) {
@@ -101,7 +106,8 @@ void Tile::ReadPaletteColors(const Block& block, Plane plane) {
std::inplace_merge(palette_color, palette_color + merge_pivot,
palette_color + palette_size);
if (plane_type == kPlaneTypeUV) {
- uint16_t* const palette_color_v = bp.palette_mode_info.color[kPlaneV];
+ uint16_t* const palette_color_v =
+ bp.prediction_parameters->palette_mode_info.color[kPlaneV];
if (reader_.ReadBit() != 0) { // delta_encode_palette_colors_v.
const int bits = bitdepth - 4 + static_cast<int>(reader_.ReadLiteral(2));
palette_color_v[0] = reader_.ReadLiteral(bitdepth);
@@ -130,8 +136,8 @@ void Tile::ReadPaletteColors(const Block& block, Plane plane) {
void Tile::ReadPaletteModeInfo(const Block& block) {
BlockParameters& bp = *block.bp;
- bp.palette_mode_info.size[kPlaneTypeY] = 0;
- bp.palette_mode_info.size[kPlaneTypeUV] = 0;
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] = 0;
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeUV] = 0;
if (IsBlockSmallerThan8x8(block.size) || block.size > kBlock64x64 ||
!frame_header_.allow_screen_content_tools) {
return;
@@ -140,29 +146,32 @@ void Tile::ReadPaletteModeInfo(const Block& block) {
k4x4WidthLog2[block.size] + k4x4HeightLog2[block.size] - 2;
if (bp.y_mode == kPredictionModeDc) {
const int context =
- static_cast<int>(block.top_available[kPlaneY] &&
- block.bp_top->palette_mode_info.size[kPlaneTypeY] >
- 0) +
- static_cast<int>(block.left_available[kPlaneY] &&
- block.bp_left->palette_mode_info.size[kPlaneTypeY] >
- 0);
+ static_cast<int>(
+ block.top_available[kPlaneY] &&
+ block.top_context
+ ->palette_size[kPlaneTypeY][block.top_context_index] > 0) +
+ static_cast<int>(
+ block.left_available[kPlaneY] &&
+ left_context_.palette_size[kPlaneTypeY][block.left_context_index] >
+ 0);
const bool has_palette_y = reader_.ReadSymbol(
symbol_decoder_context_.has_palette_y_cdf[block_size_context][context]);
if (has_palette_y) {
- bp.palette_mode_info.size[kPlaneTypeY] =
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] =
kMinPaletteSize +
reader_.ReadSymbol<kPaletteSizeSymbolCount>(
symbol_decoder_context_.palette_y_size_cdf[block_size_context]);
ReadPaletteColors(block, kPlaneY);
}
}
- if (block.HasChroma() && bp.uv_mode == kPredictionModeDc) {
- const int context =
- static_cast<int>(bp.palette_mode_info.size[kPlaneTypeY] > 0);
+ if (block.HasChroma() &&
+ bp.prediction_parameters->uv_mode == kPredictionModeDc) {
+ const int context = static_cast<int>(
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeY] > 0);
const bool has_palette_uv =
reader_.ReadSymbol(symbol_decoder_context_.has_palette_uv_cdf[context]);
if (has_palette_uv) {
- bp.palette_mode_info.size[kPlaneTypeUV] =
+ bp.prediction_parameters->palette_mode_info.size[kPlaneTypeUV] =
kMinPaletteSize +
reader_.ReadSymbol<kPaletteSizeSymbolCount>(
symbol_decoder_context_.palette_uv_size_cdf[block_size_context]);
@@ -244,7 +253,8 @@ void Tile::PopulatePaletteColorContexts(
}
bool Tile::ReadPaletteTokens(const Block& block) {
- const PaletteModeInfo& palette_mode_info = block.bp->palette_mode_info;
+ const PaletteModeInfo& palette_mode_info =
+ block.bp->prediction_parameters->palette_mode_info;
PredictionParameters& prediction_parameters =
*block.bp->prediction_parameters;
for (int plane_type = kPlaneTypeY;
diff --git a/src/tile/bitstream/transform_size.cc b/src/tile/bitstream/transform_size.cc
index b79851d..7197400 100644
--- a/src/tile/bitstream/transform_size.cc
+++ b/src/tile/bitstream/transform_size.cc
@@ -95,7 +95,8 @@ int Tile::GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
TransformSize Tile::ReadFixedTransformSize(const Block& block) {
BlockParameters& bp = *block.bp;
- if (frame_header_.segmentation.lossless[bp.segment_id]) {
+ if (frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id]) {
return kTransformSize4x4;
}
const TransformSize max_rect_tx_size = kMaxTransformSizeRectangle[block.size];
@@ -189,8 +190,6 @@ void Tile::ReadVariableTransformTree(const Block& block, int row4x4,
memset(&inter_transform_sizes_[node.y + i][node.x], node.tx_size,
tx_width4x4);
}
- block_parameters_holder_.Find(node.y, node.x)->transform_size =
- node.tx_size;
} while (!stack.Empty());
}
@@ -198,7 +197,8 @@ void Tile::DecodeTransformSize(const Block& block) {
BlockParameters& bp = *block.bp;
if (frame_header_.tx_mode == kTxModeSelect && block.size > kBlock4x4 &&
bp.is_inter && !bp.skip &&
- !frame_header_.segmentation.lossless[bp.segment_id]) {
+ !frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id]) {
const TransformSize max_tx_size = kMaxTransformSizeRectangle[block.size];
const int tx_width4x4 = kTransformWidth4x4[max_tx_size];
const int tx_height4x4 = kTransformHeight4x4[max_tx_size];
@@ -210,10 +210,10 @@ void Tile::DecodeTransformSize(const Block& block) {
}
}
} else {
- bp.transform_size = ReadFixedTransformSize(block);
+ const TransformSize transform_size = ReadFixedTransformSize(block);
for (int row = block.row4x4; row < block.row4x4 + block.height4x4; ++row) {
static_assert(sizeof(TransformSize) == 1, "");
- memset(&inter_transform_sizes_[row][block.column4x4], bp.transform_size,
+ memset(&inter_transform_sizes_[row][block.column4x4], transform_size,
block.width4x4);
}
}
diff --git a/src/tile/prediction.cc b/src/tile/prediction.cc
index c5560a6..bba5a69 100644
--- a/src/tile/prediction.cc
+++ b/src/tile/prediction.cc
@@ -226,8 +226,8 @@ void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
bool has_left, bool has_top, bool has_top_right,
bool has_bottom_left, PredictionMode mode,
TransformSize tx_size) {
- const int width = 1 << kTransformWidthLog2[tx_size];
- const int height = 1 << kTransformHeightLog2[tx_size];
+ const int width = kTransformWidth[tx_size];
+ const int height = kTransformHeight[tx_size];
const int x_shift = subsampling_x_[plane];
const int y_shift = subsampling_y_[plane];
const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
@@ -386,36 +386,21 @@ template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
TransformSize tx_size);
#endif
-constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth,
- kPredictionModeSmoothHorizontal,
- kPredictionModeSmoothVertical);
-
-bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const {
- const BlockParameters& bp = *block_parameters_holder_.Find(row, column);
- PredictionMode mode;
+int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
+ bool top;
+ bool left;
if (plane == kPlaneY) {
- mode = bp.y_mode;
+ top = block.top_available[kPlaneY] &&
+ kPredictionModeSmoothMask.Contains(block.bp_top->y_mode);
+ left = block.left_available[kPlaneY] &&
+ kPredictionModeSmoothMask.Contains(block.bp_left->y_mode);
} else {
- if (bp.reference_frame[0] > kReferenceFrameIntra) return false;
- mode = bp.uv_mode;
- }
- return kPredictionModeSmoothMask.Contains(mode);
-}
-
-int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
- const int subsampling_x = subsampling_x_[plane];
- const int subsampling_y = subsampling_y_[plane];
- if (block.top_available[plane]) {
- const int row = block.row4x4 - 1 - (block.row4x4 & subsampling_y);
- const int column = block.column4x4 + (~block.column4x4 & subsampling_x);
- if (IsSmoothPrediction(row, column, plane)) return 1;
+ top = block.top_available[plane] &&
+ block.bp->prediction_parameters->chroma_top_uses_smooth_prediction;
+ left = block.left_available[plane] &&
+ block.bp->prediction_parameters->chroma_left_uses_smooth_prediction;
}
- if (block.left_available[plane]) {
- const int row = block.row4x4 + (~block.row4x4 & subsampling_y);
- const int column = block.column4x4 - 1 - (block.column4x4 & subsampling_x);
- if (IsSmoothPrediction(row, column, plane)) return 1;
- }
- return 0;
+ return static_cast<int>(top || left);
}
template <typename Pixel>
@@ -510,7 +495,8 @@ void Tile::PalettePrediction(const Block& block, const Plane plane,
const int y, const TransformSize tx_size) {
const int tx_width = kTransformWidth[tx_size];
const int tx_height = kTransformHeight[tx_size];
- const uint16_t* const palette = block.bp->palette_mode_info.color[plane];
+ const uint16_t* const palette =
+ block.bp->prediction_parameters->palette_mode_info.color[plane];
const PlaneType plane_type = GetPlaneType(plane);
const int x4 = MultiplyBy4(x);
const int y4 = MultiplyBy4(y);
@@ -695,7 +681,7 @@ GlobalMotion* Tile::GetWarpParams(
? global_motion_params->type
: kNumGlobalMotionTransformationTypes;
const bool is_global_valid =
- IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) &&
+ IsGlobalMvBlock(*block.bp, global_motion_type) &&
SetupShear(global_motion_params);
// Valid global motion type implies reference type can't be intra.
assert(!is_global_valid || reference_type != kReferenceFrameIntra);
@@ -1028,6 +1014,7 @@ bool Tile::GetReferenceBlockPosition(
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
kScaleSubPixelBits) +
kSubPixelTaps;
+ *ref_block_end_x += kConvolveScaleBorderRight - kConvolveBorderRight;
ref_block_end_y = *ref_block_start_y + block_height - 1;
}
// Determines if we need to extend beyond the left/right/top/bottom border.
@@ -1206,11 +1193,12 @@ bool Tile::BlockInterPrediction(
(ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
}
} else {
+ const int border_right =
+ is_scaled ? kConvolveScaleBorderRight : kConvolveBorderRight;
// The block width can be at most 2 times as much as current
// block's width because of scaling.
auto block_extended_width = Align<ptrdiff_t>(
- (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
- pixel_size,
+ (2 * width + kConvolveBorderLeftTop + border_right) * pixel_size,
kMaxAlignment);
convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
#if LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/tile/tile.cc b/src/tile/tile.cc
index 9699517..5070bb6 100644
--- a/src/tile/tile.cc
+++ b/src/tile/tile.cc
@@ -463,6 +463,7 @@ Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
: 1),
current_frame_(*current_frame),
cdef_index_(frame_scratch_buffer->cdef_index),
+ cdef_skip_(frame_scratch_buffer->cdef_skip),
inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
thread_pool_(thread_pool),
residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
@@ -541,16 +542,6 @@ Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
buffer.stride(plane),
post_filter_.GetUnfilteredBuffer(plane));
- const int plane_height =
- SubsampledValue(frame_header_.height, subsampling_y_[plane]);
- deblock_row_limit_[plane] =
- std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
- << subsampling_y_[plane]);
- const int plane_width =
- SubsampledValue(frame_header_.width, subsampling_x_[plane]);
- deblock_column_limit_[plane] =
- std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
- << subsampling_x_[plane]);
}
}
@@ -598,6 +589,10 @@ bool Tile::Init() {
column4x4_end_, &motion_field_);
}
ResetLoopRestorationParams();
+ if (!top_context_.Resize(superblock_columns_)) {
+ LIBGAV1_DLOG(ERROR, "Allocation of top_context_ failed.");
+ return false;
+ }
return true;
}
@@ -1019,7 +1014,8 @@ TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
int block_y) {
const BlockParameters& bp = *block.bp;
const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
- if (frame_header_.segmentation.lossless[bp.segment_id] ||
+ if (frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id] ||
tx_size_square_max == kTransformSize64x64) {
return kTransformTypeDctDct;
}
@@ -1034,7 +1030,7 @@ TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
} else {
- tx_type = kModeToTransformType[bp.uv_mode];
+ tx_type = kModeToTransformType[bp.prediction_parameters->uv_mode];
}
return kTransformTypeInSetMask[tx_set].Contains(tx_type)
? tx_type
@@ -1048,7 +1044,8 @@ void Tile::ReadTransformType(const Block& block, int x4, int y4,
TransformType tx_type = kTransformTypeDctDct;
if (tx_set != kTransformSetDctOnly &&
- frame_header_.segmentation.qindex[bp.segment_id] > 0) {
+ frame_header_.segmentation.qindex[bp.prediction_parameters->segment_id] >
+ 0) {
const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
const int cdf_tx_size_index =
TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
@@ -1309,7 +1306,7 @@ bool Tile::ReadSignAndApplyDequantization(
int length = 0;
bool golomb_length_bit = false;
do {
- golomb_length_bit = static_cast<bool>(reader_.ReadBit());
+ golomb_length_bit = reader_.ReadBit() != 0;
++length;
if (length > 20) {
LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
@@ -1454,7 +1451,7 @@ int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
for (int i = 1; i < eob_pt - 2; ++i) {
assert(eob_pt - i >= 3);
assert(eob_pt <= kEobPt1024SymbolCount);
- if (static_cast<bool>(reader_.ReadBit())) {
+ if (reader_.ReadBit() != 0) {
eob += 1 << (eob_pt - i - 3);
}
}
@@ -1500,15 +1497,17 @@ int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
coeff_base_range_cdf, residual, level_buffer);
}
const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
- const int current_quantizer_index = GetQIndex(
- frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
+ const int current_quantizer_index =
+ GetQIndex(frame_header_.segmentation,
+ bp.prediction_parameters->segment_id, current_quantizer_index_);
const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
const int shift = kQuantizationShift[tx_size];
const uint8_t* const quantizer_matrix =
(frame_header_.quantizer.use_matrix &&
*tx_type < kTransformTypeIdentityIdentity &&
- !frame_header_.segmentation.lossless[bp.segment_id] &&
+ !frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id] &&
frame_header_.quantizer.matrix_level[plane] < 15)
? quantizer_matrix_[frame_header_.quantizer.matrix_level[plane]]
[plane_type][adjusted_tx_size]
@@ -1587,15 +1586,17 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
const bool do_decode = mode == kProcessingModeDecodeOnly ||
mode == kProcessingModeParseAndDecode;
if (do_decode && !bp.is_inter) {
- if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
+ if (bp.prediction_parameters->palette_mode_info.size[GetPlaneType(plane)] >
+ 0) {
CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
x, y, tx_size);
} else {
const PredictionMode mode =
- (plane == kPlaneY)
- ? bp.y_mode
- : (bp.uv_mode == kPredictionModeChromaFromLuma ? kPredictionModeDc
- : bp.uv_mode);
+ (plane == kPlaneY) ? bp.y_mode
+ : (bp.prediction_parameters->uv_mode ==
+ kPredictionModeChromaFromLuma
+ ? kPredictionModeDc
+ : bp.prediction_parameters->uv_mode);
const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
const int tr_column4x4 =
(sub_block_column4x4 >> subsampling_x) + step_x + 1;
@@ -1609,7 +1610,8 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
mode, tx_size);
- if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
+ if (plane != kPlaneY &&
+ bp.prediction_parameters->uv_mode == kPredictionModeChromaFromLuma) {
CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
start_y, tx_size);
}
@@ -1738,14 +1740,16 @@ void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
Reconstruct(dsp_, tx_type, tx_size,
- frame_header_.segmentation.lossless[block.bp->segment_id],
+ frame_header_.segmentation
+ .lossless[block.bp->prediction_parameters->segment_id],
reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
&buffer, non_zero_coeff_count);
} else // NOLINT
#endif
{
Reconstruct(dsp_, tx_type, tx_size,
- frame_header_.segmentation.lossless[block.bp->segment_id],
+ frame_header_.segmentation
+ .lossless[block.bp->prediction_parameters->segment_id],
reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
&buffer_[plane], non_zero_coeff_count);
}
@@ -1772,12 +1776,15 @@ bool Tile::Residual(const Block& block, ProcessingMode mode) {
// kTransformSize4x4. So we can simply use |bp.transform_size| here as
// the Y plane's transform size (part of Section 5.11.37 in the spec).
const TransformSize tx_size =
- (plane == kPlaneY) ? bp.transform_size : bp.uv_transform_size;
+ (plane == kPlaneY)
+ ? inter_transform_sizes_[block.row4x4][block.column4x4]
+ : bp.uv_transform_size;
const BlockSize plane_size =
kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
assert(plane_size != kBlockInvalid);
if (bp.is_inter &&
- !frame_header_.segmentation.lossless[bp.segment_id] &&
+ !frame_header_.segmentation
+ .lossless[bp.prediction_parameters->segment_id] &&
plane == kPlaneY) {
const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
@@ -2112,15 +2119,53 @@ void Tile::PopulateDeblockFilterLevel(const Block& block) {
for (int i = 0; i < kFrameLfCount; ++i) {
if (delta_lf_all_zero_) {
bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
- bp.segment_id, i, bp.reference_frame[0], mode_id);
+ bp.prediction_parameters->segment_id, i, bp.reference_frame[0],
+ mode_id);
} else {
bp.deblock_filter_level[i] =
- deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]]
- [mode_id];
+ deblock_filter_levels_[bp.prediction_parameters->segment_id][i]
+ [bp.reference_frame[0]][mode_id];
}
}
}
+void Tile::PopulateCdefSkip(const Block& block) {
+ if (!post_filter_.DoCdef() || block.bp->skip ||
+ (frame_header_.cdef.bits > 0 &&
+ cdef_index_[DivideBy16(block.row4x4)][DivideBy16(block.column4x4)] ==
+ -1)) {
+ return;
+ }
+ // The rest of this function is an efficient version of the following code:
+ // for (int y = block.row4x4; y < block.row4x4 + block.height4x4; y++) {
+ // for (int x = block.column4x4; y < block.column4x4 + block.width4x4;
+ // x++) {
+ // const uint8_t mask = uint8_t{1} << ((x >> 1) & 0x7);
+ // cdef_skip_[y >> 1][x >> 4] |= mask;
+ // }
+ // }
+
+ // For all block widths other than 32, the mask will fit in uint8_t. For
+ // block width == 32, the mask is always 0xFFFF.
+ const int bw4 =
+ std::max(DivideBy2(block.width4x4) + (block.column4x4 & 1), 1);
+ const uint8_t mask = (block.width4x4 == 32)
+ ? 0xFF
+ : (uint8_t{0xFF} >> (8 - bw4))
+ << (DivideBy2(block.column4x4) & 0x7);
+ uint8_t* cdef_skip = &cdef_skip_[block.row4x4 >> 1][block.column4x4 >> 4];
+ const int stride = cdef_skip_.columns();
+ int row = 0;
+ do {
+ *cdef_skip |= mask;
+ if (block.width4x4 == 32) {
+ *(cdef_skip + 1) = 0xFF;
+ }
+ cdef_skip += stride;
+ row += 2;
+ } while (row < block.height4x4);
+}
+
bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
TileScratchBuffer* const scratch_buffer,
ResidualPtr* residual) {
@@ -2150,7 +2195,7 @@ bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
return false;
}
BlockParameters& bp = *bp_ptr;
- Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
+ Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
bp.size = block_size;
bp.prediction_parameters =
split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
@@ -2158,17 +2203,16 @@ bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
: std::move(prediction_parameters_);
if (bp.prediction_parameters == nullptr) return false;
if (!DecodeModeInfo(block)) return false;
- bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv ||
- bp.y_mode == kPredictionModeGlobalGlobalMv) &&
- !IsBlockDimension4(bp.size);
PopulateDeblockFilterLevel(block);
if (!ReadPaletteTokens(block)) return false;
DecodeTransformSize(block);
// Part of Section 5.11.37 in the spec (implemented as a simple lookup).
- bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id]
- ? kTransformSize4x4
- : kUVTransformSize[block.residual_size[kPlaneU]];
+ bp.uv_transform_size =
+ frame_header_.segmentation.lossless[bp.prediction_parameters->segment_id]
+ ? kTransformSize4x4
+ : kUVTransformSize[block.residual_size[kPlaneU]];
if (bp.skip) ResetEntropyContext(block);
+ PopulateCdefSkip(block);
if (split_parse_and_decode_) {
if (!Residual(block, kProcessingModeParseOnly)) return false;
} else {
@@ -2177,22 +2221,24 @@ bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
return false;
}
}
- // If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all
- // blocks. We don't need to call save bp.segment_id in the current frame
- // because the current frame's segmentation map will be cleared to all 0s.
+ // If frame_header_.segmentation.enabled is false,
+ // bp.prediction_parameters->segment_id is 0 for all blocks. We don't need to
+ // call save bp.prediction_parameters->segment_id in the current frame because
+ // the current frame's segmentation map will be cleared to all 0s.
//
// If frame_header_.segmentation.enabled is true and
// frame_header_.segmentation.update_map is false, we will copy the previous
// frame's segmentation map to the current frame. So we don't need to call
- // save bp.segment_id in the current frame.
+ // save bp.prediction_parameters->segment_id in the current frame.
if (frame_header_.segmentation.enabled &&
frame_header_.segmentation.update_map) {
const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
static_cast<int>(block.width4x4));
const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
static_cast<int>(block.height4x4));
- current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit,
- y_limit, bp.segment_id);
+ current_frame_.segmentation_map()->FillBlock(
+ row4x4, column4x4, x_limit, y_limit,
+ bp.prediction_parameters->segment_id);
}
StoreMotionFieldMvsIntoCurrentFrame(block);
if (!split_parse_and_decode_) {
@@ -2208,7 +2254,7 @@ bool Tile::DecodeBlock(int row4x4, int column4x4, BlockSize block_size,
column4x4 >= frame_header_.columns4x4) {
return true;
}
- Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
+ Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
if (!ComputePrediction(block) ||
!Residual(block, kProcessingModeDecodeOnly)) {
return false;
@@ -2382,7 +2428,7 @@ void Tile::ResetLoopRestorationParams() {
}
void Tile::ResetCdef(const int row4x4, const int column4x4) {
- if (!sequence_header_.enable_cdef) return;
+ if (frame_header_.cdef.bits == 0) return;
const int row = DivideBy16(row4x4);
const int column = DivideBy16(column4x4);
cdef_index_[row][column] = -1;
@@ -2562,8 +2608,8 @@ void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
// Must make a local copy so that StoreMotionFieldMvs() knows there is no
// overlap between load and store.
const MotionVector mv_to_store = bp.mv.mv[i];
- const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]);
- const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]);
+ const int mv_row = std::abs(mv_to_store.mv[0]);
+ const int mv_column = std::abs(mv_to_store.mv[1]);
if (reference_frame_to_store > kReferenceFrameIntra &&
// kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two
// absolute values and then compare with kRefMvsLimit to save a branch.
diff --git a/src/tile_scratch_buffer.h b/src/tile_scratch_buffer.h
index 3eaf8b8..828f550 100644
--- a/src/tile_scratch_buffer.h
+++ b/src/tile_scratch_buffer.h
@@ -17,8 +17,13 @@
#ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
#define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
+#include <cstddef>
#include <cstdint>
+#include <cstring>
+#include <memory>
#include <mutex> // NOLINT (unapproved c++11 header)
+#include <new>
+#include <utility>
#include "src/dsp/constants.h"
#include "src/utils/common.h"
@@ -42,9 +47,10 @@ struct TileScratchBuffer : public MaxAlignedAllocable {
const int pixel_size = 1;
#endif
+ static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, "");
constexpr int unaligned_convolve_buffer_stride =
kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
- kConvolveBorderRight;
+ kConvolveScaleBorderRight;
convolve_block_buffer_stride = Align<ptrdiff_t>(
unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
@@ -53,6 +59,13 @@ struct TileScratchBuffer : public MaxAlignedAllocable {
convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>(
kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride);
+#if LIBGAV1_MSAN
+ // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero
+ // value to aid in future debugging.
+ memset(convolve_block_buffer.get(), 0x66,
+ convolve_buffer_height * convolve_block_buffer_stride);
+#endif
+
return convolve_block_buffer != nullptr;
}
diff --git a/src/utils/array_2d_test.cc b/src/utils/array_2d_test.cc
new file mode 100644
index 0000000..0535274
--- /dev/null
+++ b/src/utils/array_2d_test.cc
@@ -0,0 +1,248 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/array_2d.h"
+
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <type_traits>
+
+#include "gtest/gtest.h"
+#include "src/utils/compiler_attributes.h"
+
+#if LIBGAV1_MSAN
+#include <sanitizer/msan_interface.h>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr int kRows = 50;
+constexpr int kColumns = 200;
+
+TEST(Array2dViewTest, TestUint8) {
+ uint8_t data[kRows * kColumns] = {};
+ Array2DView<uint8_t> data2d(kRows, kColumns, data);
+
+ // Verify data.
+ data[kColumns] = 100;
+ data[kColumns + 1] = 101;
+ data[kColumns * 2 + 10] = 210;
+ data[kColumns * 2 + 40] = 240;
+ EXPECT_EQ(data2d[1][0], 100);
+ EXPECT_EQ(data2d[1][1], 101);
+ EXPECT_EQ(data2d[2][10], 210);
+ EXPECT_EQ(data2d[2][40], 240);
+
+ // Verify pointers.
+ EXPECT_EQ(data2d[10], data + 10 * kColumns);
+}
+
+TEST(Array2dViewTest, TestUint16) {
+ uint16_t data[kRows * kColumns] = {};
+ Array2DView<uint16_t> data2d(kRows, kColumns, data);
+
+ // Verify data.
+ data[kColumns] = 100;
+ data[kColumns + 1] = 101;
+ data[kColumns * 2 + 10] = 210;
+ data[kColumns * 2 + 40] = 240;
+ EXPECT_EQ(data2d[1][0], 100);
+ EXPECT_EQ(data2d[1][1], 101);
+ EXPECT_EQ(data2d[2][10], 210);
+ EXPECT_EQ(data2d[2][40], 240);
+
+ // Verify pointers.
+ EXPECT_EQ(data2d[10], data + 10 * kColumns);
+}
+
+TEST(Array2dViewTest, TestUint8Const) {
+ uint8_t data[kRows * kColumns] = {};
+ // Declared as const to provide a read-only view of |data|.
+ const Array2DView<uint8_t> data2d(kRows, kColumns, data);
+
+ // Verify data.
+ data[kColumns] = 100;
+ data[kColumns + 1] = 101;
+ data[kColumns * 2 + 10] = 210;
+ data[kColumns * 2 + 40] = 240;
+ EXPECT_EQ(data2d[1][0], 100);
+ EXPECT_EQ(data2d[1][1], 101);
+ EXPECT_EQ(data2d[2][10], 210);
+ EXPECT_EQ(data2d[2][40], 240);
+
+ // Verify pointers.
+ EXPECT_EQ(data2d[10], data + 10 * kColumns);
+}
+
+TEST(Array2dTest, TestUint8) {
+ Array2D<uint8_t> data2d;
+ ASSERT_TRUE(data2d.Reset(kRows, kColumns, true));
+
+ EXPECT_EQ(data2d.rows(), kRows);
+ EXPECT_EQ(data2d.columns(), kColumns);
+
+ // Verify pointers.
+ for (int i = 0; i < kRows; ++i) {
+ EXPECT_NE(data2d[i], nullptr);
+ }
+
+ // Verify data (must be zero initialized).
+ for (int i = 0; i < kRows; ++i) {
+ for (int j = 0; j < kColumns; ++j) {
+ EXPECT_EQ(data2d[i][j], 0) << "Mismatch in [" << i << "][" << j << "]";
+ }
+ }
+
+ // Reset to a 2d array of smaller size with zero_initialize == false.
+ data2d[0][0] = 10;
+ ASSERT_TRUE(data2d.Reset(kRows - 1, kColumns - 1, false));
+
+ EXPECT_EQ(data2d.rows(), kRows - 1);
+ EXPECT_EQ(data2d.columns(), kColumns - 1);
+
+ // Verify pointers.
+ for (int i = 0; i < kRows - 1; ++i) {
+ EXPECT_NE(data2d[i], nullptr);
+ }
+
+ // Verify data (must be zero except for 0,0 because it was zero initialized in
+ // the previous call to Reset).
+ for (int i = 0; i < kRows - 1; ++i) {
+ for (int j = 0; j < kColumns - 1; ++j) {
+ if (i == 0 && j == 0) {
+ EXPECT_EQ(data2d[i][j], 10) << "Mismatch in [" << i << "][" << j << "]";
+ } else {
+ EXPECT_EQ(data2d[i][j], 0) << "Mismatch in [" << i << "][" << j << "]";
+ }
+ }
+ }
+
+ // Reset to a 2d array of smaller size with zero_initialize == true.
+ ASSERT_TRUE(data2d.Reset(kRows - 2, kColumns - 2, true));
+
+ EXPECT_EQ(data2d.rows(), kRows - 2);
+ EXPECT_EQ(data2d.columns(), kColumns - 2);
+
+ // Verify pointers.
+ for (int i = 0; i < kRows - 2; ++i) {
+ EXPECT_NE(data2d[i], nullptr);
+ }
+
+ // Verify data (must be zero initialized).
+ for (int i = 0; i < kRows - 2; ++i) {
+ for (int j = 0; j < kColumns - 2; ++j) {
+ EXPECT_EQ(data2d[i][j], 0) << "Mismatch in [" << i << "][" << j << "]";
+ }
+ }
+}
+
+TEST(Array2dTest, TestUniquePtr1) {
+ // A simple class that sets an int value to 0 in the destructor.
+ class Cleaner {
+ public:
+ explicit Cleaner(int* value) : value_(value) {}
+ ~Cleaner() { *value_ = 0; }
+
+ private:
+ int* value_;
+ };
+ int value = 100;
+ Array2D<std::unique_ptr<Cleaner>> data2d;
+ ASSERT_TRUE(data2d.Reset(4, 4, true));
+ data2d[0][0].reset(new (std::nothrow) Cleaner(&value));
+ EXPECT_EQ(value, 100);
+ // Reset to a smaller size. Depending on the implementation, the data_ buffer
+ // may or may not be reused.
+ ASSERT_TRUE(data2d.Reset(2, 2, true));
+ // Reset to a much larger size. The data_ buffer will be reallocated.
+ ASSERT_TRUE(data2d.Reset(32, 32, true));
+ // The destructors of all elements in the former data_ buffer should have
+ // been invoked.
+ EXPECT_EQ(value, 0);
+}
+
+TEST(Array2dTest, TestUniquePtr2) {
+ // A simple class that sets an int value to 0 in the destructor.
+ class Cleaner {
+ public:
+ explicit Cleaner(int* value) : value_(value) {}
+ ~Cleaner() { *value_ = 0; }
+
+ private:
+ int* value_;
+ };
+ int value1 = 100;
+ int value2 = 200;
+ Array2D<std::unique_ptr<Cleaner>> data2d;
+ ASSERT_TRUE(data2d.Reset(4, 4, false));
+ data2d[0][0].reset(new (std::nothrow) Cleaner(&value1));
+ data2d[3][3].reset(new (std::nothrow) Cleaner(&value2));
+ EXPECT_EQ(value1, 100);
+ EXPECT_EQ(value2, 200);
+ // Reset to a smaller size. Whether or not the data_ buffer is reused, the
+ // destructors of all existing elements should be invoked.
+ ASSERT_TRUE(data2d.Reset(2, 2, false));
+ EXPECT_EQ(value1, 0);
+ EXPECT_EQ(value2, 0);
+}
+
+// Shows that std::is_standard_layout is not relevant to the default
+// initialization vs. value initialization issue, but std::is_trivial is.
+TEST(Array2dTest, TestStructInit) {
+ // Make one data member private so that this struct does not have a standard
+ // layout. This also makes the struct not a POD type.
+ struct Point {
+ int x;
+ int Y() const { return y; }
+
+ private:
+ int y;
+ };
+
+ EXPECT_TRUE(std::is_trivial<Point>::value);
+ EXPECT_FALSE(std::is_standard_layout<Point>::value);
+
+ // The Point structs in this array are default initialized.
+ Array2D<Point> data2d_default_init;
+ ASSERT_TRUE(data2d_default_init.Reset(kRows, kColumns, false));
+ // The Point structs in this array are value initialized (i.e., zero
+ // initialized).
+ Array2D<Point> data2d;
+ ASSERT_TRUE(data2d.Reset(kRows, kColumns, true));
+
+#if LIBGAV1_MSAN
+ // Use MemorySanitizer to check Reset(rows, columns, false) does not
+ // initialize the memory while Reset(rows, columns, true) does.
+ //
+ // __msan_test_shadow(const void *x, uptr size) returns the offset of the
+ // first (at least partially) poisoned byte in the range, or -1 if the whole
+ // range is good.
+ for (int i = 0; i < kRows; ++i) {
+ EXPECT_EQ(__msan_test_shadow(data2d_default_init[i],
+ sizeof(data2d_default_init[0][0]) * kColumns),
+ 0);
+ EXPECT_EQ(__msan_test_shadow(data2d[i], sizeof(data2d[0][0]) * kColumns),
+ -1);
+ for (int j = 0; j < kColumns; ++j) {
+ EXPECT_EQ(data2d[i][j].x, 0);
+ EXPECT_EQ(data2d[i][j].Y(), 0);
+ }
+ }
+#endif
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/block_parameters_holder_test.cc b/src/utils/block_parameters_holder_test.cc
new file mode 100644
index 0000000..212eba5
--- /dev/null
+++ b/src/utils/block_parameters_holder_test.cc
@@ -0,0 +1,76 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/block_parameters_holder.h"
+
+#include "gtest/gtest.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(BlockParametersHolder, TestBasic) {
+ BlockParametersHolder holder;
+ ASSERT_TRUE(holder.Reset(20, 20));
+
+ // Get a BlockParameters object.
+ BlockParameters* const bp1 = holder.Get(10, 10, kBlock32x32);
+ ASSERT_NE(bp1, nullptr);
+ // Ensure that cache was filled appropriately. From (10, 10) to (17, 17)
+ // should be bp1 (10 + 4x4 width/height of 32x32 block is 18).
+ for (int i = 10; i < 18; ++i) {
+ for (int j = 10; j < 18; ++j) {
+ EXPECT_EQ(holder.Find(i, j), bp1)
+ << "Mismatch in (" << i << ", " << j << ")";
+ }
+ }
+
+ // Get the maximum number of BlockParameters objects.
+ for (int i = 0; i < 399; ++i) {
+ EXPECT_NE(holder.Get(10, 10, kBlock32x32), nullptr)
+ << "Mismatch in index " << i;
+ }
+
+ // Get() should now return nullptr since there are no more BlockParameters
+ // objects available.
+ EXPECT_EQ(holder.Get(10, 10, kBlock32x32), nullptr);
+
+ // Reset the holder to the same size.
+ ASSERT_TRUE(holder.Reset(20, 20));
+
+ // Get a BlockParameters object. This should be the same as bp1 since the
+ // holder was Reset to the same size.
+ BlockParameters* const bp2 = holder.Get(10, 10, kBlock32x32);
+ EXPECT_EQ(bp2, bp1);
+
+ // Reset the holder to a smaller size.
+ ASSERT_TRUE(holder.Reset(20, 10));
+
+ // Get a BlockParameters object. This should be the same as bp1 since the
+ // holder was Reset to a smaller size.
+ BlockParameters* const bp3 = holder.Get(0, 0, kBlock32x32);
+ EXPECT_EQ(bp3, bp1);
+
+ // Reset the holder to a larger size.
+ ASSERT_TRUE(holder.Reset(30, 30));
+
+ // Get a BlockParameters object. This may or may not be the same as bp1 since
+ // the holder was Reset to a larger size.
+ BlockParameters* const bp4 = holder.Get(0, 0, kBlock32x32);
+ EXPECT_NE(bp4, nullptr);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/blocking_counter_test.cc b/src/utils/blocking_counter_test.cc
new file mode 100644
index 0000000..1b6e7f5
--- /dev/null
+++ b/src/utils/blocking_counter_test.cc
@@ -0,0 +1,127 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/blocking_counter.h"
+
+#include <array>
+#include <memory>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "gtest/gtest.h"
+#include "src/utils/threadpool.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr int kNumWorkers = 10;
+constexpr int kNumJobs = 20;
+
+TEST(BlockingCounterTest, BasicFunctionality) {
+ std::unique_ptr<ThreadPool> pool = ThreadPool::Create(kNumWorkers);
+ BlockingCounter counter(kNumJobs);
+ std::array<bool, kNumJobs> done = {};
+
+ // Schedule the jobs.
+ for (int i = 0; i < kNumJobs; ++i) {
+ pool->Schedule([&counter, &done, i]() {
+ absl::SleepFor(absl::Seconds(1));
+ done[i] = true;
+ counter.Decrement();
+ });
+ }
+
+ // Wait for the jobs to complete. This should always return true.
+ ASSERT_TRUE(counter.Wait());
+
+ // Make sure the jobs were actually complete.
+ for (const auto& job_done : done) {
+ EXPECT_TRUE(job_done);
+ }
+}
+
+TEST(BlockingCounterTest, IncrementBy) {
+ std::unique_ptr<ThreadPool> pool = ThreadPool::Create(kNumWorkers);
+ BlockingCounter counter(0);
+ std::array<bool, kNumJobs> done = {};
+
+ // Schedule the jobs.
+ for (int i = 0; i < kNumJobs; ++i) {
+ counter.IncrementBy(1);
+ pool->Schedule([&counter, &done, i]() {
+ absl::SleepFor(absl::Seconds(1));
+ done[i] = true;
+ counter.Decrement();
+ });
+ }
+
+ // Wait for the jobs to complete. This should always return true.
+ ASSERT_TRUE(counter.Wait());
+
+ // Make sure the jobs were actually complete.
+ for (const auto& job_done : done) {
+ EXPECT_TRUE(job_done);
+ }
+}
+
+TEST(BlockingCounterWithStatusTest, BasicFunctionality) {
+ std::unique_ptr<ThreadPool> pool = ThreadPool::Create(kNumWorkers);
+ BlockingCounterWithStatus counter(kNumJobs);
+ std::array<bool, kNumJobs> done = {};
+
+ // Schedule the jobs.
+ for (int i = 0; i < kNumJobs; ++i) {
+ pool->Schedule([&counter, &done, i]() {
+ absl::SleepFor(absl::Seconds(1));
+ done[i] = true;
+ counter.Decrement(true);
+ });
+ }
+
+ // Wait for the jobs to complete. This should return true since all the jobs
+ // reported |job_succeeded| as true.
+ ASSERT_TRUE(counter.Wait());
+
+ // Make sure the jobs were actually complete.
+ for (const auto& job_done : done) {
+ EXPECT_TRUE(job_done);
+ }
+}
+
+TEST(BlockingCounterWithStatusTest, BasicFunctionalityWithStatus) {
+ std::unique_ptr<ThreadPool> pool = ThreadPool::Create(kNumWorkers);
+ BlockingCounterWithStatus counter(kNumJobs);
+ std::array<bool, kNumJobs> done = {};
+
+ // Schedule the jobs.
+ for (int i = 0; i < kNumJobs; ++i) {
+ pool->Schedule([&counter, &done, i]() {
+ absl::SleepFor(absl::Seconds(1));
+ done[i] = true;
+ counter.Decrement(i != 10);
+ });
+ }
+
+ // Wait for the jobs to complete. This should return false since one of the
+ // jobs reported |job_succeeded| as false.
+ ASSERT_FALSE(counter.Wait());
+
+ // Make sure the jobs were actually complete.
+ for (const auto& job_done : done) {
+ EXPECT_TRUE(job_done);
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/common.h b/src/utils/common.h
index 2e599f0..f75ace8 100644
--- a/src/utils/common.h
+++ b/src/utils/common.h
@@ -21,15 +21,17 @@
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
-#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#if defined(_M_X64) || defined(_M_ARM64)
#pragma intrinsic(_BitScanReverse64)
#define HAVE_BITSCANREVERSE64
-#endif // defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#endif // defined(_M_X64) || defined(_M_ARM64)
#endif // defined(_MSC_VER)
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <cstdlib>
#include <cstring>
#include <type_traits>
@@ -40,6 +42,26 @@
namespace libgav1 {
+// LIBGAV1_RESTRICT
+// Declares a pointer with the restrict type qualifier if available.
+// This allows code to hint to the compiler that only this pointer references a
+// particular object or memory region within the scope of the block in which it
+// is declared. This may allow for improved optimizations due to the lack of
+// pointer aliasing. See also:
+// https://en.cppreference.com/w/c/language/restrict
+// Note a template alias is not used for compatibility with older compilers
+// (e.g., gcc < 10) that do not expand the type when instantiating a template
+// function, either explicitly or in an assignment to a function pointer as is
+// done within the dsp code. RestrictPtr<T>::type is an alternative to this,
+// similar to std::add_const, but for conciseness the macro is preferred.
+#ifdef __GNUC__
+#define LIBGAV1_RESTRICT __restrict__
+#elif defined(_MSC_VER)
+#define LIBGAV1_RESTRICT __restrict
+#else
+#define LIBGAV1_RESTRICT
+#endif
+
// Aligns |value| to the desired |alignment|. |alignment| must be a power of 2.
template <typename T>
inline T Align(T value, T alignment) {
diff --git a/src/utils/common_test.cc b/src/utils/common_test.cc
new file mode 100644
index 0000000..fdb218d
--- /dev/null
+++ b/src/utils/common_test.cc
@@ -0,0 +1,604 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/common.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
+#include "src/utils/constants.h"
+#include "src/utils/memory.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+int BitLength(int64_t n) {
+ int count = 0;
+ while (n != 0) {
+ ++count;
+ n >>= 1;
+ }
+ return count;
+}
+
+TEST(CommonUtilsTest, Align) {
+ for (int i = 0; i <= 8; ++i) {
+ const int alignment = 1 << i;
+ SCOPED_TRACE("alignment: " + std::to_string(alignment));
+ EXPECT_EQ(Align(0, alignment), 0);
+ EXPECT_EQ(Align(1, alignment), alignment);
+ EXPECT_EQ(Align(alignment + 1, alignment), 2 * alignment);
+ if (i > 1) {
+ EXPECT_EQ(Align(alignment - 1, alignment), alignment);
+ EXPECT_EQ(Align(2 * alignment - 1, alignment), 2 * alignment);
+ }
+ }
+}
+
+TEST(CommonUtilsTest, AlignAddr) {
+ auto buf = MakeAlignedUniquePtr<uint8_t>(/*alignment=*/1024, 512);
+ ASSERT_NE(buf, nullptr);
+ auto* const bufptr = buf.get();
+ ASSERT_EQ(reinterpret_cast<uintptr_t>(bufptr) % 1024, 0);
+
+ for (int i = 0; i <= 8; ++i) {
+ const int alignment = 1 << i;
+ ASSERT_LE(alignment, 1024);
+ SCOPED_TRACE("alignment: " + std::to_string(alignment));
+ EXPECT_EQ(AlignAddr(nullptr, alignment), nullptr);
+ EXPECT_EQ(AlignAddr(bufptr, alignment), bufptr);
+ EXPECT_EQ(AlignAddr(bufptr + 1, alignment), bufptr + alignment);
+ EXPECT_EQ(AlignAddr(bufptr + alignment + 1, alignment),
+ bufptr + 2 * alignment);
+ if (i > 1) {
+ EXPECT_EQ(AlignAddr(bufptr + alignment - 1, alignment),
+ bufptr + alignment);
+ EXPECT_EQ(AlignAddr(bufptr + 2 * alignment - 1, alignment),
+ bufptr + 2 * alignment);
+ }
+ }
+}
+
+TEST(CommonUtilsTest, Clip3) {
+ // Value <= lower boundary.
+ EXPECT_EQ(Clip3(10, 20, 30), 20);
+ EXPECT_EQ(Clip3(20, 20, 30), 20);
+ // Value >= higher boundary.
+ EXPECT_EQ(Clip3(40, 20, 30), 30);
+ EXPECT_EQ(Clip3(30, 20, 30), 30);
+ // Value within boundary.
+ EXPECT_EQ(Clip3(25, 20, 30), 25);
+ // Clipping based on bitdepth (clamp between 0 and 2^bitdepth - 1). Make sure
+ // that the resulting values are always in the pixel range for the
+ // corresponding bitdepth.
+ static constexpr int bitdepths[] = {8, 10, 12};
+ static constexpr int pixels[] = {100, 500, 5000, -100, -500, -5000};
+ for (const auto& bitdepth : bitdepths) {
+ for (const auto& pixel : pixels) {
+ const int clipped_pixel = Clip3(pixel, 0, (1 << bitdepth) - 1);
+ EXPECT_GE(clipped_pixel, 0)
+ << "Clip3 mismatch for bitdepth: " << bitdepth << " pixel: " << pixel;
+ EXPECT_LE(clipped_pixel, (1 << bitdepth) - 1)
+ << "Clip3 mismatch for bitdepth: " << bitdepth << " pixel: " << pixel;
+ }
+ }
+}
+
+template <typename Pixel>
+void TestExtendLine(int width, const int left, int right, Pixel left_value,
+ Pixel right_value) {
+ constexpr int size = 1000;
+ ASSERT_LE(width + left + right, size);
+ Pixel line[size];
+ Pixel* line_start = line + left;
+ line_start[0] = left_value;
+ line_start[width - 1] = right_value;
+ ExtendLine<Pixel>(line_start, width, left, right);
+ for (int x = 0; x < left; x++) {
+ EXPECT_EQ(left_value, line[x]) << "Left side mismatch at x: " << x;
+ }
+ for (int x = 0; x < right; x++) {
+ EXPECT_EQ(right_value, line[left + width + x])
+ << "Right side mismatch at x: " << x;
+ }
+}
+
+TEST(CommonUtilsTest, ExtendLine) {
+ TestExtendLine<uint8_t>(300, 0, 0, 31, 13);
+ TestExtendLine<uint8_t>(100, 10, 20, 31, 13);
+ TestExtendLine<uint8_t>(257, 31, 77, 59, 255);
+ TestExtendLine<uint16_t>(600, 0, 0, 1234, 4321);
+ TestExtendLine<uint16_t>(200, 55, 88, 12345, 54321);
+ TestExtendLine<uint16_t>(2, 99, 333, 257, 513);
+}
+
+template <typename T>
+void TestMemSetBlock(int rows, int columns, ptrdiff_t stride, T value) {
+ constexpr int size = 1000;
+ T block[size];
+ static_assert(sizeof(T) == 1, "");
+ ASSERT_LE(rows * stride, size);
+ ASSERT_LE(columns, stride);
+ MemSetBlock<T>(rows, columns, value, block, stride);
+ for (int y = 0; y < rows; y++) {
+ for (int x = 0; x < columns; x++) {
+ EXPECT_EQ(value, block[y * stride + x])
+ << "Mismatch at y: " << y << " x: " << x;
+ }
+ }
+}
+
+TEST(CommonUtilsTest, MemSetBlock) {
+ TestMemSetBlock<bool>(15, 28, 29, true);
+ TestMemSetBlock<bool>(17, 1, 24, false);
+ TestMemSetBlock<bool>(7, 2, 13, true);
+ TestMemSetBlock<int8_t>(35, 17, 19, 123);
+ TestMemSetBlock<uint8_t>(19, 16, 16, 234);
+}
+
+template <typename T>
+void TestSetBlock(int rows, int columns, ptrdiff_t stride, T value) {
+ constexpr int size = 1000;
+ T block[size];
+ ASSERT_LE(rows * stride, size);
+ ASSERT_LE(columns, stride);
+ SetBlock<T>(rows, columns, value, block, stride);
+ for (int y = 0; y < rows; y++) {
+ for (int x = 0; x < columns; x++) {
+ EXPECT_EQ(value, block[y * stride + x])
+ << "Mismatch at y: " << y << " x: " << x;
+ }
+ }
+}
+
+TEST(CommonUtilsTest, SetBlock) {
+ // Test 1-byte block set.
+ TestSetBlock<bool>(15, 28, 29, true);
+ TestSetBlock<bool>(17, 1, 24, false);
+ TestSetBlock<bool>(7, 2, 13, true);
+ TestSetBlock<int8_t>(35, 17, 19, 123);
+ TestSetBlock<uint8_t>(19, 16, 16, 234);
+ // Test 2-byte block set.
+ TestSetBlock<int16_t>(23, 27, 28, 1234);
+ TestSetBlock<uint16_t>(13, 39, 44, 4321);
+ // Test 4-byte block set.
+ TestSetBlock<int>(14, 7, 7, 12345);
+ TestSetBlock<int>(33, 4, 15, 54321);
+ // Test pointer block set.
+ int data;
+ TestSetBlock<int*>(23, 8, 25, &data);
+}
+
+TEST(CommonUtilsTest, CountTrailingZeros) {
+ EXPECT_EQ(CountTrailingZeros(0x1), 0);
+ EXPECT_EQ(CountTrailingZeros(0x3), 0);
+ EXPECT_EQ(CountTrailingZeros(0x7), 0);
+ EXPECT_EQ(CountTrailingZeros(0xF), 0);
+ EXPECT_EQ(CountTrailingZeros(0x2), 1);
+ EXPECT_EQ(CountTrailingZeros(0x6), 1);
+ EXPECT_EQ(CountTrailingZeros(0xE), 1);
+ EXPECT_EQ(CountTrailingZeros(0x4), 2);
+ EXPECT_EQ(CountTrailingZeros(0xC), 2);
+ EXPECT_EQ(CountTrailingZeros(0x8), 3);
+ EXPECT_EQ(CountTrailingZeros(0x10), 4);
+ EXPECT_EQ(CountTrailingZeros(0x30), 4);
+ EXPECT_EQ(CountTrailingZeros(0x70), 4);
+ EXPECT_EQ(CountTrailingZeros(0xF0), 4);
+ EXPECT_EQ(CountTrailingZeros(0x20), 5);
+ EXPECT_EQ(CountTrailingZeros(0x60), 5);
+ EXPECT_EQ(CountTrailingZeros(0xE0), 5);
+ EXPECT_EQ(CountTrailingZeros(0x40), 6);
+ EXPECT_EQ(CountTrailingZeros(0xC0), 6);
+ EXPECT_EQ(CountTrailingZeros(0x80), 7);
+ EXPECT_EQ(CountTrailingZeros(0x31), 0);
+ EXPECT_EQ(CountTrailingZeros(0x32), 1);
+ EXPECT_EQ(CountTrailingZeros(0x34), 2);
+ EXPECT_EQ(CountTrailingZeros(0x38), 3);
+ EXPECT_EQ(CountTrailingZeros(0x310), 4);
+ EXPECT_EQ(CountTrailingZeros(0x320), 5);
+ EXPECT_EQ(CountTrailingZeros(0x340), 6);
+ EXPECT_EQ(CountTrailingZeros(0x380), 7);
+}
+
+TEST(CommonUtilsTest, FloorLog2) {
+ // Powers of 2.
+ EXPECT_EQ(FloorLog2(1), 0);
+ EXPECT_EQ(FloorLog2(2), 1);
+ EXPECT_EQ(FloorLog2(8), 3);
+ EXPECT_EQ(FloorLog2(64), 6);
+ // Powers of 2 +/- 1.
+ EXPECT_EQ(FloorLog2(9), 3);
+ EXPECT_EQ(FloorLog2(15), 3);
+ EXPECT_EQ(FloorLog2(63), 5);
+ // Large value, smaller than 32 bit.
+ EXPECT_EQ(FloorLog2(0x7fffffff), 30);
+ EXPECT_EQ(FloorLog2(0x80000000), 31);
+ // Larger than 32 bit.
+ EXPECT_EQ(FloorLog2(uint64_t{0x7fffffffffffffff}), 62);
+ EXPECT_EQ(FloorLog2(uint64_t{0x8000000000000000}), 63);
+ EXPECT_EQ(FloorLog2(uint64_t{0xffffffffffffffff}), 63);
+}
+
+TEST(CommonUtilsTest, CeilLog2) {
+ // Even though log2(0) is -inf, here we explicitly define it to be 0.
+ EXPECT_EQ(CeilLog2(0), 0);
+ // Powers of 2.
+ EXPECT_EQ(CeilLog2(1), 0);
+ EXPECT_EQ(CeilLog2(2), 1);
+ EXPECT_EQ(CeilLog2(8), 3);
+ EXPECT_EQ(CeilLog2(64), 6);
+ // Powers of 2 +/- 1.
+ EXPECT_EQ(CeilLog2(9), 4);
+ EXPECT_EQ(CeilLog2(15), 4);
+ EXPECT_EQ(CeilLog2(63), 6);
+ // Large value.
+ EXPECT_EQ(CeilLog2(0x7fffffff), 31);
+}
+
+TEST(CommonUtilsTest, RightShiftWithCeiling) {
+ // Shift 1 bit.
+ EXPECT_EQ(RightShiftWithCeiling(1, 1), 1);
+ EXPECT_EQ(RightShiftWithCeiling(2, 1), 1);
+ EXPECT_EQ(RightShiftWithCeiling(3, 1), 2);
+ EXPECT_EQ(RightShiftWithCeiling(4, 1), 2);
+ EXPECT_EQ(RightShiftWithCeiling(5, 1), 3);
+ // Shift 2 bits.
+ EXPECT_EQ(RightShiftWithCeiling(1, 2), 1);
+ EXPECT_EQ(RightShiftWithCeiling(2, 2), 1);
+ EXPECT_EQ(RightShiftWithCeiling(3, 2), 1);
+ EXPECT_EQ(RightShiftWithCeiling(4, 2), 1);
+ EXPECT_EQ(RightShiftWithCeiling(5, 2), 2);
+ // Shift 20 bits.
+ EXPECT_EQ(RightShiftWithCeiling(1, 20), 1);
+ EXPECT_EQ(RightShiftWithCeiling((1 << 20) - 1, 20), 1);
+ EXPECT_EQ(RightShiftWithCeiling(1 << 20, 20), 1);
+ EXPECT_EQ(RightShiftWithCeiling((1 << 20) + 1, 20), 2);
+ EXPECT_EQ(RightShiftWithCeiling((1 << 21) - 1, 20), 2);
+}
+
+template <typename Input, typename Output>
+void VerifyRightShiftWithRounding(const Input* const values,
+ const int* const bits,
+ const Output* const rounded_values,
+ size_t count) {
+ for (size_t i = 0; i < count; ++i) {
+ const Output rounded_value = RightShiftWithRounding(values[i], bits[i]);
+ EXPECT_EQ(rounded_value, rounded_values[i]) << "Mismatch at index " << i;
+ // Rounding reduces the bit length by |bits[i]| - 1.
+ EXPECT_LE(BitLength(rounded_value), BitLength(values[i]) - (bits[i] - 1))
+ << "Mismatch at index " << i;
+ }
+}
+
+TEST(CommonUtilTest, RightShiftWithRoundingInt32) {
+ static constexpr int32_t values[] = {5, 203, 204, 255, 40000, 50000};
+ static constexpr int bits[] = {0, 3, 3, 3, 12, 12};
+ static constexpr int32_t rounded_values[] = {5, 25, 26, 32, 10, 12};
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(bits), "");
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(rounded_values), "");
+ VerifyRightShiftWithRounding<int32_t, int32_t>(values, bits, rounded_values,
+ ABSL_ARRAYSIZE(values));
+}
+
+TEST(CommonUtilTest, RightShiftWithRoundingUint32) {
+ static constexpr uint32_t values[] = {5, 203, 204, 255,
+ 40000, 50000, 0x7fffffff};
+ static constexpr int bits[] = {0, 3, 3, 3, 12, 12, 20};
+ static constexpr uint32_t rounded_values[] = {5, 25, 26, 32, 10, 12, 2048};
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(bits), "");
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(rounded_values), "");
+ VerifyRightShiftWithRounding<uint32_t, uint32_t>(values, bits, rounded_values,
+ ABSL_ARRAYSIZE(values));
+}
+
+TEST(CommonUtilTest, RightShiftWithRoundingInt64) {
+ static constexpr int64_t values[] = {5, 203, 204, 255,
+ 40000, 50000, 0x7fffffff, 0x8fffffff};
+ static constexpr int bits[] = {0, 3, 3, 3, 12, 12, 20, 20};
+ static constexpr int32_t rounded_values[] = {5, 25, 26, 32,
+ 10, 12, 2048, 2304};
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(bits), "");
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(rounded_values), "");
+ VerifyRightShiftWithRounding<int64_t, int32_t>(values, bits, rounded_values,
+ ABSL_ARRAYSIZE(values));
+}
+
+template <typename Input>
+void VerifyRightShiftWithRoundingSigned(const Input* const values,
+ const int* const bits,
+ const int32_t* const rounded_values,
+ int count) {
+ for (int i = 0; i < count; ++i) {
+ int32_t rounded_value = RightShiftWithRoundingSigned(values[i], bits[i]);
+ EXPECT_EQ(rounded_value, rounded_values[i]) << "Mismatch at index " << i;
+ rounded_value = RightShiftWithRoundingSigned(-values[i], bits[i]);
+ EXPECT_EQ(rounded_value, -rounded_values[i]) << "Mismatch at index " << i;
+ }
+}
+
+TEST(CommonUtilTest, RightShiftWithRoundingSignedInt32) {
+ static constexpr int32_t values[] = {203, 204, 255, 40000, 50000};
+ static constexpr int bits[] = {3, 3, 3, 12, 12};
+ static constexpr int32_t rounded_values[] = {25, 26, 32, 10, 12};
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(bits), "");
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(rounded_values), "");
+ VerifyRightShiftWithRoundingSigned<int32_t>(values, bits, rounded_values,
+ ABSL_ARRAYSIZE(values));
+}
+
+TEST(CommonUtilTest, RightShiftWithRoundingSignedInt64) {
+ static constexpr int64_t values[] = {203, 204, 255, 40000,
+ 50000, 0x7fffffff, 0x8fffffff};
+ static constexpr int bits[] = {3, 3, 3, 12, 12, 20, 20};
+ static constexpr int32_t rounded_values[] = {25, 26, 32, 10, 12, 2048, 2304};
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(bits), "");
+ static_assert(ABSL_ARRAYSIZE(values) == ABSL_ARRAYSIZE(rounded_values), "");
+ VerifyRightShiftWithRoundingSigned<int64_t>(values, bits, rounded_values,
+ ABSL_ARRAYSIZE(values));
+}
+
+TEST(CommonUtilTest, GetResidualBufferSize) {
+ // No subsampling.
+ EXPECT_EQ(GetResidualBufferSize(64, 64, 0, 0, 2),
+ /* 2*(64*64*3/1 + 32*4) = */ 24832);
+ // Only X is subsampled.
+ EXPECT_EQ(GetResidualBufferSize(64, 64, 1, 0, 2),
+ /* 2*(64*64*2/1 + 32*4) = */ 16640);
+ // Only Y is subsampled.
+ EXPECT_EQ(GetResidualBufferSize(64, 64, 0, 1, 2),
+ /* 2*(64*64*2/1 + 32*4) = */ 16640);
+ // Both X and Y are subsampled.
+ EXPECT_EQ(GetResidualBufferSize(64, 64, 1, 1, 2),
+ /* 2*(64*64*3/2 + 32*4) = */ 12544);
+}
+
+//------------------------------------------------------------------------------
+// Tests for bitstream util functions
+
+TEST(BitstreamUtilTest, IsIntraFrame) {
+ EXPECT_TRUE(IsIntraFrame(kFrameKey));
+ EXPECT_TRUE(IsIntraFrame(kFrameIntraOnly));
+ EXPECT_FALSE(IsIntraFrame(kFrameInter));
+ EXPECT_FALSE(IsIntraFrame(kFrameSwitch));
+}
+
+TEST(BitstreamUtilTest, GetTransformClass) {
+ static constexpr TransformClass expected_classes[kNumTransformTypes] = {
+ kTransformClass2D, kTransformClass2D,
+ kTransformClass2D, kTransformClass2D,
+ kTransformClass2D, kTransformClass2D,
+ kTransformClass2D, kTransformClass2D,
+ kTransformClass2D, kTransformClass2D,
+ kTransformClassVertical, kTransformClassHorizontal,
+ kTransformClassVertical, kTransformClassHorizontal,
+ kTransformClassVertical, kTransformClassHorizontal,
+ };
+ for (int i = 0; i < kNumTransformTypes; ++i) {
+ EXPECT_EQ(GetTransformClass(static_cast<TransformType>(i)),
+ expected_classes[i])
+ << "Mismatch at index " << i;
+ }
+}
+
+TEST(BitstreamUtilTest, RowOrColumn4x4ToPixel) {
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneY, 0), 40);
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneY, 1),
+ 40); // Subsampling should have no effect on Y plane.
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneU, 0), 40);
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneU, 1), 20);
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneV, 0), 40);
+ EXPECT_EQ(RowOrColumn4x4ToPixel(10, kPlaneV, 1), 20);
+}
+
+TEST(BitstreamUtilTest, GetPlaneType) {
+ EXPECT_EQ(GetPlaneType(kPlaneY), kPlaneTypeY);
+ EXPECT_EQ(GetPlaneType(kPlaneU), kPlaneTypeUV);
+ EXPECT_EQ(GetPlaneType(kPlaneV), kPlaneTypeUV);
+}
+
+TEST(BitstreamUtils, IsDirectionalMode) {
+ static constexpr bool is_directional_modes[kNumPredictionModes] = {
+ false, true, true, true, true, true, true, true, true,
+ false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ };
+ for (int i = 0; i < kNumPredictionModes; ++i) {
+ EXPECT_EQ(IsDirectionalMode(static_cast<PredictionMode>(i)),
+ is_directional_modes[i])
+ << "Mismatch at index " << i;
+ }
+}
+
+TEST(BitstreamUtils, GetRelativeDistance) {
+ // Both order_hint_bits and order_hint_shift_bits are zero. (a and b must be
+ // zero.)
+ EXPECT_EQ(GetRelativeDistance(0, 0, 0), 0);
+ EXPECT_EQ(GetRelativeDistance(10, 20, 27), -10);
+
+ EXPECT_EQ(GetRelativeDistance(2, 1, 30), 1);
+ EXPECT_EQ(GetRelativeDistance(2, 1, 29), 1);
+
+ EXPECT_EQ(GetRelativeDistance(1, 2, 30), -1);
+ EXPECT_EQ(GetRelativeDistance(1, 2, 29), -1);
+
+ // With an order_hint_bits of 4 and an order_hint_shift_bits of 28, 16 is the
+ // same as 0, 17 is the same as 1, etc. The most positive distance is 7, and
+ // the most negative distance is -8.
+
+ EXPECT_EQ(GetRelativeDistance(2, 6, 28), -4);
+ EXPECT_EQ(GetRelativeDistance(6, 2, 28), 4);
+ // 18 - 14 = 4.
+ EXPECT_EQ(GetRelativeDistance(2, 14, 28), 4);
+ // 14 - 18 = -4.
+ EXPECT_EQ(GetRelativeDistance(14, 2, 28), -4);
+ // If a and b are exactly 8 apart, GetRelativeDistance() cannot tell whether
+ // a is before or after b. GetRelativeDistance(a, b) and
+ // GetRelativeDistance(b, a) are both -8.
+ // 1 - 9 = -8.
+ EXPECT_EQ(GetRelativeDistance(1, 9, 28), -8);
+ // 9 - 17 = -8.
+ EXPECT_EQ(GetRelativeDistance(9, 1, 28), -8);
+
+ // With an order_hint_bits of 5 and an order_hint_shift_bits of 27, 32 is the
+ // same as 0, 33 is the same as 1, etc. The most positive distance is 15, and
+ // the most negative distance is -16.
+
+ // 31 - 32 = -1.
+ EXPECT_EQ(GetRelativeDistance(31, 0, 27), -1);
+ // 32 - 31 = 1.
+ EXPECT_EQ(GetRelativeDistance(0, 31, 27), 1);
+ // 30 - 33 = -3.
+ EXPECT_EQ(GetRelativeDistance(30, 1, 27), -3);
+ // 33 - 30 = 3.
+ EXPECT_EQ(GetRelativeDistance(1, 30, 27), 3);
+ // 25 - 36 = -11.
+ EXPECT_EQ(GetRelativeDistance(25, 4, 27), -11);
+ // 36 - 25 = 11.
+ EXPECT_EQ(GetRelativeDistance(4, 25, 27), 11);
+ // 15 - 0 = 15.
+ EXPECT_EQ(GetRelativeDistance(15, 0, 27), 15);
+ // If a and b are exactly 16 apart, GetRelativeDistance() cannot tell whether
+ // a is before or after b. GetRelativeDistance(a, b) and
+ // GetRelativeDistance(b, a) are both -16.
+ // 16 - 32 = -16.
+ EXPECT_EQ(GetRelativeDistance(16, 0, 27), -16);
+ // 0 - 16 = -16.
+ EXPECT_EQ(GetRelativeDistance(0, 16, 27), -16);
+}
+
+TEST(BitstreamUtils, ApplySign) {
+ // ApplyPositive(0) = 0
+ EXPECT_EQ(ApplySign(0, 0), 0);
+ // ApplyNegative(0) = 0
+ EXPECT_EQ(ApplySign(0, -1), 0);
+
+ // ApplyPositive(1) = 1
+ EXPECT_EQ(ApplySign(1, 0), 1);
+ // ApplyNegative(1) = -1
+ EXPECT_EQ(ApplySign(1, -1), -1);
+
+ // ApplyPositive(-1) = -1
+ EXPECT_EQ(ApplySign(-1, 0), -1);
+ // ApplyNegative(-1) = 1
+ EXPECT_EQ(ApplySign(-1, -1), 1);
+
+ // ApplyPositive(1234) = 1234
+ EXPECT_EQ(ApplySign(1234, 0), 1234);
+ // ApplyNegative(1234) = -1234
+ EXPECT_EQ(ApplySign(1234, -1), -1234);
+
+ // ApplyPositive(-1234) = -1234
+ EXPECT_EQ(ApplySign(-1234, 0), -1234);
+ // ApplyNegative(-1234) = 1234
+ EXPECT_EQ(ApplySign(-1234, -1), 1234);
+}
+
+// 7.9.3. (without the clamp for numerator and denominator).
+int SpecGetMvProjectionKernel(int mv, int numerator, int denominator) {
+ int value = mv * numerator * kProjectionMvDivisionLookup[denominator];
+ if (value >= 0) {
+ value += 1 << 13;
+ value >>= 14;
+ } else {
+ value = -value;
+ value += 1 << 13;
+ value >>= 14;
+ value = -value;
+ }
+ if (value < (-(1 << 14) + 1)) value = -(1 << 14) + 1;
+ if (value > (1 << 14) - 1) value = (1 << 14) - 1;
+ return value;
+}
+
+void SpecGetMvProjectionNoClamp(const MotionVector& mv, int numerator,
+ int denominator, MotionVector* projection_mv) {
+ for (int i = 0; i < 2; ++i) {
+ projection_mv->mv[i] =
+ SpecGetMvProjectionKernel(mv.mv[i], numerator, denominator);
+ }
+}
+
+TEST(BitstreamUtils, GetMvProjection) {
+ const int16_t mvs[5][2] = {
+ {0, 0}, {11, 73}, {-84, 272}, {733, -827}, {-472, -697}};
+ for (auto& mv_value : mvs) {
+ for (int numerator = -kMaxFrameDistance; numerator <= kMaxFrameDistance;
+ ++numerator) {
+ for (int denominator = 0; denominator <= kMaxFrameDistance;
+ ++denominator) {
+ MotionVector mv, projection_mv, spec_projection_mv;
+ mv.mv[0] = mv_value[0];
+ mv.mv[1] = mv_value[1];
+ GetMvProjection(mv, numerator, kProjectionMvDivisionLookup[denominator],
+ &projection_mv);
+ SpecGetMvProjectionNoClamp(mv, numerator, denominator,
+ &spec_projection_mv);
+ EXPECT_EQ(projection_mv.mv32, spec_projection_mv.mv32);
+ }
+ }
+ }
+}
+
+// 7.9.4.
+int SpecProject(int value, int delta, int dst_sign) {
+ constexpr int kMiSizeLog2 = 2;
+ const int sign = (dst_sign == 0) ? 1 : dst_sign;
+ int offset;
+ if (delta >= 0) {
+ offset = delta >> (3 + 1 + kMiSizeLog2);
+ } else {
+ offset = -((-delta) >> (3 + 1 + kMiSizeLog2));
+ }
+ return value + sign * offset;
+}
+
+TEST(BitstreamUtils, Project) {
+ for (int value = -10; value <= 10; ++value) {
+ for (int delta = -256; delta <= 256; ++delta) {
+ for (int dst_sign = -1; dst_sign <= 0; ++dst_sign) {
+ EXPECT_EQ(Project(value, delta, dst_sign),
+ SpecProject(value, delta, dst_sign));
+ }
+ }
+ }
+}
+
+TEST(BitstreamUtils, IsBlockSmallerThan8x8) {
+ static constexpr bool is_block_smaller_than8x8[kMaxBlockSizes] = {
+ true, true, false, true, false, false, false, false,
+ false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false,
+ };
+ for (int i = 0; i < kMaxBlockSizes; ++i) {
+ EXPECT_EQ(IsBlockSmallerThan8x8(static_cast<BlockSize>(i)),
+ is_block_smaller_than8x8[i])
+ << "Mismatch at index " << i;
+ }
+}
+
+TEST(BitstreamUtils, TransformSizeToSquareTransformIndex) {
+ EXPECT_EQ(TransformSizeToSquareTransformIndex(kTransformSize4x4), 0);
+ EXPECT_EQ(TransformSizeToSquareTransformIndex(kTransformSize8x8), 1);
+ EXPECT_EQ(TransformSizeToSquareTransformIndex(kTransformSize16x16), 2);
+ EXPECT_EQ(TransformSizeToSquareTransformIndex(kTransformSize32x32), 3);
+ EXPECT_EQ(TransformSizeToSquareTransformIndex(kTransformSize64x64), 4);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/compiler_attributes.h b/src/utils/compiler_attributes.h
index e122426..09f0035 100644
--- a/src/utils/compiler_attributes.h
+++ b/src/utils/compiler_attributes.h
@@ -165,7 +165,7 @@
// int p1_ LIBGAV1_GUARDED_BY(mu_);
// ...
// };
-// TODO(b/132506370): this can be reenabled after a local MutexLock
+// TODO(b/133245043): this can be reenabled after a local MutexLock
// implementation is added with proper thread annotations.
#if 0 // LIBGAV1_HAS_ATTRIBUTE(guarded_by)
#define LIBGAV1_GUARDED_BY(x) __attribute__((guarded_by(x)))
diff --git a/src/utils/constants.h b/src/utils/constants.h
index a2076c5..1126ad6 100644
--- a/src/utils/constants.h
+++ b/src/utils/constants.h
@@ -71,6 +71,7 @@ enum {
// but was increased to simplify the SIMD loads in
// ConvolveCompoundScale2D_NEON() and ConvolveScale2D_NEON().
kConvolveBorderRight = 8,
+ kConvolveScaleBorderRight = 15,
kConvolveBorderBottom = 4,
kSubPixelTaps = 8,
kWienerFilterBits = 7,
@@ -523,6 +524,10 @@ enum ObuType : int8_t {
kObuPadding = 15,
};
+constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth,
+ kPredictionModeSmoothHorizontal,
+ kPredictionModeSmoothVertical);
+
//------------------------------------------------------------------------------
// ToString()
//
diff --git a/src/utils/cpu_test.cc b/src/utils/cpu_test.cc
new file mode 100644
index 0000000..3a01b33
--- /dev/null
+++ b/src/utils/cpu_test.cc
@@ -0,0 +1,248 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/cpu.h"
+
+#if defined(__linux__)
+#include <unistd.h>
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#endif // defined(__linux__)
+
+#include "gtest/gtest.h"
+#include "src/utils/logging.h"
+
+namespace libgav1 {
+namespace {
+
+#if defined(__linux__)
+
+// Sample code for getting the number of performance CPU cores. The following
+// sources were consulted:
+// * https://www.kernel.org/doc/html/latest/admin-guide/cputopology.html
+// * cpu-hotplug.txt: CPU hotplug Support in Linux(tm) Kernel
+// https://lwn.net/Articles/537570/
+// * https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
+// * Android bionic source code of get_nprocs():
+// libc/bionic/sysinfo.cpp
+// * glibc 2.30 source code of get_nprocs():
+// sysdeps/unix/sysv/linux/getsysstats.c
+//
+// Tested on:
+// * Asus Nexus 7 2013: Qualcomm Snapdragon 600, 32-bit Android 6.0.1
+// (Marshmallow). Brings cores online and offline dynamically. (The tablet
+// has 4 cores. "0", "0-1", "0-2", and "0-3" have all been observed in the
+// /sys/devices/system/cpu/online file.) This causes the number of cores
+// currently online to potentially be lower than the number of cores that can
+// be brought online quickly.
+// * General Mobile 4G: Qualcomm Snapdragon 410, 32-bit Android 7.1.1 (Nougat).
+// * Motorola Moto G5 Plus: Qualcomm Snapdragon 625, 32-bit Android 8.1.0
+// (Oreo).
+// * Motorola Moto G7 Play: Qualcomm Snapdragon 632, 32-bit Android 9 (Pie).
+// All 8 cores have the same cpuinfo_max_freq (1804800), but there are two
+// values of cpuinfo_min_freq: cores 0-3 have 614400 and cores 4-7 have
+// 633600. We would need to check cpuinfo_min_freq to differentiate the two
+// kinds of cores (Qualcomm Kryo 250 Gold and Qualcomm Kryo 250 Silver).
+// * Pixel 2 XL: Qualcomm Snapdragon 835, 64-bit Android 9 (Pie).
+// * Pixel 3: Qualcomm Snapdragon 845, 64-bit Android 9 (Pie).
+// * Pixel 3a: Qualcomm Snapdragon 670, 64-bit Android 9 (Pie).
+// * Samsung Galaxy S6: Samsung Exynos 7 Octa (7420), 64-bit Android 7.0
+// (Nougat).
+// * Samsung Galaxy S8+ (SM-G955FD): Samsung Exynos 8895, 64-bit Android 8.0.0.
+//
+// Note: The sample code needs to use the 'long' type because it is the return
+// type of the Standard C Library function strtol(). The ClangTidy warnings are
+// suppressed with NOLINT(google-runtime-int) comments.
+
+// Returns the number of online processor cores.
+int GetNumberOfProcessorsOnline() {
+ // See https://developer.android.com/ndk/guides/cpu-features.
+ long num_cpus = sysconf(_SC_NPROCESSORS_ONLN); // NOLINT(google-runtime-int)
+ if (num_cpus < 0) {
+ LIBGAV1_DLOG(ERROR, "sysconf(_SC_NPROCESSORS_ONLN) failed: %s.",
+ strerror(errno));
+ return 0;
+ }
+ // It is safe to cast num_cpus to int. sysconf(_SC_NPROCESSORS_ONLN) returns
+ // the return value of get_nprocs(), which is an int.
+ return static_cast<int>(num_cpus);
+}
+
+// These CPUs support heterogeneous multiprocessing.
+#if defined(__arm__) || defined(__aarch64__)
+
+// A helper function used by GetNumberOfPerformanceCoresOnline().
+//
+// Returns the cpuinfo_max_freq value (in kHz) of the given CPU. Returns 0 on
+// failure.
+long GetCpuinfoMaxFreq(int cpu_index) { // NOLINT(google-runtime-int)
+ char buffer[128];
+ const int rv = snprintf(
+ buffer, sizeof(buffer),
+ "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_index);
+ if (rv < 0 || rv >= sizeof(buffer)) {
+ LIBGAV1_DLOG(ERROR, "snprintf failed, or |buffer| is too small.");
+ return 0;
+ }
+ FILE* file = fopen(buffer, "r");
+ if (file == nullptr) {
+ LIBGAV1_DLOG(ERROR, "fopen(\"%s\", \"r\") failed: %s.", buffer,
+ strerror(errno));
+ return 0;
+ }
+ char* const str = fgets(buffer, sizeof(buffer), file);
+ fclose(file);
+ if (str == nullptr) {
+ LIBGAV1_DLOG(ERROR, "fgets failed.");
+ return 0;
+ }
+ const long freq = strtol(str, nullptr, 10); // NOLINT(google-runtime-int)
+ if (freq <= 0 || freq == LONG_MAX) {
+ LIBGAV1_DLOG(ERROR,
+ "No conversion can be performed, or the converted value is "
+ "invalid: %ld.",
+ freq);
+ return 0;
+ }
+ return freq;
+}
+
+// Returns the number of performance CPU cores that are online. The number of
+// efficiency CPU cores is subtracted from the total number of CPU cores. Uses
+// cpuinfo_max_freq to determine whether a CPU is a performance core or an
+// efficiency core.
+//
+// This function is not perfect. For example, the Snapdragon 632 SoC used in
+// Motorola Moto G7 has performance and efficiency cores with the same
+// cpuinfo_max_freq but different cpuinfo_min_freq. This function fails to
+// differentiate the two kinds of cores and reports all the cores as
+// performance cores.
+int GetNumberOfPerformanceCoresOnline() {
+ // Get the online CPU list. Some examples of the online CPU list are:
+ // "0-7"
+ // "0"
+ // "0-1,2,3,4-7"
+ char online[512];
+ FILE* file = fopen("/sys/devices/system/cpu/online", "r");
+ if (file == nullptr) {
+ LIBGAV1_DLOG(ERROR,
+ "fopen(\"/sys/devices/system/cpu/online\", \"r\") failed: %s.",
+ strerror(errno));
+ return 0;
+ }
+ char* const str = fgets(online, sizeof(online), file);
+ fclose(file);
+ file = nullptr;
+ if (str == nullptr) {
+ LIBGAV1_DLOG(ERROR, "fgets failed.");
+ return 0;
+ }
+ LIBGAV1_DLOG(INFO, "The online CPU list is %s", online);
+
+ // Count the number of the slowest CPUs. Some SoCs such as Snapdragon 855
+ // have performance cores with different max frequencies, so only the slowest
+ // CPUs are efficiency cores. If we count the number of the fastest CPUs, we
+ // will fail to count the second fastest performance cores.
+ long slowest_cpu_freq = LONG_MAX; // NOLINT(google-runtime-int)
+ int num_slowest_cpus = 0;
+ int num_cpus = 0;
+ const char* cp = online;
+ int range_begin = -1;
+ while (true) {
+ char* str_end;
+ const int cpu = static_cast<int>(strtol(cp, &str_end, 10));
+ if (str_end == cp) {
+ break;
+ }
+ cp = str_end;
+ if (*cp == '-') {
+ range_begin = cpu;
+ } else {
+ if (range_begin == -1) {
+ range_begin = cpu;
+ }
+
+ num_cpus += cpu - range_begin + 1;
+ for (int i = range_begin; i <= cpu; ++i) {
+ const long freq = GetCpuinfoMaxFreq(i); // NOLINT(google-runtime-int)
+ if (freq <= 0) {
+ return 0;
+ }
+ LIBGAV1_DLOG(INFO, "cpu%d max frequency is %ld kHz.", i, freq);
+ if (freq < slowest_cpu_freq) {
+ slowest_cpu_freq = freq;
+ num_slowest_cpus = 0;
+ }
+ if (freq == slowest_cpu_freq) {
+ ++num_slowest_cpus;
+ }
+ }
+
+ range_begin = -1;
+ }
+ if (*cp == '\0') {
+ break;
+ }
+ ++cp;
+ }
+
+ LIBGAV1_DLOG(INFO, "There are %d CPU cores.", num_cpus);
+ LIBGAV1_DLOG(INFO,
+ "%d CPU cores are the slowest, with max frequency %ld kHz.",
+ num_slowest_cpus, slowest_cpu_freq);
+ // If there are faster CPU cores than the slowest CPU cores, exclude the
+ // slowest CPU cores.
+ if (num_slowest_cpus < num_cpus) {
+ num_cpus -= num_slowest_cpus;
+ }
+ return num_cpus;
+}
+
+#else
+
+// Assume symmetric multiprocessing.
+int GetNumberOfPerformanceCoresOnline() {
+ return GetNumberOfProcessorsOnline();
+}
+
+#endif
+
+#endif // defined(__linux__)
+
+/*
+ Run this test with logging enabled on an Android device:
+ 64-bit Android:
+ tests/run_android_test.sh --test cpu --enable_asserts
+ 32-bit Android:
+ tests/run_android_test.sh --test cpu --arch arm \
+ --enable_asserts
+*/
+TEST(CpuTest, GetNumberOfPerformanceCoresOnline) {
+#if defined(__linux__)
+ const int num_cpus = GetNumberOfProcessorsOnline();
+ ASSERT_NE(num_cpus, 0);
+ LIBGAV1_DLOG(INFO, "There are %d cores online.", num_cpus);
+ const int num_performance_cpus = GetNumberOfPerformanceCoresOnline();
+ ASSERT_NE(num_performance_cpus, 0);
+ LIBGAV1_DLOG(INFO, "There are %d performance cores online.",
+ num_performance_cpus);
+#endif // defined(__linux__)
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/dynamic_buffer.h b/src/utils/dynamic_buffer.h
index 40ece26..0694980 100644
--- a/src/utils/dynamic_buffer.h
+++ b/src/utils/dynamic_buffer.h
@@ -17,6 +17,7 @@
#ifndef LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
#define LIBGAV1_SRC_UTILS_DYNAMIC_BUFFER_H_
+#include <cstddef>
#include <memory>
#include <new>
diff --git a/src/utils/entropy_decoder.cc b/src/utils/entropy_decoder.cc
index bf21199..3d97e69 100644
--- a/src/utils/entropy_decoder.cc
+++ b/src/utils/entropy_decoder.cc
@@ -60,7 +60,8 @@ uint32_t ScaleCdf(uint32_t values_in_range_shifted, const uint16_t* const cdf,
(kMinimumProbabilityPerSymbol * (symbol_count - index));
}
-void UpdateCdf(uint16_t* const cdf, const int symbol_count, const int symbol) {
+void UpdateCdf(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol_count,
+ const int symbol) {
const uint16_t count = cdf[symbol_count];
// rate is computed in the spec as:
// 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
@@ -168,7 +169,7 @@ void UpdateCdf(uint16_t* const cdf, const int symbol_count, const int symbol) {
// the cdf array. Since an invalid CDF value is written into cdf[7], the
// count in cdf[7] needs to be fixed up after the vectorized code.
-void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+void UpdateCdf5(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
uint16x4_t cdf_vec = vld1_u16(cdf);
const uint16_t count = cdf[5];
const int rate = (count >> 4) + 5;
@@ -195,7 +196,7 @@ void UpdateCdf5(uint16_t* const cdf, const int symbol) {
// This version works for |symbol_count| = 7, 8, or 9.
// See UpdateCdf5 for implementation details.
template <int symbol_count>
-void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+void UpdateCdf7To9(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
static_assert(symbol_count >= 7 && symbol_count <= 9, "");
uint16x8_t cdf_vec = vld1q_u16(cdf);
const uint16_t count = cdf[symbol_count];
@@ -229,7 +230,7 @@ void UpdateCdf9(uint16_t* const cdf, const int symbol) {
}
// See UpdateCdf5 for implementation details.
-void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+void UpdateCdf11(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
uint16x8_t cdf_vec = vld1q_u16(cdf + 2);
const uint16_t count = cdf[11];
cdf[11] = count + static_cast<uint16_t>(count < 32);
@@ -266,7 +267,7 @@ void UpdateCdf11(uint16_t* const cdf, const int symbol) {
}
// See UpdateCdf5 for implementation details.
-void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+void UpdateCdf13(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
uint16x8_t cdf_vec0 = vld1q_u16(cdf);
uint16x8_t cdf_vec1 = vld1q_u16(cdf + 4);
const uint16_t count = cdf[13];
@@ -299,7 +300,7 @@ void UpdateCdf13(uint16_t* const cdf, const int symbol) {
}
// See UpdateCdf5 for implementation details.
-void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+void UpdateCdf16(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
uint16x8_t cdf_vec = vld1q_u16(cdf);
const uint16_t count = cdf[16];
const int rate = (count >> 4) + 5;
@@ -351,7 +352,7 @@ inline void StoreUnaligned16(void* a, const __m128i v) {
_mm_storeu_si128(static_cast<__m128i*>(a), v);
}
-void UpdateCdf5(uint16_t* const cdf, const int symbol) {
+void UpdateCdf5(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
__m128i cdf_vec = LoadLo8(cdf);
const uint16_t count = cdf[5];
const int rate = (count >> 4) + 5;
@@ -379,7 +380,7 @@ void UpdateCdf5(uint16_t* const cdf, const int symbol) {
// This version works for |symbol_count| = 7, 8, or 9.
// See UpdateCdf5 for implementation details.
template <int symbol_count>
-void UpdateCdf7To9(uint16_t* const cdf, const int symbol) {
+void UpdateCdf7To9(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
static_assert(symbol_count >= 7 && symbol_count <= 9, "");
__m128i cdf_vec = LoadUnaligned16(cdf);
const uint16_t count = cdf[symbol_count];
@@ -412,7 +413,7 @@ void UpdateCdf9(uint16_t* const cdf, const int symbol) {
}
// See UpdateCdf5 for implementation details.
-void UpdateCdf11(uint16_t* const cdf, const int symbol) {
+void UpdateCdf11(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
__m128i cdf_vec = LoadUnaligned16(cdf + 2);
const uint16_t count = cdf[11];
cdf[11] = count + static_cast<uint16_t>(count < 32);
@@ -447,7 +448,7 @@ void UpdateCdf11(uint16_t* const cdf, const int symbol) {
}
// See UpdateCdf5 for implementation details.
-void UpdateCdf13(uint16_t* const cdf, const int symbol) {
+void UpdateCdf13(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
__m128i cdf_vec0 = LoadLo8(cdf);
__m128i cdf_vec1 = LoadUnaligned16(cdf + 4);
const uint16_t count = cdf[13];
@@ -478,7 +479,7 @@ void UpdateCdf13(uint16_t* const cdf, const int symbol) {
cdf[13] = count + static_cast<uint16_t>(count < 32);
}
-void UpdateCdf16(uint16_t* const cdf, const int symbol) {
+void UpdateCdf16(uint16_t* LIBGAV1_RESTRICT const cdf, const int symbol) {
__m128i cdf_vec0 = LoadUnaligned16(cdf);
const uint16_t count = cdf[16];
const int rate = (count >> 4) + 5;
@@ -543,8 +544,8 @@ void UpdateCdf16(uint16_t* const cdf, const int symbol) {
#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_SSE2
#endif // LIBGAV1_ENTROPY_DECODER_ENABLE_NEON
-inline DaalaBitReader::WindowSize HostToBigEndian(
- const DaalaBitReader::WindowSize x) {
+inline EntropyDecoder::WindowSize HostToBigEndian(
+ const EntropyDecoder::WindowSize x) {
static_assert(sizeof(x) == 4 || sizeof(x) == 8, "");
#if defined(__GNUC__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -554,7 +555,7 @@ inline DaalaBitReader::WindowSize HostToBigEndian(
#endif
#elif defined(_WIN32)
// Note Windows targets are assumed to be little endian.
- return static_cast<DaalaBitReader::WindowSize>(
+ return static_cast<EntropyDecoder::WindowSize>(
(sizeof(x) == 8) ? _byteswap_uint64(static_cast<unsigned __int64>(x))
: _byteswap_ulong(static_cast<unsigned long>(x)));
#else
@@ -565,10 +566,10 @@ inline DaalaBitReader::WindowSize HostToBigEndian(
} // namespace
#if !LIBGAV1_CXX17
-constexpr int DaalaBitReader::kWindowSize; // static.
+constexpr int EntropyDecoder::kWindowSize; // static.
#endif
-DaalaBitReader::DaalaBitReader(const uint8_t* data, size_t size,
+EntropyDecoder::EntropyDecoder(const uint8_t* data, size_t size,
bool allow_update_cdf)
: data_(data),
data_end_(data + size),
@@ -607,7 +608,7 @@ DaalaBitReader::DaalaBitReader(const uint8_t* data, size_t size,
// * The probability is fixed at half. So some multiplications can be replaced
// with bit operations.
// * Symbol count is fixed at 2.
-int DaalaBitReader::ReadBit() {
+int EntropyDecoder::ReadBit() {
const uint32_t curr =
((values_in_range_ & kReadBitMask) >> 1) + kMinimumProbabilityPerSymbol;
const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
@@ -623,7 +624,7 @@ int DaalaBitReader::ReadBit() {
return bit;
}
-int64_t DaalaBitReader::ReadLiteral(int num_bits) {
+int64_t EntropyDecoder::ReadLiteral(int num_bits) {
assert(num_bits <= 32);
assert(num_bits > 0);
uint32_t literal = 0;
@@ -643,7 +644,8 @@ int64_t DaalaBitReader::ReadLiteral(int num_bits) {
return literal;
}
-int DaalaBitReader::ReadSymbol(uint16_t* const cdf, int symbol_count) {
+int EntropyDecoder::ReadSymbol(uint16_t* LIBGAV1_RESTRICT const cdf,
+ int symbol_count) {
const int symbol = ReadSymbolImpl(cdf, symbol_count);
if (allow_update_cdf_) {
UpdateCdf(cdf, symbol_count, symbol);
@@ -651,7 +653,7 @@ int DaalaBitReader::ReadSymbol(uint16_t* const cdf, int symbol_count) {
return symbol;
}
-bool DaalaBitReader::ReadSymbol(uint16_t* cdf) {
+bool EntropyDecoder::ReadSymbol(uint16_t* LIBGAV1_RESTRICT cdf) {
assert(cdf[1] == 0);
const bool symbol = ReadSymbolImpl(cdf[0]) != 0;
if (allow_update_cdf_) {
@@ -681,12 +683,12 @@ bool DaalaBitReader::ReadSymbol(uint16_t* cdf) {
return symbol;
}
-bool DaalaBitReader::ReadSymbolWithoutCdfUpdate(uint16_t cdf) {
+bool EntropyDecoder::ReadSymbolWithoutCdfUpdate(uint16_t cdf) {
return ReadSymbolImpl(cdf) != 0;
}
template <int symbol_count>
-int DaalaBitReader::ReadSymbol(uint16_t* const cdf) {
+int EntropyDecoder::ReadSymbol(uint16_t* LIBGAV1_RESTRICT const cdf) {
static_assert(symbol_count >= 3 && symbol_count <= 16, "");
if (symbol_count == 3 || symbol_count == 4) {
return ReadSymbol3Or4(cdf, symbol_count);
@@ -721,7 +723,7 @@ int DaalaBitReader::ReadSymbol(uint16_t* const cdf) {
return symbol;
}
-int DaalaBitReader::ReadSymbolImpl(const uint16_t* const cdf,
+int EntropyDecoder::ReadSymbolImpl(const uint16_t* LIBGAV1_RESTRICT const cdf,
int symbol_count) {
assert(cdf[symbol_count - 1] == 0);
--symbol_count;
@@ -744,8 +746,8 @@ int DaalaBitReader::ReadSymbolImpl(const uint16_t* const cdf,
return symbol;
}
-int DaalaBitReader::ReadSymbolImplBinarySearch(const uint16_t* const cdf,
- int symbol_count) {
+int EntropyDecoder::ReadSymbolImplBinarySearch(
+ const uint16_t* LIBGAV1_RESTRICT const cdf, int symbol_count) {
assert(cdf[symbol_count - 1] == 0);
assert(symbol_count > 1 && symbol_count <= 16);
--symbol_count;
@@ -787,7 +789,7 @@ int DaalaBitReader::ReadSymbolImplBinarySearch(const uint16_t* const cdf,
return low;
}
-int DaalaBitReader::ReadSymbolImpl(uint16_t cdf) {
+int EntropyDecoder::ReadSymbolImpl(uint16_t cdf) {
const auto symbol_value = static_cast<uint16_t>(window_diff_ >> bits_);
const uint32_t curr =
(((values_in_range_ >> 8) * (cdf >> kCdfPrecision)) >> 1) +
@@ -805,7 +807,7 @@ int DaalaBitReader::ReadSymbolImpl(uint16_t cdf) {
// Equivalent to ReadSymbol(cdf, [3,4]), with the ReadSymbolImpl and UpdateCdf
// calls inlined.
-int DaalaBitReader::ReadSymbol3Or4(uint16_t* const cdf,
+int EntropyDecoder::ReadSymbol3Or4(uint16_t* LIBGAV1_RESTRICT const cdf,
const int symbol_count) {
assert(cdf[symbol_count - 1] == 0);
uint32_t curr = values_in_range_;
@@ -970,7 +972,8 @@ found:
return symbol;
}
-int DaalaBitReader::ReadSymbolImpl8(const uint16_t* const cdf) {
+int EntropyDecoder::ReadSymbolImpl8(
+ const uint16_t* LIBGAV1_RESTRICT const cdf) {
assert(cdf[7] == 0);
uint32_t curr = values_in_range_;
uint32_t prev;
@@ -1033,7 +1036,7 @@ found:
return symbol;
}
-void DaalaBitReader::PopulateBits() {
+void EntropyDecoder::PopulateBits() {
constexpr int kMaxCachedBits = kWindowSize - 16;
#if defined(__aarch64__)
// Fast path: read eight bytes and add the first six bytes to window_diff_.
@@ -1092,7 +1095,7 @@ void DaalaBitReader::PopulateBits() {
window_diff_ = window_diff;
}
-void DaalaBitReader::NormalizeRange() {
+void EntropyDecoder::NormalizeRange() {
const int bits_used = 15 ^ FloorLog2(values_in_range_);
bits_ -= bits_used;
values_in_range_ <<= bits_used;
@@ -1100,18 +1103,18 @@ void DaalaBitReader::NormalizeRange() {
}
// Explicit instantiations.
-template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
-template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<3>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<4>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<5>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<6>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<7>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<8>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<9>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<10>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<11>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<12>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<13>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<14>(uint16_t* cdf);
+template int EntropyDecoder::ReadSymbol<16>(uint16_t* cdf);
} // namespace libgav1
diff --git a/src/utils/entropy_decoder.h b/src/utils/entropy_decoder.h
index c066b98..8eeaef4 100644
--- a/src/utils/entropy_decoder.h
+++ b/src/utils/entropy_decoder.h
@@ -25,20 +25,20 @@
namespace libgav1 {
-class DaalaBitReader : public BitReader {
+class EntropyDecoder final : public BitReader {
public:
// WindowSize must be an unsigned integer type with at least 32 bits. Use the
// largest type with fast arithmetic. size_t should meet these requirements.
using WindowSize = size_t;
- DaalaBitReader(const uint8_t* data, size_t size, bool allow_update_cdf);
- ~DaalaBitReader() override = default;
+ EntropyDecoder(const uint8_t* data, size_t size, bool allow_update_cdf);
+ ~EntropyDecoder() override = default;
// Move only.
- DaalaBitReader(DaalaBitReader&& rhs) noexcept;
- DaalaBitReader& operator=(DaalaBitReader&& rhs) noexcept;
+ EntropyDecoder(EntropyDecoder&& rhs) noexcept;
+ EntropyDecoder& operator=(EntropyDecoder&& rhs) noexcept;
- int ReadBit() final;
+ int ReadBit() override;
int64_t ReadLiteral(int num_bits) override;
// ReadSymbol() calls for which the |symbol_count| is only known at runtime
// will use this variant.
@@ -104,19 +104,19 @@ class DaalaBitReader : public BitReader {
WindowSize window_diff_;
};
-extern template int DaalaBitReader::ReadSymbol<3>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<4>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<5>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<6>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<7>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<8>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<9>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<10>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<11>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<12>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<13>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<14>(uint16_t* cdf);
-extern template int DaalaBitReader::ReadSymbol<16>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<3>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<4>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<5>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<6>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<7>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<8>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<9>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<10>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<11>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<12>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<13>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<14>(uint16_t* cdf);
+extern template int EntropyDecoder::ReadSymbol<16>(uint16_t* cdf);
} // namespace libgav1
diff --git a/src/utils/entropy_decoder_test.cc b/src/utils/entropy_decoder_test.cc
new file mode 100644
index 0000000..9d23088
--- /dev/null
+++ b/src/utils/entropy_decoder_test.cc
@@ -0,0 +1,1259 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/entropy_decoder.h"
+
+#include <cstdint>
+#include <cstdio>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+#include "src/utils/entropy_decoder_test_data.inc"
+
+class EntropyDecoderTest : public testing::Test {
+ protected:
+ // If compile_time is true, tests
+ // bool EntropyDecoder::ReadSymbol(uint16_t* cdf).
+ // Otherwise, tests
+ // int EntropyDecoder::ReadSymbol(uint16_t* cdf, int symbol_count)
+ // with symbol_count=2.
+ template <bool compile_time>
+ void TestReadSymbolBoolean(int num_runs);
+
+ // For N = 3..16 (except 15):
+ // template <bool compile_time>
+ // void TestReadSymbolN(int num_runs);
+ //
+ // If compile_time is true, tests
+ // int EntropyDecoder::ReadSymbol<N>(uint16_t* const cdf).
+ // Otherwise, tests
+ // int EntropyDecoder::ReadSymbol(uint16_t* cdf, int symbol_count)
+ // with symbol_count=N.
+ //
+ // NOTE: symbol_count=15 is not tested because AV1 does not use it.
+ template <bool compile_time>
+ void TestReadSymbol3(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol4(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol5(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol6(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol7(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol8(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol9(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol10(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol11(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol12(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol13(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol14(int num_runs);
+
+ template <bool compile_time>
+ void TestReadSymbol16(int num_runs);
+};
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbolBoolean(int num_runs) {
+ static constexpr int kSymbols[4][4] = {{0, 0, 1, 1}, //
+ {0, 1, 1, 0}, //
+ {1, 0, 1, 0}, //
+ {1, 0, 0, 1}};
+ absl::Duration elapsed_time;
+ bool symbols[1024 * 4 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbolBoolean,
+ kNumBytesTestReadSymbolBoolean,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][3] = {
+ {16384, 0, 0},
+ {32768 - 8386, 0, 0},
+ {32768 - 24312, 0, 0},
+ {16384, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 2) != 0;
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbolBooleanCompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbolBoolean(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 4; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol3(int num_runs) {
+ static constexpr int kSymbols[6][4] = {{0, 2, 1, 2}, //
+ {1, 1, 2, 1}, //
+ {2, 0, 0, 0}, //
+ {0, 2, 0, 2}, //
+ {1, 2, 1, 0}, //
+ {2, 1, 1, 0}};
+ absl::Duration elapsed_time;
+ int symbols[1024 * 6 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol3, kNumBytesTestReadSymbol3,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][4] = {
+ // pdf: 1/3, 1/3, 1/3
+ {32768 - 10923, 32768 - 21845, 0, 0},
+ // pdf: 1/6, 2/6, 3/6
+ {32768 - 5461, 32768 - 16384, 0, 0},
+ // pdf: 2/6, 3/6, 1/6
+ {32768 - 10923, 32768 - 27307, 0, 0},
+ // pdf: 3/6, 1/6, 2/6
+ {32768 - 16384, 32768 - 21845, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<3>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 3);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol3CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol3(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 6; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol4(int num_runs) {
+ static constexpr int kSymbols[8][4] = {{0, 0, 3, 3}, //
+ {0, 0, 2, 2}, //
+ {1, 1, 0, 0}, //
+ {1, 2, 1, 1}, //
+ {2, 2, 3, 2}, //
+ {2, 3, 2, 1}, //
+ {3, 3, 0, 0}, //
+ {3, 3, 1, 1}};
+ absl::Duration elapsed_time;
+ int symbols[1024 * 8 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol4, kNumBytesTestReadSymbol4,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][5] = {
+ // pdf: 1/4, 1/4, 1/4, 1/4
+ {32768 - 8192, 32768 - 16384, 32768 - 24576, 0, 0},
+ // pdf: 2/8, 1/8, 2/8, 3/8
+ {32768 - 8192, 32768 - 12288, 32768 - 20480, 0, 0},
+ // pdf: 1/4, 1/4, 1/4, 1/4
+ {32768 - 8192, 32768 - 16384, 32768 - 24576, 0, 0},
+ // pdf: 2/8, 3/8, 2/8, 1/8
+ {32768 - 8192, 32768 - 20480, 32768 - 28672, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 8; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<4>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 4);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol4CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol4(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 8; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol5(int num_runs) {
+ static constexpr int kSymbols[10][4] = {{0, 0, 4, 4}, //
+ {0, 1, 3, 3}, //
+ {1, 2, 2, 2}, //
+ {1, 3, 1, 1}, //
+ {2, 4, 0, 0}, //
+ {2, 0, 4, 3}, //
+ {3, 1, 3, 2}, //
+ {3, 2, 2, 1}, //
+ {4, 3, 1, 2}, //
+ {4, 0, 4, 2}};
+ absl::Duration elapsed_time;
+ int symbols[320 * 10 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol5, kNumBytesTestReadSymbol5,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][6] = {
+ // pdf: 1/5, 1/5, 1/5, 1/5, 1/5
+ {32768 - 6554, 32768 - 13107, 32768 - 19661, 32768 - 26214, 0, 0},
+ // pdf: 3/10, 2/10, 2/10, 2/10, 1/10
+ {32768 - 9830, 32768 - 16384, 32768 - 22938, 32768 - 29491, 0, 0},
+ // pdf: 1/10, 2/10, 2/10, 2/10, 3/10
+ {32768 - 3277, 32768 - 9830, 32768 - 16384, 32768 - 22938, 0, 0},
+ // pdf: 1/10, 2/10, 4/10, 2/10, 1/10
+ {32768 - 3277, 32768 - 9830, 32768 - 22938, 32768 - 29491, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 320; ++i) {
+ for (int j = 0; j < 10; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<5>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 5);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol5CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol5(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 320; ++i) {
+ for (int j = 0; j < 10; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol6(int num_runs) {
+ static constexpr int kSymbols[12][4] = {{0, 0, 5, 5}, //
+ {0, 1, 4, 4}, //
+ {1, 2, 3, 3}, //
+ {1, 3, 2, 2}, //
+ {2, 4, 1, 1}, //
+ {2, 5, 0, 0}, //
+ {3, 0, 5, 4}, //
+ {3, 1, 4, 3}, //
+ {4, 2, 3, 2}, //
+ {4, 3, 2, 1}, //
+ {5, 4, 1, 3}, //
+ {5, 0, 5, 2}};
+ absl::Duration elapsed_time;
+ int symbols[256 * 12 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol6, kNumBytesTestReadSymbol6,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][7] = {
+ // pmf: 1/6, 1/6, 1/6, 1/6, 1/6, 1/6
+ {32768 - 5461, 32768 - 10923, 32768 - 16384, 32768 - 21845,
+ 32768 - 27307, 0, 0},
+ // pmf: 3/12, 2/12, 2/12, 2/12, 2/12, 1/12
+ {32768 - 8192, 32768 - 13653, 32768 - 19115, 32768 - 24576,
+ 32768 - 30037, 0, 0},
+ // pmf: 1/12, 2/12, 2/12, 2/12, 2/12, 3/12
+ {32768 - 2731, 32768 - 8192, 32768 - 13653, 32768 - 19115,
+ 32768 - 24576, 0, 0},
+ // pmf: 1/12, 2/12, 3/12, 3/12, 2/12, 1/12
+ {32768 - 2731, 32768 - 8192, 32768 - 16384, 32768 - 24576,
+ 32768 - 30037, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 256; ++i) {
+ for (int j = 0; j < 12; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<6>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 6);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol6CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol6(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 256; ++i) {
+ for (int j = 0; j < 12; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol7(int num_runs) {
+ static constexpr int kSymbols[14][4] = {{0, 4, 6, 3}, //
+ {1, 5, 5, 2}, //
+ {2, 6, 4, 1}, //
+ {3, 0, 3, 0}, //
+ {4, 1, 2, 6}, //
+ {5, 2, 1, 5}, //
+ {6, 3, 0, 4}, //
+ {0, 0, 6, 5}, //
+ {2, 1, 4, 3}, //
+ {4, 3, 6, 1}, //
+ {6, 5, 2, 4}, //
+ {1, 0, 5, 2}, //
+ {3, 2, 3, 2}, //
+ {5, 4, 5, 3}};
+ absl::Duration elapsed_time;
+ int symbols[1024 * 14 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol7, kNumBytesTestReadSymbol7,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][8] = {
+ // pdf: 1/7, 1/7, 1/7, 1/7, 1/7, 1/7, 1/7
+ {32768 - 4681, 32768 - 9362, 32768 - 14043, 32768 - 18725,
+ 32768 - 23406, 32768 - 28087, 0, 0},
+ // pdf: 3/14, 2/14, 2/14, 2/14, 2/14, 2/14, 1/14
+ {32768 - 7022, 32768 - 11703, 32768 - 16384, 32768 - 21065,
+ 32768 - 25746, 32768 - 30427, 0, 0},
+ // pdf: 1/14, 1/14, 2/14, 2/14, 2/14, 3/14, 3/14
+ {32768 - 2341, 32768 - 4681, 32768 - 9362, 32768 - 14043, 32768 - 18725,
+ 32768 - 25746, 0, 0},
+ // pdf: 1/14, 2/14, 3/14, 3/14, 2/14, 2/14, 1/14
+ {32768 - 2341, 32768 - 7022, 32768 - 14043, 32768 - 21065,
+ 32768 - 25746, 32768 - 30427, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 14; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<7>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 7);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol7CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol7(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 14; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol8(int num_runs) {
+ static constexpr int kSymbols[16][4] = {{0, 4, 7, 3}, //
+ {1, 5, 6, 2}, //
+ {2, 6, 5, 1}, //
+ {3, 7, 4, 0}, //
+ {4, 0, 3, 7}, //
+ {5, 1, 2, 6}, //
+ {6, 2, 1, 5}, //
+ {7, 3, 0, 4}, //
+ {0, 0, 6, 5}, //
+ {2, 1, 4, 3}, //
+ {4, 3, 6, 4}, //
+ {6, 5, 2, 2}, //
+ {1, 0, 7, 3}, //
+ {3, 2, 5, 5}, //
+ {5, 4, 7, 2}, //
+ {7, 6, 3, 4}};
+ absl::Duration elapsed_time;
+ int symbols[1024 * 16 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol8, kNumBytesTestReadSymbol8,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][9] = {
+ // pdf: 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8
+ {32768 - 4096, 32768 - 8192, 32768 - 12288, 32768 - 16384,
+ 32768 - 20480, 32768 - 24576, 32768 - 28672, 0, 0},
+ // pdf: 3/16, 2/16, 2/16, 2/16, 2/16, 2/16, 2/16, 1/16
+ {32768 - 6144, 32768 - 10240, 32768 - 14336, 32768 - 18432,
+ 32768 - 22528, 32768 - 26624, 32768 - 30720, 0, 0},
+ // pdf: 1/16, 1/16, 2/16, 2/16, 2/16, 2/16, 3/16, 3/16
+ {32768 - 2048, 32768 - 4096, 32768 - 8192, 32768 - 12288, 32768 - 16384,
+ 32768 - 20480, 32768 - 26624, 0, 0},
+ // pdf: 1/16, 1/16, 3/16, 3/16, 3/16, 3/16, 1/16, 1/16
+ {32768 - 2048, 32768 - 4096, 32768 - 10240, 32768 - 16384,
+ 32768 - 22528, 32768 - 28672, 32768 - 30720, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 16; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<8>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 8);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol8CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol8(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 1024; ++i) {
+ for (int j = 0; j < 16; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol9(int num_runs) {
+ static constexpr int kSymbols[18][4] = {{0, 4, 8, 3}, //
+ {1, 5, 7, 2}, //
+ {2, 6, 6, 1}, //
+ {3, 7, 5, 0}, //
+ {4, 8, 4, 8}, //
+ {5, 0, 3, 7}, //
+ {6, 1, 2, 6}, //
+ {7, 2, 1, 5}, //
+ {8, 3, 0, 4}, //
+ {0, 0, 8, 7}, //
+ {2, 1, 6, 5}, //
+ {4, 3, 4, 3}, //
+ {6, 5, 2, 1}, //
+ {8, 7, 7, 6}, //
+ {1, 0, 5, 4}, //
+ {3, 2, 3, 2}, //
+ {5, 4, 1, 4}, //
+ {7, 6, 8, 4}};
+ absl::Duration elapsed_time;
+ int symbols[128 * 18 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol9, kNumBytesTestReadSymbol9,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][10] = {
+ // pmf: 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9
+ {32768 - 3641, 32768 - 7282, 32768 - 10923, 32768 - 14564,
+ 32768 - 18204, 32768 - 21845, 32768 - 25486, 32768 - 29127, 0, 0},
+ // pmf: 3/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 1/18
+ {32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 16384,
+ 32768 - 20025, 32768 - 23666, 32768 - 27307, 32768 - 30948, 0, 0},
+ // pmf: 1/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 3/18
+ {32768 - 1820, 32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 16384,
+ 32768 - 20025, 32768 - 23666, 32768 - 27307, 0, 0},
+ // pmf: 1/18, 2/18, 2/18, 2/18, 4/18, 2/18, 2/18, 2/18, 1/18
+ {32768 - 1820, 32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 20025,
+ 32768 - 23666, 32768 - 27307, 32768 - 30948, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 128; ++i) {
+ for (int j = 0; j < 18; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<9>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 9);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol9CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol9(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 128; ++i) {
+ for (int j = 0; j < 18; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol10(int num_runs) {
+ static constexpr int kSymbols[20][4] = {{0, 5, 9, 4}, //
+ {1, 6, 8, 3}, //
+ {2, 7, 7, 2}, //
+ {3, 8, 6, 1}, //
+ {4, 9, 5, 0}, //
+ {5, 0, 4, 9}, //
+ {6, 1, 3, 8}, //
+ {7, 2, 2, 7}, //
+ {8, 3, 1, 6}, //
+ {9, 4, 0, 5}, //
+ {0, 0, 9, 7}, //
+ {2, 1, 8, 5}, //
+ {4, 3, 6, 3}, //
+ {6, 5, 4, 1}, //
+ {8, 7, 2, 8}, //
+ {1, 0, 9, 6}, //
+ {3, 2, 7, 4}, //
+ {5, 4, 5, 2}, //
+ {7, 6, 3, 5}, //
+ {9, 8, 1, 4}};
+ absl::Duration elapsed_time;
+ int symbols[96 * 20 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol10, kNumBytesTestReadSymbol10,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][11] = {
+ // pmf: 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10
+ {32768 - 3277, 32768 - 6554, 32768 - 9830, 32768 - 13107, 32768 - 16384,
+ 32768 - 19661, 32768 - 22938, 32768 - 26214, 32768 - 29491, 0, 0},
+ // pmf: 3/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 1/20
+ {32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 14746,
+ 32768 - 18022, 32768 - 21299, 32768 - 24576, 32768 - 27853,
+ 32768 - 31130, 0, 0},
+ // pmf: 1/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 3/20
+ {32768 - 1638, 32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 14746,
+ 32768 - 18022, 32768 - 21299, 32768 - 24576, 32768 - 27853, 0, 0},
+ // pmf: 1/20, 2/20, 2/20, 2/20, 3/20, 3/20, 2/20, 2/20, 2/20, 1/20
+ {32768 - 1638, 32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 16384,
+ 32768 - 21299, 32768 - 24576, 32768 - 27853, 32768 - 31130, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 96; ++i) {
+ for (int j = 0; j < 20; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<10>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 10);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol10CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol10(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 96; ++i) {
+ for (int j = 0; j < 20; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol11(int num_runs) {
+ static constexpr int kSymbols[22][4] = {{0, 6, 10, 5}, //
+ {1, 7, 9, 4}, //
+ {2, 8, 8, 3}, //
+ {3, 9, 7, 2}, //
+ {4, 10, 6, 1}, //
+ {5, 0, 5, 0}, //
+ {6, 1, 4, 10}, //
+ {7, 2, 3, 9}, //
+ {8, 3, 2, 8}, //
+ {9, 4, 1, 7}, //
+ {10, 5, 0, 6}, //
+ {0, 0, 10, 9}, //
+ {2, 1, 8, 7}, //
+ {4, 3, 6, 5}, //
+ {6, 5, 4, 3}, //
+ {8, 7, 2, 1}, //
+ {10, 9, 10, 8}, //
+ {1, 0, 9, 6}, //
+ {3, 2, 7, 4}, //
+ {5, 4, 5, 2}, //
+ {7, 6, 3, 5}, //
+ {9, 8, 1, 5}};
+ absl::Duration elapsed_time;
+ int symbols[96 * 22 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol11, kNumBytesTestReadSymbol11,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][12] = {
+ // pmf: 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11
+ {32768 - 2979, 32768 - 5958, 32768 - 8937, 32768 - 11916, 32768 - 14895,
+ 32768 - 17873, 32768 - 20852, 32768 - 23831, 32768 - 26810,
+ 32768 - 29789, 0, 0},
+ // pmf: 3/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 1/22
+ {32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405,
+ 32768 - 16384, 32768 - 19363, 32768 - 22342, 32768 - 25321,
+ 32768 - 28300, 32768 - 31279, 0, 0},
+ // pmf: 1/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 3/22
+ {32768 - 1489, 32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405,
+ 32768 - 16384, 32768 - 19363, 32768 - 22342, 32768 - 25321,
+ 32768 - 28300, 0, 0},
+ // pmf: 1/22, 2/22, 2/22, 2/22, 2/22, 4/22, 2/22, 2/22, 2/22, 2/22, 1/22
+ {32768 - 1489, 32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405,
+ 32768 - 19363, 32768 - 22342, 32768 - 25321, 32768 - 28300,
+ 32768 - 31279, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 96; ++i) {
+ for (int j = 0; j < 22; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<11>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 11);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol11CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol11(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 96; ++i) {
+ for (int j = 0; j < 22; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol12(int num_runs) {
+ static constexpr int kSymbols[24][4] = {{0, 6, 11, 5}, //
+ {1, 7, 10, 4}, //
+ {2, 8, 9, 3}, //
+ {3, 9, 8, 2}, //
+ {4, 10, 7, 1}, //
+ {5, 11, 6, 0}, //
+ {6, 0, 5, 11}, //
+ {7, 1, 4, 10}, //
+ {8, 2, 3, 9}, //
+ {9, 3, 2, 8}, //
+ {10, 4, 1, 7}, //
+ {11, 5, 0, 6}, //
+ {0, 0, 11, 9}, //
+ {2, 1, 10, 7}, //
+ {4, 3, 8, 5}, //
+ {6, 5, 6, 3}, //
+ {8, 7, 4, 1}, //
+ {10, 9, 2, 10}, //
+ {1, 0, 11, 8}, //
+ {3, 2, 9, 6}, //
+ {5, 4, 7, 4}, //
+ {7, 6, 5, 2}, //
+ {9, 8, 3, 6}, //
+ {11, 10, 1, 5}};
+ absl::Duration elapsed_time;
+ int symbols[80 * 24 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol12, kNumBytesTestReadSymbol12,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][13] = {
+ // pmf: 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12,
+ // 1/12,
+ // 1/12
+ {32768 - 2731, 32768 - 5461, 32768 - 8192, 32768 - 10923, 32768 - 13653,
+ 32768 - 16384, 32768 - 19115, 32768 - 21845, 32768 - 24576,
+ 32768 - 27307, 32768 - 30037, 0, 0},
+ // pmf: 3/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24,
+ // 2/24,
+ // 1/24
+ {32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288, 32768 - 15019,
+ 32768 - 17749, 32768 - 20480, 32768 - 23211, 32768 - 25941,
+ 32768 - 28672, 32768 - 31403, 0, 0},
+ // pmf: 1/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24,
+ // 2/24,
+ // 3/24
+ {32768 - 1365, 32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288,
+ 32768 - 15019, 32768 - 17749, 32768 - 20480, 32768 - 23211,
+ 32768 - 25941, 32768 - 28672, 0, 0},
+ // pmf: 1/24, 2/24, 2/24, 2/24, 2/24, 3/24, 3/24, 2/24, 2/24, 2/24,
+ // 2/24,
+ // 1/24
+ {32768 - 1365, 32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288,
+ 32768 - 16384, 32768 - 20480, 32768 - 23211, 32768 - 25941,
+ 32768 - 28672, 32768 - 31403, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 80; ++i) {
+ for (int j = 0; j < 24; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<12>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 12);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol12CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol12(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 80; ++i) {
+ for (int j = 0; j < 24; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol13(int num_runs) {
+ static constexpr int kSymbols[26][4] = {{0, 6, 12, 5}, //
+ {1, 7, 11, 4}, //
+ {2, 8, 10, 3}, //
+ {3, 9, 9, 2}, //
+ {4, 10, 8, 1}, //
+ {5, 11, 7, 0}, //
+ {6, 12, 6, 12}, //
+ {7, 0, 5, 11}, //
+ {8, 1, 4, 10}, //
+ {9, 2, 3, 9}, //
+ {10, 3, 2, 8}, //
+ {11, 4, 1, 7}, //
+ {12, 5, 0, 6}, //
+ {0, 0, 12, 11}, //
+ {2, 1, 10, 9}, //
+ {4, 3, 8, 7}, //
+ {6, 5, 6, 5}, //
+ {8, 7, 4, 3}, //
+ {10, 9, 2, 1}, //
+ {12, 11, 12, 10}, //
+ {1, 0, 11, 8}, //
+ {3, 2, 9, 6}, //
+ {5, 4, 7, 4}, //
+ {7, 6, 5, 2}, //
+ {9, 8, 3, 6}, //
+ {11, 10, 1, 6}};
+ absl::Duration elapsed_time;
+ int symbols[64 * 26 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol13, kNumBytesTestReadSymbol13,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][14] = {
+ // pmf: 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13,
+ // 1/13, 1/13, 1/13
+ {32768 - 2521, 32768 - 5041, 32768 - 7562, 32768 - 10082, 32768 - 12603,
+ 32768 - 15124, 32768 - 17644, 32768 - 20165, 32768 - 22686,
+ 32768 - 25206, 32768 - 27727, 32768 - 30247, 0, 0},
+ // pmf: 3/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26,
+ // 2/26, 2/26, 1/26
+ {32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343, 32768 - 13863,
+ 32768 - 16384, 32768 - 18905, 32768 - 21425, 32768 - 23946,
+ 32768 - 26466, 32768 - 28987, 32768 - 31508, 0, 0},
+ // pmf: 1/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26,
+ // 2/26, 2/26, 3/26
+ {32768 - 1260, 32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343,
+ 32768 - 13863, 32768 - 16384, 32768 - 18905, 32768 - 21425,
+ 32768 - 23946, 32768 - 26466, 32768 - 28987, 0, 0},
+ // pmf: 1/26, 2/26, 2/26, 2/26, 2/26, 2/26, 4/26, 2/26, 2/26, 2/26,
+ // 2/26, 2/26, 1/26
+ {32768 - 1260, 32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343,
+ 32768 - 13863, 32768 - 18905, 32768 - 21425, 32768 - 23946,
+ 32768 - 26466, 32768 - 28987, 32768 - 31508, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 26; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<13>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 13);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol13CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol13(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 26; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol14(int num_runs) {
+ static constexpr int kSymbols[28][4] = {{0, 7, 13, 6}, //
+ {1, 8, 12, 5}, //
+ {2, 9, 11, 4}, //
+ {3, 10, 10, 3}, //
+ {4, 11, 9, 2}, //
+ {5, 12, 8, 1}, //
+ {6, 13, 7, 0}, //
+ {7, 0, 6, 13}, //
+ {8, 1, 5, 12}, //
+ {9, 2, 4, 11}, //
+ {10, 3, 3, 10}, //
+ {11, 4, 2, 9}, //
+ {12, 5, 1, 8}, //
+ {13, 6, 0, 7}, //
+ {0, 0, 13, 11}, //
+ {2, 1, 12, 9}, //
+ {4, 3, 10, 7}, //
+ {6, 5, 8, 5}, //
+ {8, 7, 6, 3}, //
+ {10, 9, 4, 1}, //
+ {12, 11, 2, 12}, //
+ {1, 0, 13, 10}, //
+ {3, 2, 11, 8}, //
+ {5, 4, 9, 6}, //
+ {7, 6, 7, 4}, //
+ {9, 8, 5, 2}, //
+ {11, 10, 3, 7}, //
+ {13, 12, 1, 6}};
+ absl::Duration elapsed_time;
+ int symbols[64 * 28 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol14, kNumBytesTestReadSymbol14,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][15] = {
+ // pmf: 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14,
+ // 1/14, 1/14, 1/14, 1/14
+ {32768 - 2341, 32768 - 4681, 32768 - 7022, 32768 - 9362, 32768 - 11703,
+ 32768 - 14043, 32768 - 16384, 32768 - 18725, 32768 - 21065,
+ 32768 - 23406, 32768 - 25746, 32768 - 28087, 32768 - 30427, 0, 0},
+ // pmf: 3/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28,
+ // 2/28, 2/28, 2/28, 1/28
+ {32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533, 32768 - 12873,
+ 32768 - 15214, 32768 - 17554, 32768 - 19895, 32768 - 22235,
+ 32768 - 24576, 32768 - 26917, 32768 - 29257, 32768 - 31598, 0, 0},
+ // pmf: 1/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28,
+ // 2/28, 2/28, 2/28, 3/28
+ {32768 - 1170, 32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533,
+ 32768 - 12873, 32768 - 15214, 32768 - 17554, 32768 - 19895,
+ 32768 - 22235, 32768 - 24576, 32768 - 26917, 32768 - 29257, 0, 0},
+ // pmf: 1/28, 2/28, 2/28, 2/28, 2/28, 2/28, 3/28, 3/28, 2/28, 2/28,
+ // 2/28, 2/28, 2/28, 1/28
+ {32768 - 1170, 32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533,
+ 32768 - 12873, 32768 - 16384, 32768 - 19895, 32768 - 22235,
+ 32768 - 24576, 32768 - 26917, 32768 - 29257, 32768 - 31598, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 28; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<14>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 14);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol14CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol14(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 28; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+template <bool compile_time>
+void EntropyDecoderTest::TestReadSymbol16(int num_runs) {
+ static constexpr int kSymbols[32][4] = {{0, 8, 15, 7}, //
+ {1, 9, 14, 6}, //
+ {2, 10, 13, 5}, //
+ {3, 11, 12, 4}, //
+ {4, 12, 11, 3}, //
+ {5, 13, 10, 2}, //
+ {6, 14, 9, 1}, //
+ {7, 15, 8, 0}, //
+ {8, 0, 7, 15}, //
+ {9, 1, 6, 14}, //
+ {10, 2, 5, 13}, //
+ {11, 3, 4, 12}, //
+ {12, 4, 3, 11}, //
+ {13, 5, 2, 10}, //
+ {14, 6, 1, 9}, //
+ {15, 7, 0, 8}, //
+ {0, 0, 15, 13}, //
+ {2, 1, 14, 11}, //
+ {4, 3, 12, 9}, //
+ {6, 5, 10, 7}, //
+ {8, 7, 8, 5}, //
+ {10, 9, 6, 3}, //
+ {12, 11, 4, 1}, //
+ {14, 13, 2, 14}, //
+ {1, 0, 15, 12}, //
+ {3, 2, 13, 10}, //
+ {5, 4, 11, 8}, //
+ {7, 6, 9, 6}, //
+ {9, 8, 7, 4}, //
+ {11, 10, 5, 2}, //
+ {13, 12, 3, 8}, //
+ {15, 14, 1, 7}};
+ absl::Duration elapsed_time;
+ int symbols[48 * 32 * 4];
+ for (int run = 0; run < num_runs; ++run) {
+ EntropyDecoder reader(kBytesTestReadSymbol16, kNumBytesTestReadSymbol16,
+ /*allow_update_cdf=*/true);
+ uint16_t cdf[4][17] = {
+ // pmf: 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16,
+ // 1/16, 1/16, 1/16, 1/16, 1/16, 1/16
+ {32768 - 2048, 32768 - 4096, 32768 - 6144, 32768 - 8192, 32768 - 10240,
+ 32768 - 12288, 32768 - 14336, 32768 - 16384, 32768 - 18432,
+ 32768 - 20480, 32768 - 22528, 32768 - 24576, 32768 - 26624,
+ 32768 - 28672, 32768 - 30720, 0, 0},
+ // pmf: 3/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32,
+ // 2/32, 2/32, 2/32, 2/32, 2/32, 1/32
+ {32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216, 32768 - 11264,
+ 32768 - 13312, 32768 - 15360, 32768 - 17408, 32768 - 19456,
+ 32768 - 21504, 32768 - 23552, 32768 - 25600, 32768 - 27648,
+ 32768 - 29696, 32768 - 31744, 0, 0},
+ // pmf: 1/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32,
+ // 2/32, 2/32, 2/32, 2/32, 2/32, 3/32
+ {32768 - 1024, 32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216,
+ 32768 - 11264, 32768 - 13312, 32768 - 15360, 32768 - 17408,
+ 32768 - 19456, 32768 - 21504, 32768 - 23552, 32768 - 25600,
+ 32768 - 27648, 32768 - 29696, 0, 0},
+ // pmf: 1/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 3/32, 3/32, 2/32,
+ // 2/32, 2/32, 2/32, 2/32, 2/32, 1/32
+ {32768 - 1024, 32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216,
+ 32768 - 11264, 32768 - 13312, 32768 - 16384, 32768 - 19456,
+ 32768 - 21504, 32768 - 23552, 32768 - 25600, 32768 - 27648,
+ 32768 - 29696, 32768 - 31744, 0, 0},
+ };
+ const absl::Time start = absl::Now();
+ int index = 0;
+ for (int i = 0; i < 48; ++i) {
+ for (int j = 0; j < 32; ++j) {
+ for (int k = 0; k < 4; ++k) { // NOLINT(modernize-loop-convert)
+ if (compile_time) {
+ symbols[index++] = reader.ReadSymbol<16>(cdf[k]);
+ } else {
+ symbols[index++] = reader.ReadSymbol(cdf[k], 16);
+ }
+ }
+ }
+ }
+ elapsed_time += absl::Now() - start;
+ }
+ if (compile_time) {
+ printf("TestReadSymbol16CompileTime(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ } else {
+ printf("TestReadSymbol16(%d): %5d us\n", num_runs,
+ static_cast<int>(absl::ToInt64Microseconds(elapsed_time)));
+ }
+
+ int index = 0;
+ for (int i = 0; i < 48; ++i) {
+ for (int j = 0; j < 32; ++j) { // NOLINT(modernize-loop-convert)
+ for (int k = 0; k < 4; ++k) {
+ ASSERT_EQ(symbols[index++], kSymbols[j][k]);
+ }
+ }
+ }
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbolBoolean) {
+ TestReadSymbolBoolean</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbolBooleanCompileTime) {
+ TestReadSymbolBoolean</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol3) {
+ TestReadSymbol3</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol3CompileTime) {
+ TestReadSymbol3</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol4) {
+ TestReadSymbol4</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol4CompileTime) {
+ TestReadSymbol4</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol5) {
+ TestReadSymbol5</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol5CompileTime) {
+ TestReadSymbol5</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol6) {
+ TestReadSymbol6</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol6CompileTime) {
+ TestReadSymbol6</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol7) {
+ TestReadSymbol7</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol7CompileTime) {
+ TestReadSymbol7</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol8) {
+ TestReadSymbol8</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol8CompileTime) {
+ TestReadSymbol8</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol9) {
+ TestReadSymbol9</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol9CompileTime) {
+ TestReadSymbol9</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol10) {
+ TestReadSymbol10</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol10CompileTime) {
+ TestReadSymbol10</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol11) {
+ TestReadSymbol11</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol11CompileTime) {
+ TestReadSymbol11</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol12) {
+ TestReadSymbol12</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol12CompileTime) {
+ TestReadSymbol12</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol13) {
+ TestReadSymbol13</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol13CompileTime) {
+ TestReadSymbol13</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol14) {
+ TestReadSymbol14</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol14CompileTime) {
+ TestReadSymbol14</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol16) {
+ TestReadSymbol16</*compile_time=*/false>(1);
+}
+
+TEST_F(EntropyDecoderTest, ReadSymbol16CompileTime) {
+ TestReadSymbol16</*compile_time=*/true>(1);
+}
+
+TEST_F(EntropyDecoderTest, DISABLED_Speed) {
+ // compile_time=true is only tested for those symbol_count values that have
+ // an instantiation of the EntropyDecoder::ReadSymbol<symbol_count> template
+ // method.
+ TestReadSymbolBoolean</*compile_time=*/false>(10000);
+ TestReadSymbolBoolean</*compile_time=*/true>(10000);
+ TestReadSymbol3</*compile_time=*/false>(5000);
+ TestReadSymbol3</*compile_time=*/true>(5000);
+ TestReadSymbol4</*compile_time=*/false>(2000);
+ TestReadSymbol4</*compile_time=*/true>(2000);
+ TestReadSymbol5</*compile_time=*/false>(5000);
+ TestReadSymbol5</*compile_time=*/true>(5000);
+ TestReadSymbol6</*compile_time=*/false>(5000);
+ TestReadSymbol6</*compile_time=*/true>(5000);
+ TestReadSymbol7</*compile_time=*/false>(1000);
+ TestReadSymbol7</*compile_time=*/true>(1000);
+ TestReadSymbol8</*compile_time=*/false>(1000);
+ TestReadSymbol8</*compile_time=*/true>(1000);
+ TestReadSymbol9</*compile_time=*/false>(5000);
+ TestReadSymbol9</*compile_time=*/true>(5000);
+ TestReadSymbol10</*compile_time=*/false>(5000);
+ TestReadSymbol10</*compile_time=*/true>(5000);
+ TestReadSymbol11</*compile_time=*/false>(5000);
+ TestReadSymbol11</*compile_time=*/true>(5000);
+ TestReadSymbol12</*compile_time=*/false>(5000);
+ TestReadSymbol12</*compile_time=*/true>(5000);
+ TestReadSymbol13</*compile_time=*/false>(5000);
+ TestReadSymbol13</*compile_time=*/true>(5000);
+ TestReadSymbol14</*compile_time=*/false>(5000);
+ TestReadSymbol14</*compile_time=*/true>(5000);
+ TestReadSymbol16</*compile_time=*/false>(5000);
+ TestReadSymbol16</*compile_time=*/true>(5000);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/entropy_decoder_test_data.inc b/src/utils/entropy_decoder_test_data.inc
new file mode 100644
index 0000000..9050d5e
--- /dev/null
+++ b/src/utils/entropy_decoder_test_data.inc
@@ -0,0 +1,8443 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The kBytesTestReadSymbolBoolean[] array was encoded by using the following
+// libaom code:
+//
+// aom_cdf_prob cdf[4][3] = {
+// { 16384, 0, 0 },
+// { 32768 - 8386, 0, 0 },
+// { 32768 - 24312, 0, 0 },
+// { 16384, 0, 0 },
+// };
+// constexpr int kSymbols[4][4] = { { 0, 0, 1, 1 }, //
+// { 0, 1, 1, 0 }, //
+// { 1, 0, 1, 0 }, //
+// { 1, 0, 0, 1 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 1024; ++i) {
+// for (int j = 0; j < 4; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 2);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf(" constexpr size_t kNumBytesTestReadSymbolBoolean = %u;\n", bw.pos);
+// printf(" constexpr uint8_t kBytesTestReadSymbolBoolean[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n };\n");
+
+constexpr size_t kNumBytesTestReadSymbolBoolean = 1880;
+constexpr uint8_t kBytesTestReadSymbolBoolean[] = {
+ 0x1e, 0xfe, 0x7c, 0xa2, 0x1e, 0xfc, 0xa1, 0x17, 0xee, 0xbf, 0x07, 0x76,
+ 0x2d, 0x11, 0x3a, 0xa5, 0x49, 0x65, 0xbb, 0x83, 0x89, 0x4b, 0xaa, 0x23,
+ 0x29, 0x0d, 0x81, 0x9f, 0x6a, 0xf2, 0x9f, 0x7e, 0x14, 0x9a, 0x86, 0x78,
+ 0x7f, 0xd5, 0x31, 0x14, 0x45, 0x8e, 0xf5, 0xc3, 0x36, 0x63, 0xcb, 0x4f,
+ 0xeb, 0x81, 0x19, 0x75, 0x3c, 0xda, 0x21, 0x71, 0x1d, 0x05, 0x34, 0x7e,
+ 0x43, 0xd4, 0x5b, 0xeb, 0x0a, 0x6d, 0xbe, 0xd2, 0x8f, 0xa5, 0x8f, 0xac,
+ 0x3b, 0x43, 0xb6, 0x8a, 0xf9, 0x86, 0xf7, 0x1a, 0x3c, 0x4b, 0x2b, 0x4c,
+ 0x4c, 0x4a, 0xff, 0xb9, 0x6f, 0x3c, 0xeb, 0xf6, 0x4c, 0xc8, 0x3c, 0x01,
+ 0x5f, 0x12, 0x76, 0x4f, 0x88, 0xa0, 0xa5, 0xe7, 0x1d, 0xb3, 0x97, 0xd8,
+ 0x31, 0x90, 0x8f, 0xd1, 0x46, 0xfd, 0xf7, 0xb1, 0x02, 0x0d, 0xf3, 0x9e,
+ 0xbe, 0xa2, 0xfb, 0xc2, 0x7e, 0xe8, 0x77, 0xff, 0xa8, 0x13, 0x59, 0xcd,
+ 0xba, 0xe7, 0xc2, 0x7e, 0xe8, 0x77, 0xff, 0xa8, 0x0e, 0xc3, 0x7b, 0x63,
+ 0x80, 0xfe, 0x33, 0xe8, 0x30, 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e,
+ 0xbd, 0x33, 0xe8, 0x30, 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd,
+ 0x33, 0xe8, 0x30, 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33,
+ 0xe8, 0x30, 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8,
+ 0x30, 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8, 0x30,
+ 0x37, 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8, 0x30, 0x37,
+ 0xeb, 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8, 0x30, 0x37, 0xeb,
+ 0xd3, 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8, 0x30, 0x37, 0xeb, 0xd3,
+ 0x3e, 0x83, 0x03, 0x7e, 0xbd, 0x33, 0xe8, 0x30, 0x37, 0xeb, 0xd3, 0x3e,
+ 0x85, 0x13, 0x83, 0xe9, 0x58, 0xaf, 0xe8, 0xff, 0x03, 0xb8, 0xf5, 0x08,
+ 0x63, 0x03, 0xea, 0xe9, 0x3a, 0x39, 0x6d, 0xb6, 0x32, 0xc5, 0xff, 0xf7,
+ 0x19, 0x19, 0x9c, 0x29, 0x3a, 0xc5, 0x87, 0x27, 0x2d, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13,
+ 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a,
+ 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf,
+ 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1,
+ 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89,
+ 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61,
+ 0x35, 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35,
+ 0xad, 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad,
+ 0xfa, 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa,
+ 0x18, 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18,
+ 0x96, 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xad, 0xfa, 0x18, 0x96,
+ 0x13, 0x5a, 0xdf, 0xa1, 0x89, 0x61, 0x35, 0xac,
+};
+static_assert(sizeof(kBytesTestReadSymbolBoolean) ==
+ kNumBytesTestReadSymbolBoolean,
+ "");
+
+// The kBytesTestReadSymbol3[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][4] = {
+// // pdf: 1/3, 1/3, 1/3
+// { 32768 - 10923, 32768 - 21845, 0, 0 },
+// // pdf: 1/6, 2/6, 3/6
+// { 32768 - 5461, 32768 - 16384, 0, 0 },
+// // pdf: 2/6, 3/6, 1/6
+// { 32768 - 10923, 32768 - 27307, 0, 0 },
+// // pdf: 3/6, 1/6, 2/6
+// { 32768 - 16384, 32768 - 21845, 0, 0 },
+// };
+// constexpr int kSymbols[6][4] = { { 0, 2, 1, 2 }, //
+// { 1, 1, 2, 1 }, //
+// { 2, 0, 0, 0 }, //
+// { 0, 2, 0, 2 }, //
+// { 1, 2, 1, 0 }, //
+// { 2, 1, 1, 0 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 1024; ++i) {
+// for (int j = 0; j < 6; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 3);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf(" constexpr size_t kNumBytesTestReadSymbol3 = %u;\n", bw.pos);
+// printf(" constexpr uint8_t kBytesTestReadSymbol3[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n };\n");
+
+constexpr size_t kNumBytesTestReadSymbol3 = 4646;
+constexpr uint8_t kBytesTestReadSymbol3[] = {
+ 0x4a, 0xf9, 0x1a, 0x00, 0xef, 0x80, 0xd4, 0xcd, 0xc2, 0x55, 0x62, 0x76,
+ 0x3a, 0x60, 0x4e, 0xc9, 0x17, 0x91, 0x86, 0xb0, 0xa0, 0xcb, 0xf7, 0x7e,
+ 0x82, 0x1e, 0x92, 0xd9, 0xe5, 0xff, 0xaa, 0x0b, 0xa4, 0xc1, 0xfa, 0x0d,
+ 0xbe, 0x4f, 0x17, 0x4a, 0xfd, 0xee, 0xb6, 0x9b, 0x57, 0x3e, 0xdb, 0x60,
+ 0x19, 0xd2, 0xee, 0x35, 0x39, 0x73, 0xc9, 0x7b, 0x80, 0xc0, 0x9c, 0x9a,
+ 0xe8, 0x0f, 0x8b, 0xb8, 0x99, 0x02, 0xde, 0x68, 0x97, 0xab, 0xee, 0x2c,
+ 0xa0, 0xb1, 0x7b, 0x8e, 0x8a, 0x69, 0xd5, 0xcd, 0x40, 0x43, 0xa9, 0x4c,
+ 0xd5, 0xac, 0x33, 0x70, 0x64, 0x35, 0xa1, 0x18, 0xde, 0x31, 0x21, 0x2b,
+ 0xa1, 0xd2, 0x87, 0x63, 0x41, 0x4d, 0xd9, 0x0e, 0x17, 0xd8, 0x74, 0x19,
+ 0xbc, 0x33, 0xee, 0xd9, 0x21, 0x22, 0x16, 0xbb, 0x1e, 0x14, 0x46, 0xcf,
+ 0xfa, 0xee, 0xa2, 0xa0, 0xc0, 0x6b, 0xc5, 0xf0, 0xd8, 0x23, 0x6d, 0x20,
+ 0xda, 0x75, 0xff, 0x72, 0x3d, 0x41, 0x51, 0x21, 0x23, 0xa0, 0xce, 0xa0,
+ 0x46, 0xb0, 0x1d, 0x3d, 0xaf, 0x64, 0xf8, 0x57, 0xee, 0x81, 0x55, 0x3a,
+ 0xea, 0xd3, 0x3f, 0x96, 0x52, 0x31, 0xe5, 0xb5, 0x70, 0x01, 0x5a, 0xaf,
+ 0xbc, 0x69, 0x7e, 0x43, 0xdd, 0x2f, 0xe2, 0x40, 0xc7, 0x2d, 0x62, 0x8e,
+ 0xf0, 0x2a, 0xc0, 0x06, 0xe7, 0xe0, 0x63, 0x6e, 0x09, 0xa0, 0x57, 0x83,
+ 0x43, 0x5a, 0xe8, 0xb5, 0xc7, 0x1b, 0xf5, 0xe6, 0x3d, 0x19, 0xeb, 0xfa,
+ 0xda, 0x3d, 0x06, 0x3e, 0xa8, 0x96, 0x09, 0xad, 0x1d, 0xac, 0xf6, 0xef,
+ 0xc7, 0x32, 0x2f, 0x45, 0xe0, 0x4f, 0xa6, 0x9c, 0x2f, 0x66, 0x6b, 0xe3,
+ 0x36, 0xcf, 0x36, 0x41, 0xcb, 0xd9, 0xb8, 0xc3, 0x48, 0xf4, 0x18, 0xfa,
+ 0xa2, 0x58, 0x26, 0xb4, 0x76, 0xb3, 0xdb, 0xbf, 0x1c, 0xc8, 0xbd, 0x19,
+ 0xc1, 0x3e, 0x9a, 0x71, 0x85, 0x52, 0x94, 0x82, 0x48, 0x9c, 0x90, 0xcf,
+ 0x2f, 0xa0, 0xd1, 0x4b, 0x73, 0xcf, 0x73, 0xea, 0x89, 0x60, 0x93, 0xd1,
+ 0xda, 0xcf, 0x74, 0x5b, 0xd3, 0x22, 0xf4, 0x67, 0x04, 0xfa, 0x69, 0xc6,
+ 0x15, 0x4a, 0x52, 0x09, 0x22, 0x72, 0x43, 0x3c, 0xbe, 0x83, 0x45, 0x2d,
+ 0xcf, 0x3d, 0xcf, 0xaa, 0x25, 0x82, 0x4f, 0x47, 0x6b, 0x3d, 0xd1, 0x6f,
+ 0x4c, 0x8b, 0xd1, 0x9c, 0x13, 0xe9, 0xa7, 0x18, 0x55, 0x29, 0x48, 0x24,
+ 0x89, 0xc9, 0x0c, 0xf2, 0xfa, 0x0d, 0x14, 0xb7, 0x3c, 0xf7, 0x3e, 0xa8,
+ 0x96, 0x09, 0x3d, 0x1d, 0xac, 0xf7, 0x45, 0xbd, 0x32, 0x2f, 0x46, 0x70,
+ 0x4f, 0xa6, 0x9c, 0x61, 0x54, 0xa5, 0x20, 0x92, 0x27, 0x24, 0x33, 0xcb,
+ 0xe8, 0x34, 0x52, 0xdc, 0xf3, 0xdc, 0xfa, 0xa2, 0x58, 0x24, 0xf4, 0x76,
+ 0xb3, 0xdd, 0x16, 0xf4, 0xc8, 0xbd, 0x19, 0xc1, 0x3e, 0x9a, 0x71, 0x85,
+ 0x52, 0x94, 0x82, 0x48, 0x9c, 0x90, 0xcf, 0x2f, 0xa0, 0xd1, 0x4b, 0x73,
+ 0xcf, 0x73, 0xea, 0x89, 0x60, 0x93, 0xd1, 0xda, 0xcf, 0x74, 0x5b, 0xd3,
+ 0x22, 0xf4, 0x67, 0x04, 0xfa, 0x69, 0xc6, 0x15, 0x4a, 0x52, 0x09, 0x22,
+ 0x72, 0x43, 0x3c, 0xbe, 0x83, 0x45, 0x2d, 0xcf, 0x3d, 0xcf, 0xaa, 0x25,
+ 0x84, 0xaa, 0xde, 0xde, 0xba, 0x7e, 0x90, 0x92, 0xa0, 0xdc, 0xb3, 0x6c,
+ 0xaf, 0xe6, 0x2f, 0xeb, 0xc5, 0x33, 0xe7, 0x77, 0xcf, 0xda, 0xe7, 0x31,
+ 0x57, 0xb2, 0x8f, 0xde, 0x8f, 0x1d, 0xf4, 0xd3, 0x8c, 0xda, 0x94, 0xa4,
+ 0x12, 0xcd, 0xc9, 0x32, 0x6d, 0xf7, 0x2d, 0x0c, 0x2c, 0xf9, 0xd8, 0x0b,
+ 0x48, 0xf3, 0xb3, 0x2e, 0x80, 0xd7, 0x0a, 0xc4, 0x4f, 0x09, 0xfe, 0x84,
+ 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4,
+ 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8,
+ 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a,
+ 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67,
+ 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09,
+ 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c,
+ 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef,
+ 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01,
+ 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35,
+ 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01,
+ 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8,
+ 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8,
+ 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54,
+ 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d,
+ 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0,
+ 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a,
+ 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52,
+ 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41,
+ 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b,
+ 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39,
+ 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09,
+ 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58,
+ 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d,
+ 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82,
+ 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad,
+ 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43,
+ 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7,
+ 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d,
+ 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f,
+ 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07,
+ 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65,
+ 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf,
+ 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78,
+ 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca,
+ 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9,
+ 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58,
+ 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8,
+ 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb,
+ 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d,
+ 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1,
+ 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06,
+ 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0,
+ 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56,
+ 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e,
+ 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10,
+ 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3,
+ 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60,
+ 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a,
+ 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f,
+ 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25,
+ 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3,
+ 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf,
+ 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07,
+ 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5,
+ 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04,
+ 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1,
+ 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3,
+ 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50,
+ 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5,
+ 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0,
+ 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8,
+ 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a,
+ 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04,
+ 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f,
+ 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5,
+ 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27,
+ 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60,
+ 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36,
+ 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a,
+ 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7,
+ 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c,
+ 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde,
+ 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75,
+ 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe,
+ 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d,
+ 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97,
+ 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc,
+ 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0,
+ 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b,
+ 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5,
+ 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63,
+ 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1,
+ 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d,
+ 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6,
+ 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06,
+ 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19,
+ 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2,
+ 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b,
+ 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb,
+ 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40,
+ 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d,
+ 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80,
+ 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa,
+ 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e,
+ 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95,
+ 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf,
+ 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc,
+ 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e,
+ 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54,
+ 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10,
+ 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86,
+ 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e,
+ 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42,
+ 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6,
+ 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03,
+ 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0,
+ 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b,
+ 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10,
+ 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd,
+ 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97,
+ 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f,
+ 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81,
+ 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9,
+ 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b,
+ 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde,
+ 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32,
+ 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a,
+ 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6,
+ 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa,
+ 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76,
+ 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f,
+ 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0,
+ 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81,
+ 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac,
+ 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95,
+ 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f,
+ 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84,
+ 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4,
+ 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8,
+ 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a,
+ 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67,
+ 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09,
+ 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c,
+ 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef,
+ 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01,
+ 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35,
+ 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01,
+ 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8,
+ 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8,
+ 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54,
+ 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d,
+ 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0,
+ 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a,
+ 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52,
+ 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41,
+ 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b,
+ 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39,
+ 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09,
+ 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58,
+ 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d,
+ 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82,
+ 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad,
+ 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43,
+ 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7,
+ 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d,
+ 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f,
+ 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07,
+ 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65,
+ 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf,
+ 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78,
+ 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca,
+ 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9,
+ 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58,
+ 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8,
+ 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb,
+ 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d,
+ 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1,
+ 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06,
+ 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0,
+ 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56,
+ 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e,
+ 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10,
+ 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3,
+ 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60,
+ 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a,
+ 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f,
+ 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25,
+ 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3,
+ 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf,
+ 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07,
+ 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5,
+ 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04,
+ 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1,
+ 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3,
+ 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50,
+ 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5,
+ 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0,
+ 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8,
+ 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a,
+ 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04,
+ 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f,
+ 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5,
+ 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27,
+ 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60,
+ 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36,
+ 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a,
+ 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7,
+ 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c,
+ 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde,
+ 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75,
+ 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe,
+ 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d,
+ 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97,
+ 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc,
+ 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0,
+ 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b,
+ 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5,
+ 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63,
+ 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1,
+ 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d,
+ 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6,
+ 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06,
+ 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19,
+ 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2,
+ 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b,
+ 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb,
+ 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40,
+ 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d,
+ 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80,
+ 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa,
+ 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e,
+ 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95,
+ 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf,
+ 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc,
+ 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e,
+ 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54,
+ 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10,
+ 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86,
+ 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e,
+ 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42,
+ 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6,
+ 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03,
+ 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0,
+ 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b,
+ 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10,
+ 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd,
+ 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97,
+ 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f,
+ 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81,
+ 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9,
+ 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b,
+ 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde,
+ 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32,
+ 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a,
+ 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6,
+ 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa,
+ 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76,
+ 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f,
+ 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0,
+ 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81,
+ 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac,
+ 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95,
+ 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f,
+ 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84,
+ 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4,
+ 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8,
+ 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a,
+ 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67,
+ 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09,
+ 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c,
+ 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef,
+ 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01,
+ 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35,
+ 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01,
+ 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8,
+ 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8,
+ 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54,
+ 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d,
+ 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0,
+ 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a,
+ 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52,
+ 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41,
+ 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b,
+ 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39,
+ 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09,
+ 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58,
+ 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d,
+ 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82,
+ 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad,
+ 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43,
+ 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7,
+ 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d,
+ 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f,
+ 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07,
+ 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65,
+ 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf,
+ 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78,
+ 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca,
+ 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9,
+ 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58,
+ 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8,
+ 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb,
+ 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d,
+ 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1,
+ 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06,
+ 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0,
+ 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56,
+ 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e,
+ 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10,
+ 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3,
+ 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60,
+ 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a,
+ 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f,
+ 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25,
+ 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3,
+ 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf,
+ 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07,
+ 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5,
+ 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04,
+ 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1,
+ 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3,
+ 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50,
+ 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5,
+ 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0,
+ 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8,
+ 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a,
+ 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04,
+ 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f,
+ 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5,
+ 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27,
+ 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60,
+ 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36,
+ 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a,
+ 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7,
+ 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c,
+ 0xac, 0x25, 0x42, 0x7f, 0xa1, 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde,
+ 0x95, 0xb3, 0xd6, 0x07, 0x6d, 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75,
+ 0x8f, 0xbf, 0x03, 0x65, 0xf6, 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe,
+ 0x84, 0x07, 0xa0, 0xaf, 0x06, 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d,
+ 0xb4, 0xd5, 0x2b, 0x78, 0x19, 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97,
+ 0xd8, 0x04, 0x10, 0xca, 0xc2, 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc,
+ 0x1a, 0xa1, 0xbd, 0xe9, 0x5b, 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0,
+ 0x67, 0xe3, 0x97, 0x58, 0xfb, 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b,
+ 0x09, 0x50, 0x9f, 0xe8, 0x40, 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5,
+ 0x6c, 0xf5, 0x81, 0xdb, 0x4d, 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63,
+ 0xef, 0xc0, 0xd9, 0x7d, 0x80, 0x41, 0x0c, 0xac, 0x25, 0x42, 0x7f, 0xa1,
+ 0x01, 0xe8, 0x2b, 0xc1, 0xaa, 0x1b, 0xde, 0x95, 0xb3, 0xd6, 0x07, 0x6d,
+ 0x35, 0x4a, 0xde, 0x06, 0x7e, 0x39, 0x75, 0x8f, 0xbf, 0x03, 0x65, 0xf6,
+ 0x01, 0x04, 0x32, 0xb0, 0x95, 0x09, 0xfe, 0x84, 0x07, 0xa0, 0xaf, 0x06,
+ 0xa8, 0x6f, 0x7a, 0x56, 0xcf, 0x58, 0x1d, 0xb4, 0xd5, 0x2b, 0x78, 0x19,
+ 0xf8, 0xe5, 0xd6, 0x3e, 0xfc, 0x0d, 0x97, 0xd8, 0x04, 0x10, 0xca, 0xc2,
+ 0x54, 0x27, 0xfa, 0x10, 0x1e, 0x82, 0xbc, 0x1a, 0xa1, 0xbd, 0xe9, 0x5b,
+ 0x3d, 0x60, 0x76, 0xd3, 0x54, 0xad, 0xe0, 0x67, 0xe3, 0x97, 0x58, 0xfb,
+ 0xf0, 0x36, 0x5f, 0x60, 0x10, 0x43, 0x2b, 0x09, 0x50, 0x9f, 0xe8, 0x40,
+ 0x7a, 0x0a, 0xf0, 0x6a, 0x86, 0xf7, 0xa5, 0x6c, 0xf5, 0x81, 0xdb, 0x4d,
+ 0x52, 0xb7, 0x81, 0x9f, 0x8e, 0x5d, 0x63, 0xef, 0xc0, 0xd9, 0x7d, 0x80,
+ 0x41, 0x08,
+};
+static_assert(sizeof(kBytesTestReadSymbol3) == kNumBytesTestReadSymbol3, "");
+
+// The kBytesTestReadSymbol4[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][5] = {
+// // pdf: 1/4, 1/4, 1/4, 1/4
+// { 32768 - 8192, 32768 - 16384, 32768 - 24576, 0, 0 },
+// // pdf: 2/8, 1/8, 2/8, 3/8
+// { 32768 - 8192, 32768 - 12288, 32768 - 20480, 0, 0 },
+// // pdf: 1/4, 1/4, 1/4, 1/4
+// { 32768 - 8192, 32768 - 16384, 32768 - 24576, 0, 0 },
+// // pdf: 2/8, 3/8, 2/8, 1/8
+// { 32768 - 8192, 32768 - 20480, 32768 - 28672, 0, 0 },
+// };
+// constexpr int kSymbols[8][4] = { { 0, 0, 3, 3 }, //
+// { 0, 0, 2, 2 }, //
+// { 1, 1, 0, 0 }, //
+// { 1, 2, 1, 1 }, //
+// { 2, 2, 3, 2 }, //
+// { 2, 3, 2, 1 }, //
+// { 3, 3, 0, 0 }, //
+// { 3, 3, 1, 1 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 1024; ++i) {
+// for (int j = 0; j < 8; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 4);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf(" constexpr size_t kNumBytesTestReadSymbol4 = %u;\n", bw.pos);
+// printf(" constexpr uint8_t kBytesTestReadSymbol4[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n };\n");
+
+constexpr size_t kNumBytesTestReadSymbol4 = 8055;
+constexpr uint8_t kBytesTestReadSymbol4[] = {
+ 0x0f, 0x9b, 0x2a, 0xf6, 0x38, 0x26, 0xa1, 0xd1, 0x82, 0x5f, 0x34, 0xb5,
+ 0xc7, 0xda, 0x9c, 0xd8, 0x8d, 0x4b, 0xbc, 0x5c, 0x0b, 0x8a, 0x7f, 0x6c,
+ 0x46, 0x3f, 0xa2, 0x03, 0xee, 0x1f, 0xea, 0x25, 0xc7, 0xb7, 0xe2, 0xc9,
+ 0x51, 0x0f, 0x7c, 0x0c, 0xe3, 0x7d, 0x7b, 0xe4, 0xbe, 0xde, 0x41, 0x5c,
+ 0x5a, 0xcf, 0xe6, 0x12, 0x50, 0x7b, 0xcc, 0x83, 0x76, 0x61, 0x03, 0x3a,
+ 0x1e, 0x1b, 0xf8, 0x9d, 0x08, 0x96, 0x98, 0x0f, 0x16, 0xac, 0x7c, 0x25,
+ 0x6c, 0xd1, 0xe8, 0xd8, 0xd6, 0x1c, 0xbd, 0x48, 0xa5, 0x3f, 0xd3, 0x21,
+ 0x4c, 0x4e, 0x94, 0xe3, 0xe3, 0xed, 0x30, 0x70, 0xdb, 0x2e, 0x95, 0xd5,
+ 0x7f, 0xfe, 0xed, 0x0e, 0x73, 0xe3, 0x29, 0x09, 0x5f, 0xe3, 0x0e, 0xa6,
+ 0xe7, 0xc6, 0x52, 0x12, 0xba, 0xdb, 0xb5, 0x63, 0xd9, 0xd8, 0xa4, 0x25,
+ 0x75, 0xb7, 0x6a, 0xc7, 0xb3, 0xad, 0x88, 0x46, 0x64, 0x3a, 0x36, 0xb1,
+ 0x2f, 0xb1, 0x03, 0xdb, 0x88, 0x74, 0x6d, 0x62, 0x5f, 0x62, 0x07, 0xb7,
+ 0x10, 0xe8, 0xda, 0xc6, 0x1d, 0x6e, 0x8e, 0x12, 0x58, 0x6e, 0x98, 0x4c,
+ 0xa1, 0x23, 0xc0, 0x9b, 0xb0, 0xdd, 0x31, 0xef, 0x64, 0xf0, 0x91, 0x37,
+ 0x61, 0xba, 0x63, 0xde, 0xc9, 0xe1, 0x22, 0x6e, 0xc3, 0x74, 0xc7, 0xea,
+ 0xcb, 0x70, 0xf6, 0xe2, 0x1d, 0x1b, 0x6c, 0xd5, 0x4f, 0x91, 0xc2, 0x4b,
+ 0x0a, 0xeb, 0xb3, 0x0d, 0x59, 0x39, 0x13, 0x76, 0x15, 0xd7, 0x66, 0x1a,
+ 0xf2, 0x72, 0x26, 0xec, 0x05, 0x3e, 0xcc, 0x31, 0x3e, 0x60, 0x4d, 0xd8,
+ 0x0a, 0x7d, 0x98, 0x62, 0x7c, 0xc0, 0xcc, 0x5a, 0x24, 0xc8, 0xa6, 0xda,
+ 0xe3, 0x09, 0x35, 0x70, 0x9c, 0x4c, 0x85, 0xac, 0x6f, 0x8b, 0x76, 0x30,
+ 0xcc, 0x6f, 0xcb, 0x3e, 0x36, 0xd6, 0xec, 0x61, 0x98, 0xdf, 0x99, 0xa5,
+ 0x7e, 0x2d, 0xd8, 0xc3, 0x31, 0xbf, 0x33, 0x4a, 0xfc, 0x5b, 0xb1, 0x86,
+ 0x63, 0x7e, 0x66, 0x95, 0xf8, 0xb7, 0x63, 0x0c, 0xc6, 0xfc, 0xcd, 0x2b,
+ 0xf1, 0x6e, 0xc6, 0x19, 0x8d, 0xf9, 0x9a, 0x57, 0xe2, 0xdd, 0x8c, 0x33,
+ 0x1b, 0xf3, 0x34, 0xaf, 0xc5, 0xbb, 0x18, 0x66, 0x37, 0xe6, 0x69, 0x5f,
+ 0x8b, 0x76, 0x30, 0xcc, 0x6f, 0xcc, 0xd2, 0xbf, 0x16, 0xec, 0x61, 0x98,
+ 0xdf, 0x99, 0xa5, 0x7e, 0x2d, 0xd1, 0x27, 0xb1, 0xbf, 0x30, 0x0b, 0xfc,
+ 0x5b, 0xa2, 0x4f, 0x63, 0xa0, 0x9b, 0x7a, 0xb6, 0xb7, 0x44, 0x9e, 0xc7,
+ 0x41, 0x36, 0xf5, 0x6d, 0x6e, 0x89, 0x3d, 0x8e, 0x82, 0x6d, 0xea, 0xda,
+ 0xdd, 0x12, 0x7b, 0x1d, 0x04, 0xdb, 0xd5, 0xb5, 0xba, 0x24, 0xf6, 0x3a,
+ 0x09, 0xb7, 0xab, 0x6b, 0x74, 0x49, 0xec, 0x74, 0x13, 0x6f, 0x56, 0xd6,
+ 0xe8, 0x93, 0xd8, 0xe8, 0x26, 0xde, 0xad, 0xad, 0xd1, 0x27, 0xb1, 0xd0,
+ 0x4d, 0xbd, 0x5b, 0x5b, 0xa2, 0x4f, 0x63, 0xa0, 0x9b, 0x7a, 0xb6, 0xb7,
+ 0x44, 0x9e, 0xc7, 0x41, 0x36, 0xf5, 0x6d, 0x6e, 0x89, 0x3d, 0x8e, 0x82,
+ 0x6d, 0xea, 0xda, 0xdd, 0x12, 0x7b, 0x1d, 0x04, 0xdb, 0xd5, 0xb5, 0xba,
+ 0x24, 0xf6, 0x3a, 0x09, 0xb7, 0xab, 0x6b, 0x74, 0x49, 0xec, 0x74, 0x13,
+ 0x6f, 0x56, 0xd6, 0xdf, 0x45, 0xaa, 0x16, 0xb7, 0xb7, 0x14, 0x09, 0xdb,
+ 0x9f, 0x17, 0x97, 0xae, 0xa1, 0xbe, 0x34, 0x9d, 0x0e, 0x01, 0x9f, 0xdb,
+ 0x16, 0xa9, 0x6a, 0x63, 0xf2, 0x9f, 0x5b, 0x3b, 0x0b, 0xae, 0x17, 0xd6,
+ 0x4d, 0x75, 0x8f, 0xe3, 0xf0, 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c,
+ 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96,
+ 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f,
+ 0x24, 0xa8, 0xcd, 0x3d, 0x44, 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80,
+ 0xc7, 0x1d, 0x6c, 0xec, 0xb3, 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f,
+ 0xfd, 0xba, 0x4e, 0x1a, 0xa5, 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d,
+ 0x7a, 0x15, 0x6d, 0x45, 0x02, 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a,
+ 0x7f, 0x3d, 0xe3, 0x6b, 0x55, 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53,
+ 0x8e, 0x9a, 0x82, 0x1c, 0x52, 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91,
+ 0xeb, 0x2c, 0x39, 0xb4, 0x7f, 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9,
+ 0xfe, 0xc6, 0x71, 0xd1, 0x81, 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95,
+ 0x19, 0xa7, 0xa8, 0x92, 0xf5, 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3,
+ 0xad, 0x9d, 0x96, 0x76, 0xfc, 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7,
+ 0x49, 0xc3, 0x54, 0xaa, 0xca, 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42,
+ 0xad, 0xa8, 0xa0, 0x4e, 0xdc, 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7,
+ 0xbc, 0x6d, 0x6a, 0xa9, 0x69, 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3,
+ 0x50, 0x43, 0x8a, 0x43, 0x70, 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65,
+ 0x87, 0x36, 0x8f, 0xec, 0x37, 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8,
+ 0xce, 0x3a, 0x30, 0x30, 0xf8, 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34,
+ 0xf5, 0x12, 0x5e, 0xb0, 0x1d, 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3,
+ 0xb2, 0xce, 0xdf, 0x80, 0x03, 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38,
+ 0x6a, 0x95, 0x59, 0x50, 0x0e, 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5,
+ 0x14, 0x09, 0xdb, 0x99, 0x9b, 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d,
+ 0xad, 0x55, 0x2d, 0x33, 0x71, 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08,
+ 0x71, 0x48, 0x6e, 0x13, 0x95, 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6,
+ 0xd1, 0xfd, 0x86, 0xef, 0xfc, 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7,
+ 0x46, 0x06, 0x1f, 0x05, 0x57, 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2,
+ 0x4b, 0xd6, 0x03, 0xa6, 0x15, 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59,
+ 0xdb, 0xf0, 0x00, 0x71, 0x1b, 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52,
+ 0xab, 0x2a, 0x01, 0xce, 0x12, 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81,
+ 0x3b, 0x73, 0x33, 0x61, 0x12, 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa,
+ 0xa5, 0xa6, 0x6e, 0x3e, 0xc2, 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29,
+ 0x0d, 0xc2, 0x72, 0xa9, 0x42, 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f,
+ 0xb0, 0xdd, 0xff, 0x8c, 0x99, 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0,
+ 0xc3, 0xe0, 0xaa, 0xf5, 0x68, 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a,
+ 0xc0, 0x74, 0xc2, 0xab, 0xa8, 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e,
+ 0x00, 0x0e, 0x23, 0x7f, 0x86, 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65,
+ 0x40, 0x39, 0xc2, 0x5e, 0xb4, 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e,
+ 0x66, 0x6c, 0x22, 0x44, 0xa7, 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4,
+ 0xcd, 0xc7, 0xd8, 0x53, 0xf5, 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8,
+ 0x4e, 0x55, 0x28, 0x59, 0x29, 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b,
+ 0xbf, 0xf1, 0x93, 0x2c, 0xde, 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c,
+ 0x15, 0x5e, 0xad, 0x1e, 0x49, 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e,
+ 0x98, 0x55, 0x75, 0x01, 0x8e, 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01,
+ 0xc4, 0x6f, 0xf0, 0xdf, 0xfb, 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07,
+ 0x38, 0x4b, 0xd6, 0x9a, 0xf4, 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd,
+ 0x84, 0x48, 0x94, 0xf4, 0xfe, 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8,
+ 0xfb, 0x0a, 0x7e, 0xa7, 0x1d, 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca,
+ 0xa5, 0x0b, 0x25, 0x23, 0xd6, 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe,
+ 0x32, 0x65, 0x9b, 0xd3, 0xfd, 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab,
+ 0xd5, 0xa3, 0xc9, 0x2a, 0x33, 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a,
+ 0xae, 0xa0, 0x31, 0xc7, 0x5b, 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d,
+ 0xfe, 0x1b, 0xff, 0x6e, 0x93, 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09,
+ 0x7a, 0xd3, 0x5e, 0x85, 0x5b, 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89,
+ 0x12, 0x9e, 0x9f, 0xcf, 0x78, 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61,
+ 0x4f, 0xd4, 0xe3, 0xa6, 0xa0, 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1,
+ 0x64, 0xa4, 0x7a, 0xcb, 0x0e, 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c,
+ 0xb3, 0x7a, 0x7f, 0xb1, 0x9c, 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4,
+ 0x79, 0x25, 0x46, 0x69, 0xea, 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4,
+ 0x06, 0x38, 0xeb, 0x67, 0x65, 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3,
+ 0x7f, 0xed, 0xd2, 0x70, 0xd5, 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a,
+ 0x6b, 0xd0, 0xab, 0x6a, 0x28, 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53,
+ 0xd3, 0xf9, 0xef, 0x1b, 0x5a, 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa,
+ 0x9c, 0x74, 0xd4, 0x10, 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94,
+ 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f,
+ 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24,
+ 0xa8, 0xcd, 0x3d, 0x44, 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7,
+ 0x1d, 0x6c, 0xec, 0xb3, 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd,
+ 0xba, 0x4e, 0x1a, 0xa5, 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a,
+ 0x15, 0x6d, 0x45, 0x02, 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f,
+ 0x3d, 0xe3, 0x6b, 0x55, 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e,
+ 0x9a, 0x82, 0x1c, 0x52, 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb,
+ 0x2c, 0x39, 0xb4, 0x7f, 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe,
+ 0xc6, 0x71, 0xd1, 0x81, 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19,
+ 0xa7, 0xa8, 0x92, 0xf5, 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad,
+ 0x9d, 0x96, 0x76, 0xfc, 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49,
+ 0xc3, 0x54, 0xaa, 0xca, 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad,
+ 0xa8, 0xa0, 0x4e, 0xdc, 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc,
+ 0x6d, 0x6a, 0xa9, 0x69, 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50,
+ 0x43, 0x8a, 0x43, 0x70, 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87,
+ 0x36, 0x8f, 0xec, 0x37, 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce,
+ 0x3a, 0x30, 0x30, 0xf8, 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5,
+ 0x12, 0x5e, 0xb0, 0x1d, 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2,
+ 0xce, 0xdf, 0x80, 0x03, 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a,
+ 0x95, 0x59, 0x50, 0x0e, 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14,
+ 0x09, 0xdb, 0x99, 0x9b, 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad,
+ 0x55, 0x2d, 0x33, 0x71, 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71,
+ 0x48, 0x6e, 0x13, 0x95, 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1,
+ 0xfd, 0x86, 0xef, 0xfc, 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46,
+ 0x06, 0x1f, 0x05, 0x57, 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b,
+ 0xd6, 0x03, 0xa6, 0x15, 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb,
+ 0xf0, 0x00, 0x71, 0x1b, 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab,
+ 0x2a, 0x01, 0xce, 0x12, 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b,
+ 0x73, 0x33, 0x61, 0x12, 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5,
+ 0xa6, 0x6e, 0x3e, 0xc2, 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d,
+ 0xc2, 0x72, 0xa9, 0x42, 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0,
+ 0xdd, 0xff, 0x8c, 0x99, 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3,
+ 0xe0, 0xaa, 0xf5, 0x68, 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0,
+ 0x74, 0xc2, 0xab, 0xa8, 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00,
+ 0x0e, 0x23, 0x7f, 0x86, 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40,
+ 0x39, 0xc2, 0x5e, 0xb4, 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66,
+ 0x6c, 0x22, 0x44, 0xa7, 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd,
+ 0xc7, 0xd8, 0x53, 0xf5, 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e,
+ 0x55, 0x28, 0x59, 0x29, 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf,
+ 0xf1, 0x93, 0x2c, 0xde, 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15,
+ 0x5e, 0xad, 0x1e, 0x49, 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98,
+ 0x55, 0x75, 0x01, 0x8e, 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4,
+ 0x6f, 0xf0, 0xdf, 0xfb, 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38,
+ 0x4b, 0xd6, 0x9a, 0xf4, 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84,
+ 0x48, 0x94, 0xf4, 0xfe, 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb,
+ 0x0a, 0x7e, 0xa7, 0x1d, 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5,
+ 0x0b, 0x25, 0x23, 0xd6, 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32,
+ 0x65, 0x9b, 0xd3, 0xfd, 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5,
+ 0xa3, 0xc9, 0x2a, 0x33, 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae,
+ 0xa0, 0x31, 0xc7, 0x5b, 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe,
+ 0x1b, 0xff, 0x6e, 0x93, 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a,
+ 0xd3, 0x5e, 0x85, 0x5b, 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12,
+ 0x9e, 0x9f, 0xcf, 0x78, 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f,
+ 0xd4, 0xe3, 0xa6, 0xa0, 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64,
+ 0xa4, 0x7a, 0xcb, 0x0e, 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3,
+ 0x7a, 0x7f, 0xb1, 0x9c, 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79,
+ 0x25, 0x46, 0x69, 0xea, 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06,
+ 0x38, 0xeb, 0x67, 0x65, 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f,
+ 0xed, 0xd2, 0x70, 0xd5, 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b,
+ 0xd0, 0xab, 0x6a, 0x28, 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3,
+ 0xf9, 0xef, 0x1b, 0x5a, 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c,
+ 0x74, 0xd4, 0x10, 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f,
+ 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f,
+ 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8,
+ 0xcd, 0x3d, 0x44, 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d,
+ 0x6c, 0xec, 0xb3, 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba,
+ 0x4e, 0x1a, 0xa5, 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15,
+ 0x6d, 0x45, 0x02, 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d,
+ 0xe3, 0x6b, 0x55, 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a,
+ 0x82, 0x1c, 0x52, 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c,
+ 0x39, 0xb4, 0x7f, 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6,
+ 0x71, 0xd1, 0x81, 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7,
+ 0xa8, 0x92, 0xf5, 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d,
+ 0x96, 0x76, 0xfc, 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3,
+ 0x54, 0xaa, 0xca, 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8,
+ 0xa0, 0x4e, 0xdc, 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d,
+ 0x6a, 0xa9, 0x69, 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43,
+ 0x8a, 0x43, 0x70, 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36,
+ 0x8f, 0xec, 0x37, 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a,
+ 0x30, 0x30, 0xf8, 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12,
+ 0x5e, 0xb0, 0x1d, 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce,
+ 0xdf, 0x80, 0x03, 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95,
+ 0x59, 0x50, 0x0e, 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09,
+ 0xdb, 0x99, 0x9b, 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55,
+ 0x2d, 0x33, 0x71, 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48,
+ 0x6e, 0x13, 0x95, 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd,
+ 0x86, 0xef, 0xfc, 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06,
+ 0x1f, 0x05, 0x57, 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6,
+ 0x03, 0xa6, 0x15, 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0,
+ 0x00, 0x71, 0x1b, 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a,
+ 0x01, 0xce, 0x12, 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73,
+ 0x33, 0x61, 0x12, 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6,
+ 0x6e, 0x3e, 0xc2, 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2,
+ 0x72, 0xa9, 0x42, 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd,
+ 0xff, 0x8c, 0x99, 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0,
+ 0xaa, 0xf5, 0x68, 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74,
+ 0xc2, 0xab, 0xa8, 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e,
+ 0x23, 0x7f, 0x86, 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39,
+ 0xc2, 0x5e, 0xb4, 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c,
+ 0x22, 0x44, 0xa7, 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7,
+ 0xd8, 0x53, 0xf5, 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55,
+ 0x28, 0x59, 0x29, 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1,
+ 0x93, 0x2c, 0xde, 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e,
+ 0xad, 0x1e, 0x49, 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55,
+ 0x75, 0x01, 0x8e, 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f,
+ 0xf0, 0xdf, 0xfb, 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b,
+ 0xd6, 0x9a, 0xf4, 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48,
+ 0x94, 0xf4, 0xfe, 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a,
+ 0x7e, 0xa7, 0x1d, 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b,
+ 0x25, 0x23, 0xd6, 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65,
+ 0x9b, 0xd3, 0xfd, 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3,
+ 0xc9, 0x2a, 0x33, 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0,
+ 0x31, 0xc7, 0x5b, 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b,
+ 0xff, 0x6e, 0x93, 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3,
+ 0x5e, 0x85, 0x5b, 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e,
+ 0x9f, 0xcf, 0x78, 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4,
+ 0xe3, 0xa6, 0xa0, 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4,
+ 0x7a, 0xcb, 0x0e, 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a,
+ 0x7f, 0xb1, 0x9c, 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25,
+ 0x46, 0x69, 0xea, 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38,
+ 0xeb, 0x67, 0x65, 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed,
+ 0xd2, 0x70, 0xd5, 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0,
+ 0xab, 0x6a, 0x28, 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9,
+ 0xef, 0x1b, 0x5a, 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74,
+ 0xd4, 0x10, 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59,
+ 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6,
+ 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd,
+ 0x3d, 0x44, 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c,
+ 0xec, 0xb3, 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e,
+ 0x1a, 0xa5, 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d,
+ 0x45, 0x02, 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3,
+ 0x6b, 0x55, 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82,
+ 0x1c, 0x52, 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39,
+ 0xb4, 0x7f, 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71,
+ 0xd1, 0x81, 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8,
+ 0x92, 0xf5, 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96,
+ 0x76, 0xfc, 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54,
+ 0xaa, 0xca, 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0,
+ 0x4e, 0xdc, 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a,
+ 0xa9, 0x69, 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a,
+ 0x43, 0x70, 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f,
+ 0xec, 0x37, 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30,
+ 0x30, 0xf8, 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e,
+ 0xb0, 0x1d, 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf,
+ 0x80, 0x03, 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59,
+ 0x50, 0x0e, 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb,
+ 0x99, 0x9b, 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d,
+ 0x33, 0x71, 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e,
+ 0x13, 0x95, 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86,
+ 0xef, 0xfc, 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f,
+ 0x05, 0x57, 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03,
+ 0xa6, 0x15, 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00,
+ 0x71, 0x1b, 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01,
+ 0xce, 0x12, 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33,
+ 0x61, 0x12, 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e,
+ 0x3e, 0xc2, 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72,
+ 0xa9, 0x42, 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff,
+ 0x8c, 0x99, 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa,
+ 0xf5, 0x68, 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2,
+ 0xab, 0xa8, 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23,
+ 0x7f, 0x86, 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2,
+ 0x5e, 0xb4, 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22,
+ 0x44, 0xa7, 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8,
+ 0x53, 0xf5, 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28,
+ 0x59, 0x29, 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93,
+ 0x2c, 0xde, 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad,
+ 0x1e, 0x49, 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75,
+ 0x01, 0x8e, 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0,
+ 0xdf, 0xfb, 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6,
+ 0x9a, 0xf4, 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94,
+ 0xf4, 0xfe, 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e,
+ 0xa7, 0x1d, 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25,
+ 0x23, 0xd6, 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b,
+ 0xd3, 0xfd, 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9,
+ 0x2a, 0x33, 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31,
+ 0xc7, 0x5b, 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff,
+ 0x6e, 0x93, 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e,
+ 0x85, 0x5b, 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f,
+ 0xcf, 0x78, 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3,
+ 0xa6, 0xa0, 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a,
+ 0xcb, 0x0e, 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f,
+ 0xb1, 0x9c, 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46,
+ 0x69, 0xea, 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb,
+ 0x67, 0x65, 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2,
+ 0x70, 0xd5, 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab,
+ 0x6a, 0x28, 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef,
+ 0x1b, 0x5a, 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4,
+ 0x10, 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61,
+ 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33,
+ 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d,
+ 0x44, 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec,
+ 0xb3, 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a,
+ 0xa5, 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45,
+ 0x02, 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b,
+ 0x55, 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c,
+ 0x52, 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4,
+ 0x7f, 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1,
+ 0x81, 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92,
+ 0xf5, 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76,
+ 0xfc, 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa,
+ 0xca, 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e,
+ 0xdc, 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9,
+ 0x69, 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43,
+ 0x70, 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec,
+ 0x37, 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30,
+ 0xf8, 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0,
+ 0x1d, 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80,
+ 0x03, 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50,
+ 0x0e, 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99,
+ 0x9b, 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33,
+ 0x71, 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13,
+ 0x95, 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef,
+ 0xfc, 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05,
+ 0x57, 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6,
+ 0x15, 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71,
+ 0x1b, 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce,
+ 0x12, 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61,
+ 0x12, 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e,
+ 0xc2, 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9,
+ 0x42, 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c,
+ 0x99, 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5,
+ 0x68, 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab,
+ 0xa8, 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f,
+ 0x86, 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e,
+ 0xb4, 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44,
+ 0xa7, 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53,
+ 0xf5, 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59,
+ 0x29, 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c,
+ 0xde, 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e,
+ 0x49, 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01,
+ 0x8e, 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf,
+ 0xfb, 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a,
+ 0xf4, 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4,
+ 0xfe, 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7,
+ 0x1d, 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23,
+ 0xd6, 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3,
+ 0xfd, 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a,
+ 0x33, 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7,
+ 0x5b, 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e,
+ 0x93, 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85,
+ 0x5b, 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf,
+ 0x78, 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6,
+ 0xa0, 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb,
+ 0x0e, 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1,
+ 0x9c, 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69,
+ 0xea, 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67,
+ 0x65, 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70,
+ 0xd5, 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a,
+ 0x28, 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b,
+ 0x5a, 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10,
+ 0xe2, 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd,
+ 0xa3, 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e,
+ 0x8c, 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d, 0x44,
+ 0x97, 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec, 0xb3,
+ 0xb7, 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a, 0xa5,
+ 0x56, 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45, 0x02,
+ 0x76, 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b, 0x55,
+ 0x4b, 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c, 0x52,
+ 0x1b, 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4, 0x7f,
+ 0x61, 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1, 0x81,
+ 0x87, 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92, 0xf5,
+ 0x80, 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76, 0xfc,
+ 0x00, 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa, 0xca,
+ 0x80, 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e, 0xdc,
+ 0xcc, 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9, 0x69,
+ 0x9b, 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43, 0x70,
+ 0x9c, 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec, 0x37,
+ 0x7f, 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30, 0xf8,
+ 0x2a, 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0, 0x1d,
+ 0x30, 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80, 0x03,
+ 0x88, 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50, 0x0e,
+ 0x70, 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99, 0x9b,
+ 0x08, 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33, 0x71,
+ 0xf6, 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13, 0x95,
+ 0x4a, 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef, 0xfc,
+ 0x64, 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05, 0x57,
+ 0xab, 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6, 0x15,
+ 0x5d, 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71, 0x1b,
+ 0xfc, 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce, 0x12,
+ 0xf5, 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61, 0x12,
+ 0x25, 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e, 0xc2,
+ 0x9f, 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9, 0x42,
+ 0xc9, 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c, 0x99,
+ 0x66, 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5, 0x68,
+ 0xf2, 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab, 0xa8,
+ 0x0c, 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f, 0x86,
+ 0xff, 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e, 0xb4,
+ 0xd7, 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44, 0xa7,
+ 0xa7, 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53, 0xf5,
+ 0x38, 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59, 0x29,
+ 0x1e, 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c, 0xde,
+ 0x9f, 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e, 0x49,
+ 0x51, 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01, 0x8e,
+ 0x3a, 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf, 0xfb,
+ 0x74, 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a, 0xf4,
+ 0x2a, 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4, 0xfe,
+ 0x7b, 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7, 0x1d,
+ 0x35, 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23, 0xd6,
+ 0x58, 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3, 0xfd,
+ 0x8c, 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a, 0x33,
+ 0x4f, 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7, 0x5b,
+ 0x3b, 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e, 0x93,
+ 0x86, 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85, 0x5b,
+ 0x51, 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf, 0x78,
+ 0xda, 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6, 0xa0,
+ 0x87, 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb, 0x0e,
+ 0x6d, 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1, 0x9c,
+ 0x74, 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69, 0xea,
+ 0x24, 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67, 0x65,
+ 0x9d, 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70, 0xd5,
+ 0x2a, 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a, 0x28,
+ 0x13, 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b, 0x5a,
+ 0xaa, 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10, 0xe2,
+ 0x90, 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3,
+ 0xfb, 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c,
+ 0x0c, 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d, 0x44, 0x97,
+ 0xac, 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec, 0xb3, 0xb7,
+ 0xe0, 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a, 0xa5, 0x56,
+ 0x54, 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45, 0x02, 0x76,
+ 0xe6, 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b, 0x55, 0x4b,
+ 0x4c, 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c, 0x52, 0x1b,
+ 0x84, 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4, 0x7f, 0x61,
+ 0xbb, 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1, 0x81, 0x87,
+ 0xc1, 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92, 0xf5, 0x80,
+ 0xe9, 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76, 0xfc, 0x00,
+ 0x1c, 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa, 0xca, 0x80,
+ 0x73, 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e, 0xdc, 0xcc,
+ 0xd8, 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9, 0x69, 0x9b,
+ 0x8f, 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43, 0x70, 0x9c,
+ 0xaa, 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec, 0x37, 0x7f,
+ 0xe3, 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30, 0xf8, 0x2a,
+ 0xbd, 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0, 0x1d, 0x30,
+ 0xaa, 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80, 0x03, 0x88,
+ 0xdf, 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50, 0x0e, 0x70,
+ 0x97, 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99, 0x9b, 0x08,
+ 0x91, 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33, 0x71, 0xf6,
+ 0x14, 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13, 0x95, 0x4a,
+ 0x16, 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef, 0xfc, 0x64,
+ 0xcb, 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05, 0x57, 0xab,
+ 0x47, 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6, 0x15, 0x5d,
+ 0x40, 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71, 0x1b, 0xfc,
+ 0x37, 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce, 0x12, 0xf5,
+ 0xa6, 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61, 0x12, 0x25,
+ 0x3d, 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e, 0xc2, 0x9f,
+ 0xa9, 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9, 0x42, 0xc9,
+ 0x48, 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c, 0x99, 0x66,
+ 0xf4, 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5, 0x68, 0xf2,
+ 0x4a, 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab, 0xa8, 0x0c,
+ 0x71, 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f, 0x86, 0xff,
+ 0xdb, 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e, 0xb4, 0xd7,
+ 0xa1, 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44, 0xa7, 0xa7,
+ 0xf3, 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53, 0xf5, 0x38,
+ 0xe9, 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59, 0x29, 0x1e,
+ 0xb2, 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c, 0xde, 0x9f,
+ 0xec, 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e, 0x49, 0x51,
+ 0x9a, 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01, 0x8e, 0x3a,
+ 0xd9, 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf, 0xfb, 0x74,
+ 0x9c, 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a, 0xf4, 0x2a,
+ 0xda, 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4, 0xfe, 0x7b,
+ 0xc6, 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7, 0x1d, 0x35,
+ 0x04, 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23, 0xd6, 0x58,
+ 0x73, 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3, 0xfd, 0x8c,
+ 0xe3, 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a, 0x33, 0x4f,
+ 0x51, 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7, 0x5b, 0x3b,
+ 0x2c, 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e, 0x93, 0x86,
+ 0xa9, 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85, 0x5b, 0x51,
+ 0x40, 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf, 0x78, 0xda,
+ 0xd5, 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6, 0xa0, 0x87,
+ 0x14, 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb, 0x0e, 0x6d,
+ 0x1f, 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1, 0x9c, 0x74,
+ 0x60, 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69, 0xea, 0x24,
+ 0xbd, 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67, 0x65, 0x9d,
+ 0xbf, 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70, 0xd5, 0x2a,
+ 0xb2, 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a, 0x28, 0x13,
+ 0xb7, 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b, 0x5a, 0xaa,
+ 0x5a, 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10, 0xe2, 0x90,
+ 0xdc, 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb,
+ 0x0d, 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c,
+ 0x3e, 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d, 0x44, 0x97, 0xac,
+ 0x07, 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec, 0xb3, 0xb7, 0xe0,
+ 0x00, 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a, 0xa5, 0x56, 0x54,
+ 0x03, 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45, 0x02, 0x76, 0xe6,
+ 0x66, 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b, 0x55, 0x4b, 0x4c,
+ 0xdc, 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c, 0x52, 0x1b, 0x84,
+ 0xe5, 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4, 0x7f, 0x61, 0xbb,
+ 0xff, 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1, 0x81, 0x87, 0xc1,
+ 0x55, 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92, 0xf5, 0x80, 0xe9,
+ 0x85, 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76, 0xfc, 0x00, 0x1c,
+ 0x46, 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa, 0xca, 0x80, 0x73,
+ 0x84, 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e, 0xdc, 0xcc, 0xd8,
+ 0x44, 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9, 0x69, 0x9b, 0x8f,
+ 0xb0, 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43, 0x70, 0x9c, 0xaa,
+ 0x50, 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec, 0x37, 0x7f, 0xe3,
+ 0x26, 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30, 0xf8, 0x2a, 0xbd,
+ 0x5a, 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0, 0x1d, 0x30, 0xaa,
+ 0xea, 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80, 0x03, 0x88, 0xdf,
+ 0xe1, 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50, 0x0e, 0x70, 0x97,
+ 0xad, 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99, 0x9b, 0x08, 0x91,
+ 0x29, 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33, 0x71, 0xf6, 0x14,
+ 0xfd, 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13, 0x95, 0x4a, 0x16,
+ 0x4a, 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef, 0xfc, 0x64, 0xcb,
+ 0x37, 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05, 0x57, 0xab, 0x47,
+ 0x92, 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6, 0x15, 0x5d, 0x40,
+ 0x63, 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71, 0x1b, 0xfc, 0x37,
+ 0xfe, 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce, 0x12, 0xf5, 0xa6,
+ 0xbd, 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61, 0x12, 0x25, 0x3d,
+ 0x3f, 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e, 0xc2, 0x9f, 0xa9,
+ 0xc7, 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9, 0x42, 0xc9, 0x48,
+ 0xf5, 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c, 0x99, 0x66, 0xf4,
+ 0xff, 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5, 0x68, 0xf2, 0x4a,
+ 0x8c, 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab, 0xa8, 0x0c, 0x71,
+ 0xd6, 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f, 0x86, 0xff, 0xdb,
+ 0xa4, 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e, 0xb4, 0xd7, 0xa1,
+ 0x56, 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44, 0xa7, 0xa7, 0xf3,
+ 0xde, 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53, 0xf5, 0x38, 0xe9,
+ 0xa8, 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59, 0x29, 0x1e, 0xb2,
+ 0xc3, 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c, 0xde, 0x9f, 0xec,
+ 0x67, 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e, 0x49, 0x51, 0x9a,
+ 0x7a, 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01, 0x8e, 0x3a, 0xd9,
+ 0xd9, 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf, 0xfb, 0x74, 0x9c,
+ 0x35, 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a, 0xf4, 0x2a, 0xda,
+ 0x8a, 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4, 0xfe, 0x7b, 0xc6,
+ 0xd6, 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7, 0x1d, 0x35, 0x04,
+ 0x38, 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23, 0xd6, 0x58, 0x73,
+ 0x68, 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3, 0xfd, 0x8c, 0xe3,
+ 0xa3, 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a, 0x33, 0x4f, 0x51,
+ 0x25, 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7, 0x5b, 0x3b, 0x2c,
+ 0xed, 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e, 0x93, 0x86, 0xa9,
+ 0x55, 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85, 0x5b, 0x51, 0x40,
+ 0x9d, 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf, 0x78, 0xda, 0xd5,
+ 0x52, 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6, 0xa0, 0x87, 0x14,
+ 0x86, 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb, 0x0e, 0x6d, 0x1f,
+ 0xd8, 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1, 0x9c, 0x74, 0x60,
+ 0x61, 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69, 0xea, 0x24, 0xbd,
+ 0x60, 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67, 0x65, 0x9d, 0xbf,
+ 0x00, 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70, 0xd5, 0x2a, 0xb2,
+ 0xa0, 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a, 0x28, 0x13, 0xb7,
+ 0x33, 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b, 0x5a, 0xaa, 0x5a,
+ 0x66, 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10, 0xe2, 0x90, 0xdc,
+ 0x27, 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d,
+ 0xdf, 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e,
+ 0x0a, 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d, 0x44, 0x97, 0xac, 0x07,
+ 0x4c, 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec, 0xb3, 0xb7, 0xe0, 0x00,
+ 0xe2, 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a, 0xa5, 0x56, 0x54, 0x03,
+ 0x9c, 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45, 0x02, 0x76, 0xe6, 0x66,
+ 0xc2, 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b, 0x55, 0x4b, 0x4c, 0xdc,
+ 0x7d, 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c, 0x52, 0x1b, 0x84, 0xe5,
+ 0x52, 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4, 0x7f, 0x61, 0xbb, 0xff,
+ 0x19, 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1, 0x81, 0x87, 0xc1, 0x55,
+ 0xea, 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92, 0xf5, 0x80, 0xe9, 0x85,
+ 0x57, 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76, 0xfc, 0x00, 0x1c, 0x46,
+ 0xff, 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa, 0xca, 0x80, 0x73, 0x84,
+ 0xbd, 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e, 0xdc, 0xcc, 0xd8, 0x44,
+ 0x89, 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9, 0x69, 0x9b, 0x8f, 0xb0,
+ 0xa7, 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43, 0x70, 0x9c, 0xaa, 0x50,
+ 0xb2, 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec, 0x37, 0x7f, 0xe3, 0x26,
+ 0x59, 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30, 0xf8, 0x2a, 0xbd, 0x5a,
+ 0x3c, 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0, 0x1d, 0x30, 0xaa, 0xea,
+ 0x03, 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80, 0x03, 0x88, 0xdf, 0xe1,
+ 0xbf, 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50, 0x0e, 0x70, 0x97, 0xad,
+ 0x35, 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99, 0x9b, 0x08, 0x91, 0x29,
+ 0xe9, 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33, 0x71, 0xf6, 0x14, 0xfd,
+ 0x4e, 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13, 0x95, 0x4a, 0x16, 0x4a,
+ 0x47, 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef, 0xfc, 0x64, 0xcb, 0x37,
+ 0xa7, 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05, 0x57, 0xab, 0x47, 0x92,
+ 0x54, 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6, 0x15, 0x5d, 0x40, 0x63,
+ 0x8e, 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71, 0x1b, 0xfc, 0x37, 0xfe,
+ 0xdd, 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce, 0x12, 0xf5, 0xa6, 0xbd,
+ 0x0a, 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61, 0x12, 0x25, 0x3d, 0x3f,
+ 0x9e, 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e, 0xc2, 0x9f, 0xa9, 0xc7,
+ 0x4d, 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9, 0x42, 0xc9, 0x48, 0xf5,
+ 0x96, 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c, 0x99, 0x66, 0xf4, 0xff,
+ 0x63, 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5, 0x68, 0xf2, 0x4a, 0x8c,
+ 0xd3, 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab, 0xa8, 0x0c, 0x71, 0xd6,
+ 0xce, 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f, 0x86, 0xff, 0xdb, 0xa4,
+ 0xe1, 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e, 0xb4, 0xd7, 0xa1, 0x56,
+ 0xd4, 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44, 0xa7, 0xa7, 0xf3, 0xde,
+ 0x36, 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53, 0xf5, 0x38, 0xe9, 0xa8,
+ 0x21, 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59, 0x29, 0x1e, 0xb2, 0xc3,
+ 0x9b, 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c, 0xde, 0x9f, 0xec, 0x67,
+ 0x1d, 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e, 0x49, 0x51, 0x9a, 0x7a,
+ 0x89, 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01, 0x8e, 0x3a, 0xd9, 0xd9,
+ 0x67, 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf, 0xfb, 0x74, 0x9c, 0x35,
+ 0x4a, 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a, 0xf4, 0x2a, 0xda, 0x8a,
+ 0x04, 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4, 0xfe, 0x7b, 0xc6, 0xd6,
+ 0xaa, 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7, 0x1d, 0x35, 0x04, 0x38,
+ 0xa4, 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23, 0xd6, 0x58, 0x73, 0x68,
+ 0xfe, 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3, 0xfd, 0x8c, 0xe3, 0xa3,
+ 0x03, 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a, 0x33, 0x4f, 0x51, 0x25,
+ 0xeb, 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7, 0x5b, 0x3b, 0x2c, 0xed,
+ 0xf8, 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e, 0x93, 0x86, 0xa9, 0x55,
+ 0x95, 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85, 0x5b, 0x51, 0x40, 0x9d,
+ 0xb9, 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf, 0x78, 0xda, 0xd5, 0x52,
+ 0xd3, 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6, 0xa0, 0x87, 0x14, 0x86,
+ 0xe1, 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb, 0x0e, 0x6d, 0x1f, 0xd8,
+ 0x6e, 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1, 0x9c, 0x74, 0x60, 0x61,
+ 0xf0, 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69, 0xea, 0x24, 0xbd, 0x60,
+ 0x3a, 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67, 0x65, 0x9d, 0xbf, 0x00,
+ 0x07, 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70, 0xd5, 0x2a, 0xb2, 0xa0,
+ 0x1c, 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a, 0x28, 0x13, 0xb7, 0x33,
+ 0x36, 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b, 0x5a, 0xaa, 0x5a, 0x66,
+ 0xe3, 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10, 0xe2, 0x90, 0xdc, 0x27,
+ 0x2a, 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf,
+ 0xf8, 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a,
+ 0xaf, 0x56, 0x8f, 0x24, 0xa8, 0xcd, 0x3d, 0x44, 0x97, 0xac, 0x07, 0x4c,
+ 0x2a, 0xba, 0x80, 0xc7, 0x1d, 0x6c, 0xec, 0xb3, 0xb7, 0xe0, 0x00, 0xe2,
+ 0x37, 0xf8, 0x6f, 0xfd, 0xba, 0x4e, 0x1a, 0xa5, 0x56, 0x54, 0x03, 0x9c,
+ 0x25, 0xeb, 0x4d, 0x7a, 0x15, 0x6d, 0x45, 0x02, 0x76, 0xe6, 0x66, 0xc2,
+ 0x24, 0x4a, 0x7a, 0x7f, 0x3d, 0xe3, 0x6b, 0x55, 0x4b, 0x4c, 0xdc, 0x7d,
+ 0x85, 0x3f, 0x53, 0x8e, 0x9a, 0x82, 0x1c, 0x52, 0x1b, 0x84, 0xe5, 0x52,
+ 0x85, 0x92, 0x91, 0xeb, 0x2c, 0x39, 0xb4, 0x7f, 0x61, 0xbb, 0xff, 0x19,
+ 0x32, 0xcd, 0xe9, 0xfe, 0xc6, 0x71, 0xd1, 0x81, 0x87, 0xc1, 0x55, 0xea,
+ 0xd1, 0xe4, 0x95, 0x19, 0xa7, 0xa8, 0x92, 0xf5, 0x80, 0xe9, 0x85, 0x57,
+ 0x50, 0x18, 0xe3, 0xad, 0x9d, 0x96, 0x76, 0xfc, 0x00, 0x1c, 0x46, 0xff,
+ 0x0d, 0xff, 0xb7, 0x49, 0xc3, 0x54, 0xaa, 0xca, 0x80, 0x73, 0x84, 0xbd,
+ 0x69, 0xaf, 0x42, 0xad, 0xa8, 0xa0, 0x4e, 0xdc, 0xcc, 0xd8, 0x44, 0x89,
+ 0x4f, 0x4f, 0xe7, 0xbc, 0x6d, 0x6a, 0xa9, 0x69, 0x9b, 0x8f, 0xb0, 0xa7,
+ 0xea, 0x71, 0xd3, 0x50, 0x43, 0x8a, 0x43, 0x70, 0x9c, 0xaa, 0x50, 0xb2,
+ 0x52, 0x3d, 0x65, 0x87, 0x36, 0x8f, 0xec, 0x37, 0x7f, 0xe3, 0x26, 0x59,
+ 0xbd, 0x3f, 0xd8, 0xce, 0x3a, 0x30, 0x30, 0xf8, 0x2a, 0xbd, 0x5a, 0x3c,
+ 0x92, 0xa3, 0x34, 0xf5, 0x12, 0x5e, 0xb0, 0x1d, 0x30, 0xaa, 0xea, 0x03,
+ 0x1c, 0x75, 0xb3, 0xb2, 0xce, 0xdf, 0x80, 0x03, 0x88, 0xdf, 0xe1, 0xbf,
+ 0xf6, 0xe9, 0x38, 0x6a, 0x95, 0x59, 0x50, 0x0e, 0x70, 0x97, 0xad, 0x35,
+ 0xe8, 0x55, 0xb5, 0x14, 0x09, 0xdb, 0x99, 0x9b, 0x08, 0x91, 0x29, 0xe9,
+ 0xfc, 0xf7, 0x8d, 0xad, 0x55, 0x2d, 0x33, 0x71, 0xf6, 0x14, 0xfd, 0x4e,
+ 0x3a, 0x6a, 0x08, 0x71, 0x48, 0x6e, 0x13, 0x95, 0x4a, 0x16, 0x4a, 0x47,
+ 0xac, 0xb0, 0xe6, 0xd1, 0xfd, 0x86, 0xef, 0xfc, 0x64, 0xcb, 0x37, 0xa7,
+ 0xfb, 0x19, 0xc7, 0x46, 0x06, 0x1f, 0x05, 0x57, 0xab, 0x47, 0x92, 0x54,
+ 0x66, 0x9e, 0xa2, 0x4b, 0xd6, 0x03, 0xa6, 0x15, 0x5d, 0x40, 0x63, 0x8e,
+ 0xb6, 0x76, 0x59, 0xdb, 0xf0, 0x00, 0x71, 0x1b, 0xfc, 0x37, 0xfe, 0xdd,
+ 0x27, 0x0d, 0x52, 0xab, 0x2a, 0x01, 0xce, 0x12, 0xf5, 0xa6, 0xbd, 0x0a,
+ 0xb6, 0xa2, 0x81, 0x3b, 0x73, 0x33, 0x61, 0x12, 0x25, 0x3d, 0x3f, 0x9e,
+ 0xf1, 0xb5, 0xaa, 0xa5, 0xa6, 0x6e, 0x3e, 0xc2, 0x9f, 0xa9, 0xc7, 0x4d,
+ 0x41, 0x0e, 0x29, 0x0d, 0xc2, 0x72, 0xa9, 0x42, 0xc9, 0x48, 0xf5, 0x96,
+ 0x1c, 0xda, 0x3f, 0xb0, 0xdd, 0xff, 0x8c, 0x99, 0x66, 0xf4, 0xff, 0x63,
+ 0x38, 0xe8, 0xc0, 0xc3, 0xe0, 0xaa, 0xf5, 0x68, 0xf2, 0x4a, 0x8c, 0xd3,
+ 0xd4, 0x49, 0x7a, 0xc0, 0x74, 0xc2, 0xab, 0xa8, 0x0c, 0x71, 0xd6, 0xce,
+ 0xcb, 0x3b, 0x7e, 0x00, 0x0e, 0x23, 0x7f, 0x86, 0xff, 0xdb, 0xa4, 0xe1,
+ 0xaa, 0x55, 0x65, 0x40, 0x39, 0xc2, 0x5e, 0xb4, 0xd7, 0xa1, 0x56, 0xd4,
+ 0x50, 0x27, 0x6e, 0x66, 0x6c, 0x22, 0x44, 0xa7, 0xa7, 0xf3, 0xde, 0x36,
+ 0xb5, 0x54, 0xb4, 0xcd, 0xc7, 0xd8, 0x53, 0xf5, 0x38, 0xe9, 0xa8, 0x21,
+ 0xc5, 0x21, 0xb8, 0x4e, 0x55, 0x28, 0x59, 0x29, 0x1e, 0xb2, 0xc3, 0x9b,
+ 0x47, 0xf6, 0x1b, 0xbf, 0xf1, 0x93, 0x2c, 0xde, 0x9f, 0xec, 0x67, 0x1d,
+ 0x18, 0x18, 0x7c, 0x15, 0x5e, 0xad, 0x1e, 0x49, 0x51, 0x9a, 0x7a, 0x89,
+ 0x2f, 0x58, 0x0e, 0x98, 0x55, 0x75, 0x01, 0x8e, 0x3a, 0xd9, 0xd9, 0x67,
+ 0x6f, 0xc0, 0x01, 0xc4, 0x6f, 0xf0, 0xdf, 0xfb, 0x74, 0x9c, 0x35, 0x4a,
+ 0xac, 0xa8, 0x07, 0x38, 0x4b, 0xd6, 0x9a, 0xf4, 0x2a, 0xda, 0x8a, 0x04,
+ 0xed, 0xcc, 0xcd, 0x84, 0x48, 0x94, 0xf4, 0xfe, 0x7b, 0xc6, 0xd6, 0xaa,
+ 0x96, 0x99, 0xb8, 0xfb, 0x0a, 0x7e, 0xa7, 0x1d, 0x35, 0x04, 0x38, 0xa4,
+ 0x37, 0x09, 0xca, 0xa5, 0x0b, 0x25, 0x23, 0xd6, 0x58, 0x73, 0x68, 0xfe,
+ 0xc3, 0x77, 0xfe, 0x32, 0x65, 0x9b, 0xd3, 0xfd, 0x8c, 0xe3, 0xa3, 0x03,
+ 0x0f, 0x82, 0xab, 0xd5, 0xa3, 0xc9, 0x2a, 0x33, 0x4f, 0x51, 0x25, 0xeb,
+ 0x01, 0xd3, 0x0a, 0xae, 0xa0, 0x31, 0xc7, 0x5b, 0x3b, 0x2c, 0xed, 0xf8,
+ 0x00, 0x38, 0x8d, 0xfe, 0x1b, 0xff, 0x6e, 0x93, 0x86, 0xa9, 0x55, 0x95,
+ 0x00, 0xe7, 0x09, 0x7a, 0xd3, 0x5e, 0x85, 0x5b, 0x51, 0x40, 0x9d, 0xb9,
+ 0x99, 0xb0, 0x89, 0x12, 0x9e, 0x9f, 0xcf, 0x78, 0xda, 0xd5, 0x52, 0xd3,
+ 0x37, 0x1f, 0x61, 0x4f, 0xd4, 0xe3, 0xa6, 0xa0, 0x87, 0x14, 0x86, 0xe1,
+ 0x39, 0x54, 0xa1, 0x64, 0xa4, 0x7a, 0xcb, 0x0e, 0x6d, 0x1f, 0xd8, 0x6e,
+ 0xff, 0xc6, 0x4c, 0xb3, 0x7a, 0x7f, 0xb1, 0x9c, 0x74, 0x60, 0x61, 0xf0,
+ 0x55, 0x7a, 0xb4, 0x79, 0x25, 0x46, 0x69, 0xea, 0x24, 0xbd, 0x60, 0x3a,
+ 0x61, 0x55, 0xd4, 0x06, 0x38, 0xeb, 0x67, 0x65, 0x9d, 0xbf, 0x00, 0x07,
+ 0x11, 0xbf, 0xc3, 0x7f, 0xed, 0xd2, 0x70, 0xd5, 0x2a, 0xb2, 0xa0, 0x1c,
+ 0xe1, 0x2f, 0x5a, 0x6b, 0xd0, 0xab, 0x6a, 0x28, 0x13, 0xb7, 0x33, 0x36,
+ 0x11, 0x22, 0x53, 0xd3, 0xf9, 0xef, 0x1b, 0x5a, 0xaa, 0x5a, 0x66, 0xe3,
+ 0xec, 0x29, 0xfa, 0x9c, 0x74, 0xd4, 0x10, 0xe2, 0x90, 0xdc, 0x27, 0x2a,
+ 0x94, 0x2c, 0x94, 0x8f, 0x59, 0x61, 0xcd, 0xa3, 0xfb, 0x0d, 0xdf, 0xf8,
+ 0xc9, 0x96, 0x6f, 0x4f, 0xf6, 0x33, 0x8e, 0x8c, 0x0c, 0x3e, 0x0a, 0xaf,
+ 0x56, 0x8f, 0x24,
+};
+static_assert(sizeof(kBytesTestReadSymbol4) == kNumBytesTestReadSymbol4, "");
+
+// The kBytesTestReadSymbol5[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][6] = {
+// // pdf: 1/5, 1/5, 1/5, 1/5, 1/5
+// { 32768 - 6554, 32768 - 13107, 32768 - 19661, 32768 - 26214, 0, 0 },
+// // pdf: 3/10, 2/10, 2/10, 2/10, 1/10
+// { 32768 - 9830, 32768 - 16384, 32768 - 22938, 32768 - 29491, 0, 0 },
+// // pdf: 1/10, 2/10, 2/10, 2/10, 3/10
+// { 32768 - 3277, 32768 - 9830, 32768 - 16384, 32768 - 22938, 0, 0 },
+// // pdf: 1/10, 2/10, 4/10, 2/10, 1/10
+// { 32768 - 3277, 32768 - 9830, 32768 - 22938, 32768 - 29491, 0, 0 },
+// };
+// constexpr int kSymbols[10][4] = { { 0, 0, 4, 4 }, //
+// { 0, 1, 3, 3 }, //
+// { 1, 2, 2, 2 }, //
+// { 1, 3, 1, 1 }, //
+// { 2, 4, 0, 0 }, //
+// { 2, 0, 4, 3 }, //
+// { 3, 1, 3, 2 }, //
+// { 3, 2, 2, 1 }, //
+// { 4, 3, 1, 2 }, //
+// { 4, 0, 4, 2 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 320; ++i) {
+// for (int j = 0; j < 10; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 5);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol5 = 3612;
+constexpr uint8_t kBytesTestReadSymbol5[] = {
+ 0x0f, 0x1c, 0x16, 0x78, 0x6f, 0x83, 0xfe, 0x29, 0x95, 0x9a, 0x42, 0xcc,
+ 0x70, 0x9a, 0x0d, 0x72, 0xe0, 0x7d, 0x63, 0x9e, 0x05, 0x3c, 0x88, 0x22,
+ 0x40, 0x57, 0x83, 0xa8, 0x69, 0x6f, 0xc3, 0xb2, 0x58, 0x6c, 0xa9, 0x41,
+ 0x3c, 0x2f, 0x3f, 0xa3, 0xe6, 0x4e, 0x5e, 0xaf, 0x42, 0x56, 0x9d, 0x3f,
+ 0x70, 0xeb, 0x00, 0x02, 0x86, 0x23, 0x5f, 0x8e, 0x1b, 0x35, 0x71, 0x7d,
+ 0x50, 0xbe, 0xb1, 0x1e, 0xe9, 0x2f, 0x08, 0x5a, 0x04, 0xc0, 0x7b, 0x98,
+ 0x20, 0xbd, 0xc5, 0x39, 0xf7, 0x93, 0x5c, 0x6c, 0x4a, 0x0f, 0x50, 0x24,
+ 0xe1, 0xf3, 0x2a, 0x8d, 0x53, 0x55, 0x9a, 0xd6, 0x3a, 0xd3, 0xd6, 0x9c,
+ 0x41, 0xa2, 0x2c, 0x05, 0x1c, 0x5a, 0x28, 0x8d, 0xc0, 0x4f, 0x8d, 0xc1,
+ 0x40, 0xaa, 0x19, 0xbf, 0xa7, 0x93, 0x48, 0xdf, 0x54, 0xcf, 0xb4, 0x47,
+ 0xc4, 0x39, 0x90, 0xbb, 0xff, 0xb4, 0x47, 0x65, 0x33, 0x34, 0x45, 0x23,
+ 0x5e, 0x79, 0xc5, 0xbd, 0x24, 0x30, 0x58, 0x8a, 0x19, 0x68, 0xbb, 0x08,
+ 0xaa, 0xff, 0xce, 0x68, 0x37, 0xb4, 0x62, 0x44, 0x31, 0xe8, 0x3e, 0x4d,
+ 0x05, 0x1d, 0xe2, 0x48, 0x56, 0xd5, 0x53, 0x19, 0xcc, 0xfd, 0x82, 0xa7,
+ 0x06, 0xc4, 0x66, 0x95, 0x6c, 0x43, 0x3d, 0x43, 0x86, 0xe3, 0x62, 0x51,
+ 0x26, 0x1c, 0x57, 0xed, 0x9a, 0x1a, 0x14, 0x4f, 0x41, 0x96, 0xc0, 0x72,
+ 0x38, 0x59, 0xff, 0x69, 0xae, 0x2b, 0x59, 0x65, 0x30, 0xfd, 0xa5, 0x6f,
+ 0x1b, 0xab, 0x01, 0x72, 0xb4, 0xcd, 0xba, 0x44, 0x73, 0x12, 0x31, 0xee,
+ 0x83, 0x08, 0x5c, 0x35, 0x41, 0x17, 0xf1, 0x80, 0x55, 0xdd, 0x67, 0xb2,
+ 0xd3, 0xe1, 0x04, 0x51, 0x69, 0x9b, 0x4b, 0x98, 0xcf, 0x17, 0x0a, 0xd4,
+ 0xdc, 0x61, 0xf2, 0xb9, 0x4b, 0x23, 0xb6, 0xe8, 0x0c, 0x0d, 0xda, 0x68,
+ 0xac, 0xd9, 0xf4, 0x11, 0x63, 0x4a, 0x7f, 0x17, 0x69, 0xdb, 0x91, 0x1b,
+ 0x1d, 0xfb, 0x74, 0x58, 0x69, 0xcc, 0xf5, 0xce, 0x0d, 0x1e, 0xdd, 0x6d,
+ 0x2e, 0x87, 0xf2, 0x36, 0x39, 0x22, 0x59, 0x78, 0x01, 0x2c, 0xf0, 0xe6,
+ 0x8c, 0xd1, 0xdb, 0xa4, 0xf4, 0xc4, 0x09, 0x0e, 0xfe, 0x93, 0x88, 0x90,
+ 0x3e, 0x55, 0x60, 0x51, 0x6a, 0xe9, 0x26, 0x41, 0x1f, 0x18, 0xab, 0xc1,
+ 0xa4, 0x66, 0x57, 0xdd, 0xe6, 0x88, 0xbd, 0x74, 0xa0, 0xd3, 0x65, 0x0d,
+ 0x04, 0xe3, 0x97, 0x1e, 0x9b, 0x59, 0xfc, 0xe2, 0x45, 0x9b, 0x90, 0xe1,
+ 0x80, 0x20, 0x85, 0x03, 0x06, 0x1f, 0x46, 0xb1, 0x69, 0xb4, 0xf3, 0x06,
+ 0xa8, 0xb5, 0x78, 0x2c, 0x21, 0xd1, 0x67, 0x8d, 0x91, 0xef, 0x6f, 0xec,
+ 0xed, 0x2c, 0xd7, 0x40, 0x32, 0x09, 0xed, 0x4e, 0x92, 0xbb, 0x28, 0x67,
+ 0xac, 0x09, 0x50, 0x7f, 0x30, 0xed, 0xde, 0x56, 0xeb, 0xc9, 0x23, 0x2f,
+ 0x13, 0x07, 0xef, 0x80, 0x9e, 0x83, 0x6a, 0x24, 0xd4, 0xd1, 0x84, 0xbe,
+ 0xf8, 0x1f, 0xb0, 0xaa, 0x6a, 0xf0, 0xda, 0x02, 0x0c, 0x94, 0xc9, 0xbc,
+ 0x0f, 0xe8, 0x76, 0x95, 0x79, 0x0e, 0x24, 0x1e, 0x4c, 0xdb, 0xe5, 0xd5,
+ 0x20, 0xee, 0x13, 0xff, 0xba, 0x1f, 0x7f, 0x67, 0x89, 0x4b, 0x6b, 0x28,
+ 0x33, 0x61, 0xfb, 0x53, 0xed, 0xf7, 0x13, 0x3f, 0x64, 0xc9, 0x26, 0x19,
+ 0xde, 0xe6, 0xec, 0x74, 0xe0, 0x0e, 0x7b, 0x07, 0xeb, 0xd9, 0xac, 0x7e,
+ 0x1d, 0xac, 0xba, 0xa0, 0x50, 0xc4, 0x12, 0xee, 0x58, 0xe5, 0xe9, 0x7c,
+ 0xa3, 0x40, 0xbd, 0x92, 0x6d, 0xa8, 0x08, 0x3c, 0x9e, 0xdb, 0xd3, 0x08,
+ 0x3d, 0xb3, 0x1c, 0x25, 0x09, 0x51, 0x55, 0xbb, 0x51, 0xc8, 0xe6, 0xd6,
+ 0x30, 0x86, 0x25, 0xa9, 0x01, 0xed, 0x55, 0x11, 0xa4, 0x5e, 0x3f, 0x57,
+ 0xb7, 0x9b, 0x64, 0xec, 0x3d, 0x93, 0x28, 0x34, 0xea, 0xe9, 0x53, 0xec,
+ 0x71, 0x7c, 0x1c, 0xee, 0x03, 0x26, 0x1a, 0x15, 0x9f, 0x6c, 0x74, 0xa5,
+ 0xe1, 0x04, 0x76, 0xcb, 0x0b, 0xf9, 0x96, 0x4f, 0x4e, 0xb6, 0x7e, 0xad,
+ 0xc5, 0x4b, 0x37, 0x44, 0x91, 0xfd, 0x1d, 0x69, 0x11, 0x17, 0x82, 0xc4,
+ 0x17, 0x39, 0x29, 0x99, 0x8f, 0xe1, 0x35, 0x4d, 0x9e, 0x4f, 0xc9, 0x98,
+ 0x71, 0x6b, 0xa9, 0x0d, 0x0a, 0xf8, 0xb6, 0x3a, 0x52, 0xf0, 0x82, 0x3b,
+ 0x65, 0x79, 0x60, 0x16, 0xa5, 0xa4, 0xf8, 0x0e, 0xc2, 0x3e, 0xf3, 0x23,
+ 0x82, 0x4d, 0x1f, 0x9d, 0x7b, 0xe1, 0xb8, 0xd3, 0x79, 0xc4, 0x04, 0x1d,
+ 0xfc, 0xbc, 0xdb, 0x37, 0x73, 0x27, 0xe3, 0x8d, 0x65, 0xcb, 0x72, 0xd2,
+ 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x13, 0x0d, 0x80, 0xf6, 0xaa, 0x90,
+ 0xd2, 0x30, 0x87, 0x1b, 0xdb, 0xcd, 0xb9, 0xea, 0x28, 0xfa, 0x10, 0xd5,
+ 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b, 0x6d, 0x42, 0x15, 0xc2, 0xf4,
+ 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5, 0xb9, 0x69, 0x57, 0xf2, 0x3d,
+ 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc, 0x90, 0x8e, 0xc9, 0x18, 0x43,
+ 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55, 0x08, 0x6a, 0xf8, 0x79, 0xd9,
+ 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a, 0xe1, 0x7a, 0x62, 0x93, 0xdc,
+ 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab, 0xf9, 0x1e, 0xa6, 0xf1, 0x75,
+ 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64, 0x8c, 0x21, 0xc6, 0xf6, 0xf2,
+ 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c, 0x3c, 0xec, 0xc9, 0xad, 0x4a,
+ 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31, 0x49, 0xee, 0x30, 0x68, 0xe5,
+ 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53, 0x78, 0xba, 0xe0, 0x2c, 0x34,
+ 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3, 0x7b, 0x79, 0x25, 0x39, 0xba,
+ 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64, 0xd6, 0xa5, 0x69, 0x6d, 0xa8,
+ 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18, 0x34, 0x72, 0xbc, 0xb7, 0x2d,
+ 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70, 0x16, 0x1a, 0x59, 0x92, 0x11,
+ 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92, 0x9c, 0xdd, 0x2a, 0xa1, 0x0d,
+ 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4, 0xb6, 0xd4, 0x21, 0x5c, 0x2f,
+ 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e, 0x5b, 0x96, 0x95, 0x7f, 0x23,
+ 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c, 0xc9, 0x08, 0xec, 0x91, 0x84,
+ 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95, 0x50, 0x86, 0xaf, 0x87, 0x9d,
+ 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10, 0xae, 0x17, 0xa6, 0x29, 0x3d,
+ 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a, 0xbf, 0x91, 0xea, 0x6f, 0x17,
+ 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76, 0x48, 0xc2, 0x1c, 0x6f, 0x6f,
+ 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57, 0xc3, 0xce, 0xcc, 0x9a, 0xd4,
+ 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3, 0x14, 0x9e, 0xe3, 0x06, 0x8e,
+ 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5, 0x37, 0x8b, 0xae, 0x02, 0xc3,
+ 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e, 0x37, 0xb7, 0x92, 0x53, 0x9b,
+ 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66, 0x4d, 0x6a, 0x56, 0x96, 0xda,
+ 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71, 0x83, 0x47, 0x2b, 0xcb, 0x72,
+ 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x01, 0x61, 0xa5, 0x99, 0x21,
+ 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9, 0x29, 0xcd, 0xd2, 0xaa, 0x10,
+ 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b, 0x6d, 0x42, 0x15, 0xc2,
+ 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5, 0xb9, 0x69, 0x57, 0xf2,
+ 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc, 0x90, 0x8e, 0xc9, 0x18,
+ 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55, 0x08, 0x6a, 0xf8, 0x79,
+ 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a, 0xe1, 0x7a, 0x62, 0x93,
+ 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab, 0xf9, 0x1e, 0xa6, 0xf1,
+ 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64, 0x8c, 0x21, 0xc6, 0xf6,
+ 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c, 0x3c, 0xec, 0xc9, 0xad,
+ 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31, 0x49, 0xee, 0x30, 0x68,
+ 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53, 0x78, 0xba, 0xe0, 0x2c,
+ 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3, 0x7b, 0x79, 0x25, 0x39,
+ 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64, 0xd6, 0xa5, 0x69, 0x6d,
+ 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18, 0x34, 0x72, 0xbc, 0xb7,
+ 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70, 0x16, 0x1a, 0x59, 0x92,
+ 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92, 0x9c, 0xdd, 0x2a, 0xa1,
+ 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4, 0xb6, 0xd4, 0x21, 0x5c,
+ 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e, 0x5b, 0x96, 0x95, 0x7f,
+ 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c, 0xc9, 0x08, 0xec, 0x91,
+ 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95, 0x50, 0x86, 0xaf, 0x87,
+ 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10, 0xae, 0x17, 0xa6, 0x29,
+ 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a, 0xbf, 0x91, 0xea, 0x6f,
+ 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76, 0x48, 0xc2, 0x1c, 0x6f,
+ 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57, 0xc3, 0xce, 0xcc, 0x9a,
+ 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3, 0x14, 0x9e, 0xe3, 0x06,
+ 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5, 0x37, 0x8b, 0xae, 0x02,
+ 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e, 0x37, 0xb7, 0x92, 0x53,
+ 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66, 0x4d, 0x6a, 0x56, 0x96,
+ 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71, 0x83, 0x47, 0x2b, 0xcb,
+ 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x01, 0x61, 0xa5, 0x99,
+ 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9, 0x29, 0xcd, 0xd2, 0xaa,
+ 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b, 0x6d, 0x42, 0x15,
+ 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5, 0xb9, 0x69, 0x57,
+ 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc, 0x90, 0x8e, 0xc9,
+ 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55, 0x08, 0x6a, 0xf8,
+ 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a, 0xe1, 0x7a, 0x62,
+ 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab, 0xf9, 0x1e, 0xa6,
+ 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64, 0x8c, 0x21, 0xc6,
+ 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c, 0x3c, 0xec, 0xc9,
+ 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31, 0x49, 0xee, 0x30,
+ 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53, 0x78, 0xba, 0xe0,
+ 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3, 0x7b, 0x79, 0x25,
+ 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64, 0xd6, 0xa5, 0x69,
+ 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18, 0x34, 0x72, 0xbc,
+ 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70, 0x16, 0x1a, 0x59,
+ 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92, 0x9c, 0xdd, 0x2a,
+ 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4, 0xb6, 0xd4, 0x21,
+ 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e, 0x5b, 0x96, 0x95,
+ 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c, 0xc9, 0x08, 0xec,
+ 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95, 0x50, 0x86, 0xaf,
+ 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10, 0xae, 0x17, 0xa6,
+ 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a, 0xbf, 0x91, 0xea,
+ 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76, 0x48, 0xc2, 0x1c,
+ 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57, 0xc3, 0xce, 0xcc,
+ 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3, 0x14, 0x9e, 0xe3,
+ 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5, 0x37, 0x8b, 0xae,
+ 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e, 0x37, 0xb7, 0x92,
+ 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66, 0x4d, 0x6a, 0x56,
+ 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71, 0x83, 0x47, 0x2b,
+ 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x01, 0x61, 0xa5,
+ 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9, 0x29, 0xcd, 0xd2,
+ 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b, 0x6d, 0x42,
+ 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5, 0xb9, 0x69,
+ 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc, 0x90, 0x8e,
+ 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55, 0x08, 0x6a,
+ 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a, 0xe1, 0x7a,
+ 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab, 0xf9, 0x1e,
+ 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64, 0x8c, 0x21,
+ 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c, 0x3c, 0xec,
+ 0xc9, 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31, 0x49, 0xee,
+ 0x30, 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53, 0x78, 0xba,
+ 0xe0, 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3, 0x7b, 0x79,
+ 0x25, 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64, 0xd6, 0xa5,
+ 0x69, 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18, 0x34, 0x72,
+ 0xbc, 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70, 0x16, 0x1a,
+ 0x59, 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92, 0x9c, 0xdd,
+ 0x2a, 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4, 0xb6, 0xd4,
+ 0x21, 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e, 0x5b, 0x96,
+ 0x95, 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c, 0xc9, 0x08,
+ 0xec, 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95, 0x50, 0x86,
+ 0xaf, 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10, 0xae, 0x17,
+ 0xa6, 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a, 0xbf, 0x91,
+ 0xea, 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76, 0x48, 0xc2,
+ 0x1c, 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57, 0xc3, 0xce,
+ 0xcc, 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3, 0x14, 0x9e,
+ 0xe3, 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5, 0x37, 0x8b,
+ 0xae, 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e, 0x37, 0xb7,
+ 0x92, 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66, 0x4d, 0x6a,
+ 0x56, 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71, 0x83, 0x47,
+ 0x2b, 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x01, 0x61,
+ 0xa5, 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9, 0x29, 0xcd,
+ 0xd2, 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b, 0x6d,
+ 0x42, 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5, 0xb9,
+ 0x69, 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc, 0x90,
+ 0x8e, 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55, 0x08,
+ 0x6a, 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a, 0xe1,
+ 0x7a, 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab, 0xf9,
+ 0x1e, 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64, 0x8c,
+ 0x21, 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c, 0x3c,
+ 0xec, 0xc9, 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31, 0x49,
+ 0xee, 0x30, 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53, 0x78,
+ 0xba, 0xe0, 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3, 0x7b,
+ 0x79, 0x25, 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64, 0xd6,
+ 0xa5, 0x69, 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18, 0x34,
+ 0x72, 0xbc, 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70, 0x16,
+ 0x1a, 0x59, 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92, 0x9c,
+ 0xdd, 0x2a, 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4, 0xb6,
+ 0xd4, 0x21, 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e, 0x5b,
+ 0x96, 0x95, 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c, 0xc9,
+ 0x08, 0xec, 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95, 0x50,
+ 0x86, 0xaf, 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10, 0xae,
+ 0x17, 0xa6, 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a, 0xbf,
+ 0x91, 0xea, 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76, 0x48,
+ 0xc2, 0x1c, 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57, 0xc3,
+ 0xce, 0xcc, 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3, 0x14,
+ 0x9e, 0xe3, 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5, 0x37,
+ 0x8b, 0xae, 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e, 0x37,
+ 0xb7, 0x92, 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66, 0x4d,
+ 0x6a, 0x56, 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71, 0x83,
+ 0x47, 0x2b, 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7, 0x01,
+ 0x61, 0xa5, 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9, 0x29,
+ 0xcd, 0xd2, 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b, 0x4b,
+ 0x6d, 0x42, 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95, 0xe5,
+ 0xb9, 0x69, 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2, 0xcc,
+ 0x90, 0x8e, 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9, 0x55,
+ 0x08, 0x6a, 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1, 0x0a,
+ 0xe1, 0x7a, 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4, 0xab,
+ 0xf9, 0x1e, 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47, 0x64,
+ 0x8c, 0x21, 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35, 0x7c,
+ 0x3c, 0xec, 0xc9, 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd, 0x31,
+ 0x49, 0xee, 0x30, 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f, 0x53,
+ 0x78, 0xba, 0xe0, 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10, 0xe3,
+ 0x7b, 0x79, 0x25, 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76, 0x64,
+ 0xd6, 0xa5, 0x69, 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7, 0x18,
+ 0x34, 0x72, 0xbc, 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d, 0x70,
+ 0x16, 0x1a, 0x59, 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc, 0x92,
+ 0x9c, 0xdd, 0x2a, 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52, 0xb4,
+ 0xb6, 0xd4, 0x21, 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39, 0x5e,
+ 0x5b, 0x96, 0x95, 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d, 0x2c,
+ 0xc9, 0x08, 0xec, 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e, 0x95,
+ 0x50, 0x86, 0xaf, 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a, 0x10,
+ 0xae, 0x17, 0xa6, 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb, 0x4a,
+ 0xbf, 0x91, 0xea, 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84, 0x76,
+ 0x48, 0xc2, 0x1c, 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43, 0x57,
+ 0xc3, 0xce, 0xcc, 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b, 0xd3,
+ 0x14, 0x9e, 0xe3, 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8, 0xf5,
+ 0x37, 0x8b, 0xae, 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61, 0x0e,
+ 0x37, 0xb7, 0x92, 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7, 0x66,
+ 0x4d, 0x6a, 0x56, 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f, 0x71,
+ 0x83, 0x47, 0x2b, 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5, 0xd7,
+ 0x01, 0x61, 0xa5, 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb, 0xc9,
+ 0x29, 0xcd, 0xd2, 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5, 0x2b,
+ 0x4b, 0x6d, 0x42, 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3, 0x95,
+ 0xe5, 0xb9, 0x69, 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0, 0xd2,
+ 0xcc, 0x90, 0x8e, 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6, 0xe9,
+ 0x55, 0x08, 0x6a, 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6, 0xa1,
+ 0x0a, 0xe1, 0x7a, 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc, 0xb4,
+ 0xab, 0xf9, 0x1e, 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48, 0x47,
+ 0x64, 0x8c, 0x21, 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84, 0x35,
+ 0x7c, 0x3c, 0xec, 0xc9, 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70, 0xbd,
+ 0x31, 0x49, 0xee, 0x30, 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc, 0x8f,
+ 0x53, 0x78, 0xba, 0xe0, 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46, 0x10,
+ 0xe3, 0x7b, 0x79, 0x25, 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e, 0x76,
+ 0x64, 0xd6, 0xa5, 0x69, 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4, 0xf7,
+ 0x18, 0x34, 0x72, 0xbc, 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc, 0x5d,
+ 0x70, 0x16, 0x1a, 0x59, 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd, 0xbc,
+ 0x92, 0x9c, 0xdd, 0x2a, 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b, 0x52,
+ 0xb4, 0xb6, 0xd4, 0x21, 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a, 0x39,
+ 0x5e, 0x5b, 0x96, 0x95, 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b, 0x0d,
+ 0x2c, 0xc9, 0x08, 0xec, 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e, 0x6e,
+ 0x95, 0x50, 0x86, 0xaf, 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b, 0x6a,
+ 0x10, 0xae, 0x17, 0xa6, 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d, 0xcb,
+ 0x4a, 0xbf, 0x91, 0xea, 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64, 0x84,
+ 0x76, 0x48, 0xc2, 0x1c, 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8, 0x43,
+ 0x57, 0xc3, 0xce, 0xcc, 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57, 0x0b,
+ 0xd3, 0x14, 0x9e, 0xe3, 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f, 0xc8,
+ 0xf5, 0x37, 0x8b, 0xae, 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24, 0x61,
+ 0x0e, 0x37, 0xb7, 0x92, 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1, 0xe7,
+ 0x66, 0x4d, 0x6a, 0x56, 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a, 0x4f,
+ 0x71, 0x83, 0x47, 0x2b, 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b, 0xc5,
+ 0xd7, 0x01, 0x61, 0xa5, 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b, 0xdb,
+ 0xc9, 0x29, 0xcd, 0xd2, 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26, 0xb5,
+ 0x2b, 0x4b, 0x6d, 0x42, 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1, 0xa3,
+ 0x95, 0xe5, 0xb9, 0x69, 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80, 0xb0,
+ 0xd2, 0xcc, 0x90, 0x8e, 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94, 0xe6,
+ 0xe9, 0x55, 0x08, 0x6a, 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5, 0xb6,
+ 0xa1, 0x0a, 0xe1, 0x7a, 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2, 0xdc,
+ 0xb4, 0xab, 0xf9, 0x1e, 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66, 0x48,
+ 0x47, 0x64, 0x8c, 0x21, 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x74, 0xaa, 0x84,
+ 0x35, 0x7c, 0x3c, 0xec, 0xc9, 0xad, 0x4a, 0xd2, 0xdb, 0x50, 0x85, 0x70,
+ 0xbd, 0x31, 0x49, 0xee, 0x30, 0x68, 0xe5, 0x79, 0x6e, 0x5a, 0x55, 0xfc,
+ 0x8f, 0x53, 0x78, 0xba, 0xe0, 0x2c, 0x34, 0xb3, 0x24, 0x23, 0xb2, 0x46,
+ 0x10, 0xe3, 0x7b, 0x79, 0x25, 0x39, 0xba, 0x55, 0x42, 0x1a, 0xbe, 0x1e,
+ 0x76, 0x64, 0xd6, 0xa5, 0x69, 0x6d, 0xa8, 0x42, 0xb8, 0x5e, 0x98, 0xa4,
+ 0xf7, 0x18, 0x34, 0x72, 0xbc, 0xb7, 0x2d, 0x2a, 0xfe, 0x47, 0xa9, 0xbc,
+ 0x5d, 0x70, 0x16, 0x1a, 0x59, 0x92, 0x11, 0xd9, 0x23, 0x08, 0x71, 0xbd,
+ 0xbc, 0x92, 0x9c, 0xdd, 0x2a, 0xa1, 0x0d, 0x5f, 0x0f, 0x3b, 0x32, 0x6b,
+ 0x52, 0xb4, 0xb6, 0xd4, 0x21, 0x5c, 0x2f, 0x4c, 0x52, 0x7b, 0x8c, 0x1a,
+ 0x39, 0x5e, 0x5b, 0x96, 0x95, 0x7f, 0x23, 0xd4, 0xde, 0x2e, 0xb8, 0x0b,
+ 0x0d, 0x2c, 0xc9, 0x08, 0xec, 0x91, 0x84, 0x38, 0xde, 0xde, 0x49, 0x4e,
+ 0x6e, 0x95, 0x50, 0x86, 0xaf, 0x87, 0x9d, 0x99, 0x35, 0xa9, 0x5a, 0x5b,
+ 0x6a, 0x10, 0xae, 0x17, 0xa6, 0x29, 0x3d, 0xc6, 0x0d, 0x1c, 0xaf, 0x2d,
+ 0xcb, 0x4a, 0xbf, 0x91, 0xea, 0x6f, 0x17, 0x5c, 0x05, 0x86, 0x96, 0x64,
+ 0x84, 0x76, 0x48, 0xc2, 0x1c, 0x6f, 0x6f, 0x24, 0xa7, 0x37, 0x4a, 0xa8,
+ 0x43, 0x57, 0xc3, 0xce, 0xcc, 0x9a, 0xd4, 0xad, 0x2d, 0xb5, 0x08, 0x57,
+ 0x0b, 0xd3, 0x14, 0x9e, 0xe3, 0x06, 0x8e, 0x57, 0x96, 0xe5, 0xa5, 0x5f,
+ 0xc8, 0xf5, 0x37, 0x8b, 0xae, 0x02, 0xc3, 0x4b, 0x32, 0x42, 0x3b, 0x24,
+ 0x61, 0x0e, 0x37, 0xb7, 0x92, 0x53, 0x9b, 0xa5, 0x54, 0x21, 0xab, 0xe1,
+ 0xe7, 0x66, 0x4d, 0x6a, 0x56, 0x96, 0xda, 0x84, 0x2b, 0x85, 0xe9, 0x8a,
+ 0x4f, 0x71, 0x83, 0x47, 0x2b, 0xcb, 0x72, 0xd2, 0xaf, 0xe4, 0x7a, 0x9b,
+ 0xc5, 0xd7, 0x01, 0x61, 0xa5, 0x99, 0x21, 0x1d, 0x92, 0x30, 0x87, 0x1b,
+ 0xdb, 0xc9, 0x29, 0xcd, 0xd2, 0xaa, 0x10, 0xd5, 0xf0, 0xf3, 0xb3, 0x26,
+ 0xb5, 0x2b, 0x4b, 0x6d, 0x42, 0x15, 0xc2, 0xf4, 0xc5, 0x27, 0xb8, 0xc1,
+ 0xa3, 0x95, 0xe5, 0xb9, 0x69, 0x57, 0xf2, 0x3d, 0x4d, 0xe2, 0xeb, 0x80,
+ 0xb0, 0xd2, 0xcc, 0x90, 0x8e, 0xc9, 0x18, 0x43, 0x8d, 0xed, 0xe4, 0x94,
+ 0xe6, 0xe9, 0x55, 0x08, 0x6a, 0xf8, 0x79, 0xd9, 0x93, 0x5a, 0x95, 0xa5,
+ 0xb6, 0xa1, 0x0a, 0xe1, 0x7a, 0x62, 0x93, 0xdc, 0x60, 0xd1, 0xca, 0xf2,
+ 0xdc, 0xb4, 0xab, 0xf9, 0x1e, 0xa6, 0xf1, 0x75, 0xc0, 0x58, 0x69, 0x66,
+ 0x48, 0x47, 0x64, 0x8c, 0x21, 0xc6, 0xf6, 0xf2, 0x4a, 0x73, 0x75, 0x80,
+};
+static_assert(sizeof(kBytesTestReadSymbol5) == kNumBytesTestReadSymbol5, "");
+
+// The kBytesTestReadSymbol6[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][7] = {
+// // pmf: 1/6, 1/6, 1/6, 1/6, 1/6, 1/6
+// { 32768 - 5461, 32768 - 10923, 32768 - 16384, 32768 - 21845, 32768 - 27307,
+// 0, 0 },
+// // pmf: 3/12, 2/12, 2/12, 2/12, 2/12, 1/12
+// { 32768 - 8192, 32768 - 13653, 32768 - 19115, 32768 - 24576, 32768 - 30037,
+// 0, 0 },
+// // pmf: 1/12, 2/12, 2/12, 2/12, 2/12, 3/12
+// { 32768 - 2731, 32768 - 8192, 32768 - 13653, 32768 - 19115, 32768 - 24576,
+// 0, 0 },
+// // pmf: 1/12, 2/12, 3/12, 3/12, 2/12, 1/12
+// { 32768 - 2731, 32768 - 8192, 32768 - 16384, 32768 - 24576, 32768 - 30037,
+// 0, 0 },
+// };
+// constexpr int kSymbols[12][4] = { { 0, 0, 5, 5 }, //
+// { 0, 1, 4, 4 }, //
+// { 1, 2, 3, 3 }, //
+// { 1, 3, 2, 2 }, //
+// { 2, 4, 1, 1 }, //
+// { 2, 5, 0, 0 }, //
+// { 3, 0, 5, 4 }, //
+// { 3, 1, 4, 3 }, //
+// { 4, 2, 3, 2 }, //
+// { 4, 3, 2, 1 }, //
+// { 5, 4, 1, 3 }, //
+// { 5, 0, 5, 2 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 256; ++i) {
+// for (int j = 0; j < 12; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 6);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol6 = 3917;
+constexpr uint8_t kBytesTestReadSymbol6[] = {
+ 0x0a, 0x8e, 0xb8, 0x15, 0xd5, 0x69, 0x63, 0x06, 0x48, 0x75, 0xf4, 0x4c,
+ 0xfa, 0x13, 0xba, 0x68, 0x61, 0xa6, 0x9f, 0x39, 0x63, 0xba, 0x63, 0x26,
+ 0xa8, 0xaa, 0xd0, 0x10, 0x4a, 0x05, 0xaf, 0x5f, 0x65, 0x57, 0x2f, 0x68,
+ 0x48, 0x2c, 0x64, 0xdf, 0x0a, 0x93, 0xcc, 0x84, 0x43, 0x97, 0x34, 0x79,
+ 0x10, 0x05, 0x4d, 0x58, 0xe9, 0xc3, 0xb4, 0x4a, 0x70, 0xd4, 0x81, 0x71,
+ 0x9f, 0x6b, 0x18, 0xb3, 0x72, 0xdf, 0x37, 0x87, 0x3e, 0x40, 0xd0, 0xff,
+ 0x10, 0x32, 0x22, 0xe4, 0x36, 0xef, 0xa2, 0x5e, 0x39, 0x5d, 0x42, 0x59,
+ 0x8c, 0x3f, 0x1b, 0x41, 0xdb, 0xc2, 0x8c, 0x64, 0xaf, 0xd2, 0x49, 0x45,
+ 0xd8, 0xad, 0x85, 0x3b, 0x70, 0x13, 0x83, 0x63, 0x49, 0x86, 0x35, 0xfe,
+ 0x93, 0x6b, 0x51, 0x0e, 0x32, 0x3d, 0xf0, 0x30, 0xe0, 0xf5, 0x42, 0x59,
+ 0x33, 0x8e, 0x63, 0x62, 0x46, 0x00, 0x69, 0x06, 0x52, 0x83, 0x37, 0x0b,
+ 0x37, 0x12, 0x38, 0x3b, 0x9c, 0xc3, 0x00, 0xed, 0x0a, 0xd4, 0xed, 0x69,
+ 0x01, 0xc5, 0x3a, 0x14, 0x29, 0xaf, 0x3e, 0x9c, 0x0a, 0xaf, 0x56, 0x50,
+ 0x56, 0xcd, 0xa1, 0xb0, 0x88, 0xef, 0xa7, 0x57, 0xe6, 0xe8, 0x2c, 0x42,
+ 0x60, 0x55, 0x22, 0x1f, 0xcc, 0x50, 0xa9, 0xda, 0xc2, 0x73, 0x19, 0x2e,
+ 0xfb, 0x74, 0x88, 0x42, 0x0d, 0x49, 0x12, 0x5e, 0x36, 0x43, 0xe7, 0x33,
+ 0x00, 0x7d, 0xd5, 0x35, 0xa3, 0xaf, 0x1e, 0x93, 0x5e, 0xe6, 0xae, 0x23,
+ 0x41, 0x55, 0x05, 0x19, 0xde, 0xa7, 0xf1, 0x07, 0xbd, 0x58, 0xc1, 0x10,
+ 0x0a, 0x4b, 0x5c, 0xee, 0xe3, 0xfb, 0xe5, 0xf5, 0xfc, 0x1a, 0x4e, 0x51,
+ 0xda, 0x3e, 0xc5, 0x36, 0xda, 0x3e, 0x83, 0xfd, 0x6b, 0x6f, 0x54, 0xdb,
+ 0x68, 0x5a, 0x9c, 0x46, 0xbf, 0x86, 0x23, 0xf1, 0xbd, 0xe1, 0x79, 0x5e,
+ 0xf7, 0x1c, 0xe0, 0xf7, 0xa6, 0xd5, 0x9f, 0x0b, 0x74, 0xd8, 0xf2, 0x0a,
+ 0x97, 0x71, 0xa2, 0xd2, 0x37, 0x05, 0x7e, 0x3e, 0xa4, 0xec, 0x16, 0x92,
+ 0x37, 0xdd, 0x45, 0x0c, 0x17, 0x42, 0xf0, 0x34, 0xf7, 0x38, 0x04, 0xdf,
+ 0xb8, 0xb4, 0xd6, 0xa0, 0x2c, 0x56, 0x96, 0x10, 0x30, 0x34, 0x10, 0x39,
+ 0x9e, 0x95, 0x3b, 0x13, 0xf3, 0x60, 0xa1, 0x48, 0xca, 0x9f, 0x91, 0xfe,
+ 0x42, 0xfb, 0xdf, 0x37, 0xf8, 0x5d, 0x49, 0x82, 0x42, 0x4f, 0x90, 0xdf,
+ 0xae, 0x32, 0x20, 0x9e, 0xb6, 0xcc, 0xa0, 0x30, 0x07, 0x15, 0x64, 0xb8,
+ 0x56, 0x84, 0x1e, 0x16, 0xa3, 0x35, 0xad, 0x14, 0x9d, 0x62, 0x65, 0x0c,
+ 0x77, 0x82, 0x74, 0x41, 0x9c, 0x68, 0x95, 0x03, 0x4f, 0xfc, 0x1c, 0xc7,
+ 0xd6, 0xe6, 0xe7, 0xb3, 0x54, 0x66, 0x87, 0xb6, 0x41, 0x03, 0xe2, 0x20,
+ 0xf7, 0xdb, 0x2a, 0x0a, 0x25, 0x20, 0x60, 0xdf, 0xfd, 0x9f, 0x5f, 0x2c,
+ 0x72, 0x5f, 0x2b, 0xf4, 0x07, 0x9f, 0xf3, 0x8a, 0xde, 0xf0, 0x4f, 0x8a,
+ 0xa7, 0x75, 0xe3, 0xe8, 0xc9, 0xa1, 0xa0, 0x01, 0xa1, 0x20, 0xc8, 0xfb,
+ 0xf9, 0x91, 0xd2, 0x23, 0x4f, 0x6c, 0x53, 0x3b, 0x12, 0x01, 0xac, 0x1f,
+ 0x89, 0x84, 0x98, 0xcd, 0x3c, 0x74, 0x51, 0x92, 0xbe, 0x87, 0x06, 0x62,
+ 0x49, 0xd2, 0x1b, 0x27, 0xfa, 0x28, 0xf8, 0xbd, 0xbb, 0x7a, 0x7d, 0xde,
+ 0xa2, 0x9c, 0x1b, 0x7c, 0x80, 0xe8, 0xe0, 0x43, 0x64, 0xdd, 0x22, 0x7e,
+ 0x2c, 0xe4, 0x79, 0x2e, 0xbd, 0x98, 0x1a, 0x59, 0x7e, 0xbe, 0xfd, 0x9e,
+ 0x0c, 0x31, 0x50, 0x10, 0xdd, 0x62, 0x3c, 0x47, 0x9a, 0x11, 0x1b, 0x48,
+ 0xf3, 0xd1, 0x2c, 0x1b, 0xc2, 0xb5, 0x57, 0x7c, 0xe5, 0x97, 0x6d, 0x78,
+ 0xe7, 0xa2, 0xd6, 0x57, 0x61, 0x95, 0xed, 0x8d, 0xda, 0xc6, 0xdf, 0x2c,
+ 0x1d, 0x48, 0xee, 0x53, 0xd8, 0x1e, 0x80, 0x41, 0xce, 0x58, 0x08, 0x96,
+ 0x6f, 0x82, 0x6e, 0x28, 0x6a, 0x5a, 0x2b, 0x4f, 0x02, 0x4d, 0x99, 0x32,
+ 0xea, 0x60, 0xce, 0x75, 0x57, 0x0c, 0x63, 0xf0, 0xda, 0x51, 0x1d, 0xcc,
+ 0xb8, 0x21, 0x35, 0x10, 0x56, 0xaf, 0x80, 0xb3, 0x0f, 0x17, 0x29, 0x0c,
+ 0x16, 0x07, 0x66, 0xe9, 0xcb, 0x52, 0xcd, 0xec, 0xb1, 0x79, 0xf8, 0xb9,
+ 0x05, 0x08, 0xa1, 0xd7, 0x03, 0x6f, 0x8e, 0x9a, 0x6e, 0xfb, 0x38, 0x3a,
+ 0xff, 0xa7, 0xa1, 0xd8, 0xb1, 0x56, 0x06, 0xde, 0xb1, 0xe7, 0x47, 0xc2,
+ 0xc2, 0xab, 0xa9, 0x5f, 0x01, 0x65, 0x5d, 0x4c, 0xac, 0xd8, 0x1c, 0xfd,
+ 0x2d, 0x55, 0x74, 0x8a, 0x2b, 0x41, 0x2d, 0x50, 0x0c, 0x9c, 0x64, 0xb2,
+ 0xed, 0xaf, 0x2a, 0xb4, 0x58, 0x93, 0xd8, 0xc2, 0xab, 0x04, 0x45, 0xfc,
+ 0xd7, 0x02, 0x1e, 0x14, 0xd4, 0x38, 0xba, 0x24, 0x07, 0x9a, 0x25, 0x52,
+ 0x13, 0xe1, 0xe4, 0x26, 0x66, 0x12, 0xba, 0x13, 0x11, 0x25, 0xea, 0x29,
+ 0xc5, 0xff, 0x34, 0xca, 0x18, 0x34, 0x97, 0x4a, 0x92, 0x00, 0xe8, 0x61,
+ 0x18, 0x85, 0x0b, 0x56, 0x83, 0x48, 0xf9, 0xdb, 0x26, 0x7b, 0x54, 0xc8,
+ 0xd2, 0x63, 0x1e, 0x7b, 0x25, 0x3c, 0x4a, 0xa6, 0xda, 0x10, 0x92, 0xca,
+ 0x8a, 0x2c, 0x89, 0x60, 0x8e, 0xda, 0xf2, 0xab, 0x45, 0x89, 0x3d, 0x8c,
+ 0x2d, 0x35, 0xda, 0xc1, 0x7c, 0x3d, 0x05, 0x8e, 0xad, 0x5b, 0xff, 0x7d,
+ 0x46, 0x7b, 0x74, 0x71, 0xec, 0x05, 0x9a, 0x85, 0xa4, 0x4f, 0xc3, 0x54,
+ 0x64, 0x90, 0xe5, 0x97, 0x89, 0x1a, 0xb0, 0x56, 0x30, 0x13, 0xda, 0x44,
+ 0x2c, 0xb0, 0x50, 0x0c, 0x64, 0x43, 0x4a, 0xd2, 0x2a, 0xb4, 0x8f, 0x9d,
+ 0xa6, 0xe5, 0x3c, 0x0c, 0x7a, 0x44, 0xb3, 0xeb, 0xa7, 0x92, 0xe5, 0x59,
+ 0xa6, 0x43, 0xe9, 0x2b, 0x1f, 0x69, 0x4a, 0xc4, 0x89, 0xe7, 0xe0, 0x04,
+ 0x9f, 0x1d, 0x33, 0x61, 0xe8, 0xab, 0x75, 0x8d, 0x30, 0xd6, 0x7c, 0xca,
+ 0x02, 0xbe, 0xf9, 0x1d, 0x02, 0x4e, 0x0f, 0x88, 0xc9, 0x3f, 0x54, 0x9d,
+ 0x93, 0x0d, 0x44, 0xf8, 0xf6, 0xa7, 0x1a, 0xb6, 0x8b, 0xf5, 0x14, 0xca,
+ 0xbd, 0x6c, 0x2d, 0x9e, 0xfa, 0x80, 0x36, 0x53, 0x06, 0xac, 0x39, 0x0f,
+ 0x6b, 0xdb, 0x2e, 0xe0, 0x4f, 0xf0, 0xa4, 0x44, 0x5a, 0xbb, 0xaa, 0x72,
+ 0x59, 0x3f, 0x58, 0x38, 0xe5, 0x5c, 0x76, 0x31, 0xe6, 0xfe, 0x08, 0x20,
+ 0xbe, 0x3f, 0xea, 0x00, 0x0d, 0x34, 0xd9, 0x4d, 0x06, 0x0a, 0xb5, 0x04,
+ 0x7b, 0x48, 0x22, 0xa9, 0x94, 0x47, 0x44, 0xfd, 0x65, 0x81, 0x45, 0x56,
+ 0x91, 0xf3, 0xb4, 0xdc, 0xa7, 0x6e, 0xb1, 0xa4, 0xc5, 0xd6, 0x81, 0x6a,
+ 0x78, 0x94, 0x8a, 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d,
+ 0xa7, 0xa7, 0xf2, 0x17, 0x92, 0x06, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12,
+ 0x7f, 0x99, 0x40, 0x57, 0xdf, 0x23, 0xa0, 0x49, 0xc1, 0xf1, 0x19, 0x27,
+ 0xea, 0x93, 0xb2, 0x61, 0xa8, 0x9f, 0x1e, 0xd4, 0xe3, 0x56, 0xd1, 0x7e,
+ 0xa2, 0x99, 0x57, 0xad, 0x85, 0xb3, 0xdf, 0x50, 0x06, 0xca, 0x60, 0xd5,
+ 0x87, 0x21, 0xed, 0x7b, 0x65, 0xdc, 0x09, 0xfe, 0x14, 0x88, 0x8b, 0x57,
+ 0x75, 0x4e, 0x4b, 0x27, 0xeb, 0x07, 0x1c, 0xab, 0x8e, 0xc6, 0x3c, 0xdf,
+ 0xc1, 0x04, 0x17, 0xc7, 0xfd, 0x40, 0x01, 0xa6, 0x9b, 0x29, 0xa0, 0xc1,
+ 0x56, 0xa0, 0x8f, 0x69, 0x04, 0x55, 0x32, 0x88, 0xe8, 0x9f, 0x8d, 0x2b,
+ 0x48, 0xaa, 0xd2, 0x3e, 0x76, 0x9b, 0x94, 0xed, 0xd6, 0x34, 0x98, 0xba,
+ 0x16, 0x3c, 0x29, 0xce, 0x3d, 0x14, 0x84, 0x24, 0xac, 0x7d, 0xa4, 0xaa,
+ 0x30, 0xcb, 0xb4, 0xdd, 0xe3, 0x7a, 0x0e, 0x78, 0xc8, 0xcb, 0x75, 0x59,
+ 0x0e, 0x82, 0x4f, 0xf3, 0x28, 0x0a, 0xf6, 0x18, 0x41, 0xa0, 0x7c, 0xe5,
+ 0xff, 0xf2, 0xf9, 0x07, 0xe7, 0x99, 0x4c, 0xa6, 0x10, 0xa7, 0x08, 0x46,
+ 0x84, 0xa5, 0x22, 0xa9, 0x08, 0x49, 0x58, 0xfb, 0x49, 0x54, 0x61, 0x97,
+ 0x69, 0xbb, 0xc6, 0xf4, 0x1c, 0xf1, 0x91, 0x96, 0xea, 0xb2, 0x1d, 0x04,
+ 0x9f, 0xe6, 0x50, 0x15, 0xec, 0x30, 0x83, 0x40, 0xf9, 0xcb, 0xff, 0xe5,
+ 0xf2, 0x0f, 0xcf, 0x32, 0x99, 0x4c, 0x21, 0x4e, 0x10, 0x8d, 0x09, 0x4a,
+ 0x45, 0x52, 0x10, 0x92, 0xb1, 0xf6, 0x92, 0xa8, 0xc3, 0x2e, 0xd3, 0x77,
+ 0x8d, 0xe8, 0x39, 0xe3, 0x23, 0x2d, 0xd5, 0x64, 0x3a, 0x09, 0x3f, 0xcc,
+ 0xa0, 0x2b, 0xd8, 0x61, 0x06, 0x81, 0xf3, 0x97, 0xff, 0xcb, 0xe4, 0x1f,
+ 0x9e, 0x65, 0x32, 0x98, 0x42, 0x9c, 0x21, 0x1a, 0x12, 0x94, 0x8a, 0xa4,
+ 0x21, 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d, 0xa6, 0xef, 0x1b, 0xd0,
+ 0x73, 0xc6, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12, 0x7f, 0x99, 0x40, 0x57,
+ 0xb0, 0xc2, 0x0d, 0x03, 0xe7, 0x2f, 0xff, 0x97, 0xc8, 0x3f, 0x3c, 0xca,
+ 0x65, 0x30, 0x85, 0x38, 0x42, 0x34, 0x25, 0x29, 0x15, 0x48, 0x42, 0x4a,
+ 0xc7, 0xda, 0x4a, 0xa3, 0x0c, 0xbb, 0x4d, 0xde, 0x37, 0xa0, 0xe7, 0x8c,
+ 0x8c, 0xb7, 0x55, 0x90, 0xe8, 0x24, 0xff, 0x32, 0x80, 0xaf, 0x61, 0x84,
+ 0x1a, 0x07, 0xce, 0x5f, 0xff, 0x2f, 0x90, 0x7e, 0x79, 0x94, 0xca, 0x61,
+ 0x0a, 0x70, 0x84, 0x68, 0x4a, 0x52, 0x2a, 0x90, 0x84, 0x95, 0x8f, 0xb4,
+ 0x95, 0x46, 0x19, 0x76, 0x9b, 0xbc, 0x6f, 0x41, 0xcf, 0x19, 0x19, 0x6e,
+ 0xab, 0x21, 0xd0, 0x49, 0xfe, 0x65, 0x01, 0x5e, 0xc3, 0x08, 0x34, 0x0f,
+ 0x9c, 0xbf, 0xfe, 0x5f, 0x20, 0xfc, 0xf3, 0x29, 0x94, 0xc2, 0x14, 0xe1,
+ 0x08, 0xd0, 0x94, 0xa4, 0x55, 0x21, 0x09, 0x2b, 0x1f, 0x69, 0x2a, 0x8c,
+ 0x32, 0xed, 0x37, 0x78, 0xde, 0x83, 0x9e, 0x32, 0x32, 0xdd, 0x56, 0x43,
+ 0xa0, 0x93, 0xfc, 0xca, 0x02, 0xbd, 0x86, 0x10, 0x68, 0x1f, 0x39, 0x7f,
+ 0xfc, 0xbe, 0x41, 0xf9, 0xe6, 0x53, 0x29, 0x84, 0x29, 0xc2, 0x11, 0xa1,
+ 0x29, 0x48, 0xaa, 0x42, 0x12, 0x56, 0x3e, 0xd2, 0x55, 0x18, 0x65, 0xda,
+ 0x6e, 0xf1, 0xbd, 0x07, 0x3c, 0x64, 0x65, 0xba, 0xac, 0x87, 0x41, 0x27,
+ 0xf9, 0x94, 0x05, 0x7b, 0x0c, 0x20, 0xd0, 0x3e, 0x72, 0xff, 0xf9, 0x7c,
+ 0x83, 0xf3, 0xcc, 0xa6, 0x53, 0x08, 0x53, 0x84, 0x23, 0x42, 0x52, 0x91,
+ 0x54, 0x84, 0x24, 0xac, 0x7d, 0xa4, 0xaa, 0x30, 0xcb, 0xb4, 0xdd, 0xe3,
+ 0x7a, 0x0e, 0x78, 0xc8, 0xcb, 0x75, 0x59, 0x0e, 0x82, 0x4f, 0xf3, 0x28,
+ 0x0a, 0xf6, 0x18, 0x41, 0xa0, 0x7c, 0xe5, 0xff, 0xf2, 0xf9, 0x07, 0xe7,
+ 0x99, 0x4c, 0xa6, 0x10, 0xa7, 0x08, 0x46, 0x84, 0xa5, 0x22, 0xa9, 0x08,
+ 0x49, 0x58, 0xfb, 0x49, 0x54, 0x61, 0x97, 0x69, 0xbb, 0xc6, 0xf4, 0x1c,
+ 0xf1, 0x91, 0x96, 0xea, 0xb2, 0x1d, 0x04, 0x9f, 0xe6, 0x50, 0x15, 0xec,
+ 0x30, 0x83, 0x40, 0xf9, 0xcb, 0xff, 0xe5, 0xf2, 0x0f, 0xcf, 0x32, 0x99,
+ 0x4c, 0x21, 0x4e, 0x10, 0x8d, 0x09, 0x4a, 0x45, 0x52, 0x10, 0x92, 0xb1,
+ 0xf6, 0x92, 0xa8, 0xc3, 0x2e, 0xd3, 0x77, 0x8d, 0xe8, 0x39, 0xe3, 0x23,
+ 0x2d, 0xd5, 0x64, 0x3a, 0x09, 0x3f, 0xcc, 0xa0, 0x2b, 0xd8, 0x61, 0x06,
+ 0x81, 0xf3, 0x97, 0xff, 0xcb, 0xe4, 0x1f, 0x9e, 0x65, 0x32, 0x98, 0x42,
+ 0x9c, 0x21, 0x1a, 0x12, 0x94, 0x8a, 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25,
+ 0x51, 0x86, 0x5d, 0xa6, 0xef, 0x1b, 0xd0, 0x73, 0xc6, 0x46, 0x5b, 0xaa,
+ 0xc8, 0x74, 0x12, 0x7f, 0x99, 0x40, 0x57, 0xb0, 0xc2, 0x0d, 0x03, 0xe7,
+ 0x2f, 0xff, 0x97, 0xc8, 0x3f, 0x3c, 0xca, 0x65, 0x30, 0x85, 0x38, 0x42,
+ 0x34, 0x25, 0x29, 0x15, 0x48, 0x42, 0x4a, 0xc7, 0xda, 0x4a, 0xa3, 0x0c,
+ 0xbb, 0x4d, 0xde, 0x37, 0xa0, 0xe7, 0x8c, 0x8c, 0xb7, 0x55, 0x90, 0xe8,
+ 0x24, 0xff, 0x32, 0x80, 0xaf, 0x61, 0x84, 0x1a, 0x07, 0xce, 0x5f, 0xff,
+ 0x2f, 0x90, 0x7e, 0x79, 0x94, 0xca, 0x61, 0x0a, 0x70, 0x84, 0x68, 0x4a,
+ 0x52, 0x2a, 0x90, 0x84, 0x95, 0x8f, 0xb4, 0x95, 0x46, 0x19, 0x76, 0x9b,
+ 0xbc, 0x6f, 0x41, 0xcf, 0x19, 0x19, 0x6e, 0xab, 0x21, 0xd0, 0x49, 0xfe,
+ 0x65, 0x01, 0x5e, 0xc3, 0x08, 0x34, 0x0f, 0x9c, 0xbf, 0xfe, 0x5f, 0x20,
+ 0xfc, 0xf3, 0x29, 0x94, 0xc2, 0x14, 0xe1, 0x08, 0xd0, 0x94, 0xa4, 0x55,
+ 0x21, 0x09, 0x2b, 0x1f, 0x69, 0x2a, 0x8c, 0x32, 0xed, 0x37, 0x78, 0xde,
+ 0x83, 0x9e, 0x32, 0x32, 0xdd, 0x56, 0x43, 0xa0, 0x93, 0xfc, 0xca, 0x02,
+ 0xbd, 0x86, 0x10, 0x68, 0x1f, 0x39, 0x7f, 0xfc, 0xbe, 0x41, 0xf9, 0xe6,
+ 0x53, 0x29, 0x84, 0x29, 0xc2, 0x11, 0xa1, 0x29, 0x48, 0xaa, 0x42, 0x12,
+ 0x56, 0x3e, 0xd2, 0x55, 0x18, 0x65, 0xda, 0x6e, 0xf1, 0xbd, 0x07, 0x3c,
+ 0x64, 0x65, 0xba, 0xac, 0x87, 0x41, 0x27, 0xf9, 0x94, 0x05, 0x7b, 0x0c,
+ 0x20, 0xd0, 0x3e, 0x72, 0xff, 0xf9, 0x7c, 0x83, 0xf3, 0xcc, 0xa6, 0x53,
+ 0x08, 0x53, 0x84, 0x23, 0x42, 0x52, 0x91, 0x54, 0x84, 0x24, 0xac, 0x7d,
+ 0xa4, 0xaa, 0x30, 0xcb, 0xb4, 0xdd, 0xe3, 0x7a, 0x0e, 0x78, 0xc8, 0xcb,
+ 0x75, 0x59, 0x0e, 0x82, 0x4f, 0xf3, 0x28, 0x0a, 0xf6, 0x18, 0x41, 0xa0,
+ 0x7c, 0xe5, 0xff, 0xf2, 0xf9, 0x07, 0xe7, 0x99, 0x4c, 0xa6, 0x10, 0xa7,
+ 0x08, 0x46, 0x84, 0xa5, 0x22, 0xa9, 0x08, 0x49, 0x58, 0xfb, 0x49, 0x54,
+ 0x61, 0x97, 0x69, 0xbb, 0xc6, 0xf4, 0x1c, 0xf1, 0x91, 0x96, 0xea, 0xb2,
+ 0x1d, 0x04, 0x9f, 0xe6, 0x50, 0x15, 0xec, 0x30, 0x83, 0x40, 0xf9, 0xcb,
+ 0xff, 0xe5, 0xf2, 0x0f, 0xcf, 0x32, 0x99, 0x4c, 0x21, 0x4e, 0x10, 0x8d,
+ 0x09, 0x4a, 0x45, 0x52, 0x10, 0x92, 0xb1, 0xf6, 0x92, 0xa8, 0xc3, 0x2e,
+ 0xd3, 0x77, 0x8d, 0xe8, 0x39, 0xe3, 0x23, 0x2d, 0xd5, 0x64, 0x3a, 0x09,
+ 0x3f, 0xcc, 0xa0, 0x2b, 0xd8, 0x61, 0x06, 0x81, 0xf3, 0x97, 0xff, 0xcb,
+ 0xe4, 0x1f, 0x9e, 0x65, 0x32, 0x98, 0x42, 0x9c, 0x21, 0x1a, 0x12, 0x94,
+ 0x8a, 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d, 0xa6, 0xef,
+ 0x1b, 0xd0, 0x73, 0xc6, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12, 0x7f, 0x99,
+ 0x40, 0x57, 0xb0, 0xc2, 0x0d, 0x03, 0xe7, 0x2f, 0xff, 0x97, 0xc8, 0x3f,
+ 0x3c, 0xca, 0x65, 0x30, 0x85, 0x38, 0x42, 0x34, 0x25, 0x29, 0x15, 0x48,
+ 0x42, 0x4a, 0xc7, 0xda, 0x4a, 0xa3, 0x0c, 0xbb, 0x4d, 0xde, 0x37, 0xa0,
+ 0xe7, 0x8c, 0x8c, 0xb7, 0x55, 0x90, 0xe8, 0x24, 0xff, 0x32, 0x80, 0xaf,
+ 0x61, 0x84, 0x1a, 0x07, 0xce, 0x5f, 0xff, 0x2f, 0x90, 0x7e, 0x79, 0x94,
+ 0xca, 0x61, 0x0a, 0x70, 0x84, 0x68, 0x4a, 0x52, 0x2a, 0x90, 0x84, 0x95,
+ 0x8f, 0xb4, 0x95, 0x46, 0x19, 0x76, 0x9b, 0xbc, 0x6f, 0x41, 0xcf, 0x19,
+ 0x19, 0x6e, 0xab, 0x21, 0xd0, 0x49, 0xfe, 0x65, 0x01, 0x5e, 0xc3, 0x08,
+ 0x34, 0x0f, 0x9c, 0xbf, 0xfe, 0x5f, 0x20, 0xfc, 0xf3, 0x29, 0x94, 0xc2,
+ 0x14, 0xe1, 0x08, 0xd0, 0x94, 0xa4, 0x55, 0x21, 0x09, 0x2b, 0x1f, 0x69,
+ 0x2a, 0x8c, 0x32, 0xed, 0x37, 0x78, 0xde, 0x83, 0x9e, 0x32, 0x32, 0xdd,
+ 0x56, 0x43, 0xa0, 0x93, 0xfc, 0xca, 0x02, 0xbd, 0x86, 0x10, 0x68, 0x1f,
+ 0x39, 0x7f, 0xfc, 0xbe, 0x41, 0xf9, 0xe6, 0x53, 0x29, 0x84, 0x29, 0xc2,
+ 0x11, 0xa1, 0x29, 0x48, 0xaa, 0x42, 0x12, 0x56, 0x3e, 0xd2, 0x55, 0x18,
+ 0x65, 0xda, 0x6e, 0xf1, 0xbd, 0x07, 0x3c, 0x64, 0x65, 0xba, 0xac, 0x87,
+ 0x41, 0x27, 0xf9, 0x94, 0x05, 0x7b, 0x0c, 0x20, 0xd0, 0x3e, 0x72, 0xff,
+ 0xf9, 0x7c, 0x83, 0xf3, 0xcc, 0xa6, 0x53, 0x08, 0x53, 0x84, 0x23, 0x42,
+ 0x52, 0x91, 0x54, 0x84, 0x24, 0xac, 0x7d, 0xa4, 0xaa, 0x30, 0xcb, 0xb4,
+ 0xdd, 0xe3, 0x7a, 0x0e, 0x78, 0xc8, 0xcb, 0x75, 0x59, 0x0e, 0x82, 0x4f,
+ 0xf3, 0x28, 0x0a, 0xf6, 0x18, 0x41, 0xa0, 0x7c, 0xe5, 0xff, 0xf2, 0xf9,
+ 0x07, 0xe7, 0x99, 0x4c, 0xa6, 0x10, 0xa7, 0x08, 0x46, 0x84, 0xa5, 0x22,
+ 0xa9, 0x08, 0x49, 0x58, 0xfb, 0x49, 0x54, 0x61, 0x97, 0x69, 0xbb, 0xc6,
+ 0xf4, 0x1c, 0xf1, 0x91, 0x96, 0xea, 0xb2, 0x1d, 0x04, 0x9f, 0xe6, 0x50,
+ 0x15, 0xec, 0x30, 0x83, 0x40, 0xf9, 0xcb, 0xff, 0xe5, 0xf2, 0x0f, 0xcf,
+ 0x32, 0x99, 0x4c, 0x21, 0x4e, 0x10, 0x8d, 0x09, 0x4a, 0x45, 0x52, 0x10,
+ 0x92, 0xb1, 0xf6, 0x92, 0xa8, 0xc3, 0x2e, 0xd3, 0x77, 0x8d, 0xe8, 0x39,
+ 0xe3, 0x23, 0x2d, 0xd5, 0x64, 0x3a, 0x09, 0x3f, 0xcc, 0xa0, 0x2b, 0xd8,
+ 0x61, 0x06, 0x81, 0xf3, 0x97, 0xff, 0xcb, 0xe4, 0x1f, 0x9e, 0x65, 0x32,
+ 0x98, 0x42, 0x9c, 0x21, 0x1a, 0x12, 0x94, 0x8a, 0xa4, 0x21, 0x25, 0x63,
+ 0xed, 0x25, 0x51, 0x86, 0x5d, 0xa6, 0xef, 0x1b, 0xd0, 0x73, 0xc6, 0x46,
+ 0x5b, 0xaa, 0xc8, 0x74, 0x12, 0x7f, 0x99, 0x40, 0x57, 0xb0, 0xc2, 0x0d,
+ 0x03, 0xe7, 0x2f, 0xff, 0x97, 0xc8, 0x3f, 0x3c, 0xca, 0x65, 0x30, 0x85,
+ 0x38, 0x42, 0x34, 0x25, 0x29, 0x15, 0x48, 0x42, 0x4a, 0xc7, 0xda, 0x4a,
+ 0xa3, 0x0c, 0xbb, 0x4d, 0xde, 0x37, 0xa0, 0xe7, 0x8c, 0x8c, 0xb7, 0x55,
+ 0x90, 0xe8, 0x24, 0xff, 0x32, 0x80, 0xaf, 0x61, 0x84, 0x1a, 0x07, 0xce,
+ 0x5f, 0xff, 0x2f, 0x90, 0x7e, 0x79, 0x94, 0xca, 0x61, 0x0a, 0x70, 0x84,
+ 0x68, 0x4a, 0x52, 0x2a, 0x90, 0x84, 0x95, 0x8f, 0xb4, 0x95, 0x46, 0x19,
+ 0x76, 0x9b, 0xbc, 0x6f, 0x41, 0xcf, 0x19, 0x19, 0x6e, 0xab, 0x21, 0xd0,
+ 0x49, 0xfe, 0x65, 0x01, 0x5e, 0xc3, 0x08, 0x34, 0x0f, 0x9c, 0xbf, 0xfe,
+ 0x5f, 0x20, 0xfc, 0xf3, 0x29, 0x94, 0xc2, 0x14, 0xe1, 0x08, 0xd0, 0x94,
+ 0xa4, 0x55, 0x21, 0x09, 0x2b, 0x1f, 0x69, 0x2a, 0x8c, 0x32, 0xed, 0x37,
+ 0x78, 0xde, 0x83, 0x9e, 0x32, 0x32, 0xdd, 0x56, 0x43, 0xa0, 0x93, 0xfc,
+ 0xca, 0x02, 0xbd, 0x86, 0x10, 0x68, 0x1f, 0x39, 0x7f, 0xfc, 0xbe, 0x41,
+ 0xf9, 0xe6, 0x53, 0x29, 0x84, 0x29, 0xc2, 0x11, 0xa1, 0x29, 0x48, 0xaa,
+ 0x42, 0x12, 0x56, 0x3e, 0xd2, 0x55, 0x18, 0x65, 0xda, 0x6e, 0xf1, 0xbd,
+ 0x07, 0x3c, 0x64, 0x65, 0xba, 0xac, 0x87, 0x41, 0x27, 0xf9, 0x94, 0x05,
+ 0x7b, 0x0c, 0x20, 0xd0, 0x3e, 0x72, 0xff, 0xf9, 0x7c, 0x83, 0xf3, 0xcc,
+ 0xa6, 0x53, 0x08, 0x53, 0x84, 0x23, 0x42, 0x52, 0x91, 0x54, 0x84, 0x24,
+ 0xac, 0x7d, 0xa4, 0xaa, 0x30, 0xcb, 0xb4, 0xdd, 0xe3, 0x7a, 0x0e, 0x78,
+ 0xc8, 0xcb, 0x75, 0x59, 0x0e, 0x82, 0x4f, 0xf3, 0x28, 0x0a, 0xf6, 0x18,
+ 0x41, 0xa0, 0x7c, 0xe5, 0xff, 0xf2, 0xf9, 0x07, 0xe7, 0x99, 0x4c, 0xa6,
+ 0x10, 0xa7, 0x08, 0x46, 0x84, 0xa5, 0x22, 0xa9, 0x08, 0x49, 0x58, 0xfb,
+ 0x49, 0x54, 0x61, 0x97, 0x69, 0xbb, 0xc6, 0xf4, 0x1c, 0xf1, 0x91, 0x96,
+ 0xea, 0xb2, 0x1d, 0x04, 0x9f, 0xe6, 0x50, 0x15, 0xec, 0x30, 0x83, 0x40,
+ 0xf9, 0xcb, 0xff, 0xe5, 0xf2, 0x0f, 0xcf, 0x32, 0x99, 0x4c, 0x21, 0x4e,
+ 0x10, 0x8d, 0x09, 0x4a, 0x45, 0x52, 0x10, 0x92, 0xb1, 0xf6, 0x92, 0xa8,
+ 0xc3, 0x2e, 0xd3, 0x77, 0x8d, 0xe8, 0x39, 0xe3, 0x23, 0x2d, 0xd5, 0x64,
+ 0x3a, 0x09, 0x3f, 0xcc, 0xa0, 0x2b, 0xd8, 0x61, 0x06, 0x81, 0xf3, 0x97,
+ 0xff, 0xcb, 0xe4, 0x1f, 0x9e, 0x65, 0x32, 0x98, 0x42, 0x9c, 0x21, 0x1a,
+ 0x12, 0x94, 0x8a, 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d,
+ 0xa6, 0xef, 0x1b, 0xd0, 0x73, 0xc6, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12,
+ 0x7f, 0x99, 0x40, 0x57, 0xb0, 0xc2, 0x0d, 0x03, 0xe7, 0x2f, 0xff, 0x97,
+ 0xc8, 0x3f, 0x3c, 0xca, 0x65, 0x30, 0x85, 0x38, 0x42, 0x34, 0x25, 0x29,
+ 0x15, 0x48, 0x42, 0x4a, 0xc7, 0xda, 0x4a, 0xa3, 0x0c, 0xbb, 0x4d, 0xde,
+ 0x37, 0xa0, 0xe7, 0x8c, 0x8c, 0xb7, 0x55, 0x90, 0xe8, 0x24, 0xff, 0x32,
+ 0x80, 0xaf, 0x61, 0x84, 0x1a, 0x07, 0xce, 0x5f, 0xff, 0x2f, 0x90, 0x7e,
+ 0x79, 0x94, 0xca, 0x61, 0x0a, 0x70, 0x84, 0x68, 0x4a, 0x52, 0x2a, 0x90,
+ 0x84, 0x95, 0x8f, 0xb4, 0x95, 0x46, 0x19, 0x76, 0x9b, 0xbc, 0x6f, 0x41,
+ 0xcf, 0x19, 0x19, 0x6e, 0xab, 0x21, 0xd0, 0x49, 0xfe, 0x65, 0x01, 0x5e,
+ 0xc3, 0x08, 0x34, 0x0f, 0x9c, 0xbf, 0xfe, 0x5f, 0x20, 0xfc, 0xf3, 0x29,
+ 0x94, 0xc2, 0x14, 0xe1, 0x08, 0xd0, 0x94, 0xa4, 0x55, 0x21, 0x09, 0x2b,
+ 0x1f, 0x69, 0x2a, 0x8c, 0x32, 0xed, 0x37, 0x78, 0xde, 0x83, 0x9e, 0x32,
+ 0x32, 0xdd, 0x56, 0x43, 0xa0, 0x93, 0xfc, 0xca, 0x02, 0xbd, 0x86, 0x10,
+ 0x68, 0x1f, 0x39, 0x7f, 0xfc, 0xbe, 0x41, 0xf9, 0xe6, 0x53, 0x29, 0x84,
+ 0x29, 0xc2, 0x11, 0xa1, 0x29, 0x48, 0xaa, 0x42, 0x12, 0x56, 0x3e, 0xd2,
+ 0x55, 0x18, 0x65, 0xda, 0x6e, 0xf1, 0xbd, 0x07, 0x3c, 0x64, 0x65, 0xba,
+ 0xac, 0x87, 0x41, 0x27, 0xf9, 0x94, 0x05, 0x7b, 0x0c, 0x20, 0xd0, 0x3e,
+ 0x72, 0xff, 0xf9, 0x7c, 0x83, 0xf3, 0xcc, 0xa6, 0x53, 0x08, 0x53, 0x84,
+ 0x23, 0x42, 0x52, 0x91, 0x54, 0x84, 0x24, 0xac, 0x7d, 0xa4, 0xaa, 0x30,
+ 0xcb, 0xb4, 0xdd, 0xe3, 0x7a, 0x0e, 0x78, 0xc8, 0xcb, 0x75, 0x59, 0x0e,
+ 0x82, 0x4f, 0xf3, 0x28, 0x0a, 0xf6, 0x18, 0x41, 0xa0, 0x7c, 0xe5, 0xff,
+ 0xf2, 0xf9, 0x07, 0xe7, 0x99, 0x4c, 0xa6, 0x10, 0xa7, 0x08, 0x46, 0x84,
+ 0xa5, 0x22, 0xa9, 0x08, 0x49, 0x58, 0xfb, 0x49, 0x54, 0x61, 0x97, 0x69,
+ 0xbb, 0xc6, 0xf4, 0x1c, 0xf1, 0x91, 0x96, 0xea, 0xb2, 0x1d, 0x04, 0x9f,
+ 0xe6, 0x50, 0x15, 0xec, 0x30, 0x83, 0x40, 0xf9, 0xcb, 0xff, 0xe5, 0xf2,
+ 0x0f, 0xcf, 0x32, 0x99, 0x4c, 0x21, 0x4e, 0x10, 0x8d, 0x09, 0x4a, 0x45,
+ 0x52, 0x10, 0x92, 0xb1, 0xf6, 0x92, 0xa8, 0xc3, 0x2e, 0xd3, 0x77, 0x8d,
+ 0xe8, 0x39, 0xe3, 0x23, 0x2d, 0xd5, 0x64, 0x3a, 0x09, 0x3f, 0xcc, 0xa0,
+ 0x2b, 0xd8, 0x61, 0x06, 0x81, 0xf3, 0x97, 0xff, 0xcb, 0xe4, 0x1f, 0x9e,
+ 0x65, 0x32, 0x98, 0x42, 0x9c, 0x21, 0x1a, 0x12, 0x94, 0x8a, 0xa4, 0x21,
+ 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d, 0xa6, 0xef, 0x1b, 0xd0, 0x73,
+ 0xc6, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12, 0x7f, 0x99, 0x40, 0x57, 0xb0,
+ 0xc2, 0x0d, 0x03, 0xe7, 0x2f, 0xff, 0x97, 0xc8, 0x3f, 0x3c, 0xca, 0x65,
+ 0x30, 0x85, 0x38, 0x42, 0x34, 0x25, 0x29, 0x15, 0x48, 0x42, 0x4a, 0xc7,
+ 0xda, 0x4a, 0xa3, 0x0c, 0xbb, 0x4d, 0xde, 0x37, 0xa0, 0xe7, 0x8c, 0x8c,
+ 0xb7, 0x55, 0x90, 0xe8, 0x24, 0xff, 0x32, 0x80, 0xaf, 0x61, 0x84, 0x1a,
+ 0x07, 0xce, 0x5f, 0xff, 0x2f, 0x90, 0x7e, 0x79, 0x94, 0xca, 0x61, 0x0a,
+ 0x70, 0x84, 0x68, 0x4a, 0x52, 0x2a, 0x90, 0x84, 0x95, 0x8f, 0xb4, 0x95,
+ 0x46, 0x19, 0x76, 0x9b, 0xbc, 0x6f, 0x41, 0xcf, 0x19, 0x19, 0x6e, 0xab,
+ 0x21, 0xd0, 0x49, 0xfe, 0x65, 0x01, 0x5e, 0xc3, 0x08, 0x34, 0x0f, 0x9c,
+ 0xbf, 0xfe, 0x5f, 0x20, 0xfc, 0xf3, 0x29, 0x94, 0xc2, 0x14, 0xe1, 0x08,
+ 0xd0, 0x94, 0xa4, 0x55, 0x21, 0x09, 0x2b, 0x1f, 0x69, 0x2a, 0x8c, 0x32,
+ 0xed, 0x37, 0x78, 0xde, 0x83, 0x9e, 0x32, 0x32, 0xdd, 0x56, 0x43, 0xa0,
+ 0x93, 0xfc, 0xca, 0x02, 0xbd, 0x86, 0x10, 0x68, 0x1f, 0x39, 0x7f, 0xfc,
+ 0xbe, 0x41, 0xf9, 0xe6, 0x53, 0x29, 0x84, 0x29, 0xc2, 0x11, 0xa1, 0x29,
+ 0x48, 0xaa, 0x42, 0x12, 0x56, 0x3e, 0xd2, 0x55, 0x18, 0x65, 0xda, 0x6e,
+ 0xf1, 0xbd, 0x07, 0x3c, 0x64, 0x65, 0xba, 0xac, 0x87, 0x41, 0x27, 0xf9,
+ 0x94, 0x05, 0x7b, 0x0c, 0x20, 0xd0, 0x3e, 0x72, 0xff, 0xf9, 0x7c, 0x83,
+ 0xf3, 0xcc, 0xa6, 0x53, 0x08, 0x53, 0x84, 0x23, 0x42, 0x52, 0x91, 0x54,
+ 0x84, 0x24, 0xac, 0x7d, 0xa4, 0xaa, 0x30, 0xcb, 0xb4, 0xdd, 0xe3, 0x7a,
+ 0x0e, 0x78, 0xc8, 0xcb, 0x75, 0x59, 0x0e, 0x82, 0x4f, 0xf3, 0x28, 0x0a,
+ 0xf6, 0x18, 0x41, 0xa0, 0x7c, 0xe5, 0xff, 0xf2, 0xf9, 0x07, 0xe7, 0x99,
+ 0x4c, 0xa6, 0x10, 0xa7, 0x08, 0x46, 0x84, 0xa5, 0x22, 0xa9, 0x08, 0x49,
+ 0x58, 0xfb, 0x49, 0x54, 0x61, 0x97, 0x69, 0xbb, 0xc6, 0xf4, 0x1c, 0xf1,
+ 0x91, 0x96, 0xea, 0xb2, 0x1d, 0x04, 0x9f, 0xe6, 0x50, 0x15, 0xec, 0x30,
+ 0x83, 0x40, 0xf9, 0xcb, 0xff, 0xe5, 0xf2, 0x0f, 0xcf, 0x32, 0x99, 0x4c,
+ 0x21, 0x4e, 0x10, 0x8d, 0x09, 0x4a, 0x45, 0x52, 0x10, 0x92, 0xb1, 0xf6,
+ 0x92, 0xa8, 0xc3, 0x2e, 0xd3, 0x77, 0x8d, 0xe8, 0x39, 0xe3, 0x23, 0x2d,
+ 0xd5, 0x64, 0x3a, 0x09, 0x3f, 0xcc, 0xa0, 0x2b, 0xd8, 0x61, 0x06, 0x81,
+ 0xf3, 0x97, 0xff, 0xcb, 0xe4, 0x1f, 0x9e, 0x65, 0x32, 0x98, 0x42, 0x9c,
+ 0x21, 0x1a, 0x12, 0x94, 0x8a, 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25, 0x51,
+ 0x86, 0x5d, 0xa6, 0xef, 0x1b, 0xd0, 0x73, 0xc6, 0x46, 0x5b, 0xaa, 0xc8,
+ 0x74, 0x12, 0x7f, 0x99, 0x40, 0x57, 0xb0, 0xc2, 0x0d, 0x03, 0xe7, 0x2f,
+ 0xff, 0x97, 0xc8, 0x3f, 0x3c, 0xca, 0x65, 0x30, 0x85, 0x38, 0x42, 0x34,
+ 0x25, 0x29, 0x15, 0x48, 0x42, 0x4a, 0xc7, 0xda, 0x4a, 0xa3, 0x0c, 0xbb,
+ 0x4d, 0xde, 0x37, 0xa0, 0xe7, 0x8c, 0x8c, 0xb7, 0x55, 0x90, 0xe8, 0x24,
+ 0xff, 0x32, 0x80, 0xaf, 0x61, 0x84, 0x1a, 0x07, 0xce, 0x5f, 0xff, 0x2f,
+ 0x90, 0x7e, 0x79, 0x94, 0xca, 0x61, 0x0a, 0x70, 0x84, 0x68, 0x4a, 0x52,
+ 0x2a, 0x90, 0x84, 0x95, 0x8f, 0xb4, 0x95, 0x46, 0x19, 0x76, 0x9b, 0xbc,
+ 0x6f, 0x41, 0xcf, 0x19, 0x19, 0x6e, 0xab, 0x21, 0xd0, 0x49, 0xfe, 0x65,
+ 0x01, 0x5e, 0xc3, 0x08, 0x34, 0x0f, 0x9c, 0xbf, 0xfe, 0x5f, 0x20, 0xfc,
+ 0xf3, 0x29, 0x94, 0xc2, 0x14, 0xe1, 0x08, 0xd0, 0x94, 0xa4, 0x55, 0x21,
+ 0x09, 0x2b, 0x1f, 0x69, 0x2a, 0x8c, 0x32, 0xed, 0x37, 0x78, 0xde, 0x83,
+ 0x9e, 0x32, 0x32, 0xdd, 0x56, 0x43, 0xa0, 0x93, 0xfc, 0xca, 0x02, 0xbd,
+ 0x86, 0x10, 0x68, 0x1f, 0x39, 0x7f, 0xfc, 0xbe, 0x41, 0xf9, 0xe6, 0x53,
+ 0x29, 0x84, 0x29, 0xc2, 0x11, 0xa1, 0x29, 0x48, 0xaa, 0x42, 0x12, 0x56,
+ 0x3e, 0xd2, 0x55, 0x18, 0x65, 0xda, 0x6e, 0xf1, 0xbd, 0x07, 0x3c, 0x64,
+ 0x65, 0xba, 0xac, 0x87, 0x41, 0x27, 0xf9, 0x94, 0x05, 0x7b, 0x0c, 0x20,
+ 0xd0, 0x3e, 0x72, 0xff, 0xf9, 0x7c, 0x83, 0xf3, 0xcc, 0xa6, 0x53, 0x08,
+ 0x53, 0x84, 0x23, 0x42, 0x52, 0x91, 0x54, 0x84, 0x24, 0xac, 0x7d, 0xa4,
+ 0xaa, 0x30, 0xcb, 0xb4, 0xdd, 0xe3, 0x7a, 0x0e, 0x78, 0xc8, 0xcb, 0x75,
+ 0x59, 0x0e, 0x82, 0x4f, 0xf3, 0x28, 0x0a, 0xf6, 0x18, 0x41, 0xa0, 0x7c,
+ 0xe5, 0xff, 0xf2, 0xf9, 0x07, 0xe7, 0x99, 0x4c, 0xa6, 0x10, 0xa7, 0x08,
+ 0x46, 0x84, 0xa5, 0x22, 0xa9, 0x08, 0x49, 0x58, 0xfb, 0x49, 0x54, 0x61,
+ 0x97, 0x69, 0xbb, 0xc6, 0xf4, 0x1c, 0xf1, 0x91, 0x96, 0xea, 0xb2, 0x1d,
+ 0x04, 0x9f, 0xe6, 0x50, 0x15, 0xec, 0x30, 0x83, 0x40, 0xf9, 0xcb, 0xff,
+ 0xe5, 0xf2, 0x0f, 0xcf, 0x32, 0x99, 0x4c, 0x21, 0x4e, 0x10, 0x8d, 0x09,
+ 0x4a, 0x45, 0x52, 0x10, 0x92, 0xb1, 0xf6, 0x92, 0xa8, 0xc3, 0x2e, 0xd3,
+ 0x77, 0x8d, 0xe8, 0x39, 0xe3, 0x23, 0x2d, 0xd5, 0x64, 0x3a, 0x09, 0x3f,
+ 0xcc, 0xa0, 0x2b, 0xd8, 0x61, 0x06, 0x81, 0xf3, 0x97, 0xff, 0xcb, 0xe4,
+ 0x1f, 0x9e, 0x65, 0x32, 0x98, 0x42, 0x9c, 0x21, 0x1a, 0x12, 0x94, 0x8a,
+ 0xa4, 0x21, 0x25, 0x63, 0xed, 0x25, 0x51, 0x86, 0x5d, 0xa6, 0xef, 0x1b,
+ 0xd0, 0x73, 0xc6, 0x46, 0x5b, 0xaa, 0xc8, 0x74, 0x12, 0x7f, 0x99, 0x40,
+ 0x57, 0xb0, 0xc2, 0x0d, 0x03, 0xe7, 0x2f, 0xff, 0x97, 0xc8, 0x3f, 0x3c,
+ 0xca, 0x65, 0x30, 0x85, 0x38, 0x42, 0x34, 0x25, 0x29, 0x15, 0x48, 0x42,
+ 0x4a, 0xc7, 0xda, 0x4a, 0xa3, 0x0c, 0xbb, 0x4d, 0xde, 0x37, 0xa0, 0xe7,
+ 0x8c, 0x8c, 0xb7, 0x55, 0x90, 0xe8, 0x24, 0xff, 0x32, 0x80, 0xaf, 0x61,
+ 0x84, 0x1a, 0x07, 0xce, 0x5f, 0xff, 0x2f, 0x90, 0x7e, 0x79, 0x94, 0xca,
+ 0x61, 0x0a, 0x70, 0x84, 0x68, 0x4a, 0x52, 0x2a, 0x90, 0x84, 0x95, 0x8f,
+ 0xb4, 0x95, 0x46, 0x19, 0x76, 0x9b, 0xbc, 0x6f, 0x41, 0xcf, 0x19, 0x19,
+ 0x6e, 0xab, 0x21, 0xd0, 0x49, 0xfe, 0x65, 0x01, 0x5e, 0xc3, 0x08, 0x34,
+ 0x0f, 0x9c, 0xbf, 0xfe, 0x5f, 0x20, 0xfc, 0xf3, 0x29, 0x94, 0xc2, 0x14,
+ 0xe1, 0x08, 0xd0, 0x94, 0xa4, 0x55, 0x21, 0x09, 0x2b, 0x1f, 0x69, 0x2a,
+ 0x8c, 0x32, 0xed, 0x37, 0x78, 0xde, 0x83, 0x9e, 0x32, 0x32, 0xdd, 0x56,
+ 0x43, 0xa0, 0x93, 0xfc, 0xca, 0x02, 0xbd, 0x86, 0x10, 0x68, 0x1f, 0x39,
+ 0x7f, 0xfc, 0xbe, 0x41, 0xf9, 0xe6, 0x53, 0x29, 0x84, 0x29, 0xc2, 0x11,
+ 0xa1, 0x29, 0x48, 0xaa, 0x42, 0x12, 0x56, 0x3e, 0xd2, 0x55, 0x18, 0x65,
+ 0xda, 0x6e, 0xf1, 0xbd, 0x07, 0x3c, 0x64, 0x65, 0xba, 0xac, 0x87, 0x41,
+ 0x27, 0xf9, 0x94, 0x05, 0xa0,
+};
+static_assert(sizeof(kBytesTestReadSymbol6) == kNumBytesTestReadSymbol6, "");
+
+// The kBytesTestReadSymbol7[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][8] = {
+// // pdf: 1/7, 1/7, 1/7, 1/7, 1/7, 1/7, 1/7
+// { 32768 - 4681, 32768 - 9362, 32768 - 14043, 32768 - 18725,
+// 32768 - 23406, 32768 - 28087, 0, 0 },
+// // pdf: 3/14, 2/14, 2/14, 2/14, 2/14, 2/14, 1/14
+// { 32768 - 7022, 32768 - 11703, 32768 - 16384, 32768 - 21065,
+// 32768 - 25746, 32768 - 30427, 0, 0 },
+// // pdf: 1/14, 1/14, 2/14, 2/14, 2/14, 3/14, 3/14
+// { 32768 - 2341, 32768 - 4681, 32768 - 9362, 32768 - 14043,
+// 32768 - 18725, 32768 - 25746, 0, 0 },
+// // pdf: 1/14, 2/14, 3/14, 3/14, 2/14, 2/14, 1/14
+// { 32768 - 2341, 32768 - 7022, 32768 - 14043, 32768 - 21065,
+// 32768 - 25746, 32768 - 30427, 0, 0 },
+// };
+// constexpr int kSymbols[14][4] = { { 0, 4, 6, 3 }, //
+// { 1, 5, 5, 2 }, //
+// { 2, 6, 4, 1 }, //
+// { 3, 0, 3, 0 }, //
+// { 4, 1, 2, 6 }, //
+// { 5, 2, 1, 5 }, //
+// { 6, 3, 0, 4 }, //
+// { 0, 0, 6, 5 }, //
+// { 2, 1, 4, 3 }, //
+// { 4, 3, 6, 1 }, //
+// { 6, 5, 2, 4 }, //
+// { 1, 0, 5, 2 }, //
+// { 3, 2, 3, 2 }, //
+// { 5, 4, 5, 3 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 1024; ++i) {
+// for (int j = 0; j < 14; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 7);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf(" constexpr size_t kNumBytesTestReadSymbol7 = %u;\n", bw.pos);
+// printf(" constexpr uint8_t kBytesTestReadSymbol7[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n };\n");
+
+constexpr size_t kNumBytesTestReadSymbol7 = 19874;
+constexpr uint8_t kBytesTestReadSymbol7[] = {
+ 0x1c, 0x6a, 0xfc, 0x4b, 0xd1, 0xb5, 0x8c, 0x20, 0x72, 0x45, 0x48, 0x21,
+ 0x9e, 0x71, 0xe8, 0xc4, 0x91, 0x51, 0xab, 0xfd, 0x9c, 0x61, 0xf7, 0x98,
+ 0xd4, 0x87, 0x71, 0xe6, 0x23, 0x37, 0x7e, 0xa3, 0xe0, 0x83, 0x48, 0x2e,
+ 0xfe, 0xc3, 0xcb, 0x4f, 0x26, 0x9a, 0xd7, 0xe4, 0xca, 0xf4, 0x94, 0xb7,
+ 0xbc, 0x03, 0xc9, 0xc3, 0x5e, 0x7f, 0xef, 0x9b, 0x37, 0xff, 0x8f, 0x62,
+ 0xec, 0xb6, 0x09, 0x50, 0xa9, 0xc1, 0x4a, 0x97, 0xf4, 0xe7, 0x08, 0x57,
+ 0x87, 0x2d, 0x10, 0xca, 0xbc, 0x93, 0x85, 0xfb, 0xc8, 0xc7, 0x8f, 0xc1,
+ 0x4e, 0x1f, 0x50, 0xad, 0xba, 0x09, 0x9c, 0xf8, 0x94, 0x75, 0xdd, 0x2c,
+ 0x78, 0x5d, 0xa0, 0x4a, 0xf3, 0x7b, 0xc0, 0xa7, 0x71, 0xa5, 0x20, 0xe6,
+ 0xb0, 0xca, 0x09, 0xf2, 0x38, 0xfc, 0x61, 0x49, 0xdc, 0x83, 0x35, 0x1e,
+ 0xdd, 0x08, 0xd7, 0xaa, 0x50, 0x0e, 0xc5, 0x57, 0x05, 0x44, 0xd7, 0xdb,
+ 0x56, 0x2b, 0x1e, 0xe5, 0x33, 0x08, 0x7c, 0x3d, 0x25, 0x29, 0x05, 0x14,
+ 0x3a, 0x93, 0xff, 0xe7, 0x40, 0x25, 0x30, 0x17, 0xc3, 0x50, 0xad, 0xec,
+ 0xb3, 0x64, 0x87, 0x35, 0xb2, 0x5a, 0x1e, 0xa9, 0x48, 0xc8, 0x53, 0x30,
+ 0xf1, 0x43, 0x6f, 0xe1, 0x2a, 0x8b, 0x81, 0x49, 0xbc, 0xa8, 0x8a, 0x8b,
+ 0x2d, 0x1a, 0xc5, 0xcb, 0x47, 0xc1, 0xbc, 0xe0, 0x54, 0x98, 0xcc, 0x82,
+ 0xe9, 0xa6, 0x3f, 0x70, 0x55, 0xe3, 0xe0, 0x7d, 0x5f, 0xa9, 0xc4, 0xc1,
+ 0x62, 0x04, 0x2d, 0x15, 0xce, 0xab, 0x7c, 0xd9, 0x88, 0xc1, 0x67, 0x88,
+ 0x3d, 0x6e, 0x96, 0x03, 0x6f, 0xa7, 0x6a, 0xc2, 0x6f, 0x20, 0x8c, 0xf4,
+ 0xfb, 0x96, 0x0c, 0xb7, 0x14, 0xef, 0xa6, 0x83, 0xbd, 0x2b, 0x07, 0x8a,
+ 0x2a, 0x66, 0xb8, 0x0d, 0xa8, 0x72, 0x2a, 0x78, 0x90, 0x2a, 0xe4, 0x46,
+ 0x71, 0x8c, 0xcb, 0xcb, 0xbd, 0xfb, 0xc7, 0xa8, 0x9e, 0x9b, 0x6e, 0x6d,
+ 0x2b, 0xc2, 0x1c, 0xea, 0x16, 0x3a, 0x06, 0xc0, 0xbc, 0xd7, 0x30, 0x8d,
+ 0x87, 0x03, 0x04, 0x0d, 0x58, 0x58, 0x7b, 0x40, 0xf5, 0xe5, 0x7a, 0x51,
+ 0x80, 0x7a, 0x16, 0xc2, 0xaf, 0x83, 0x43, 0x16, 0xb3, 0x3a, 0x1b, 0x24,
+ 0x29, 0x80, 0x60, 0xee, 0x00, 0x91, 0x15, 0xdb, 0x28, 0x0d, 0xc2, 0xfb,
+ 0x74, 0x48, 0xd9, 0x54, 0x97, 0x66, 0xa4, 0xba, 0xc8, 0x19, 0xff, 0x25,
+ 0xca, 0xdf, 0x09, 0x66, 0xe4, 0xfe, 0xbb, 0x2b, 0x3f, 0x4a, 0x81, 0x5a,
+ 0xa6, 0x54, 0x5c, 0xf0, 0xe4, 0x49, 0x38, 0x13, 0xfb, 0xa2, 0xee, 0xf9,
+ 0x7d, 0x72, 0xa9, 0x37, 0x12, 0xf4, 0x04, 0x4e, 0x50, 0x19, 0x6f, 0x29,
+ 0x9d, 0x0d, 0xe7, 0xc3, 0x6d, 0x65, 0x0b, 0x04, 0x53, 0x57, 0x0c, 0xb5,
+ 0x71, 0xb4, 0xd6, 0xb0, 0xaa, 0xed, 0x38, 0x9e, 0x58, 0x55, 0x0d, 0xe4,
+ 0xe6, 0x43, 0x16, 0x93, 0x46, 0x73, 0x39, 0x87, 0xaa, 0x69, 0x07, 0x9f,
+ 0xd7, 0xb6, 0x77, 0x7d, 0xef, 0xc7, 0x19, 0x5d, 0x4f, 0x60, 0x20, 0x7e,
+ 0xf0, 0x34, 0xbe, 0xe4, 0x31, 0xf3, 0x72, 0xe0, 0x89, 0xfb, 0xc8, 0x0a,
+ 0xa9, 0xe6, 0x2c, 0x6b, 0xa5, 0xaa, 0xd5, 0x42, 0x69, 0xc0, 0x27, 0x3b,
+ 0x17, 0x98, 0x73, 0xa3, 0x66, 0x10, 0xd7, 0xac, 0xf9, 0x7f, 0xb2, 0xf3,
+ 0x38, 0x45, 0x23, 0xe2, 0xd4, 0xd2, 0x63, 0x1c, 0x84, 0xde, 0x25, 0xd4,
+ 0x3c, 0x76, 0x58, 0x1a, 0xb6, 0x07, 0x22, 0x74, 0xc2, 0xf7, 0x2c, 0xe1,
+ 0xc0, 0x51, 0x8c, 0xfa, 0xde, 0x6b, 0x35, 0x8c, 0x0f, 0x45, 0xf8, 0x5e,
+ 0x61, 0x2d, 0x4e, 0x90, 0x2d, 0xb7, 0x6c, 0xaf, 0x71, 0x72, 0xdf, 0x68,
+ 0xa9, 0xa2, 0x36, 0x79, 0xbd, 0xee, 0x88, 0xb0, 0xc8, 0xc9, 0xa6, 0x7e,
+ 0x8e, 0xe8, 0x16, 0xbc, 0xd6, 0x82, 0x54, 0xac, 0x81, 0x42, 0x0f, 0xc9,
+ 0x38, 0xd2, 0xe1, 0x17, 0x17, 0x4f, 0xc9, 0x0c, 0x39, 0xc0, 0x70, 0xd8,
+ 0xd8, 0x17, 0x37, 0x4a, 0x93, 0x40, 0x83, 0xe3, 0x3f, 0x05, 0x25, 0xab,
+ 0x6e, 0x58, 0xc1, 0x30, 0x62, 0x4d, 0xad, 0xcd, 0x1b, 0x7a, 0x4b, 0x08,
+ 0xf8, 0x69, 0x85, 0xf1, 0x10, 0x84, 0x22, 0x54, 0x3a, 0x0c, 0x2d, 0x1b,
+ 0xcd, 0x2d, 0xed, 0x95, 0x63, 0x1a, 0x9e, 0xbc, 0xb8, 0x76, 0x48, 0x65,
+ 0xd1, 0xa6, 0x22, 0x98, 0x3e, 0xda, 0x00, 0x56, 0xf4, 0xd3, 0xc5, 0xb0,
+ 0xb3, 0xb0, 0xfa, 0x0c, 0x84, 0x43, 0xfb, 0xa1, 0x1a, 0xba, 0x23, 0xc6,
+ 0x72, 0xea, 0x83, 0x96, 0xff, 0xfd, 0x0d, 0xba, 0x40, 0x32, 0x3e, 0x1a,
+ 0x61, 0x7b, 0xd5, 0x50, 0xfe, 0x41, 0xc8, 0x67, 0x71, 0xb4, 0xff, 0x24,
+ 0xf8, 0x7b, 0xa2, 0x6d, 0x97, 0x84, 0x8e, 0x36, 0x30, 0x05, 0xc3, 0x60,
+ 0x3b, 0x1c, 0xee, 0x34, 0x57, 0x05, 0x0f, 0x9e, 0xc2, 0xfd, 0xc8, 0x03,
+ 0xab, 0x8a, 0x54, 0xde, 0x6a, 0x22, 0xa5, 0xb7, 0x38, 0xf5, 0x91, 0x08,
+ 0xd4, 0xce, 0xe3, 0xa7, 0xb4, 0xcb, 0x58, 0x79, 0xe2, 0x34, 0x79, 0xfa,
+ 0xc2, 0x85, 0x01, 0xeb, 0x53, 0xf1, 0xca, 0x5c, 0xa1, 0xfc, 0x35, 0xa2,
+ 0x7b, 0x8f, 0x29, 0x1c, 0x67, 0xb0, 0x01, 0x1b, 0x5a, 0xa1, 0xc9, 0x3b,
+ 0x2c, 0xc6, 0x35, 0xbb, 0x29, 0x46, 0x13, 0xfa, 0xd9, 0x40, 0x63, 0x3e,
+ 0x6c, 0xa2, 0x36, 0x70, 0xe7, 0xc8, 0x76, 0x55, 0x70, 0xd2, 0x3f, 0xd1,
+ 0xae, 0x83, 0x9d, 0xb9, 0x60, 0x47, 0x3e, 0x38, 0x0d, 0x08, 0x3f, 0xe0,
+ 0x6b, 0x16, 0x7f, 0x7d, 0x7d, 0x40, 0x98, 0x99, 0xc1, 0x27, 0xf2, 0xb5,
+ 0xfe, 0x33, 0xce, 0x83, 0x8c, 0x7d, 0xa7, 0xe6, 0xeb, 0x06, 0xdb, 0x4f,
+ 0xca, 0x10, 0x82, 0x7b, 0x5e, 0xe8, 0xa9, 0x2e, 0xe0, 0x7a, 0xc2, 0x03,
+ 0x75, 0x6e, 0x4e, 0x2b, 0xb6, 0xc3, 0x99, 0xf5, 0x41, 0xe9, 0x75, 0xe5,
+ 0xc5, 0xae, 0x4f, 0xa8, 0x57, 0xf5, 0xf5, 0x89, 0x60, 0xae, 0x41, 0x13,
+ 0x91, 0x77, 0x84, 0xb6, 0x79, 0xea, 0xcb, 0xeb, 0x8d, 0x05, 0xe2, 0x18,
+ 0xfd, 0x36, 0x1f, 0x68, 0x34, 0xd1, 0x3c, 0xc3, 0xe1, 0x87, 0xd3, 0x2a,
+ 0xb1, 0xc5, 0xac, 0xe2, 0xc3, 0xaf, 0xd1, 0x53, 0x61, 0x5e, 0xba, 0xcb,
+ 0x32, 0xde, 0x97, 0xee, 0x4e, 0x58, 0xda, 0xda, 0x9d, 0x12, 0xe2, 0x75,
+ 0x20, 0xd5, 0xb4, 0x64, 0x82, 0x75, 0x3e, 0xee, 0xb9, 0x13, 0x54, 0x54,
+ 0x95, 0x36, 0x36, 0xa9, 0x85, 0x34, 0xa2, 0x37, 0xa0, 0x55, 0xe7, 0x1e,
+ 0x9e, 0xb8, 0xbf, 0x36, 0x96, 0x1b, 0x1c, 0xa9, 0x16, 0xa9, 0x66, 0xb6,
+ 0x30, 0x91, 0xc6, 0xfb, 0x51, 0x30, 0xc8, 0x19, 0x91, 0xca, 0x9e, 0x99,
+ 0x88, 0x5a, 0x29, 0xbc, 0x10, 0x8e, 0x21, 0x93, 0x4b, 0xd1, 0x10, 0x10,
+ 0x10, 0xca, 0x1a, 0x4d, 0x95, 0xd5, 0x0a, 0x08, 0xe4, 0xbc, 0xbc, 0xd4,
+ 0xc4, 0x48, 0xaa, 0xb7, 0x55, 0x88, 0x55, 0x59, 0xfa, 0x05, 0x17, 0xae,
+ 0x2f, 0xcd, 0xa5, 0x86, 0xc7, 0x2a, 0x45, 0xaa, 0x59, 0xad, 0x8c, 0x24,
+ 0x71, 0xbe, 0xd4, 0x4c, 0x32, 0x06, 0x64, 0x72, 0xa7, 0xa6, 0x62, 0x16,
+ 0x8a, 0x6f, 0x04, 0x23, 0x88, 0x64, 0xd2, 0xf4, 0x44, 0x04, 0x04, 0x32,
+ 0x86, 0x93, 0x65, 0x75, 0x42, 0x82, 0x39, 0x2f, 0x2f, 0x35, 0x31, 0x12,
+ 0x2a, 0xad, 0xd5, 0x62, 0x15, 0x56, 0x7e, 0x81, 0x48, 0x8e, 0xd3, 0x5e,
+ 0x73, 0x9d, 0xa3, 0xec, 0xca, 0xdd, 0xbe, 0x89, 0xd7, 0xb8, 0xa3, 0x59,
+ 0xeb, 0x97, 0xb3, 0xf2, 0xf1, 0xa6, 0x4b, 0x8e, 0x89, 0xe6, 0xe9, 0x0a,
+ 0x84, 0x9b, 0xbf, 0xd3, 0x6b, 0xd5, 0xbf, 0x1e, 0x7f, 0x87, 0x55, 0x76,
+ 0x5e, 0xa7, 0xe6, 0x3e, 0xcf, 0x6c, 0x16, 0x5f, 0xf1, 0xf6, 0xf0, 0x3e,
+ 0xd4, 0x4f, 0x71, 0xe5, 0x23, 0x8c, 0xf6, 0xa6, 0x11, 0xc3, 0xf8, 0x7b,
+ 0xc7, 0xea, 0x1a, 0x6a, 0xc7, 0x13, 0x2e, 0x5a, 0xf6, 0x61, 0x9b, 0x71,
+ 0x61, 0x3b, 0x66, 0x37, 0xd4, 0x28, 0xa6, 0xbf, 0xd6, 0xc6, 0x2e, 0x29,
+ 0xd6, 0x38, 0xb5, 0x9c, 0x58, 0x75, 0xfa, 0x2a, 0x6c, 0x2f, 0xa3, 0x8b,
+ 0x02, 0xbe, 0xdd, 0x38, 0xdb, 0x4f, 0xca, 0x25, 0x43, 0x09, 0x44, 0x79,
+ 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x45, 0xaa, 0x53, 0x29, 0x8e, 0xd7, 0x81,
+ 0x74, 0xdd, 0xfa, 0x65, 0x18, 0xd5, 0xc5, 0xae, 0x4f, 0xa8, 0x57, 0xf6,
+ 0x04, 0xf5, 0xcd, 0xd8, 0xa0, 0x26, 0xb4, 0x41, 0xe3, 0x02, 0xc9, 0x95,
+ 0xfe, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0xe6, 0x35, 0xff, 0x03,
+ 0x5f, 0x8c, 0xac, 0x56, 0x1e, 0xec, 0x29, 0xfc, 0x45, 0x97, 0x61, 0x74,
+ 0xa6, 0xed, 0x7c, 0x67, 0x7a, 0xf5, 0xdd, 0x80, 0xaf, 0x42, 0x04, 0x7f,
+ 0x82, 0x46, 0x15, 0x56, 0xea, 0xb1, 0x0a, 0xab, 0x3f, 0x40, 0xa4, 0x47,
+ 0x69, 0xaf, 0x39, 0xce, 0xd1, 0xf6, 0x65, 0x6e, 0xf0, 0x45, 0x5e, 0xfc,
+ 0x51, 0xac, 0xf5, 0xcb, 0xd9, 0xf9, 0x78, 0xd3, 0x25, 0xc7, 0x44, 0xf3,
+ 0x74, 0x85, 0x42, 0x4d, 0xdf, 0xe9, 0xb5, 0xea, 0xdf, 0x8f, 0x3f, 0xc3,
+ 0xaa, 0xbb, 0x2f, 0x53, 0xf3, 0x1f, 0x67, 0xb6, 0x0b, 0x2f, 0xf8, 0xfb,
+ 0x78, 0x1f, 0x6a, 0x27, 0xb8, 0xf2, 0x91, 0xc6, 0x7b, 0x53, 0x08, 0xe1,
+ 0xfc, 0x3d, 0xe3, 0xf5, 0x0d, 0x35, 0x63, 0x89, 0x97, 0x2d, 0x7b, 0x30,
+ 0xcd, 0xb8, 0xb0, 0x9d, 0xb3, 0x1b, 0xea, 0x14, 0x53, 0x5f, 0xeb, 0x63,
+ 0x17, 0x14, 0xeb, 0x1c, 0x5a, 0xce, 0x2c, 0x3a, 0xfd, 0x15, 0x36, 0x17,
+ 0xd1, 0xc5, 0x81, 0x5f, 0x6e, 0x9c, 0x6d, 0xa7, 0xe5, 0x12, 0xa1, 0x84,
+ 0xa2, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x22, 0xd5, 0x29, 0x94, 0xc7,
+ 0x6b, 0xc0, 0xba, 0x6e, 0xfd, 0x32, 0x8c, 0x6a, 0xe2, 0xd7, 0x27, 0xd4,
+ 0x2b, 0xfb, 0x02, 0x7a, 0xe6, 0xec, 0x50, 0x13, 0x5a, 0x20, 0xf1, 0x81,
+ 0x64, 0xca, 0xff, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x73, 0x1a,
+ 0xff, 0x81, 0xaf, 0xc6, 0x56, 0x2b, 0x0f, 0x76, 0x14, 0xfe, 0x22, 0xcb,
+ 0xb0, 0xba, 0x53, 0x76, 0xbe, 0x33, 0xbd, 0x7a, 0xee, 0xc0, 0x57, 0xa1,
+ 0x02, 0x3f, 0xc1, 0x23, 0x0a, 0xab, 0x75, 0x58, 0x85, 0x55, 0x9f, 0xa0,
+ 0x52, 0x23, 0xb4, 0xd7, 0x9c, 0xe7, 0x68, 0xfb, 0x32, 0xb7, 0x78, 0x22,
+ 0xaf, 0x7e, 0x28, 0xd6, 0x7a, 0xe5, 0xec, 0xfc, 0xbc, 0x69, 0x92, 0xe3,
+ 0xa2, 0x79, 0xba, 0x42, 0xa1, 0x26, 0xef, 0xf4, 0xda, 0xf5, 0x6f, 0xc7,
+ 0x9f, 0xe1, 0xd5, 0x5d, 0x97, 0xa9, 0xf9, 0x8f, 0xb3, 0xdb, 0x05, 0x97,
+ 0xfc, 0x7d, 0xbc, 0x0f, 0xb5, 0x13, 0xdc, 0x79, 0x48, 0xe3, 0x3d, 0xa9,
+ 0x84, 0x70, 0xfe, 0x1e, 0xf1, 0xfa, 0x86, 0x9a, 0xb1, 0xc4, 0xcb, 0x96,
+ 0xbd, 0x98, 0x66, 0xdc, 0x58, 0x4e, 0xd9, 0x8d, 0xf5, 0x0a, 0x29, 0xaf,
+ 0xf5, 0xb1, 0x8b, 0x8a, 0x75, 0x8e, 0x2d, 0x67, 0x16, 0x1d, 0x7e, 0x8a,
+ 0x9b, 0x0b, 0xe8, 0xe2, 0xc0, 0xaf, 0xb7, 0x4e, 0x36, 0xd3, 0xf2, 0x89,
+ 0x50, 0xc2, 0x51, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x91, 0x6a, 0x94,
+ 0xca, 0x63, 0xb5, 0xe0, 0x5d, 0x37, 0x7e, 0x99, 0x46, 0x35, 0x71, 0x6b,
+ 0x93, 0xea, 0x15, 0xfd, 0x81, 0x3d, 0x73, 0x76, 0x28, 0x09, 0xad, 0x10,
+ 0x78, 0xc0, 0xb2, 0x65, 0x7f, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70,
+ 0x39, 0x8d, 0x7f, 0xc0, 0xd7, 0xe3, 0x2b, 0x15, 0x87, 0xbb, 0x0a, 0x7f,
+ 0x11, 0x65, 0xd8, 0x5d, 0x29, 0xbb, 0x5f, 0x19, 0xde, 0xbd, 0x77, 0x60,
+ 0x2b, 0xd0, 0x81, 0x1f, 0xe0, 0x91, 0x85, 0x55, 0xba, 0xac, 0x42, 0xaa,
+ 0xcf, 0xd0, 0x29, 0x11, 0xda, 0x6b, 0xce, 0x73, 0xb4, 0x7d, 0x99, 0x5b,
+ 0xbc, 0x11, 0x57, 0xbf, 0x14, 0x6b, 0x3d, 0x72, 0xf6, 0x7e, 0x5e, 0x34,
+ 0xc9, 0x71, 0xd1, 0x3c, 0xdd, 0x21, 0x50, 0x93, 0x77, 0xfa, 0x6d, 0x7a,
+ 0xb7, 0xe3, 0xcf, 0xf0, 0xea, 0xae, 0xe7, 0x1d, 0xfb, 0x2a, 0x2f, 0x0e,
+ 0xe3, 0xde, 0xf4, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f,
+ 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90,
+ 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d,
+ 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13,
+ 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49,
+ 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc,
+ 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5,
+ 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a,
+ 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04,
+ 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd,
+ 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba,
+ 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b,
+ 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88,
+ 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7,
+ 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17,
+ 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68,
+ 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf,
+ 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73,
+ 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32,
+ 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61,
+ 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc,
+ 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a,
+ 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a,
+ 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1,
+ 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7,
+ 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b,
+ 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6,
+ 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff,
+ 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18,
+ 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8,
+ 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa,
+ 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa,
+ 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62,
+ 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d,
+ 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6,
+ 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97,
+ 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41,
+ 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb,
+ 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb,
+ 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9,
+ 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda,
+ 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f,
+ 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c,
+ 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84,
+ 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84,
+ 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6,
+ 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74,
+ 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92,
+ 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc,
+ 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51,
+ 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2,
+ 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2,
+ 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a,
+ 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb,
+ 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3,
+ 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c,
+ 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23,
+ 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01,
+ 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe,
+ 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c,
+ 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e,
+ 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c,
+ 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d,
+ 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86,
+ 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c,
+ 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f,
+ 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1,
+ 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc,
+ 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06,
+ 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3,
+ 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d,
+ 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0,
+ 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32,
+ 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6,
+ 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31,
+ 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64,
+ 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a,
+ 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c,
+ 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea,
+ 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41,
+ 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d,
+ 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5,
+ 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca,
+ 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60,
+ 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01,
+ 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b,
+ 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98,
+ 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66,
+ 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a,
+ 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf,
+ 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07,
+ 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e,
+ 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47,
+ 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f,
+ 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42,
+ 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a,
+ 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21,
+ 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41,
+ 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2,
+ 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21,
+ 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2,
+ 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9,
+ 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7,
+ 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4,
+ 0xb0, 0xc5, 0x37, 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7,
+ 0x5e, 0x7b, 0xa5, 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0,
+ 0x8d, 0x6c, 0x93, 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7,
+ 0xbd, 0x2f, 0xe5, 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7,
+ 0x58, 0x82, 0x8f, 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b,
+ 0x63, 0xd6, 0x10, 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71,
+ 0x1d, 0x97, 0x13, 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e,
+ 0xf3, 0x72, 0x51, 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82,
+ 0xff, 0xb5, 0xdc, 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d,
+ 0x0c, 0x3e, 0x99, 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99,
+ 0xe4, 0x39, 0xe0, 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e,
+ 0x7d, 0x09, 0x1e, 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6,
+ 0x55, 0x08, 0x0f, 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c,
+ 0x31, 0x4d, 0xf6, 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7,
+ 0x9e, 0xe9, 0x66, 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23,
+ 0x5b, 0x24, 0xf4, 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef,
+ 0x4b, 0xf9, 0x66, 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6,
+ 0x20, 0xa3, 0xec, 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8,
+ 0xf5, 0x84, 0x31, 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47,
+ 0x65, 0xc4, 0xe1, 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc,
+ 0xdc, 0x94, 0x7d, 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf,
+ 0xed, 0x77, 0x08, 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43,
+ 0x0f, 0xa6, 0x65, 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79,
+ 0x0e, 0x78, 0x36, 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f,
+ 0x42, 0x47, 0x99, 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95,
+ 0x42, 0x03, 0xec, 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c,
+ 0x53, 0x7d, 0x80, 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7,
+ 0xba, 0x59, 0x93, 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6,
+ 0xc9, 0x3d, 0x33, 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2,
+ 0xfe, 0x59, 0x8c, 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88,
+ 0x28, 0xfb, 0x21, 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d,
+ 0x61, 0x0c, 0x55, 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9,
+ 0x71, 0x38, 0x60, 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37,
+ 0x25, 0x1f, 0x51, 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb,
+ 0x5d, 0xc2, 0x08, 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3,
+ 0xe9, 0x99, 0x69, 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43,
+ 0x9e, 0x0d, 0xa8, 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0,
+ 0x91, 0xe6, 0x53, 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50,
+ 0x80, 0xfb, 0x04, 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14,
+ 0xdf, 0x60, 0x08, 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee,
+ 0x96, 0x64, 0xda, 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2,
+ 0x4f, 0x4c, 0xc4, 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf,
+ 0x96, 0x63, 0x34, 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a,
+ 0x3e, 0xc8, 0x55, 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58,
+ 0x43, 0x15, 0x7e, 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c,
+ 0x4e, 0x18, 0x3a, 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9,
+ 0x47, 0xd4, 0x74, 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7,
+ 0x70, 0x82, 0x38, 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa,
+ 0x66, 0x5a, 0x7a, 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7,
+ 0x83, 0x6a, 0x16, 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24,
+ 0x79, 0x94, 0xd1, 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20,
+ 0x3e, 0xc1, 0x0e, 0x23, 0xb2, 0xe2, 0x70, 0xc1, 0xd4, 0xb0, 0xc5, 0x37,
+ 0xd8, 0x02, 0x0f, 0xde, 0x6e, 0x4a, 0x3e, 0xa3, 0xa7, 0x5e, 0x7b, 0xa5,
+ 0x99, 0x36, 0x90, 0x5f, 0xf6, 0xbb, 0x84, 0x11, 0xc0, 0x8d, 0x6c, 0x93,
+ 0xd3, 0x31, 0x0d, 0xa1, 0x87, 0xd3, 0x32, 0xd3, 0xd7, 0xbd, 0x2f, 0xe5,
+ 0x98, 0xcd, 0x13, 0x3c, 0x87, 0x3c, 0x1b, 0x50, 0xb7, 0x58, 0x82, 0x8f,
+ 0xb2, 0x15, 0x49, 0xcf, 0xa1, 0x23, 0xcc, 0xa6, 0x8b, 0x63, 0xd6, 0x10,
+ 0xc5, 0x5f, 0xbc, 0xca, 0xa1, 0x01, 0xf6, 0x08, 0x71, 0x1d, 0x97, 0x13,
+ 0x86, 0x0e, 0xa5, 0x86, 0x29, 0xbe, 0xc0, 0x10, 0x7e, 0xf3, 0x72, 0x51,
+ 0xf5, 0x1d, 0x3a, 0xf3, 0xdd, 0x2c, 0xc9, 0xb4, 0x82, 0xff, 0xb5, 0xdc,
+ 0x20, 0x8e, 0x04, 0x6b, 0x64, 0x9e, 0x99, 0x88, 0x6d, 0x0c, 0x3e, 0x99,
+ 0x96, 0x9e, 0xbd, 0xe9, 0x7f, 0x2c, 0xc6, 0x68, 0x99, 0xe4, 0x39, 0xe0,
+ 0xda, 0x85, 0xba, 0xc4, 0x14, 0x7d, 0x90, 0xaa, 0x4e, 0x7d, 0x09, 0x1e,
+ 0x65, 0x34, 0x5b, 0x1e, 0xb0, 0x86, 0x2a, 0xfd, 0xe6, 0x55, 0x08, 0x0f,
+ 0xb0, 0x43, 0x88, 0xec, 0xb8, 0x9c, 0x30, 0x75, 0x2c, 0x31, 0x4d, 0xf6,
+ 0x00, 0x83, 0xf7, 0x9b, 0x92, 0x8f, 0xa8, 0xe9, 0xd7, 0x9e, 0xe9, 0x66,
+ 0x4d, 0xa4, 0x17, 0xfd, 0xae, 0xe1, 0x04, 0x70, 0x23, 0x5b, 0x24, 0xf4,
+ 0xcc, 0x43, 0x68, 0x61, 0xf4, 0xcc, 0xb4, 0xf5, 0xef, 0x4b, 0xf9, 0x66,
+ 0x33, 0x44, 0xcf, 0x21, 0xcf, 0x06, 0xd4, 0x2d, 0xd6, 0x20, 0xa3, 0xec,
+ 0x85, 0x52, 0x73, 0xe8, 0x48, 0xf3, 0x29, 0xa2, 0xd8, 0xf5, 0x84, 0x31,
+ 0x57, 0xef, 0x32, 0xa8, 0x40, 0x7d, 0x82, 0x1c, 0x47, 0x65, 0xc4, 0xe1,
+ 0x83, 0xa9, 0x61, 0x8a, 0x6f, 0xb0, 0x04, 0x1f, 0xbc, 0xdc, 0x94, 0x7d,
+ 0x47, 0x4e, 0xbc, 0xf7, 0x4b, 0x32, 0x6d, 0x20, 0xbf, 0xed, 0x77, 0x08,
+ 0x23, 0x81, 0x1a, 0xd9, 0x27, 0xa6, 0x62, 0x1b, 0x43, 0x0f, 0xa6, 0x65,
+ 0xa7, 0xaf, 0x7a, 0x5f, 0xcb, 0x31, 0x9a, 0x26, 0x79, 0x0e, 0x78, 0x36,
+ 0xa1, 0x6e, 0xb1, 0x05, 0x1f, 0x64, 0x2a, 0x93, 0x9f, 0x42, 0x47, 0x99,
+ 0x4d, 0x16, 0xc7, 0xac, 0x21, 0x8a, 0xbf, 0x79, 0x95, 0x42, 0x03, 0xec,
+ 0x10, 0xe2, 0x3b, 0x2e, 0x27, 0x0c, 0x1d, 0x4b, 0x0c, 0x53, 0x7d, 0x80,
+ 0x20, 0xfd, 0xe6, 0xe4, 0xa3, 0xea, 0x3a, 0x75, 0xe7, 0xba, 0x59, 0x93,
+ 0x69, 0x05, 0xff, 0x6b, 0xb8, 0x41, 0x1c, 0x08, 0xd6, 0xc9, 0x3d, 0x33,
+ 0x10, 0xda, 0x18, 0x7d, 0x33, 0x2d, 0x3d, 0x7b, 0xd2, 0xfe, 0x59, 0x8c,
+ 0xd1, 0x33, 0xc8, 0x73, 0xc1, 0xb5, 0x0b, 0x75, 0x88, 0x28, 0xfb, 0x21,
+ 0x54, 0x9c, 0xfa, 0x12, 0x3c, 0xca, 0x68, 0xb6, 0x3d, 0x61, 0x0c, 0x55,
+ 0xfb, 0xcc, 0xaa, 0x10, 0x1f, 0x60, 0x87, 0x11, 0xd9, 0x71, 0x38, 0x60,
+ 0xea, 0x58, 0x62, 0x9b, 0xec, 0x01, 0x07, 0xef, 0x37, 0x25, 0x1f, 0x51,
+ 0xd3, 0xaf, 0x3d, 0xd2, 0xcc, 0x9b, 0x48, 0x2f, 0xfb, 0x5d, 0xc2, 0x08,
+ 0xe0, 0x46, 0xb6, 0x49, 0xe9, 0x98, 0x86, 0xd0, 0xc3, 0xe9, 0x99, 0x69,
+ 0xeb, 0xde, 0x97, 0xf2, 0xcc, 0x66, 0x89, 0x9e, 0x43, 0x9e, 0x0d, 0xa8,
+ 0x5b, 0xac, 0x41, 0x47, 0xd9, 0x0a, 0xa4, 0xe7, 0xd0, 0x91, 0xe6, 0x53,
+ 0x45, 0xb1, 0xeb, 0x08, 0x62, 0xaf, 0xde, 0x65, 0x50, 0x80, 0xfb, 0x04,
+ 0x38, 0x8e, 0xcb, 0x89, 0xc3, 0x07, 0x52, 0xc3, 0x14, 0xdf, 0x60, 0x08,
+ 0x3f, 0x79, 0xb9, 0x28, 0xfa, 0x8e, 0x9d, 0x79, 0xee, 0x96, 0x64, 0xda,
+ 0x41, 0x7f, 0xda, 0xee, 0x10, 0x47, 0x02, 0x35, 0xb2, 0x4f, 0x4c, 0xc4,
+ 0x36, 0x86, 0x1f, 0x4c, 0xcb, 0x4f, 0x5e, 0xf4, 0xbf, 0x96, 0x63, 0x34,
+ 0x4c, 0xf2, 0x1c, 0xf0, 0x6d, 0x42, 0xdd, 0x62, 0x0a, 0x3e, 0xc8, 0x55,
+ 0x27, 0x3e, 0x84, 0x8f, 0x32, 0x9a, 0x2d, 0x8f, 0x58, 0x43, 0x15, 0x7e,
+ 0xf3, 0x2a, 0x84, 0x07, 0xd8, 0x21, 0xc4, 0x76, 0x5c, 0x4e, 0x18, 0x3a,
+ 0x96, 0x18, 0xa6, 0xfb, 0x00, 0x41, 0xfb, 0xcd, 0xc9, 0x47, 0xd4, 0x74,
+ 0xeb, 0xcf, 0x74, 0xb3, 0x26, 0xd2, 0x0b, 0xfe, 0xd7, 0x70, 0x82, 0x38,
+ 0x11, 0xad, 0x92, 0x7a, 0x66, 0x21, 0xb4, 0x30, 0xfa, 0x66, 0x5a, 0x7a,
+ 0xf7, 0xa5, 0xfc, 0xb3, 0x19, 0xa2, 0x67, 0x90, 0xe7, 0x83, 0x6a, 0x16,
+ 0xeb, 0x10, 0x51, 0xf6, 0x42, 0xa9, 0x39, 0xf4, 0x24, 0x79, 0x94, 0xd1,
+ 0x6c, 0x7a, 0xc2, 0x18, 0xab, 0xf7, 0x99, 0x54, 0x20, 0x3e, 0xc1, 0x0e,
+ 0x23, 0xb3,
+};
+static_assert(sizeof(kBytesTestReadSymbol7) == kNumBytesTestReadSymbol7, "");
+
+// The kBytesTestReadSymbol8[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][9] = {
+// // pdf: 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8
+// { 32768 - 4096, 32768 - 8192, 32768 - 12288, 32768 - 16384,
+// 32768 - 20480, 32768 - 24576, 32768 - 28672, 0, 0 },
+// // pdf: 3/16, 2/16, 2/16, 2/16, 2/16, 2/16, 2/16, 1/16
+// { 32768 - 6144, 32768 - 10240, 32768 - 14336, 32768 - 18432,
+// 32768 - 22528, 32768 - 26624, 32768 - 30720, 0, 0 },
+// // pdf: 1/16, 1/16, 2/16, 2/16, 2/16, 2/16, 3/16, 3/16
+// { 32768 - 2048, 32768 - 4096, 32768 - 8192, 32768 - 12288,
+// 32768 - 16384, 32768 - 20480, 32768 - 26624, 0, 0 },
+// // pdf: 1/16, 1/16, 3/16, 3/16, 3/16, 3/16, 1/16, 1/16
+// { 32768 - 2048, 32768 - 4096, 32768 - 10240, 32768 - 16384,
+// 32768 - 22528, 32768 - 28672, 32768 - 30720, 0, 0 },
+// };
+// constexpr int kSymbols[16][4] = { { 0, 4, 7, 3 }, //
+// { 1, 5, 6, 2 }, //
+// { 2, 6, 5, 1 }, //
+// { 3, 7, 4, 0 }, //
+// { 4, 0, 3, 7 }, //
+// { 5, 1, 2, 6 }, //
+// { 6, 2, 1, 5 }, //
+// { 7, 3, 0, 4 }, //
+// { 0, 0, 6, 5 }, //
+// { 2, 1, 4, 3 }, //
+// { 4, 3, 6, 4 }, //
+// { 6, 5, 2, 2 }, //
+// { 1, 0, 7, 3 }, //
+// { 3, 2, 5, 5 }, //
+// { 5, 4, 7, 2 }, //
+// { 7, 6, 3, 4 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 1024; ++i) {
+// for (int j = 0; j < 16; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 8);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf(" constexpr size_t kNumBytesTestReadSymbol8 = %u;\n", bw.pos);
+// printf(" constexpr uint8_t kBytesTestReadSymbol8[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n };\n");
+
+constexpr size_t kNumBytesTestReadSymbol8 = 24195;
+constexpr uint8_t kBytesTestReadSymbol8[] = {
+ 0x15, 0x60, 0xa8, 0x52, 0xf4, 0x88, 0xdd, 0x23, 0x40, 0xb1, 0xd6, 0xd2,
+ 0xc2, 0xa2, 0x4c, 0x0a, 0x5d, 0xba, 0xfe, 0xd2, 0x36, 0xd9, 0xcd, 0x51,
+ 0x10, 0x25, 0x13, 0x29, 0xfa, 0x0d, 0x87, 0xf9, 0xd1, 0x6f, 0xf2, 0x0d,
+ 0x3a, 0xbe, 0xd9, 0x83, 0x99, 0xd1, 0xdf, 0x24, 0x70, 0x28, 0xdb, 0x63,
+ 0xf6, 0x7c, 0x07, 0x2b, 0x68, 0xa3, 0x7a, 0x85, 0xd1, 0x47, 0xba, 0x59,
+ 0x18, 0x7e, 0x64, 0x3b, 0xac, 0xaf, 0xe3, 0x3a, 0x99, 0x82, 0x30, 0x92,
+ 0x7a, 0x93, 0x67, 0x9f, 0xac, 0x53, 0xf8, 0xdb, 0x03, 0x71, 0xc7, 0x4a,
+ 0xa9, 0xec, 0x10, 0xc9, 0xed, 0x5b, 0xa6, 0xd5, 0xc3, 0xdd, 0x81, 0x8d,
+ 0x25, 0xbe, 0x57, 0xcd, 0x01, 0x65, 0x33, 0x6c, 0x12, 0xe1, 0x37, 0x8b,
+ 0xf1, 0x08, 0x27, 0x3c, 0x5a, 0x30, 0x9f, 0x2d, 0x41, 0x2e, 0x75, 0x49,
+ 0xab, 0xa6, 0xb6, 0x4c, 0xbe, 0xe0, 0xd0, 0x20, 0x74, 0xeb, 0x05, 0x79,
+ 0x91, 0x60, 0xfd, 0xb2, 0x39, 0x54, 0xd9, 0x0c, 0x11, 0x04, 0x1f, 0x7b,
+ 0x5d, 0x2d, 0xe3, 0x3f, 0x48, 0xe4, 0x56, 0x11, 0x3d, 0x48, 0xdb, 0x5c,
+ 0x1c, 0x8b, 0x81, 0xbb, 0x8a, 0x53, 0xb7, 0x48, 0x5b, 0x15, 0x9b, 0x35,
+ 0xc1, 0x18, 0x0f, 0xc3, 0x1e, 0x1c, 0x16, 0x7e, 0x0a, 0xbf, 0x16, 0x0a,
+ 0xf5, 0x3f, 0xbe, 0x19, 0xc0, 0x0f, 0xa4, 0x59, 0xae, 0x0a, 0xcf, 0xf4,
+ 0x00, 0xb2, 0xff, 0x3a, 0xd8, 0x7f, 0x6c, 0xcf, 0x4f, 0xca, 0xa1, 0x40,
+ 0x47, 0x8e, 0xd0, 0x44, 0x49, 0x5a, 0x48, 0xe6, 0x86, 0x80, 0xbb, 0x57,
+ 0x36, 0x6e, 0x80, 0xf1, 0xd1, 0xd8, 0xb8, 0xad, 0xb7, 0x6b, 0x11, 0x79,
+ 0x02, 0x95, 0x20, 0xcf, 0x6f, 0x21, 0xe6, 0x5c, 0x65, 0x69, 0x4a, 0xf2,
+ 0x6f, 0x87, 0x68, 0xf1, 0xda, 0x3b, 0xe1, 0x64, 0x5c, 0xfc, 0x21, 0x02,
+ 0x7b, 0xf6, 0x39, 0x77, 0x36, 0x29, 0x3d, 0xda, 0x16, 0x2e, 0xdb, 0x55,
+ 0xac, 0x5a, 0x3a, 0x94, 0x9c, 0x79, 0x2c, 0x92, 0xa4, 0xe3, 0xe2, 0x87,
+ 0xd8, 0x14, 0x21, 0x76, 0xae, 0xf1, 0x8d, 0x7d, 0xdc, 0xde, 0x46, 0xd9,
+ 0xbd, 0xb6, 0x5f, 0xae, 0x77, 0xd0, 0xd7, 0x01, 0xed, 0xbe, 0x5f, 0xee,
+ 0x1a, 0x20, 0x0f, 0x88, 0x5c, 0x8a, 0x44, 0xad, 0x8f, 0x8f, 0x66, 0x9d,
+ 0x43, 0xf4, 0x41, 0x0a, 0xa1, 0xc8, 0x5c, 0xbc, 0x37, 0xe2, 0xca, 0xd2,
+ 0xd8, 0x27, 0x54, 0xdb, 0xdf, 0x7f, 0x0a, 0xd7, 0x65, 0x19, 0x99, 0x1a,
+ 0x92, 0x53, 0xdd, 0x1e, 0x5f, 0xad, 0x24, 0x8a, 0x8d, 0x76, 0xc4, 0xf7,
+ 0x7e, 0x74, 0xfe, 0x68, 0x99, 0x42, 0xfa, 0xaa, 0x6e, 0xdd, 0x91, 0xd4,
+ 0x71, 0x10, 0xb7, 0x45, 0xa8, 0x5f, 0x84, 0x0d, 0xeb, 0x38, 0x3e, 0xaa,
+ 0xf1, 0xad, 0x86, 0x8f, 0x1a, 0x3e, 0x9a, 0x29, 0xc7, 0x7b, 0xa7, 0xdf,
+ 0x51, 0x3d, 0x49, 0x08, 0x09, 0x69, 0x40, 0x9d, 0x45, 0xb8, 0x55, 0xce,
+ 0x96, 0x6c, 0x8b, 0xc6, 0xc9, 0x25, 0x70, 0xc9, 0xb3, 0xa8, 0xa8, 0x08,
+ 0x33, 0x7b, 0xca, 0x21, 0x9e, 0x5b, 0xb5, 0x02, 0x7f, 0xa3, 0x34, 0x7c,
+ 0x3d, 0xba, 0x91, 0x2e, 0xae, 0xc3, 0x1f, 0x9e, 0xc2, 0x4f, 0xdf, 0xa9,
+ 0x39, 0x9b, 0x9d, 0x6e, 0xc7, 0x90, 0xeb, 0x2b, 0xb0, 0x3f, 0xde, 0x37,
+ 0xb7, 0x94, 0x3d, 0x4b, 0x2c, 0x42, 0x3f, 0x47, 0xad, 0xc9, 0x23, 0xcb,
+ 0x4d, 0xc4, 0xdd, 0x5e, 0x67, 0x11, 0x9d, 0x45, 0xb8, 0x55, 0xce, 0x98,
+ 0x05, 0xce, 0x97, 0x99, 0x57, 0x84, 0x8d, 0x79, 0x97, 0x81, 0x4b, 0x8a,
+ 0x9c, 0x76, 0x73, 0x9a, 0xf7, 0x59, 0x54, 0x07, 0x6c, 0x11, 0x41, 0x44,
+ 0xf0, 0xa6, 0x2a, 0x5e, 0xb1, 0x48, 0x47, 0x39, 0xbb, 0x1b, 0xf0, 0x25,
+ 0x07, 0xe7, 0xd2, 0xbb, 0x9b, 0x9b, 0xd7, 0x7e, 0xc8, 0xdd, 0xae, 0xb6,
+ 0x23, 0x5e, 0xe0, 0xa5, 0xb0, 0xc6, 0xb6, 0x81, 0xe9, 0x51, 0x20, 0xe9,
+ 0x2f, 0x89, 0xcd, 0x13, 0x96, 0x21, 0x19, 0xc5, 0xd1, 0x65, 0x65, 0x88,
+ 0xd9, 0x7b, 0x87, 0xdc, 0xfb, 0x38, 0x54, 0x22, 0x27, 0xc4, 0xc4, 0x16,
+ 0x56, 0xff, 0x76, 0x69, 0xa6, 0x3b, 0xa0, 0x6d, 0xab, 0xb8, 0xdf, 0xc1,
+ 0xc2, 0xff, 0x65, 0x8f, 0x85, 0xbc, 0x69, 0xc0, 0xa5, 0x9a, 0xef, 0xf1,
+ 0x37, 0x57, 0x99, 0xc4, 0x67, 0x51, 0x6e, 0xdf, 0x30, 0xa4, 0x86, 0x47,
+ 0x34, 0x5f, 0x5e, 0x3c, 0xde, 0x6e, 0x96, 0x74, 0x5c, 0xbd, 0xca, 0xa3,
+ 0x50, 0xe4, 0xe8, 0x63, 0xdf, 0xb0, 0xf1, 0xbe, 0xa2, 0x58, 0x23, 0x7a,
+ 0x4a, 0x29, 0x62, 0x1f, 0x03, 0xf1, 0xe9, 0x19, 0xdd, 0x68, 0xe8, 0x1a,
+ 0x7a, 0x9b, 0x40, 0x0d, 0xb0, 0x15, 0x8b, 0x14, 0x63, 0x08, 0xa4, 0x21,
+ 0xa6, 0x0b, 0x34, 0x8a, 0x3e, 0x76, 0x7a, 0xa8, 0x11, 0x81, 0x16, 0x12,
+ 0xa5, 0xc6, 0x7a, 0xf1, 0xa0, 0x20, 0xff, 0x33, 0x3b, 0xa5, 0x43, 0xc7,
+ 0x42, 0xd3, 0x22, 0x90, 0x16, 0xa2, 0x28, 0x18, 0xa4, 0xc7, 0x24, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22,
+ 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93,
+ 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15,
+ 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b,
+ 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab,
+ 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf,
+ 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58,
+ 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd,
+ 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3,
+ 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8,
+ 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f,
+ 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41,
+ 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe,
+ 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b,
+ 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3,
+ 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59,
+ 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99,
+ 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8,
+ 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc,
+ 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42,
+ 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60,
+ 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15,
+ 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00,
+ 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae,
+ 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04,
+ 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73,
+ 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24,
+ 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99,
+ 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22,
+ 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf,
+ 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14,
+ 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f,
+ 0x52, 0xf5, 0xee, 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0,
+ 0x80, 0xc6, 0x63, 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa,
+ 0x97, 0xaf, 0x75, 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04,
+ 0x06, 0x33, 0x1d, 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4,
+ 0xbd, 0x7b, 0xae, 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20,
+ 0x31, 0x98, 0xeb, 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5,
+ 0xeb, 0xdd, 0x74, 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01,
+ 0x8c, 0xc7, 0x5e, 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f,
+ 0x5e, 0xeb, 0xa3, 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c,
+ 0x66, 0x3a, 0xf1, 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a,
+ 0xf7, 0x5d, 0x1c, 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63,
+ 0x31, 0xd7, 0x88, 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7,
+ 0xba, 0xe8, 0xe4, 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19,
+ 0x8e, 0xbc, 0x46, 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd,
+ 0xd7, 0x47, 0x24, 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc,
+ 0x75, 0xe2, 0x32, 0x6f, 0xd0, 0x59, 0x0a, 0xe6, 0x7f, 0x52, 0xf5, 0xee,
+ 0xba, 0x39, 0x22, 0xac, 0x3f, 0x99, 0x80, 0x48, 0xa0, 0x80, 0xc6, 0x63,
+ 0xaf, 0x11, 0x93, 0x7e, 0x82, 0xc8, 0x57, 0x33, 0xfa, 0x97, 0xaf, 0x75,
+ 0xd1, 0xc9, 0x15, 0x61, 0xfc, 0xcc, 0x02, 0x45, 0x04, 0x06, 0x33, 0x1d,
+ 0x78, 0x8c, 0x9b, 0xf4, 0x16, 0x42, 0xb9, 0x9f, 0xd4, 0xbd, 0x7b, 0xae,
+ 0x8e, 0x48, 0xab, 0x0f, 0xe6, 0x60, 0x12, 0x28, 0x20, 0x31, 0x98, 0xeb,
+ 0xc4, 0x64, 0xdf, 0xa0, 0xb2, 0x15, 0xcc, 0xfe, 0xa5, 0xeb, 0xdd, 0x74,
+ 0x72, 0x45, 0x58, 0x7f, 0x33, 0x00, 0x91, 0x41, 0x01, 0x8c, 0xc7, 0x5e,
+ 0x23, 0x26, 0xfd, 0x05, 0x90, 0xae, 0x67, 0xf5, 0x2f, 0x5e, 0xeb, 0xa3,
+ 0x92, 0x2a, 0xc3, 0xf9, 0x98, 0x04, 0x8a, 0x08, 0x0c, 0x66, 0x3a, 0xf1,
+ 0x19, 0x37, 0xe8, 0x2c, 0x85, 0x73, 0x3f, 0xa9, 0x7a, 0xf7, 0x5d, 0x1c,
+ 0x91, 0x56, 0x1f, 0xcc, 0xc0, 0x24, 0x50, 0x40, 0x63, 0x31, 0xd7, 0x88,
+ 0xc9, 0xbf, 0x41, 0x64, 0x2b, 0x99, 0xfd, 0x4b, 0xd7, 0xba, 0xe8, 0xe4,
+ 0x8a, 0xb0, 0xfe, 0x66, 0x01, 0x22, 0x82, 0x03, 0x19, 0x8e, 0xbc, 0x46,
+ 0x4d, 0xfa, 0x0b, 0x21, 0x5c, 0xcf, 0xea, 0x5e, 0xbd, 0xd7, 0x47, 0x24,
+ 0x55, 0x87, 0xf3, 0x30, 0x09, 0x14, 0x10, 0x18, 0xcc, 0x75, 0xe2, 0x32,
+ 0x6f, 0xd0, 0xc0,
+};
+static_assert(sizeof(kBytesTestReadSymbol8) == kNumBytesTestReadSymbol8, "");
+
+// The kBytesTestReadSymbol9[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][10] = {
+// // pmf: 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9
+// { 32768 - 3641, 32768 - 7282, 32768 - 10923, 32768 - 14564, 32768 - 18204,
+// 32768 - 21845, 32768 - 25486, 32768 - 29127, 0, 0 },
+// // pmf: 3/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 1/18
+// { 32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 16384, 32768 - 20025,
+// 32768 - 23666, 32768 - 27307, 32768 - 30948, 0, 0 },
+// // pmf: 1/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 2/18, 3/18
+// { 32768 - 1820, 32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 16384,
+// 32768 - 20025, 32768 - 23666, 32768 - 27307, 0, 0 },
+// // pmf: 1/18, 2/18, 2/18, 2/18, 4/18, 2/18, 2/18, 2/18, 1/18
+// { 32768 - 1820, 32768 - 5461, 32768 - 9102, 32768 - 12743, 32768 - 20025,
+// 32768 - 23666, 32768 - 27307, 32768 - 30948, 0, 0 },
+// };
+// constexpr int kSymbols[18][4] = { { 0, 4, 8, 3 }, //
+// { 1, 5, 7, 2 }, //
+// { 2, 6, 6, 1 }, //
+// { 3, 7, 5, 0 }, //
+// { 4, 8, 4, 8 }, //
+// { 5, 0, 3, 7 }, //
+// { 6, 1, 2, 6 }, //
+// { 7, 2, 1, 5 }, //
+// { 8, 3, 0, 4 }, //
+// { 0, 0, 8, 7 }, //
+// { 2, 1, 6, 5 }, //
+// { 4, 3, 4, 3 }, //
+// { 6, 5, 2, 1 }, //
+// { 8, 7, 7, 6 }, //
+// { 1, 0, 5, 4 }, //
+// { 3, 2, 3, 2 }, //
+// { 5, 4, 1, 4 }, //
+// { 7, 6, 8, 4 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 128; ++i) {
+// for (int j = 0; j < 18; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 9);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol9 = 3650;
+constexpr uint8_t kBytesTestReadSymbol9[] = {
+ 0x10, 0xe6, 0x62, 0x17, 0x4c, 0x5e, 0xe0, 0x8c, 0x41, 0x75, 0x38, 0xda,
+ 0xb6, 0x33, 0xc7, 0x0e, 0x0f, 0x62, 0x87, 0x29, 0xbe, 0x28, 0x8b, 0x81,
+ 0x71, 0xab, 0x0d, 0xfe, 0x61, 0xf9, 0x96, 0x85, 0xfe, 0x78, 0x18, 0xe6,
+ 0x57, 0xa7, 0xf0, 0xd3, 0xd5, 0x62, 0x37, 0x9a, 0x3d, 0xc4, 0xad, 0x75,
+ 0x35, 0xc1, 0xe9, 0x63, 0xeb, 0x9c, 0xd3, 0xf4, 0xdb, 0xc0, 0xf3, 0x67,
+ 0x14, 0xbd, 0xde, 0xf7, 0xd1, 0x51, 0xf1, 0x62, 0x28, 0xd5, 0x39, 0x99,
+ 0x82, 0x5b, 0x9c, 0x3a, 0x37, 0x85, 0xe7, 0x48, 0x28, 0x02, 0x2d, 0xf1,
+ 0x15, 0x55, 0x77, 0x02, 0x2e, 0x62, 0x53, 0xf6, 0x8a, 0x53, 0x44, 0xfa,
+ 0xe0, 0xff, 0x05, 0xae, 0xdc, 0x30, 0xee, 0x36, 0x29, 0x80, 0xd5, 0x0a,
+ 0xa6, 0x5f, 0x53, 0xa2, 0x31, 0xc0, 0x5b, 0x2a, 0xa5, 0xa5, 0xd2, 0xc0,
+ 0x8d, 0x96, 0x66, 0x25, 0x93, 0x9e, 0xdc, 0x0b, 0x2f, 0xea, 0xe2, 0x51,
+ 0x0b, 0x12, 0x87, 0x90, 0x79, 0xe7, 0x8e, 0x6f, 0xc6, 0x99, 0x4b, 0x6a,
+ 0x50, 0x06, 0xf3, 0x3d, 0xf5, 0x25, 0x72, 0xc5, 0x9e, 0xab, 0x7b, 0x5b,
+ 0x15, 0xf5, 0xeb, 0xae, 0x02, 0xe4, 0x90, 0x2b, 0x15, 0x66, 0xf7, 0x50,
+ 0xfa, 0x46, 0x74, 0xae, 0xd4, 0x7f, 0xd4, 0x0b, 0xbf, 0xbc, 0x83, 0x60,
+ 0x6f, 0x25, 0x87, 0xde, 0xce, 0xb3, 0x86, 0x5a, 0x13, 0x00, 0x31, 0xf2,
+ 0x75, 0xca, 0x08, 0x71, 0xd2, 0xf4, 0xa9, 0xf9, 0x40, 0x23, 0xa7, 0x5e,
+ 0x50, 0x63, 0x64, 0x1d, 0xa2, 0x50, 0x2f, 0x01, 0x4c, 0x11, 0x8b, 0xcb,
+ 0x92, 0x40, 0x9d, 0x94, 0x50, 0x0a, 0xf5, 0x3b, 0xfc, 0x32, 0x1a, 0xbd,
+ 0x48, 0x73, 0xe7, 0x93, 0x0f, 0x53, 0xb2, 0x8e, 0xac, 0xef, 0x22, 0x2f,
+ 0x3e, 0xb0, 0x81, 0xc0, 0x06, 0x9b, 0x14, 0x5c, 0xa6, 0x16, 0xca, 0xa5,
+ 0x79, 0xd2, 0x6a, 0xd3, 0xfe, 0x93, 0x33, 0x2f, 0xdb, 0xcb, 0xca, 0xb3,
+ 0x1d, 0xc5, 0x56, 0x65, 0x53, 0x7f, 0xb9, 0x41, 0xe1, 0x54, 0x31, 0xa2,
+ 0x8c, 0x92, 0xc8, 0x04, 0xf7, 0x9d, 0x26, 0xad, 0x35, 0x00, 0x5a, 0xb2,
+ 0x78, 0x43, 0x14, 0xc2, 0xeb, 0x3a, 0x26, 0x4d, 0x49, 0x5d, 0x33, 0xe4,
+ 0xa9, 0xea, 0xd3, 0x67, 0xbf, 0xbc, 0xb6, 0x2e, 0x1c, 0xf7, 0xd0, 0x98,
+ 0x13, 0x0d, 0x7c, 0x94, 0x02, 0x28, 0x3e, 0x8a, 0xe5, 0x0c, 0x75, 0x82,
+ 0xe5, 0x81, 0x98, 0x87, 0x88, 0x97, 0x86, 0xd6, 0x46, 0x2c, 0x9c, 0x85,
+ 0xc2, 0x99, 0xfd, 0x0a, 0x68, 0xbf, 0x67, 0xfc, 0x17, 0xc7, 0x11, 0x54,
+ 0xd1, 0x20, 0x9d, 0x83, 0x52, 0x84, 0x5d, 0x4b, 0x62, 0xbf, 0x16, 0x5d,
+ 0x8e, 0x72, 0x46, 0xde, 0xb1, 0x77, 0xfb, 0x39, 0x98, 0xf0, 0x4d, 0xa6,
+ 0x7a, 0x7d, 0x1c, 0x16, 0xe9, 0x1e, 0x86, 0x7e, 0xf9, 0x22, 0x58, 0x93,
+ 0xea, 0x2e, 0x26, 0xc7, 0xfb, 0xd1, 0xb3, 0xc7, 0x99, 0xb1, 0x91, 0x67,
+ 0xf1, 0xa3, 0xe0, 0xd2, 0xe8, 0x17, 0x17, 0xd7, 0x0b, 0x7a, 0xd4, 0xed,
+ 0x9e, 0x72, 0x4e, 0xa2, 0x37, 0xc9, 0xd2, 0x16, 0x5d, 0x8b, 0xda, 0xdb,
+ 0x5c, 0x46, 0x05, 0x3e, 0xf7, 0xc8, 0x3a, 0xd5, 0xaf, 0xd9, 0x72, 0x82,
+ 0xbf, 0x96, 0xea, 0x09, 0xd3, 0xd5, 0xfe, 0x43, 0x24, 0xae, 0x95, 0x3d,
+ 0x6c, 0x68, 0x54, 0xad, 0xb5, 0xc4, 0x60, 0x54, 0x08, 0x3c, 0x57, 0x61,
+ 0xa1, 0x11, 0x21, 0x7f, 0xca, 0x48, 0x59, 0xb4, 0x1c, 0x39, 0x0d, 0xf2,
+ 0xdc, 0x62, 0xf0, 0xbb, 0x95, 0x39, 0x51, 0xe9, 0xdb, 0xf1, 0x5d, 0xd1,
+ 0x43, 0x83, 0x8a, 0xb1, 0x8d, 0x36, 0x39, 0x83, 0xc6, 0x94, 0x30, 0xbe,
+ 0xb6, 0x2f, 0x39, 0x05, 0xad, 0xcd, 0xf9, 0x4c, 0xc2, 0x34, 0xc7, 0x81,
+ 0x68, 0xb1, 0x20, 0x1d, 0xea, 0xd3, 0x8c, 0xca, 0xff, 0x4d, 0x94, 0xe1,
+ 0x3e, 0xc2, 0x74, 0x90, 0xed, 0x56, 0x3c, 0x1b, 0x5b, 0xf6, 0x40, 0xf9,
+ 0x3b, 0x94, 0x94, 0x23, 0xc6, 0x48, 0x6a, 0x59, 0xef, 0x04, 0xb7, 0x9f,
+ 0x55, 0x9c, 0x6f, 0x81, 0x73, 0xec, 0x27, 0x49, 0x0e, 0xd5, 0x63, 0xc1,
+ 0xb5, 0xbf, 0x64, 0x0f, 0x93, 0xb9, 0x49, 0x42, 0x3c, 0x64, 0x86, 0xa5,
+ 0x9e, 0xf0, 0x4b, 0x79, 0xf5, 0x59, 0xc7, 0xc5, 0x01, 0x6f, 0xbd, 0x6a,
+ 0x66, 0x93, 0x99, 0x47, 0xb6, 0xf7, 0xfa, 0x21, 0x72, 0x81, 0x71, 0x40,
+ 0x36, 0x81, 0xde, 0x5d, 0xdf, 0xdf, 0x30, 0x53, 0x03, 0x70, 0xfb, 0xb2,
+ 0x2d, 0x37, 0xeb, 0x19, 0xbc, 0xd2, 0x90, 0x44, 0x25, 0x42, 0x06, 0x30,
+ 0xc8, 0xcf, 0x4b, 0x0a, 0x01, 0x13, 0x5e, 0x17, 0x91, 0xc7, 0xcb, 0x79,
+ 0xed, 0x06, 0x39, 0xc1, 0x2e, 0x92, 0x29, 0xf5, 0xff, 0x24, 0xe7, 0x2b,
+ 0x3f, 0x19, 0x35, 0x6b, 0x3d, 0x69, 0xa2, 0x19, 0x20, 0x53, 0xd4, 0xca,
+ 0x08, 0x35, 0x6e, 0xe0, 0x5a, 0x9a, 0x9d, 0x48, 0xf5, 0x20, 0x24, 0x20,
+ 0x33, 0x94, 0x6b, 0x33, 0xdd, 0x78, 0xbf, 0x62, 0xf1, 0x43, 0x08, 0x97,
+ 0x53, 0x98, 0xe4, 0x17, 0x27, 0xfc, 0xe8, 0xf1, 0xb8, 0x4c, 0xb3, 0x79,
+ 0xc8, 0x05, 0x21, 0x1b, 0xe8, 0x56, 0xd2, 0x5f, 0xb6, 0x90, 0x14, 0x0c,
+ 0x96, 0x38, 0xc6, 0xc3, 0x6d, 0x10, 0xbf, 0xc6, 0x28, 0xfe, 0x1f, 0x13,
+ 0x81, 0x04, 0xeb, 0x37, 0x9c, 0x80, 0x52, 0x47, 0x0f, 0xa0, 0x6e, 0xcd,
+ 0x9c, 0x44, 0xdd, 0x61, 0x9c, 0x8f, 0xb2, 0xf5, 0xe0, 0xa0, 0x2b, 0x2f,
+ 0xe7, 0x67, 0xd0, 0xd7, 0x29, 0x08, 0x72, 0xee, 0xd5, 0x60, 0xb9, 0xbb,
+ 0x1b, 0x12, 0xce, 0x60, 0x98, 0xb9, 0x40, 0xd3, 0xd9, 0x77, 0x5d, 0x6b,
+ 0x78, 0xaa, 0x9a, 0x47, 0x2a, 0xf5, 0x38, 0xbb, 0xbe, 0x3a, 0x82, 0x6a,
+ 0xbf, 0x8b, 0x67, 0x7e, 0xa4, 0x78, 0xbf, 0xcf, 0x58, 0xce, 0x86, 0x2e,
+ 0x34, 0xb7, 0x76, 0x99, 0xa5, 0xf1, 0x0c, 0xa9, 0x1c, 0x9f, 0xad, 0xcb,
+ 0xac, 0xf4, 0x03, 0x60, 0xe0, 0x22, 0xfe, 0x02, 0x34, 0x9a, 0x14, 0xb9,
+ 0x11, 0xea, 0x4c, 0x3a, 0x59, 0xaa, 0xec, 0x8f, 0x82, 0x49, 0x23, 0xa2,
+ 0xd0, 0xf7, 0xc3, 0xf0, 0xaa, 0x2d, 0xb2, 0xb8, 0xce, 0x02, 0x2f, 0xe0,
+ 0x23, 0x49, 0xa1, 0x38, 0x12, 0xba, 0xab, 0x9f, 0x60, 0xe4, 0x0d, 0xfa,
+ 0x2b, 0xcc, 0xad, 0x6a, 0x06, 0xca, 0x38, 0x82, 0xc5, 0x88, 0x10, 0xb6,
+ 0xf5, 0xf6, 0x06, 0x7b, 0x03, 0x9c, 0xe4, 0x89, 0xaf, 0xdb, 0x66, 0x45,
+ 0xeb, 0x2c, 0x28, 0xe2, 0x40, 0x08, 0x44, 0xe2, 0x8a, 0x91, 0x19, 0x04,
+ 0x29, 0x46, 0xa7, 0xb5, 0x78, 0xae, 0x05, 0xcc, 0x38, 0x9f, 0xd8, 0x58,
+ 0xc9, 0x79, 0xf9, 0xad, 0x77, 0x66, 0x49, 0x62, 0xef, 0x13, 0x72, 0xee,
+ 0xda, 0x37, 0xb5, 0xd7, 0xf1, 0x51, 0x5d, 0x16, 0x11, 0xf3, 0x91, 0xf2,
+ 0x13, 0x49, 0x09, 0x50, 0x15, 0xc6, 0x48, 0xe6, 0xe9, 0x4c, 0xf0, 0x06,
+ 0x14, 0x3f, 0xef, 0x46, 0x15, 0xaf, 0x96, 0x0d, 0x17, 0x51, 0x08, 0xf2,
+ 0xe1, 0xc9, 0xb9, 0x1d, 0x8d, 0x8f, 0x74, 0x25, 0x04, 0x1f, 0x2c, 0x62,
+ 0x67, 0xe4, 0x4b, 0xdc, 0x67, 0x39, 0x2c, 0x7d, 0x3a, 0x1e, 0x6f, 0x5b,
+ 0x0b, 0xab, 0x0b, 0x1f, 0x64, 0x37, 0x19, 0x4f, 0x6b, 0x07, 0x05, 0xff,
+ 0x6e, 0x89, 0x8f, 0x22, 0x7d, 0x28, 0xd9, 0x3b, 0x9a, 0xe2, 0x3f, 0xff,
+ 0xc2, 0xb1, 0xca, 0x05, 0xbc, 0x05, 0xa5, 0xe7, 0x2d, 0x66, 0xf7, 0x37,
+ 0x92, 0xd2, 0xb4, 0x35, 0x26, 0x3f, 0x8c, 0x0c, 0x22, 0xa5, 0x5f, 0x5e,
+ 0x9c, 0x01, 0x46, 0x91, 0xe7, 0xa2, 0x92, 0x97, 0x0a, 0x19, 0x85, 0x2f,
+ 0x54, 0xe3, 0xa8, 0x26, 0xab, 0xe6, 0xb5, 0xd9, 0x71, 0x19, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd,
+ 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f,
+ 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e,
+ 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe,
+ 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69,
+ 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83,
+ 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e,
+ 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7,
+ 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e,
+ 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41,
+ 0x11, 0xea, 0x4b, 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb,
+ 0x0b, 0xe2, 0x7d, 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09,
+ 0x06, 0x99, 0x9b, 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8,
+ 0xf8, 0x39, 0xb8, 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03,
+ 0x60, 0xe0, 0x0a, 0xc0, 0x90, 0x69, 0x99, 0xb7, 0x41, 0x11, 0xea, 0x4b,
+ 0x6e, 0x7e, 0xdd, 0xfa, 0x8f, 0x83, 0x9b, 0x8a, 0xeb, 0x0b, 0xe2, 0x7d,
+ 0xe6, 0xee, 0x8f, 0x40, 0x36, 0x0e, 0x00, 0xac, 0x09, 0x06, 0x99, 0x9b,
+ 0x74, 0x11, 0x1e, 0xa4, 0xb6, 0xe7, 0xed, 0xdf, 0xa8, 0xf8, 0x39, 0xb8,
+ 0xae, 0xb0, 0xbe, 0x27, 0xde, 0x6e, 0xe8, 0xf4, 0x03, 0x60, 0xe0, 0x0a,
+ 0xc0, 0x98,
+};
+static_assert(sizeof(kBytesTestReadSymbol9) == kNumBytesTestReadSymbol9, "");
+
+// The kBytesTestReadSymbol10[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][11] = {
+// // pmf: 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10, 1/10
+// { 32768 - 3277, 32768 - 6554, 32768 - 9830, 32768 - 13107, 32768 - 16384,
+// 32768 - 19661, 32768 - 22938, 32768 - 26214, 32768 - 29491, 0, 0 },
+// // pmf: 3/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 1/20
+// { 32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 14746, 32768 - 18022,
+// 32768 - 21299, 32768 - 24576, 32768 - 27853, 32768 - 31130, 0, 0 },
+// // pmf: 1/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 2/20, 3/20
+// { 32768 - 1638, 32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 14746,
+// 32768 - 18022, 32768 - 21299, 32768 - 24576, 32768 - 27853, 0, 0 },
+// // pmf: 1/20, 2/20, 2/20, 2/20, 3/20, 3/20, 2/20, 2/20, 2/20, 1/20
+// { 32768 - 1638, 32768 - 4915, 32768 - 8192, 32768 - 11469, 32768 - 16384,
+// 32768 - 21299, 32768 - 24576, 32768 - 27853, 32768 - 31130, 0, 0 },
+// };
+// constexpr int kSymbols[20][4] = { { 0, 5, 9, 4 }, //
+// { 1, 6, 8, 3 }, //
+// { 2, 7, 7, 2 }, //
+// { 3, 8, 6, 1 }, //
+// { 4, 9, 5, 0 }, //
+// { 5, 0, 4, 9 }, //
+// { 6, 1, 3, 8 }, //
+// { 7, 2, 2, 7 }, //
+// { 8, 3, 1, 6 }, //
+// { 9, 4, 0, 5 }, //
+// { 0, 0, 9, 7 }, //
+// { 2, 1, 8, 5 }, //
+// { 4, 3, 6, 3 }, //
+// { 6, 5, 4, 1 }, //
+// { 8, 7, 2, 8 }, //
+// { 1, 0, 9, 6 }, //
+// { 3, 2, 7, 4 }, //
+// { 5, 4, 5, 2 }, //
+// { 7, 6, 3, 5 }, //
+// { 9, 8, 1, 4 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 96; ++i) {
+// for (int j = 0; j < 20; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 10);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol10 = 3204;
+constexpr uint8_t kBytesTestReadSymbol10[] = {
+ 0x10, 0x84, 0xe2, 0xe0, 0x0f, 0x08, 0xd6, 0x01, 0xd0, 0xaa, 0xd8, 0xb5,
+ 0x60, 0x4f, 0xb9, 0xb3, 0x73, 0x01, 0x8c, 0x92, 0xe6, 0xa0, 0xab, 0xe8,
+ 0xe4, 0x95, 0x85, 0x03, 0x5f, 0xbb, 0x3b, 0x1f, 0x27, 0xb1, 0x44, 0x95,
+ 0x50, 0x1f, 0xad, 0xc8, 0x35, 0xde, 0x44, 0xf3, 0xb6, 0x8d, 0xa2, 0x39,
+ 0xc3, 0xb6, 0xee, 0x3c, 0x10, 0x33, 0x27, 0x7a, 0x29, 0xcc, 0x7c, 0x08,
+ 0xcb, 0x94, 0xbe, 0xef, 0x96, 0x47, 0x30, 0x49, 0x47, 0x9c, 0xb7, 0x7e,
+ 0x23, 0x0c, 0x27, 0x8e, 0x1b, 0xdc, 0x6c, 0x92, 0x40, 0x98, 0xbf, 0x20,
+ 0xd4, 0x01, 0x72, 0x55, 0x8c, 0x3f, 0x3c, 0x76, 0x24, 0xd2, 0x2d, 0xba,
+ 0xa4, 0x54, 0x29, 0x80, 0xe9, 0x06, 0x2c, 0x68, 0xbd, 0xa7, 0xc5, 0xf7,
+ 0x44, 0xdf, 0x7e, 0x94, 0x90, 0x3f, 0x94, 0x7d, 0x9e, 0x36, 0xb8, 0x82,
+ 0x1d, 0x4a, 0x47, 0x1f, 0x6c, 0x29, 0x51, 0xd2, 0x84, 0xa8, 0xcd, 0x98,
+ 0xc0, 0xd2, 0xea, 0x4a, 0x25, 0x3c, 0xd7, 0x34, 0x64, 0x96, 0xd4, 0x06,
+ 0xed, 0x00, 0x98, 0xc3, 0x65, 0x10, 0xd4, 0xac, 0x6b, 0xab, 0xd7, 0x35,
+ 0x04, 0x89, 0xbf, 0x24, 0xcc, 0xfc, 0xc9, 0xe8, 0x87, 0x3d, 0xdb, 0x55,
+ 0xf0, 0xc9, 0x97, 0x71, 0x99, 0x00, 0x54, 0x50, 0x24, 0x66, 0xca, 0x24,
+ 0xfd, 0x1c, 0xb1, 0x71, 0x0e, 0xb5, 0x9c, 0x27, 0xfc, 0x7f, 0x95, 0x98,
+ 0xc8, 0x99, 0x9f, 0x9b, 0xc7, 0xf6, 0x69, 0xfa, 0xb2, 0x11, 0x77, 0x8d,
+ 0x02, 0x53, 0x32, 0x4e, 0x20, 0x2c, 0x21, 0x2b, 0x99, 0x9a, 0xec, 0x63,
+ 0x0b, 0xe2, 0x8f, 0x30, 0xf8, 0x3c, 0xd1, 0xb1, 0xbc, 0x52, 0x73, 0xce,
+ 0x85, 0x54, 0xdd, 0xe6, 0xf6, 0x9c, 0x2d, 0xca, 0x3d, 0xa8, 0x09, 0x34,
+ 0xa8, 0x41, 0x9c, 0x03, 0x78, 0xbc, 0x67, 0x11, 0x9f, 0xbe, 0xde, 0x9a,
+ 0x98, 0x8a, 0x8d, 0x0b, 0x88, 0x7f, 0xea, 0x82, 0x77, 0x61, 0x7a, 0xde,
+ 0xb0, 0xb1, 0x46, 0x8d, 0x23, 0x69, 0x2f, 0x17, 0x05, 0xff, 0x4a, 0x9e,
+ 0xf9, 0xb3, 0x9a, 0xd0, 0xc4, 0x81, 0xcf, 0xbc, 0xe6, 0x26, 0x2c, 0x37,
+ 0x55, 0xec, 0xdc, 0x23, 0x05, 0xdf, 0x30, 0xcf, 0x5a, 0x4a, 0x0c, 0x08,
+ 0xc0, 0xd7, 0x9d, 0x80, 0xc0, 0xa3, 0x56, 0x49, 0x41, 0xc4, 0xdd, 0xc5,
+ 0x69, 0x5c, 0xe5, 0x6c, 0xc5, 0xae, 0x4c, 0x95, 0x45, 0xf2, 0xf6, 0xd6,
+ 0x12, 0x25, 0xcc, 0x24, 0x56, 0x8c, 0x2b, 0x32, 0x51, 0x18, 0x1a, 0xec,
+ 0xb0, 0x62, 0x40, 0x82, 0x59, 0xb8, 0x38, 0x9f, 0x9f, 0x73, 0xf5, 0xb3,
+ 0xc3, 0x93, 0xa5, 0x4e, 0xab, 0x7f, 0x97, 0x56, 0x51, 0xb0, 0xff, 0x69,
+ 0x73, 0xc2, 0xd0, 0x60, 0x93, 0x59, 0x2f, 0xc7, 0x84, 0x14, 0x7e, 0x68,
+ 0xa7, 0x2b, 0x37, 0xb4, 0x2e, 0x69, 0x58, 0x55, 0x3c, 0xd2, 0xf1, 0xa8,
+ 0x2b, 0x6e, 0xd5, 0x11, 0x1c, 0x1d, 0x17, 0xd5, 0xf1, 0xfa, 0x8b, 0xd1,
+ 0x6c, 0xc2, 0x32, 0x9e, 0x66, 0x3e, 0x6a, 0x4a, 0x0e, 0xb8, 0xf9, 0xa8,
+ 0x1c, 0x23, 0xb1, 0x7e, 0xe7, 0xa0, 0x27, 0x5b, 0x1e, 0x8f, 0x8a, 0xb1,
+ 0x1e, 0x50, 0x99, 0x9c, 0x39, 0x5b, 0xa0, 0x76, 0xa2, 0x90, 0x20, 0xd5,
+ 0x61, 0xf8, 0x96, 0x5a, 0xbc, 0x91, 0x5d, 0xfc, 0x1e, 0xed, 0xea, 0xd8,
+ 0x10, 0x5d, 0x15, 0xfa, 0x2b, 0xa7, 0x77, 0xaf, 0xae, 0x64, 0xef, 0x06,
+ 0xa4, 0xf7, 0x65, 0x58, 0xb8, 0x64, 0x47, 0xcd, 0xfa, 0x12, 0x8e, 0x7d,
+ 0x5b, 0x96, 0x27, 0xda, 0xb9, 0x2a, 0x14, 0xfe, 0x3e, 0x57, 0xd7, 0x4e,
+ 0x86, 0xb3, 0x36, 0xd7, 0x77, 0x2d, 0xf6, 0x1e, 0xf3, 0xfd, 0xdb, 0x9a,
+ 0x92, 0x78, 0x0a, 0xa4, 0x17, 0xf1, 0x78, 0xfc, 0xc3, 0x6d, 0xa0, 0xf8,
+ 0x07, 0x6a, 0x68, 0xb1, 0x1b, 0x00, 0x27, 0x65, 0x68, 0x76, 0x10, 0x39,
+ 0x4b, 0x8a, 0x51, 0x7a, 0x53, 0x69, 0x79, 0xfc, 0xbc, 0xe6, 0xf4, 0x26,
+ 0xc3, 0xbf, 0x3a, 0x64, 0x56, 0x7d, 0x5f, 0x76, 0xa2, 0x42, 0xd1, 0xad,
+ 0x3f, 0xb8, 0xce, 0xfb, 0x79, 0x38, 0xf3, 0x85, 0x2a, 0x67, 0xf4, 0x71,
+ 0xfe, 0x0b, 0x79, 0xee, 0x85, 0xe0, 0x61, 0x9c, 0x9d, 0xd5, 0xe0, 0x0a,
+ 0xd7, 0xa6, 0x21, 0xc3, 0x60, 0xbf, 0xbd, 0x16, 0xca, 0xa0, 0x16, 0x9d,
+ 0xc4, 0x14, 0x99, 0x03, 0x7e, 0xe6, 0x62, 0x6e, 0xbe, 0x18, 0x45, 0x5e,
+ 0x15, 0x42, 0xac, 0x5b, 0x60, 0x9f, 0xbd, 0x1e, 0x8a, 0x58, 0x55, 0x75,
+ 0xcf, 0xbb, 0x12, 0xcb, 0xc2, 0xf4, 0x01, 0xfc, 0x96, 0x8d, 0x97, 0x67,
+ 0x94, 0x65, 0x6b, 0xd0, 0xeb, 0xff, 0x26, 0x30, 0x3a, 0xa0, 0xe9, 0x9b,
+ 0xa7, 0x5e, 0x81, 0x2b, 0x8e, 0xf7, 0xd6, 0xbf, 0x6f, 0xe4, 0x33, 0xd5,
+ 0xaa, 0x5a, 0x27, 0x18, 0x24, 0x76, 0x72, 0x72, 0x50, 0x72, 0x92, 0x88,
+ 0x9f, 0x88, 0x81, 0x0f, 0x33, 0xa7, 0x99, 0x83, 0x53, 0x03, 0x8c, 0x2d,
+ 0x36, 0x43, 0x52, 0x27, 0x27, 0x74, 0xcd, 0xf1, 0x1b, 0x76, 0x95, 0x11,
+ 0xdf, 0x4e, 0xb3, 0xa5, 0x2e, 0xe4, 0xac, 0x3a, 0xfd, 0x9f, 0xab, 0x96,
+ 0x7e, 0xb1, 0xf0, 0x19, 0x22, 0xc4, 0x06, 0x9b, 0xe7, 0xe2, 0xf8, 0xb4,
+ 0x17, 0xbd, 0x9d, 0x14, 0xac, 0x11, 0xc9, 0x79, 0x8e, 0x01, 0x23, 0xc9,
+ 0x6e, 0x5f, 0x96, 0x1e, 0x99, 0xe1, 0x19, 0x2c, 0xb1, 0x1b, 0x54, 0x30,
+ 0x3a, 0xb1, 0xe7, 0xbf, 0xbf, 0x17, 0x3d, 0x9b, 0x86, 0xd7, 0x4b, 0x68,
+ 0x46, 0xa6, 0xb0, 0x05, 0x66, 0x4b, 0x8a, 0xdc, 0x60, 0x60, 0x29, 0x95,
+ 0x35, 0x4b, 0x6f, 0xf5, 0x73, 0x51, 0x52, 0xb6, 0xec, 0xef, 0x74, 0xcb,
+ 0x0b, 0x00, 0x04, 0x15, 0xff, 0xb3, 0x13, 0xdd, 0x70, 0x5e, 0x65, 0xfc,
+ 0xa6, 0xb1, 0x13, 0x59, 0x29, 0xd0, 0x2e, 0xc4, 0x55, 0xcb, 0x99, 0xac,
+ 0xca, 0x48, 0x67, 0x3e, 0xfb, 0xfb, 0x54, 0xb7, 0x53, 0x32, 0xb4, 0x17,
+ 0xf6, 0x78, 0xd1, 0x64, 0x67, 0x76, 0x33, 0x3a, 0xe9, 0x13, 0x8c, 0x9c,
+ 0xf1, 0x74, 0xb7, 0xd1, 0x35, 0x41, 0xf2, 0x4d, 0x68, 0x53, 0x25, 0x57,
+ 0x97, 0x33, 0x18, 0xea, 0x96, 0xea, 0x66, 0x56, 0x82, 0xfe, 0xcf, 0x1a,
+ 0x2c, 0x8c, 0xee, 0xc6, 0x67, 0x5d, 0x22, 0x71, 0x93, 0x9e, 0x2e, 0x96,
+ 0xfa, 0x26, 0xa8, 0x3e, 0x49, 0xad, 0x0a, 0x64, 0xaa, 0xf2, 0xe6, 0x63,
+ 0x1d, 0x52, 0xfb, 0x67, 0x7e, 0x17, 0x91, 0x70, 0xef, 0x48, 0xe1, 0x2e,
+ 0x48, 0xe4, 0x8a, 0xc2, 0x4c, 0x5f, 0x77, 0x7f, 0x03, 0x45, 0xf0, 0x8d,
+ 0x44, 0xad, 0x1e, 0xef, 0xb5, 0x1f, 0x3c, 0x3c, 0x4e, 0x43, 0x87, 0xdd,
+ 0xec, 0xd9, 0x6e, 0xd0, 0xe8, 0x47, 0x75, 0x5b, 0xe5, 0xc0, 0x76, 0xb1,
+ 0x9c, 0x5b, 0x72, 0xeb, 0x15, 0x9c, 0x5a, 0xa1, 0x31, 0xc2, 0x46, 0xb4,
+ 0xe7, 0x9b, 0x5d, 0x86, 0x23, 0x3f, 0x47, 0xd9, 0x9b, 0x31, 0x4e, 0xa6,
+ 0x65, 0xe9, 0x2f, 0xa3, 0xf8, 0x34, 0x68, 0xf7, 0x61, 0xf5, 0x08, 0xc4,
+ 0x8a, 0x10, 0xa1, 0x9b, 0xa9, 0x30, 0x25, 0x8d, 0xaf, 0x67, 0x07, 0x8e,
+ 0x84, 0x62, 0xa5, 0xc3, 0x2f, 0x5d, 0x06, 0xaa, 0xd4, 0x02, 0x04, 0x77,
+ 0xed, 0xf4, 0xe0, 0xa9, 0xca, 0x95, 0xa2, 0x91, 0xe0, 0x56, 0x64, 0xb6,
+ 0xb8, 0x39, 0xda, 0x83, 0xc5, 0x10, 0x7e, 0xa6, 0x08, 0x10, 0x01, 0x15,
+ 0x2b, 0x6e, 0xce, 0xfe, 0x43, 0x01, 0xa9, 0xcb, 0xfd, 0xd9, 0x1b, 0x7e,
+ 0x11, 0x74, 0x96, 0x4a, 0x89, 0x3f, 0x07, 0xac, 0x74, 0xf9, 0x93, 0xb2,
+ 0xf6, 0xed, 0xb3, 0x29, 0xab, 0xc5, 0x0a, 0x90, 0xb3, 0x71, 0x51, 0xa5,
+ 0xba, 0x16, 0x01, 0xd4, 0x35, 0x11, 0xdc, 0xba, 0x27, 0xc3, 0x01, 0x05,
+ 0x65, 0x91, 0x6b, 0xff, 0x33, 0xb9, 0x9d, 0x84, 0xf7, 0xc0, 0x2d, 0x4b,
+ 0xf4, 0xb2, 0x39, 0xe4, 0x7d, 0x0f, 0xf6, 0x8d, 0xa4, 0x2c, 0xa2, 0x4d,
+ 0x4e, 0x8a, 0x2e, 0xff, 0x84, 0x5f, 0x43, 0x93, 0xa3, 0x43, 0xa2, 0xe3,
+ 0x23, 0x92, 0xf3, 0x57, 0xd2, 0x2e, 0x8e, 0xea, 0xff, 0x2c, 0x3d, 0x1f,
+ 0xc6, 0x94, 0x77, 0x19, 0xf6, 0xdb, 0x16, 0x4e, 0xd0, 0x3f, 0x32, 0xf3,
+ 0x7b, 0x89, 0x50, 0xc5, 0x5c, 0xfe, 0x86, 0xcf, 0xf6, 0x89, 0x88, 0xa3,
+ 0xa8, 0xd9, 0x52, 0x23, 0x68, 0x31, 0x90, 0xe2, 0xd4, 0x3a, 0x62, 0xb4,
+ 0xe6, 0x4e, 0xfa, 0x20, 0x21, 0xbf, 0xe5, 0x4e, 0x86, 0x6d, 0xbe, 0xbe,
+ 0xc6, 0x25, 0x4b, 0xf2, 0x20, 0x6c, 0x4e, 0xfc, 0x93, 0x41, 0x3f, 0x8b,
+ 0x29, 0x34, 0xb9, 0xd1, 0x61, 0xe0, 0x34, 0x83, 0x8e, 0x1f, 0x8c, 0x44,
+ 0xe2, 0x95, 0x2e, 0x73, 0x48, 0x8f, 0xeb, 0xd0, 0x6c, 0xec, 0xc4, 0xf6,
+ 0x48, 0x5e, 0xf7, 0x53, 0x3e, 0xa6, 0x77, 0x33, 0xb0, 0x9e, 0xf8, 0x05,
+ 0xa9, 0x7e, 0x96, 0x47, 0x3c, 0x8f, 0xa1, 0xfe, 0xd1, 0xb4, 0x85, 0x94,
+ 0x49, 0xa9, 0xd1, 0x45, 0xdf, 0xf0, 0x8b, 0xe8, 0x72, 0x74, 0x68, 0x74,
+ 0x5c, 0x67, 0xc2, 0xbb, 0xcd, 0x7b, 0x6a, 0x2f, 0x6b, 0x0a, 0x1d, 0xec,
+ 0x03, 0x48, 0xd2, 0x8e, 0xe3, 0x3e, 0xdb, 0x62, 0xc9, 0xda, 0x07, 0xe6,
+ 0x5e, 0x6f, 0x71, 0x2a, 0x18, 0xab, 0x9f, 0xd0, 0xd9, 0xfe, 0xd1, 0xac,
+ 0xf0, 0x21, 0xab, 0xd9, 0x70, 0x1e, 0xb9, 0x99, 0xa0, 0xcc, 0xeb, 0xe7,
+ 0x87, 0xee, 0xd9, 0x8e, 0xd0, 0xe5, 0xc0, 0x58, 0x75, 0x37, 0x3d, 0x03,
+ 0x4e, 0x18, 0x08, 0x27, 0xdd, 0x18, 0x38, 0x1b, 0xad, 0xf1, 0xd3, 0xcc,
+ 0xa1, 0x65, 0x26, 0x97, 0x3a, 0x2c, 0x3c, 0x06, 0x90, 0x71, 0xc3, 0xf1,
+ 0x88, 0x9c, 0x52, 0xa5, 0xce, 0x69, 0x11, 0xfd, 0x7a, 0x0d, 0x9d, 0x98,
+ 0x9e, 0xc9, 0x0b, 0xde, 0xea, 0x67, 0xd4, 0xce, 0xe6, 0x76, 0x13, 0xdf,
+ 0x00, 0xb5, 0x2f, 0xd2, 0xc8, 0xe7, 0x91, 0xf4, 0x3f, 0xda, 0x36, 0x90,
+ 0xb2, 0x89, 0x35, 0x3a, 0x28, 0xbb, 0xfe, 0x11, 0x7d, 0x0e, 0x4e, 0x8d,
+ 0x0e, 0x8b, 0x8c, 0xf8, 0x57, 0x79, 0xaf, 0x6d, 0x45, 0xed, 0x61, 0x43,
+ 0xbd, 0x80, 0x69, 0x1a, 0x51, 0xdc, 0x67, 0xdb, 0x6c, 0x59, 0x3b, 0x40,
+ 0xfc, 0xcb, 0xcd, 0xee, 0x25, 0x43, 0x15, 0x73, 0xfa, 0x1b, 0x3f, 0xda,
+ 0x35, 0x9e, 0x04, 0x35, 0x7b, 0x2e, 0x03, 0xd7, 0x33, 0x34, 0x19, 0x9d,
+ 0x7c, 0xf0, 0xfd, 0xdb, 0x31, 0xda, 0x1c, 0xb8, 0x0b, 0x0e, 0xa6, 0xe7,
+ 0xa0, 0x69, 0xc3, 0x01, 0x04, 0xfb, 0xa3, 0x07, 0x03, 0x75, 0xbe, 0x3a,
+ 0x79, 0x94, 0x2c, 0xa4, 0xd2, 0xe7, 0x45, 0x87, 0x80, 0xd2, 0x0e, 0x38,
+ 0x7e, 0x31, 0x13, 0x8a, 0x54, 0xb9, 0xcd, 0x22, 0x3f, 0xaf, 0x41, 0xb3,
+ 0xb3, 0x13, 0xd9, 0x21, 0x7b, 0xdd, 0x4c, 0xfa, 0x99, 0xdc, 0xce, 0xc2,
+ 0x7b, 0xe0, 0x16, 0xa5, 0xfa, 0x59, 0x1c, 0xf2, 0x3e, 0x87, 0xfb, 0x46,
+ 0xd2, 0x16, 0x51, 0x26, 0xa7, 0x45, 0x17, 0x7f, 0xc2, 0x2f, 0xa1, 0xc9,
+ 0xd1, 0xa1, 0xd1, 0x71, 0x9f, 0x0a, 0xef, 0x35, 0xed, 0xa8, 0xbd, 0xac,
+ 0x28, 0x77, 0xb0, 0x0d, 0x23, 0x4a, 0x3b, 0x8c, 0xfb, 0x6d, 0x8b, 0x27,
+ 0x68, 0x1f, 0x99, 0x79, 0xbd, 0xc4, 0xa8, 0x62, 0xae, 0x7f, 0x43, 0x67,
+ 0xfb, 0x46, 0xb3, 0xc0, 0x86, 0xaf, 0x65, 0xc0, 0x7a, 0xe6, 0x66, 0x83,
+ 0x33, 0xaf, 0x9e, 0x1f, 0xbb, 0x66, 0x3b, 0x43, 0x97, 0x01, 0x61, 0xd4,
+ 0xdc, 0xf4, 0x0d, 0x38, 0x60, 0x20, 0x9f, 0x74, 0x60, 0xe0, 0x6e, 0xb7,
+ 0xc7, 0x4f, 0x32, 0x85, 0x94, 0x9a, 0x5c, 0xe8, 0xb0, 0xf0, 0x1a, 0x41,
+ 0xc7, 0x0f, 0xc6, 0x22, 0x71, 0x4a, 0x97, 0x39, 0xa4, 0x47, 0xf5, 0xe8,
+ 0x36, 0x76, 0x62, 0x7b, 0x24, 0x2f, 0x7b, 0xa9, 0x9f, 0x53, 0x3b, 0x99,
+ 0xd8, 0x4f, 0x7c, 0x02, 0xd4, 0xbf, 0x4b, 0x23, 0x9e, 0x47, 0xd0, 0xff,
+ 0x68, 0xda, 0x42, 0xca, 0x24, 0xd4, 0xe8, 0xa2, 0xef, 0xf8, 0x45, 0xf4,
+ 0x39, 0x3a, 0x34, 0x3a, 0x2e, 0x33, 0xe1, 0x5d, 0xe6, 0xbd, 0xb5, 0x17,
+ 0xb5, 0x85, 0x0e, 0xf6, 0x01, 0xa4, 0x69, 0x47, 0x71, 0x9f, 0x6d, 0xb1,
+ 0x64, 0xed, 0x03, 0xf3, 0x2f, 0x37, 0xb8, 0x95, 0x0c, 0x55, 0xcf, 0xe8,
+ 0x6c, 0xff, 0x68, 0xd6, 0x78, 0x10, 0xd5, 0xec, 0xb8, 0x0f, 0x5c, 0xcc,
+ 0xd0, 0x66, 0x75, 0xf3, 0xc3, 0xf7, 0x6c, 0xc7, 0x68, 0x72, 0xe0, 0x2c,
+ 0x3a, 0x9b, 0x9e, 0x81, 0xa7, 0x0c, 0x04, 0x13, 0xee, 0x8c, 0x1c, 0x0d,
+ 0xd6, 0xf8, 0xe9, 0xe6, 0x50, 0xb2, 0x93, 0x4b, 0x9d, 0x16, 0x1e, 0x03,
+ 0x48, 0x38, 0xe1, 0xf8, 0xc4, 0x4e, 0x29, 0x52, 0xe7, 0x34, 0x88, 0xfe,
+ 0xbd, 0x06, 0xce, 0xcc, 0x4f, 0x64, 0x85, 0xef, 0x75, 0x33, 0xea, 0x67,
+ 0x73, 0x3b, 0x09, 0xef, 0x80, 0x5a, 0x97, 0xe9, 0x64, 0x73, 0xc8, 0xfa,
+ 0x1f, 0xed, 0x1b, 0x48, 0x59, 0x44, 0x9a, 0x9d, 0x14, 0x5d, 0xff, 0x08,
+ 0xbe, 0x87, 0x27, 0x46, 0x87, 0x45, 0xc6, 0x7c, 0x2b, 0xbc, 0xd7, 0xb6,
+ 0xa2, 0xf6, 0xb0, 0xa1, 0xde, 0xc0, 0x34, 0x8d, 0x28, 0xee, 0x33, 0xed,
+ 0xb6, 0x2c, 0x9d, 0xa0, 0x7e, 0x65, 0xe6, 0xf7, 0x12, 0xa1, 0x8a, 0xb9,
+ 0xfd, 0x0d, 0x9f, 0xed, 0x1a, 0xcf, 0x02, 0x1a, 0xbd, 0x97, 0x01, 0xeb,
+ 0x99, 0x9a, 0x0c, 0xce, 0xbe, 0x78, 0x7e, 0xed, 0x98, 0xed, 0x0e, 0x5c,
+ 0x05, 0x87, 0x53, 0x73, 0xd0, 0x34, 0xe1, 0x80, 0x82, 0x7d, 0xd1, 0x83,
+ 0x81, 0xba, 0xdf, 0x1d, 0x3c, 0xca, 0x16, 0x52, 0x69, 0x73, 0xa2, 0xc3,
+ 0xc0, 0x69, 0x07, 0x1c, 0x3f, 0x18, 0x89, 0xc5, 0x2a, 0x5c, 0xe6, 0x91,
+ 0x1f, 0xd7, 0xa0, 0xd9, 0xd9, 0x89, 0xec, 0x90, 0xbd, 0xee, 0xa6, 0x7d,
+ 0x4c, 0xee, 0x67, 0x61, 0x3d, 0xf0, 0x0b, 0x52, 0xfd, 0x2c, 0x8e, 0x79,
+ 0x1f, 0x43, 0xfd, 0xa3, 0x69, 0x0b, 0x28, 0x93, 0x53, 0xa2, 0x8b, 0xbf,
+ 0xe1, 0x17, 0xd0, 0xe4, 0xe8, 0xd0, 0xe8, 0xb8, 0xcf, 0x85, 0x77, 0x9a,
+ 0xf6, 0xd4, 0x5e, 0xd6, 0x14, 0x3b, 0xd8, 0x06, 0x91, 0xa5, 0x1d, 0xc6,
+ 0x7d, 0xb6, 0xc5, 0x93, 0xb4, 0x0f, 0xcc, 0xbc, 0xde, 0xe2, 0x54, 0x31,
+ 0x57, 0x3f, 0xa1, 0xb3, 0xfd, 0xa3, 0x59, 0xe0, 0x43, 0x57, 0xb2, 0xe0,
+ 0x3d, 0x73, 0x33, 0x41, 0x99, 0xd7, 0xcf, 0x0f, 0xdd, 0xb3, 0x1d, 0xa1,
+ 0xcb, 0x80, 0xb0, 0xea, 0x6e, 0x7a, 0x06, 0x9c, 0x30, 0x10, 0x4f, 0xba,
+ 0x30, 0x70, 0x37, 0x5b, 0xe3, 0xa7, 0x99, 0x42, 0xca, 0x4d, 0x2e, 0x74,
+ 0x58, 0x78, 0x0d, 0x20, 0xe3, 0x87, 0xe3, 0x11, 0x38, 0xa5, 0x4b, 0x9c,
+ 0xd2, 0x23, 0xfa, 0xf4, 0x1b, 0x3b, 0x31, 0x3d, 0x92, 0x17, 0xbd, 0xd4,
+ 0xcf, 0xa9, 0x9d, 0xcc, 0xec, 0x27, 0xbe, 0x01, 0x6a, 0x5f, 0xa5, 0x91,
+ 0xcf, 0x23, 0xe8, 0x7f, 0xb4, 0x6d, 0x21, 0x65, 0x12, 0x6a, 0x74, 0x51,
+ 0x77, 0xfc, 0x22, 0xfa, 0x1c, 0x9d, 0x1a, 0x1d, 0x17, 0x19, 0xf0, 0xae,
+ 0xf3, 0x5e, 0xda, 0x8b, 0xda, 0xc2, 0x87, 0x7b, 0x00, 0xd2, 0x34, 0xa3,
+ 0xb8, 0xcf, 0xb6, 0xd8, 0xb2, 0x76, 0x81, 0xf9, 0x97, 0x9b, 0xdc, 0x4a,
+ 0x86, 0x2a, 0xe7, 0xf4, 0x36, 0x7f, 0xb4, 0x6b, 0x3c, 0x08, 0x6a, 0xf6,
+ 0x5c, 0x07, 0xae, 0x66, 0x68, 0x33, 0x3a, 0xf9, 0xe1, 0xfb, 0xb6, 0x63,
+ 0xb4, 0x39, 0x70, 0x16, 0x1d, 0x4d, 0xcf, 0x40, 0xd3, 0x86, 0x02, 0x09,
+ 0xf7, 0x46, 0x0e, 0x06, 0xda, 0x64, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5,
+ 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9,
+ 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7,
+ 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54,
+ 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66,
+ 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75,
+ 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca,
+ 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49,
+ 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09,
+ 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc,
+ 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66,
+ 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8,
+ 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e,
+ 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b,
+ 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b,
+ 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20,
+ 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8,
+ 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01,
+ 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f,
+ 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed,
+ 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0,
+ 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e,
+ 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65,
+ 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe,
+ 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36,
+ 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5,
+ 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9,
+ 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7,
+ 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54,
+ 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66,
+ 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75,
+ 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca,
+ 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49,
+ 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09,
+ 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc,
+ 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66,
+ 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8,
+ 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e,
+ 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b,
+ 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b,
+ 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20,
+ 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8,
+ 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01,
+ 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f,
+ 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed,
+ 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0,
+ 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e,
+ 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65,
+ 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe,
+ 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36,
+ 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5,
+ 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9,
+ 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7,
+ 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54,
+ 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66,
+ 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75,
+ 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca,
+ 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49,
+ 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09,
+ 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc,
+ 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66,
+ 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8,
+ 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e,
+ 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b,
+ 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b,
+ 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20,
+ 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8,
+ 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01,
+ 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f,
+ 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed,
+ 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0,
+ 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e,
+ 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65,
+ 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe,
+ 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36,
+ 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5,
+ 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9,
+ 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7,
+ 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8, 0xed, 0xfd, 0x74, 0x54,
+ 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e, 0x1f, 0x77, 0xb3, 0x66,
+ 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b, 0xb4, 0xe3, 0x30, 0x75,
+ 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b, 0x83, 0x5c, 0xcd, 0xca,
+ 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20, 0x88, 0x54, 0xe4, 0x49,
+ 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8, 0x9d, 0xe4, 0x9a, 0x09,
+ 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01, 0xa4, 0x1c, 0x70, 0xfc,
+ 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f, 0x5e, 0x83, 0x67, 0x66,
+ 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1c, 0xed, 0x58, 0x51, 0xe8, 0xc8,
+ 0xed, 0xfd, 0x74, 0x54, 0x95, 0x92, 0xa1, 0xa0, 0xf0, 0xf1, 0x39, 0x0e,
+ 0x1f, 0x77, 0xb3, 0x66, 0xb2, 0x83, 0x37, 0x4e, 0x1a, 0xd0, 0x2f, 0x9b,
+ 0xb4, 0xe3, 0x30, 0x75, 0xf5, 0x52, 0x42, 0x65, 0xe3, 0x9e, 0x7d, 0x6b,
+ 0x83, 0x5c, 0xcd, 0xca, 0xad, 0x28, 0x53, 0xbe, 0xb6, 0xad, 0x46, 0x20,
+ 0x88, 0x54, 0xe4, 0x49, 0x1d, 0xee, 0xcb, 0x36, 0x69, 0x66, 0x09, 0xa8,
+ 0x9d, 0xe4, 0x9a, 0x09, 0xfc, 0x59, 0x49, 0xa5, 0xce, 0x8b, 0x0f, 0x01,
+ 0xa4, 0x1c, 0x70, 0xfc, 0x62, 0x27, 0x14, 0xa9, 0x73, 0x9a, 0x44, 0x7f,
+ 0x5e, 0x83, 0x67, 0x66, 0x27, 0xb2, 0x42, 0xf7, 0xba, 0x97, 0x1d, 0x80,
+};
+static_assert(sizeof(kBytesTestReadSymbol10) == kNumBytesTestReadSymbol10, "");
+
+// The kBytesTestReadSymbol11[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][12] = {
+// // pmf: 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11, 1/11
+// { 32768 - 2979, 32768 - 5958, 32768 - 8937, 32768 - 11916, 32768 - 14895,
+// 32768 - 17873, 32768 - 20852, 32768 - 23831, 32768 - 26810,
+// 32768 - 29789, 0, 0 },
+// // pmf: 3/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 1/22
+// { 32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405, 32768 - 16384,
+// 32768 - 19363, 32768 - 22342, 32768 - 25321, 32768 - 28300,
+// 32768 - 31279, 0, 0 },
+// // pmf: 1/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 2/22, 3/22
+// { 32768 - 1489, 32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405,
+// 32768 - 16384, 32768 - 19363, 32768 - 22342, 32768 - 25321,
+// 32768 - 28300, 0, 0 },
+// // pmf: 1/22, 2/22, 2/22, 2/22, 2/22, 4/22, 2/22, 2/22, 2/22, 2/22, 1/22
+// { 32768 - 1489, 32768 - 4468, 32768 - 7447, 32768 - 10426, 32768 - 13405,
+// 32768 - 19363, 32768 - 22342, 32768 - 25321, 32768 - 28300,
+// 32768 - 31279, 0, 0 },
+// };
+// constexpr int kSymbols[22][4] = { { 0, 6, 10, 5 }, //
+// { 1, 7, 9, 4 }, //
+// { 2, 8, 8, 3 }, //
+// { 3, 9, 7, 2 }, //
+// { 4, 10, 6, 1 }, //
+// { 5, 0, 5, 0 }, //
+// { 6, 1, 4, 10 }, //
+// { 7, 2, 3, 9 }, //
+// { 8, 3, 2, 8 }, //
+// { 9, 4, 1, 7 }, //
+// { 10, 5, 0, 6 }, //
+// { 0, 0, 10, 9 }, //
+// { 2, 1, 8, 7 }, //
+// { 4, 3, 6, 5 }, //
+// { 6, 5, 4, 3 }, //
+// { 8, 7, 2, 1 }, //
+// { 10, 9, 10, 8 }, //
+// { 1, 0, 9, 6 }, //
+// { 3, 2, 7, 4 }, //
+// { 5, 4, 5, 2 }, //
+// { 7, 6, 3, 5 }, //
+// { 9, 8, 1, 5 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 96; ++i) {
+// for (int j = 0; j < 22; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 11);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol11 = 3673;
+constexpr uint8_t kBytesTestReadSymbol11[] = {
+ 0x0f, 0xb4, 0x93, 0xdb, 0xbe, 0x10, 0xa5, 0x0b, 0xa6, 0x53, 0x86, 0x25,
+ 0xaf, 0x5e, 0xf9, 0xd6, 0x10, 0xd8, 0x5e, 0x2b, 0x6d, 0xf2, 0xf8, 0x35,
+ 0x97, 0xf6, 0x95, 0xeb, 0x67, 0x20, 0x49, 0x0e, 0x21, 0xb4, 0x73, 0x5e,
+ 0x72, 0x06, 0xdd, 0x76, 0x99, 0x3d, 0x67, 0x37, 0x27, 0xea, 0x21, 0x80,
+ 0xc6, 0xb8, 0xf7, 0x48, 0x5e, 0x11, 0xe2, 0xe7, 0x10, 0xad, 0x0b, 0x12,
+ 0x52, 0xd4, 0xe3, 0x63, 0x2a, 0x1d, 0x41, 0xf4, 0xce, 0x5d, 0x58, 0x5f,
+ 0x79, 0x6d, 0xdd, 0x4b, 0x3d, 0x99, 0xd9, 0x64, 0xdc, 0x08, 0x16, 0x1a,
+ 0xf3, 0x8f, 0x1e, 0x33, 0xfe, 0x7a, 0x49, 0xaa, 0x98, 0xb9, 0xe2, 0xc6,
+ 0x14, 0xb8, 0x51, 0x1f, 0x45, 0xce, 0xea, 0x97, 0xcd, 0xd0, 0x0b, 0x5d,
+ 0x12, 0x31, 0xbe, 0x78, 0x98, 0xa3, 0x77, 0x6a, 0xa0, 0xef, 0x57, 0x3a,
+ 0xc6, 0xe7, 0x52, 0x22, 0x06, 0x44, 0x35, 0x8e, 0xc9, 0xe8, 0x4f, 0x76,
+ 0xd9, 0x77, 0x8c, 0x80, 0xc9, 0xfc, 0x20, 0x0d, 0xc0, 0x67, 0x95, 0x21,
+ 0x93, 0x74, 0x4f, 0xf1, 0xf5, 0xdf, 0x5a, 0x10, 0xde, 0x57, 0xc8, 0x6e,
+ 0x33, 0x40, 0xae, 0x36, 0x4a, 0xc8, 0x49, 0xbf, 0x0d, 0x6d, 0x74, 0x34,
+ 0xff, 0xdc, 0x1b, 0xe3, 0xcf, 0xcf, 0xe6, 0xd1, 0xfb, 0x4d, 0xd5, 0x0e,
+ 0x86, 0x83, 0x21, 0x12, 0xf8, 0x51, 0x2a, 0xc4, 0x87, 0xd8, 0x1b, 0x1d,
+ 0xe7, 0x36, 0xb5, 0xc3, 0xf9, 0xf9, 0x8f, 0x0f, 0xc2, 0x21, 0x83, 0x75,
+ 0x14, 0x81, 0x17, 0xb1, 0x9b, 0x51, 0x56, 0x1d, 0xa1, 0xaa, 0xff, 0xd4,
+ 0x1f, 0xf3, 0x8d, 0xd1, 0x30, 0x53, 0x92, 0x69, 0xce, 0xf0, 0xc5, 0x75,
+ 0xcf, 0xd2, 0x6e, 0x37, 0x74, 0x79, 0xc3, 0x50, 0x52, 0x01, 0xc4, 0x0f,
+ 0x67, 0xe2, 0xb7, 0xe2, 0xf1, 0xcc, 0xd9, 0x49, 0xc4, 0x58, 0xbd, 0x8d,
+ 0x91, 0xb8, 0x35, 0xbd, 0x64, 0x12, 0x24, 0x20, 0x20, 0x29, 0x23, 0x94,
+ 0x85, 0xb6, 0xa8, 0x4e, 0xd4, 0x49, 0x09, 0x25, 0xc4, 0xc5, 0xa5, 0x0c,
+ 0x76, 0xa9, 0x4a, 0x75, 0x0f, 0xb9, 0x57, 0x33, 0xcd, 0xfd, 0xf8, 0x8f,
+ 0xae, 0x43, 0x48, 0xb8, 0xea, 0x87, 0x17, 0x0d, 0x3d, 0x8b, 0x9a, 0x21,
+ 0xe8, 0xbf, 0xc8, 0x5e, 0x18, 0x48, 0xa3, 0xcd, 0x08, 0x59, 0x9b, 0xdb,
+ 0x79, 0x5c, 0xe9, 0xa3, 0xe6, 0xba, 0x58, 0x53, 0x10, 0x9a, 0x2c, 0x2b,
+ 0x10, 0x5b, 0x96, 0x9a, 0x1f, 0x8f, 0xc2, 0x7d, 0xee, 0xe9, 0xc2, 0xbc,
+ 0x8f, 0x8b, 0xa7, 0x41, 0xb1, 0x33, 0x58, 0x6e, 0x25, 0x13, 0x3a, 0xd0,
+ 0x78, 0x53, 0xda, 0xa2, 0x35, 0x23, 0x89, 0x39, 0xa7, 0xef, 0x94, 0xda,
+ 0x2f, 0xc3, 0x17, 0x80, 0x27, 0xc7, 0x0f, 0xda, 0xfb, 0xda, 0x64, 0x3c,
+ 0x94, 0x8c, 0x39, 0xd0, 0x06, 0x62, 0x6c, 0x0d, 0x26, 0xba, 0x4f, 0xcb,
+ 0x8a, 0xa0, 0xbc, 0xeb, 0x3f, 0x65, 0x51, 0x8e, 0x1d, 0x2e, 0x9e, 0x5f,
+ 0xe3, 0x15, 0x0e, 0x58, 0x4f, 0xb7, 0xb6, 0x64, 0x95, 0xe8, 0x0e, 0x00,
+ 0x7c, 0x1e, 0xd9, 0xde, 0x35, 0x5a, 0xff, 0xd5, 0xe5, 0xb3, 0x64, 0xcc,
+ 0x8b, 0x93, 0xbc, 0x2a, 0x25, 0x7d, 0x50, 0x92, 0x3e, 0x23, 0x4c, 0x07,
+ 0x5e, 0xcf, 0xbb, 0x52, 0xd0, 0xc4, 0xd9, 0x77, 0x66, 0x01, 0x57, 0x1f,
+ 0xa0, 0x9d, 0xb2, 0x6d, 0x4e, 0x36, 0xc1, 0x9a, 0x70, 0x4e, 0xa3, 0x5f,
+ 0xf6, 0xf9, 0x50, 0x08, 0xcd, 0xf9, 0xe5, 0x76, 0x81, 0xea, 0x88, 0x2e,
+ 0xf5, 0x2a, 0xd4, 0x31, 0x39, 0x8d, 0xfe, 0x1c, 0x15, 0x1d, 0x41, 0x2b,
+ 0x55, 0xc7, 0xe8, 0x27, 0x6f, 0xc3, 0xf0, 0x23, 0x76, 0x9a, 0xb2, 0x87,
+ 0x0c, 0x71, 0x3c, 0x73, 0xea, 0x20, 0x93, 0xf4, 0x21, 0x56, 0xfb, 0x8e,
+ 0xd7, 0xaf, 0xc3, 0xd4, 0xf4, 0x31, 0x6f, 0xe8, 0x1f, 0x5b, 0x83, 0xa9,
+ 0x2b, 0x83, 0x08, 0x2e, 0xa2, 0xf3, 0x6c, 0x06, 0xe5, 0x89, 0x73, 0x73,
+ 0x98, 0x0e, 0x57, 0x07, 0x49, 0x68, 0xa4, 0xb2, 0x4a, 0x26, 0xd1, 0x91,
+ 0x49, 0x87, 0x05, 0x55, 0xa4, 0x88, 0x7d, 0x3d, 0x57, 0x7c, 0x20, 0x8c,
+ 0x2c, 0xea, 0x30, 0x63, 0x3a, 0xe4, 0xab, 0x27, 0x80, 0xab, 0xfb, 0x22,
+ 0x8a, 0x0f, 0xe0, 0xe9, 0xc5, 0xd5, 0x4f, 0x8a, 0x2c, 0x28, 0x36, 0x63,
+ 0xbd, 0xa3, 0xc4, 0x90, 0xe4, 0x9e, 0x98, 0xca, 0xce, 0xfc, 0x96, 0xb8,
+ 0x22, 0x0d, 0x17, 0xc8, 0xad, 0xc7, 0x01, 0x38, 0x6e, 0x95, 0x30, 0x74,
+ 0xda, 0xb8, 0xa9, 0xa8, 0xe6, 0xf2, 0x03, 0x41, 0xb2, 0x05, 0x37, 0x04,
+ 0x8b, 0x51, 0xf9, 0xeb, 0x97, 0xdf, 0xe9, 0xa8, 0x5f, 0x11, 0x2f, 0x9f,
+ 0x4f, 0xbe, 0xc1, 0x53, 0x2c, 0x75, 0x90, 0xca, 0xa3, 0x9b, 0xc1, 0x36,
+ 0xa3, 0x03, 0x65, 0xab, 0x57, 0xc4, 0x0e, 0x8a, 0x41, 0xfc, 0x60, 0x65,
+ 0x13, 0x87, 0x6d, 0xda, 0x00, 0xad, 0x56, 0x1c, 0x28, 0x7c, 0x4c, 0xa2,
+ 0x92, 0xda, 0x23, 0x00, 0xe8, 0x60, 0x20, 0x59, 0x45, 0x4a, 0x26, 0xae,
+ 0x22, 0x37, 0x7c, 0x14, 0xce, 0xff, 0x0d, 0xa9, 0xef, 0xfc, 0x93, 0xbd,
+ 0xde, 0x2b, 0x0f, 0xc7, 0xc0, 0x8a, 0x90, 0x06, 0xec, 0x53, 0x9f, 0xc8,
+ 0x5b, 0x7b, 0xe8, 0x38, 0x22, 0x75, 0xe9, 0x40, 0xbc, 0x62, 0xe9, 0x9d,
+ 0x49, 0xab, 0x88, 0x8d, 0xdf, 0x05, 0x33, 0xbf, 0xc3, 0x69, 0x6c, 0x36,
+ 0x71, 0x17, 0x70, 0xc1, 0xe0, 0xd1, 0x71, 0xcf, 0xd5, 0x48, 0x83, 0x50,
+ 0x74, 0x07, 0xc4, 0xca, 0x29, 0x2d, 0xa2, 0x30, 0x0e, 0x86, 0x02, 0x05,
+ 0x94, 0x54, 0xa2, 0x6a, 0xe2, 0x23, 0x77, 0xc1, 0x4c, 0xef, 0xa4, 0x8c,
+ 0xbe, 0x6b, 0x0f, 0x7c, 0x05, 0x30, 0x78, 0x34, 0x5c, 0x73, 0xf5, 0x52,
+ 0x20, 0xd4, 0x1d, 0x01, 0xca, 0x9f, 0x89, 0x3b, 0x91, 0x1d, 0x1f, 0x27,
+ 0xe1, 0xf9, 0xe8, 0xd0, 0xb2, 0x56, 0x32, 0x15, 0x37, 0xa3, 0x08, 0x38,
+ 0xb7, 0x57, 0xb4, 0x09, 0xfe, 0xf4, 0x72, 0xe1, 0x8f, 0x4b, 0x6b, 0x00,
+ 0x8c, 0xc5, 0x39, 0xd5, 0x45, 0x45, 0xbb, 0xf6, 0xb7, 0x01, 0xde, 0xef,
+ 0x8b, 0xaf, 0x85, 0x73, 0xc4, 0x93, 0x3f, 0xbe, 0xf8, 0x69, 0xbd, 0x71,
+ 0xa9, 0x65, 0x6f, 0x22, 0xa6, 0xca, 0x36, 0xf0, 0x34, 0x1b, 0x20, 0x24,
+ 0x6c, 0xd2, 0xe3, 0xbb, 0xb5, 0x80, 0xfc, 0xc4, 0x90, 0x54, 0x70, 0xab,
+ 0xb7, 0xb9, 0xdb, 0xeb, 0x3b, 0x1d, 0x75, 0xc8, 0x82, 0x9a, 0x15, 0x8a,
+ 0x88, 0xb0, 0x7a, 0x77, 0xcf, 0xdc, 0x96, 0x22, 0x4d, 0x08, 0x47, 0x9a,
+ 0x06, 0x3e, 0x47, 0xb1, 0x54, 0xdf, 0x22, 0x9d, 0x75, 0x8f, 0xdb, 0xc4,
+ 0x5a, 0xd0, 0xfe, 0x44, 0xc4, 0xce, 0x9a, 0x57, 0x0b, 0x20, 0x36, 0x07,
+ 0xb1, 0xcf, 0xfe, 0xb4, 0x3e, 0x03, 0x1b, 0x5d, 0xac, 0x40, 0x54, 0x88,
+ 0x52, 0x2e, 0x81, 0x8f, 0x3c, 0x52, 0x87, 0x68, 0x00, 0xa5, 0x95, 0xbc,
+ 0xd9, 0x67, 0x87, 0xa0, 0x75, 0x78, 0xb6, 0xa9, 0xda, 0x76, 0x9d, 0xe4,
+ 0x5a, 0x6d, 0xd5, 0x78, 0xcd, 0x7b, 0x26, 0x5f, 0xc0, 0x09, 0xab, 0x25,
+ 0x16, 0x38, 0xa1, 0x86, 0xa7, 0x5e, 0x5e, 0x2d, 0x3e, 0x2f, 0x09, 0xdc,
+ 0x31, 0x4d, 0x71, 0x2e, 0xec, 0x5f, 0xa0, 0xe0, 0x8f, 0x9c, 0xcd, 0x72,
+ 0xc8, 0x05, 0xa3, 0xb0, 0xfc, 0x4c, 0xdb, 0x6b, 0x24, 0xf2, 0x92, 0x6b,
+ 0x13, 0x79, 0x1c, 0x36, 0x90, 0x20, 0x71, 0xaa, 0x8c, 0x1c, 0xe4, 0xbf,
+ 0x54, 0xf8, 0x48, 0x51, 0xd2, 0x9a, 0x23, 0xa0, 0x55, 0x38, 0x24, 0x17,
+ 0x39, 0x89, 0x4f, 0xc9, 0x01, 0x77, 0x05, 0x16, 0x97, 0x3e, 0xac, 0x9f,
+ 0xba, 0x4a, 0xb1, 0x7e, 0x47, 0x0d, 0xa4, 0x08, 0x1c, 0x6a, 0xa3, 0x07,
+ 0x39, 0x2f, 0xd5, 0x3e, 0x12, 0x14, 0x74, 0xa6, 0x88, 0xe8, 0x15, 0x4e,
+ 0x09, 0x05, 0xce, 0x62, 0x53, 0xf2, 0x40, 0x7b, 0x49, 0x58, 0xc8, 0x5d,
+ 0x29, 0x54, 0xb1, 0xfd, 0xb0, 0xb2, 0x75, 0x2c, 0x55, 0x9f, 0xf9, 0x57,
+ 0x58, 0xec, 0xfb, 0xff, 0xa3, 0xa0, 0x27, 0x02, 0x0e, 0xa7, 0x52, 0xe7,
+ 0x9e, 0xbd, 0xb6, 0x1d, 0xe6, 0x7e, 0xa2, 0xc0, 0x95, 0xe1, 0x4d, 0xd5,
+ 0x78, 0xce, 0x08, 0x2d, 0xff, 0x0b, 0xe8, 0x34, 0xa7, 0x53, 0x15, 0x67,
+ 0xfe, 0x55, 0xd6, 0x3b, 0x3e, 0xff, 0xe8, 0xe8, 0x09, 0xc0, 0x83, 0xa9,
+ 0xd4, 0xb9, 0xe7, 0xaf, 0x6d, 0x87, 0x79, 0x9f, 0xa8, 0xb0, 0x25, 0x78,
+ 0x92, 0x0e, 0x9d, 0xf7, 0x55, 0xd9, 0x1a, 0xc5, 0x48, 0x6c, 0xbe, 0x66,
+ 0xb0, 0xf7, 0xbf, 0x95, 0x75, 0x8e, 0xcf, 0xbf, 0xfa, 0x3a, 0x02, 0x70,
+ 0x20, 0xde, 0xb0, 0xe4, 0xe4, 0x0e, 0x59, 0x44, 0x11, 0x28, 0xe1, 0x22,
+ 0xe8, 0x0e, 0x5b, 0x62, 0x69, 0x46, 0xb2, 0x1a, 0x9b, 0x63, 0x75, 0x31,
+ 0xb9, 0x4a, 0x90, 0x8d, 0x2e, 0xf8, 0xa8, 0xdb, 0x5a, 0x31, 0xcf, 0x9c,
+ 0x99, 0xd5, 0x85, 0x99, 0x5e, 0x0a, 0x51, 0x8d, 0x0d, 0x77, 0x3c, 0x51,
+ 0xe1, 0x98, 0x1c, 0x5a, 0xc1, 0xea, 0x38, 0x93, 0x44, 0xd7, 0xb6, 0xbb,
+ 0xa1, 0x0f, 0x38, 0x75, 0x5e, 0xff, 0x2d, 0x93, 0xfa, 0x7d, 0xca, 0xf6,
+ 0xb7, 0x4f, 0x5e, 0xbd, 0x3f, 0xbc, 0xb6, 0xc6, 0x7b, 0xae, 0x23, 0x97,
+ 0xc7, 0xcb, 0xa7, 0x98, 0x37, 0xf4, 0xd6, 0x0c, 0x12, 0xd6, 0xad, 0xc7,
+ 0x51, 0xb3, 0x0e, 0x88, 0x40, 0xfd, 0xf7, 0x1b, 0x29, 0xcf, 0xb8, 0x7c,
+ 0x29, 0xa1, 0xa2, 0x72, 0x05, 0xa1, 0x0f, 0x43, 0xa8, 0xc4, 0x24, 0x49,
+ 0x96, 0xbf, 0x56, 0xe4, 0xbf, 0xc7, 0x71, 0x5a, 0x18, 0x85, 0x65, 0xdd,
+ 0x17, 0x95, 0x30, 0x18, 0x8b, 0x18, 0xd2, 0xb2, 0x3f, 0x2e, 0xe9, 0x69,
+ 0x89, 0x90, 0xe0, 0x24, 0x08, 0x13, 0x23, 0x0a, 0x78, 0x59, 0x1e, 0xe6,
+ 0x33, 0x0f, 0x12, 0x73, 0xba, 0xb3, 0x3c, 0x1d, 0x05, 0x71, 0x7a, 0xd7,
+ 0x87, 0xd3, 0xaa, 0x7c, 0xb9, 0x3f, 0x74, 0x95, 0x62, 0xfc, 0x85, 0xac,
+ 0xe0, 0xe9, 0xaa, 0x6f, 0x48, 0x4b, 0xdf, 0xb6, 0x9a, 0x7c, 0x24, 0x28,
+ 0xe3, 0x6e, 0x40, 0xbd, 0x03, 0xab, 0xc5, 0xb5, 0x4e, 0xd3, 0xb4, 0xef,
+ 0x23, 0x1e, 0x6e, 0xab, 0xc6, 0x70, 0x41, 0x6f, 0xf8, 0x5f, 0x41, 0xa5,
+ 0x3a, 0x98, 0xab, 0x3f, 0xf2, 0xae, 0xb1, 0xd9, 0xf7, 0xff, 0xf0, 0x29,
+ 0xdf, 0x01, 0xed, 0xe9, 0xa3, 0x49, 0xc6, 0x1a, 0xec, 0xa3, 0x4e, 0x59,
+ 0x4b, 0xcd, 0x01, 0xcb, 0x6c, 0x4d, 0x28, 0xd6, 0x43, 0x53, 0x6c, 0x6e,
+ 0xa6, 0x37, 0x29, 0x52, 0x11, 0xa5, 0xdf, 0x15, 0x1b, 0x6b, 0x46, 0x3a,
+ 0x25, 0x93, 0x5c, 0x76, 0xdc, 0x12, 0xb8, 0x3e, 0xe0, 0xc4, 0xb8, 0xf8,
+ 0x96, 0x8e, 0xde, 0x49, 0xff, 0x58, 0x3d, 0x47, 0x12, 0x68, 0x9a, 0xf6,
+ 0xd7, 0x74, 0x21, 0xe7, 0x0e, 0xab, 0xdf, 0xe5, 0xb2, 0x7f, 0x4f, 0xb9,
+ 0x5e, 0xd6, 0xf7, 0x7a, 0xc8, 0x7e, 0xd7, 0xc0, 0x81, 0x63, 0xff, 0x84,
+ 0x30, 0x67, 0x40, 0x95, 0xcb, 0x03, 0x6b, 0xfb, 0x08, 0xd3, 0x09, 0xa8,
+ 0x93, 0x11, 0xf7, 0xf3, 0x68, 0x89, 0x79, 0x0d, 0x74, 0xce, 0xe9, 0xc6,
+ 0x83, 0xcd, 0xe0, 0x54, 0x51, 0xff, 0xe2, 0x3d, 0x76, 0x94, 0x72, 0xed,
+ 0xb3, 0x66, 0x98, 0x97, 0xd9, 0x0b, 0x3b, 0x1d, 0x75, 0xc8, 0xfd, 0x9a,
+ 0x15, 0x8a, 0x7c, 0xe9, 0xb6, 0x8e, 0x59, 0xf1, 0xbe, 0x8f, 0xe4, 0x3d,
+ 0xdd, 0x72, 0x98, 0x71, 0xe5, 0xef, 0xdc, 0x86, 0x2f, 0x9d, 0x75, 0x8c,
+ 0xe9, 0xbf, 0xd1, 0x89, 0xae, 0x44, 0xda, 0xa7, 0x69, 0xda, 0x77, 0x91,
+ 0x8f, 0x37, 0x55, 0xe3, 0x38, 0x20, 0xb7, 0xfc, 0x2f, 0xa0, 0xd2, 0x9d,
+ 0x4c, 0x55, 0x9f, 0xf9, 0x57, 0x58, 0xec, 0xfb, 0xff, 0xf8, 0x14, 0xef,
+ 0x80, 0xf6, 0xf4, 0xd1, 0xa4, 0xe3, 0x0d, 0x76, 0x51, 0xa7, 0x2c, 0xa5,
+ 0xe6, 0x80, 0xe5, 0xb6, 0x26, 0x94, 0x6b, 0x21, 0xa9, 0xb6, 0x37, 0x53,
+ 0x1b, 0x94, 0xa9, 0x08, 0xd2, 0xef, 0x8a, 0x8d, 0xb5, 0xa3, 0x1d, 0x12,
+ 0xc9, 0xae, 0x3b, 0x6e, 0x09, 0x5c, 0x1f, 0x70, 0x62, 0x5c, 0x7c, 0x4b,
+ 0x47, 0x6f, 0x24, 0xff, 0xac, 0x1e, 0xa3, 0x89, 0x34, 0x4d, 0x7b, 0x6b,
+ 0xba, 0x10, 0xf3, 0x87, 0x55, 0xef, 0xf2, 0xd9, 0x3f, 0xa7, 0xdc, 0xaf,
+ 0x6b, 0x7b, 0xbd, 0x64, 0x3f, 0x6b, 0xe0, 0x40, 0xb1, 0xff, 0xc2, 0x18,
+ 0x33, 0xa0, 0x4a, 0xe5, 0x81, 0xb5, 0xfd, 0x84, 0x69, 0x84, 0xd4, 0x49,
+ 0x88, 0xfb, 0xf9, 0xb4, 0x44, 0xbc, 0x86, 0xba, 0x67, 0x74, 0xe3, 0x41,
+ 0xe6, 0xf0, 0x2a, 0x28, 0xff, 0xf1, 0x1e, 0xbb, 0x4a, 0x39, 0x76, 0xd9,
+ 0xb3, 0x4c, 0x4b, 0xec, 0x85, 0x9d, 0x8e, 0xba, 0xe4, 0x7e, 0xcd, 0x0a,
+ 0xc5, 0x3e, 0x74, 0xdb, 0x47, 0x2c, 0xf8, 0xdf, 0x47, 0xf2, 0x1e, 0xee,
+ 0xb9, 0x4c, 0x38, 0xf2, 0xf7, 0xee, 0x43, 0x17, 0xce, 0xba, 0xc6, 0x74,
+ 0xdf, 0xe8, 0xc4, 0xd7, 0x22, 0x6d, 0x53, 0xb4, 0xed, 0x3b, 0xc8, 0xc7,
+ 0x9b, 0xaa, 0xf1, 0x9c, 0x10, 0x5b, 0xfe, 0x17, 0xd0, 0x69, 0x4e, 0xa6,
+ 0x2a, 0xcf, 0xfc, 0xab, 0xac, 0x76, 0x7d, 0xff, 0xfc, 0x0a, 0x77, 0xc0,
+ 0x7b, 0x7a, 0x68, 0xd2, 0x71, 0x86, 0xbb, 0x28, 0xd3, 0x96, 0x52, 0xf3,
+ 0x40, 0x72, 0xdb, 0x13, 0x4a, 0x35, 0x90, 0xd4, 0xdb, 0x1b, 0xa9, 0x8d,
+ 0xca, 0x54, 0x84, 0x69, 0x77, 0xc5, 0x46, 0xda, 0xd1, 0x8e, 0x89, 0x64,
+ 0xd7, 0x1d, 0xb7, 0x04, 0xae, 0x0f, 0xb8, 0x31, 0x2e, 0x3e, 0x25, 0xa3,
+ 0xb7, 0x92, 0x7f, 0xd6, 0x0f, 0x51, 0xc4, 0x9a, 0x26, 0xbd, 0xb5, 0xdd,
+ 0x08, 0x79, 0xc3, 0xaa, 0xf7, 0xf9, 0x6c, 0x9f, 0xd3, 0xee, 0x57, 0xb5,
+ 0xbd, 0xde, 0xb2, 0x1f, 0xb5, 0xf0, 0x20, 0x58, 0xff, 0xe1, 0x0c, 0x19,
+ 0xd0, 0x25, 0x72, 0xc0, 0xda, 0xfe, 0xc2, 0x34, 0xc2, 0x6a, 0x24, 0xc4,
+ 0x7d, 0xfc, 0xda, 0x22, 0x5e, 0x43, 0x5d, 0x33, 0xba, 0x71, 0xa0, 0xf3,
+ 0x78, 0x15, 0x14, 0x7f, 0xf8, 0x8f, 0x5d, 0xa5, 0x1c, 0xbb, 0x6c, 0xd9,
+ 0xa6, 0x25, 0xf6, 0x42, 0xce, 0xc7, 0x5d, 0x72, 0x3f, 0x66, 0x85, 0x62,
+ 0x9f, 0x3a, 0x6d, 0xa3, 0x96, 0x7c, 0x6f, 0xa3, 0xf9, 0x0f, 0x77, 0x5c,
+ 0xa6, 0x1c, 0x79, 0x7b, 0xf7, 0x21, 0x8b, 0xe7, 0x5d, 0x63, 0x3a, 0x6f,
+ 0xf4, 0x62, 0x6b, 0x91, 0x36, 0xa9, 0xda, 0x76, 0x9d, 0xe4, 0x63, 0xcd,
+ 0xd5, 0x78, 0xce, 0x08, 0x2d, 0xff, 0x0b, 0xe8, 0x34, 0xa7, 0x53, 0x15,
+ 0x67, 0xfe, 0x55, 0xd6, 0x3b, 0x3e, 0xff, 0xfe, 0x05, 0x3b, 0xe0, 0x3d,
+ 0xbd, 0x34, 0x69, 0x38, 0xc3, 0x5d, 0x94, 0x69, 0xcb, 0x29, 0x79, 0xa0,
+ 0x39, 0x6d, 0x89, 0xa5, 0x1a, 0xc8, 0x6a, 0x6d, 0x8d, 0xd4, 0xc6, 0xe5,
+ 0x2a, 0x42, 0x34, 0xbb, 0xe2, 0xa3, 0x6d, 0x68, 0xc7, 0x44, 0xb2, 0x6b,
+ 0x8e, 0xdb, 0x82, 0x57, 0x07, 0xdc, 0x18, 0x97, 0x1f, 0x12, 0xd1, 0xdb,
+ 0xc9, 0x3f, 0xeb, 0x07, 0xa8, 0xe2, 0x4d, 0x13, 0x5e, 0xda, 0xee, 0x84,
+ 0x3c, 0xe1, 0xd5, 0x7b, 0xfc, 0xb6, 0x4f, 0xe9, 0xf7, 0x2b, 0xda, 0xde,
+ 0xef, 0x59, 0x0f, 0xda, 0xf8, 0x10, 0x2c, 0x7f, 0xf0, 0x86, 0x0c, 0xe8,
+ 0x12, 0xb9, 0x60, 0x6d, 0x7f, 0x61, 0x1a, 0x61, 0x35, 0x12, 0x62, 0x3e,
+ 0xfe, 0x6d, 0x11, 0x2f, 0x21, 0xae, 0x99, 0xdd, 0x38, 0xd0, 0x79, 0xbc,
+ 0x0a, 0x8a, 0x3f, 0xfc, 0x47, 0xae, 0xd2, 0x8e, 0x5d, 0xb6, 0x6c, 0xd3,
+ 0x12, 0xfb, 0x21, 0x67, 0x63, 0xae, 0xb9, 0x1f, 0xb3, 0x42, 0xb1, 0x4f,
+ 0x9d, 0x36, 0xd1, 0xcb, 0x3e, 0x37, 0xd1, 0xfc, 0x87, 0xbb, 0xae, 0x53,
+ 0x0e, 0x3c, 0xbd, 0xfb, 0x90, 0xc5, 0xf3, 0xae, 0xb1, 0x9d, 0x37, 0xfa,
+ 0x31, 0x35, 0xc8, 0x9b, 0x54, 0xed, 0x3b, 0x4e, 0xf2, 0x31, 0xe6, 0xea,
+ 0xbc, 0x67, 0x04, 0x16, 0xff, 0x85, 0xf4, 0x1a, 0x53, 0xa9, 0x8a, 0xb3,
+ 0xff, 0x2a, 0xeb, 0x1d, 0x9f, 0x7f, 0xff, 0x02, 0x9d, 0xf0, 0x1e, 0xde,
+ 0x9a, 0x34, 0x9c, 0x61, 0xae, 0xca, 0x34, 0xe5, 0x94, 0xbc, 0xd0, 0x1c,
+ 0xb6, 0xc4, 0xd2, 0x8d, 0x64, 0x35, 0x36, 0xc6, 0xea, 0x63, 0x72, 0x95,
+ 0x21, 0x1a, 0x5d, 0xf1, 0x51, 0xb6, 0xb4, 0x63, 0xa2, 0x59, 0x35, 0xc7,
+ 0x6d, 0xc1, 0x2b, 0x83, 0xee, 0x0c, 0x4b, 0x8f, 0x89, 0x68, 0xed, 0xe4,
+ 0x9f, 0xf5, 0x83, 0xd4, 0x71, 0x26, 0x89, 0xaf, 0x6d, 0x77, 0x42, 0x1e,
+ 0x70, 0xea, 0xbd, 0xfe, 0x5b, 0x27, 0xf4, 0xfb, 0x95, 0xed, 0x6f, 0x77,
+ 0xac, 0x87, 0xed, 0x7c, 0x08, 0x16, 0x3f, 0xf8, 0x43, 0x06, 0x74, 0x09,
+ 0x5c, 0xb0, 0x36, 0xbf, 0xb0, 0x8d, 0x30, 0x9a, 0x89, 0x31, 0x1f, 0x7f,
+ 0x36, 0x88, 0x97, 0x90, 0xd7, 0x4c, 0xee, 0x9c, 0x68, 0x3c, 0xde, 0x05,
+ 0x45, 0x1f, 0xfe, 0x23, 0xd7, 0x69, 0x47, 0x2e, 0xdb, 0x36, 0x69, 0x89,
+ 0x7d, 0x90, 0xb3, 0xb1, 0xd7, 0x5c, 0x8f, 0xd9, 0xa1, 0x58, 0xa7, 0xce,
+ 0x9b, 0x68, 0xe5, 0x9f, 0x1b, 0xe8, 0xfe, 0x43, 0xdd, 0xd7, 0x29, 0x87,
+ 0x1e, 0x5e, 0xfd, 0xc8, 0x62, 0xf9, 0xd7, 0x58, 0xce, 0x9b, 0xfd, 0x18,
+ 0x9a, 0xe4, 0x4d, 0xaa, 0x76, 0x9d, 0xa7, 0x79, 0x18, 0xf3, 0x75, 0x5e,
+ 0x33, 0x82, 0x0b, 0x7f, 0xc2, 0xfa, 0x0d, 0x29, 0xd4, 0xc5, 0x59, 0xff,
+ 0x95, 0x75, 0x8e, 0xcf, 0xbf, 0xff, 0x81, 0x4e, 0xf8, 0x0f, 0x6f, 0x4d,
+ 0x1a, 0x4e, 0x30, 0xd7, 0x65, 0x1a, 0x72, 0xca, 0x5e, 0x68, 0x0e, 0x5b,
+ 0x62, 0x69, 0x46, 0xb2, 0x1a, 0x9b, 0x63, 0x75, 0x31, 0xb9, 0x4a, 0x90,
+ 0x8d, 0x2e, 0xf8, 0xa8, 0xdb, 0x5a, 0x31, 0xd1, 0x2c, 0x9a, 0xe3, 0xb6,
+ 0xe0, 0x95, 0xc1, 0xf7, 0x06, 0x25, 0xc7, 0xc4, 0xb4, 0x76, 0xf2, 0x4f,
+ 0xfa, 0xc1, 0xea, 0x38, 0x93, 0x44, 0xd7, 0xb6, 0xbb, 0xa1, 0x0f, 0x38,
+ 0x75, 0x5e, 0xff, 0x2d, 0x93, 0xfa, 0x7d, 0xca, 0xf6, 0xb7, 0xbb, 0xd6,
+ 0x43, 0xf6, 0xbe, 0x04, 0x0b, 0x1f, 0xfc, 0x21, 0x83, 0x3a, 0x04, 0xae,
+ 0x58, 0x1b, 0x5f, 0xd8, 0x46, 0x98, 0x4d, 0x44, 0x98, 0x8f, 0xbf, 0x9b,
+ 0x44, 0x4b, 0xc8, 0x6b, 0xa6, 0x77, 0x4e, 0x34, 0x1e, 0x6f, 0x02, 0xa2,
+ 0x8f, 0xff, 0x11, 0xeb, 0xb4, 0xa3, 0x97, 0x6d, 0x9b, 0x34, 0xc4, 0xbe,
+ 0xc8, 0x59, 0xd8, 0xeb, 0xae, 0x47, 0xec, 0xd0, 0xac, 0x53, 0xe7, 0x4d,
+ 0xb4, 0x72, 0xcf, 0x8d, 0xf4, 0x7f, 0x21, 0xee, 0xeb, 0x94, 0xc3, 0x8f,
+ 0x2f, 0x7e, 0xe4, 0x31, 0x7c, 0xeb, 0xac, 0x67, 0x4d, 0xfe, 0x8c, 0x4d,
+ 0x72, 0x26, 0xd5, 0x3b, 0x4e, 0xd3, 0xbc, 0x8c, 0x79, 0xba, 0xaf, 0x19,
+ 0xc1, 0x05, 0xbf, 0xe1, 0x7d, 0x06, 0x94, 0xea, 0x62, 0xac, 0xff, 0xca,
+ 0xba, 0xc7, 0x67, 0xdf, 0xff, 0xc0, 0xa7, 0x7c, 0x07, 0xb7, 0xa6, 0x8d,
+ 0x27, 0x18, 0x6b, 0xb2, 0x8d, 0x39, 0x65, 0x2f, 0x34, 0x07, 0x2d, 0xb1,
+ 0x34, 0xa3, 0x59, 0x0d, 0x4d, 0xb1, 0xba, 0x98, 0xdc, 0xa5, 0x48, 0x46,
+ 0x97, 0x7c, 0x54, 0x6d, 0xad, 0x18, 0xe8, 0x96, 0x4d, 0x71, 0xdb, 0x70,
+ 0x4a, 0xe0, 0xfb, 0x83, 0x12, 0xe3, 0xe2, 0x5a, 0x3b, 0x79, 0x27, 0xfd,
+ 0x60, 0xf5, 0x1c, 0x49, 0xa2, 0x6b, 0xdb, 0x5d, 0xd0, 0x87, 0x9c, 0x3a,
+ 0xaf, 0x7f, 0x96, 0xc9, 0xfd, 0x3e, 0xe5, 0x7b, 0x5b, 0xdd, 0xeb, 0x21,
+ 0xfb, 0x5f, 0x02, 0x05, 0x8f, 0xfe, 0x10, 0xc1, 0x9d, 0x02, 0x57, 0x2c,
+ 0x0d, 0xaf, 0xec, 0x23, 0x4c, 0x26, 0xa2, 0x4c, 0x47, 0xdf, 0xcd, 0xa2,
+ 0x25, 0xe4, 0x35, 0xd3, 0x3b, 0xa7, 0x1a, 0x0f, 0x37, 0x81, 0x51, 0x47,
+ 0xff, 0x88, 0xf5, 0xda, 0x51, 0xcb, 0xb6, 0xcd, 0x9a, 0x62, 0x5f, 0x64,
+ 0x2c, 0xec, 0x75, 0xd7, 0x23, 0xf6, 0x68, 0x56, 0x29, 0xf3, 0xa6, 0xda,
+ 0x39, 0x67, 0xc6, 0xfa, 0x3f, 0x90, 0xf7, 0x75, 0xca, 0x61, 0xc7, 0x97,
+ 0xbf, 0x72, 0x18, 0xbe, 0x75, 0xd6, 0x33, 0xa6, 0xff, 0x46, 0x26, 0xb9,
+ 0x13, 0x6a, 0x9d, 0xa7, 0x69, 0xde, 0x46, 0x3c, 0xdd, 0x57, 0x8c, 0xe0,
+ 0x82, 0xdf, 0xf0, 0xbe, 0x83, 0x4a, 0x75, 0x31, 0x56, 0x7f, 0xe5, 0x5d,
+ 0x63, 0xb3, 0xef, 0xff, 0xe0, 0x53, 0xbe, 0x03, 0xdb, 0xd3, 0x46, 0x93,
+ 0x8c, 0x35, 0xd9, 0x46, 0x9c, 0xb2, 0x97, 0x9a, 0x03, 0x96, 0xd8, 0x9a,
+ 0x51, 0xac, 0x86, 0xa6, 0xd8, 0xdd, 0x4c, 0x6e, 0x52, 0xa4, 0x23, 0x4b,
+ 0xbe, 0x2a, 0x36, 0xd6, 0x8c, 0x74, 0x4b, 0x26, 0xb8, 0xed, 0xb8, 0x25,
+ 0x70, 0x7d, 0xc1, 0x89, 0x71, 0xf1, 0x2d, 0x1d, 0xbc, 0x93, 0xfe, 0xb0,
+ 0x7a, 0x8e, 0x24, 0xd1, 0x35, 0xed, 0xae, 0xe8, 0x43, 0xce, 0x1d, 0x57,
+ 0xbf, 0xcb, 0x64, 0xfe, 0x9f, 0x72, 0xbd, 0xad, 0xee, 0xf5, 0x90, 0xfd,
+ 0xaf, 0x81, 0x02, 0xc7, 0xff, 0x08, 0x60, 0xce, 0x81, 0x2b, 0x96, 0x06,
+ 0xd7, 0xf6, 0x11, 0xa6, 0x13, 0x51, 0x26, 0x23, 0xef, 0xe6, 0xd1, 0x12,
+ 0xf2, 0x1a, 0xe9, 0x9d, 0xd3, 0x8d, 0x07, 0x9b, 0xc0, 0xa8, 0xa3, 0xff,
+ 0xc4, 0x7a, 0xed, 0x28, 0xe5, 0xdb, 0x66, 0xcd, 0x31, 0x2f, 0xb2, 0x16,
+ 0x76, 0x3a, 0xeb, 0x91, 0xfb, 0x34, 0x2b, 0x14, 0xf9, 0xd3, 0x6d, 0x1c,
+ 0xb3, 0xe3, 0x7d, 0x1f, 0xc8, 0x7b, 0xba, 0xe5, 0x30, 0xe3, 0xcb, 0xdf,
+ 0xb9, 0x0c, 0x5f, 0x3a, 0xeb, 0x19, 0xd3, 0x7f, 0xa3, 0x13, 0x5c, 0x89,
+ 0xb5, 0x4e, 0xd3, 0xb4, 0xef, 0x23, 0x1e, 0x6e, 0xab, 0xc6, 0x70, 0x41,
+ 0x6f, 0xf8, 0x5f, 0x41, 0xa5, 0x3a, 0x98, 0xab, 0x3f, 0xf2, 0xae, 0xb1,
+ 0xd9, 0xf7, 0xff, 0xf0, 0x29, 0xdf, 0x01, 0xed, 0xe9, 0xa3, 0x49, 0xc6,
+ 0x1a, 0xec, 0xa3, 0x4e, 0x59, 0x4b, 0xcd, 0x01, 0xcb, 0x6c, 0x4d, 0x28,
+ 0xd6, 0x43, 0x53, 0x6c, 0x6e, 0xa6, 0x37, 0x29, 0x52, 0x11, 0xa5, 0xdf,
+ 0x15, 0x1b, 0x6b, 0x46, 0x3a, 0x25, 0x93, 0x5c, 0x76, 0xdc, 0x12, 0xb8,
+ 0x3e, 0xe0, 0xc4, 0xb8, 0xf8, 0x96, 0x8e, 0xde, 0x49, 0xff, 0x58, 0x3d,
+ 0x47, 0x12, 0x68, 0x9a, 0xf6, 0xd7, 0x74, 0x21, 0xe7, 0x0e, 0xab, 0xdf,
+ 0xe5, 0xb2, 0x7f, 0x4f, 0xb9, 0x5e, 0xd6, 0xf7, 0x7a, 0xc8, 0x7e, 0xd7,
+ 0xc0, 0x81, 0x63, 0xff, 0x84, 0x30, 0x67, 0x40, 0x95, 0xcb, 0x03, 0x6b,
+ 0xfb, 0x08, 0xd3, 0x09, 0xa8, 0x93, 0x11, 0xf7, 0xf3, 0x68, 0x89, 0x79,
+ 0x0d, 0x74, 0xce, 0xe9, 0xc6, 0x83, 0xcd, 0xe0, 0x54, 0x51, 0xff, 0xe2,
+ 0x3d, 0x76, 0x94, 0x72, 0xed, 0xb3, 0x66, 0x98, 0x97, 0xd9, 0x0b, 0x3b,
+ 0x1d, 0x75, 0xc8, 0xfd, 0x9a, 0x15, 0x8a, 0x7c, 0xe9, 0xb6, 0x8e, 0x59,
+ 0xf1, 0xbe, 0x8f, 0xe4, 0x3d, 0xdd, 0x72, 0x98, 0x71, 0xe5, 0xef, 0xdc,
+ 0x86, 0x2f, 0x9d, 0x75, 0x8c, 0xe9, 0xbf, 0xd1, 0x89, 0xae, 0x44, 0xda,
+ 0xa7, 0x69, 0xda, 0x77, 0x91, 0x8f, 0x37, 0x55, 0xe3, 0x38, 0x20, 0xb7,
+ 0xfc, 0x2f, 0xa0, 0xd2, 0x9d, 0x4c, 0x55, 0x9f, 0xf9, 0x57, 0x58, 0xec,
+ 0xfb, 0xff, 0xf8, 0x14, 0xef, 0x80, 0xf6, 0xf4, 0xd1, 0xa4, 0xe3, 0x0d,
+ 0x76, 0x51, 0xa7, 0x2c, 0xa5, 0xe6, 0x80, 0xe5, 0xb6, 0x26, 0x94, 0x6b,
+ 0x21, 0xa9, 0xb6, 0x37, 0x53, 0x1b, 0x94, 0xa9, 0x08, 0xd2, 0xef, 0x8a,
+ 0x8d, 0xb5, 0xa3, 0x1d, 0x12, 0xc9, 0xae, 0x3b, 0x6e, 0x09, 0x5c, 0x1f,
+ 0x70, 0x62, 0x5c, 0x7c, 0x4b, 0x47, 0x6f, 0x24, 0xff, 0xac, 0x1e, 0xa3,
+ 0x89, 0x34, 0x4d, 0x7b, 0x6b, 0xba, 0x10, 0xf3, 0x87, 0x55, 0xef, 0xf2,
+ 0xd9, 0x3f, 0xa7, 0xdc, 0xaf, 0x6b, 0x7b, 0xbd, 0x64, 0x3f, 0x6b, 0xe0,
+ 0x40, 0xb1, 0xff, 0xc2, 0x18, 0x33, 0xa0, 0x4a, 0xe5, 0x81, 0xb5, 0xfd,
+ 0x84, 0x69, 0x84, 0xd4, 0x49, 0x88, 0xfb, 0xf9, 0xb4, 0x44, 0xbc, 0x86,
+ 0xba, 0x67, 0x74, 0xe3, 0x41, 0xe6, 0xf0, 0x2a, 0x28, 0xff, 0xf1, 0x1e,
+ 0xbb, 0x4a, 0x39, 0x76, 0xd9, 0xb3, 0x4c, 0x4b, 0xec, 0x85, 0x9d, 0x8e,
+ 0xba, 0xe4, 0x7e, 0xcd, 0x0a, 0xc5, 0x3e, 0x74, 0xdb, 0x47, 0x2c, 0xf8,
+ 0xdf, 0x47, 0xf2, 0x1e, 0xee, 0xb9, 0x4c, 0x38, 0xf2, 0xf7, 0xee, 0x43,
+ 0x17, 0xce, 0xba, 0xc6, 0x74, 0xdf, 0xe8, 0xc4, 0xd7, 0x22, 0x6d, 0x53,
+ 0xb4, 0xed, 0x3b, 0xc8, 0xc7, 0x9b, 0xaa, 0xf1, 0x9c, 0x10, 0x5b, 0xfe,
+ 0x17, 0xd0, 0x69, 0x4e, 0xa6, 0x2a, 0xcf, 0xfc, 0xab, 0xac, 0x76, 0x7d,
+ 0xff, 0xfc, 0x0a, 0x77, 0xc0, 0x7b, 0x7a, 0x68, 0xd2, 0x71, 0x86, 0xbb,
+ 0x28, 0xd3, 0x96, 0x52, 0xf3, 0x40, 0x72, 0xdb, 0x13, 0x4a, 0x35, 0x90,
+ 0xd4, 0xdb, 0x1b, 0xa9, 0x8d, 0xca, 0x54, 0x84, 0x69, 0x77, 0xc5, 0x46,
+ 0xda, 0xd1, 0x8e, 0x89, 0x64, 0xd7, 0x1d, 0xb7, 0x04, 0xae, 0x0f, 0xb8,
+ 0x31, 0x2e, 0x3e, 0x25, 0xa3, 0xb7, 0x92, 0x7f, 0xd6, 0x0f, 0x51, 0xc4,
+ 0x9a, 0x26, 0xbd, 0xb5, 0xdd, 0x08, 0x79, 0xc3, 0xaa, 0xf7, 0xf9, 0x6c,
+ 0x9f, 0xd3, 0xee, 0x57, 0xb5, 0xbd, 0xde, 0xb2, 0x1f, 0xb5, 0xf0, 0x20,
+ 0x58, 0xff, 0xe1, 0x0c, 0x19, 0xd0, 0x25, 0x72, 0xc0, 0xda, 0xfe, 0xc2,
+ 0x34, 0xc2, 0x6a, 0x24, 0xc4, 0x7d, 0xfc, 0xda, 0x22, 0x5e, 0x43, 0x5d,
+ 0x33, 0xba, 0x71, 0xa0, 0xf3, 0x78, 0x15, 0x14, 0x7f, 0xf8, 0x8f, 0x5d,
+ 0xa5, 0x1c, 0xbb, 0x6c, 0xd9, 0xa6, 0x25, 0xf6, 0x42, 0xce, 0xc7, 0x5d,
+ 0x72, 0x3f, 0x66, 0x85, 0x62, 0x9f, 0x3a, 0x6d, 0xa3, 0x96, 0x7c, 0x6f,
+ 0xa3, 0xf9, 0x0f, 0x77, 0x5c, 0xa6, 0x1c, 0x79, 0x7b, 0xf7, 0x21, 0x8b,
+ 0xe7, 0x5d, 0x63, 0x3a, 0x6f, 0xf4, 0x62, 0x6b, 0x91, 0x36, 0xa9, 0xda,
+ 0x76, 0x9d, 0xe4, 0x63, 0xcd, 0xd5, 0x78, 0xce, 0x08, 0x2d, 0xff, 0x0b,
+ 0xe8, 0x34, 0xa7, 0x53, 0x15, 0x67, 0xfe, 0x55, 0xd6, 0x3b, 0x3e, 0xff,
+ 0xfe, 0x05, 0x3b, 0xe0, 0x3d, 0xbd, 0x34, 0x69, 0x38, 0xc3, 0x5d, 0x94,
+ 0x69, 0xcb, 0x29, 0x79, 0xa0, 0x39, 0x6d, 0x89, 0xa5, 0x1a, 0xc8, 0x6a,
+ 0x6d, 0x8d, 0xd4, 0xc6, 0xe5, 0x2a, 0x42, 0x34, 0xbb, 0xe2, 0xa3, 0x6d,
+ 0x68, 0xc7, 0x44, 0xb2, 0x6b, 0x8e, 0xdb, 0x82, 0x57, 0x07, 0xdc, 0x18,
+ 0x97, 0x1f, 0x12, 0xd1, 0xdb, 0xc9, 0x3f, 0xeb, 0x07, 0xa8, 0xe2, 0x4d,
+ 0x13, 0x5e, 0xda, 0xee, 0x84, 0x3c, 0xe1, 0xd5, 0x7b, 0xfc, 0xb6, 0x4f,
+ 0xe9, 0xf7, 0x2b, 0xda, 0xde, 0xef, 0x59, 0x0f, 0xda, 0xf8, 0x10, 0x2c,
+ 0x7f, 0xf0, 0x86, 0x0c, 0xe8, 0x12, 0xb9, 0x60, 0x6d, 0x7f, 0x61, 0x1a,
+ 0x61, 0x35, 0x12, 0x62, 0x3e, 0xfe, 0x6d, 0x11, 0x2f, 0x21, 0xae, 0x99,
+ 0xdd, 0x38, 0xd0, 0x79, 0xbc, 0x0a, 0x8a, 0x3f, 0xfc, 0x47, 0xae, 0xd2,
+ 0x8e, 0x5d, 0xb6, 0x6c, 0xd3, 0x12, 0xfb, 0x21, 0x67, 0x63, 0xae, 0xb9,
+ 0x1f, 0xb3, 0x42, 0xb1, 0x4f, 0x9d, 0x36, 0xd1, 0xcb, 0x3e, 0x37, 0xd1,
+ 0xfc, 0x87, 0xbb, 0xae, 0x53, 0x0e, 0x3c, 0xbd, 0xfb, 0x90, 0xc5, 0xf3,
+ 0xae, 0xb1, 0x9d, 0x37, 0xfa, 0x31, 0x35, 0xc8, 0x9b, 0x54, 0xed, 0x3b,
+ 0x4e, 0xf2, 0x31, 0xe6, 0xea, 0xbc, 0x67, 0x04, 0x16, 0xff, 0x85, 0xf4,
+ 0x1a, 0x53, 0xa9, 0x8a, 0xb3, 0xff, 0x2a, 0xeb, 0x1d, 0x9f, 0x7f, 0xff,
+ 0x08,
+};
+static_assert(sizeof(kBytesTestReadSymbol11) == kNumBytesTestReadSymbol11, "");
+
+// The kBytesTestReadSymbol12[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][13] = {
+// // pmf: 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12, 1/12,
+// // 1/12
+// { 32768 - 2731, 32768 - 5461, 32768 - 8192, 32768 - 10923, 32768 - 13653,
+// 32768 - 16384, 32768 - 19115, 32768 - 21845, 32768 - 24576,
+// 32768 - 27307, 32768 - 30037, 0, 0 },
+// // pmf: 3/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24,
+// // 1/24
+// { 32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288, 32768 - 15019,
+// 32768 - 17749, 32768 - 20480, 32768 - 23211, 32768 - 25941,
+// 32768 - 28672, 32768 - 31403, 0, 0 },
+// // pmf: 1/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24, 2/24,
+// // 3/24
+// { 32768 - 1365, 32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288,
+// 32768 - 15019, 32768 - 17749, 32768 - 20480, 32768 - 23211,
+// 32768 - 25941, 32768 - 28672, 0, 0 },
+// // pmf: 1/24, 2/24, 2/24, 2/24, 2/24, 3/24, 3/24, 2/24, 2/24, 2/24, 2/24,
+// // 1/24
+// { 32768 - 1365, 32768 - 4096, 32768 - 6827, 32768 - 9557, 32768 - 12288,
+// 32768 - 16384, 32768 - 20480, 32768 - 23211, 32768 - 25941,
+// 32768 - 28672, 32768 - 31403, 0, 0 },
+// };
+// constexpr int kSymbols[24][4] = { { 0, 6, 11, 5 }, //
+// { 1, 7, 10, 4 }, //
+// { 2, 8, 9, 3 }, //
+// { 3, 9, 8, 2 }, //
+// { 4, 10, 7, 1 }, //
+// { 5, 11, 6, 0 }, //
+// { 6, 0, 5, 11 }, //
+// { 7, 1, 4, 10 }, //
+// { 8, 2, 3, 9 }, //
+// { 9, 3, 2, 8 }, //
+// { 10, 4, 1, 7 }, //
+// { 11, 5, 0, 6 }, //
+// { 0, 0, 11, 9 }, //
+// { 2, 1, 10, 7 }, //
+// { 4, 3, 8, 5 }, //
+// { 6, 5, 6, 3 }, //
+// { 8, 7, 4, 1 }, //
+// { 10, 9, 2, 10 }, //
+// { 1, 0, 11, 8 }, //
+// { 3, 2, 9, 6 }, //
+// { 5, 4, 7, 4 }, //
+// { 7, 6, 5, 2 }, //
+// { 9, 8, 3, 6 }, //
+// { 11, 10, 1, 5 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 80; ++i) {
+// for (int j = 0; j < 24; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 12);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol12 = 3473;
+constexpr uint8_t kBytesTestReadSymbol12[] = {
+ 0x0d, 0x17, 0xf5, 0xbd, 0x05, 0xd0, 0x9c, 0x5d, 0x10, 0xc5, 0x9e, 0xc4,
+ 0x9f, 0xc6, 0xf4, 0x7d, 0xce, 0x67, 0x97, 0x49, 0xd1, 0x05, 0x54, 0xab,
+ 0xda, 0x22, 0x5b, 0xbc, 0x9c, 0x11, 0xc8, 0x0b, 0xe9, 0x6d, 0xb1, 0x8a,
+ 0x17, 0x06, 0x92, 0xed, 0xd4, 0x61, 0x48, 0x01, 0x64, 0x43, 0x65, 0x65,
+ 0xfc, 0x35, 0x9d, 0xbb, 0x68, 0x3f, 0x77, 0xbc, 0x8d, 0xd9, 0x3b, 0x48,
+ 0x77, 0x58, 0x2f, 0x19, 0xfa, 0x73, 0xa6, 0xc3, 0x65, 0x96, 0x6c, 0x9d,
+ 0x99, 0xb8, 0x65, 0x2b, 0x94, 0x11, 0x21, 0xf4, 0x95, 0xa4, 0xcd, 0xf2,
+ 0xbf, 0x65, 0x79, 0x34, 0x4b, 0xf6, 0x5c, 0xeb, 0xca, 0x07, 0x65, 0x4f,
+ 0xae, 0x67, 0xd8, 0xdf, 0xec, 0xc9, 0xd2, 0x26, 0x2e, 0xac, 0xea, 0xa2,
+ 0xbd, 0x0d, 0x79, 0x27, 0x91, 0xf5, 0x84, 0x89, 0xf9, 0x2a, 0xb3, 0x5e,
+ 0x48, 0x4b, 0x2b, 0x89, 0xc0, 0xa5, 0x9f, 0x94, 0x07, 0x82, 0x36, 0x11,
+ 0x65, 0x4d, 0xb0, 0xde, 0xac, 0xde, 0xac, 0xc0, 0x35, 0x7f, 0xf3, 0x9b,
+ 0x01, 0x0c, 0x35, 0x8b, 0xb5, 0x22, 0xb8, 0xea, 0x1c, 0xab, 0xbe, 0x08,
+ 0xd9, 0x23, 0x0a, 0x37, 0x95, 0x36, 0x3d, 0x28, 0xb3, 0x19, 0x34, 0x3a,
+ 0x47, 0xf8, 0x45, 0x33, 0x7a, 0x65, 0xae, 0x80, 0x48, 0x01, 0x20, 0xe8,
+ 0xcd, 0xb7, 0xce, 0xf7, 0xee, 0xd1, 0x50, 0x39, 0xec, 0xa6, 0x8b, 0xa0,
+ 0xb5, 0x56, 0x76, 0x1a, 0xb4, 0x6b, 0x31, 0xcf, 0x32, 0x0f, 0xb1, 0xba,
+ 0xb3, 0xa4, 0xb7, 0x34, 0xfe, 0x86, 0x87, 0xa7, 0x44, 0x70, 0x3b, 0x9e,
+ 0x94, 0xc5, 0x43, 0x82, 0xf1, 0x1a, 0xa1, 0x10, 0x05, 0x7c, 0x04, 0x63,
+ 0x5a, 0xfe, 0xc2, 0xb6, 0x15, 0x07, 0x3f, 0xb0, 0x3c, 0x43, 0x74, 0x33,
+ 0xec, 0xb8, 0xe0, 0xf5, 0x79, 0x48, 0x7c, 0x50, 0x4f, 0x4b, 0xb9, 0x08,
+ 0x33, 0xfd, 0x54, 0xd5, 0x6f, 0xdf, 0xca, 0xfe, 0x38, 0xa1, 0xeb, 0xa9,
+ 0xaf, 0xa5, 0x8f, 0xcf, 0xb3, 0xda, 0x77, 0x3f, 0x63, 0xcb, 0x98, 0x2b,
+ 0x71, 0x56, 0x60, 0xb4, 0x5c, 0x7d, 0x81, 0x85, 0xf3, 0x64, 0x9f, 0xf3,
+ 0xc2, 0xec, 0x2a, 0x27, 0x9b, 0x5e, 0x39, 0x30, 0x10, 0x0d, 0x43, 0xdb,
+ 0x9f, 0x7b, 0x8f, 0xb8, 0x09, 0xe2, 0x55, 0xb3, 0xc4, 0xb1, 0xeb, 0x23,
+ 0xcd, 0x32, 0xde, 0x58, 0xc2, 0x35, 0xda, 0x5c, 0x9a, 0xf8, 0x2d, 0xc6,
+ 0x19, 0x46, 0x64, 0x66, 0x5a, 0xdb, 0x53, 0xc8, 0x14, 0x41, 0xcc, 0x0c,
+ 0x3f, 0xff, 0x3e, 0xbe, 0x29, 0xba, 0x5f, 0x68, 0xa9, 0x31, 0x39, 0x79,
+ 0x2a, 0xfe, 0x14, 0x92, 0x8f, 0x2b, 0x31, 0xf1, 0x0a, 0x25, 0xd8, 0x22,
+ 0xe1, 0xc7, 0xcd, 0xda, 0xea, 0x88, 0xfa, 0x6a, 0xb0, 0x69, 0x77, 0xf6,
+ 0xd6, 0x46, 0xb9, 0xe6, 0x53, 0x09, 0x48, 0x65, 0xbd, 0xe6, 0xf8, 0xc0,
+ 0x04, 0x71, 0x26, 0x21, 0xe8, 0xf9, 0xc1, 0x71, 0x73, 0x6b, 0x3d, 0x73,
+ 0x16, 0x66, 0x38, 0xae, 0x59, 0xb9, 0xe3, 0x34, 0x8f, 0x17, 0x3c, 0x16,
+ 0xaa, 0x3f, 0x61, 0x49, 0xb3, 0x06, 0xcc, 0xb3, 0xcb, 0x7e, 0x42, 0xf1,
+ 0x2a, 0x0e, 0xb2, 0xcb, 0x1d, 0xf0, 0x0f, 0xc9, 0x20, 0xb1, 0x80, 0xce,
+ 0x08, 0xb9, 0xfa, 0xca, 0x3c, 0xd5, 0x67, 0x47, 0x36, 0x17, 0xc1, 0xf7,
+ 0x9d, 0x97, 0x79, 0x75, 0xee, 0xb0, 0xed, 0xfc, 0xd0, 0xdf, 0xc8, 0xa2,
+ 0xc1, 0xae, 0x51, 0x53, 0x88, 0x05, 0x95, 0x73, 0x7e, 0xd9, 0x3b, 0x9d,
+ 0xb0, 0x08, 0x37, 0xff, 0x51, 0x6f, 0xf9, 0xad, 0x60, 0xa5, 0x3a, 0xd6,
+ 0xba, 0xea, 0xf6, 0xea, 0x91, 0x2e, 0x5a, 0xa9, 0xbf, 0xe2, 0x52, 0x46,
+ 0x0c, 0xbd, 0x28, 0x2d, 0xa8, 0x5f, 0xc8, 0x41, 0x31, 0x53, 0x7a, 0x9f,
+ 0xfa, 0x73, 0x06, 0xc5, 0xae, 0x59, 0x8d, 0xe3, 0x0d, 0xfa, 0x99, 0x7f,
+ 0xee, 0xe4, 0x82, 0xd4, 0x36, 0x68, 0x09, 0x92, 0x09, 0xef, 0x70, 0x89,
+ 0xc6, 0xfa, 0xc7, 0x7e, 0x0f, 0x24, 0x8e, 0xad, 0x4e, 0xd9, 0x4c, 0x11,
+ 0xe7, 0x7d, 0x98, 0xf0, 0x80, 0x42, 0x0b, 0x86, 0x8d, 0x8e, 0x85, 0x97,
+ 0xd2, 0x11, 0x0f, 0x04, 0x59, 0xaf, 0xa5, 0xec, 0xda, 0x75, 0x64, 0x51,
+ 0x22, 0x7e, 0x38, 0x4b, 0xca, 0x9e, 0x82, 0x71, 0x72, 0x8d, 0x4c, 0xca,
+ 0xe1, 0x77, 0xe5, 0xe0, 0x9d, 0x64, 0x01, 0x48, 0x49, 0xcd, 0x3b, 0x90,
+ 0xd8, 0x9e, 0x15, 0x22, 0x76, 0xe0, 0x57, 0x06, 0x06, 0xaf, 0x2c, 0x09,
+ 0xce, 0x4c, 0xfa, 0x8b, 0xbf, 0xa1, 0x1b, 0xe3, 0xe7, 0xa5, 0xa0, 0xc0,
+ 0xc8, 0x4c, 0x79, 0x1b, 0xeb, 0x5d, 0xb8, 0x3b, 0x1c, 0x3f, 0xbc, 0x11,
+ 0x8f, 0xa0, 0x08, 0x2b, 0xd3, 0xe3, 0xca, 0xbc, 0x41, 0xc2, 0xa4, 0x4e,
+ 0xdc, 0x0a, 0xe1, 0x06, 0xef, 0x55, 0x13, 0xb3, 0xdd, 0xfd, 0xe2, 0x89,
+ 0x5f, 0xb5, 0xf6, 0xa9, 0xd7, 0xae, 0xc1, 0x14, 0xb6, 0x19, 0xd8, 0x5b,
+ 0x0f, 0x9a, 0xb0, 0xed, 0xc5, 0xc7, 0xa8, 0xa6, 0x08, 0x5a, 0x00, 0xad,
+ 0xf5, 0x9c, 0xb9, 0xd9, 0x45, 0x46, 0xf0, 0x9e, 0x2d, 0x55, 0xc6, 0x08,
+ 0x60, 0x0d, 0x9e, 0xa7, 0x68, 0xb6, 0xf7, 0xf3, 0xa9, 0x84, 0x7e, 0x63,
+ 0xe8, 0x48, 0x03, 0x1c, 0x15, 0x97, 0x94, 0xda, 0x04, 0xb2, 0xd0, 0x09,
+ 0xa5, 0x62, 0x21, 0x70, 0x88, 0x9f, 0xf5, 0x0c, 0x91, 0x0d, 0xbf, 0x69,
+ 0xe1, 0x6b, 0x4f, 0xc2, 0xf2, 0x32, 0xe1, 0x4b, 0xad, 0x58, 0xea, 0x0c,
+ 0x07, 0x13, 0x4a, 0x1b, 0x87, 0x6d, 0x6e, 0x2f, 0xb6, 0xc6, 0x30, 0x1e,
+ 0x2d, 0x1d, 0x5c, 0xdf, 0xd2, 0x5a, 0x88, 0xc8, 0x1c, 0xd9, 0xc3, 0x91,
+ 0x04, 0x45, 0x63, 0x11, 0x44, 0x35, 0x7f, 0x46, 0xf4, 0xd0, 0xd1, 0x73,
+ 0x9c, 0xae, 0x85, 0x5e, 0xda, 0xc7, 0xce, 0xb5, 0xbb, 0x3a, 0xb4, 0x67,
+ 0xa5, 0xad, 0xc6, 0x5e, 0x12, 0xc7, 0xc5, 0x72, 0xfc, 0x35, 0x2e, 0xae,
+ 0x46, 0x81, 0x22, 0x56, 0x6d, 0xc9, 0x36, 0x43, 0x17, 0x6b, 0x4d, 0x81,
+ 0xd6, 0x59, 0x35, 0x90, 0x3a, 0xd2, 0xde, 0x79, 0xbd, 0x21, 0xc4, 0x56,
+ 0xcb, 0x59, 0x3b, 0xe7, 0xb3, 0xab, 0x92, 0xce, 0x65, 0xc7, 0x20, 0xde,
+ 0xde, 0xb1, 0x94, 0xac, 0x1a, 0x23, 0xa4, 0x14, 0x56, 0x32, 0xc0, 0x9f,
+ 0x48, 0x31, 0xa6, 0x95, 0xc4, 0xb8, 0xf3, 0x9c, 0x8d, 0x34, 0x03, 0xc3,
+ 0x62, 0x63, 0x38, 0x15, 0x71, 0x08, 0x5e, 0x1b, 0xc0, 0xf2, 0x54, 0x13,
+ 0x66, 0x01, 0xf1, 0x38, 0xd9, 0x61, 0xf3, 0xdb, 0xd4, 0x83, 0x98, 0x3e,
+ 0xaa, 0xe1, 0xca, 0x2d, 0xfb, 0x6d, 0x02, 0xac, 0xf2, 0xa6, 0x04, 0x09,
+ 0xeb, 0xcb, 0xaf, 0xd5, 0x9d, 0x3d, 0xd7, 0xc2, 0xc1, 0x6f, 0xec, 0x53,
+ 0x65, 0x0e, 0x40, 0x77, 0x03, 0xcd, 0x79, 0x0a, 0x94, 0x27, 0x6b, 0x6f,
+ 0x32, 0xb3, 0xdb, 0x3e, 0x38, 0xe2, 0xd2, 0xca, 0x9b, 0x9e, 0x24, 0xc7,
+ 0x35, 0xfd, 0xc1, 0x86, 0x78, 0xd9, 0xc3, 0xfe, 0x03, 0xb3, 0x3f, 0xc1,
+ 0xf8, 0x09, 0x89, 0xdc, 0x3b, 0x08, 0xae, 0x85, 0xfa, 0x8e, 0x51, 0xbb,
+ 0x6f, 0xf4, 0x73, 0x43, 0xd2, 0xed, 0x6d, 0xfd, 0x2b, 0x23, 0xc3, 0x4f,
+ 0xc4, 0x1d, 0x25, 0xb9, 0x36, 0xc4, 0x98, 0xe6, 0xbf, 0xb8, 0x30, 0xcf,
+ 0x1b, 0x38, 0x7f, 0xc0, 0x76, 0x67, 0xf8, 0x3f, 0x01, 0x31, 0x3b, 0x87,
+ 0x60, 0xf9, 0x90, 0x01, 0x2c, 0x2f, 0xff, 0x6d, 0xfc, 0x8c, 0x3e, 0xeb,
+ 0x7f, 0x96, 0x41, 0x82, 0xfd, 0xc6, 0x93, 0x8d, 0xfa, 0x4e, 0x48, 0x49,
+ 0x33, 0x3a, 0xa3, 0x5e, 0x61, 0xdf, 0x88, 0x73, 0x66, 0x04, 0xf5, 0xe5,
+ 0xd7, 0xea, 0xce, 0x9e, 0xeb, 0xe1, 0x60, 0xb7, 0xf1, 0xcc, 0x0d, 0xc1,
+ 0xc4, 0xa0, 0x22, 0x0d, 0xe5, 0x8c, 0x8e, 0x26, 0xf9, 0x89, 0xa5, 0x02,
+ 0xf6, 0x4c, 0x3f, 0x10, 0x74, 0x96, 0xe4, 0xdb, 0x12, 0x63, 0x9a, 0xfe,
+ 0x70, 0x4e, 0x9a, 0x97, 0xc8, 0xad, 0x5f, 0x39, 0xa0, 0x81, 0x6a, 0xc4,
+ 0x93, 0x50, 0x94, 0x1e, 0x17, 0xe3, 0x3f, 0x6d, 0x91, 0x01, 0xed, 0x49,
+ 0x96, 0xed, 0x01, 0xc2, 0x2a, 0xe1, 0xc9, 0x39, 0x76, 0x1f, 0x87, 0xb6,
+ 0xe3, 0x76, 0xa1, 0xc6, 0x58, 0x1e, 0xdd, 0x2a, 0xdf, 0xbf, 0x82, 0xa3,
+ 0x6d, 0x87, 0x72, 0x2c, 0x7c, 0xdc, 0x3f, 0x2b, 0x6a, 0xf1, 0x9a, 0xe0,
+ 0x0e, 0xc3, 0xdc, 0x18, 0x3f, 0xc4, 0xbe, 0x11, 0x76, 0x54, 0xab, 0xe3,
+ 0xd6, 0x47, 0x90, 0x61, 0x87, 0x66, 0x08, 0x63, 0x95, 0x25, 0x20, 0x43,
+ 0x6e, 0x05, 0x80, 0xad, 0x01, 0x10, 0xc7, 0x6c, 0x04, 0xbe, 0xaf, 0xc5,
+ 0x50, 0xa7, 0x48, 0x4a, 0x47, 0x44, 0x71, 0xc9, 0xa5, 0xdb, 0xa2, 0x2b,
+ 0x12, 0xbc, 0x40, 0x39, 0x31, 0x69, 0x83, 0x03, 0xb9, 0xa0, 0x46, 0xf0,
+ 0xb4, 0x4b, 0x1b, 0x8d, 0xda, 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7e,
+ 0xfe, 0x0a, 0x8d, 0xb6, 0x1d, 0xc8, 0xb1, 0xf3, 0x70, 0xfc, 0xad, 0xab,
+ 0xc6, 0x6b, 0x80, 0xc8, 0xbb, 0x74, 0x45, 0x62, 0x57, 0x88, 0x07, 0x26,
+ 0x2d, 0x30, 0x60, 0x77, 0x34, 0x08, 0xde, 0x16, 0x89, 0x63, 0x71, 0xbb,
+ 0x50, 0xe3, 0x2c, 0x0f, 0x6e, 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52,
+ 0x53, 0x83, 0x53, 0xf6, 0x9e, 0x15, 0xb5, 0x78, 0xcd, 0x70, 0x19, 0x17,
+ 0x6e, 0x88, 0xac, 0x4a, 0xf1, 0x00, 0xe4, 0xc5, 0xa6, 0x0c, 0x0e, 0xe6,
+ 0x81, 0x1b, 0xc2, 0xd1, 0x2c, 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed,
+ 0xd2, 0xad, 0xfc, 0x15, 0xaa, 0x42, 0xea, 0x4a, 0x70, 0x6a, 0x7e, 0xd3,
+ 0xc2, 0xb6, 0xaf, 0x19, 0xae, 0x03, 0x22, 0xed, 0xd1, 0x15, 0x89, 0x5e,
+ 0x20, 0x1c, 0x98, 0xb4, 0xc1, 0x81, 0xdc, 0xd0, 0x23, 0x78, 0x5a, 0x25,
+ 0x8d, 0xc6, 0xed, 0x43, 0x8c, 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5,
+ 0x48, 0x5d, 0x49, 0x4e, 0x0d, 0x4f, 0xda, 0x78, 0x56, 0xd5, 0xe3, 0x35,
+ 0xc0, 0x64, 0x5d, 0xba, 0x22, 0xb1, 0x2b, 0xc4, 0x03, 0x93, 0x16, 0x98,
+ 0x30, 0x3b, 0x9a, 0x04, 0x6f, 0x0b, 0x44, 0xb1, 0xb8, 0xdd, 0xa8, 0x71,
+ 0x96, 0x07, 0xb7, 0x4a, 0xb7, 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x29, 0xc1,
+ 0xa9, 0xfb, 0x4f, 0x0a, 0xda, 0xbc, 0x66, 0xb8, 0x0c, 0x8b, 0xb7, 0x44,
+ 0x56, 0x25, 0x78, 0x80, 0x72, 0x62, 0xd3, 0x06, 0x07, 0x73, 0x40, 0x8d,
+ 0xe1, 0x68, 0x96, 0x37, 0x1b, 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56,
+ 0xfe, 0x0a, 0xd5, 0x21, 0x75, 0x25, 0x38, 0x35, 0x3f, 0x69, 0xe1, 0x5b,
+ 0x57, 0x8c, 0xd7, 0x01, 0x91, 0x76, 0xe8, 0x8a, 0xc4, 0xaf, 0x10, 0x0e,
+ 0x4c, 0x5a, 0x60, 0xc0, 0xee, 0x68, 0x11, 0xbc, 0x2d, 0x12, 0xc6, 0xe3,
+ 0x76, 0xa1, 0xc6, 0x58, 0x1e, 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e,
+ 0xa4, 0xa7, 0x06, 0xa7, 0xed, 0x3c, 0x2b, 0x6a, 0xf1, 0x9a, 0xe0, 0x32,
+ 0x2e, 0xdd, 0x11, 0x58, 0x95, 0xe2, 0x01, 0xc9, 0x8b, 0x4c, 0x18, 0x1d,
+ 0xcd, 0x02, 0x37, 0x85, 0xa2, 0x58, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03,
+ 0xdb, 0xa5, 0x5b, 0xf8, 0x2b, 0x54, 0x85, 0xd4, 0x94, 0xe0, 0xd4, 0xfd,
+ 0xa7, 0x85, 0x6d, 0x5e, 0x33, 0x5c, 0x06, 0x45, 0xdb, 0xa2, 0x2b, 0x12,
+ 0xbc, 0x40, 0x39, 0x31, 0x69, 0x83, 0x03, 0xb9, 0xa0, 0x46, 0xf0, 0xb4,
+ 0x4b, 0x1b, 0x8d, 0xda, 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05,
+ 0x6a, 0x90, 0xba, 0x92, 0x6a, 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45,
+ 0xee, 0xc5, 0xfd, 0xe1, 0xb0, 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47,
+ 0x09, 0x7b, 0x5c, 0x67, 0x66, 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50,
+ 0xe3, 0x2c, 0x0f, 0x6e, 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d,
+ 0x50, 0x6d, 0x07, 0xa4, 0xfa, 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36,
+ 0x0c, 0x1f, 0xe2, 0x5f, 0x08, 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec,
+ 0xc6, 0xc1, 0xf8, 0x7b, 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2,
+ 0xad, 0xfc, 0x15, 0xaa, 0x42, 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f,
+ 0x4d, 0x0d, 0x17, 0xbb, 0x17, 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1,
+ 0x17, 0x65, 0x1c, 0x25, 0xed, 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d,
+ 0xc6, 0xed, 0x43, 0x8c, 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48,
+ 0x5d, 0x49, 0x35, 0x41, 0xb4, 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62,
+ 0xfe, 0xf0, 0xd8, 0x30, 0x7f, 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd,
+ 0xae, 0x33, 0xb3, 0x1b, 0x07, 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96,
+ 0x07, 0xb7, 0x4a, 0xb7, 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36,
+ 0x83, 0xd2, 0x7d, 0x34, 0x34, 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f,
+ 0xf1, 0x2f, 0x84, 0x5d, 0x94, 0x70, 0x97, 0xb5, 0xc6, 0x76, 0x63, 0x60,
+ 0xfc, 0x3d, 0xb7, 0x1b, 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56, 0xfe,
+ 0x0a, 0xd5, 0x21, 0x75, 0x24, 0xd5, 0x06, 0xd0, 0x7a, 0x4f, 0xa6, 0x86,
+ 0x8b, 0xdd, 0x8b, 0xfb, 0xc3, 0x60, 0xc1, 0xfe, 0x25, 0xf0, 0x8b, 0xb2,
+ 0x8e, 0x12, 0xf6, 0xb8, 0xce, 0xcc, 0x6c, 0x1f, 0x87, 0xb6, 0xe3, 0x76,
+ 0xa1, 0xc6, 0x58, 0x1e, 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e, 0xa4,
+ 0x9a, 0xa0, 0xda, 0x0f, 0x49, 0xf4, 0xd0, 0xd1, 0x7b, 0xb1, 0x7f, 0x78,
+ 0x6c, 0x18, 0x3f, 0xc4, 0xbe, 0x11, 0x76, 0x51, 0xc2, 0x5e, 0xd7, 0x19,
+ 0xd9, 0x8d, 0x83, 0xf0, 0xf6, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03, 0xdb,
+ 0xa5, 0x5b, 0xf8, 0x2b, 0x54, 0x85, 0xd4, 0x93, 0x54, 0x1b, 0x41, 0xe9,
+ 0x3e, 0x9a, 0x1a, 0x2f, 0x76, 0x2f, 0xef, 0x0d, 0x83, 0x07, 0xf8, 0x97,
+ 0xc2, 0x2e, 0xca, 0x38, 0x4b, 0xda, 0xe3, 0x3b, 0x31, 0xb0, 0x7e, 0x1e,
+ 0xdb, 0x8d, 0xda, 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05, 0x6a,
+ 0x90, 0xba, 0x92, 0x6a, 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45, 0xee,
+ 0xc5, 0xfd, 0xe1, 0xb0, 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47, 0x09,
+ 0x7b, 0x5c, 0x67, 0x66, 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50, 0xe3,
+ 0x2c, 0x0f, 0x6e, 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d, 0x50,
+ 0x6d, 0x07, 0xa4, 0xfa, 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36, 0x0c,
+ 0x1f, 0xe2, 0x5f, 0x08, 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec, 0xc6,
+ 0xc1, 0xf8, 0x7b, 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2, 0xad,
+ 0xfc, 0x15, 0xaa, 0x42, 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f, 0x4d,
+ 0x0d, 0x17, 0xbb, 0x17, 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1, 0x17,
+ 0x65, 0x1c, 0x25, 0xed, 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d, 0xc6,
+ 0xed, 0x43, 0x8c, 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48, 0x5d,
+ 0x49, 0x35, 0x41, 0xb4, 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62, 0xfe,
+ 0xf0, 0xd8, 0x30, 0x7f, 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd, 0xae,
+ 0x33, 0xb3, 0x1b, 0x07, 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96, 0x07,
+ 0xb7, 0x4a, 0xb7, 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36, 0x83,
+ 0xd2, 0x7d, 0x34, 0x34, 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f, 0xf1,
+ 0x2f, 0x84, 0x5d, 0x94, 0x70, 0x97, 0xb5, 0xc6, 0x76, 0x63, 0x60, 0xfc,
+ 0x3d, 0xb7, 0x1b, 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56, 0xfe, 0x0a,
+ 0xd5, 0x21, 0x75, 0x24, 0xd5, 0x06, 0xd0, 0x7a, 0x4f, 0xa6, 0x86, 0x8b,
+ 0xdd, 0x8b, 0xfb, 0xc3, 0x60, 0xc1, 0xfe, 0x25, 0xf0, 0x8b, 0xb2, 0x8e,
+ 0x12, 0xf6, 0xb8, 0xce, 0xcc, 0x6c, 0x1f, 0x87, 0xb6, 0xe3, 0x76, 0xa1,
+ 0xc6, 0x58, 0x1e, 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e, 0xa4, 0x9a,
+ 0xa0, 0xda, 0x0f, 0x49, 0xf4, 0xd0, 0xd1, 0x7b, 0xb1, 0x7f, 0x78, 0x6c,
+ 0x18, 0x3f, 0xc4, 0xbe, 0x11, 0x76, 0x51, 0xc2, 0x5e, 0xd7, 0x19, 0xd9,
+ 0x8d, 0x83, 0xf0, 0xf6, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03, 0xdb, 0xa5,
+ 0x5b, 0xf8, 0x2b, 0x54, 0x85, 0xd4, 0x93, 0x54, 0x1b, 0x41, 0xe9, 0x3e,
+ 0x9a, 0x1a, 0x2f, 0x76, 0x2f, 0xef, 0x0d, 0x83, 0x07, 0xf8, 0x97, 0xc2,
+ 0x2e, 0xca, 0x38, 0x4b, 0xda, 0xe3, 0x3b, 0x31, 0xb0, 0x7e, 0x1e, 0xdb,
+ 0x8d, 0xda, 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05, 0x6a, 0x90,
+ 0xba, 0x92, 0x6a, 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45, 0xee, 0xc5,
+ 0xfd, 0xe1, 0xb0, 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47, 0x09, 0x7b,
+ 0x5c, 0x67, 0x66, 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50, 0xe3, 0x2c,
+ 0x0f, 0x6e, 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d, 0x50, 0x6d,
+ 0x07, 0xa4, 0xfa, 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36, 0x0c, 0x1f,
+ 0xe2, 0x5f, 0x08, 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec, 0xc6, 0xc1,
+ 0xf8, 0x7b, 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2, 0xad, 0xfc,
+ 0x15, 0xaa, 0x42, 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f, 0x4d, 0x0d,
+ 0x17, 0xbb, 0x17, 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1, 0x17, 0x65,
+ 0x1c, 0x25, 0xed, 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d, 0xc6, 0xed,
+ 0x43, 0x8c, 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48, 0x5d, 0x49,
+ 0x35, 0x41, 0xb4, 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62, 0xfe, 0xf0,
+ 0xd8, 0x30, 0x7f, 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd, 0xae, 0x33,
+ 0xb3, 0x1b, 0x07, 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96, 0x07, 0xb7,
+ 0x4a, 0xb7, 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36, 0x83, 0xd2,
+ 0x7d, 0x34, 0x34, 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f, 0xf1, 0x2f,
+ 0x84, 0x5d, 0x94, 0x70, 0x97, 0xb5, 0xc6, 0x76, 0x63, 0x60, 0xfc, 0x3d,
+ 0xb7, 0x1b, 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56, 0xfe, 0x0a, 0xd5,
+ 0x21, 0x75, 0x24, 0xd5, 0x06, 0xd0, 0x7a, 0x4f, 0xa6, 0x86, 0x8b, 0xdd,
+ 0x8b, 0xfb, 0xc3, 0x60, 0xc1, 0xfe, 0x25, 0xf0, 0x8b, 0xb2, 0x8e, 0x12,
+ 0xf6, 0xb8, 0xce, 0xcc, 0x6c, 0x1f, 0x87, 0xb6, 0xe3, 0x76, 0xa1, 0xc6,
+ 0x58, 0x1e, 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e, 0xa4, 0x9a, 0xa0,
+ 0xda, 0x0f, 0x49, 0xf4, 0xd0, 0xd1, 0x7b, 0xb1, 0x7f, 0x78, 0x6c, 0x18,
+ 0x3f, 0xc4, 0xbe, 0x11, 0x76, 0x51, 0xc2, 0x5e, 0xd7, 0x19, 0xd9, 0x8d,
+ 0x83, 0xf0, 0xf6, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03, 0xdb, 0xa5, 0x5b,
+ 0xf8, 0x2b, 0x54, 0x85, 0xd4, 0x93, 0x54, 0x1b, 0x41, 0xe9, 0x3e, 0x9a,
+ 0x1a, 0x2f, 0x76, 0x2f, 0xef, 0x0d, 0x83, 0x07, 0xf8, 0x97, 0xc2, 0x2e,
+ 0xca, 0x38, 0x4b, 0xda, 0xe3, 0x3b, 0x31, 0xb0, 0x7e, 0x1e, 0xdb, 0x8d,
+ 0xda, 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05, 0x6a, 0x90, 0xba,
+ 0x92, 0x6a, 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45, 0xee, 0xc5, 0xfd,
+ 0xe1, 0xb0, 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47, 0x09, 0x7b, 0x5c,
+ 0x67, 0x66, 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50, 0xe3, 0x2c, 0x0f,
+ 0x6e, 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d, 0x50, 0x6d, 0x07,
+ 0xa4, 0xfa, 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36, 0x0c, 0x1f, 0xe2,
+ 0x5f, 0x08, 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec, 0xc6, 0xc1, 0xf8,
+ 0x7b, 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2, 0xad, 0xfc, 0x15,
+ 0xaa, 0x42, 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f, 0x4d, 0x0d, 0x17,
+ 0xbb, 0x17, 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1, 0x17, 0x65, 0x1c,
+ 0x25, 0xed, 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d, 0xc6, 0xed, 0x43,
+ 0x8c, 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48, 0x5d, 0x49, 0x35,
+ 0x41, 0xb4, 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62, 0xfe, 0xf0, 0xd8,
+ 0x30, 0x7f, 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd, 0xae, 0x33, 0xb3,
+ 0x1b, 0x07, 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96, 0x07, 0xb7, 0x4a,
+ 0xb7, 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36, 0x83, 0xd2, 0x7d,
+ 0x34, 0x34, 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f, 0xf1, 0x2f, 0x84,
+ 0x5d, 0x94, 0x70, 0x97, 0xb5, 0xc6, 0x76, 0x63, 0x60, 0xfc, 0x3d, 0xb7,
+ 0x1b, 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56, 0xfe, 0x0a, 0xd5, 0x21,
+ 0x75, 0x24, 0xd5, 0x06, 0xd0, 0x7a, 0x4f, 0xa6, 0x86, 0x8b, 0xdd, 0x8b,
+ 0xfb, 0xc3, 0x60, 0xc1, 0xfe, 0x25, 0xf0, 0x8b, 0xb2, 0x8e, 0x12, 0xf6,
+ 0xb8, 0xce, 0xcc, 0x6c, 0x1f, 0x87, 0xb6, 0xe3, 0x76, 0xa1, 0xc6, 0x58,
+ 0x1e, 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e, 0xa4, 0x9a, 0xa0, 0xda,
+ 0x0f, 0x49, 0xf4, 0xd0, 0xd1, 0x7b, 0xb1, 0x7f, 0x78, 0x6c, 0x18, 0x3f,
+ 0xc4, 0xbe, 0x11, 0x76, 0x51, 0xc2, 0x5e, 0xd7, 0x19, 0xd9, 0x8d, 0x83,
+ 0xf0, 0xf6, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03, 0xdb, 0xa5, 0x5b, 0xf8,
+ 0x2b, 0x54, 0x85, 0xd4, 0x93, 0x54, 0x1b, 0x41, 0xe9, 0x3e, 0x9a, 0x1a,
+ 0x2f, 0x76, 0x2f, 0xef, 0x0d, 0x83, 0x07, 0xf8, 0x97, 0xc2, 0x2e, 0xca,
+ 0x38, 0x4b, 0xda, 0xe3, 0x3b, 0x31, 0xb0, 0x7e, 0x1e, 0xdb, 0x8d, 0xda,
+ 0x87, 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05, 0x6a, 0x90, 0xba, 0x92,
+ 0x6a, 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45, 0xee, 0xc5, 0xfd, 0xe1,
+ 0xb0, 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47, 0x09, 0x7b, 0x5c, 0x67,
+ 0x66, 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50, 0xe3, 0x2c, 0x0f, 0x6e,
+ 0x95, 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d, 0x50, 0x6d, 0x07, 0xa4,
+ 0xfa, 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36, 0x0c, 0x1f, 0xe2, 0x5f,
+ 0x08, 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec, 0xc6, 0xc1, 0xf8, 0x7b,
+ 0x6e, 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2, 0xad, 0xfc, 0x15, 0xaa,
+ 0x42, 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f, 0x4d, 0x0d, 0x17, 0xbb,
+ 0x17, 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1, 0x17, 0x65, 0x1c, 0x25,
+ 0xed, 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d, 0xc6, 0xed, 0x43, 0x8c,
+ 0xb0, 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48, 0x5d, 0x49, 0x35, 0x41,
+ 0xb4, 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62, 0xfe, 0xf0, 0xd8, 0x30,
+ 0x7f, 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd, 0xae, 0x33, 0xb3, 0x1b,
+ 0x07, 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96, 0x07, 0xb7, 0x4a, 0xb7,
+ 0xf0, 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36, 0x83, 0xd2, 0x7d, 0x34,
+ 0x34, 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f, 0xf1, 0x2f, 0x84, 0x5d,
+ 0x94, 0x70, 0x97, 0xb5, 0xc6, 0x76, 0x63, 0x60, 0xfc, 0x3d, 0xb7, 0x1b,
+ 0xb5, 0x0e, 0x32, 0xc0, 0xf6, 0xe9, 0x56, 0xfe, 0x0a, 0xd5, 0x21, 0x75,
+ 0x24, 0xd5, 0x06, 0xd0, 0x7a, 0x4f, 0xa6, 0x86, 0x8b, 0xdd, 0x8b, 0xfb,
+ 0xc3, 0x60, 0xc1, 0xfe, 0x25, 0xf0, 0x8b, 0xb2, 0x8e, 0x12, 0xf6, 0xb8,
+ 0xce, 0xcc, 0x6c, 0x1f, 0x87, 0xb6, 0xe3, 0x76, 0xa1, 0xc6, 0x58, 0x1e,
+ 0xdd, 0x2a, 0xdf, 0xc1, 0x5a, 0xa4, 0x2e, 0xa4, 0x9a, 0xa0, 0xda, 0x0f,
+ 0x49, 0xf4, 0xd0, 0xd1, 0x7b, 0xb1, 0x7f, 0x78, 0x6c, 0x18, 0x3f, 0xc4,
+ 0xbe, 0x11, 0x76, 0x51, 0xc2, 0x5e, 0xd7, 0x19, 0xd9, 0x8d, 0x83, 0xf0,
+ 0xf6, 0xdc, 0x6e, 0xd4, 0x38, 0xcb, 0x03, 0xdb, 0xa5, 0x5b, 0xf8, 0x2b,
+ 0x54, 0x85, 0xd4, 0x93, 0x54, 0x1b, 0x41, 0xe9, 0x3e, 0x9a, 0x1a, 0x2f,
+ 0x76, 0x2f, 0xef, 0x0d, 0x83, 0x07, 0xf8, 0x97, 0xc2, 0x2e, 0xca, 0x38,
+ 0x4b, 0xda, 0xe3, 0x3b, 0x31, 0xb0, 0x7e, 0x1e, 0xdb, 0x8d, 0xda, 0x87,
+ 0x19, 0x60, 0x7b, 0x74, 0xab, 0x7f, 0x05, 0x6a, 0x90, 0xba, 0x92, 0x6a,
+ 0x83, 0x68, 0x3d, 0x27, 0xd3, 0x43, 0x45, 0xee, 0xc5, 0xfd, 0xe1, 0xb0,
+ 0x60, 0xff, 0x12, 0xf8, 0x45, 0xd9, 0x47, 0x09, 0x7b, 0x5c, 0x67, 0x66,
+ 0x36, 0x0f, 0xc3, 0xdb, 0x71, 0xbb, 0x50, 0xe3, 0x2c, 0x0f, 0x6e, 0x95,
+ 0x6f, 0xe0, 0xad, 0x52, 0x17, 0x52, 0x4d, 0x50, 0x6d, 0x07, 0xa4, 0xfa,
+ 0x68, 0x68, 0xbd, 0xd8, 0xbf, 0xbc, 0x36, 0x0c, 0x1f, 0xe2, 0x5f, 0x08,
+ 0xbb, 0x28, 0xe1, 0x2f, 0x6b, 0x8c, 0xec, 0xc6, 0xc1, 0xf8, 0x7b, 0x6e,
+ 0x37, 0x6a, 0x1c, 0x65, 0x81, 0xed, 0xd2, 0xad, 0xfc, 0x15, 0xaa, 0x42,
+ 0xea, 0x49, 0xaa, 0x0d, 0xa0, 0xf4, 0x9f, 0x4d, 0x0d, 0x17, 0xbb, 0x17,
+ 0xf7, 0x86, 0xc1, 0x83, 0xfc, 0x4b, 0xe1, 0x17, 0x65, 0x1c, 0x25, 0xed,
+ 0x71, 0x9d, 0x98, 0xd8, 0x3f, 0x0f, 0x6d, 0xc6, 0xed, 0x43, 0x8c, 0xb0,
+ 0x3d, 0xba, 0x55, 0xbf, 0x82, 0xb5, 0x48, 0x5d, 0x49, 0x35, 0x41, 0xb4,
+ 0x1e, 0x93, 0xe9, 0xa1, 0xa2, 0xf7, 0x62, 0xfe, 0xf0, 0xd8, 0x30, 0x7f,
+ 0x89, 0x7c, 0x22, 0xec, 0xa3, 0x84, 0xbd, 0xae, 0x33, 0xb3, 0x1b, 0x07,
+ 0xe1, 0xed, 0xb8, 0xdd, 0xa8, 0x71, 0x96, 0x07, 0xb7, 0x4a, 0xb7, 0xf0,
+ 0x56, 0xa9, 0x0b, 0xa9, 0x26, 0xa8, 0x36, 0x83, 0xd2, 0x7d, 0x34, 0x34,
+ 0x5e, 0xec, 0x5f, 0xde, 0x1b, 0x06, 0x0f, 0xf1, 0x2f, 0x84, 0x5d, 0x94,
+ 0x70, 0x97, 0xb5, 0xc6, 0x7c,
+};
+static_assert(sizeof(kBytesTestReadSymbol12) == kNumBytesTestReadSymbol12, "");
+
+// The kBytesTestReadSymbol13[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][14] = {
+// // pmf: 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13, 1/13,
+// // 1/13, 1/13
+// { 32768 - 2521, 32768 - 5041, 32768 - 7562, 32768 - 10082, 32768 - 12603,
+// 32768 - 15124, 32768 - 17644, 32768 - 20165, 32768 - 22686,
+// 32768 - 25206, 32768 - 27727, 32768 - 30247, 0, 0 },
+// // pmf: 3/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26,
+// // 2/26, 1/26
+// { 32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343, 32768 - 13863,
+// 32768 - 16384, 32768 - 18905, 32768 - 21425, 32768 - 23946,
+// 32768 - 26466, 32768 - 28987, 32768 - 31508, 0, 0 },
+// // pmf: 1/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26, 2/26,
+// // 2/26, 3/26
+// { 32768 - 1260, 32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343,
+// 32768 - 13863, 32768 - 16384, 32768 - 18905, 32768 - 21425,
+// 32768 - 23946, 32768 - 26466, 32768 - 28987, 0, 0 },
+// // pmf: 1/26, 2/26, 2/26, 2/26, 2/26, 2/26, 4/26, 2/26, 2/26, 2/26, 2/26,
+// // 2/26, 1/26
+// { 32768 - 1260, 32768 - 3781, 32768 - 6302, 32768 - 8822, 32768 - 11343,
+// 32768 - 13863, 32768 - 18905, 32768 - 21425, 32768 - 23946,
+// 32768 - 26466, 32768 - 28987, 32768 - 31508, 0, 0 },
+// };
+// constexpr int kSymbols[26][4] = { { 0, 6, 12, 5 }, //
+// { 1, 7, 11, 4 }, //
+// { 2, 8, 10, 3 }, //
+// { 3, 9, 9, 2 }, //
+// { 4, 10, 8, 1 }, //
+// { 5, 11, 7, 0 }, //
+// { 6, 12, 6, 12 }, //
+// { 7, 0, 5, 11 }, //
+// { 8, 1, 4, 10 }, //
+// { 9, 2, 3, 9 }, //
+// { 10, 3, 2, 8 }, //
+// { 11, 4, 1, 7 }, //
+// { 12, 5, 0, 6 }, //
+// { 0, 0, 12, 11 }, //
+// { 2, 1, 10, 9 }, //
+// { 4, 3, 8, 7 }, //
+// { 6, 5, 6, 5 }, //
+// { 8, 7, 4, 3 }, //
+// { 10, 9, 2, 1 }, //
+// { 12, 11, 12, 10 }, //
+// { 1, 0, 11, 8 }, //
+// { 3, 2, 9, 6 }, //
+// { 5, 4, 7, 4 }, //
+// { 7, 6, 5, 2 }, //
+// { 9, 8, 3, 6 }, //
+// { 11, 10, 1, 6 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 64; ++i) {
+// for (int j = 0; j < 26; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 13);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol13 = 3110;
+constexpr uint8_t kBytesTestReadSymbol13[] = {
+ 0x0b, 0x38, 0xa7, 0x3e, 0xde, 0x47, 0x2e, 0xe6, 0x9e, 0xe0, 0xa8, 0xc4,
+ 0x77, 0xda, 0x41, 0x64, 0x49, 0x60, 0xc4, 0x26, 0x68, 0xac, 0xf4, 0xa6,
+ 0x8c, 0x6e, 0xa6, 0xd3, 0xd9, 0x4b, 0xb9, 0x35, 0xb6, 0x53, 0x6c, 0x73,
+ 0x13, 0xd7, 0xfb, 0xbf, 0x96, 0xac, 0xea, 0x86, 0xb5, 0x24, 0x14, 0x2a,
+ 0x5a, 0x41, 0x38, 0xab, 0xfb, 0x92, 0x74, 0xf4, 0x0f, 0x24, 0xde, 0x2d,
+ 0x2d, 0x12, 0xd7, 0xb8, 0x2f, 0x4a, 0x4c, 0xd6, 0xc0, 0x4b, 0x01, 0x98,
+ 0xca, 0x7e, 0xde, 0x03, 0x75, 0x27, 0x59, 0x4f, 0x32, 0x54, 0xa5, 0xb5,
+ 0x79, 0xc3, 0xc4, 0x3c, 0x76, 0xa3, 0x2f, 0xaf, 0x2f, 0x0a, 0x84, 0xb5,
+ 0x60, 0xf5, 0x73, 0x88, 0xc0, 0x24, 0x1c, 0xfb, 0xff, 0x90, 0xb6, 0x05,
+ 0xe9, 0x43, 0x90, 0xc8, 0xd3, 0xfd, 0x3f, 0xc2, 0x0b, 0xb5, 0xfe, 0x12,
+ 0x55, 0x23, 0xa1, 0xf4, 0xba, 0xc7, 0x1f, 0xc3, 0xe5, 0xe3, 0x76, 0x68,
+ 0x3c, 0x57, 0xb9, 0x92, 0xea, 0x25, 0x93, 0x4e, 0x72, 0xff, 0x63, 0x28,
+ 0x0c, 0x90, 0x1d, 0xb6, 0x42, 0xb2, 0x25, 0x79, 0x8e, 0xee, 0x0c, 0x56,
+ 0x3d, 0x94, 0x3d, 0x80, 0xf2, 0x25, 0x6f, 0xd4, 0x93, 0x31, 0x18, 0x80,
+ 0x5a, 0x3a, 0xbb, 0x4d, 0xbb, 0x77, 0xc3, 0xb0, 0x20, 0x0e, 0xd3, 0xd8,
+ 0x10, 0x05, 0xb2, 0x81, 0x57, 0xf5, 0x8c, 0xe5, 0xac, 0x46, 0xc0, 0xae,
+ 0x9c, 0x08, 0x9d, 0x51, 0xf3, 0x16, 0xb9, 0xd7, 0x90, 0xa7, 0x9f, 0x40,
+ 0x5d, 0x14, 0xd1, 0xbd, 0xa2, 0x0b, 0xf3, 0xae, 0x3b, 0xfb, 0x0f, 0xe1,
+ 0x1a, 0x6e, 0x63, 0x3b, 0xdb, 0x41, 0x8e, 0xe8, 0x1f, 0x20, 0x18, 0xbe,
+ 0x69, 0x10, 0x86, 0x06, 0x06, 0x23, 0x3a, 0x40, 0xc1, 0x7f, 0x2e, 0x32,
+ 0xb4, 0x23, 0xac, 0x4b, 0x25, 0x6b, 0xef, 0xaf, 0xec, 0x5c, 0xf2, 0xd0,
+ 0x61, 0xb2, 0x3a, 0xa5, 0x3d, 0xcd, 0xf7, 0x99, 0x6b, 0x4e, 0xbb, 0x58,
+ 0x6a, 0x4c, 0xd7, 0xc0, 0x77, 0xd9, 0xae, 0x15, 0x7e, 0xde, 0xc9, 0xd8,
+ 0x24, 0x39, 0x3f, 0xa4, 0xf3, 0x24, 0x7e, 0xe0, 0x22, 0x19, 0x40, 0x3d,
+ 0x0c, 0xb0, 0xb7, 0xe3, 0x4b, 0x82, 0x6f, 0x82, 0x0e, 0xb1, 0x91, 0xef,
+ 0x84, 0x98, 0x69, 0x66, 0x24, 0xe7, 0x90, 0x13, 0x0d, 0xbd, 0x6b, 0x92,
+ 0xee, 0x1c, 0x0f, 0xe7, 0xfa, 0xb9, 0xb4, 0x6c, 0x68, 0x98, 0x4c, 0x27,
+ 0x42, 0xad, 0x5f, 0x8f, 0xe5, 0x25, 0xf9, 0x67, 0x84, 0x86, 0x2e, 0xf6,
+ 0x51, 0x71, 0x0d, 0x6c, 0x45, 0x8f, 0x96, 0x15, 0x73, 0xab, 0xff, 0xc0,
+ 0x87, 0x14, 0xba, 0x00, 0x67, 0x2c, 0x27, 0x03, 0xff, 0xa6, 0xe3, 0x09,
+ 0xae, 0xbb, 0xa5, 0x49, 0xee, 0x5f, 0x47, 0xc0, 0x30, 0x4a, 0x93, 0x28,
+ 0x48, 0x4d, 0x30, 0x49, 0xe7, 0xe6, 0x79, 0x96, 0x75, 0x6c, 0x62, 0xbc,
+ 0x9f, 0xaa, 0x39, 0x63, 0x1d, 0x33, 0xce, 0xd2, 0xa3, 0xd1, 0x93, 0xed,
+ 0x8d, 0xa6, 0xbd, 0x02, 0xf0, 0x44, 0xd5, 0x9e, 0x29, 0x02, 0x46, 0x87,
+ 0xaf, 0xdb, 0xfb, 0x20, 0x29, 0x26, 0xb7, 0x8c, 0x75, 0xee, 0xe9, 0x29,
+ 0x53, 0x01, 0x4a, 0xaa, 0xc2, 0x9f, 0x6c, 0x30, 0x21, 0x83, 0xa6, 0x09,
+ 0x32, 0x1d, 0xaa, 0x00, 0x6c, 0xea, 0x9c, 0x84, 0x16, 0x16, 0x0c, 0x06,
+ 0xcc, 0xf0, 0x19, 0xce, 0x57, 0xb3, 0x9f, 0x57, 0xf0, 0xdc, 0xda, 0x86,
+ 0x85, 0x2f, 0x09, 0x33, 0x8d, 0x59, 0xb8, 0xc1, 0x08, 0x4c, 0xee, 0xf8,
+ 0x33, 0x3d, 0x23, 0x13, 0x78, 0xa3, 0x98, 0xbf, 0xab, 0xef, 0x15, 0xe2,
+ 0x8d, 0xdb, 0xb4, 0xd0, 0x4b, 0x2f, 0x04, 0x3f, 0x6b, 0x11, 0xf0, 0x05,
+ 0xc7, 0x53, 0x1e, 0xc9, 0x73, 0x11, 0x81, 0xd3, 0xde, 0x21, 0xd8, 0x14,
+ 0x10, 0xbe, 0x30, 0xb2, 0x48, 0x55, 0x9b, 0x8c, 0x10, 0x84, 0xce, 0xef,
+ 0x83, 0x2f, 0x03, 0x10, 0x09, 0x0f, 0x70, 0xa8, 0x84, 0xea, 0x15, 0xdb,
+ 0xc7, 0xdf, 0x6f, 0x67, 0x5d, 0x1c, 0xc7, 0x1a, 0x1c, 0x15, 0xa6, 0x92,
+ 0xed, 0x63, 0xf0, 0xed, 0x77, 0x5d, 0x12, 0x1b, 0x8c, 0xab, 0x3e, 0xfa,
+ 0x12, 0xf6, 0x83, 0xda, 0x41, 0xbc, 0x97, 0x76, 0xb9, 0x1f, 0xc9, 0x36,
+ 0xc7, 0xe3, 0x9f, 0x93, 0x2e, 0x27, 0xdc, 0x90, 0x84, 0x6d, 0x81, 0x04,
+ 0x09, 0x4f, 0x10, 0xb9, 0x53, 0xd9, 0x8f, 0x99, 0x2b, 0x8b, 0x53, 0x4f,
+ 0xe8, 0x3e, 0x82, 0x1b, 0x0c, 0x3d, 0xbc, 0xe5, 0x5c, 0x13, 0xed, 0x4b,
+ 0x0b, 0x05, 0x72, 0xaa, 0xd2, 0xcf, 0xfc, 0x9f, 0xd0, 0xfd, 0xc7, 0xc6,
+ 0xc0, 0xa3, 0xa7, 0x05, 0xbb, 0x9e, 0xae, 0x63, 0xc0, 0x3d, 0x73, 0x92,
+ 0xe1, 0x98, 0xe4, 0xa5, 0xb3, 0xc4, 0x36, 0x90, 0x35, 0x6b, 0xab, 0x35,
+ 0x06, 0x98, 0xca, 0x35, 0x20, 0x5a, 0x6a, 0x84, 0x5c, 0x88, 0xca, 0x64,
+ 0x43, 0x87, 0xf2, 0x3c, 0x13, 0x58, 0x1c, 0x35, 0x2c, 0xf2, 0x1d, 0x5e,
+ 0xe0, 0x1b, 0x2c, 0x59, 0xc2, 0xcd, 0xf2, 0x96, 0x1a, 0x75, 0x3c, 0x10,
+ 0xe7, 0xe3, 0xa1, 0xbc, 0xec, 0x03, 0x79, 0x58, 0x26, 0x4d, 0xcf, 0xb4,
+ 0x00, 0xd3, 0x46, 0xee, 0x99, 0x52, 0x2f, 0x54, 0xcb, 0xa1, 0x75, 0xa1,
+ 0xa0, 0xf4, 0xaa, 0xe9, 0x4a, 0xe1, 0x74, 0xcc, 0xd1, 0x47, 0xda, 0x48,
+ 0x8b, 0x2e, 0xf9, 0x54, 0x98, 0x4e, 0x4f, 0x5a, 0x1b, 0xf5, 0x66, 0x62,
+ 0xa0, 0xc2, 0x0e, 0x1a, 0x91, 0xbd, 0x7a, 0x33, 0xfd, 0x7c, 0xfc, 0x8b,
+ 0xc0, 0x92, 0xd8, 0x97, 0x48, 0x6f, 0xf4, 0xe0, 0x6c, 0xcf, 0x17, 0xc9,
+ 0x44, 0x04, 0xcf, 0x50, 0x0d, 0x8f, 0xbc, 0x4f, 0x4e, 0x1d, 0x38, 0x38,
+ 0x5c, 0xb7, 0x8e, 0xe7, 0x52, 0xbe, 0x04, 0x68, 0x79, 0x9e, 0x68, 0x32,
+ 0x3b, 0xe4, 0xee, 0x65, 0x76, 0xf6, 0xb4, 0x47, 0x1c, 0xa5, 0xd0, 0x20,
+ 0x0f, 0x94, 0xe1, 0x2f, 0xa8, 0x87, 0xeb, 0xda, 0x2c, 0x54, 0xc4, 0x07,
+ 0x08, 0x89, 0xdc, 0xcf, 0x73, 0x0c, 0x1f, 0xea, 0xb4, 0x6d, 0xea, 0x17,
+ 0x70, 0x82, 0xb5, 0x18, 0x2f, 0x38, 0xc5, 0x47, 0x47, 0xd6, 0x37, 0x20,
+ 0x8d, 0x71, 0xd6, 0x16, 0x4d, 0x16, 0xd5, 0x77, 0x36, 0xb5, 0xd0, 0x20,
+ 0x5f, 0x4d, 0x89, 0x6c, 0x49, 0xc4, 0x13, 0x6c, 0x26, 0x8c, 0x8f, 0x6f,
+ 0x17, 0xab, 0xdf, 0x57, 0xa8, 0xab, 0xed, 0x8d, 0xa9, 0x00, 0x6b, 0xfc,
+ 0xf6, 0x72, 0xaf, 0x32, 0xc2, 0x0b, 0xb6, 0x6b, 0x7a, 0xac, 0xa9, 0x77,
+ 0x52, 0x87, 0x98, 0x43, 0x21, 0x72, 0x35, 0x6c, 0x27, 0x12, 0xbe, 0xf0,
+ 0x62, 0x16, 0x2a, 0xc6, 0xf7, 0x48, 0xd2, 0xc3, 0x25, 0xb4, 0x6a, 0x57,
+ 0x65, 0xd6, 0x07, 0xa0, 0xde, 0x9f, 0x3b, 0x3d, 0xdd, 0x27, 0x0e, 0x4c,
+ 0xe8, 0x4b, 0xe1, 0xd6, 0x33, 0xa7, 0x85, 0x75, 0x44, 0x7e, 0xf9, 0xfd,
+ 0xb9, 0x98, 0xa8, 0x30, 0x82, 0xdf, 0xd9, 0x97, 0x5c, 0x3f, 0x52, 0x20,
+ 0xd4, 0x38, 0x88, 0xc1, 0x53, 0x11, 0x14, 0x25, 0x6f, 0xeb, 0x4e, 0xf5,
+ 0xed, 0xf4, 0xba, 0x34, 0x23, 0x74, 0xbc, 0x46, 0x51, 0x96, 0x1b, 0x50,
+ 0x32, 0x03, 0xe5, 0x6d, 0xd7, 0xcf, 0xca, 0x60, 0xb2, 0xbc, 0xb6, 0x4b,
+ 0xc0, 0xee, 0x8b, 0x96, 0xa9, 0x4c, 0x1d, 0x9b, 0x2d, 0x11, 0xc7, 0x29,
+ 0x74, 0x08, 0x03, 0xe5, 0x1c, 0xe2, 0x6c, 0x21, 0x1e, 0x02, 0x4d, 0xb1,
+ 0x4e, 0x70, 0xb3, 0xfc, 0x06, 0xa5, 0xf9, 0xfb, 0x35, 0x1c, 0x89, 0xe3,
+ 0x1e, 0x27, 0xe0, 0x93, 0xd6, 0xd5, 0x15, 0x94, 0x40, 0x88, 0x71, 0xfd,
+ 0xaa, 0xbd, 0xf6, 0xae, 0x61, 0x52, 0x49, 0x33, 0x99, 0x85, 0xcd, 0x13,
+ 0x70, 0x7e, 0x1b, 0x76, 0x3a, 0x69, 0x9e, 0xfe, 0x3c, 0x65, 0x22, 0xf0,
+ 0x1f, 0x91, 0x57, 0x00, 0x5b, 0x28, 0xac, 0x1e, 0x1e, 0x24, 0xc7, 0xd8,
+ 0xdb, 0x3a, 0xd0, 0x85, 0x04, 0x4d, 0xf7, 0xe8, 0x3b, 0xdc, 0xa1, 0x5b,
+ 0x5e, 0xe3, 0x7a, 0xae, 0x72, 0x70, 0x7c, 0x52, 0x07, 0xf5, 0x1c, 0xda,
+ 0xd7, 0x40, 0x81, 0x7d, 0x36, 0x0a, 0x97, 0x8e, 0x0c, 0x25, 0xe7, 0xd3,
+ 0x81, 0xb0, 0xe2, 0xd0, 0x56, 0x16, 0x9c, 0x9d, 0x0e, 0xc7, 0x97, 0x8f,
+ 0xff, 0x68, 0xd4, 0x4f, 0x1a, 0x4c, 0x58, 0x6f, 0xe4, 0xd5, 0xc1, 0x07,
+ 0x7f, 0x31, 0x8c, 0x59, 0x02, 0x6f, 0xa7, 0x54, 0x1b, 0x02, 0x35, 0xe5,
+ 0x14, 0xec, 0x35, 0x3d, 0x17, 0x72, 0x11, 0x0c, 0x38, 0x62, 0x99, 0x4a,
+ 0x6a, 0x46, 0xcb, 0x36, 0x1b, 0x4b, 0x38, 0xff, 0x1d, 0xa4, 0xf7, 0x21,
+ 0xda, 0x73, 0x42, 0xc4, 0x2b, 0xf8, 0xd8, 0x43, 0x73, 0x60, 0x11, 0x22,
+ 0xc9, 0xe6, 0x07, 0xca, 0xa0, 0x29, 0x2a, 0x20, 0xd9, 0xdd, 0x7d, 0xed,
+ 0x28, 0x10, 0xde, 0xbe, 0x5e, 0xfd, 0x0c, 0x06, 0x4b, 0x1c, 0xc4, 0x56,
+ 0xc4, 0x12, 0x25, 0x5a, 0xd1, 0xfe, 0x03, 0x5e, 0x5e, 0xe0, 0x42, 0x8e,
+ 0x44, 0xf1, 0x8f, 0x13, 0xf0, 0x49, 0xeb, 0x59, 0xf3, 0x5b, 0x61, 0xd9,
+ 0xa4, 0xdf, 0x2e, 0x2a, 0x70, 0xc2, 0xf0, 0xef, 0x16, 0xf4, 0x1b, 0x5c,
+ 0xbd, 0x77, 0x42, 0xb9, 0x4c, 0x56, 0x8d, 0xc8, 0xf8, 0x05, 0xbd, 0x52,
+ 0xba, 0x6e, 0xe1, 0x89, 0xe1, 0xf2, 0xdb, 0xa7, 0xdf, 0xe0, 0xee, 0xc1,
+ 0x5c, 0x9e, 0x90, 0x11, 0x17, 0xd5, 0xc1, 0xb9, 0x2c, 0x08, 0x62, 0x0d,
+ 0x75, 0x05, 0xb2, 0xad, 0x22, 0xd6, 0x5c, 0x6e, 0xed, 0xa4, 0x06, 0x5a,
+ 0x42, 0x4f, 0xbf, 0x84, 0x53, 0xfa, 0x0b, 0xb7, 0x47, 0x6c, 0xba, 0x07,
+ 0xc9, 0xe4, 0x8c, 0xe4, 0xa3, 0x40, 0xdc, 0xcb, 0x58, 0xeb, 0xba, 0xc5,
+ 0xcc, 0x56, 0x74, 0x1e, 0x7b, 0x0f, 0x2a, 0xce, 0x35, 0x46, 0x39, 0x6d,
+ 0x81, 0x91, 0xb2, 0x05, 0x76, 0xfa, 0x8f, 0x43, 0x46, 0x25, 0xb7, 0x98,
+ 0x4e, 0x5f, 0x63, 0xf4, 0x0e, 0x4f, 0x5d, 0x85, 0x29, 0x9d, 0xdb, 0xa8,
+ 0xeb, 0x0a, 0xbb, 0xc4, 0xf8, 0x5a, 0xda, 0xe1, 0x9b, 0x1f, 0x9b, 0x4d,
+ 0x62, 0x65, 0x41, 0x34, 0x5b, 0x6c, 0x19, 0xa5, 0x3c, 0x35, 0x8e, 0x14,
+ 0x02, 0xcd, 0x1d, 0xf3, 0xfb, 0x70, 0x93, 0x46, 0xe2, 0x49, 0xc8, 0x31,
+ 0xfd, 0x47, 0x35, 0xfc, 0x7d, 0xb9, 0x79, 0xf7, 0x0d, 0xed, 0x98, 0x47,
+ 0xd2, 0xcf, 0x26, 0x8b, 0x10, 0x6f, 0x86, 0xca, 0xda, 0xb8, 0x41, 0xdb,
+ 0x0c, 0xc7, 0xc3, 0x56, 0xc5, 0x0f, 0xc7, 0xf2, 0xda, 0x45, 0xdf, 0x94,
+ 0xc1, 0x65, 0x79, 0x6c, 0x97, 0x81, 0xbd, 0xf1, 0x1e, 0x26, 0x6e, 0xfc,
+ 0x4f, 0x2e, 0x1e, 0x9c, 0xa2, 0x69, 0x54, 0x7a, 0xc3, 0x15, 0x44, 0x64,
+ 0x73, 0x11, 0x5b, 0x10, 0x48, 0x95, 0x6b, 0x49, 0x4e, 0xcb, 0x2b, 0x12,
+ 0x90, 0xaf, 0xf5, 0x5a, 0xfa, 0xf5, 0x0b, 0xb8, 0x49, 0x0a, 0x7d, 0xc4,
+ 0x6b, 0x0a, 0xa5, 0x6d, 0x32, 0xb2, 0x33, 0x3c, 0xb3, 0x65, 0x9c, 0x1f,
+ 0x7e, 0x50, 0xd3, 0x6a, 0xa2, 0xc1, 0xb9, 0xd9, 0xfa, 0x25, 0xfe, 0x1c,
+ 0x3f, 0x88, 0x47, 0x0a, 0x7e, 0x62, 0xa2, 0xf3, 0x3e, 0xae, 0x9f, 0x7f,
+ 0x83, 0xbb, 0x05, 0x72, 0x7a, 0x40, 0x44, 0x5f, 0x57, 0x06, 0xe4, 0xb0,
+ 0x21, 0x88, 0x35, 0xd4, 0x16, 0xca, 0xb4, 0x8b, 0x59, 0x71, 0xbb, 0xb6,
+ 0x90, 0x19, 0x69, 0x09, 0x3e, 0xfe, 0x11, 0x4f, 0xe8, 0x2e, 0xdd, 0x1d,
+ 0xb2, 0xe8, 0x1f, 0x27, 0x92, 0x33, 0x92, 0x8d, 0x04, 0x2e, 0x19, 0x16,
+ 0xb4, 0xb5, 0xcf, 0x52, 0x98, 0xcc, 0x2b, 0x85, 0x0c, 0x2d, 0x88, 0x38,
+ 0x24, 0x06, 0xf2, 0x47, 0xec, 0xce, 0xc6, 0xf7, 0x4e, 0xe4, 0x8b, 0xb5,
+ 0x4f, 0xbe, 0xae, 0x13, 0xd5, 0x0c, 0xe6, 0x13, 0x44, 0xa4, 0x76, 0x19,
+ 0x8c, 0x25, 0x28, 0x0f, 0x15, 0x8e, 0xa6, 0x9c, 0xee, 0x6e, 0xf0, 0x55,
+ 0x9d, 0x5a, 0x8f, 0xf6, 0x08, 0x27, 0x92, 0x1f, 0xcb, 0x4c, 0x8c, 0x2c,
+ 0xeb, 0x44, 0x26, 0x48, 0xec, 0x2e, 0x9b, 0xb3, 0xd9, 0x17, 0xee, 0x52,
+ 0x7d, 0x32, 0x47, 0x88, 0x4d, 0xf9, 0x11, 0xfc, 0xac, 0xa3, 0xb0, 0xc9,
+ 0x5e, 0x38, 0xa3, 0x8d, 0x56, 0xc8, 0x83, 0x7c, 0x53, 0x38, 0xe1, 0xd0,
+ 0x28, 0x7d, 0xc1, 0x65, 0x99, 0x39, 0x58, 0x36, 0xa3, 0x66, 0x71, 0x4c,
+ 0x28, 0xcb, 0x9f, 0xb5, 0x58, 0x4b, 0xa3, 0x5c, 0x4e, 0xf9, 0x8d, 0x5b,
+ 0x0c, 0xf1, 0x32, 0xbb, 0xe3, 0xb4, 0x47, 0xe8, 0x1c, 0x9e, 0xbb, 0x0a,
+ 0x53, 0x3b, 0xb7, 0x51, 0xd6, 0x15, 0x77, 0x89, 0xf0, 0xb5, 0xba, 0x71,
+ 0x84, 0x16, 0x81, 0xb0, 0xdf, 0x67, 0x12, 0x9f, 0xe7, 0x43, 0x70, 0x3a,
+ 0xb1, 0xdc, 0x40, 0x31, 0xe7, 0xdd, 0x6b, 0x74, 0xfc, 0x18, 0x7d, 0x0d,
+ 0xba, 0xda, 0x67, 0x66, 0x56, 0x43, 0x42, 0x80, 0xc6, 0x7c, 0xb3, 0x6c,
+ 0x89, 0x2e, 0xc7, 0x0d, 0x97, 0x8a, 0xbe, 0x1a, 0x36, 0x05, 0x10, 0x85,
+ 0x96, 0xa8, 0xbd, 0x29, 0x85, 0x52, 0xdc, 0xa3, 0x92, 0x20, 0xa1, 0xb0,
+ 0x45, 0x5a, 0x7e, 0xc3, 0x4c, 0x0b, 0x6f, 0x3a, 0xe4, 0xfe, 0x55, 0x01,
+ 0x49, 0x51, 0x06, 0xe7, 0xbb, 0x91, 0xd2, 0x77, 0x80, 0x1e, 0x07, 0xc7,
+ 0xe8, 0x60, 0x32, 0x58, 0xe6, 0x22, 0xb6, 0x20, 0x91, 0x2a, 0xd6, 0x92,
+ 0x9d, 0x96, 0x56, 0x25, 0x21, 0x5f, 0xea, 0xb5, 0xf5, 0xea, 0x17, 0x70,
+ 0x92, 0x14, 0xfb, 0x88, 0xd6, 0x15, 0x4a, 0xda, 0x65, 0x64, 0x66, 0x79,
+ 0x66, 0xcb, 0x38, 0x3e, 0xfc, 0xa1, 0xa0, 0x96, 0xf7, 0xb0, 0x4d, 0x87,
+ 0x80, 0x05, 0x1e, 0x85, 0xd8, 0xb8, 0xf8, 0x50, 0x3e, 0x9d, 0xc1, 0x83,
+ 0x81, 0x15, 0x59, 0x5d, 0x49, 0xd0, 0xed, 0x25, 0x2a, 0xf3, 0x59, 0xe4,
+ 0xc6, 0x4b, 0xc2, 0x0f, 0x19, 0x92, 0x2f, 0x7f, 0x96, 0xd0, 0x90, 0x08,
+ 0xef, 0x4f, 0x57, 0xa5, 0x3e, 0xec, 0xbe, 0xa5, 0x31, 0xd5, 0xcb, 0xbb,
+ 0xab, 0xde, 0x3b, 0xc8, 0x62, 0x8e, 0x35, 0x5b, 0x22, 0x0d, 0xf1, 0x4c,
+ 0xe3, 0x87, 0x40, 0xa1, 0xf7, 0x05, 0x96, 0x64, 0xe5, 0x60, 0xda, 0x8d,
+ 0x99, 0xc5, 0x30, 0xa3, 0x2e, 0x7e, 0xd5, 0x61, 0x2e, 0x8d, 0x71, 0x3b,
+ 0xe6, 0x35, 0x6c, 0x33, 0xc4, 0xca, 0xef, 0x8e, 0xd1, 0x1f, 0xa0, 0x72,
+ 0x7a, 0xec, 0x29, 0x4c, 0xee, 0xdd, 0x47, 0x58, 0x55, 0xde, 0x27, 0xc2,
+ 0xd6, 0xe9, 0xc6, 0x10, 0x5a, 0x06, 0xc3, 0x7d, 0x9c, 0x4a, 0x7f, 0x9d,
+ 0x0d, 0xc0, 0xea, 0xc7, 0x71, 0x00, 0xc7, 0x9f, 0x75, 0xad, 0xd3, 0xf0,
+ 0x61, 0xf4, 0x36, 0xeb, 0x69, 0x9d, 0x99, 0x59, 0x0d, 0x0a, 0x03, 0x19,
+ 0xf2, 0xcd, 0xb2, 0x24, 0xbb, 0x1c, 0x36, 0x5e, 0x2a, 0xf8, 0x68, 0xd8,
+ 0x14, 0x42, 0x16, 0x5a, 0xa2, 0xf4, 0xa6, 0x15, 0x4b, 0x72, 0x8e, 0x48,
+ 0x82, 0x86, 0xc1, 0x15, 0x69, 0xfb, 0x0d, 0x30, 0x2d, 0xbc, 0xeb, 0x93,
+ 0xf9, 0x54, 0x05, 0x25, 0x44, 0x1b, 0x9e, 0xee, 0x47, 0x49, 0xde, 0x00,
+ 0x78, 0x1f, 0x1f, 0xa1, 0x80, 0xc9, 0x63, 0x98, 0x8a, 0xd8, 0x82, 0x44,
+ 0xab, 0x5a, 0x4a, 0x76, 0x59, 0x58, 0x94, 0x85, 0x7f, 0xaa, 0xd7, 0xd7,
+ 0xa8, 0x5d, 0xc2, 0x48, 0x53, 0xee, 0x23, 0x58, 0x55, 0x2b, 0x69, 0x95,
+ 0x91, 0x99, 0xe5, 0x9b, 0x2c, 0xe0, 0xfb, 0xf2, 0x86, 0x82, 0x5b, 0xde,
+ 0xc1, 0x36, 0x1e, 0x00, 0x14, 0x7a, 0x17, 0x62, 0xe3, 0xe1, 0x40, 0xfa,
+ 0x77, 0x06, 0x0e, 0x04, 0x55, 0x65, 0x75, 0x27, 0x43, 0xb4, 0x94, 0xab,
+ 0xcd, 0x67, 0x93, 0x19, 0x2f, 0x08, 0x3c, 0x66, 0x48, 0xbd, 0xfe, 0x5b,
+ 0x42, 0x40, 0x23, 0xbd, 0x3d, 0x5e, 0x94, 0xfb, 0xb2, 0xfa, 0x94, 0xc7,
+ 0x57, 0x2e, 0xee, 0xaf, 0x78, 0xef, 0x21, 0x8a, 0x38, 0xd5, 0x6c, 0x88,
+ 0x37, 0xc5, 0x33, 0x8e, 0x1d, 0x02, 0x87, 0xdc, 0x16, 0x59, 0x93, 0x95,
+ 0x83, 0x6a, 0x36, 0x67, 0x14, 0xc2, 0x8c, 0xb9, 0xfb, 0x55, 0x84, 0xba,
+ 0x35, 0xc4, 0xef, 0x98, 0xd5, 0xb0, 0xcf, 0x13, 0x2b, 0xbe, 0x3b, 0x44,
+ 0x7e, 0x81, 0xc9, 0xeb, 0xb0, 0xa5, 0x33, 0xbb, 0x75, 0x1d, 0x61, 0x57,
+ 0x78, 0x9f, 0x0b, 0x5b, 0xa7, 0x18, 0x41, 0x68, 0x1b, 0x0d, 0xf6, 0x71,
+ 0x29, 0xfe, 0x74, 0x37, 0x03, 0xab, 0x1d, 0xc4, 0x03, 0x1e, 0x7d, 0xd6,
+ 0xb7, 0x4f, 0xc1, 0x87, 0xd0, 0xdb, 0xad, 0xa6, 0x76, 0x65, 0x64, 0x34,
+ 0x28, 0x0c, 0x67, 0xcb, 0x36, 0xc8, 0x92, 0xec, 0x70, 0xd9, 0x78, 0xab,
+ 0xe1, 0xa3, 0x60, 0x51, 0x08, 0x59, 0x6a, 0x8b, 0xd2, 0x98, 0x55, 0x2d,
+ 0xca, 0x39, 0x22, 0x0a, 0x1b, 0x04, 0x55, 0xa7, 0xec, 0x34, 0xc0, 0xb6,
+ 0xf3, 0xae, 0x4f, 0xe5, 0x50, 0x14, 0x95, 0x10, 0x6e, 0x7b, 0xb9, 0x1d,
+ 0x27, 0x78, 0x01, 0xe0, 0x7c, 0x7e, 0x86, 0x03, 0x25, 0x8e, 0x62, 0x2b,
+ 0x62, 0x09, 0x12, 0xad, 0x69, 0x29, 0xd9, 0x65, 0x62, 0x52, 0x15, 0xfe,
+ 0xab, 0x5f, 0x5e, 0xa1, 0x77, 0x09, 0x21, 0x4f, 0xb8, 0x8d, 0x61, 0x54,
+ 0xad, 0xa6, 0x56, 0x46, 0x67, 0x96, 0x6c, 0xb3, 0x83, 0xef, 0xca, 0x1a,
+ 0x09, 0x6f, 0x7b, 0x04, 0xd8, 0x78, 0x00, 0x51, 0xe8, 0x5d, 0x8b, 0x8f,
+ 0x85, 0x03, 0xe9, 0xdc, 0x18, 0x38, 0x11, 0x55, 0x95, 0xd4, 0x9d, 0x0e,
+ 0xd2, 0x52, 0xaf, 0x35, 0x9e, 0x4c, 0x64, 0xbc, 0x20, 0xf1, 0x99, 0x22,
+ 0xf7, 0xf9, 0x6d, 0x09, 0x00, 0x8e, 0xf4, 0xf5, 0x7a, 0x53, 0xee, 0xcb,
+ 0xea, 0x53, 0x1d, 0x5c, 0xbb, 0xba, 0xbd, 0xe3, 0xbc, 0x86, 0x28, 0xe3,
+ 0x55, 0xb2, 0x20, 0xdf, 0x14, 0xce, 0x38, 0x74, 0x0a, 0x1f, 0x70, 0x59,
+ 0x66, 0x4e, 0x56, 0x0d, 0xa8, 0xd9, 0x9c, 0x53, 0x0a, 0x32, 0xe7, 0xed,
+ 0x56, 0x12, 0xe8, 0xd7, 0x13, 0xbe, 0x63, 0x56, 0xc3, 0x3c, 0x4c, 0xae,
+ 0xf8, 0xed, 0x11, 0xfa, 0x07, 0x27, 0xae, 0xc2, 0x94, 0xce, 0xed, 0xd4,
+ 0x75, 0x85, 0x5d, 0xe2, 0x7c, 0x2d, 0x6e, 0x9c, 0x61, 0x05, 0xa0, 0x6c,
+ 0x37, 0xd9, 0xc4, 0xa7, 0xf9, 0xd0, 0xdc, 0x0e, 0xac, 0x77, 0x10, 0x0c,
+ 0x79, 0xf7, 0x5a, 0xdd, 0x3f, 0x06, 0x1f, 0x43, 0x6e, 0xb6, 0x99, 0xd9,
+ 0x95, 0x90, 0xd0, 0xa0, 0x31, 0x9f, 0x2c, 0xdb, 0x22, 0x4b, 0xb1, 0xc3,
+ 0x65, 0xe2, 0xaf, 0x86, 0x8d, 0x81, 0x44, 0x21, 0x65, 0xaa, 0x2f, 0x4a,
+ 0x61, 0x54, 0xb7, 0x28, 0xe4, 0x88, 0x28, 0x6c, 0x11, 0x56, 0x9f, 0xb0,
+ 0xd3, 0x02, 0xdb, 0xce, 0xb9, 0x3f, 0x95, 0x40, 0x52, 0x54, 0x41, 0xb9,
+ 0xee, 0xe4, 0x74, 0x9d, 0xe0, 0x07, 0x81, 0xf1, 0xfa, 0x18, 0x0c, 0x96,
+ 0x39, 0x88, 0xad, 0x88, 0x24, 0x4a, 0xb5, 0xa4, 0xa7, 0x65, 0x95, 0x89,
+ 0x48, 0x57, 0xfa, 0xad, 0x7d, 0x7a, 0x85, 0xdc, 0x24, 0x85, 0x3e, 0xe2,
+ 0x35, 0x85, 0x52, 0xb6, 0x99, 0x59, 0x19, 0x9e, 0x59, 0xb2, 0xce, 0x0f,
+ 0xbf, 0x28, 0x68, 0x25, 0xbd, 0xec, 0x13, 0x61, 0xe0, 0x01, 0x47, 0xa1,
+ 0x76, 0x2e, 0x3e, 0x14, 0x0f, 0xa7, 0x70, 0x60, 0xe0, 0x45, 0x56, 0x57,
+ 0x52, 0x74, 0x3b, 0x49, 0x4a, 0xbc, 0xd6, 0x79, 0x31, 0x92, 0xf0, 0x83,
+ 0xc6, 0x64, 0x8b, 0xdf, 0xe5, 0xb4, 0x24, 0x02, 0x3b, 0xd3, 0xd5, 0xe9,
+ 0x4f, 0xbb, 0x2f, 0xa9, 0x4c, 0x75, 0x72, 0xee, 0xea, 0xf7, 0x8e, 0xf2,
+ 0x18, 0xa3, 0x8d, 0x56, 0xc8, 0x83, 0x7c, 0x53, 0x38, 0xe1, 0xd0, 0x28,
+ 0x7d, 0xc1, 0x65, 0x99, 0x39, 0x58, 0x36, 0xa3, 0x66, 0x71, 0x4c, 0x28,
+ 0xcb, 0x9f, 0xb5, 0x58, 0x4b, 0xa3, 0x5c, 0x4e, 0xf9, 0x8d, 0x5b, 0x0c,
+ 0xf1, 0x32, 0xbb, 0xe3, 0xb4, 0x47, 0xe8, 0x1c, 0x9e, 0xbb, 0x0a, 0x53,
+ 0x3b, 0xb7, 0x51, 0xd6, 0x15, 0x77, 0x89, 0xf0, 0xb5, 0xba, 0x71, 0x84,
+ 0x16, 0x81, 0xb0, 0xdf, 0x67, 0x12, 0x9f, 0xe7, 0x43, 0x70, 0x3a, 0xb1,
+ 0xdc, 0x40, 0x31, 0xe7, 0xdd, 0x6b, 0x74, 0xfc, 0x18, 0x7d, 0x0d, 0xba,
+ 0xda, 0x67, 0x66, 0x56, 0x43, 0x42, 0x80, 0xc6, 0x7c, 0xb3, 0x6c, 0x89,
+ 0x2e, 0xc7, 0x0d, 0x97, 0x8a, 0xbe, 0x1a, 0x36, 0x05, 0x10, 0x85, 0x96,
+ 0xa8, 0xbd, 0x29, 0x85, 0x52, 0xdc, 0xa3, 0x92, 0x20, 0xa1, 0xb0, 0x45,
+ 0x5a, 0x7e, 0xc3, 0x4c, 0x0b, 0x6f, 0x3a, 0xe4, 0xfe, 0x55, 0x01, 0x49,
+ 0x51, 0x06, 0xe7, 0xbb, 0x91, 0xd2, 0x77, 0x80, 0x1e, 0x07, 0xc7, 0xe8,
+ 0x60, 0x32, 0x58, 0xe6, 0x22, 0xb6, 0x20, 0x91, 0x2a, 0xd6, 0x92, 0x9d,
+ 0x96, 0x56, 0x25, 0x21, 0x5f, 0xea, 0xb5, 0xf5, 0xea, 0x17, 0x70, 0x92,
+ 0x14, 0xfb, 0x88, 0xd6, 0x15, 0x4a, 0xda, 0x65, 0x64, 0x66, 0x79, 0x66,
+ 0xcb, 0x38, 0x3e, 0xfc, 0xa1, 0xa0, 0x96, 0xf7, 0xb0, 0x4d, 0x87, 0x80,
+ 0x05, 0x1e, 0x85, 0xd8, 0xb8, 0xf8, 0x50, 0x3e, 0x9d, 0xc1, 0x83, 0x81,
+ 0x15, 0x59, 0x5d, 0x49, 0xd0, 0xed, 0x25, 0x2a, 0xf3, 0x59, 0xe4, 0xc6,
+ 0x4b, 0xc2, 0x0f, 0x19, 0x92, 0x2f, 0x7f, 0x96, 0xd0, 0x90, 0x08, 0xef,
+ 0x4f, 0x57, 0xa5, 0x3e, 0xec, 0xbe, 0xa5, 0x31, 0xd5, 0xcb, 0xbb, 0xab,
+ 0xde, 0x3b, 0xc8, 0x62, 0x8e, 0x35, 0x5b, 0x22, 0x0d, 0xf1, 0x4c, 0xe3,
+ 0x87, 0x40, 0xa1, 0xf7, 0x05, 0x96, 0x64, 0xe5, 0x60, 0xda, 0x8d, 0x99,
+ 0xc5, 0x30, 0xa3, 0x2e, 0x7e, 0xd5, 0x61, 0x2e, 0x8d, 0x71, 0x3b, 0xe6,
+ 0x35, 0x6c, 0x33, 0xc4, 0xca, 0xef, 0x8e, 0xd1, 0x1f, 0xa0, 0x72, 0x7a,
+ 0xec, 0x29, 0x4c, 0xee, 0xdd, 0x47, 0x58, 0x55, 0xde, 0x27, 0xc2, 0xd6,
+ 0xe9, 0xc6, 0x10, 0x5a, 0x06, 0xc3, 0x7d, 0x9c, 0x4a, 0x7f, 0x9d, 0x0d,
+ 0xc0, 0xea, 0xc7, 0x71, 0x00, 0xc7, 0x9f, 0x75, 0xad, 0xd3, 0xf0, 0x61,
+ 0xf4, 0x36, 0xeb, 0x69, 0x9d, 0x99, 0x59, 0x0d, 0x0a, 0x03, 0x19, 0xf2,
+ 0xcd, 0xb2, 0x24, 0xbb, 0x1c, 0x36, 0x5e, 0x2a, 0xf8, 0x68, 0xd8, 0x14,
+ 0x42, 0x16, 0x5a, 0xa2, 0xf4, 0xa6, 0x15, 0x4b, 0x72, 0x8e, 0x48, 0x82,
+ 0x86, 0xc1, 0x15, 0x69, 0xfb, 0x0d, 0x30, 0x2d, 0xbc, 0xeb, 0x93, 0xf9,
+ 0x54, 0x05, 0x25, 0x44, 0x1b, 0x9e, 0xee, 0x47, 0x49, 0xde, 0x00, 0x78,
+ 0x1f, 0x1f, 0xa1, 0x80, 0xc9, 0x63, 0x98, 0x8a, 0xd8, 0x82, 0x44, 0xab,
+ 0x5a, 0x4a, 0x76, 0x59, 0x58, 0x94, 0x85, 0x7f, 0xaa, 0xd7, 0xd7, 0xa8,
+ 0x5d, 0xc2, 0x48, 0x53, 0xee, 0x23, 0x58, 0x55, 0x2b, 0x69, 0x95, 0x91,
+ 0x99, 0xe5, 0x9b, 0x2c, 0xe0, 0xfb, 0xf2, 0x86, 0x82, 0x5b, 0xde, 0xc1,
+ 0x36, 0x1e, 0x00, 0x14, 0x7a, 0x17, 0x62, 0xe3, 0xe1, 0x40, 0xfa, 0x77,
+ 0x06, 0x0e, 0x04, 0x55, 0x65, 0x75, 0x27, 0x43, 0xb4, 0x94, 0xab, 0xcd,
+ 0x67, 0x93, 0x19, 0x2f, 0x08, 0x3c, 0x66, 0x48, 0xbd, 0xfe, 0x5b, 0x42,
+ 0x40, 0x23, 0xbd, 0x3d, 0x5e, 0x94, 0xfb, 0xb2, 0xfa, 0x94, 0xc7, 0x57,
+ 0x2e, 0xee, 0xaf, 0x78, 0xef, 0x21, 0x8a, 0x38, 0xd5, 0x6c, 0x88, 0x37,
+ 0xc5, 0x33, 0x8e, 0x1d, 0x02, 0x87, 0xdc, 0x16, 0x59, 0x93, 0x95, 0x83,
+ 0x6a, 0x36, 0x67, 0x14, 0xc2, 0x8c, 0xb9, 0xfb, 0x55, 0x84, 0xba, 0x35,
+ 0xc4, 0xef, 0x98, 0xd5, 0xb0, 0xcf, 0x13, 0x2b, 0xbe, 0x3b, 0x44, 0x7e,
+ 0x81, 0xca,
+};
+static_assert(sizeof(kBytesTestReadSymbol13) == kNumBytesTestReadSymbol13, "");
+
+// The kBytesTestReadSymbol14[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][15] = {
+// // pmf: 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14, 1/14,
+// // 1/14, 1/14, 1/14
+// { 32768 - 2341, 32768 - 4681, 32768 - 7022, 32768 - 9362, 32768 - 11703,
+// 32768 - 14043, 32768 - 16384, 32768 - 18725, 32768 - 21065,
+// 32768 - 23406, 32768 - 25746, 32768 - 28087, 32768 - 30427, 0, 0 },
+// // pmf: 3/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28,
+// // 2/28, 2/28, 1/28
+// { 32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533, 32768 - 12873,
+// 32768 - 15214, 32768 - 17554, 32768 - 19895, 32768 - 22235,
+// 32768 - 24576, 32768 - 26917, 32768 - 29257, 32768 - 31598, 0, 0 },
+// // pmf: 1/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28, 2/28,
+// // 2/28, 2/28, 3/28
+// { 32768 - 1170, 32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533,
+// 32768 - 12873, 32768 - 15214, 32768 - 17554, 32768 - 19895,
+// 32768 - 22235, 32768 - 24576, 32768 - 26917, 32768 - 29257, 0, 0 },
+// // pmf: 1/28, 2/28, 2/28, 2/28, 2/28, 2/28, 3/28, 3/28, 2/28, 2/28, 2/28,
+// // 2/28, 2/28, 1/28
+// { 32768 - 1170, 32768 - 3511, 32768 - 5851, 32768 - 8192, 32768 - 10533,
+// 32768 - 12873, 32768 - 16384, 32768 - 19895, 32768 - 22235,
+// 32768 - 24576, 32768 - 26917, 32768 - 29257, 32768 - 31598, 0, 0 },
+// };
+// constexpr int kSymbols[28][4] = { { 0, 7, 13, 6 }, //
+// { 1, 8, 12, 5 }, //
+// { 2, 9, 11, 4 }, //
+// { 3, 10, 10, 3 }, //
+// { 4, 11, 9, 2 }, //
+// { 5, 12, 8, 1 }, //
+// { 6, 13, 7, 0 }, //
+// { 7, 0, 6, 13 }, //
+// { 8, 1, 5, 12 }, //
+// { 9, 2, 4, 11 }, //
+// { 10, 3, 3, 10 }, //
+// { 11, 4, 2, 9 }, //
+// { 12, 5, 1, 8 }, //
+// { 13, 6, 0, 7 }, //
+// { 0, 0, 13, 11 }, //
+// { 2, 1, 12, 9 }, //
+// { 4, 3, 10, 7 }, //
+// { 6, 5, 8, 5 }, //
+// { 8, 7, 6, 3 }, //
+// { 10, 9, 4, 1 }, //
+// { 12, 11, 2, 12 }, //
+// { 1, 0, 13, 10 }, //
+// { 3, 2, 11, 8 }, //
+// { 5, 4, 9, 6 }, //
+// { 7, 6, 7, 4 }, //
+// { 9, 8, 5, 2 }, //
+// { 11, 10, 3, 7 }, //
+// { 13, 12, 1, 6 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 64; ++i) {
+// for (int j = 0; j < 28; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 14);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol14 = 3455;
+constexpr uint8_t kBytesTestReadSymbol14[] = {
+ 0x0a, 0xef, 0xeb, 0xb5, 0x78, 0x91, 0x0b, 0x9d, 0xee, 0x99, 0x14, 0x9c,
+ 0xf4, 0x58, 0x86, 0xe8, 0x69, 0x7f, 0x06, 0x07, 0x60, 0xb0, 0x79, 0xbe,
+ 0xea, 0xe5, 0x69, 0x1c, 0x67, 0x7a, 0x75, 0x91, 0x2f, 0x1d, 0x49, 0x4e,
+ 0x15, 0x40, 0x56, 0x15, 0xa1, 0xff, 0x72, 0x2d, 0xa5, 0x40, 0x81, 0x21,
+ 0x3d, 0x06, 0x78, 0xd2, 0x62, 0x8a, 0xf2, 0x63, 0x50, 0x9d, 0xbd, 0xa0,
+ 0xd4, 0x14, 0x42, 0x76, 0x4f, 0x44, 0xbe, 0xb2, 0xa1, 0x0d, 0x4c, 0x75,
+ 0xe4, 0x4a, 0xed, 0xf9, 0x7e, 0xb8, 0x7b, 0x5a, 0x26, 0x78, 0x5f, 0xe3,
+ 0x86, 0x72, 0x64, 0x48, 0x76, 0x51, 0x7a, 0x77, 0x3b, 0xcf, 0xa2, 0x8d,
+ 0x31, 0xec, 0xc1, 0xa7, 0xf9, 0x9a, 0x76, 0x00, 0x7c, 0x17, 0x40, 0x03,
+ 0x12, 0xe8, 0xed, 0xbf, 0x39, 0xe2, 0xdd, 0x6d, 0xdc, 0xe2, 0x34, 0xdf,
+ 0x0d, 0xa6, 0x86, 0x22, 0xca, 0x86, 0x5f, 0x57, 0x25, 0xc6, 0x57, 0x60,
+ 0xc3, 0x06, 0xe9, 0xf0, 0x06, 0xd4, 0xc0, 0xb3, 0xfc, 0x5b, 0xcd, 0xa9,
+ 0xc0, 0x51, 0x6e, 0x10, 0x0a, 0x5a, 0xfd, 0xbf, 0x92, 0xc8, 0x21, 0x0e,
+ 0x83, 0x74, 0xfe, 0x01, 0xec, 0x24, 0x61, 0x9d, 0x9e, 0xb8, 0xb2, 0x04,
+ 0xa7, 0xe9, 0xd6, 0xc7, 0x79, 0x5b, 0xaa, 0xdd, 0x94, 0x5d, 0x26, 0x61,
+ 0x0b, 0xee, 0x66, 0xf4, 0xb2, 0xd1, 0x9b, 0xf0, 0xb4, 0x9b, 0x50, 0x4c,
+ 0x4a, 0x57, 0xbc, 0xfe, 0x7e, 0xca, 0xfe, 0xa8, 0x22, 0x1b, 0x2f, 0x4a,
+ 0x26, 0x32, 0x96, 0xfd, 0x03, 0x02, 0x1b, 0x7c, 0x1d, 0x6d, 0x42, 0x48,
+ 0x2b, 0x11, 0x0d, 0x8f, 0x40, 0xb8, 0x15, 0xf1, 0xdd, 0x06, 0xf7, 0xa0,
+ 0x1f, 0x0f, 0x75, 0xb1, 0x53, 0x73, 0x1f, 0xbf, 0x97, 0xf7, 0xa0, 0xcb,
+ 0x5b, 0x98, 0xb7, 0x50, 0xa7, 0xc5, 0x23, 0x9b, 0x16, 0x0a, 0x2e, 0x03,
+ 0x68, 0x3a, 0x92, 0x75, 0xb8, 0xb0, 0xd8, 0xda, 0x2e, 0x82, 0x61, 0x3f,
+ 0xa0, 0x6e, 0x78, 0xe5, 0x7d, 0x14, 0xe5, 0x1f, 0x7b, 0xec, 0xb5, 0x14,
+ 0xb7, 0xa0, 0x72, 0xdc, 0x1a, 0x23, 0xa4, 0x5b, 0xc5, 0xc2, 0x75, 0x6a,
+ 0x7c, 0x36, 0xef, 0xf0, 0xd1, 0x5a, 0x34, 0x31, 0x0b, 0xae, 0x4c, 0x07,
+ 0xc2, 0xb7, 0xab, 0xd5, 0x67, 0xed, 0x65, 0x5e, 0xa0, 0x7e, 0x16, 0x04,
+ 0xc6, 0x1b, 0x74, 0x0f, 0xa9, 0x35, 0xe8, 0x71, 0x83, 0xca, 0xc3, 0x21,
+ 0x74, 0xf5, 0xee, 0x71, 0xd1, 0x4c, 0xa2, 0x1d, 0xce, 0x16, 0x4b, 0x9b,
+ 0xb0, 0x9f, 0x42, 0x08, 0x49, 0x6a, 0x82, 0x66, 0xe8, 0xb2, 0xce, 0xfd,
+ 0x8e, 0xdb, 0x9e, 0x9e, 0xeb, 0x4b, 0x3d, 0xbb, 0xab, 0x61, 0xe4, 0x0d,
+ 0x87, 0x8e, 0xe9, 0x7b, 0xe8, 0x57, 0x70, 0x8c, 0xab, 0x0c, 0x0f, 0x05,
+ 0x4b, 0xca, 0x6d, 0xe7, 0x94, 0x2b, 0x29, 0x28, 0xfd, 0xfa, 0x11, 0x4c,
+ 0x08, 0x51, 0xce, 0x45, 0x70, 0x87, 0x2b, 0xcf, 0x88, 0x80, 0x87, 0x38,
+ 0x80, 0x5d, 0x2e, 0x8f, 0x47, 0xd8, 0x5e, 0x75, 0x66, 0xa7, 0x86, 0x5e,
+ 0x98, 0xd4, 0x1b, 0x00, 0x11, 0xcf, 0x7b, 0xef, 0x8b, 0x17, 0x93, 0xe0,
+ 0x3a, 0x90, 0x7d, 0x0b, 0x45, 0x34, 0x2a, 0x67, 0xa4, 0x0e, 0xab, 0xc3,
+ 0x3b, 0x27, 0x68, 0x03, 0x4d, 0xcb, 0xd5, 0x87, 0x53, 0x37, 0xe5, 0xcc,
+ 0xc3, 0x73, 0x4a, 0x2c, 0x5f, 0xdc, 0x8d, 0xba, 0x6c, 0x11, 0xa0, 0x35,
+ 0xc6, 0xbe, 0xd9, 0xd6, 0x64, 0x2e, 0x4b, 0x85, 0xbf, 0x50, 0xdd, 0xa6,
+ 0xa0, 0xa4, 0x23, 0xd7, 0x82, 0xb6, 0x65, 0x4e, 0xa8, 0xd4, 0x19, 0xa1,
+ 0xe4, 0xc8, 0x4d, 0x69, 0x2a, 0x41, 0x4f, 0x1e, 0x46, 0xb1, 0xde, 0x64,
+ 0x0b, 0xf8, 0x62, 0xfe, 0x27, 0xc5, 0x2e, 0x31, 0x0f, 0x40, 0xae, 0x64,
+ 0x86, 0x2a, 0x36, 0x7e, 0x03, 0x01, 0x37, 0xf3, 0x36, 0x42, 0x3f, 0xaa,
+ 0x0b, 0xdd, 0xa9, 0x3e, 0x09, 0xe2, 0xe9, 0xea, 0x15, 0x5b, 0x0d, 0x4b,
+ 0xcc, 0x47, 0xa5, 0x24, 0xed, 0x0b, 0x3c, 0xb3, 0x6e, 0xc6, 0x1d, 0x47,
+ 0x39, 0x30, 0xe6, 0xf6, 0xc7, 0xae, 0x6b, 0x25, 0x09, 0xce, 0xf2, 0x2f,
+ 0xaf, 0x4d, 0x32, 0xac, 0x4f, 0xa4, 0xff, 0x39, 0x48, 0xbb, 0xe6, 0xdf,
+ 0x93, 0x41, 0x00, 0x2a, 0x82, 0xd9, 0x81, 0x79, 0xc4, 0x65, 0xf3, 0x62,
+ 0x17, 0x18, 0x37, 0xcf, 0xa0, 0xaa, 0xe5, 0xc6, 0x97, 0x84, 0x14, 0x1c,
+ 0x7e, 0x36, 0x72, 0xe2, 0x35, 0x84, 0x39, 0x43, 0x7b, 0xbf, 0xaf, 0x94,
+ 0x9a, 0xa2, 0xeb, 0xf9, 0xc4, 0x5c, 0x49, 0x5a, 0xef, 0x6b, 0xe6, 0x19,
+ 0x0e, 0xac, 0x08, 0x43, 0x4d, 0x5a, 0x14, 0x7e, 0x27, 0x4a, 0xd1, 0x4a,
+ 0x9b, 0x3f, 0xdc, 0x98, 0x5a, 0xcb, 0x40, 0x90, 0xdf, 0x56, 0xa1, 0x76,
+ 0x12, 0x71, 0xe1, 0x20, 0x5e, 0xf1, 0xaa, 0xd7, 0xba, 0x6c, 0xfb, 0x1d,
+ 0x20, 0xfe, 0xa0, 0x41, 0x65, 0x09, 0x5f, 0x8b, 0xde, 0x20, 0xb7, 0x26,
+ 0xd5, 0xce, 0x83, 0x14, 0x0d, 0x28, 0x36, 0x86, 0xe1, 0x02, 0x86, 0xde,
+ 0xf3, 0xc6, 0x44, 0x10, 0x04, 0x84, 0x9f, 0x18, 0x9b, 0xf1, 0x0a, 0xca,
+ 0x41, 0x53, 0xa9, 0xa9, 0x6b, 0xa5, 0x95, 0x22, 0x1d, 0x17, 0x3b, 0xc0,
+ 0x5f, 0xb7, 0x5e, 0xac, 0x73, 0x4e, 0x76, 0xaf, 0x4c, 0xb4, 0x4f, 0xf6,
+ 0x3f, 0xa1, 0x20, 0x2e, 0xf7, 0xa8, 0x14, 0x0d, 0xc3, 0x50, 0x97, 0x25,
+ 0xe0, 0xc4, 0x5c, 0x3e, 0xe6, 0xbe, 0xe9, 0xa4, 0x1e, 0x1d, 0xdb, 0x06,
+ 0xc1, 0x15, 0xf2, 0x6d, 0xbf, 0x71, 0xf2, 0x0b, 0xd9, 0x75, 0x4b, 0x38,
+ 0xf5, 0xe2, 0x69, 0x0d, 0x93, 0xa5, 0x8e, 0x4c, 0xc5, 0x2a, 0xb6, 0x45,
+ 0x60, 0x77, 0xd6, 0x14, 0x39, 0x5e, 0x70, 0x9e, 0x8d, 0x07, 0x20, 0x1c,
+ 0x05, 0xc9, 0xb0, 0x46, 0xf7, 0x6c, 0x3e, 0xf8, 0xf8, 0x0a, 0xad, 0x0b,
+ 0x22, 0x5e, 0x32, 0xbd, 0x46, 0xbc, 0x06, 0x7b, 0x92, 0x36, 0x5a, 0x2b,
+ 0xac, 0x68, 0x2d, 0x5a, 0xf4, 0xc2, 0x61, 0xe3, 0x9d, 0xf4, 0x5d, 0x59,
+ 0x59, 0x98, 0xb7, 0x5a, 0x73, 0x08, 0xf6, 0x4f, 0x0a, 0x75, 0x04, 0x93,
+ 0xc1, 0xe1, 0x9b, 0xe0, 0xb0, 0x2a, 0xf7, 0xdd, 0x8b, 0xae, 0xf5, 0x55,
+ 0x28, 0x6b, 0x21, 0x9b, 0x02, 0x43, 0xbd, 0x36, 0x4d, 0xa5, 0x17, 0xbb,
+ 0x97, 0xd4, 0x78, 0x1f, 0xe8, 0xd9, 0x98, 0x0e, 0x41, 0x96, 0x52, 0xab,
+ 0xad, 0x91, 0x92, 0xae, 0x62, 0x5c, 0xe7, 0xeb, 0x24, 0x1b, 0xe8, 0x2a,
+ 0xb2, 0xe8, 0xdc, 0x34, 0x7f, 0xe9, 0xa1, 0x4c, 0x4c, 0x13, 0xeb, 0x31,
+ 0x29, 0xc3, 0xc4, 0xf5, 0xb4, 0x50, 0xb1, 0x8b, 0x08, 0xc3, 0x30, 0xf8,
+ 0x40, 0xd8, 0x76, 0xd5, 0x4d, 0xf0, 0xc2, 0xd8, 0x67, 0x75, 0x01, 0x81,
+ 0x2a, 0xe0, 0x6b, 0xc0, 0xf5, 0x30, 0x55, 0xb6, 0xa9, 0x52, 0x19, 0xc4,
+ 0x73, 0x78, 0xc4, 0x9e, 0x13, 0x5f, 0xa7, 0x56, 0xb4, 0x07, 0x2c, 0x92,
+ 0x85, 0x66, 0x5d, 0x00, 0x47, 0x32, 0x3c, 0x8b, 0xbf, 0x86, 0x9e, 0xe2,
+ 0xfd, 0xf1, 0xf0, 0x15, 0x5a, 0x16, 0x44, 0xbc, 0x65, 0x7a, 0x8d, 0x78,
+ 0x0c, 0xf9, 0x94, 0x1d, 0x83, 0x7c, 0xee, 0xc7, 0x71, 0x23, 0x42, 0x2d,
+ 0xb3, 0xe4, 0x68, 0x31, 0xec, 0x17, 0x63, 0x27, 0xe3, 0x52, 0x9d, 0xd0,
+ 0xcd, 0xd8, 0xd8, 0x86, 0xb4, 0x91, 0x8a, 0xa3, 0xcb, 0xa3, 0x76, 0xc7,
+ 0x98, 0xda, 0xd6, 0xb8, 0x34, 0x1c, 0xf6, 0x72, 0x23, 0xd8, 0x1b, 0xbe,
+ 0x2d, 0x05, 0xe1, 0x83, 0x01, 0x74, 0xc7, 0xe3, 0x54, 0x85, 0xec, 0xec,
+ 0xfb, 0x3a, 0xa2, 0xf3, 0x21, 0x7a, 0x0b, 0x68, 0x91, 0x02, 0xd2, 0xa4,
+ 0x40, 0x21, 0xef, 0x4f, 0xe5, 0x3d, 0x6d, 0x6e, 0xfb, 0xba, 0xb1, 0x90,
+ 0x4f, 0x81, 0x07, 0x27, 0x5e, 0xa8, 0xab, 0xa8, 0x87, 0x38, 0x3c, 0xe5,
+ 0x48, 0x29, 0x9e, 0x77, 0x4c, 0xb4, 0x9d, 0x91, 0x2d, 0x8a, 0x0a, 0x84,
+ 0xdd, 0x93, 0x95, 0xdf, 0xd4, 0xa3, 0x8f, 0xb7, 0xaf, 0x07, 0xd3, 0x81,
+ 0xbb, 0x0d, 0x89, 0x42, 0x92, 0x0b, 0x66, 0x39, 0x8b, 0x99, 0x36, 0x61,
+ 0xbb, 0xe1, 0x05, 0xca, 0x68, 0xc8, 0x0f, 0xae, 0x9e, 0x7d, 0x75, 0x7f,
+ 0x24, 0xef, 0xdc, 0x97, 0x8d, 0xb9, 0xa5, 0x7a, 0x3c, 0xc4, 0x49, 0x79,
+ 0x47, 0x47, 0x61, 0x88, 0xaf, 0x96, 0x08, 0x11, 0x22, 0xff, 0xb7, 0x14,
+ 0x12, 0x15, 0x14, 0x26, 0xa3, 0x03, 0x0e, 0xb2, 0xff, 0x57, 0x9e, 0xc0,
+ 0x92, 0x4f, 0x4c, 0x69, 0xd4, 0xfe, 0xc1, 0x46, 0xc4, 0xe8, 0x64, 0x7f,
+ 0x08, 0x38, 0x90, 0x15, 0x8f, 0xc2, 0xc8, 0xa8, 0x50, 0x7f, 0x74, 0x4a,
+ 0xc3, 0x37, 0x52, 0x44, 0x25, 0x78, 0x19, 0x48, 0x00, 0xd1, 0x39, 0x43,
+ 0x3a, 0x14, 0x72, 0x8c, 0x8e, 0xa2, 0xf8, 0x95, 0x1e, 0x56, 0x07, 0xdd,
+ 0xcd, 0x89, 0xde, 0x71, 0xc3, 0x85, 0xc3, 0xcf, 0xe4, 0x6c, 0xf4, 0x43,
+ 0x95, 0x49, 0x27, 0x25, 0x35, 0x1a, 0xb9, 0xf7, 0xc8, 0x20, 0xeb, 0x01,
+ 0xbb, 0x49, 0x8d, 0xf4, 0xc0, 0x32, 0xbe, 0x74, 0x42, 0x07, 0x53, 0xd0,
+ 0xf4, 0x4c, 0x79, 0xa8, 0xb7, 0xf9, 0x09, 0xfd, 0xeb, 0x02, 0x83, 0x26,
+ 0x3b, 0x88, 0x1a, 0x41, 0x70, 0x95, 0x2f, 0x53, 0xc1, 0xc1, 0xa5, 0xbe,
+ 0x23, 0x32, 0x8b, 0x48, 0xb8, 0xff, 0x4c, 0x6b, 0x6e, 0xbf, 0xd7, 0xe0,
+ 0xf1, 0x3a, 0xfd, 0xd2, 0x1e, 0xa2, 0x11, 0x50, 0xa0, 0xfe, 0xd2, 0x3d,
+ 0x20, 0xa6, 0x79, 0xdd, 0x32, 0xd2, 0x76, 0x44, 0xb6, 0x28, 0x2a, 0x13,
+ 0x76, 0x4e, 0x57, 0x92, 0xa5, 0x01, 0x64, 0x30, 0x06, 0xf1, 0xba, 0x62,
+ 0x5a, 0x59, 0xab, 0xf2, 0x15, 0xef, 0x3c, 0x24, 0x96, 0x14, 0x6f, 0xd4,
+ 0x51, 0xee, 0x6d, 0xeb, 0x77, 0xad, 0xba, 0x03, 0xe0, 0xd2, 0x30, 0xbd,
+ 0xbf, 0x06, 0x14, 0xa3, 0xad, 0xd7, 0x97, 0x20, 0x89, 0x63, 0x8f, 0x84,
+ 0x0d, 0x87, 0x6d, 0x5b, 0xdf, 0x0c, 0x2d, 0x86, 0x77, 0x6b, 0x73, 0xd6,
+ 0x34, 0x83, 0xe5, 0x15, 0x88, 0x3e, 0xbc, 0x4d, 0x2c, 0x96, 0xd1, 0x1a,
+ 0x81, 0xf1, 0xb4, 0x6c, 0xaa, 0x52, 0x3a, 0x53, 0x52, 0xc6, 0x73, 0x1b,
+ 0xe6, 0xaa, 0xd5, 0xc8, 0x91, 0xee, 0x72, 0xad, 0x66, 0x25, 0x61, 0xbd,
+ 0xa7, 0x15, 0x46, 0x5d, 0x76, 0x4a, 0x47, 0x9b, 0x03, 0x44, 0xe5, 0x0c,
+ 0xe8, 0x51, 0xca, 0x32, 0x3a, 0x8b, 0xe2, 0x54, 0x79, 0x4d, 0x51, 0x4e,
+ 0xbb, 0x44, 0x2c, 0x30, 0xd1, 0xe6, 0xa1, 0xc9, 0x2c, 0x28, 0xdf, 0xa8,
+ 0xa3, 0xdc, 0xdb, 0xd6, 0xef, 0x5b, 0x74, 0x07, 0xc1, 0xa4, 0x55, 0x37,
+ 0xc6, 0xfc, 0xde, 0xf2, 0x35, 0xb3, 0xf2, 0x3f, 0xe8, 0x0c, 0xbe, 0x60,
+ 0x72, 0x56, 0xde, 0x5f, 0x0d, 0xdd, 0x2e, 0x67, 0x63, 0x31, 0x23, 0xbc,
+ 0xbe, 0x8d, 0x47, 0xdd, 0xa0, 0x38, 0xab, 0x04, 0xd7, 0xb7, 0x07, 0xf9,
+ 0x5d, 0x5e, 0x27, 0xd0, 0x6e, 0xda, 0x01, 0xda, 0x8b, 0x3d, 0xe9, 0x89,
+ 0xe4, 0xbb, 0xeb, 0x3d, 0xd2, 0xb1, 0x16, 0x16, 0xe6, 0x49, 0xb6, 0x28,
+ 0x02, 0xc3, 0xd0, 0x57, 0x17, 0x4f, 0x2a, 0x9b, 0x42, 0x74, 0x1d, 0x38,
+ 0xc4, 0x19, 0xdd, 0xad, 0xcf, 0x58, 0xd2, 0x0f, 0x94, 0x56, 0x20, 0xfa,
+ 0xf1, 0x34, 0xb2, 0x5b, 0x44, 0x6a, 0x07, 0xc6, 0xd1, 0xb2, 0xa9, 0x48,
+ 0xe9, 0x4d, 0x4b, 0x19, 0xcc, 0x6f, 0x9a, 0xab, 0x57, 0x22, 0x47, 0xb9,
+ 0xca, 0xb5, 0x98, 0x88, 0x58, 0x15, 0xe1, 0x37, 0x7b, 0x18, 0xdc, 0xea,
+ 0x45, 0xad, 0xc7, 0xc3, 0xb4, 0xeb, 0xcb, 0x85, 0x2c, 0x31, 0xa6, 0x5e,
+ 0x6a, 0x9d, 0xb6, 0x45, 0x19, 0x42, 0x5a, 0x2d, 0xe7, 0x15, 0x99, 0x8d,
+ 0xe5, 0x5b, 0x09, 0x52, 0x8e, 0x4d, 0xf1, 0xec, 0xb3, 0xb1, 0xf5, 0xfe,
+ 0x79, 0xb0, 0x4a, 0x4f, 0xb6, 0xbe, 0x18, 0x84, 0xe6, 0xaa, 0xb0, 0xe5,
+ 0x76, 0x3c, 0x35, 0x51, 0xd2, 0xa6, 0xf3, 0xfb, 0xe3, 0x1b, 0xf5, 0xc4,
+ 0x4f, 0x56, 0x3a, 0xc7, 0x41, 0x8d, 0xd7, 0x9e, 0x1e, 0xc9, 0x9c, 0xd8,
+ 0xd4, 0xe3, 0x4f, 0xb5, 0xfd, 0x78, 0x5e, 0x60, 0xff, 0xd3, 0xdc, 0x00,
+ 0xd6, 0x02, 0xba, 0x09, 0x8b, 0x93, 0xc9, 0xb4, 0x8e, 0x4e, 0x21, 0x27,
+ 0x5e, 0x89, 0x6c, 0x31, 0x79, 0xfc, 0xf0, 0xd8, 0xac, 0x48, 0x52, 0x7d,
+ 0xae, 0xc8, 0x4b, 0xef, 0x06, 0xde, 0xa4, 0xd3, 0x01, 0x46, 0xb2, 0xd6,
+ 0x28, 0x45, 0xd9, 0xcb, 0x63, 0x32, 0x19, 0x3e, 0xbf, 0x13, 0x99, 0x7f,
+ 0xdd, 0x0b, 0x25, 0x72, 0x57, 0x7a, 0x89, 0x68, 0xa4, 0xde, 0x98, 0xfc,
+ 0xa8, 0xbc, 0xf2, 0xc1, 0x82, 0x28, 0x59, 0xf7, 0x6b, 0x83, 0x60, 0x57,
+ 0x84, 0xdd, 0xec, 0x63, 0x73, 0xa9, 0x16, 0xb7, 0x1f, 0x0e, 0xd3, 0xaf,
+ 0x2e, 0x14, 0xb0, 0xc6, 0x99, 0x79, 0xaa, 0x76, 0xd9, 0x14, 0x65, 0x09,
+ 0x68, 0xb7, 0x9c, 0x56, 0x66, 0x37, 0x95, 0x6c, 0x25, 0x4a, 0x39, 0x37,
+ 0xc7, 0xb2, 0xce, 0xc7, 0xd7, 0xf9, 0xe6, 0xc1, 0x29, 0x3e, 0xda, 0xf8,
+ 0x62, 0x13, 0x9a, 0xaa, 0xc3, 0x95, 0xd8, 0xf0, 0xd5, 0x47, 0x4a, 0x9b,
+ 0xcf, 0xef, 0x8c, 0x6f, 0xd7, 0x11, 0x3d, 0x58, 0xeb, 0x1d, 0x06, 0x37,
+ 0x5e, 0x78, 0x7b, 0x26, 0x73, 0x63, 0x53, 0x8d, 0x3e, 0xd7, 0xf5, 0xe1,
+ 0x79, 0x83, 0xff, 0x4f, 0x70, 0x03, 0x58, 0x0a, 0xe8, 0x26, 0x2e, 0x4f,
+ 0x26, 0xd2, 0x39, 0x38, 0x84, 0x9d, 0x7a, 0x25, 0xb0, 0xc5, 0xe7, 0xf3,
+ 0xc3, 0x62, 0xb1, 0x21, 0x49, 0xf6, 0xbb, 0x21, 0x2f, 0xbc, 0x1b, 0x7a,
+ 0x93, 0x4c, 0x05, 0x1a, 0xcb, 0x58, 0xa1, 0x17, 0x67, 0x2d, 0x8c, 0xc8,
+ 0x64, 0xfa, 0xfc, 0x4e, 0x65, 0xff, 0x74, 0x2c, 0x95, 0xc9, 0x5d, 0xea,
+ 0x25, 0xa2, 0x93, 0x7a, 0x63, 0xf2, 0xa2, 0xf3, 0xcb, 0x06, 0x08, 0xa1,
+ 0x67, 0xdd, 0xae, 0x0d, 0x81, 0x5e, 0x13, 0x77, 0xb1, 0x8d, 0xce, 0xa4,
+ 0x5a, 0xdc, 0x7c, 0x3b, 0x4e, 0xbc, 0xb8, 0x52, 0xc3, 0x1a, 0x65, 0xe6,
+ 0xa9, 0xdb, 0x64, 0x51, 0x94, 0x25, 0xa2, 0xde, 0x71, 0x59, 0x98, 0xde,
+ 0x55, 0xb0, 0x95, 0x28, 0xe4, 0xdf, 0x1e, 0xcb, 0x3b, 0x1f, 0x5f, 0xe7,
+ 0x9b, 0x04, 0xa4, 0xfb, 0x6b, 0xe1, 0x88, 0x4e, 0x6a, 0xab, 0x0e, 0x57,
+ 0x63, 0xc3, 0x55, 0x1d, 0x2a, 0x6f, 0x3f, 0xbe, 0x31, 0xbf, 0x5c, 0x44,
+ 0xf5, 0x63, 0xac, 0x74, 0x18, 0xdd, 0x79, 0xe1, 0xec, 0x99, 0xcd, 0x8d,
+ 0x4e, 0x34, 0xfb, 0x5f, 0xd7, 0x85, 0xe6, 0x0f, 0xfd, 0x3d, 0xc0, 0x0d,
+ 0x60, 0x2b, 0xa0, 0x98, 0xb9, 0x3c, 0x9b, 0x48, 0xe4, 0xe2, 0x12, 0x75,
+ 0xe8, 0x96, 0xc3, 0x17, 0x9f, 0xcf, 0x0d, 0x8a, 0xc4, 0x85, 0x27, 0xda,
+ 0xec, 0x84, 0xbe, 0xf0, 0x6d, 0xea, 0x4d, 0x30, 0x14, 0x6b, 0x2d, 0x62,
+ 0x84, 0x5d, 0x9c, 0xb6, 0x33, 0x21, 0x93, 0xeb, 0xf1, 0x39, 0x97, 0xfd,
+ 0xd0, 0xb2, 0x57, 0x25, 0x77, 0xa8, 0x96, 0x8a, 0x4d, 0xe9, 0x8f, 0xca,
+ 0x8b, 0xcf, 0x2c, 0x18, 0x22, 0x85, 0x9f, 0x76, 0xb8, 0x36, 0x05, 0x78,
+ 0x4d, 0xde, 0xc6, 0x37, 0x3a, 0x91, 0x6b, 0x71, 0xf0, 0xed, 0x3a, 0xf2,
+ 0xe1, 0x4b, 0x0c, 0x69, 0x97, 0x9a, 0xa7, 0x6d, 0x91, 0x46, 0x50, 0x96,
+ 0x8b, 0x79, 0xc5, 0x66, 0x63, 0x79, 0x56, 0xc2, 0x54, 0xa3, 0x93, 0x7c,
+ 0x7b, 0x2c, 0xec, 0x7d, 0x7f, 0x9e, 0x6c, 0x12, 0x93, 0xed, 0xaf, 0x86,
+ 0x21, 0x39, 0xaa, 0xac, 0x39, 0x5d, 0x8f, 0x0d, 0x54, 0x74, 0xa9, 0xbc,
+ 0xfe, 0xf8, 0xc6, 0xfd, 0x71, 0x13, 0xd5, 0x8e, 0xb1, 0xd0, 0x63, 0x75,
+ 0xe7, 0x87, 0xb2, 0x67, 0x36, 0x35, 0x38, 0xd3, 0xed, 0x7f, 0x5e, 0x17,
+ 0x98, 0x3f, 0xf4, 0xf7, 0x00, 0x35, 0x80, 0xae, 0x82, 0x62, 0xe4, 0xf2,
+ 0x6d, 0x23, 0x93, 0x88, 0x49, 0xd7, 0xa2, 0x5b, 0x0c, 0x5e, 0x7f, 0x3c,
+ 0x36, 0x2b, 0x12, 0x14, 0x9f, 0x6b, 0xb2, 0x12, 0xfb, 0xc1, 0xb7, 0xa9,
+ 0x34, 0xc0, 0x51, 0xac, 0xb5, 0x8a, 0x11, 0x76, 0x72, 0xd8, 0xcc, 0x86,
+ 0x4f, 0xaf, 0xc4, 0xe6, 0x5f, 0xf7, 0x42, 0xc9, 0x5c, 0x95, 0xde, 0xa2,
+ 0x5a, 0x29, 0x37, 0xa6, 0x3f, 0x2a, 0x2f, 0x3c, 0xb0, 0x60, 0x8a, 0x16,
+ 0x7d, 0xda, 0xe0, 0xd8, 0x15, 0xe1, 0x37, 0x7b, 0x18, 0xdc, 0xea, 0x45,
+ 0xad, 0xc7, 0xc3, 0xb4, 0xeb, 0xcb, 0x85, 0x2c, 0x31, 0xa6, 0x5e, 0x6a,
+ 0x9d, 0xb6, 0x45, 0x19, 0x42, 0x5a, 0x2d, 0xe7, 0x15, 0x99, 0x8d, 0xe5,
+ 0x5b, 0x09, 0x52, 0x8e, 0x4d, 0xf1, 0xec, 0xb3, 0xb1, 0xf5, 0xfe, 0x79,
+ 0xb0, 0x4a, 0x4f, 0xb6, 0xbe, 0x18, 0x84, 0xe6, 0xaa, 0xb0, 0xe5, 0x76,
+ 0x3c, 0x35, 0x51, 0xd2, 0xa6, 0xf3, 0xfb, 0xe3, 0x1b, 0xf5, 0xc4, 0x4f,
+ 0x56, 0x3a, 0xc7, 0x41, 0x8d, 0xd7, 0x9e, 0x1e, 0xc9, 0x9c, 0xd8, 0xd4,
+ 0xe3, 0x4f, 0xb5, 0xfd, 0x78, 0x5e, 0x60, 0xff, 0xd3, 0xdc, 0x00, 0xd6,
+ 0x02, 0xba, 0x09, 0x8b, 0x93, 0xc9, 0xb4, 0x8e, 0x4e, 0x21, 0x27, 0x5e,
+ 0x89, 0x6c, 0x31, 0x79, 0xfc, 0xf0, 0xd8, 0xac, 0x48, 0x52, 0x7d, 0xae,
+ 0xc8, 0x4b, 0xef, 0x06, 0xde, 0xa4, 0xd3, 0x01, 0x46, 0xb2, 0xd6, 0x28,
+ 0x45, 0xd9, 0xcb, 0x63, 0x32, 0x19, 0x3e, 0xbf, 0x13, 0x99, 0x7f, 0xdd,
+ 0x0b, 0x25, 0x72, 0x57, 0x7a, 0x89, 0x68, 0xa4, 0xde, 0x98, 0xfc, 0xa8,
+ 0xbc, 0xf2, 0xc1, 0x82, 0x28, 0x59, 0xf7, 0x6b, 0x83, 0x60, 0x57, 0x84,
+ 0xdd, 0xec, 0x63, 0x73, 0xa9, 0x16, 0xb7, 0x1f, 0x0e, 0xd3, 0xaf, 0x2e,
+ 0x14, 0xb0, 0xc6, 0x99, 0x79, 0xaa, 0x76, 0xd9, 0x14, 0x65, 0x09, 0x68,
+ 0xb7, 0x9c, 0x56, 0x66, 0x37, 0x95, 0x6c, 0x25, 0x4a, 0x39, 0x37, 0xc7,
+ 0xb2, 0xce, 0xc7, 0xd7, 0xf9, 0xe6, 0xc1, 0x29, 0x3e, 0xda, 0xf8, 0x62,
+ 0x13, 0x9a, 0xaa, 0xc3, 0x95, 0xd8, 0xf0, 0xd5, 0x47, 0x4a, 0x9b, 0xcf,
+ 0xef, 0x8c, 0x6f, 0xd7, 0x11, 0x3d, 0x58, 0xeb, 0x1d, 0x06, 0x37, 0x5e,
+ 0x78, 0x7b, 0x26, 0x73, 0x63, 0x53, 0x8d, 0x3e, 0xd7, 0xf5, 0xe1, 0x79,
+ 0x83, 0xff, 0x4f, 0x70, 0x03, 0x58, 0x0a, 0xe8, 0x26, 0x2e, 0x4f, 0x26,
+ 0xd2, 0x39, 0x38, 0x84, 0x9d, 0x7a, 0x25, 0xb0, 0xc5, 0xe7, 0xf3, 0xc3,
+ 0x62, 0xb1, 0x21, 0x49, 0xf6, 0xbb, 0x21, 0x2f, 0xbc, 0x1b, 0x7a, 0x93,
+ 0x4c, 0x05, 0x1a, 0xcb, 0x58, 0xa1, 0x17, 0x67, 0x2d, 0x8c, 0xc8, 0x64,
+ 0xfa, 0xfc, 0x4e, 0x65, 0xff, 0x74, 0x2c, 0x95, 0xc9, 0x5d, 0xea, 0x25,
+ 0xa2, 0x93, 0x7a, 0x63, 0xf2, 0xa2, 0xf3, 0xcb, 0x06, 0x08, 0xa1, 0x67,
+ 0xdd, 0xae, 0x0d, 0x81, 0x5e, 0x13, 0x77, 0xb1, 0x8d, 0xce, 0xa4, 0x5a,
+ 0xdc, 0x7c, 0x3b, 0x4e, 0xbc, 0xb8, 0x52, 0xc3, 0x1a, 0x65, 0xe6, 0xa9,
+ 0xdb, 0x64, 0x51, 0x94, 0x25, 0xa2, 0xde, 0x71, 0x59, 0x98, 0xde, 0x55,
+ 0xb0, 0x95, 0x28, 0xe4, 0xdf, 0x1e, 0xcb, 0x3b, 0x1f, 0x5f, 0xe7, 0x9b,
+ 0x04, 0xa4, 0xfb, 0x6b, 0xe1, 0x88, 0x4e, 0x6a, 0xab, 0x0e, 0x57, 0x63,
+ 0xc3, 0x55, 0x1d, 0x2a, 0x6f, 0x3f, 0xbe, 0x31, 0xbf, 0x5c, 0x44, 0xf5,
+ 0x63, 0xac, 0x74, 0x18, 0xdd, 0x79, 0xe1, 0xec, 0x99, 0xcd, 0x8d, 0x4e,
+ 0x34, 0xfb, 0x5f, 0xd7, 0x85, 0xe6, 0x0f, 0xfd, 0x3d, 0xc0, 0x0d, 0x60,
+ 0x2b, 0xa0, 0x98, 0xb9, 0x3c, 0x9b, 0x48, 0xe4, 0xe2, 0x12, 0x75, 0xe8,
+ 0x96, 0xc3, 0x17, 0x9f, 0xcf, 0x0d, 0x8a, 0xc4, 0x85, 0x27, 0xda, 0xec,
+ 0x84, 0xbe, 0xf0, 0x6d, 0xea, 0x4d, 0x30, 0x14, 0x6b, 0x2d, 0x62, 0x84,
+ 0x5d, 0x9c, 0xb6, 0x33, 0x21, 0x93, 0xeb, 0xf1, 0x39, 0x97, 0xfd, 0xd0,
+ 0xb2, 0x57, 0x25, 0x77, 0xa8, 0x96, 0x8a, 0x4d, 0xe9, 0x8f, 0xca, 0x8b,
+ 0xcf, 0x2c, 0x18, 0x22, 0x85, 0x9f, 0x76, 0xb8, 0x36, 0x05, 0x78, 0x4d,
+ 0xde, 0xc6, 0x37, 0x3a, 0x91, 0x6b, 0x71, 0xf0, 0xed, 0x3a, 0xf2, 0xe1,
+ 0x4b, 0x0c, 0x69, 0x97, 0x9a, 0xa7, 0x6d, 0x91, 0x46, 0x50, 0x96, 0x8b,
+ 0x79, 0xc5, 0x66, 0x63, 0x79, 0x56, 0xc2, 0x54, 0xa3, 0x93, 0x7c, 0x7b,
+ 0x2c, 0xec, 0x7d, 0x7f, 0x9e, 0x6c, 0x12, 0x93, 0xed, 0xaf, 0x86, 0x21,
+ 0x39, 0xaa, 0xac, 0x39, 0x5d, 0x8f, 0x0d, 0x54, 0x74, 0xa9, 0xbc, 0xfe,
+ 0xf8, 0xc6, 0xfd, 0x71, 0x13, 0xd5, 0x8e, 0xb1, 0xd0, 0x63, 0x75, 0xe7,
+ 0x87, 0xb2, 0x67, 0x36, 0x35, 0x38, 0xd3, 0xed, 0x7f, 0x5e, 0x17, 0x98,
+ 0x3f, 0xf4, 0xf7, 0x00, 0x35, 0x80, 0xae, 0x82, 0x62, 0xe4, 0xf2, 0x6d,
+ 0x23, 0x93, 0x88, 0x49, 0xd7, 0xa2, 0x5b, 0x0c, 0x5e, 0x7f, 0x3c, 0x36,
+ 0x2b, 0x12, 0x14, 0x9f, 0x6b, 0xb2, 0x12, 0xfb, 0xc1, 0xb7, 0xa9, 0x34,
+ 0xc0, 0x51, 0xac, 0xb5, 0x8a, 0x11, 0x76, 0x72, 0xd8, 0xcc, 0x86, 0x4f,
+ 0xaf, 0xc4, 0xe6, 0x5f, 0xf7, 0x42, 0xc9, 0x5c, 0x95, 0xde, 0xa2, 0x5a,
+ 0x29, 0x37, 0xa6, 0x3f, 0x2a, 0x2f, 0x3c, 0xb0, 0x60, 0x8a, 0x16, 0x7d,
+ 0xda, 0xe0, 0xd8, 0x15, 0xe1, 0x37, 0x7b, 0x18, 0xdc, 0xea, 0x45, 0xad,
+ 0xc7, 0xc3, 0xb4, 0xeb, 0xcb, 0x85, 0x2c, 0x31, 0xa6, 0x5e, 0x6a, 0x9d,
+ 0xb6, 0x45, 0x19, 0x42, 0x5a, 0x2d, 0xe7, 0x15, 0x99, 0x8d, 0xe5, 0x5b,
+ 0x09, 0x52, 0x8e, 0x4d, 0xf1, 0xec, 0xb3, 0xb1, 0xf5, 0xfe, 0x79, 0xb0,
+ 0x4a, 0x4f, 0xb6, 0xbe, 0x18, 0x84, 0xe6, 0xaa, 0xb0, 0xe5, 0x76, 0x3c,
+ 0x35, 0x51, 0xd2, 0xa6, 0xf3, 0xfb, 0xe3, 0x1b, 0xf5, 0xc4, 0x4f, 0x56,
+ 0x3a, 0xc7, 0x41, 0x8d, 0xd7, 0x9e, 0x1e, 0xc9, 0x9c, 0xd8, 0xd4, 0xe3,
+ 0x4f, 0xb5, 0xfd, 0x78, 0x5e, 0x60, 0xff, 0xd3, 0xdc, 0x00, 0xd6, 0x02,
+ 0xba, 0x09, 0x8b, 0x93, 0xc9, 0xb4, 0x8e, 0x4e, 0x21, 0x27, 0x5e, 0x89,
+ 0x6c, 0x31, 0x79, 0xfc, 0xf0, 0xd8, 0xac, 0x48, 0x52, 0x7d, 0xae, 0xc8,
+ 0x4b, 0xef, 0x06, 0xde, 0xa4, 0xd3, 0x01, 0x46, 0xb2, 0xd6, 0x28, 0x45,
+ 0xd9, 0xcb, 0x63, 0x32, 0x19, 0x3e, 0xbf, 0x13, 0x99, 0x7f, 0xdd, 0x0b,
+ 0x25, 0x72, 0x57, 0x7a, 0x89, 0x68, 0xa4, 0xde, 0x98, 0xfc, 0xa8, 0xbc,
+ 0xf2, 0xc1, 0x82, 0x28, 0x59, 0xf7, 0x6b, 0x83, 0x60, 0x57, 0x84, 0xdd,
+ 0xec, 0x63, 0x73, 0xa9, 0x16, 0xb7, 0x1f, 0x0e, 0xd3, 0xaf, 0x2e, 0x14,
+ 0xb0, 0xc6, 0x99, 0x79, 0xaa, 0x76, 0xd9, 0x14, 0x65, 0x09, 0x68, 0xb7,
+ 0x9c, 0x56, 0x66, 0x37, 0x95, 0x6c, 0x25, 0x4a, 0x39, 0x37, 0xc7, 0xb2,
+ 0xce, 0xc7, 0xd7, 0xf9, 0xe6, 0xc1, 0x29, 0x3e, 0xda, 0xf8, 0x62, 0x13,
+ 0x9a, 0xaa, 0xc3, 0x95, 0xd8, 0xf0, 0xd5, 0x47, 0x4a, 0x9b, 0xcf, 0xef,
+ 0x8c, 0x6f, 0xd7, 0x11, 0x3d, 0x58, 0xeb, 0x1d, 0x06, 0x37, 0x5e, 0x78,
+ 0x7b, 0x26, 0x73, 0x63, 0x53, 0x8d, 0x3e, 0xd7, 0xf5, 0xe1, 0x79, 0x83,
+ 0xff, 0x4f, 0x70, 0x03, 0x58, 0x0a, 0xe8, 0x26, 0x2e, 0x4f, 0x26, 0xd2,
+ 0x39, 0x38, 0x84, 0x9d, 0x7a, 0x25, 0xb0, 0xc5, 0xe7, 0xf3, 0xc3, 0x62,
+ 0xb1, 0x21, 0x49, 0xf6, 0xbb, 0x21, 0x2f, 0xbc, 0x1b, 0x7a, 0x93, 0x4c,
+ 0x05, 0x1a, 0xcb, 0x58, 0xa1, 0x17, 0x67, 0x2d, 0x8c, 0xc8, 0x64, 0xfa,
+ 0xfc, 0x4e, 0x65, 0xff, 0x74, 0x2c, 0x95, 0xc9, 0x5d, 0xea, 0x25, 0xa2,
+ 0x93, 0x7a, 0x63, 0xf2, 0xa2, 0xf3, 0xcb, 0x06, 0x08, 0xa1, 0x67, 0xdd,
+ 0xae, 0x0d, 0x81, 0x5e, 0x13, 0x77, 0xb1, 0x8d, 0xce, 0xa4, 0x5a, 0xdc,
+ 0x7c, 0x3b, 0x4e, 0xbc, 0xb8, 0x52, 0xc3, 0x1a, 0x65, 0xe6, 0xa9, 0xdb,
+ 0x64, 0x51, 0x94, 0x25, 0xa2, 0xde, 0x71, 0x59, 0x98, 0xde, 0x55, 0xb0,
+ 0x95, 0x28, 0xe4, 0xdf, 0x1e, 0xcb, 0x3b, 0x1f, 0x5f, 0xe7, 0x9b, 0x04,
+ 0xa4, 0xfb, 0x6b, 0xe1, 0x88, 0x4e, 0x6a, 0xab, 0x0e, 0x57, 0x63, 0xc3,
+ 0x55, 0x1d, 0x2a, 0x6f, 0x3f, 0xbe, 0x31, 0xbf, 0x5c, 0x44, 0xf5, 0x63,
+ 0xac, 0x74, 0x18, 0xdd, 0x79, 0xe1, 0xec, 0x99, 0xcd, 0x8d, 0x4e, 0x34,
+ 0xfb, 0x5f, 0xd7, 0x85, 0xe6, 0x0f, 0xfd, 0x3d, 0xc0, 0x0d, 0x60, 0x2b,
+ 0xa0, 0x98, 0xb9, 0x3c, 0x9b, 0x48, 0xe4, 0xe2, 0x12, 0x75, 0xe8, 0x96,
+ 0xc3, 0x17, 0x9f, 0xcf, 0x0d, 0x8a, 0xc4, 0x85, 0x27, 0xda, 0xec, 0x84,
+ 0xbe, 0xf0, 0x6d, 0xea, 0x4d, 0x30, 0x14, 0x6b, 0x2d, 0x62, 0x84, 0x5d,
+ 0x9c, 0xb6, 0x33, 0x21, 0x93, 0xeb, 0xf1, 0x39, 0x97, 0xfd, 0xd0, 0xb2,
+ 0x57, 0x25, 0x77, 0xa8, 0x96, 0x8a, 0x4d, 0xe9, 0x8f, 0xca, 0x8b, 0xcf,
+ 0x2c, 0x18, 0x22, 0x85, 0x9f, 0x76, 0xb8, 0x36, 0x05, 0x78, 0x4d, 0xde,
+ 0xc6, 0x37, 0x3a, 0x91, 0x6b, 0x71, 0xf0, 0xed, 0x3a, 0xf2, 0xe1, 0x4b,
+ 0x0c, 0x69, 0x97, 0x9a, 0xa7, 0x6d, 0x91, 0x46, 0x50, 0x96, 0x8b, 0x79,
+ 0xc5, 0x66, 0x63, 0x79, 0x56, 0xc2, 0x54, 0xa3, 0x93, 0x7c, 0x7b, 0x2c,
+ 0xec, 0x7d, 0x7f, 0x9e, 0x6c, 0x12, 0x93, 0xed, 0xaf, 0x86, 0x21, 0x39,
+ 0xaa, 0xac, 0x39, 0x5d, 0x8f, 0x0d, 0x54, 0x74, 0xa9, 0xbc, 0xfe, 0xf8,
+ 0xc6, 0xfd, 0x71, 0x13, 0xd5, 0x8e, 0xb1, 0xd0, 0x63, 0x75, 0xe7, 0x87,
+ 0xb2, 0x67, 0x36, 0x35, 0x38, 0xd3, 0xed, 0x7f, 0x5e, 0x17, 0x98, 0x3f,
+ 0xf4, 0xf7, 0x00, 0x35, 0x80, 0xae, 0x82, 0x62, 0xe4, 0xf2, 0x6d, 0x23,
+ 0x93, 0x88, 0x49, 0xd7, 0xa2, 0x5b, 0x0c, 0x5e, 0x7f, 0x3c, 0x36, 0x2b,
+ 0x12, 0x14, 0x9f, 0x6b, 0xb2, 0x12, 0xfb, 0xc1, 0xb7, 0xa9, 0x34, 0xc0,
+ 0x51, 0xac, 0xb5, 0x8a, 0x11, 0x76, 0x72, 0xd8, 0xcc, 0x86, 0x4f, 0xaf,
+ 0xc4, 0xe6, 0x5f, 0xf7, 0x42, 0xc9, 0x5c, 0x95, 0xde, 0xa2, 0x70,
+};
+static_assert(sizeof(kBytesTestReadSymbol14) == kNumBytesTestReadSymbol14, "");
+
+// The kBytesTestReadSymbol16[] array was encoded by using the following libaom
+// code:
+//
+// aom_cdf_prob cdf[4][17] = {
+// // pmf: 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16, 1/16,
+// // 1/16, 1/16, 1/16, 1/16, 1/16
+// { 32768 - 2048, 32768 - 4096, 32768 - 6144, 32768 - 8192, 32768 - 10240,
+// 32768 - 12288, 32768 - 14336, 32768 - 16384, 32768 - 18432,
+// 32768 - 20480, 32768 - 22528, 32768 - 24576, 32768 - 26624,
+// 32768 - 28672, 32768 - 30720, 0, 0 },
+// // pmf: 3/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32,
+// // 2/32, 2/32, 2/32, 2/32, 1/32
+// { 32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216, 32768 - 11264,
+// 32768 - 13312, 32768 - 15360, 32768 - 17408, 32768 - 19456,
+// 32768 - 21504, 32768 - 23552, 32768 - 25600, 32768 - 27648,
+// 32768 - 29696, 32768 - 31744, 0, 0 },
+// // pmf: 1/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32,
+// // 2/32, 2/32, 2/32, 2/32, 3/32
+// { 32768 - 1024, 32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216,
+// 32768 - 11264, 32768 - 13312, 32768 - 15360, 32768 - 17408,
+// 32768 - 19456, 32768 - 21504, 32768 - 23552, 32768 - 25600,
+// 32768 - 27648, 32768 - 29696, 0, 0 },
+// // pmf: 1/32, 2/32, 2/32, 2/32, 2/32, 2/32, 2/32, 3/32, 3/32, 2/32, 2/32,
+// // 2/32, 2/32, 2/32, 2/32, 1/32
+// { 32768 - 1024, 32768 - 3072, 32768 - 5120, 32768 - 7168, 32768 - 9216,
+// 32768 - 11264, 32768 - 13312, 32768 - 16384, 32768 - 19456,
+// 32768 - 21504, 32768 - 23552, 32768 - 25600, 32768 - 27648,
+// 32768 - 29696, 32768 - 31744, 0, 0 },
+// };
+// constexpr int kSymbols[32][4] = { { 0, 8, 15, 7 }, //
+// { 1, 9, 14, 6 }, //
+// { 2, 10, 13, 5 }, //
+// { 3, 11, 12, 4 }, //
+// { 4, 12, 11, 3 }, //
+// { 5, 13, 10, 2 }, //
+// { 6, 14, 9, 1 }, //
+// { 7, 15, 8, 0 }, //
+// { 8, 0, 7, 15 }, //
+// { 9, 1, 6, 14 }, //
+// { 10, 2, 5, 13 }, //
+// { 11, 3, 4, 12 }, //
+// { 12, 4, 3, 11 }, //
+// { 13, 5, 2, 10 }, //
+// { 14, 6, 1, 9 }, //
+// { 15, 7, 0, 8 }, //
+// { 0, 0, 15, 13 }, //
+// { 2, 1, 14, 11 }, //
+// { 4, 3, 12, 9 }, //
+// { 6, 5, 10, 7 }, //
+// { 8, 7, 8, 5 }, //
+// { 10, 9, 6, 3 }, //
+// { 12, 11, 4, 1 }, //
+// { 14, 13, 2, 14 }, //
+// { 1, 0, 15, 12 }, //
+// { 3, 2, 13, 10 }, //
+// { 5, 4, 11, 8 }, //
+// { 7, 6, 9, 6 }, //
+// { 9, 8, 7, 4 }, //
+// { 11, 10, 5, 2 }, //
+// { 13, 12, 3, 8 }, //
+// { 15, 14, 1, 7 } };
+// const unsigned int kBufferSize = 65536;
+// uint8_t bw_buffer[kBufferSize];
+// aom_writer bw;
+// bw.allow_update_cdf = 1;
+// aom_start_encode(&bw, bw_buffer);
+// for (int i = 0; i < 48; ++i) {
+// for (int j = 0; j < 32; ++j) {
+// for (int k = 0; k < 4; ++k) {
+// aom_write_symbol(&bw, kSymbols[j][k], cdf[k], 16);
+// }
+// }
+// }
+// aom_stop_encode(&bw);
+// printf("constexpr size_t kNumBytes = %u;\n", bw.pos);
+// printf("constexpr uint8_t kBytes[] = {");
+// int count = 0;
+// for (unsigned int i = 0; i < bw.pos; ++i) {
+// if (count++ % 12 == 0) {
+// printf("\n ");
+// } else {
+// printf(" ");
+// }
+// printf("0x%02x,", bw_buffer[i]);
+// }
+// printf("\n};\n");
+
+constexpr size_t kNumBytesTestReadSymbol16 = 3120;
+constexpr uint8_t kBytesTestReadSymbol16[] = {
+ 0x09, 0x2c, 0xb8, 0x5a, 0xe4, 0xe6, 0xc6, 0x1f, 0x3e, 0xa7, 0x50, 0xbf,
+ 0x19, 0x26, 0xbf, 0x20, 0xc3, 0xa2, 0x08, 0xdf, 0x44, 0xd9, 0x4d, 0x8c,
+ 0xf7, 0xbf, 0x6b, 0x6d, 0x22, 0x97, 0x8e, 0xd7, 0x93, 0xad, 0x33, 0xe3,
+ 0x7f, 0x5b, 0x71, 0x03, 0x6b, 0x4e, 0xbf, 0xf5, 0x38, 0xbe, 0xba, 0x6c,
+ 0x0d, 0x28, 0xca, 0x74, 0x2d, 0x1d, 0x3f, 0x91, 0xad, 0x7e, 0x98, 0x5c,
+ 0xa7, 0x39, 0x5e, 0x7c, 0x43, 0x2b, 0x88, 0xb2, 0x81, 0x91, 0xad, 0x62,
+ 0x14, 0xc6, 0x0a, 0x81, 0x15, 0x1f, 0x4e, 0xd5, 0xc1, 0x5c, 0x43, 0x35,
+ 0xc3, 0xe6, 0x3d, 0xaa, 0xc3, 0xb5, 0x95, 0x01, 0xbd, 0x2d, 0x21, 0x04,
+ 0x14, 0x79, 0x7a, 0x02, 0x7e, 0xb8, 0x09, 0x20, 0x06, 0x82, 0xc8, 0x6f,
+ 0x29, 0x2c, 0xb2, 0x9b, 0xe2, 0x8d, 0xf5, 0x56, 0xf5, 0x64, 0xf4, 0xd7,
+ 0xfe, 0x24, 0x29, 0xb6, 0x35, 0x16, 0x08, 0x26, 0xc0, 0xf0, 0xfd, 0x33,
+ 0x04, 0x6f, 0x70, 0x85, 0x3a, 0xac, 0x8f, 0xab, 0x48, 0xce, 0x04, 0xc1,
+ 0x0a, 0x4c, 0xb6, 0xaa, 0x83, 0x39, 0xc1, 0xf6, 0x00, 0xb8, 0x56, 0x4e,
+ 0xa2, 0xd1, 0x19, 0x70, 0x6a, 0x2b, 0x86, 0xef, 0xbd, 0x11, 0x27, 0x54,
+ 0x52, 0x01, 0xa2, 0x3f, 0x53, 0x0e, 0x5b, 0x23, 0x3c, 0x90, 0x82, 0xaf,
+ 0x9d, 0x79, 0xb5, 0x5e, 0x7e, 0x2e, 0x6e, 0xad, 0x3d, 0xe9, 0x3a, 0xff,
+ 0xd7, 0x59, 0x40, 0xa3, 0x56, 0xa9, 0x5e, 0x52, 0xda, 0x04, 0x74, 0x09,
+ 0x47, 0x7c, 0x6c, 0x4b, 0xad, 0x00, 0x8b, 0xbc, 0x33, 0x16, 0x49, 0xf6,
+ 0xa5, 0x11, 0x8d, 0xb4, 0xbc, 0x28, 0xea, 0x1b, 0x34, 0x1e, 0xb7, 0x1e,
+ 0xbf, 0x50, 0xe3, 0x60, 0xad, 0x41, 0xe0, 0x19, 0xfa, 0xa4, 0x23, 0x98,
+ 0x48, 0x23, 0xad, 0xfa, 0xdb, 0x3c, 0x0a, 0x15, 0xeb, 0xf5, 0xf1, 0x43,
+ 0xf2, 0xfd, 0x42, 0xf2, 0xd0, 0x3f, 0xa6, 0x3b, 0xc8, 0x81, 0x52, 0xba,
+ 0xcf, 0x2d, 0xff, 0x2c, 0x24, 0x13, 0x62, 0x78, 0x01, 0xd8, 0xcb, 0xfc,
+ 0xda, 0x70, 0x58, 0xad, 0xf1, 0xe6, 0x30, 0x47, 0x39, 0xc6, 0xf0, 0xbc,
+ 0xe4, 0x89, 0x49, 0x46, 0x79, 0xde, 0xac, 0xde, 0xbd, 0x97, 0x18, 0x8f,
+ 0x17, 0x07, 0xc1, 0xaf, 0xf8, 0xc1, 0x45, 0x95, 0x50, 0x36, 0x4d, 0x16,
+ 0x35, 0x92, 0x2b, 0x5a, 0x71, 0x81, 0x59, 0xe5, 0x7f, 0xba, 0x10, 0xc9,
+ 0x49, 0xd4, 0xeb, 0x64, 0x08, 0x54, 0x8b, 0xfa, 0xb3, 0xc8, 0x3a, 0xd7,
+ 0xa6, 0xa9, 0xf2, 0xae, 0x04, 0xf8, 0x55, 0x5c, 0xff, 0x2d, 0x17, 0x53,
+ 0x37, 0xc5, 0x36, 0xd8, 0x42, 0xd7, 0x47, 0xd8, 0x00, 0x99, 0x9c, 0x5d,
+ 0x9f, 0x34, 0xc2, 0x09, 0x6b, 0x1a, 0xf3, 0x2f, 0xb0, 0xf8, 0x49, 0x54,
+ 0x9d, 0x4b, 0xb8, 0xcf, 0xc5, 0x3b, 0x7f, 0x49, 0x9b, 0x40, 0xa9, 0xd3,
+ 0x96, 0xe1, 0x6b, 0x87, 0x2d, 0x50, 0x76, 0x15, 0xd9, 0x9f, 0x87, 0x4f,
+ 0x13, 0x26, 0xf2, 0xf8, 0xae, 0xd4, 0x63, 0x02, 0x0c, 0xcb, 0xe5, 0x63,
+ 0x1c, 0x73, 0xdf, 0x57, 0x55, 0x16, 0x57, 0x3b, 0xfb, 0x9a, 0x06, 0x70,
+ 0xfc, 0x9f, 0x29, 0x16, 0xec, 0x63, 0x34, 0x6f, 0x40, 0x1f, 0x54, 0x2a,
+ 0xe7, 0x4a, 0x6f, 0xde, 0x86, 0xeb, 0x8c, 0x91, 0x3e, 0xfc, 0x6a, 0x48,
+ 0xd1, 0x51, 0x33, 0xd7, 0xe1, 0x9d, 0xf8, 0x71, 0x21, 0x7b, 0x02, 0x38,
+ 0x6a, 0xef, 0x30, 0x70, 0x38, 0x01, 0xc3, 0xef, 0x5d, 0x4f, 0xd3, 0x37,
+ 0x2d, 0xe0, 0x4f, 0x4b, 0x72, 0xbc, 0xde, 0x9f, 0x32, 0x97, 0xe2, 0x55,
+ 0x5e, 0x59, 0x5d, 0xa2, 0x9f, 0x5a, 0x04, 0x7c, 0x13, 0xe1, 0x35, 0x62,
+ 0x4a, 0x10, 0x24, 0x55, 0x63, 0xb8, 0x8f, 0x66, 0xbc, 0x04, 0x08, 0x4e,
+ 0xcc, 0xdc, 0x1f, 0x88, 0xc5, 0xcf, 0x8a, 0x7e, 0x24, 0x3e, 0x6f, 0x58,
+ 0xcb, 0x44, 0x3c, 0x18, 0x64, 0xd9, 0x84, 0xa8, 0x1c, 0x0b, 0x20, 0xf4,
+ 0x8b, 0x8b, 0x4b, 0xf8, 0x39, 0x8b, 0x01, 0x3a, 0x0b, 0x27, 0x67, 0xf8,
+ 0x0f, 0xbd, 0xb3, 0x32, 0xce, 0xef, 0xbc, 0x8c, 0xa3, 0x31, 0xee, 0x0b,
+ 0xdb, 0xc7, 0xc3, 0x43, 0x80, 0xe4, 0x7c, 0x9b, 0x89, 0xa4, 0x6b, 0x23,
+ 0x2f, 0xa8, 0x28, 0xe0, 0x55, 0x30, 0x6e, 0xe7, 0xc9, 0x50, 0x1d, 0xbf,
+ 0x67, 0xc8, 0x74, 0x58, 0x0f, 0xdb, 0xa6, 0x1f, 0xa6, 0xfd, 0xf0, 0x75,
+ 0xea, 0x62, 0xd5, 0x44, 0xa2, 0x7e, 0xed, 0x63, 0xba, 0x7c, 0x5d, 0xb7,
+ 0x16, 0x84, 0x30, 0x5d, 0xc2, 0xd3, 0x39, 0x61, 0x60, 0x0a, 0xb9, 0x34,
+ 0x5e, 0x54, 0xf4, 0x34, 0x77, 0x22, 0x05, 0x41, 0x6b, 0x6a, 0x13, 0xc3,
+ 0x10, 0x03, 0x8a, 0x78, 0xd2, 0x81, 0xac, 0x49, 0x31, 0xc8, 0xee, 0x15,
+ 0xc3, 0x42, 0x3b, 0x00, 0xf6, 0x05, 0x92, 0x82, 0x6e, 0x73, 0xb4, 0xfa,
+ 0xab, 0xe0, 0x2e, 0xe9, 0x5d, 0x89, 0x43, 0x0c, 0x4d, 0x88, 0x0c, 0xf1,
+ 0xa4, 0x19, 0x59, 0xa0, 0x69, 0x0c, 0xfc, 0xf9, 0x9a, 0xbc, 0x3b, 0x2e,
+ 0x3b, 0x29, 0xf8, 0xd7, 0x79, 0x11, 0xb2, 0x66, 0x26, 0x57, 0x34, 0x06,
+ 0xb8, 0x36, 0x41, 0xca, 0x01, 0x10, 0xca, 0x06, 0xee, 0xb6, 0xf7, 0x1d,
+ 0x0d, 0x88, 0xab, 0x07, 0xbe, 0x06, 0x8c, 0x1c, 0xa2, 0x76, 0x5e, 0xdb,
+ 0x60, 0xa4, 0x43, 0x17, 0x31, 0xc3, 0x4b, 0x0a, 0x01, 0x80, 0xa7, 0xf6,
+ 0xe6, 0x78, 0x64, 0x85, 0xb0, 0x8a, 0x28, 0x34, 0x82, 0x98, 0x29, 0x3f,
+ 0xde, 0x07, 0x9a, 0x80, 0xcf, 0xe3, 0x6f, 0x23, 0x57, 0x79, 0x11, 0xb2,
+ 0x61, 0x6d, 0x98, 0x26, 0xeb, 0x3b, 0xbf, 0xaa, 0x98, 0x62, 0xbb, 0xfd,
+ 0x21, 0x76, 0xe5, 0xc5, 0xe0, 0x09, 0x21, 0x65, 0x72, 0x94, 0xd3, 0x8a,
+ 0xcd, 0xfb, 0xec, 0x6e, 0x57, 0xd4, 0x2a, 0x92, 0xd1, 0xe9, 0x16, 0x46,
+ 0xa2, 0x38, 0xae, 0x4b, 0x7e, 0xa7, 0x0c, 0x26, 0x9d, 0x96, 0xd7, 0x49,
+ 0xa7, 0x02, 0x2b, 0x22, 0x9a, 0x39, 0x38, 0x11, 0xb8, 0xb3, 0xd5, 0x09,
+ 0xf9, 0x70, 0xb4, 0x1c, 0x4e, 0xe3, 0xba, 0xa0, 0x78, 0x76, 0x6d, 0xc4,
+ 0xab, 0x96, 0x3e, 0x98, 0x04, 0x4e, 0x50, 0x20, 0xd9, 0xfa, 0xea, 0xe2,
+ 0x99, 0x50, 0x84, 0x20, 0x18, 0x69, 0xbb, 0x6e, 0x41, 0x9d, 0x18, 0x71,
+ 0x15, 0x19, 0xd2, 0xf2, 0xa5, 0x69, 0x54, 0x8e, 0x60, 0x75, 0xd4, 0xe7,
+ 0xdb, 0xe1, 0x43, 0xfd, 0x2e, 0x21, 0x4f, 0xff, 0x98, 0x8b, 0x08, 0x74,
+ 0xca, 0x29, 0x7e, 0x3f, 0x2f, 0x6a, 0xf9, 0xe6, 0x49, 0x1d, 0xc6, 0x0b,
+ 0x76, 0xc9, 0x22, 0xc3, 0x4f, 0xaf, 0xa8, 0xf9, 0xd6, 0x9c, 0x9a, 0x64,
+ 0xec, 0xb3, 0x2c, 0x0f, 0x3e, 0x93, 0xc4, 0xb6, 0xd7, 0x36, 0x28, 0x04,
+ 0xe5, 0x81, 0x48, 0x14, 0x9f, 0x4e, 0xc5, 0x9b, 0xd7, 0xc0, 0x0e, 0x35,
+ 0xab, 0x49, 0xd3, 0x84, 0x9f, 0x5c, 0x93, 0x94, 0xa6, 0xd2, 0xb5, 0x83,
+ 0x9d, 0x38, 0x0f, 0x85, 0x04, 0xa3, 0xb7, 0x23, 0x20, 0x93, 0x85, 0x48,
+ 0x14, 0x0c, 0x22, 0x80, 0x92, 0x6c, 0xca, 0x3c, 0xc7, 0xfc, 0xa9, 0x88,
+ 0x62, 0xbc, 0x2a, 0x91, 0x08, 0x5b, 0xb4, 0x60, 0xd1, 0x0f, 0x3c, 0x33,
+ 0xc6, 0xe1, 0xf7, 0xca, 0xf7, 0xf9, 0xa1, 0x9b, 0xfa, 0xf7, 0x34, 0xe0,
+ 0x54, 0xac, 0x53, 0x42, 0x30, 0x76, 0xc8, 0xc2, 0xcd, 0x61, 0x49, 0x87,
+ 0x9c, 0x47, 0xf5, 0x98, 0xb5, 0x41, 0xf0, 0xad, 0xdb, 0x37, 0x06, 0xb8,
+ 0x54, 0xa5, 0x26, 0x11, 0x4b, 0x18, 0xbb, 0xa4, 0xfb, 0x24, 0xd3, 0x14,
+ 0x31, 0xfb, 0x56, 0x18, 0xd8, 0xc2, 0xd0, 0xd2, 0xab, 0xde, 0xdf, 0xa9,
+ 0xdf, 0x9e, 0xa6, 0x56, 0x0d, 0x9f, 0xe4, 0x19, 0x15, 0x58, 0x18, 0xc6,
+ 0x5e, 0x47, 0x05, 0x3a, 0x0e, 0x73, 0x68, 0x81, 0x39, 0x8c, 0x51, 0x1d,
+ 0x04, 0x4e, 0x18, 0x54, 0xa5, 0x3e, 0x13, 0x4a, 0x15, 0xc2, 0x43, 0x90,
+ 0xc2, 0x71, 0x8d, 0x53, 0x1b, 0xab, 0xe9, 0xbc, 0x69, 0x3e, 0x11, 0x46,
+ 0x9d, 0xa4, 0xd3, 0x15, 0x80, 0xec, 0xe8, 0x31, 0x4f, 0x5a, 0x2a, 0x15,
+ 0x3e, 0x7e, 0x7a, 0x44, 0x0e, 0x4a, 0xac, 0x9b, 0x46, 0x2f, 0x86, 0xf9,
+ 0xea, 0x59, 0x4f, 0x15, 0xa0, 0x4b, 0xd1, 0xaa, 0xd8, 0x3a, 0x83, 0xb6,
+ 0x25, 0x82, 0xb0, 0x44, 0x4a, 0x98, 0xbd, 0x10, 0xa2, 0xb0, 0x95, 0x02,
+ 0xfa, 0x1f, 0xd3, 0x54, 0x1c, 0x0a, 0xb1, 0x31, 0x28, 0xec, 0x4c, 0xd2,
+ 0x0c, 0xb9, 0xb0, 0xf4, 0x7a, 0x89, 0x63, 0x3c, 0x5f, 0xcf, 0x3c, 0xe8,
+ 0xba, 0x21, 0x66, 0x20, 0x01, 0xcb, 0x1b, 0xc6, 0xf9, 0x54, 0x0f, 0xda,
+ 0x4a, 0xcc, 0x81, 0x7b, 0x41, 0x81, 0xc0, 0x1f, 0xea, 0x9a, 0x9b, 0x96,
+ 0x0d, 0x47, 0xdd, 0x16, 0x52, 0x5c, 0xaf, 0xae, 0x82, 0x3d, 0x18, 0x60,
+ 0xfa, 0x34, 0xc2, 0x57, 0x2d, 0xc4, 0x2b, 0x2e, 0x41, 0xfe, 0xe7, 0x95,
+ 0xcd, 0x1f, 0xbe, 0x88, 0x31, 0xc1, 0x07, 0x2c, 0xd3, 0xb1, 0xbb, 0xeb,
+ 0x1d, 0xa3, 0x03, 0x1e, 0x70, 0xcc, 0x84, 0xe0, 0x65, 0x41, 0x0f, 0xf1,
+ 0x7c, 0x95, 0x4b, 0x41, 0x43, 0x62, 0xad, 0x5d, 0xff, 0x4f, 0x92, 0xc8,
+ 0xaa, 0x21, 0x23, 0xba, 0xa9, 0x90, 0xb5, 0xae, 0xc0, 0x1f, 0xae, 0x43,
+ 0xf1, 0x79, 0x14, 0x30, 0x16, 0x1d, 0x2a, 0x6c, 0xd1, 0xd8, 0xb3, 0x38,
+ 0x25, 0xd1, 0x66, 0xa5, 0x89, 0xc0, 0x8d, 0xc5, 0xa0, 0x6a, 0x7c, 0x64,
+ 0xf8, 0x45, 0x1a, 0x76, 0x93, 0x4c, 0x56, 0x03, 0xb3, 0xa0, 0xc5, 0x40,
+ 0xbc, 0x84, 0x98, 0x8d, 0xa4, 0xfe, 0x0b, 0x8c, 0x47, 0xa2, 0x88, 0x85,
+ 0x2a, 0x89, 0xad, 0xd3, 0x16, 0x5b, 0x20, 0x02, 0x70, 0xbf, 0x72, 0x29,
+ 0x0c, 0x0a, 0x9c, 0xac, 0x9c, 0x4d, 0xfa, 0x02, 0x5e, 0xe9, 0xe3, 0x52,
+ 0x84, 0x54, 0x1f, 0xb7, 0xea, 0xb1, 0xc4, 0x2f, 0x69, 0xd1, 0x33, 0xc6,
+ 0xb3, 0xee, 0xb0, 0x35, 0x1f, 0x19, 0x68, 0x2d, 0xef, 0xc1, 0xd3, 0x1c,
+ 0xa8, 0x84, 0x54, 0x3c, 0x21, 0xed, 0x78, 0x35, 0x3f, 0x82, 0xb2, 0xa8,
+ 0xe4, 0x25, 0x71, 0xfc, 0x1e, 0x1d, 0x36, 0xf4, 0xf4, 0x0f, 0x6f, 0x5b,
+ 0xd9, 0x21, 0x13, 0x3a, 0x3d, 0x17, 0x45, 0x31, 0x78, 0x97, 0x99, 0x15,
+ 0x87, 0xa9, 0xa6, 0x36, 0xf0, 0x20, 0xfa, 0xd5, 0x10, 0x01, 0x91, 0xa0,
+ 0x4f, 0x28, 0x6a, 0x13, 0x04, 0xff, 0x97, 0x96, 0xf1, 0xfc, 0x1c, 0xc8,
+ 0xcd, 0xe4, 0xbd, 0xe5, 0x40, 0x9a, 0x37, 0xc2, 0x01, 0x11, 0x2a, 0xc0,
+ 0x0e, 0x58, 0x69, 0x29, 0xd0, 0x72, 0x26, 0x7c, 0x23, 0xec, 0x58, 0xfe,
+ 0xbd, 0x15, 0x97, 0xe8, 0x29, 0x9f, 0x79, 0xb1, 0xfa, 0xac, 0x59, 0xe0,
+ 0x78, 0x1c, 0xb4, 0x29, 0xee, 0x00, 0x39, 0x11, 0x0a, 0x2a, 0xb9, 0x98,
+ 0x4e, 0xbf, 0x75, 0x9e, 0xe8, 0xbb, 0x4b, 0xe0, 0x6b, 0xab, 0x5b, 0x2f,
+ 0x2d, 0xe3, 0xf8, 0x39, 0x91, 0x9b, 0xc9, 0x7b, 0xca, 0x81, 0x34, 0x6f,
+ 0x84, 0x02, 0x22, 0x55, 0x80, 0x1c, 0xb0, 0xd2, 0x53, 0xa0, 0xe4, 0x4c,
+ 0xf8, 0x47, 0xd8, 0xb1, 0xfd, 0x7a, 0x2b, 0x2f, 0xd0, 0x53, 0x3e, 0xf3,
+ 0x63, 0xf5, 0x58, 0xb3, 0xc0, 0xf0, 0x39, 0x00, 0x08, 0x97, 0x4b, 0xe2,
+ 0x46, 0x04, 0xa2, 0x39, 0x9c, 0xf2, 0x57, 0x17, 0x4a, 0xdd, 0x9f, 0x5e,
+ 0xb1, 0x8b, 0x6b, 0x5d, 0x6e, 0x3e, 0x85, 0x34, 0x04, 0x96, 0x56, 0xe7,
+ 0x4f, 0x6f, 0xd0, 0x31, 0xe7, 0x0c, 0xc8, 0x88, 0xdd, 0x5b, 0x14, 0x00,
+ 0x60, 0x2a, 0x06, 0x18, 0xcd, 0x7f, 0xc9, 0xee, 0xd2, 0xd0, 0x8c, 0xc0,
+ 0xed, 0x8f, 0x4a, 0x3e, 0x83, 0x52, 0x2e, 0x4a, 0xe9, 0xfa, 0x1f, 0x1a,
+ 0xd5, 0xc0, 0x59, 0x4c, 0x8a, 0x2a, 0xab, 0x40, 0x2f, 0x84, 0xd2, 0x85,
+ 0x70, 0x90, 0x96, 0xf3, 0x84, 0x6f, 0x1e, 0x81, 0x8c, 0x80, 0x03, 0x03,
+ 0x2d, 0x36, 0x2e, 0x60, 0x79, 0x13, 0x63, 0x7f, 0xe7, 0xe3, 0x4a, 0x96,
+ 0x08, 0xd8, 0x35, 0x15, 0x46, 0x8a, 0xe0, 0xb8, 0xc4, 0x7a, 0x28, 0x88,
+ 0x52, 0xa8, 0x9a, 0xdd, 0x31, 0x65, 0xb2, 0x00, 0x24, 0xd9, 0xf4, 0x07,
+ 0xea, 0xab, 0x7c, 0xe8, 0xa2, 0xea, 0xa7, 0x23, 0xd1, 0x93, 0x9e, 0xe7,
+ 0x48, 0x34, 0x89, 0xf5, 0xb4, 0x45, 0x5e, 0xfa, 0xa6, 0xee, 0x32, 0x75,
+ 0x8c, 0x56, 0x08, 0xcc, 0xeb, 0x5b, 0x05, 0xc2, 0x1d, 0x62, 0xa8, 0x5d,
+ 0xaa, 0x50, 0xc2, 0x85, 0x85, 0x25, 0xb3, 0x5f, 0x60, 0xe7, 0x90, 0x1b,
+ 0xa8, 0xb7, 0xf6, 0x83, 0x11, 0x07, 0x1f, 0xfc, 0xce, 0x58, 0x22, 0x8a,
+ 0x3d, 0xa9, 0x8c, 0x18, 0x66, 0xa8, 0x32, 0x78, 0xa0, 0x16, 0x8a, 0xa2,
+ 0x5d, 0x2f, 0x89, 0x18, 0x12, 0x88, 0xe6, 0x73, 0xc9, 0x5c, 0x5d, 0x2b,
+ 0x76, 0x7d, 0x7a, 0xc6, 0x2d, 0xad, 0x75, 0xb8, 0xfa, 0x14, 0xd0, 0x12,
+ 0x59, 0x5b, 0x9d, 0x3d, 0xbf, 0x40, 0xc7, 0x9c, 0x33, 0x22, 0x23, 0x75,
+ 0x6c, 0x50, 0x01, 0x80, 0xa8, 0x18, 0x63, 0x35, 0xff, 0x27, 0xbb, 0x4b,
+ 0x42, 0x33, 0x03, 0xb6, 0x3d, 0x28, 0xfa, 0x0d, 0x48, 0xb9, 0x2b, 0xa7,
+ 0xe8, 0x7c, 0x6b, 0x57, 0x01, 0x65, 0x32, 0x28, 0xaa, 0xad, 0x00, 0xbe,
+ 0x13, 0x4a, 0x15, 0xc2, 0x42, 0x5b, 0xce, 0x11, 0xbc, 0x7a, 0x06, 0x32,
+ 0x00, 0x0c, 0x0c, 0xb4, 0xd8, 0xb9, 0x81, 0xe4, 0x4d, 0x8d, 0xff, 0x9f,
+ 0x8d, 0x2a, 0x58, 0x23, 0x60, 0xd4, 0x55, 0x1a, 0x2b, 0x82, 0xe3, 0x11,
+ 0xe8, 0xa2, 0x21, 0x4a, 0xa2, 0x6b, 0x74, 0xc5, 0x96, 0xc8, 0x00, 0x93,
+ 0x67, 0xd0, 0x1f, 0xaa, 0xad, 0xf3, 0xa2, 0x8b, 0xaa, 0x9c, 0x8f, 0x46,
+ 0x4e, 0x7b, 0x9d, 0x20, 0xd2, 0x27, 0xd6, 0xd1, 0x15, 0x7b, 0xea, 0x9b,
+ 0xb8, 0xc9, 0xd6, 0x31, 0x58, 0x23, 0x33, 0xad, 0x6c, 0x17, 0x08, 0x75,
+ 0x8a, 0xa1, 0x76, 0xa9, 0x43, 0x0a, 0x16, 0x14, 0x96, 0xcd, 0x7d, 0x83,
+ 0x9e, 0x40, 0x6e, 0xa2, 0xdf, 0xda, 0x0c, 0x44, 0x1c, 0x7f, 0xf3, 0x39,
+ 0x60, 0x8a, 0x28, 0xf6, 0xa6, 0x30, 0x61, 0x9a, 0xa0, 0xc9, 0xe2, 0x80,
+ 0x5a, 0x2a, 0x89, 0x74, 0xbe, 0x24, 0x60, 0x4a, 0x23, 0x99, 0xcf, 0x25,
+ 0x71, 0x74, 0xad, 0xd9, 0xf5, 0xeb, 0x18, 0xb6, 0xb5, 0xd6, 0xe3, 0xe8,
+ 0x53, 0x40, 0x49, 0x65, 0x6e, 0x74, 0xf6, 0xfd, 0x03, 0x1e, 0x70, 0xcc,
+ 0x88, 0x8d, 0xd5, 0xb1, 0x40, 0x06, 0x02, 0xa0, 0x61, 0x8c, 0xd7, 0xfc,
+ 0x9e, 0xed, 0x2d, 0x08, 0xcc, 0x0e, 0xd8, 0xf4, 0xa3, 0xe9, 0x41, 0x30,
+ 0x05, 0xc8, 0xbd, 0x3c, 0xa4, 0xb7, 0x09, 0x6f, 0x9c, 0xc8, 0xa2, 0xaa,
+ 0xb4, 0x02, 0xf8, 0x4d, 0x28, 0x57, 0x09, 0x09, 0x6f, 0x38, 0x46, 0xf1,
+ 0xe8, 0x18, 0xc8, 0x00, 0x30, 0x32, 0xd3, 0x62, 0xe6, 0x07, 0x91, 0x36,
+ 0x37, 0xfe, 0x7e, 0x34, 0xa9, 0x60, 0x8d, 0x83, 0x51, 0x54, 0x68, 0xae,
+ 0x0b, 0x8c, 0x47, 0xa2, 0x88, 0x85, 0x2a, 0x89, 0xad, 0xd3, 0x16, 0x5b,
+ 0x20, 0x02, 0x4f, 0xc0, 0x04, 0x8e, 0x38, 0xde, 0xd8, 0x95, 0xfc, 0x97,
+ 0xd9, 0xd2, 0x15, 0xdb, 0x1a, 0xcc, 0x69, 0x02, 0xad, 0x4a, 0x5a, 0x70,
+ 0x8b, 0xbf, 0xfc, 0x35, 0x6d, 0x3a, 0x0f, 0xc9, 0xea, 0x78, 0x1a, 0xd1,
+ 0xcb, 0xb7, 0xaa, 0xb8, 0xf2, 0x44, 0xdf, 0xb3, 0xfe, 0x24, 0x83, 0xb9,
+ 0x53, 0x94, 0x7e, 0xa5, 0xc5, 0x3f, 0xa2, 0x31, 0x3d, 0xdc, 0x0b, 0xb1,
+ 0x24, 0x2f, 0x99, 0x4a, 0xd4, 0x0e, 0x6b, 0x3a, 0x34, 0x31, 0xc5, 0x87,
+ 0x68, 0xbd, 0x61, 0xbd, 0xe2, 0xa0, 0xdb, 0x9a, 0x33, 0xfd, 0xc5, 0x10,
+ 0x3f, 0xfb, 0xeb, 0xbd, 0x29, 0x03, 0x85, 0x8d, 0x08, 0x7b, 0xb6, 0xf7,
+ 0xf0, 0xf5, 0x13, 0x69, 0x3e, 0x35, 0x68, 0x58, 0x50, 0xdb, 0x50, 0x13,
+ 0x02, 0x3e, 0x81, 0x4b, 0x44, 0x6c, 0x75, 0x02, 0xe6, 0x90, 0x75, 0x6c,
+ 0xc6, 0x7c, 0x23, 0xec, 0x58, 0xfe, 0xbd, 0x15, 0x97, 0xe8, 0x29, 0x9f,
+ 0x80, 0x54, 0x65, 0xb8, 0x3c, 0x40, 0xe6, 0xdb, 0xbe, 0x51, 0x73, 0xe5,
+ 0xf1, 0x23, 0x02, 0x51, 0x1c, 0xce, 0x79, 0x2b, 0x8b, 0xa5, 0x6e, 0xcf,
+ 0xaf, 0x58, 0xc5, 0xb5, 0xae, 0xb7, 0x1f, 0x42, 0x9a, 0x02, 0x4b, 0x2b,
+ 0x73, 0xa7, 0xb7, 0xe8, 0x18, 0xf3, 0x86, 0x64, 0x44, 0x6e, 0xad, 0x8a,
+ 0x00, 0x30, 0x15, 0x03, 0x0c, 0x66, 0xbf, 0xe4, 0xf7, 0x69, 0x68, 0x46,
+ 0x60, 0x76, 0xc7, 0xa5, 0x1f, 0x4a, 0x09, 0x80, 0x2e, 0x45, 0xe9, 0xe5,
+ 0x25, 0xb8, 0x4b, 0x7c, 0xe6, 0x45, 0x15, 0x55, 0xa0, 0x17, 0xc2, 0x69,
+ 0x42, 0xb8, 0x48, 0x4b, 0x79, 0xc2, 0x37, 0x8f, 0x40, 0xc6, 0x40, 0x01,
+ 0x81, 0x96, 0x9b, 0x17, 0x30, 0x3c, 0x89, 0xb1, 0xbf, 0xf3, 0xf1, 0xa5,
+ 0x4b, 0x04, 0x6c, 0x1a, 0x8a, 0xa3, 0x45, 0x70, 0x5c, 0x62, 0x3d, 0x14,
+ 0x44, 0x29, 0x54, 0x4d, 0x6e, 0x98, 0xb2, 0xd9, 0x00, 0x12, 0x7e, 0x00,
+ 0x24, 0x71, 0xc6, 0xf6, 0xc4, 0xaf, 0xe4, 0xbe, 0xce, 0x90, 0xae, 0xd8,
+ 0xd6, 0x63, 0x48, 0x15, 0x6a, 0x52, 0xd3, 0x84, 0x5d, 0xff, 0xe1, 0xab,
+ 0x69, 0xd0, 0x7e, 0x4f, 0x53, 0xc0, 0xd6, 0x8e, 0x5d, 0xbd, 0x55, 0xc7,
+ 0x92, 0x26, 0xfd, 0x9f, 0xf1, 0x24, 0x1d, 0xca, 0x9c, 0xa3, 0xf5, 0x2e,
+ 0x29, 0xfd, 0x11, 0x89, 0xee, 0xe0, 0x5d, 0x89, 0x21, 0x7c, 0xca, 0x56,
+ 0xa0, 0x73, 0x59, 0xd1, 0xa1, 0x8e, 0x2c, 0x3b, 0x45, 0xeb, 0x0d, 0xef,
+ 0x15, 0x06, 0xdc, 0xd1, 0x9f, 0xee, 0x28, 0x81, 0xff, 0xdf, 0x5d, 0xe9,
+ 0x48, 0x1c, 0x2c, 0x68, 0x43, 0xdd, 0xb7, 0xbf, 0x87, 0xa8, 0x9b, 0x49,
+ 0xf1, 0xab, 0x42, 0xc2, 0x86, 0xda, 0x80, 0x98, 0x11, 0xf4, 0x0a, 0x5a,
+ 0x23, 0x63, 0xa8, 0x17, 0x34, 0x83, 0xab, 0x66, 0x33, 0xe1, 0x1f, 0x62,
+ 0xc7, 0xf5, 0xe8, 0xac, 0xbf, 0x41, 0x4c, 0xfc, 0x02, 0xa3, 0x2d, 0xc1,
+ 0xe2, 0x07, 0x36, 0xdd, 0xf2, 0x8b, 0x9f, 0x2f, 0x89, 0x18, 0x12, 0x88,
+ 0xe6, 0x73, 0xc9, 0x5c, 0x5d, 0x2b, 0x76, 0x7d, 0x7a, 0xc6, 0x2d, 0xad,
+ 0x75, 0xb8, 0xfa, 0x14, 0xd0, 0x12, 0x59, 0x5b, 0x9d, 0x3d, 0xbf, 0x40,
+ 0xc7, 0x9c, 0x33, 0x22, 0x23, 0x75, 0x6c, 0x50, 0x01, 0x80, 0xa8, 0x83,
+ 0x06, 0xd4, 0xd6, 0x8d, 0x36, 0x78, 0xf9, 0x03, 0x23, 0xdb, 0x17, 0x90,
+ 0x52, 0x0c, 0x5f, 0x1b, 0xe6, 0x44, 0x79, 0x52, 0xc5, 0x50, 0x17, 0x81,
+ 0xf3, 0x1b, 0x88, 0xba, 0xfd, 0xbd, 0xa5, 0x51, 0x65, 0x6d, 0x33, 0x96,
+ 0xc2, 0x71, 0x8d, 0x53, 0x1b, 0xab, 0xe9, 0xb9, 0xd0, 0x45, 0x61, 0xaf,
+ 0xf9, 0xb7, 0x38, 0x55, 0x4f, 0xe9, 0x85, 0x1d, 0x4c, 0x0e, 0x40, 0x77,
+ 0x03, 0xbc, 0x09, 0xd0, 0x37, 0xe3, 0xde, 0xf1, 0x0c, 0xa6, 0xc8, 0xd5,
+ 0x63, 0x01, 0xfd, 0xe7, 0xc0, 0x9a, 0xe0, 0x98, 0x02, 0xe4, 0x5e, 0x9e,
+ 0x52, 0x5b, 0x84, 0xb7, 0xce, 0x64, 0x51, 0x55, 0x5a, 0x01, 0x7c, 0x26,
+ 0x94, 0x2b, 0x84, 0x84, 0xb7, 0x9c, 0x23, 0x78, 0xf4, 0x0c, 0x64, 0x00,
+ 0x18, 0x19, 0x69, 0xb1, 0x73, 0x03, 0xc8, 0x9b, 0x1b, 0xff, 0x3f, 0x1a,
+ 0x54, 0xb0, 0x46, 0xc1, 0xa8, 0xaa, 0x34, 0x57, 0x07, 0x13, 0xd3, 0x43,
+ 0xb1, 0xaa, 0x4b, 0xc4, 0xcb, 0x5a, 0x9b, 0xa2, 0x23, 0x98, 0xa2, 0xd3,
+ 0x2b, 0x8c, 0x7b, 0xf8, 0xc7, 0xaa, 0xf6, 0xcc, 0xb8, 0xfc, 0xb5, 0x77,
+ 0xce, 0xff, 0x9d, 0x0e, 0xdb, 0x2b, 0x03, 0xc7, 0x42, 0x86, 0xf1, 0xcb,
+ 0xa2, 0xa7, 0x85, 0x77, 0x58, 0x1a, 0x8f, 0x8c, 0xb4, 0x16, 0xf7, 0xe0,
+ 0xe9, 0x8e, 0x54, 0x42, 0x2a, 0x1e, 0x10, 0xf6, 0xbc, 0x1a, 0x9f, 0xa1,
+ 0xcb, 0xff, 0x13, 0x06, 0x88, 0x6b, 0xb1, 0xeb, 0x37, 0x26, 0xe5, 0x34,
+ 0x0d, 0x73, 0x87, 0x91, 0x60, 0x6c, 0xd7, 0x2d, 0xc3, 0x5f, 0x40, 0x68,
+ 0x45, 0x07, 0x6e, 0x62, 0xa9, 0xe3, 0x52, 0x75, 0xef, 0x14, 0xf5, 0x89,
+ 0x0a, 0x3a, 0x57, 0x8b, 0xac, 0xbe, 0x86, 0x67, 0xd1, 0xd8, 0x35, 0xe5,
+ 0xe7, 0x75, 0xb8, 0xf8, 0x28, 0x6d, 0xa8, 0x09, 0x81, 0x1f, 0x40, 0xa5,
+ 0xa2, 0x36, 0x3a, 0x81, 0x73, 0x48, 0x3e, 0x8c, 0x9d, 0x1f, 0x78, 0xc5,
+ 0x92, 0x36, 0x1a, 0xae, 0xdf, 0xda, 0xf8, 0x0a, 0x7e, 0x69, 0xcb, 0xaf,
+ 0x74, 0x59, 0x49, 0x72, 0xa7, 0x97, 0x1c, 0x8c, 0xf0, 0x16, 0x01, 0x4a,
+ 0xcc, 0x1a, 0xa1, 0x24, 0x83, 0x7b, 0x34, 0x65, 0x20, 0x51, 0x11, 0xae,
+ 0x5d, 0xa7, 0x68, 0x9c, 0xec, 0x29, 0x27, 0xfc, 0x07, 0x49, 0xb4, 0x9b,
+ 0x65, 0xb2, 0x51, 0x97, 0xae, 0xa5, 0x8a, 0x70, 0xe5, 0x53, 0xd3, 0xa2,
+ 0x34, 0x35, 0xbd, 0xbf, 0x75, 0x64, 0xda, 0x88, 0x8c, 0xe9, 0xc3, 0x9a,
+ 0x32, 0xf0, 0x5a, 0x96, 0xae, 0xef, 0x9a, 0xdd, 0x84, 0xc2, 0x97, 0x22,
+ 0x2f, 0x06, 0x83, 0x32, 0x10, 0xff, 0x1d, 0x61, 0x60, 0x5f, 0x69, 0x10,
+ 0x5d, 0x23, 0xc6, 0xf3, 0x3f, 0xa9, 0x53, 0xfe, 0xd0, 0x3e, 0x90, 0xe6,
+ 0x54, 0x48, 0xab, 0x01, 0x76, 0x75, 0x88, 0x7b, 0x4e, 0xc6, 0xd0, 0x9b,
+ 0x7a, 0xcd, 0x87, 0x36, 0x3e, 0x7e, 0x3d, 0xef, 0x10, 0xca, 0x6c, 0x8d,
+ 0x56, 0x30, 0x1f, 0xde, 0x7c, 0x09, 0xae, 0x09, 0x80, 0x2e, 0x45, 0xe9,
+ 0xe5, 0x25, 0xb8, 0x4b, 0x7c, 0xe6, 0x45, 0x15, 0x55, 0xa0, 0x17, 0xc2,
+ 0x69, 0x42, 0xb8, 0x48, 0x4b, 0x79, 0xc2, 0x37, 0x8f, 0x40, 0xc6, 0x40,
+ 0x01, 0x81, 0x96, 0x9b, 0x17, 0x30, 0x3c, 0x89, 0xb1, 0xbf, 0xf3, 0xf1,
+ 0xa5, 0x5c, 0xdc, 0x1e, 0x69, 0xfc, 0xf1, 0xd8, 0x5d, 0xda, 0x13, 0x5b,
+ 0xbc, 0x1f, 0x41, 0x4a, 0xde, 0x44, 0x3c, 0x5e, 0xbd, 0x46, 0xb7, 0xad,
+ 0x32, 0xb8, 0xc7, 0xbf, 0x8c, 0x7a, 0xaf, 0x6c, 0xcb, 0x8f, 0xcb, 0x57,
+ 0x7c, 0xef, 0xf9, 0xd0, 0xed, 0xb2, 0xb0, 0x3c, 0x74, 0x28, 0x6f, 0x1c,
+ 0xba, 0x2a, 0x78, 0x57, 0x75, 0x81, 0xa8, 0xf8, 0xcb, 0x41, 0x6f, 0x7e,
+ 0x0e, 0x98, 0xe5, 0x44, 0x22, 0xa2, 0x00, 0x6c, 0xba, 0xaf, 0x51, 0xcc,
+ 0x9f, 0xba, 0x97, 0x39, 0xbb, 0x41, 0x60, 0xf0, 0xe9, 0xb7, 0xa7, 0xa0,
+ 0x7b, 0x7a, 0xde, 0xc9, 0x22, 0x13, 0xf4, 0x04, 0xaf, 0x91, 0xf5, 0x37,
+ 0x53, 0xad, 0x8d, 0x0d, 0x15, 0x7a, 0xf1, 0x81, 0x07, 0xd6, 0xa8, 0x80,
+ 0x0c, 0x8d, 0x02, 0x79, 0x43, 0x50, 0x98, 0x27, 0xfc, 0xbc, 0xb7, 0x8f,
+ 0xe0, 0xe6, 0x46, 0x6f, 0x25, 0xef, 0x2a, 0x04, 0xd1, 0xbe, 0x10, 0x3d,
+ 0xb4, 0x43, 0x3e, 0xf7, 0xea, 0xf4, 0xb8, 0x24, 0xdc, 0x77, 0x4f, 0x52,
+ 0x26, 0x55, 0xae, 0xbc, 0x6f, 0xe0, 0x8e, 0x41, 0x97, 0x82, 0xd4, 0xb5,
+ 0x77, 0x7c, 0xd6, 0xec, 0x26, 0x14, 0xb9, 0x11, 0x78, 0x34, 0x19, 0x90,
+ 0x87, 0xf8, 0xeb, 0x0b, 0x02, 0xfb, 0x48, 0x82, 0xe9, 0x1e, 0x37, 0x99,
+ 0xfd, 0x4a, 0x9f, 0xf6, 0x81, 0xf4, 0x87, 0x32, 0xa2, 0x45, 0x58, 0x0b,
+ 0xb3, 0xac, 0x43, 0xda, 0x76, 0x36, 0x84, 0xdb, 0xd6, 0x6c, 0x39, 0xb1,
+ 0xf3, 0xf1, 0xef, 0x78, 0x86, 0x53, 0x64, 0x6a, 0xb1, 0x80, 0xfe, 0xf3,
+ 0xe0, 0x4d, 0x70, 0x4c, 0x01, 0x72, 0x2f, 0x4f, 0x29, 0x2d, 0xc2, 0x5c,
+};
+static_assert(sizeof(kBytesTestReadSymbol16) == kNumBytesTestReadSymbol16, "");
diff --git a/src/utils/memory.h b/src/utils/memory.h
index a8da53b..d1762a2 100644
--- a/src/utils/memory.h
+++ b/src/utils/memory.h
@@ -17,7 +17,7 @@
#ifndef LIBGAV1_SRC_UTILS_MEMORY_H_
#define LIBGAV1_SRC_UTILS_MEMORY_H_
-#if defined(__ANDROID__) || defined(_MSC_VER)
+#if defined(__ANDROID__) || defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h>
#endif
@@ -55,7 +55,7 @@ enum {
// void AlignedFree(void* aligned_memory);
// Free aligned memory.
-#if defined(_MSC_VER) // MSVC
+#if defined(_MSC_VER) || defined(__MINGW32__)
inline void* AlignedAlloc(size_t alignment, size_t size) {
return _aligned_malloc(size, alignment);
@@ -63,7 +63,7 @@ inline void* AlignedAlloc(size_t alignment, size_t size) {
inline void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); }
-#else // !defined(_MSC_VER)
+#else // !(defined(_MSC_VER) || defined(__MINGW32__))
inline void* AlignedAlloc(size_t alignment, size_t size) {
#if defined(__ANDROID__)
@@ -89,7 +89,7 @@ inline void* AlignedAlloc(size_t alignment, size_t size) {
inline void AlignedFree(void* aligned_memory) { free(aligned_memory); }
-#endif // defined(_MSC_VER)
+#endif // defined(_MSC_VER) || defined(__MINGW32__)
inline void Memset(uint8_t* const dst, int value, size_t count) {
memset(dst, value, count);
@@ -101,6 +101,12 @@ inline void Memset(uint16_t* const dst, int value, size_t count) {
}
}
+inline void Memset(int16_t* const dst, int value, size_t count) {
+ for (size_t i = 0; i < count; ++i) {
+ dst[i] = static_cast<int16_t>(value);
+ }
+}
+
struct MallocDeleter {
void operator()(void* ptr) const { free(ptr); }
};
diff --git a/src/utils/memory_test.cc b/src/utils/memory_test.cc
new file mode 100644
index 0000000..42f6a15
--- /dev/null
+++ b/src/utils/memory_test.cc
@@ -0,0 +1,184 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/memory.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+
+#include "absl/base/config.h"
+#include "gtest/gtest.h"
+
+#ifdef ABSL_HAVE_EXCEPTIONS
+#include <exception>
+#endif
+
+namespace libgav1 {
+namespace {
+
+constexpr size_t kMaxAllocableSize = 0x40000000;
+
+struct Small : public Allocable {
+ uint8_t x;
+};
+
+struct Huge : public Allocable {
+ uint8_t x[kMaxAllocableSize + 1];
+};
+
+struct SmallMaxAligned : public MaxAlignedAllocable {
+ alignas(kMaxAlignment) uint8_t x;
+};
+
+struct HugeMaxAligned : public MaxAlignedAllocable {
+ alignas(kMaxAlignment) uint8_t x[kMaxAllocableSize + 1];
+};
+
+#ifdef ABSL_HAVE_EXCEPTIONS
+struct ThrowingConstructor : public Allocable {
+ ThrowingConstructor() { throw std::exception(); }
+
+ uint8_t x;
+};
+
+struct MaxAlignedThrowingConstructor : public MaxAlignedAllocable {
+ MaxAlignedThrowingConstructor() { throw std::exception(); }
+
+ uint8_t x;
+};
+#endif
+
+TEST(MemoryTest, TestAlignedAllocFree) {
+ for (size_t alignment = 1; alignment <= 1 << 20; alignment <<= 1) {
+ void* p = AlignedAlloc(alignment, 1);
+ // Note this additional check is to avoid an incorrect static-analysis
+ // warning for leaked memory with a plain ASSERT_NE().
+ if (p == nullptr) {
+ FAIL() << "AlignedAlloc(" << alignment << ", 1)";
+ }
+ const auto p_value = reinterpret_cast<uintptr_t>(p);
+ EXPECT_EQ(p_value % alignment, 0)
+ << "AlignedAlloc(" << alignment << ", 1) = " << p;
+ AlignedFree(p);
+ }
+}
+
+TEST(MemoryTest, TestAlignedUniquePtrAlloc) {
+ for (size_t alignment = 1; alignment <= 1 << 20; alignment <<= 1) {
+ auto p = MakeAlignedUniquePtr<uint8_t>(alignment, 1);
+ ASSERT_NE(p, nullptr) << "MakeAlignedUniquePtr(" << alignment << ", 1)";
+ const auto p_value = reinterpret_cast<uintptr_t>(p.get());
+ EXPECT_EQ(p_value % alignment, 0)
+ << "MakeAlignedUniquePtr(" << alignment << ", 1) = " << p.get();
+ }
+}
+
+TEST(MemoryTest, TestAllocable) {
+ // Allocable::operator new (std::nothrow) is called.
+ std::unique_ptr<Small> small(new (std::nothrow) Small);
+ EXPECT_NE(small, nullptr);
+ // Allocable::operator delete is called.
+ small = nullptr;
+
+ // Allocable::operator new[] (std::nothrow) is called.
+ std::unique_ptr<Small[]> small_array_of_smalls(new (std::nothrow) Small[10]);
+ EXPECT_NE(small_array_of_smalls, nullptr);
+ // Allocable::operator delete[] is called.
+ small_array_of_smalls = nullptr;
+
+ // Allocable::operator new (std::nothrow) is called.
+ std::unique_ptr<Huge> huge(new (std::nothrow) Huge);
+ EXPECT_EQ(huge, nullptr);
+
+ // Allocable::operator new[] (std::nothrow) is called.
+ std::unique_ptr<Small[]> huge_array_of_smalls(
+ new (std::nothrow) Small[kMaxAllocableSize / sizeof(Small) + 1]);
+ EXPECT_EQ(huge_array_of_smalls, nullptr);
+
+#ifdef ABSL_HAVE_EXCEPTIONS
+ try {
+ // Allocable::operator new (std::nothrow) is called.
+ // The constructor throws an exception.
+ // Allocable::operator delete (std::nothrow) is called.
+ ThrowingConstructor* always = new (std::nothrow) ThrowingConstructor;
+ static_cast<void>(always);
+ } catch (...) {
+ }
+
+ try {
+ // Allocable::operator new[] (std::nothrow) is called.
+ // The constructor throws an exception.
+ // Allocable::operator delete[] (std::nothrow) is called.
+ ThrowingConstructor* always = new (std::nothrow) ThrowingConstructor[2];
+ static_cast<void>(always);
+ } catch (...) {
+ }
+#endif // ABSL_HAVE_EXCEPTIONS
+}
+
+TEST(MemoryTest, TestMaxAlignedAllocable) {
+ // MaxAlignedAllocable::operator new (std::nothrow) is called.
+ std::unique_ptr<SmallMaxAligned> small(new (std::nothrow) SmallMaxAligned);
+ EXPECT_NE(small, nullptr);
+ // Note this check doesn't guarantee conformance as a suitably aligned
+ // address may be returned from any allocator.
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(small.get()) & (kMaxAlignment - 1), 0);
+ // MaxAlignedAllocable::operator delete is called.
+ small = nullptr;
+
+ // MaxAlignedAllocable::operator new[] (std::nothrow) is called.
+ std::unique_ptr<SmallMaxAligned[]> small_array_of_smalls(
+ new (std::nothrow) SmallMaxAligned[10]);
+ EXPECT_NE(small_array_of_smalls, nullptr);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(small_array_of_smalls.get()) &
+ (kMaxAlignment - 1),
+ 0);
+ // MaxAlignedAllocable::operator delete[] is called.
+ small_array_of_smalls = nullptr;
+
+ // MaxAlignedAllocable::operator new (std::nothrow) is called.
+ std::unique_ptr<HugeMaxAligned> huge(new (std::nothrow) HugeMaxAligned);
+ EXPECT_EQ(huge, nullptr);
+
+ // MaxAlignedAllocable::operator new[] (std::nothrow) is called.
+ std::unique_ptr<SmallMaxAligned[]> huge_array_of_smalls(
+ new (std::nothrow)
+ SmallMaxAligned[kMaxAllocableSize / sizeof(SmallMaxAligned) + 1]);
+ EXPECT_EQ(huge_array_of_smalls, nullptr);
+
+#ifdef ABSL_HAVE_EXCEPTIONS
+ try {
+ // MaxAlignedAllocable::operator new (std::nothrow) is called.
+ // The constructor throws an exception.
+ // MaxAlignedAllocable::operator delete (std::nothrow) is called.
+ auto* always = new (std::nothrow) MaxAlignedThrowingConstructor;
+ static_cast<void>(always);
+ } catch (...) {
+ }
+
+ try {
+ // MaxAlignedAllocable::operator new[] (std::nothrow) is called.
+ // The constructor throws an exception.
+ // MaxAlignedAllocable::operator delete[] (std::nothrow) is called.
+ auto* always = new (std::nothrow) MaxAlignedThrowingConstructor[2];
+ static_cast<void>(always);
+ } catch (...) {
+ }
+#endif // ABSL_HAVE_EXCEPTIONS
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/queue.h b/src/utils/queue.h
index cffb9ca..fcc7bfe 100644
--- a/src/utils/queue.h
+++ b/src/utils/queue.h
@@ -21,6 +21,7 @@
#include <cstddef>
#include <memory>
#include <new>
+#include <utility>
#include "src/utils/compiler_attributes.h"
diff --git a/src/utils/queue_test.cc b/src/utils/queue_test.cc
new file mode 100644
index 0000000..d84ae5f
--- /dev/null
+++ b/src/utils/queue_test.cc
@@ -0,0 +1,86 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/queue.h"
+
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+struct TestClass {
+ TestClass() = default;
+ explicit TestClass(int i) : i(i) {}
+ int i;
+ // The vector exists simply so that the class is not trivially copyable.
+ std::vector<int> dummy;
+};
+
+TEST(QueueTest, Basic) {
+ Queue<TestClass> queue;
+ ASSERT_TRUE(queue.Init(8));
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_FALSE(queue.Full());
+ TestClass test(i);
+ queue.Push(std::move(test));
+ EXPECT_EQ(queue.Back().i, i);
+ EXPECT_FALSE(queue.Empty());
+ }
+ EXPECT_TRUE(queue.Full());
+
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_FALSE(queue.Empty());
+ EXPECT_EQ(queue.Front().i, i);
+ queue.Pop();
+ EXPECT_FALSE(queue.Full());
+ }
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_FALSE(queue.Full());
+ TestClass test(i);
+ queue.Push(std::move(test));
+ EXPECT_EQ(queue.Back().i, i);
+ EXPECT_FALSE(queue.Empty());
+ }
+ EXPECT_TRUE(queue.Full());
+ queue.Clear();
+ EXPECT_TRUE(queue.Empty());
+ EXPECT_FALSE(queue.Full());
+}
+
+TEST(QueueTest, WrapAround) {
+ Queue<TestClass> queue;
+ ASSERT_TRUE(queue.Init(8));
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 100; ++i) {
+ EXPECT_FALSE(queue.Full());
+ TestClass test(i);
+ queue.Push(std::move(test));
+ EXPECT_EQ(queue.Back().i, i);
+ EXPECT_FALSE(queue.Empty());
+ EXPECT_EQ(queue.Front().i, i);
+ queue.Pop();
+ EXPECT_TRUE(queue.Empty());
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/raw_bit_reader.h b/src/utils/raw_bit_reader.h
index 7d8ce8f..da770d1 100644
--- a/src/utils/raw_bit_reader.h
+++ b/src/utils/raw_bit_reader.h
@@ -25,7 +25,7 @@
namespace libgav1 {
-class RawBitReader : public BitReader, public Allocable {
+class RawBitReader final : public BitReader, public Allocable {
public:
RawBitReader(const uint8_t* data, size_t size);
~RawBitReader() override = default;
diff --git a/src/utils/raw_bit_reader_test.cc b/src/utils/raw_bit_reader_test.cc
new file mode 100644
index 0000000..22a97a7
--- /dev/null
+++ b/src/utils/raw_bit_reader_test.cc
@@ -0,0 +1,580 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/raw_bit_reader.h"
+
+#include <bitset>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "src/utils/constants.h"
+#include "tests/third_party/libvpx/acm_random.h"
+
+namespace libgav1 {
+namespace {
+
+std::string IntegerToString(int x) { return std::bitset<8>(x).to_string(); }
+
+class RawBitReaderTest : public testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+ RawBitReaderTest()
+ : literal_size_(std::get<0>(GetParam())),
+ test_data_size_(std::get<1>(GetParam())) {}
+
+ void CreateReader(const std::vector<uint8_t>& data) {
+ data_ = data;
+ raw_bit_reader_.reset(new (std::nothrow)
+ RawBitReader(data_.data(), data_.size()));
+ }
+
+ void CreateReader(int size) {
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ data_.clear();
+ for (int i = 0; i < size; ++i) {
+ data_.push_back(rnd.Rand8());
+ }
+ raw_bit_reader_.reset(new (std::nothrow)
+ RawBitReader(data_.data(), data_.size()));
+ }
+
+ // Some tests don't depend on |literal_size_|. For those tests, return true if
+ // the |literal_size_| is greater than 1. If this function returns true, the
+ // test will abort.
+ bool RunOnlyOnce() const { return literal_size_ > 1; }
+
+ std::unique_ptr<RawBitReader> raw_bit_reader_;
+ std::vector<uint8_t> data_;
+ int literal_size_;
+ int test_data_size_;
+};
+
+TEST_P(RawBitReaderTest, ReadBit) {
+ if (RunOnlyOnce()) return;
+ CreateReader(test_data_size_);
+ for (const auto& value : data_) {
+ const std::string expected = IntegerToString(value);
+ for (int j = 0; j < 8; ++j) {
+ EXPECT_FALSE(raw_bit_reader_->Finished());
+ EXPECT_EQ(static_cast<int>(expected[j] == '1'),
+ raw_bit_reader_->ReadBit());
+ }
+ }
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_EQ(raw_bit_reader_->ReadBit(), -1);
+}
+
+TEST_P(RawBitReaderTest, ReadLiteral) {
+ const int size_bytes = literal_size_;
+ const int size_bits = 8 * size_bytes;
+ CreateReader(test_data_size_ * size_bytes);
+ for (size_t i = 0; i < data_.size(); i += size_bytes) {
+ uint32_t expected_literal = 0;
+ for (int j = 0; j < size_bytes; ++j) {
+ expected_literal |=
+ static_cast<uint32_t>(data_[i + j] << (8 * (size_bytes - j - 1)));
+ }
+ EXPECT_FALSE(raw_bit_reader_->Finished());
+ const int64_t actual_literal = raw_bit_reader_->ReadLiteral(size_bits);
+ EXPECT_EQ(static_cast<int64_t>(expected_literal), actual_literal);
+ EXPECT_GE(actual_literal, 0);
+ }
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_EQ(raw_bit_reader_->ReadLiteral(10), -1);
+}
+
+TEST_P(RawBitReaderTest, ReadLiteral32BitsWithMsbSet) {
+ if (RunOnlyOnce()) return;
+ // Three 32-bit values with MSB set.
+ CreateReader({0xff, 0xff, 0xff, 0xff, // 4294967295
+ 0x80, 0xff, 0xee, 0xdd, // 2164256477
+ 0xa0, 0xaa, 0xbb, 0xcc}); // 2695543756
+ static constexpr int64_t expected_literals[] = {4294967295, 2164256477,
+ 2695543756};
+ for (const int64_t expected_literal : expected_literals) {
+ EXPECT_FALSE(raw_bit_reader_->Finished());
+ const int64_t actual_literal = raw_bit_reader_->ReadLiteral(32);
+ EXPECT_EQ(expected_literal, actual_literal);
+ EXPECT_GE(actual_literal, 0);
+ }
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_EQ(raw_bit_reader_->ReadLiteral(10), -1);
+}
+
+TEST_P(RawBitReaderTest, ReadLiteralNotEnoughBits) {
+ if (RunOnlyOnce()) return;
+ CreateReader(4); // 32 bits.
+ EXPECT_GE(raw_bit_reader_->ReadLiteral(16), 0);
+ EXPECT_EQ(raw_bit_reader_->ReadLiteral(32), -1);
+}
+
+TEST_P(RawBitReaderTest, ReadLiteralMaxNumBits) {
+ if (RunOnlyOnce()) return;
+ CreateReader(4); // 32 bits.
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(32), -1);
+}
+
+TEST_P(RawBitReaderTest, ReadInverseSignedLiteral) {
+ if (RunOnlyOnce()) return;
+ // This is the only usage for this function in the decoding process. So
+ // testing just that case.
+ const int size_bits = 6;
+ data_.clear();
+ // Negative value followed by a positive value.
+ data_.push_back(0xd2);
+ data_.push_back(0xa4);
+ raw_bit_reader_.reset(new (std::nothrow)
+ RawBitReader(data_.data(), data_.size()));
+ int value;
+ EXPECT_TRUE(raw_bit_reader_->ReadInverseSignedLiteral(size_bits, &value));
+ EXPECT_EQ(value, -23);
+ EXPECT_TRUE(raw_bit_reader_->ReadInverseSignedLiteral(size_bits, &value));
+ EXPECT_EQ(value, 41);
+ // We have only two bits left. Trying to read an inverse signed literal of 2
+ // bits actually needs 3 bits. So this should fail.
+ EXPECT_FALSE(raw_bit_reader_->ReadInverseSignedLiteral(2, &value));
+}
+
+TEST_P(RawBitReaderTest, ZeroSize) {
+ if (RunOnlyOnce()) return;
+ // Valid data, zero size.
+ data_.clear();
+ data_.push_back(0xf0);
+ raw_bit_reader_.reset(new (std::nothrow) RawBitReader(data_.data(), 0));
+ EXPECT_EQ(raw_bit_reader_->ReadBit(), -1);
+ EXPECT_EQ(raw_bit_reader_->ReadLiteral(2), -1);
+ // NULL data, zero size.
+ raw_bit_reader_.reset(new (std::nothrow) RawBitReader(nullptr, 0));
+ EXPECT_EQ(raw_bit_reader_->ReadBit(), -1);
+ EXPECT_EQ(raw_bit_reader_->ReadLiteral(2), -1);
+}
+
+TEST_P(RawBitReaderTest, AlignToNextByte) {
+ if (RunOnlyOnce()) return;
+ CreateReader({0x00, 0x00, 0x00, 0x0f});
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 0);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 0);
+ EXPECT_TRUE(raw_bit_reader_->AlignToNextByte());
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 0);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 0);
+ EXPECT_NE(raw_bit_reader_->ReadBit(), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 1);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 1);
+ EXPECT_TRUE(raw_bit_reader_->AlignToNextByte());
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 1);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(16), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 24);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 3);
+ EXPECT_TRUE(raw_bit_reader_->AlignToNextByte());
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 24);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 3);
+ EXPECT_NE(raw_bit_reader_->ReadBit(), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 25);
+ EXPECT_EQ(raw_bit_reader_->byte_offset(), 4);
+ // Some bits are non-zero.
+ EXPECT_FALSE(raw_bit_reader_->AlignToNextByte());
+}
+
+TEST_P(RawBitReaderTest, VerifyAndSkipTrailingBits) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data;
+
+ // 1 byte trailing byte.
+ data.push_back(0x80);
+ CreateReader(data);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 0);
+ EXPECT_TRUE(raw_bit_reader_->VerifyAndSkipTrailingBits(8));
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+
+ // 2 byte trailing byte beginning at a byte-aligned offset.
+ data.clear();
+ data.push_back(0xf8);
+ data.push_back(0x80);
+ CreateReader(data);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(8), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_TRUE(raw_bit_reader_->VerifyAndSkipTrailingBits(8));
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+
+ // 2 byte trailing byte beginning at a non-byte-aligned offset.
+ data.clear();
+ data.push_back(0xf8);
+ data.push_back(0x00);
+ CreateReader(data);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(4), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 4);
+ EXPECT_TRUE(raw_bit_reader_->VerifyAndSkipTrailingBits(4));
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+
+ // Invalid trailing byte at a byte-aligned offset.
+ data.clear();
+ data.push_back(0xf7);
+ data.push_back(0x70);
+ CreateReader(data);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(8), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_FALSE(raw_bit_reader_->VerifyAndSkipTrailingBits(8));
+
+ // Invalid trailing byte at a non-byte-aligned offset.
+ CreateReader(data);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(4), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 4);
+ EXPECT_FALSE(raw_bit_reader_->VerifyAndSkipTrailingBits(12));
+
+ // No more data available.
+ CreateReader(data);
+ EXPECT_NE(raw_bit_reader_->ReadLiteral(16), -1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_FALSE(raw_bit_reader_->VerifyAndSkipTrailingBits(8));
+}
+
+TEST_P(RawBitReaderTest, ReadLittleEndian) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data;
+ size_t actual;
+
+ // Invalid input.
+ data.push_back(0x00); // dummy.
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadLittleEndian(1, nullptr));
+
+ // One byte value.
+ data.clear();
+ data.push_back(0x01);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadLittleEndian(1, &actual));
+ EXPECT_EQ(actual, 1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // One byte value with leading bytes.
+ data.clear();
+ data.push_back(0x01);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadLittleEndian(4, &actual));
+ EXPECT_EQ(actual, 1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 32);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Two byte value.
+ data.clear();
+ data.push_back(0xD9);
+ data.push_back(0x01);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadLittleEndian(2, &actual));
+ EXPECT_EQ(actual, 473);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Two byte value with leading bytes.
+ data.clear();
+ data.push_back(0xD9);
+ data.push_back(0x01);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadLittleEndian(4, &actual));
+ EXPECT_EQ(actual, 473);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 32);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Not enough bytes.
+ data.clear();
+ data.push_back(0x01);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadLittleEndian(2, &actual));
+}
+
+TEST_P(RawBitReaderTest, ReadUnsignedLeb128) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data;
+ size_t actual;
+
+ // Invalid input.
+ data.push_back(0x00); // dummy.
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUnsignedLeb128(nullptr));
+
+ // One byte value.
+ data.clear();
+ data.push_back(0x01);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+ EXPECT_EQ(actual, 1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // One byte value with trailing bytes.
+ data.clear();
+ data.push_back(0x81);
+ data.push_back(0x80);
+ data.push_back(0x80);
+ data.push_back(0x00);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+ EXPECT_EQ(actual, 1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 32);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Two byte value.
+ data.clear();
+ data.push_back(0xD9);
+ data.push_back(0x01);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+ EXPECT_EQ(actual, 217);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Two byte value with trailing bytes.
+ data.clear();
+ data.push_back(0xD9);
+ data.push_back(0x81);
+ data.push_back(0x80);
+ data.push_back(0x80);
+ data.push_back(0x00);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+ EXPECT_EQ(actual, 217);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 40);
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+
+ // Value > 32 bits.
+ data.clear();
+ for (int i = 0; i < 5; ++i) data.push_back(0xD9);
+ data.push_back(0x00);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+
+ // Not enough bytes (truncated leb128 value).
+ data.clear();
+ data.push_back(0x81);
+ data.push_back(0x81);
+ data.push_back(0x81);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+
+ // Exceeds kMaximumLeb128Size.
+ data.clear();
+ for (int i = 0; i < 10; ++i) data.push_back(0x80);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUnsignedLeb128(&actual));
+}
+
+TEST_P(RawBitReaderTest, ReadUvlc) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data;
+ uint32_t actual;
+
+ // Invalid input.
+ data.push_back(0x00); // dummy.
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUvlc(nullptr));
+
+ // Zero bit value.
+ data.clear();
+ data.push_back(0x80);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUvlc(&actual));
+ EXPECT_EQ(actual, 0);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 1);
+
+ // One bit value.
+ data.clear();
+ data.push_back(0x60); // 011...
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUvlc(&actual));
+ EXPECT_EQ(actual, 2);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 3);
+
+ // Two bit value.
+ data.clear();
+ data.push_back(0x38); // 00111...
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUvlc(&actual));
+ EXPECT_EQ(actual, 6);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 5);
+
+ // 31 bit value.
+ data.clear();
+ // (1 << 32) - 2 (= UINT32_MAX - 1) is the largest value that can be encoded
+ // as uvlc().
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x01);
+ data.push_back(0xFF);
+ data.push_back(0xFF);
+ data.push_back(0xFF);
+ data.push_back(0xFE);
+ CreateReader(data);
+ ASSERT_TRUE(raw_bit_reader_->ReadUvlc(&actual));
+ EXPECT_EQ(actual, UINT32_MAX - 1);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 63);
+
+ // Not enough bits (truncated uvlc value).
+ data.clear();
+ data.push_back(0x07);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUvlc(&actual));
+
+ // 32 bits.
+ data.clear();
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0xFF);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUvlc(&actual));
+
+ // Exceeds 32 bits.
+ data.clear();
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x00);
+ data.push_back(0x0F);
+ CreateReader(data);
+ EXPECT_FALSE(raw_bit_reader_->ReadUvlc(&actual));
+}
+
+TEST_P(RawBitReaderTest, DecodeSignedSubexpWithReference) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data;
+ int actual;
+
+ data.push_back(0xa0); // v = 5;
+ CreateReader(data);
+ EXPECT_TRUE(raw_bit_reader_->DecodeSignedSubexpWithReference(
+ 10, 20, 15, kGlobalMotionReadControl, &actual));
+ EXPECT_EQ(actual, 12);
+
+ data.clear();
+ data.push_back(0xd0); // v = 6; extra_bit = 1;
+ CreateReader(data);
+ EXPECT_TRUE(raw_bit_reader_->DecodeSignedSubexpWithReference(
+ 10, 20, 15, kGlobalMotionReadControl, &actual));
+ EXPECT_EQ(actual, 11);
+
+ data.clear();
+ data.push_back(0xc8); // subexp_more_bits = 1; v = 9;
+ CreateReader(data);
+ EXPECT_TRUE(raw_bit_reader_->DecodeSignedSubexpWithReference(
+ 10, 40, 15, kGlobalMotionReadControl, &actual));
+ EXPECT_EQ(actual, 27);
+
+ data.clear();
+ data.push_back(0x60); // subexp_more_bits = 0; subexp_bits = 6.
+ CreateReader(data);
+ EXPECT_TRUE(raw_bit_reader_->DecodeSignedSubexpWithReference(
+ 10, 40, 15, kGlobalMotionReadControl, &actual));
+ EXPECT_EQ(actual, 18);
+
+ data.clear();
+ data.push_back(0x60);
+ CreateReader(data);
+ // Control is greater than 32, which makes b >= 32 in DecodeSubexp() and
+ // should return false.
+ EXPECT_FALSE(raw_bit_reader_->DecodeSignedSubexpWithReference(10, 40, 15, 35,
+ &actual));
+}
+
+TEST_P(RawBitReaderTest, DecodeUniform) {
+ if (RunOnlyOnce()) return;
+ // Test the example from the AV1 spec, Section 4.10.7. ns(n).
+ // n = 5
+ // Value ns(n) encoding
+ // -------------------------------
+ // 0 00
+ // 1 01
+ // 2 10
+ // 3 110
+ // 4 111
+ //
+ // The five encoded values are concatenated into two bytes.
+ std::vector<uint8_t> data = {0x1b, 0x70};
+ CreateReader(data);
+ int actual;
+ for (int i = 0; i < 5; ++i) {
+ EXPECT_TRUE(raw_bit_reader_->DecodeUniform(5, &actual));
+ EXPECT_EQ(actual, i);
+ }
+
+ // If n is a power of 2, ns(n) is simply the log2(n)-bit representation of
+ // the unsigned number.
+ // Test n = 16.
+ // The 16 encoded values are concatenated into 8 bytes.
+ data = {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
+ CreateReader(data);
+ for (int i = 0; i < 16; ++i) {
+ EXPECT_TRUE(raw_bit_reader_->DecodeUniform(16, &actual));
+ EXPECT_EQ(actual, i);
+ }
+}
+
+TEST_P(RawBitReaderTest, SkipBytes) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data = {0x00, 0x00, 0x00, 0x00, 0x00};
+ CreateReader(data);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 0);
+ EXPECT_TRUE(raw_bit_reader_->SkipBytes(1));
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_GE(raw_bit_reader_->ReadBit(), 0);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 9);
+ EXPECT_FALSE(raw_bit_reader_->SkipBytes(1)); // Not at a byte boundary.
+ EXPECT_TRUE(raw_bit_reader_->AlignToNextByte());
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+ EXPECT_FALSE(raw_bit_reader_->SkipBytes(10)); // Not enough bytes.
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 16);
+ EXPECT_TRUE(raw_bit_reader_->SkipBytes(3));
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_EQ(raw_bit_reader_->ReadBit(), -1);
+}
+
+TEST_P(RawBitReaderTest, SkipBits) {
+ if (RunOnlyOnce()) return;
+ std::vector<uint8_t> data = {0x00, 0x00, 0x00, 0x00, 0x00};
+ CreateReader(data);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 0);
+ EXPECT_TRUE(raw_bit_reader_->SkipBits(8));
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 8);
+ EXPECT_GE(raw_bit_reader_->ReadBit(), 0);
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 9);
+ EXPECT_TRUE(raw_bit_reader_->SkipBits(10)); // Not at a byte boundary.
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 19);
+ EXPECT_FALSE(raw_bit_reader_->SkipBits(80)); // Not enough bytes.
+ EXPECT_EQ(raw_bit_reader_->bit_offset(), 19);
+ EXPECT_TRUE(raw_bit_reader_->SkipBits(21));
+ EXPECT_TRUE(raw_bit_reader_->Finished());
+ EXPECT_EQ(raw_bit_reader_->ReadBit(), -1);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ RawBitReaderTestInstance, RawBitReaderTest,
+ testing::Combine(testing::Range(1, 5), // literal size.
+ testing::Values(100))); // number of bits/literals.
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/reference_info.h b/src/utils/reference_info.h
index a660791..73c32d9 100644
--- a/src/utils/reference_info.h
+++ b/src/utils/reference_info.h
@@ -21,6 +21,7 @@
#include <cstdint>
#include "src/utils/array_2d.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/types.h"
diff --git a/src/utils/segmentation_map_test.cc b/src/utils/segmentation_map_test.cc
new file mode 100644
index 0000000..4d8a7c9
--- /dev/null
+++ b/src/utils/segmentation_map_test.cc
@@ -0,0 +1,120 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation_map.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(SegmentationMapTest, Clear) {
+ constexpr int32_t kRows4x4 = 60;
+ constexpr int32_t kColumns4x4 = 80;
+ SegmentationMap segmentation_map;
+ ASSERT_TRUE(segmentation_map.Allocate(kRows4x4, kColumns4x4));
+
+ segmentation_map.Clear();
+ for (int row4x4 = 0; row4x4 < kRows4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kColumns4x4; ++column4x4) {
+ EXPECT_EQ(segmentation_map.segment_id(row4x4, column4x4), 0);
+ }
+ }
+}
+
+TEST(SegmentationMapTest, FillBlock) {
+ constexpr int32_t kRows4x4 = 60;
+ constexpr int32_t kColumns4x4 = 80;
+ SegmentationMap segmentation_map;
+ ASSERT_TRUE(segmentation_map.Allocate(kRows4x4, kColumns4x4));
+
+ // Fill the whole image with 2.
+ segmentation_map.FillBlock(0, 0, kColumns4x4, kRows4x4, 2);
+ // Fill a block with 1.
+ constexpr int kBlockWidth4x4 = 10;
+ constexpr int kBlockHeight4x4 = 20;
+ segmentation_map.FillBlock(4, 6, kBlockWidth4x4, kBlockHeight4x4, 1);
+ for (int row4x4 = 0; row4x4 < kRows4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kColumns4x4; ++column4x4) {
+ if (4 <= row4x4 && row4x4 < 4 + kBlockHeight4x4 && 6 <= column4x4 &&
+ column4x4 < 6 + kBlockWidth4x4) {
+ // Inside the block.
+ EXPECT_EQ(segmentation_map.segment_id(row4x4, column4x4), 1);
+ } else {
+ // Outside the block.
+ EXPECT_EQ(segmentation_map.segment_id(row4x4, column4x4), 2);
+ }
+ }
+ }
+}
+
+TEST(SegmentationMapTest, CopyFrom) {
+ constexpr int32_t kRows4x4 = 60;
+ constexpr int32_t kColumns4x4 = 80;
+ SegmentationMap segmentation_map;
+ ASSERT_TRUE(segmentation_map.Allocate(kRows4x4, kColumns4x4));
+
+ // Split the segmentation map into four blocks of equal size.
+ constexpr int kBlockWidth4x4 = 40;
+ constexpr int kBlockHeight4x4 = 30;
+ segmentation_map.FillBlock(0, 0, kBlockWidth4x4, kBlockHeight4x4, 1);
+ segmentation_map.FillBlock(0, kBlockWidth4x4, kBlockWidth4x4, kBlockHeight4x4,
+ 2);
+ segmentation_map.FillBlock(kBlockHeight4x4, 0, kBlockWidth4x4,
+ kBlockHeight4x4, 3);
+ segmentation_map.FillBlock(kBlockHeight4x4, kBlockWidth4x4, kBlockWidth4x4,
+ kBlockHeight4x4, 4);
+
+ SegmentationMap segmentation_map2;
+ ASSERT_TRUE(segmentation_map2.Allocate(kRows4x4, kColumns4x4));
+ segmentation_map2.CopyFrom(segmentation_map);
+
+ for (int row4x4 = 0; row4x4 < kBlockHeight4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kBlockWidth4x4; ++column4x4) {
+ EXPECT_EQ(segmentation_map.segment_id(row4x4, column4x4), 1);
+ EXPECT_EQ(segmentation_map2.segment_id(row4x4, column4x4), 1);
+ }
+ }
+ for (int row4x4 = 0; row4x4 < kBlockHeight4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kBlockWidth4x4; ++column4x4) {
+ EXPECT_EQ(segmentation_map.segment_id(row4x4, kBlockWidth4x4 + column4x4),
+ 2);
+ EXPECT_EQ(
+ segmentation_map2.segment_id(row4x4, kBlockWidth4x4 + column4x4), 2);
+ }
+ }
+ for (int row4x4 = 0; row4x4 < kBlockHeight4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kBlockWidth4x4; ++column4x4) {
+ EXPECT_EQ(
+ segmentation_map.segment_id(kBlockHeight4x4 + row4x4, column4x4), 3);
+ EXPECT_EQ(
+ segmentation_map2.segment_id(kBlockHeight4x4 + row4x4, column4x4), 3);
+ }
+ }
+ for (int row4x4 = 0; row4x4 < kBlockHeight4x4; ++row4x4) {
+ for (int column4x4 = 0; column4x4 < kBlockWidth4x4; ++column4x4) {
+ EXPECT_EQ(segmentation_map.segment_id(kBlockHeight4x4 + row4x4,
+ kBlockWidth4x4 + column4x4),
+ 4);
+ EXPECT_EQ(segmentation_map2.segment_id(kBlockHeight4x4 + row4x4,
+ kBlockWidth4x4 + column4x4),
+ 4);
+ }
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/segmentation_test.cc b/src/utils/segmentation_test.cc
new file mode 100644
index 0000000..e985b2d
--- /dev/null
+++ b/src/utils/segmentation_test.cc
@@ -0,0 +1,40 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/segmentation.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "src/utils/common.h"
+#include "src/utils/types.h"
+
+namespace libgav1 {
+namespace {
+
+int GetUnsignedBits(const unsigned int num_values) {
+ return (num_values > 0) ? FloorLog2(num_values) + 1 : 0;
+}
+
+// Check that kSegmentationFeatureBits and kSegmentationFeatureMaxValues are
+// consistent with each other.
+TEST(SegmentationTest, FeatureBitsAndMaxValuesConsistency) {
+ for (int feature = 0; feature < kSegmentFeatureMax; feature++) {
+ EXPECT_EQ(kSegmentationFeatureBits[feature],
+ GetUnsignedBits(kSegmentationFeatureMaxValues[feature]));
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/stack_test.cc b/src/utils/stack_test.cc
new file mode 100644
index 0000000..4de2ab6
--- /dev/null
+++ b/src/utils/stack_test.cc
@@ -0,0 +1,74 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/stack.h"
+
+#include <cstdint>
+#include <utility>
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr int kStackSize = 8;
+
+TEST(StackTest, SimpleType) {
+ Stack<int, kStackSize> stack;
+ EXPECT_TRUE(stack.Empty());
+
+ for (int i = 0; i < kStackSize; ++i) {
+ stack.Push(i);
+ EXPECT_FALSE(stack.Empty());
+ }
+
+ for (int i = kStackSize - 1; i >= 0; --i) {
+ EXPECT_EQ(stack.Pop(), i);
+ }
+ EXPECT_TRUE(stack.Empty());
+}
+
+TEST(StackTest, LargeStruct) {
+ struct LargeMoveOnlyStruct {
+ LargeMoveOnlyStruct() = default;
+ // Move only.
+ LargeMoveOnlyStruct(LargeMoveOnlyStruct&& other) = default;
+ LargeMoveOnlyStruct& operator=(LargeMoveOnlyStruct&& other) = default;
+
+ int32_t array1[1000];
+ uint64_t array2[2000];
+ };
+
+ Stack<LargeMoveOnlyStruct, kStackSize> stack;
+ EXPECT_TRUE(stack.Empty());
+
+ LargeMoveOnlyStruct large_move_only_struct[kStackSize];
+ for (int i = 0; i < kStackSize; ++i) {
+ LargeMoveOnlyStruct& l = large_move_only_struct[i];
+ l.array1[0] = i;
+ l.array2[0] = i;
+ stack.Push(std::move(l));
+ EXPECT_FALSE(stack.Empty());
+ }
+
+ for (int i = kStackSize - 1; i >= 0; --i) {
+ LargeMoveOnlyStruct l = stack.Pop();
+ EXPECT_EQ(l.array1[0], i);
+ EXPECT_EQ(l.array2[0], i);
+ }
+ EXPECT_TRUE(stack.Empty());
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/threadpool_test.cc b/src/utils/threadpool_test.cc
new file mode 100644
index 0000000..17854dc
--- /dev/null
+++ b/src/utils/threadpool_test.cc
@@ -0,0 +1,133 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/threadpool.h"
+
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "gtest/gtest.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/executor.h"
+
+namespace libgav1 {
+namespace {
+
+class SimpleGuardedInteger {
+ public:
+ explicit SimpleGuardedInteger(int initial_value) : value_(initial_value) {}
+ SimpleGuardedInteger(const SimpleGuardedInteger&) = delete;
+ SimpleGuardedInteger& operator=(const SimpleGuardedInteger&) = delete;
+
+ void Decrement() {
+ absl::MutexLock l(&mutex_);
+ assert(value_ >= 1);
+ --value_;
+ changed_.SignalAll();
+ }
+
+ void Increment() {
+ absl::MutexLock l(&mutex_);
+ ++value_;
+ changed_.SignalAll();
+ }
+
+ int Value() {
+ absl::MutexLock l(&mutex_);
+ return value_;
+ }
+
+ void WaitForZero() {
+ absl::MutexLock l(&mutex_);
+ while (value_ != 0) {
+ changed_.Wait(&mutex_);
+ }
+ }
+
+ private:
+ absl::Mutex mutex_;
+ absl::CondVar changed_;
+ int value_ LIBGAV1_GUARDED_BY(mutex_);
+};
+
+// Loops for |milliseconds| of wall-clock time.
+void LoopForMs(int64_t milliseconds) {
+ const absl::Time deadline = absl::Now() + absl::Milliseconds(milliseconds);
+ while (absl::Now() < deadline) {
+ }
+}
+
+// A function that increments the given integer.
+void IncrementIntegerJob(SimpleGuardedInteger* value) {
+ LoopForMs(100);
+ value->Increment();
+}
+
+TEST(ThreadPoolTest, ThreadedIntegerIncrement) {
+ std::unique_ptr<ThreadPool> thread_pool = ThreadPool::Create(100);
+ ASSERT_NE(thread_pool, nullptr);
+ EXPECT_EQ(thread_pool->num_threads(), 100);
+ SimpleGuardedInteger count(0);
+ for (int i = 0; i < 1000; ++i) {
+ thread_pool->Schedule([&count]() { IncrementIntegerJob(&count); });
+ }
+ thread_pool.reset(nullptr);
+ EXPECT_EQ(count.Value(), 1000);
+}
+
+// Test a ThreadPool via the Executor interface.
+TEST(ThreadPoolTest, ExecutorInterface) {
+ std::unique_ptr<ThreadPool> thread_pool = ThreadPool::Create(100);
+ ASSERT_NE(thread_pool, nullptr);
+ std::unique_ptr<Executor> executor(thread_pool.release());
+ SimpleGuardedInteger count(0);
+ for (int i = 0; i < 1000; ++i) {
+ executor->Schedule([&count]() { IncrementIntegerJob(&count); });
+ }
+ executor.reset(nullptr);
+ EXPECT_EQ(count.Value(), 1000);
+}
+
+TEST(ThreadPoolTest, DestroyWithoutUse) {
+ std::unique_ptr<ThreadPool> thread_pool = ThreadPool::Create(100);
+ EXPECT_NE(thread_pool, nullptr);
+ thread_pool.reset(nullptr);
+}
+
+// If num_threads is 0, ThreadPool::Create() should return a null pointer.
+TEST(ThreadPoolTest, NumThreadsZero) {
+ std::unique_ptr<ThreadPool> thread_pool = ThreadPool::Create(0);
+ EXPECT_EQ(thread_pool, nullptr);
+}
+
+// If num_threads is 1, the closures are run in FIFO order.
+TEST(ThreadPoolTest, OneThreadRunsClosuresFIFO) {
+ int count = 0; // Declare first so that it outlives the thread pool.
+ std::unique_ptr<ThreadPool> pool = ThreadPool::Create(1);
+ ASSERT_NE(pool, nullptr);
+ EXPECT_EQ(pool->num_threads(), 1);
+ for (int i = 0; i < 1000; ++i) {
+ pool->Schedule([&count, i]() {
+ EXPECT_EQ(count, i);
+ count++;
+ });
+ }
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/types.h b/src/utils/types.h
index eba13b7..0dd6360 100644
--- a/src/utils/types.h
+++ b/src/utils/types.h
@@ -28,45 +28,20 @@
namespace libgav1 {
-struct MotionVector : public Allocable {
- static constexpr int kRow = 0;
- static constexpr int kColumn = 1;
-
- MotionVector() = default;
- MotionVector(const MotionVector& mv) = default;
-
- MotionVector& operator=(const MotionVector& rhs) {
- mv32 = rhs.mv32;
- return *this;
- }
-
- bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; }
-
- union {
- // Motion vectors will always fit in int16_t and using int16_t here instead
- // of int saves significant memory since some of the frame sized structures
- // store motion vectors.
- int16_t mv[2];
- // A uint32_t view into the |mv| array. Useful for cases where both the
- // motion vectors have to be copied or compared with a single 32 bit
- // instruction.
- uint32_t mv32;
- };
+union MotionVector {
+ // Motion vectors will always fit in int16_t and using int16_t here instead
+ // of int saves significant memory since some of the frame sized structures
+ // store motion vectors.
+ // Index 0 is the entry for row (horizontal direction) motion vector.
+ // Index 1 is the entry for column (vertical direction) motion vector.
+ int16_t mv[2];
+ // A uint32_t view into the |mv| array. Useful for cases where both the
+ // motion vectors have to be copied or compared with a single 32 bit
+ // instruction.
+ uint32_t mv32;
};
union CompoundMotionVector {
- CompoundMotionVector() = default;
- CompoundMotionVector(const CompoundMotionVector& mv) = default;
-
- CompoundMotionVector& operator=(const CompoundMotionVector& rhs) {
- mv64 = rhs.mv64;
- return *this;
- }
-
- bool operator==(const CompoundMotionVector& rhs) const {
- return mv64 == rhs.mv64;
- }
-
MotionVector mv[2];
// A uint64_t view into the |mv| array. Useful for cases where all the motion
// vectors have to be copied or compared with a single 64 bit instruction.
@@ -163,6 +138,11 @@ struct PredictionParameters : public Allocable {
MotionVector global_mv[2];
int num_warp_samples;
int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
+ PaletteModeInfo palette_mode_info;
+ int8_t segment_id; // segment_id is in the range [0, 7].
+ PredictionMode uv_mode;
+ bool chroma_top_uses_smooth_prediction;
+ bool chroma_left_uses_smooth_prediction;
};
// A lot of BlockParameters objects are created, so the smallest type is used
@@ -171,19 +151,8 @@ struct PredictionParameters : public Allocable {
struct BlockParameters : public Allocable {
BlockSize size;
bool skip;
- // True means that this block will use some default settings (that
- // correspond to compound prediction) and so most of the mode info is
- // skipped. False means that the mode info is not skipped.
- bool skip_mode;
bool is_inter;
- bool is_explicit_compound_type; // comp_group_idx in the spec.
- bool is_compound_type_average; // compound_idx in the spec.
- bool is_global_mv_block;
- bool use_predicted_segment_id; // only valid with temporal update enabled.
- int8_t segment_id; // segment_id is in the range [0, 7].
PredictionMode y_mode;
- PredictionMode uv_mode;
- TransformSize transform_size;
TransformSize uv_transform_size;
InterpolationFilter interpolation_filter[2];
ReferenceFrameType reference_frame[2];
@@ -194,7 +163,6 @@ struct BlockParameters : public Allocable {
// 3 - V plane (both directions).
uint8_t deblock_filter_level[kFrameLfCount];
CompoundMotionVector mv;
- PaletteModeInfo palette_mode_info;
// When |Tile::split_parse_and_decode_| is true, each block gets its own
// instance of |prediction_parameters|. When it is false, all the blocks point
// to |Tile::prediction_parameters_|. This field is valid only as long as the
@@ -203,6 +171,18 @@ struct BlockParameters : public Allocable {
std::unique_ptr<PredictionParameters> prediction_parameters;
};
+// Used to store the left and top block parameters that are used for computing
+// the cdf context of the subsequent blocks.
+struct BlockCdfContext {
+ bool use_predicted_segment_id[32];
+ bool is_explicit_compound_type[32]; // comp_group_idx in the spec.
+ bool is_compound_type_average[32]; // compound_idx in the spec.
+ bool skip_mode[32];
+ uint8_t palette_size[kNumPlaneTypes][32];
+ uint16_t palette_color[32][kNumPlaneTypes][kMaxPaletteSize];
+ PredictionMode uv_mode[32];
+};
+
// A five dimensional array used to store the wedge masks. The dimensions are:
// - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
// - flip_sign (0 or 1).
diff --git a/src/utils/unbounded_queue_test.cc b/src/utils/unbounded_queue_test.cc
new file mode 100644
index 0000000..b107ad0
--- /dev/null
+++ b/src/utils/unbounded_queue_test.cc
@@ -0,0 +1,163 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/unbounded_queue.h"
+
+#include <new>
+#include <utility>
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+class Integer {
+ public:
+ explicit Integer(int value) : value_(new (std::nothrow) int{value}) {}
+
+ // Move only.
+ Integer(Integer&& other) : value_(other.value_) { other.value_ = nullptr; }
+ Integer& operator=(Integer&& other) {
+ if (this != &other) {
+ delete value_;
+ value_ = other.value_;
+ other.value_ = nullptr;
+ }
+ return *this;
+ }
+
+ ~Integer() { delete value_; }
+
+ int value() const { return *value_; }
+
+ private:
+ int* value_;
+};
+
+TEST(UnboundedQueueTest, Basic) {
+ UnboundedQueue<int> queue;
+ ASSERT_TRUE(queue.Init());
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_TRUE(queue.GrowIfNeeded());
+ queue.Push(i);
+ EXPECT_FALSE(queue.Empty());
+ }
+
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_FALSE(queue.Empty());
+ EXPECT_EQ(queue.Front(), i);
+ queue.Pop();
+ }
+ EXPECT_TRUE(queue.Empty());
+}
+
+TEST(UnboundedQueueTest, WrapAround) {
+ UnboundedQueue<int> queue;
+ ASSERT_TRUE(queue.Init());
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 1000; ++i) {
+ EXPECT_TRUE(queue.GrowIfNeeded());
+ queue.Push(i);
+ EXPECT_FALSE(queue.Empty());
+ EXPECT_EQ(queue.Front(), i);
+ queue.Pop();
+ EXPECT_TRUE(queue.Empty());
+ }
+}
+
+TEST(UnboundedQueueTest, EmptyBeforeInit) {
+ UnboundedQueue<int> queue;
+ EXPECT_TRUE(queue.Empty());
+}
+
+TEST(UnboundedQueueTest, LotsOfElements) {
+ UnboundedQueue<Integer> queue;
+ ASSERT_TRUE(queue.Init());
+ EXPECT_TRUE(queue.Empty());
+
+ for (int i = 0; i < 10000; ++i) {
+ Integer integer(i);
+ EXPECT_EQ(integer.value(), i);
+ EXPECT_TRUE(queue.GrowIfNeeded());
+ queue.Push(std::move(integer));
+ EXPECT_FALSE(queue.Empty());
+ }
+
+ for (int i = 0; i < 5000; ++i) {
+ EXPECT_FALSE(queue.Empty());
+ const Integer& integer = queue.Front();
+ EXPECT_EQ(integer.value(), i);
+ queue.Pop();
+ }
+ // Leave some elements in the queue to test destroying a nonempty queue.
+ EXPECT_FALSE(queue.Empty());
+}
+
+// Copy constructor and assignment are deleted, but move constructor and
+// assignment are OK.
+TEST(UnboundedQueueTest, Move) {
+ UnboundedQueue<int> ints1;
+ ASSERT_TRUE(ints1.Init());
+ EXPECT_TRUE(ints1.GrowIfNeeded());
+ ints1.Push(2);
+ EXPECT_TRUE(ints1.GrowIfNeeded());
+ ints1.Push(3);
+ EXPECT_TRUE(ints1.GrowIfNeeded());
+ ints1.Push(5);
+ EXPECT_TRUE(ints1.GrowIfNeeded());
+ ints1.Push(7);
+
+ // Move constructor.
+ UnboundedQueue<int> ints2(std::move(ints1));
+ EXPECT_EQ(ints2.Front(), 2);
+ ints2.Pop();
+ EXPECT_EQ(ints2.Front(), 3);
+ ints2.Pop();
+ EXPECT_EQ(ints2.Front(), 5);
+ ints2.Pop();
+ EXPECT_EQ(ints2.Front(), 7);
+ ints2.Pop();
+ EXPECT_TRUE(ints2.Empty());
+
+ EXPECT_TRUE(ints2.GrowIfNeeded());
+ ints2.Push(11);
+ EXPECT_TRUE(ints2.GrowIfNeeded());
+ ints2.Push(13);
+ EXPECT_TRUE(ints2.GrowIfNeeded());
+ ints2.Push(17);
+ EXPECT_TRUE(ints2.GrowIfNeeded());
+ ints2.Push(19);
+
+ // Move assignment.
+ UnboundedQueue<int> ints3;
+ ASSERT_TRUE(ints3.Init());
+ EXPECT_TRUE(ints3.GrowIfNeeded());
+ ints3.Push(23);
+ ints3 = std::move(ints2);
+ EXPECT_EQ(ints3.Front(), 11);
+ ints3.Pop();
+ EXPECT_EQ(ints3.Front(), 13);
+ ints3.Pop();
+ EXPECT_EQ(ints3.Front(), 17);
+ ints3.Pop();
+ EXPECT_EQ(ints3.Front(), 19);
+ ints3.Pop();
+ EXPECT_TRUE(ints3.Empty());
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/utils/vector.h b/src/utils/vector.h
index e211240..9a21aeb 100644
--- a/src/utils/vector.h
+++ b/src/utils/vector.h
@@ -24,6 +24,7 @@
#include <cstdlib>
#include <cstring>
#include <iterator>
+#include <new>
#include <type_traits>
#include <utility>
diff --git a/src/utils/vector_test.cc b/src/utils/vector_test.cc
new file mode 100644
index 0000000..5b0127c
--- /dev/null
+++ b/src/utils/vector_test.cc
@@ -0,0 +1,234 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/vector.h"
+
+#include <memory>
+#include <new>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "src/utils/compiler_attributes.h"
+
+#if LIBGAV1_MSAN
+#include <sanitizer/msan_interface.h>
+#endif
+
+namespace libgav1 {
+namespace {
+
+class Foo {
+ public:
+ Foo() = default;
+
+ int x() const { return x_; }
+
+ private:
+ int x_ = 38;
+};
+
+class Point {
+ public:
+ Point(int x, int y) : x_(x), y_(y) {}
+
+ int x() const { return x_; }
+ int y() const { return y_; }
+
+ private:
+ int x_;
+ int y_;
+};
+
+TEST(VectorTest, NoCtor) {
+ VectorNoCtor<int> v;
+ EXPECT_TRUE(v.resize(100));
+ Vector<int> w;
+ EXPECT_TRUE(w.resize(100));
+
+#if LIBGAV1_MSAN
+ // Use MemorySanitizer to check VectorNoCtor::resize() does not initialize
+ // the memory while Vector::resize() does.
+ //
+ // __msan_test_shadow(const void *x, uptr size) returns the offset of the
+ // first (at least partially) poisoned byte in the range, or -1 if the whole
+ // range is good.
+ for (size_t i = 0; i < 100; ++i) {
+ EXPECT_EQ(__msan_test_shadow(&v[i], sizeof(int)), 0);
+ EXPECT_EQ(__msan_test_shadow(&w[i], sizeof(int)), -1);
+ }
+#endif
+}
+
+TEST(VectorTest, Constructor) {
+ Vector<Foo> v;
+ EXPECT_TRUE(v.resize(100));
+ for (const Foo& foo : v) {
+ EXPECT_EQ(foo.x(), 38);
+ }
+}
+
+TEST(VectorTest, PushBack) {
+ // Create a vector containing integers
+ Vector<int> v;
+ EXPECT_TRUE(v.reserve(8));
+ EXPECT_EQ(v.size(), 0);
+
+ EXPECT_TRUE(v.push_back(25));
+ EXPECT_EQ(v.size(), 1);
+ EXPECT_EQ(v[0], 25);
+
+ EXPECT_TRUE(v.push_back(13));
+ EXPECT_EQ(v.size(), 2);
+ EXPECT_EQ(v[0], 25);
+ EXPECT_EQ(v[1], 13);
+}
+
+TEST(VectorTest, PushBackUnchecked) {
+ Vector<std::unique_ptr<Point>> v;
+ EXPECT_TRUE(v.reserve(2));
+ EXPECT_EQ(v.size(), 0);
+
+ std::unique_ptr<Point> point(new (std::nothrow) Point(1, 2));
+ EXPECT_NE(point, nullptr);
+ v.push_back_unchecked(std::move(point));
+ EXPECT_EQ(v.size(), 1);
+ EXPECT_EQ(v[0]->x(), 1);
+ EXPECT_EQ(v[0]->y(), 2);
+
+ point.reset(new (std::nothrow) Point(3, 4));
+ EXPECT_NE(point, nullptr);
+ v.push_back_unchecked(std::move(point));
+ EXPECT_EQ(v.size(), 2);
+ EXPECT_EQ(v[0]->x(), 1);
+ EXPECT_EQ(v[0]->y(), 2);
+ EXPECT_EQ(v[1]->x(), 3);
+ EXPECT_EQ(v[1]->y(), 4);
+}
+
+TEST(VectorTest, EmplaceBack) {
+ Vector<Point> v;
+ EXPECT_EQ(v.size(), 0);
+
+ EXPECT_TRUE(v.emplace_back(1, 2));
+ EXPECT_EQ(v.size(), 1);
+ EXPECT_EQ(v[0].x(), 1);
+ EXPECT_EQ(v[0].y(), 2);
+
+ EXPECT_TRUE(v.emplace_back(3, 4));
+ EXPECT_EQ(v.size(), 2);
+ EXPECT_EQ(v[0].x(), 1);
+ EXPECT_EQ(v[0].y(), 2);
+ EXPECT_EQ(v[1].x(), 3);
+ EXPECT_EQ(v[1].y(), 4);
+}
+
+// Copy constructor and assignment are deleted, but move constructor and
+// assignment are OK.
+TEST(VectorTest, Move) {
+ Vector<int> ints1;
+ EXPECT_TRUE(ints1.reserve(4));
+ EXPECT_TRUE(ints1.push_back(2));
+ EXPECT_TRUE(ints1.push_back(3));
+ EXPECT_TRUE(ints1.push_back(5));
+ EXPECT_TRUE(ints1.push_back(7));
+
+ // Move constructor.
+ Vector<int> ints2(std::move(ints1));
+ EXPECT_EQ(ints2.size(), 4);
+ EXPECT_EQ(ints2[0], 2);
+ EXPECT_EQ(ints2[1], 3);
+ EXPECT_EQ(ints2[2], 5);
+ EXPECT_EQ(ints2[3], 7);
+
+ // Move assignment.
+ Vector<int> ints3;
+ EXPECT_TRUE(ints3.reserve(1));
+ EXPECT_TRUE(ints3.push_back(11));
+ ints3 = std::move(ints2);
+ EXPECT_EQ(ints3.size(), 4);
+ EXPECT_EQ(ints3[0], 2);
+ EXPECT_EQ(ints3[1], 3);
+ EXPECT_EQ(ints3[2], 5);
+ EXPECT_EQ(ints3[3], 7);
+}
+
+TEST(VectorTest, Erase) {
+ Vector<int> ints;
+ EXPECT_TRUE(ints.reserve(4));
+ EXPECT_TRUE(ints.push_back(2));
+ EXPECT_TRUE(ints.push_back(3));
+ EXPECT_TRUE(ints.push_back(5));
+ EXPECT_TRUE(ints.push_back(7));
+
+ EXPECT_EQ(ints.size(), 4);
+ EXPECT_EQ(ints[0], 2);
+ EXPECT_EQ(ints[1], 3);
+ EXPECT_EQ(ints[2], 5);
+ EXPECT_EQ(ints[3], 7);
+
+ ints.erase(ints.begin());
+ EXPECT_EQ(ints.size(), 3);
+ EXPECT_EQ(ints[0], 3);
+ EXPECT_EQ(ints[1], 5);
+ EXPECT_EQ(ints[2], 7);
+}
+
+TEST(VectorTest, EraseNonTrivial) {
+ // A simple class that sets an int value to 0 in the destructor.
+ class Cleaner {
+ public:
+ explicit Cleaner(int* value) : value_(value) {}
+ ~Cleaner() { *value_ = 0; }
+
+ int value() const { return *value_; }
+
+ private:
+ int* value_;
+ };
+ int value1 = 100;
+ int value2 = 200;
+ Vector<std::unique_ptr<Cleaner>> v;
+ EXPECT_TRUE(v.reserve(2));
+ EXPECT_EQ(v.capacity(), 2);
+
+ std::unique_ptr<Cleaner> c(new (std::nothrow) Cleaner(&value1));
+ EXPECT_NE(c, nullptr);
+ EXPECT_TRUE(v.push_back(std::move(c)));
+ c.reset(new (std::nothrow) Cleaner(&value2));
+ EXPECT_NE(c, nullptr);
+ EXPECT_TRUE(v.push_back(std::move(c)));
+ EXPECT_EQ(v.size(), 2);
+ EXPECT_EQ(value1, 100);
+ EXPECT_EQ(value2, 200);
+
+ v.erase(v.begin());
+ EXPECT_EQ(v.size(), 1);
+ EXPECT_EQ(v.capacity(), 2);
+ EXPECT_EQ(value1, 0);
+ EXPECT_EQ(value2, 200);
+ EXPECT_EQ(v[0].get()->value(), value2);
+
+ EXPECT_TRUE(v.shrink_to_fit());
+ EXPECT_EQ(v.size(), 1);
+ EXPECT_EQ(v.capacity(), 1);
+ EXPECT_EQ(value2, 200);
+ EXPECT_EQ(v[0].get()->value(), value2);
+
+ v.clear();
+ EXPECT_TRUE(v.empty());
+ EXPECT_EQ(value2, 0);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/version_test.cc b/src/version_test.cc
new file mode 100644
index 0000000..aaa5e1c
--- /dev/null
+++ b/src/version_test.cc
@@ -0,0 +1,66 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/gav1/version.h"
+
+#include <regex> // NOLINT (unapproved c++11 header)
+
+#include "gtest/gtest.h"
+
+namespace libgav1 {
+namespace {
+
+TEST(VersionTest, GetVersion) {
+ const int library_version = GetVersion();
+ EXPECT_EQ((library_version >> 24) & 0xff, 0);
+ // Note if we link against a shared object there's potential for a mismatch
+ // if a different library is loaded at runtime.
+ EXPECT_EQ((library_version >> 16) & 0xff, LIBGAV1_MAJOR_VERSION);
+ EXPECT_EQ((library_version >> 8) & 0xff, LIBGAV1_MINOR_VERSION);
+ EXPECT_EQ(library_version & 0xff, LIBGAV1_PATCH_VERSION);
+
+ const int header_version = LIBGAV1_VERSION;
+ EXPECT_EQ((header_version >> 24) & 0xff, 0);
+ EXPECT_EQ((header_version >> 16) & 0xff, LIBGAV1_MAJOR_VERSION);
+ EXPECT_EQ((header_version >> 8) & 0xff, LIBGAV1_MINOR_VERSION);
+ EXPECT_EQ(header_version & 0xff, LIBGAV1_PATCH_VERSION);
+}
+
+TEST(VersionTest, GetVersionString) {
+ const char* version = GetVersionString();
+ ASSERT_NE(version, nullptr);
+ // https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
+ const std::regex semver_regex(
+ R"(^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*))"
+ R"((?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))"
+ R"((?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?)"
+ R"((?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$)");
+
+ EXPECT_TRUE(std::regex_match(version, semver_regex)) << version;
+ // Regex validation:
+ // It shouldn't accept a version starting with a non-digit.
+ version = "v1.2.3";
+ EXPECT_FALSE(std::regex_match(version, semver_regex)) << version;
+ // It shouldn't accept a version with spaces."
+ version = "1.2.3 alpha";
+ EXPECT_FALSE(std::regex_match(version, semver_regex)) << version;
+}
+
+TEST(VersionTest, GetBuildConfiguration) {
+ const char* config = GetBuildConfiguration();
+ ASSERT_NE(config, nullptr);
+}
+
+} // namespace
+} // namespace libgav1
diff --git a/src/warp_prediction.cc b/src/warp_prediction.cc
index dd06317..69b40e8 100644
--- a/src/warp_prediction.cc
+++ b/src/warp_prediction.cc
@@ -153,10 +153,8 @@ bool WarpEstimation(const int num_samples, const int block_width4x4,
const int mid_x = MultiplyBy4(column4x4) + MultiplyBy2(block_width4x4) - 1;
const int subpixel_mid_y = MultiplyBy8(mid_y);
const int subpixel_mid_x = MultiplyBy8(mid_x);
- const int reference_subpixel_mid_y =
- subpixel_mid_y + mv.mv[MotionVector::kRow];
- const int reference_subpixel_mid_x =
- subpixel_mid_x + mv.mv[MotionVector::kColumn];
+ const int reference_subpixel_mid_y = subpixel_mid_y + mv.mv[0];
+ const int reference_subpixel_mid_x = subpixel_mid_x + mv.mv[1];
for (int i = 0; i < num_samples; ++i) {
// candidates[][0] and candidates[][1] are the row/column coordinates of the
@@ -223,14 +221,12 @@ bool WarpEstimation(const int num_samples, const int block_width4x4,
params[4] = NonDiagonalClamp(params[4]);
params[5] = DiagonalClamp(params[5]);
- const int vx =
- mv.mv[MotionVector::kColumn] * (1 << (kWarpedModelPrecisionBits - 3)) -
- (mid_x * (params[2] - (1 << kWarpedModelPrecisionBits)) +
- mid_y * params[3]);
- const int vy =
- mv.mv[MotionVector::kRow] * (1 << (kWarpedModelPrecisionBits - 3)) -
- (mid_x * params[4] +
- mid_y * (params[5] - (1 << kWarpedModelPrecisionBits)));
+ const int vx = mv.mv[1] * (1 << (kWarpedModelPrecisionBits - 3)) -
+ (mid_x * (params[2] - (1 << kWarpedModelPrecisionBits)) +
+ mid_y * params[3]);
+ const int vy = mv.mv[0] * (1 << (kWarpedModelPrecisionBits - 3)) -
+ (mid_x * params[4] +
+ mid_y * (params[5] - (1 << kWarpedModelPrecisionBits)));
params[0] =
Clip3(vx, -kWarpModelTranslationClamp, kWarpModelTranslationClamp - 1);
params[1] =
diff --git a/src/warp_prediction_test.cc b/src/warp_prediction_test.cc
new file mode 100644
index 0000000..46f262f
--- /dev/null
+++ b/src/warp_prediction_test.cc
@@ -0,0 +1,246 @@
+// Copyright 2021 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/warp_prediction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
+#include "src/obu_parser.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/types.h"
+#include "tests/third_party/libvpx/acm_random.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr int16_t kExpectedWarpParamsOutput[10][4] = {
+ {0, 0, 0, 0},
+ {2880, 2880, 2752, 2752},
+ {-1408, -1408, -1472, -1472},
+ {0, 0, 0, 0},
+ {6784, 6784, 6144, 6144}, // Invalid.
+ {-5312, -5312, -5824, -5824},
+ {-3904, -3904, -4160, -4160},
+ {2496, 2496, 2368, 2368},
+ {1024, 1024, 1024, 1024},
+ {-7808, -7808, -8832, -8832}, // Invalid.
+};
+
+constexpr bool kExpectedWarpValid[10] = {
+ true, true, true, true, false, true, true, true, true, false,
+};
+
+int RandomWarpedParam(int seed_offset, int bits) {
+ libvpx_test::ACMRandom rnd(seed_offset +
+ libvpx_test::ACMRandom::DeterministicSeed());
+ // 1 in 8 chance of generating zero (arbitrary).
+ const bool zero = (rnd.Rand16() & 7) == 0;
+ if (zero) return 0;
+ // Generate uniform values in the range [-(1 << bits), 1] U [1, 1 << bits].
+ const int mask = (1 << bits) - 1;
+ const int value = 1 + (rnd.RandRange(1U << 31) & mask);
+ const bool sign = (rnd.Rand16() & 1) != 0;
+ return sign ? value : -value;
+}
+
+void GenerateWarpedModel(GlobalMotion* warp_params, int seed) {
+ do {
+ warp_params->params[0] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
+ warp_params->params[1] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
+ warp_params->params[2] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
+ (1 << kWarpedModelPrecisionBits);
+ warp_params->params[3] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
+ warp_params->params[4] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
+ warp_params->params[5] =
+ RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
+ (1 << kWarpedModelPrecisionBits);
+ } while (warp_params->params[2] == 0);
+}
+
+TEST(WarpPredictionTest, SetupShear) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(kExpectedWarpParamsOutput); ++i) {
+ GlobalMotion warp_params;
+ GenerateWarpedModel(&warp_params, static_cast<int>(i));
+ const bool warp_valid = SetupShear(&warp_params);
+
+ SCOPED_TRACE(testing::Message() << "Test failure at iteration: " << i);
+ EXPECT_EQ(warp_valid, kExpectedWarpValid[i]);
+ EXPECT_EQ(warp_params.alpha, kExpectedWarpParamsOutput[i][0]);
+ EXPECT_EQ(warp_params.beta, kExpectedWarpParamsOutput[i][1]);
+ EXPECT_EQ(warp_params.gamma, kExpectedWarpParamsOutput[i][2]);
+ EXPECT_EQ(warp_params.delta, kExpectedWarpParamsOutput[i][3]);
+ }
+
+ // Test signed shift behavior in delta and gamma generation.
+ GlobalMotion warp_params;
+ warp_params.params[0] = 24748;
+ warp_params.params[1] = -142530;
+ warp_params.params[2] = 65516;
+ warp_params.params[3] = -640;
+ warp_params.params[4] = 256;
+ warp_params.params[5] = 65310;
+ EXPECT_TRUE(SetupShear(&warp_params));
+ EXPECT_EQ(warp_params.alpha, 0);
+ EXPECT_EQ(warp_params.beta, -640);
+ EXPECT_EQ(warp_params.gamma, 256);
+ EXPECT_EQ(warp_params.delta, -192);
+
+ warp_params.params[0] = 24748;
+ warp_params.params[1] = -142530;
+ warp_params.params[2] = 61760;
+ warp_params.params[3] = -640;
+ warp_params.params[4] = -13312;
+ warp_params.params[5] = 65310;
+ EXPECT_TRUE(SetupShear(&warp_params));
+ EXPECT_EQ(warp_params.alpha, -3776);
+ EXPECT_EQ(warp_params.beta, -640);
+ EXPECT_EQ(warp_params.gamma, -14144);
+ EXPECT_EQ(warp_params.delta, -384);
+}
+
+struct WarpInputParam {
+ WarpInputParam(int num_samples, int block_width4x4, int block_height4x4)
+ : num_samples(num_samples),
+ block_width4x4(block_width4x4),
+ block_height4x4(block_height4x4) {}
+ int num_samples;
+ int block_width4x4;
+ int block_height4x4;
+};
+
+std::ostream& operator<<(std::ostream& os, const WarpInputParam& param) {
+ return os << "num_samples: " << param.num_samples
+ << ", block_(width/height)4x4: " << param.block_width4x4 << "x"
+ << param.block_height4x4;
+}
+
+const WarpInputParam warp_test_param[] = {
+ // sample = 1.
+ WarpInputParam(1, 1, 1),
+ WarpInputParam(1, 1, 2),
+ WarpInputParam(1, 2, 1),
+ WarpInputParam(1, 2, 2),
+ WarpInputParam(1, 2, 4),
+ WarpInputParam(1, 4, 2),
+ WarpInputParam(1, 4, 4),
+ WarpInputParam(1, 4, 8),
+ WarpInputParam(1, 8, 4),
+ WarpInputParam(1, 8, 8),
+ WarpInputParam(1, 8, 16),
+ WarpInputParam(1, 16, 8),
+ WarpInputParam(1, 16, 16),
+ WarpInputParam(1, 16, 32),
+ WarpInputParam(1, 32, 16),
+ WarpInputParam(1, 32, 32),
+ // sample = 8.
+ WarpInputParam(8, 1, 1),
+ WarpInputParam(8, 1, 2),
+ WarpInputParam(8, 2, 1),
+ WarpInputParam(8, 2, 2),
+ WarpInputParam(8, 2, 4),
+ WarpInputParam(8, 4, 2),
+ WarpInputParam(8, 4, 4),
+ WarpInputParam(8, 4, 8),
+ WarpInputParam(8, 8, 4),
+ WarpInputParam(8, 8, 8),
+ WarpInputParam(8, 8, 16),
+ WarpInputParam(8, 16, 8),
+ WarpInputParam(8, 16, 16),
+ WarpInputParam(8, 16, 32),
+ WarpInputParam(8, 32, 16),
+ WarpInputParam(8, 32, 32),
+};
+
+constexpr bool kExpectedWarpEstimationValid[2] = {false, true};
+
+constexpr int kExpectedWarpEstimationOutput[16][6] = {
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {8388607, 8388607, 57345, -8191, -8191, 57345},
+ {2146296, 1589240, 57345, 8191, -8191, 73727},
+ {1753128, 1196072, 73727, -8191, 8191, 57345},
+ {-8388608, -8388608, 73727, 8191, 8191, 73727},
+ {-4435485, -8388608, 65260, 8191, 8191, 73727},
+ {-8388608, -7552929, 73727, 8191, 8191, 68240},
+ {-8388608, -8388608, 73727, 8191, 8191, 70800},
+};
+
+class WarpEstimationTest : public testing::TestWithParam<WarpInputParam> {
+ public:
+ WarpEstimationTest() = default;
+ ~WarpEstimationTest() override = default;
+
+ protected:
+ WarpInputParam param_ = GetParam();
+};
+
+TEST_P(WarpEstimationTest, WarpEstimation) {
+ // Set input params.
+ libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+ const int row4x4 = rnd.Rand8();
+ const int column4x4 = rnd.Rand8();
+ MotionVector mv;
+ mv.mv[0] = rnd.Rand8();
+ mv.mv[1] = rnd.Rand8();
+ int candidates[kMaxLeastSquaresSamples][4];
+ for (int i = 0; i < param_.num_samples; ++i) {
+ // Make candidates relative to the top left of frame.
+ candidates[i][0] = rnd.Rand8() + MultiplyBy32(row4x4);
+ candidates[i][1] = rnd.Rand8() + MultiplyBy32(column4x4);
+ candidates[i][2] = rnd.Rand8() + MultiplyBy32(row4x4);
+ candidates[i][3] = rnd.Rand8() + MultiplyBy32(column4x4);
+ }
+
+ // Get output.
+ GlobalMotion warp_params;
+ const bool warp_success = WarpEstimation(
+ param_.num_samples, param_.block_width4x4, param_.block_height4x4, row4x4,
+ column4x4, mv, candidates, &warp_params);
+ if (param_.num_samples == 1) {
+ EXPECT_EQ(warp_success, kExpectedWarpEstimationValid[0]);
+ } else {
+ EXPECT_EQ(warp_success, kExpectedWarpEstimationValid[1]);
+ int index = FloorLog2(param_.block_width4x4) * 3 - 1;
+ if (param_.block_width4x4 == param_.block_height4x4) {
+ index += 1;
+ } else if (param_.block_width4x4 < param_.block_height4x4) {
+ index += 2;
+ }
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(warp_params.params); ++i) {
+ EXPECT_EQ(warp_params.params[i], kExpectedWarpEstimationOutput[index][i]);
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(WarpFuncTest, WarpEstimationTest,
+ testing::ValuesIn(warp_test_param));
+} // namespace
+} // namespace libgav1
diff --git a/src/yuv_buffer.cc b/src/yuv_buffer.cc
index c74e140..efb8016 100644
--- a/src/yuv_buffer.cc
+++ b/src/yuv_buffer.cc
@@ -20,6 +20,7 @@
#include "src/frame_buffer_utils.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/logging.h"
namespace libgav1 {
@@ -195,6 +196,60 @@ bool YuvBuffer::Realloc(int bitdepth, bool is_monochrome, int width, int height,
assert(!is_monochrome || buffer_[kPlaneU] == nullptr);
assert(!is_monochrome || buffer_[kPlaneV] == nullptr);
+#if LIBGAV1_MSAN
+ const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
+ int width_in_bytes = width * pixel_size;
+ // The optimized loop restoration code will overread the visible frame buffer
+ // into the right border. The optimized cfl subsambler uses the right border
+ // as well. Initialize the right border and padding to prevent msan warnings.
+ int right_border_size_in_bytes = right_border * pixel_size;
+ // Calculate the padding bytes for the buffer. Note: The stride of the buffer
+ // is always a multiple of 16. (see yuv_buffer.h)
+ const int right_padding_in_bytes =
+ stride_[kPlaneY] - (pixel_size * (width + left_border + right_border));
+ const int padded_right_border_size =
+ right_border_size_in_bytes + right_padding_in_bytes;
+ constexpr uint8_t right_val = 0x55;
+ uint8_t* rb = buffer_[kPlaneY] + width_in_bytes;
+ for (int i = 0; i < height + bottom_border; ++i) {
+ memset(rb, right_val, padded_right_border_size);
+ rb += stride_[kPlaneY];
+ }
+ if (!is_monochrome) {
+ int uv_width_in_bytes = uv_width * pixel_size;
+ int uv_right_border_size_in_bytes = uv_right_border * pixel_size;
+ const int u_right_padding_in_bytes =
+ stride_[kPlaneU] -
+ (pixel_size * (uv_width + uv_left_border + uv_right_border));
+ const int u_padded_right_border_size =
+ uv_right_border_size_in_bytes + u_right_padding_in_bytes;
+ rb = buffer_[kPlaneU] + uv_width_in_bytes;
+ for (int i = 0; i < uv_height; ++i) {
+ memset(rb, right_val, u_padded_right_border_size);
+ rb += stride_[kPlaneU];
+ }
+ const int v_right_padding_in_bytes =
+ stride_[kPlaneV] -
+ ((uv_width + uv_left_border + uv_right_border) * pixel_size);
+ const int v_padded_right_border_size =
+ uv_right_border_size_in_bytes + v_right_padding_in_bytes;
+ rb = buffer_[kPlaneV] + uv_width_in_bytes;
+ for (int i = 0; i < uv_height; ++i) {
+ memset(rb, right_val, v_padded_right_border_size);
+ rb += stride_[kPlaneV];
+ }
+ }
+
+ // The optimized cfl subsampler will overread (to the right of the current
+ // block) into the uninitialized visible area. The cfl subsampler can overread
+ // into the bottom border as well. Initialize the both to quiet msan warnings.
+ uint8_t* y_visible = buffer_[kPlaneY];
+ for (int i = 0; i < height + bottom_border; ++i) {
+ memset(y_visible, right_val, width_in_bytes);
+ y_visible += stride_[kPlaneY];
+ }
+#endif
+
return true;
}
diff --git a/tests/block_utils.cc b/tests/block_utils.cc
index 96833a2..07337c4 100644
--- a/tests/block_utils.cc
+++ b/tests/block_utils.cc
@@ -23,6 +23,7 @@ namespace libgav1 {
namespace test_utils {
namespace {
+#define LIBGAV1_DEBUG_FORMAT_CODE "x"
template <typename Pixel>
void PrintBlockDiff(const Pixel* block1, const Pixel* block2, int width,
int height, int stride1, int stride2,
@@ -35,15 +36,17 @@ void PrintBlockDiff(const Pixel* block1, const Pixel* block2, int width,
for (int x = 0; x < print_width; ++x) {
if (x >= width) {
if (block1[x] == block2[x]) {
- printf("[%*d] ", field_width, block1[x]);
+ printf("[%*" LIBGAV1_DEBUG_FORMAT_CODE "] ", field_width, block1[x]);
} else {
- printf("[*%*d] ", field_width - 1, block1[x]);
+ printf("[*%*" LIBGAV1_DEBUG_FORMAT_CODE "] ", field_width - 1,
+ block1[x]);
}
} else {
if (block1[x] == block2[x]) {
- printf("%*d ", field_width, block1[x]);
+ printf("%*" LIBGAV1_DEBUG_FORMAT_CODE " ", field_width, block1[x]);
} else {
- printf("*%*d ", field_width - 1, block1[x]);
+ printf("*%*" LIBGAV1_DEBUG_FORMAT_CODE " ", field_width - 1,
+ block1[x]);
}
}
}
@@ -52,6 +55,7 @@ void PrintBlockDiff(const Pixel* block1, const Pixel* block2, int width,
block2 += stride2;
}
}
+#undef LIBGAV1_DEBUG_FORMAT_CODE
} // namespace
diff --git a/tests/data/five-frames.ivf b/tests/data/five-frames.ivf
new file mode 100644
index 0000000..08bc6db
--- /dev/null
+++ b/tests/data/five-frames.ivf
Binary files differ
diff --git a/tests/data/ivf-header-and-truncated-frame-header b/tests/data/ivf-header-and-truncated-frame-header
new file mode 100644
index 0000000..c6d7a6a
--- /dev/null
+++ b/tests/data/ivf-header-and-truncated-frame-header
Binary files differ
diff --git a/tests/data/ivf-header-only b/tests/data/ivf-header-only
new file mode 100644
index 0000000..e751f36
--- /dev/null
+++ b/tests/data/ivf-header-only
Binary files differ
diff --git a/tests/data/ivf-signature-only b/tests/data/ivf-signature-only
new file mode 100644
index 0000000..8550ef8
--- /dev/null
+++ b/tests/data/ivf-signature-only
@@ -0,0 +1 @@
+DKIF
diff --git a/tests/data/one-frame-large-timestamp.ivf b/tests/data/one-frame-large-timestamp.ivf
new file mode 100644
index 0000000..44886da
--- /dev/null
+++ b/tests/data/one-frame-large-timestamp.ivf
Binary files differ
diff --git a/tests/data/one-frame-truncated.ivf b/tests/data/one-frame-truncated.ivf
new file mode 100644
index 0000000..94e5b09
--- /dev/null
+++ b/tests/data/one-frame-truncated.ivf
Binary files differ
diff --git a/tests/data/one-frame.ivf b/tests/data/one-frame.ivf
new file mode 100644
index 0000000..436e461
--- /dev/null
+++ b/tests/data/one-frame.ivf
Binary files differ
diff --git a/tests/libgav1_tests.cmake b/tests/libgav1_tests.cmake
index ac2fb2e..2b3f41c 100644
--- a/tests/libgav1_tests.cmake
+++ b/tests/libgav1_tests.cmake
@@ -69,14 +69,56 @@ list(APPEND libgav1_tests_utils_sources
list(APPEND libgav1_tests_utils_test_sources
"${libgav1_root}/tests/utils_test.cc")
+list(APPEND libgav1_array_2d_test_sources
+ "${libgav1_source}/utils/array_2d_test.cc")
list(APPEND libgav1_average_blend_test_sources
"${libgav1_source}/dsp/average_blend_test.cc")
+list(APPEND libgav1_block_parameters_holder_test_sources
+ "${libgav1_source}/utils/block_parameters_holder_test.cc")
+list(APPEND libgav1_blocking_counter_test_sources
+ "${libgav1_source}/utils/blocking_counter_test.cc")
+list(APPEND libgav1_buffer_pool_test_sources
+ "${libgav1_source}/buffer_pool_test.cc")
list(APPEND libgav1_cdef_test_sources "${libgav1_source}/dsp/cdef_test.cc")
+list(
+ APPEND libgav1_common_test_sources "${libgav1_source}/utils/common_test.cc")
+list(APPEND libgav1_common_avx2_test_sources
+ "${libgav1_source}/dsp/x86/common_avx2.h"
+ "${libgav1_source}/dsp/x86/common_avx2.inc"
+ "${libgav1_source}/dsp/x86/common_avx2_test.cc"
+ "${libgav1_source}/dsp/x86/common_sse4.inc")
+list(APPEND libgav1_common_neon_test_sources
+ "${libgav1_source}/dsp/arm/common_neon_test.cc")
+list(APPEND libgav1_common_sse4_test_sources
+ "${libgav1_source}/dsp/x86/common_sse4.h"
+ "${libgav1_source}/dsp/x86/common_sse4.inc"
+ "${libgav1_source}/dsp/x86/common_sse4_test.cc")
list(APPEND libgav1_convolve_test_sources
"${libgav1_source}/dsp/convolve_test.cc")
+list(APPEND libgav1_cpu_test_sources "${libgav1_source}/utils/cpu_test.cc")
+list(APPEND libgav1_c_decoder_test_sources "${libgav1_source}/c_decoder_test.c")
+list(APPEND libgav1_c_version_test_sources "${libgav1_source}/c_version_test.c")
+list(APPEND libgav1_decoder_test_sources "${libgav1_source}/decoder_test.cc")
+list(APPEND libgav1_decoder_buffer_test_sources
+ "${libgav1_source}/decoder_buffer_test.cc")
list(APPEND libgav1_distance_weighted_blend_test_sources
"${libgav1_source}/dsp/distance_weighted_blend_test.cc")
list(APPEND libgav1_dsp_test_sources "${libgav1_source}/dsp/dsp_test.cc")
+list(APPEND libgav1_entropy_decoder_test_sources
+ "${libgav1_source}/utils/entropy_decoder_test.cc"
+ "${libgav1_source}/utils/entropy_decoder_test_data.inc")
+list(APPEND libgav1_file_reader_test_sources
+ "${libgav1_examples}/file_reader_test.cc"
+ "${libgav1_examples}/file_reader_test_common.cc"
+ "${libgav1_examples}/file_reader_test_common.h")
+list(APPEND libgav1_film_grain_test_sources
+ "${libgav1_source}/film_grain_test.cc")
+list(APPEND libgav1_file_reader_factory_test_sources
+ "${libgav1_examples}/file_reader_factory_test.cc")
+list(APPEND libgav1_file_writer_test_sources
+ "${libgav1_examples}/file_writer_test.cc")
+list(APPEND libgav1_internal_frame_buffer_list_test_sources
+ "${libgav1_source}/internal_frame_buffer_list_test.cc")
list(APPEND libgav1_intra_edge_test_sources
"${libgav1_source}/dsp/intra_edge_test.cc")
list(APPEND libgav1_intrapred_cfl_test_sources
@@ -103,8 +145,44 @@ list(APPEND libgav1_super_res_test_sources
"${libgav1_source}/dsp/super_res_test.cc")
list(APPEND libgav1_weight_mask_test_sources
"${libgav1_source}/dsp/weight_mask_test.cc")
+list(
+ APPEND libgav1_memory_test_sources "${libgav1_source}/utils/memory_test.cc")
list(APPEND libgav1_obmc_test_sources "${libgav1_source}/dsp/obmc_test.cc")
+list(APPEND libgav1_obu_parser_test_sources
+ "${libgav1_source}/obu_parser_test.cc")
+list(APPEND libgav1_post_filter_test_sources
+ "${libgav1_source}/post_filter_test.cc")
+list(APPEND libgav1_prediction_mask_test_sources
+ "${libgav1_source}/prediction_mask_test.cc")
+list(
+ APPEND libgav1_quantizer_test_sources "${libgav1_source}/quantizer_test.cc")
+list(APPEND libgav1_queue_test_sources "${libgav1_source}/utils/queue_test.cc")
+list(APPEND libgav1_raw_bit_reader_test_sources
+ "${libgav1_source}/utils/raw_bit_reader_test.cc")
+list(APPEND libgav1_reconstruction_test_sources
+ "${libgav1_source}/reconstruction_test.cc")
+list(APPEND libgav1_residual_buffer_pool_test_sources
+ "${libgav1_source}/residual_buffer_pool_test.cc")
+list(APPEND libgav1_scan_test_sources "${libgav1_source}/scan_test.cc")
+list(APPEND libgav1_segmentation_map_test_sources
+ "${libgav1_source}/utils/segmentation_map_test.cc")
+list(APPEND libgav1_segmentation_test_sources
+ "${libgav1_source}/utils/segmentation_test.cc")
+list(APPEND libgav1_stack_test_sources "${libgav1_source}/utils/stack_test.cc")
+list(APPEND libgav1_symbol_decoder_context_test_sources
+ "${libgav1_source}/symbol_decoder_context_test.cc")
+list(APPEND libgav1_threadpool_test_sources
+ "${libgav1_source}/utils/threadpool_test.cc")
+list(APPEND libgav1_threading_strategy_test_sources
+ "${libgav1_source}/threading_strategy_test.cc")
+list(APPEND libgav1_unbounded_queue_test_sources
+ "${libgav1_source}/utils/unbounded_queue_test.cc")
+list(
+ APPEND libgav1_vector_test_sources "${libgav1_source}/utils/vector_test.cc")
+list(APPEND libgav1_version_test_sources "${libgav1_source}/version_test.cc")
list(APPEND libgav1_warp_test_sources "${libgav1_source}/dsp/warp_test.cc")
+list(APPEND libgav1_warp_prediction_test_sources
+ "${libgav1_source}/warp_prediction_test.cc")
macro(libgav1_add_tests_targets)
if(NOT LIBGAV1_ENABLE_TESTS)
@@ -157,6 +235,343 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ array_2d_test
+ SOURCES
+ ${libgav1_array_2d_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ block_parameters_holder_test
+ SOURCES
+ ${libgav1_block_parameters_holder_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ blocking_counter_test
+ SOURCES
+ ${libgav1_blocking_counter_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ if(libgav1_have_avx2)
+ libgav1_add_executable(TEST
+ NAME
+ common_avx2_test
+ SOURCES
+ ${libgav1_common_avx2_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+ endif()
+
+ if(libgav1_have_neon)
+ libgav1_add_executable(TEST
+ NAME
+ common_neon_test
+ SOURCES
+ ${libgav1_common_neon_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_tests_block_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+ endif()
+
+ if(libgav1_have_sse4)
+ libgav1_add_executable(TEST
+ NAME
+ common_sse4_test
+ SOURCES
+ ${libgav1_common_sse4_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+ endif()
+
+ libgav1_add_executable(TEST
+ NAME
+ common_test
+ SOURCES
+ ${libgav1_common_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ cpu_test
+ SOURCES
+ ${libgav1_cpu_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ entropy_decoder_test
+ SOURCES
+ ${libgav1_entropy_decoder_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ file_reader_test
+ SOURCES
+ ${libgav1_file_reader_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_dsp
+ libgav1_file_reader
+ libgav1_utils
+ libgav1_tests_utils
+ LIB_DEPS
+ absl::strings
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ file_reader_factory_test
+ SOURCES
+ ${libgav1_file_reader_factory_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_file_reader
+ libgav1_utils
+ LIB_DEPS
+ absl::memory
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ film_grain_test
+ SOURCES
+ ${libgav1_film_grain_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_tests_block_utils
+ libgav1_tests_utils
+ libgav1_utils
+ LIB_DEPS
+ absl::str_format_internal
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ memory_test
+ SOURCES
+ ${libgav1_memory_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ absl::base
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ queue_test
+ SOURCES
+ ${libgav1_queue_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ segmentation_map_test
+ SOURCES
+ ${libgav1_segmentation_map_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ segmentation_test
+ SOURCES
+ ${libgav1_segmentation_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ stack_test
+ SOURCES
+ ${libgav1_stack_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ symbol_decoder_context_test
+ SOURCES
+ ${libgav1_symbol_decoder_context_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ threadpool_test
+ SOURCES
+ ${libgav1_threadpool_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ absl::synchronization
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ unbounded_queue_test
+ SOURCES
+ ${libgav1_unbounded_queue_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
tests_utils_test
SOURCES
${libgav1_tests_utils_test_sources}
@@ -169,11 +584,43 @@ macro(libgav1_add_tests_targets)
libgav1_tests_utils
libgav1_utils
LIB_DEPS
+ absl::strings
absl::time
${libgav1_common_test_absl_deps}
libgav1_gtest
libgav1_gtest_main)
+ libgav1_add_executable(TEST
+ NAME
+ vector_test
+ SOURCES
+ ${libgav1_vector_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ version_test
+ SOURCES
+ ${libgav1_version_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ ${libgav1_dependency}
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
libgav1_add_library(TEST
NAME
libgav1_tests_block_utils
@@ -214,7 +661,6 @@ macro(libgav1_add_tests_targets)
libgav1_tests_utils
libgav1_utils
LIB_DEPS
- absl::str_format_internal
absl::strings
absl::time
${libgav1_common_test_absl_deps}
@@ -223,6 +669,24 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ buffer_pool_test
+ SOURCES
+ ${libgav1_buffer_pool_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
cdef_test
SOURCES
${libgav1_cdef_test_sources}
@@ -266,6 +730,60 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ c_decoder_test
+ SOURCES
+ ${libgav1_c_decoder_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_include_paths}
+ LIB_DEPS
+ ${libgav1_dependency})
+
+ libgav1_add_executable(TEST
+ NAME
+ c_version_test
+ SOURCES
+ ${libgav1_c_version_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_include_paths}
+ LIB_DEPS
+ ${libgav1_dependency})
+
+ libgav1_add_executable(TEST
+ NAME
+ decoder_test
+ SOURCES
+ ${libgav1_decoder_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ ${libgav1_dependency}
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ decoder_buffer_test
+ SOURCES
+ ${libgav1_decoder_buffer_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ LIB_DEPS
+ ${libgav1_dependency}
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
distance_weighted_blend_test
SOURCES
${libgav1_distance_weighted_blend_test_sources}
@@ -279,7 +797,7 @@ macro(libgav1_add_tests_targets)
libgav1_tests_utils
libgav1_utils
LIB_DEPS
- absl::str_format_internal
+ absl::strings
absl::time
${libgav1_common_test_absl_deps}
libgav1_gtest
@@ -308,6 +826,29 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ file_writer_test
+ SOURCES
+ ${libgav1_file_writer_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_file_writer
+ libgav1_tests_utils
+ libgav1_utils
+ LIB_DEPS
+ absl::memory
+ absl::strings
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
intrapred_cfl_test
SOURCES
${libgav1_intrapred_cfl_test_sources}
@@ -435,6 +976,24 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ internal_frame_buffer_list_test
+ SOURCES
+ ${libgav1_internal_frame_buffer_list_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
loop_filter_test
SOURCES
${libgav1_loop_filter_test_sources}
@@ -490,7 +1049,7 @@ macro(libgav1_add_tests_targets)
libgav1_tests_utils
libgav1_utils
LIB_DEPS
- absl::str_format_internal
+ absl::strings
absl::time
${libgav1_common_test_absl_deps}
libgav1_gtest
@@ -562,6 +1121,158 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ obu_parser_test
+ SOURCES
+ ${libgav1_obu_parser_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ post_filter_test
+ SOURCES
+ ${libgav1_post_filter_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_tests_block_utils
+ libgav1_tests_utils
+ libgav1_utils
+ LIB_DEPS
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ prediction_mask_test
+ SOURCES
+ ${libgav1_prediction_mask_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_tests_utils
+ libgav1_utils
+ LIB_DEPS
+ absl::strings
+ absl::time
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ quantizer_test
+ SOURCES
+ ${libgav1_quantizer_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ raw_bit_reader_test
+ SOURCES
+ ${libgav1_raw_bit_reader_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ reconstruction_test
+ SOURCES
+ ${libgav1_reconstruction_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_tests_block_utils
+ libgav1_tests_utils
+ libgav1_utils
+ ${libgav1_test_objlib_deps}
+ LIB_DEPS
+ absl::strings
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ residual_buffer_pool_test
+ SOURCES
+ ${libgav1_residual_buffer_pool_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ ${libgav1_test_objlib_deps}
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
+ scan_test
+ SOURCES
+ ${libgav1_scan_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_utils
+ ${libgav1_test_objlib_deps}
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
super_res_test
SOURCES
${libgav1_super_res_test_sources}
@@ -583,6 +1294,26 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ threading_strategy_test
+ SOURCES
+ ${libgav1_threading_strategy_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ ${libgav1_test_objlib_deps}
+ LIB_DEPS
+ absl::str_format_internal
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
warp_test
SOURCES
${libgav1_warp_test_sources}
@@ -605,6 +1336,24 @@ macro(libgav1_add_tests_targets)
libgav1_add_executable(TEST
NAME
+ warp_prediction_test
+ SOURCES
+ ${libgav1_warp_prediction_test_sources}
+ DEFINES
+ ${libgav1_defines}
+ INCLUDES
+ ${libgav1_test_include_paths}
+ OBJLIB_DEPS
+ libgav1_decoder
+ libgav1_dsp
+ libgav1_utils
+ LIB_DEPS
+ ${libgav1_common_test_absl_deps}
+ libgav1_gtest
+ libgav1_gtest_main)
+
+ libgav1_add_executable(TEST
+ NAME
weight_mask_test
SOURCES
${libgav1_weight_mask_test_sources}
diff --git a/tests/utils.cc b/tests/utils.cc
index b73cf01..e91ea87 100644
--- a/tests/utils.cc
+++ b/tests/utils.cc
@@ -21,6 +21,7 @@
#include <memory>
#include <string>
+#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "gtest/gtest.h"
#include "src/dsp/dsp.h"
@@ -30,6 +31,30 @@
namespace libgav1 {
namespace test_utils {
+namespace {
+
+int CloseFile(FILE* stream) { return fclose(stream); }
+
+bool ReadFileToString(absl::string_view file_name, std::string* const string) {
+ using FilePtr = std::unique_ptr<FILE, decltype(&CloseFile)>;
+ FilePtr file(fopen(std::string(file_name).c_str(), "rb"), &CloseFile);
+ if (file == nullptr) return false;
+
+ do {
+ int c = fgetc(file.get());
+ if (ferror(file.get()) != 0) return false;
+
+ if (c != EOF) {
+ string->append(1, static_cast<char>(c));
+ } else {
+ break;
+ }
+ } while (true);
+
+ return true;
+}
+
+} // namespace
void ResetDspTable(const int bitdepth) {
dsp::Dsp* const dsp = dsp_internal::GetWritableDspTable(bitdepth);
@@ -116,5 +141,57 @@ void CheckMd5Digest(const char name[], const char function_name[],
EXPECT_STREQ(expected_digest, actual_digest);
}
+namespace {
+
+std::string GetSourceDir() {
+#if defined(__ANDROID__)
+ // Test files must be manually supplied. This path is frequently
+ // available on development devices.
+ return std::string("/data/local/tmp/tests/data");
+#elif defined(LIBGAV1_FLAGS_SRCDIR)
+ return std::string(LIBGAV1_FLAGS_SRCDIR) + "/tests/data";
+#else
+ return std::string(".");
+#endif // defined(__ANDROID__)
+}
+
+std::string GetTempDir() {
+ const char* path = getenv("TMPDIR");
+ if (path == nullptr || path[0] == '\0') path = getenv("TEMP");
+ if (path != nullptr && path[0] != '\0') return std::string(path);
+
+#if defined(__ANDROID__)
+ return std::string("/data/local/tmp");
+#elif defined(LIBGAV1_FLAGS_TMPDIR)
+ return std::string(LIBGAV1_FLAGS_TMPDIR);
+#else
+ return std::string(".");
+#endif // defined(__ANDROID__)
+}
+
+} // namespace
+
+std::string GetTestInputFilePath(absl::string_view file_name) {
+ const char* const path = getenv("LIBGAV1_TEST_DATA_PATH");
+ if (path != nullptr && path[0] != '\0') {
+ return std::string(path) + "/" + std::string(file_name);
+ }
+ return GetSourceDir() + "/" + std::string(file_name);
+}
+
+std::string GetTestOutputFilePath(absl::string_view file_name) {
+ return GetTempDir() + "/" + std::string(file_name);
+}
+
+void GetTestData(absl::string_view file_name, const bool is_output_file,
+ std::string* const output) {
+ ASSERT_NE(output, nullptr);
+ const std::string absolute_file_path = is_output_file
+ ? GetTestOutputFilePath(file_name)
+ : GetTestInputFilePath(file_name);
+
+ ASSERT_TRUE(ReadFileToString(absolute_file_path, output));
+}
+
} // namespace test_utils
} // namespace libgav1
diff --git a/tests/utils.h b/tests/utils.h
index b3062da..4d73070 100644
--- a/tests/utils.h
+++ b/tests/utils.h
@@ -22,6 +22,7 @@
#include <string>
#include "absl/base/config.h"
+#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "src/gav1/decoder_buffer.h"
#include "src/utils/memory.h"
@@ -132,6 +133,24 @@ void CheckMd5Digest(const char name[], const char function_name[],
const char expected_digest[], const char actual_digest[],
absl::Duration elapsed_time);
+//------------------------------------------------------------------------------
+// Reads the test data from |file_name| as a string into |output|. The
+// |is_output_file| argument controls the expansion of |file_name| to its full
+// path. When |is_output_file| is true GetTestData() reads from
+// utils.cc::GetTempDir(), and when it is false the file is read from
+// utils.cc::GetSourceDir().
+void GetTestData(absl::string_view file_name, bool is_output_file,
+ std::string* output);
+
+//------------------------------------------------------------------------------
+// Returns the full path to |file_name| from libgav1/tests/data.
+std::string GetTestInputFilePath(absl::string_view file_name);
+
+//------------------------------------------------------------------------------
+// Returns the full path to |file_name| in a location where the file can be
+// opened for writing.
+std::string GetTestOutputFilePath(absl::string_view file_name);
+
} // namespace test_utils
} // namespace libgav1