aboutsummaryrefslogtreecommitdiff
path: root/src/dsp/cdef.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/cdef.cc')
-rw-r--r--src/dsp/cdef.cc306
1 files changed, 306 insertions, 0 deletions
diff --git a/src/dsp/cdef.cc b/src/dsp/cdef.cc
new file mode 100644
index 0000000..0b50517
--- /dev/null
+++ b/src/dsp/cdef.cc
@@ -0,0 +1,306 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/cdef.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+#include "src/dsp/cdef.inc"
+
+// Silence unused function warnings when CdefDirection_C is obviated.
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
+ !defined(LIBGAV1_Dsp8bpp_CdefDirection) || \
+ (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefDirection))
+constexpr int16_t kDivisionTable[] = {840, 420, 280, 210, 168, 140, 120, 105};
+
+int32_t Square(int32_t x) { return x * x; }
+
+template <int bitdepth, typename Pixel>
+void CdefDirection_C(const void* const source, ptrdiff_t stride,
+ uint8_t* const direction, int* const variance) {
+ assert(direction != nullptr);
+ assert(variance != nullptr);
+ const auto* src = static_cast<const Pixel*>(source);
+ stride /= sizeof(Pixel);
+ int32_t cost[8] = {};
+ // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We
+ // use int32_t to keep it simple since |cost| will have to be int32_t.
+ int32_t partial[8][15] = {};
+ for (int i = 0; i < 8; ++i) {
+ for (int j = 0; j < 8; ++j) {
+ const int x = (src[j] >> (bitdepth - 8)) - 128;
+ partial[0][i + j] += x;
+ partial[1][i + j / 2] += x;
+ partial[2][i] += x;
+ partial[3][3 + i - j / 2] += x;
+ partial[4][7 + i - j] += x;
+ partial[5][3 - i / 2 + j] += x;
+ partial[6][j] += x;
+ partial[7][i / 2 + j] += x;
+ }
+ src += stride;
+ }
+ for (int i = 0; i < 8; ++i) {
+ cost[2] += Square(partial[2][i]);
+ cost[6] += Square(partial[6][i]);
+ }
+ cost[2] *= kDivisionTable[7];
+ cost[6] *= kDivisionTable[7];
+ for (int i = 0; i < 7; ++i) {
+ cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) *
+ kDivisionTable[i];
+ cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) *
+ kDivisionTable[i];
+ }
+ cost[0] += Square(partial[0][7]) * kDivisionTable[7];
+ cost[4] += Square(partial[4][7]) * kDivisionTable[7];
+ for (int i = 1; i < 8; i += 2) {
+ for (int j = 0; j < 5; ++j) {
+ cost[i] += Square(partial[i][3 + j]);
+ }
+ cost[i] *= kDivisionTable[7];
+ for (int j = 0; j < 3; ++j) {
+ cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) *
+ kDivisionTable[2 * j + 1];
+ }
+ }
+ int32_t best_cost = 0;
+ *direction = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (cost[i] > best_cost) {
+ best_cost = cost[i];
+ *direction = i;
+ }
+ }
+ *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10;
+}
+#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
+ // !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||
+ // (LIBGAV1_MAX_BITDEPTH >= 10 &&
+ // !defined(LIBGAV1_Dsp10bpp_CdefDirection))
+
+// Silence unused function warnings when CdefFilter_C is obviated.
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
+ !defined(LIBGAV1_Dsp8bpp_CdefFilters) || \
+ (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefFilters))
+
+int Constrain(int diff, int threshold, int damping) {
+ assert(threshold != 0);
+ damping = std::max(0, damping - FloorLog2(threshold));
+ const int sign = (diff < 0) ? -1 : 1;
+ return sign *
+ Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff));
+}
+
+// Filters the source block. It doesn't check whether the candidate pixel is
+// inside the frame. However it requires the source input to be padded with a
+// constant large value (kCdefLargeValue) if at the boundary.
+template <int block_width, int bitdepth, typename Pixel,
+ bool enable_primary = true, bool enable_secondary = true>
+void CdefFilter_C(const uint16_t* src, const ptrdiff_t src_stride,
+ const int block_height, const int primary_strength,
+ const int secondary_strength, const int damping,
+ const int direction, void* const dest,
+ const ptrdiff_t dest_stride) {
+ static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
+ static_assert(enable_primary || enable_secondary, "");
+ assert(block_height == 4 || block_height == 8);
+ assert(direction >= 0 && direction <= 7);
+ constexpr int coeff_shift = bitdepth - 8;
+ // Section 5.9.19. CDEF params syntax.
+ assert(primary_strength >= 0 && primary_strength <= 15 << coeff_shift);
+ assert(secondary_strength >= 0 && secondary_strength <= 4 << coeff_shift &&
+ secondary_strength != 3 << coeff_shift);
+ assert(primary_strength != 0 || secondary_strength != 0);
+ // damping is decreased by 1 for chroma.
+ assert((damping >= 3 && damping <= 6 + coeff_shift) ||
+ (damping >= 2 && damping <= 5 + coeff_shift));
+ // When only primary_strength or secondary_strength are non-zero the number
+ // of pixels inspected (4 for primary_strength, 8 for secondary_strength) and
+ // the taps used don't exceed the amount the sum is
+ // descaled by (16) so we can skip tracking and clipping to the minimum and
+ // maximum value observed.
+ constexpr bool clipping_required = enable_primary && enable_secondary;
+ static constexpr int kCdefSecondaryTaps[2] = {kCdefSecondaryTap0,
+ kCdefSecondaryTap1};
+ auto* dst = static_cast<Pixel*>(dest);
+ const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel);
+ int y = block_height;
+ do {
+ int x = 0;
+ do {
+ int16_t sum = 0;
+ const uint16_t pixel_value = src[x];
+ uint16_t max_value = pixel_value;
+ uint16_t min_value = pixel_value;
+ for (int k = 0; k < 2; ++k) {
+ static constexpr int signs[] = {-1, 1};
+ for (const int& sign : signs) {
+ if (enable_primary) {
+ const int dy = sign * kCdefDirections[direction][k][0];
+ const int dx = sign * kCdefDirections[direction][k][1];
+ const uint16_t value = src[dy * src_stride + dx + x];
+ // Note: the summation can ignore the condition check in SIMD
+ // implementation, because Constrain() will return 0 when
+ // value == kCdefLargeValue.
+ if (value != kCdefLargeValue) {
+ sum += Constrain(value - pixel_value, primary_strength, damping) *
+ kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][k];
+ if (clipping_required) {
+ max_value = std::max(value, max_value);
+ min_value = std::min(value, min_value);
+ }
+ }
+ }
+
+ if (enable_secondary) {
+ static constexpr int offsets[] = {-2, 2};
+ for (const int& offset : offsets) {
+ const int dy = sign * kCdefDirections[direction + offset][k][0];
+ const int dx = sign * kCdefDirections[direction + offset][k][1];
+ const uint16_t value = src[dy * src_stride + dx + x];
+ // Note: the summation can ignore the condition check in SIMD
+ // implementation.
+ if (value != kCdefLargeValue) {
+ sum += Constrain(value - pixel_value, secondary_strength,
+ damping) *
+ kCdefSecondaryTaps[k];
+ if (clipping_required) {
+ max_value = std::max(value, max_value);
+ min_value = std::min(value, min_value);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ const int offset = (8 + sum - (sum < 0)) >> 4;
+ if (clipping_required) {
+ dst[x] = static_cast<Pixel>(
+ Clip3(pixel_value + offset, min_value, max_value));
+ } else {
+ dst[x] = static_cast<Pixel>(pixel_value + offset);
+ }
+ } while (++x < block_width);
+
+ src += src_stride;
+ dst += dst_stride;
+ } while (--y != 0);
+}
+#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
+ // !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||
+ // (LIBGAV1_MAX_BITDEPTH >= 10 &&
+ // !defined(LIBGAV1_Dsp10bpp_CdefFilters))
+
+void Init8bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
+ assert(dsp != nullptr);
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+ dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
+ dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
+ dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
+ dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
+#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+ static_cast<void>(dsp);
+#ifndef LIBGAV1_Dsp8bpp_CdefDirection
+ dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_CdefFilters
+ dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
+ dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
+ dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
+#endif
+#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+void Init10bpp() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
+ assert(dsp != nullptr);
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+ dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
+ dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
+ dsp->cdef_filters[0][1] =
+ CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
+ dsp->cdef_filters[1][1] =
+ CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
+#else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+ static_cast<void>(dsp);
+#ifndef LIBGAV1_Dsp10bpp_CdefDirection
+ dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_CdefFilters
+ dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
+ dsp->cdef_filters[0][1] =
+ CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[0][2] =
+ CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
+ dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
+ dsp->cdef_filters[1][1] =
+ CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
+ /*enable_secondary=*/false>;
+ dsp->cdef_filters[1][2] =
+ CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
+#endif
+#endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+}
+#endif
+
+} // namespace
+
+void CdefInit_C() {
+ Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ Init10bpp();
+#endif
+}
+
+} // namespace dsp
+} // namespace libgav1