about summary refs log tree commit diff
path: root/src/dsp/x86/cdef_avx2.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/x86/cdef_avx2.cc')
-rw-r--r-- src/dsp/x86/cdef_avx2.cc 30
1 files changed, 17 insertions, 13 deletions
diff --git a/src/dsp/x86/cdef_avx2.cc b/src/dsp/x86/cdef_avx2.cc
index d41dc38..01a2b9f 100644
--- a/src/dsp/x86/cdef_avx2.cc
+++ b/src/dsp/x86/cdef_avx2.cc
@@ -269,8 +269,8 @@ LIBGAV1_ALWAYS_INLINE void AddPartial_D7_D5(__m256i* v_src, __m256i* partial_lo,
_mm256_add_epi16(*partial_hi, _mm256_srli_si256(v_pair_add[3], 10));
}
-LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* src, ptrdiff_t stride,
- __m256i* partial) {
+LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t stride, __m256i* partial) {
// 8x8 input
// 00 01 02 03 04 05 06 07
// 10 11 12 13 14 15 16 17
@@ -451,8 +451,10 @@ inline void Cost2And6_Pair(uint32_t* cost, const __m256i partial_a,
cost[6] = _mm_cvtsi128_si32(_mm_srli_si128(sums, 8));
}
-void CdefDirection_AVX2(const void* const source, ptrdiff_t stride,
- uint8_t* const direction, int* const variance) {
+void CdefDirection_AVX2(const void* LIBGAV1_RESTRICT const source,
+ ptrdiff_t stride,
+ uint8_t* LIBGAV1_RESTRICT const direction,
+ int* LIBGAV1_RESTRICT const variance) {
assert(direction != nullptr);
assert(variance != nullptr);
const auto* src = static_cast<const uint8_t*>(source);
@@ -500,8 +502,9 @@ void CdefDirection_AVX2(const void* const source, ptrdiff_t stride,
// CdefFilter
// Load 4 vectors based on the given |direction|.
-inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+inline void LoadDirection(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
// Each |direction| describes a different set of source values. Expand this
// set by negating each set. For |direction| == 0 this gives a diagonal line
// from top right to bottom left. The first value is y, the second x. Negative
@@ -525,8 +528,9 @@ inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride,
// Load 4 vectors based on the given |direction|. Use when |block_width| == 4 to
// do 2 rows at a time.
-void LoadDirection4(const uint16_t* const src, const ptrdiff_t stride,
- __m128i* output, const int direction) {
+void LoadDirection4(const uint16_t* LIBGAV1_RESTRICT const src,
+ const ptrdiff_t stride, __m128i* output,
+ const int direction) {
const int y_0 = kCdefDirections[direction][0][0];
const int x_0 = kCdefDirections[direction][0][1];
const int y_1 = kCdefDirections[direction][1][0];
@@ -569,11 +573,11 @@ inline __m256i ApplyConstrainAndTap(const __m256i& pixel, const __m256i& val,
}
template <int width, bool enable_primary = true, bool enable_secondary = true>
-void CdefFilter_AVX2(const uint16_t* src, const ptrdiff_t src_stride,
- const int height, const int primary_strength,
- const int secondary_strength, const int damping,
- const int direction, void* dest,
- const ptrdiff_t dst_stride) {
+void CdefFilter_AVX2(const uint16_t* LIBGAV1_RESTRICT src,
+ const ptrdiff_t src_stride, const int height,
+ const int primary_strength, const int secondary_strength,
+ const int damping, const int direction,
+ void* LIBGAV1_RESTRICT dest, const ptrdiff_t dst_stride) {
static_assert(width == 8 || width == 4, "Invalid CDEF width.");
static_assert(enable_primary || enable_secondary, "");
constexpr bool clipping_required = enable_primary && enable_secondary;