diff options
Diffstat (limited to 'src/dsp/x86/cdef_sse4.cc')
-rw-r--r-- | src/dsp/x86/cdef_sse4.cc | 29 |
1 files changed, 17 insertions, 12 deletions
diff --git a/src/dsp/x86/cdef_sse4.cc b/src/dsp/x86/cdef_sse4.cc index 6ede778..6c48844 100644 --- a/src/dsp/x86/cdef_sse4.cc +++ b/src/dsp/x86/cdef_sse4.cc @@ -241,8 +241,8 @@ LIBGAV1_ALWAYS_INLINE void AddPartial_D5_D7(__m128i* v_src, __m128i* partial_lo, *partial_hi = _mm_add_epi16(*partial_hi, _mm_srli_si128(v_pair_add[3], 10)); } -LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* src, ptrdiff_t stride, - __m128i* partial_lo, +LIBGAV1_ALWAYS_INLINE void AddPartial(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t stride, __m128i* partial_lo, __m128i* partial_hi) { // 8x8 input // 00 01 02 03 04 05 06 07 @@ -395,8 +395,10 @@ inline uint32_t SquareSum_S16(const __m128i a) { return SumVector_S32(square); } -void CdefDirection_SSE4_1(const void* const source, ptrdiff_t stride, - uint8_t* const direction, int* const variance) { +void CdefDirection_SSE4_1(const void* LIBGAV1_RESTRICT const source, + ptrdiff_t stride, + uint8_t* LIBGAV1_RESTRICT const direction, + int* LIBGAV1_RESTRICT const variance) { assert(direction != nullptr); assert(variance != nullptr); const auto* src = static_cast<const uint8_t*>(source); @@ -438,8 +440,9 @@ void CdefDirection_SSE4_1(const void* const source, ptrdiff_t stride, // CdefFilter // Load 4 vectors based on the given |direction|. -inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride, - __m128i* output, const int direction) { +inline void LoadDirection(const uint16_t* LIBGAV1_RESTRICT const src, + const ptrdiff_t stride, __m128i* output, + const int direction) { // Each |direction| describes a different set of source values. Expand this // set by negating each set. For |direction| == 0 this gives a diagonal line // from top right to bottom left. The first value is y, the second x. Negative @@ -463,8 +466,9 @@ inline void LoadDirection(const uint16_t* const src, const ptrdiff_t stride, // Load 4 vectors based on the given |direction|. Use when |block_width| == 4 to // do 2 rows at a time. -void LoadDirection4(const uint16_t* const src, const ptrdiff_t stride, - __m128i* output, const int direction) { +void LoadDirection4(const uint16_t* LIBGAV1_RESTRICT const src, + const ptrdiff_t stride, __m128i* output, + const int direction) { const int y_0 = kCdefDirections[direction][0][0]; const int x_0 = kCdefDirections[direction][0][1]; const int y_1 = kCdefDirections[direction][1][0]; @@ -507,10 +511,11 @@ inline __m128i ApplyConstrainAndTap(const __m128i& pixel, const __m128i& val, } template <int width, bool enable_primary = true, bool enable_secondary = true> -void CdefFilter_SSE4_1(const uint16_t* src, const ptrdiff_t src_stride, - const int height, const int primary_strength, - const int secondary_strength, const int damping, - const int direction, void* dest, +void CdefFilter_SSE4_1(const uint16_t* LIBGAV1_RESTRICT src, + const ptrdiff_t src_stride, const int height, + const int primary_strength, const int secondary_strength, + const int damping, const int direction, + void* LIBGAV1_RESTRICT dest, const ptrdiff_t dst_stride) { static_assert(width == 8 || width == 4, "Invalid CDEF width."); static_assert(enable_primary || enable_secondary, ""); |