diff options
Diffstat (limited to 'src/dsp/arm/intrapred_cfl_neon.cc')
-rw-r--r-- | src/dsp/arm/intrapred_cfl_neon.cc | 48 |
1 files changed, 26 insertions, 22 deletions
diff --git a/src/dsp/arm/intrapred_cfl_neon.cc b/src/dsp/arm/intrapred_cfl_neon.cc index 8d8748f..ad39947 100644 --- a/src/dsp/arm/intrapred_cfl_neon.cc +++ b/src/dsp/arm/intrapred_cfl_neon.cc @@ -76,7 +76,7 @@ template <int block_width, int block_height> void CflSubsampler420_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, const ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride) { const auto* src = static_cast<const uint8_t*>(source); uint32_t sum; if (block_width == 4) { @@ -140,7 +140,7 @@ void CflSubsampler420_NEON( const uint8_t a11 = src[max_luma_width - 1 + stride]; // Dup the 2x2 sum at the max luma offset. const uint16x8_t max_luma_sum = - vdupq_n_u16((uint16_t)((a00 + a01 + a10 + a11) << 1)); + vdupq_n_u16(static_cast<uint16_t>((a00 + a01 + a10 + a11) << 1)); uint16x8_t x_index = {0, 2, 4, 6, 8, 10, 12, 14}; ptrdiff_t src_x_offset = 0; @@ -173,7 +173,7 @@ template <int block_width, int block_height> void CflSubsampler444_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, const ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, const ptrdiff_t stride) { const auto* src = static_cast<const uint8_t*>(source); uint32_t sum; if (block_width == 4) { @@ -276,7 +276,7 @@ inline uint8x8_t Combine8(const int16x8_t luma, const int alpha, // uint8_t. Saturated int16_t >> 6 outranges uint8_t. template <int block_height> inline void CflIntraPredictor4xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint8_t*>(dest); @@ -295,7 +295,7 @@ inline void CflIntraPredictor4xN_NEON( template <int block_height> inline void CflIntraPredictor8xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint8_t*>(dest); @@ -310,7 +310,7 @@ inline void CflIntraPredictor8xN_NEON( template <int block_height> inline void CflIntraPredictor16xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint8_t*>(dest); @@ -328,7 +328,7 @@ inline void CflIntraPredictor16xN_NEON( template <int block_height> inline void CflIntraPredictor32xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint8_t*>(dest); @@ -507,7 +507,8 @@ inline uint16x8_t StoreLumaResults8_420(const uint16x8_t vertical_sum0, template <int block_height_log2, bool is_inside> void CflSubsampler444_4xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], - const int max_luma_height, const void* const source, ptrdiff_t stride) { + const int max_luma_height, const void* LIBGAV1_RESTRICT const source, + ptrdiff_t stride) { static_assert(block_height_log2 <= 4, ""); const int block_height = 1 << block_height_log2; const int visible_height = max_luma_height; @@ -568,7 +569,7 @@ template <int block_height_log2> void CflSubsampler444_4xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { static_cast<void>(max_luma_width); static_cast<void>(max_luma_height); static_assert(block_height_log2 <= 4, ""); @@ -588,7 +589,8 @@ void CflSubsampler444_4xH_NEON( template <int block_height_log2, bool is_inside> void CflSubsampler444_8xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], - const int max_luma_height, const void* const source, ptrdiff_t stride) { + const int max_luma_height, const void* LIBGAV1_RESTRICT const source, + ptrdiff_t stride) { const int block_height = 1 << block_height_log2; const int visible_height = max_luma_height; const auto* src = static_cast<const uint16_t*>(source); @@ -643,7 +645,7 @@ template <int block_height_log2> void CflSubsampler444_8xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { static_cast<void>(max_luma_width); static_cast<void>(max_luma_height); static_assert(block_height_log2 <= 5, ""); @@ -667,7 +669,7 @@ template <int block_width_log2, int block_height_log2, bool is_inside> void CflSubsampler444_WxH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { const int block_height = 1 << block_height_log2; const int visible_height = max_luma_height; const int block_width = 1 << block_width_log2; @@ -751,7 +753,7 @@ template <int block_width_log2, int block_height_log2> void CflSubsampler444_WxH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { static_assert(block_width_log2 == 4 || block_width_log2 == 5, "This function will only work for block_width 16 and 32."); static_assert(block_height_log2 <= 5, ""); @@ -773,7 +775,7 @@ template <int block_height_log2> void CflSubsampler420_4xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int /*max_luma_width*/, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { const int block_height = 1 << block_height_log2; const auto* src = static_cast<const uint16_t*>(source); const ptrdiff_t src_stride = stride / sizeof(src[0]); @@ -839,7 +841,8 @@ void CflSubsampler420_4xH_NEON( template <int block_height_log2, int max_luma_width> inline void CflSubsampler420Impl_8xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], - const int max_luma_height, const void* const source, ptrdiff_t stride) { + const int max_luma_height, const void* LIBGAV1_RESTRICT const source, + ptrdiff_t stride) { const int block_height = 1 << block_height_log2; const auto* src = static_cast<const uint16_t*>(source); const ptrdiff_t src_stride = stride / sizeof(src[0]); @@ -944,7 +947,7 @@ template <int block_height_log2> void CflSubsampler420_8xH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { if (max_luma_width == 8) { CflSubsampler420Impl_8xH_NEON<block_height_log2, 8>(luma, max_luma_height, source, stride); @@ -957,7 +960,8 @@ void CflSubsampler420_8xH_NEON( template <int block_width_log2, int block_height_log2, int max_luma_width> inline void CflSubsampler420Impl_WxH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], - const int max_luma_height, const void* const source, ptrdiff_t stride) { + const int max_luma_height, const void* LIBGAV1_RESTRICT const source, + ptrdiff_t stride) { const auto* src = static_cast<const uint16_t*>(source); const ptrdiff_t src_stride = stride / sizeof(src[0]); const int block_height = 1 << block_height_log2; @@ -1062,7 +1066,7 @@ template <int block_width_log2, int block_height_log2> void CflSubsampler420_WxH_NEON( int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int max_luma_width, const int max_luma_height, - const void* const source, ptrdiff_t stride) { + const void* LIBGAV1_RESTRICT const source, ptrdiff_t stride) { switch (max_luma_width) { case 8: CflSubsampler420Impl_WxH_NEON<block_width_log2, block_height_log2, 8>( @@ -1109,7 +1113,7 @@ inline uint16x8_t Combine8(const int16x8_t luma, const int16x8_t alpha_abs, template <int block_height, int bitdepth = 10> inline void CflIntraPredictor4xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint16_t*>(dest); @@ -1133,7 +1137,7 @@ inline void CflIntraPredictor4xN_NEON( template <int block_height, int bitdepth = 10> inline void CflIntraPredictor8xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint16_t*>(dest); @@ -1153,7 +1157,7 @@ inline void CflIntraPredictor8xN_NEON( template <int block_height, int bitdepth = 10> inline void CflIntraPredictor16xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint16_t*>(dest); @@ -1177,7 +1181,7 @@ inline void CflIntraPredictor16xN_NEON( template <int block_height, int bitdepth = 10> inline void CflIntraPredictor32xN_NEON( - void* const dest, const ptrdiff_t stride, + void* LIBGAV1_RESTRICT const dest, const ptrdiff_t stride, const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], const int alpha) { auto* dst = static_cast<uint16_t*>(dest); |