diff options
Diffstat (limited to 'src/dsp/x86/warp_sse4.cc')
-rw-r--r-- | src/dsp/x86/warp_sse4.cc | 66 |
1 files changed, 38 insertions, 28 deletions
diff --git a/src/dsp/x86/warp_sse4.cc b/src/dsp/x86/warp_sse4.cc index 9ddfeac..5830894 100644 --- a/src/dsp/x86/warp_sse4.cc +++ b/src/dsp/x86/warp_sse4.cc @@ -101,7 +101,7 @@ inline void HorizontalFilter(const int sx4, const int16_t alpha, template <bool is_compound> inline void WriteVerticalFilter(const __m128i filter[8], const int16_t intermediate_result[15][8], int y, - void* dst_row) { + void* LIBGAV1_RESTRICT dst_row) { constexpr int kRoundBitsVertical = is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical; __m128i sum_low = _mm_set1_epi32(kOffsetRemoval); @@ -136,8 +136,9 @@ inline void WriteVerticalFilter(const __m128i filter[8], template <bool is_compound> inline void WriteVerticalFilter(const __m128i filter[8], - const int16_t* intermediate_result_column, - void* dst_row) { + const int16_t* LIBGAV1_RESTRICT + intermediate_result_column, + void* LIBGAV1_RESTRICT dst_row) { constexpr int kRoundBitsVertical = is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical; __m128i sum_low = _mm_setzero_si128(); @@ -167,7 +168,7 @@ inline void WriteVerticalFilter(const __m128i filter[8], template <bool is_compound, typename DestType> inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma, - int delta, DestType* dest_row, + int delta, DestType* LIBGAV1_RESTRICT dest_row, ptrdiff_t dest_stride) { int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta); for (int y = 0; y < 8; ++y) { @@ -187,8 +188,9 @@ inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma, } template <bool is_compound, typename DestType> -inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma, - int delta, DestType* dest_row, +inline void VerticalFilter(const int16_t* LIBGAV1_RESTRICT source_cols, int y4, + int gamma, int delta, + DestType* LIBGAV1_RESTRICT dest_row, ptrdiff_t dest_stride) { int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta); for (int y = 0; y < 8; ++y) { @@ -208,9 +210,11 @@ inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma, } template <bool is_compound, typename DestType> -inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride, - int source_width, int source_height, int ix4, int iy4, - DestType* dst_row, ptrdiff_t dest_stride) { +inline void WarpRegion1(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t source_stride, int source_width, + int source_height, int ix4, int iy4, + DestType* LIBGAV1_RESTRICT dst_row, + ptrdiff_t dest_stride) { // Region 1 // Points to the left or right border of the first row of |src|. const uint8_t* first_row_border = @@ -244,10 +248,12 @@ inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride, } template <bool is_compound, typename DestType> -inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride, - int source_width, int y4, int ix4, int iy4, int gamma, - int delta, int16_t intermediate_result_column[15], - DestType* dst_row, ptrdiff_t dest_stride) { +inline void WarpRegion2(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t source_stride, int source_width, int y4, + int ix4, int iy4, int gamma, int delta, + int16_t intermediate_result_column[15], + DestType* LIBGAV1_RESTRICT dst_row, + ptrdiff_t dest_stride) { // Region 2. // Points to the left or right border of the first row of |src|. const uint8_t* first_row_border = @@ -283,9 +289,10 @@ inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride, } template <bool is_compound, typename DestType> -inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride, - int source_height, int alpha, int beta, int x4, int ix4, - int iy4, int16_t intermediate_result[15][8]) { +inline void WarpRegion3(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t source_stride, int source_height, int alpha, + int beta, int x4, int ix4, int iy4, + int16_t intermediate_result[15][8]) { // Region 3 // At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0. @@ -315,9 +322,9 @@ inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride, } template <bool is_compound, typename DestType> -inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha, - int beta, int x4, int ix4, int iy4, - int16_t intermediate_result[15][8]) { +inline void WarpRegion4(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t source_stride, int alpha, int beta, int x4, + int ix4, int iy4, int16_t intermediate_result[15][8]) { // Region 4. // At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0. @@ -351,12 +358,14 @@ inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha, } template <bool is_compound, typename DestType> -inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride, - int source_width, int source_height, - const int* warp_params, int subsampling_x, - int subsampling_y, int src_x, int src_y, - int16_t alpha, int16_t beta, int16_t gamma, - int16_t delta, DestType* dst_row, +inline void HandleWarpBlock(const uint8_t* LIBGAV1_RESTRICT src, + ptrdiff_t source_stride, int source_width, + int source_height, + const int* LIBGAV1_RESTRICT warp_params, + int subsampling_x, int subsampling_y, int src_x, + int src_y, int16_t alpha, int16_t beta, + int16_t gamma, int16_t delta, + DestType* LIBGAV1_RESTRICT dst_row, ptrdiff_t dest_stride) { union { // Intermediate_result is the output of the horizontal filtering and @@ -460,11 +469,12 @@ inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride, } template <bool is_compound> -void Warp_SSE4_1(const void* source, ptrdiff_t source_stride, int source_width, - int source_height, const int* warp_params, int subsampling_x, +void Warp_SSE4_1(const void* LIBGAV1_RESTRICT source, ptrdiff_t source_stride, + int source_width, int source_height, + const int* LIBGAV1_RESTRICT warp_params, int subsampling_x, int subsampling_y, int block_start_x, int block_start_y, int block_width, int block_height, int16_t alpha, int16_t beta, - int16_t gamma, int16_t delta, void* dest, + int16_t gamma, int16_t delta, void* LIBGAV1_RESTRICT dest, ptrdiff_t dest_stride) { const auto* const src = static_cast<const uint8_t*>(source); using DestType = |