about summary refs log tree commit diff
path: root/src/dsp/x86/warp_sse4.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/x86/warp_sse4.cc')
-rw-r--r-- src/dsp/x86/warp_sse4.cc 66
1 files changed, 38 insertions, 28 deletions
diff --git a/src/dsp/x86/warp_sse4.cc b/src/dsp/x86/warp_sse4.cc
index 9ddfeac..5830894 100644
--- a/src/dsp/x86/warp_sse4.cc
+++ b/src/dsp/x86/warp_sse4.cc
@@ -101,7 +101,7 @@ inline void HorizontalFilter(const int sx4, const int16_t alpha,
template <bool is_compound>
inline void WriteVerticalFilter(const __m128i filter[8],
const int16_t intermediate_result[15][8], int y,
- void* dst_row) {
+ void* LIBGAV1_RESTRICT dst_row) {
constexpr int kRoundBitsVertical =
is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
__m128i sum_low = _mm_set1_epi32(kOffsetRemoval);
@@ -136,8 +136,9 @@ inline void WriteVerticalFilter(const __m128i filter[8],
template <bool is_compound>
inline void WriteVerticalFilter(const __m128i filter[8],
- const int16_t* intermediate_result_column,
- void* dst_row) {
+ const int16_t* LIBGAV1_RESTRICT
+ intermediate_result_column,
+ void* LIBGAV1_RESTRICT dst_row) {
constexpr int kRoundBitsVertical =
is_compound ? kInterRoundBitsCompoundVertical : kInterRoundBitsVertical;
__m128i sum_low = _mm_setzero_si128();
@@ -167,7 +168,7 @@ inline void WriteVerticalFilter(const __m128i filter[8],
template <bool is_compound, typename DestType>
inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma,
- int delta, DestType* dest_row,
+ int delta, DestType* LIBGAV1_RESTRICT dest_row,
ptrdiff_t dest_stride) {
int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
for (int y = 0; y < 8; ++y) {
@@ -187,8 +188,9 @@ inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma,
}
template <bool is_compound, typename DestType>
-inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma,
- int delta, DestType* dest_row,
+inline void VerticalFilter(const int16_t* LIBGAV1_RESTRICT source_cols, int y4,
+ int gamma, int delta,
+ DestType* LIBGAV1_RESTRICT dest_row,
ptrdiff_t dest_stride) {
int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta);
for (int y = 0; y < 8; ++y) {
@@ -208,9 +210,11 @@ inline void VerticalFilter(const int16_t* source_cols, int y4, int gamma,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int source_height, int ix4, int iy4,
- DestType* dst_row, ptrdiff_t dest_stride) {
+inline void WarpRegion1(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width,
+ int source_height, int ix4, int iy4,
+ DestType* LIBGAV1_RESTRICT dst_row,
+ ptrdiff_t dest_stride) {
// Region 1
// Points to the left or right border of the first row of |src|.
const uint8_t* first_row_border =
@@ -244,10 +248,12 @@ inline void WarpRegion1(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int y4, int ix4, int iy4, int gamma,
- int delta, int16_t intermediate_result_column[15],
- DestType* dst_row, ptrdiff_t dest_stride) {
+inline void WarpRegion2(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width, int y4,
+ int ix4, int iy4, int gamma, int delta,
+ int16_t intermediate_result_column[15],
+ DestType* LIBGAV1_RESTRICT dst_row,
+ ptrdiff_t dest_stride) {
// Region 2.
// Points to the left or right border of the first row of |src|.
const uint8_t* first_row_border =
@@ -283,9 +289,10 @@ inline void WarpRegion2(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride,
- int source_height, int alpha, int beta, int x4, int ix4,
- int iy4, int16_t intermediate_result[15][8]) {
+inline void WarpRegion3(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_height, int alpha,
+ int beta, int x4, int ix4, int iy4,
+ int16_t intermediate_result[15][8]) {
// Region 3
// At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0.
@@ -315,9 +322,9 @@ inline void WarpRegion3(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound, typename DestType>
-inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha,
- int beta, int x4, int ix4, int iy4,
- int16_t intermediate_result[15][8]) {
+inline void WarpRegion4(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int alpha, int beta, int x4,
+ int ix4, int iy4, int16_t intermediate_result[15][8]) {
// Region 4.
// At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0.
@@ -351,12 +358,14 @@ inline void WarpRegion4(const uint8_t* src, ptrdiff_t source_stride, int alpha,
}
template <bool is_compound, typename DestType>
-inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride,
- int source_width, int source_height,
- const int* warp_params, int subsampling_x,
- int subsampling_y, int src_x, int src_y,
- int16_t alpha, int16_t beta, int16_t gamma,
- int16_t delta, DestType* dst_row,
+inline void HandleWarpBlock(const uint8_t* LIBGAV1_RESTRICT src,
+ ptrdiff_t source_stride, int source_width,
+ int source_height,
+ const int* LIBGAV1_RESTRICT warp_params,
+ int subsampling_x, int subsampling_y, int src_x,
+ int src_y, int16_t alpha, int16_t beta,
+ int16_t gamma, int16_t delta,
+ DestType* LIBGAV1_RESTRICT dst_row,
ptrdiff_t dest_stride) {
union {
// Intermediate_result is the output of the horizontal filtering and
@@ -460,11 +469,12 @@ inline void HandleWarpBlock(const uint8_t* src, ptrdiff_t source_stride,
}
template <bool is_compound>
-void Warp_SSE4_1(const void* source, ptrdiff_t source_stride, int source_width,
- int source_height, const int* warp_params, int subsampling_x,
+void Warp_SSE4_1(const void* LIBGAV1_RESTRICT source, ptrdiff_t source_stride,
+ int source_width, int source_height,
+ const int* LIBGAV1_RESTRICT warp_params, int subsampling_x,
int subsampling_y, int block_start_x, int block_start_y,
int block_width, int block_height, int16_t alpha, int16_t beta,
- int16_t gamma, int16_t delta, void* dest,
+ int16_t gamma, int16_t delta, void* LIBGAV1_RESTRICT dest,
ptrdiff_t dest_stride) {
const auto* const src = static_cast<const uint8_t*>(source);
using DestType =