diff options
Diffstat (limited to 'src/dsp/x86/warp_sse4.cc')
-rw-r--r-- | src/dsp/x86/warp_sse4.cc | 58 |
1 files changed, 29 insertions, 29 deletions
diff --git a/src/dsp/x86/warp_sse4.cc b/src/dsp/x86/warp_sse4.cc index 5830894..5498052 100644 --- a/src/dsp/x86/warp_sse4.cc +++ b/src/dsp/x86/warp_sse4.cc @@ -167,7 +167,7 @@ inline void WriteVerticalFilter(const __m128i filter[8], } template <bool is_compound, typename DestType> -inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma, +inline void VerticalFilter(const int16_t source[15][8], int64_t y4, int gamma, int delta, DestType* LIBGAV1_RESTRICT dest_row, ptrdiff_t dest_stride) { int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta); @@ -188,8 +188,8 @@ inline void VerticalFilter(const int16_t source[15][8], int y4, int gamma, } template <bool is_compound, typename DestType> -inline void VerticalFilter(const int16_t* LIBGAV1_RESTRICT source_cols, int y4, - int gamma, int delta, +inline void VerticalFilter(const int16_t* LIBGAV1_RESTRICT source_cols, + int64_t y4, int gamma, int delta, DestType* LIBGAV1_RESTRICT dest_row, ptrdiff_t dest_stride) { int sy4 = (y4 & ((1 << kWarpedModelPrecisionBits) - 1)) - MultiplyBy4(delta); @@ -249,7 +249,7 @@ inline void WarpRegion1(const uint8_t* LIBGAV1_RESTRICT src, template <bool is_compound, typename DestType> inline void WarpRegion2(const uint8_t* LIBGAV1_RESTRICT src, - ptrdiff_t source_stride, int source_width, int y4, + ptrdiff_t source_stride, int source_width, int64_t y4, int ix4, int iy4, int gamma, int delta, int16_t intermediate_result_column[15], DestType* LIBGAV1_RESTRICT dst_row, @@ -291,7 +291,7 @@ inline void WarpRegion2(const uint8_t* LIBGAV1_RESTRICT src, template <bool is_compound, typename DestType> inline void WarpRegion3(const uint8_t* LIBGAV1_RESTRICT src, ptrdiff_t source_stride, int source_height, int alpha, - int beta, int x4, int ix4, int iy4, + int beta, int64_t x4, int ix4, int iy4, int16_t intermediate_result[15][8]) { // Region 3 // At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0. @@ -323,8 +323,9 @@ inline void WarpRegion3(const uint8_t* LIBGAV1_RESTRICT src, template <bool is_compound, typename DestType> inline void WarpRegion4(const uint8_t* LIBGAV1_RESTRICT src, - ptrdiff_t source_stride, int alpha, int beta, int x4, - int ix4, int iy4, int16_t intermediate_result[15][8]) { + ptrdiff_t source_stride, int alpha, int beta, + int64_t x4, int ix4, int iy4, + int16_t intermediate_result[15][8]) { // Region 4. // At this point, we know ix4 - 7 < source_width - 1 and ix4 + 7 > 0. @@ -379,14 +380,8 @@ inline void HandleWarpBlock(const uint8_t* LIBGAV1_RESTRICT src, int16_t intermediate_result_column[15]; }; - const int dst_x = - src_x * warp_params[2] + src_y * warp_params[3] + warp_params[0]; - const int dst_y = - src_x * warp_params[4] + src_y * warp_params[5] + warp_params[1]; - const int x4 = dst_x >> subsampling_x; - const int y4 = dst_y >> subsampling_y; - const int ix4 = x4 >> kWarpedModelPrecisionBits; - const int iy4 = y4 >> kWarpedModelPrecisionBits; + const WarpFilterParams filter_params = GetWarpFilterParams( + src_x, src_y, subsampling_x, subsampling_y, warp_params); // A prediction block may fall outside the frame's boundaries. If a // prediction block is calculated using only samples outside the frame's // boundary, the filtering can be simplified. We can divide the plane @@ -439,33 +434,38 @@ inline void HandleWarpBlock(const uint8_t* LIBGAV1_RESTRICT src, // border index (source_width - 1 or 0, respectively). Then for each x, // the inner for loop of the horizontal filter is reduced to multiplying // the border pixel by the sum of the filter coefficients. - if (ix4 - 7 >= source_width - 1 || ix4 + 7 <= 0) { - if ((iy4 - 7 >= source_height - 1 || iy4 + 7 <= 0)) { + if (filter_params.ix4 - 7 >= source_width - 1 || filter_params.ix4 + 7 <= 0) { + if ((filter_params.iy4 - 7 >= source_height - 1 || + filter_params.iy4 + 7 <= 0)) { // Outside the frame in both directions. One repeated value. - WarpRegion1<is_compound, DestType>(src, source_stride, source_width, - source_height, ix4, iy4, dst_row, - dest_stride); + WarpRegion1<is_compound, DestType>( + src, source_stride, source_width, source_height, filter_params.ix4, + filter_params.iy4, dst_row, dest_stride); return; } // Outside the frame horizontally. Rows repeated. WarpRegion2<is_compound, DestType>( - src, source_stride, source_width, y4, ix4, iy4, gamma, delta, - intermediate_result_column, dst_row, dest_stride); + src, source_stride, source_width, filter_params.y4, filter_params.ix4, + filter_params.iy4, gamma, delta, intermediate_result_column, dst_row, + dest_stride); return; } - if ((iy4 - 7 >= source_height - 1 || iy4 + 7 <= 0)) { + if ((filter_params.iy4 - 7 >= source_height - 1 || + filter_params.iy4 + 7 <= 0)) { // Outside the frame vertically. - WarpRegion3<is_compound, DestType>(src, source_stride, source_height, alpha, - beta, x4, ix4, iy4, intermediate_result); + WarpRegion3<is_compound, DestType>( + src, source_stride, source_height, alpha, beta, filter_params.x4, + filter_params.ix4, filter_params.iy4, intermediate_result); } else { // Inside the frame. - WarpRegion4<is_compound, DestType>(src, source_stride, alpha, beta, x4, ix4, - iy4, intermediate_result); + WarpRegion4<is_compound, DestType>(src, source_stride, alpha, beta, + filter_params.x4, filter_params.ix4, + filter_params.iy4, intermediate_result); } // Region 3 and 4 vertical filter. - VerticalFilter<is_compound, DestType>(intermediate_result, y4, gamma, delta, - dst_row, dest_stride); + VerticalFilter<is_compound, DestType>(intermediate_result, filter_params.y4, + gamma, delta, dst_row, dest_stride); } template <bool is_compound> |