about | summary | refs | log | tree | commit | diff
path: root/src/dsp/x86/weight_mask_sse4.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/x86/weight_mask_sse4.cc')
-rw-r--r-- src/dsp/x86/weight_mask_sse4.cc 360
1 files changed, 182 insertions, 178 deletions
diff --git a/src/dsp/x86/weight_mask_sse4.cc b/src/dsp/x86/weight_mask_sse4.cc
index 69cb784..53a374d 100644
--- a/src/dsp/x86/weight_mask_sse4.cc
+++ b/src/dsp/x86/weight_mask_sse4.cc
@@ -37,10 +37,10 @@ namespace {
constexpr int kRoundingBits8bpp = 4;
template <bool mask_is_inverse, bool is_store_16>
-inline void WeightMask16_SSE4(const int16_t* LIBGAV1_RESTRICT prediction_0,
- const int16_t* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+inline void WeightMask16_SSE4_1(const int16_t* LIBGAV1_RESTRICT prediction_0,
+ const int16_t* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const __m128i pred_00 = LoadAligned16(prediction_0);
const __m128i pred_10 = LoadAligned16(prediction_1);
const __m128i difference_0 = RightShiftWithRounding_U16(
@@ -78,7 +78,7 @@ inline void WeightMask16_SSE4(const int16_t* LIBGAV1_RESTRICT prediction_0,
}
#define WEIGHT8_PAIR_WITHOUT_STRIDE \
- WeightMask16_SSE4<mask_is_inverse, false>(pred_0, pred_1, mask, mask_stride)
+ WeightMask16_SSE4_1<mask_is_inverse, false>(pred_0, pred_1, mask, mask_stride)
#define WEIGHT8_PAIR_AND_STRIDE \
WEIGHT8_PAIR_WITHOUT_STRIDE; \
@@ -87,9 +87,10 @@ inline void WeightMask16_SSE4(const int16_t* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride << 1
template <bool mask_is_inverse>
-void WeightMask8x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
+void WeightMask8x8_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
@@ -100,10 +101,10 @@ void WeightMask8x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask8x16_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 3;
@@ -116,10 +117,10 @@ void WeightMask8x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask8x32_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 5;
@@ -132,7 +133,7 @@ void WeightMask8x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
#define WEIGHT16_WITHOUT_STRIDE \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, mask_stride)
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, mask_stride)
#define WEIGHT16_AND_STRIDE \
WEIGHT16_WITHOUT_STRIDE; \
@@ -141,10 +142,10 @@ void WeightMask8x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask16x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x8_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y = 7;
@@ -155,10 +156,10 @@ void WeightMask16x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x16_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 5;
@@ -171,10 +172,10 @@ void WeightMask16x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x32_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 6;
@@ -190,10 +191,10 @@ void WeightMask16x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x64_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 21;
@@ -205,10 +206,11 @@ void WeightMask16x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
WEIGHT16_WITHOUT_STRIDE;
}
-#define WEIGHT32_WITHOUT_STRIDE \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, mask_stride); \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
- mask + 16, mask_stride)
+#define WEIGHT32_WITHOUT_STRIDE \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, \
+ mask_stride); \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
+ mask + 16, mask_stride)
#define WEIGHT32_AND_STRIDE \
WEIGHT32_WITHOUT_STRIDE; \
@@ -217,10 +219,10 @@ void WeightMask16x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask32x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x8_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
WEIGHT32_AND_STRIDE;
@@ -234,10 +236,10 @@ void WeightMask32x8_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x16_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 5;
@@ -250,10 +252,10 @@ void WeightMask32x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x32_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 6;
@@ -269,10 +271,10 @@ void WeightMask32x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x64_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 21;
@@ -284,14 +286,15 @@ void WeightMask32x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
WEIGHT32_WITHOUT_STRIDE;
}
-#define WEIGHT64_WITHOUT_STRIDE \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, mask_stride); \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
- mask + 16, mask_stride); \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0 + 32, pred_1 + 32, \
- mask + 32, mask_stride); \
- WeightMask16_SSE4<mask_is_inverse, true>(pred_0 + 48, pred_1 + 48, \
- mask + 48, mask_stride)
+#define WEIGHT64_WITHOUT_STRIDE \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, \
+ mask_stride); \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
+ mask + 16, mask_stride); \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0 + 32, pred_1 + 32, \
+ mask + 32, mask_stride); \
+ WeightMask16_SSE4_1<mask_is_inverse, true>(pred_0 + 48, pred_1 + 48, \
+ mask + 48, mask_stride)
#define WEIGHT64_AND_STRIDE \
WEIGHT64_WITHOUT_STRIDE; \
@@ -300,10 +303,10 @@ void WeightMask32x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask64x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x16_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -316,10 +319,10 @@ void WeightMask64x16_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x32_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y5 = 0;
@@ -335,10 +338,10 @@ void WeightMask64x32_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x64_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -351,10 +354,10 @@ void WeightMask64x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x128_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -368,10 +371,10 @@ void WeightMask64x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask128x64_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -412,10 +415,10 @@ void WeightMask128x64_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask128x128_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const int16_t*>(prediction_0);
const auto* pred_1 = static_cast<const int16_t*>(prediction_1);
int y3 = 0;
@@ -466,8 +469,9 @@ void WeightMask128x128_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
#define INIT_WEIGHT_MASK_8BPP(width, height, w_index, h_index) \
dsp->weight_mask[w_index][h_index][0] = \
- WeightMask##width##x##height##_SSE4<0>; \
- dsp->weight_mask[w_index][h_index][1] = WeightMask##width##x##height##_SSE4<1>
+ WeightMask##width##x##height##_SSE4_1<0>; \
+ dsp->weight_mask[w_index][h_index][1] = \
+ WeightMask##width##x##height##_SSE4_1<1>
void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
@@ -501,7 +505,7 @@ constexpr int kRoundingBits10bpp = 6;
constexpr int kScaledDiffShift = 4;
template <bool mask_is_inverse, bool is_store_16>
-inline void WeightMask16_10bpp_SSE4(
+inline void WeightMask16_10bpp_SSE4_1(
const uint16_t* LIBGAV1_RESTRICT prediction_0,
const uint16_t* LIBGAV1_RESTRICT prediction_1,
uint8_t* LIBGAV1_RESTRICT mask, ptrdiff_t mask_stride) {
@@ -562,9 +566,9 @@ inline void WeightMask16_10bpp_SSE4(
}
}
-#define WEIGHT8_PAIR_WITHOUT_STRIDE_10BPP \
- WeightMask16_10bpp_SSE4<mask_is_inverse, false>(pred_0, pred_1, mask, \
- mask_stride)
+#define WEIGHT8_PAIR_WITHOUT_STRIDE_10BPP \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, false>(pred_0, pred_1, mask, \
+ mask_stride)
#define WEIGHT8_PAIR_AND_STRIDE_10BPP \
WEIGHT8_PAIR_WITHOUT_STRIDE_10BPP; \
@@ -573,10 +577,10 @@ inline void WeightMask16_10bpp_SSE4(
mask += mask_stride << 1
template <bool mask_is_inverse>
-void WeightMask8x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask8x8_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
@@ -587,10 +591,10 @@ void WeightMask8x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask8x16_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 3;
@@ -603,10 +607,10 @@ void WeightMask8x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask8x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask8x32_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y5 = 5;
@@ -618,9 +622,9 @@ void WeightMask8x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
WEIGHT8_PAIR_WITHOUT_STRIDE_10BPP;
}
-#define WEIGHT16_WITHOUT_STRIDE_10BPP \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, \
- mask_stride)
+#define WEIGHT16_WITHOUT_STRIDE_10BPP \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, \
+ mask_stride)
#define WEIGHT16_AND_STRIDE_10BPP \
WEIGHT16_WITHOUT_STRIDE_10BPP; \
@@ -629,10 +633,10 @@ void WeightMask8x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask16x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x8_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y = 7;
@@ -643,10 +647,10 @@ void WeightMask16x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x16_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 5;
@@ -659,10 +663,10 @@ void WeightMask16x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x32_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y5 = 6;
@@ -678,10 +682,10 @@ void WeightMask16x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask16x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask16x64_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 21;
@@ -693,11 +697,11 @@ void WeightMask16x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
WEIGHT16_WITHOUT_STRIDE_10BPP;
}
-#define WEIGHT32_WITHOUT_STRIDE_10BPP \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, \
- mask_stride); \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
- mask + 16, mask_stride)
+#define WEIGHT32_WITHOUT_STRIDE_10BPP \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, \
+ mask_stride); \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
+ mask + 16, mask_stride)
#define WEIGHT32_AND_STRIDE_10BPP \
WEIGHT32_WITHOUT_STRIDE_10BPP; \
@@ -706,10 +710,10 @@ void WeightMask16x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask32x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x8_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
WEIGHT32_AND_STRIDE_10BPP;
@@ -723,10 +727,10 @@ void WeightMask32x8_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x16_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 5;
@@ -739,10 +743,10 @@ void WeightMask32x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x32_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y5 = 6;
@@ -758,10 +762,10 @@ void WeightMask32x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask32x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask32x64_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 21;
@@ -773,15 +777,15 @@ void WeightMask32x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
WEIGHT32_WITHOUT_STRIDE_10BPP;
}
-#define WEIGHT64_WITHOUT_STRIDE_10BPP \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0, pred_1, mask, \
- mask_stride); \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
- mask + 16, mask_stride); \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0 + 32, pred_1 + 32, \
- mask + 32, mask_stride); \
- WeightMask16_10bpp_SSE4<mask_is_inverse, true>(pred_0 + 48, pred_1 + 48, \
- mask + 48, mask_stride)
+#define WEIGHT64_WITHOUT_STRIDE_10BPP \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0, pred_1, mask, \
+ mask_stride); \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0 + 16, pred_1 + 16, \
+ mask + 16, mask_stride); \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0 + 32, pred_1 + 32, \
+ mask + 32, mask_stride); \
+ WeightMask16_10bpp_SSE4_1<mask_is_inverse, true>(pred_0 + 48, pred_1 + 48, \
+ mask + 48, mask_stride)
#define WEIGHT64_AND_STRIDE_10BPP \
WEIGHT64_WITHOUT_STRIDE_10BPP; \
@@ -790,10 +794,10 @@ void WeightMask32x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
mask += mask_stride
template <bool mask_is_inverse>
-void WeightMask64x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x16_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 5;
@@ -806,10 +810,10 @@ void WeightMask64x16_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x32_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y5 = 6;
@@ -825,10 +829,10 @@ void WeightMask64x32_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x64_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 21;
@@ -841,10 +845,10 @@ void WeightMask64x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask64x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask64x128_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 42;
@@ -858,10 +862,10 @@ void WeightMask64x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask128x64_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 21;
@@ -902,10 +906,10 @@ void WeightMask128x64_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
}
template <bool mask_is_inverse>
-void WeightMask128x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
- const void* LIBGAV1_RESTRICT prediction_1,
- uint8_t* LIBGAV1_RESTRICT mask,
- ptrdiff_t mask_stride) {
+void WeightMask128x128_10bpp_SSE4_1(const void* LIBGAV1_RESTRICT prediction_0,
+ const void* LIBGAV1_RESTRICT prediction_1,
+ uint8_t* LIBGAV1_RESTRICT mask,
+ ptrdiff_t mask_stride) {
const auto* pred_0 = static_cast<const uint16_t*>(prediction_0);
const auto* pred_1 = static_cast<const uint16_t*>(prediction_1);
int y3 = 42;
@@ -956,9 +960,9 @@ void WeightMask128x128_10bpp_SSE4(const void* LIBGAV1_RESTRICT prediction_0,
#define INIT_WEIGHT_MASK_10BPP(width, height, w_index, h_index) \
dsp->weight_mask[w_index][h_index][0] = \
- WeightMask##width##x##height##_10bpp_SSE4<0>; \
+ WeightMask##width##x##height##_10bpp_SSE4_1<0>; \
dsp->weight_mask[w_index][h_index][1] = \
- WeightMask##width##x##height##_10bpp_SSE4<1>
+ WeightMask##width##x##height##_10bpp_SSE4_1<1>
void Init10bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
assert(dsp != nullptr);