about | summary | refs | log | tree | commit | diff
path: root/src/dsp/x86/motion_vector_search_sse4.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/x86/motion_vector_search_sse4.cc')
-rw-r--r-- src/dsp/x86/motion_vector_search_sse4.cc | 65
1 files changed, 27 insertions, 38 deletions
diff --git a/src/dsp/x86/motion_vector_search_sse4.cc b/src/dsp/x86/motion_vector_search_sse4.cc
index 7f5f035..dacc6ec 100644
--- a/src/dsp/x86/motion_vector_search_sse4.cc
+++ b/src/dsp/x86/motion_vector_search_sse4.cc
@@ -64,7 +64,7 @@ inline __m128i MvProjectionClip(const __m128i mvs[2],
}
inline __m128i MvProjectionCompoundClip(
- const MotionVector* const temporal_mvs,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
const int8_t temporal_reference_offsets[2],
const int reference_offsets[2]) {
const auto* const tmvs = reinterpret_cast<const int32_t*>(temporal_mvs);
@@ -83,8 +83,8 @@ inline __m128i MvProjectionCompoundClip(
}
inline __m128i MvProjectionSingleClip(
- const MotionVector* const temporal_mvs,
- const int8_t* const temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT const temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets,
const int reference_offset) {
const auto* const tmvs = reinterpret_cast<const int16_t*>(temporal_mvs);
const __m128i temporal_mv = LoadAligned16(tmvs);
@@ -126,9 +126,10 @@ inline void ForceInteger(const __m128i mv, void* const candidate_mvs) {
}
void MvProjectionCompoundLowPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -143,9 +144,10 @@ void MvProjectionCompoundLowPrecision_SSE4_1(
}
void MvProjectionCompoundForceInteger_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -160,9 +162,10 @@ void MvProjectionCompoundForceInteger_SSE4_1(
}
void MvProjectionCompoundHighPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
const int reference_offsets[2], const int count,
- CompoundMotionVector* candidate_mvs) {
+ CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// |reference_offsets| non-zero check usually equals true and is ignored.
// To facilitate the compilers, make a local copy of |reference_offsets|.
const int offsets[2] = {reference_offsets[0], reference_offsets[1]};
@@ -177,8 +180,10 @@ void MvProjectionCompoundHighPrecision_SSE4_1(
}
void MvProjectionSingleLowPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -190,8 +195,10 @@ void MvProjectionSingleLowPrecision_SSE4_1(
}
void MvProjectionSingleForceInteger_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -203,8 +210,10 @@ void MvProjectionSingleForceInteger_SSE4_1(
}
void MvProjectionSingleHighPrecision_SSE4_1(
- const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
- const int reference_offset, const int count, MotionVector* candidate_mvs) {
+ const MotionVector* LIBGAV1_RESTRICT temporal_mvs,
+ const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets,
+ const int reference_offset, const int count,
+ MotionVector* LIBGAV1_RESTRICT candidate_mvs) {
// Up to three more elements could be calculated.
int i = 0;
do {
@@ -215,20 +224,10 @@ void MvProjectionSingleHighPrecision_SSE4_1(
} while (i < count);
}
-void Init8bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
- assert(dsp != nullptr);
- dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1;
- dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1;
- dsp->mv_projection_compound[2] = MvProjectionCompoundHighPrecision_SSE4_1;
- dsp->mv_projection_single[0] = MvProjectionSingleLowPrecision_SSE4_1;
- dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1;
- dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1;
-}
+} // namespace
-#if LIBGAV1_MAX_BITDEPTH >= 10
-void Init10bpp() {
- Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
+void MotionVectorSearchInit_SSE4_1() {
+ Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1;
dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1;
@@ -237,16 +236,6 @@ void Init10bpp() {
dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1;
dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1;
}
-#endif
-
-} // namespace
-
-void MotionVectorSearchInit_SSE4_1() {
- Init8bpp();
-#if LIBGAV1_MAX_BITDEPTH >= 10
- Init10bpp();
-#endif
-}
} // namespace dsp
} // namespace libgav1