diff options
Diffstat (limited to 'src/dsp/x86/motion_vector_search_sse4.cc')
-rw-r--r-- | src/dsp/x86/motion_vector_search_sse4.cc | 65 |
1 files changed, 27 insertions, 38 deletions
diff --git a/src/dsp/x86/motion_vector_search_sse4.cc b/src/dsp/x86/motion_vector_search_sse4.cc index 7f5f035..dacc6ec 100644 --- a/src/dsp/x86/motion_vector_search_sse4.cc +++ b/src/dsp/x86/motion_vector_search_sse4.cc @@ -64,7 +64,7 @@ inline __m128i MvProjectionClip(const __m128i mvs[2], } inline __m128i MvProjectionCompoundClip( - const MotionVector* const temporal_mvs, + const MotionVector* LIBGAV1_RESTRICT const temporal_mvs, const int8_t temporal_reference_offsets[2], const int reference_offsets[2]) { const auto* const tmvs = reinterpret_cast<const int32_t*>(temporal_mvs); @@ -83,8 +83,8 @@ inline __m128i MvProjectionCompoundClip( } inline __m128i MvProjectionSingleClip( - const MotionVector* const temporal_mvs, - const int8_t* const temporal_reference_offsets, + const MotionVector* LIBGAV1_RESTRICT const temporal_mvs, + const int8_t* LIBGAV1_RESTRICT const temporal_reference_offsets, const int reference_offset) { const auto* const tmvs = reinterpret_cast<const int16_t*>(temporal_mvs); const __m128i temporal_mv = LoadAligned16(tmvs); @@ -126,9 +126,10 @@ inline void ForceInteger(const __m128i mv, void* const candidate_mvs) { } void MvProjectionCompoundLowPrecision_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, const int reference_offsets[2], const int count, - CompoundMotionVector* candidate_mvs) { + CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) { // |reference_offsets| non-zero check usually equals true and is ignored. // To facilitate the compilers, make a local copy of |reference_offsets|. const int offsets[2] = {reference_offsets[0], reference_offsets[1]}; @@ -143,9 +144,10 @@ void MvProjectionCompoundLowPrecision_SSE4_1( } void MvProjectionCompoundForceInteger_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, const int reference_offsets[2], const int count, - CompoundMotionVector* candidate_mvs) { + CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) { // |reference_offsets| non-zero check usually equals true and is ignored. // To facilitate the compilers, make a local copy of |reference_offsets|. const int offsets[2] = {reference_offsets[0], reference_offsets[1]}; @@ -160,9 +162,10 @@ void MvProjectionCompoundForceInteger_SSE4_1( } void MvProjectionCompoundHighPrecision_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, const int reference_offsets[2], const int count, - CompoundMotionVector* candidate_mvs) { + CompoundMotionVector* LIBGAV1_RESTRICT candidate_mvs) { // |reference_offsets| non-zero check usually equals true and is ignored. // To facilitate the compilers, make a local copy of |reference_offsets|. const int offsets[2] = {reference_offsets[0], reference_offsets[1]}; @@ -177,8 +180,10 @@ void MvProjectionCompoundHighPrecision_SSE4_1( } void MvProjectionSingleLowPrecision_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, - const int reference_offset, const int count, MotionVector* candidate_mvs) { + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, + const int reference_offset, const int count, + MotionVector* LIBGAV1_RESTRICT candidate_mvs) { // Up to three more elements could be calculated. int i = 0; do { @@ -190,8 +195,10 @@ void MvProjectionSingleLowPrecision_SSE4_1( } void MvProjectionSingleForceInteger_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, - const int reference_offset, const int count, MotionVector* candidate_mvs) { + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, + const int reference_offset, const int count, + MotionVector* LIBGAV1_RESTRICT candidate_mvs) { // Up to three more elements could be calculated. int i = 0; do { @@ -203,8 +210,10 @@ void MvProjectionSingleForceInteger_SSE4_1( } void MvProjectionSingleHighPrecision_SSE4_1( - const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets, - const int reference_offset, const int count, MotionVector* candidate_mvs) { + const MotionVector* LIBGAV1_RESTRICT temporal_mvs, + const int8_t* LIBGAV1_RESTRICT temporal_reference_offsets, + const int reference_offset, const int count, + MotionVector* LIBGAV1_RESTRICT candidate_mvs) { // Up to three more elements could be calculated. int i = 0; do { @@ -215,20 +224,10 @@ void MvProjectionSingleHighPrecision_SSE4_1( } while (i < count); } -void Init8bpp() { - Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8); - assert(dsp != nullptr); - dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1; - dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1; - dsp->mv_projection_compound[2] = MvProjectionCompoundHighPrecision_SSE4_1; - dsp->mv_projection_single[0] = MvProjectionSingleLowPrecision_SSE4_1; - dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1; - dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1; -} +} // namespace -#if LIBGAV1_MAX_BITDEPTH >= 10 -void Init10bpp() { - Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10); +void MotionVectorSearchInit_SSE4_1() { + Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8); assert(dsp != nullptr); dsp->mv_projection_compound[0] = MvProjectionCompoundLowPrecision_SSE4_1; dsp->mv_projection_compound[1] = MvProjectionCompoundForceInteger_SSE4_1; @@ -237,16 +236,6 @@ void Init10bpp() { dsp->mv_projection_single[1] = MvProjectionSingleForceInteger_SSE4_1; dsp->mv_projection_single[2] = MvProjectionSingleHighPrecision_SSE4_1; } -#endif - -} // namespace - -void MotionVectorSearchInit_SSE4_1() { - Init8bpp(); -#if LIBGAV1_MAX_BITDEPTH >= 10 - Init10bpp(); -#endif -} } // namespace dsp } // namespace libgav1 |