diff options
author | Boyuan Yang <byang@debian.org> | 2023-11-27 22:46:32 -0500 |
---|---|---|
committer | Boyuan Yang <byang@debian.org> | 2023-11-27 22:46:32 -0500 |
commit | 7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d (patch) | |
tree | 4ed468528001d8e80e3be09413ae927ca2ac05ce /src/dsp/x86 | |
parent | 0d1e75e423265689dd49c7d6023d8bba70ca4d05 (diff) | |
parent | 19564cb4f77660cdb2f980ca619d4b979b9fe342 (diff) | |
download | libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.gz libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.bz2 libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.zip |
Update upstream source from tag 'upstream/0.19.0'
Update to upstream version '0.19.0'
with Debian dir a4233a4a247b06e8d6e36d07d059f03582d97721
Diffstat (limited to 'src/dsp/x86')
-rw-r--r-- | src/dsp/x86/common_avx2_test.cc | 16 | ||||
-rw-r--r-- | src/dsp/x86/common_avx2_test.h | 26 | ||||
-rw-r--r-- | src/dsp/x86/common_sse4_test.cc | 16 | ||||
-rw-r--r-- | src/dsp/x86/common_sse4_test.h | 26 | ||||
-rw-r--r-- | src/dsp/x86/convolve_avx2.cc | 9 | ||||
-rw-r--r-- | src/dsp/x86/convolve_sse4.cc | 13 | ||||
-rw-r--r-- | src/dsp/x86/intrapred_directional_sse4.cc | 4 | ||||
-rw-r--r-- | src/dsp/x86/loop_restoration_10bit_sse4.cc | 9 | ||||
-rw-r--r-- | src/dsp/x86/loop_restoration_sse4.cc | 9 |
9 files changed, 116 insertions, 12 deletions
diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc index 2062683..4b294b0 100644 --- a/src/dsp/x86/common_avx2_test.cc +++ b/src/dsp/x86/common_avx2_test.cc @@ -12,26 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "src/dsp/x86/common_avx2.h" +#include "src/dsp/x86/common_avx2_test.h" #include "gtest/gtest.h" +#include "src/utils/cpu.h" #if LIBGAV1_TARGETING_AVX2 #include <cstdint> +#include "src/dsp/x86/common_avx2.h" #include "src/utils/common.h" namespace libgav1 { namespace dsp { -namespace { // Show that RightShiftWithRounding_S16() is equal to // RightShiftWithRounding() only for values less than or equal to // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for // negative values. -TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) { +void AVX2RightShiftWithRoundingS16Test() { for (int bits = 0; bits < 16; ++bits) { const int bias = (1 << bits) >> 1; for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) { @@ -53,15 +54,20 @@ TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) { } } -} // namespace } // namespace dsp } // namespace libgav1 #else // !LIBGAV1_TARGETING_AVX2 -TEST(CommonDspTest, AVX2) { +namespace libgav1 { +namespace dsp { + +void AVX2RightShiftWithRoundingS16Test() { GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable " "the tests."; } +} // namespace dsp +} // namespace libgav1 + #endif // LIBGAV1_TARGETING_AVX2 diff --git a/src/dsp/x86/common_avx2_test.h b/src/dsp/x86/common_avx2_test.h new file mode 100644 index 0000000..1124f7f --- /dev/null +++ b/src/dsp/x86/common_avx2_test.h @@ -0,0 +1,26 @@ +// Copyright 2023 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ +#define LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ + +namespace libgav1 { +namespace dsp { + +void AVX2RightShiftWithRoundingS16Test(); + +} // namespace dsp +} // namespace libgav1 + +#endif // LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc index 3288cfc..592630c 100644 --- a/src/dsp/x86/common_sse4_test.cc +++ b/src/dsp/x86/common_sse4_test.cc @@ -12,26 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "src/dsp/x86/common_sse4.h" +#include "src/dsp/x86/common_sse4_test.h" #include "gtest/gtest.h" +#include "src/utils/cpu.h" #if LIBGAV1_TARGETING_SSE4_1 #include <cstdint> +#include "src/dsp/x86/common_sse4.h" #include "src/utils/common.h" namespace libgav1 { namespace dsp { -namespace { // Show that RightShiftWithRounding_S16() is equal to // RightShiftWithRounding() only for values less than or equal to // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for // negative values. 
-TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) { +void SSE41RightShiftWithRoundingS16Test() { for (int bits = 0; bits < 16; ++bits) { const int bias = (1 << bits) >> 1; for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) { @@ -50,15 +51,20 @@ TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) { } } -} // namespace } // namespace dsp } // namespace libgav1 #else // !LIBGAV1_TARGETING_SSE4_1 -TEST(CommonDspTest, SSE41) { +namespace libgav1 { +namespace dsp { + +void SSE41RightShiftWithRoundingS16Test() { GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable " "the tests."; } +} // namespace dsp +} // namespace libgav1 + #endif // LIBGAV1_TARGETING_SSE4_1 diff --git a/src/dsp/x86/common_sse4_test.h b/src/dsp/x86/common_sse4_test.h new file mode 100644 index 0000000..169439a --- /dev/null +++ b/src/dsp/x86/common_sse4_test.h @@ -0,0 +1,26 @@ +// Copyright 2023 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ +#define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ + +namespace libgav1 { +namespace dsp { + +void SSE41RightShiftWithRoundingS16Test(); + +} // namespace dsp +} // namespace libgav1 + +#endif // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc index 6e94347..ff51aee 100644 --- a/src/dsp/x86/convolve_avx2.cc +++ b/src/dsp/x86/convolve_avx2.cc @@ -27,6 +27,7 @@ #include "src/dsp/dsp.h" #include "src/dsp/x86/common_avx2.h" #include "src/utils/common.h" +#include "src/utils/compiler_attributes.h" #include "src/utils/constants.h" namespace libgav1 { @@ -607,6 +608,10 @@ void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference, alignas(32) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; @@ -1374,6 +1379,10 @@ void ConvolveCompound2D_AVX2( alignas(32) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. 
+ memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc index f427c4c..99b87d6 100644 --- a/src/dsp/x86/convolve_sse4.cc +++ b/src/dsp/x86/convolve_sse4.cc @@ -28,6 +28,7 @@ #include "src/dsp/dsp.h" #include "src/dsp/x86/common_sse4.h" #include "src/utils/common.h" +#include "src/utils/compiler_attributes.h" namespace libgav1 { namespace dsp { @@ -254,6 +255,10 @@ void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference, alignas(16) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; @@ -617,6 +622,10 @@ void ConvolveCompound2D_SSE4_1( alignas(16) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif // Horizontal filter. // Filter types used for width <= 4 are different from those for width > 4. @@ -1157,6 +1166,10 @@ void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference, alignas(16) int16_t intermediate_result[kIntermediateAllocWidth * (2 * kIntermediateAllocWidth + kSubPixelTaps)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. 
+ memset(intermediate_result, 0x44, sizeof(intermediate_result)); +#endif const int num_vert_taps = dsp::GetNumTapsInFilter(vert_filter_index); const int intermediate_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> diff --git a/src/dsp/x86/intrapred_directional_sse4.cc b/src/dsp/x86/intrapred_directional_sse4.cc index bc61745..2e64d21 100644 --- a/src/dsp/x86/intrapred_directional_sse4.cc +++ b/src/dsp/x86/intrapred_directional_sse4.cc @@ -1023,6 +1023,10 @@ void DirectionalIntraPredictorZone2_SSE4_1(void* const dest, ptrdiff_t stride, uint8_t left_buffer[288]; memcpy(top_buffer + 128, static_cast<const uint8_t*>(top_row) - 16, 160); memcpy(left_buffer + 128, static_cast<const uint8_t*>(left_column) - 16, 160); +#if LIBGAV1_MSAN + memset(top_buffer, 0x33, 128); + memset(left_buffer, 0x44, 128); +#endif const uint8_t* top_ptr = top_buffer + 144; const uint8_t* left_ptr = left_buffer + 144; if (width == 4 || height == 4) { diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc index 6625d51..029e168 100644 --- a/src/dsp/x86/loop_restoration_10bit_sse4.cc +++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc @@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index, // general-purpose register to process. Faster than using _mm_extract_epi8(). uint8_t temp[8]; StoreLo8(temp, idx); - *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least + // equivalent if not faster than pinsrb. 
+ if (offset == 0) { + *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]); + } else { + *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + } *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3); diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc index b4df072..8c24c39 100644 --- a/src/dsp/x86/loop_restoration_sse4.cc +++ b/src/dsp/x86/loop_restoration_sse4.cc @@ -1222,7 +1222,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index, // general-purpose register to process. Faster than using _mm_extract_epi8(). uint8_t temp[8]; StoreLo8(temp, idx); - *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least + // equivalent if not faster than pinsrb. + if (offset == 0) { + *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]); + } else { + *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + } *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3); |