aboutsummaryrefslogtreecommitdiff
path: root/src/dsp/x86
diff options
context:
space:
mode:
authorBoyuan Yang <byang@debian.org>2023-11-27 22:46:32 -0500
committerBoyuan Yang <byang@debian.org>2023-11-27 22:46:32 -0500
commit7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d (patch)
tree4ed468528001d8e80e3be09413ae927ca2ac05ce /src/dsp/x86
parent0d1e75e423265689dd49c7d6023d8bba70ca4d05 (diff)
parent19564cb4f77660cdb2f980ca619d4b979b9fe342 (diff)
downloadlibgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.gz
libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.bz2
libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.zip
Update upstream source from tag 'upstream/0.19.0'
Update to upstream version '0.19.0' with Debian dir a4233a4a247b06e8d6e36d07d059f03582d97721
Diffstat (limited to 'src/dsp/x86')
-rw-r--r--src/dsp/x86/common_avx2_test.cc16
-rw-r--r--src/dsp/x86/common_avx2_test.h26
-rw-r--r--src/dsp/x86/common_sse4_test.cc16
-rw-r--r--src/dsp/x86/common_sse4_test.h26
-rw-r--r--src/dsp/x86/convolve_avx2.cc9
-rw-r--r--src/dsp/x86/convolve_sse4.cc13
-rw-r--r--src/dsp/x86/intrapred_directional_sse4.cc4
-rw-r--r--src/dsp/x86/loop_restoration_10bit_sse4.cc9
-rw-r--r--src/dsp/x86/loop_restoration_sse4.cc9
9 files changed, 116 insertions, 12 deletions
diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc
index 2062683..4b294b0 100644
--- a/src/dsp/x86/common_avx2_test.cc
+++ b/src/dsp/x86/common_avx2_test.cc
@@ -12,26 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "src/dsp/x86/common_avx2.h"
+#include "src/dsp/x86/common_avx2_test.h"
#include "gtest/gtest.h"
+#include "src/utils/cpu.h"
#if LIBGAV1_TARGETING_AVX2
#include <cstdint>
+#include "src/dsp/x86/common_avx2.h"
#include "src/utils/common.h"
namespace libgav1 {
namespace dsp {
-namespace {
// Show that RightShiftWithRounding_S16() is equal to
// RightShiftWithRounding() only for values less than or equal to
// INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
// RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
// negative values.
-TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
+void AVX2RightShiftWithRoundingS16Test() {
for (int bits = 0; bits < 16; ++bits) {
const int bias = (1 << bits) >> 1;
for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -53,15 +54,20 @@ TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
}
}
-} // namespace
} // namespace dsp
} // namespace libgav1
#else // !LIBGAV1_TARGETING_AVX2
-TEST(CommonDspTest, AVX2) {
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test() {
GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable "
"the tests.";
}
+} // namespace dsp
+} // namespace libgav1
+
#endif // LIBGAV1_TARGETING_AVX2
diff --git a/src/dsp/x86/common_avx2_test.h b/src/dsp/x86/common_avx2_test.h
new file mode 100644
index 0000000..1124f7f
--- /dev/null
+++ b/src/dsp/x86/common_avx2_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test();
+
+} // namespace dsp
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc
index 3288cfc..592630c 100644
--- a/src/dsp/x86/common_sse4_test.cc
+++ b/src/dsp/x86/common_sse4_test.cc
@@ -12,26 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "src/dsp/x86/common_sse4.h"
+#include "src/dsp/x86/common_sse4_test.h"
#include "gtest/gtest.h"
+#include "src/utils/cpu.h"
#if LIBGAV1_TARGETING_SSE4_1
#include <cstdint>
+#include "src/dsp/x86/common_sse4.h"
#include "src/utils/common.h"
namespace libgav1 {
namespace dsp {
-namespace {
// Show that RightShiftWithRounding_S16() is equal to
// RightShiftWithRounding() only for values less than or equal to
// INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
// RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
// negative values.
-TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
+void SSE41RightShiftWithRoundingS16Test() {
for (int bits = 0; bits < 16; ++bits) {
const int bias = (1 << bits) >> 1;
for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -50,15 +51,20 @@ TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
}
}
-} // namespace
} // namespace dsp
} // namespace libgav1
#else // !LIBGAV1_TARGETING_SSE4_1
-TEST(CommonDspTest, SSE41) {
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test() {
GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable "
"the tests.";
}
+} // namespace dsp
+} // namespace libgav1
+
#endif // LIBGAV1_TARGETING_SSE4_1
diff --git a/src/dsp/x86/common_sse4_test.h b/src/dsp/x86/common_sse4_test.h
new file mode 100644
index 0000000..169439a
--- /dev/null
+++ b/src/dsp/x86/common_sse4_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test();
+
+} // namespace dsp
+} // namespace libgav1
+
+#endif // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc
index 6e94347..ff51aee 100644
--- a/src/dsp/x86/convolve_avx2.cc
+++ b/src/dsp/x86/convolve_avx2.cc
@@ -27,6 +27,7 @@
#include "src/dsp/dsp.h"
#include "src/dsp/x86/common_avx2.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
namespace libgav1 {
@@ -607,6 +608,10 @@ void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference,
alignas(32) uint16_t
intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
const int intermediate_height = height + vertical_taps - 1;
const ptrdiff_t src_stride = reference_stride;
@@ -1374,6 +1379,10 @@ void ConvolveCompound2D_AVX2(
alignas(32) uint16_t
intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
const int intermediate_height = height + vertical_taps - 1;
const ptrdiff_t src_stride = reference_stride;
diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc
index f427c4c..99b87d6 100644
--- a/src/dsp/x86/convolve_sse4.cc
+++ b/src/dsp/x86/convolve_sse4.cc
@@ -28,6 +28,7 @@
#include "src/dsp/dsp.h"
#include "src/dsp/x86/common_sse4.h"
#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
namespace libgav1 {
namespace dsp {
@@ -254,6 +255,10 @@ void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
alignas(16) uint16_t
intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
const int intermediate_height = height + vertical_taps - 1;
const ptrdiff_t src_stride = reference_stride;
@@ -617,6 +622,10 @@ void ConvolveCompound2D_SSE4_1(
alignas(16) uint16_t
intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
// Horizontal filter.
// Filter types used for width <= 4 are different from those for width > 4.
@@ -1157,6 +1166,10 @@ void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
alignas(16) int16_t
intermediate_result[kIntermediateAllocWidth *
(2 * kIntermediateAllocWidth + kSubPixelTaps)];
+#if LIBGAV1_MSAN
+ // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+ memset(intermediate_result, 0x44, sizeof(intermediate_result));
+#endif
const int num_vert_taps = dsp::GetNumTapsInFilter(vert_filter_index);
const int intermediate_height =
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
diff --git a/src/dsp/x86/intrapred_directional_sse4.cc b/src/dsp/x86/intrapred_directional_sse4.cc
index bc61745..2e64d21 100644
--- a/src/dsp/x86/intrapred_directional_sse4.cc
+++ b/src/dsp/x86/intrapred_directional_sse4.cc
@@ -1023,6 +1023,10 @@ void DirectionalIntraPredictorZone2_SSE4_1(void* const dest, ptrdiff_t stride,
uint8_t left_buffer[288];
memcpy(top_buffer + 128, static_cast<const uint8_t*>(top_row) - 16, 160);
memcpy(left_buffer + 128, static_cast<const uint8_t*>(left_column) - 16, 160);
+#if LIBGAV1_MSAN
+ memset(top_buffer, 0x33, 128);
+ memset(left_buffer, 0x44, 128);
+#endif
const uint8_t* top_ptr = top_buffer + 144;
const uint8_t* left_ptr = left_buffer + 144;
if (width == 4 || height == 4) {
diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc
index 6625d51..029e168 100644
--- a/src/dsp/x86/loop_restoration_10bit_sse4.cc
+++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc
@@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
// general-purpose register to process. Faster than using _mm_extract_epi8().
uint8_t temp[8];
StoreLo8(temp, idx);
- *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+ // offset == 0 is assumed to be the first call to this function. The value is
+ // mov'd to avoid -Wuninitialized warnings under gcc. mov should at least
+ // equivalent if not faster than pinsrb.
+ if (offset == 0) {
+ *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+ } else {
+ *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+ }
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);
diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc
index b4df072..8c24c39 100644
--- a/src/dsp/x86/loop_restoration_sse4.cc
+++ b/src/dsp/x86/loop_restoration_sse4.cc
@@ -1222,7 +1222,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
// general-purpose register to process. Faster than using _mm_extract_epi8().
uint8_t temp[8];
StoreLo8(temp, idx);
- *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+ // offset == 0 is assumed to be the first call to this function. The value is
+ // mov'd to avoid -Wuninitialized warnings under gcc. mov should at least
+ // equivalent if not faster than pinsrb.
+ if (offset == 0) {
+ *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+ } else {
+ *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+ }
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
*ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);