Update upstream source from tag 'upstream/0.19.0'

Update to upstream version '0.19.0' with Debian dir a4233a4a247b06e8d6e36d07d059f03582d97721
author: Boyuan Yang <byang@debian.org> 2023-11-27 22:46:32 -0500
committer: Boyuan Yang <byang@debian.org> 2023-11-27 22:46:32 -0500
commit: 7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d (patch)
tree: 4ed468528001d8e80e3be09413ae927ca2ac05ce /src/dsp/x86
parent: 0d1e75e423265689dd49c7d6023d8bba70ca4d05 (diff)
parent: 19564cb4f77660cdb2f980ca619d4b979b9fe342 (diff)
download: libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.gz
libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.bz2
libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.zip
9 files changed, 116 insertions, 12 deletions
diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc
index 2062683..4b294b0 100644
--- a/src/dsp/x86/common_avx2_test.cc
+++ b/src/dsp/x86/common_avx2_test.cc
@@ -12,26 +12,27 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "src/dsp/x86/common_avx2.h"
+#include "src/dsp/x86/common_avx2_test.h"
 
 #include "gtest/gtest.h"
+#include "src/utils/cpu.h"
 
 #if LIBGAV1_TARGETING_AVX2
 
 #include <cstdint>
 
+#include "src/dsp/x86/common_avx2.h"
 #include "src/utils/common.h"
 
 namespace libgav1 {
 namespace dsp {
-namespace {
 
 // Show that RightShiftWithRounding_S16() is equal to
 // RightShiftWithRounding() only for values less than or equal to
 // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
 // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
 // negative values.
-TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
+void AVX2RightShiftWithRoundingS16Test() {
   for (int bits = 0; bits < 16; ++bits) {
     const int bias = (1 << bits) >> 1;
     for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -53,15 +54,20 @@ TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
   }
 }
 
-}  // namespace
 }  // namespace dsp
 }  // namespace libgav1
 
 #else  // !LIBGAV1_TARGETING_AVX2
 
-TEST(CommonDspTest, AVX2) {
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test() {
   GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable "
                   "the tests.";
 }
 
+}  // namespace dsp
+}  // namespace libgav1
+
 #endif  // LIBGAV1_TARGETING_AVX2
diff --git a/src/dsp/x86/common_avx2_test.h b/src/dsp/x86/common_avx2_test.h
new file mode 100644
index 0000000..1124f7f
--- /dev/null
+++ b/src/dsp/x86/common_avx2_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test();
+
+}  // namespace dsp
+}  // namespace libgav1
+
+#endif  // LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc
index 3288cfc..592630c 100644
--- a/src/dsp/x86/common_sse4_test.cc
+++ b/src/dsp/x86/common_sse4_test.cc
@@ -12,26 +12,27 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "src/dsp/x86/common_sse4.h"
+#include "src/dsp/x86/common_sse4_test.h"
 
 #include "gtest/gtest.h"
+#include "src/utils/cpu.h"
 
 #if LIBGAV1_TARGETING_SSE4_1
 
 #include <cstdint>
 
+#include "src/dsp/x86/common_sse4.h"
 #include "src/utils/common.h"
 
 namespace libgav1 {
 namespace dsp {
-namespace {
 
 // Show that RightShiftWithRounding_S16() is equal to
 // RightShiftWithRounding() only for values less than or equal to
 // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
 // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
 // negative values.
-TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
+void SSE41RightShiftWithRoundingS16Test() {
   for (int bits = 0; bits < 16; ++bits) {
     const int bias = (1 << bits) >> 1;
     for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -50,15 +51,20 @@ TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
   }
 }
 
-}  // namespace
 }  // namespace dsp
 }  // namespace libgav1
 
 #else  // !LIBGAV1_TARGETING_SSE4_1
 
-TEST(CommonDspTest, SSE41) {
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test() {
   GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable "
                   "the tests.";
 }
 
+}  // namespace dsp
+}  // namespace libgav1
+
 #endif  // LIBGAV1_TARGETING_SSE4_1
diff --git a/src/dsp/x86/common_sse4_test.h b/src/dsp/x86/common_sse4_test.h
new file mode 100644
index 0000000..169439a
--- /dev/null
+++ b/src/dsp/x86/common_sse4_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test();
+
+}  // namespace dsp
+}  // namespace libgav1
+
+#endif  // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc
index 6e94347..ff51aee 100644
--- a/src/dsp/x86/convolve_avx2.cc
+++ b/src/dsp/x86/convolve_avx2.cc
@@ -27,6 +27,7 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/x86/common_avx2.h"
 #include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
 #include "src/utils/constants.h"
 
 namespace libgav1 {
@@ -607,6 +608,10 @@ void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference,
   alignas(32) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
@@ -1374,6 +1379,10 @@ void ConvolveCompound2D_AVX2(
   alignas(32) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc
index f427c4c..99b87d6 100644
--- a/src/dsp/x86/convolve_sse4.cc
+++ b/src/dsp/x86/convolve_sse4.cc
@@ -28,6 +28,7 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/x86/common_sse4.h"
 #include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
 
 namespace libgav1 {
 namespace dsp {
@@ -254,6 +255,10 @@ void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
   alignas(16) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
@@ -617,6 +622,10 @@ void ConvolveCompound2D_SSE4_1(
   alignas(16) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
 
   // Horizontal filter.
   // Filter types used for width <= 4 are different from those for width > 4.
@@ -1157,6 +1166,10 @@ void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
   alignas(16) int16_t
       intermediate_result[kIntermediateAllocWidth *
                           (2 * kIntermediateAllocWidth + kSubPixelTaps)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x44, sizeof(intermediate_result));
+#endif
   const int num_vert_taps = dsp::GetNumTapsInFilter(vert_filter_index);
   const int intermediate_height =
       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
diff --git a/src/dsp/x86/intrapred_directional_sse4.cc b/src/dsp/x86/intrapred_directional_sse4.cc
index bc61745..2e64d21 100644
--- a/src/dsp/x86/intrapred_directional_sse4.cc
+++ b/src/dsp/x86/intrapred_directional_sse4.cc
@@ -1023,6 +1023,10 @@ void DirectionalIntraPredictorZone2_SSE4_1(void* const dest, ptrdiff_t stride,
   uint8_t left_buffer[288];
   memcpy(top_buffer + 128, static_cast<const uint8_t*>(top_row) - 16, 160);
   memcpy(left_buffer + 128, static_cast<const uint8_t*>(left_column) - 16, 160);
+#if LIBGAV1_MSAN
+  memset(top_buffer, 0x33, 128);
+  memset(left_buffer, 0x44, 128);
+#endif
   const uint8_t* top_ptr = top_buffer + 144;
   const uint8_t* left_ptr = left_buffer + 144;
   if (width == 4 || height == 4) {
diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc
index 6625d51..029e168 100644
--- a/src/dsp/x86/loop_restoration_10bit_sse4.cc
+++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc
@@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
   // general-purpose register to process. Faster than using _mm_extract_epi8().
   uint8_t temp[8];
   StoreLo8(temp, idx);
-  *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  // offset == 0 is assumed to be the first call to this function. The value is
+  // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least
+  // equivalent to, if not faster than, pinsrb.
+  if (offset == 0) {
+    *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+  } else {
+    *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  }
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);
diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc
index b4df072..8c24c39 100644
--- a/src/dsp/x86/loop_restoration_sse4.cc
+++ b/src/dsp/x86/loop_restoration_sse4.cc
@@ -1222,7 +1222,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
   // general-purpose register to process. Faster than using _mm_extract_epi8().
   uint8_t temp[8];
   StoreLo8(temp, idx);
-  *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  // offset == 0 is assumed to be the first call to this function. The value is
+  // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least
+  // equivalent to, if not faster than, pinsrb.
+  if (offset == 0) {
+    *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+  } else {
+    *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  }
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);
author	Boyuan Yang <byang@debian.org>	2023-11-27 22:46:32 -0500
committer	Boyuan Yang <byang@debian.org>	2023-11-27 22:46:32 -0500
commit	7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d (patch)
tree	4ed468528001d8e80e3be09413ae927ca2ac05ce /src/dsp/x86
parent	0d1e75e423265689dd49c7d6023d8bba70ca4d05 (diff)
parent	19564cb4f77660cdb2f980ca619d4b979b9fe342 (diff)
download	libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.gz libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.tar.bz2 libgav1-7fee0fd1b17b4f963fa1db74c3a5fcc3ff142e0d.zip