diff options
Diffstat (limited to 'src/dsp/x86/loop_restoration_10bit_sse4.cc')
-rw-r--r-- | src/dsp/x86/loop_restoration_10bit_sse4.cc | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc index 6625d51..029e168 100644 --- a/src/dsp/x86/loop_restoration_10bit_sse4.cc +++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc @@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index, // general-purpose register to process. Faster than using _mm_extract_epi8(). uint8_t temp[8]; StoreLo8(temp, idx); - *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least + // equivalent if not faster than pinsrb. + if (offset == 0) { + *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]); + } else { + *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + } *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3); |