diff options
author | Boyuan Yang <byang@debian.org> | 2022-07-14 15:56:59 -0400 |
---|---|---|
committer | Boyuan Yang <byang@debian.org> | 2022-07-14 15:56:59 -0400 |
commit | 1a2e17bd28a068714658551c8c355171ce15dfa0 (patch) | |
tree | db9e739007016850ee355365874a20b07034ef2c /src/dsp/arm/inverse_transform_10bit_neon.cc | |
parent | a08da9600832caf817125edee2c3206fe24cd5cb (diff) | |
parent | d4dbf19f6b0181ee78034bfe4caf189d1c016998 (diff) | |
download | libgav1-1a2e17bd28a068714658551c8c355171ce15dfa0.tar.gz libgav1-1a2e17bd28a068714658551c8c355171ce15dfa0.tar.bz2 libgav1-1a2e17bd28a068714658551c8c355171ce15dfa0.zip |
Update upstream source from tag 'upstream/0.18.0'
Update to upstream version '0.18.0'
with Debian dir a69c1f7f3e7109393a3f9f5f1a2e7a5c3d3eda9f
Diffstat (limited to 'src/dsp/arm/inverse_transform_10bit_neon.cc')
-rw-r--r-- | src/dsp/arm/inverse_transform_10bit_neon.cc | 28 |
1 file changed, 20 insertions, 8 deletions
```diff
diff --git a/src/dsp/arm/inverse_transform_10bit_neon.cc b/src/dsp/arm/inverse_transform_10bit_neon.cc
index 617accc..e6f0d9d 100644
--- a/src/dsp/arm/inverse_transform_10bit_neon.cc
+++ b/src/dsp/arm/inverse_transform_10bit_neon.cc
@@ -282,9 +282,12 @@ LIBGAV1_ALWAYS_INLINE void Dct4_NEON(void* dest, int32_t step, bool is_row,
   const int32x4_t max = vdupq_n_s32((1 << range) - 1);
   int32x4_t s[4], x[4];
 
-  LoadSrc<4>(dst, step, 0, x);
   if (is_row) {
-    Transpose4x4(x, x);
+    assert(step == 4);
+    int32x4x4_t y = vld4q_s32(dst);
+    for (int i = 0; i < 4; ++i) x[i] = y.val[i];
+  } else {
+    LoadSrc<4>(dst, step, 0, x);
   }
 
   // stage 1.
@@ -301,9 +304,12 @@ LIBGAV1_ALWAYS_INLINE void Dct4_NEON(void* dest, int32_t step, bool is_row,
     for (auto& i : s) {
       i = vmovl_s16(vqmovn_s32(vqrshlq_s32(i, v_row_shift)));
     }
-    Transpose4x4(s, s);
+    int32x4x4_t y;
+    for (int i = 0; i < 4; ++i) y.val[i] = s[i];
+    vst4q_s32(dst, y);
+  } else {
+    StoreDst<4>(dst, step, 0, s);
   }
-  StoreDst<4>(dst, step, 0, s);
 }
 
 template <ButterflyRotationFunc butterfly_rotation,
@@ -937,9 +943,12 @@ LIBGAV1_ALWAYS_INLINE void Adst4_NEON(void* dest, int32_t step, bool is_row,
   int32x4_t s[8];
   int32x4_t x[4];
 
-  LoadSrc<4>(dst, step, 0, x);
   if (is_row) {
-    Transpose4x4(x, x);
+    assert(step == 4);
+    int32x4x4_t y = vld4q_s32(dst);
+    for (int i = 0; i < 4; ++i) x[i] = y.val[i];
+  } else {
+    LoadSrc<4>(dst, step, 0, x);
   }
 
   // stage 1.
@@ -981,9 +990,12 @@ LIBGAV1_ALWAYS_INLINE void Adst4_NEON(void* dest, int32_t step, bool is_row,
     x[1] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[1], v_row_shift)));
     x[2] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[2], v_row_shift)));
     x[3] = vmovl_s16(vqmovn_s32(vqrshlq_s32(x[3], v_row_shift)));
-    Transpose4x4(x, x);
+    int32x4x4_t y;
+    for (int i = 0; i < 4; ++i) y.val[i] = x[i];
+    vst4q_s32(dst, y);
+  } else {
+    StoreDst<4>(dst, step, 0, x);
   }
-  StoreDst<4>(dst, step, 0, x);
 }
 
 alignas(16) constexpr int32_t kAdst4DcOnlyMultiplier[4] = {1321, 2482, 3344,
```