aboutsummaryrefslogtreecommitdiff
path: root/src/dsp/arm/inverse_transform_neon.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/arm/inverse_transform_neon.cc')
-rw-r--r-- src/dsp/arm/inverse_transform_neon.cc 235
1 files changed, 159 insertions, 76 deletions
diff --git a/src/dsp/arm/inverse_transform_neon.cc b/src/dsp/arm/inverse_transform_neon.cc
index 315d5e9..1c2e111 100644
--- a/src/dsp/arm/inverse_transform_neon.cc
+++ b/src/dsp/arm/inverse_transform_neon.cc
@@ -273,7 +273,8 @@ LIBGAV1_ALWAYS_INLINE void Transpose8x4To4x8(const int16x8_t in[4],
//------------------------------------------------------------------------------
template <int store_width, int store_count>
-LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
+LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* LIBGAV1_RESTRICT dst,
+ int32_t stride, int32_t idx,
const int16x8_t* const s) {
assert(store_count % 4 == 0);
assert(store_width == 8 || store_width == 16);
@@ -297,8 +298,8 @@ LIBGAV1_ALWAYS_INLINE void StoreDst(int16_t* dst, int32_t stride, int32_t idx,
}
template <int load_width, int load_count>
-LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* src, int32_t stride,
- int32_t idx, int16x8_t* x) {
+LIBGAV1_ALWAYS_INLINE void LoadSrc(const int16_t* LIBGAV1_RESTRICT src,
+ int32_t stride, int32_t idx, int16x8_t* x) {
assert(load_count % 4 == 0);
assert(load_width == 8 || load_width == 16);
// NOTE: It is expected that the compiler will unroll these loops.
@@ -388,6 +389,33 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
int16x8_t* b,
const int angle,
const bool flip) {
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
+ defined(__clang__) // ARM v8.1-A
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
+ // vqrdmlshq_s16, resulting in an "off by one" error. For now, do not use
+ // vqrdmulhq_n_s16().
+ const int16_t cos128 = Cos128(angle);
+ const int16_t sin128 = Sin128(angle);
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*b), -sin128);
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*b), cos128);
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
+
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*b), -sin128);
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*b), cos128);
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
+
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
+ if (flip) {
+ *a = y;
+ *b = x;
+ } else {
+ *a = x;
+ *b = y;
+ }
+#else
const int16_t cos128 = Cos128(angle);
const int16_t sin128 = Sin128(angle);
// For this function, the max value returned by Sin128() is 4091, which fits
@@ -403,12 +431,40 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
*a = x;
*b = y;
}
+#endif
}
LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
int16x8_t* b,
const int angle,
const bool flip) {
+#if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
+ defined(__clang__) // ARM v8.1-A
+ // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
+ // vqrdmlshq_s16, resulting in an "off by one" error. For now, do not use
+ // vqrdmulhq_n_s16().
+ const int16_t cos128 = Cos128(angle);
+ const int16_t sin128 = Sin128(angle);
+ const int32x4_t x0 = vmull_n_s16(vget_low_s16(*a), cos128);
+ const int32x4_t y0 = vmull_n_s16(vget_low_s16(*a), sin128);
+ const int16x4_t x1 = vqrshrn_n_s32(x0, 12);
+ const int16x4_t y1 = vqrshrn_n_s32(y0, 12);
+
+ const int32x4_t x0_hi = vmull_n_s16(vget_high_s16(*a), cos128);
+ const int32x4_t y0_hi = vmull_n_s16(vget_high_s16(*a), sin128);
+ const int16x4_t x1_hi = vqrshrn_n_s32(x0_hi, 12);
+ const int16x4_t y1_hi = vqrshrn_n_s32(y0_hi, 12);
+
+ const int16x8_t x = vcombine_s16(x1, x1_hi);
+ const int16x8_t y = vcombine_s16(y1, y1_hi);
+ if (flip) {
+ *a = y;
+ *b = x;
+ } else {
+ *a = x;
+ *b = y;
+ }
+#else
const int16_t cos128 = Cos128(angle);
const int16_t sin128 = Sin128(angle);
const int16x8_t x = vqrdmulhq_n_s16(*a, cos128 << 3);
@@ -420,6 +476,7 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_SecondIsZero(int16x8_t* a,
*a = x;
*b = y;
}
+#endif
}
LIBGAV1_ALWAYS_INLINE void HadamardRotation(int16x8_t* a, int16x8_t* b,
@@ -736,8 +793,8 @@ LIBGAV1_ALWAYS_INLINE void Dct16_NEON(void* dest, int32_t step, bool is_row,
if (is_row) {
const int16x8_t v_row_shift = vdupq_n_s16(-row_shift);
- for (int i = 0; i < 16; ++i) {
- s[i] = vqrshlq_s16(s[i], v_row_shift);
+ for (auto& i : s) {
+ i = vqrshlq_s16(i, v_row_shift);
}
}
@@ -914,8 +971,8 @@ LIBGAV1_ALWAYS_INLINE void Dct32_NEON(void* dest, const int32_t step,
for (int idx = 0; idx < 32; idx += 8) {
int16x8_t output[8];
Transpose8x8(&s[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1135,8 +1192,8 @@ void Dct64_NEON(void* dest, int32_t step, bool is_row, int row_shift) {
for (int idx = 0; idx < 64; idx += 8) {
int16x8_t output[8];
Transpose8x8(&s[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1611,13 +1668,13 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
const int16x8_t v_row_shift = vdupq_n_s16(-row_shift);
int16x8_t output[4];
Transpose4x8To8x4(x, output);
- for (int i = 0; i < 4; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 4>(dst, step, 0, output);
Transpose4x8To8x4(&x[8], output);
- for (int i = 0; i < 4; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 4>(dst, step, 8, output);
} else {
@@ -1629,8 +1686,8 @@ LIBGAV1_ALWAYS_INLINE void Adst16_NEON(void* dest, int32_t step, bool is_row,
for (int idx = 0; idx < 16; idx += 8) {
int16x8_t output[8];
Transpose8x8(&x[idx], output);
- for (int i = 0; i < 8; ++i) {
- output[i] = vqrshlq_s16(output[i], v_row_shift);
+ for (auto& o : output) {
+ o = vqrshlq_s16(o, v_row_shift);
}
StoreDst<16, 8>(dst, step, idx, output);
}
@@ -1805,9 +1862,10 @@ LIBGAV1_ALWAYS_INLINE bool Identity4DcOnly(void* dest, int adjusted_tx_height,
template <int identity_size>
LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (identity_size < 32) {
if (tx_width == 4) {
@@ -1891,9 +1949,10 @@ LIBGAV1_ALWAYS_INLINE void IdentityColumnStoreToFrame(
LIBGAV1_ALWAYS_INLINE void Identity4RowColumnStoreToFrame(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int tx_height, const int16_t* source) {
+ const int tx_width, const int tx_height,
+ const int16_t* LIBGAV1_RESTRICT source) {
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
if (tx_width == 4) {
uint8x8_t frame_data = vdup_n_u8(0);
@@ -2106,8 +2165,9 @@ LIBGAV1_ALWAYS_INLINE void TransposeAndPermute4x4WideInput(
}
// Process 4 wht4 rows and columns.
-LIBGAV1_ALWAYS_INLINE void Wht4_NEON(uint8_t* dst, const int dst_stride,
- const void* source,
+LIBGAV1_ALWAYS_INLINE void Wht4_NEON(uint8_t* LIBGAV1_RESTRICT dst,
+ const int dst_stride,
+ const void* LIBGAV1_RESTRICT source,
const int adjusted_tx_height) {
const auto* const src = static_cast<const int16_t*>(source);
int16x4_t s[4];
@@ -2273,11 +2333,12 @@ LIBGAV1_ALWAYS_INLINE void RowShift(int16_t* source, int num_rows,
template <int tx_height, bool enable_flip_rows = false>
LIBGAV1_ALWAYS_INLINE void StoreToFrameWithRound(
Array2DView<uint8_t> frame, const int start_x, const int start_y,
- const int tx_width, const int16_t* source, TransformType tx_type) {
+ const int tx_width, const int16_t* LIBGAV1_RESTRICT source,
+ TransformType tx_type) {
const bool flip_rows =
enable_flip_rows ? kTransformFlipRowsMask.Contains(tx_type) : false;
const int stride = frame.columns();
- uint8_t* dst = frame[start_y] + start_x;
+ uint8_t* LIBGAV1_RESTRICT dst = frame[start_y] + start_x;
// Enable for 4x4, 4x8, 4x16
if (tx_height < 32 && tx_width == 4) {
@@ -2338,7 +2399,7 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_height = kTransformHeight[tx_size];
const bool should_round = (tx_height == 8);
- const int row_shift = (tx_height == 16);
+ const int row_shift = static_cast<int>(tx_height == 16);
if (DctDcOnly<4>(src, adjusted_tx_height, should_round, row_shift)) {
return;
@@ -2368,8 +2429,10 @@ void Dct4TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2435,8 +2498,10 @@ void Dct8TransformLoopRow_NEON(TransformType /*tx_type*/, TransformSize tx_size,
}
void Dct8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2497,8 +2562,10 @@ void Dct16TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct16TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2551,8 +2618,10 @@ void Dct32TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct32TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2594,8 +2663,10 @@ void Dct64TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Dct64TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2645,8 +2716,10 @@ void Adst4TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2707,8 +2780,10 @@ void Adst8TransformLoopRow_NEON(TransformType /*tx_type*/,
}
void Adst8TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2771,8 +2846,10 @@ void Adst16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Adst16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2844,9 +2921,10 @@ void Identity4TransformLoopRow_NEON(TransformType tx_type,
void Identity4TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2919,9 +2997,10 @@ void Identity8TransformLoopRow_NEON(TransformType tx_type,
void Identity8TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
int start_x, int start_y,
- void* dst_frame) {
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -2960,8 +3039,9 @@ void Identity16TransformLoopRow_NEON(TransformType /*tx_type*/,
void Identity16TransformLoopColumn_NEON(TransformType tx_type,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -3007,8 +3087,9 @@ void Identity32TransformLoopRow_NEON(TransformType /*tx_type*/,
void Identity32TransformLoopColumn_NEON(TransformType /*tx_type*/,
TransformSize tx_size,
int adjusted_tx_height,
- void* src_buffer, int start_x,
- int start_y, void* dst_frame) {
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
auto& frame = *static_cast<Array2DView<uint8_t>*>(dst_frame);
auto* src = static_cast<int16_t*>(src_buffer);
const int tx_width = kTransformWidth[tx_size];
@@ -3029,8 +3110,10 @@ void Wht4TransformLoopRow_NEON(TransformType tx_type, TransformSize tx_size,
}
void Wht4TransformLoopColumn_NEON(TransformType tx_type, TransformSize tx_size,
- int adjusted_tx_height, void* src_buffer,
- int start_x, int start_y, void* dst_frame) {
+ int adjusted_tx_height,
+ void* LIBGAV1_RESTRICT src_buffer,
+ int start_x, int start_y,
+ void* LIBGAV1_RESTRICT dst_frame) {
assert(tx_type == kTransformTypeDctDct);
assert(tx_size == kTransformSize4x4);
static_cast<void>(tx_type);
@@ -3050,63 +3133,63 @@ void Init8bpp() {
Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
assert(dsp != nullptr);
// Maximum transform size for Dct is 64.
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
Dct4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
Dct4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
Dct8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
Dct8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
Dct16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
Dct16TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
Dct32TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
Dct32TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
Dct64TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
+ dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
Dct64TransformLoopColumn_NEON;
// Maximum transform size for Adst is 16.
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
Adst4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
Adst4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
Adst8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
Adst8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
Adst16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
Adst16TransformLoopColumn_NEON;
// Maximum transform size for Identity transform is 32.
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
Identity4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
Identity4TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
Identity8TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
Identity8TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
Identity16TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
Identity16TransformLoopColumn_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
Identity32TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
+ dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
Identity32TransformLoopColumn_NEON;
// Maximum transform size for Wht is 4.
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
Wht4TransformLoopRow_NEON;
- dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
+ dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
Wht4TransformLoopColumn_NEON;
}