aboutsummaryrefslogtreecommitdiff
path: root/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch
diff options
context:
space:
mode:
Diffstat (limited to 'packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch')
-rw-r--r--packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch139
1 files changed, 139 insertions, 0 deletions
diff --git a/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch b/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch
new file mode 100644
index 00000000..2625c308
--- /dev/null
+++ b/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch
@@ -0,0 +1,139 @@
+From cb547fed9177c2a28f376c881facfcf4b64e70a9 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 13 Jun 2024 12:48:21 +0100
+Subject: [PATCH 11/16] aarch64: Fix invalid nested subregs [PR115464]
+
+The testcase extracts one arm_neon.h vector from a pair (one subreg)
+and then reinterprets the result as an SVE vector (another subreg).
+Each subreg makes sense individually, but we can't fold them together
+into a single subreg: it's 32 bytes -> 16 bytes -> 16*N bytes,
+but the interpretation of 32 bytes -> 16*N bytes depends on
+whether N==1 or N>1.
+
+Since the second subreg makes sense individually, simplify_subreg
+should bail out rather than ICE on it. simplify_gen_subreg will
+then do the same (because it already checks validate_subreg).
+This leaves simplify_gen_subreg returning null, requiring the
+caller to take appropriate action.
+
+I think this is relatively likely to occur elsewhere, so the patch
+adds a helper for forcing a subreg, allowing a temporary pseudo to
+be created where necessary.
+
+I'll follow up by using force_subreg in more places. This patch
+is intended to be a minimal backportable fix for the PR.
+
+gcc/
+ PR target/115464
+ * simplify-rtx.cc (simplify_context::simplify_subreg): Don't try
+ to fold two subregs together if their relationship isn't known
+ at compile time.
+ * explow.h (force_subreg): Declare.
+ * explow.cc (force_subreg): New function.
+ * config/aarch64/aarch64-sve-builtins-base.cc
+ (svset_neonq_impl::expand): Use it instead of simplify_gen_subreg.
+
+gcc/testsuite/
+ PR target/115464
+ * gcc.target/aarch64/sve/acle/general/pr115464.c: New test.
+
+(cherry picked from commit 0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0)
+---
+ gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
+ gcc/explow.cc | 15 +++++++++++++++
+ gcc/explow.h | 2 ++
+ gcc/simplify-rtx.cc | 5 +++++
+ .../aarch64/sve/acle/general/pr115464.c | 13 +++++++++++++
+ 5 files changed, 36 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+index 0d2edf3f19e..c9182594bc1 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+@@ -1174,7 +1174,7 @@ public:
+ Advanced SIMD argument as an SVE vector. */
+ if (!BYTES_BIG_ENDIAN
+ && is_undef (CALL_EXPR_ARG (e.call_expr, 0)))
+- return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
++ return force_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
+
+ rtx_vector_builder builder (VNx16BImode, 16, 2);
+ for (unsigned int i = 0; i < 16; i++)
+diff --git a/gcc/explow.cc b/gcc/explow.cc
+index 8e5f6b8e680..f6843398c4b 100644
+--- a/gcc/explow.cc
++++ b/gcc/explow.cc
+@@ -745,6 +745,21 @@ force_reg (machine_mode mode, rtx x)
+ return temp;
+ }
+
++/* Like simplify_gen_subreg, but force OP into a new register if the
++ subreg cannot be formed directly. */
++
++rtx
++force_subreg (machine_mode outermode, rtx op,
++ machine_mode innermode, poly_uint64 byte)
++{
++ rtx x = simplify_gen_subreg (outermode, op, innermode, byte);
++ if (x)
++ return x;
++
++ op = copy_to_mode_reg (innermode, op);
++ return simplify_gen_subreg (outermode, op, innermode, byte);
++}
++
+ /* If X is a memory ref, copy its contents to a new temp reg and return
+ that reg. Otherwise, return X. */
+
+diff --git a/gcc/explow.h b/gcc/explow.h
+index 16aa02cfb68..cbd1fcb7eb3 100644
+--- a/gcc/explow.h
++++ b/gcc/explow.h
+@@ -42,6 +42,8 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode);
+ Args are mode (in case value is a constant) and the value. */
+ extern rtx force_reg (machine_mode, rtx);
+
++extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64);
++
+ /* Return given rtx, copied into a new temp reg if it was in memory. */
+ extern rtx force_not_mem (rtx);
+
+diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
+index dceaa13333c..729d408aa55 100644
+--- a/gcc/simplify-rtx.cc
++++ b/gcc/simplify-rtx.cc
+@@ -7612,6 +7612,11 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
+ poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
+ rtx newx;
+
++ /* Make sure that the relationship between the two subregs is
++ known at compile time. */
++ if (!ordered_p (outersize, innermostsize))
++ return NULL_RTX;
++
+ if (outermode == innermostmode
+ && known_eq (byte, 0U)
+ && known_eq (SUBREG_BYTE (op), 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
+new file mode 100644
+index 00000000000..d728d1325ed
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
+@@ -0,0 +1,13 @@
++/* { dg-options "-O2" } */
++
++#include <arm_neon.h>
++#include <arm_sve.h>
++#include <arm_neon_sve_bridge.h>
++
++svuint16_t
++convolve4_4_x (uint16x8x2_t permute_tbl)
++{
++ return svset_neonq_u16 (svundef_u16 (), permute_tbl.val[1]);
++}
++
++/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */
+--
+2.44.2
+