diff options
Diffstat (limited to 'packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch')
-rw-r--r-- | packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch | 139 |
1 files changed, 0 insertions, 139 deletions
diff --git a/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch b/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch deleted file mode 100644 index 2625c308..00000000 --- a/packages/gcc/14.2.0/0011-aarch64-Fix-invalid-nested-subregs-PR115464.patch +++ /dev/null @@ -1,139 +0,0 @@ -From cb547fed9177c2a28f376c881facfcf4b64e70a9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Thu, 13 Jun 2024 12:48:21 +0100 -Subject: [PATCH 11/16] aarch64: Fix invalid nested subregs [PR115464] - -The testcase extracts one arm_neon.h vector from a pair (one subreg) -and then reinterprets the result as an SVE vector (another subreg). -Each subreg makes sense individually, but we can't fold them together -into a single subreg: it's 32 bytes -> 16 bytes -> 16*N bytes, -but the interpretation of 32 bytes -> 16*N bytes depends on -whether N==1 or N>1. - -Since the second subreg makes sense individually, simplify_subreg -should bail out rather than ICE on it. simplify_gen_subreg will -then do the same (because it already checks validate_subreg). -This leaves simplify_gen_subreg returning null, requiring the -caller to take appropriate action. - -I think this is relatively likely to occur elsewhere, so the patch -adds a helper for forcing a subreg, allowing a temporary pseudo to -be created where necessary. - -I'll follow up by using force_subreg in more places. This patch -is intended to be a minimal backportable fix for the PR. - -gcc/ - PR target/115464 - * simplify-rtx.cc (simplify_context::simplify_subreg): Don't try - to fold two subregs together if their relationship isn't known - at compile time. - * explow.h (force_subreg): Declare. - * explow.cc (force_subreg): New function. - * config/aarch64/aarch64-sve-builtins-base.cc - (svset_neonq_impl::expand): Use it instead of simplify_gen_subreg. - -gcc/testsuite/ - PR target/115464 - * gcc.target/aarch64/sve/acle/general/pr115464.c: New test. - -(cherry picked from commit 0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0) ---- - gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- - gcc/explow.cc | 15 +++++++++++++++ - gcc/explow.h | 2 ++ - gcc/simplify-rtx.cc | 5 +++++ - .../aarch64/sve/acle/general/pr115464.c | 13 +++++++++++++ - 5 files changed, 36 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -index 0d2edf3f19e..c9182594bc1 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -@@ -1174,7 +1174,7 @@ public: - Advanced SIMD argument as an SVE vector. */ - if (!BYTES_BIG_ENDIAN - && is_undef (CALL_EXPR_ARG (e.call_expr, 0))) -- return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0); -+ return force_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0); - - rtx_vector_builder builder (VNx16BImode, 16, 2); - for (unsigned int i = 0; i < 16; i++) -diff --git a/gcc/explow.cc b/gcc/explow.cc -index 8e5f6b8e680..f6843398c4b 100644 ---- a/gcc/explow.cc -+++ b/gcc/explow.cc -@@ -745,6 +745,21 @@ force_reg (machine_mode mode, rtx x) - return temp; - } - -+/* Like simplify_gen_subreg, but force OP into a new register if the -+ subreg cannot be formed directly. */ -+ -+rtx -+force_subreg (machine_mode outermode, rtx op, -+ machine_mode innermode, poly_uint64 byte) -+{ -+ rtx x = simplify_gen_subreg (outermode, op, innermode, byte); -+ if (x) -+ return x; -+ -+ op = copy_to_mode_reg (innermode, op); -+ return simplify_gen_subreg (outermode, op, innermode, byte); -+} -+ - /* If X is a memory ref, copy its contents to a new temp reg and return - that reg. Otherwise, return X. */ - -diff --git a/gcc/explow.h b/gcc/explow.h -index 16aa02cfb68..cbd1fcb7eb3 100644 ---- a/gcc/explow.h -+++ b/gcc/explow.h -@@ -42,6 +42,8 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode); - Args are mode (in case value is a constant) and the value. */ - extern rtx force_reg (machine_mode, rtx); - -+extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64); -+ - /* Return given rtx, copied into a new temp reg if it was in memory. */ - extern rtx force_not_mem (rtx); - -diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc -index dceaa13333c..729d408aa55 100644 ---- a/gcc/simplify-rtx.cc -+++ b/gcc/simplify-rtx.cc -@@ -7612,6 +7612,11 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, - poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode); - rtx newx; - -+ /* Make sure that the relationship between the two subregs is -+ known at compile time. */ -+ if (!ordered_p (outersize, innermostsize)) -+ return NULL_RTX; -+ - if (outermode == innermostmode - && known_eq (byte, 0U) - && known_eq (SUBREG_BYTE (op), 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c -new file mode 100644 -index 00000000000..d728d1325ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c -@@ -0,0 +1,13 @@ -+/* { dg-options "-O2" } */ -+ -+#include <arm_neon.h> -+#include <arm_sve.h> -+#include <arm_neon_sve_bridge.h> -+ -+svuint16_t -+convolve4_4_x (uint16x8x2_t permute_tbl) -+{ -+ return svset_neonq_u16 (svundef_u16 (), permute_tbl.val[1]); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */ --- -2.44.2 - |