about | summary | refs | log | tree | commit | diff
path: root/packages/gcc/13.2.0/0024-aarch64-Measure-reg_offset-from-the-bottom-of-the-fr.patch
blob: 709a632ba5e700ca92a3d708c682105e7b0fab51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
From f2b585375205b0a1802d79c682ba33766ecd1f0f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:07:17 +0100
Subject: [PATCH 24/32] aarch64: Measure reg_offset from the bottom of the
 frame

reg_offset was measured from the bottom of the saved register area.
This made perfect sense with the original layout, since the bottom
of the saved register area was also the hard frame pointer address.
It became slightly less obvious with SVE, since we save SVE
registers below the hard frame pointer, but it still made sense.

However, if we want to allow different frame layouts, it's more
convenient and obvious to measure reg_offset from the bottom of
the frame.  After previous patches, it's also a slight simplification
in its own right.

gcc/
	* config/aarch64/aarch64.h (aarch64_frame): Add comment above
	reg_offset.
	* config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets
	from the bottom of the frame, rather than the bottom of the saved
	register area.  Measure reg_offset from the bottom of the frame
	rather than the bottom of the saved register area.
	(aarch64_save_callee_saves): Update accordingly.
	(aarch64_restore_callee_saves): Likewise.
	(aarch64_get_separate_components): Likewise.
	(aarch64_process_components): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++-------------------
 gcc/config/aarch64/aarch64.h  |  3 ++
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 95499ae49ba2..af99807ef8ab 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8400,7 +8400,6 @@ aarch64_needs_frame_chain (void)
 static void
 aarch64_layout_frame (void)
 {
-  poly_int64 offset = 0;
   int regno, last_fp_reg = INVALID_REGNUM;
   machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
@@ -8478,7 +8477,9 @@ aarch64_layout_frame (void)
   gcc_assert (crtl->is_leaf
 	      || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
 
-  frame.bytes_below_saved_regs = crtl->outgoing_args_size;
+  poly_int64 offset = crtl->outgoing_args_size;
+  gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+  frame.bytes_below_saved_regs = offset;
 
   /* Now assign stack slots for the registers.  Start with the predicate
      registers, since predicate LDR and STR have a relatively small
@@ -8490,7 +8491,8 @@ aarch64_layout_frame (void)
 	offset += BYTES_PER_SVE_PRED;
       }
 
-  if (maybe_ne (offset, 0))
+  poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
+  if (maybe_ne (saved_prs_size, 0))
     {
       /* If we have any vector registers to save above the predicate registers,
 	 the offset of the vector register save slots need to be a multiple
@@ -8508,10 +8510,10 @@ aarch64_layout_frame (void)
 	offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
       else
 	{
-	  if (known_le (offset, vector_save_size))
-	    offset = vector_save_size;
-	  else if (known_le (offset, vector_save_size * 2))
-	    offset = vector_save_size * 2;
+	  if (known_le (saved_prs_size, vector_save_size))
+	    offset = frame.bytes_below_saved_regs + vector_save_size;
+	  else if (known_le (saved_prs_size, vector_save_size * 2))
+	    offset = frame.bytes_below_saved_regs + vector_save_size * 2;
 	  else
 	    gcc_unreachable ();
 	}
@@ -8528,9 +8530,10 @@ aarch64_layout_frame (void)
 
   /* OFFSET is now the offset of the hard frame pointer from the bottom
      of the callee save area.  */
-  bool saves_below_hard_fp_p = maybe_ne (offset, 0);
-  frame.below_hard_fp_saved_regs_size = offset;
-  frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
+  frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+  bool saves_below_hard_fp_p
+    = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+  frame.bytes_below_hard_fp = offset;
   if (frame.emit_frame_chain)
     {
       /* FP and LR are placed in the linkage record.  */
@@ -8581,9 +8584,10 @@ aarch64_layout_frame (void)
 
   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
 
-  frame.saved_regs_size = offset;
+  frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
 
-  poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
+  poly_int64 varargs_and_saved_regs_size
+    = frame.saved_regs_size + frame.saved_varargs_size;
 
   poly_int64 saved_regs_and_above
     = aligned_upper_bound (varargs_and_saved_regs_size
@@ -9105,9 +9109,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
 
       machine_mode mode = aarch64_reg_save_mode (regno);
       reg = gen_rtx_REG (mode, regno);
-      offset = (frame.reg_offset[regno]
-		+ frame.bytes_below_saved_regs
-		- bytes_below_sp);
+      offset = frame.reg_offset[regno] - bytes_below_sp;
       rtx base_rtx = stack_pointer_rtx;
       poly_int64 sp_offset = offset;
 
@@ -9214,9 +9216,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
 
       machine_mode mode = aarch64_reg_save_mode (regno);
       reg = gen_rtx_REG (mode, regno);
-      offset = (frame.reg_offset[regno]
-		+ frame.bytes_below_saved_regs
-		- bytes_below_sp);
+      offset = frame.reg_offset[regno] - bytes_below_sp;
       rtx base_rtx = stack_pointer_rtx;
       if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
 	aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
@@ -9355,14 +9355,12 @@ aarch64_get_separate_components (void)
 	   it as a stack probe for -fstack-clash-protection.  */
 	if (flag_stack_clash_protection
 	    && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
-	    && known_eq (offset, 0))
+	    && known_eq (offset, frame.bytes_below_saved_regs))
 	  continue;
 
 	/* Get the offset relative to the register we'll use.  */
 	if (frame_pointer_needed)
-	  offset -= frame.below_hard_fp_saved_regs_size;
-	else
-	  offset += frame.bytes_below_saved_regs;
+	  offset -= frame.bytes_below_hard_fp;
 
 	/* Check that we can access the stack slot of the register with one
 	   direct load with no adjustments needed.  */
@@ -9509,9 +9507,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
       rtx reg = gen_rtx_REG (mode, regno);
       poly_int64 offset = frame.reg_offset[regno];
       if (frame_pointer_needed)
-	offset -= frame.below_hard_fp_saved_regs_size;
-      else
-	offset += frame.bytes_below_saved_regs;
+	offset -= frame.bytes_below_hard_fp;
 
       rtx addr = plus_constant (Pmode, ptr_reg, offset);
       rtx mem = gen_frame_mem (mode, addr);
@@ -9563,9 +9559,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
       /* REGNO2 can be saved/restored in a pair with REGNO.  */
       rtx reg2 = gen_rtx_REG (mode, regno2);
       if (frame_pointer_needed)
-	offset2 -= frame.below_hard_fp_saved_regs_size;
-      else
-	offset2 += frame.bytes_below_saved_regs;
+	offset2 -= frame.bytes_below_hard_fp;
       rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
       rtx mem2 = gen_frame_mem (mode, addr2);
       rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
@@ -9681,7 +9675,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
   if (final_adjustment_p
       && known_eq (frame.below_hard_fp_saved_regs_size, 0))
     {
-      poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
+      poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
+			      - frame.bytes_below_saved_regs);
       if (known_ge (lr_offset, 0))
 	min_probe_threshold -= lr_offset.to_constant ();
       else
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 108a5731b0d7..c8becb098c88 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -766,6 +766,9 @@ extern enum aarch64_processor aarch64_tune;
 #ifdef HAVE_POLY_INT_H
 struct GTY (()) aarch64_frame
 {
+  /* The offset from the bottom of the static frame (the bottom of the
+     outgoing arguments) of each register save slot, or -2 if no save is
+     needed.  */
   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
 
   /* The number of extra stack bytes taken up by register varargs.
-- 
2.42.0