1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
From ada2ab0093596be707f23a3466ac82cff59fcffe Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:07:13 +0100
Subject: [PATCH 17/32] aarch64: Add bytes_below_saved_regs to frame info
The frame layout code currently hard-codes the assumption that
the number of bytes below the saved registers is equal to the
size of the outgoing arguments. This patch abstracts that
value into a new field of aarch64_frame.
gcc/
* config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
field.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it,
and use it instead of crtl->outgoing_args_size.
(aarch64_get_separate_components): Use bytes_below_saved_regs instead
of outgoing_args_size.
(aarch64_process_components): Likewise.
---
gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++-----------------
gcc/config/aarch64/aarch64.h | 5 +++
2 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 37643041ffb1..dacc2b0e4dd2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8478,6 +8478,8 @@ aarch64_layout_frame (void)
gcc_assert (crtl->is_leaf
|| maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
+ frame.bytes_below_saved_regs = crtl->outgoing_args_size;
+
/* Now assign stack slots for the registers. Start with the predicate
registers, since predicate LDR and STR have a relatively small
offset range. These saves happen below the hard frame pointer. */
@@ -8582,18 +8584,18 @@ aarch64_layout_frame (void)
poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
- poly_int64 above_outgoing_args
+ poly_int64 saved_regs_and_above
= aligned_upper_bound (varargs_and_saved_regs_size
+ get_frame_size (),
STACK_BOUNDARY / BITS_PER_UNIT);
frame.hard_fp_offset
- = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
/* Both these values are already aligned. */
- gcc_assert (multiple_p (crtl->outgoing_args_size,
+ gcc_assert (multiple_p (frame.bytes_below_saved_regs,
STACK_BOUNDARY / BITS_PER_UNIT));
- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
frame.locals_offset = frame.saved_varargs_size;
@@ -8637,7 +8639,7 @@ aarch64_layout_frame (void)
else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
max_push_offset = 256;
- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
HOST_WIDE_INT const_saved_regs_size;
if (known_eq (frame.saved_regs_size, 0))
frame.initial_adjust = frame.frame_size;
@@ -8645,31 +8647,31 @@ aarch64_layout_frame (void)
&& const_size < max_push_offset
&& known_eq (frame.hard_fp_offset, const_size))
{
- /* Simple, small frame with no outgoing arguments:
+ /* Simple, small frame with no data below the saved registers.
stp reg1, reg2, [sp, -frame_size]!
stp reg3, reg4, [sp, 16] */
frame.callee_adjust = const_size;
}
- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
&& frame.saved_regs_size.is_constant (&const_saved_regs_size)
- && const_outgoing_args_size + const_saved_regs_size < 512
- /* We could handle this case even with outgoing args, provided
- that the number of args left us with valid offsets for all
- predicate and vector save slots. It's such a rare case that
- it hardly seems worth the effort though. */
- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
+ && const_below_saved_regs + const_saved_regs_size < 512
+ /* We could handle this case even with data below the saved
+ registers, provided that that data left us with valid offsets
+ for all predicate and vector save slots. It's such a rare
+ case that it hardly seems worth the effort though. */
+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
&& !(cfun->calls_alloca
&& frame.hard_fp_offset.is_constant (&const_fp_offset)
&& const_fp_offset < max_push_offset))
{
- /* Frame with small outgoing arguments:
+ /* Frame with small area below the saved registers:
sub sp, sp, frame_size
- stp reg1, reg2, [sp, outgoing_args_size]
- stp reg3, reg4, [sp, outgoing_args_size + 16] */
+ stp reg1, reg2, [sp, bytes_below_saved_regs]
+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
frame.initial_adjust = frame.frame_size;
- frame.callee_offset = const_outgoing_args_size;
+ frame.callee_offset = const_below_saved_regs;
}
else if (saves_below_hard_fp_p
&& known_eq (frame.saved_regs_size,
@@ -8679,30 +8681,29 @@ aarch64_layout_frame (void)
sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
save SVE registers relative to SP
- sub sp, sp, outgoing_args_size */
+ sub sp, sp, bytes_below_saved_regs */
frame.initial_adjust = (frame.hard_fp_offset
+ frame.below_hard_fp_saved_regs_size);
- frame.final_adjust = crtl->outgoing_args_size;
+ frame.final_adjust = frame.bytes_below_saved_regs;
}
else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
&& const_fp_offset < max_push_offset)
{
- /* Frame with large outgoing arguments or SVE saves, but with
- a small local area:
+ /* Frame with large area below the saved registers, or with SVE saves,
+ but with a small area above:
stp reg1, reg2, [sp, -hard_fp_offset]!
stp reg3, reg4, [sp, 16]
[sub sp, sp, below_hard_fp_saved_regs_size]
[save SVE registers relative to SP]
- sub sp, sp, outgoing_args_size */
+ sub sp, sp, bytes_below_saved_regs */
frame.callee_adjust = const_fp_offset;
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
- frame.final_adjust = crtl->outgoing_args_size;
+ frame.final_adjust = frame.bytes_below_saved_regs;
}
else
{
- /* Frame with large local area and outgoing arguments or SVE saves,
- using frame pointer:
+ /* General case:
sub sp, sp, hard_fp_offset
stp x29, x30, [sp, 0]
@@ -8710,10 +8711,10 @@ aarch64_layout_frame (void)
stp reg3, reg4, [sp, 16]
[sub sp, sp, below_hard_fp_saved_regs_size]
[save SVE registers relative to SP]
- sub sp, sp, outgoing_args_size */
+ sub sp, sp, bytes_below_saved_regs */
frame.initial_adjust = frame.hard_fp_offset;
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
- frame.final_adjust = crtl->outgoing_args_size;
+ frame.final_adjust = frame.bytes_below_saved_regs;
}
/* Make sure the individual adjustments add up to the full frame size. */
@@ -9358,7 +9359,7 @@ aarch64_get_separate_components (void)
if (frame_pointer_needed)
offset -= frame.below_hard_fp_saved_regs_size;
else
- offset += crtl->outgoing_args_size;
+ offset += frame.bytes_below_saved_regs;
/* Check that we can access the stack slot of the register with one
direct load with no adjustments needed. */
@@ -9507,7 +9508,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
if (frame_pointer_needed)
offset -= frame.below_hard_fp_saved_regs_size;
else
- offset += crtl->outgoing_args_size;
+ offset += frame.bytes_below_saved_regs;
rtx addr = plus_constant (Pmode, ptr_reg, offset);
rtx mem = gen_frame_mem (mode, addr);
@@ -9561,7 +9562,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
if (frame_pointer_needed)
offset2 -= frame.below_hard_fp_saved_regs_size;
else
- offset2 += crtl->outgoing_args_size;
+ offset2 += frame.bytes_below_saved_regs;
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
rtx mem2 = gen_frame_mem (mode, addr2);
rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
@@ -9635,10 +9636,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
registers. If POLY_SIZE is not large enough to require a probe this function
will only adjust the stack. When allocating the stack space
FRAME_RELATED_P is then used to indicate if the allocation is frame related.
- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
- arguments. If we are then we ensure that any allocation larger than the ABI
- defined buffer needs a probe so that the invariant of having a 1KB buffer is
- maintained.
+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
+ the saved registers. If we are then we ensure that any allocation
+ larger than the ABI defined buffer needs a probe so that the
+ invariant of having a 1KB buffer is maintained.
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9847,7 +9848,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
/* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
be probed. This maintains the requirement that each page is probed at
least once. For initial probing we probe only if the allocation is
- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
+ more than GUARD_SIZE - buffer, and below the saved registers we probe
if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
GUARD_SIZE. This works that for any allocation that is large enough to
trigger a probe here, we'll have at least one, and if they're not large
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 73b09e205083..0b6faa3ddf17 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -777,6 +777,11 @@ struct GTY (()) aarch64_frame
/* The size of the callee-save registers with a slot in REG_OFFSET. */
poly_int64 saved_regs_size;
+ /* The number of bytes between the bottom of the static frame (the bottom
+ of the outgoing arguments) and the bottom of the register save area.
+ This value is always a multiple of STACK_BOUNDARY / BITS_PER_UNIT. */
+ poly_int64 bytes_below_saved_regs;
+
/* The size of the callee-save registers with a slot in REG_OFFSET that
are saved below the hard frame pointer. */
poly_int64 below_hard_fp_saved_regs_size;
--
2.42.0
|