From 8433953434a7b58c0923140d39eb3c5988c1d097 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:07:19 +0100
Subject: [PATCH 28/32] aarch64: Put LR save probe in first 16 bytes

-fstack-clash-protection uses the save of LR as a probe for the next
allocation. The next allocation could be:

* another part of the static frame, e.g. when allocating SVE save slots
  or outgoing arguments
* an alloca in the same function
* an allocation made by a callee function

However, when -fomit-frame-pointer is used, the LR save slot is placed
above the other GPR save slots. It could therefore be up to 80 bytes
above the base of the GPR save area (which is also the hard fp address).

aarch64_allocate_and_probe_stack_space took this into account when
deciding how much subsequent space could be allocated without needing
a probe. However, it interacted badly with:

      /* If doing a small final adjustment, we always probe at offset 0.
         This is done to avoid issues when LR is not at position 0 or when
         the final adjustment is smaller than the probing offset. */
      else if (final_adjustment_p && rounded_size == 0)
        residual_probe_offset = 0;

which forces any allocation that is smaller than the guard page size
to be probed at offset 0 rather than the usual offset 1024. It was
therefore possible to construct cases in which we had:

* a probe using LR at SP + 80 bytes (or some other value >= 16)
* an allocation of the guard page size - 16 bytes
* a probe at SP + 0

which allocates guard page size + 64 consecutive unprobed bytes.
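
As an illustration of the problem, such a sequence could look roughly
like this (a sketch only, assuming a 4096-byte guard page; the offsets
follow the scenario above rather than any particular compiler output):

      str  x30, [sp, #80]   // LR save used as the probe, 80 bytes above SP
      sub  sp, sp, #4080    // allocate guard size - 16 bytes
      str  xzr, [sp]        // next probe lands at offset 0 of the new SP

The two probed addresses are 80 + 4080 = 4160 bytes apart, i.e. guard
size + 64, so the guard page can sit entirely inside the unprobed region
and be skipped without ever being touched.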

This patch requires the LR probe to be in the first 16 bytes of the
save area when stack clash protection is active. Doing it
unconditionally would cause code-quality regressions.

Putting LR before other registers prevents push/pop allocation
when shadow call stacks are enabled, since LR is restored
separately from the other callee-saved registers.

The new comment doesn't say that the probe register is required
to be LR, since a later patch removes that restriction.
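
With the fix, the expected prologue shape (paraphrasing the pattern that
the new stack-check-prologue-18.c test checks for test1; surrounding
instructions elided) is:

      str  x30, [sp]        // LR saved in the first 16 bytes acts as the probe
      sub  sp, sp, #4064    // subsequent allocation, now within the guard size
      str  xzr, [sp]        // residual probe for the final adjustment

Here the largest unprobed run is 4064 bytes, comfortably below the
4096-byte guard size used by the tests.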

gcc/
	* config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that
	the LR save slot is in the first 16 bytes of the register save area.
	Only form STP/LDP push/pop candidates if both registers are valid.
	(aarch64_allocate_and_probe_stack_space): Remove workaround for
	when LR was not in the first 16 bytes.

gcc/testsuite/
	* gcc.target/aarch64/stack-check-prologue-18.c: New test.
	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
	* gcc.target/aarch64/stack-check-prologue-20.c: Likewise.
---
gcc/config/aarch64/aarch64.cc | 72 ++++++-------
.../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
.../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++
.../aarch64/stack-check-prologue-20.c | 3 +
4 files changed, 233 insertions(+), 42 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5cad847977a4..a765f92329d3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8534,26 +8534,34 @@ aarch64_layout_frame (void)
bool saves_below_hard_fp_p
= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
frame.bytes_below_hard_fp = offset;
+
+ auto allocate_gpr_slot = [&](unsigned int regno)
+ {
+ frame.reg_offset[regno] = offset;
+ if (frame.wb_push_candidate1 == INVALID_REGNUM)
+ frame.wb_push_candidate1 = regno;
+ else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+ frame.wb_push_candidate2 = regno;
+ offset += UNITS_PER_WORD;
+ };
+
if (frame.emit_frame_chain)
{
/* FP and LR are placed in the linkage record. */
- frame.reg_offset[R29_REGNUM] = offset;
- frame.wb_push_candidate1 = R29_REGNUM;
- frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
- frame.wb_push_candidate2 = R30_REGNUM;
- offset += 2 * UNITS_PER_WORD;
+ allocate_gpr_slot (R29_REGNUM);
+ allocate_gpr_slot (R30_REGNUM);
}
+ else if (flag_stack_clash_protection
+ && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
+ /* Put the LR save slot first, since it makes a good choice of probe
+ for stack clash purposes. The idea is that the link register usually
+ has to be saved before a call anyway, and so we lose little by
+ stopping it from being individually shrink-wrapped. */
+ allocate_gpr_slot (R30_REGNUM);
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
- frame.reg_offset[regno] = offset;
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
- frame.wb_push_candidate2 = regno;
- offset += UNITS_PER_WORD;
- }
+ allocate_gpr_slot (regno);
poly_int64 max_int_offset = offset;
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@@ -8631,10 +8639,13 @@ aarch64_layout_frame (void)
max_push_offset to 0, because no registers are popped at this time,
so callee_adjust cannot be adjusted. */
HOST_WIDE_INT max_push_offset = 0;
- if (frame.wb_pop_candidate2 != INVALID_REGNUM)
- max_push_offset = 512;
- else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
- max_push_offset = 256;
+ if (frame.wb_pop_candidate1 != INVALID_REGNUM)
+ {
+ if (frame.wb_pop_candidate2 != INVALID_REGNUM)
+ max_push_offset = 512;
+ else
+ max_push_offset = 256;
+ }
HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
HOST_WIDE_INT const_saved_regs_size;
@@ -9654,29 +9665,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
= (final_adjustment_p
? guard_used_by_caller + byte_sp_alignment
: guard_size - guard_used_by_caller);
- /* When doing the final adjustment for the outgoing arguments, take into
- account any unprobed space there is above the current SP. There are
- two cases:
-
- - When saving SVE registers below the hard frame pointer, we force
- the lowest save to take place in the prologue before doing the final
- adjustment (i.e. we don't allow the save to be shrink-wrapped).
- This acts as a probe at SP, so there is no unprobed space.
-
- - When there are no SVE register saves, we use the store of the link
- register as a probe. We can't assume that LR was saved at position 0
- though, so treat any space below it as unprobed. */
- if (final_adjustment_p
- && known_eq (frame.below_hard_fp_saved_regs_size, 0))
- {
- poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
- - frame.bytes_below_saved_regs);
- if (known_ge (lr_offset, 0))
- min_probe_threshold -= lr_offset.to_constant ();
- else
- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
- }
-
poly_int64 frame_size = frame.frame_size;
/* We should always have a positive probe threshold. */
@@ -9856,8 +9844,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (final_adjustment_p && rounded_size != 0)
min_probe_threshold = 0;
/* If doing a small final adjustment, we always probe at offset 0.
- This is done to avoid issues when LR is not at position 0 or when
- the final adjustment is smaller than the probing offset. */
+ This is done to avoid issues when the final adjustment is smaller
+ than the probing offset. */
else if (final_adjustment_p && rounded_size == 0)
residual_probe_offset = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
new file mode 100644
index 000000000000..82447d20fff5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
@@ -0,0 +1,100 @@
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void f(int, ...);
+void g();
+
+/*
+** test1:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #4064
+** str xzr, \[sp\]
+** cbnz w0, .*
+** bl g
+** ...
+** str x26, \[sp, #?4128\]
+** ...
+*/
+int test1(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+ }
+ g();
+ return 1;
+}
+
+/*
+** test2:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1040
+** str xzr, \[sp\]
+** cbnz w0, .*
+** bl g
+** ...
+*/
+int test2(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x);
+ }
+ g();
+ return 1;
+}
+
+/*
+** test3:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1024
+** cbnz w0, .*
+** bl g
+** ...
+*/
+int test3(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+ }
+ g();
+ return 1;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
new file mode 100644
index 000000000000..73ac3e4e4eb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
@@ -0,0 +1,100 @@
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void f(int, ...);
+void g();
+
+/*
+** test1:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #4064
+** str xzr, \[sp\]
+** cbnz w0, .*
+** bl g
+** ...
+** str x26, \[sp, #?4128\]
+** ...
+*/
+int test1(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+ }
+ g();
+ return 1;
+}
+
+/*
+** test2:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1040
+** str xzr, \[sp\]
+** cbnz w0, .*
+** bl g
+** ...
+*/
+int test2(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x);
+ }
+ g();
+ return 1;
+}
+
+/*
+** test3:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1024
+** cbnz w0, .*
+** bl g
+** ...
+*/
+int test3(int z) {
+ __uint128_t x = 0;
+ int y[0x400];
+ if (z)
+ {
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
+ f(0, 0, 0, 0, 0, 0, 0, &y,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+ }
+ g();
+ return 1;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
new file mode 100644
index 000000000000..690aae8dfd5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
@@ -0,0 +1,3 @@
+/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
+
+#include "stack-check-prologue-19.c"
--
2.42.0