about | summary | refs | log | tree | commit | diff
path: root/packages/gcc/9.5.0/0022-aarch64-Add-bytes_below_hard_fp-to-frame-info.patch
blob: 039c352fc2233346ef3cc2c4ec3c47604786885d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Fri, 16 Jun 2023 16:55:12 +0100
Subject: [PATCH 22/30] aarch64: Add bytes_below_hard_fp to frame info

The frame layout code currently hard-codes the assumption that
the number of bytes below the saved registers is equal to the
size of the outgoing arguments.  This patch abstracts that
value into a new field of aarch64_frame.

gcc/
	* config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
	field.
	* config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it,
	and use it instead of crtl->outgoing_args_size.
	(aarch64_get_separate_components): Use bytes_below_hard_fp instead
	of outgoing_args_size.
	(aarch64_process_components): Likewise.
---
 gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++-----------------
 gcc/config/aarch64/aarch64.h |  6 ++++-
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e9dad682738f..25cf10cc4b94 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4684,6 +4684,8 @@ aarch64_layout_frame (void)
 	last_fp_reg = regno;
       }
 
+  cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
+
   if (cfun->machine->frame.emit_frame_chain)
     {
       /* FP and LR are placed in the linkage record.  */
@@ -4751,11 +4753,11 @@ aarch64_layout_frame (void)
 			   STACK_BOUNDARY / BITS_PER_UNIT);
 
   /* Both these values are already aligned.  */
-  gcc_assert (multiple_p (crtl->outgoing_args_size,
+  gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
 			  STACK_BOUNDARY / BITS_PER_UNIT));
   cfun->machine->frame.frame_size
     = (cfun->machine->frame.hard_fp_offset
-       + crtl->outgoing_args_size);
+       + cfun->machine->frame.bytes_below_hard_fp);
 
   cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
 
@@ -4775,23 +4777,23 @@ aarch64_layout_frame (void)
     cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
   else if (cfun->machine->frame.frame_size.is_constant (&const_size)
 	   && const_size < max_push_offset
-	   && known_eq (crtl->outgoing_args_size, 0))
+	   && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0))
     {
-      /* Simple, small frame with no outgoing arguments:
+      /* Simple, small frame with no data below the saved registers.
 	 stp reg1, reg2, [sp, -frame_size]!
 	 stp reg3, reg4, [sp, 16]  */
       cfun->machine->frame.callee_adjust = const_size;
     }
-  else if (known_lt (crtl->outgoing_args_size
+  else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
 		     + cfun->machine->frame.saved_regs_size, 512)
 	   && !(cfun->calls_alloca
 		&& known_lt (cfun->machine->frame.hard_fp_offset,
 			     max_push_offset)))
     {
-      /* Frame with small outgoing arguments:
+      /* Frame with small area below the saved registers:
 	 sub sp, sp, frame_size
-	 stp reg1, reg2, [sp, outgoing_args_size]
-	 stp reg3, reg4, [sp, outgoing_args_size + 16]  */
+	 stp reg1, reg2, [sp, bytes_below_hard_fp]
+	 stp reg3, reg4, [sp, bytes_below_hard_fp + 16]  */
       cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
       cfun->machine->frame.callee_offset
 	= cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
@@ -4799,22 +4801,23 @@ aarch64_layout_frame (void)
   else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
 	   && const_fp_offset < max_push_offset)
     {
-      /* Frame with large outgoing arguments but a small local area:
+      /* Frame with large area below the saved registers, but with a
+	 small area above:
 	 stp reg1, reg2, [sp, -hard_fp_offset]!
 	 stp reg3, reg4, [sp, 16]
-	 sub sp, sp, outgoing_args_size  */
+	 sub sp, sp, bytes_below_hard_fp  */
       cfun->machine->frame.callee_adjust = const_fp_offset;
       cfun->machine->frame.final_adjust
 	= cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
     }
   else
     {
-      /* Frame with large local area and outgoing arguments using frame pointer:
+      /* General case:
 	 sub sp, sp, hard_fp_offset
 	 stp x29, x30, [sp, 0]
 	 add x29, sp, 0
 	 stp reg3, reg4, [sp, 16]
-	 sub sp, sp, outgoing_args_size  */
+	 sub sp, sp, bytes_below_hard_fp  */
       cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
       cfun->machine->frame.final_adjust
 	= cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
@@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void)
     if (aarch64_register_saved_on_entry (regno))
       {
 	poly_int64 offset = cfun->machine->frame.reg_offset[regno];
+
+	/* Get the offset relative to the register we'll use.  */
 	if (!frame_pointer_needed)
-	  offset += cfun->machine->frame.frame_size
-		    - cfun->machine->frame.hard_fp_offset;
+	  offset += cfun->machine->frame.bytes_below_hard_fp;
+
 	/* Check that we can access the stack slot of the register with one
 	   direct load with no adjustments needed.  */
 	if (offset_12bit_unsigned_scaled_p (DImode, offset))
@@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool prologue_p)
       rtx reg = gen_rtx_REG (mode, regno);
       poly_int64 offset = cfun->machine->frame.reg_offset[regno];
       if (!frame_pointer_needed)
-	offset += cfun->machine->frame.frame_size
-		  - cfun->machine->frame.hard_fp_offset;
+	offset += cfun->machine->frame.bytes_below_hard_fp;
+
       rtx addr = plus_constant (Pmode, ptr_reg, offset);
       rtx mem = gen_frame_mem (mode, addr);
 
@@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
       /* REGNO2 can be saved/restored in a pair with REGNO.  */
       rtx reg2 = gen_rtx_REG (mode, regno2);
       if (!frame_pointer_needed)
-	offset2 += cfun->machine->frame.frame_size
-		  - cfun->machine->frame.hard_fp_offset;
+	offset2 += cfun->machine->frame.bytes_below_hard_fp;
       rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
       rtx mem2 = gen_frame_mem (mode, addr2);
       rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
@@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
    registers.  If POLY_SIZE is not large enough to require a probe this function
    will only adjust the stack.  When allocating the stack space
    FRAME_RELATED_P is then used to indicate if the allocation is frame related.
-   FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
-   arguments.  If we are then we ensure that any allocation larger than the ABI
-   defined buffer needs a probe so that the invariant of having a 1KB buffer is
-   maintained.
+   FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
+   the saved registers.  If we are then we ensure that any allocation
+   larger than the ABI defined buffer needs a probe so that the
+   invariant of having a 1KB buffer is maintained.
 
    We emit barriers after each stack adjustment to prevent optimizations from
    breaking the invariant that we never drop the stack more than a page.  This
@@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
   /* Handle any residuals.  Residuals of at least MIN_PROBE_THRESHOLD have to
      be probed.  This maintains the requirement that each page is probed at
      least once.  For initial probing we probe only if the allocation is
-     more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
+     more than GUARD_SIZE - buffer, and below the saved registers we probe
      if the amount is larger than buffer.  GUARD_SIZE - buffer + buffer ==
      GUARD_SIZE.  This works that for any allocation that is large enough to
      trigger a probe here, we'll have at least one, and if they're not large
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index af0bc3f18817..95831637ba73 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame
   HOST_WIDE_INT saved_varargs_size;
 
   /* The size of the saved callee-save int/FP registers.  */
-
   HOST_WIDE_INT saved_regs_size;
 
+  /* The number of bytes between the bottom of the static frame (the bottom
+     of the outgoing arguments) and the hard frame pointer.  This value is
+     always a multiple of STACK_BOUNDARY.  */
+  poly_int64 bytes_below_hard_fp;
+
   /* Offset from the base of the frame (incomming SP) to the
      top of the locals area.  This value is always a multiple of
      STACK_BOUNDARY.  */
-- 
2.42.0