aboutsummaryrefslogtreecommitdiff
path: root/x86_64
diff options
context:
space:
mode:
authorLuca Dariz <luca.dariz@gmail.com>2022-02-05 18:51:24 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2022-08-27 19:09:19 +0200
commitf90ba97d5091ad85f089817231f53e34a0f6f259 (patch)
tree9586f5d9d0e11f508b150315b7466d5197726d56 /x86_64
parentf2626beb39dc416ceebfa35076d89cd291e2b903 (diff)
downloadgnumach-f90ba97d5091ad85f089817231f53e34a0f6f259.tar.gz
gnumach-f90ba97d5091ad85f089817231f53e34a0f6f259.tar.bz2
gnumach-f90ba97d5091ad85f089817231f53e34a0f6f259.zip
add support for booting from grub with x86_64
* configure: compile for native x86_64 by default instead of xen * x86_64/Makefrag.am: introduce KERNEL_MAP_BASE to reuse the constant in both code and linker script * x86_64/ldscript: use a .boot section for the very first operations, until we reach long mode. This section is not really allocated, so it doesn't need to be freed later. The vm system is later initialized starting from .text and not including .boot * link kernel at 0x4000000 as the xen version, higher values causes linker errors * we can't use full segmentation in long mode, so we need to create a temporary mapping during early boot to be able to jump to high addresses * build direct map for first 4G in boothdr, it seems required by Linux drivers * add INTEL_PTE_PS bit definition to enable 2MB pages during bootstrap * ensure write bit is set in PDP entry access rights. This only applies to PAE-enabled kernels, mandatory for x86_64. On xen platform it seems to be handled differently Signed-off-by: Luca Dariz <luca@orpolo.org> Message-Id: <20220205175129.309469-2-luca@orpolo.org>
Diffstat (limited to 'x86_64')
-rw-r--r--x86_64/Makefrag.am17
-rw-r--r--x86_64/boothdr.S238
-rw-r--r--x86_64/ldscript28
3 files changed, 272 insertions, 11 deletions
diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 6b6bb2cb..0139940a 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -212,10 +212,25 @@ nodist_libkernel_a_SOURCES += \
#
if PLATFORM_at
+# This should probably be 0xffffffff80000000 for mcmodel=kernel, but let's try
+# to stay in the first 8G first, otherwise we have to fix the pmap module to
+# actually use the l4 page level
+#KERNEL_MAP_BASE=0x100000000
+# but for nor try with < 4G, otherwise we have linker errors
+KERNEL_MAP_BASE=0x40000000
gnumach_LINKFLAGS += \
--defsym _START_MAP=$(_START_MAP) \
- --defsym _START=_START_MAP+0x40000000 \
+ --defsym _START=_START_MAP \
+ --defsym KERNEL_MAP_BASE=$(KERNEL_MAP_BASE) \
-T '$(srcdir)'/x86_64/ldscript
+
+AM_CFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+
+AM_CCASFLAGS += \
+ -Ii386
endif
AM_CPPFLAGS += \
diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S
new file mode 100644
index 00000000..12fc7ca2
--- /dev/null
+++ b/x86_64/boothdr.S
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386asm.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/seg.h>
+ /*
+ * This section will be put first into .boot. See also x86_64/ldscript.
+ */
+ .section .boot.text,"ax"
+ .globl boot_start
+
+ /* We should never be entered this way. */
+ .code32
+boot_start:
+ jmp boot_entry
+
+ /* MultiBoot header - see multiboot.h. */
+#define MULTIBOOT_MAGIC 0x1BADB002
+#ifdef __ELF__
+#define MULTIBOOT_FLAGS 0x00000003
+#else /* __ELF__ */
+#define MULTIBOOT_FLAGS 0x00010003
+#endif /* __ELF__ */
+ P2ALIGN(2)
+boot_hdr:
+ .long MULTIBOOT_MAGIC
+ .long MULTIBOOT_FLAGS
+ /*
+ * The next item here is the checksum.
+ * XX this works OK until we need at least the 30th bit.
+ */
+ .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS)
+#ifndef __ELF__ /* a.out kludge */
+ .long boot_hdr /* header_addr */
+ .long _start /* load_addr */
+ .long _edata /* load_end_addr */
+ .long _end /* bss_end_addr */
+ .long boot_entry /* entry */
+#endif /* __ELF__ */
+
+boot_entry:
+ /*
+ * Prepare minimal page mapping to jump to 64 bit and to C code.
+ * The first 4GB is identity mapped, and the first 2GB are re-mapped
+ * to high addresses at KERNEL_MAP_BASE
+ */
+
+ movl $p3table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table)
+ /*
+ * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address
+ * space. Part of it might be remapped later if the kernel is mapped
+ * below 4G.
+ */
+ movl $p2table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table)
+ movl $p2table1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 8)
+ movl $p2table2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 16)
+ movl $p2table3,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 24)
+ /* point each page table level two entry to a page */
+ mov $0,%ecx
+.map_p2_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k
+ mov %eax,p2table(,%ecx,8)
+ inc %ecx
+ cmp $2048,%ecx // 512 entries per table, map 4 L2 tables
+ jne .map_p2_table
+
+ /*
+ * KERNEL_MAP_BASE must me aligned to 2GB.
+ * Depending on kernel starting address, we might need to add another
+ * entry in the L4 table (controlling 512 GB chunks). In any case, we
+ * add two entries in L3 table to make sure we map 2GB for the kernel.
+ * Note that this may override part of the mapping create above.
+ */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)
+ movl $p3ktable,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#else
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#endif
+
+ mov $0,%ecx
+.map_p2k_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K
+ mov %eax,p2ktable1(,%ecx,8)
+ inc %ecx
+ cmp $1024,%ecx // 512 entries per table, map 2 L2 tables
+ jne .map_p2k_table
+
+switch64:
+ /*
+ * Jump to 64 bit mode, we have to
+ * - enable PAE
+ * - enable long mode
+ * - enable paging and load the tables filled above in CR3
+ * - jump to a 64-bit code segment
+ */
+ mov %cr4,%eax
+ or $CR4_PAE,%eax
+ mov %eax,%cr4
+ mov $0xC0000080,%ecx // select EFER register
+ rdmsr
+ or $(1 << 8),%eax // long mode enable bit
+ wrmsr
+ mov $p4table,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $CR0_PG,%eax
+ or $CR0_WP,%eax
+ mov %eax,%cr0
+
+ lgdt gdt64pointer
+ movw $0,%ax
+ movw %ax,%fs
+ movw %ax,%gs
+ movw $16,%ax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ ljmp $8,$boot_entry64
+
+ .code64
+
+ /* why do we need this? it seems overwritten by linker */
+ .globl _start
+_start:
+
+boot_entry64:
+ /* Switch to our own interrupt stack. */
+ movq $(_intstack+INTSTACK_SIZE),%rax
+ andq $(~15),%rax
+ movq %rax,%rsp
+
+ /* Reset EFLAGS to a known state. */
+ pushq $0
+ popf
+ /* save multiboot info for later */
+ movq %rbx,%r8
+
+ /* Fix ifunc entries */
+ movq $__rela_iplt_start,%rsi
+ movq $__rela_iplt_end,%rdi
+iplt_cont:
+ cmpq %rdi,%rsi
+ jae iplt_done
+ movq (%rsi),%rbx /* r_offset */
+ movb 4(%rsi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+ call *(%ebx) /* call ifunc */
+ movq %rax,(%rbx) /* fixed address */
+iplt_next:
+ addq $8,%rsi
+ jmp iplt_cont
+iplt_done:
+
+ /* restore multiboot info */
+ movq %r8,%rdi
+ /* Jump into C code. */
+ call EXT(c_boot_entry)
+ /* not reached */
+ nop
+
+ .section .boot.data
+ .comm _intstack,INTSTACK_SIZE
+
+ .code32
+ .section .boot.data
+ .align 4096
+#define SEG_ACCESS_OFS 40
+#define SEG_GRANULARITY_OFS 52
+gdt64:
+ .quad 0
+gdt64code:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS)
+gdt64data:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << SEG_ACCESS_OFS)
+gdt64end:
+ .skip (4096 - (gdt64end - gdt64))
+gdt64pointer:
+ .word gdt64end - gdt64 - 1
+ .quad gdt64
+
+ .section .boot.data
+ .align 4096
+p4table: .space 4096
+p3table: .space 4096
+p2table: .space 4096
+p2table1: .space 4096
+p2table2: .space 4096
+p2table3: .space 4096
+p3ktable: .space 4096
+p2ktable1: .space 4096
+p2ktable2: .space 4096
diff --git a/x86_64/ldscript b/x86_64/ldscript
index 375e8104..de99795e 100644
--- a/x86_64/ldscript
+++ b/x86_64/ldscript
@@ -2,7 +2,7 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
-ENTRY(_start)
+ENTRY(boot_start)
SECTIONS
{
/*
@@ -11,22 +11,30 @@ SECTIONS
* be first in there. See also `i386/i386at/boothdr.S' and
* `gnumach_LINKFLAGS' in `i386/Makefrag.am'.
*/
- . = _START;
- .text :
- AT (_START_MAP)
+
+ . = _START_MAP;
+ .boot :
+ {
+ *(.boot.text)
+ *(.boot.data)
+ } =0x90909090
+
+ . += KERNEL_MAP_BASE;
+ _start = .;
+ .text : AT(((ADDR(.text)) - KERNEL_MAP_BASE))
{
- *(.text.start)
+ *(.text*)
*(.text .stub .text.* .gnu.linkonce.t.*)
*(.text.unlikely .text.*_unlikely)
KEEP (*(.text.*personality*))
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} =0x90909090
- .init :
+ .init : AT(((ADDR(.init)) - KERNEL_MAP_BASE))
{
KEEP (*(.init))
} =0x90909090
- .fini :
+ .fini : AT(((ADDR(.fini)) - KERNEL_MAP_BASE))
{
KEEP (*(.fini))
} =0x90909090
@@ -69,7 +77,7 @@ SECTIONS
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.plt : { *(.plt) *(.iplt) }
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_BASE)) { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
@@ -139,7 +147,7 @@ SECTIONS
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (24, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
- .data :
+ .data : AT(((ADDR(.data)) - KERNEL_MAP_BASE))
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -147,7 +155,7 @@ SECTIONS
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
__bss_start = .;
- .bss :
+ .bss : AT(((ADDR(.bss)) - KERNEL_MAP_BASE))
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)