-rw-r--r--  configure.ac              3
-rw-r--r--  i386/configfrag.ac        2
-rw-r--r--  i386/i386/i386asm.sym     1
-rw-r--r--  i386/i386/vm_param.h      2
-rw-r--r--  i386/intel/pmap.c         4
-rw-r--r--  i386/intel/pmap.h         1
-rw-r--r--  x86_64/Makefrag.am       17
-rw-r--r--  x86_64/boothdr.S        238
-rw-r--r--  x86_64/ldscript          28
9 files changed, 280 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac
index 019842db..3aaa935c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -56,8 +56,7 @@ case $host_platform:$host_cpu in
default:i?86)
host_platform=at;;
default:x86_64)]
- AC_MSG_WARN([Platform set to Xen by default, this can not boot on non-Xen systems, you currently need a 32bit build for that.])
- [host_platform=xen;;
+ [host_platform=at;;
at:i?86 | xen:i?86 | at:x86_64 | xen:x86_64)
:;;
*)]
diff --git a/i386/configfrag.ac b/i386/configfrag.ac
index f697e277..f07a98ca 100644
--- a/i386/configfrag.ac
+++ b/i386/configfrag.ac
@@ -106,6 +106,8 @@ AC_ARG_ENABLE([apic],
enable_pae=${enable_pae-yes};;
*:i?86)
:;;
+ *:x86_64)
+ enable_pae=${enable_pae-yes};;
*)
if [ x"$enable_pae" = xyes ]; then]
AC_MSG_ERROR([can only enable the `PAE' feature on ix86.])
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 0662aea0..9e1d13d7 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -122,6 +122,7 @@ expr sizeof(pt_entry_t) PTE_SIZE
expr INTEL_PTE_PFN PTE_PFN
expr INTEL_PTE_VALID PTE_V
expr INTEL_PTE_WRITE PTE_W
+expr INTEL_PTE_PS PTE_S
expr ~INTEL_PTE_VALID PTE_INVALID
expr NPTES PTES_PER_PAGE
expr INTEL_PTE_VALID|INTEL_PTE_WRITE INTEL_PTE_KERNEL
diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h
index edd9522c..314fdb35 100644
--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -36,7 +36,7 @@
* for better trace support in kdb; the _START symbol has to be offset by the
* same amount. */
#ifdef __x86_64__
-#define VM_MIN_KERNEL_ADDRESS 0x40000000UL
+#define VM_MIN_KERNEL_ADDRESS KERNEL_MAP_BASE
#else
#define VM_MIN_KERNEL_ADDRESS 0xC0000000UL
#endif
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index 5302b092..57c18a0d 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -655,7 +655,7 @@ void pmap_bootstrap(void)
pa_to_pte(_kvtophys((void *) kernel_page_dir
+ i * INTEL_PGBYTES))
| INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
+#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES)
| INTEL_PTE_WRITE
#endif
);
@@ -1298,7 +1298,7 @@ pmap_t pmap_create(vm_size_t size)
WRITE_PTE(&p->pdpbase[i],
pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
| INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
+#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES)
| INTEL_PTE_WRITE
#endif
);
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 63683bc5..bad640c1 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -148,6 +148,7 @@ typedef phys_addr_t pt_entry_t;
#define INTEL_PTE_NCACHE 0x00000010
#define INTEL_PTE_REF 0x00000020
#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_PS 0x00000080
#ifdef MACH_PV_PAGETABLES
/* Not supported */
#define INTEL_PTE_GLOBAL 0x00000000
diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 6b6bb2cb..0139940a 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -212,10 +212,25 @@ nodist_libkernel_a_SOURCES += \
#
if PLATFORM_at
+# This should probably be 0xffffffff80000000 for mcmodel=kernel, but let's try
+# to stay within the first 8G for now, otherwise we have to fix the pmap module
+# to actually use the L4 page level
+#KERNEL_MAP_BASE=0x100000000
+# but for now try with < 4G, otherwise we get linker errors
+KERNEL_MAP_BASE=0x40000000
gnumach_LINKFLAGS += \
--defsym _START_MAP=$(_START_MAP) \
- --defsym _START=_START_MAP+0x40000000 \
+ --defsym _START=_START_MAP \
+ --defsym KERNEL_MAP_BASE=$(KERNEL_MAP_BASE) \
-T '$(srcdir)'/x86_64/ldscript
+
+AM_CFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+
+AM_CCASFLAGS += \
+ -Ii386
endif
AM_CPPFLAGS += \
diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S
new file mode 100644
index 00000000..12fc7ca2
--- /dev/null
+++ b/x86_64/boothdr.S
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386asm.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/seg.h>
+ /*
+ * This section will be put first into .boot. See also x86_64/ldscript.
+ */
+ .section .boot.text,"ax"
+ .globl boot_start
+
+ /* We should never be entered this way. */
+ .code32
+boot_start:
+ jmp boot_entry
+
+ /* MultiBoot header - see multiboot.h. */
+#define MULTIBOOT_MAGIC 0x1BADB002
+#ifdef __ELF__
+#define MULTIBOOT_FLAGS 0x00000003
+#else /* __ELF__ */
+#define MULTIBOOT_FLAGS 0x00010003
+#endif /* __ELF__ */
+ P2ALIGN(2)
+boot_hdr:
+ .long MULTIBOOT_MAGIC
+ .long MULTIBOOT_FLAGS
+ /*
+ * The next item here is the checksum.
+ * XX this works OK until we need at least the 30th bit.
+ */
+ .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS)
+#ifndef __ELF__ /* a.out kludge */
+ .long boot_hdr /* header_addr */
+ .long _start /* load_addr */
+ .long _edata /* load_end_addr */
+ .long _end /* bss_end_addr */
+ .long boot_entry /* entry */
+#endif /* __ELF__ */
+
+boot_entry:
+	/*
+	 * Prepare a minimal page mapping to jump to 64-bit mode and to C code.
+	 * The first 4GB are identity mapped, and the first 2GB are re-mapped
+	 * to high addresses at KERNEL_MAP_BASE.
+	 */
+
+ movl $p3table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table)
+ /*
+ * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address
+ * space. Part of it might be remapped later if the kernel is mapped
+ * below 4G.
+ */
+ movl $p2table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table)
+ movl $p2table1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 8)
+ movl $p2table2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 16)
+ movl $p2table3,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 24)
+ /* point each page table level two entry to a page */
+ mov $0,%ecx
+.map_p2_table:
+	mov $0x200000,%eax // 2MiB page, should always be available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k
+ mov %eax,p2table(,%ecx,8)
+ inc %ecx
+ cmp $2048,%ecx // 512 entries per table, map 4 L2 tables
+ jne .map_p2_table
+
+	/*
+	 * KERNEL_MAP_BASE must be aligned to 2GB.
+	 * Depending on the kernel starting address, we might need to add another
+	 * entry in the L4 table (controlling 512 GB chunks). In any case, we
+	 * add two entries in the L3 table to make sure we map 2GB for the kernel.
+	 * Note that this may overwrite part of the mapping created above.
+	 */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)
+ movl $p3ktable,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#else
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#endif
+
+ mov $0,%ecx
+.map_p2k_table:
+	mov $0x200000,%eax // 2MiB page, should always be available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K
+ mov %eax,p2ktable1(,%ecx,8)
+ inc %ecx
+ cmp $1024,%ecx // 512 entries per table, map 2 L2 tables
+ jne .map_p2k_table
+
+switch64:
+	/*
+	 * To jump to 64-bit mode we have to
+	 * - enable PAE
+	 * - enable long mode
+	 * - enable paging and load the tables filled above into CR3
+ * - jump to a 64-bit code segment
+ */
+ mov %cr4,%eax
+ or $CR4_PAE,%eax
+ mov %eax,%cr4
+ mov $0xC0000080,%ecx // select EFER register
+ rdmsr
+ or $(1 << 8),%eax // long mode enable bit
+ wrmsr
+ mov $p4table,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $CR0_PG,%eax
+ or $CR0_WP,%eax
+ mov %eax,%cr0
+
+ lgdt gdt64pointer
+ movw $0,%ax
+ movw %ax,%fs
+ movw %ax,%gs
+ movw $16,%ax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ ljmp $8,$boot_entry64
+
+ .code64
+
+	/* Why do we need this? It seems to be overwritten by the linker. */
+ .globl _start
+_start:
+
+boot_entry64:
+ /* Switch to our own interrupt stack. */
+ movq $(_intstack+INTSTACK_SIZE),%rax
+ andq $(~15),%rax
+ movq %rax,%rsp
+
+ /* Reset EFLAGS to a known state. */
+ pushq $0
+ popf
+ /* save multiboot info for later */
+ movq %rbx,%r8
+
+ /* Fix ifunc entries */
+ movq $__rela_iplt_start,%rsi
+ movq $__rela_iplt_end,%rdi
+iplt_cont:
+ cmpq %rdi,%rsi
+ jae iplt_done
+ movq (%rsi),%rbx /* r_offset */
+ movb 4(%rsi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+	call *(%rbx)		/* call ifunc */
+ movq %rax,(%rbx) /* fixed address */
+iplt_next:
+ addq $8,%rsi
+ jmp iplt_cont
+iplt_done:
+
+ /* restore multiboot info */
+ movq %r8,%rdi
+ /* Jump into C code. */
+ call EXT(c_boot_entry)
+ /* not reached */
+ nop
+
+ .section .boot.data
+ .comm _intstack,INTSTACK_SIZE
+
+ .code32
+ .section .boot.data
+ .align 4096
+#define SEG_ACCESS_OFS 40
+#define SEG_GRANULARITY_OFS 52
+gdt64:
+ .quad 0
+gdt64code:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS)
+gdt64data:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << SEG_ACCESS_OFS)
+gdt64end:
+ .skip (4096 - (gdt64end - gdt64))
+gdt64pointer:
+ .word gdt64end - gdt64 - 1
+ .quad gdt64
+
+ .section .boot.data
+ .align 4096
+p4table: .space 4096
+p3table: .space 4096
+p2table: .space 4096
+p2table1: .space 4096
+p2table2: .space 4096
+p2table3: .space 4096
+p3ktable: .space 4096
+p2ktable1: .space 4096
+p2ktable2: .space 4096
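
For reference, the L4/L3/L2 slot selection done in boot_entry and .kernel_map above, and the VMA-to-load-address offset used in the x86_64 ldscript below, boil down to shifts by 39/30/21 bits. The following host-side C sketch shows the arithmetic; the helper names and the sample .text address are illustrative only, and KERNEL_MAP_BASE is the value set in x86_64/Makefrag.am:

    #include <stdint.h>
    #include <stdio.h>

    #define KERNEL_MAP_BASE 0x40000000ULL  /* value chosen in x86_64/Makefrag.am */

    /* Each paging level indexes 512 entries of 8 bytes each. */
    static unsigned l4_slot(uint64_t va) { return (va >> 39) & 0x1FF; } /* 512GB per entry */
    static unsigned l3_slot(uint64_t va) { return (va >> 30) & 0x1FF; } /* 1GB per entry */
    static unsigned l2_slot(uint64_t va) { return (va >> 21) & 0x1FF; } /* 2MiB per entry */

    int main(void)
    {
        /* With KERNEL_MAP_BASE = 0x40000000 this prints L4=0 L3=1 L2=0, so the
         * kernel mapping reuses p4table[0]/p3table, and the .kernel_map branch
         * that adds a fresh L4 entry is only needed for bases >= 2^39. */
        printf("L4=%u L3=%u L2=%u\n",
               l4_slot(KERNEL_MAP_BASE), l3_slot(KERNEL_MAP_BASE),
               l2_slot(KERNEL_MAP_BASE));

        /* The ldscript's AT(ADDR(.text) - KERNEL_MAP_BASE) expresses the same
         * split: load (physical) address = virtual address - KERNEL_MAP_BASE. */
        uint64_t vma = KERNEL_MAP_BASE + 0x1000000; /* hypothetical .text address */
        printf("VMA 0x%llx -> LMA 0x%llx\n",
               (unsigned long long) vma,
               (unsigned long long) (vma - KERNEL_MAP_BASE));
        return 0;
    }
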
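The iplt loop in boot_entry64 walks the relocation records that the linker script exports between __rela_iplt_start and __rela_iplt_end and patches each ifunc slot with the address returned by its resolver. A rough C equivalent is sketched below (fixup_irelative is an illustrative name, not a kernel function); it assumes the standard Elf64_Rela layout, where entries are 24 bytes, the type sits in the low 32 bits of r_info, and R_X86_64_IRELATIVE resolvers are reached through r_addend, unlike the 8-byte Elf32_Rel records the corresponding i386 loop works on:

    #include <stdint.h>

    /* Standard Elf64_Rela layout, reproduced so the sketch is self-contained. */
    typedef struct {
        uint64_t r_offset;   /* slot where the resolved address is stored */
        uint64_t r_info;     /* symbol index (high 32 bits) and type (low 32 bits) */
        int64_t  r_addend;   /* for IRELATIVE: address of the resolver function */
    } Elf64_Rela;

    #define ELF64_R_TYPE(info)  ((uint32_t)(info))
    #define R_X86_64_IRELATIVE  37

    /* Bounds provided by the PROVIDE_HIDDEN(__rela_iplt_*) lines in the ldscript. */
    extern const Elf64_Rela __rela_iplt_start[], __rela_iplt_end[];

    /* Call every ifunc resolver and store the address it returns into its slot. */
    static void fixup_irelative(void)
    {
        for (const Elf64_Rela *r = __rela_iplt_start; r < __rela_iplt_end; r++) {
            if (ELF64_R_TYPE(r->r_info) != R_X86_64_IRELATIVE)
                continue;
            uintptr_t (*resolver)(void) =
                (uintptr_t (*)(void)) (uintptr_t) r->r_addend;
            *(uintptr_t *) (uintptr_t) r->r_offset = resolver();
        }
    }
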
diff --git a/x86_64/ldscript b/x86_64/ldscript
index 375e8104..de99795e 100644
--- a/x86_64/ldscript
+++ b/x86_64/ldscript
@@ -2,7 +2,7 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
-ENTRY(_start)
+ENTRY(boot_start)
SECTIONS
{
/*
@@ -11,22 +11,30 @@ SECTIONS
* be first in there. See also `i386/i386at/boothdr.S' and
* `gnumach_LINKFLAGS' in `i386/Makefrag.am'.
*/
- . = _START;
- .text :
- AT (_START_MAP)
+
+ . = _START_MAP;
+ .boot :
+ {
+ *(.boot.text)
+ *(.boot.data)
+ } =0x90909090
+
+ . += KERNEL_MAP_BASE;
+ _start = .;
+ .text : AT(((ADDR(.text)) - KERNEL_MAP_BASE))
{
- *(.text.start)
+ *(.text*)
*(.text .stub .text.* .gnu.linkonce.t.*)
*(.text.unlikely .text.*_unlikely)
KEEP (*(.text.*personality*))
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} =0x90909090
- .init :
+ .init : AT(((ADDR(.init)) - KERNEL_MAP_BASE))
{
KEEP (*(.init))
} =0x90909090
- .fini :
+ .fini : AT(((ADDR(.fini)) - KERNEL_MAP_BASE))
{
KEEP (*(.fini))
} =0x90909090
@@ -69,7 +77,7 @@ SECTIONS
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.plt : { *(.plt) *(.iplt) }
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_BASE)) { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
@@ -139,7 +147,7 @@ SECTIONS
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (24, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
- .data :
+ .data : AT(((ADDR(.data)) - KERNEL_MAP_BASE))
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -147,7 +155,7 @@ SECTIONS
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
__bss_start = .;
- .bss :
+ .bss : AT(((ADDR(.bss)) - KERNEL_MAP_BASE))
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)