diff options
author | Damien Zammit <damien@zamaudio.com> | 2023-09-24 10:35:10 +0000 |
---|---|---|
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2023-09-24 14:38:03 +0200 |
commit | b11e10e2c81c2b608176021364a36d84173358e3 (patch) | |
tree | 8bde3f47151865143341e864040af3bde68385a5 /i386 | |
parent | 31d45d0d8ee1d8eee96fc2a283a388b6b6aca669 (diff) | |
download | gnumach-b11e10e2c81c2b608176021364a36d84173358e3.tar.gz gnumach-b11e10e2c81c2b608176021364a36d84173358e3.tar.bz2 gnumach-b11e10e2c81c2b608176021364a36d84173358e3.zip |
percpu area using gs segment
This speeds up SMP again by storing the struct processor
in a percpu area, which avoids an expensive cpu_number() on every call
of current_processor(), and by reading the cpu number from
an offset into the percpu area. Untested on 64-bit;
work remains to use the percpu area for the other per-CPU arrays.
TESTED: (NCPUS=8) -smp 1 boots to login shell ~2x slower than uniprocessor
TESTED: (NCPUS=8) -smp 2 boots to INIT but hangs there
TESTED: (NCPUS=8) -smp 4 gets stuck seemingly within rumpdisk and hangs
TESTED: (NCPUS=1) uniprocessor is a bit faster than normal
Message-Id: <20230924103428.455966-3-damien@zamaudio.com>
Diffstat (limited to 'i386')
-rw-r--r-- | i386/Makefrag.am | 2 | ||||
-rw-r--r-- | i386/i386/cpu_number.h | 17 | ||||
-rw-r--r-- | i386/i386/fpu.c | 2 | ||||
-rw-r--r-- | i386/i386/gdt.c | 21 | ||||
-rw-r--r-- | i386/i386/gdt.h | 8 | ||||
-rw-r--r-- | i386/i386/i386asm.sym | 2 | ||||
-rw-r--r-- | i386/i386/locore.S | 20 | ||||
-rw-r--r-- | i386/i386/mp_desc.c | 3 | ||||
-rw-r--r-- | i386/i386/percpu.c | 31 | ||||
-rw-r--r-- | i386/i386/percpu.h | 83 | ||||
-rw-r--r-- | i386/i386/pit.c | 2 | ||||
-rw-r--r-- | i386/i386/spl.S | 16 | ||||
-rw-r--r-- | i386/i386at/model_dep.c | 1 |
13 files changed, 177 insertions, 31 deletions
diff --git a/i386/Makefrag.am b/i386/Makefrag.am index 274e8695..c1724cea 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -108,6 +108,8 @@ libkernel_a_SOURCES += \ i386/i386/irq.c \ i386/i386/irq.h \ i386/i386/msr.h \ + i386/i386/percpu.c \ + i386/i386/percpu.h \ i386/i386/pit.c \ i386/i386/pit.h diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h index 8357be84..6ba46e4b 100644 --- a/i386/i386/cpu_number.h +++ b/i386/i386/cpu_number.h @@ -30,6 +30,8 @@ #ifndef _I386_CPU_NUMBER_H_ #define _I386_CPU_NUMBER_H_ +#define MY(stm) %gs:PERCPU_##stm + #if NCPUS > 1 #ifdef __i386__ @@ -45,8 +47,8 @@ shrl $24, reg ;\ movl %cs:CX(cpu_id_lut, reg), reg ;\ -/* Never call CPU_NUMBER(%esi) */ -#define CPU_NUMBER(reg) \ +/* Never call CPU_NUMBER_NO_GS(%esi) */ +#define CPU_NUMBER_NO_GS(reg) \ pushl %esi ;\ pushl %eax ;\ pushl %ebx ;\ @@ -63,20 +65,29 @@ movl %esi, reg ;\ popl %esi ;\ +#define CPU_NUMBER(reg) \ + movl MY(CPU_ID), reg; + #ifndef __ASSEMBLER__ #include <kern/cpu_number.h> #include <i386/apic.h> +#include <i386/percpu.h> -static inline int cpu_number(void) +static inline int cpu_number_slow(void) { return cpu_id_lut[apic_get_current_cpu()]; } +static inline int cpu_number(void) +{ + return percpu_get(int, cpu_id); +} #endif #else /* NCPUS == 1 */ #define CPU_NUMBER_NO_STACK(reg) +#define CPU_NUMBER_NO_GS(reg) #define CPU_NUMBER(reg) #define CX(addr,reg) addr diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c index fefe5e49..e1818683 100644 --- a/i386/i386/fpu.c +++ b/i386/i386/fpu.c @@ -119,7 +119,7 @@ init_fpu(void) #else /* MACH_RING1 */ unsigned int native = 0; - if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486) + if (machine_slot[cpu_number_slow()].cpu_type >= CPU_TYPE_I486) native = CR0_NE; /* diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c index ddda603b..4edd3ec5 100644 --- a/i386/i386/gdt.c +++ b/i386/i386/gdt.c @@ -35,6 +35,8 @@ #include <kern/assert.h> #include <intel/pmap.h> +#include <kern/cpu_number.h> +#include <machine/percpu.h> 
#include "vm_param.h" #include "seg.h" @@ -48,7 +50,7 @@ extern struct real_descriptor gdt[GDTSZ]; static void -gdt_fill(struct real_descriptor *mygdt) +gdt_fill(int cpu, struct real_descriptor *mygdt) { /* Initialize the kernel code and data segment descriptors. */ #ifdef __x86_64__ @@ -73,6 +75,16 @@ gdt_fill(struct real_descriptor *mygdt) 0xffffffff, ACC_PL_K|ACC_DATA_W, SZ_32); #endif /* MACH_PV_DESCRIPTORS */ + vm_offset_t thiscpu = kvtolin(&percpu_array[cpu]); + _fill_gdt_descriptor(mygdt, PERCPU_DS, + thiscpu, + thiscpu + sizeof(struct percpu) - 1, +#ifdef __x86_64__ + ACC_PL_K|ACC_DATA_W, SZ_64 +#else + ACC_PL_K|ACC_DATA_W, SZ_32 +#endif + ); #endif #ifdef MACH_PV_DESCRIPTORS @@ -119,15 +131,16 @@ reload_segs(void) "movw %w1,%%ds\n" "movw %w1,%%es\n" + "movw %w3,%%gs\n" "movw %w1,%%ss\n" - : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0)); + : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0), "r" (PERCPU_DS)); #endif } void gdt_init(void) { - gdt_fill(gdt); + gdt_fill(0, gdt); reload_segs(); @@ -146,7 +159,7 @@ gdt_init(void) void ap_gdt_init(int cpu) { - gdt_fill(mp_gdt[cpu]); + gdt_fill(cpu, mp_gdt[cpu]); reload_segs(); } diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h index 80ca8ada..c7da012a 100644 --- a/i386/i386/gdt.h +++ b/i386/i386/gdt.h @@ -77,11 +77,9 @@ /* 0x58 used by user TSS in 64bit mode */ -#ifdef __x86_64__ -#define GDTSZ sel_idx(0x60) -#else -#define GDTSZ sel_idx(0x58) -#endif +#define PERCPU_DS 0x68 /* per-cpu data mapping */ + +#define GDTSZ sel_idx(0x70) #ifndef __ASSEMBLER__ diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym index 5d546c08..d96b8be8 100644 --- a/i386/i386/i386asm.sym +++ b/i386/i386/i386asm.sym @@ -53,6 +53,8 @@ expr CALL_PMAP_UPDATE offset ApicLocalUnit lu apic_id APIC_ID +offset percpu pc cpu_id PERCPU_CPU_ID + offset pcb pcb iss offset thread th pcb diff --git a/i386/i386/locore.S b/i386/i386/locore.S index 0cac8df4..870db785 100644 --- a/i386/i386/locore.S +++ b/i386/i386/locore.S @@ -244,7 +244,7 @@ 
timer_normalize: * Switch to a new timer. */ ENTRY(timer_switch) - CPU_NUMBER(%edx) /* get this CPU */ + CPU_NUMBER_NO_GS(%edx) /* get this CPU */ movl VA_ETC,%ecx /* get timer */ movl CX(EXT(current_tstamp),%edx),%eax /* get old time stamp */ movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */ @@ -262,7 +262,7 @@ ENTRY(timer_switch) * Initialize the first timer for a CPU. */ ENTRY(start_timer) - CPU_NUMBER(%edx) /* get this CPU */ + CPU_NUMBER_NO_GS(%edx) /* get this CPU */ movl VA_ETC,%ecx /* get timer */ movl %ecx,CX(EXT(current_tstamp),%edx) /* set initial time stamp */ movl S_ARG0,%ecx /* get timer */ @@ -469,7 +469,8 @@ trap_push_segs: mov %ax,%ds /* (same as kernel stack segment) */ mov %ax,%es mov %ax,%fs - mov %ax,%gs + mov $(PERCPU_DS),%ax + movw %ax,%gs trap_set_segs: cld /* clear direction flag */ @@ -673,7 +674,7 @@ ENTRY(all_intrs) pushl %edx cld /* clear direction flag */ - CPU_NUMBER(%ecx) + CPU_NUMBER_NO_GS(%ecx) movl %esp,%edx /* on an interrupt stack? */ and $(~(INTSTACK_SIZE-1)),%edx cmpl %ss:CX(EXT(int_stack_base),%ecx),%edx @@ -687,7 +688,8 @@ ENTRY(all_intrs) mov %dx,%ds mov %dx,%es mov %dx,%fs - mov %dx,%gs + mov $(PERCPU_DS),%dx + movw %dx,%gs CPU_NUMBER(%edx) @@ -745,7 +747,7 @@ LEXT(return_to_iret) /* to find the return from calling interrupt) */ iret /* return to caller */ int_from_intstack: - CPU_NUMBER(%edx) + CPU_NUMBER_NO_GS(%edx) cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? 
*/ jb stack_overflowed /* if not: */ call EXT(interrupt) /* call interrupt routine */ @@ -793,7 +795,8 @@ ast_from_interrupt: mov %dx,%ds mov %dx,%es mov %dx,%fs - mov %dx,%gs + mov $(PERCPU_DS),%dx + movw %dx,%gs CPU_NUMBER(%edx) TIME_TRAP_UENTRY @@ -1052,7 +1055,8 @@ syscall_entry_2: mov %dx,%ds mov %dx,%es mov %dx,%fs - mov %dx,%gs + mov $(PERCPU_DS),%dx + movw %dx,%gs /* * Shuffle eflags,eip,cs into proper places diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c index f1a1f989..f4ccc381 100644 --- a/i386/i386/mp_desc.c +++ b/i386/i386/mp_desc.c @@ -238,6 +238,7 @@ cpu_setup(int cpu) flush_instr_queue(); printf("AP=(%u) paging done\n", cpu); + init_percpu(cpu); mp_desc_init(cpu); printf("AP=(%u) mpdesc done\n", cpu); @@ -275,7 +276,7 @@ cpu_setup(int cpu) void cpu_ap_main() { - int cpu = cpu_number(); + int cpu = cpu_number_slow(); do { cpu_pause(); diff --git a/i386/i386/percpu.c b/i386/i386/percpu.c new file mode 100644 index 00000000..a4db7b68 --- /dev/null +++ b/i386/i386/percpu.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <i386/smp.h> +#include <i386/apic.h> +#include <kern/cpu_number.h> +#include <i386/percpu.h> + +struct percpu percpu_array[NCPUS] = {0}; + +void init_percpu(int cpu) +{ + int apic_id = apic_get_current_cpu(); + + percpu_array[cpu].self = &percpu_array[cpu]; + percpu_array[cpu].apic_id = apic_id; + percpu_array[cpu].cpu_id = cpu; +} diff --git a/i386/i386/percpu.h b/i386/i386/percpu.h new file mode 100644 index 00000000..202504da --- /dev/null +++ b/i386/i386/percpu.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2023 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _PERCPU_H_ +#define _PERCPU_H_ + +struct percpu; + +#define percpu_assign(stm, val) \ + asm("mov %[src], %%gs:%c[offs]" \ + : /* No outputs */ \ + : [src] "r" (val), [offs] "e" (__builtin_offsetof(struct percpu, stm)) \ + : ); + +#define percpu_get(typ, stm) \ +MACRO_BEGIN \ + typ val_; \ + \ + asm("mov %%gs:%c[offs], %[dst]" \ + : [dst] "=r" (val_) \ + : [offs] "e" (__builtin_offsetof(struct percpu, stm)) \ + : ); \ + \ + val_; \ +MACRO_END + +#define percpu_ptr(typ, stm) \ +MACRO_BEGIN \ + typ *ptr_ = (typ *)__builtin_offsetof(struct percpu, stm); \ + \ + asm("add %%gs:0, %[pointer]" \ + : [pointer] "+r" (ptr_) \ + : /* No inputs */ \ + : ); \ + \ + ptr_; \ +MACRO_END + +#include <kern/processor.h> +#include <kern/thread.h> + +struct percpu { + struct percpu *self; + int apic_id; + int cpu_id; + struct processor processor; +/* + struct machine_slot machine_slot; + struct mp_desc_table mp_desc_table; + thread_t active_thread; + vm_offset_t active_stack; + vm_offset_t int_stack_top; + vm_offset_t int_stack_base; + ast_t need_ast; + ipc_kmsg_t ipc_kmsg_cache; + pmap_update_list cpu_update_list; + spl_t saved_ipl; + spl_t curr_ipl; + timer_data_t kernel_timer; + timer_t current_timer; + unsigned long in_interrupt; +*/ +}; + +extern struct percpu percpu_array[NCPUS]; + +void init_percpu(int cpu); + +#endif /* _PERCPU_H_ */ diff --git a/i386/i386/pit.c b/i386/i386/pit.c index 6c006a98..9e527fca 100644 --- a/i386/i386/pit.c +++ b/i386/i386/pit.c @@ -118,7 +118,7 @@ pit_mdelay(int msec) void clkstart(void) { - if (cpu_number() != 0) + if (cpu_number_slow() != 0) /* Only one PIT initialization is needed */ return; unsigned char byte; diff --git a/i386/i386/spl.S b/i386/i386/spl.S index 2f2c8e3a..9ce780f4 100644 --- a/i386/i386/spl.S +++ b/i386/i386/spl.S @@ -48,7 +48,7 @@ lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \ ENTRY(spl0) mb; - CPU_NUMBER(%edx) + CPU_NUMBER_NO_GS(%edx) movl CX(EXT(curr_ipl),%edx),%eax /* save current ipl */ 
pushl %eax cli /* disable interrupts */ @@ -77,7 +77,7 @@ ENTRY(spl0) #endif cli /* disable interrupts */ 1: - CPU_NUMBER(%edx) + CPU_NUMBER_NO_GS(%edx) cmpl $(SPL0),CX(EXT(curr_ipl),%edx) /* are we at spl0? */ je 1f /* yes, all done */ movl $(SPL0),CX(EXT(curr_ipl),%edx) /* set ipl */ @@ -123,14 +123,14 @@ ENTRY(spl7) mb; /* just clear IF */ cli - CPU_NUMBER(%edx) + CPU_NUMBER_NO_GS(%edx) movl $SPL7,%eax xchgl CX(EXT(curr_ipl),%edx),%eax ret ENTRY(splx) movl S_ARG0,%edx /* get ipl */ - CPU_NUMBER(%eax) + CPU_NUMBER_NO_GS(%eax) #if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN) /* First make sure that if we're exitting from ipl7, IF is still cleared */ cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? */ @@ -145,7 +145,7 @@ ENTRY(splx) #endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */ testl %edx,%edx /* spl0? */ jz EXT(spl0) /* yes, handle specially */ - CPU_NUMBER(%eax) + CPU_NUMBER_NO_GS(%eax) cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */ jne spl /* no */ cmpl $SPL7,%edx /* spl7? */ @@ -194,7 +194,7 @@ splx_cli: 1: xorl %edx,%edx /* edx = ipl 0 */ 2: - CPU_NUMBER(%eax) + CPU_NUMBER_NO_GS(%eax) cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */ je 1f /* yes, all done */ movl %edx,CX(EXT(curr_ipl),%eax) /* set ipl */ @@ -213,7 +213,7 @@ splx_cli: .align TEXT_ALIGN .globl spl spl: - CPU_NUMBER(%eax) + CPU_NUMBER_NO_GS(%eax) #if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN) /* First make sure that if we're exitting from ipl7, IF is still cleared */ cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? 
*/ @@ -233,7 +233,7 @@ spl: /* get int mask */ #endif cli /* disable interrupts */ - CPU_NUMBER(%eax) + CPU_NUMBER_NO_GS(%eax) xchgl CX(EXT(curr_ipl),%eax),%edx /* set ipl */ #ifdef MACH_XEN XEN_SETMASK() /* program PICs with new mask */ diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index f83214b1..97acfdd6 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -462,6 +462,7 @@ i386at_init(void) ldt_init(); ktss_init(); + init_percpu(0); #if NCPUS > 1 /* Initialize SMP structures in the master processor */ mp_desc_init(0); |