From b11e10e2c81c2b608176021364a36d84173358e3 Mon Sep 17 00:00:00 2001 From: Damien Zammit Date: Sun, 24 Sep 2023 10:35:10 +0000 Subject: percpu area using gs segment This speeds up smp again, by storing the struct processor in a percpu area and avoiding an expensive cpu_number every call of current_processor(), as well as getting the cpu_number by an offset into the percpu area. Untested on 64 bit and work remains to use other percpu arrays. TESTED: (NCPUS=8) -smp 1 boots to login shell ~2x slower than uniprocessor TESTED: (NCPUS=8) -smp 2 boots to INIT but hangs there TESTED: (NCPUS=8) -smp 4 gets stuck seemingly within rumpdisk and hangs TESTED: (NCPUS=1) uniprocessor is a bit faster than normal Message-Id: <20230924103428.455966-3-damien@zamaudio.com> --- kern/processor.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'kern/processor.h') diff --git a/kern/processor.h b/kern/processor.h index 79386627..fc204ffa 100644 --- a/kern/processor.h +++ b/kern/processor.h @@ -112,6 +112,7 @@ typedef struct processor Processor; extern struct processor processor_array[NCPUS]; #include +#include /* * Chain of all processor sets. @@ -196,23 +197,15 @@ extern processor_t master_processor; #define PROCESSOR_ASSIGN 4 /* Assignment is changing */ #define PROCESSOR_SHUTDOWN 5 /* Being shutdown */ -/* - * Use processor ptr array to find current processor's data structure. - * This replaces a multiplication (index into processor_array) with - * an array lookup and a memory reference. It also allows us to save - * space if processor numbering gets too sparse. - */ - -extern processor_t processor_ptr[NCPUS]; - -#define cpu_to_processor(i) (processor_ptr[i]) +#define processor_ptr(i) (&percpu_array[i].processor) +#define cpu_to_processor processor_ptr -#define current_processor() (processor_ptr[cpu_number()]) +#define current_processor() (percpu_ptr(struct processor, processor)) #define current_processor_set() (current_processor()->processor_set) /* Compatibility -- will go away */ -#define cpu_state(slot_num) (processor_ptr[slot_num]->state) +#define cpu_state(slot_num) (processor_ptr(slot_num)->state) #define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE) /* Useful lock macros */ -- cgit v1.2.3