Diffstat (limited to 'vm')
-rw-r--r--  vm/vm_page.c     | 1244
-rw-r--r--  vm/vm_page.h     |  114
-rw-r--r--  vm/vm_pageout.c  |  649
-rw-r--r--  vm/vm_pageout.h  |    4
-rw-r--r--  vm/vm_resident.c |  316
5 files changed, 1456 insertions, 871 deletions
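For reference, here is a minimal standalone sketch of the per-segment free-page thresholds this patch introduces, mirroring the new macros and vm_page_seg_compute_pageout_thresholds() in vm_page.c below. The struct and function names in the sketch (seg_thresholds, compute_thresholds) and the 32768-page example segment are illustrative only; the ratios, floors and field roles follow the diff.

    #include <stdio.h>

    #define VM_PAGE_SEG_THRESHOLD_MIN_NUM    5      /* 5% of the segment ...       */
    #define VM_PAGE_SEG_THRESHOLD_MIN_DENOM  100
    #define VM_PAGE_SEG_THRESHOLD_MIN        500    /* ... but never below 500     */

    #define VM_PAGE_SEG_THRESHOLD_LOW_NUM    6      /* pageout starts below 6%     */
    #define VM_PAGE_SEG_THRESHOLD_LOW_DENOM  100
    #define VM_PAGE_SEG_THRESHOLD_LOW        600

    #define VM_PAGE_SEG_THRESHOLD_HIGH_NUM   10     /* pageout stops above 10%     */
    #define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
    #define VM_PAGE_SEG_THRESHOLD_HIGH       1000

    struct seg_thresholds {
        unsigned long min_free_pages;   /* privileged allocations only           */
        unsigned long low_free_pages;   /* pageout daemon starts scanning        */
        unsigned long high_free_pages;  /* pageout stops, allocations resume     */
    };

    static void
    compute_thresholds(unsigned long nr_pages, struct seg_thresholds *t)
    {
        t->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
                            / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
        if (t->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN)
            t->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;

        t->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
                            / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
        if (t->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW)
            t->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;

        t->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
                             / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
        if (t->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH)
            t->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
    }

    int
    main(void)
    {
        /* Example: a 128 MiB segment with 4 KiB pages (32768 pages). */
        struct seg_thresholds t;

        compute_thresholds(32768, &t);
        printf("min %lu low %lu high %lu\n",
               t.min_free_pages, t.low_free_pages, t.high_free_pages);
        return 0;
    }

With these values, such a segment keeps 1638 pages reserved for privileged allocations, wakes the pageout daemon when fewer than 1966 pages are free, and lets it stop again once more than 3276 pages are free.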
diff --git a/vm/vm_page.c b/vm/vm_page.c index f966e4dc..4c11ea7a 100644 --- a/vm/vm_page.c +++ b/vm/vm_page.c @@ -27,10 +27,13 @@ * multiprocessor systems. When a pool is empty and cannot provide a page, * it is filled by transferring multiple pages from the backend buddy system. * The symmetric case is handled likewise. + * + * TODO Limit number of dirty pages, block allocations above a top limit. */ #include <string.h> #include <kern/assert.h> +#include <kern/counters.h> #include <kern/cpu_number.h> #include <kern/debug.h> #include <kern/list.h> @@ -42,6 +45,7 @@ #include <machine/pmap.h> #include <sys/types.h> #include <vm/vm_page.h> +#include <vm/vm_pageout.h> #define DEBUG 0 @@ -100,12 +104,96 @@ struct vm_page_free_list { }; /* + * XXX Because of a potential deadlock involving the default pager (see + * vm_map_lock()), it's currently impossible to reliably determine the + * minimum number of free pages required for successful pageout. Since + * that process is dependent on the amount of physical memory, we scale + * the minimum number of free pages from it, in the hope that memory + * exhaustion happens as rarely as possible... + */ + +/* + * Ratio used to compute the minimum number of pages in a segment. + */ +#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5 +#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100 + +/* + * Number of pages reserved for privileged allocations in a segment. + */ +#define VM_PAGE_SEG_THRESHOLD_MIN 500 + +/* + * Ratio used to compute the threshold below which pageout is started. + */ +#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6 +#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100 + +/* + * Minimum value the low threshold can have for a segment. + */ +#define VM_PAGE_SEG_THRESHOLD_LOW 600 + +#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN +#error VM_PAGE_SEG_THRESHOLD_LOW invalid +#endif /* VM_PAGE_SEG_THRESHOLD_LOW >= VM_PAGE_SEG_THRESHOLD_MIN */ + +/* + * Ratio used to compute the threshold above which pageout is stopped. + */ +#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10 +#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100 + +/* + * Minimum value the high threshold can have for a segment. + */ +#define VM_PAGE_SEG_THRESHOLD_HIGH 1000 + +#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW +#error VM_PAGE_SEG_THRESHOLD_HIGH invalid +#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */ + +/* + * Minimum number of pages allowed for a segment. + */ +#define VM_PAGE_SEG_MIN_PAGES 2000 + +#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH +#error VM_PAGE_SEG_MIN_PAGES invalid +#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */ + +/* + * Ratio used to compute the threshold of active pages beyond which + * to refill the inactive queue. + */ +#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1 +#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3 + +/* + * Page cache queue. + * + * XXX The current implementation hardcodes a preference to evict external + * pages first and keep internal ones as much as possible. This is because + * the Hurd default pager implementation suffers from bugs that can easily + * cause the system to freeze. + */ +struct vm_page_queue { + struct list internal_pages; + struct list external_pages; +}; + +/* * Segment name buffer size. */ #define VM_PAGE_NAME_SIZE 16 /* * Segment of contiguous memory. + * + * XXX Per-segment locking is probably useless, since one or both of the + * page queues lock and the free page queue lock is held on any access. + * However it should first be made clear which lock protects access to + * which members of a segment. 
*/ struct vm_page_seg { struct vm_page_cpu_pool cpu_pools[NCPUS]; @@ -117,6 +205,19 @@ struct vm_page_seg { simple_lock_data_t lock; struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS]; unsigned long nr_free_pages; + + /* Free memory thresholds */ + unsigned long min_free_pages; /* Privileged allocations only */ + unsigned long low_free_pages; /* Pageout daemon starts scanning */ + unsigned long high_free_pages; /* Pageout daemon stops scanning, + unprivileged allocations resume */ + + /* Page cache related data */ + struct vm_page_queue active_pages; + unsigned long nr_active_pages; + unsigned long high_active_pages; + struct vm_page_queue inactive_pages; + unsigned long nr_inactive_pages; }; /* @@ -160,6 +261,16 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata; */ static unsigned int vm_page_segs_size __read_mostly; +/* + * If true, unprivileged allocations are blocked, disregarding any other + * condition. + * + * This variable is also used to resume clients once pages are available. + * + * The free page queue lock must be held when accessing this variable. + */ +static boolean_t vm_page_alloc_paused; + static void __init vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa) { @@ -183,6 +294,40 @@ vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type) page[i].type = type; } +static boolean_t +vm_page_pageable(const struct vm_page *page) +{ + return (page->object != NULL) + && (page->wire_count == 0) + && (page->active || page->inactive); +} + +static boolean_t +vm_page_can_move(const struct vm_page *page) +{ + /* + * This function is called on pages pulled from the page queues, + * implying they're pageable, which is why the wire count isn't + * checked here. 
+ */ + + return !page->busy + && !page->wanted + && !page->absent + && page->object->alive; +} + +static void +vm_page_remove_mappings(struct vm_page *page) +{ + page->busy = TRUE; + pmap_page_protect(page->phys_addr, VM_PROT_NONE); + + if (!page->dirty) { + page->dirty = pmap_is_modified(page->phys_addr); + } +} + static void __init vm_page_free_list_init(struct vm_page_free_list *free_list) { @@ -219,6 +364,19 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order) assert(order < VM_PAGE_NR_FREE_LISTS); + if (vm_page_alloc_paused && current_thread() + && !current_thread()->vm_privilege) { + return NULL; + } else if (seg->nr_free_pages <= seg->low_free_pages) { + vm_pageout_start(); + + if ((seg->nr_free_pages <= seg->min_free_pages) + && current_thread() && !current_thread()->vm_privilege) { + vm_page_alloc_paused = TRUE; + return NULL; + } + } + for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) { free_list = &seg->free_lists[i]; @@ -241,6 +399,11 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order) } seg->nr_free_pages -= (1 << order); + + if (seg->nr_free_pages < seg->min_free_pages) { + vm_page_alloc_paused = TRUE; + } + return page; } @@ -364,6 +527,65 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool, simple_unlock(&seg->lock); } +static void +vm_page_queue_init(struct vm_page_queue *queue) +{ + list_init(&queue->internal_pages); + list_init(&queue->external_pages); +} + +static void +vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page) +{ + if (page->external) { + list_insert_tail(&queue->external_pages, &page->node); + } else { + list_insert_tail(&queue->internal_pages, &page->node); + } +} + +static void +vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page) +{ + (void)queue; + list_remove(&page->node); +} + +static struct vm_page * +vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only) +{ + struct vm_page *page; + + if (!list_empty(&queue->external_pages)) { + page = list_first_entry(&queue->external_pages, struct vm_page, node); + return page; + } + + if (!external_only && !list_empty(&queue->internal_pages)) { + page = list_first_entry(&queue->internal_pages, struct vm_page, node); + return page; + } + + return NULL; +} + +static struct vm_page_seg * +vm_page_seg_get(unsigned short index) +{ + assert(index < vm_page_segs_size); + return &vm_page_segs[index]; +} + +static unsigned int +vm_page_seg_index(const struct vm_page_seg *seg) +{ + unsigned int index; + + index = seg - vm_page_segs; + assert(index < vm_page_segs_size); + return index; +} + static phys_addr_t __init vm_page_seg_size(struct vm_page_seg *seg) { @@ -386,6 +608,39 @@ vm_page_seg_compute_pool_size(struct vm_page_seg *seg) } static void __init +vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg) +{ + unsigned long nr_pages; + + nr_pages = vm_page_atop(vm_page_seg_size(seg)); + + if (nr_pages < VM_PAGE_SEG_MIN_PAGES) { + panic("vm_page: segment too small"); + } + + seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM + / VM_PAGE_SEG_THRESHOLD_MIN_DENOM; + + if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) { + seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN; + } + + seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM + / VM_PAGE_SEG_THRESHOLD_LOW_DENOM; + + if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) { + seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW; + } + + seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM + / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM; 
+ + if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) { + seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH; + } +} + +static void __init vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, struct vm_page *pages) { @@ -408,7 +663,15 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, vm_page_free_list_init(&seg->free_lists[i]); seg->nr_free_pages = 0; - i = seg - vm_page_segs; + + vm_page_seg_compute_pageout_thresholds(seg); + + vm_page_queue_init(&seg->active_pages); + seg->nr_active_pages = 0; + vm_page_queue_init(&seg->inactive_pages); + seg->nr_inactive_pages = 0; + + i = vm_page_seg_index(seg); for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE) vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa); @@ -485,6 +748,502 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page, } } +static void +vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page) +{ + assert(page->object != NULL); + assert(page->seg_index == vm_page_seg_index(seg)); + assert(page->type != VM_PT_FREE); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(!page->free && !page->active && !page->inactive); + page->active = TRUE; + page->reference = TRUE; + vm_page_queue_push(&seg->active_pages, page); + seg->nr_active_pages++; + vm_page_active_count++; +} + +static void +vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page) +{ + assert(page->object != NULL); + assert(page->seg_index == vm_page_seg_index(seg)); + assert(page->type != VM_PT_FREE); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(!page->free && page->active && !page->inactive); + page->active = FALSE; + vm_page_queue_remove(&seg->active_pages, page); + seg->nr_active_pages--; + vm_page_active_count--; +} + +static void +vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page) +{ + assert(page->object != NULL); + assert(page->seg_index == vm_page_seg_index(seg)); + assert(page->type != VM_PT_FREE); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(!page->free && !page->active && !page->inactive); + page->inactive = TRUE; + vm_page_queue_push(&seg->inactive_pages, page); + seg->nr_inactive_pages++; + vm_page_inactive_count++; +} + +static void +vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page) +{ + assert(page->object != NULL); + assert(page->seg_index == vm_page_seg_index(seg)); + assert(page->type != VM_PT_FREE); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(!page->free && !page->active && page->inactive); + page->inactive = FALSE; + vm_page_queue_remove(&seg->inactive_pages, page); + seg->nr_inactive_pages--; + vm_page_inactive_count--; +} + +/* + * Attempt to pull an active page. + * + * If successful, the object containing the page is locked. 
+ */ +static struct vm_page * +vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only) +{ + struct vm_page *page, *first; + boolean_t locked; + + first = NULL; + + for (;;) { + page = vm_page_queue_first(&seg->active_pages, external_only); + + if (page == NULL) { + break; + } else if (first == NULL) { + first = page; + } else if (first == page) { + break; + } + + vm_page_seg_remove_active_page(seg, page); + locked = vm_object_lock_try(page->object); + + if (!locked) { + vm_page_seg_add_active_page(seg, page); + continue; + } + + if (!vm_page_can_move(page)) { + vm_page_seg_add_active_page(seg, page); + vm_object_unlock(page->object); + continue; + } + + return page; + } + + return NULL; +} + +/* + * Attempt to pull an inactive page. + * + * If successful, the object containing the page is locked. + * + * XXX See vm_page_seg_pull_active_page (duplicated code). + */ +static struct vm_page * +vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only) +{ + struct vm_page *page, *first; + boolean_t locked; + + first = NULL; + + for (;;) { + page = vm_page_queue_first(&seg->inactive_pages, external_only); + + if (page == NULL) { + break; + } else if (first == NULL) { + first = page; + } else if (first == page) { + break; + } + + vm_page_seg_remove_inactive_page(seg, page); + locked = vm_object_lock_try(page->object); + + if (!locked) { + vm_page_seg_add_inactive_page(seg, page); + continue; + } + + if (!vm_page_can_move(page)) { + vm_page_seg_add_inactive_page(seg, page); + vm_object_unlock(page->object); + continue; + } + + return page; + } + + return NULL; +} + +/* + * Attempt to pull a page cache page. + * + * If successful, the object containing the page is locked. + */ +static struct vm_page * +vm_page_seg_pull_cache_page(struct vm_page_seg *seg, + boolean_t external_only, + boolean_t *was_active) +{ + struct vm_page *page; + + page = vm_page_seg_pull_inactive_page(seg, external_only); + + if (page != NULL) { + *was_active = FALSE; + return page; + } + + page = vm_page_seg_pull_active_page(seg, external_only); + + if (page != NULL) { + *was_active = TRUE; + return page; + } + + return NULL; +} + +static boolean_t +vm_page_seg_min_page_available(const struct vm_page_seg *seg) +{ + return (seg->nr_free_pages > seg->min_free_pages); +} + +static boolean_t +vm_page_seg_page_available(const struct vm_page_seg *seg) +{ + return (seg->nr_free_pages > seg->high_free_pages); +} + +static boolean_t +vm_page_seg_usable(const struct vm_page_seg *seg) +{ + return (seg->nr_free_pages >= seg->high_free_pages); +} + +static void +vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2) +{ + assert(seg1 != seg2); + + if (seg1 < seg2) { + simple_lock(&seg1->lock); + simple_lock(&seg2->lock); + } else { + simple_lock(&seg2->lock); + simple_lock(&seg1->lock); + } +} + +static void +vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2) +{ + simple_unlock(&seg1->lock); + simple_unlock(&seg2->lock); +} + +/* + * Attempt to balance a segment by moving one page to another segment. + * + * Return TRUE if a page was actually moved. 
+ */ +static boolean_t +vm_page_seg_balance_page(struct vm_page_seg *seg, + struct vm_page_seg *remote_seg) +{ + struct vm_page *src, *dest; + vm_object_t object; + vm_offset_t offset; + boolean_t was_active; + + vm_page_lock_queues(); + simple_lock(&vm_page_queue_free_lock); + vm_page_seg_double_lock(seg, remote_seg); + + if (vm_page_seg_usable(seg) + || !vm_page_seg_page_available(remote_seg)) { + goto error; + } + + src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active); + + if (src == NULL) { + goto error; + } + + assert(src->object != NULL); + assert(!src->fictitious && !src->private); + assert(src->wire_count == 0); + assert(src->type != VM_PT_FREE); + assert(src->order == VM_PAGE_ORDER_UNLISTED); + + dest = vm_page_seg_alloc_from_buddy(remote_seg, 0); + assert(dest != NULL); + + vm_page_seg_double_unlock(seg, remote_seg); + simple_unlock(&vm_page_queue_free_lock); + + if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) { + src->reference = TRUE; + } + + object = src->object; + offset = src->offset; + vm_page_remove(src); + + vm_page_remove_mappings(src); + + vm_page_set_type(dest, 0, src->type); + memcpy(&dest->vm_page_header, &src->vm_page_header, + sizeof(*dest) - VM_PAGE_HEADER_SIZE); + vm_page_copy(src, dest); + + if (!src->dirty) { + pmap_clear_modify(dest->phys_addr); + } + + dest->busy = FALSE; + + simple_lock(&vm_page_queue_free_lock); + vm_page_init(src); + src->free = TRUE; + simple_lock(&seg->lock); + vm_page_set_type(src, 0, VM_PT_FREE); + vm_page_seg_free_to_buddy(seg, src, 0); + simple_unlock(&seg->lock); + simple_unlock(&vm_page_queue_free_lock); + + vm_page_insert(dest, object, offset); + vm_object_unlock(object); + + if (was_active) { + vm_page_activate(dest); + } else { + vm_page_deactivate(dest); + } + + vm_page_unlock_queues(); + + return TRUE; + +error: + vm_page_seg_double_unlock(seg, remote_seg); + simple_unlock(&vm_page_queue_free_lock); + vm_page_unlock_queues(); + return FALSE; +} + +static boolean_t +vm_page_seg_balance(struct vm_page_seg *seg) +{ + struct vm_page_seg *remote_seg; + unsigned int i; + boolean_t balanced; + + /* + * It's important here that pages are moved to lower priority + * segments first. 
+ */ + + for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) { + remote_seg = vm_page_seg_get(i); + + if (remote_seg == seg) { + continue; + } + + balanced = vm_page_seg_balance_page(seg, remote_seg); + + if (balanced) { + return TRUE; + } + } + + return FALSE; +} + +static boolean_t +vm_page_seg_evict(struct vm_page_seg *seg, + boolean_t external_only, boolean_t low_memory) +{ + struct vm_page *page; + boolean_t reclaim, laundry; + vm_object_t object; + boolean_t was_active; + + page = NULL; + object = NULL; + +restart: + vm_page_lock_queues(); + simple_lock(&seg->lock); + + if (page != NULL) { + vm_object_lock(page->object); + } else { + page = vm_page_seg_pull_cache_page(seg, external_only, &was_active); + + if (page == NULL) { + goto out; + } + } + + assert(page->object != NULL); + assert(!page->fictitious && !page->private); + assert(page->wire_count == 0); + assert(page->type != VM_PT_FREE); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + + object = page->object; + + if (!was_active + && (page->reference || pmap_is_referenced(page->phys_addr))) { + vm_page_seg_add_active_page(seg, page); + simple_unlock(&seg->lock); + vm_object_unlock(object); + vm_stat.reactivations++; + current_task()->reactivations++; + vm_page_unlock_queues(); + page = NULL; + goto restart; + } + + vm_page_remove_mappings(page); + + if (!page->dirty && !page->precious) { + reclaim = TRUE; + goto out; + } + + reclaim = FALSE; + + /* + * If we are very low on memory, then we can't rely on an external + * pager to clean a dirty page, because external pagers are not + * vm-privileged. + * + * The laundry bit tells vm_pageout_setup not to do any special + * processing of this page since it's immediately going to be + * double paged out to the default pager. The laundry bit is + * reset and the page is inserted into an internal object by + * vm_pageout_setup before the double paging pass. + */ + + assert(!page->laundry); + + if (object->internal || !low_memory) { + laundry = FALSE; + } else { + laundry = page->laundry = TRUE; + } + +out: + simple_unlock(&seg->lock); + + if (object == NULL) { + vm_page_unlock_queues(); + return FALSE; + } + + if (reclaim) { + vm_page_free(page); + vm_page_unlock_queues(); + + if (vm_object_collectable(object)) { + vm_object_collect(object); + } else { + vm_object_unlock(object); + } + + return TRUE; + } + + vm_page_unlock_queues(); + + /* + * If there is no memory object for the page, create one and hand it + * to the default pager. First try to collapse, so we don't create + * one unnecessarily. 
+ */ + + if (!object->pager_initialized) { + vm_object_collapse(object); + } + + if (!object->pager_initialized) { + vm_object_pager_create(object); + } + + if (!object->pager_initialized) { + panic("vm_page_seg_evict"); + } + + vm_pageout_page(page, FALSE, TRUE); /* flush it */ + vm_object_unlock(object); + + if (laundry) { + goto restart; + } + + return TRUE; +} + +static void +vm_page_seg_compute_high_active_page(struct vm_page_seg *seg) +{ + unsigned long nr_pages; + + nr_pages = seg->nr_active_pages + seg->nr_inactive_pages; + seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM + / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM; +} + +static void +vm_page_seg_refill_inactive(struct vm_page_seg *seg) +{ + struct vm_page *page; + + simple_lock(&seg->lock); + + vm_page_seg_compute_high_active_page(seg); + + while (seg->nr_active_pages > seg->high_active_pages) { + page = vm_page_seg_pull_active_page(seg, FALSE); + + if (page == NULL) { + break; + } + + page->reference = FALSE; + pmap_clear_reference(page->phys_addr); + vm_page_seg_add_inactive_page(seg, page); + vm_object_unlock(page->object); + } + + simple_unlock(&seg->lock); +} + void __init vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end) { @@ -712,6 +1471,77 @@ vm_page_lookup_pa(phys_addr_t pa) return NULL; } +static struct vm_page_seg * +vm_page_lookup_seg(const struct vm_page *page) +{ + struct vm_page_seg *seg; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + + if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) { + return seg; + } + } + + return NULL; +} + +void vm_page_check(const struct vm_page *page) +{ + if (page->fictitious) { + if (page->private) { + panic("vm_page: page both fictitious and private"); + } + + if (page->phys_addr != vm_page_fictitious_addr) { + panic("vm_page: invalid fictitious page"); + } + } else { + struct vm_page_seg *seg; + + if (page->phys_addr == vm_page_fictitious_addr) { + panic("vm_page: real page has fictitious address"); + } + + seg = vm_page_lookup_seg(page); + + if (seg == NULL) { + if (!page->private) { + panic("vm_page: page claims it's managed but not in any segment"); + } + } else { + if (page->private) { + struct vm_page *real_page; + + if (vm_page_pageable(page)) { + panic("vm_page: private page is pageable"); + } + + real_page = vm_page_lookup_pa(page->phys_addr); + + if (vm_page_pageable(real_page)) { + panic("vm_page: page underlying private page is pageable"); + } + + if ((real_page->type == VM_PT_FREE) + || (real_page->order != VM_PAGE_ORDER_UNLISTED)) { + panic("vm_page: page underlying private pagei is free"); + } + } else { + unsigned int index; + + index = vm_page_seg_index(seg); + + if (index != page->seg_index) { + panic("vm_page: page segment mismatch"); + } + } + } + } +} + struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type) { @@ -725,8 +1555,8 @@ vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type) return page; } - if (type == VM_PT_PMAP) - panic("vm_page: unable to allocate pmap page"); + if (!current_thread() || current_thread()->vm_privilege) + panic("vm_page: privileged thread unable to allocate page"); return NULL; } @@ -769,6 +1599,9 @@ vm_page_info_all(void) printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n", vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT), seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT)); + printf("vm_page: %s: min:%lu low:%lu high:%lu\n", + 
vm_page_seg_name(vm_page_seg_index(seg)), + seg->min_free_pages, seg->low_free_pages, seg->high_free_pages); } } @@ -879,3 +1712,408 @@ vm_page_mem_free(void) return total; } + +/* + * Mark this page as wired down by yet another map, removing it + * from paging queues as necessary. + * + * The page's object and the page queues must be locked. + */ +void +vm_page_wire(struct vm_page *page) +{ + VM_PAGE_CHECK(page); + + if (page->wire_count == 0) { + vm_page_queues_remove(page); + + if (!page->private && !page->fictitious) { + vm_page_wire_count++; + } + } + + page->wire_count++; +} + +/* + * Release one wiring of this page, potentially enabling it to be paged again. + * + * The page's object and the page queues must be locked. + */ +void +vm_page_unwire(struct vm_page *page) +{ + struct vm_page_seg *seg; + + VM_PAGE_CHECK(page); + + assert(page->wire_count != 0); + page->wire_count--; + + if ((page->wire_count != 0) + || page->fictitious + || page->private) { + return; + } + + seg = vm_page_seg_get(page->seg_index); + + simple_lock(&seg->lock); + vm_page_seg_add_active_page(seg, page); + simple_unlock(&seg->lock); + + vm_page_wire_count--; +} + +/* + * Returns the given page to the inactive list, indicating that + * no physical maps have access to this page. + * [Used by the physical mapping system.] + * + * The page queues must be locked. + */ +void +vm_page_deactivate(struct vm_page *page) +{ + struct vm_page_seg *seg; + + VM_PAGE_CHECK(page); + + /* + * This page is no longer very interesting. If it was + * interesting (active or inactive/referenced), then we + * clear the reference bit and (re)enter it in the + * inactive queue. Note wired pages should not have + * their reference bit cleared. + */ + + if (page->active || (page->inactive && page->reference)) { + if (!page->fictitious && !page->private && !page->absent) { + pmap_clear_reference(page->phys_addr); + } + + page->reference = FALSE; + vm_page_queues_remove(page); + } + + if ((page->wire_count == 0) && !page->fictitious + && !page->private && !page->inactive) { + seg = vm_page_seg_get(page->seg_index); + + simple_lock(&seg->lock); + vm_page_seg_add_inactive_page(seg, page); + simple_unlock(&seg->lock); + } +} + +/* + * Put the specified page on the active list (if appropriate). + * + * The page queues must be locked. + */ +void +vm_page_activate(struct vm_page *page) +{ + struct vm_page_seg *seg; + + VM_PAGE_CHECK(page); + + /* + * Unconditionally remove so that, even if the page was already + * active, it gets back to the end of the active queue. + */ + vm_page_queues_remove(page); + + if ((page->wire_count == 0) && !page->fictitious && !page->private) { + seg = vm_page_seg_get(page->seg_index); + + if (page->active) + panic("vm_page_activate: already active"); + + simple_lock(&seg->lock); + vm_page_seg_add_active_page(seg, page); + simple_unlock(&seg->lock); + } +} + +void +vm_page_queues_remove(struct vm_page *page) +{ + struct vm_page_seg *seg; + + assert(!page->active || !page->inactive); + + if (!page->active && !page->inactive) { + return; + } + + seg = vm_page_seg_get(page->seg_index); + + simple_lock(&seg->lock); + + if (page->active) { + vm_page_seg_remove_active_page(seg, page); + } else { + vm_page_seg_remove_inactive_page(seg, page); + } + + simple_unlock(&seg->lock); +} + +/* + * Check whether segments are all usable for unprivileged allocations. + * + * If all segments are usable, resume pending unprivileged allocations + * and return TRUE. 
+ * + * This function acquires vm_page_queue_free_lock, which is held on return. + */ +static boolean_t +vm_page_check_usable(void) +{ + struct vm_page_seg *seg; + boolean_t usable; + unsigned int i; + + simple_lock(&vm_page_queue_free_lock); + + for (i = 0; i < vm_page_segs_size; i++) { + seg = vm_page_seg_get(i); + + simple_lock(&seg->lock); + usable = vm_page_seg_usable(seg); + simple_unlock(&seg->lock); + + if (!usable) { + return FALSE; + } + } + + vm_page_external_pagedout = -1; + vm_page_alloc_paused = FALSE; + thread_wakeup(&vm_page_alloc_paused); + return TRUE; +} + +static boolean_t +vm_page_may_balance(void) +{ + struct vm_page_seg *seg; + boolean_t page_available; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = vm_page_seg_get(i); + + simple_lock(&seg->lock); + page_available = vm_page_seg_page_available(seg); + simple_unlock(&seg->lock); + + if (page_available) { + return TRUE; + } + } + + return FALSE; +} + +static boolean_t +vm_page_balance_once(void) +{ + boolean_t balanced; + unsigned int i; + + /* + * It's important here that pages are moved from higher priority + * segments first. + */ + + for (i = 0; i < vm_page_segs_size; i++) { + balanced = vm_page_seg_balance(vm_page_seg_get(i)); + + if (balanced) { + return TRUE; + } + } + + return FALSE; +} + +boolean_t +vm_page_balance(void) +{ + boolean_t balanced; + + while (vm_page_may_balance()) { + balanced = vm_page_balance_once(); + + if (!balanced) { + break; + } + } + + return vm_page_check_usable(); +} + +static boolean_t +vm_page_evict_once(boolean_t external_only) +{ + struct vm_page_seg *seg; + boolean_t low_memory, min_page_available, evicted; + unsigned int i; + + /* + * XXX Page allocation currently only uses the DIRECTMAP selector, + * allowing us to know which segments to look at when determining + * whether we're very low on memory. + */ + low_memory = TRUE; + + simple_lock(&vm_page_queue_free_lock); + + for (i = 0; i < vm_page_segs_size; i++) { + if (i > VM_PAGE_SEG_DIRECTMAP) { + break; + } + + seg = vm_page_seg_get(i); + + simple_lock(&seg->lock); + min_page_available = vm_page_seg_min_page_available(seg); + simple_unlock(&seg->lock); + + if (min_page_available) { + low_memory = FALSE; + break; + } + } + + simple_unlock(&vm_page_queue_free_lock); + + /* + * It's important here that pages are evicted from lower priority + * segments first. + */ + + for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) { + evicted = vm_page_seg_evict(vm_page_seg_get(i), + external_only, low_memory); + + if (evicted) { + return TRUE; + } + } + + return FALSE; +} + +#define VM_PAGE_MAX_LAUNDRY 5 +#define VM_PAGE_MAX_EVICTIONS 5 + +boolean_t +vm_page_evict(boolean_t *should_wait) +{ + boolean_t pause, evicted, external_only; + unsigned int i; + + *should_wait = TRUE; + external_only = TRUE; + + simple_lock(&vm_page_queue_free_lock); + vm_page_external_pagedout = 0; + simple_unlock(&vm_page_queue_free_lock); + +again: + vm_page_lock_queues(); + pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY); + vm_page_unlock_queues(); + + if (pause) { + simple_lock(&vm_page_queue_free_lock); + return FALSE; + } + + for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) { + evicted = vm_page_evict_once(external_only); + + if (!evicted) { + break; + } + } + + simple_lock(&vm_page_queue_free_lock); + + /* + * Keep in mind eviction may not cause pageouts, since non-precious + * clean pages are simply released. 
+ */ + if ((vm_page_external_pagedout == 0) || (vm_page_laundry_count == 0)) { + /* + * No pageout, but some clean pages were freed. Start a complete + * scan again without waiting. + */ + if (evicted) { + *should_wait = FALSE; + return FALSE; + } + + /* + * Eviction failed, consider pages from internal objects on the + * next attempt. + */ + if (external_only) { + simple_unlock(&vm_page_queue_free_lock); + external_only = FALSE; + goto again; + } + + /* + * TODO Find out what could cause this and how to deal with it. + * This will likely require an out-of-memory killer. + */ + panic("vm_page: unable to recycle any page"); + } + + simple_unlock(&vm_page_queue_free_lock); + + return vm_page_check_usable(); +} + +void +vm_page_refill_inactive(void) +{ + unsigned int i; + + vm_page_lock_queues(); + + for (i = 0; i < vm_page_segs_size; i++) { + vm_page_seg_refill_inactive(vm_page_seg_get(i)); + } + + vm_page_unlock_queues(); +} + +void +vm_page_wait(void (*continuation)(void)) +{ + assert(!current_thread()->vm_privilege); + + simple_lock(&vm_page_queue_free_lock); + + if (!vm_page_alloc_paused) { + simple_unlock(&vm_page_queue_free_lock); + return; + } + + assert_wait(&vm_page_alloc_paused, FALSE); + + simple_unlock(&vm_page_queue_free_lock); + + if (continuation != 0) { + counter(c_vm_page_wait_block_user++); + thread_block(continuation); + } else { + counter(c_vm_page_wait_block_kernel++); + thread_block((void (*)(void)) 0); + } +} diff --git a/vm/vm_page.h b/vm/vm_page.h index 164ab6d4..eb684c1b 100644 --- a/vm/vm_page.h +++ b/vm/vm_page.h @@ -40,6 +40,7 @@ #include <vm/vm_object.h> #include <vm/vm_types.h> #include <kern/queue.h> +#include <kern/list.h> #include <kern/lock.h> #include <kern/log2.h> @@ -77,8 +78,7 @@ */ struct vm_page { - /* Members used in the vm_page module only */ - struct list node; + struct list node; /* page queues or free list (P) */ unsigned short type; unsigned short seg_index; unsigned short order; @@ -90,15 +90,13 @@ struct vm_page { */ phys_addr_t phys_addr; + queue_chain_t listq; /* all pages in same object (O) */ + struct vm_page *next; /* VP bucket link (O) */ + /* We use an empty struct as the delimiter. */ struct {} vm_page_header; #define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header) - queue_chain_t pageq; /* queue info for FIFO - * queue or free list (P) */ - queue_chain_t listq; /* all pages in same object (O) */ - struct vm_page *next; /* VP bucket link (O) */ - vm_object_t object; /* which object am I in (O,P) */ vm_offset_t offset; /* offset into that object (O,P) */ @@ -136,7 +134,9 @@ struct vm_page { * some useful check on a page structure. */ -#define VM_PAGE_CHECK(mem) +#define VM_PAGE_CHECK(mem) vm_page_check(mem) + +void vm_page_check(const struct vm_page *page); /* * Each pageable resident page falls into one of three lists: @@ -155,13 +155,6 @@ struct vm_page { */ extern -vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ -extern -queue_head_t vm_page_queue_active; /* active memory queue */ -extern -queue_head_t vm_page_queue_inactive; /* inactive memory queue */ - -extern int vm_page_fictitious_count;/* How many fictitious pages are free? */ extern int vm_page_active_count; /* How many pages are active? */ @@ -170,25 +163,15 @@ int vm_page_inactive_count; /* How many pages are inactive? */ extern int vm_page_wire_count; /* How many pages are wired? */ extern -int vm_page_free_target; /* How many do we want free? 
*/ -extern -int vm_page_free_min; /* When to wakeup pageout */ -extern -int vm_page_inactive_target;/* How many do we want inactive? */ -extern -int vm_page_free_reserved; /* How many pages reserved to do pageout */ -extern int vm_page_laundry_count; /* How many pages being laundered? */ - +extern +int vm_page_external_pagedout; /* How many external pages being paged out? */ decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive page queues */ decl_simple_lock_data(extern,vm_page_queue_free_lock) /* lock on free page queue */ -extern unsigned int vm_page_free_wanted; - /* how many threads are waiting for memory */ - extern phys_addr_t vm_page_fictitious_addr; /* (fake) phys_addr of fictitious pages */ @@ -204,7 +187,7 @@ extern vm_page_t vm_page_grab_fictitious(void); extern boolean_t vm_page_convert(vm_page_t *); extern void vm_page_more_fictitious(void); extern vm_page_t vm_page_grab(void); -extern void vm_page_release(vm_page_t); +extern void vm_page_release(vm_page_t, boolean_t, boolean_t); extern phys_addr_t vm_page_grab_phys_addr(void); extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int); extern void vm_page_free_contig(vm_page_t, vm_size_t); @@ -294,22 +277,7 @@ extern unsigned int vm_page_info( #define vm_page_lock_queues() simple_lock(&vm_page_queue_lock) #define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock) -#define VM_PAGE_QUEUES_REMOVE(mem) \ - MACRO_BEGIN \ - if (mem->active) { \ - queue_remove(&vm_page_queue_active, \ - mem, vm_page_t, pageq); \ - mem->active = FALSE; \ - vm_page_active_count--; \ - } \ - \ - if (mem->inactive) { \ - queue_remove(&vm_page_queue_inactive, \ - mem, vm_page_t, pageq); \ - mem->inactive = FALSE; \ - vm_page_inactive_count--; \ - } \ - MACRO_END +#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem) /* * Copyright (c) 2010-2014 Richard Braun. @@ -358,18 +326,11 @@ extern unsigned int vm_page_info( /* * Page usage types. - * - * Failing to allocate pmap pages will cause a kernel panic. - * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of - * pages. */ #define VM_PT_FREE 0 /* Page unused */ #define VM_PT_RESERVED 1 /* Page reserved at boot time */ #define VM_PT_TABLE 2 /* Page is part of the page table */ -#define VM_PT_PMAP 3 /* Page stores pmap-specific data */ -#define VM_PT_KMEM 4 /* Page is part of a kmem slab */ -#define VM_PT_STACK 5 /* Type for generic kernel allocations */ -#define VM_PT_KERNEL 6 /* Type for generic kernel allocations */ +#define VM_PT_KERNEL 3 /* Type for generic kernel allocations */ static inline unsigned short vm_page_type(const struct vm_page *page) @@ -521,4 +482,53 @@ phys_addr_t vm_page_mem_size(void); */ unsigned long vm_page_mem_free(void); +/* + * Remove the given page from any page queue it might be in. + */ +void vm_page_queues_remove(struct vm_page *page); + +/* + * Balance physical pages among segments. + * + * This function should be called first by the pageout daemon + * on memory pressure, since it may be unnecessary to perform any + * other operation, let alone shrink caches, if balancing is + * enough to make enough free pages. + * + * Return TRUE if balancing made enough free pages for unprivileged + * allocations to succeed, in which case pending allocations are resumed. + * + * This function acquires vm_page_queue_free_lock, which is held on return. + */ +boolean_t vm_page_balance(void); + +/* + * Evict physical pages. 
+ * + * This function should be called by the pageout daemon after balancing + * the segments and shrinking kernel caches. + * + * Return TRUE if eviction made enough free pages for unprivileged + * allocations to succeed, in which case pending allocations are resumed. + * + * Otherwise, report whether the pageout daemon should wait (some pages + * have been paged out) or not (only clean pages have been released). + * + * This function acquires vm_page_queue_free_lock, which is held on return. + */ +boolean_t vm_page_evict(boolean_t *should_wait); + +/* + * Turn active pages into inactive ones for second-chance LRU + * approximation. + * + * This function should be called by the pageout daemon on memory pressure, + * i.e. right before evicting pages. + * + * XXX This is probably not the best strategy, compared to keeping the + * active/inactive ratio in check at all times, but this means less + * frequent refills. + */ +void vm_page_refill_inactive(void); + #endif /* _VM_VM_PAGE_H_ */ diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c index a36c9905..dd0f995c 100644 --- a/vm/vm_pageout.c +++ b/vm/vm_pageout.c @@ -53,123 +53,17 @@ #include <vm/vm_pageout.h> #include <machine/locore.h> - - -#ifndef VM_PAGEOUT_BURST_MAX -#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */ -#endif /* VM_PAGEOUT_BURST_MAX */ - -#ifndef VM_PAGEOUT_BURST_MIN -#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */ -#endif /* VM_PAGEOUT_BURST_MIN */ - -#ifndef VM_PAGEOUT_BURST_WAIT -#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */ -#endif /* VM_PAGEOUT_BURST_WAIT */ - -#ifndef VM_PAGEOUT_EMPTY_WAIT -#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */ -#endif /* VM_PAGEOUT_EMPTY_WAIT */ - -#ifndef VM_PAGEOUT_PAUSE_MAX -#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */ -#endif /* VM_PAGEOUT_PAUSE_MAX */ - /* - * To obtain a reasonable LRU approximation, the inactive queue - * needs to be large enough to give pages on it a chance to be - * referenced a second time. This macro defines the fraction - * of active+inactive pages that should be inactive. - * The pageout daemon uses it to update vm_page_inactive_target. - * - * If the number of free pages falls below vm_page_free_target and - * vm_page_inactive_count is below vm_page_inactive_target, - * then the pageout daemon starts running. + * Event placeholder for pageout requests, synchronized with + * the free page queue lock. */ - -#ifndef VM_PAGE_INACTIVE_TARGET -#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3) -#endif /* VM_PAGE_INACTIVE_TARGET */ +static int vm_pageout_requested; /* - * Once the pageout daemon starts running, it keeps going - * until the number of free pages meets or exceeds vm_page_free_target. + * Event placeholder for pageout throttling, synchronized with + * the free page queue lock. */ - -#ifndef VM_PAGE_FREE_TARGET -#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100) -#endif /* VM_PAGE_FREE_TARGET */ - -/* - * The pageout daemon always starts running once the number of free pages - * falls below vm_page_free_min. - */ - -#ifndef VM_PAGE_FREE_MIN -#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100) -#endif /* VM_PAGE_FREE_MIN */ - -/* - * When the number of free pages falls below vm_page_free_reserved, - * only vm-privileged threads can allocate pages. vm-privilege - * allows the pageout daemon and default pager (and any other - * associated threads needed for default pageout) to continue - * operation by dipping into the reserved pool of pages. 
*/ - -#ifndef VM_PAGE_FREE_RESERVED -#define VM_PAGE_FREE_RESERVED 500 -#endif /* VM_PAGE_FREE_RESERVED */ - -/* - * When the number of free pages falls below vm_pageout_reserved_internal, - * the pageout daemon no longer trusts external pagers to clean pages. - * External pagers are probably all wedged waiting for a free page. - * It forcibly double-pages dirty pages belonging to external objects, - * getting the pages to the default pager to clean. - */ - -#ifndef VM_PAGEOUT_RESERVED_INTERNAL -#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250) -#endif /* VM_PAGEOUT_RESERVED_INTERNAL */ - -/* - * When the number of free pages falls below vm_pageout_reserved_really, - * the pageout daemon stops work entirely to let the default pager - * catch up (assuming the default pager has pages to clean). - * Beyond this point, it is too dangerous to consume memory - * even for memory_object_data_write messages to the default pager. - */ - -#ifndef VM_PAGEOUT_RESERVED_REALLY -#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400) -#endif /* VM_PAGEOUT_RESERVED_REALLY */ - -unsigned int vm_pageout_reserved_internal = 0; -unsigned int vm_pageout_reserved_really = 0; - -unsigned int vm_pageout_burst_max = 0; -unsigned int vm_pageout_burst_min = 0; -unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */ -unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ -unsigned int vm_pageout_pause_count = 0; -unsigned int vm_pageout_pause_max = 0; - -/* - * These variables record the pageout daemon's actions: - * how many pages it looks at and what happens to those pages. - * No locking needed because only one thread modifies the variables. - */ - -unsigned int vm_pageout_active = 0; /* debugging */ -unsigned int vm_pageout_inactive = 0; /* debugging */ -unsigned int vm_pageout_inactive_nolock = 0; /* debugging */ -unsigned int vm_pageout_inactive_busy = 0; /* debugging */ -unsigned int vm_pageout_inactive_absent = 0; /* debugging */ -unsigned int vm_pageout_inactive_used = 0; /* debugging */ -unsigned int vm_pageout_inactive_clean = 0; /* debugging */ -unsigned int vm_pageout_inactive_dirty = 0; /* debugging */ -unsigned int vm_pageout_inactive_double = 0; /* debugging */ -unsigned int vm_pageout_inactive_cleaned_external = 0; +static int vm_pageout_continue; /* * Routine: vm_pageout_setup @@ -224,15 +118,20 @@ vm_pageout_setup( /* * If we are not flushing the page, allocate a - * page in the object. If we cannot get the - * page, flush instead. + * page in the object. */ if (!flush) { - vm_object_lock(new_object); - new_m = vm_page_alloc(new_object, new_offset); - if (new_m == VM_PAGE_NULL) - flush = TRUE; - vm_object_unlock(new_object); + for (;;) { + vm_object_lock(new_object); + new_m = vm_page_alloc(new_object, new_offset); + vm_object_unlock(new_object); + + if (new_m != VM_PAGE_NULL) { + break; + } + + VM_PAGE_WAIT(NULL); + } } if (flush) { @@ -337,26 +236,33 @@ vm_pageout_setup( vm_page_lock_queues(); vm_stat.pageouts++; if (m->laundry) { + /* - * vm_pageout_scan is telling us to put this page - * at the front of the inactive queue, so it will - * be immediately paged out to the default pager. + * The caller is telling us that it is going to + * immediately double page this page to the default + * pager. 
*/ assert(!old_object->internal); m->laundry = FALSE; - - queue_enter_first(&vm_page_queue_inactive, m, - vm_page_t, pageq); - m->inactive = TRUE; - vm_page_inactive_count++; } else if (old_object->internal) { m->laundry = TRUE; vm_page_laundry_count++; vm_page_wire(m); - } else + } else { vm_page_activate(m); + + /* + * If vm_page_external_pagedout is negative, + * the pageout daemon isn't expecting to be + * notified. + */ + + if (vm_page_external_pagedout >= 0) { + vm_page_external_pagedout++; + } + } vm_page_unlock_queues(); /* @@ -487,455 +393,102 @@ vm_pageout_page( /* * vm_pageout_scan does the dirty work for the pageout daemon. - * It returns with vm_page_queue_free_lock held and - * vm_page_free_wanted == 0. + * + * Return TRUE if the pageout daemon is done for now, FALSE otherwise, + * in which case should_wait indicates whether the pageout daemon + * should wait to allow pagers to keep up. + * + * It returns with vm_page_queue_free_lock held. */ -void vm_pageout_scan(void) +boolean_t vm_pageout_scan(boolean_t *should_wait) { - unsigned int burst_count; - unsigned int want_pages; + boolean_t done; /* - * We want to gradually dribble pages from the active queue - * to the inactive queue. If we let the inactive queue get - * very small, and then suddenly dump many pages into it, - * those pages won't get a sufficient chance to be referenced - * before we start taking them from the inactive queue. - * - * We must limit the rate at which we send pages to the pagers. - * data_write messages consume memory, for message buffers and - * for map-copy objects. If we get too far ahead of the pagers, - * we can potentially run out of memory. - * - * We can use the laundry count to limit directly the number - * of pages outstanding to the default pager. A similar - * strategy for external pagers doesn't work, because - * external pagers don't have to deallocate the pages sent them, - * and because we might have to send pages to external pagers - * even if they aren't processing writes. So we also - * use a burst count to limit writes to external pagers. - * - * When memory is very tight, we can't rely on external pagers to - * clean pages. They probably aren't running, because they - * aren't vm-privileged. If we kept sending dirty pages to them, - * we could exhaust the free list. However, we can't just ignore - * pages belonging to external objects, because there might be no - * pages belonging to internal objects. Hence, we get the page - * into an internal object and then immediately double-page it, - * sending it to the default pager. - * - * slab_collect should be last, because the other operations - * might return memory to caches. When we pause we use - * vm_pageout_scan_continue as our continuation, so we will - * reenter vm_pageout_scan periodically and attempt to reclaim - * internal memory even if we never reach vm_page_free_target. + * Try balancing pages among segments first, since this + * may be enough to resume unprivileged allocations. */ - stack_collect(); - net_kmsg_collect(); - consider_task_collect(); - if (0) /* XXX: pcb_collect doesn't do anything yet, so it is - pointless to call consider_thread_collect. */ - consider_thread_collect(); - slab_collect(); - - for (burst_count = 0;;) { - vm_page_t m; - vm_object_t object; - unsigned long free_count; - - /* - * Recalculate vm_page_inactivate_target. 
- */ - - vm_page_lock_queues(); - vm_page_inactive_target = - VM_PAGE_INACTIVE_TARGET(vm_page_active_count + - vm_page_inactive_count); - - /* - * Move pages from active to inactive. - */ - - while ((vm_page_inactive_count < vm_page_inactive_target) && - !queue_empty(&vm_page_queue_active)) { - vm_object_t obj; - - vm_pageout_active++; - m = (vm_page_t) queue_first(&vm_page_queue_active); - assert(m->active && !m->inactive); - - obj = m->object; - if (!vm_object_lock_try(obj)) { - /* - * Move page to end and continue. - */ - - queue_remove(&vm_page_queue_active, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_active, m, - vm_page_t, pageq); - vm_page_unlock_queues(); - vm_page_lock_queues(); - continue; - } - - /* - * If the page is busy, then we pull it - * off the active queue and leave it alone. - */ - - if (m->busy) { - vm_object_unlock(obj); - queue_remove(&vm_page_queue_active, m, - vm_page_t, pageq); - m->active = FALSE; - vm_page_active_count--; - continue; - } - - /* - * Deactivate the page while holding the object - * locked, so we know the page is still not busy. - * This should prevent races between pmap_enter - * and pmap_clear_reference. The page might be - * absent or fictitious, but vm_page_deactivate - * can handle that. - */ - - vm_page_deactivate(m); - vm_object_unlock(obj); - } - - /* - * We are done if we have met our targets *and* - * nobody is still waiting for a page. - */ - - simple_lock(&vm_page_queue_free_lock); - free_count = vm_page_mem_free(); - if ((free_count >= vm_page_free_target) && - (vm_page_free_wanted == 0)) { - vm_page_unlock_queues(); - break; - } - want_pages = ((free_count < vm_page_free_target) || - vm_page_free_wanted); - simple_unlock(&vm_page_queue_free_lock); - - /* - * Sometimes we have to pause: - * 1) No inactive pages - nothing to do. - * 2) Flow control - wait for pagers to catch up. - * 3) Extremely low memory - sending out dirty pages - * consumes memory. We don't take the risk of doing - * this if the default pager already has work to do. - */ - pause: - if (queue_empty(&vm_page_queue_inactive) || - (burst_count >= vm_pageout_burst_max) || - (vm_page_laundry_count >= vm_pageout_burst_max) || - ((free_count < vm_pageout_reserved_really) && - (vm_page_laundry_count > 0))) { - unsigned int pages, msecs; - - /* - * vm_pageout_burst_wait is msecs/page. - * If there is nothing for us to do, we wait - * at least vm_pageout_empty_wait msecs. - */ - - if (vm_page_laundry_count > burst_count) - pages = vm_page_laundry_count; - else - pages = burst_count; - msecs = pages * vm_pageout_burst_wait; - - if (queue_empty(&vm_page_queue_inactive) && - (msecs < vm_pageout_empty_wait)) - msecs = vm_pageout_empty_wait; - vm_page_unlock_queues(); - - thread_will_wait_with_timeout(current_thread(), msecs); - counter(c_vm_pageout_scan_block++); - thread_block(vm_pageout_scan_continue); - call_continuation(vm_pageout_scan_continue); - /*NOTREACHED*/ - } - - vm_pageout_inactive++; - - /* Find a page we are interested in paging out. If we - need pages, then we'll page anything out; otherwise - we only page out external pages. 
*/ - m = (vm_page_t) queue_first (&vm_page_queue_inactive); - while (1) - { - assert (!m->active && m->inactive); - if (want_pages || m->external) - break; - - m = (vm_page_t) queue_next (&m->pageq); - if (!m) - goto pause; - } - - object = m->object; + /* This function returns with vm_page_queue_free_lock held */ + done = vm_page_balance(); - /* - * Try to lock object; since we've got the - * page queues lock, we can only try for this one. - */ - - if (!vm_object_lock_try(object)) { - /* - * Move page to end and continue. - */ - - queue_remove(&vm_page_queue_inactive, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_inactive, m, - vm_page_t, pageq); - vm_page_unlock_queues(); - vm_pageout_inactive_nolock++; - continue; - } - - /* - * Remove the page from the inactive list. - */ - - queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); - vm_page_inactive_count--; - m->inactive = FALSE; - - if (m->busy || !object->alive) { - /* - * Somebody is already playing with this page. - * Leave it off the pageout queues. - */ - - vm_page_unlock_queues(); - vm_object_unlock(object); - vm_pageout_inactive_busy++; - continue; - } - - /* - * If it's absent, we can reclaim the page. - */ - - if (want_pages && m->absent) { - vm_pageout_inactive_absent++; - reclaim_page: - vm_page_free(m); - vm_page_unlock_queues(); - - if (vm_object_collectable(object)) - vm_object_collect(object); - else - vm_object_unlock(object); - - continue; - } - - /* - * If it's being used, reactivate. - * (Fictitious pages are either busy or absent.) - */ - - assert(!m->fictitious); - if (m->reference || pmap_is_referenced(m->phys_addr)) { - vm_object_unlock(object); - vm_page_activate(m); - vm_stat.reactivations++; - current_task()->reactivations++; - vm_page_unlock_queues(); - vm_pageout_inactive_used++; - continue; - } - - /* - * Eliminate all mappings. - */ - - m->busy = TRUE; - pmap_page_protect(m->phys_addr, VM_PROT_NONE); - if (!m->dirty) - m->dirty = pmap_is_modified(m->phys_addr); - - /* If we don't actually need more memory, and the page - is not dirty, put it on the tail of the inactive queue - and move on to the next page. */ - if (!want_pages && !m->dirty) { - queue_remove (&vm_page_queue_inactive, m, - vm_page_t, pageq); - queue_enter (&vm_page_queue_inactive, m, - vm_page_t, pageq); - vm_page_unlock_queues(); - vm_pageout_inactive_cleaned_external++; - continue; - } - - /* - * If it's clean and not precious, we can free the page. - */ - - if (!m->dirty && !m->precious) { - vm_pageout_inactive_clean++; - goto reclaim_page; - } - - /* - * If we are very low on memory, then we can't - * rely on an external pager to clean a dirty page, - * because external pagers are not vm-privileged. - * - * The laundry bit tells vm_pageout_setup to - * put the page back at the front of the inactive - * queue instead of activating the page. Hence, - * we will pick the page up again immediately and - * resend it to the default pager. - */ - - assert(!m->laundry); - if ((free_count < vm_pageout_reserved_internal) && - !object->internal) { - m->laundry = TRUE; - vm_pageout_inactive_double++; - } - vm_page_unlock_queues(); - - /* - * If there is no memory object for the page, create - * one and hand it to the default pager. - * [First try to collapse, so we don't create - * one unnecessarily.] 
- */ - - if (!object->pager_initialized) - vm_object_collapse(object); - if (!object->pager_initialized) - vm_object_pager_create(object); - if (!object->pager_initialized) - panic("vm_pageout_scan"); - - vm_pageout_inactive_dirty++; - vm_pageout_page(m, FALSE, TRUE); /* flush it */ - vm_object_unlock(object); - burst_count++; + if (done) { + return TRUE; } -} -void vm_pageout_scan_continue(void) -{ + simple_unlock(&vm_page_queue_free_lock); + /* - * We just paused to let the pagers catch up. - * If vm_page_laundry_count is still high, - * then we aren't waiting long enough. - * If we have paused some vm_pageout_pause_max times without - * adjusting vm_pageout_burst_wait, it might be too big, - * so we decrease it. + * Balancing is not enough. Shrink caches and scan pages + * for eviction. */ - vm_page_lock_queues(); - if (vm_page_laundry_count > vm_pageout_burst_min) { - vm_pageout_burst_wait++; - vm_pageout_pause_count = 0; - } else if (++vm_pageout_pause_count > vm_pageout_pause_max) { - vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4; - if (vm_pageout_burst_wait < 1) - vm_pageout_burst_wait = 1; - vm_pageout_pause_count = 0; - } - vm_page_unlock_queues(); - - vm_pageout_continue(); - /*NOTREACHED*/ -} - -/* - * vm_pageout is the high level pageout daemon. - */ + stack_collect(); + net_kmsg_collect(); + consider_task_collect(); + if (0) /* XXX: pcb_collect doesn't do anything yet, so it is + pointless to call consider_thread_collect. */ + consider_thread_collect(); -void vm_pageout_continue(void) -{ /* - * The pageout daemon is never done, so loop forever. - * We should call vm_pageout_scan at least once each - * time we are woken, even if vm_page_free_wanted is - * zero, to check vm_page_free_target and - * vm_page_inactive_target. + * slab_collect should be last, because the other operations + * might return memory to caches. */ + slab_collect(); - for (;;) { - vm_pageout_scan(); - /* we hold vm_page_queue_free_lock now */ - assert(vm_page_free_wanted == 0); + vm_page_refill_inactive(); - assert_wait(&vm_page_free_wanted, FALSE); - simple_unlock(&vm_page_queue_free_lock); - counter(c_vm_pageout_block++); - thread_block(vm_pageout_continue); - } + /* This function returns with vm_page_queue_free_lock held */ + return vm_page_evict(should_wait); } void vm_pageout(void) { - unsigned long free_after_reserve; + boolean_t done, should_wait; current_thread()->vm_privilege = 1; stack_privilege(current_thread()); thread_set_own_priority(0); - /* - * Initialize some paging parameters. 
- */ - - if (vm_pageout_burst_max == 0) - vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX; - - if (vm_pageout_burst_min == 0) - vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN; - - if (vm_pageout_burst_wait == 0) - vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; - - if (vm_pageout_empty_wait == 0) - vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; - - if (vm_page_free_reserved == 0) - vm_page_free_reserved = VM_PAGE_FREE_RESERVED; - - if (vm_pageout_pause_max == 0) - vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX; - - if (vm_pageout_reserved_internal == 0) - vm_pageout_reserved_internal = - VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved); - - if (vm_pageout_reserved_really == 0) - vm_pageout_reserved_really = - VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved); - - free_after_reserve = vm_page_mem_free() - vm_page_free_reserved; - - if (vm_page_free_min == 0) - vm_page_free_min = vm_page_free_reserved + - VM_PAGE_FREE_MIN(free_after_reserve); + for (;;) { + done = vm_pageout_scan(&should_wait); + /* we hold vm_page_queue_free_lock now */ - if (vm_page_free_target == 0) - vm_page_free_target = vm_page_free_reserved + - VM_PAGE_FREE_TARGET(free_after_reserve); + if (done) { + thread_sleep(&vm_pageout_requested, + simple_lock_addr(vm_page_queue_free_lock), + FALSE); + } else if (should_wait) { + assert_wait(&vm_pageout_continue, FALSE); + thread_set_timeout(500); + simple_unlock(&vm_page_queue_free_lock); + thread_block(NULL); + } else { + simple_unlock(&vm_page_queue_free_lock); + } + } +} - if (vm_page_free_target < vm_page_free_min + 5) - vm_page_free_target = vm_page_free_min + 5; +/* + * Start pageout + * + * The free page queue lock must be held before calling this function. + */ +void vm_pageout_start(void) +{ + if (!current_thread()) + return; - /* - * vm_pageout_scan will set vm_page_inactive_target. - */ + thread_wakeup_one(&vm_pageout_requested); +} - vm_pageout_continue(); - /*NOTREACHED*/ +/* + * Resume pageout + * + * The free page queue lock must be held before calling this function. + */ +void vm_pageout_resume(void) +{ + thread_wakeup_one(&vm_pageout_continue); } diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h index ea6cfaf4..6ddd821c 100644 --- a/vm/vm_pageout.h +++ b/vm/vm_pageout.h @@ -46,8 +46,8 @@ extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t); extern void vm_pageout(void) __attribute__((noreturn)); -extern void vm_pageout_continue(void) __attribute__((noreturn)); +extern void vm_pageout_start(void); -extern void vm_pageout_scan_continue(void) __attribute__((noreturn)); +extern void vm_pageout_resume(void); #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/vm/vm_resident.c b/vm/vm_resident.c index eac0f50c..e276fe68 100644 --- a/vm/vm_resident.c +++ b/vm/vm_resident.c @@ -39,6 +39,7 @@ #include <mach/vm_prot.h> #include <kern/counters.h> #include <kern/debug.h> +#include <kern/list.h> #include <kern/sched_prim.h> #include <kern/task.h> #include <kern/thread.h> @@ -95,22 +96,13 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */ unsigned long vm_page_bucket_count = 0; /* How big is array? */ unsigned long vm_page_hash_mask; /* Mask for hash function */ -vm_page_t vm_page_queue_fictitious; +static struct list vm_page_queue_fictitious; decl_simple_lock_data(,vm_page_queue_free_lock) -unsigned int vm_page_free_wanted; int vm_page_fictitious_count; -int vm_page_external_count; int vm_object_external_count; int vm_object_external_pages; /* - * This variable isn't directly used. 
It's merely a placeholder for the - * address used to synchronize threads waiting for pages to become - * available. The real value is returned by vm_page_free_mem(). - */ -unsigned int vm_page_free_avail; - -/* * Occasionally, the virtual memory system uses * resident page structures that do not refer to * real pages, for example to leave a page with @@ -136,8 +128,6 @@ phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1; * defined here, but are shared by the pageout * module. */ -queue_head_t vm_page_queue_active; -queue_head_t vm_page_queue_inactive; decl_simple_lock_data(,vm_page_queue_lock) int vm_page_active_count; int vm_page_inactive_count; @@ -149,11 +139,8 @@ int vm_page_wire_count; * (done here in vm_page_alloc) can trigger the * pageout daemon. */ -int vm_page_free_target = 0; -int vm_page_free_min = 0; -int vm_page_inactive_target = 0; -int vm_page_free_reserved = 0; int vm_page_laundry_count = 0; +int vm_page_external_pagedout = 0; /* @@ -191,11 +178,7 @@ void vm_page_bootstrap( simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); - vm_page_queue_fictitious = VM_PAGE_NULL; - queue_init(&vm_page_queue_active); - queue_init(&vm_page_queue_inactive); - - vm_page_free_wanted = 0; + list_init(&vm_page_queue_fictitious); /* * Allocate (and initialize) the virtual-to-physical @@ -330,6 +313,7 @@ void vm_page_module_init(void) * table and object list. * * The object and page must be locked. + * The free page queue must not be locked. */ void vm_page_insert( @@ -407,6 +391,7 @@ void vm_page_insert( * and we don't do deactivate-behind. * * The object and page must be locked. + * The free page queue must not be locked. */ void vm_page_replace( @@ -457,6 +442,7 @@ void vm_page_replace( listq); m->tabled = FALSE; object->resident_page_count--; + VM_PAGE_QUEUES_REMOVE(m); if (m->external) { m->external = FALSE; @@ -501,9 +487,10 @@ void vm_page_replace( * vm_page_remove: [ internal use only ] * * Removes the given mem entry from the object/offset-page - * table and the object page list. + * table, the object page list, and the page queues. * * The object and page must be locked. + * The free page queue must not be locked. */ void vm_page_remove( @@ -551,6 +538,8 @@ void vm_page_remove( mem->tabled = FALSE; + VM_PAGE_QUEUES_REMOVE(mem); + if (mem->external) { mem->external = FALSE; vm_object_external_pages--; @@ -665,11 +654,15 @@ vm_page_t vm_page_grab_fictitious(void) vm_page_t m; simple_lock(&vm_page_queue_free_lock); - m = vm_page_queue_fictitious; - if (m != VM_PAGE_NULL) { - vm_page_fictitious_count--; - vm_page_queue_fictitious = (vm_page_t) m->pageq.next; + if (list_empty(&vm_page_queue_fictitious)) { + m = VM_PAGE_NULL; + } else { + m = list_first_entry(&vm_page_queue_fictitious, + struct vm_page, node); + assert(m->fictitious); + list_remove(&m->node); m->free = FALSE; + vm_page_fictitious_count--; } simple_unlock(&vm_page_queue_free_lock); @@ -689,8 +682,7 @@ static void vm_page_release_fictitious( if (m->free) panic("vm_page_release_fictitious"); m->free = TRUE; - m->pageq.next = (queue_entry_t) vm_page_queue_fictitious; - vm_page_queue_fictitious = m; + list_insert_head(&vm_page_queue_fictitious, &m->node); vm_page_fictitious_count++; simple_unlock(&vm_page_queue_free_lock); } @@ -779,18 +771,6 @@ vm_page_t vm_page_grab(void) simple_lock(&vm_page_queue_free_lock); - /* - * Only let privileged threads (involved in pageout) - * dip into the reserved pool or exceed the limit - * for externally-managed pages. 
- */ - - if ((vm_page_mem_free() < vm_page_free_reserved) - && !current_thread()->vm_privilege) { - simple_unlock(&vm_page_queue_free_lock); - return VM_PAGE_NULL; - } - mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL); if (mem == NULL) { @@ -801,22 +781,6 @@ vm_page_t vm_page_grab(void) mem->free = FALSE; simple_unlock(&vm_page_queue_free_lock); - /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. - * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. - */ - - if ((vm_page_mem_free() < vm_page_free_min) || - ((vm_page_mem_free() < vm_page_free_target) && - (vm_page_inactive_count < vm_page_inactive_target))) - thread_wakeup((event_t) &vm_page_free_wanted); - return mem; } @@ -836,38 +800,37 @@ phys_addr_t vm_page_grab_phys_addr(void) */ void vm_page_release( - vm_page_t mem) + vm_page_t mem, + boolean_t laundry, + boolean_t external) { simple_lock(&vm_page_queue_free_lock); if (mem->free) panic("vm_page_release"); mem->free = TRUE; vm_page_free_pa(mem, 0); + if (laundry) { + vm_page_laundry_count--; - /* - * Check if we should wake up someone waiting for page. - * But don't bother waking them unless they can allocate. - * - * We wakeup only one thread, to prevent starvation. - * Because the scheduling system handles wait queues FIFO, - * if we wakeup all waiting threads, one greedy thread - * can starve multiple niceguy threads. When the threads - * all wakeup, the greedy threads runs first, grabs the page, - * and waits for another page. It will be the first to run - * when the next page is freed. - * - * However, there is a slight danger here. - * The thread we wake might not use the free page. - * Then the other threads could wait indefinitely - * while the page goes unused. To forestall this, - * the pageout daemon will keep making free pages - * as long as vm_page_free_wanted is non-zero. - */ + if (vm_page_laundry_count == 0) { + vm_pageout_resume(); + } + } + if (external) { + + /* + * If vm_page_external_pagedout is negative, + * the pageout daemon isn't expecting to be + * notified. + */ + + if (vm_page_external_pagedout > 0) { + vm_page_external_pagedout--; + } - if ((vm_page_free_wanted > 0) && - (vm_page_mem_free() >= vm_page_free_reserved)) { - vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_avail); + if (vm_page_external_pagedout == 0) { + vm_pageout_resume(); + } } simple_unlock(&vm_page_queue_free_lock); @@ -892,18 +855,6 @@ vm_page_t vm_page_grab_contig( simple_lock(&vm_page_queue_free_lock); - /* - * Only let privileged threads (involved in pageout) - * dip into the reserved pool or exceed the limit - * for externally-managed pages. - */ - - if (((vm_page_mem_free() - nr_pages) <= vm_page_free_reserved) - && !current_thread()->vm_privilege) { - simple_unlock(&vm_page_queue_free_lock); - return VM_PAGE_NULL; - } - /* TODO Allow caller to pass type */ mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL); @@ -918,22 +869,6 @@ vm_page_t vm_page_grab_contig( simple_unlock(&vm_page_queue_free_lock); - /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. 
- * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. - */ - - if ((vm_page_mem_free() < vm_page_free_min) || - ((vm_page_mem_free() < vm_page_free_target) && - (vm_page_inactive_count < vm_page_inactive_target))) - thread_wakeup((event_t) &vm_page_free_wanted); - return mem; } @@ -961,52 +896,10 @@ void vm_page_free_contig(vm_page_t mem, vm_size_t size) vm_page_free_pa(mem, order); - if ((vm_page_free_wanted > 0) && - (vm_page_mem_free() >= vm_page_free_reserved)) { - vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_avail); - } - simple_unlock(&vm_page_queue_free_lock); } /* - * vm_page_wait: - * - * Wait for a page to become available. - * If there are plenty of free pages, then we don't sleep. - */ - -void vm_page_wait( - void (*continuation)(void)) -{ - - /* - * We can't use vm_page_free_reserved to make this - * determination. Consider: some thread might - * need to allocate two pages. The first allocation - * succeeds, the second fails. After the first page is freed, - * a call to vm_page_wait must really block. - */ - - simple_lock(&vm_page_queue_free_lock); - if ((vm_page_mem_free() < vm_page_free_target)) { - if (vm_page_free_wanted++ == 0) - thread_wakeup((event_t)&vm_page_free_wanted); - assert_wait((event_t)&vm_page_free_avail, FALSE); - simple_unlock(&vm_page_queue_free_lock); - if (continuation != 0) { - counter(c_vm_page_wait_block_user++); - thread_block(continuation); - } else { - counter(c_vm_page_wait_block_kernel++); - thread_block((void (*)(void)) 0); - } - } else - simple_unlock(&vm_page_queue_free_lock); -} - -/* * vm_page_alloc: * * Allocate and return a memory cell associated @@ -1046,9 +939,11 @@ void vm_page_free( if (mem->free) panic("vm_page_free"); - if (mem->tabled) + if (mem->tabled) { vm_page_remove(mem); - VM_PAGE_QUEUES_REMOVE(mem); + } + + assert(!mem->active && !mem->inactive); if (mem->wire_count != 0) { if (!mem->private && !mem->fictitious) @@ -1056,11 +951,6 @@ void vm_page_free( mem->wire_count = 0; } - if (mem->laundry) { - vm_page_laundry_count--; - mem->laundry = FALSE; - } - PAGE_WAKEUP_DONE(mem); if (mem->absent) @@ -1077,116 +967,10 @@ void vm_page_free( mem->fictitious = TRUE; vm_page_release_fictitious(mem); } else { + boolean_t laundry = mem->laundry; + boolean_t external = mem->external; vm_page_init(mem); - vm_page_release(mem); - } -} - -/* - * vm_page_wire: - * - * Mark this page as wired down by yet - * another map, removing it from paging queues - * as necessary. - * - * The page's object and the page queues must be locked. - */ -void vm_page_wire( - vm_page_t mem) -{ - VM_PAGE_CHECK(mem); - - if (mem->wire_count == 0) { - VM_PAGE_QUEUES_REMOVE(mem); - if (!mem->private && !mem->fictitious) - vm_page_wire_count++; - } - mem->wire_count++; -} - -/* - * vm_page_unwire: - * - * Release one wiring of this page, potentially - * enabling it to be paged again. - * - * The page's object and the page queues must be locked. - */ -void vm_page_unwire( - vm_page_t mem) -{ - VM_PAGE_CHECK(mem); - - if (--mem->wire_count == 0) { - queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq); - vm_page_active_count++; - mem->active = TRUE; - if (!mem->private && !mem->fictitious) - vm_page_wire_count--; - } -} - -/* - * vm_page_deactivate: - * - * Returns the given page to the inactive list, - * indicating that no physical maps have access - * to this page. [Used by the physical mapping system.] - * - * The page queues must be locked. 
- */
-void vm_page_deactivate(
-	vm_page_t	m)
-{
-	VM_PAGE_CHECK(m);
-
-	/*
-	 *	This page is no longer very interesting.  If it was
-	 *	interesting (active or inactive/referenced), then we
-	 *	clear the reference bit and (re)enter it in the
-	 *	inactive queue.  Note wired pages should not have
-	 *	their reference bit cleared.
-	 */
-
-	if (m->active || (m->inactive && m->reference)) {
-		if (!m->fictitious && !m->absent)
-			pmap_clear_reference(m->phys_addr);
-		m->reference = FALSE;
-		VM_PAGE_QUEUES_REMOVE(m);
-	}
-	if (m->wire_count == 0 && !m->inactive) {
-		queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
-		m->inactive = TRUE;
-		vm_page_inactive_count++;
-	}
-}
-
-/*
- *	vm_page_activate:
- *
- *	Put the specified page on the active list (if appropriate).
- *
- *	The page queues must be locked.
- */
-
-void vm_page_activate(
-	vm_page_t	m)
-{
-	VM_PAGE_CHECK(m);
-
-	if (m->inactive) {
-		queue_remove(&vm_page_queue_inactive, m, vm_page_t,
-				pageq);
-		vm_page_inactive_count--;
-		m->inactive = FALSE;
-	}
-	if (m->wire_count == 0) {
-		if (m->active)
-			panic("vm_page_activate: already active");
-
-		queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
-		m->active = TRUE;
-		vm_page_active_count++;
+		vm_page_release(mem, laundry, external);
 	}
 }
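
The new vm_pageout() loop distinguishes two sleep states: when vm_pageout_scan() reports it is done, the daemon sleeps on vm_pageout_requested until vm_pageout_start() wakes it; when pageouts are still in flight (should_wait), it waits with a timeout on vm_pageout_continue until vm_pageout_resume() is called. The user-space model below sketches that protocol with POSIX threads. It is only an approximation of the kernel behaviour; every name in it (scan, pageout_daemon, free_pages, the 900-page threshold) is made up for this example and does not come from the diff.

/* Illustrative user-space model of the pageout wakeup protocol (not kernel
 * code).  Names and thresholds are placeholders for this sketch only. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for vm_page_queue_free_lock */
static pthread_cond_t pageout_requested = PTHREAD_COND_INITIALIZER;
static pthread_cond_t pageout_continue = PTHREAD_COND_INITIALIZER;
static int free_pages = 1000;
static int laundry = 0;

/* Stands in for vm_pageout_scan(): returns true ("done") when enough pages
 * are free, otherwise sets *should_wait after queuing writes.  Like the
 * kernel function, it returns with free_lock held. */
static bool scan(bool *should_wait)
{
    pthread_mutex_lock(&free_lock);
    if (free_pages >= 900) {
        *should_wait = false;
        return true;
    }
    laundry += 10;          /* pretend some pages were sent to their pagers */
    *should_wait = true;
    return false;
}

static void *pageout_daemon(void *arg)
{
    (void)arg;
    for (;;) {
        bool should_wait;
        bool done = scan(&should_wait);  /* returns with free_lock held */

        if (done) {
            /* Nothing to do: sleep until pageout_start() is called. */
            pthread_cond_wait(&pageout_requested, &free_lock);
            pthread_mutex_unlock(&free_lock);
        } else if (should_wait) {
            /* Writes in flight: wait until pageout_resume() reports
             * completion (the kernel additionally arms a timeout). */
            pthread_cond_wait(&pageout_continue, &free_lock);
            pthread_mutex_unlock(&free_lock);
        } else {
            pthread_mutex_unlock(&free_lock);
        }
    }
    return NULL;
}

/* Stand-ins for vm_pageout_start()/vm_pageout_resume(); both are called
 * with free_lock held, matching the locking rule stated in the diff. */
static void pageout_start(void)  { pthread_cond_signal(&pageout_requested); }
static void pageout_resume(void) { pthread_cond_signal(&pageout_continue); }

int main(void)
{
    pthread_t tid;
    pthread_create(&tid, NULL, pageout_daemon, NULL);

    /* An allocator notices memory is getting low... */
    pthread_mutex_lock(&free_lock);
    free_pages = 500;
    pageout_start();
    pthread_mutex_unlock(&free_lock);

    sleep(1);

    /* ...and later the pagers finish their writes. */
    pthread_mutex_lock(&free_lock);
    free_pages = 1000;
    laundry = 0;
    pageout_resume();
    pthread_mutex_unlock(&free_lock);

    sleep(1);
    puts("model ran");
    return 0;
}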
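vm_page_release() now carries the laundry and external accounting that the old code kept in vm_page_free() and the pageout path: vm_page_free() snapshots the page's laundry and external bits before vm_page_init() clears them, and the release path resumes the pageout daemon once the relevant counter drains to zero. The stand-alone sketch below mirrors that counter logic under simplified assumptions (no locking, plain ints); the helper names are placeholders, not the kernel's symbols.

/* Simplified model of the vm_page_release() bookkeeping. */
#include <assert.h>
#include <stdbool.h>

static int page_laundry_count;      /* pages queued for writeback */
static int external_pagedout = -1;  /* < 0: daemon not expecting notice */
static int resume_calls;            /* counts simulated vm_pageout_resume() */

static void pageout_resume(void) { resume_calls++; }

/* Called when a page comes back from its pager and is freed. */
static void page_release(bool laundry, bool external)
{
    if (laundry) {
        page_laundry_count--;
        if (page_laundry_count == 0)
            pageout_resume();       /* last laundered page: wake the daemon */
    }
    if (external) {
        /* Only account external pageouts when the daemon asked for them. */
        if (external_pagedout > 0)
            external_pagedout--;
        if (external_pagedout == 0)
            pageout_resume();
    }
}

int main(void)
{
    page_laundry_count = 2;
    external_pagedout = 2;

    page_release(true, true);
    page_release(true, true);

    /* Both counters drained, so the daemon was resumed once per counter. */
    assert(page_laundry_count == 0 && external_pagedout == 0);
    assert(resume_calls == 2);
    return 0;
}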
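The fictitious-page pool also changes representation: instead of chaining pages through pageq.next, it embeds a node in struct vm_page and uses the kern/list.h operations. The sketch below reproduces the grab/release pattern with a miniature circular doubly linked list written for the example; it assumes the usual list_init/list_insert_head/list_remove/list_first_entry semantics rather than including the kernel header, and the struct page here is a cut-down stand-in.

/* Miniature embedded list, in the spirit of kern/list.h, used to
 * illustrate vm_page_grab_fictitious()/vm_page_release_fictitious(). */
#include <assert.h>
#include <stddef.h>

struct list {
    struct list *prev, *next;
};

static void list_init(struct list *l) { l->prev = l->next = l; }
static int  list_empty(const struct list *l) { return l->next == l; }

static void list_insert_head(struct list *l, struct list *node)
{
    node->next = l->next;
    node->prev = l;
    l->next->prev = node;
    l->next = node;
}

static void list_remove(struct list *node)
{
    node->prev->next = node->next;
    node->next->prev = node->prev;
}

#define list_first_entry(l, type, member) \
    ((type *)((char *)(l)->next - offsetof(type, member)))

/* Cut-down page structure: only what this example needs. */
struct page {
    struct list node;   /* linkage in the fictitious pool */
    int free;
    int fictitious;
};

static struct list fictitious_pool;

/* Models vm_page_release_fictitious(): push a page back onto the pool. */
static void release_fictitious(struct page *p)
{
    assert(!p->free);
    p->free = 1;
    list_insert_head(&fictitious_pool, &p->node);
}

/* Models vm_page_grab_fictitious(): pop a page, or NULL if the pool is empty. */
static struct page *grab_fictitious(void)
{
    struct page *p;

    if (list_empty(&fictitious_pool))
        return NULL;

    p = list_first_entry(&fictitious_pool, struct page, node);
    assert(p->fictitious);
    list_remove(&p->node);
    p->free = 0;
    return p;
}

int main(void)
{
    struct page pg = { .free = 0, .fictitious = 1 };

    list_init(&fictitious_pool);
    release_fictitious(&pg);
    assert(grab_fictitious() == &pg);
    assert(grab_fictitious() == NULL);
    return 0;
}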