Diffstat (limited to 'vm/vm_fault.c')
-rw-r--r-- | vm/vm_fault.c | 2182
1 file changed, 2182 insertions, 0 deletions
diff --git a/vm/vm_fault.c b/vm/vm_fault.c new file mode 100644 index 00000000..e45687cd --- /dev/null +++ b/vm/vm_fault.c @@ -0,0 +1,2182 @@ +/* + * Mach Operating System + * Copyright (c) 1994,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: vm_fault.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * + * Page fault handling module. + */ +#include <mach_pagemap.h> +#include <mach_kdb.h> +#include <mach_pcsample.h> + + +#include <vm/vm_fault.h> +#include <mach/kern_return.h> +#include <mach/message.h> /* for error codes */ +#include <kern/counters.h> +#include <kern/thread.h> +#include <kern/sched_prim.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/pmap.h> +#include <mach/vm_statistics.h> +#include <vm/vm_pageout.h> +#include <mach/vm_param.h> +#include <mach/memory_object.h> +#include "memory_object_user.h" + /* For memory_object_data_{request,unlock} */ +#include <kern/mach_param.h> +#include <kern/macro_help.h> +#include <kern/zalloc.h> + +#if MACH_PCSAMPLE +#include <kern/pc_sample.h> +#endif + + + +/* + * State needed by vm_fault_continue. + * This is a little hefty to drop directly + * into the thread structure. + */ +typedef struct vm_fault_state { + struct vm_map *vmf_map; + vm_offset_t vmf_vaddr; + vm_prot_t vmf_fault_type; + boolean_t vmf_change_wiring; + void (*vmf_continuation)(); + vm_map_version_t vmf_version; + boolean_t vmf_wired; + struct vm_object *vmf_object; + vm_offset_t vmf_offset; + vm_prot_t vmf_prot; + + boolean_t vmfp_backoff; + struct vm_object *vmfp_object; + vm_offset_t vmfp_offset; + struct vm_page *vmfp_first_m; + vm_prot_t vmfp_access; +} vm_fault_state_t; + +zone_t vm_fault_state_zone = 0; + +int vm_object_absent_max = 50; + +int vm_fault_debug = 0; + +boolean_t vm_fault_dirty_handling = FALSE; +boolean_t vm_fault_interruptible = TRUE; + +boolean_t software_reference_bits = TRUE; + +#if MACH_KDB +extern struct db_watchpoint *db_watchpoint_list; +#endif MACH_KDB + +/* + * Routine: vm_fault_init + * Purpose: + * Initialize our private data structures. + */ +void vm_fault_init() +{ + vm_fault_state_zone = zinit(sizeof(vm_fault_state_t), + THREAD_MAX * sizeof(vm_fault_state_t), + sizeof(vm_fault_state_t), + 0, "vm fault state"); +} + +/* + * Routine: vm_fault_cleanup + * Purpose: + * Clean up the result of vm_fault_page. + * Results: + * The paging reference for "object" is released. + * "object" is unlocked. 
+ * If "top_page" is not null, "top_page" is + * freed and the paging reference for the object + * containing it is released. + * + * In/out conditions: + * "object" must be locked. + */ +void +vm_fault_cleanup(object, top_page) + register vm_object_t object; + register vm_page_t top_page; +{ + vm_object_paging_end(object); + vm_object_unlock(object); + + if (top_page != VM_PAGE_NULL) { + object = top_page->object; + vm_object_lock(object); + VM_PAGE_FREE(top_page); + vm_object_paging_end(object); + vm_object_unlock(object); + } +} + + +#if MACH_PCSAMPLE +/* + * Do PC sampling on current thread, assuming + * that it is the thread taking this page fault. + * + * Must check for THREAD_NULL, since faults + * can occur before threads are running. + */ + +#define vm_stat_sample(flavor) \ + MACRO_BEGIN \ + thread_t _thread_ = current_thread(); \ + \ + if (_thread_ != THREAD_NULL) \ + take_pc_sample_macro(_thread_, (flavor)); \ + MACRO_END + +#else +#define vm_stat_sample(x) +#endif /* MACH_PCSAMPLE */ + + + +/* + * Routine: vm_fault_page + * Purpose: + * Find the resident page for the virtual memory + * specified by the given virtual memory object + * and offset. + * Additional arguments: + * The required permissions for the page is given + * in "fault_type". Desired permissions are included + * in "protection". + * + * If the desired page is known to be resident (for + * example, because it was previously wired down), asserting + * the "unwiring" parameter will speed the search. + * + * If the operation can be interrupted (by thread_abort + * or thread_terminate), then the "interruptible" + * parameter should be asserted. + * + * Results: + * The page containing the proper data is returned + * in "result_page". + * + * In/out conditions: + * The source object must be locked and referenced, + * and must donate one paging reference. The reference + * is not affected. The paging reference and lock are + * consumed. + * + * If the call succeeds, the object in which "result_page" + * resides is left locked and holding a paging reference. + * If this is not the original object, a busy page in the + * original object is returned in "top_page", to prevent other + * callers from pursuing this same data, along with a paging + * reference for the original object. The "top_page" should + * be destroyed when this guarantee is no longer required. + * The "result_page" is also left busy. It is not removed + * from the pageout queues. + */ +vm_fault_return_t vm_fault_page(first_object, first_offset, + fault_type, must_be_resident, interruptible, + protection, + result_page, top_page, + resume, continuation) + /* Arguments: */ + vm_object_t first_object; /* Object to begin search */ + vm_offset_t first_offset; /* Offset into object */ + vm_prot_t fault_type; /* What access is requested */ + boolean_t must_be_resident;/* Must page be resident? */ + boolean_t interruptible; /* May fault be interrupted? */ + /* Modifies in place: */ + vm_prot_t *protection; /* Protection for mapping */ + /* Returns: */ + vm_page_t *result_page; /* Page found, if successful */ + vm_page_t *top_page; /* Page in top object, if + * not result_page. + */ + /* More arguments: */ + boolean_t resume; /* We are restarting. */ + void (*continuation)(); /* Continuation for blocking. 
*/ +{ + register + vm_page_t m; + register + vm_object_t object; + register + vm_offset_t offset; + vm_page_t first_m; + vm_object_t next_object; + vm_object_t copy_object; + boolean_t look_for_page; + vm_prot_t access_required; + +#ifdef CONTINUATIONS + if (resume) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + if (state->vmfp_backoff) + goto after_block_and_backoff; + + object = state->vmfp_object; + offset = state->vmfp_offset; + first_m = state->vmfp_first_m; + access_required = state->vmfp_access; + goto after_thread_block; + } +#else /* not CONTINUATIONS */ + assert(continuation == 0); + assert(!resume); +#endif /* not CONTINUATIONS */ + + vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY); + vm_stat.faults++; /* needs lock XXX */ + +/* + * Recovery actions + */ +#define RELEASE_PAGE(m) \ + MACRO_BEGIN \ + PAGE_WAKEUP_DONE(m); \ + vm_page_lock_queues(); \ + if (!m->active && !m->inactive) \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ + MACRO_END + + if (vm_fault_dirty_handling +#if MACH_KDB + /* + * If there are watchpoints set, then + * we don't want to give away write permission + * on a read fault. Make the task write fault, + * so that the watchpoint code notices the access. + */ + || db_watchpoint_list +#endif MACH_KDB + ) { + /* + * If we aren't asking for write permission, + * then don't give it away. We're using write + * faults to set the dirty bit. + */ + if (!(fault_type & VM_PROT_WRITE)) + *protection &= ~VM_PROT_WRITE; + } + + if (!vm_fault_interruptible) + interruptible = FALSE; + + /* + * INVARIANTS (through entire routine): + * + * 1) At all times, we must either have the object + * lock or a busy page in some object to prevent + * some other thread from trying to bring in + * the same page. + * + * Note that we cannot hold any locks during the + * pager access or when waiting for memory, so + * we use a busy page then. + * + * Note also that we aren't as concerned about more than + * one thread attempting to memory_object_data_unlock + * the same page at once, so we don't hold the page + * as busy then, but do record the highest unlock + * value so far. [Unlock requests may also be delivered + * out of order.] + * + * 2) To prevent another thread from racing us down the + * shadow chain and entering a new page in the top + * object before we do, we must keep a busy page in + * the top object while following the shadow chain. + * + * 3) We must increment paging_in_progress on any object + * for which we have a busy page, to prevent + * vm_object_collapse from removing the busy page + * without our noticing. + * + * 4) We leave busy pages on the pageout queues. + * If the pageout daemon comes across a busy page, + * it will remove the page from the pageout queues. + */ + + /* + * Search for the page at object/offset. + */ + + object = first_object; + offset = first_offset; + first_m = VM_PAGE_NULL; + access_required = fault_type; + + /* + * See whether this page is resident + */ + + while (TRUE) { + m = vm_page_lookup(object, offset); + if (m != VM_PAGE_NULL) { + /* + * If the page is being brought in, + * wait for it and then retry. + * + * A possible optimization: if the page + * is known to be resident, we can ignore + * pages that are absent (regardless of + * whether they're busy). 
+ */ + + if (m->busy) { + kern_return_t wait_result; + + PAGE_ASSERT_WAIT(m, interruptible); + vm_object_unlock(object); +#ifdef CONTINUATIONS + if (continuation != (void (*)()) 0) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + /* + * Save variables in case + * thread_block discards + * our kernel stack. + */ + + state->vmfp_backoff = FALSE; + state->vmfp_object = object; + state->vmfp_offset = offset; + state->vmfp_first_m = first_m; + state->vmfp_access = + access_required; + state->vmf_prot = *protection; + + counter(c_vm_fault_page_block_busy_user++); + thread_block(continuation); + } else +#endif /* CONTINUATIONS */ + { + counter(c_vm_fault_page_block_busy_kernel++); + thread_block((void (*)()) 0); + } + after_thread_block: + wait_result = current_thread()->wait_result; + vm_object_lock(object); + if (wait_result != THREAD_AWAKENED) { + vm_fault_cleanup(object, first_m); + if (wait_result == THREAD_RESTART) + return(VM_FAULT_RETRY); + else + return(VM_FAULT_INTERRUPTED); + } + continue; + } + + /* + * If the page is in error, give up now. + */ + + if (m->error) { + VM_PAGE_FREE(m); + vm_fault_cleanup(object, first_m); + return(VM_FAULT_MEMORY_ERROR); + } + + /* + * If the page isn't busy, but is absent, + * then it was deemed "unavailable". + */ + + if (m->absent) { + /* + * Remove the non-existent page (unless it's + * in the top object) and move on down to the + * next object (if there is one). + */ + + offset += object->shadow_offset; + access_required = VM_PROT_READ; + next_object = object->shadow; + if (next_object == VM_OBJECT_NULL) { + vm_page_t real_m; + + assert(!must_be_resident); + + /* + * Absent page at bottom of shadow + * chain; zero fill the page we left + * busy in the first object, and flush + * the absent page. But first we + * need to allocate a real page. + */ + + real_m = vm_page_grab(); + if (real_m == VM_PAGE_NULL) { + vm_fault_cleanup(object, first_m); + return(VM_FAULT_MEMORY_SHORTAGE); + } + + if (object != first_object) { + VM_PAGE_FREE(m); + vm_object_paging_end(object); + vm_object_unlock(object); + object = first_object; + offset = first_offset; + m = first_m; + first_m = VM_PAGE_NULL; + vm_object_lock(object); + } + + VM_PAGE_FREE(m); + assert(real_m->busy); + vm_page_lock_queues(); + vm_page_insert(real_m, object, offset); + vm_page_unlock_queues(); + m = real_m; + + /* + * Drop the lock while zero filling + * page. Then break because this + * is the page we wanted. Checking + * the page lock is a waste of time; + * this page was either absent or + * newly allocated -- in both cases + * it can't be page locked by a pager. + */ + vm_object_unlock(object); + + vm_page_zero_fill(m); + + vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS); + + vm_stat.zero_fill_count++; + vm_object_lock(object); + pmap_clear_modify(m->phys_addr); + break; + } else { + if (must_be_resident) { + vm_object_paging_end(object); + } else if (object != first_object) { + vm_object_paging_end(object); + VM_PAGE_FREE(m); + } else { + first_m = m; + m->absent = FALSE; + vm_object_absent_release(object); + m->busy = TRUE; + + vm_page_lock_queues(); + VM_PAGE_QUEUES_REMOVE(m); + vm_page_unlock_queues(); + } + vm_object_lock(next_object); + vm_object_unlock(object); + object = next_object; + vm_object_paging_begin(object); + continue; + } + } + + /* + * If the desired access to this page has + * been locked out, request that it be unlocked. 
+ */ + + if (access_required & m->page_lock) { + if ((access_required & m->unlock_request) != access_required) { + vm_prot_t new_unlock_request; + kern_return_t rc; + + if (!object->pager_ready) { + vm_object_assert_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + interruptible); + goto block_and_backoff; + } + + new_unlock_request = m->unlock_request = + (access_required | m->unlock_request); + vm_object_unlock(object); + if ((rc = memory_object_data_unlock( + object->pager, + object->pager_request, + offset + object->paging_offset, + PAGE_SIZE, + new_unlock_request)) + != KERN_SUCCESS) { + printf("vm_fault: memory_object_data_unlock failed\n"); + vm_object_lock(object); + vm_fault_cleanup(object, first_m); + return((rc == MACH_SEND_INTERRUPTED) ? + VM_FAULT_INTERRUPTED : + VM_FAULT_MEMORY_ERROR); + } + vm_object_lock(object); + continue; + } + + PAGE_ASSERT_WAIT(m, interruptible); + goto block_and_backoff; + } + + /* + * We mark the page busy and leave it on + * the pageout queues. If the pageout + * deamon comes across it, then it will + * remove the page. + */ + + if (!software_reference_bits) { + vm_page_lock_queues(); + if (m->inactive) { + vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS); + vm_stat.reactivations++; + } + + VM_PAGE_QUEUES_REMOVE(m); + vm_page_unlock_queues(); + } + + assert(!m->busy); + m->busy = TRUE; + assert(!m->absent); + break; + } + + look_for_page = + (object->pager_created) +#if MACH_PAGEMAP + && (vm_external_state_get(object->existence_info, offset + object->paging_offset) != + VM_EXTERNAL_STATE_ABSENT) +#endif MACH_PAGEMAP + ; + + if ((look_for_page || (object == first_object)) + && !must_be_resident) { + /* + * Allocate a new page for this object/offset + * pair. + */ + + m = vm_page_grab_fictitious(); + if (m == VM_PAGE_NULL) { + vm_fault_cleanup(object, first_m); + return(VM_FAULT_FICTITIOUS_SHORTAGE); + } + + vm_page_lock_queues(); + vm_page_insert(m, object, offset); + vm_page_unlock_queues(); + } + + if (look_for_page && !must_be_resident) { + kern_return_t rc; + + /* + * If the memory manager is not ready, we + * cannot make requests. + */ + if (!object->pager_ready) { + vm_object_assert_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + interruptible); + VM_PAGE_FREE(m); + goto block_and_backoff; + } + + if (object->internal) { + /* + * Requests to the default pager + * must reserve a real page in advance, + * because the pager's data-provided + * won't block for pages. + */ + + if (m->fictitious && !vm_page_convert(m)) { + VM_PAGE_FREE(m); + vm_fault_cleanup(object, first_m); + return(VM_FAULT_MEMORY_SHORTAGE); + } + } else if (object->absent_count > + vm_object_absent_max) { + /* + * If there are too many outstanding page + * requests pending on this object, we + * wait for them to be resolved now. + */ + + vm_object_absent_assert_wait(object, interruptible); + VM_PAGE_FREE(m); + goto block_and_backoff; + } + + /* + * Indicate that the page is waiting for data + * from the memory manager. + */ + + m->absent = TRUE; + object->absent_count++; + + /* + * We have a busy page, so we can + * release the object lock. + */ + vm_object_unlock(object); + + /* + * Call the memory manager to retrieve the data. 
+ */ + + vm_stat.pageins++; + vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS); + + if ((rc = memory_object_data_request(object->pager, + object->pager_request, + m->offset + object->paging_offset, + PAGE_SIZE, access_required)) != KERN_SUCCESS) { + if (rc != MACH_SEND_INTERRUPTED) + printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, %d\n", + "memory_object_data_request", + object->pager, + object->pager_request, + m->offset + object->paging_offset, + PAGE_SIZE, access_required, rc); + /* + * Don't want to leave a busy page around, + * but the data request may have blocked, + * so check if it's still there and busy. + */ + vm_object_lock(object); + if (m == vm_page_lookup(object,offset) && + m->absent && m->busy) + VM_PAGE_FREE(m); + vm_fault_cleanup(object, first_m); + return((rc == MACH_SEND_INTERRUPTED) ? + VM_FAULT_INTERRUPTED : + VM_FAULT_MEMORY_ERROR); + } + + /* + * Retry with same object/offset, since new data may + * be in a different page (i.e., m is meaningless at + * this point). + */ + vm_object_lock(object); + continue; + } + + /* + * For the XP system, the only case in which we get here is if + * object has no pager (or unwiring). If the pager doesn't + * have the page this is handled in the m->absent case above + * (and if you change things here you should look above). + */ + if (object == first_object) + first_m = m; + else + { + assert(m == VM_PAGE_NULL); + } + + /* + * Move on to the next object. Lock the next + * object before unlocking the current one. + */ + access_required = VM_PROT_READ; + + offset += object->shadow_offset; + next_object = object->shadow; + if (next_object == VM_OBJECT_NULL) { + assert(!must_be_resident); + + /* + * If there's no object left, fill the page + * in the top object with zeros. But first we + * need to allocate a real page. + */ + + if (object != first_object) { + vm_object_paging_end(object); + vm_object_unlock(object); + + object = first_object; + offset = first_offset; + vm_object_lock(object); + } + + m = first_m; + assert(m->object == object); + first_m = VM_PAGE_NULL; + + if (m->fictitious && !vm_page_convert(m)) { + VM_PAGE_FREE(m); + vm_fault_cleanup(object, VM_PAGE_NULL); + return(VM_FAULT_MEMORY_SHORTAGE); + } + + vm_object_unlock(object); + vm_page_zero_fill(m); + vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS); + vm_stat.zero_fill_count++; + vm_object_lock(object); + pmap_clear_modify(m->phys_addr); + break; + } + else { + vm_object_lock(next_object); + if ((object != first_object) || must_be_resident) + vm_object_paging_end(object); + vm_object_unlock(object); + object = next_object; + vm_object_paging_begin(object); + } + } + + /* + * PAGE HAS BEEN FOUND. + * + * This page (m) is: + * busy, so that we can play with it; + * not absent, so that nobody else will fill it; + * possibly eligible for pageout; + * + * The top-level page (first_m) is: + * VM_PAGE_NULL if the page was found in the + * top-level object; + * busy, not absent, and ineligible for pageout. + * + * The current object (object) is locked. A paging + * reference is held for the current and top-level + * objects. + */ + +#if EXTRA_ASSERTIONS + assert(m->busy && !m->absent); + assert((first_m == VM_PAGE_NULL) || + (first_m->busy && !first_m->absent && + !first_m->active && !first_m->inactive)); +#endif EXTRA_ASSERTIONS + + /* + * If the page is being written, but isn't + * already owned by the top-level object, + * we have to copy it into a new page owned + * by the top-level object. 
+ */ + + if (object != first_object) { + /* + * We only really need to copy if we + * want to write it. + */ + + if (fault_type & VM_PROT_WRITE) { + vm_page_t copy_m; + + assert(!must_be_resident); + + /* + * If we try to collapse first_object at this + * point, we may deadlock when we try to get + * the lock on an intermediate object (since we + * have the bottom object locked). We can't + * unlock the bottom object, because the page + * we found may move (by collapse) if we do. + * + * Instead, we first copy the page. Then, when + * we have no more use for the bottom object, + * we unlock it and try to collapse. + * + * Note that we copy the page even if we didn't + * need to... that's the breaks. + */ + + /* + * Allocate a page for the copy + */ + copy_m = vm_page_grab(); + if (copy_m == VM_PAGE_NULL) { + RELEASE_PAGE(m); + vm_fault_cleanup(object, first_m); + return(VM_FAULT_MEMORY_SHORTAGE); + } + + vm_object_unlock(object); + vm_page_copy(m, copy_m); + vm_object_lock(object); + + /* + * If another map is truly sharing this + * page with us, we have to flush all + * uses of the original page, since we + * can't distinguish those which want the + * original from those which need the + * new copy. + * + * XXXO If we know that only one map has + * access to this page, then we could + * avoid the pmap_page_protect() call. + */ + + vm_page_lock_queues(); + vm_page_deactivate(m); + pmap_page_protect(m->phys_addr, VM_PROT_NONE); + vm_page_unlock_queues(); + + /* + * We no longer need the old page or object. + */ + + PAGE_WAKEUP_DONE(m); + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_stat.cow_faults++; + vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS); + object = first_object; + offset = first_offset; + + vm_object_lock(object); + VM_PAGE_FREE(first_m); + first_m = VM_PAGE_NULL; + assert(copy_m->busy); + vm_page_lock_queues(); + vm_page_insert(copy_m, object, offset); + vm_page_unlock_queues(); + m = copy_m; + + /* + * Now that we've gotten the copy out of the + * way, let's try to collapse the top object. + * But we have to play ugly games with + * paging_in_progress to do that... + */ + + vm_object_paging_end(object); + vm_object_collapse(object); + vm_object_paging_begin(object); + } + else { + *protection &= (~VM_PROT_WRITE); + } + } + + /* + * Now check whether the page needs to be pushed into the + * copy object. The use of asymmetric copy on write for + * shared temporary objects means that we may do two copies to + * satisfy the fault; one above to get the page from a + * shadowed object, and one here to push it into the copy. + */ + + while ((copy_object = first_object->copy) != VM_OBJECT_NULL) { + vm_offset_t copy_offset; + vm_page_t copy_m; + + /* + * If the page is being written, but hasn't been + * copied to the copy-object, we have to copy it there. + */ + + if ((fault_type & VM_PROT_WRITE) == 0) { + *protection &= ~VM_PROT_WRITE; + break; + } + + /* + * If the page was guaranteed to be resident, + * we must have already performed the copy. + */ + + if (must_be_resident) + break; + + /* + * Try to get the lock on the copy_object. + */ + if (!vm_object_lock_try(copy_object)) { + vm_object_unlock(object); + + simple_lock_pause(); /* wait a bit */ + + vm_object_lock(object); + continue; + } + + /* + * Make another reference to the copy-object, + * to keep it from disappearing during the + * copy. + */ + assert(copy_object->ref_count > 0); + copy_object->ref_count++; + + /* + * Does the page exist in the copy? 
+ */ + copy_offset = first_offset - copy_object->shadow_offset; + copy_m = vm_page_lookup(copy_object, copy_offset); + if (copy_m != VM_PAGE_NULL) { + if (copy_m->busy) { + /* + * If the page is being brought + * in, wait for it and then retry. + */ + PAGE_ASSERT_WAIT(copy_m, interruptible); + RELEASE_PAGE(m); + copy_object->ref_count--; + assert(copy_object->ref_count > 0); + vm_object_unlock(copy_object); + goto block_and_backoff; + } + } + else { + /* + * Allocate a page for the copy + */ + copy_m = vm_page_alloc(copy_object, copy_offset); + if (copy_m == VM_PAGE_NULL) { + RELEASE_PAGE(m); + copy_object->ref_count--; + assert(copy_object->ref_count > 0); + vm_object_unlock(copy_object); + vm_fault_cleanup(object, first_m); + return(VM_FAULT_MEMORY_SHORTAGE); + } + + /* + * Must copy page into copy-object. + */ + + vm_page_copy(m, copy_m); + + /* + * If the old page was in use by any users + * of the copy-object, it must be removed + * from all pmaps. (We can't know which + * pmaps use it.) + */ + + vm_page_lock_queues(); + pmap_page_protect(m->phys_addr, VM_PROT_NONE); + copy_m->dirty = TRUE; + vm_page_unlock_queues(); + + /* + * If there's a pager, then immediately + * page out this page, using the "initialize" + * option. Else, we use the copy. + */ + + if (!copy_object->pager_created) { + vm_page_lock_queues(); + vm_page_activate(copy_m); + vm_page_unlock_queues(); + PAGE_WAKEUP_DONE(copy_m); + } else { + /* + * The page is already ready for pageout: + * not on pageout queues and busy. + * Unlock everything except the + * copy_object itself. + */ + + vm_object_unlock(object); + + /* + * Write the page to the copy-object, + * flushing it from the kernel. + */ + + vm_pageout_page(copy_m, TRUE, TRUE); + + /* + * Since the pageout may have + * temporarily dropped the + * copy_object's lock, we + * check whether we'll have + * to deallocate the hard way. + */ + + if ((copy_object->shadow != object) || + (copy_object->ref_count == 1)) { + vm_object_unlock(copy_object); + vm_object_deallocate(copy_object); + vm_object_lock(object); + continue; + } + + /* + * Pick back up the old object's + * lock. [It is safe to do so, + * since it must be deeper in the + * object tree.] + */ + + vm_object_lock(object); + } + + /* + * Because we're pushing a page upward + * in the object tree, we must restart + * any faults that are waiting here. + * [Note that this is an expansion of + * PAGE_WAKEUP that uses the THREAD_RESTART + * wait result]. Can't turn off the page's + * busy bit because we're not done with it. + */ + + if (m->wanted) { + m->wanted = FALSE; + thread_wakeup_with_result((event_t) m, + THREAD_RESTART); + } + } + + /* + * The reference count on copy_object must be + * at least 2: one for our extra reference, + * and at least one from the outside world + * (we checked that when we last locked + * copy_object). + */ + copy_object->ref_count--; + assert(copy_object->ref_count > 0); + vm_object_unlock(copy_object); + + break; + } + + *result_page = m; + *top_page = first_m; + + /* + * If the page can be written, assume that it will be. + * [Earlier, we restrict the permission to allow write + * access only if the fault so required, so we don't + * mark read-only data as dirty.] 
+ */ + + if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE)) + m->dirty = TRUE; + + return(VM_FAULT_SUCCESS); + + block_and_backoff: + vm_fault_cleanup(object, first_m); + +#ifdef CONTINUATIONS + if (continuation != (void (*)()) 0) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + /* + * Save variables in case we must restart. + */ + + state->vmfp_backoff = TRUE; + state->vmf_prot = *protection; + + counter(c_vm_fault_page_block_backoff_user++); + thread_block(continuation); + } else +#endif /* CONTINUATIONS */ + { + counter(c_vm_fault_page_block_backoff_kernel++); + thread_block((void (*)()) 0); + } + after_block_and_backoff: + if (current_thread()->wait_result == THREAD_AWAKENED) + return VM_FAULT_RETRY; + else + return VM_FAULT_INTERRUPTED; + +#undef RELEASE_PAGE +} + +/* + * Routine: vm_fault + * Purpose: + * Handle page faults, including pseudo-faults + * used to change the wiring status of pages. + * Returns: + * If an explicit (expression) continuation is supplied, + * then we call the continuation instead of returning. + * Implementation: + * Explicit continuations make this a little icky, + * because it hasn't been rewritten to embrace CPS. + * Instead, we have resume arguments for vm_fault and + * vm_fault_page, to let continue the fault computation. + * + * vm_fault and vm_fault_page save mucho state + * in the moral equivalent of a closure. The state + * structure is allocated when first entering vm_fault + * and deallocated when leaving vm_fault. + */ + +#ifdef CONTINUATIONS +void +vm_fault_continue() +{ + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + (void) vm_fault(state->vmf_map, + state->vmf_vaddr, + state->vmf_fault_type, + state->vmf_change_wiring, + TRUE, state->vmf_continuation); + /*NOTREACHED*/ +} +#endif /* CONTINUATIONS */ + +kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, + resume, continuation) + vm_map_t map; + vm_offset_t vaddr; + vm_prot_t fault_type; + boolean_t change_wiring; + boolean_t resume; + void (*continuation)(); +{ + vm_map_version_t version; /* Map version for verificiation */ + boolean_t wired; /* Should mapping be wired down? */ + vm_object_t object; /* Top-level object */ + vm_offset_t offset; /* Top-level offset */ + vm_prot_t prot; /* Protection for mapping */ + vm_object_t old_copy_object; /* Saved copy object */ + vm_page_t result_page; /* Result of vm_fault_page */ + vm_page_t top_page; /* Placeholder page */ + kern_return_t kr; + + register + vm_page_t m; /* Fast access to result_page */ + +#ifdef CONTINUATIONS + if (resume) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + /* + * Retrieve cached variables and + * continue vm_fault_page. + */ + + object = state->vmf_object; + if (object == VM_OBJECT_NULL) + goto RetryFault; + version = state->vmf_version; + wired = state->vmf_wired; + offset = state->vmf_offset; + prot = state->vmf_prot; + + kr = vm_fault_page(object, offset, fault_type, + (change_wiring && !wired), !change_wiring, + &prot, &result_page, &top_page, + TRUE, vm_fault_continue); + goto after_vm_fault_page; + } + + if (continuation != (void (*)()) 0) { + /* + * We will probably need to save state. 
+ */ + + char * state; + + /* + * if this assignment stmt is written as + * 'active_threads[cpu_number()] = zalloc()', + * cpu_number may be evaluated before zalloc; + * if zalloc blocks, cpu_number will be wrong + */ + + state = (char *) zalloc(vm_fault_state_zone); + current_thread()->ith_other = state; + + } +#else /* not CONTINUATIONS */ + assert(continuation == 0); + assert(!resume); +#endif /* not CONTINUATIONS */ + + RetryFault: ; + + /* + * Find the backing store object and offset into + * it to begin the search. + */ + + if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version, + &object, &offset, + &prot, &wired)) != KERN_SUCCESS) { + goto done; + } + + /* + * If the page is wired, we must fault for the current protection + * value, to avoid further faults. + */ + + if (wired) + fault_type = prot; + + /* + * Make a reference to this object to + * prevent its disposal while we are messing with + * it. Once we have the reference, the map is free + * to be diddled. Since objects reference their + * shadows (and copies), they will stay around as well. + */ + + assert(object->ref_count > 0); + object->ref_count++; + vm_object_paging_begin(object); + +#ifdef CONTINUATIONS + if (continuation != (void (*)()) 0) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + /* + * Save variables, in case vm_fault_page discards + * our kernel stack and we have to restart. + */ + + state->vmf_map = map; + state->vmf_vaddr = vaddr; + state->vmf_fault_type = fault_type; + state->vmf_change_wiring = change_wiring; + state->vmf_continuation = continuation; + + state->vmf_version = version; + state->vmf_wired = wired; + state->vmf_object = object; + state->vmf_offset = offset; + state->vmf_prot = prot; + + kr = vm_fault_page(object, offset, fault_type, + (change_wiring && !wired), !change_wiring, + &prot, &result_page, &top_page, + FALSE, vm_fault_continue); + } else +#endif /* CONTINUATIONS */ + { + kr = vm_fault_page(object, offset, fault_type, + (change_wiring && !wired), !change_wiring, + &prot, &result_page, &top_page, + FALSE, (void (*)()) 0); + } + after_vm_fault_page: + + /* + * If we didn't succeed, lose the object reference immediately. + */ + + if (kr != VM_FAULT_SUCCESS) + vm_object_deallocate(object); + + /* + * See why we failed, and take corrective action. + */ + + switch (kr) { + case VM_FAULT_SUCCESS: + break; + case VM_FAULT_RETRY: + goto RetryFault; + case VM_FAULT_INTERRUPTED: + kr = KERN_SUCCESS; + goto done; + case VM_FAULT_MEMORY_SHORTAGE: +#ifdef CONTINUATIONS + if (continuation != (void (*)()) 0) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + /* + * Save variables in case VM_PAGE_WAIT + * discards our kernel stack. + */ + + state->vmf_map = map; + state->vmf_vaddr = vaddr; + state->vmf_fault_type = fault_type; + state->vmf_change_wiring = change_wiring; + state->vmf_continuation = continuation; + state->vmf_object = VM_OBJECT_NULL; + + VM_PAGE_WAIT(vm_fault_continue); + } else +#endif /* CONTINUATIONS */ + VM_PAGE_WAIT((void (*)()) 0); + goto RetryFault; + case VM_FAULT_FICTITIOUS_SHORTAGE: + vm_page_more_fictitious(); + goto RetryFault; + case VM_FAULT_MEMORY_ERROR: + kr = KERN_MEMORY_ERROR; + goto done; + } + + m = result_page; + + assert((change_wiring && !wired) ? + (top_page == VM_PAGE_NULL) : + ((top_page == VM_PAGE_NULL) == (m->object == object))); + + /* + * How to clean up the result of vm_fault_page. This + * happens whether the mapping is entered or not. 
+ */ + +#define UNLOCK_AND_DEALLOCATE \ + MACRO_BEGIN \ + vm_fault_cleanup(m->object, top_page); \ + vm_object_deallocate(object); \ + MACRO_END + + /* + * What to do with the resulting page from vm_fault_page + * if it doesn't get entered into the physical map: + */ + +#define RELEASE_PAGE(m) \ + MACRO_BEGIN \ + PAGE_WAKEUP_DONE(m); \ + vm_page_lock_queues(); \ + if (!m->active && !m->inactive) \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ + MACRO_END + + /* + * We must verify that the maps have not changed + * since our last lookup. + */ + + old_copy_object = m->object->copy; + + vm_object_unlock(m->object); + while (!vm_map_verify(map, &version)) { + vm_object_t retry_object; + vm_offset_t retry_offset; + vm_prot_t retry_prot; + + /* + * To avoid trying to write_lock the map while another + * thread has it read_locked (in vm_map_pageable), we + * do not try for write permission. If the page is + * still writable, we will get write permission. If it + * is not, or has been marked needs_copy, we enter the + * mapping without write permission, and will merely + * take another fault. + */ + kr = vm_map_lookup(&map, vaddr, + fault_type & ~VM_PROT_WRITE, &version, + &retry_object, &retry_offset, &retry_prot, + &wired); + + if (kr != KERN_SUCCESS) { + vm_object_lock(m->object); + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + goto done; + } + + vm_object_unlock(retry_object); + vm_object_lock(m->object); + + if ((retry_object != object) || + (retry_offset != offset)) { + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + goto RetryFault; + } + + /* + * Check whether the protection has changed or the object + * has been copied while we left the map unlocked. + */ + prot &= retry_prot; + vm_object_unlock(m->object); + } + vm_object_lock(m->object); + + /* + * If the copy object changed while the top-level object + * was unlocked, then we must take away write permission. + */ + + if (m->object->copy != old_copy_object) + prot &= ~VM_PROT_WRITE; + + /* + * If we want to wire down this page, but no longer have + * adequate permissions, we must start all over. + */ + + if (wired && (prot != fault_type)) { + vm_map_verify_done(map, &version); + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + goto RetryFault; + } + + /* + * It's critically important that a wired-down page be faulted + * only once in each map for which it is wired. + */ + + vm_object_unlock(m->object); + + /* + * Put this page into the physical map. + * We had to do the unlock above because pmap_enter + * may cause other faults. The page may be on + * the pageout queues. If the pageout daemon comes + * across the page, it will remove it from the queues. + */ + + PMAP_ENTER(map->pmap, vaddr, m, prot, wired); + + /* + * If the page is not wired down and isn't already + * on a pageout queue, then put it where the + * pageout daemon can find it. 
+ */ + vm_object_lock(m->object); + vm_page_lock_queues(); + if (change_wiring) { + if (wired) + vm_page_wire(m); + else + vm_page_unwire(m); + } else if (software_reference_bits) { + if (!m->active && !m->inactive) + vm_page_activate(m); + m->reference = TRUE; + } else { + vm_page_activate(m); + } + vm_page_unlock_queues(); + + /* + * Unlock everything, and return + */ + + vm_map_verify_done(map, &version); + PAGE_WAKEUP_DONE(m); + kr = KERN_SUCCESS; + UNLOCK_AND_DEALLOCATE; + +#undef UNLOCK_AND_DEALLOCATE +#undef RELEASE_PAGE + + done: +#ifdef CONTINUATIONS + if (continuation != (void (*)()) 0) { + register vm_fault_state_t *state = + (vm_fault_state_t *) current_thread()->ith_other; + + zfree(vm_fault_state_zone, (vm_offset_t) state); + (*continuation)(kr); + /*NOTREACHED*/ + } +#endif /* CONTINUATIONS */ + + return(kr); +} + +kern_return_t vm_fault_wire_fast(); + +/* + * vm_fault_wire: + * + * Wire down a range of virtual addresses in a map. + */ +void vm_fault_wire(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + + register vm_offset_t va; + register pmap_t pmap; + register vm_offset_t end_addr = entry->vme_end; + + pmap = vm_map_pmap(map); + + /* + * Inform the physical mapping system that the + * range of addresses may not fault, so that + * page tables and such can be locked down as well. + */ + + pmap_pageable(pmap, entry->vme_start, end_addr, FALSE); + + /* + * We simulate a fault to get the page and enter it + * in the physical map. + */ + + for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { + if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS) + (void) vm_fault(map, va, VM_PROT_NONE, TRUE, + FALSE, (void (*)()) 0); + } +} + +/* + * vm_fault_unwire: + * + * Unwire a range of virtual addresses in a map. + */ +void vm_fault_unwire(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + register vm_offset_t va; + register pmap_t pmap; + register vm_offset_t end_addr = entry->vme_end; + vm_object_t object; + + pmap = vm_map_pmap(map); + + object = (entry->is_sub_map) + ? VM_OBJECT_NULL : entry->object.vm_object; + + /* + * Since the pages are wired down, we must be able to + * get their mappings from the physical map system. + */ + + for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { + pmap_change_wiring(pmap, va, FALSE); + + if (object == VM_OBJECT_NULL) { + vm_map_lock_set_recursive(map); + (void) vm_fault(map, va, VM_PROT_NONE, TRUE, + FALSE, (void (*)()) 0); + vm_map_lock_clear_recursive(map); + } else { + vm_prot_t prot; + vm_page_t result_page; + vm_page_t top_page; + vm_fault_return_t result; + + do { + prot = VM_PROT_NONE; + + vm_object_lock(object); + vm_object_paging_begin(object); + result = vm_fault_page(object, + entry->offset + + (va - entry->vme_start), + VM_PROT_NONE, TRUE, + FALSE, &prot, + &result_page, + &top_page, + FALSE, (void (*)()) 0); + } while (result == VM_FAULT_RETRY); + + if (result != VM_FAULT_SUCCESS) + panic("vm_fault_unwire: failure"); + + vm_page_lock_queues(); + vm_page_unwire(result_page); + vm_page_unlock_queues(); + PAGE_WAKEUP_DONE(result_page); + + vm_fault_cleanup(result_page->object, top_page); + } + } + + /* + * Inform the physical mapping system that the range + * of addresses may fault, so that page tables and + * such may be unwired themselves. + */ + + pmap_pageable(pmap, entry->vme_start, end_addr, TRUE); +} + +/* + * vm_fault_wire_fast: + * + * Handle common case of a wire down page fault at the given address. + * If successful, the page is inserted into the associated physical map. 
+ * The map entry is passed in to avoid the overhead of a map lookup. + * + * NOTE: the given address should be truncated to the + * proper page address. + * + * KERN_SUCCESS is returned if the page fault is handled; otherwise, + * a standard error specifying why the fault is fatal is returned. + * + * The map in question must be referenced, and remains so. + * Caller has a read lock on the map. + * + * This is a stripped version of vm_fault() for wiring pages. Anything + * other than the common case will return KERN_FAILURE, and the caller + * is expected to call vm_fault(). + */ +kern_return_t vm_fault_wire_fast(map, va, entry) + vm_map_t map; + vm_offset_t va; + vm_map_entry_t entry; +{ + vm_object_t object; + vm_offset_t offset; + register vm_page_t m; + vm_prot_t prot; + + vm_stat.faults++; /* needs lock XXX */ +/* + * Recovery actions + */ + +#undef RELEASE_PAGE +#define RELEASE_PAGE(m) { \ + PAGE_WAKEUP_DONE(m); \ + vm_page_lock_queues(); \ + vm_page_unwire(m); \ + vm_page_unlock_queues(); \ +} + + +#undef UNLOCK_THINGS +#define UNLOCK_THINGS { \ + object->paging_in_progress--; \ + vm_object_unlock(object); \ +} + +#undef UNLOCK_AND_DEALLOCATE +#define UNLOCK_AND_DEALLOCATE { \ + UNLOCK_THINGS; \ + vm_object_deallocate(object); \ +} +/* + * Give up and have caller do things the hard way. + */ + +#define GIVE_UP { \ + UNLOCK_AND_DEALLOCATE; \ + return(KERN_FAILURE); \ +} + + + /* + * If this entry is not directly to a vm_object, bail out. + */ + if (entry->is_sub_map) + return(KERN_FAILURE); + + /* + * Find the backing store object and offset into it. + */ + + object = entry->object.vm_object; + offset = (va - entry->vme_start) + entry->offset; + prot = entry->protection; + + /* + * Make a reference to this object to prevent its + * disposal while we are messing with it. + */ + + vm_object_lock(object); + assert(object->ref_count > 0); + object->ref_count++; + object->paging_in_progress++; + + /* + * INVARIANTS (through entire routine): + * + * 1) At all times, we must either have the object + * lock or a busy page in some object to prevent + * some other thread from trying to bring in + * the same page. + * + * 2) Once we have a busy page, we must remove it from + * the pageout queues, so that the pageout daemon + * will not grab it away. + * + */ + + /* + * Look for page in top-level object. If it's not there or + * there's something going on, give up. + */ + m = vm_page_lookup(object, offset); + if ((m == VM_PAGE_NULL) || (m->error) || + (m->busy) || (m->absent) || (prot & m->page_lock)) { + GIVE_UP; + } + + /* + * Wire the page down now. All bail outs beyond this + * point must unwire the page. + */ + + vm_page_lock_queues(); + vm_page_wire(m); + vm_page_unlock_queues(); + + /* + * Mark page busy for other threads. + */ + assert(!m->busy); + m->busy = TRUE; + assert(!m->absent); + + /* + * Give up if the page is being written and there's a copy object + */ + if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) { + RELEASE_PAGE(m); + GIVE_UP; + } + + /* + * Put this page into the physical map. + * We have to unlock the object because pmap_enter + * may cause other faults. + */ + vm_object_unlock(object); + + PMAP_ENTER(map->pmap, va, m, prot, TRUE); + + /* + * Must relock object so that paging_in_progress can be cleared. + */ + vm_object_lock(object); + + /* + * Unlock everything, and return + */ + + PAGE_WAKEUP_DONE(m); + UNLOCK_AND_DEALLOCATE; + + return(KERN_SUCCESS); + +} + +/* + * Routine: vm_fault_copy_cleanup + * Purpose: + * Release a page used by vm_fault_copy. 
+ */ + +void vm_fault_copy_cleanup(page, top_page) + vm_page_t page; + vm_page_t top_page; +{ + vm_object_t object = page->object; + + vm_object_lock(object); + PAGE_WAKEUP_DONE(page); + vm_page_lock_queues(); + if (!page->active && !page->inactive) + vm_page_activate(page); + vm_page_unlock_queues(); + vm_fault_cleanup(object, top_page); +} + +/* + * Routine: vm_fault_copy + * + * Purpose: + * Copy pages from one virtual memory object to another -- + * neither the source nor destination pages need be resident. + * + * Before actually copying a page, the version associated with + * the destination address map wil be verified. + * + * In/out conditions: + * The caller must hold a reference, but not a lock, to + * each of the source and destination objects and to the + * destination map. + * + * Results: + * Returns KERN_SUCCESS if no errors were encountered in + * reading or writing the data. Returns KERN_INTERRUPTED if + * the operation was interrupted (only possible if the + * "interruptible" argument is asserted). Other return values + * indicate a permanent error in copying the data. + * + * The actual amount of data copied will be returned in the + * "copy_size" argument. In the event that the destination map + * verification failed, this amount may be less than the amount + * requested. + */ +kern_return_t vm_fault_copy( + src_object, + src_offset, + src_size, + dst_object, + dst_offset, + dst_map, + dst_version, + interruptible + ) + vm_object_t src_object; + vm_offset_t src_offset; + vm_size_t *src_size; /* INOUT */ + vm_object_t dst_object; + vm_offset_t dst_offset; + vm_map_t dst_map; + vm_map_version_t *dst_version; + boolean_t interruptible; +{ + vm_page_t result_page; + vm_prot_t prot; + + vm_page_t src_page; + vm_page_t src_top_page; + + vm_page_t dst_page; + vm_page_t dst_top_page; + + vm_size_t amount_done; + vm_object_t old_copy_object; + +#define RETURN(x) \ + MACRO_BEGIN \ + *src_size = amount_done; \ + MACRO_RETURN(x); \ + MACRO_END + + amount_done = 0; + do { /* while (amount_done != *src_size) */ + + RetrySourceFault: ; + + if (src_object == VM_OBJECT_NULL) { + /* + * No source object. We will just + * zero-fill the page in dst_object. 
+ */ + + src_page = VM_PAGE_NULL; + } else { + prot = VM_PROT_READ; + + vm_object_lock(src_object); + vm_object_paging_begin(src_object); + + switch (vm_fault_page(src_object, src_offset, + VM_PROT_READ, FALSE, interruptible, + &prot, &result_page, &src_top_page, + FALSE, (void (*)()) 0)) { + + case VM_FAULT_SUCCESS: + break; + case VM_FAULT_RETRY: + goto RetrySourceFault; + case VM_FAULT_INTERRUPTED: + RETURN(MACH_SEND_INTERRUPTED); + case VM_FAULT_MEMORY_SHORTAGE: + VM_PAGE_WAIT((void (*)()) 0); + goto RetrySourceFault; + case VM_FAULT_FICTITIOUS_SHORTAGE: + vm_page_more_fictitious(); + goto RetrySourceFault; + case VM_FAULT_MEMORY_ERROR: + return(KERN_MEMORY_ERROR); + } + + src_page = result_page; + + assert((src_top_page == VM_PAGE_NULL) == + (src_page->object == src_object)); + + assert ((prot & VM_PROT_READ) != VM_PROT_NONE); + + vm_object_unlock(src_page->object); + } + + RetryDestinationFault: ; + + prot = VM_PROT_WRITE; + + vm_object_lock(dst_object); + vm_object_paging_begin(dst_object); + + switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE, + FALSE, FALSE /* interruptible */, + &prot, &result_page, &dst_top_page, + FALSE, (void (*)()) 0)) { + + case VM_FAULT_SUCCESS: + break; + case VM_FAULT_RETRY: + goto RetryDestinationFault; + case VM_FAULT_INTERRUPTED: + if (src_page != VM_PAGE_NULL) + vm_fault_copy_cleanup(src_page, + src_top_page); + RETURN(MACH_SEND_INTERRUPTED); + case VM_FAULT_MEMORY_SHORTAGE: + VM_PAGE_WAIT((void (*)()) 0); + goto RetryDestinationFault; + case VM_FAULT_FICTITIOUS_SHORTAGE: + vm_page_more_fictitious(); + goto RetryDestinationFault; + case VM_FAULT_MEMORY_ERROR: + if (src_page != VM_PAGE_NULL) + vm_fault_copy_cleanup(src_page, + src_top_page); + return(KERN_MEMORY_ERROR); + } + assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE); + + dst_page = result_page; + + old_copy_object = dst_page->object->copy; + + vm_object_unlock(dst_page->object); + + if (!vm_map_verify(dst_map, dst_version)) { + + BailOut: ; + + if (src_page != VM_PAGE_NULL) + vm_fault_copy_cleanup(src_page, src_top_page); + vm_fault_copy_cleanup(dst_page, dst_top_page); + break; + } + + + vm_object_lock(dst_page->object); + if (dst_page->object->copy != old_copy_object) { + vm_object_unlock(dst_page->object); + vm_map_verify_done(dst_map, dst_version); + goto BailOut; + } + vm_object_unlock(dst_page->object); + + /* + * Copy the page, and note that it is dirty + * immediately. + */ + + if (src_page == VM_PAGE_NULL) + vm_page_zero_fill(dst_page); + else + vm_page_copy(src_page, dst_page); + dst_page->dirty = TRUE; + + /* + * Unlock everything, and return + */ + + vm_map_verify_done(dst_map, dst_version); + + if (src_page != VM_PAGE_NULL) + vm_fault_copy_cleanup(src_page, src_top_page); + vm_fault_copy_cleanup(dst_page, dst_top_page); + + amount_done += PAGE_SIZE; + src_offset += PAGE_SIZE; + dst_offset += PAGE_SIZE; + + } while (amount_done != *src_size); + + RETURN(KERN_SUCCESS); +#undef RETURN + + /*NOTREACHED*/ +} + + + + + +#ifdef notdef + +/* + * Routine: vm_fault_page_overwrite + * + * Description: + * A form of vm_fault_page that assumes that the + * resulting page will be overwritten in its entirety, + * making it unnecessary to obtain the correct *contents* + * of the page. + * + * Implementation: + * XXX Untested. Also unused. Eventually, this technology + * could be used in vm_fault_copy() to advantage. 
+ */ +vm_fault_return_t vm_fault_page_overwrite(dst_object, dst_offset, result_page) + register + vm_object_t dst_object; + vm_offset_t dst_offset; + vm_page_t *result_page; /* OUT */ +{ + register + vm_page_t dst_page; + +#define interruptible FALSE /* XXX */ + + while (TRUE) { + /* + * Look for a page at this offset + */ + + while ((dst_page = vm_page_lookup(dst_object, dst_offset)) + == VM_PAGE_NULL) { + /* + * No page, no problem... just allocate one. + */ + + dst_page = vm_page_alloc(dst_object, dst_offset); + if (dst_page == VM_PAGE_NULL) { + vm_object_unlock(dst_object); + VM_PAGE_WAIT((void (*)()) 0); + vm_object_lock(dst_object); + continue; + } + + /* + * Pretend that the memory manager + * write-protected the page. + * + * Note that we will be asking for write + * permission without asking for the data + * first. + */ + + dst_page->overwriting = TRUE; + dst_page->page_lock = VM_PROT_WRITE; + dst_page->absent = TRUE; + dst_object->absent_count++; + + break; + + /* + * When we bail out, we might have to throw + * away the page created here. + */ + +#define DISCARD_PAGE \ + MACRO_BEGIN \ + vm_object_lock(dst_object); \ + dst_page = vm_page_lookup(dst_object, dst_offset); \ + if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \ + VM_PAGE_FREE(dst_page); \ + vm_object_unlock(dst_object); \ + MACRO_END + } + + /* + * If the page is write-protected... + */ + + if (dst_page->page_lock & VM_PROT_WRITE) { + /* + * ... and an unlock request hasn't been sent + */ + + if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) { + vm_prot_t u; + kern_return_t rc; + + /* + * ... then send one now. + */ + + if (!dst_object->pager_ready) { + vm_object_assert_wait(dst_object, + VM_OBJECT_EVENT_PAGER_READY, + interruptible); + vm_object_unlock(dst_object); + thread_block((void (*)()) 0); + if (current_thread()->wait_result != + THREAD_AWAKENED) { + DISCARD_PAGE; + return(VM_FAULT_INTERRUPTED); + } + continue; + } + + u = dst_page->unlock_request |= VM_PROT_WRITE; + vm_object_unlock(dst_object); + + if ((rc = memory_object_data_unlock( + dst_object->pager, + dst_object->pager_request, + dst_offset + dst_object->paging_offset, + PAGE_SIZE, + u)) != KERN_SUCCESS) { + printf("vm_object_overwrite: memory_object_data_unlock failed\n"); + DISCARD_PAGE; + return((rc == MACH_SEND_INTERRUPTED) ? + VM_FAULT_INTERRUPTED : + VM_FAULT_MEMORY_ERROR); + } + vm_object_lock(dst_object); + continue; + } + + /* ... fall through to wait below */ + } else { + /* + * If the page isn't being used for other + * purposes, then we're done. + */ + if ( ! (dst_page->busy || dst_page->absent || dst_page->error) ) + break; + } + + PAGE_ASSERT_WAIT(dst_page, interruptible); + vm_object_unlock(dst_object); + thread_block((void (*)()) 0); + if (current_thread()->wait_result != THREAD_AWAKENED) { + DISCARD_PAGE; + return(VM_FAULT_INTERRUPTED); + } + } + + *result_page = dst_page; + return(VM_FAULT_SUCCESS); + +#undef interruptible +#undef DISCARD_PAGE +} + +#endif notdef |
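The in/out conditions documented for vm_fault_page are easiest to see from the caller's side: the object is handed in locked, referenced, and holding one paging reference (all consumed by the call on every return path), a non-success return requires that state to be re-established before retrying, and a successful return leaves result_page busy in a locked object that must eventually be released with PAGE_WAKEUP_DONE and vm_fault_cleanup. The following condensed sketch is modeled on the loop in vm_fault_unwire and on vm_fault_copy_cleanup above; the helper name touch_resident_page and its decision to fold every failure into KERN_FAILURE are illustrative assumptions, not part of this file.

/*
 * Minimal sketch of the vm_fault_page calling protocol, assuming the
 * same headers as vm_fault.c.  Helper name and error policy are
 * illustrative only.
 */
#include <mach/kern_return.h>
#include <vm/vm_fault.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

kern_return_t touch_resident_page(object, offset)
        vm_object_t     object;         /* referenced by the caller */
        vm_offset_t     offset;
{
        vm_prot_t               prot;
        vm_page_t               m, top_page;
        vm_fault_return_t       rc;

        do {
                prot = VM_PROT_READ;

                /*
                 * Donate a lock and one paging reference; both are
                 * consumed by vm_fault_page on every return path,
                 * so they must be re-established before each retry.
                 */
                vm_object_lock(object);
                vm_object_paging_begin(object);

                rc = vm_fault_page(object, offset,
                                   VM_PROT_READ,        /* fault_type */
                                   FALSE,               /* must_be_resident */
                                   FALSE,               /* interruptible */
                                   &prot, &m, &top_page,
                                   FALSE, (void (*)()) 0);
        } while (rc == VM_FAULT_RETRY);

        if (rc != VM_FAULT_SUCCESS)
                return(KERN_FAILURE);   /* illustrative error policy */

        /*
         * m is busy, and its object is locked with a paging
         * reference held.  A real caller would use the page here
         * (wire it, copy from it, etc.) before releasing it.
         */

        PAGE_WAKEUP_DONE(m);
        vm_page_lock_queues();
        if (!m->active && !m->inactive)
                vm_page_activate(m);
        vm_page_unlock_queues();
        vm_fault_cleanup(m->object, top_page);

        return(KERN_SUCCESS);
}

The resume and continuation arguments are passed as FALSE and zero here, the same non-continuation form used by vm_fault_wire, vm_fault_unwire, and vm_fault_copy in the file above.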