author     Thomas Bushnell <thomas@gnu.org>    1997-02-25 21:28:37 +0000
committer  Thomas Bushnell <thomas@gnu.org>    1997-02-25 21:28:37 +0000
commit     f07a4c844da9f0ecae5bbee1ab94be56505f26f7 (patch)
tree       12b07c7e578fc1a5f53dbfde2632408491ff2a70 /vm
download   gnumach-f07a4c844da9f0ecae5bbee1ab94be56505f26f7.tar.gz
           gnumach-f07a4c844da9f0ecae5bbee1ab94be56505f26f7.tar.bz2
           gnumach-f07a4c844da9f0ecae5bbee1ab94be56505f26f7.zip
Initial source
Diffstat (limited to 'vm')
-rw-r--r--  vm/memory_object.c             1191
-rw-r--r--  vm/memory_object.h               43
-rw-r--r--  vm/memory_object_default.cli     28
-rw-r--r--  vm/memory_object_user.cli        28
-rw-r--r--  vm/pmap.h                       267
-rw-r--r--  vm/vm_debug.c                   499
-rw-r--r--  vm/vm_external.c                159
-rw-r--r--  vm/vm_external.h                 89
-rw-r--r--  vm/vm_fault.c                  2182
-rw-r--r--  vm/vm_fault.h                    64
-rw-r--r--  vm/vm_init.c                     84
-rw-r--r--  vm/vm_kern.c                   1072
-rw-r--r--  vm/vm_kern.h                     63
-rw-r--r--  vm/vm_map.c                    5244
-rw-r--r--  vm/vm_map.h                     448
-rw-r--r--  vm/vm_object.c                 3090
-rw-r--r--  vm/vm_object.h                  374
-rw-r--r--  vm/vm_page.h                    322
-rw-r--r--  vm/vm_pageout.c                 924
-rw-r--r--  vm/vm_pageout.h                  46
-rw-r--r--  vm/vm_resident.c               1505
-rw-r--r--  vm/vm_user.c                    397
-rw-r--r--  vm/vm_user.h                     50
23 files changed, 18169 insertions, 0 deletions
diff --git a/vm/memory_object.c b/vm/memory_object.c
new file mode 100644
index 00000000..a2b0bed8
--- /dev/null
+++ b/vm/memory_object.c
@@ -0,0 +1,1191 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/memory_object.c
+ * Author: Michael Wayne Young
+ *
+ * External memory management interface control functions.
+ */
+
+/*
+ * Interface dependencies:
+ */
+
+#include <mach/std_types.h> /* For pointer_t */
+#include <mach/mach_types.h>
+
+#include <mach/kern_return.h>
+#include <vm/vm_object.h>
+#include <mach/memory_object.h>
+#include <mach/boolean.h>
+#include <mach/vm_prot.h>
+#include <mach/message.h>
+
+#include "memory_object_user.h"
+#include "memory_object_default.h"
+
+/*
+ * Implementation dependencies:
+ */
+#include <vm/memory_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/pmap.h> /* For copy_to_phys, pmap_clear_modify */
+#include <kern/thread.h> /* For current_thread() */
+#include <kern/host.h>
+#include <vm/vm_kern.h> /* For kernel_map, vm_move */
+#include <vm/vm_map.h> /* For vm_map_pageable */
+#include <ipc/ipc_port.h>
+
+#include <norma_vm.h>
+#include <norma_ipc.h>
+#if NORMA_VM
+#include <norma/xmm_server_rename.h>
+#endif NORMA_VM
+#include <mach_pagemap.h>
+#if MACH_PAGEMAP
+#include <vm/vm_external.h>
+#endif MACH_PAGEMAP
+
+typedef int memory_object_lock_result_t; /* moved from below */
+
+
+ipc_port_t memory_manager_default = IP_NULL;
+decl_simple_lock_data(,memory_manager_default_lock)
+
+/*
+ * Important note:
+ * All of these routines gain a reference to the
+ * object (first argument) as part of the automatic
+ * argument conversion. Explicit deallocation is necessary.
+ */
+
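
Every routine below follows the same skeleton as a consequence of the note above. A minimal sketch of that skeleton (the routine name is hypothetical; this is not part of the commit): a null object gained no reference during argument conversion, so it may be rejected immediately, while every other exit path must drop the reference explicitly.

kern_return_t memory_object_example_op(object)
	vm_object_t	object;
{
	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;	/* no reference was gained for a null object */

	/* ... operate on the object ... */

	vm_object_deallocate(object);		/* drop the reference gained by argument conversion */
	return KERN_SUCCESS;
}
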
+#if !NORMA_VM
+/*
+ * If successful, destroys the map copy object.
+ */
+kern_return_t memory_object_data_provided(object, offset, data, data_cnt,
+ lock_value)
+ vm_object_t object;
+ vm_offset_t offset;
+ pointer_t data;
+ unsigned int data_cnt;
+ vm_prot_t lock_value;
+{
+ return memory_object_data_supply(object, offset, (vm_map_copy_t) data,
+ data_cnt, lock_value, FALSE, IP_NULL,
+ 0);
+}
+#endif !NORMA_VM
+
+
+kern_return_t memory_object_data_supply(object, offset, data_copy, data_cnt,
+ lock_value, precious, reply_to, reply_to_type)
+ register
+ vm_object_t object;
+ register
+ vm_offset_t offset;
+ vm_map_copy_t data_copy;
+ unsigned int data_cnt;
+ vm_prot_t lock_value;
+ boolean_t precious;
+ ipc_port_t reply_to;
+ mach_msg_type_name_t reply_to_type;
+{
+ kern_return_t result = KERN_SUCCESS;
+ vm_offset_t error_offset = 0;
+ register
+ vm_page_t m;
+ register
+ vm_page_t data_m;
+ vm_size_t original_length;
+ vm_offset_t original_offset;
+ vm_page_t *page_list;
+ boolean_t was_absent;
+ vm_map_copy_t orig_copy = data_copy;
+
+ /*
+ * Look for bogus arguments
+ */
+
+ if (object == VM_OBJECT_NULL) {
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if (lock_value & ~VM_PROT_ALL) {
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if ((data_cnt % PAGE_SIZE) != 0) {
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ /*
+ * Adjust the offset from the memory object to the offset
+ * within the vm_object.
+ */
+
+ original_length = data_cnt;
+ original_offset = offset;
+
+ assert(data_copy->type == VM_MAP_COPY_PAGE_LIST);
+ page_list = &data_copy->cpy_page_list[0];
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ offset -= object->paging_offset;
+
+ /*
+ * Loop over copy stealing pages for pagein.
+ */
+
+ for (; data_cnt > 0 ; data_cnt -= PAGE_SIZE, offset += PAGE_SIZE) {
+
+ assert(data_copy->cpy_npages > 0);
+ data_m = *page_list;
+
+ if (data_m == VM_PAGE_NULL || data_m->tabled ||
+ data_m->error || data_m->absent || data_m->fictitious) {
+
+ panic("Data_supply: bad page");
+ }
+
+ /*
+ * Look up target page and check its state.
+ */
+
+retry_lookup:
+ m = vm_page_lookup(object,offset);
+ if (m == VM_PAGE_NULL) {
+ was_absent = FALSE;
+ }
+ else {
+ if (m->absent && m->busy) {
+
+ /*
+ * Page was requested. Free the busy
+ * page waiting for it. Insertion
+ * of new page happens below.
+ */
+
+ VM_PAGE_FREE(m);
+ was_absent = TRUE;
+ }
+ else {
+
+ /*
+ * Have to wait for page that is busy and
+ * not absent. This is probably going to
+ * be an error, but go back and check.
+ */
+ if (m->busy) {
+ PAGE_ASSERT_WAIT(m, FALSE);
+ vm_object_unlock(object);
+ thread_block((void (*)()) 0);
+ vm_object_lock(object);
+ goto retry_lookup;
+ }
+
+ /*
+ * Page already present; error.
+ * This is an error if data is precious.
+ */
+ result = KERN_MEMORY_PRESENT;
+ error_offset = offset + object->paging_offset;
+
+ break;
+ }
+ }
+
+ /*
+ * Ok to pagein page. Target object now has no page
+ * at offset. Set the page parameters, then drop
+ * in new page and set up pageout state. Object is
+ * still locked here.
+ *
+ * Must clear busy bit in page before inserting it.
+ * Ok to skip wakeup logic because nobody else
+ * can possibly know about this page.
+ */
+
+ data_m->busy = FALSE;
+ data_m->dirty = FALSE;
+ pmap_clear_modify(data_m->phys_addr);
+
+ data_m->page_lock = lock_value;
+ data_m->unlock_request = VM_PROT_NONE;
+ data_m->precious = precious;
+
+ vm_page_lock_queues();
+ vm_page_insert(data_m, object, offset);
+
+ if (was_absent)
+ vm_page_activate(data_m);
+ else
+ vm_page_deactivate(data_m);
+
+ vm_page_unlock_queues();
+
+ /*
+ * Null out this page list entry, and advance to next
+ * page.
+ */
+
+ *page_list++ = VM_PAGE_NULL;
+
+ if (--(data_copy->cpy_npages) == 0 &&
+ vm_map_copy_has_cont(data_copy)) {
+ vm_map_copy_t new_copy;
+
+ vm_object_unlock(object);
+
+ vm_map_copy_invoke_cont(data_copy, &new_copy, &result);
+
+ if (result == KERN_SUCCESS) {
+
+ /*
+ * Consume on success requires that
+ * we keep the original vm_map_copy
+ * around in case something fails.
+ * Free the old copy if it's not the original
+ */
+ if (data_copy != orig_copy) {
+ vm_map_copy_discard(data_copy);
+ }
+
+ if ((data_copy = new_copy) != VM_MAP_COPY_NULL)
+ page_list = &data_copy->cpy_page_list[0];
+
+ vm_object_lock(object);
+ }
+ else {
+ vm_object_lock(object);
+ error_offset = offset + object->paging_offset +
+ PAGE_SIZE;
+ break;
+ }
+ }
+ }
+
+ /*
+ * Send reply if one was requested.
+ */
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ if (vm_map_copy_has_cont(data_copy))
+ vm_map_copy_abort_cont(data_copy);
+
+ if (IP_VALID(reply_to)) {
+ memory_object_supply_completed(
+ reply_to, reply_to_type,
+ object->pager_request,
+ original_offset,
+ original_length,
+ result,
+ error_offset);
+ }
+
+ vm_object_deallocate(object);
+
+ /*
+	 * Consume on success: The final data copy must be
+	 * discarded if it is not the original. The original
+ * gets discarded only if this routine succeeds.
+ */
+ if (data_copy != orig_copy)
+ vm_map_copy_discard(data_copy);
+ if (result == KERN_SUCCESS)
+ vm_map_copy_discard(orig_copy);
+
+
+ return(result);
+}
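
The ownership rule for the map copies handled above is subtle, so here is a condensed restatement as a sketch (the helper name is hypothetical; it is not code from this commit). Copies produced by continuations always belong to this routine, while the caller's original copy is consumed only when the supply as a whole succeeds.

void
supply_discard_copies(orig_copy, data_copy, result)
	vm_map_copy_t	orig_copy;	/* the copy object the memory manager passed in */
	vm_map_copy_t	data_copy;	/* the copy currently in hand, possibly from a continuation */
	kern_return_t	result;
{
	/*
	 * A copy obtained from a continuation is ours to discard,
	 * whether or not the operation succeeded.
	 */
	if (data_copy != orig_copy && data_copy != VM_MAP_COPY_NULL)
		vm_map_copy_discard(data_copy);

	/*
	 * Consume on success: the caller's original copy is discarded
	 * only if the whole supply succeeded; on error it remains the
	 * caller's to reuse or discard.
	 */
	if (result == KERN_SUCCESS)
		vm_map_copy_discard(orig_copy);
}
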
+
+kern_return_t memory_object_data_error(object, offset, size, error_value)
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_size_t size;
+ kern_return_t error_value;
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size != round_page(size))
+ return(KERN_INVALID_ARGUMENT);
+
+#ifdef lint
+ /* Error value is ignored at this time */
+ error_value++;
+#endif
+
+ vm_object_lock(object);
+ offset -= object->paging_offset;
+
+ while (size != 0) {
+ register vm_page_t m;
+
+ m = vm_page_lookup(object, offset);
+ if ((m != VM_PAGE_NULL) && m->busy && m->absent) {
+ m->error = TRUE;
+ m->absent = FALSE;
+ vm_object_absent_release(object);
+
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ }
+
+ size -= PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+}
+
+kern_return_t memory_object_data_unavailable(object, offset, size)
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_size_t size;
+{
+#if MACH_PAGEMAP
+ vm_external_t existence_info = VM_EXTERNAL_NULL;
+#endif MACH_PAGEMAP
+
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size != round_page(size))
+ return(KERN_INVALID_ARGUMENT);
+
+#if MACH_PAGEMAP
+ if ((offset == 0) && (size > VM_EXTERNAL_LARGE_SIZE) &&
+ (object->existence_info == VM_EXTERNAL_NULL)) {
+ existence_info = vm_external_create(VM_EXTERNAL_SMALL_SIZE);
+ }
+#endif MACH_PAGEMAP
+
+ vm_object_lock(object);
+#if MACH_PAGEMAP
+ if (existence_info != VM_EXTERNAL_NULL) {
+ object->existence_info = existence_info;
+ }
+ if ((offset == 0) && (size > VM_EXTERNAL_LARGE_SIZE)) {
+ vm_object_unlock(object);
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+ }
+#endif MACH_PAGEMAP
+ offset -= object->paging_offset;
+
+ while (size != 0) {
+ register vm_page_t m;
+
+ /*
+ * We're looking for pages that are both busy and
+ * absent (waiting to be filled), converting them
+ * to just absent.
+ *
+ * Pages that are just busy can be ignored entirely.
+ */
+
+ m = vm_page_lookup(object, offset);
+ if ((m != VM_PAGE_NULL) && m->busy && m->absent) {
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ }
+ size -= PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: memory_object_lock_page
+ *
+ * Description:
+ * Perform the appropriate lock operations on the
+ * given page. See the description of
+ * "memory_object_lock_request" for the meanings
+ * of the arguments.
+ *
+ * Returns an indication that the operation
+ * completed, blocked, or that the page must
+ * be cleaned.
+ */
+
+#define MEMORY_OBJECT_LOCK_RESULT_DONE 0
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
+
+memory_object_lock_result_t memory_object_lock_page(m, should_return,
+ should_flush, prot)
+ vm_page_t m;
+ memory_object_return_t should_return;
+ boolean_t should_flush;
+ vm_prot_t prot;
+{
+ /*
+ * Don't worry about pages for which the kernel
+ * does not have any data.
+ */
+
+ if (m->absent)
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+
+ /*
+ * If we cannot change access to the page,
+ * either because a mapping is in progress
+ * (busy page) or because a mapping has been
+ * wired, then give up.
+ */
+
+ if (m->busy)
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+
+ assert(!m->fictitious);
+
+ if (m->wire_count != 0) {
+ /*
+ * If no change would take place
+ * anyway, return successfully.
+ *
+ * No change means:
+ * Not flushing AND
+ * No change to page lock [2 checks] AND
+ * Don't need to send page to manager
+ *
+ * Don't need to send page to manager means:
+ * No clean or return request OR (
+ * Page is not dirty [2 checks] AND (
+ * Page is not precious OR
+ * No request to return precious pages ))
+ *
+ * Now isn't that straightforward and obvious ?? ;-)
+ *
+ * XXX This doesn't handle sending a copy of a wired
+ * XXX page to the pager, but that will require some
+ * XXX significant surgery.
+ */
+
+ if (!should_flush &&
+ ((m->page_lock == prot) || (prot == VM_PROT_NO_CHANGE)) &&
+ ((should_return == MEMORY_OBJECT_RETURN_NONE) ||
+ (!m->dirty && !pmap_is_modified(m->phys_addr) &&
+ (!m->precious ||
+ should_return != MEMORY_OBJECT_RETURN_ALL)))) {
+ /*
+ * Restart page unlock requests,
+ * even though no change took place.
+ * [Memory managers may be expecting
+ * to see new requests.]
+ */
+ m->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(m);
+
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+ }
+
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+ }
+
+ /*
+ * If the page is to be flushed, allow
+ * that to be done as part of the protection.
+ */
+
+ if (should_flush)
+ prot = VM_PROT_ALL;
+
+ /*
+ * Set the page lock.
+ *
+ * If we are decreasing permission, do it now;
+ * let the fault handler take care of increases
+ * (pmap_page_protect may not increase protection).
+ */
+
+ if (prot != VM_PROT_NO_CHANGE) {
+ if ((m->page_lock ^ prot) & prot) {
+ pmap_page_protect(m->phys_addr, VM_PROT_ALL & ~prot);
+ }
+ m->page_lock = prot;
+
+ /*
+ * Restart any past unlock requests, even if no
+ * change resulted. If the manager explicitly
+ * requested no protection change, then it is assumed
+ * to be remembering past requests.
+ */
+
+ m->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(m);
+ }
+
+ /*
+ * Handle cleaning.
+ */
+
+ if (should_return != MEMORY_OBJECT_RETURN_NONE) {
+ /*
+ * Check whether the page is dirty. If
+ * write permission has not been removed,
+ * this may have unpredictable results.
+ */
+
+ if (!m->dirty)
+ m->dirty = pmap_is_modified(m->phys_addr);
+
+ if (m->dirty || (m->precious &&
+ should_return == MEMORY_OBJECT_RETURN_ALL)) {
+ /*
+ * If we weren't planning
+ * to flush the page anyway,
+ * we may need to remove the
+ * page from the pageout
+ * system and from physical
+ * maps now.
+ */
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+
+ if (!should_flush)
+ pmap_page_protect(m->phys_addr,
+ VM_PROT_NONE);
+
+ /*
+ * Cleaning a page will cause
+ * it to be flushed.
+ */
+
+ if (m->dirty)
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
+ else
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
+ }
+ }
+
+ /*
+ * Handle flushing
+ */
+
+ if (should_flush) {
+ VM_PAGE_FREE(m);
+ } else {
+ extern boolean_t vm_page_deactivate_hint;
+
+ /*
+ * XXX Make clean but not flush a paging hint,
+ * and deactivate the pages. This is a hack
+ * because it overloads flush/clean with
+ * implementation-dependent meaning. This only
+ * happens to pages that are already clean.
+ */
+
+ if (vm_page_deactivate_hint &&
+ (should_return != MEMORY_OBJECT_RETURN_NONE)) {
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+ }
+ }
+
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+}
+
+/*
+ * Routine: memory_object_lock_request [user interface]
+ *
+ * Description:
+ * Control use of the data associated with the given
+ * memory object. For each page in the given range,
+ * perform the following operations, in order:
+ * 1) restrict access to the page (disallow
+ * forms specified by "prot");
+ * 2) return data to the manager (if "should_return"
+ * is RETURN_DIRTY and the page is dirty, or
+ * "should_return" is RETURN_ALL and the page
+ * is either dirty or precious); and,
+ * 3) flush the cached copy (if "should_flush"
+ * is asserted).
+ * The set of pages is defined by a starting offset
+ * ("offset") and size ("size"). Only pages with the
+ * same page alignment as the starting offset are
+ * considered.
+ *
+ * A single acknowledgement is sent (to the "reply_to"
+ * port) when these actions are complete. If successful,
+ * the naked send right for reply_to is consumed.
+ */
+
+kern_return_t
+memory_object_lock_request(object, offset, size,
+ should_return, should_flush, prot,
+ reply_to, reply_to_type)
+ register vm_object_t object;
+ register vm_offset_t offset;
+ register vm_size_t size;
+ memory_object_return_t should_return;
+ boolean_t should_flush;
+ vm_prot_t prot;
+ ipc_port_t reply_to;
+ mach_msg_type_name_t reply_to_type;
+{
+ register vm_page_t m;
+ vm_offset_t original_offset = offset;
+ vm_size_t original_size = size;
+ vm_offset_t paging_offset = 0;
+ vm_object_t new_object = VM_OBJECT_NULL;
+ vm_offset_t new_offset = 0;
+ vm_offset_t last_offset = offset;
+ int page_lock_result;
+ int pageout_action = 0; /* '=0' to quiet lint */
+
+#define DATA_WRITE_MAX 32
+ vm_page_t holding_pages[DATA_WRITE_MAX];
+
+ /*
+ * Check for bogus arguments.
+ */
+ if (object == VM_OBJECT_NULL ||
+ ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE))
+ return (KERN_INVALID_ARGUMENT);
+
+ size = round_page(size);
+
+ /*
+ * Lock the object, and acquire a paging reference to
+ * prevent the memory_object and control ports from
+ * being destroyed.
+ */
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ offset -= object->paging_offset;
+
+ /*
+ * To avoid blocking while scanning for pages, save
+ * dirty pages to be cleaned all at once.
+ *
+ * XXXO A similar strategy could be used to limit the
+ * number of times that a scan must be restarted for
+ * other reasons. Those pages that would require blocking
+ * could be temporarily collected in another list, or
+ * their offsets could be recorded in a small array.
+ */
+
+ /*
+ * XXX NOTE: May want to consider converting this to a page list
+ * XXX vm_map_copy interface. Need to understand object
+ * XXX coalescing implications before doing so.
+ */
+
+#define PAGEOUT_PAGES \
+MACRO_BEGIN \
+ vm_map_copy_t copy; \
+ register int i; \
+ register vm_page_t hp; \
+ \
+ vm_object_unlock(object); \
+ \
+ (void) vm_map_copyin_object(new_object, 0, new_offset, &copy); \
+ \
+ if (object->use_old_pageout) { \
+ assert(pageout_action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN); \
+ (void) memory_object_data_write( \
+ object->pager, \
+ object->pager_request, \
+ paging_offset, \
+ (pointer_t) copy, \
+ new_offset); \
+ } \
+ else { \
+ (void) memory_object_data_return( \
+ object->pager, \
+ object->pager_request, \
+ paging_offset, \
+ (pointer_t) copy, \
+ new_offset, \
+ (pageout_action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
+ !should_flush); \
+ } \
+ \
+ vm_object_lock(object); \
+ \
+ for (i = 0; i < atop(new_offset); i++) { \
+ hp = holding_pages[i]; \
+ if (hp != VM_PAGE_NULL) \
+ VM_PAGE_FREE(hp); \
+ } \
+ \
+ new_object = VM_OBJECT_NULL; \
+MACRO_END
+
+ for (;
+ size != 0;
+ size -= PAGE_SIZE, offset += PAGE_SIZE)
+ {
+ /*
+ * Limit the number of pages to be cleaned at once.
+ */
+ if (new_object != VM_OBJECT_NULL &&
+ new_offset >= PAGE_SIZE * DATA_WRITE_MAX)
+ {
+ PAGEOUT_PAGES;
+ }
+
+ while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
+ switch ((page_lock_result = memory_object_lock_page(m,
+ should_return,
+ should_flush,
+ prot)))
+ {
+ case MEMORY_OBJECT_LOCK_RESULT_DONE:
+ /*
+ * End of a cluster of dirty pages.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ continue;
+ }
+ break;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+ /*
+ * Since it is necessary to block,
+ * clean any dirty pages now.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ continue;
+ }
+
+ PAGE_ASSERT_WAIT(m, FALSE);
+ vm_object_unlock(object);
+ thread_block((void (*)()) 0);
+ vm_object_lock(object);
+ continue;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+ /*
+ * The clean and return cases are similar.
+ *
+ * Mark the page busy since we unlock the
+ * object below.
+ */
+ m->busy = TRUE;
+
+ /*
+ * if this would form a discontiguous block,
+ * clean the old pages and start anew.
+ *
+ * NOTE: The first time through here, new_object
+ * is null, hiding the fact that pageout_action
+ * is not initialized.
+ */
+ if (new_object != VM_OBJECT_NULL &&
+ (last_offset != offset ||
+ pageout_action != page_lock_result)) {
+ PAGEOUT_PAGES;
+ }
+
+ vm_object_unlock(object);
+
+ /*
+ * If we have not already allocated an object
+ * for a range of pages to be written, do so
+ * now.
+ */
+ if (new_object == VM_OBJECT_NULL) {
+ new_object = vm_object_allocate(original_size);
+ new_offset = 0;
+ paging_offset = m->offset +
+ object->paging_offset;
+ pageout_action = page_lock_result;
+ }
+
+ /*
+ * Move or copy the dirty page into the
+ * new object.
+ */
+ m = vm_pageout_setup(m,
+ m->offset + object->paging_offset,
+ new_object,
+ new_offset,
+ should_flush);
+
+ /*
+ * Save the holding page if there is one.
+ */
+ holding_pages[atop(new_offset)] = m;
+ new_offset += PAGE_SIZE;
+ last_offset = offset + PAGE_SIZE;
+
+ vm_object_lock(object);
+ break;
+ }
+ break;
+ }
+ }
+
+ /*
+ * We have completed the scan for applicable pages.
+ * Clean any pages that have been saved.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ }
+
+ if (IP_VALID(reply_to)) {
+ vm_object_unlock(object);
+
+ /* consumes our naked send-once/send right for reply_to */
+ (void) memory_object_lock_completed(reply_to, reply_to_type,
+ object->pager_request, original_offset, original_size);
+
+ vm_object_lock(object);
+ }
+
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ vm_object_deallocate(object);
+
+ return (KERN_SUCCESS);
+}
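
From the memory manager's side, the description above corresponds to a single call on the object's memory control port. A sketch of a manager cleaning and flushing the first sixteen pages of an object follows; the values are illustrative and the user-side prototype is assumed here (the kernel-side reply_to_type argument is supplied implicitly by MIG):

	mach_port_t	memory_control;		/* control port received in memory_object_init */
	mach_port_t	reply_port;		/* will receive memory_object_lock_completed */
	kern_return_t	kr;

	kr = memory_object_lock_request(memory_control,
			0,				/* offset within the memory object */
			16 * vm_page_size,		/* size of the range */
			MEMORY_OBJECT_RETURN_DIRTY,	/* return dirty pages to the manager */
			TRUE,				/* flush the kernel's cached copies */
			VM_PROT_NO_CHANGE,		/* leave the page lock alone */
			reply_port);
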
+
+#if !NORMA_VM
+/*
+ * Old version of memory_object_lock_request.
+ */
+kern_return_t
+xxx_memory_object_lock_request(object, offset, size,
+ should_clean, should_flush, prot,
+ reply_to, reply_to_type)
+ register vm_object_t object;
+ register vm_offset_t offset;
+ register vm_size_t size;
+ boolean_t should_clean;
+ boolean_t should_flush;
+ vm_prot_t prot;
+ ipc_port_t reply_to;
+ mach_msg_type_name_t reply_to_type;
+{
+ register int should_return;
+
+ if (should_clean)
+ should_return = MEMORY_OBJECT_RETURN_DIRTY;
+ else
+ should_return = MEMORY_OBJECT_RETURN_NONE;
+
+ return(memory_object_lock_request(object,offset,size,
+ should_return, should_flush, prot,
+ reply_to, reply_to_type));
+}
+#endif !NORMA_VM
+
+kern_return_t
+memory_object_set_attributes_common(object, object_ready, may_cache,
+ copy_strategy, use_old_pageout)
+ vm_object_t object;
+ boolean_t object_ready;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+ boolean_t use_old_pageout;
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*
+ * Verify the attributes of importance
+ */
+
+ switch(copy_strategy) {
+ case MEMORY_OBJECT_COPY_NONE:
+ case MEMORY_OBJECT_COPY_CALL:
+ case MEMORY_OBJECT_COPY_DELAY:
+ case MEMORY_OBJECT_COPY_TEMPORARY:
+ break;
+ default:
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if (object_ready)
+ object_ready = TRUE;
+ if (may_cache)
+ may_cache = TRUE;
+
+ vm_object_lock(object);
+
+ /*
+ * Wake up anyone waiting for the ready attribute
+ * to become asserted.
+ */
+
+ if (object_ready && !object->pager_ready) {
+ object->use_old_pageout = use_old_pageout;
+ vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
+ }
+
+ /*
+ * Copy the attributes
+ */
+
+ object->can_persist = may_cache;
+ object->pager_ready = object_ready;
+ if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
+ object->temporary = TRUE;
+ } else {
+ object->copy_strategy = copy_strategy;
+ }
+
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+
+ return(KERN_SUCCESS);
+}
+
+#if !NORMA_VM
+
+/*
+ * XXX rpd claims that reply_to could be obviated in favor of a client
+ * XXX stub that made change_attributes an RPC. Need investigation.
+ */
+
+kern_return_t memory_object_change_attributes(object, may_cache,
+ copy_strategy, reply_to, reply_to_type)
+ vm_object_t object;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+ ipc_port_t reply_to;
+ mach_msg_type_name_t reply_to_type;
+{
+ kern_return_t result;
+
+ /*
+ * Do the work and throw away our object reference. It
+ * is important that the object reference be deallocated
+ * BEFORE sending the reply. The whole point of the reply
+ * is that it shows up after the terminate message that
+ * may be generated by setting the object uncacheable.
+ *
+ * XXX may_cache may become a tri-valued variable to handle
+ * XXX uncache if not in use.
+ */
+ result = memory_object_set_attributes_common(object, TRUE,
+ may_cache, copy_strategy,
+ FALSE);
+
+ if (IP_VALID(reply_to)) {
+
+ /* consumes our naked send-once/send right for reply_to */
+ (void) memory_object_change_completed(reply_to, reply_to_type,
+ may_cache, copy_strategy);
+
+ }
+
+ return(result);
+}
+
+kern_return_t
+memory_object_set_attributes(object, object_ready, may_cache, copy_strategy)
+ vm_object_t object;
+ boolean_t object_ready;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+{
+ return memory_object_set_attributes_common(object, object_ready,
+ may_cache, copy_strategy,
+ TRUE);
+}
+
+kern_return_t memory_object_ready(object, may_cache, copy_strategy)
+ vm_object_t object;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+{
+ return memory_object_set_attributes_common(object, TRUE,
+ may_cache, copy_strategy,
+ FALSE);
+}
+#endif !NORMA_VM
+
+kern_return_t memory_object_get_attributes(object, object_ready,
+ may_cache, copy_strategy)
+ vm_object_t object;
+ boolean_t *object_ready;
+ boolean_t *may_cache;
+ memory_object_copy_strategy_t *copy_strategy;
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ vm_object_lock(object);
+ *may_cache = object->can_persist;
+ *object_ready = object->pager_ready;
+ *copy_strategy = object->copy_strategy;
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * If successful, consumes the supplied naked send right.
+ */
+kern_return_t vm_set_default_memory_manager(host, default_manager)
+ host_t host;
+ ipc_port_t *default_manager;
+{
+ ipc_port_t current_manager;
+ ipc_port_t new_manager;
+ ipc_port_t returned_manager;
+
+ if (host == HOST_NULL)
+ return(KERN_INVALID_HOST);
+
+ new_manager = *default_manager;
+ simple_lock(&memory_manager_default_lock);
+ current_manager = memory_manager_default;
+
+ if (new_manager == IP_NULL) {
+ /*
+ * Retrieve the current value.
+ */
+
+ returned_manager = ipc_port_copy_send(current_manager);
+ } else {
+ /*
+ * Retrieve the current value,
+ * and replace it with the supplied value.
+ * We consume the supplied naked send right.
+ */
+
+ returned_manager = current_manager;
+ memory_manager_default = new_manager;
+
+ /*
+ * In case anyone's been waiting for a memory
+ * manager to be established, wake them up.
+ */
+
+ thread_wakeup((event_t) &memory_manager_default);
+ }
+
+ simple_unlock(&memory_manager_default_lock);
+
+ *default_manager = returned_manager;
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: memory_manager_default_reference
+ * Purpose:
+ * Returns a naked send right for the default
+ * memory manager. The returned right is always
+ * valid (not IP_NULL or IP_DEAD).
+ */
+
+ipc_port_t memory_manager_default_reference()
+{
+ ipc_port_t current_manager;
+
+ simple_lock(&memory_manager_default_lock);
+
+ while (current_manager = ipc_port_copy_send(memory_manager_default),
+ !IP_VALID(current_manager)) {
+ thread_sleep((event_t) &memory_manager_default,
+ simple_lock_addr(memory_manager_default_lock),
+ FALSE);
+ simple_lock(&memory_manager_default_lock);
+ }
+
+ simple_unlock(&memory_manager_default_lock);
+
+ return current_manager;
+}
+
+/*
+ * Routine: memory_manager_default_port
+ * Purpose:
+ * Returns true if the receiver for the port
+ * is the default memory manager.
+ *
+ * This is a hack to let ds_read_done
+ * know when it should keep memory wired.
+ */
+
+boolean_t memory_manager_default_port(port)
+ ipc_port_t port;
+{
+ ipc_port_t current;
+ boolean_t result;
+
+ simple_lock(&memory_manager_default_lock);
+ current = memory_manager_default;
+ if (IP_VALID(current)) {
+ /*
+ * There is no point in bothering to lock
+ * both ports, which would be painful to do.
+ * If the receive rights are moving around,
+ * we might be inaccurate.
+ */
+
+ result = port->ip_receiver == current->ip_receiver;
+ } else
+ result = FALSE;
+ simple_unlock(&memory_manager_default_lock);
+
+ return result;
+}
+
+void memory_manager_default_init()
+{
+ memory_manager_default = IP_NULL;
+ simple_lock_init(&memory_manager_default_lock);
+}
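
The default-manager routines at the end of the file are easiest to read from the caller's point of view. A sketch of a hypothetical kernel-side client (not part of this commit): the reference routine blocks until a default manager has been registered and returns a naked send right that the caller must release when finished.

	ipc_port_t	default_pager;

	default_pager = memory_manager_default_reference();	/* blocks until a manager exists */

	/* ... create internal objects backed by the default pager ... */

	if (memory_manager_default_port(some_port)) {
		/* replies arriving on some_port come from the default
		 * manager, so ds_read_done keeps the memory wired */
	}

	ipc_port_release_send(default_pager);	/* drop the naked send right */
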
diff --git a/vm/memory_object.h b/vm/memory_object.h
new file mode 100644
index 00000000..9afa0623
--- /dev/null
+++ b/vm/memory_object.h
@@ -0,0 +1,43 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _VM_MEMORY_OBJECT_H_
+#define _VM_MEMORY_OBJECT_H_
+
+#include <mach/boolean.h>
+
+/*
+ * We use "struct ipc_port *" instead of "ipc_port_t"
+ * to avoid include file circularities.
+ */
+
+extern struct ipc_port *memory_manager_default_reference();
+extern boolean_t memory_manager_default_port();
+extern void memory_manager_default_init();
+
+extern struct ipc_port *memory_manager_default;
+
+#endif _VM_MEMORY_OBJECT_H_
diff --git a/vm/memory_object_default.cli b/vm/memory_object_default.cli
new file mode 100644
index 00000000..998a9864
--- /dev/null
+++ b/vm/memory_object_default.cli
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Bryan Ford, University of Utah CSL
+ */
+/* This is a client presentation file. */
+
+#define KERNEL_USER 1
+#define SEQNOS 1
+
+#include <mach/memory_object_default.defs>
diff --git a/vm/memory_object_user.cli b/vm/memory_object_user.cli
new file mode 100644
index 00000000..2bba41fc
--- /dev/null
+++ b/vm/memory_object_user.cli
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Bryan Ford, University of Utah CSL
+ */
+/* This is a client presentation file. */
+
+#define KERNEL_USER 1
+#define SEQNOS 1
+
+#include <mach/memory_object.defs>
diff --git a/vm/pmap.h b/vm/pmap.h
new file mode 100644
index 00000000..f9a949ed
--- /dev/null
+++ b/vm/pmap.h
@@ -0,0 +1,267 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/pmap.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1985
+ *
+ * Machine address mapping definitions -- machine-independent
+ * section. [For machine-dependent section, see "machine/pmap.h".]
+ */
+
+#ifndef _VM_PMAP_H_
+#define _VM_PMAP_H_
+
+#include <machine/pmap.h>
+#include <mach/machine/vm_types.h>
+#include <mach/vm_prot.h>
+#include <mach/boolean.h>
+
+/*
+ * The following is a description of the interface to the
+ * machine-dependent "physical map" data structure. The module
+ * must provide a "pmap_t" data type that represents the
+ * set of valid virtual-to-physical addresses for one user
+ * address space. [The kernel address space is represented
+ * by a distinguished "pmap_t".] The routines described manage
+ * this type, install and update virtual-to-physical mappings,
+ * and perform operations on physical addresses common to
+ * many address spaces.
+ */
+
+/*
+ * Routines used for initialization.
+ * There is traditionally also a pmap_bootstrap,
+ * used very early by machine-dependent code,
+ * but it is not part of the interface.
+ */
+
+extern vm_offset_t pmap_steal_memory(); /* During VM initialization,
+ * steal a chunk of memory.
+ */
+extern unsigned int pmap_free_pages(); /* During VM initialization,
+ * report remaining unused
+ * physical pages.
+ */
+extern void pmap_startup(); /* During VM initialization,
+ * use remaining physical pages
+ * to allocate page frames.
+ */
+extern void pmap_init(); /* Initialization,
+ * after kernel runs
+ * in virtual memory.
+ */
+
+#ifndef MACHINE_PAGES
+/*
+ * If machine/pmap.h defines MACHINE_PAGES, it must implement
+ * the above functions. The pmap module has complete control.
+ * Otherwise, it must implement
+ * pmap_free_pages
+ * pmap_virtual_space
+ * pmap_next_page
+ * pmap_init
+ * and vm/vm_resident.c implements pmap_steal_memory and pmap_startup
+ * using pmap_free_pages, pmap_next_page, pmap_virtual_space,
+ * and pmap_enter. pmap_free_pages may over-estimate the number
+ * of unused physical pages, and pmap_next_page may return FALSE
+ * to indicate that there are no more unused pages to return.
+ * However, for best performance pmap_free_pages should be accurate.
+ */
+
+extern boolean_t pmap_next_page(); /* During VM initialization,
+ * return the next unused
+ * physical page.
+ */
+extern void pmap_virtual_space(); /* During VM initialization,
+ * report virtual space
+ * available for the kernel.
+ */
+#endif MACHINE_PAGES
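
To make the division of labor concrete, here is a rough sketch of how vm_resident-style startup code can sit on top of these primitives. The parameter shapes are assumed, since the old-style declarations above do not show them: pmap_next_page is taken to fill in a physical address and return FALSE when none remain, and pmap_virtual_space to report the kernel virtual range through two out-parameters.

	vm_offset_t	pa;
	vm_offset_t	vstart, vend;
	unsigned int	npages;

	npages = pmap_free_pages();		/* may over-estimate; used only for sizing tables */
	pmap_virtual_space(&vstart, &vend);	/* kernel virtual space left for the VM system */

	while (pmap_next_page(&pa)) {
		/* hand the unused physical page at pa to the resident-page module */
	}
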
+
+/*
+ * Routines to manage the physical map data structure.
+ */
+
+/* Create a pmap_t. */
+pmap_t pmap_create(vm_size_t size);
+
+/* Return the kernel's pmap_t. */
+#ifndef pmap_kernel
+extern pmap_t pmap_kernel(void);
+#endif pmap_kernel
+
+/* Gain and release a reference. */
+extern void pmap_reference(pmap_t pmap);
+extern void pmap_destroy(pmap_t pmap);
+
+/* Enter a mapping */
+extern void pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa,
+ vm_prot_t prot, boolean_t wired);
+
+
+/*
+ * Routines that operate on ranges of virtual addresses.
+ */
+
+/* Remove mappings. */
+void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+
+/* Change protections. */
+void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot);
+
+/*
+ * Routines to set up hardware state for physical maps to be used.
+ */
+extern void pmap_activate(); /* Prepare pmap_t to run
+ * on a given processor.
+ */
+extern void pmap_deactivate(); /* Release pmap_t from
+ * use on processor.
+ */
+
+
+/*
+ * Routines that operate on physical addresses.
+ */
+
+/* Restrict access to page. */
+void pmap_page_protect(vm_offset_t pa, vm_prot_t prot);
+
+/*
+ * Routines to manage reference/modify bits based on
+ * physical addresses, simulating them if not provided
+ * by the hardware.
+ */
+
+/* Clear reference bit */
+void pmap_clear_reference(vm_offset_t pa);
+
+/* Return reference bit */
+#ifndef pmap_is_referenced
+boolean_t pmap_is_referenced(vm_offset_t pa);
+#endif pmap_is_referenced
+
+/* Clear modify bit */
+void pmap_clear_modify(vm_offset_t pa);
+
+/* Return modify bit */
+boolean_t pmap_is_modified(vm_offset_t pa);
+
+
+/*
+ * Statistics routines
+ */
+extern void pmap_statistics(); /* Return statistics */
+
+#ifndef pmap_resident_count
+extern int pmap_resident_count();
+#endif pmap_resident_count
+
+/*
+ * Sundry required routines
+ */
+extern vm_offset_t pmap_extract(); /* Return a virtual-to-physical
+ * mapping, if possible.
+ */
+
+extern boolean_t pmap_access(); /* Is virtual address valid? */
+
+extern void pmap_collect(); /* Perform garbage
+ * collection, if any
+ */
+
+extern void pmap_change_wiring(); /* Specify pageability */
+
+#ifndef pmap_phys_address
+extern vm_offset_t pmap_phys_address(); /* Transform address
+ * returned by device
+ * driver mapping function
+ * to physical address
+ * known to this module.
+ */
+#endif pmap_phys_address
+#ifndef pmap_phys_to_frame
+extern int pmap_phys_to_frame(); /* Inverse of
+ * pmap_phys_address,
+ * for use by device driver
+ * mapping function in
+ * machine-independent
+ * pseudo-devices.
+ */
+#endif pmap_phys_to_frame
+
+/*
+ * Optional routines
+ */
+#ifndef pmap_copy
+extern void pmap_copy(); /* Copy range of
+ * mappings, if desired.
+ */
+#endif pmap_copy
+#ifndef pmap_attribute
+extern kern_return_t pmap_attribute(); /* Get/Set special
+ * memory attributes
+ */
+#endif pmap_attribute
+
+/*
+ * Routines defined as macros.
+ */
+#ifndef PMAP_ACTIVATE_USER
+#define PMAP_ACTIVATE_USER(pmap, thread, cpu) { \
+ if ((pmap) != kernel_pmap) \
+ PMAP_ACTIVATE(pmap, thread, cpu); \
+}
+#endif PMAP_ACTIVATE_USER
+
+#ifndef PMAP_DEACTIVATE_USER
+#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ if ((pmap) != kernel_pmap) \
+ PMAP_DEACTIVATE(pmap, thread, cpu); \
+}
+#endif PMAP_DEACTIVATE_USER
+
+#ifndef PMAP_ACTIVATE_KERNEL
+#define PMAP_ACTIVATE_KERNEL(cpu) \
+ PMAP_ACTIVATE(kernel_pmap, THREAD_NULL, cpu)
+#endif PMAP_ACTIVATE_KERNEL
+
+#ifndef PMAP_DEACTIVATE_KERNEL
+#define PMAP_DEACTIVATE_KERNEL(cpu) \
+ PMAP_DEACTIVATE(kernel_pmap, THREAD_NULL, cpu)
+#endif PMAP_DEACTIVATE_KERNEL
+
+/*
+ * Exported data structures
+ */
+
+extern pmap_t kernel_pmap; /* The kernel's map */
+
+#endif _VM_PMAP_H_
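
To make the split between per-map and per-page operations concrete, a short sketch using the prototypes declared above (the function names and the fault/clean scenario are hypothetical; this is not code from the commit):

/*
 * Install a mapping once a fault has been resolved to a physical page.
 */
void
example_enter_mapping(pmap, va, pa, wired)
	pmap_t		pmap;
	vm_offset_t	va, pa;
	boolean_t	wired;
{
	pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE, wired);
}

/*
 * Before writing a page back, revoke write access in every map that
 * still has it mapped and clear the (possibly simulated) modify bit.
 */
void
example_clean_page(pa)
	vm_offset_t	pa;
{
	if (pmap_is_modified(pa)) {
		pmap_page_protect(pa, VM_PROT_READ);
		pmap_clear_modify(pa);
	}
}
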
diff --git a/vm/vm_debug.c b/vm/vm_debug.c
new file mode 100644
index 00000000..17c8c311
--- /dev/null
+++ b/vm/vm_debug.c
@@ -0,0 +1,499 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_debug.c.
+ * Author: Rich Draves
+ * Date: March, 1990
+ *
+ * Exported kernel calls. See mach_debug/mach_debug.defs.
+ */
+
+#include <mach_vm_debug.h>
+#if MACH_VM_DEBUG
+
+#include <kern/thread.h>
+#include <mach/kern_return.h>
+#include <mach/machine/vm_types.h>
+#include <mach/memory_object.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_inherit.h>
+#include <mach/vm_param.h>
+#include <mach_debug/vm_info.h>
+#include <mach_debug/hash_info.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <kern/task.h>
+#include <kern/host.h>
+#include <ipc/ipc_port.h>
+
+
+
+/*
+ * Routine: vm_object_real_name
+ * Purpose:
+ * Convert a VM object to a name port.
+ * Conditions:
+ * Takes object and port locks.
+ * Returns:
+ * A naked send right for the object's name port,
+ * or IP_NULL if the object or its name port is null.
+ */
+
+ipc_port_t
+vm_object_real_name(object)
+ vm_object_t object;
+{
+ ipc_port_t port = IP_NULL;
+
+ if (object != VM_OBJECT_NULL) {
+ vm_object_lock(object);
+ if (object->pager_name != IP_NULL)
+ port = ipc_port_make_send(object->pager_name);
+ vm_object_unlock(object);
+ }
+
+ return port;
+}
+
+/*
+ * Routine: mach_vm_region_info [kernel call]
+ * Purpose:
+ * Retrieve information about a VM region,
+ * including info about the object chain.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Retrieve region/object info.
+ * KERN_INVALID_TASK The map is null.
+ * KERN_NO_SPACE There is no entry at/after the address.
+ */
+
+kern_return_t
+mach_vm_region_info(map, address, regionp, portp)
+ vm_map_t map;
+ vm_offset_t address;
+ vm_region_info_t *regionp;
+ ipc_port_t *portp;
+{
+ vm_map_t cmap; /* current map in traversal */
+ vm_map_t nmap; /* next map to look at */
+ vm_map_entry_t entry; /* entry in current map */
+ vm_object_t object;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ /* find the entry containing (or following) the address */
+
+ vm_map_lock_read(map);
+ for (cmap = map;;) {
+ /* cmap is read-locked */
+
+ if (!vm_map_lookup_entry(cmap, address, &entry)) {
+ entry = entry->vme_next;
+ if (entry == vm_map_to_entry(cmap)) {
+ if (map == cmap) {
+ vm_map_unlock_read(cmap);
+ return KERN_NO_SPACE;
+ }
+
+ /* back out to top-level & skip this submap */
+
+ address = vm_map_max(cmap);
+ vm_map_unlock_read(cmap);
+ vm_map_lock_read(map);
+ cmap = map;
+ continue;
+ }
+ }
+
+ if (entry->is_sub_map) {
+ /* move down to the sub map */
+
+ nmap = entry->object.sub_map;
+ vm_map_lock_read(nmap);
+ vm_map_unlock_read(cmap);
+ cmap = nmap;
+ continue;
+ } else {
+ break;
+ }
+ /*NOTREACHED*/
+ }
+
+
+ assert(entry->vme_start < entry->vme_end);
+
+ regionp->vri_start = entry->vme_start;
+ regionp->vri_end = entry->vme_end;
+
+ /* attributes from the real entry */
+
+ regionp->vri_protection = entry->protection;
+ regionp->vri_max_protection = entry->max_protection;
+ regionp->vri_inheritance = entry->inheritance;
+ regionp->vri_wired_count = entry->wired_count;
+ regionp->vri_user_wired_count = entry->user_wired_count;
+
+ object = entry->object.vm_object;
+ *portp = vm_object_real_name(object);
+ regionp->vri_object = (vm_offset_t) object;
+ regionp->vri_offset = entry->offset;
+ regionp->vri_needs_copy = entry->needs_copy;
+
+ regionp->vri_sharing = entry->is_shared;
+
+ vm_map_unlock_read(cmap);
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: mach_vm_object_info [kernel call]
+ * Purpose:
+ * Retrieve information about a VM object.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Retrieved object info.
+ * KERN_INVALID_ARGUMENT The object is null.
+ */
+
+kern_return_t
+mach_vm_object_info(object, infop, shadowp, copyp)
+ vm_object_t object;
+ vm_object_info_t *infop;
+ ipc_port_t *shadowp;
+ ipc_port_t *copyp;
+{
+ vm_object_info_t info;
+ vm_object_info_state_t state;
+ ipc_port_t shadow, copy;
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ /*
+ * Because of lock-ordering/deadlock considerations,
+ * we can't use vm_object_real_name for the copy object.
+ */
+
+ retry:
+ vm_object_lock(object);
+ copy = IP_NULL;
+ if (object->copy != VM_OBJECT_NULL) {
+ if (!vm_object_lock_try(object->copy)) {
+ vm_object_unlock(object);
+ simple_lock_pause(); /* wait a bit */
+ goto retry;
+ }
+
+ if (object->copy->pager_name != IP_NULL)
+ copy = ipc_port_make_send(object->copy->pager_name);
+ vm_object_unlock(object->copy);
+ }
+ shadow = vm_object_real_name(object->shadow);
+
+ info.voi_object = (vm_offset_t) object;
+ info.voi_pagesize = PAGE_SIZE;
+ info.voi_size = object->size;
+ info.voi_ref_count = object->ref_count;
+ info.voi_resident_page_count = object->resident_page_count;
+ info.voi_absent_count = object->absent_count;
+ info.voi_copy = (vm_offset_t) object->copy;
+ info.voi_shadow = (vm_offset_t) object->shadow;
+ info.voi_shadow_offset = object->shadow_offset;
+ info.voi_paging_offset = object->paging_offset;
+ info.voi_copy_strategy = object->copy_strategy;
+ info.voi_last_alloc = object->last_alloc;
+ info.voi_paging_in_progress = object->paging_in_progress;
+
+ state = 0;
+ if (object->pager_created)
+ state |= VOI_STATE_PAGER_CREATED;
+ if (object->pager_initialized)
+ state |= VOI_STATE_PAGER_INITIALIZED;
+ if (object->pager_ready)
+ state |= VOI_STATE_PAGER_READY;
+ if (object->can_persist)
+ state |= VOI_STATE_CAN_PERSIST;
+ if (object->internal)
+ state |= VOI_STATE_INTERNAL;
+ if (object->temporary)
+ state |= VOI_STATE_TEMPORARY;
+ if (object->alive)
+ state |= VOI_STATE_ALIVE;
+ if (object->lock_in_progress)
+ state |= VOI_STATE_LOCK_IN_PROGRESS;
+ if (object->lock_restart)
+ state |= VOI_STATE_LOCK_RESTART;
+ if (object->use_old_pageout)
+ state |= VOI_STATE_USE_OLD_PAGEOUT;
+ info.voi_state = state;
+ vm_object_unlock(object);
+
+ *infop = info;
+ *shadowp = shadow;
+ *copyp = copy;
+ return KERN_SUCCESS;
+}
+
+#define VPI_STATE_NODATA (VPI_STATE_BUSY|VPI_STATE_FICTITIOUS| \
+ VPI_STATE_PRIVATE|VPI_STATE_ABSENT)
+
+/*
+ * Routine: mach_vm_object_pages [kernel call]
+ * Purpose:
+ * Retrieve information about the pages in a VM object.
+ * Conditions:
+ * Nothing locked. Obeys CountInOut protocol.
+ * Returns:
+ * KERN_SUCCESS Retrieved object info.
+ * KERN_INVALID_ARGUMENT The object is null.
+ * KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
+ */
+
+kern_return_t
+mach_vm_object_pages(object, pagesp, countp)
+ vm_object_t object;
+ vm_page_info_array_t *pagesp;
+ natural_t *countp;
+{
+ vm_size_t size;
+ vm_offset_t addr;
+ vm_page_info_t *pages;
+ unsigned int potential, actual, count;
+ vm_page_t p;
+ kern_return_t kr;
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ /* start with in-line memory */
+
+ pages = *pagesp;
+ potential = *countp;
+
+ for (size = 0;;) {
+ vm_object_lock(object);
+ actual = object->resident_page_count;
+ if (actual <= potential)
+ break;
+ vm_object_unlock(object);
+
+ if (pages != *pagesp)
+ kmem_free(ipc_kernel_map, addr, size);
+
+ size = round_page(actual * sizeof *pages);
+ kr = kmem_alloc(ipc_kernel_map, &addr, size);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ pages = (vm_page_info_t *) addr;
+ potential = size/sizeof *pages;
+ }
+ /* object is locked, we have enough wired memory */
+
+ count = 0;
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ vm_page_info_t *info = &pages[count++];
+ vm_page_info_state_t state = 0;
+
+ info->vpi_offset = p->offset;
+ info->vpi_phys_addr = p->phys_addr;
+ info->vpi_wire_count = p->wire_count;
+ info->vpi_page_lock = p->page_lock;
+ info->vpi_unlock_request = p->unlock_request;
+
+ if (p->busy)
+ state |= VPI_STATE_BUSY;
+ if (p->wanted)
+ state |= VPI_STATE_WANTED;
+ if (p->tabled)
+ state |= VPI_STATE_TABLED;
+ if (p->fictitious)
+ state |= VPI_STATE_FICTITIOUS;
+ if (p->private)
+ state |= VPI_STATE_PRIVATE;
+ if (p->absent)
+ state |= VPI_STATE_ABSENT;
+ if (p->error)
+ state |= VPI_STATE_ERROR;
+ if (p->dirty)
+ state |= VPI_STATE_DIRTY;
+ if (p->precious)
+ state |= VPI_STATE_PRECIOUS;
+ if (p->overwriting)
+ state |= VPI_STATE_OVERWRITING;
+
+ if (((state & (VPI_STATE_NODATA|VPI_STATE_DIRTY)) == 0) &&
+ pmap_is_modified(p->phys_addr)) {
+ state |= VPI_STATE_DIRTY;
+ p->dirty = TRUE;
+ }
+
+ vm_page_lock_queues();
+ if (p->inactive)
+ state |= VPI_STATE_INACTIVE;
+ if (p->active)
+ state |= VPI_STATE_ACTIVE;
+ if (p->laundry)
+ state |= VPI_STATE_LAUNDRY;
+ if (p->free)
+ state |= VPI_STATE_FREE;
+ if (p->reference)
+ state |= VPI_STATE_REFERENCE;
+
+ if (((state & (VPI_STATE_NODATA|VPI_STATE_REFERENCE)) == 0) &&
+ pmap_is_referenced(p->phys_addr)) {
+ state |= VPI_STATE_REFERENCE;
+ p->reference = TRUE;
+ }
+ vm_page_unlock_queues();
+
+ info->vpi_state = state;
+ }
+
+ if (object->resident_page_count != count)
+ panic("mach_vm_object_pages");
+ vm_object_unlock(object);
+
+ if (pages == *pagesp) {
+ /* data fit in-line; nothing to deallocate */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ kmem_free(ipc_kernel_map, addr, size);
+
+ *countp = 0;
+ } else {
+ vm_size_t size_used, rsize_used;
+ vm_map_copy_t copy;
+
+ /* kmem_alloc doesn't zero memory */
+
+ size_used = actual * sizeof *pages;
+ rsize_used = round_page(size_used);
+
+ if (rsize_used != size)
+ kmem_free(ipc_kernel_map,
+ addr + rsize_used, size - rsize_used);
+
+ if (size_used != rsize_used)
+ bzero((char *) (addr + size_used),
+ rsize_used - size_used);
+
+ kr = vm_map_copyin(ipc_kernel_map, addr, rsize_used,
+ TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+
+ *pagesp = (vm_page_info_t *) copy;
+ *countp = actual;
+ }
+
+ return KERN_SUCCESS;
+}
+
+#endif MACH_VM_DEBUG
+
+/*
+ * Routine: host_virtual_physical_table_info
+ * Purpose:
+ * Return information about the VP table.
+ * Conditions:
+ * Nothing locked. Obeys CountInOut protocol.
+ * Returns:
+ * KERN_SUCCESS Returned information.
+ * KERN_INVALID_HOST The host is null.
+ * KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
+ */
+
+kern_return_t
+host_virtual_physical_table_info(host, infop, countp)
+ host_t host;
+ hash_info_bucket_array_t *infop;
+ natural_t *countp;
+{
+ vm_offset_t addr;
+ vm_size_t size = 0;/* '=0' to quiet gcc warnings */
+ hash_info_bucket_t *info;
+ unsigned int potential, actual;
+ kern_return_t kr;
+
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+
+ /* start with in-line data */
+
+ info = *infop;
+ potential = *countp;
+
+ for (;;) {
+ actual = vm_page_info(info, potential);
+ if (actual <= potential)
+ break;
+
+ /* allocate more memory */
+
+ if (info != *infop)
+ kmem_free(ipc_kernel_map, addr, size);
+
+ size = round_page(actual * sizeof *info);
+ kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size);
+ if (kr != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+
+ info = (hash_info_bucket_t *) addr;
+ potential = size/sizeof *info;
+ }
+
+ if (info == *infop) {
+ /* data fit in-line; nothing to deallocate */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ kmem_free(ipc_kernel_map, addr, size);
+
+ *countp = 0;
+ } else {
+ vm_map_copy_t copy;
+ vm_size_t used;
+
+ used = round_page(actual * sizeof *info);
+
+ if (used != size)
+ kmem_free(ipc_kernel_map, addr + used, size - used);
+
+ kr = vm_map_copyin(ipc_kernel_map, addr, used,
+ TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+
+ *infop = (hash_info_bucket_t *) copy;
+ *countp = actual;
+ }
+
+ return KERN_SUCCESS;
+}
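
Both exported calls above use the same CountInOut idiom: start with the caller's in-line buffer and, if the data does not fit, repeatedly allocate a larger buffer in ipc_kernel_map and retry. The sketch below isolates that idiom with a hypothetical element type and fill routine (not from this commit); on success the caller either returns the in-line data or hands the allocated buffer out of line via vm_map_copyin, as both routines above do.

	example_info_t	*info = *infop;		/* caller-supplied in-line buffer */
	unsigned int	potential = *countp;	/* its capacity, in elements */
	unsigned int	actual;
	vm_offset_t	addr;
	vm_size_t	size = 0;
	kern_return_t	kr;

	for (;;) {
		actual = example_fill(info, potential);	/* hypothetical: returns elements needed */
		if (actual <= potential)
			break;

		if (info != *infop)			/* free the previous, too-small attempt */
			kmem_free(ipc_kernel_map, addr, size);

		size = round_page(actual * sizeof *info);
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (kr != KERN_SUCCESS)
			return KERN_RESOURCE_SHORTAGE;

		info = (example_info_t *) addr;
		potential = size / sizeof *info;
	}
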
diff --git a/vm/vm_external.c b/vm/vm_external.c
new file mode 100644
index 00000000..da591375
--- /dev/null
+++ b/vm/vm_external.c
@@ -0,0 +1,159 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * This module maintains information about the presence of
+ * pages not in memory. Since an external memory object
+ * must maintain a complete knowledge of its contents, this
+ * information takes the form of hints.
+ */
+
+#include <mach/boolean.h>
+#include <kern/zalloc.h>
+#include <vm/vm_external.h>
+#include <mach/vm_param.h>
+#include <kern/assert.h>
+
+
+
+boolean_t vm_external_unsafe = FALSE;
+
+zone_t vm_external_zone = ZONE_NULL;
+
+/*
+ * The implementation uses bit arrays to record whether
+ * a page has been written to external storage. For
+ * convenience, these bit arrays come in two sizes
+ * (measured in bytes).
+ */
+
+#define SMALL_SIZE (VM_EXTERNAL_SMALL_SIZE/8)
+#define LARGE_SIZE (VM_EXTERNAL_LARGE_SIZE/8)
+
+zone_t vm_object_small_existence_map_zone;
+zone_t vm_object_large_existence_map_zone;
+
+
+vm_external_t vm_external_create(size)
+ vm_offset_t size;
+{
+ vm_external_t result;
+ vm_size_t bytes;
+
+ if (vm_external_zone == ZONE_NULL)
+ return(VM_EXTERNAL_NULL);
+
+ result = (vm_external_t) zalloc(vm_external_zone);
+ result->existence_map = (char *) 0;
+
+ bytes = (atop(size) + 07) >> 3;
+ if (bytes <= SMALL_SIZE) {
+ result->existence_map =
+ (char *) zalloc(vm_object_small_existence_map_zone);
+ result->existence_size = SMALL_SIZE;
+ } else if (bytes <= LARGE_SIZE) {
+ result->existence_map =
+ (char *) zalloc(vm_object_large_existence_map_zone);
+ result->existence_size = LARGE_SIZE;
+ }
+ return(result);
+}
+
+void vm_external_destroy(e)
+ vm_external_t e;
+{
+ if (e == VM_EXTERNAL_NULL)
+ return;
+
+ if (e->existence_map != (char *) 0) {
+ if (e->existence_size <= SMALL_SIZE) {
+ zfree(vm_object_small_existence_map_zone,
+ (vm_offset_t) e->existence_map);
+ } else {
+ zfree(vm_object_large_existence_map_zone,
+ (vm_offset_t) e->existence_map);
+ }
+ }
+ zfree(vm_external_zone, (vm_offset_t) e);
+}
+
+vm_external_state_t _vm_external_state_get(e, offset)
+ vm_external_t e;
+ vm_offset_t offset;
+{
+ unsigned
+ int bit, byte;
+
+ if (vm_external_unsafe ||
+ (e == VM_EXTERNAL_NULL) ||
+ (e->existence_map == (char *) 0))
+ return(VM_EXTERNAL_STATE_UNKNOWN);
+
+ bit = atop(offset);
+ byte = bit >> 3;
+ if (byte >= e->existence_size) return (VM_EXTERNAL_STATE_UNKNOWN);
+ return( (e->existence_map[byte] & (1 << (bit & 07))) ?
+ VM_EXTERNAL_STATE_EXISTS : VM_EXTERNAL_STATE_ABSENT );
+}
+
+void vm_external_state_set(e, offset, state)
+ vm_external_t e;
+ vm_offset_t offset;
+ vm_external_state_t state;
+{
+ unsigned
+ int bit, byte;
+
+ if ((e == VM_EXTERNAL_NULL) || (e->existence_map == (char *) 0))
+ return;
+
+ if (state != VM_EXTERNAL_STATE_EXISTS)
+ return;
+
+ bit = atop(offset);
+ byte = bit >> 3;
+ if (byte >= e->existence_size) return;
+ e->existence_map[byte] |= (1 << (bit & 07));
+}
+
+void vm_external_module_initialize()
+{
+ vm_size_t size = (vm_size_t) sizeof(struct vm_external);
+
+ vm_external_zone = zinit(size, 16*1024*size, size,
+ 0, "external page bitmaps");
+
+ vm_object_small_existence_map_zone = zinit(SMALL_SIZE,
+ round_page(LARGE_SIZE * SMALL_SIZE),
+ round_page(SMALL_SIZE),
+ ZONE_EXHAUSTIBLE,
+ "object small existence maps");
+
+ vm_object_large_existence_map_zone = zinit(LARGE_SIZE,
+ round_page(8 * LARGE_SIZE),
+ round_page(LARGE_SIZE),
+ ZONE_EXHAUSTIBLE,
+ "object large existence maps");
+}
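
To make the byte/bit indexing in _vm_external_state_get and vm_external_state_set concrete, here is a small user-space model that can be compiled and run on its own. The 4 KB page size, the 16-byte map (the SMALL_SIZE case), and the model_* names are assumptions for illustration; only the indexing arithmetic mirrors the code above.

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SHIFT 12                           /* assumed 4 KB pages */
    #define atop(x)    ((unsigned long)(x) >> PAGE_SHIFT)
    #define MAP_BYTES  16                           /* like SMALL_SIZE: 128 pages */

    static unsigned char existence_map[MAP_BYTES];

    static void model_state_set(unsigned long offset)
    {
        unsigned long bit = atop(offset);
        unsigned long byte = bit >> 3;

        if (byte < MAP_BYTES)                       /* same bounds check as above */
            existence_map[byte] |= (unsigned char)(1 << (bit & 07));
    }

    static int model_state_exists(unsigned long offset)
    {
        unsigned long bit = atop(offset);
        unsigned long byte = bit >> 3;

        if (byte >= MAP_BYTES)
            return 0;                               /* really "unknown" in the kernel */
        return (existence_map[byte] & (1 << (bit & 07))) != 0;
    }

    int main(void)
    {
        memset(existence_map, 0, sizeof existence_map);
        model_state_set(5 * 4096);                  /* page 5 written to backing store */
        printf("page 5: %d, page 6: %d\n",
               model_state_exists(5 * 4096),
               model_state_exists(6 * 4096));       /* prints "page 5: 1, page 6: 0" */
        return 0;
    }
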
diff --git a/vm/vm_external.h b/vm/vm_external.h
new file mode 100644
index 00000000..70ffd650
--- /dev/null
+++ b/vm/vm_external.h
@@ -0,0 +1,89 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#ifndef _VM_VM_EXTERNAL_H_
+#define _VM_VM_EXTERNAL_H_
+
+/*
+ * External page management hint technology
+ *
+ * The data structure exported by this module maintains
+ * a (potentially incomplete) map of the pages written
+ * to external storage for a range of virtual memory.
+ */
+
+/*
+ * The data structure representing the state of pages
+ * on external storage.
+ */
+
+typedef struct vm_external {
+ int existence_size; /* Size of the following bitmap */
+ char *existence_map; /* A bitmap of pages that have
+ * been written to backing
+ * storage.
+ */
+ int existence_count;/* Number of bits turned on in
+ * existence_map.
+ */
+} *vm_external_t;
+
+#define VM_EXTERNAL_NULL ((vm_external_t) 0)
+
+#define VM_EXTERNAL_SMALL_SIZE 128
+#define VM_EXTERNAL_LARGE_SIZE 8192
+
+/*
+ * The states that may be recorded for a page of external storage.
+ */
+
+typedef int vm_external_state_t;
+#define VM_EXTERNAL_STATE_EXISTS 1
+#define VM_EXTERNAL_STATE_UNKNOWN 2
+#define VM_EXTERNAL_STATE_ABSENT 3
+
+
+/*
+ * Routines exported by this module.
+ */
+
+extern void vm_external_module_initialize();
+ /* Initialize the module */
+
+extern vm_external_t vm_external_create(); /* Create a vm_external_t */
+extern void vm_external_destroy(); /* Destroy one */
+
+extern void vm_external_state_set();/* Set state of a page. */
+#define vm_external_state_get(e,offset) (((e) != VM_EXTERNAL_NULL) ? \
+ _vm_external_state_get(e, offset) : \
+ VM_EXTERNAL_STATE_UNKNOWN)
+ /* Retrieve the state
+ * for a given page, if known.
+ */
+extern vm_external_state_t _vm_external_state_get();
+ /* HIDDEN routine */
+
+#endif /* _VM_VM_EXTERNAL_H_ */
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
new file mode 100644
index 00000000..e45687cd
--- /dev/null
+++ b/vm/vm_fault.c
@@ -0,0 +1,2182 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1994,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm_fault.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Page fault handling module.
+ */
+#include <mach_pagemap.h>
+#include <mach_kdb.h>
+#include <mach_pcsample.h>
+
+
+#include <vm/vm_fault.h>
+#include <mach/kern_return.h>
+#include <mach/message.h> /* for error codes */
+#include <kern/counters.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <mach/vm_statistics.h>
+#include <vm/vm_pageout.h>
+#include <mach/vm_param.h>
+#include <mach/memory_object.h>
+#include "memory_object_user.h"
+ /* For memory_object_data_{request,unlock} */
+#include <kern/mach_param.h>
+#include <kern/macro_help.h>
+#include <kern/zalloc.h>
+
+#if MACH_PCSAMPLE
+#include <kern/pc_sample.h>
+#endif
+
+
+
+/*
+ * State needed by vm_fault_continue.
+ * This is a little hefty to drop directly
+ * into the thread structure.
+ */
+typedef struct vm_fault_state {
+ struct vm_map *vmf_map;
+ vm_offset_t vmf_vaddr;
+ vm_prot_t vmf_fault_type;
+ boolean_t vmf_change_wiring;
+ void (*vmf_continuation)();
+ vm_map_version_t vmf_version;
+ boolean_t vmf_wired;
+ struct vm_object *vmf_object;
+ vm_offset_t vmf_offset;
+ vm_prot_t vmf_prot;
+
+ boolean_t vmfp_backoff;
+ struct vm_object *vmfp_object;
+ vm_offset_t vmfp_offset;
+ struct vm_page *vmfp_first_m;
+ vm_prot_t vmfp_access;
+} vm_fault_state_t;
+
+zone_t vm_fault_state_zone = 0;
+
+int vm_object_absent_max = 50;
+
+int vm_fault_debug = 0;
+
+boolean_t vm_fault_dirty_handling = FALSE;
+boolean_t vm_fault_interruptible = TRUE;
+
+boolean_t software_reference_bits = TRUE;
+
+#if MACH_KDB
+extern struct db_watchpoint *db_watchpoint_list;
+#endif /* MACH_KDB */
+
+/*
+ * Routine: vm_fault_init
+ * Purpose:
+ * Initialize our private data structures.
+ */
+void vm_fault_init()
+{
+ vm_fault_state_zone = zinit(sizeof(vm_fault_state_t),
+ THREAD_MAX * sizeof(vm_fault_state_t),
+ sizeof(vm_fault_state_t),
+ 0, "vm fault state");
+}
+
+/*
+ * Routine: vm_fault_cleanup
+ * Purpose:
+ * Clean up the result of vm_fault_page.
+ * Results:
+ * The paging reference for "object" is released.
+ * "object" is unlocked.
+ * If "top_page" is not null, "top_page" is
+ * freed and the paging reference for the object
+ * containing it is released.
+ *
+ * In/out conditions:
+ * "object" must be locked.
+ */
+void
+vm_fault_cleanup(object, top_page)
+ register vm_object_t object;
+ register vm_page_t top_page;
+{
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ if (top_page != VM_PAGE_NULL) {
+ object = top_page->object;
+ vm_object_lock(object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+}
+
+
+#if MACH_PCSAMPLE
+/*
+ * Do PC sampling on current thread, assuming
+ * that it is the thread taking this page fault.
+ *
+ * Must check for THREAD_NULL, since faults
+ * can occur before threads are running.
+ */
+
+#define vm_stat_sample(flavor) \
+ MACRO_BEGIN \
+ thread_t _thread_ = current_thread(); \
+ \
+ if (_thread_ != THREAD_NULL) \
+ take_pc_sample_macro(_thread_, (flavor)); \
+ MACRO_END
+
+#else
+#define vm_stat_sample(x)
+#endif /* MACH_PCSAMPLE */
+
+
+
+/*
+ * Routine: vm_fault_page
+ * Purpose:
+ * Find the resident page for the virtual memory
+ * specified by the given virtual memory object
+ * and offset.
+ * Additional arguments:
+ * The required permissions for the page are given
+ * in "fault_type". Desired permissions are included
+ * in "protection".
+ *
+ * If the desired page is known to be resident (for
+ * example, because it was previously wired down), asserting
+ * the "must_be_resident" parameter will speed the search.
+ *
+ * If the operation can be interrupted (by thread_abort
+ * or thread_terminate), then the "interruptible"
+ * parameter should be asserted.
+ *
+ * Results:
+ * The page containing the proper data is returned
+ * in "result_page".
+ *
+ * In/out conditions:
+ * The source object must be locked and referenced,
+ * and must donate one paging reference. The reference
+ * is not affected. The paging reference and lock are
+ * consumed.
+ *
+ * If the call succeeds, the object in which "result_page"
+ * resides is left locked and holding a paging reference.
+ * If this is not the original object, a busy page in the
+ * original object is returned in "top_page", to prevent other
+ * callers from pursuing this same data, along with a paging
+ * reference for the original object. The "top_page" should
+ * be destroyed when this guarantee is no longer required.
+ * The "result_page" is also left busy. It is not removed
+ * from the pageout queues.
+ */
+vm_fault_return_t vm_fault_page(first_object, first_offset,
+ fault_type, must_be_resident, interruptible,
+ protection,
+ result_page, top_page,
+ resume, continuation)
+ /* Arguments: */
+ vm_object_t first_object; /* Object to begin search */
+ vm_offset_t first_offset; /* Offset into object */
+ vm_prot_t fault_type; /* What access is requested */
+ boolean_t must_be_resident;/* Must page be resident? */
+ boolean_t interruptible; /* May fault be interrupted? */
+ /* Modifies in place: */
+ vm_prot_t *protection; /* Protection for mapping */
+ /* Returns: */
+ vm_page_t *result_page; /* Page found, if successful */
+ vm_page_t *top_page; /* Page in top object, if
+ * not result_page.
+ */
+ /* More arguments: */
+ boolean_t resume; /* We are restarting. */
+ void (*continuation)(); /* Continuation for blocking. */
+{
+ register
+ vm_page_t m;
+ register
+ vm_object_t object;
+ register
+ vm_offset_t offset;
+ vm_page_t first_m;
+ vm_object_t next_object;
+ vm_object_t copy_object;
+ boolean_t look_for_page;
+ vm_prot_t access_required;
+
+#ifdef CONTINUATIONS
+ if (resume) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ if (state->vmfp_backoff)
+ goto after_block_and_backoff;
+
+ object = state->vmfp_object;
+ offset = state->vmfp_offset;
+ first_m = state->vmfp_first_m;
+ access_required = state->vmfp_access;
+ goto after_thread_block;
+ }
+#else /* not CONTINUATIONS */
+ assert(continuation == 0);
+ assert(!resume);
+#endif /* not CONTINUATIONS */
+
+ vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY);
+ vm_stat.faults++; /* needs lock XXX */
+
+/*
+ * Recovery actions
+ */
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ if (vm_fault_dirty_handling
+#if MACH_KDB
+ /*
+ * If there are watchpoints set, then
+ * we don't want to give away write permission
+ * on a read fault. Make the task write fault,
+ * so that the watchpoint code notices the access.
+ */
+ || db_watchpoint_list
+#endif /* MACH_KDB */
+ ) {
+ /*
+ * If we aren't asking for write permission,
+ * then don't give it away. We're using write
+ * faults to set the dirty bit.
+ */
+ if (!(fault_type & VM_PROT_WRITE))
+ *protection &= ~VM_PROT_WRITE;
+ }
+
+ if (!vm_fault_interruptible)
+ interruptible = FALSE;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * Note that we cannot hold any locks during the
+ * pager access or when waiting for memory, so
+ * we use a busy page then.
+ *
+ * Note also that we aren't as concerned about more than
+ * one thread attempting to memory_object_data_unlock
+ * the same page at once, so we don't hold the page
+ * as busy then, but do record the highest unlock
+ * value so far. [Unlock requests may also be delivered
+ * out of order.]
+ *
+ * 2) To prevent another thread from racing us down the
+ * shadow chain and entering a new page in the top
+ * object before we do, we must keep a busy page in
+ * the top object while following the shadow chain.
+ *
+ * 3) We must increment paging_in_progress on any object
+ * for which we have a busy page, to prevent
+ * vm_object_collapse from removing the busy page
+ * without our noticing.
+ *
+ * 4) We leave busy pages on the pageout queues.
+ * If the pageout daemon comes across a busy page,
+ * it will remove the page from the pageout queues.
+ */
+
+ /*
+ * Search for the page at object/offset.
+ */
+
+ object = first_object;
+ offset = first_offset;
+ first_m = VM_PAGE_NULL;
+ access_required = fault_type;
+
+ /*
+ * See whether this page is resident
+ */
+
+ while (TRUE) {
+ m = vm_page_lookup(object, offset);
+ if (m != VM_PAGE_NULL) {
+ /*
+ * If the page is being brought in,
+ * wait for it and then retry.
+ *
+ * A possible optimization: if the page
+ * is known to be resident, we can ignore
+ * pages that are absent (regardless of
+ * whether they're busy).
+ */
+
+ if (m->busy) {
+ kern_return_t wait_result;
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ vm_object_unlock(object);
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case
+ * thread_block discards
+ * our kernel stack.
+ */
+
+ state->vmfp_backoff = FALSE;
+ state->vmfp_object = object;
+ state->vmfp_offset = offset;
+ state->vmfp_first_m = first_m;
+ state->vmfp_access =
+ access_required;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_busy_user++);
+ thread_block(continuation);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ counter(c_vm_fault_page_block_busy_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_thread_block:
+ wait_result = current_thread()->wait_result;
+ vm_object_lock(object);
+ if (wait_result != THREAD_AWAKENED) {
+ vm_fault_cleanup(object, first_m);
+ if (wait_result == THREAD_RESTART)
+ return(VM_FAULT_RETRY);
+ else
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ /*
+ * If the page is in error, give up now.
+ */
+
+ if (m->error) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * If the page isn't busy, but is absent,
+ * then it was deemed "unavailable".
+ */
+
+ if (m->absent) {
+ /*
+ * Remove the non-existent page (unless it's
+ * in the top object) and move on down to the
+ * next object (if there is one).
+ */
+
+ offset += object->shadow_offset;
+ access_required = VM_PROT_READ;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ vm_page_t real_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * Absent page at bottom of shadow
+ * chain; zero fill the page we left
+ * busy in the first object, and flush
+ * the absent page. But first we
+ * need to allocate a real page.
+ */
+
+ real_m = vm_page_grab();
+ if (real_m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ if (object != first_object) {
+ VM_PAGE_FREE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = first_object;
+ offset = first_offset;
+ m = first_m;
+ first_m = VM_PAGE_NULL;
+ vm_object_lock(object);
+ }
+
+ VM_PAGE_FREE(m);
+ assert(real_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(real_m, object, offset);
+ vm_page_unlock_queues();
+ m = real_m;
+
+ /*
+ * Drop the lock while zero filling
+ * page. Then break because this
+ * is the page we wanted. Checking
+ * the page lock is a waste of time;
+ * this page was either absent or
+ * newly allocated -- in both cases
+ * it can't be page locked by a pager.
+ */
+ vm_object_unlock(object);
+
+ vm_page_zero_fill(m);
+
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+
+ vm_stat.zero_fill_count++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ } else {
+ if (must_be_resident) {
+ vm_object_paging_end(object);
+ } else if (object != first_object) {
+ vm_object_paging_end(object);
+ VM_PAGE_FREE(m);
+ } else {
+ first_m = m;
+ m->absent = FALSE;
+ vm_object_absent_release(object);
+ m->busy = TRUE;
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+ vm_object_lock(next_object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ continue;
+ }
+ }
+
+ /*
+ * If the desired access to this page has
+ * been locked out, request that it be unlocked.
+ */
+
+ if (access_required & m->page_lock) {
+ if ((access_required & m->unlock_request) != access_required) {
+ vm_prot_t new_unlock_request;
+ kern_return_t rc;
+
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ goto block_and_backoff;
+ }
+
+ new_unlock_request = m->unlock_request =
+ (access_required | m->unlock_request);
+ vm_object_unlock(object);
+ if ((rc = memory_object_data_unlock(
+ object->pager,
+ object->pager_request,
+ offset + object->paging_offset,
+ PAGE_SIZE,
+ new_unlock_request))
+ != KERN_SUCCESS) {
+ printf("vm_fault: memory_object_data_unlock failed\n");
+ vm_object_lock(object);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(object);
+ continue;
+ }
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ goto block_and_backoff;
+ }
+
+ /*
+ * We mark the page busy and leave it on
+ * the pageout queues. If the pageout
+ * daemon comes across it, then it will
+ * remove the page.
+ */
+
+ if (!software_reference_bits) {
+ vm_page_lock_queues();
+ if (m->inactive) {
+ vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS);
+ vm_stat.reactivations++;
+ }
+
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+ break;
+ }
+
+ look_for_page =
+ (object->pager_created)
+#if MACH_PAGEMAP
+ && (vm_external_state_get(object->existence_info, offset + object->paging_offset) !=
+ VM_EXTERNAL_STATE_ABSENT)
+#endif /* MACH_PAGEMAP */
+ ;
+
+ if ((look_for_page || (object == first_object))
+ && !must_be_resident) {
+ /*
+ * Allocate a new page for this object/offset
+ * pair.
+ */
+
+ m = vm_page_grab_fictitious();
+ if (m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_FICTITIOUS_SHORTAGE);
+ }
+
+ vm_page_lock_queues();
+ vm_page_insert(m, object, offset);
+ vm_page_unlock_queues();
+ }
+
+ if (look_for_page && !must_be_resident) {
+ kern_return_t rc;
+
+ /*
+ * If the memory manager is not ready, we
+ * cannot make requests.
+ */
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ if (object->internal) {
+ /*
+ * Requests to the default pager
+ * must reserve a real page in advance,
+ * because the pager's data_provided call
+ * won't block waiting for pages.
+ */
+
+ if (m->fictitious && !vm_page_convert(m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+ } else if (object->absent_count >
+ vm_object_absent_max) {
+ /*
+ * If there are too many outstanding page
+ * requests pending on this object, we
+ * wait for them to be resolved now.
+ */
+
+ vm_object_absent_assert_wait(object, interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ /*
+ * Indicate that the page is waiting for data
+ * from the memory manager.
+ */
+
+ m->absent = TRUE;
+ object->absent_count++;
+
+ /*
+ * We have a busy page, so we can
+ * release the object lock.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * Call the memory manager to retrieve the data.
+ */
+
+ vm_stat.pageins++;
+ vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS);
+
+ if ((rc = memory_object_data_request(object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required)) != KERN_SUCCESS) {
+ if (rc != MACH_SEND_INTERRUPTED)
+ printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, %d\n",
+ "memory_object_data_request",
+ object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required, rc);
+ /*
+ * Don't want to leave a busy page around,
+ * but the data request may have blocked,
+ * so check if it's still there and busy.
+ */
+ vm_object_lock(object);
+ if (m == vm_page_lookup(object,offset) &&
+ m->absent && m->busy)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * Retry with same object/offset, since new data may
+ * be in a different page (i.e., m is meaningless at
+ * this point).
+ */
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * For the XP system, the only case in which we get here is if
+ * the object has no pager (or we are unwiring). If the pager doesn't
+ * have the page this is handled in the m->absent case above
+ * (and if you change things here you should look above).
+ */
+ if (object == first_object)
+ first_m = m;
+ else
+ {
+ assert(m == VM_PAGE_NULL);
+ }
+
+ /*
+ * Move on to the next object. Lock the next
+ * object before unlocking the current one.
+ */
+ access_required = VM_PROT_READ;
+
+ offset += object->shadow_offset;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ assert(!must_be_resident);
+
+ /*
+ * If there's no object left, fill the page
+ * in the top object with zeros. But first we
+ * need to allocate a real page.
+ */
+
+ if (object != first_object) {
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ object = first_object;
+ offset = first_offset;
+ vm_object_lock(object);
+ }
+
+ m = first_m;
+ assert(m->object == object);
+ first_m = VM_PAGE_NULL;
+
+ if (m->fictitious && !vm_page_convert(m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, VM_PAGE_NULL);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_zero_fill(m);
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+ vm_stat.zero_fill_count++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ }
+ else {
+ vm_object_lock(next_object);
+ if ((object != first_object) || must_be_resident)
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ }
+ }
+
+ /*
+ * PAGE HAS BEEN FOUND.
+ *
+ * This page (m) is:
+ * busy, so that we can play with it;
+ * not absent, so that nobody else will fill it;
+ * possibly eligible for pageout;
+ *
+ * The top-level page (first_m) is:
+ * VM_PAGE_NULL if the page was found in the
+ * top-level object;
+ * busy, not absent, and ineligible for pageout.
+ *
+ * The current object (object) is locked. A paging
+ * reference is held for the current and top-level
+ * objects.
+ */
+
+#if EXTRA_ASSERTIONS
+ assert(m->busy && !m->absent);
+ assert((first_m == VM_PAGE_NULL) ||
+ (first_m->busy && !first_m->absent &&
+ !first_m->active && !first_m->inactive));
+#endif /* EXTRA_ASSERTIONS */
+
+ /*
+ * If the page is being written, but isn't
+ * already owned by the top-level object,
+ * we have to copy it into a new page owned
+ * by the top-level object.
+ */
+
+ if (object != first_object) {
+ /*
+ * We only really need to copy if we
+ * want to write it.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ vm_page_t copy_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * If we try to collapse first_object at this
+ * point, we may deadlock when we try to get
+ * the lock on an intermediate object (since we
+ * have the bottom object locked). We can't
+ * unlock the bottom object, because the page
+ * we found may move (by collapse) if we do.
+ *
+ * Instead, we first copy the page. Then, when
+ * we have no more use for the bottom object,
+ * we unlock it and try to collapse.
+ *
+ * Note that we copy the page even if we didn't
+ * need to... that's the breaks.
+ */
+
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_grab();
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_copy(m, copy_m);
+ vm_object_lock(object);
+
+ /*
+ * If another map is truly sharing this
+ * page with us, we have to flush all
+ * uses of the original page, since we
+ * can't distinguish those which want the
+ * original from those which need the
+ * new copy.
+ *
+ * XXXO If we know that only one map has
+ * access to this page, then we could
+ * avoid the pmap_page_protect() call.
+ */
+
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ vm_page_unlock_queues();
+
+ /*
+ * We no longer need the old page or object.
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_stat.cow_faults++;
+ vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS);
+ object = first_object;
+ offset = first_offset;
+
+ vm_object_lock(object);
+ VM_PAGE_FREE(first_m);
+ first_m = VM_PAGE_NULL;
+ assert(copy_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(copy_m, object, offset);
+ vm_page_unlock_queues();
+ m = copy_m;
+
+ /*
+ * Now that we've gotten the copy out of the
+ * way, let's try to collapse the top object.
+ * But we have to play ugly games with
+ * paging_in_progress to do that...
+ */
+
+ vm_object_paging_end(object);
+ vm_object_collapse(object);
+ vm_object_paging_begin(object);
+ }
+ else {
+ *protection &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Now check whether the page needs to be pushed into the
+ * copy object. The use of asymmetric copy on write for
+ * shared temporary objects means that we may do two copies to
+ * satisfy the fault; one above to get the page from a
+ * shadowed object, and one here to push it into the copy.
+ */
+
+ while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
+ vm_offset_t copy_offset;
+ vm_page_t copy_m;
+
+ /*
+ * If the page is being written, but hasn't been
+ * copied to the copy-object, we have to copy it there.
+ */
+
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ *protection &= ~VM_PROT_WRITE;
+ break;
+ }
+
+ /*
+ * If the page was guaranteed to be resident,
+ * we must have already performed the copy.
+ */
+
+ if (must_be_resident)
+ break;
+
+ /*
+ * Try to get the lock on the copy_object.
+ */
+ if (!vm_object_lock_try(copy_object)) {
+ vm_object_unlock(object);
+
+ simple_lock_pause(); /* wait a bit */
+
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Make another reference to the copy-object,
+ * to keep it from disappearing during the
+ * copy.
+ */
+ assert(copy_object->ref_count > 0);
+ copy_object->ref_count++;
+
+ /*
+ * Does the page exist in the copy?
+ */
+ copy_offset = first_offset - copy_object->shadow_offset;
+ copy_m = vm_page_lookup(copy_object, copy_offset);
+ if (copy_m != VM_PAGE_NULL) {
+ if (copy_m->busy) {
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, interruptible);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ goto block_and_backoff;
+ }
+ }
+ else {
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_alloc(copy_object, copy_offset);
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ /*
+ * Must copy page into copy-object.
+ */
+
+ vm_page_copy(m, copy_m);
+
+ /*
+ * If the old page was in use by any users
+ * of the copy-object, it must be removed
+ * from all pmaps. (We can't know which
+ * pmaps use it.)
+ */
+
+ vm_page_lock_queues();
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ copy_m->dirty = TRUE;
+ vm_page_unlock_queues();
+
+ /*
+ * If there's a pager, then immediately
+ * page out this page, using the "initialize"
+ * option. Else, we use the copy.
+ */
+
+ if (!copy_object->pager_created) {
+ vm_page_lock_queues();
+ vm_page_activate(copy_m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(copy_m);
+ } else {
+ /*
+ * The page is already ready for pageout:
+ * not on pageout queues and busy.
+ * Unlock everything except the
+ * copy_object itself.
+ */
+
+ vm_object_unlock(object);
+
+ /*
+ * Write the page to the copy-object,
+ * flushing it from the kernel.
+ */
+
+ vm_pageout_page(copy_m, TRUE, TRUE);
+
+ /*
+ * Since the pageout may have
+ * temporarily dropped the
+ * copy_object's lock, we
+ * check whether we'll have
+ * to deallocate the hard way.
+ */
+
+ if ((copy_object->shadow != object) ||
+ (copy_object->ref_count == 1)) {
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Pick back up the old object's
+ * lock. [It is safe to do so,
+ * since it must be deeper in the
+ * object tree.]
+ */
+
+ vm_object_lock(object);
+ }
+
+ /*
+ * Because we're pushing a page upward
+ * in the object tree, we must restart
+ * any faults that are waiting here.
+ * [Note that this is an expansion of
+ * PAGE_WAKEUP that uses the THREAD_RESTART
+ * wait result]. Can't turn off the page's
+ * busy bit because we're not done with it.
+ */
+
+ if (m->wanted) {
+ m->wanted = FALSE;
+ thread_wakeup_with_result((event_t) m,
+ THREAD_RESTART);
+ }
+ }
+
+ /*
+ * The reference count on copy_object must be
+ * at least 2: one for our extra reference,
+ * and at least one from the outside world
+ * (we checked that when we last locked
+ * copy_object).
+ */
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+
+ break;
+ }
+
+ *result_page = m;
+ *top_page = first_m;
+
+ /*
+ * If the page can be written, assume that it will be.
+ * [Earlier, we restrict the permission to allow write
+ * access only if the fault so required, so we don't
+ * mark read-only data as dirty.]
+ */
+
+ if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
+ m->dirty = TRUE;
+
+ return(VM_FAULT_SUCCESS);
+
+ block_and_backoff:
+ vm_fault_cleanup(object, first_m);
+
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case we must restart.
+ */
+
+ state->vmfp_backoff = TRUE;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_backoff_user++);
+ thread_block(continuation);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ counter(c_vm_fault_page_block_backoff_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_block_and_backoff:
+ if (current_thread()->wait_result == THREAD_AWAKENED)
+ return VM_FAULT_RETRY;
+ else
+ return VM_FAULT_INTERRUPTED;
+
+#undef RELEASE_PAGE
+}
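+
+/*
+ *	Illustrative sketch (not part of the original source): the minimal
+ *	calling convention for vm_fault_page above.  It mirrors the retry
+ *	loops used later in this file by vm_fault_unwire and vm_fault_copy;
+ *	handling of the shortage return codes is elided.
+ *
+ *		do {
+ *			prot = VM_PROT_READ;
+ *			vm_object_lock(object);
+ *			vm_object_paging_begin(object);
+ *			result = vm_fault_page(object, offset, VM_PROT_READ,
+ *					FALSE, TRUE, &prot,
+ *					&result_page, &top_page,
+ *					FALSE, (void (*)()) 0);
+ *		} while (result == VM_FAULT_RETRY);
+ *
+ *		if (result == VM_FAULT_SUCCESS) {
+ *			... use result_page (it is left busy) ...
+ *			PAGE_WAKEUP_DONE(result_page);
+ *			vm_fault_cleanup(result_page->object, top_page);
+ *		}
+ */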
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * If an explicit (expression) continuation is supplied,
+ * then we call the continuation instead of returning.
+ * Implementation:
+ * Explicit continuations make this a little icky,
+ * because it hasn't been rewritten to embrace CPS.
+ * Instead, we have resume arguments for vm_fault and
+ * vm_fault_page, to let the fault computation continue.
+ *
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
+
+#ifdef CONTINUATIONS
+void
+vm_fault_continue()
+{
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ (void) vm_fault(state->vmf_map,
+ state->vmf_vaddr,
+ state->vmf_fault_type,
+ state->vmf_change_wiring,
+ TRUE, state->vmf_continuation);
+ /*NOTREACHED*/
+}
+#endif /* CONTINUATIONS */
+
+kern_return_t vm_fault(map, vaddr, fault_type, change_wiring,
+ resume, continuation)
+ vm_map_t map;
+ vm_offset_t vaddr;
+ vm_prot_t fault_type;
+ boolean_t change_wiring;
+ boolean_t resume;
+ void (*continuation)();
+{
+ vm_map_version_t version; /* Map version for verification */
+ boolean_t wired; /* Should mapping be wired down? */
+ vm_object_t object; /* Top-level object */
+ vm_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_object_t old_copy_object; /* Saved copy object */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ kern_return_t kr;
+
+ register
+ vm_page_t m; /* Fast access to result_page */
+
+#ifdef CONTINUATIONS
+ if (resume) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Retrieve cached variables and
+ * continue vm_fault_page.
+ */
+
+ object = state->vmf_object;
+ if (object == VM_OBJECT_NULL)
+ goto RetryFault;
+ version = state->vmf_version;
+ wired = state->vmf_wired;
+ offset = state->vmf_offset;
+ prot = state->vmf_prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ TRUE, vm_fault_continue);
+ goto after_vm_fault_page;
+ }
+
+ if (continuation != (void (*)()) 0) {
+ /*
+ * We will probably need to save state.
+ */
+
+ char * state;
+
+ /*
+ * if this assignment stmt is written as
+ * 'active_threads[cpu_number()] = zalloc()',
+ * cpu_number may be evaluated before zalloc;
+ * if zalloc blocks, cpu_number will be wrong
+ */
+
+ state = (char *) zalloc(vm_fault_state_zone);
+ current_thread()->ith_other = state;
+
+ }
+#else /* not CONTINUATIONS */
+ assert(continuation == 0);
+ assert(!resume);
+#endif /* not CONTINUATIONS */
+
+ RetryFault: ;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+
+ if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ &object, &offset,
+ &prot, &wired)) != KERN_SUCCESS) {
+ goto done;
+ }
+
+ /*
+ * If the page is wired, we must fault for the current protection
+ * value, to avoid further faults.
+ */
+
+ if (wired)
+ fault_type = prot;
+
+ /*
+ * Make a reference to this object to
+ * prevent its disposal while we are messing with
+ * it. Once we have the reference, the map is free
+ * to be diddled. Since objects reference their
+ * shadows (and copies), they will stay around as well.
+ */
+
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_paging_begin(object);
+
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables, in case vm_fault_page discards
+ * our kernel stack and we have to restart.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+
+ state->vmf_version = version;
+ state->vmf_wired = wired;
+ state->vmf_object = object;
+ state->vmf_offset = offset;
+ state->vmf_prot = prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, vm_fault_continue);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, (void (*)()) 0);
+ }
+ after_vm_fault_page:
+
+ /*
+ * If we didn't succeed, lose the object reference immediately.
+ */
+
+ if (kr != VM_FAULT_SUCCESS)
+ vm_object_deallocate(object);
+
+ /*
+ * See why we failed, and take corrective action.
+ */
+
+ switch (kr) {
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryFault;
+ case VM_FAULT_INTERRUPTED:
+ kr = KERN_SUCCESS;
+ goto done;
+ case VM_FAULT_MEMORY_SHORTAGE:
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case VM_PAGE_WAIT
+ * discards our kernel stack.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+ state->vmf_object = VM_OBJECT_NULL;
+
+ VM_PAGE_WAIT(vm_fault_continue);
+ } else
+#endif /* CONTINUATIONS */
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryFault;
+ case VM_FAULT_MEMORY_ERROR:
+ kr = KERN_MEMORY_ERROR;
+ goto done;
+ }
+
+ m = result_page;
+
+ assert((change_wiring && !wired) ?
+ (top_page == VM_PAGE_NULL) :
+ ((top_page == VM_PAGE_NULL) == (m->object == object)));
+
+ /*
+ * How to clean up the result of vm_fault_page. This
+ * happens whether the mapping is entered or not.
+ */
+
+#define UNLOCK_AND_DEALLOCATE \
+ MACRO_BEGIN \
+ vm_fault_cleanup(m->object, top_page); \
+ vm_object_deallocate(object); \
+ MACRO_END
+
+ /*
+ * What to do with the resulting page from vm_fault_page
+ * if it doesn't get entered into the physical map:
+ */
+
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ /*
+ * We must verify that the maps have not changed
+ * since our last lookup.
+ */
+
+ old_copy_object = m->object->copy;
+
+ vm_object_unlock(m->object);
+ while (!vm_map_verify(map, &version)) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ /*
+ * To avoid trying to write_lock the map while another
+ * thread has it read_locked (in vm_map_pageable), we
+ * do not try for write permission. If the page is
+ * still writable, we will get write permission. If it
+ * is not, or has been marked needs_copy, we enter the
+ * mapping without write permission, and will merely
+ * take another fault.
+ */
+ kr = vm_map_lookup(&map, vaddr,
+ fault_type & ~VM_PROT_WRITE, &version,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired);
+
+ if (kr != KERN_SUCCESS) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto done;
+ }
+
+ vm_object_unlock(retry_object);
+ vm_object_lock(m->object);
+
+ if ((retry_object != object) ||
+ (retry_offset != offset)) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * Check whether the protection has changed or the object
+ * has been copied while we left the map unlocked.
+ */
+ prot &= retry_prot;
+ vm_object_unlock(m->object);
+ }
+ vm_object_lock(m->object);
+
+ /*
+ * If the copy object changed while the top-level object
+ * was unlocked, then we must take away write permission.
+ */
+
+ if (m->object->copy != old_copy_object)
+ prot &= ~VM_PROT_WRITE;
+
+ /*
+ * If we want to wire down this page, but no longer have
+ * adequate permissions, we must start all over.
+ */
+
+ if (wired && (prot != fault_type)) {
+ vm_map_verify_done(map, &version);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * It's critically important that a wired-down page be faulted
+ * only once in each map for which it is wired.
+ */
+
+ vm_object_unlock(m->object);
+
+ /*
+ * Put this page into the physical map.
+ * We had to do the unlock above because pmap_enter
+ * may cause other faults. The page may be on
+ * the pageout queues. If the pageout daemon comes
+ * across the page, it will remove it from the queues.
+ */
+
+ PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
+
+ /*
+ * If the page is not wired down and isn't already
+ * on a pageout queue, then put it where the
+ * pageout daemon can find it.
+ */
+ vm_object_lock(m->object);
+ vm_page_lock_queues();
+ if (change_wiring) {
+ if (wired)
+ vm_page_wire(m);
+ else
+ vm_page_unwire(m);
+ } else if (software_reference_bits) {
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ m->reference = TRUE;
+ } else {
+ vm_page_activate(m);
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(map, &version);
+ PAGE_WAKEUP_DONE(m);
+ kr = KERN_SUCCESS;
+ UNLOCK_AND_DEALLOCATE;
+
+#undef UNLOCK_AND_DEALLOCATE
+#undef RELEASE_PAGE
+
+ done:
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ zfree(vm_fault_state_zone, (vm_offset_t) state);
+ (*continuation)(kr);
+ /*NOTREACHED*/
+ }
+#endif /* CONTINUATIONS */
+
+ return(kr);
+}
+
+kern_return_t vm_fault_wire_fast();
+
+/*
+ * vm_fault_wire:
+ *
+ * Wire down a range of virtual addresses in a map.
+ */
+void vm_fault_wire(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+
+ register vm_offset_t va;
+ register pmap_t pmap;
+ register vm_offset_t end_addr = entry->vme_end;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+ * Inform the physical mapping system that the
+ * range of addresses may not fault, so that
+ * page tables and such can be locked down as well.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
+
+ /*
+ * We simulate a fault to get the page and enter it
+ * in the physical map.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ }
+}
+
+/*
+ * vm_fault_unwire:
+ *
+ * Unwire a range of virtual addresses in a map.
+ */
+void vm_fault_unwire(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+ register vm_offset_t va;
+ register pmap_t pmap;
+ register vm_offset_t end_addr = entry->vme_end;
+ vm_object_t object;
+
+ pmap = vm_map_pmap(map);
+
+ object = (entry->is_sub_map)
+ ? VM_OBJECT_NULL : entry->object.vm_object;
+
+ /*
+ * Since the pages are wired down, we must be able to
+ * get their mappings from the physical map system.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ pmap_change_wiring(pmap, va, FALSE);
+
+ if (object == VM_OBJECT_NULL) {
+ vm_map_lock_set_recursive(map);
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ vm_map_lock_clear_recursive(map);
+ } else {
+ vm_prot_t prot;
+ vm_page_t result_page;
+ vm_page_t top_page;
+ vm_fault_return_t result;
+
+ do {
+ prot = VM_PROT_NONE;
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ result = vm_fault_page(object,
+ entry->offset +
+ (va - entry->vme_start),
+ VM_PROT_NONE, TRUE,
+ FALSE, &prot,
+ &result_page,
+ &top_page,
+ FALSE, (void (*)()) 0);
+ } while (result == VM_FAULT_RETRY);
+
+ if (result != VM_FAULT_SUCCESS)
+ panic("vm_fault_unwire: failure");
+
+ vm_page_lock_queues();
+ vm_page_unwire(result_page);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(result_page);
+
+ vm_fault_cleanup(result_page->object, top_page);
+ }
+ }
+
+ /*
+ * Inform the physical mapping system that the range
+ * of addresses may fault, so that page tables and
+ * such may be unwired themselves.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
+}
+
+/*
+ * vm_fault_wire_fast:
+ *
+ * Handle common case of a wire down page fault at the given address.
+ * If successful, the page is inserted into the associated physical map.
+ * The map entry is passed in to avoid the overhead of a map lookup.
+ *
+ * NOTE: the given address should be truncated to the
+ * proper page address.
+ *
+ * KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ * a standard error specifying why the fault is fatal is returned.
+ *
+ * The map in question must be referenced, and remains so.
+ * Caller has a read lock on the map.
+ *
+ * This is a stripped version of vm_fault() for wiring pages. Anything
+ * other than the common case will return KERN_FAILURE, and the caller
+ * is expected to call vm_fault().
+ */
+kern_return_t vm_fault_wire_fast(map, va, entry)
+ vm_map_t map;
+ vm_offset_t va;
+ vm_map_entry_t entry;
+{
+ vm_object_t object;
+ vm_offset_t offset;
+ register vm_page_t m;
+ vm_prot_t prot;
+
+ vm_stat.faults++; /* needs lock XXX */
+/*
+ * Recovery actions
+ */
+
+#undef RELEASE_PAGE
+#define RELEASE_PAGE(m) { \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ vm_page_unwire(m); \
+ vm_page_unlock_queues(); \
+}
+
+
+#undef UNLOCK_THINGS
+#define UNLOCK_THINGS { \
+ object->paging_in_progress--; \
+ vm_object_unlock(object); \
+}
+
+#undef UNLOCK_AND_DEALLOCATE
+#define UNLOCK_AND_DEALLOCATE { \
+ UNLOCK_THINGS; \
+ vm_object_deallocate(object); \
+}
+/*
+ * Give up and have caller do things the hard way.
+ */
+
+#define GIVE_UP { \
+ UNLOCK_AND_DEALLOCATE; \
+ return(KERN_FAILURE); \
+}
+
+
+ /*
+ * If this entry is not directly to a vm_object, bail out.
+ */
+ if (entry->is_sub_map)
+ return(KERN_FAILURE);
+
+ /*
+ * Find the backing store object and offset into it.
+ */
+
+ object = entry->object.vm_object;
+ offset = (va - entry->vme_start) + entry->offset;
+ prot = entry->protection;
+
+ /*
+ * Make a reference to this object to prevent its
+ * disposal while we are messing with it.
+ */
+
+ vm_object_lock(object);
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ object->paging_in_progress++;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * 2) Once we have a busy page, we must remove it from
+ * the pageout queues, so that the pageout daemon
+ * will not grab it away.
+ *
+ */
+
+ /*
+ * Look for page in top-level object. If it's not there or
+ * there's something going on, give up.
+ */
+ m = vm_page_lookup(object, offset);
+ if ((m == VM_PAGE_NULL) || (m->error) ||
+ (m->busy) || (m->absent) || (prot & m->page_lock)) {
+ GIVE_UP;
+ }
+
+ /*
+ * Wire the page down now. All bail outs beyond this
+ * point must unwire the page.
+ */
+
+ vm_page_lock_queues();
+ vm_page_wire(m);
+ vm_page_unlock_queues();
+
+ /*
+ * Mark page busy for other threads.
+ */
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+
+ /*
+ * Give up if the page is being written and there's a copy object
+ */
+ if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
+ RELEASE_PAGE(m);
+ GIVE_UP;
+ }
+
+ /*
+ * Put this page into the physical map.
+ * We have to unlock the object because pmap_enter
+ * may cause other faults.
+ */
+ vm_object_unlock(object);
+
+ PMAP_ENTER(map->pmap, va, m, prot, TRUE);
+
+ /*
+ * Must relock object so that paging_in_progress can be cleared.
+ */
+ vm_object_lock(object);
+
+ /*
+ * Unlock everything, and return
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ UNLOCK_AND_DEALLOCATE;
+
+ return(KERN_SUCCESS);
+
+}
+
+/*
+ * Routine: vm_fault_copy_cleanup
+ * Purpose:
+ * Release a page used by vm_fault_copy.
+ */
+
+void vm_fault_copy_cleanup(page, top_page)
+ vm_page_t page;
+ vm_page_t top_page;
+{
+ vm_object_t object = page->object;
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(page);
+ vm_page_lock_queues();
+ if (!page->active && !page->inactive)
+ vm_page_activate(page);
+ vm_page_unlock_queues();
+ vm_fault_cleanup(object, top_page);
+}
+
+/*
+ * Routine: vm_fault_copy
+ *
+ * Purpose:
+ * Copy pages from one virtual memory object to another --
+ * neither the source nor destination pages need be resident.
+ *
+ * Before actually copying a page, the version associated with
+ * the destination address map will be verified.
+ *
+ * In/out conditions:
+ * The caller must hold a reference, but not a lock, to
+ * each of the source and destination objects and to the
+ * destination map.
+ *
+ * Results:
+ * Returns KERN_SUCCESS if no errors were encountered in
+ * reading or writing the data. Returns KERN_INTERRUPTED if
+ * the operation was interrupted (only possible if the
+ * "interruptible" argument is asserted). Other return values
+ * indicate a permanent error in copying the data.
+ *
+ * The actual amount of data copied will be returned in the
+ * "copy_size" argument. In the event that the destination map
+ * verification failed, this amount may be less than the amount
+ * requested.
+ */
+kern_return_t vm_fault_copy(
+ src_object,
+ src_offset,
+ src_size,
+ dst_object,
+ dst_offset,
+ dst_map,
+ dst_version,
+ interruptible
+ )
+ vm_object_t src_object;
+ vm_offset_t src_offset;
+ vm_size_t *src_size; /* INOUT */
+ vm_object_t dst_object;
+ vm_offset_t dst_offset;
+ vm_map_t dst_map;
+ vm_map_version_t *dst_version;
+ boolean_t interruptible;
+{
+ vm_page_t result_page;
+ vm_prot_t prot;
+
+ vm_page_t src_page;
+ vm_page_t src_top_page;
+
+ vm_page_t dst_page;
+ vm_page_t dst_top_page;
+
+ vm_size_t amount_done;
+ vm_object_t old_copy_object;
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ *src_size = amount_done; \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ amount_done = 0;
+ do { /* while (amount_done != *src_size) */
+
+ RetrySourceFault: ;
+
+ if (src_object == VM_OBJECT_NULL) {
+ /*
+ * No source object. We will just
+ * zero-fill the page in dst_object.
+ */
+
+ src_page = VM_PAGE_NULL;
+ } else {
+ prot = VM_PROT_READ;
+
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+
+ switch (vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, interruptible,
+ &prot, &result_page, &src_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetrySourceFault;
+ case VM_FAULT_INTERRUPTED:
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetrySourceFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetrySourceFault;
+ case VM_FAULT_MEMORY_ERROR:
+ return(KERN_MEMORY_ERROR);
+ }
+
+ src_page = result_page;
+
+ assert((src_top_page == VM_PAGE_NULL) ==
+ (src_page->object == src_object));
+
+ assert ((prot & VM_PROT_READ) != VM_PROT_NONE);
+
+ vm_object_unlock(src_page->object);
+ }
+
+ RetryDestinationFault: ;
+
+ prot = VM_PROT_WRITE;
+
+ vm_object_lock(dst_object);
+ vm_object_paging_begin(dst_object);
+
+ switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE,
+ FALSE, FALSE /* interruptible */,
+ &prot, &result_page, &dst_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryDestinationFault;
+ case VM_FAULT_INTERRUPTED:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryDestinationFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryDestinationFault;
+ case VM_FAULT_MEMORY_ERROR:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ return(KERN_MEMORY_ERROR);
+ }
+ assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE);
+
+ dst_page = result_page;
+
+ old_copy_object = dst_page->object->copy;
+
+ vm_object_unlock(dst_page->object);
+
+ if (!vm_map_verify(dst_map, dst_version)) {
+
+ BailOut: ;
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+ break;
+ }
+
+
+ vm_object_lock(dst_page->object);
+ if (dst_page->object->copy != old_copy_object) {
+ vm_object_unlock(dst_page->object);
+ vm_map_verify_done(dst_map, dst_version);
+ goto BailOut;
+ }
+ vm_object_unlock(dst_page->object);
+
+ /*
+ * Copy the page, and note that it is dirty
+ * immediately.
+ */
+
+ if (src_page == VM_PAGE_NULL)
+ vm_page_zero_fill(dst_page);
+ else
+ vm_page_copy(src_page, dst_page);
+ dst_page->dirty = TRUE;
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(dst_map, dst_version);
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+
+ amount_done += PAGE_SIZE;
+ src_offset += PAGE_SIZE;
+ dst_offset += PAGE_SIZE;
+
+ } while (amount_done != *src_size);
+
+ RETURN(KERN_SUCCESS);
+#undef RETURN
+
+ /*NOTREACHED*/
+}
+
+
+
+
+
+#ifdef notdef
+
+/*
+ * Routine: vm_fault_page_overwrite
+ *
+ * Description:
+ * A form of vm_fault_page that assumes that the
+ * resulting page will be overwritten in its entirety,
+ * making it unnecessary to obtain the correct *contents*
+ * of the page.
+ *
+ * Implementation:
+ * XXX Untested. Also unused. Eventually, this technology
+ * could be used in vm_fault_copy() to advantage.
+ */
+vm_fault_return_t vm_fault_page_overwrite(dst_object, dst_offset, result_page)
+ register
+ vm_object_t dst_object;
+ vm_offset_t dst_offset;
+ vm_page_t *result_page; /* OUT */
+{
+ register
+ vm_page_t dst_page;
+
+#define interruptible FALSE /* XXX */
+
+ while (TRUE) {
+ /*
+ * Look for a page at this offset
+ */
+
+ while ((dst_page = vm_page_lookup(dst_object, dst_offset))
+ == VM_PAGE_NULL) {
+ /*
+ * No page, no problem... just allocate one.
+ */
+
+ dst_page = vm_page_alloc(dst_object, dst_offset);
+ if (dst_page == VM_PAGE_NULL) {
+ vm_object_unlock(dst_object);
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /*
+ * Pretend that the memory manager
+ * write-protected the page.
+ *
+ * Note that we will be asking for write
+ * permission without asking for the data
+ * first.
+ */
+
+ dst_page->overwriting = TRUE;
+ dst_page->page_lock = VM_PROT_WRITE;
+ dst_page->absent = TRUE;
+ dst_object->absent_count++;
+
+ break;
+
+ /*
+ * When we bail out, we might have to throw
+ * away the page created here.
+ */
+
+#define DISCARD_PAGE \
+ MACRO_BEGIN \
+ vm_object_lock(dst_object); \
+ dst_page = vm_page_lookup(dst_object, dst_offset); \
+ if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
+ VM_PAGE_FREE(dst_page); \
+ vm_object_unlock(dst_object); \
+ MACRO_END
+ }
+
+ /*
+ * If the page is write-protected...
+ */
+
+ if (dst_page->page_lock & VM_PROT_WRITE) {
+ /*
+ * ... and an unlock request hasn't been sent
+ */
+
+ if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
+ vm_prot_t u;
+ kern_return_t rc;
+
+ /*
+ * ... then send one now.
+ */
+
+ if (!dst_object->pager_ready) {
+ vm_object_assert_wait(dst_object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result !=
+ THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ u = dst_page->unlock_request |= VM_PROT_WRITE;
+ vm_object_unlock(dst_object);
+
+ if ((rc = memory_object_data_unlock(
+ dst_object->pager,
+ dst_object->pager_request,
+ dst_offset + dst_object->paging_offset,
+ PAGE_SIZE,
+ u)) != KERN_SUCCESS) {
+ printf("vm_object_overwrite: memory_object_data_unlock failed\n");
+ DISCARD_PAGE;
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /* ... fall through to wait below */
+ } else {
+ /*
+ * If the page isn't being used for other
+ * purposes, then we're done.
+ */
+ if ( ! (dst_page->busy || dst_page->absent || dst_page->error) )
+ break;
+ }
+
+ PAGE_ASSERT_WAIT(dst_page, interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result != THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ }
+
+ *result_page = dst_page;
+ return(VM_FAULT_SUCCESS);
+
+#undef interruptible
+#undef DISCARD_PAGE
+}
+
+#endif /* notdef */
diff --git a/vm/vm_fault.h b/vm/vm_fault.h
new file mode 100644
index 00000000..eee39994
--- /dev/null
+++ b/vm/vm_fault.h
@@ -0,0 +1,64 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_fault.h
+ *
+ * Page fault handling module declarations.
+ */
+
+#ifndef _VM_VM_FAULT_H_
+#define _VM_VM_FAULT_H_
+
+#include <mach/kern_return.h>
+
+/*
+ * Page fault handling based on vm_object only.
+ */
+
+typedef kern_return_t vm_fault_return_t;
+#define VM_FAULT_SUCCESS 0
+#define VM_FAULT_RETRY 1
+#define VM_FAULT_INTERRUPTED 2
+#define VM_FAULT_MEMORY_SHORTAGE 3
+#define VM_FAULT_FICTITIOUS_SHORTAGE 4
+#define VM_FAULT_MEMORY_ERROR 5
+
+extern void vm_fault_init();
+extern vm_fault_return_t vm_fault_page();
+
+extern void vm_fault_cleanup();
+/*
+ * Page fault handling based on vm_map (or entries therein)
+ */
+
+extern kern_return_t vm_fault();
+extern void vm_fault_wire();
+extern void vm_fault_unwire();
+
+extern kern_return_t vm_fault_copy(); /* Copy pages from
+ * one object to another
+ */
+#endif	/* _VM_VM_FAULT_H_ */
diff --git a/vm/vm_init.c b/vm/vm_init.c
new file mode 100644
index 00000000..b76b11b6
--- /dev/null
+++ b/vm/vm_init.c
@@ -0,0 +1,84 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_init.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Initialize the Virtual Memory subsystem.
+ */
+
+#include <mach/machine/vm_types.h>
+#include <kern/zalloc.h>
+#include <kern/kalloc.h>
+#include <vm/vm_object.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+#include <vm/memory_object.h>
+
+
+
+/*
+ * vm_mem_bootstrap initializes the virtual memory system.
+ * This is done only by the first cpu up.
+ */
+
+void vm_mem_bootstrap()
+{
+ vm_offset_t start, end;
+
+ /*
+ * Initializes resident memory structures.
+ * From here on, all physical memory is accounted for,
+ * and we use only virtual addresses.
+ */
+
+ vm_page_bootstrap(&start, &end);
+
+ /*
+ * Initialize other VM packages
+ */
+
+ zone_bootstrap();
+ vm_object_bootstrap();
+ vm_map_init();
+ kmem_init(start, end);
+ pmap_init();
+ zone_init();
+ kalloc_init();
+ vm_fault_init();
+ vm_page_module_init();
+ memory_manager_default_init();
+}
+
+void vm_mem_init()
+{
+ vm_object_init();
+}
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
new file mode 100644
index 00000000..eb1e0795
--- /dev/null
+++ b/vm/vm_kern.c
@@ -0,0 +1,1072 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_kern.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Kernel memory management.
+ */
+
+#include <mach/kern_return.h>
+#include "vm_param.h"
+#include <kern/assert.h>
+#include <kern/lock.h>
+#include <kern/thread.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+
+
+/*
+ * Variables exported by this module.
+ */
+
+vm_map_t kernel_map;
+vm_map_t kernel_pageable_map;
+
+extern void kmem_alloc_pages();
+extern void kmem_remap_pages();
+
+/*
+ * projected_buffer_allocate
+ *
+ * Allocate a wired-down buffer shared between kernel and user task.
+ * Fresh, zero-filled memory is allocated.
+ * If persistence is false, this buffer can only be deallocated from
+ * user task using projected_buffer_deallocate, and deallocation
+ * from user task also deallocates the buffer from the kernel map.
+ * projected_buffer_collect is called from vm_map_deallocate to
+ * automatically deallocate projected buffers on task_deallocate.
+ * Sharing with more than one user task is achieved by using
+ * projected_buffer_map for the second and subsequent tasks.
+ * The user is precluded from manipulating the VM entry of this buffer
+ * (i.e. changing protection, inheritance or machine attributes).
+ */
+
+kern_return_t
+projected_buffer_allocate(map, size, persistence, kernel_p,
+ user_p, protection, inheritance)
+ vm_map_t map;
+ vm_size_t size;
+ int persistence;
+ vm_offset_t *kernel_p;
+ vm_offset_t *user_p;
+ vm_prot_t protection;
+ vm_inherit_t inheritance; /*Currently only VM_INHERIT_NONE supported*/
+{
+ vm_object_t object;
+ vm_map_entry_t u_entry, k_entry;
+ vm_offset_t addr;
+ vm_size_t r_size;
+ kern_return_t kr;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*
+ * Allocate a new object.
+ */
+
+ size = round_page(size);
+ object = vm_object_allocate(size);
+
+ vm_map_lock(kernel_map);
+ kr = vm_map_find_entry(kernel_map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &k_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(kernel_map);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ k_entry->object.vm_object = object;
+ if (!persistence)
+ k_entry->projected_on = (vm_map_entry_t) -1;
+ /*Mark entry so as to automatically deallocate it when
+ last corresponding user entry is deallocated*/
+ vm_map_unlock(kernel_map);
+ *kernel_p = addr;
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &u_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ vm_map_lock(kernel_map);
+ vm_map_entry_delete(kernel_map, k_entry);
+ vm_map_unlock(kernel_map);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ u_entry->object.vm_object = object;
+ vm_object_reference(object);
+ u_entry->projected_on = k_entry;
+ /*Creates coupling with kernel mapping of the buffer, and
+ also guarantees that user cannot directly manipulate
+ buffer VM entry*/
+ u_entry->protection = protection;
+ u_entry->max_protection = protection;
+ u_entry->inheritance = inheritance;
+ vm_map_unlock(map);
+ *user_p = addr;
+
+ /*
+ * Allocate wired-down memory in the object,
+ * and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(object, 0,
+ *kernel_p, *kernel_p + size,
+ VM_PROT_READ | VM_PROT_WRITE);
+ bzero(*kernel_p, size); /*Zero fill*/
+
+ /* Set up physical mappings for user pmap */
+
+ pmap_pageable(map->pmap, *user_p, *user_p + size, FALSE);
+ for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
+ addr = pmap_extract(kernel_pmap, *kernel_p + r_size);
+ pmap_enter(map->pmap, *user_p + r_size, addr,
+ protection, TRUE);
+ }
+
+ return(KERN_SUCCESS);
+}
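+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	A hypothetical driver sharing a wired, zero-filled buffer with a
+ *	user task; "task_map" and the 4-page size are assumptions made
+ *	for the example.
+ */
+#if 0	/* example */
+void
+example_share_buffer(task_map)
+	vm_map_t task_map;
+{
+	vm_offset_t kbuf, ubuf;
+
+	if (projected_buffer_allocate(task_map, 4*PAGE_SIZE,
+			FALSE,			/* not persistent */
+			&kbuf, &ubuf,
+			VM_PROT_READ | VM_PROT_WRITE,
+			VM_INHERIT_NONE) != KERN_SUCCESS)
+		return;
+	/*
+	 * kbuf is valid in the kernel map and ubuf in task_map;
+	 * both name the same wired pages.
+	 */
+}
+#endif	/* example */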
+
+
+/*
+ * projected_buffer_map
+ *
+ * Map an area of kernel memory onto a task's address space.
+ * No new memory is allocated; the area must previously exist in the
+ * kernel memory map.
+ */
+
+kern_return_t
+projected_buffer_map(map, kernel_addr, size, user_p, protection, inheritance)
+ vm_map_t map;
+ vm_offset_t kernel_addr;
+ vm_size_t size;
+ vm_offset_t *user_p;
+ vm_prot_t protection;
+ vm_inherit_t inheritance; /*Currently only VM_INHERIT_NONE supported*/
+{
+ vm_object_t object;
+ vm_map_entry_t u_entry, k_entry;
+ vm_offset_t physical_addr, user_addr;
+ vm_size_t r_size;
+ kern_return_t kr;
+
+ /*
+ * Find entry in kernel map
+ */
+
+ size = round_page(size);
+ if (map == VM_MAP_NULL || map == kernel_map ||
+ !vm_map_lookup_entry(kernel_map, kernel_addr, &k_entry) ||
+ kernel_addr + size > k_entry->vme_end)
+ return(KERN_INVALID_ARGUMENT);
+
+
+ /*
+ * Create entry in user task
+ */
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &user_addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &u_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ u_entry->object.vm_object = k_entry->object.vm_object;
+ vm_object_reference(k_entry->object.vm_object);
+ u_entry->offset = kernel_addr - k_entry->vme_start + k_entry->offset;
+ u_entry->projected_on = k_entry;
+ /*Creates coupling with kernel mapping of the buffer, and
+ also guarantees that user cannot directly manipulate
+ buffer VM entry*/
+ u_entry->protection = protection;
+ u_entry->max_protection = protection;
+ u_entry->inheritance = inheritance;
+ u_entry->wired_count = k_entry->wired_count;
+ vm_map_unlock(map);
+ *user_p = user_addr;
+
+ /* Set up physical mappings for user pmap */
+
+ pmap_pageable(map->pmap, user_addr, user_addr + size,
+ !k_entry->wired_count);
+ for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
+ physical_addr = pmap_extract(kernel_pmap, kernel_addr + r_size);
+ pmap_enter(map->pmap, user_addr + r_size, physical_addr,
+ protection, k_entry->wired_count);
+ }
+
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * projected_buffer_deallocate
+ *
+ * Unmap projected buffer from task's address space.
+ * May also unmap buffer from kernel map, if buffer is not
+ * persistent and only the kernel reference remains.
+ */
+
+kern_return_t
+projected_buffer_deallocate(map, start, end)
+ vm_map_t map;
+ vm_offset_t start, end;
+{
+ vm_map_entry_t entry, k_entry;
+
+	if (map == VM_MAP_NULL || map == kernel_map)
+	  return(KERN_INVALID_ARGUMENT);
+
+	vm_map_lock(map);
+	if (!vm_map_lookup_entry(map, start, &entry) ||
+	    end > entry->vme_end ||
+            /*Check corresponding kernel entry*/
+	    (k_entry = entry->projected_on) == 0) {
+	  vm_map_unlock(map);
+	  return(KERN_INVALID_ARGUMENT);
+	}
+
+ /*Prepare for deallocation*/
+ if (entry->vme_start < start)
+ _vm_map_clip_start(map, entry, start);
+ if (entry->vme_end > end)
+ _vm_map_clip_end(map, entry, end);
+ if (map->first_free == entry) /*Adjust first_free hint*/
+ map->first_free = entry->vme_prev;
+ entry->projected_on = 0; /*Needed to allow deletion*/
+ entry->wired_count = 0; /*Avoid unwire fault*/
+ vm_map_entry_delete(map, entry);
+ vm_map_unlock(map);
+
+ /*Check if the buffer is not persistent and only the
+ kernel mapping remains, and if so delete it*/
+ vm_map_lock(kernel_map);
+ if (k_entry->projected_on == (vm_map_entry_t) -1 &&
+ k_entry->object.vm_object->ref_count == 1) {
+ if (kernel_map->first_free == k_entry)
+ kernel_map->first_free = k_entry->vme_prev;
+ k_entry->projected_on = 0; /*Allow unwire fault*/
+ vm_map_entry_delete(kernel_map, k_entry);
+ }
+ vm_map_unlock(kernel_map);
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * projected_buffer_collect
+ *
+ * Unmap all projected buffers from task's address space.
+ */
+
+kern_return_t
+projected_buffer_collect(map)
+ vm_map_t map;
+{
+ vm_map_entry_t entry, next;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(KERN_INVALID_ARGUMENT);
+
+ for (entry = vm_map_first_entry(map);
+ entry != vm_map_to_entry(map);
+ entry = next) {
+ next = entry->vme_next;
+ if (entry->projected_on != 0)
+ projected_buffer_deallocate(map, entry->vme_start, entry->vme_end);
+ }
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * projected_buffer_in_range
+ *
+ * Verifies whether a projected buffer exists in the address range
+ * given.
+ */
+
+boolean_t
+projected_buffer_in_range(map, start, end)
+ vm_map_t map;
+ vm_offset_t start, end;
+{
+ vm_map_entry_t entry;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(FALSE);
+
+ /*Find first entry*/
+ if (!vm_map_lookup_entry(map, start, &entry))
+ entry = entry->vme_next;
+
+ while (entry != vm_map_to_entry(map) && entry->projected_on == 0 &&
+ entry->vme_start <= end) {
+ entry = entry->vme_next;
+ }
+ return(entry != vm_map_to_entry(map) && entry->vme_start <= end);
+}
+
+
+/*
+ * kmem_alloc:
+ *
+ * Allocate wired-down memory in the kernel's address map
+ * or a submap. The memory is not zero-filled.
+ */
+
+kern_return_t
+kmem_alloc(map, addrp, size)
+ vm_map_t map;
+ vm_offset_t *addrp;
+ vm_size_t size;
+{
+ vm_object_t object;
+ vm_map_entry_t entry;
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ /*
+ * Allocate a new object. We must do this before locking
+ * the map, lest we risk deadlock with the default pager:
+ * device_read_alloc uses kmem_alloc,
+ * which tries to allocate an object,
+ * which uses kmem_alloc_wired to get memory,
+ * which blocks for pages.
+	 *		Then the default pager needs to read a block
+ * to process a memory_object_data_write,
+ * and device_read_alloc calls kmem_alloc
+ * and deadlocks on the map lock.
+ */
+
+ size = round_page(size);
+ object = vm_object_allocate(size);
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ entry->object.vm_object = object;
+ entry->offset = 0;
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(object, 0,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
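+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	Typical pairing of kmem_alloc with kmem_free; the 8K size is an
+ *	arbitrary value chosen for the example.
+ */
+#if 0	/* example */
+void
+example_kmem_alloc_use()
+{
+	vm_offset_t buf;
+
+	if (kmem_alloc(kernel_map, &buf, 8192) != KERN_SUCCESS)
+		return;
+	bzero(buf, 8192);	/* kmem_alloc does not zero the memory */
+	/* ... use the buffer ... */
+	kmem_free(kernel_map, buf, 8192);
+}
+#endif	/* example */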
+
+/*
+ * kmem_realloc:
+ *
+ * Reallocate wired-down memory in the kernel's address map
+ * or a submap. Newly allocated pages are not zeroed.
+ * This can only be used on regions allocated with kmem_alloc.
+ *
+ * If successful, the pages in the old region are mapped twice.
+ * The old region is unchanged. Use kmem_free to get rid of it.
+ */
+kern_return_t kmem_realloc(map, oldaddr, oldsize, newaddrp, newsize)
+ vm_map_t map;
+ vm_offset_t oldaddr;
+ vm_size_t oldsize;
+ vm_offset_t *newaddrp;
+ vm_size_t newsize;
+{
+ vm_offset_t oldmin, oldmax;
+ vm_offset_t newaddr;
+ vm_object_t object;
+ vm_map_entry_t oldentry, newentry;
+ kern_return_t kr;
+
+ oldmin = trunc_page(oldaddr);
+ oldmax = round_page(oldaddr + oldsize);
+ oldsize = oldmax - oldmin;
+ newsize = round_page(newsize);
+
+ /*
+ * Find space for the new region.
+ */
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &newaddr, newsize, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &newentry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ /*
+ * Find the VM object backing the old region.
+ */
+
+ if (!vm_map_lookup_entry(map, oldmin, &oldentry))
+ panic("kmem_realloc");
+ object = oldentry->object.vm_object;
+
+ /*
+ * Increase the size of the object and
+ * fill in the new region.
+ */
+
+ vm_object_reference(object);
+ vm_object_lock(object);
+ if (object->size != oldsize)
+ panic("kmem_realloc");
+ object->size = newsize;
+ vm_object_unlock(object);
+
+ newentry->object.vm_object = object;
+ newentry->offset = 0;
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map. We are trusting
+ * that nobody will play with either region.
+ */
+
+ vm_map_unlock(map);
+
+ /*
+ * Remap the pages in the old region and
+ * allocate more pages for the new region.
+ */
+
+ kmem_remap_pages(object, 0,
+ newaddr, newaddr + oldsize,
+ VM_PROT_DEFAULT);
+ kmem_alloc_pages(object, oldsize,
+ newaddr + oldsize, newaddr + newsize,
+ VM_PROT_DEFAULT);
+
+ *newaddrp = newaddr;
+ return KERN_SUCCESS;
+}
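+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	Growing a wired buffer previously obtained from kmem_alloc:
+ *	after a successful kmem_realloc the old region is still mapped,
+ *	so the caller frees it once it has switched to the new address.
+ */
+#if 0	/* example */
+vm_offset_t
+example_grow_buffer(oldbuf, oldsize, newsize)
+	vm_offset_t oldbuf;
+	vm_size_t oldsize, newsize;
+{
+	vm_offset_t newbuf;
+
+	if (kmem_realloc(kernel_map, oldbuf, oldsize,
+			 &newbuf, newsize) != KERN_SUCCESS)
+		return oldbuf;
+	/* the first oldsize bytes of newbuf alias the old pages */
+	kmem_free(kernel_map, oldbuf, oldsize);
+	return newbuf;
+}
+#endif	/* example */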
+
+/*
+ * kmem_alloc_wired:
+ *
+ * Allocate wired-down memory in the kernel's address map
+ * or a submap. The memory is not zero-filled.
+ *
+ * The memory is allocated in the kernel_object.
+ * It may not be copied with vm_map_copy, and
+ * it may not be reallocated with kmem_realloc.
+ */
+
+kern_return_t
+kmem_alloc_wired(map, addrp, size)
+ vm_map_t map;
+ vm_offset_t *addrp;
+ vm_size_t size;
+{
+ vm_map_entry_t entry;
+ vm_offset_t offset;
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once. We want vm_map_find_entry
+ * to extend an existing entry if possible.
+ */
+
+ size = round_page(size);
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ kernel_object, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ /*
+ * Since we didn't know where the new region would
+ * start, we couldn't supply the correct offset into
+ * the kernel object. We only initialize the entry
+ * if we aren't extending an existing entry.
+ */
+
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ vm_object_reference(kernel_object);
+
+ entry->object.vm_object = kernel_object;
+ entry->offset = offset;
+ }
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(kernel_object, offset,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * kmem_alloc_aligned:
+ *
+ * Like kmem_alloc_wired, except that the memory is aligned.
+ * The size should be a power-of-2.
+ */
+
+kern_return_t
+kmem_alloc_aligned(map, addrp, size)
+ vm_map_t map;
+ vm_offset_t *addrp;
+ vm_size_t size;
+{
+ vm_map_entry_t entry;
+ vm_offset_t offset;
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ if ((size & (size - 1)) != 0)
+ panic("kmem_alloc_aligned");
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once. We want vm_map_find_entry
+ * to extend an existing entry if possible.
+ */
+
+ size = round_page(size);
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, size - 1,
+ kernel_object, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ /*
+ * Since we didn't know where the new region would
+ * start, we couldn't supply the correct offset into
+ * the kernel object. We only initialize the entry
+ * if we aren't extending an existing entry.
+ */
+
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ vm_object_reference(kernel_object);
+
+ entry->object.vm_object = kernel_object;
+ entry->offset = offset;
+ }
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(kernel_object, offset,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
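+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	kmem_alloc_aligned takes a power-of-2 size and returns memory
+ *	aligned to that size; the 64K table is an assumption made for
+ *	the example.
+ */
+#if 0	/* example */
+vm_offset_t
+example_aligned_table()
+{
+	vm_offset_t table;
+
+	if (kmem_alloc_aligned(kernel_map, &table,
+			       (vm_size_t) (64*1024)) != KERN_SUCCESS)
+		return 0;
+	assert((table & (64*1024 - 1)) == 0);	/* aligned to its size */
+	return table;
+}
+#endif	/* example */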
+
+/*
+ * kmem_alloc_pageable:
+ *
+ * Allocate pageable memory in the kernel's address map.
+ */
+
+kern_return_t
+kmem_alloc_pageable(map, addrp, size)
+ vm_map_t map;
+ vm_offset_t *addrp;
+ vm_size_t size;
+{
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ addr = vm_map_min(map);
+ kr = vm_map_enter(map, &addr, round_page(size),
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * kmem_free:
+ *
+ * Release a region of kernel virtual memory allocated
+ * with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
+ * and return the physical pages associated with that region.
+ */
+
+void
+kmem_free(map, addr, size)
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+{
+ kern_return_t kr;
+
+ kr = vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+ if (kr != KERN_SUCCESS)
+ panic("kmem_free");
+}
+
+/*
+ * Allocate new wired pages in an object.
+ * The object is assumed to be mapped into the kernel map or
+ * a submap.
+ */
+void
+kmem_alloc_pages(object, offset, start, end, protection)
+ register vm_object_t object;
+ register vm_offset_t offset;
+ register vm_offset_t start, end;
+ vm_prot_t protection;
+{
+ /*
+ * Mark the pmap region as not pageable.
+ */
+ pmap_pageable(kernel_pmap, start, end, FALSE);
+
+ while (start < end) {
+ register vm_page_t mem;
+
+ vm_object_lock(object);
+
+ /*
+ * Allocate a page
+ */
+ while ((mem = vm_page_alloc(object, offset))
+ == VM_PAGE_NULL) {
+ vm_object_unlock(object);
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(object);
+ }
+
+ /*
+ * Wire it down
+ */
+ vm_page_lock_queues();
+ vm_page_wire(mem);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ /*
+ * Enter it in the kernel pmap
+ */
+ PMAP_ENTER(kernel_pmap, start, mem,
+ protection, TRUE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(mem);
+ vm_object_unlock(object);
+
+ start += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+}
+
+/*
+ * Remap wired pages in an object into a new region.
+ * The object is assumed to be mapped into the kernel map or
+ * a submap.
+ */
+void
+kmem_remap_pages(object, offset, start, end, protection)
+ register vm_object_t object;
+ register vm_offset_t offset;
+ register vm_offset_t start, end;
+ vm_prot_t protection;
+{
+ /*
+ * Mark the pmap region as not pageable.
+ */
+ pmap_pageable(kernel_pmap, start, end, FALSE);
+
+ while (start < end) {
+ register vm_page_t mem;
+
+ vm_object_lock(object);
+
+ /*
+ * Find a page
+ */
+ if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
+ panic("kmem_remap_pages");
+
+ /*
+ * Wire it down (again)
+ */
+ vm_page_lock_queues();
+ vm_page_wire(mem);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ /*
+ * Enter it in the kernel pmap. The page isn't busy,
+ * but this shouldn't be a problem because it is wired.
+ */
+ PMAP_ENTER(kernel_pmap, start, mem,
+ protection, TRUE);
+
+ start += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+}
+
+/*
+ * kmem_suballoc:
+ *
+ * Allocates a map to manage a subrange
+ * of the kernel virtual address space.
+ *
+ * Arguments are as follows:
+ *
+ * parent Map to take range from
+ * size Size of range to find
+ * min, max Returned endpoints of map
+ * pageable Can the region be paged
+ */
+
+vm_map_t
+kmem_suballoc(parent, min, max, size, pageable)
+ vm_map_t parent;
+ vm_offset_t *min, *max;
+ vm_size_t size;
+ boolean_t pageable;
+{
+ vm_map_t map;
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ size = round_page(size);
+
+ /*
+ * Need reference on submap object because it is internal
+ * to the vm_system. vm_object_enter will never be called
+ * on it (usual source of reference for vm_map_enter).
+ */
+ vm_object_reference(vm_submap_object);
+
+ addr = (vm_offset_t) vm_map_min(parent);
+ kr = vm_map_enter(parent, &addr, size,
+ (vm_offset_t) 0, TRUE,
+ vm_submap_object, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS)
+ panic("kmem_suballoc");
+
+ pmap_reference(vm_map_pmap(parent));
+ map = vm_map_create(vm_map_pmap(parent), addr, addr + size, pageable);
+ if (map == VM_MAP_NULL)
+ panic("kmem_suballoc");
+
+ kr = vm_map_submap(parent, addr, addr + size, map);
+ if (kr != KERN_SUCCESS)
+ panic("kmem_suballoc");
+
+ *min = addr;
+ *max = addr + size;
+ return map;
+}
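+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	Carving a pageable submap out of the kernel map at bootstrap;
+ *	"example_map" and the 1MB size are assumptions made for the
+ *	example.
+ */
+#if 0	/* example */
+vm_map_t example_map;
+
+void
+example_submap_init()
+{
+	vm_offset_t min, max;
+
+	example_map = kmem_suballoc(kernel_map, &min, &max,
+				    (vm_size_t) (1024*1024), TRUE);
+	/* later allocations for this subsystem come from example_map */
+}
+#endif	/* example */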
+
+/*
+ * kmem_init:
+ *
+ * Initialize the kernel's virtual memory map, taking
+ * into account all memory allocated up to this time.
+ */
+void kmem_init(start, end)
+ vm_offset_t start;
+ vm_offset_t end;
+{
+ kernel_map = vm_map_create(pmap_kernel(),
+ VM_MIN_KERNEL_ADDRESS, end,
+ FALSE);
+
+ /*
+ * Reserve virtual memory allocated up to this time.
+ */
+
+ if (start != VM_MIN_KERNEL_ADDRESS) {
+ kern_return_t rc;
+ vm_offset_t addr = VM_MIN_KERNEL_ADDRESS;
+ rc = vm_map_enter(kernel_map,
+ &addr, start - VM_MIN_KERNEL_ADDRESS,
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ if (rc)
+			panic("kmem_init: vm_map_enter failed (%d)\n", rc);
+ }
+}
+
+/*
+ * New and improved IO wiring support.
+ */
+
+/*
+ * kmem_io_map_copyout:
+ *
+ * Establish temporary mapping in designated map for the memory
+ * passed in. Memory format must be a page_list vm_map_copy.
+ * Mapping is READ-ONLY.
+ */
+
+kern_return_t
+kmem_io_map_copyout(map, addr, alloc_addr, alloc_size, copy, min_size)
+ vm_map_t map;
+ vm_offset_t *addr; /* actual addr of data */
+ vm_offset_t *alloc_addr; /* page aligned addr */
+ vm_size_t *alloc_size; /* size allocated */
+ vm_map_copy_t copy;
+ vm_size_t min_size; /* Do at least this much */
+{
+ vm_offset_t myaddr, offset;
+ vm_size_t mysize, copy_size;
+ kern_return_t ret;
+ register
+ vm_page_t *page_list;
+ vm_map_copy_t new_copy;
+ register
+ int i;
+
+ assert(copy->type == VM_MAP_COPY_PAGE_LIST);
+ assert(min_size != 0);
+
+ /*
+ * Figure out the size in vm pages.
+ */
+ min_size += copy->offset - trunc_page(copy->offset);
+ min_size = round_page(min_size);
+ mysize = round_page(copy->offset + copy->size) -
+ trunc_page(copy->offset);
+
+ /*
+ * If total size is larger than one page list and
+ * we don't have to do more than one page list, then
+ * only do one page list.
+ *
+ * XXX Could be much smarter about this ... like trimming length
+ * XXX if we need more than one page list but not all of them.
+ */
+
+ copy_size = ptoa(copy->cpy_npages);
+ if (mysize > copy_size && copy_size > min_size)
+ mysize = copy_size;
+
+ /*
+ * Allocate some address space in the map (must be kernel
+ * space).
+ */
+ myaddr = vm_map_min(map);
+ ret = vm_map_enter(map, &myaddr, mysize,
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+
+ if (ret != KERN_SUCCESS)
+ return(ret);
+
+ /*
+ * Tell the pmap module that this will be wired, and
+ * enter the mappings.
+ */
+ pmap_pageable(vm_map_pmap(map), myaddr, myaddr + mysize, TRUE);
+
+ *addr = myaddr + (copy->offset - trunc_page(copy->offset));
+ *alloc_addr = myaddr;
+ *alloc_size = mysize;
+
+ offset = myaddr;
+ page_list = &copy->cpy_page_list[0];
+ while (TRUE) {
+ for ( i = 0; i < copy->cpy_npages; i++, offset += PAGE_SIZE) {
+ PMAP_ENTER(vm_map_pmap(map), offset, *page_list,
+ VM_PROT_READ, TRUE);
+ page_list++;
+ }
+
+ if (offset == (myaddr + mysize))
+ break;
+
+ /*
+ * Onward to the next page_list. The extend_cont
+ * leaves the current page list's pages alone;
+ * they'll be cleaned up at discard. Reset this
+ * copy's continuation to discard the next one.
+ */
+ vm_map_copy_invoke_extend_cont(copy, &new_copy, &ret);
+
+ if (ret != KERN_SUCCESS) {
+ kmem_io_map_deallocate(map, myaddr, mysize);
+ return(ret);
+ }
+ copy->cpy_cont = vm_map_copy_discard_cont;
+ copy->cpy_cont_args = (char *) new_copy;
+ copy = new_copy;
+ page_list = &copy->cpy_page_list[0];
+ }
+
+ return(ret);
+}
+
+/*
+ * kmem_io_map_deallocate:
+ *
+ * Get rid of the mapping established by kmem_io_map_copyout.
+ * Assumes that addr and size have been rounded to page boundaries.
+ * (e.g., the alloc_addr and alloc_size returned by kmem_io_map_copyout)
+ */
+
+void
+kmem_io_map_deallocate(map, addr, size)
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+{
+ /*
+ * Remove the mappings. The pmap_remove is needed.
+ */
+
+ pmap_remove(vm_map_pmap(map), addr, addr + size);
+ vm_map_remove(map, addr, addr + size);
+}
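+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	A hypothetical driver mapping a page-list copy object read-only
+ *	for the duration of an I/O operation; "copy" is assumed to be a
+ *	VM_MAP_COPY_PAGE_LIST copy handed in by the caller.
+ */
+#if 0	/* example */
+kern_return_t
+example_map_io_data(copy, min_size)
+	vm_map_copy_t copy;
+	vm_size_t min_size;
+{
+	vm_offset_t data, alloc_addr;
+	vm_size_t alloc_size;
+	kern_return_t kr;
+
+	kr = kmem_io_map_copyout(kernel_map, &data, &alloc_addr,
+				 &alloc_size, copy, min_size);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	/* ... read at least min_size bytes starting at "data" ... */
+	kmem_io_map_deallocate(kernel_map, alloc_addr, alloc_size);
+	return KERN_SUCCESS;
+}
+#endif	/* example */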
+
+/*
+ * Routine: copyinmap
+ * Purpose:
+ * Like copyin, except that fromaddr is an address
+ * in the specified VM map. This implementation
+ * is incomplete; it handles the current user map
+ * and the kernel map/submaps.
+ */
+
+int copyinmap(map, fromaddr, toaddr, length)
+ vm_map_t map;
+ char *fromaddr, *toaddr;
+ int length;
+{
+ if (vm_map_pmap(map) == kernel_pmap) {
+ /* assume a correct copy */
+ bcopy(fromaddr, toaddr, length);
+ return 0;
+ }
+
+ if (current_map() == map)
+ return copyin( fromaddr, toaddr, length);
+
+ return 1;
+}
+
+/*
+ * Routine: copyoutmap
+ * Purpose:
+ * Like copyout, except that toaddr is an address
+ * in the specified VM map. This implementation
+ * is incomplete; it handles the current user map
+ * and the kernel map/submaps.
+ */
+
+int copyoutmap(map, fromaddr, toaddr, length)
+ vm_map_t map;
+ char *fromaddr, *toaddr;
+ int length;
+{
+ if (vm_map_pmap(map) == kernel_pmap) {
+ /* assume a correct copy */
+ bcopy(fromaddr, toaddr, length);
+ return 0;
+ }
+
+ if (current_map() == map)
+ return copyout(fromaddr, toaddr, length);
+
+ return 1;
+}
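+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	copyinmap and copyoutmap return 0 on success and non-zero on
+ *	failure, so callers check them like copyin/copyout.
+ */
+#if 0	/* example */
+int
+example_fetch_word(map, uaddr, valuep)
+	vm_map_t map;
+	char *uaddr;
+	int *valuep;
+{
+	/* 0 means the word was copied into *valuep */
+	return copyinmap(map, uaddr, (char *) valuep, sizeof(int));
+}
+#endif	/* example */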
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
new file mode 100644
index 00000000..8e00fcce
--- /dev/null
+++ b/vm/vm_kern.h
@@ -0,0 +1,63 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_kern.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Kernel memory management definitions.
+ */
+
+#ifndef _VM_VM_KERN_H_
+#define _VM_VM_KERN_H_
+
+#include <mach/kern_return.h>
+#include <vm/vm_map.h>
+
+extern kern_return_t projected_buffer_allocate();
+extern kern_return_t projected_buffer_deallocate();
+extern kern_return_t projected_buffer_map();
+extern kern_return_t projected_buffer_collect();
+
+extern void kmem_init();
+
+extern kern_return_t kmem_alloc();
+extern kern_return_t kmem_alloc_pageable();
+extern kern_return_t kmem_alloc_wired();
+extern kern_return_t kmem_alloc_aligned();
+extern kern_return_t kmem_realloc();
+extern void kmem_free();
+
+extern vm_map_t kmem_suballoc();
+
+extern kern_return_t kmem_io_map_copyout();
+extern void kmem_io_map_deallocate();
+
+extern vm_map_t kernel_map;
+extern vm_map_t kernel_pageable_map;
+extern vm_map_t ipc_kernel_map;
+
+#endif	/* _VM_VM_KERN_H_ */
diff --git a/vm/vm_map.c b/vm/vm_map.c
new file mode 100644
index 00000000..c71b8580
--- /dev/null
+++ b/vm/vm_map.c
@@ -0,0 +1,5244 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_map.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory mapping module.
+ */
+
+#include <norma_ipc.h>
+
+#include <mach/kern_return.h>
+#include <mach/port.h>
+#include <mach/vm_attributes.h>
+#include <mach/vm_param.h>
+#include <kern/assert.h>
+#include <kern/zalloc.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+#include <ipc/ipc_port.h>
+
+/*
+ * Macros to copy a vm_map_entry. We must be careful to correctly
+ * manage the wired page count. vm_map_entry_copy() creates a new
+ * map entry to the same memory - the wired count in the new entry
+ * must be set to zero. vm_map_entry_copy_full() creates a new
+ * entry that is identical to the old entry. This preserves the
+ * wire count; it's used for map splitting and zone changing in
+ * vm_map_copyout.
+ */
+#define vm_map_entry_copy(NEW,OLD) \
+MACRO_BEGIN \
+ *(NEW) = *(OLD); \
+ (NEW)->is_shared = FALSE; \
+ (NEW)->needs_wakeup = FALSE; \
+ (NEW)->in_transition = FALSE; \
+ (NEW)->wired_count = 0; \
+ (NEW)->user_wired_count = 0; \
+MACRO_END
+
+#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
+
+/*
+ * Virtual memory maps provide for the mapping, protection,
+ * and sharing of virtual memory objects. In addition,
+ * this module provides for an efficient virtual copy of
+ * memory from one map to another.
+ *
+ * Synchronization is required prior to most operations.
+ *
+ * Maps consist of an ordered doubly-linked list of simple
+ * entries; a single hint is used to speed up lookups.
+ *
+ * Sharing maps have been deleted from this version of Mach.
+ * All shared objects are now mapped directly into the respective
+ * maps. This requires a change in the copy on write strategy;
+ * the asymmetric (delayed) strategy is used for shared temporary
+ * objects instead of the symmetric (shadow) strategy. This is
+ * selected by the (new) use_shared_copy bit in the object. See
+ * vm_object_copy_temporary in vm_object.c for details. All maps
+ * are now "top level" maps (either task map, kernel map or submap
+ * of the kernel map).
+ *
+ *	Since portions of maps are specified by start/end addresses,
+ * which may not align with existing map entries, all
+ * routines merely "clip" entries to these start/end values.
+ * [That is, an entry is split into two, bordering at a
+ * start or end value.] Note that these clippings may not
+ * always be necessary (as the two resulting entries are then
+ * not changed); however, the clipping is done for convenience.
+ * No attempt is currently made to "glue back together" two
+ * abutting entries.
+ *
+ * The symmetric (shadow) copy strategy implements virtual copy
+ * by copying VM object references from one map to
+ * another, and then marking both regions as copy-on-write.
+ * It is important to note that only one writeable reference
+ * to a VM object region exists in any map when this strategy
+ * is used -- this means that shadow object creation can be
+ * delayed until a write operation occurs. The asymmetric (delayed)
+ * strategy allows multiple maps to have writeable references to
+ * the same region of a vm object, and hence cannot delay creating
+ * its copy objects. See vm_object_copy_temporary() in vm_object.c.
+ * Copying of permanent objects is completely different; see
+ * vm_object_copy_strategically() in vm_object.c.
+ */
+
+zone_t vm_map_zone; /* zone for vm_map structures */
+zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
+zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
+zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
+
+boolean_t vm_map_lookup_entry(); /* forward declaration */
+
+/*
+ * Placeholder object for submap operations. This object is dropped
+ * into the range by a call to vm_map_find, and removed when
+ * vm_map_submap creates the submap.
+ */
+
+vm_object_t vm_submap_object;
+
+/*
+ * vm_map_init:
+ *
+ * Initialize the vm_map module. Must be called before
+ * any other vm_map routines.
+ *
+ * Map and entry structures are allocated from zones -- we must
+ * initialize those zones.
+ *
+ * There are three zones of interest:
+ *
+ * vm_map_zone: used to allocate maps.
+ * vm_map_entry_zone: used to allocate map entries.
+ * vm_map_kentry_zone: used to allocate map entries for the kernel.
+ *
+ * The kernel allocates map entries from a special zone that is initially
+ * "crammed" with memory. It would be difficult (perhaps impossible) for
+ *	the kernel to allocate more memory to an entry zone when it became
+ * empty since the very act of allocating memory implies the creation
+ * of a new entry.
+ */
+
+vm_offset_t kentry_data;
+vm_size_t kentry_data_size;
+int kentry_count = 256; /* to init kentry_data_size */
+
+void vm_map_init()
+{
+ vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 40*1024,
+ PAGE_SIZE, 0, "maps");
+ vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
+ 1024*1024, PAGE_SIZE*5,
+ 0, "non-kernel map entries");
+ vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
+ kentry_data_size, kentry_data_size,
+ ZONE_FIXED /* XXX */, "kernel map entries");
+
+ vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy),
+ 16*1024, PAGE_SIZE, 0,
+ "map copies");
+
+ /*
+ * Cram the kentry zone with initial data.
+ */
+ zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
+
+ /*
+ * Submap object is initialized by vm_object_init.
+ */
+}
+
+/*
+ * vm_map_create:
+ *
+ * Creates and returns a new empty VM map with
+ * the given physical map structure, and having
+ * the given lower and upper address bounds.
+ */
+vm_map_t vm_map_create(pmap, min, max, pageable)
+ pmap_t pmap;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ register vm_map_t result;
+
+ result = (vm_map_t) zalloc(vm_map_zone);
+ if (result == VM_MAP_NULL)
+ panic("vm_map_create");
+
+ vm_map_first_entry(result) = vm_map_to_entry(result);
+ vm_map_last_entry(result) = vm_map_to_entry(result);
+ result->hdr.nentries = 0;
+ result->hdr.entries_pageable = pageable;
+
+ result->size = 0;
+ result->ref_count = 1;
+ result->pmap = pmap;
+ result->min_offset = min;
+ result->max_offset = max;
+ result->wiring_required = FALSE;
+ result->wait_for_space = FALSE;
+ result->first_free = vm_map_to_entry(result);
+ result->hint = vm_map_to_entry(result);
+ vm_map_lock_init(result);
+ simple_lock_init(&result->ref_lock);
+ simple_lock_init(&result->hint_lock);
+
+ return(result);
+}
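+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	How a task address map might be built: a fresh physical map plus
+ *	a pageable vm_map covering the user address range.  pmap_create
+ *	and the VM_MIN_ADDRESS/VM_MAX_ADDRESS bounds are assumptions
+ *	drawn from the pmap and vm_param interfaces.
+ */
+#if 0	/* example */
+vm_map_t
+example_task_map_create()
+{
+	return vm_map_create(pmap_create((vm_size_t) 0),
+			     VM_MIN_ADDRESS, VM_MAX_ADDRESS, TRUE);
+}
+#endif	/* example */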
+
+/*
+ * vm_map_entry_create: [ internal use only ]
+ *
+ * Allocates a VM map entry for insertion in the
+ * given map (or map copy). No fields are filled.
+ */
+#define vm_map_entry_create(map) \
+ _vm_map_entry_create(&(map)->hdr)
+
+#define vm_map_copy_entry_create(copy) \
+ _vm_map_entry_create(&(copy)->cpy_hdr)
+
+vm_map_entry_t _vm_map_entry_create(map_header)
+ register struct vm_map_header *map_header;
+{
+ register zone_t zone;
+ register vm_map_entry_t entry;
+
+ if (map_header->entries_pageable)
+ zone = vm_map_entry_zone;
+ else
+ zone = vm_map_kentry_zone;
+
+ entry = (vm_map_entry_t) zalloc(zone);
+ if (entry == VM_MAP_ENTRY_NULL)
+ panic("vm_map_entry_create");
+
+ return(entry);
+}
+
+/*
+ * vm_map_entry_dispose: [ internal use only ]
+ *
+ * Inverse of vm_map_entry_create.
+ */
+#define vm_map_entry_dispose(map, entry) \
+ _vm_map_entry_dispose(&(map)->hdr, (entry))
+
+#define vm_map_copy_entry_dispose(copy, entry) \
+	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
+
+void _vm_map_entry_dispose(map_header, entry)
+ register struct vm_map_header *map_header;
+ register vm_map_entry_t entry;
+{
+ register zone_t zone;
+
+ if (map_header->entries_pageable)
+ zone = vm_map_entry_zone;
+ else
+ zone = vm_map_kentry_zone;
+
+ zfree(zone, (vm_offset_t) entry);
+}
+
+/*
+ * vm_map_entry_{un,}link:
+ *
+ * Insert/remove entries from maps (or map copies).
+ */
+#define vm_map_entry_link(map, after_where, entry) \
+ _vm_map_entry_link(&(map)->hdr, after_where, entry)
+
+#define vm_map_copy_entry_link(copy, after_where, entry) \
+ _vm_map_entry_link(&(copy)->cpy_hdr, after_where, entry)
+
+#define _vm_map_entry_link(hdr, after_where, entry) \
+ MACRO_BEGIN \
+ (hdr)->nentries++; \
+ (entry)->vme_prev = (after_where); \
+ (entry)->vme_next = (after_where)->vme_next; \
+ (entry)->vme_prev->vme_next = \
+ (entry)->vme_next->vme_prev = (entry); \
+ MACRO_END
+
+#define vm_map_entry_unlink(map, entry) \
+ _vm_map_entry_unlink(&(map)->hdr, entry)
+
+#define vm_map_copy_entry_unlink(copy, entry) \
+ _vm_map_entry_unlink(&(copy)->cpy_hdr, entry)
+
+#define _vm_map_entry_unlink(hdr, entry) \
+ MACRO_BEGIN \
+ (hdr)->nentries--; \
+ (entry)->vme_next->vme_prev = (entry)->vme_prev; \
+ (entry)->vme_prev->vme_next = (entry)->vme_next; \
+ MACRO_END
+
+/*
+ * vm_map_reference:
+ *
+ * Creates another valid reference to the given map.
+ *
+ */
+void vm_map_reference(map)
+ register vm_map_t map;
+{
+ if (map == VM_MAP_NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ map->ref_count++;
+ simple_unlock(&map->ref_lock);
+}
+
+/*
+ * vm_map_deallocate:
+ *
+ * Removes a reference from the specified map,
+ * destroying it if no references remain.
+ * The map should not be locked.
+ */
+void vm_map_deallocate(map)
+ register vm_map_t map;
+{
+ register int c;
+
+ if (map == VM_MAP_NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ c = --map->ref_count;
+ simple_unlock(&map->ref_lock);
+
+ if (c > 0) {
+ return;
+ }
+
+ projected_buffer_collect(map);
+ (void) vm_map_delete(map, map->min_offset, map->max_offset);
+
+ pmap_destroy(map->pmap);
+
+ zfree(vm_map_zone, (vm_offset_t) map);
+}
+
+/*
+ * SAVE_HINT:
+ *
+ * Saves the specified entry as the hint for
+ * future lookups. Performs necessary interlocks.
+ */
+#define SAVE_HINT(map,value) \
+ simple_lock(&(map)->hint_lock); \
+ (map)->hint = (value); \
+ simple_unlock(&(map)->hint_lock);
+
+/*
+ * vm_map_lookup_entry: [ internal use only ]
+ *
+ * Finds the map entry containing (or
+ * immediately preceding) the specified address
+ * in the given map; the entry is returned
+ * in the "entry" parameter. The boolean
+ * result indicates whether the address is
+ * actually contained in the map.
+ */
+boolean_t vm_map_lookup_entry(map, address, entry)
+ register vm_map_t map;
+ register vm_offset_t address;
+ vm_map_entry_t *entry; /* OUT */
+{
+ register vm_map_entry_t cur;
+ register vm_map_entry_t last;
+
+ /*
+ * Start looking either from the head of the
+ * list, or from the hint.
+ */
+
+ simple_lock(&map->hint_lock);
+ cur = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if (cur == vm_map_to_entry(map))
+ cur = cur->vme_next;
+
+ if (address >= cur->vme_start) {
+ /*
+ * Go from hint to end of list.
+ *
+ * But first, make a quick check to see if
+ * we are already looking at the entry we
+ * want (which is usually the case).
+ * Note also that we don't need to save the hint
+ * here... it is the same hint (unless we are
+ * at the header, in which case the hint didn't
+ * buy us anything anyway).
+ */
+ last = vm_map_to_entry(map);
+ if ((cur != last) && (cur->vme_end > address)) {
+ *entry = cur;
+ return(TRUE);
+ }
+ }
+ else {
+ /*
+ * Go from start to hint, *inclusively*
+ */
+ last = cur->vme_next;
+ cur = vm_map_first_entry(map);
+ }
+
+ /*
+ * Search linearly
+ */
+
+ while (cur != last) {
+ if (cur->vme_end > address) {
+ if (address >= cur->vme_start) {
+ /*
+ * Save this lookup for future
+ * hints, and return
+ */
+
+ *entry = cur;
+ SAVE_HINT(map, cur);
+ return(TRUE);
+ }
+ break;
+ }
+ cur = cur->vme_next;
+ }
+ *entry = cur->vme_prev;
+ SAVE_HINT(map, *entry);
+ return(FALSE);
+}
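+
+/*
+ *	Illustrative sketch only -- not part of the original source.
+ *	Typical use of vm_map_lookup_entry under the map lock: the
+ *	boolean result says whether the address is mapped; on FALSE the
+ *	returned entry is the one preceding the hole (or the header).
+ */
+#if 0	/* example */
+boolean_t
+example_address_mapped(map, addr)
+	vm_map_t map;
+	vm_offset_t addr;
+{
+	vm_map_entry_t entry;
+	boolean_t mapped;
+
+	vm_map_lock(map);
+	mapped = vm_map_lookup_entry(map, addr, &entry);
+	vm_map_unlock(map);
+	return mapped;
+}
+#endif	/* example */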
+
+/*
+ * Routine: invalid_user_access
+ *
+ * Verifies whether user access is valid.
+ */
+
+boolean_t
+invalid_user_access(map, start, end, prot)
+ vm_map_t map;
+ vm_offset_t start, end;
+ vm_prot_t prot;
+{
+ vm_map_entry_t entry;
+
+ return (map == VM_MAP_NULL || map == kernel_map ||
+ !vm_map_lookup_entry(map, start, &entry) ||
+ entry->vme_end < end ||
+ (prot & ~(entry->protection)));
+}
+
+
+/*
+ * Routine: vm_map_find_entry
+ * Purpose:
+ * Allocate a range in the specified virtual address map,
+ * returning the entry allocated for that range.
+ * Used by kmem_alloc, etc. Returns wired entries.
+ *
+ * The map must be locked.
+ *
+ * If an entry is allocated, the object/offset fields
+ * are initialized to zero. If an object is supplied,
+ * then an existing entry may be extended.
+ */
+kern_return_t vm_map_find_entry(map, address, size, mask, object, o_entry)
+ register vm_map_t map;
+ vm_offset_t *address; /* OUT */
+ vm_size_t size;
+ vm_offset_t mask;
+ vm_object_t object;
+ vm_map_entry_t *o_entry; /* OUT */
+{
+ register vm_map_entry_t entry, new_entry;
+ register vm_offset_t start;
+ register vm_offset_t end;
+
+ /*
+ * Look for the first possible address;
+ * if there's already something at this
+ * address, we have to start after it.
+ */
+
+ if ((entry = map->first_free) == vm_map_to_entry(map))
+ start = map->min_offset;
+ else
+ start = entry->vme_end;
+
+ /*
+ * In any case, the "entry" always precedes
+ * the proposed new region throughout the loop:
+ */
+
+ while (TRUE) {
+ register vm_map_entry_t next;
+
+ /*
+ * Find the end of the proposed new region.
+ * Be sure we didn't go beyond the end, or
+ * wrap around the address.
+ */
+
+ if (((start + mask) & ~mask) < start)
+ return(KERN_NO_SPACE);
+ start = ((start + mask) & ~mask);
+ end = start + size;
+
+ if ((end > map->max_offset) || (end < start))
+ return(KERN_NO_SPACE);
+
+ /*
+ * If there are no more entries, we must win.
+ */
+
+ next = entry->vme_next;
+ if (next == vm_map_to_entry(map))
+ break;
+
+ /*
+ * If there is another entry, it must be
+ * after the end of the potential new region.
+ */
+
+ if (next->vme_start >= end)
+ break;
+
+ /*
+ * Didn't fit -- move to the next entry.
+ */
+
+ entry = next;
+ start = entry->vme_end;
+ }
+
+ /*
+ * At this point,
+ * "start" and "end" should define the endpoints of the
+ * available new range, and
+ * "entry" should refer to the region before the new
+ * range, and
+ *
+ * the map should be locked.
+ */
+
+ *address = start;
+
+ /*
+ * See whether we can avoid creating a new entry by
+ * extending one of our neighbors. [So far, we only attempt to
+ * extend from below.]
+ */
+
+ if ((object != VM_OBJECT_NULL) &&
+ (entry != vm_map_to_entry(map)) &&
+ (entry->vme_end == start) &&
+ (!entry->is_shared) &&
+ (!entry->is_sub_map) &&
+ (entry->object.vm_object == object) &&
+ (entry->needs_copy == FALSE) &&
+ (entry->inheritance == VM_INHERIT_DEFAULT) &&
+ (entry->protection == VM_PROT_DEFAULT) &&
+ (entry->max_protection == VM_PROT_ALL) &&
+ (entry->wired_count == 1) &&
+ (entry->user_wired_count == 0) &&
+ (entry->projected_on == 0)) {
+ /*
+ * Because this is a special case,
+ * we don't need to use vm_object_coalesce.
+ */
+
+ entry->vme_end = end;
+ new_entry = entry;
+ } else {
+ new_entry = vm_map_entry_create(map);
+
+ new_entry->vme_start = start;
+ new_entry->vme_end = end;
+
+ new_entry->is_shared = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->object.vm_object = VM_OBJECT_NULL;
+ new_entry->offset = (vm_offset_t) 0;
+
+ new_entry->needs_copy = FALSE;
+
+ new_entry->inheritance = VM_INHERIT_DEFAULT;
+ new_entry->protection = VM_PROT_DEFAULT;
+ new_entry->max_protection = VM_PROT_ALL;
+ new_entry->wired_count = 1;
+ new_entry->user_wired_count = 0;
+
+ new_entry->in_transition = FALSE;
+ new_entry->needs_wakeup = FALSE;
+ new_entry->projected_on = 0;
+
+ /*
+ * Insert the new entry into the list
+ */
+
+ vm_map_entry_link(map, entry, new_entry);
+ }
+
+ map->size += size;
+
+ /*
+ * Update the free space hint and the lookup hint
+ */
+
+ map->first_free = new_entry;
+ SAVE_HINT(map, new_entry);
+
+ *o_entry = new_entry;
+ return(KERN_SUCCESS);
+}
+
+int vm_map_pmap_enter_print = FALSE;
+int vm_map_pmap_enter_enable = FALSE;
+
+/*
+ * Routine: vm_map_pmap_enter
+ *
+ * Description:
+ * Force pages from the specified object to be entered into
+ * the pmap at the specified address if they are present.
+ *		The scan ends as soon as a page is not found in the object.
+ *
+ * Returns:
+ * Nothing.
+ *
+ * In/out conditions:
+ * The source map should not be locked on entry.
+ */
+void
+vm_map_pmap_enter(map, addr, end_addr, object, offset, protection)
+ vm_map_t map;
+ register
+ vm_offset_t addr;
+ register
+ vm_offset_t end_addr;
+ register
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_prot_t protection;
+{
+ while (addr < end_addr) {
+ register vm_page_t m;
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+
+ m = vm_page_lookup(object, offset);
+ if (m == VM_PAGE_NULL || m->absent) {
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ return;
+ }
+
+ if (vm_map_pmap_enter_print) {
+ printf("vm_map_pmap_enter:");
+ printf("map: %x, addr: %x, object: %x, offset: %x\n",
+ map, addr, object, offset);
+ }
+
+ m->busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER(map->pmap, addr, m,
+ protection, FALSE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(m);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ offset += PAGE_SIZE;
+ addr += PAGE_SIZE;
+ }
+}
+
+/*
+ * Routine: vm_map_enter
+ *
+ * Description:
+ * Allocate a range in the specified virtual address map.
+ * The resulting range will refer to memory defined by
+ * the given memory object and offset into that object.
+ *
+ * Arguments are as defined in the vm_map call.
+ */
+kern_return_t vm_map_enter(
+ map,
+ address, size, mask, anywhere,
+ object, offset, needs_copy,
+ cur_protection, max_protection, inheritance)
+ register
+ vm_map_t map;
+ vm_offset_t *address; /* IN/OUT */
+ vm_size_t size;
+ vm_offset_t mask;
+ boolean_t anywhere;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t needs_copy;
+ vm_prot_t cur_protection;
+ vm_prot_t max_protection;
+ vm_inherit_t inheritance;
+{
+ register vm_map_entry_t entry;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ kern_return_t result = KERN_SUCCESS;
+
+#define RETURN(value) { result = value; goto BailOut; }
+
+ StartAgain: ;
+
+ start = *address;
+
+ if (anywhere) {
+ vm_map_lock(map);
+
+ /*
+ * Calculate the first possible address.
+ */
+
+ if (start < map->min_offset)
+ start = map->min_offset;
+ if (start > map->max_offset)
+ RETURN(KERN_NO_SPACE);
+
+ /*
+ * Look for the first possible address;
+ * if there's already something at this
+ * address, we have to start after it.
+ */
+
+ if (start == map->min_offset) {
+ if ((entry = map->first_free) != vm_map_to_entry(map))
+ start = entry->vme_end;
+ } else {
+ vm_map_entry_t tmp_entry;
+ if (vm_map_lookup_entry(map, start, &tmp_entry))
+ start = tmp_entry->vme_end;
+ entry = tmp_entry;
+ }
+
+ /*
+ * In any case, the "entry" always precedes
+ * the proposed new region throughout the
+ * loop:
+ */
+
+ while (TRUE) {
+ register vm_map_entry_t next;
+
+ /*
+ * Find the end of the proposed new region.
+ * Be sure we didn't go beyond the end, or
+ * wrap around the address.
+ */
+
+			if (((start + mask) & ~mask) < start)
+				RETURN(KERN_NO_SPACE);
+ start = ((start + mask) & ~mask);
+ end = start + size;
+
+ if ((end > map->max_offset) || (end < start)) {
+ if (map->wait_for_space) {
+ if (size <= (map->max_offset -
+ map->min_offset)) {
+ assert_wait((event_t) map, TRUE);
+ vm_map_unlock(map);
+ thread_block((void (*)()) 0);
+ goto StartAgain;
+ }
+ }
+
+ RETURN(KERN_NO_SPACE);
+ }
+
+ /*
+ * If there are no more entries, we must win.
+ */
+
+ next = entry->vme_next;
+ if (next == vm_map_to_entry(map))
+ break;
+
+ /*
+ * If there is another entry, it must be
+ * after the end of the potential new region.
+ */
+
+ if (next->vme_start >= end)
+ break;
+
+ /*
+ * Didn't fit -- move to the next entry.
+ */
+
+ entry = next;
+ start = entry->vme_end;
+ }
+ *address = start;
+ } else {
+ vm_map_entry_t temp_entry;
+
+ /*
+ * Verify that:
+ * the address doesn't itself violate
+ * the mask requirement.
+ */
+
+ if ((start & mask) != 0)
+ return(KERN_NO_SPACE);
+
+ vm_map_lock(map);
+
+ /*
+ * ... the address is within bounds
+ */
+
+ end = start + size;
+
+ if ((start < map->min_offset) ||
+ (end > map->max_offset) ||
+ (start >= end)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * ... the starting address isn't allocated
+ */
+
+ if (vm_map_lookup_entry(map, start, &temp_entry))
+ RETURN(KERN_NO_SPACE);
+
+ entry = temp_entry;
+
+ /*
+ * ... the next region doesn't overlap the
+ * end point.
+ */
+
+ if ((entry->vme_next != vm_map_to_entry(map)) &&
+ (entry->vme_next->vme_start < end))
+ RETURN(KERN_NO_SPACE);
+ }
+
+ /*
+ * At this point,
+ * "start" and "end" should define the endpoints of the
+ * available new range, and
+ * "entry" should refer to the region before the new
+ * range, and
+ *
+ * the map should be locked.
+ */
+
+ /*
+ * See whether we can avoid creating a new entry (and object) by
+ * extending one of our neighbors. [So far, we only attempt to
+ * extend from below.]
+ */
+
+ if ((object == VM_OBJECT_NULL) &&
+ (entry != vm_map_to_entry(map)) &&
+ (entry->vme_end == start) &&
+ (!entry->is_shared) &&
+ (!entry->is_sub_map) &&
+ (entry->inheritance == inheritance) &&
+ (entry->protection == cur_protection) &&
+ (entry->max_protection == max_protection) &&
+ (entry->wired_count == 0) && /* implies user_wired_count == 0 */
+ (entry->projected_on == 0)) {
+ if (vm_object_coalesce(entry->object.vm_object,
+ VM_OBJECT_NULL,
+ entry->offset,
+ (vm_offset_t) 0,
+ (vm_size_t)(entry->vme_end - entry->vme_start),
+ (vm_size_t)(end - entry->vme_end))) {
+
+ /*
+ * Coalesced the two objects - can extend
+ * the previous map entry to include the
+ * new range.
+ */
+ map->size += (end - entry->vme_end);
+ entry->vme_end = end;
+ RETURN(KERN_SUCCESS);
+ }
+ }
+
+ /*
+ * Create a new entry
+ */
+
+ /**/ {
+ register vm_map_entry_t new_entry;
+
+ new_entry = vm_map_entry_create(map);
+
+ new_entry->vme_start = start;
+ new_entry->vme_end = end;
+
+ new_entry->is_shared = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->object.vm_object = object;
+ new_entry->offset = offset;
+
+ new_entry->needs_copy = needs_copy;
+
+ new_entry->inheritance = inheritance;
+ new_entry->protection = cur_protection;
+ new_entry->max_protection = max_protection;
+ new_entry->wired_count = 0;
+ new_entry->user_wired_count = 0;
+
+ new_entry->in_transition = FALSE;
+ new_entry->needs_wakeup = FALSE;
+ new_entry->projected_on = 0;
+
+ /*
+ * Insert the new entry into the list
+ */
+
+ vm_map_entry_link(map, entry, new_entry);
+ map->size += size;
+
+ /*
+ * Update the free space hint and the lookup hint
+ */
+
+ if ((map->first_free == entry) &&
+ ((entry == vm_map_to_entry(map) ? map->min_offset : entry->vme_end)
+ >= new_entry->vme_start))
+ map->first_free = new_entry;
+
+ SAVE_HINT(map, new_entry);
+
+ vm_map_unlock(map);
+
+ if ((object != VM_OBJECT_NULL) &&
+ (vm_map_pmap_enter_enable) &&
+ (!anywhere) &&
+ (!needs_copy) &&
+ (size < (128*1024))) {
+ vm_map_pmap_enter(map, start, end,
+ object, offset, cur_protection);
+ }
+
+ return(result);
+ /**/ }
+
+ BailOut: ;
+
+ vm_map_unlock(map);
+ return(result);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_clip_start: [ internal use only ]
+ *
+ * Asserts that the given entry begins at or after
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
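+/*
+ * Illustrative example (hypothetical addresses): clipping an entry
+ * covering [0x1000, 0x5000) at startaddr 0x3000 leaves this entry as
+ * [0x3000, 0x5000) with its object offset advanced by 0x2000, and
+ * inserts a new entry for [0x1000, 0x3000) immediately before it.
+ */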
+void _vm_map_clip_start();
+#define vm_map_clip_start(map, entry, startaddr) \
+ MACRO_BEGIN \
+ if ((startaddr) > (entry)->vme_start) \
+ _vm_map_clip_start(&(map)->hdr,(entry),(startaddr)); \
+ MACRO_END
+
+void _vm_map_copy_clip_start();
+#define vm_map_copy_clip_start(copy, entry, startaddr) \
+ MACRO_BEGIN \
+ if ((startaddr) > (entry)->vme_start) \
+ _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
+ MACRO_END
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+void _vm_map_clip_start(map_header, entry, start)
+ register struct vm_map_header *map_header;
+ register vm_map_entry_t entry;
+ register vm_offset_t start;
+{
+ register vm_map_entry_t new_entry;
+
+ /*
+ * Split off the front portion --
+ * note that we must insert the new
+ * entry BEFORE this one, so that
+ * this entry has the specified starting
+ * address.
+ */
+
+ new_entry = _vm_map_entry_create(map_header);
+ vm_map_entry_copy_full(new_entry, entry);
+
+ new_entry->vme_end = start;
+ entry->offset += (start - entry->vme_start);
+ entry->vme_start = start;
+
+ _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
+
+ if (entry->is_sub_map)
+ vm_map_reference(new_entry->object.sub_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
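+/*
+ * Illustrative example (hypothetical addresses): clipping an entry
+ * covering [0x1000, 0x5000) at endaddr 0x3000 leaves this entry as
+ * [0x1000, 0x3000) and inserts a new entry for [0x3000, 0x5000),
+ * with its object offset advanced by 0x2000, immediately after it.
+ */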
+void _vm_map_clip_end();
+#define vm_map_clip_end(map, entry, endaddr) \
+ MACRO_BEGIN \
+ if ((endaddr) < (entry)->vme_end) \
+ _vm_map_clip_end(&(map)->hdr,(entry),(endaddr)); \
+ MACRO_END
+
+void _vm_map_copy_clip_end();
+#define vm_map_copy_clip_end(copy, entry, endaddr) \
+ MACRO_BEGIN \
+ if ((endaddr) < (entry)->vme_end) \
+ _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
+ MACRO_END
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+void _vm_map_clip_end(map_header, entry, end)
+ register struct vm_map_header *map_header;
+ register vm_map_entry_t entry;
+ register vm_offset_t end;
+{
+ register vm_map_entry_t new_entry;
+
+ /*
+ * Create a new entry and insert it
+ * AFTER the specified entry
+ */
+
+ new_entry = _vm_map_entry_create(map_header);
+ vm_map_entry_copy_full(new_entry, entry);
+
+ new_entry->vme_start = entry->vme_end = end;
+ new_entry->offset += (end - entry->vme_start);
+
+ _vm_map_entry_link(map_header, entry, new_entry);
+
+ if (entry->is_sub_map)
+ vm_map_reference(new_entry->object.sub_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * VM_MAP_RANGE_CHECK: [ internal use only ]
+ *
+ * Asserts that the starting and ending region
+ * addresses fall within the valid range of the map.
+ */
+#define VM_MAP_RANGE_CHECK(map, start, end) \
+ { \
+ if (start < vm_map_min(map)) \
+ start = vm_map_min(map); \
+ if (end > vm_map_max(map)) \
+ end = vm_map_max(map); \
+ if (start > end) \
+ start = end; \
+ }
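+/*
+ * For example, a requested range extending past vm_map_max(map) is
+ * silently truncated at the map's maximum offset, and a range lying
+ * entirely outside the map collapses to an empty range (start == end).
+ */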
+
+/*
+ * vm_map_submap: [ kernel use only ]
+ *
+ * Mark the given range as handled by a subordinate map.
+ *
+ * This range must have been created with vm_map_find using
+ * the vm_submap_object, and no other operations may have been
+ * performed on this range prior to calling vm_map_submap.
+ *
+ * Only a limited number of operations can be performed
+ * within this range after calling vm_map_submap:
+ * vm_fault
+ * [Don't try vm_map_copyin!]
+ *
+ * To remove a submapping, one must first remove the
+ * range from the superior map, and then destroy the
+ * submap (if desired). [Better yet, don't try it.]
+ */
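+/*
+ * Concretely, the code below accepts the request only when the range is
+ * covered by exactly one entry still backed by the pristine
+ * vm_submap_object (no resident pages, copy, shadow, or pager); that
+ * object reference is then dropped and replaced by a reference to the
+ * submap.
+ */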
+kern_return_t vm_map_submap(map, start, end, submap)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ vm_map_t submap;
+{
+ vm_map_entry_t entry;
+ register kern_return_t result = KERN_INVALID_ARGUMENT;
+ register vm_object_t object;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->vme_next;
+
+ vm_map_clip_end(map, entry, end);
+
+ if ((entry->vme_start == start) && (entry->vme_end == end) &&
+ (!entry->is_sub_map) &&
+ ((object = entry->object.vm_object) == vm_submap_object) &&
+ (object->resident_page_count == 0) &&
+ (object->copy == VM_OBJECT_NULL) &&
+ (object->shadow == VM_OBJECT_NULL) &&
+ (!object->pager_created)) {
+ entry->object.vm_object = VM_OBJECT_NULL;
+ vm_object_deallocate(object);
+ entry->is_sub_map = TRUE;
+ vm_map_reference(entry->object.sub_map = submap);
+ result = KERN_SUCCESS;
+ }
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+/*
+ * vm_map_protect:
+ *
+ * Sets the protection of the specified address
+ * region in the target map. If "set_max" is
+ * specified, the maximum protection is to be set;
+ * otherwise, only the current protection is affected.
+ */
+kern_return_t vm_map_protect(map, start, end, new_prot, set_max)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_prot_t new_prot;
+ register boolean_t set_max;
+{
+ register vm_map_entry_t current;
+ vm_map_entry_t entry;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->vme_next;
+
+ /*
+ * Make a first pass to check for protection
+ * violations.
+ */
+
+ current = entry;
+ while ((current != vm_map_to_entry(map)) &&
+ (current->vme_start < end)) {
+
+ if (current->is_sub_map) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ if ((new_prot & (VM_PROT_NOTIFY | current->max_protection))
+ != new_prot) {
+ vm_map_unlock(map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ current = current->vme_next;
+ }
+
+ /*
+ * Go back and fix up protections.
+ * [The start was clipped before the first pass; only the
+ * end of the last entry may still need clipping here.]
+ */
+
+ current = entry;
+
+ while ((current != vm_map_to_entry(map)) &&
+ (current->vme_start < end)) {
+
+ vm_prot_t old_prot;
+
+ vm_map_clip_end(map, current, end);
+
+ old_prot = current->protection;
+ if (set_max)
+ current->protection =
+ (current->max_protection = new_prot) &
+ old_prot;
+ else
+ current->protection = new_prot;
+
+ /*
+ * Update physical map if necessary.
+ */
+
+ if (current->protection != old_prot) {
+ pmap_protect(map->pmap, current->vme_start,
+ current->vme_end,
+ current->protection);
+ }
+ current = current->vme_next;
+ }
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_inherit:
+ *
+ * Sets the inheritance of the specified address
+ * range in the target map. Inheritance
+ * affects how the map will be shared with
+ * child maps at the time of vm_map_fork.
+ */
+kern_return_t vm_map_inherit(map, start, end, new_inheritance)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_inherit_t new_inheritance;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t temp_entry;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &temp_entry)) {
+ entry = temp_entry;
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = temp_entry->vme_next;
+
+ while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ entry->inheritance = new_inheritance;
+
+ entry = entry->vme_next;
+ }
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_pageable_common:
+ *
+ * Sets the pageability of the specified address
+ * range in the target map. Regions specified
+ * as not pageable require locked-down physical
+ * memory and physical page maps. access_type indicates
+ * types of accesses that must not generate page faults.
+ * This is checked against protection of memory being locked-down.
+ * access_type of VM_PROT_NONE makes memory pageable.
+ *
+ * The map must not be locked, but a reference
+ * must remain to the map throughout the call.
+ *
+ * Callers should use the macros in vm/vm_map.h (i.e. vm_map_pageable
+ * or vm_map_pageable_user); don't call vm_map_pageable_common directly.
+ */
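+/*
+ * (The vm/vm_map.h wrappers presumably differ only in the user_wire
+ * argument they supply: FALSE for kernel wiring via vm_map_pageable,
+ * TRUE for user wiring via vm_map_pageable_user.)
+ */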
+kern_return_t vm_map_pageable_common(map, start, end, access_type, user_wire)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_prot_t access_type;
+ boolean_t user_wire;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t start_entry;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &start_entry)) {
+ entry = start_entry;
+ /*
+ * vm_map_clip_start will be done later.
+ */
+ }
+ else {
+ /*
+ * Start address is not in map; this is fatal.
+ */
+ vm_map_unlock(map);
+ return(KERN_FAILURE);
+ }
+
+ /*
+ * Actions are rather different for wiring and unwiring,
+ * so we have two separate cases.
+ */
+
+ if (access_type == VM_PROT_NONE) {
+
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Unwiring. First ensure that the range to be
+ * unwired is really wired down.
+ */
+ while ((entry != vm_map_to_entry(map)) &&
+ (entry->vme_start < end)) {
+
+ if ((entry->wired_count == 0) ||
+ ((entry->vme_end < end) &&
+ ((entry->vme_next == vm_map_to_entry(map)) ||
+ (entry->vme_next->vme_start > entry->vme_end))) ||
+ (user_wire && (entry->user_wired_count == 0))) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->vme_next;
+ }
+
+ /*
+ * Now decrement the wiring count for each region.
+ * If a region becomes completely unwired,
+ * unwire its physical pages and mappings.
+ */
+ entry = start_entry;
+ while ((entry != vm_map_to_entry(map)) &&
+ (entry->vme_start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ if (user_wire) {
+ if (--(entry->user_wired_count) == 0)
+ entry->wired_count--;
+ }
+ else {
+ entry->wired_count--;
+ }
+
+ if (entry->wired_count == 0)
+ vm_fault_unwire(map, entry);
+
+ entry = entry->vme_next;
+ }
+ }
+
+ else {
+ /*
+ * Wiring. We must do this in two passes:
+ *
+ * 1. Holding the write lock, we create any shadow
+ * or zero-fill objects that need to be created.
+ * Then we clip each map entry to the region to be
+ * wired and increment its wiring count. We
+ * create objects before clipping the map entries
+ * to avoid object proliferation.
+ *
+ * 2. We downgrade to a read lock, and call
+ * vm_fault_wire to fault in the pages for any
+ * newly wired area (wired_count is 1).
+ *
+ * Downgrading to a read lock for vm_fault_wire avoids
+ * a possible deadlock with another thread that may have
+ * faulted on one of the pages to be wired (it would mark
+ * the page busy, blocking us, then in turn block on the
+ * map lock that we hold). Because of problems in the
+ * recursive lock package, we cannot upgrade to a write
+ * lock in vm_map_lookup. Thus, any actions that require
+ * the write lock must be done beforehand. Because we
+ * keep the read lock on the map, the copy-on-write
+ * status of the entries we modify here cannot change.
+ */
+
+ /*
+ * Pass 1.
+ */
+ while ((entry != vm_map_to_entry(map)) &&
+ (entry->vme_start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ if (entry->wired_count == 0) {
+
+ /*
+ * Perform actions of vm_map_lookup that need
+ * the write lock on the map: create a shadow
+ * object for a copy-on-write region, or an
+ * object for a zero-fill region.
+ */
+ if (entry->needs_copy &&
+ ((entry->protection & VM_PROT_WRITE) != 0)) {
+
+ vm_object_shadow(&entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t)(entry->vme_end
+ - entry->vme_start));
+ entry->needs_copy = FALSE;
+ }
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ entry->object.vm_object =
+ vm_object_allocate(
+ (vm_size_t)(entry->vme_end
+ - entry->vme_start));
+ entry->offset = (vm_offset_t)0;
+ }
+ }
+ vm_map_clip_start(map, entry, start);
+ vm_map_clip_end(map, entry, end);
+
+ if (user_wire) {
+ if ((entry->user_wired_count)++ == 0)
+ entry->wired_count++;
+ }
+ else {
+ entry->wired_count++;
+ }
+
+ /*
+ * Check for holes and protection mismatch.
+ * Holes: Next entry should be contiguous unless
+ * this is the end of the region.
+ * Protection: Access requested must be allowed.
+ */
+ if (((entry->vme_end < end) &&
+ ((entry->vme_next == vm_map_to_entry(map)) ||
+ (entry->vme_next->vme_start > entry->vme_end))) ||
+ ((entry->protection & access_type) != access_type)) {
+ /*
+ * Found a hole or protection problem.
+ * Object creation actions
+ * do not need to be undone, but the
+ * wired counts need to be restored.
+ */
+ while ((entry != vm_map_to_entry(map)) &&
+ (entry->vme_end > start)) {
+ if (user_wire) {
+ if (--(entry->user_wired_count) == 0)
+ entry->wired_count--;
+ }
+ else {
+ entry->wired_count--;
+ }
+
+ entry = entry->vme_prev;
+ }
+
+ vm_map_unlock(map);
+ return(KERN_FAILURE);
+ }
+ entry = entry->vme_next;
+ }
+
+ /*
+ * Pass 2.
+ */
+
+ /*
+ * HACK HACK HACK HACK
+ *
+ * If we are wiring in the kernel map or a submap of it,
+ * unlock the map to avoid deadlocks. We trust that the
+ * kernel threads are well-behaved, and therefore will
+ * not do anything destructive to this region of the map
+ * while we have it unlocked. We cannot trust user threads
+ * to do the same.
+ *
+ * HACK HACK HACK HACK
+ */
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_unlock(map); /* trust me ... */
+ }
+ else {
+ vm_map_lock_set_recursive(map);
+ vm_map_lock_write_to_read(map);
+ }
+
+ entry = start_entry;
+ while (entry != vm_map_to_entry(map) &&
+ entry->vme_start < end) {
+ /*
+ * Wiring cases:
+ * Kernel: wired == 1 && user_wired == 0
+ * User: wired == 1 && user_wired == 1
+ *
+ * Don't need to wire if either is > 1. wired == 0 &&
+ * user_wired == 1 can't happen.
+ */
+
+ /*
+ * XXX This assumes that the faults always succeed.
+ */
+ if ((entry->wired_count == 1) &&
+ (entry->user_wired_count <= 1)) {
+ vm_fault_wire(map, entry);
+ }
+ entry = entry->vme_next;
+ }
+
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_lock(map);
+ }
+ else {
+ vm_map_lock_clear_recursive(map);
+ }
+ }
+
+ vm_map_unlock(map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_entry_delete: [ internal use only ]
+ *
+ * Deallocate the given entry from the target map.
+ */
+void vm_map_entry_delete(map, entry)
+ register vm_map_t map;
+ register vm_map_entry_t entry;
+{
+ register vm_offset_t s, e;
+ register vm_object_t object;
+ extern vm_object_t kernel_object;
+
+ s = entry->vme_start;
+ e = entry->vme_end;
+
+ /*Check if projected buffer*/
+ if (map != kernel_map && entry->projected_on != 0) {
+ /*Check if projected kernel entry is persistent;
+ may only manipulate directly if it is*/
+ if (entry->projected_on->projected_on == 0)
+ entry->wired_count = 0; /*Avoid unwire fault*/
+ else
+ return;
+ }
+
+ /*
+ * Get the object. Null objects cannot have pmap entries.
+ */
+
+ if ((object = entry->object.vm_object) != VM_OBJECT_NULL) {
+
+ /*
+ * Unwire before removing addresses from the pmap;
+ * otherwise, unwiring will put the entries back in
+ * the pmap.
+ */
+
+ if (entry->wired_count != 0) {
+ vm_fault_unwire(map, entry);
+ entry->wired_count = 0;
+ entry->user_wired_count = 0;
+ }
+
+ /*
+ * If the object is shared, we must remove
+ * *all* references to this data, since we can't
+ * find all of the physical maps which are sharing
+ * it.
+ */
+
+ if (object == kernel_object) {
+ vm_object_lock(object);
+ vm_object_page_remove(object, entry->offset,
+ entry->offset + (e - s));
+ vm_object_unlock(object);
+ } else if (entry->is_shared) {
+ vm_object_pmap_remove(object,
+ entry->offset,
+ entry->offset + (e - s));
+ }
+ else {
+ pmap_remove(map->pmap, s, e);
+ }
+ }
+
+ /*
+ * Deallocate the object only after removing all
+ * pmap entries pointing to its pages.
+ */
+
+ if (entry->is_sub_map)
+ vm_map_deallocate(entry->object.sub_map);
+ else
+ vm_object_deallocate(entry->object.vm_object);
+
+ vm_map_entry_unlink(map, entry);
+ map->size -= e - s;
+
+ vm_map_entry_dispose(map, entry);
+}
+
+/*
+ * vm_map_delete: [ internal use only ]
+ *
+ * Deallocates the given address range from the target
+ * map.
+ */
+
+kern_return_t vm_map_delete(map, start, end)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t first_entry;
+
+ /*
+ * Find the start of the region, and clip it
+ */
+
+ if (!vm_map_lookup_entry(map, start, &first_entry))
+ entry = first_entry->vme_next;
+ else {
+ entry = first_entry;
+#if NORMA_IPC_xxx
+ /*
+ * XXX Had to disable this code because:
+
+ _vm_map_delete(c0804b78,c2198000,c219a000,0,c219a000)+df
+ [vm/vm_map.c:2007]
+ _vm_map_remove(c0804b78,c2198000,c219a000,c0817834,
+ c081786c)+42 [vm/vm_map.c:2094]
+ _kmem_io_map_deallocate(c0804b78,c2198000,2000,c0817834,
+ c081786c)+43 [vm/vm_kern.c:818]
+ _device_write_dealloc(c081786c)+117 [device/ds_routines.c:814]
+ _ds_write_done(c081786c,0)+2e [device/ds_routines.c:848]
+ _io_done_thread_continue(c08150c0,c21d4e14,c21d4e30,c08150c0,
+ c080c114)+14 [device/ds_routines.c:1350]
+
+ */
+ if (start > entry->vme_start
+ && end == entry->vme_end
+ && ! entry->wired_count /* XXX ??? */
+ && ! entry->is_shared
+ && ! entry->projected_on
+ && ! entry->is_sub_map) {
+ extern vm_object_t kernel_object;
+ register vm_object_t object = entry->object.vm_object;
+
+ /*
+ * The region to be deleted lives at the end
+ * of this entry, and thus all we have to do is
+ * truncate the entry.
+ *
+ * This special case is necessary if we want
+ * coalescing to do us any good.
+ *
+ * XXX Do we have to adjust object size?
+ */
+ if (object == kernel_object) {
+ vm_object_lock(object);
+ vm_object_page_remove(object,
+ entry->offset + start,
+ entry->offset +
+ (end - start));
+ vm_object_unlock(object);
+ } else if (entry->is_shared) {
+ vm_object_pmap_remove(object,
+ entry->offset + start,
+ entry->offset +
+ (end - start));
+ } else {
+ pmap_remove(map->pmap, start, end);
+ }
+ object->size -= (end - start); /* XXX */
+
+ entry->vme_end = start;
+ map->size -= (end - start);
+
+ if (map->wait_for_space) {
+ thread_wakeup((event_t) map);
+ }
+ return KERN_SUCCESS;
+ }
+#endif /* NORMA_IPC_xxx */
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Fix the lookup hint now, rather than each
+ * time through the loop.
+ */
+
+ SAVE_HINT(map, entry->vme_prev);
+ }
+
+ /*
+ * Save the free space hint
+ */
+
+ if (map->first_free->vme_start >= start)
+ map->first_free = entry->vme_prev;
+
+ /*
+ * Step through all entries in this region
+ */
+
+ while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
+ vm_map_entry_t next;
+
+ vm_map_clip_end(map, entry, end);
+
+ /*
+ * If the entry is in transition, we must wait
+ * for it to exit that state. It could be clipped
+ * while we leave the map unlocked.
+ */
+ if(entry->in_transition) {
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(map, FALSE);
+ vm_map_lock(map);
+
+ /*
+ * The entry could have been clipped or it
+ * may not exist anymore. Look it up again.
+ */
+ if(!vm_map_lookup_entry(map, start, &entry)) {
+ entry = entry->vme_next;
+ }
+ continue;
+ }
+
+ next = entry->vme_next;
+
+ vm_map_entry_delete(map, entry);
+ entry = next;
+ }
+
+ if (map->wait_for_space)
+ thread_wakeup((event_t) map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_remove:
+ *
+ * Remove the given address range from the target map.
+ * This is the exported form of vm_map_delete.
+ */
+kern_return_t vm_map_remove(map, start, end)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ register kern_return_t result;
+
+ vm_map_lock(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ result = vm_map_delete(map, start, end);
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+
+/*
+ * vm_map_copy_steal_pages:
+ *
+ * Steal all the pages from a vm_map_copy page_list by copying ones
+ * that have not already been stolen.
+ */
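+/*
+ * A page that is no longer tabled in any object is already private to
+ * the copy; tabled pages are replaced with freshly grabbed copies so
+ * that the originals can be released back to their objects.
+ */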
+void
+vm_map_copy_steal_pages(copy)
+vm_map_copy_t copy;
+{
+ register vm_page_t m, new_m;
+ register int i;
+ vm_object_t object;
+
+ for (i = 0; i < copy->cpy_npages; i++) {
+
+ /*
+ * If the page is not tabled, then it's already stolen.
+ */
+ m = copy->cpy_page_list[i];
+ if (!m->tabled)
+ continue;
+
+ /*
+ * Page was not stolen; get a new
+ * one and do the copy now.
+ */
+ while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
+ VM_PAGE_WAIT((void(*)()) 0);
+ }
+
+ vm_page_copy(m, new_m);
+
+ object = m->object;
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ copy->cpy_page_list[i] = new_m;
+ }
+}
+
+/*
+ * vm_map_copy_page_discard:
+ *
+ * Get rid of the pages in a page_list copy. If the pages are
+ * stolen, they are freed. If the pages are not stolen, they
+ * are unbusied, and associated state is cleaned up.
+ */
+void vm_map_copy_page_discard(copy)
+vm_map_copy_t copy;
+{
+ while (copy->cpy_npages > 0) {
+ vm_page_t m;
+
+ if((m = copy->cpy_page_list[--(copy->cpy_npages)]) !=
+ VM_PAGE_NULL) {
+
+ /*
+ * If it's not in the table, then it's
+ * a stolen page that goes back
+ * to the free list. Else it belongs
+ * to some object, and we hold a
+ * paging reference on that object.
+ */
+ if (!m->tabled) {
+ VM_PAGE_FREE(m);
+ }
+ else {
+ vm_object_t object;
+
+ object = m->object;
+
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+ }
+ }
+}
+
+/*
+ * Routine: vm_map_copy_discard
+ *
+ * Description:
+ * Dispose of a map copy object (returned by
+ * vm_map_copyin).
+ */
+void
+vm_map_copy_discard(copy)
+ vm_map_copy_t copy;
+{
+free_next_copy:
+ if (copy == VM_MAP_COPY_NULL)
+ return;
+
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ while (vm_map_copy_first_entry(copy) !=
+ vm_map_copy_to_entry(copy)) {
+ vm_map_entry_t entry = vm_map_copy_first_entry(copy);
+
+ vm_map_copy_entry_unlink(copy, entry);
+ vm_object_deallocate(entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, entry);
+ }
+ break;
+ case VM_MAP_COPY_OBJECT:
+ vm_object_deallocate(copy->cpy_object);
+ break;
+ case VM_MAP_COPY_PAGE_LIST:
+
+ /*
+ * To clean this up, we have to unbusy all the pages
+ * and release the paging references in their objects.
+ */
+ if (copy->cpy_npages > 0)
+ vm_map_copy_page_discard(copy);
+
+ /*
+ * If there's a continuation, abort it. The
+ * abort routine releases any storage.
+ */
+ if (vm_map_copy_has_cont(copy)) {
+
+ /*
+ * Special case: recognize
+ * vm_map_copy_discard_cont and optimize
+ * here to avoid tail recursion.
+ */
+ if (copy->cpy_cont == vm_map_copy_discard_cont) {
+ register vm_map_copy_t new_copy;
+
+ new_copy = (vm_map_copy_t) copy->cpy_cont_args;
+ zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ copy = new_copy;
+ goto free_next_copy;
+ }
+ else {
+ vm_map_copy_abort_cont(copy);
+ }
+ }
+
+ break;
+ }
+ zfree(vm_map_copy_zone, (vm_offset_t) copy);
+}
+
+/*
+ * Routine: vm_map_copy_copy
+ *
+ * Description:
+ * Move the information in a map copy object to
+ * a new map copy object, leaving the old one
+ * empty.
+ *
+ * This is used by kernel routines that need
+ * to look at out-of-line data (in copyin form)
+ * before deciding whether to return SUCCESS.
+ * If the routine returns FAILURE, the original
+ * copy object will be deallocated; therefore,
+ * these routines must make a copy of the copy
+ * object and leave the original empty so that
+ * deallocation will not fail.
+ */
+vm_map_copy_t
+vm_map_copy_copy(copy)
+ vm_map_copy_t copy;
+{
+ vm_map_copy_t new_copy;
+
+ if (copy == VM_MAP_COPY_NULL)
+ return VM_MAP_COPY_NULL;
+
+ /*
+ * Allocate a new copy object, and copy the information
+ * from the old one into it.
+ */
+
+ new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ *new_copy = *copy;
+
+ if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
+ /*
+ * The links in the entry chain must be
+ * changed to point to the new copy object.
+ */
+ vm_map_copy_first_entry(copy)->vme_prev
+ = vm_map_copy_to_entry(new_copy);
+ vm_map_copy_last_entry(copy)->vme_next
+ = vm_map_copy_to_entry(new_copy);
+ }
+
+ /*
+ * Change the old copy object into one that contains
+ * nothing to be deallocated.
+ */
+ copy->type = VM_MAP_COPY_OBJECT;
+ copy->cpy_object = VM_OBJECT_NULL;
+
+ /*
+ * Return the new object.
+ */
+ return new_copy;
+}
+
+/*
+ * Routine: vm_map_copy_discard_cont
+ *
+ * Description:
+ * A version of vm_map_copy_discard that can be called
+ * as a continuation from a vm_map_copy page list.
+ */
+kern_return_t vm_map_copy_discard_cont(cont_args, copy_result)
+vm_map_copyin_args_t cont_args;
+vm_map_copy_t *copy_result; /* OUT */
+{
+ vm_map_copy_discard((vm_map_copy_t) cont_args);
+ if (copy_result != (vm_map_copy_t *)0)
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: vm_map_copy_overwrite
+ *
+ * Description:
+ * Copy the memory described by the map copy
+ * object (copy; returned by vm_map_copyin) onto
+ * the specified destination region (dst_map, dst_addr).
+ * The destination must be writeable.
+ *
+ * Unlike vm_map_copyout, this routine actually
+ * writes over previously-mapped memory. If the
+ * previous mapping was to a permanent (user-supplied)
+ * memory object, it is preserved.
+ *
+ * The attributes (protection and inheritance) of the
+ * destination region are preserved.
+ *
+ * If successful, consumes the copy object.
+ * Otherwise, the caller is responsible for it.
+ *
+ * Implementation notes:
+ * To overwrite temporary virtual memory, it is
+ * sufficient to remove the previous mapping and insert
+ * the new copy. This replacement is done either on
+ * the whole region (if no permanent virtual memory
+ * objects are embedded in the destination region) or
+ * in individual map entries.
+ *
+ * To overwrite permanent virtual memory, it is
+ * necessary to copy each page, as the external
+ * memory management interface currently does not
+ * provide any optimizations.
+ *
+ * Once a page of permanent memory has been overwritten,
+ * it is impossible to interrupt this function; otherwise,
+ * the call would be neither atomic nor location-independent.
+ * The kernel-state portion of a user thread must be
+ * interruptible.
+ *
+ * It may be expensive to forward all requests that might
+ * overwrite permanent memory (vm_write, vm_copy) to
+ * uninterruptible kernel threads. This routine may be
+ * called by interruptible threads; however, success is
+ * not guaranteed -- if the request cannot be performed
+ * atomically and interruptibly, an error indication is
+ * returned.
+ */
+kern_return_t vm_map_copy_overwrite(dst_map, dst_addr, copy, interruptible)
+ vm_map_t dst_map;
+ vm_offset_t dst_addr;
+ vm_map_copy_t copy;
+ boolean_t interruptible;
+{
+ vm_size_t size;
+ vm_offset_t start;
+ vm_map_entry_t tmp_entry;
+ vm_map_entry_t entry;
+
+ boolean_t contains_permanent_objects = FALSE;
+
+ interruptible = FALSE; /* XXX */
+
+ /*
+ * Check for null copy object.
+ */
+
+ if (copy == VM_MAP_COPY_NULL)
+ return(KERN_SUCCESS);
+
+ /*
+ * Only works for entry lists at the moment. Will
+ * support page lists LATER.
+ */
+
+#if NORMA_IPC
+ vm_map_convert_from_page_list(copy);
+#else
+ assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
+#endif
+
+ /*
+ * Currently this routine only handles page-aligned
+ * regions. Eventually, it should handle misalignments
+ * by actually copying pages.
+ */
+
+ if (!page_aligned(copy->offset) ||
+ !page_aligned(copy->size) ||
+ !page_aligned(dst_addr))
+ return(KERN_INVALID_ARGUMENT);
+
+ size = copy->size;
+
+ if (size == 0) {
+ vm_map_copy_discard(copy);
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Verify that the destination is all writeable
+ * initially.
+ */
+start_pass_1:
+ vm_map_lock(dst_map);
+ if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ vm_map_clip_start(dst_map, tmp_entry, dst_addr);
+ for (entry = tmp_entry;;) {
+ vm_size_t sub_size = (entry->vme_end - entry->vme_start);
+ vm_map_entry_t next = entry->vme_next;
+
+ if ( ! (entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * If the entry is in transition, we must wait
+ * for it to exit that state. Anything could happen
+ * when we unlock the map, so start over.
+ */
+ if (entry->in_transition) {
+
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, FALSE);
+
+ goto start_pass_1;
+ }
+
+ if (size <= sub_size)
+ break;
+
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start != entry->vme_end)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+
+ /*
+ * Check for permanent objects in the destination.
+ */
+
+ if ((entry->object.vm_object != VM_OBJECT_NULL) &&
+ !entry->object.vm_object->temporary)
+ contains_permanent_objects = TRUE;
+
+ size -= sub_size;
+ entry = next;
+ }
+
+ /*
+ * If there are permanent objects in the destination, then
+ * the copy cannot be interrupted.
+ */
+
+ if (interruptible && contains_permanent_objects)
+ return(KERN_FAILURE); /* XXX */
+
+ /*
+ * XXXO If there are no permanent objects in the destination,
+ * XXXO and the source and destination map entry zones match,
+ * XXXO and the destination map entry is not shared,
+ * XXXO then the map entries can be deleted and replaced
+ * XXXO with those from the copy. The following code is the
+ * XXXO basic idea of what to do, but there are lots of annoying
+ * XXXO little details about getting protection and inheritance
+ * XXXO right. Should add protection, inheritance, and sharing checks
+ * XXXO to the above pass and make sure that no wiring is involved.
+ */
+/*
+ * if (!contains_permanent_objects &&
+ * copy->cpy_hdr.entries_pageable == dst_map->hdr.entries_pageable) {
+ *
+ * *
+ * * Run over copy and adjust entries. Steal code
+ * * from vm_map_copyout() to do this.
+ * *
+ *
+ * tmp_entry = tmp_entry->vme_prev;
+ * vm_map_delete(dst_map, dst_addr, dst_addr + copy->size);
+ * vm_map_copy_insert(dst_map, tmp_entry, copy);
+ *
+ * vm_map_unlock(dst_map);
+ * vm_map_copy_discard(copy);
+ * }
+ */
+ /*
+ * Make a second pass, overwriting the data.
+ * At the beginning of each loop iteration,
+ * the next entry to be overwritten is "tmp_entry"
+ * (initially, the value returned from the lookup above),
+ * and the starting address expected in that entry
+ * is "start".
+ */
+
+ start = dst_addr;
+
+ while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
+ vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
+ vm_size_t copy_size = (copy_entry->vme_end - copy_entry->vme_start);
+ vm_object_t object;
+
+ entry = tmp_entry;
+ size = (entry->vme_end - entry->vme_start);
+ /*
+ * Make sure that no holes popped up in the
+ * address map, and that the protection is
+ * still valid, in case the map was unlocked
+ * earlier.
+ */
+
+ if (entry->vme_start != start) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ assert(entry != vm_map_to_entry(dst_map));
+
+ /*
+ * Check protection again
+ */
+
+ if ( ! (entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * Adjust to source size first
+ */
+
+ if (copy_size < size) {
+ vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
+ size = copy_size;
+ }
+
+ /*
+ * Adjust to destination size
+ */
+
+ if (size < copy_size) {
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + size);
+ copy_size = size;
+ }
+
+ assert((entry->vme_end - entry->vme_start) == size);
+ assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
+ assert((copy_entry->vme_end - copy_entry->vme_start) == size);
+
+ /*
+ * If the destination contains temporary unshared memory,
+ * we can perform the copy by throwing it away and
+ * installing the source data.
+ */
+
+ object = entry->object.vm_object;
+ if (!entry->is_shared &&
+ ((object == VM_OBJECT_NULL) || object->temporary)) {
+ vm_object_t old_object = entry->object.vm_object;
+ vm_offset_t old_offset = entry->offset;
+
+ entry->object = copy_entry->object;
+ entry->offset = copy_entry->offset;
+ entry->needs_copy = copy_entry->needs_copy;
+ entry->wired_count = 0;
+ entry->user_wired_count = 0;
+
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+
+ vm_object_pmap_protect(
+ old_object,
+ old_offset,
+ size,
+ dst_map->pmap,
+ tmp_entry->vme_start,
+ VM_PROT_NONE);
+
+ vm_object_deallocate(old_object);
+
+ /*
+ * Set up for the next iteration. The map
+ * has not been unlocked, so the next
+ * address should be at the end of this
+ * entry, and the next map entry should be
+ * the one following it.
+ */
+
+ start = tmp_entry->vme_end;
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ vm_map_version_t version;
+ vm_object_t dst_object = entry->object.vm_object;
+ vm_offset_t dst_offset = entry->offset;
+ kern_return_t r;
+
+ /*
+ * Take an object reference, and record
+ * the map version information so that the
+ * map can be safely unlocked.
+ */
+
+ vm_object_reference(dst_object);
+
+ version.main_timestamp = dst_map->timestamp;
+
+ vm_map_unlock(dst_map);
+
+ /*
+ * Copy as much as possible in one pass
+ */
+
+ copy_size = size;
+ r = vm_fault_copy(
+ copy_entry->object.vm_object,
+ copy_entry->offset,
+ &copy_size,
+ dst_object,
+ dst_offset,
+ dst_map,
+ &version,
+ FALSE /* XXX interruptible */ );
+
+ /*
+ * Release the object reference
+ */
+
+ vm_object_deallocate(dst_object);
+
+ /*
+ * If a hard error occurred, return it now
+ */
+
+ if (r != KERN_SUCCESS)
+ return(r);
+
+ if (copy_size != 0) {
+ /*
+ * Dispose of the copied region
+ */
+
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + copy_size);
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_object_deallocate(copy_entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+ }
+
+ /*
+ * Pick up in the destination map where we left off.
+ *
+ * Use the version information to avoid a lookup
+ * in the normal case.
+ */
+
+ start += copy_size;
+ vm_map_lock(dst_map);
+ if ((version.main_timestamp + 1) == dst_map->timestamp) {
+ /* We can safely use saved tmp_entry value */
+
+ vm_map_clip_end(dst_map, tmp_entry, start);
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ /* Must do lookup of tmp_entry */
+
+ if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ vm_map_clip_start(dst_map, tmp_entry, start);
+ }
+ }
+
+ }
+ vm_map_unlock(dst_map);
+
+ /*
+ * Throw away the vm_map_copy object
+ */
+ vm_map_copy_discard(copy);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Macro: vm_map_copy_insert
+ *
+ * Description:
+ * Link a copy chain ("copy") into a map at the
+ * specified location (after "where").
+ * Side effects:
+ * The copy chain is destroyed.
+ * Warning:
+ * The arguments are evaluated multiple times.
+ */
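+/*
+ * In effect: the two splices below link the copy's entire entry chain
+ * into the map's entry list immediately after "where", the map's entry
+ * count is increased by the number of entries moved, and the now-empty
+ * copy header is freed.
+ */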
+#define vm_map_copy_insert(map, where, copy) \
+ MACRO_BEGIN \
+ (((where)->vme_next)->vme_prev = vm_map_copy_last_entry(copy)) \
+ ->vme_next = ((where)->vme_next); \
+ ((where)->vme_next = vm_map_copy_first_entry(copy)) \
+ ->vme_prev = (where); \
+ (map)->hdr.nentries += (copy)->cpy_hdr.nentries; \
+ zfree(vm_map_copy_zone, (vm_offset_t) copy); \
+ MACRO_END
+
+/*
+ * Routine: vm_map_copyout
+ *
+ * Description:
+ * Copy out a copy chain ("copy") into newly-allocated
+ * space in the destination map.
+ *
+ * If successful, consumes the copy object.
+ * Otherwise, the caller is responsible for it.
+ */
+kern_return_t vm_map_copyout(dst_map, dst_addr, copy)
+ register
+ vm_map_t dst_map;
+ vm_offset_t *dst_addr; /* OUT */
+ register
+ vm_map_copy_t copy;
+{
+ vm_size_t size;
+ vm_size_t adjustment;
+ vm_offset_t start;
+ vm_offset_t vm_copy_start;
+ vm_map_entry_t last;
+ register
+ vm_map_entry_t entry;
+
+ /*
+ * Check for null copy object.
+ */
+
+ if (copy == VM_MAP_COPY_NULL) {
+ *dst_addr = 0;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Check for special copy object, created
+ * by vm_map_copyin_object.
+ */
+
+ if (copy->type == VM_MAP_COPY_OBJECT) {
+ vm_object_t object = copy->cpy_object;
+ vm_size_t offset = copy->offset;
+ vm_size_t tmp_size = copy->size;
+ kern_return_t kr;
+
+ *dst_addr = 0;
+ kr = vm_map_enter(dst_map, dst_addr, tmp_size,
+ (vm_offset_t) 0, TRUE,
+ object, offset, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS)
+ return(kr);
+ zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ return(KERN_SUCCESS);
+ }
+
+ if (copy->type == VM_MAP_COPY_PAGE_LIST)
+ return(vm_map_copyout_page_list(dst_map, dst_addr, copy));
+
+ /*
+ * Find space for the data
+ */
+
+ vm_copy_start = trunc_page(copy->offset);
+ size = round_page(copy->offset + copy->size) - vm_copy_start;
+
+ StartAgain: ;
+
+ vm_map_lock(dst_map);
+ start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
+ vm_map_min(dst_map) : last->vme_end;
+
+ while (TRUE) {
+ vm_map_entry_t next = last->vme_next;
+ vm_offset_t end = start + size;
+
+ if ((end > dst_map->max_offset) || (end < start)) {
+ if (dst_map->wait_for_space) {
+ if (size <= (dst_map->max_offset - dst_map->min_offset)) {
+ assert_wait((event_t) dst_map, TRUE);
+ vm_map_unlock(dst_map);
+ thread_block((void (*)()) 0);
+ goto StartAgain;
+ }
+ }
+ vm_map_unlock(dst_map);
+ return(KERN_NO_SPACE);
+ }
+
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start >= end))
+ break;
+
+ last = next;
+ start = last->vme_end;
+ }
+
+ /*
+ * Since we're going to just drop the map
+ * entries from the copy into the destination
+ * map, they must come from the same pool.
+ */
+
+ if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
+ /*
+ * Mismatches occur when dealing with the default
+ * pager.
+ */
+ zone_t old_zone;
+ vm_map_entry_t next, new;
+
+ /*
+ * Find the zone that the copies were allocated from
+ */
+ old_zone = (copy->cpy_hdr.entries_pageable)
+ ? vm_map_entry_zone
+ : vm_map_kentry_zone;
+ entry = vm_map_copy_first_entry(copy);
+
+ /*
+ * Reinitialize the copy so that vm_map_copy_entry_link
+ * will work.
+ */
+ copy->cpy_hdr.nentries = 0;
+ copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) =
+ vm_map_copy_to_entry(copy);
+
+ /*
+ * Copy each entry.
+ */
+ while (entry != vm_map_copy_to_entry(copy)) {
+ new = vm_map_copy_entry_create(copy);
+ vm_map_entry_copy_full(new, entry);
+ vm_map_copy_entry_link(copy,
+ vm_map_copy_last_entry(copy),
+ new);
+ next = entry->vme_next;
+ zfree(old_zone, (vm_offset_t) entry);
+ entry = next;
+ }
+ }
+
+ /*
+ * Adjust the addresses in the copy chain, and
+ * reset the region attributes.
+ */
+
+ adjustment = start - vm_copy_start;
+ for (entry = vm_map_copy_first_entry(copy);
+ entry != vm_map_copy_to_entry(copy);
+ entry = entry->vme_next) {
+ entry->vme_start += adjustment;
+ entry->vme_end += adjustment;
+
+ entry->inheritance = VM_INHERIT_DEFAULT;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->projected_on = 0;
+
+ /*
+ * If the entry is now wired,
+ * map the pages into the destination map.
+ */
+ if (entry->wired_count != 0) {
+ register vm_offset_t va;
+ vm_offset_t offset;
+ register vm_object_t object;
+
+ object = entry->object.vm_object;
+ offset = entry->offset;
+ va = entry->vme_start;
+
+ pmap_pageable(dst_map->pmap,
+ entry->vme_start,
+ entry->vme_end,
+ TRUE);
+
+ while (va < entry->vme_end) {
+ register vm_page_t m;
+
+ /*
+ * Look up the page in the object.
+ * Assert that the page will be found in the
+ * top object:
+ * either
+ * the object was newly created by
+ * vm_object_copy_slowly, and has
+ * copies of all of the pages from
+ * the source object
+ * or
+ * the object was moved from the old
+ * map entry; because the old map
+ * entry was wired, all of the pages
+ * were in the top-level object.
+ * (XXX not true if we wire pages for
+ * reading)
+ */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+
+ m = vm_page_lookup(object, offset);
+ if (m == VM_PAGE_NULL || m->wire_count == 0 ||
+ m->absent)
+ panic("vm_map_copyout: wiring 0x%x", m);
+
+ m->busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER(dst_map->pmap, va, m,
+ entry->protection, TRUE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(m);
+ /* the page is wired, so we don't have to activate */
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ offset += PAGE_SIZE;
+ va += PAGE_SIZE;
+ }
+ }
+
+
+ }
+
+ /*
+ * Correct the page alignment for the result
+ */
+
+ *dst_addr = start + (copy->offset - vm_copy_start);
+
+ /*
+ * Update the hints and the map size
+ */
+
+ if (dst_map->first_free == last)
+ dst_map->first_free = vm_map_copy_last_entry(copy);
+ SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
+
+ dst_map->size += size;
+
+ /*
+ * Link in the copy
+ */
+
+ vm_map_copy_insert(dst_map, last, copy);
+
+ vm_map_unlock(dst_map);
+
+ /*
+ * XXX If wiring_required, call vm_map_pageable
+ */
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ *
+ * vm_map_copyout_page_list:
+ *
+ * Version of vm_map_copyout() for page list vm map copies.
+ *
+ */
+kern_return_t vm_map_copyout_page_list(dst_map, dst_addr, copy)
+ register
+ vm_map_t dst_map;
+ vm_offset_t *dst_addr; /* OUT */
+ register
+ vm_map_copy_t copy;
+{
+ vm_size_t size;
+ vm_offset_t start;
+ vm_offset_t end;
+ vm_offset_t offset;
+ vm_map_entry_t last;
+ register
+ vm_object_t object;
+ vm_page_t *page_list, m;
+ vm_map_entry_t entry;
+ vm_offset_t old_last_offset;
+ boolean_t cont_invoked, needs_wakeup = FALSE;
+ kern_return_t result = KERN_SUCCESS;
+ vm_map_copy_t orig_copy;
+ vm_offset_t dst_offset;
+ boolean_t must_wire;
+
+ /*
+ * Make sure the pages are stolen, because we are
+ * going to put them in a new object. Assume that
+ * all pages are identical to first in this regard.
+ */
+
+ page_list = &copy->cpy_page_list[0];
+ if ((*page_list)->tabled)
+ vm_map_copy_steal_pages(copy);
+
+ /*
+ * Find space for the data
+ */
+
+ size = round_page(copy->offset + copy->size) -
+ trunc_page(copy->offset);
+StartAgain:
+ vm_map_lock(dst_map);
+ must_wire = dst_map->wiring_required;
+
+ last = dst_map->first_free;
+ if (last == vm_map_to_entry(dst_map)) {
+ start = vm_map_min(dst_map);
+ } else {
+ start = last->vme_end;
+ }
+
+ while (TRUE) {
+ vm_map_entry_t next = last->vme_next;
+ end = start + size;
+
+ if ((end > dst_map->max_offset) || (end < start)) {
+ if (dst_map->wait_for_space) {
+ if (size <= (dst_map->max_offset -
+ dst_map->min_offset)) {
+ assert_wait((event_t) dst_map, TRUE);
+ vm_map_unlock(dst_map);
+ thread_block((void (*)()) 0);
+ goto StartAgain;
+ }
+ }
+ vm_map_unlock(dst_map);
+ return(KERN_NO_SPACE);
+ }
+
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start >= end)) {
+ break;
+ }
+
+ last = next;
+ start = last->vme_end;
+ }
+
+ /*
+ * See whether we can avoid creating a new entry (and object) by
+ * extending one of our neighbors. [So far, we only attempt to
+ * extend from below.]
+ *
+ * The code path below here is a bit twisted. If any of the
+ * extension checks fails, we branch to create_object. If
+ * it all works, we fall out the bottom and goto insert_pages.
+ */
+ if (last == vm_map_to_entry(dst_map) ||
+ last->vme_end != start ||
+ last->is_shared != FALSE ||
+ last->is_sub_map != FALSE ||
+ last->inheritance != VM_INHERIT_DEFAULT ||
+ last->protection != VM_PROT_DEFAULT ||
+ last->max_protection != VM_PROT_ALL ||
+ (must_wire ? (last->wired_count != 1 ||
+ last->user_wired_count != 1) :
+ (last->wired_count != 0))) {
+ goto create_object;
+ }
+
+ /*
+ * If this entry needs an object, make one.
+ */
+ if (last->object.vm_object == VM_OBJECT_NULL) {
+ object = vm_object_allocate(
+ (vm_size_t)(last->vme_end - last->vme_start + size));
+ last->object.vm_object = object;
+ last->offset = 0;
+ vm_object_lock(object);
+ }
+ else {
+ vm_offset_t prev_offset = last->offset;
+ vm_size_t prev_size = start - last->vme_start;
+ vm_size_t new_size;
+
+ /*
+ * This is basically vm_object_coalesce.
+ */
+
+ object = last->object.vm_object;
+ vm_object_lock(object);
+
+ /*
+ * Try to collapse the object first
+ */
+ vm_object_collapse(object);
+
+ /*
+ * Can't coalesce if pages of the object that are not
+ * mapped by "last" may be in use elsewhere:
+ * . more than one reference
+ * . paged out
+ * . shadows another object
+ * . has a copy elsewhere
+ * . paging references (pages might be in page-list)
+ */
+
+ if ((object->ref_count > 1) ||
+ object->pager_created ||
+ (object->shadow != VM_OBJECT_NULL) ||
+ (object->copy != VM_OBJECT_NULL) ||
+ (object->paging_in_progress != 0)) {
+ vm_object_unlock(object);
+ goto create_object;
+ }
+
+ /*
+ * Extend the object if necessary. Don't have to call
+ * vm_object_page_remove because the pages aren't mapped,
+ * and vm_page_replace will free up any old ones it encounters.
+ */
+ new_size = prev_offset + prev_size + size;
+ if (new_size > object->size)
+ object->size = new_size;
+ }
+
+ /*
+ * Coalesced the two objects - can extend
+ * the previous map entry to include the
+ * new range.
+ */
+ dst_map->size += size;
+ last->vme_end = end;
+
+ SAVE_HINT(dst_map, last);
+
+ goto insert_pages;
+
+create_object:
+
+ /*
+ * Create object
+ */
+ object = vm_object_allocate(size);
+
+ /*
+ * Create entry
+ */
+
+ entry = vm_map_entry_create(dst_map);
+
+ entry->object.vm_object = object;
+ entry->offset = 0;
+
+ entry->is_shared = FALSE;
+ entry->is_sub_map = FALSE;
+ entry->needs_copy = FALSE;
+
+ if (must_wire) {
+ entry->wired_count = 1;
+ entry->user_wired_count = 1;
+ } else {
+ entry->wired_count = 0;
+ entry->user_wired_count = 0;
+ }
+
+ entry->in_transition = TRUE;
+ entry->needs_wakeup = FALSE;
+
+ entry->vme_start = start;
+ entry->vme_end = start + size;
+
+ entry->inheritance = VM_INHERIT_DEFAULT;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->projected_on = 0;
+
+ vm_object_lock(object);
+
+ /*
+ * Update the hints and the map size
+ */
+ if (dst_map->first_free == last) {
+ dst_map->first_free = entry;
+ }
+ SAVE_HINT(dst_map, entry);
+ dst_map->size += size;
+
+ /*
+ * Link in the entry
+ */
+ vm_map_entry_link(dst_map, last, entry);
+ last = entry;
+
+ /*
+ * Transfer pages into new object.
+ * Scan page list in vm_map_copy.
+ */
+insert_pages:
+ dst_offset = copy->offset & PAGE_MASK;
+ cont_invoked = FALSE;
+ orig_copy = copy;
+ last->in_transition = TRUE;
+ old_last_offset = last->offset
+ + (start - last->vme_start);
+
+ vm_page_lock_queues();
+
+ for (offset = 0; offset < size; offset += PAGE_SIZE) {
+ m = *page_list;
+ assert(m && !m->tabled);
+
+ /*
+ * Must clear busy bit in page before inserting it.
+ * Ok to skip wakeup logic because nobody else
+ * can possibly know about this page.
+ * The page is dirty in its new object.
+ */
+
+ assert(!m->wanted);
+
+ m->busy = FALSE;
+ m->dirty = TRUE;
+ vm_page_replace(m, object, old_last_offset + offset);
+ if (must_wire) {
+ vm_page_wire(m);
+ PMAP_ENTER(dst_map->pmap,
+ last->vme_start + m->offset - last->offset,
+ m, last->protection, TRUE);
+ } else {
+ vm_page_activate(m);
+ }
+
+ *page_list++ = VM_PAGE_NULL;
+ if (--(copy->cpy_npages) == 0 &&
+ vm_map_copy_has_cont(copy)) {
+ vm_map_copy_t new_copy;
+
+ /*
+ * Ok to unlock map because entry is
+ * marked in_transition.
+ */
+ cont_invoked = TRUE;
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+ vm_map_unlock(dst_map);
+ vm_map_copy_invoke_cont(copy, &new_copy, &result);
+
+ if (result == KERN_SUCCESS) {
+
+ /*
+ * If we got back a copy with real pages,
+ * steal them now. Either all of the
+ * pages in the list are tabled or none
+ * of them are; mixtures are not possible.
+ *
+ * Save original copy for consume on
+ * success logic at end of routine.
+ */
+ if (copy != orig_copy)
+ vm_map_copy_discard(copy);
+
+ if ((copy = new_copy) != VM_MAP_COPY_NULL) {
+ page_list = &copy->cpy_page_list[0];
+ if ((*page_list)->tabled)
+ vm_map_copy_steal_pages(copy);
+ }
+ }
+ else {
+ /*
+ * Continuation failed.
+ */
+ vm_map_lock(dst_map);
+ goto error;
+ }
+
+ vm_map_lock(dst_map);
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ }
+ }
+
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ *dst_addr = start + dst_offset;
+
+ /*
+ * Clear the in transition bits. This is easy if we
+ * didn't have a continuation.
+ */
+error:
+ if (!cont_invoked) {
+ /*
+ * We didn't unlock the map, so nobody could
+ * be waiting.
+ */
+ last->in_transition = FALSE;
+ assert(!last->needs_wakeup);
+ needs_wakeup = FALSE;
+ }
+ else {
+ if (!vm_map_lookup_entry(dst_map, start, &entry))
+ panic("vm_map_copyout_page_list: missing entry");
+
+ /*
+ * Clear transition bit for all constituent entries that
+ * were in the original entry. Also check for waiters.
+ */
+ while((entry != vm_map_to_entry(dst_map)) &&
+ (entry->vme_start < end)) {
+ assert(entry->in_transition);
+ entry->in_transition = FALSE;
+ if(entry->needs_wakeup) {
+ entry->needs_wakeup = FALSE;
+ needs_wakeup = TRUE;
+ }
+ entry = entry->vme_next;
+ }
+ }
+
+ if (result != KERN_SUCCESS)
+ vm_map_delete(dst_map, start, end);
+
+ vm_map_unlock(dst_map);
+
+ if (needs_wakeup)
+ vm_map_entry_wakeup(dst_map);
+
+ /*
+ * Consume on success logic.
+ */
+ if (copy != orig_copy) {
+ zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ }
+ if (result == KERN_SUCCESS) {
+ zfree(vm_map_copy_zone, (vm_offset_t) orig_copy);
+ }
+
+ return(result);
+}
+
+/*
+ * Routine: vm_map_copyin
+ *
+ * Description:
+ * Copy the specified region (src_addr, len) from the
+ * source address space (src_map), possibly removing
+ * the region from the source address space (src_destroy).
+ *
+ * Returns:
+ * A vm_map_copy_t object (copy_result), suitable for
+ * insertion into another address space (using vm_map_copyout),
+ * copying over another address space region (using
+ * vm_map_copy_overwrite). If the copy is unused, it
+ * should be destroyed (using vm_map_copy_discard).
+ *
+ * In/out conditions:
+ * The source map should not be locked on entry.
+ */
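+/*
+ * Illustrative caller sketch (not part of the original source; assumes
+ * local src_map, dst_map, src_addr, and len variables), following the
+ * ownership rules above and those of vm_map_copyout:
+ *
+ *	vm_map_copy_t copy;
+ *	vm_offset_t dst_addr;
+ *
+ *	if (vm_map_copyin(src_map, src_addr, len, FALSE, &copy)
+ *			== KERN_SUCCESS &&
+ *	    vm_map_copyout(dst_map, &dst_addr, copy) != KERN_SUCCESS)
+ *		vm_map_copy_discard(copy);	-- copyout did not consume it
+ */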
+kern_return_t vm_map_copyin(src_map, src_addr, len, src_destroy, copy_result)
+ vm_map_t src_map;
+ vm_offset_t src_addr;
+ vm_size_t len;
+ boolean_t src_destroy;
+ vm_map_copy_t *copy_result; /* OUT */
+{
+ vm_map_entry_t tmp_entry; /* Result of last map lookup --
+ * in multi-level lookup, this
+ * entry contains the actual
+ * vm_object/offset.
+ */
+
+ vm_offset_t src_start; /* Start of current entry --
+ * where copy is taking place now
+ */
+ vm_offset_t src_end; /* End of entire region to be
+ * copied */
+
+ register
+ vm_map_copy_t copy; /* Resulting copy */
+
+ /*
+ * Check for copies of zero bytes.
+ */
+
+ if (len == 0) {
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Compute start and end of region
+ */
+
+ src_start = trunc_page(src_addr);
+ src_end = round_page(src_addr + len);
+
+ /*
+ * Check that the end address doesn't overflow
+ */
+
+ if (src_end <= src_start)
+ if ((src_end < src_start) || (src_start != 0))
+ return(KERN_INVALID_ADDRESS);
+
+ /*
+ * Allocate a header element for the list.
+ *
+ * Use the start and end in the header to
+ * remember the endpoints prior to rounding.
+ */
+
+ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
+ copy->type = VM_MAP_COPY_ENTRY_LIST;
+ copy->cpy_hdr.nentries = 0;
+ copy->cpy_hdr.entries_pageable = TRUE;
+
+ copy->offset = src_addr;
+ copy->size = len;
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ vm_map_unlock(src_map); \
+ vm_map_copy_discard(copy); \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ /*
+ * Find the beginning of the region.
+ */
+
+ vm_map_lock(src_map);
+
+ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+ vm_map_clip_start(src_map, tmp_entry, src_start);
+
+ /*
+ * Go through entries until we get to the end.
+ */
+
+ while (TRUE) {
+ register
+ vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
+ vm_size_t src_size; /* Size of source
+ * map entry (in both
+ * maps)
+ */
+
+ register
+ vm_object_t src_object; /* Object to copy */
+ vm_offset_t src_offset;
+
+ boolean_t src_needs_copy; /* Should source map
+ * be made read-only
+ * for copy-on-write?
+ */
+
+ register
+ vm_map_entry_t new_entry; /* Map entry for copy */
+ boolean_t new_entry_needs_copy; /* Will new entry be COW? */
+
+ boolean_t was_wired; /* Was source wired? */
+ vm_map_version_t version; /* Version before locks
+ * dropped to make copy
+ */
+
+ /*
+ * Verify that the region can be read.
+ */
+
+ if (! (src_entry->protection & VM_PROT_READ))
+ RETURN(KERN_PROTECTION_FAILURE);
+
+ /*
+ * Clip against the endpoints of the entire region.
+ */
+
+ vm_map_clip_end(src_map, src_entry, src_end);
+
+ src_size = src_entry->vme_end - src_start;
+ src_object = src_entry->object.vm_object;
+ src_offset = src_entry->offset;
+ was_wired = (src_entry->wired_count != 0);
+
+ /*
+ * Create a new address map entry to
+ * hold the result. Fill in the fields from
+ * the appropriate source entries.
+ */
+
+ new_entry = vm_map_copy_entry_create(copy);
+ vm_map_entry_copy(new_entry, src_entry);
+
+ /*
+ * Attempt non-blocking copy-on-write optimizations.
+ */
+
+ if (src_destroy &&
+ (src_object == VM_OBJECT_NULL ||
+ (src_object->temporary && !src_object->use_shared_copy)))
+ {
+ /*
+ * If we are destroying the source, and the object
+ * is temporary, and not shared writable,
+ * we can move the object reference
+ * from the source to the copy. The copy is
+ * copy-on-write only if the source is.
+ * We make another reference to the object, because
+ * destroying the source entry will deallocate it.
+ */
+ vm_object_reference(src_object);
+
+ /*
+ * Copy is always unwired. vm_map_entry_copy
+ * set its wired count to zero.
+ */
+
+ goto CopySuccessful;
+ }
+
+ if (!was_wired &&
+ vm_object_copy_temporary(
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !tmp_entry->needs_copy) {
+ vm_object_pmap_protect(
+ src_object,
+ src_offset,
+ src_size,
+ (src_entry->is_shared ? PMAP_NULL
+ : src_map->pmap),
+ src_entry->vme_start,
+ src_entry->protection &
+ ~VM_PROT_WRITE);
+
+ tmp_entry->needs_copy = TRUE;
+ }
+
+ /*
+ * The map has never been unlocked, so it's safe to
+ * move to the next entry rather than doing another
+ * lookup.
+ */
+
+ goto CopySuccessful;
+ }
+
+ new_entry->needs_copy = FALSE;
+
+ /*
+ * Take an object reference, so that we may
+ * release the map lock(s).
+ */
+
+ assert(src_object != VM_OBJECT_NULL);
+ vm_object_reference(src_object);
+
+ /*
+ * Record the timestamp for later verification.
+ * Unlock the map.
+ */
+
+ version.main_timestamp = src_map->timestamp;
+ vm_map_unlock(src_map);
+
+ /*
+ * Perform the copy
+ */
+
+ if (was_wired) {
+ vm_object_lock(src_object);
+ (void) vm_object_copy_slowly(
+ src_object,
+ src_offset,
+ src_size,
+ FALSE,
+ &new_entry->object.vm_object);
+ new_entry->offset = 0;
+ new_entry->needs_copy = FALSE;
+ } else {
+ kern_return_t result;
+
+ result = vm_object_copy_strategically(src_object,
+ src_offset,
+ src_size,
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &new_entry_needs_copy);
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+
+ if (result != KERN_SUCCESS) {
+ vm_map_copy_entry_dispose(copy, new_entry);
+
+ vm_map_lock(src_map);
+ RETURN(result);
+ }
+
+ }
+
+ /*
+ * Throw away the extra reference
+ */
+
+ vm_object_deallocate(src_object);
+
+ /*
+ * Verify that the map has not substantially
+ * changed while the copy was being made.
+ */
+
+ vm_map_lock(src_map); /* Increments timestamp once! */
+
+ if ((version.main_timestamp + 1) == src_map->timestamp)
+ goto CopySuccessful;
+
+ /*
+ * Simple version comparison failed.
+ *
+ * Retry the lookup and verify that the
+ * same object/offset are still present.
+ *
+ * [Note: a memory manager that colludes with
+ * the calling task can detect that we have
+ * cheated. While the map was unlocked, the
+ * mapping could have been changed and restored.]
+ */
+
+ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
+ vm_map_copy_entry_dispose(copy, new_entry);
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ src_entry = tmp_entry;
+ vm_map_clip_start(src_map, src_entry, src_start);
+
+ if ((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE)
+ goto VerificationFailed;
+
+ if (src_entry->vme_end < new_entry->vme_end)
+ src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
+
+ if ((src_entry->object.vm_object != src_object) ||
+ (src_entry->offset != src_offset) ) {
+
+ /*
+ * Verification failed.
+ *
+ * Start over with this top-level entry.
+ */
+
+ VerificationFailed: ;
+
+ vm_object_deallocate(new_entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, new_entry);
+ tmp_entry = src_entry;
+ continue;
+ }
+
+ /*
+ * Verification succeeded.
+ */
+
+ CopySuccessful: ;
+
+ /*
+ * Link in the new copy entry.
+ */
+
+ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
+ new_entry);
+
+ /*
+ * Determine whether the entire region
+ * has been copied.
+ */
+ src_start = new_entry->vme_end;
+ if ((src_start >= src_end) && (src_end != 0))
+ break;
+
+ /*
+ * Verify that there are no gaps in the region
+ */
+
+ tmp_entry = src_entry->vme_next;
+ if (tmp_entry->vme_start != src_start)
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * If the source should be destroyed, do it now, since the
+ * copy was successful.
+ */
+ if (src_destroy)
+ (void) vm_map_delete(src_map, trunc_page(src_addr), src_end);
+
+ vm_map_unlock(src_map);
+
+ *copy_result = copy;
+ return(KERN_SUCCESS);
+
+#undef RETURN
+}
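+
+/*
+ *	Call sketch (illustrative only; src_map, src_addr, len and kr are
+ *	placeholder names):  the common entry-list form of copyin, as used
+ *	by vm_map_fork below and by the IPC paths, is roughly:
+ *
+ *		vm_map_copy_t	copy;
+ *		kern_return_t	kr;
+ *
+ *		kr = vm_map_copyin(src_map, src_addr, len,
+ *				   FALSE,	(src_destroy)
+ *				   &copy);
+ *
+ *	On success the copy is handed to vm_map_copyout or
+ *	vm_map_copy_overwrite, or released with vm_map_copy_discard.
+ */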
+
+/*
+ * vm_map_copyin_object:
+ *
+ * Create a copy object from an object.
+ * Our caller donates an object reference.
+ */
+
+kern_return_t vm_map_copyin_object(object, offset, size, copy_result)
+ vm_object_t object;
+ vm_offset_t offset; /* offset of region in object */
+ vm_size_t size; /* size of region in object */
+ vm_map_copy_t *copy_result; /* OUT */
+{
+ vm_map_copy_t copy; /* Resulting copy */
+
+ /*
+ * We drop the object into a special copy object
+ * that contains the object directly. These copy objects
+ * are distinguished by entries_pageable == FALSE
+ * and null links.
+ */
+
+ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL;
+ copy->type = VM_MAP_COPY_OBJECT;
+ copy->cpy_object = object;
+ copy->offset = offset;
+ copy->size = size;
+
+ *copy_result = copy;
+ return(KERN_SUCCESS);
+}
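+
+/*
+ *	Usage sketch (illustrative; object, size, dst_map and dst_addr are
+ *	placeholder names):  a caller that wants to keep using its object
+ *	takes an extra reference to donate, then hands the resulting copy
+ *	to vm_map_copyout:
+ *
+ *		vm_object_reference(object);
+ *		if (vm_map_copyin_object(object, 0, size, &copy)
+ *				== KERN_SUCCESS)
+ *			kr = vm_map_copyout(dst_map, &dst_addr, copy);
+ */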
+
+/*
+ * vm_map_copyin_page_list_cont:
+ *
+ * Continuation routine for vm_map_copyin_page_list.
+ *
+ * If vm_map_copyin_page_list can't fit the entire vm range
+ * into a single page list object, it creates a continuation.
+ * When the target of the operation has used the pages in the
+ * initial page list, it invokes the continuation, which calls
+ * this routine. If an error happens, the continuation is aborted
+ * (abort arg to this routine is TRUE). To avoid deadlocks, the
+ * pages are discarded from the initial page list before invoking
+ * the continuation.
+ *
+ * NOTE: This is not the same sort of continuation used by
+ * the scheduler.
+ */
+
+kern_return_t vm_map_copyin_page_list_cont(cont_args, copy_result)
+vm_map_copyin_args_t cont_args;
+vm_map_copy_t *copy_result; /* OUT */
+{
+ kern_return_t result = 0; /* '=0' to quiet gcc warnings */
+ register boolean_t do_abort, src_destroy, src_destroy_only;
+
+ /*
+ * Check for cases that only require memory destruction.
+ */
+ do_abort = (copy_result == (vm_map_copy_t *) 0);
+ src_destroy = (cont_args->destroy_len != (vm_size_t) 0);
+ src_destroy_only = (cont_args->src_len == (vm_size_t) 0);
+
+ if (do_abort || src_destroy_only) {
+ if (src_destroy)
+ result = vm_map_remove(cont_args->map,
+ cont_args->destroy_addr,
+ cont_args->destroy_addr + cont_args->destroy_len);
+ if (!do_abort)
+ *copy_result = VM_MAP_COPY_NULL;
+ }
+ else {
+ result = vm_map_copyin_page_list(cont_args->map,
+ cont_args->src_addr, cont_args->src_len, src_destroy,
+ cont_args->steal_pages, copy_result, TRUE);
+
+ if (src_destroy && !cont_args->steal_pages &&
+ vm_map_copy_has_cont(*copy_result)) {
+ vm_map_copyin_args_t new_args;
+ /*
+ * Transfer old destroy info.
+ */
+ new_args = (vm_map_copyin_args_t)
+ (*copy_result)->cpy_cont_args;
+ new_args->destroy_addr = cont_args->destroy_addr;
+ new_args->destroy_len = cont_args->destroy_len;
+ }
+ }
+
+ vm_map_deallocate(cont_args->map);
+ kfree((vm_offset_t)cont_args, sizeof(vm_map_copyin_args_data_t));
+
+ return(result);
+}
+
+/*
+ * vm_map_copyin_page_list:
+ *
+ * This is a variant of vm_map_copyin that copies in a list of pages.
+ * If steal_pages is TRUE, the pages are only in the returned list.
+ * If steal_pages is FALSE, the pages are busy and still in their
+ * objects. A continuation may be returned if not all the pages fit:
+ * the recipient of this copy_result must be prepared to deal with it.
+ */
+
+kern_return_t vm_map_copyin_page_list(src_map, src_addr, len, src_destroy,
+ steal_pages, copy_result, is_cont)
+ vm_map_t src_map;
+ vm_offset_t src_addr;
+ vm_size_t len;
+ boolean_t src_destroy;
+ boolean_t steal_pages;
+ vm_map_copy_t *copy_result; /* OUT */
+ boolean_t is_cont;
+{
+ vm_map_entry_t src_entry;
+ vm_page_t m;
+ vm_offset_t src_start;
+ vm_offset_t src_end;
+ vm_size_t src_size;
+ register
+ vm_object_t src_object;
+ register
+ vm_offset_t src_offset;
+ vm_offset_t src_last_offset;
+ register
+ vm_map_copy_t copy; /* Resulting copy */
+ kern_return_t result = KERN_SUCCESS;
+ boolean_t need_map_lookup;
+ vm_map_copyin_args_t cont_args;
+
+ /*
+ * If steal_pages is FALSE, this leaves busy pages in
+ * the object. A continuation must be used if src_destroy
+ * is true in this case (!steal_pages && src_destroy).
+ *
+ * XXX Still have a more general problem of what happens
+ * XXX if the same page occurs twice in a list. Deadlock
+ * XXX can happen if vm_fault_page was called. A
+ * XXX possible solution is to use a continuation if vm_fault_page
+ * XXX is called and we cross a map entry boundary.
+ */
+
+ /*
+ * Check for copies of zero bytes.
+ */
+
+ if (len == 0) {
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Compute start and end of region
+ */
+
+ src_start = trunc_page(src_addr);
+ src_end = round_page(src_addr + len);
+
+ /*
+ * Check that the end address doesn't overflow
+ */
+
+ if (src_end <= src_start && (src_end < src_start || src_start != 0)) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Allocate a header element for the page list.
+ *
+ * Record original offset and size, as caller may not
+ * be page-aligned.
+ */
+
+ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ copy->type = VM_MAP_COPY_PAGE_LIST;
+ copy->cpy_npages = 0;
+ copy->offset = src_addr;
+ copy->size = len;
+ copy->cpy_cont = ((kern_return_t (*)()) 0);
+ copy->cpy_cont_args = (char *) VM_MAP_COPYIN_ARGS_NULL;
+
+ /*
+ * Find the beginning of the region.
+ */
+
+do_map_lookup:
+
+ vm_map_lock(src_map);
+
+ if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
+ result = KERN_INVALID_ADDRESS;
+ goto error;
+ }
+ need_map_lookup = FALSE;
+
+ /*
+ * Go through entries until we get to the end.
+ */
+
+ while (TRUE) {
+
+ if (! (src_entry->protection & VM_PROT_READ)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto error;
+ }
+
+ if (src_end > src_entry->vme_end)
+ src_size = src_entry->vme_end - src_start;
+ else
+ src_size = src_end - src_start;
+
+ src_object = src_entry->object.vm_object;
+ src_offset = src_entry->offset +
+ (src_start - src_entry->vme_start);
+
+ /*
+ * If src_object is NULL, allocate it now;
+ * we're going to fault on it shortly.
+ */
+ if (src_object == VM_OBJECT_NULL) {
+ src_object = vm_object_allocate((vm_size_t)
+ src_entry->vme_end -
+ src_entry->vme_start);
+ src_entry->object.vm_object = src_object;
+ }
+
+ /*
+ * Iterate over pages. Fault in ones that aren't present.
+ */
+ src_last_offset = src_offset + src_size;
+ for (; (src_offset < src_last_offset && !need_map_lookup);
+ src_offset += PAGE_SIZE, src_start += PAGE_SIZE) {
+
+ if (copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
+make_continuation:
+ /*
+ * At this point we have the max number of
+ * pages busy for this thread that we're
+ * willing to allow. Stop here and record
+ * arguments for the remainder. Note:
+ * this means that this routine isn't atomic,
+ * but that's the breaks. Note that only
+ * the first vm_map_copy_t that comes back
+ * from this routine has the right offset
+ * and size; those from continuations are
+ * page rounded, and short by the amount
+ * already done.
+ *
+ * Reset src_end so the src_destroy
+ * code at the bottom doesn't do
+ * something stupid.
+ */
+
+ cont_args = (vm_map_copyin_args_t)
+ kalloc(sizeof(vm_map_copyin_args_data_t));
+ cont_args->map = src_map;
+ vm_map_reference(src_map);
+ cont_args->src_addr = src_start;
+ cont_args->src_len = len - (src_start - src_addr);
+ if (src_destroy) {
+ cont_args->destroy_addr = cont_args->src_addr;
+ cont_args->destroy_len = cont_args->src_len;
+ }
+ else {
+ cont_args->destroy_addr = (vm_offset_t) 0;
+ cont_args->destroy_len = (vm_offset_t) 0;
+ }
+ cont_args->steal_pages = steal_pages;
+
+ copy->cpy_cont_args = (char *) cont_args;
+ copy->cpy_cont = vm_map_copyin_page_list_cont;
+
+ src_end = src_start;
+ vm_map_clip_end(src_map, src_entry, src_end);
+ break;
+ }
+
+ /*
+ * Try to find the page of data.
+ */
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ if (((m = vm_page_lookup(src_object, src_offset)) !=
+ VM_PAGE_NULL) && !m->busy && !m->fictitious &&
+ !m->absent && !m->error) {
+
+ /*
+ * This is the page. Mark it busy
+ * and keep the paging reference on
+ * the object whilst we do our thing.
+ */
+ m->busy = TRUE;
+
+ /*
+ * Also write-protect the page, so
+ * that the map`s owner cannot change
+ * the data. The busy bit will prevent
+ * faults on the page from succeeding
+ * until the copy is released; after
+ * that, the page can be re-entered
+ * as writable, since we didn`t alter
+ * the map entry. This scheme is a
+ * cheap copy-on-write.
+ *
+ * Don`t forget the protection and
+ * the page_lock value!
+ *
+ * If the source is being destroyed
+ * AND not shared writable, we don`t
+ * have to protect the page, since
+ * we will destroy the (only)
+ * writable mapping later.
+ */
+ if (!src_destroy ||
+ src_object->use_shared_copy)
+ {
+ pmap_page_protect(m->phys_addr,
+ src_entry->protection
+ & ~m->page_lock
+ & ~VM_PROT_WRITE);
+ }
+
+ }
+ else {
+ vm_prot_t result_prot;
+ vm_page_t top_page;
+ kern_return_t kr;
+
+ /*
+ * Have to fault the page in; must
+ * unlock the map to do so. While
+ * the map is unlocked, anything
+ * can happen, we must lookup the
+ * map entry before continuing.
+ */
+ vm_map_unlock(src_map);
+ need_map_lookup = TRUE;
+retry:
+ result_prot = VM_PROT_READ;
+
+ kr = vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, FALSE,
+ &result_prot, &m, &top_page,
+ FALSE, (void (*)()) 0);
+ /*
+ * Cope with what happened.
+ */
+ switch (kr) {
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_INTERRUPTED: /* ??? */
+ case VM_FAULT_RETRY:
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_MEMORY_ERROR:
+ /*
+ * Something broke. If this
+ * is a continuation, return
+ * a partial result if possible,
+ * else fail the whole thing.
+ * In the continuation case, the
+ * next continuation call will
+ * get this error if it persists.
+ */
+ vm_map_lock(src_map);
+ if (is_cont &&
+ copy->cpy_npages != 0)
+ goto make_continuation;
+
+ result = KERN_MEMORY_ERROR;
+ goto error;
+ }
+
+ if (top_page != VM_PAGE_NULL) {
+ vm_object_lock(src_object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(src_object);
+ vm_object_unlock(src_object);
+ }
+
+ /*
+ * We do not need to write-protect
+ * the page, since it cannot have
+ * been in the pmap (and we did not
+ * enter it above). The busy bit
+ * will protect the page from being
+ * entered as writable until it is
+ * unlocked.
+ */
+
+ }
+
+ /*
+ * The page is busy, its object is locked, and
+ * we have a paging reference on it. Either
+ * the map is locked, or need_map_lookup is
+ * TRUE.
+ *
+ * Put the page in the page list.
+ */
+ copy->cpy_page_list[copy->cpy_npages++] = m;
+ vm_object_unlock(m->object);
+ }
+
+ /*
+ * DETERMINE whether the entire region
+ * has been copied.
+ */
+ if (src_start >= src_end && src_end != 0) {
+ if (need_map_lookup)
+ vm_map_lock(src_map);
+ break;
+ }
+
+ /*
+ * If need_map_lookup is TRUE, have to start over with
+ * another map lookup. Note that we dropped the map
+ * lock (to call vm_fault_page) above only in this case.
+ */
+ if (need_map_lookup)
+ goto do_map_lookup;
+
+ /*
+ * Verify that there are no gaps in the region
+ */
+
+ src_start = src_entry->vme_end;
+ src_entry = src_entry->vme_next;
+ if (src_entry->vme_start != src_start) {
+ result = KERN_INVALID_ADDRESS;
+ goto error;
+ }
+ }
+
+ /*
+ * If steal_pages is true, make sure all
+ * pages in the copy are not in any object
+ * We try to remove them from the original
+ * object, but we may have to copy them.
+ *
+ * At this point every page in the list is busy
+ * and holds a paging reference to its object.
+ * When we're done stealing, every page is busy,
+ * and in no object (m->tabled == FALSE).
+ */
+ src_start = trunc_page(src_addr);
+ if (steal_pages) {
+ register int i;
+ vm_offset_t unwire_end;
+
+ unwire_end = src_start;
+ for (i = 0; i < copy->cpy_npages; i++) {
+
+ /*
+ * Remove the page from its object if it
+ * can be stolen. It can be stolen if:
+ *
+ * (1) The source is being destroyed,
+ * the object is temporary, and
+ * not shared.
+ * (2) The page is not precious.
+ *
+ * The not shared check consists of two
+ * parts: (a) there are no objects that
+ * shadow this object. (b) it is not the
+ * object in any shared map entries (i.e.,
+ * use_shared_copy is not set).
+ *
+ * The first check (a) means that we can't
+ * steal pages from objects that are not
+ * at the top of their shadow chains. This
+ * should not be a frequent occurrence.
+ *
+ * Stealing wired pages requires telling the
+ * pmap module to let go of them.
+ *
+ * NOTE: stealing clean pages from objects
+ * whose mappings survive requires a call to
+ * the pmap module. Maybe later.
+ */
+ m = copy->cpy_page_list[i];
+ src_object = m->object;
+ vm_object_lock(src_object);
+
+ if (src_destroy &&
+ src_object->temporary &&
+ (!src_object->shadowed) &&
+ (!src_object->use_shared_copy) &&
+ !m->precious) {
+ vm_offset_t page_vaddr;
+
+ page_vaddr = src_start + (i * PAGE_SIZE);
+ if (m->wire_count > 0) {
+
+ assert(m->wire_count == 1);
+ /*
+ * In order to steal a wired
+ * page, we have to unwire it
+ * first. We do this inline
+ * here because we have the page.
+ *
+ * Step 1: Unwire the map entry.
+ * Also tell the pmap module
+ * that this piece of the
+ * pmap is pageable.
+ */
+ vm_object_unlock(src_object);
+ if (page_vaddr >= unwire_end) {
+ if (!vm_map_lookup_entry(src_map,
+ page_vaddr, &src_entry))
+ panic("vm_map_copyin_page_list: missing wired map entry");
+
+ vm_map_clip_start(src_map, src_entry,
+ page_vaddr);
+ vm_map_clip_end(src_map, src_entry,
+ src_start + src_size);
+
+ assert(src_entry->wired_count > 0);
+ src_entry->wired_count = 0;
+ src_entry->user_wired_count = 0;
+ unwire_end = src_entry->vme_end;
+ pmap_pageable(vm_map_pmap(src_map),
+ page_vaddr, unwire_end, TRUE);
+ }
+
+ /*
+ * Step 2: Unwire the page.
+ * pmap_remove handles this for us.
+ */
+ vm_object_lock(src_object);
+ }
+
+ /*
+ * Don't need to remove the mapping;
+ * vm_map_delete will handle it.
+ *
+ * Steal the page. Setting the wire count
+ * to zero is vm_page_unwire without
+ * activating the page.
+ */
+ vm_page_lock_queues();
+ vm_page_remove(m);
+ if (m->wire_count > 0) {
+ m->wire_count = 0;
+ vm_page_wire_count--;
+ } else {
+ VM_PAGE_QUEUES_REMOVE(m);
+ }
+ vm_page_unlock_queues();
+ }
+ else {
+ /*
+				 *	Have to copy this page; we must
+				 *	unlock the map to do the copy,
+				 *	so no further page stealing is
+				 *	possible.  Just copy all the
+				 *	remaining pages.
+ */
+ vm_object_unlock(src_object);
+ vm_map_unlock(src_map);
+
+ vm_map_copy_steal_pages(copy);
+
+ vm_map_lock(src_map);
+ break;
+ }
+
+ vm_object_paging_end(src_object);
+ vm_object_unlock(src_object);
+ }
+
+ /*
+ * If the source should be destroyed, do it now, since the
+ * copy was successful.
+ */
+
+ if (src_destroy) {
+ (void) vm_map_delete(src_map, src_start, src_end);
+ }
+ }
+ else {
+ /*
+ * !steal_pages leaves busy pages in the map.
+ * This will cause src_destroy to hang. Use
+ * a continuation to prevent this.
+ */
+ if (src_destroy && !vm_map_copy_has_cont(copy)) {
+ cont_args = (vm_map_copyin_args_t)
+ kalloc(sizeof(vm_map_copyin_args_data_t));
+ vm_map_reference(src_map);
+ cont_args->map = src_map;
+ cont_args->src_addr = (vm_offset_t) 0;
+ cont_args->src_len = (vm_size_t) 0;
+ cont_args->destroy_addr = src_start;
+ cont_args->destroy_len = src_end - src_start;
+ cont_args->steal_pages = FALSE;
+
+ copy->cpy_cont_args = (char *) cont_args;
+ copy->cpy_cont = vm_map_copyin_page_list_cont;
+ }
+
+ }
+
+ vm_map_unlock(src_map);
+
+ *copy_result = copy;
+ return(result);
+
+error:
+ vm_map_unlock(src_map);
+ vm_map_copy_discard(copy);
+ return(result);
+}
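+
+/*
+ *	Call sketch (illustrative; map, addr, len and kr are placeholder
+ *	names):  a kernel client that wants the pages backing
+ *	[addr, addr + len) left in place, neither destroyed nor stolen,
+ *	would issue:
+ *
+ *		kr = vm_map_copyin_page_list(map, addr, len,
+ *					     FALSE,	(src_destroy)
+ *					     FALSE,	(steal_pages)
+ *					     &copy,
+ *					     FALSE);	(is_cont)
+ *
+ *	On success the pages in copy->cpy_page_list are busy, and copy
+ *	may carry a continuation (see vm_map.h) if the region did not fit.
+ */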
+
+/*
+ * vm_map_fork:
+ *
+ * Create and return a new map based on the old
+ * map, according to the inheritance values on the
+ * regions in that map.
+ *
+ * The source map must not be locked.
+ */
+vm_map_t vm_map_fork(old_map)
+ vm_map_t old_map;
+{
+ vm_map_t new_map;
+ register
+ vm_map_entry_t old_entry;
+ register
+ vm_map_entry_t new_entry;
+ pmap_t new_pmap = pmap_create((vm_size_t) 0);
+ vm_size_t new_size = 0;
+ vm_size_t entry_size;
+ register
+ vm_object_t object;
+
+ vm_map_lock(old_map);
+
+ new_map = vm_map_create(new_pmap,
+ old_map->min_offset,
+ old_map->max_offset,
+ old_map->hdr.entries_pageable);
+
+ for (
+ old_entry = vm_map_first_entry(old_map);
+ old_entry != vm_map_to_entry(old_map);
+ ) {
+ if (old_entry->is_sub_map)
+ panic("vm_map_fork: encountered a submap");
+
+ entry_size = (old_entry->vme_end - old_entry->vme_start);
+
+ switch (old_entry->inheritance) {
+ case VM_INHERIT_NONE:
+ break;
+
+ case VM_INHERIT_SHARE:
+ /*
+ * New sharing code. New map entry
+ * references original object. Temporary
+ * objects use asynchronous copy algorithm for
+ * future copies. First make sure we have
+ * the right object. If we need a shadow,
+ * or someone else already has one, then
+ * make a new shadow and share it.
+ */
+
+ object = old_entry->object.vm_object;
+ if (object == VM_OBJECT_NULL) {
+ object = vm_object_allocate(
+ (vm_size_t)(old_entry->vme_end -
+ old_entry->vme_start));
+ old_entry->offset = 0;
+ old_entry->object.vm_object = object;
+ assert(!old_entry->needs_copy);
+ }
+ else if (old_entry->needs_copy || object->shadowed ||
+ (object->temporary && !old_entry->is_shared &&
+ object->size > (vm_size_t)(old_entry->vme_end -
+ old_entry->vme_start))) {
+
+ assert(object->temporary);
+ assert(!(object->shadowed && old_entry->is_shared));
+ vm_object_shadow(
+ &old_entry->object.vm_object,
+ &old_entry->offset,
+ (vm_size_t) (old_entry->vme_end -
+ old_entry->vme_start));
+
+ /*
+ * If we're making a shadow for other than
+ * copy on write reasons, then we have
+ * to remove write permission.
+ */
+
+ if (!old_entry->needs_copy &&
+ (old_entry->protection & VM_PROT_WRITE)) {
+ pmap_protect(vm_map_pmap(old_map),
+ old_entry->vme_start,
+ old_entry->vme_end,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+ }
+ old_entry->needs_copy = FALSE;
+ object = old_entry->object.vm_object;
+ }
+
+ /*
+ * Set use_shared_copy to indicate that
+ * object must use shared (delayed) copy-on
+ * write. This is ignored for permanent objects.
+ * Bump the reference count for the new entry
+ */
+
+ vm_object_lock(object);
+ object->use_shared_copy = TRUE;
+ object->ref_count++;
+ vm_object_unlock(object);
+
+			new_entry = vm_map_entry_create(new_map);
+
+			if (old_entry->projected_on != 0) {
+ /*
+ * If entry is projected buffer, clone the
+ * entry exactly.
+ */
+
+ vm_map_entry_copy_full(new_entry, old_entry);
+
+ } else {
+ /*
+ * Clone the entry, using object ref from above.
+ * Mark both entries as shared.
+ */
+
+				vm_map_entry_copy(new_entry, old_entry);
+ old_entry->is_shared = TRUE;
+ new_entry->is_shared = TRUE;
+ }
+
+ /*
+ * Insert the entry into the new map -- we
+ * know we're inserting at the end of the new
+ * map.
+ */
+
+ vm_map_entry_link(
+ new_map,
+ vm_map_last_entry(new_map),
+ new_entry);
+
+ /*
+ * Update the physical map
+ */
+
+ pmap_copy(new_map->pmap, old_map->pmap,
+ new_entry->vme_start,
+ entry_size,
+ old_entry->vme_start);
+
+ new_size += entry_size;
+ break;
+
+ case VM_INHERIT_COPY:
+ if (old_entry->wired_count == 0) {
+ boolean_t src_needs_copy;
+ boolean_t new_entry_needs_copy;
+
+ new_entry = vm_map_entry_create(new_map);
+ vm_map_entry_copy(new_entry, old_entry);
+
+ if (vm_object_copy_temporary(
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !old_entry->needs_copy) {
+ vm_object_pmap_protect(
+ old_entry->object.vm_object,
+ old_entry->offset,
+ entry_size,
+ (old_entry->is_shared ?
+ PMAP_NULL :
+ old_map->pmap),
+ old_entry->vme_start,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+
+ old_entry->needs_copy = TRUE;
+ }
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Insert the entry at the end
+ * of the map.
+ */
+
+ vm_map_entry_link(new_map,
+ vm_map_last_entry(new_map),
+ new_entry);
+
+
+ new_size += entry_size;
+ break;
+ }
+
+ vm_map_entry_dispose(new_map, new_entry);
+ }
+
+ /* INNER BLOCK (copy cannot be optimized) */ {
+
+ vm_offset_t start = old_entry->vme_start;
+ vm_map_copy_t copy;
+ vm_map_entry_t last = vm_map_last_entry(new_map);
+
+ vm_map_unlock(old_map);
+ if (vm_map_copyin(old_map,
+ start,
+ entry_size,
+ FALSE,
+ &copy)
+ != KERN_SUCCESS) {
+ vm_map_lock(old_map);
+ if (!vm_map_lookup_entry(old_map, start, &last))
+ last = last->vme_next;
+ old_entry = last;
+ /*
+ * For some error returns, want to
+ * skip to the next element.
+ */
+
+ continue;
+ }
+
+ /*
+ * Insert the copy into the new map
+ */
+
+ vm_map_copy_insert(new_map, last, copy);
+ new_size += entry_size;
+
+ /*
+ * Pick up the traversal at the end of
+ * the copied region.
+ */
+
+ vm_map_lock(old_map);
+ start += entry_size;
+ if (!vm_map_lookup_entry(old_map, start, &last))
+ last = last->vme_next;
+ else
+ vm_map_clip_start(old_map, last, start);
+ old_entry = last;
+
+ continue;
+ /* INNER BLOCK (copy cannot be optimized) */ }
+ }
+ old_entry = old_entry->vme_next;
+ }
+
+ new_map->size = new_size;
+ vm_map_unlock(old_map);
+
+ return(new_map);
+}
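+
+/*
+ *	Sketch (illustrative; old_map, start and size are placeholder
+ *	names):  the per-entry inheritance value, normally set with
+ *	vm_map_inherit, is what selects among the three cases above.
+ *	For example, a region marked for sharing before the fork ends
+ *	up mapped shared in the child:
+ *
+ *		(void) vm_map_inherit(old_map, start, start + size,
+ *				      VM_INHERIT_SHARE);
+ *		new_map = vm_map_fork(old_map);
+ */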
+
+/*
+ * vm_map_lookup:
+ *
+ * Finds the VM object, offset, and
+ * protection for a given virtual address in the
+ * specified map, assuming a page fault of the
+ * type specified.
+ *
+ * Returns the (object, offset, protection) for
+ * this address, whether it is wired down, and whether
+ * this map has the only reference to the data in question.
+ * In order to later verify this lookup, a "version"
+ * is returned.
+ *
+ * The map should not be locked; it will not be
+ * locked on exit. In order to guarantee the
+ * existence of the returned object, it is returned
+ * locked.
+ *
+ * If a lookup is requested with "write protection"
+ * specified, the map may be changed to perform virtual
+ * copying operations, although the data referenced will
+ * remain the same.
+ */
+kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version,
+ object, offset, out_prot, wired)
+ vm_map_t *var_map; /* IN/OUT */
+ register vm_offset_t vaddr;
+ register vm_prot_t fault_type;
+
+ vm_map_version_t *out_version; /* OUT */
+ vm_object_t *object; /* OUT */
+ vm_offset_t *offset; /* OUT */
+ vm_prot_t *out_prot; /* OUT */
+ boolean_t *wired; /* OUT */
+{
+ register vm_map_entry_t entry;
+ register vm_map_t map = *var_map;
+ register vm_prot_t prot;
+
+ RetryLookup: ;
+
+ /*
+ * Lookup the faulting address.
+ */
+
+ vm_map_lock_read(map);
+
+#define RETURN(why) \
+ { \
+ vm_map_unlock_read(map); \
+ return(why); \
+ }
+
+ /*
+ * If the map has an interesting hint, try it before calling
+ * full blown lookup routine.
+ */
+
+ simple_lock(&map->hint_lock);
+ entry = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if ((entry == vm_map_to_entry(map)) ||
+ (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
+ vm_map_entry_t tmp_entry;
+
+ /*
+ * Entry was either not a valid hint, or the vaddr
+ * was not contained in the entry, so do a full lookup.
+ */
+ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+
+ entry = tmp_entry;
+ }
+
+ /*
+ * Handle submaps.
+ */
+
+ if (entry->is_sub_map) {
+ vm_map_t old_map = map;
+
+ *var_map = map = entry->object.sub_map;
+ vm_map_unlock_read(old_map);
+ goto RetryLookup;
+ }
+
+ /*
+ * Check whether this task is allowed to have
+ * this page.
+ */
+
+ prot = entry->protection;
+
+ if ((fault_type & (prot)) != fault_type)
+ if ((prot & VM_PROT_NOTIFY) && (fault_type & VM_PROT_WRITE)) {
+ RETURN(KERN_WRITE_PROTECTION_FAILURE);
+ } else {
+ RETURN(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * If this page is not pageable, we have to get
+ * it for all possible accesses.
+ */
+
+ if (*wired = (entry->wired_count != 0))
+ prot = fault_type = entry->protection;
+
+ /*
+	 *	If the entry was copy-on-write, we either shadow
+	 *	the object now (for a write fault) or demote the
+	 *	permitted access (for a read fault).
+ */
+
+ if (entry->needs_copy) {
+ /*
+ * If we want to write the page, we may as well
+ * handle that now since we've got the map locked.
+ *
+ * If we don't need to write the page, we just
+ * demote the permissions allowed.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ /*
+ * Make a new object, and place it in the
+ * object chain. Note that no new references
+ * have appeared -- one just moved from the
+ * map to the new object.
+ */
+
+ if (vm_map_lock_read_to_write(map)) {
+ goto RetryLookup;
+ }
+ map->timestamp++;
+
+ vm_object_shadow(
+ &entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t) (entry->vme_end - entry->vme_start));
+
+ entry->needs_copy = FALSE;
+
+ vm_map_lock_write_to_read(map);
+ }
+ else {
+ /*
+ * We're attempting to read a copy-on-write
+ * page -- don't allow writes.
+ */
+
+ prot &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Create an object if necessary.
+ */
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+
+ if (vm_map_lock_read_to_write(map)) {
+ goto RetryLookup;
+ }
+
+ entry->object.vm_object = vm_object_allocate(
+ (vm_size_t)(entry->vme_end - entry->vme_start));
+ entry->offset = 0;
+ vm_map_lock_write_to_read(map);
+ }
+
+ /*
+ * Return the object/offset from this entry. If the entry
+ * was copy-on-write or empty, it has been fixed up. Also
+ * return the protection.
+ */
+
+ *offset = (vaddr - entry->vme_start) + entry->offset;
+ *object = entry->object.vm_object;
+ *out_prot = prot;
+
+ /*
+ * Lock the object to prevent it from disappearing
+ */
+
+ vm_object_lock(*object);
+
+ /*
+ * Save the version number and unlock the map.
+ */
+
+ out_version->main_timestamp = map->timestamp;
+
+ RETURN(KERN_SUCCESS);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_verify:
+ *
+ * Verifies that the map in question has not changed
+ * since the given version. If successful, the map
+ * will not change until vm_map_verify_done() is called.
+ */
+boolean_t vm_map_verify(map, version)
+ register
+ vm_map_t map;
+ register
+ vm_map_version_t *version; /* REF */
+{
+ boolean_t result;
+
+ vm_map_lock_read(map);
+ result = (map->timestamp == version->main_timestamp);
+
+ if (!result)
+ vm_map_unlock_read(map);
+
+ return(result);
+}
+
+/*
+ * vm_map_verify_done:
+ *
+ * Releases locks acquired by a vm_map_verify.
+ *
+ * This is now a macro in vm/vm_map.h. It does a
+ * vm_map_unlock_read on the map.
+ */
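+
+/*
+ *	Sketch of the intended pattern (illustrative; this is roughly what
+ *	the fault handler does with the version returned by vm_map_lookup):
+ *
+ *		kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ *				   &object, &offset, &prot, &wired);
+ *		... work on the locked object ...
+ *		if (!vm_map_verify(map, &version)) {
+ *			... the map changed; redo the lookup ...
+ *		} else {
+ *			... the map is read-locked and unchanged ...
+ *			vm_map_verify_done(map, &version);
+ *		}
+ */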
+
+/*
+ * vm_region:
+ *
+ * User call to obtain information about a region in
+ * a task's address map.
+ */
+
+kern_return_t vm_region(map, address, size,
+ protection, max_protection,
+ inheritance, is_shared,
+ object_name, offset_in_object)
+ vm_map_t map;
+ vm_offset_t *address; /* IN/OUT */
+ vm_size_t *size; /* OUT */
+ vm_prot_t *protection; /* OUT */
+ vm_prot_t *max_protection; /* OUT */
+ vm_inherit_t *inheritance; /* OUT */
+ boolean_t *is_shared; /* OUT */
+ ipc_port_t *object_name; /* OUT */
+ vm_offset_t *offset_in_object; /* OUT */
+{
+ vm_map_entry_t tmp_entry;
+ register
+ vm_map_entry_t entry;
+ register
+ vm_offset_t tmp_offset;
+ vm_offset_t start;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ start = *address;
+
+ vm_map_lock_read(map);
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return(KERN_NO_SPACE);
+ }
+ } else {
+ entry = tmp_entry;
+ }
+
+ start = entry->vme_start;
+ *protection = entry->protection;
+ *max_protection = entry->max_protection;
+ *inheritance = entry->inheritance;
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ tmp_offset = entry->offset;
+
+
+ if (entry->is_sub_map) {
+ *is_shared = FALSE;
+ *object_name = IP_NULL;
+ *offset_in_object = tmp_offset;
+ } else {
+ *is_shared = entry->is_shared;
+ *object_name = vm_object_name(entry->object.vm_object);
+ *offset_in_object = tmp_offset;
+ }
+
+ vm_map_unlock_read(map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: vm_map_simplify
+ *
+ * Description:
+ * Attempt to simplify the map representation in
+ * the vicinity of the given starting address.
+ * Note:
+ * This routine is intended primarily to keep the
+ * kernel maps more compact -- they generally don't
+ * benefit from the "expand a map entry" technology
+ * at allocation time because the adjacent entry
+ * is often wired down.
+ */
+void vm_map_simplify(map, start)
+ vm_map_t map;
+ vm_offset_t start;
+{
+ vm_map_entry_t this_entry;
+ vm_map_entry_t prev_entry;
+
+ vm_map_lock(map);
+ if (
+ (vm_map_lookup_entry(map, start, &this_entry)) &&
+ ((prev_entry = this_entry->vme_prev) != vm_map_to_entry(map)) &&
+
+ (prev_entry->vme_end == start) &&
+
+ (prev_entry->is_shared == FALSE) &&
+ (prev_entry->is_sub_map == FALSE) &&
+
+ (this_entry->is_shared == FALSE) &&
+ (this_entry->is_sub_map == FALSE) &&
+
+ (prev_entry->inheritance == this_entry->inheritance) &&
+ (prev_entry->protection == this_entry->protection) &&
+ (prev_entry->max_protection == this_entry->max_protection) &&
+ (prev_entry->wired_count == this_entry->wired_count) &&
+ (prev_entry->user_wired_count == this_entry->user_wired_count) &&
+
+ (prev_entry->needs_copy == this_entry->needs_copy) &&
+
+ (prev_entry->object.vm_object == this_entry->object.vm_object) &&
+ ((prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start))
+ == this_entry->offset) &&
+ (prev_entry->projected_on == 0) &&
+ (this_entry->projected_on == 0)
+ ) {
+ if (map->first_free == this_entry)
+ map->first_free = prev_entry;
+
+ SAVE_HINT(map, prev_entry);
+ vm_map_entry_unlink(map, this_entry);
+ prev_entry->vme_end = this_entry->vme_end;
+ vm_object_deallocate(this_entry->object.vm_object);
+ vm_map_entry_dispose(map, this_entry);
+ }
+ vm_map_unlock(map);
+}
+
+
+/*
+ * Routine: vm_map_machine_attribute
+ * Purpose:
+ * Provide machine-specific attributes to mappings,
+ * such as cachability etc. for machines that provide
+ * them. NUMA architectures and machines with big/strange
+ * caches will use this.
+ * Note:
+ * Responsibilities for locking and checking are handled here,
+ * everything else in the pmap module. If any non-volatile
+ * information must be kept, the pmap module should handle
+ * it itself. [This assumes that attributes do not
+ * need to be inherited, which seems ok to me]
+ */
+kern_return_t vm_map_machine_attribute(map, address, size, attribute, value)
+ vm_map_t map;
+ vm_offset_t address;
+ vm_size_t size;
+ vm_machine_attribute_t attribute;
+ vm_machine_attribute_val_t* value; /* IN/OUT */
+{
+ kern_return_t ret;
+
+ if (address < vm_map_min(map) ||
+ (address + size) > vm_map_max(map))
+ return KERN_INVALID_ARGUMENT;
+
+ vm_map_lock(map);
+
+ ret = pmap_attribute(map->pmap, address, size, attribute, value);
+
+ vm_map_unlock(map);
+
+ return ret;
+}
+
+#include <mach_kdb.h>
+
+
+#if MACH_KDB
+
+#define printf kdbprintf
+
+/*
+ * vm_map_print: [ debug ]
+ */
+void vm_map_print(map)
+ register vm_map_t map;
+{
+ register vm_map_entry_t entry;
+ extern int indent;
+
+ iprintf("Task map 0x%X: pmap=0x%X,",
+ (vm_offset_t) map, (vm_offset_t) (map->pmap));
+ printf("ref=%d,nentries=%d,", map->ref_count, map->hdr.nentries);
+ printf("version=%d\n", map->timestamp);
+ indent += 2;
+ for (entry = vm_map_first_entry(map);
+ entry != vm_map_to_entry(map);
+ entry = entry->vme_next) {
+ static char *inheritance_name[3] = { "share", "copy", "none"};
+
+ iprintf("map entry 0x%X: ", (vm_offset_t) entry);
+ printf("start=0x%X, end=0x%X, ",
+ (vm_offset_t) entry->vme_start, (vm_offset_t) entry->vme_end);
+ printf("prot=%X/%X/%s, ",
+ entry->protection,
+ entry->max_protection,
+ inheritance_name[entry->inheritance]);
+ if (entry->wired_count != 0) {
+ printf("wired(");
+ if (entry->user_wired_count != 0)
+ printf("u");
+ if (entry->wired_count >
+ ((entry->user_wired_count == 0) ? 0 : 1))
+ printf("k");
+ printf(") ");
+ }
+ if (entry->in_transition) {
+ printf("in transition");
+ if (entry->needs_wakeup)
+ printf("(wake request)");
+ printf(", ");
+ }
+ if (entry->is_sub_map) {
+ printf("submap=0x%X, offset=0x%X\n",
+ (vm_offset_t) entry->object.sub_map,
+ (vm_offset_t) entry->offset);
+ } else {
+ printf("object=0x%X, offset=0x%X",
+ (vm_offset_t) entry->object.vm_object,
+ (vm_offset_t) entry->offset);
+ if (entry->is_shared)
+ printf(", shared");
+ if (entry->needs_copy)
+ printf(", copy needed");
+ printf("\n");
+
+ if ((entry->vme_prev == vm_map_to_entry(map)) ||
+ (entry->vme_prev->object.vm_object != entry->object.vm_object)) {
+ indent += 2;
+ vm_object_print(entry->object.vm_object);
+ indent -= 2;
+ }
+ }
+ }
+ indent -= 2;
+}
+
+/*
+ * Routine: vm_map_copy_print
+ * Purpose:
+ * Pretty-print a copy object for ddb.
+ */
+
+void vm_map_copy_print(copy)
+ vm_map_copy_t copy;
+{
+ extern int indent;
+ int i, npages;
+
+ printf("copy object 0x%x\n", copy);
+
+ indent += 2;
+
+ iprintf("type=%d", copy->type);
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ printf("[entry_list]");
+ break;
+
+ case VM_MAP_COPY_OBJECT:
+ printf("[object]");
+ break;
+
+ case VM_MAP_COPY_PAGE_LIST:
+ printf("[page_list]");
+ break;
+
+ default:
+ printf("[bad type]");
+ break;
+ }
+ printf(", offset=0x%x", copy->offset);
+ printf(", size=0x%x\n", copy->size);
+
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ /* XXX add stuff here */
+ break;
+
+ case VM_MAP_COPY_OBJECT:
+ iprintf("object=0x%x\n", copy->cpy_object);
+ break;
+
+ case VM_MAP_COPY_PAGE_LIST:
+ iprintf("npages=%d", copy->cpy_npages);
+ printf(", cont=%x", copy->cpy_cont);
+ printf(", cont_args=%x\n", copy->cpy_cont_args);
+ if (copy->cpy_npages < 0) {
+ npages = 0;
+ } else if (copy->cpy_npages > VM_MAP_COPY_PAGE_LIST_MAX) {
+ npages = VM_MAP_COPY_PAGE_LIST_MAX;
+ } else {
+ npages = copy->cpy_npages;
+ }
+ iprintf("copy->cpy_page_list[0..%d] = {", npages);
+ for (i = 0; i < npages - 1; i++) {
+ printf("0x%x, ", copy->cpy_page_list[i]);
+ }
+ if (npages > 0) {
+ printf("0x%x", copy->cpy_page_list[npages - 1]);
+ }
+ printf("}\n");
+ break;
+ }
+
+	indent -= 2;
+}
+#endif MACH_KDB
+
+#if NORMA_IPC
+/*
+ * This should one day be eliminated;
+ * we should always construct the right flavor of copy object
+ * the first time. Troublesome areas include vm_read, where vm_map_copyin
+ * is called without knowing whom the copy object is for.
+ * There are also situations where we do want a lazy data structure
+ * even if we are sending to a remote port...
+ */
+
+/*
+ * Convert a copy to a page list. The copy argument is in/out
+ * because we probably have to allocate a new vm_map_copy structure.
+ * We take responsibility for discarding the old structure and
+ * use a continuation to do so. Postponing this discard ensures
+ * that the objects containing the pages we've marked busy will stick
+ * around.
+ */
+kern_return_t
+vm_map_convert_to_page_list(caller_copy)
+ vm_map_copy_t *caller_copy;
+{
+ vm_map_entry_t entry, next_entry;
+ vm_offset_t va;
+ vm_offset_t offset;
+ vm_object_t object;
+ kern_return_t result;
+ vm_map_copy_t copy, new_copy;
+ int i, num_pages = 0;
+
+ zone_t entry_zone;
+
+ copy = *caller_copy;
+
+ /*
+ * We may not have to do anything,
+ * or may not be able to do anything.
+ */
+ if (copy == VM_MAP_COPY_NULL || copy->type == VM_MAP_COPY_PAGE_LIST) {
+ return KERN_SUCCESS;
+ }
+ if (copy->type == VM_MAP_COPY_OBJECT) {
+ return vm_map_convert_to_page_list_from_object(caller_copy);
+ }
+ if (copy->type != VM_MAP_COPY_ENTRY_LIST) {
+ panic("vm_map_convert_to_page_list: copy type %d!\n",
+ copy->type);
+ }
+
+ /*
+ * Allocate the new copy. Set its continuation to
+ * discard the old one.
+ */
+ new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ new_copy->type = VM_MAP_COPY_PAGE_LIST;
+ new_copy->cpy_npages = 0;
+ new_copy->offset = copy->offset;
+ new_copy->size = copy->size;
+ new_copy->cpy_cont = vm_map_copy_discard_cont;
+ new_copy->cpy_cont_args = (char *) copy;
+
+ /*
+ * Iterate over entries.
+ */
+ for (entry = vm_map_copy_first_entry(copy);
+ entry != vm_map_copy_to_entry(copy);
+ entry = entry->vme_next) {
+
+ object = entry->object.vm_object;
+ offset = entry->offset;
+ /*
+ * Iterate over pages.
+ */
+ for (va = entry->vme_start;
+ va < entry->vme_end;
+ va += PAGE_SIZE, offset += PAGE_SIZE) {
+
+ vm_page_t m;
+
+ if (new_copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
+ /*
+ * What a mess. We need a continuation
+ * to do the page list, but also one
+ * to discard the old copy. The right
+ * thing to do is probably to copy
+ * out the old copy into the kernel
+ * map (or some temporary task holding
+ * map if we're paranoid about large
+ * copies), and then copyin the page
+ * list that we really wanted with
+ * src_destroy. LATER.
+ */
+ panic("vm_map_convert_to_page_list: num\n");
+ }
+
+ /*
+ * Try to find the page of data.
+ */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ if (((m = vm_page_lookup(object, offset)) !=
+ VM_PAGE_NULL) && !m->busy && !m->fictitious &&
+ !m->absent && !m->error) {
+
+ /*
+ * This is the page. Mark it busy
+ * and keep the paging reference on
+ * the object whilst we do our thing.
+ */
+ m->busy = TRUE;
+
+ /*
+ * Also write-protect the page, so
+ * that the map`s owner cannot change
+ * the data. The busy bit will prevent
+ * faults on the page from succeeding
+ * until the copy is released; after
+ * that, the page can be re-entered
+ * as writable, since we didn`t alter
+ * the map entry. This scheme is a
+ * cheap copy-on-write.
+ *
+ * Don`t forget the protection and
+ * the page_lock value!
+ */
+
+ pmap_page_protect(m->phys_addr,
+ entry->protection
+ & ~m->page_lock
+ & ~VM_PROT_WRITE);
+
+ }
+ else {
+ vm_prot_t result_prot;
+ vm_page_t top_page;
+ kern_return_t kr;
+
+retry:
+ result_prot = VM_PROT_READ;
+
+ kr = vm_fault_page(object, offset,
+ VM_PROT_READ, FALSE, FALSE,
+ &result_prot, &m, &top_page,
+ FALSE, (void (*)()) 0);
+ if (kr == VM_FAULT_MEMORY_SHORTAGE) {
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ goto retry;
+ }
+ if (kr != VM_FAULT_SUCCESS) {
+ /* XXX what about data_error? */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ goto retry;
+ }
+ if (top_page != VM_PAGE_NULL) {
+ vm_object_lock(object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+ }
+ assert(m);
+ m->busy = TRUE;
+ new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
+ vm_object_unlock(object);
+ }
+ }
+
+ *caller_copy = new_copy;
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+vm_map_convert_to_page_list_from_object(caller_copy)
+ vm_map_copy_t *caller_copy;
+{
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_map_copy_t copy, new_copy;
+
+ copy = *caller_copy;
+ assert(copy->type == VM_MAP_COPY_OBJECT);
+ object = copy->cpy_object;
+ assert(object->size == round_page(object->size));
+
+ /*
+ * Allocate the new copy. Set its continuation to
+ * discard the old one.
+ */
+ new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ new_copy->type = VM_MAP_COPY_PAGE_LIST;
+ new_copy->cpy_npages = 0;
+ new_copy->offset = copy->offset;
+ new_copy->size = copy->size;
+ new_copy->cpy_cont = vm_map_copy_discard_cont;
+ new_copy->cpy_cont_args = (char *) copy;
+
+ /*
+ * XXX memory_object_lock_request can probably bust this
+ * XXX See continuation comment in previous routine for solution.
+ */
+ assert(object->size <= VM_MAP_COPY_PAGE_LIST_MAX * PAGE_SIZE);
+
+ for (offset = 0; offset < object->size; offset += PAGE_SIZE) {
+ vm_page_t m;
+
+ /*
+ * Try to find the page of data.
+ */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ m = vm_page_lookup(object, offset);
+ if ((m != VM_PAGE_NULL) && !m->busy && !m->fictitious &&
+ !m->absent && !m->error) {
+
+ /*
+ * This is the page. Mark it busy
+ * and keep the paging reference on
+ * the object whilst we do our thing.
+ */
+ m->busy = TRUE;
+ }
+ else {
+ vm_prot_t result_prot;
+ vm_page_t top_page;
+ kern_return_t kr;
+
+retry:
+ result_prot = VM_PROT_READ;
+
+ kr = vm_fault_page(object, offset,
+ VM_PROT_READ, FALSE, FALSE,
+ &result_prot, &m, &top_page,
+ FALSE, (void (*)()) 0);
+ if (kr == VM_FAULT_MEMORY_SHORTAGE) {
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ goto retry;
+ }
+ if (kr != VM_FAULT_SUCCESS) {
+ /* XXX what about data_error? */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ goto retry;
+ }
+
+ if (top_page != VM_PAGE_NULL) {
+ vm_object_lock(object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+ }
+ assert(m);
+ m->busy = TRUE;
+ new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
+ vm_object_unlock(object);
+ }
+
+ *caller_copy = new_copy;
+ return (KERN_SUCCESS);
+}
+
+kern_return_t
+vm_map_convert_from_page_list(copy)
+ vm_map_copy_t copy;
+{
+ vm_object_t object;
+ int i;
+ vm_map_entry_t new_entry;
+ vm_page_t *page_list;
+
+ /*
+ * Check type of copy object.
+ */
+ if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
+ return KERN_SUCCESS;
+ }
+ if (copy->type == VM_MAP_COPY_OBJECT) {
+ printf("vm_map_convert_from_page_list: COPY_OBJECT?");
+ return KERN_SUCCESS;
+ }
+ if (copy->type != VM_MAP_COPY_PAGE_LIST) {
+ panic("vm_map_convert_from_page_list 0x%x %d",
+ copy,
+ copy->type);
+ }
+
+ /*
+ * Make sure the pages are loose. This may be
+ * a "Can't Happen", but just to be safe ...
+ */
+ page_list = &copy->cpy_page_list[0];
+ if ((*page_list)->tabled)
+ vm_map_copy_steal_pages(copy);
+
+ /*
+ * Create object, and stuff pages into it.
+ */
+ object = vm_object_allocate(copy->cpy_npages);
+ for (i = 0; i < copy->cpy_npages; i++) {
+ register vm_page_t m = *page_list++;
+ vm_page_insert(m, object, i * PAGE_SIZE);
+ m->busy = FALSE;
+ m->dirty = TRUE;
+ vm_page_activate(m);
+ }
+
+ /*
+ * XXX If this page list contained a continuation, then
+ * XXX we're screwed. The right thing to do is probably do
+ * XXX the copyout, and then copyin the entry list we really
+ * XXX wanted.
+ */
+ if (vm_map_copy_has_cont(copy))
+ panic("convert_from_page_list: continuation");
+
+ /*
+ * Change type of copy object
+ */
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
+ copy->type = VM_MAP_COPY_ENTRY_LIST;
+ copy->cpy_hdr.nentries = 0;
+ copy->cpy_hdr.entries_pageable = TRUE;
+
+ /*
+ * Allocate and initialize an entry for object
+ */
+ new_entry = vm_map_copy_entry_create(copy);
+ new_entry->vme_start = trunc_page(copy->offset);
+ new_entry->vme_end = round_page(copy->offset + copy->size);
+ new_entry->object.vm_object = object;
+ new_entry->offset = 0;
+ new_entry->is_shared = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->needs_copy = FALSE;
+ new_entry->protection = VM_PROT_DEFAULT;
+ new_entry->max_protection = VM_PROT_ALL;
+ new_entry->inheritance = VM_INHERIT_DEFAULT;
+ new_entry->wired_count = 0;
+ new_entry->user_wired_count = 0;
+ new_entry->projected_on = 0;
+
+ /*
+ * Insert entry into copy object, and return.
+ */
+ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
+ return(KERN_SUCCESS);
+}
+#endif NORMA_IPC
diff --git a/vm/vm_map.h b/vm/vm_map.h
new file mode 100644
index 00000000..0bdb7d13
--- /dev/null
+++ b/vm/vm_map.h
@@ -0,0 +1,448 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_map.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory map module definitions.
+ *
+ * Contributors:
+ * avie, dlb, mwyoung
+ */
+
+#ifndef _VM_VM_MAP_H_
+#define _VM_VM_MAP_H_
+
+#include <mach/kern_return.h>
+#include <mach/boolean.h>
+#include <mach/machine/vm_types.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_inherit.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <kern/lock.h>
+#include <kern/macro_help.h>
+
+/*
+ * Types defined:
+ *
+ * vm_map_t the high-level address map data structure.
+ * vm_map_entry_t an entry in an address map.
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup
+ * vm_map_copy_t represents memory copied from an address map,
+ * used for inter-map copy operations
+ */
+
+/*
+ * Type: vm_map_object_t [internal use only]
+ *
+ * Description:
+ * The target of an address mapping, either a virtual
+ * memory object or a sub map (of the kernel map).
+ */
+typedef union vm_map_object {
+	struct vm_object	*vm_object;	/* a virtual memory object */
+ struct vm_map *sub_map; /* belongs to another map */
+} vm_map_object_t;
+
+/*
+ * Type: vm_map_entry_t [internal use only]
+ *
+ * Description:
+ * A single mapping within an address map.
+ *
+ * Implementation:
+ * Address map entries consist of start and end addresses,
+ * a VM object (or sub map) and offset into that object,
+ * and user-exported inheritance and protection information.
+ * Control information for virtual copy operations is also
+ * stored in the address map entry.
+ */
+struct vm_map_links {
+ struct vm_map_entry *prev; /* previous entry */
+ struct vm_map_entry *next; /* next entry */
+ vm_offset_t start; /* start address */
+ vm_offset_t end; /* end address */
+};
+
+struct vm_map_entry {
+ struct vm_map_links links; /* links to other entries */
+#define vme_prev links.prev
+#define vme_next links.next
+#define vme_start links.start
+#define vme_end links.end
+ union vm_map_object object; /* object I point to */
+ vm_offset_t offset; /* offset into object */
+ unsigned int
+ /* boolean_t */ is_shared:1, /* region is shared */
+ /* boolean_t */ is_sub_map:1, /* Is "object" a submap? */
+ /* boolean_t */ in_transition:1, /* Entry being changed */
+ /* boolean_t */ needs_wakeup:1, /* Waiters on in_transition */
+ /* Only used when object is a vm_object: */
+ /* boolean_t */ needs_copy:1; /* does object need to be copied */
+
+ /* Only in task maps: */
+ vm_prot_t protection; /* protection code */
+ vm_prot_t max_protection; /* maximum protection */
+ vm_inherit_t inheritance; /* inheritance */
+ unsigned short wired_count; /* can be paged if = 0 */
+ unsigned short user_wired_count; /* for vm_wire */
+ struct vm_map_entry *projected_on; /* 0 for normal map entry
+ or persistent kernel map projected buffer entry;
+ -1 for non-persistent kernel map projected buffer entry;
+ pointer to corresponding kernel map entry for user map
+ projected buffer entry */
+};
+
+typedef struct vm_map_entry *vm_map_entry_t;
+
+#define VM_MAP_ENTRY_NULL ((vm_map_entry_t) 0)
+
+/*
+ * Type: struct vm_map_header
+ *
+ * Description:
+ * Header for a vm_map and a vm_map_copy.
+ */
+struct vm_map_header {
+ struct vm_map_links links; /* first, last, min, max */
+ int nentries; /* Number of entries */
+ boolean_t entries_pageable;
+ /* are map entries pageable? */
+};
+
+/*
+ * Type: vm_map_t [exported; contents invisible]
+ *
+ * Description:
+ * An address map -- a directory relating valid
+ * regions of a task's address space to the corresponding
+ * virtual memory objects.
+ *
+ * Implementation:
+ * Maps are doubly-linked lists of map entries, sorted
+ * by address. One hint is used to start
+ * searches again from the last successful search,
+ * insertion, or removal. Another hint is used to
+ * quickly find free space.
+ */
+struct vm_map {
+ lock_data_t lock; /* Lock for map data */
+ struct vm_map_header hdr; /* Map entry header */
+#define min_offset hdr.links.start /* start of range */
+#define max_offset hdr.links.end /* end of range */
+ pmap_t pmap; /* Physical map */
+ vm_size_t size; /* virtual size */
+ int ref_count; /* Reference count */
+ decl_simple_lock_data(, ref_lock) /* Lock for ref_count field */
+ vm_map_entry_t hint; /* hint for quick lookups */
+ decl_simple_lock_data(, hint_lock) /* lock for hint storage */
+ vm_map_entry_t first_free; /* First free space hint */
+ boolean_t wait_for_space; /* Should callers wait
+ for space? */
+ boolean_t wiring_required;/* All memory wired? */
+ unsigned int timestamp; /* Version number */
+};
+typedef struct vm_map *vm_map_t;
+
+#define VM_MAP_NULL ((vm_map_t) 0)
+
+#define vm_map_to_entry(map) ((struct vm_map_entry *) &(map)->hdr.links)
+#define vm_map_first_entry(map) ((map)->hdr.links.next)
+#define vm_map_last_entry(map) ((map)->hdr.links.prev)
+
+/*
+ * Type: vm_map_version_t [exported; contents invisible]
+ *
+ * Description:
+ * Map versions may be used to quickly validate a previous
+ * lookup operation.
+ *
+ * Usage note:
+ * Because they are bulky objects, map versions are usually
+ * passed by reference.
+ *
+ * Implementation:
+ * Just a timestamp for the main map.
+ */
+typedef struct vm_map_version {
+ unsigned int main_timestamp;
+} vm_map_version_t;
+
+/*
+ * Type: vm_map_copy_t [exported; contents invisible]
+ *
+ * Description:
+ * A map copy object represents a region of virtual memory
+ * that has been copied from an address map but is still
+ * in transit.
+ *
+ * A map copy object may only be used by a single thread
+ * at a time.
+ *
+ * Implementation:
+ * There are three formats for map copy objects.
+ * The first is very similar to the main
+ * address map in structure, and as a result, some
+ * of the internal maintenance functions/macros can
+ * be used with either address maps or map copy objects.
+ *
+ * The map copy object contains a header links
+ * entry onto which the other entries that represent
+ * the region are chained.
+ *
+ * The second format is a single vm object. This is used
+ * primarily in the pageout path. The third format is a
+ * list of vm pages. An optional continuation provides
+ * a hook to be called to obtain more of the memory,
+ * or perform other operations. The continuation takes 3
+ * arguments, a saved arg buffer, a pointer to a new vm_map_copy
+ * (returned) and an abort flag (abort if TRUE).
+ */
+
+#if iPSC386 || iPSC860
+#define VM_MAP_COPY_PAGE_LIST_MAX 64
+#else iPSC386 || iPSC860
+#define VM_MAP_COPY_PAGE_LIST_MAX 8
+#endif iPSC386 || iPSC860
+
+typedef struct vm_map_copy {
+ int type;
+#define VM_MAP_COPY_ENTRY_LIST 1
+#define VM_MAP_COPY_OBJECT 2
+#define VM_MAP_COPY_PAGE_LIST 3
+ vm_offset_t offset;
+ vm_size_t size;
+ union {
+ struct vm_map_header hdr; /* ENTRY_LIST */
+ struct { /* OBJECT */
+ vm_object_t object;
+ } c_o;
+ struct { /* PAGE_LIST */
+ vm_page_t page_list[VM_MAP_COPY_PAGE_LIST_MAX];
+ int npages;
+ kern_return_t (*cont)();
+ char *cont_args;
+ } c_p;
+ } c_u;
+} *vm_map_copy_t;
+
+#define cpy_hdr c_u.hdr
+
+#define cpy_object c_u.c_o.object
+
+#define cpy_page_list c_u.c_p.page_list
+#define cpy_npages c_u.c_p.npages
+#define cpy_cont c_u.c_p.cont
+#define cpy_cont_args c_u.c_p.cont_args
+
+#define VM_MAP_COPY_NULL ((vm_map_copy_t) 0)
+
+/*
+ * Useful macros for entry list copy objects
+ */
+
+#define vm_map_copy_to_entry(copy) \
+ ((struct vm_map_entry *) &(copy)->cpy_hdr.links)
+#define vm_map_copy_first_entry(copy) \
+ ((copy)->cpy_hdr.links.next)
+#define vm_map_copy_last_entry(copy) \
+ ((copy)->cpy_hdr.links.prev)
+
+/*
+ * Continuation macros for page list copy objects
+ */
+
+#define vm_map_copy_invoke_cont(old_copy, new_copy, result) \
+MACRO_BEGIN \
+ vm_map_copy_page_discard(old_copy); \
+ *result = (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ new_copy); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+MACRO_END
+
+#define vm_map_copy_invoke_extend_cont(old_copy, new_copy, result) \
+MACRO_BEGIN \
+ *result = (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ new_copy); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+MACRO_END
+
+#define vm_map_copy_abort_cont(old_copy) \
+MACRO_BEGIN \
+ vm_map_copy_page_discard(old_copy); \
+ (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ (vm_map_copy_t *) 0); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+ (old_copy)->cpy_cont_args = (char *) 0; \
+MACRO_END
+
+#define vm_map_copy_has_cont(copy) \
+ (((copy)->cpy_cont) != (kern_return_t (*)()) 0)
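+
+/*
+ *	Usage sketch (illustrative):  a consumer of a page list copy
+ *	processes cpy_page_list[0..cpy_npages-1] and then asks for the
+ *	rest of the region, roughly:
+ *
+ *		if (vm_map_copy_has_cont(copy)) {
+ *			vm_map_copy_t	new_copy;
+ *			kern_return_t	kr;
+ *
+ *			vm_map_copy_invoke_cont(copy, &new_copy, &kr);
+ *			...
+ *		}
+ *
+ *	Error paths use vm_map_copy_abort_cont(copy) instead.
+ */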
+
+/*
+ * Continuation structures for vm_map_copyin_page_list.
+ */
+
+typedef struct {
+ vm_map_t map;
+ vm_offset_t src_addr;
+ vm_size_t src_len;
+ vm_offset_t destroy_addr;
+ vm_size_t destroy_len;
+ boolean_t steal_pages;
+} vm_map_copyin_args_data_t, *vm_map_copyin_args_t;
+
+#define VM_MAP_COPYIN_ARGS_NULL ((vm_map_copyin_args_t) 0)
+
+/*
+ * Macros: vm_map_lock, etc. [internal use only]
+ * Description:
+ * Perform locking on the data portion of a map.
+ */
+
+#define vm_map_lock_init(map) \
+MACRO_BEGIN \
+ lock_init(&(map)->lock, TRUE); \
+ (map)->timestamp = 0; \
+MACRO_END
+
+#define vm_map_lock(map) \
+MACRO_BEGIN \
+ lock_write(&(map)->lock); \
+ (map)->timestamp++; \
+MACRO_END
+
+#define vm_map_unlock(map) lock_write_done(&(map)->lock)
+#define vm_map_lock_read(map) lock_read(&(map)->lock)
+#define vm_map_unlock_read(map) lock_read_done(&(map)->lock)
+#define vm_map_lock_write_to_read(map) \
+ lock_write_to_read(&(map)->lock)
+#define vm_map_lock_read_to_write(map) \
+ (lock_read_to_write(&(map)->lock) || (((map)->timestamp++), 0))
+#define vm_map_lock_set_recursive(map) \
+ lock_set_recursive(&(map)->lock)
+#define vm_map_lock_clear_recursive(map) \
+ lock_clear_recursive(&(map)->lock)
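+
+/*
+ *	Note (illustrative sketch):  vm_map_lock_read_to_write evaluates
+ *	non-zero when the upgrade fails, in which case the lock has been
+ *	dropped and the caller must start over, as vm_map_lookup does:
+ *
+ *		if (vm_map_lock_read_to_write(map))
+ *			goto RetryLookup;
+ */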
+
+/*
+ * Exported procedures that operate on vm_map_t.
+ */
+
+extern vm_offset_t kentry_data;
+extern vm_offset_t kentry_data_size;
+extern int kentry_count;
+extern void vm_map_init(); /* Initialize the module */
+
+extern vm_map_t vm_map_create(); /* Create an empty map */
+extern vm_map_t vm_map_fork(); /* Create a map in the image
+ * of an existing map */
+
+extern void vm_map_reference(); /* Gain a reference to
+ * an existing map */
+extern void vm_map_deallocate(); /* Lose a reference */
+
+extern kern_return_t vm_map_enter(); /* Enter a mapping */
+extern kern_return_t vm_map_find_entry(); /* Enter a mapping primitive */
+extern kern_return_t vm_map_remove(); /* Deallocate a region */
+extern kern_return_t vm_map_protect(); /* Change protection */
+extern kern_return_t vm_map_inherit(); /* Change inheritance */
+
+extern void vm_map_print(); /* Debugging: print a map */
+
+extern kern_return_t vm_map_lookup(); /* Look up an address */
+extern boolean_t vm_map_verify(); /* Verify that a previous
+ * lookup is still valid */
+/* vm_map_verify_done is now a macro -- see below */
+extern kern_return_t vm_map_copyin(); /* Make a copy of a region */
+extern kern_return_t vm_map_copyin_page_list();/* Make a copy of a region
+ * using a page list copy */
+extern kern_return_t vm_map_copyout(); /* Place a copy into a map */
+extern kern_return_t vm_map_copy_overwrite();/* Overwrite existing memory
+ * with a copy */
+extern void vm_map_copy_discard(); /* Discard a copy without
+ * using it */
+extern kern_return_t vm_map_copy_discard_cont();/* Page list continuation
+ * version of previous */
+
+extern kern_return_t vm_map_machine_attribute();
+ /* Add or remove machine-
+ dependent attributes from
+ map regions */
+
+/*
+ * Functions implemented as macros
+ */
+#define vm_map_min(map) ((map)->min_offset)
+ /* Lowest valid address in
+ * a map */
+
+#define vm_map_max(map) ((map)->max_offset)
+ /* Highest valid address */
+
+#define vm_map_pmap(map) ((map)->pmap)
+ /* Physical map associated
+ * with this address map */
+
+#define vm_map_verify_done(map, version) (vm_map_unlock_read(map))
+ /* Operation that required
+ * a verified lookup is
+ * now complete */
+/*
+ * Pageability functions. Includes macro to preserve old interface.
+ */
+extern kern_return_t vm_map_pageable_common();
+
+#define vm_map_pageable(map, s, e, access) \
+ vm_map_pageable_common(map, s, e, access, FALSE)
+
+#define vm_map_pageable_user(map, s, e, access) \
+ vm_map_pageable_common(map, s, e, access, TRUE)
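+
+/*
+ *	Example (illustrative sketch only): wiring a range of a map for
+ *	read/write access and unwiring it again; map, start and end are
+ *	assumed from the caller.  A VM_PROT_NONE access argument means
+ *	"unwire".  A user-initiated request would use
+ *	vm_map_pageable_user instead, so the user wiring is accounted
+ *	separately from kernel wirings.
+ */
+#if 0
+	kern_return_t	kr;
+
+	kr = vm_map_pageable(map, start, end, VM_PROT_READ|VM_PROT_WRITE);
+	if (kr == KERN_SUCCESS) {
+		/* ... the pages in [start, end) are now wired ... */
+		(void) vm_map_pageable(map, start, end, VM_PROT_NONE);
+	}
+#endif	/* 0 */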
+
+/*
+ * Submap object. Must be used to create memory to be put
+ * in a submap by vm_map_submap.
+ */
+extern vm_object_t vm_submap_object;
+
+/*
+ * Wait and wakeup macros for in_transition map entries.
+ */
+#define vm_map_entry_wait(map, interruptible) \
+ MACRO_BEGIN \
+ assert_wait((event_t)&(map)->hdr, interruptible); \
+ vm_map_unlock(map); \
+ thread_block((void (*)()) 0); \
+ MACRO_END
+
+#define vm_map_entry_wakeup(map) thread_wakeup((event_t)&(map)->hdr)
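+
+/*
+ *	Example (illustrative sketch only): waiting out an in_transition
+ *	map entry; map and entry are assumed from the caller.
+ *	vm_map_entry_wait drops the map lock, so the map must be
+ *	relocked and the entry looked up again before proceeding.
+ */
+#if 0
+	while (entry->in_transition) {
+		entry->needs_wakeup = TRUE;
+		vm_map_entry_wait(map, FALSE);	/* also unlocks the map */
+		vm_map_lock(map);
+		/* ... repeat the lookup; the map may have changed ... */
+	}
+#endif	/* 0 */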
+
+#endif /* _VM_VM_MAP_H_ */
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 00000000..5186ee6c
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,3090 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_object.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Virtual memory object module.
+ */
+
+#include <norma_vm.h>
+#include <mach_pagemap.h>
+
+#if NORMA_VM
+#include <norma/xmm_server_rename.h>
+#endif /* NORMA_VM */
+
+#include <mach/memory_object.h>
+#include "memory_object_default.h"
+#include "memory_object_user.h"
+#include "vm_param.h"
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+#include <kern/assert.h>
+#include <kern/lock.h>
+#include <kern/queue.h>
+#include <kern/xpr.h>
+#include <kern/zalloc.h>
+#include <vm/memory_object.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+
+void memory_object_release(
+ ipc_port_t pager,
+ pager_request_t pager_request,
+ ipc_port_t pager_name); /* forward */
+
+void vm_object_deactivate_pages(vm_object_t);
+
+/*
+ * Virtual memory objects maintain the actual data
+ * associated with allocated virtual memory. A given
+ * page of memory exists within exactly one object.
+ *
+ * An object is only deallocated when all "references"
+ * are given up. Only one "reference" to a given
+ * region of an object should be writeable.
+ *
+ * Associated with each object is a list of all resident
+ * memory pages belonging to that object; this list is
+ * maintained by the "vm_page" module, but locked by the object's
+ * lock.
+ *
+ * Each object also records the memory object port
+ * that is used by the kernel to request and write
+ * back data (the memory object port, field "pager"),
+ * and the ports provided to the memory manager, the server that
+ * manages that data, to return data and control its
+ * use (the memory object control port, field "pager_request")
+ * and for naming (the memory object name port, field "pager_name").
+ *
+ * Virtual memory objects are allocated to provide
+ * zero-filled memory (vm_allocate) or map a user-defined
+ * memory object into a virtual address space (vm_map).
+ *
+ * Virtual memory objects that refer to a user-defined
+ * memory object are called "permanent", because all changes
+ * made in virtual memory are reflected back to the
+ * memory manager, which may then store it permanently.
+ * Other virtual memory objects are called "temporary",
+ * meaning that changes need be written back only when
+ * necessary to reclaim pages, and that storage associated
+ * with the object can be discarded once it is no longer
+ * mapped.
+ *
+ * A permanent memory object may be mapped into more
+ * than one virtual address space. Moreover, two threads
+ * may attempt to make the first mapping of a memory
+ * object concurrently. Only one thread is allowed to
+ * complete this mapping; all others wait until the
+ * "pager_initialized" field is asserted, indicating
+ * that the first thread has initialized all of the
+ * necessary fields in the virtual memory object structure.
+ *
+ * The kernel relies on a *default memory manager* to
+ * provide backing storage for the zero-filled virtual
+ * memory objects. The memory object ports associated
+ * with these temporary virtual memory objects are only
+ * generated and passed to the default memory manager
+ * when it becomes necessary. Virtual memory objects
+ * that depend on the default memory manager are called
+ * "internal". The "pager_created" field is provided to
+ * indicate whether these ports have ever been allocated.
+ *
+ * The kernel may also create virtual memory objects to
+ * hold changed pages after a copy-on-write operation.
+ * In this case, the virtual memory object (and its
+ * backing storage -- its memory object) only contain
+ * those pages that have been changed. The "shadow"
+ * field refers to the virtual memory object that contains
+ * the remainder of the contents. The "shadow_offset"
+ * field indicates where in the "shadow" these contents begin.
+ * The "copy" field refers to a virtual memory object
+ * to which changed pages must be copied before changing
+ * this object, in order to implement another form
+ * of copy-on-write optimization.
+ *
+ * The virtual memory object structure also records
+ * the attributes associated with its memory object.
+ * The "pager_ready", "can_persist" and "copy_strategy"
+ * fields represent those attributes. The "cached_list"
+ * field is used in the implementation of the persistence
+ * attribute.
+ *
+ * ZZZ Continue this comment.
+ */
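+
+/*
+ *	Example (illustrative sketch only): the basic lifetime protocol
+ *	for a temporary object.  vm_object_allocate returns an object
+ *	with one reference; each additional user gains a reference with
+ *	vm_object_reference and gives it up with vm_object_deallocate.
+ *	The object is terminated (or cached) when the last reference is
+ *	released.
+ */
+#if 0
+	vm_object_t	object;
+
+	object = vm_object_allocate(PAGE_SIZE);	/* ref_count == 1 */
+	vm_object_reference(object);		/* ref_count == 2 */
+	/* ... use the object ... */
+	vm_object_deallocate(object);		/* ref_count == 1 */
+	vm_object_deallocate(object);		/* last reference gone */
+#endif	/* 0 */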
+
+zone_t vm_object_zone; /* vm backing store zone */
+
+/*
+ * All wired-down kernel memory belongs to a single virtual
+ * memory object (kernel_object) to avoid wasting data structures.
+ */
+vm_object_t kernel_object;
+
+/*
+ * Virtual memory objects that are not referenced by
+ * any address maps, but that are allowed to persist
+ * (an attribute specified by the associated memory manager),
+ * are kept in a queue (vm_object_cached_list).
+ *
+ * When an object from this queue is referenced again,
+ * for example to make another address space mapping,
+ * it must be removed from the queue. That is, the
+ * queue contains *only* objects with zero references.
+ *
+ * The kernel may choose to terminate objects from this
+ * queue in order to reclaim storage. The current policy
+ * is to permit a fixed maximum number of unreferenced
+ * objects (vm_object_cached_max).
+ *
+ * A simple lock (accessed by routines
+ * vm_object_cache_{lock,lock_try,unlock}) governs the
+ * object cache. It must be held when objects are
+ * added to or removed from the cache (in vm_object_terminate).
+ * The routines that acquire a reference to a virtual
+ * memory object based on one of the memory object ports
+ * must also lock the cache.
+ *
+ * Ideally, the object cache should be more isolated
+ * from the reference mechanism, so that the lock need
+ * not be held to make simple references.
+ */
+queue_head_t vm_object_cached_list;
+int vm_object_cached_count;
+int vm_object_cached_max = 100; /* may be patched */
+
+decl_simple_lock_data(,vm_object_cached_lock_data)
+
+#define vm_object_cache_lock() \
+ simple_lock(&vm_object_cached_lock_data)
+#define vm_object_cache_lock_try() \
+ simple_lock_try(&vm_object_cached_lock_data)
+#define vm_object_cache_unlock() \
+ simple_unlock(&vm_object_cached_lock_data)
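+
+/*
+ *	Example (illustrative sketch only): the pattern used when
+ *	reviving a cached object (compare vm_object_lookup below).  The
+ *	cache lock is taken before the object lock, and an object found
+ *	with a zero reference count must be removed from the cached list
+ *	before the new reference is counted.
+ */
+#if 0
+	vm_object_cache_lock();
+	vm_object_lock(object);
+	if (object->ref_count == 0) {
+		queue_remove(&vm_object_cached_list, object,
+			     vm_object_t, cached_list);
+		vm_object_cached_count--;
+	}
+	object->ref_count++;
+	vm_object_unlock(object);
+	vm_object_cache_unlock();
+#endif	/* 0 */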
+
+/*
+ * Virtual memory objects are initialized from
+ * a template (see vm_object_allocate).
+ *
+ * When adding a new field to the virtual memory
+ * object structure, be sure to add initialization
+ * (see vm_object_init).
+ */
+vm_object_t vm_object_template;
+
+/*
+ * vm_object_allocate:
+ *
+ * Returns a new object with the given size.
+ */
+
+vm_object_t _vm_object_allocate(
+ vm_size_t size)
+{
+ register vm_object_t object;
+
+ object = (vm_object_t) zalloc(vm_object_zone);
+
+ *object = *vm_object_template;
+ queue_init(&object->memq);
+ vm_object_lock_init(object);
+ object->size = size;
+
+ return object;
+}
+
+vm_object_t vm_object_allocate(
+ vm_size_t size)
+{
+ register vm_object_t object;
+ register ipc_port_t port;
+
+ object = _vm_object_allocate(size);
+#if !NORMA_VM
+ port = ipc_port_alloc_kernel();
+ if (port == IP_NULL)
+ panic("vm_object_allocate");
+ object->pager_name = port;
+ ipc_kobject_set(port, (ipc_kobject_t) object, IKOT_PAGING_NAME);
+#endif /* !NORMA_VM */
+
+ return object;
+}
+
+/*
+ * vm_object_bootstrap:
+ *
+ * Initialize the VM objects module.
+ */
+void vm_object_bootstrap(void)
+{
+ vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
+ round_page(512*1024),
+ round_page(12*1024),
+ 0, "objects");
+
+ queue_init(&vm_object_cached_list);
+ simple_lock_init(&vm_object_cached_lock_data);
+
+ /*
+ * Fill in a template object, for quick initialization
+ */
+
+ vm_object_template = (vm_object_t) zalloc(vm_object_zone);
+ bzero((char *) vm_object_template, sizeof *vm_object_template);
+
+ vm_object_template->ref_count = 1;
+ vm_object_template->size = 0;
+ vm_object_template->resident_page_count = 0;
+ vm_object_template->copy = VM_OBJECT_NULL;
+ vm_object_template->shadow = VM_OBJECT_NULL;
+ vm_object_template->shadow_offset = (vm_offset_t) 0;
+
+ vm_object_template->pager = IP_NULL;
+ vm_object_template->paging_offset = 0;
+ vm_object_template->pager_request = PAGER_REQUEST_NULL;
+ vm_object_template->pager_name = IP_NULL;
+
+ vm_object_template->pager_created = FALSE;
+ vm_object_template->pager_initialized = FALSE;
+ vm_object_template->pager_ready = FALSE;
+
+ vm_object_template->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+ /* ignored if temporary, will be reset before
+ * permanent object becomes ready */
+ vm_object_template->use_shared_copy = FALSE;
+ vm_object_template->shadowed = FALSE;
+
+ vm_object_template->absent_count = 0;
+ vm_object_template->all_wanted = 0; /* all bits FALSE */
+
+ vm_object_template->paging_in_progress = 0;
+ vm_object_template->can_persist = FALSE;
+ vm_object_template->internal = TRUE;
+ vm_object_template->temporary = TRUE;
+ vm_object_template->alive = TRUE;
+ vm_object_template->lock_in_progress = FALSE;
+ vm_object_template->lock_restart = FALSE;
+ vm_object_template->use_old_pageout = TRUE; /* XXX change later */
+ vm_object_template->last_alloc = (vm_offset_t) 0;
+
+#if MACH_PAGEMAP
+ vm_object_template->existence_info = VM_EXTERNAL_NULL;
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Initialize the "kernel object"
+ */
+
+ kernel_object = _vm_object_allocate(
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+
+ /*
+ * Initialize the "submap object". Make it as large as the
+ * kernel object so that no limit is imposed on submap sizes.
+ */
+
+ vm_submap_object = _vm_object_allocate(
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+
+#if MACH_PAGEMAP
+ vm_external_module_initialize();
+#endif /* MACH_PAGEMAP */
+}
+
+void vm_object_init(void)
+{
+#if !NORMA_VM
+ /*
+ * Finish initializing the kernel object.
+ * The submap object doesn't need a name port.
+ */
+
+ kernel_object->pager_name = ipc_port_alloc_kernel();
+ ipc_kobject_set(kernel_object->pager_name,
+ (ipc_kobject_t) kernel_object,
+ IKOT_PAGING_NAME);
+#endif /* !NORMA_VM */
+}
+
+/*
+ * vm_object_reference:
+ *
+ * Gets another reference to the given object.
+ */
+void vm_object_reference(
+ register vm_object_t object)
+{
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_deallocate:
+ *
+ * Release a reference to the specified object,
+ * gained either through a vm_object_allocate
+ * or a vm_object_reference call. When all references
+ * are gone, storage associated with this object
+ * may be relinquished.
+ *
+ * No object may be locked.
+ */
+void vm_object_deallocate(
+ register vm_object_t object)
+{
+ vm_object_t temp;
+
+ while (object != VM_OBJECT_NULL) {
+
+ /*
+ * The cache holds a reference (uncounted) to
+ * the object; we must lock it before removing
+ * the object.
+ */
+
+ vm_object_cache_lock();
+
+ /*
+ * Lose the reference
+ */
+ vm_object_lock(object);
+ if (--(object->ref_count) > 0) {
+
+ /*
+ * If there are still references, then
+ * we are done.
+ */
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ return;
+ }
+
+ /*
+ * See whether this object can persist. If so, enter
+ * it in the cache, then deactivate all of its
+ * pages.
+ */
+ if (object->can_persist) {
+ boolean_t overflow;
+
+ /*
+ * Enter the object onto the queue
+ * of "cached" objects. Remember whether
+ * we've caused the queue to overflow,
+ * as a hint.
+ */
+
+ queue_enter(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ overflow = (++vm_object_cached_count > vm_object_cached_max);
+ vm_object_cache_unlock();
+
+ vm_object_deactivate_pages(object);
+ vm_object_unlock(object);
+
+ /*
+ * If we didn't overflow, or if the queue has
+ * since been trimmed back to the specified
+ * maximum or below, then quit.
+ */
+ if (!overflow)
+ return;
+
+ while (TRUE) {
+ vm_object_cache_lock();
+ if (vm_object_cached_count <=
+ vm_object_cached_max) {
+ vm_object_cache_unlock();
+ return;
+ }
+
+ /*
+ * If we must trim down the queue, take
+ * the first object, and proceed to
+ * terminate it instead of the original
+ * object. Have to wait for pager
+ * initialization if it's in progress.
+ */
+ object = (vm_object_t)
+ queue_first(&vm_object_cached_list);
+ vm_object_lock(object);
+
+ if (!(object->pager_created &&
+ !object->pager_initialized)) {
+
+ /*
+ * Ok to terminate, hang on to lock.
+ */
+ break;
+ }
+
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_INITIALIZED, FALSE);
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ thread_block((void (*)()) 0);
+
+ /*
+ * Continue loop to check if cache still
+ * needs to be trimmed.
+ */
+ }
+
+ /*
+ * Actually remove object from cache.
+ */
+
+ queue_remove(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ vm_object_cached_count--;
+
+ assert(object->ref_count == 0);
+ }
+ else {
+ if (object->pager_created &&
+ !object->pager_initialized) {
+
+ /*
+ * Have to wait for initialization.
+ * Put reference back and retry
+ * when it's initialized.
+ */
+ object->ref_count++;
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_INITIALIZED, FALSE);
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ thread_block((void (*)()) 0);
+ continue;
+ }
+ }
+
+ /*
+ * Take the reference to the shadow object
+ * out of the object to be destroyed.
+ */
+
+ temp = object->shadow;
+
+ /*
+ * Destroy the object; the cache lock will
+ * be released in the process.
+ */
+
+ vm_object_terminate(object);
+
+ /*
+ * Deallocate the reference to the shadow
+ * by continuing the loop with that object
+ * in place of the original.
+ */
+
+ object = temp;
+ }
+}
+
+boolean_t vm_object_terminate_remove_all = FALSE;
+
+/*
+ * Routine: vm_object_terminate
+ * Purpose:
+ * Free all resources associated with a vm_object.
+ * In/out conditions:
+ * Upon entry, the object and the cache must be locked,
+ * and the object must have no references.
+ *
+ * The shadow object reference is left alone.
+ *
+ * Upon exit, the cache will be unlocked, and the
+ * object will cease to exist.
+ */
+void vm_object_terminate(
+ register vm_object_t object)
+{
+ register vm_page_t p;
+ vm_object_t shadow_object;
+
+ /*
+ * Make sure the object isn't already being terminated
+ */
+
+ assert(object->alive);
+ object->alive = FALSE;
+
+ /*
+ * Make sure no one can look us up now.
+ */
+
+ vm_object_remove(object);
+ vm_object_cache_unlock();
+
+ /*
+ * Detach the object from its shadow if we are the shadow's
+ * copy.
+ */
+ if ((shadow_object = object->shadow) != VM_OBJECT_NULL) {
+ vm_object_lock(shadow_object);
+ assert((shadow_object->copy == object) ||
+ (shadow_object->copy == VM_OBJECT_NULL));
+ shadow_object->copy = VM_OBJECT_NULL;
+ vm_object_unlock(shadow_object);
+ }
+
+ /*
+ * The pageout daemon might be playing with our pages.
+ * Now that the object is dead, it won't touch any more
+ * pages, but some pages might already be on their way out.
+ * Hence, we wait until the active paging activities have ceased.
+ */
+
+ vm_object_paging_wait(object, FALSE);
+
+ /*
+ * Clean or free the pages, as appropriate.
+ * It is possible for us to find busy/absent pages,
+ * if some faults on this object were aborted.
+ */
+
+ if ((object->temporary) || (object->pager == IP_NULL)) {
+ while (!queue_empty(&object->memq)) {
+ p = (vm_page_t) queue_first(&object->memq);
+
+ VM_PAGE_CHECK(p);
+
+ if (p->busy && !p->absent)
+ panic("vm_object_terminate.2 0x%x 0x%x",
+ object, p);
+
+ VM_PAGE_FREE(p);
+ }
+ } else while (!queue_empty(&object->memq)) {
+ p = (vm_page_t) queue_first(&object->memq);
+
+ VM_PAGE_CHECK(p);
+
+ if (p->busy && !p->absent)
+ panic("vm_object_terminate.3 0x%x 0x%x", object, p);
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(p);
+ vm_page_unlock_queues();
+
+ if (p->absent || p->private) {
+
+ /*
+ * For private pages, VM_PAGE_FREE just
+ * leaves the page structure around for
+ * its owner to clean up. For absent
+ * pages, the structure is returned to
+ * the appropriate pool.
+ */
+
+ goto free_page;
+ }
+
+ if (p->fictitious)
+ panic("vm_object_terminate.4 0x%x 0x%x", object, p);
+
+ if (!p->dirty)
+ p->dirty = pmap_is_modified(p->phys_addr);
+
+ if (p->dirty || p->precious) {
+ p->busy = TRUE;
+ vm_pageout_page(p, FALSE, TRUE); /* flush page */
+ } else {
+ free_page:
+ VM_PAGE_FREE(p);
+ }
+ }
+
+ assert(object->ref_count == 0);
+ assert(object->paging_in_progress == 0);
+
+ /*
+ * Throw away port rights... note that they may
+ * already have been thrown away (by vm_object_destroy
+ * or memory_object_destroy).
+ *
+ * Instead of destroying the control and name ports,
+ * we send all rights off to the memory manager instead,
+ * using memory_object_terminate.
+ */
+
+ vm_object_unlock(object);
+
+ if (object->pager != IP_NULL) {
+ /* consumes our rights for pager, pager_request, pager_name */
+ memory_object_release(object->pager,
+ object->pager_request,
+ object->pager_name);
+ } else if (object->pager_name != IP_NULL) {
+ /* consumes our right for pager_name */
+#if NORMA_VM
+ ipc_port_release_send(object->pager_name);
+#else /* NORMA_VM */
+ ipc_port_dealloc_kernel(object->pager_name);
+#endif /* NORMA_VM */
+ }
+
+#if MACH_PAGEMAP
+ vm_external_destroy(object->existence_info);
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Free the space for the object.
+ */
+
+ zfree(vm_object_zone, (vm_offset_t) object);
+}
+
+/*
+ * Routine: vm_object_pager_wakeup
+ * Purpose: Wake up anyone waiting for IKOT_PAGER_TERMINATING
+ */
+
+void
+vm_object_pager_wakeup(
+ ipc_port_t pager)
+{
+ boolean_t someone_waiting;
+
+ /*
+ * If anyone was waiting for the memory_object_terminate
+ * to be queued, wake them up now.
+ */
+ vm_object_cache_lock();
+ assert(ip_kotype(pager) == IKOT_PAGER_TERMINATING);
+ someone_waiting = (pager->ip_kobject != IKO_NULL);
+ if (ip_active(pager))
+ ipc_kobject_set(pager, IKO_NULL, IKOT_NONE);
+ vm_object_cache_unlock();
+ if (someone_waiting) {
+ thread_wakeup((event_t) pager);
+ }
+}
+
+/*
+ * Routine: memory_object_release
+ * Purpose: Terminate the pager and release port rights,
+ * just like memory_object_terminate, except
+ * that we wake up anyone blocked in vm_object_enter
+ * waiting for the termination message to be queued
+ * before calling memory_object_init.
+ */
+void memory_object_release(
+ ipc_port_t pager,
+ pager_request_t pager_request,
+ ipc_port_t pager_name)
+{
+
+ /*
+ * Keep a reference to pager port;
+ * the terminate might otherwise release all references.
+ */
+ ip_reference(pager);
+
+ /*
+ * Terminate the pager.
+ */
+ (void) memory_object_terminate(pager, pager_request, pager_name);
+
+ /*
+ * Wakeup anyone waiting for this terminate
+ */
+ vm_object_pager_wakeup(pager);
+
+ /*
+ * Release reference to pager port.
+ */
+ ip_release(pager);
+}
+
+/*
+ * Routine: vm_object_abort_activity [internal use only]
+ * Purpose:
+ * Abort paging requests pending on this object.
+ * In/out conditions:
+ * The object is locked on entry and exit.
+ */
+void vm_object_abort_activity(
+ vm_object_t object)
+{
+ register
+ vm_page_t p;
+ vm_page_t next;
+
+ /*
+ * Abort all activity that would be waiting
+ * for a result on this memory object.
+ *
+ * We could also choose to destroy all pages
+ * that we have in memory for this object, but
+ * we don't.
+ */
+
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ next = (vm_page_t) queue_next(&p->listq);
+
+ /*
+ * If it's being paged in, destroy it.
+ * If an unlock has been requested, start it again.
+ */
+
+ if (p->busy && p->absent) {
+ VM_PAGE_FREE(p);
+ }
+ else {
+ if (p->unlock_request != VM_PROT_NONE)
+ p->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(p);
+ }
+
+ p = next;
+ }
+
+ /*
+ * Wake up threads waiting for the memory object to
+ * become ready.
+ */
+
+ object->pager_ready = TRUE;
+ vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
+}
+
+/*
+ * Routine: memory_object_destroy [user interface]
+ * Purpose:
+ * Shut down a memory object, despite the
+ * presence of address map (or other) references
+ * to the vm_object.
+ * Note:
+ * This routine may be called either from the user interface,
+ * or from port destruction handling (via vm_object_destroy).
+ */
+kern_return_t memory_object_destroy(
+ register
+ vm_object_t object,
+ kern_return_t reason)
+{
+ ipc_port_t old_object, old_name;
+ pager_request_t old_control;
+
+#ifdef lint
+ reason++;
+#endif /* lint */
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_SUCCESS;
+
+ /*
+ * Remove the port associations immediately.
+ *
+ * This will prevent the memory manager from further
+ * meddling. [If it wanted to flush data or make
+ * other changes, it should have done so before performing
+ * the destroy call.]
+ */
+
+ vm_object_cache_lock();
+ vm_object_lock(object);
+ vm_object_remove(object);
+ object->can_persist = FALSE;
+ vm_object_cache_unlock();
+
+ /*
+ * Rip out the ports from the vm_object now... this
+ * will prevent new memory_object calls from succeeding.
+ */
+
+ old_object = object->pager;
+ object->pager = IP_NULL;
+
+ old_control = object->pager_request;
+ object->pager_request = PAGER_REQUEST_NULL;
+
+ old_name = object->pager_name;
+ object->pager_name = IP_NULL;
+
+
+ /*
+ * Wait for existing paging activity (that might
+ * have the old ports) to subside.
+ */
+
+ vm_object_paging_wait(object, FALSE);
+ vm_object_unlock(object);
+
+ /*
+ * Shut down the ports now.
+ *
+ * [Paging operations may be proceeding concurrently --
+ * they'll get the null values established above.]
+ */
+
+ if (old_object != IP_NULL) {
+ /* consumes our rights for object, control, name */
+ memory_object_release(old_object, old_control,
+ old_name);
+ } else if (old_name != IP_NULL) {
+ /* consumes our right for name */
+#if NORMA_VM
+ ipc_port_release_send(old_name);
+#else /* NORMA_VM */
+ ipc_port_dealloc_kernel(old_name);
+#endif /* NORMA_VM */
+ }
+
+ /*
+ * Lose the reference that was donated for this routine
+ */
+
+ vm_object_deallocate(object);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * vm_object_deactivate_pages
+ *
+ * Deactivate all pages in the specified object. (Keep its pages
+ * in memory even though it is no longer referenced.)
+ *
+ * The object must be locked.
+ */
+void vm_object_deactivate_pages(
+ register vm_object_t object)
+{
+ register vm_page_t p;
+
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ vm_page_lock_queues();
+ if (!p->busy)
+ vm_page_deactivate(p);
+ vm_page_unlock_queues();
+ }
+}
+
+
+/*
+ * Routine: vm_object_pmap_protect
+ *
+ * Purpose:
+ * Reduces the permission for all physical
+ * pages in the specified object range.
+ *
+ * If removing write permission only, it is
+ * sufficient to protect only the pages in
+ * the top-level object; only those pages may
+ * have write permission.
+ *
+ * If removing all access, we must follow the
+ * shadow chain from the top-level object to
+ * remove access to all pages in shadowed objects.
+ *
+ * The object must *not* be locked. The object must
+ * be temporary/internal.
+ *
+ * If pmap is not NULL, this routine assumes that
+ * the only mappings for the pages are in that
+ * pmap.
+ */
+boolean_t vm_object_pmap_protect_by_page = FALSE;
+
+void vm_object_pmap_protect(
+ register vm_object_t object,
+ register vm_offset_t offset,
+ vm_offset_t size,
+ pmap_t pmap,
+ vm_offset_t pmap_start,
+ vm_prot_t prot)
+{
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+
+ assert(object->temporary && object->internal);
+
+ while (TRUE) {
+ if (object->resident_page_count > atop(size) / 2 &&
+ pmap != PMAP_NULL) {
+ vm_object_unlock(object);
+ pmap_protect(pmap, pmap_start, pmap_start + size, prot);
+ return;
+ }
+
+ {
+ register vm_page_t p;
+ register vm_offset_t end;
+
+ end = offset + size;
+
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (offset <= p->offset) &&
+ (p->offset < end)) {
+ if ((pmap == PMAP_NULL) ||
+ vm_object_pmap_protect_by_page) {
+ pmap_page_protect(p->phys_addr,
+ prot & ~p->page_lock);
+ } else {
+ vm_offset_t start =
+ pmap_start +
+ (p->offset - offset);
+
+ pmap_protect(pmap,
+ start,
+ start + PAGE_SIZE,
+ prot);
+ }
+ }
+ }
+ }
+
+ if (prot == VM_PROT_NONE) {
+ /*
+ * Must follow shadow chain to remove access
+ * to pages in shadowed objects.
+ */
+ register vm_object_t next_object;
+
+ next_object = object->shadow;
+ if (next_object != VM_OBJECT_NULL) {
+ offset += object->shadow_offset;
+ vm_object_lock(next_object);
+ vm_object_unlock(object);
+ object = next_object;
+ }
+ else {
+ /*
+ * End of chain - we are done.
+ */
+ break;
+ }
+ }
+ else {
+ /*
+ * Pages in shadowed objects may never have
+ * write permission - we may stop here.
+ */
+ break;
+ }
+ }
+
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_pmap_remove:
+ *
+ * Removes all physical pages in the specified
+ * object range from all physical maps.
+ *
+ * The object must *not* be locked.
+ */
+void vm_object_pmap_remove(
+ register vm_object_t object,
+ register vm_offset_t start,
+ register vm_offset_t end)
+{
+ register vm_page_t p;
+
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (start <= p->offset) &&
+ (p->offset < end))
+ pmap_page_protect(p->phys_addr, VM_PROT_NONE);
+ }
+ vm_object_unlock(object);
+}
+
+/*
+ * Routine: vm_object_copy_slowly
+ *
+ * Description:
+ * Copy the specified range of the source
+ * virtual memory object without using
+ * protection-based optimizations (such
+ * as copy-on-write). The pages in the
+ * region are actually copied.
+ *
+ * In/out conditions:
+ * The caller must hold a reference and a lock
+ * for the source virtual memory object. The source
+ * object will be returned *unlocked*.
+ *
+ * Results:
+ * If the copy is completed successfully, KERN_SUCCESS is
+ * returned. If the caller asserted the interruptible
+ * argument, and an interruption occurred while waiting
+ * for a user-generated event, MACH_SEND_INTERRUPTED is
+ * returned. Other values may be returned to indicate
+ * hard errors during the copy operation.
+ *
+ * A new virtual memory object is returned in a
+ * parameter (_result_object). The contents of this
+ * new object, starting at a zero offset, are a copy
+ * of the source memory region. In the event of
+ * an error, this parameter will contain the value
+ * VM_OBJECT_NULL.
+ */
+kern_return_t vm_object_copy_slowly(
+ register
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ boolean_t interruptible,
+ vm_object_t *_result_object) /* OUT */
+{
+ vm_object_t new_object;
+ vm_offset_t new_offset;
+
+ if (size == 0) {
+ vm_object_unlock(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Prevent destruction of the source object while we copy.
+ */
+
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ vm_object_unlock(src_object);
+
+ /*
+ * Create a new object to hold the copied pages.
+ * A few notes:
+ * We fill the new object starting at offset 0,
+ * regardless of the input offset.
+ * We don't bother to lock the new object within
+ * this routine, since we have the only reference.
+ */
+
+ new_object = vm_object_allocate(size);
+ new_offset = 0;
+
+ assert(size == trunc_page(size)); /* Will the loop terminate? */
+
+ for ( ;
+ size != 0 ;
+ src_offset += PAGE_SIZE, new_offset += PAGE_SIZE, size -= PAGE_SIZE
+ ) {
+ vm_page_t new_page;
+ vm_fault_return_t result;
+
+ while ((new_page = vm_page_alloc(new_object, new_offset))
+ == VM_PAGE_NULL) {
+ VM_PAGE_WAIT((void (*)()) 0);
+ }
+
+ do {
+ vm_prot_t prot = VM_PROT_READ;
+ vm_page_t _result_page;
+ vm_page_t top_page;
+ register
+ vm_page_t result_page;
+
+ vm_object_lock(src_object);
+ src_object->paging_in_progress++;
+
+ result = vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, interruptible,
+ &prot, &_result_page, &top_page,
+ FALSE, (void (*)()) 0);
+
+ switch(result) {
+ case VM_FAULT_SUCCESS:
+ result_page = _result_page;
+
+ /*
+ * We don't need to hold the object
+ * lock -- the busy page will be enough.
+ * [We don't care about picking up any
+ * new modifications.]
+ *
+ * Copy the page to the new object.
+ *
+ * POLICY DECISION:
+ * If result_page is clean,
+ * we could steal it instead
+ * of copying.
+ */
+
+ vm_object_unlock(result_page->object);
+ vm_page_copy(result_page, new_page);
+
+ /*
+ * Let go of both pages (make them
+ * not busy, perform wakeup, activate).
+ */
+
+ new_page->busy = FALSE;
+ new_page->dirty = TRUE;
+ vm_object_lock(result_page->object);
+ PAGE_WAKEUP_DONE(result_page);
+
+ vm_page_lock_queues();
+ if (!result_page->active &&
+ !result_page->inactive)
+ vm_page_activate(result_page);
+ vm_page_activate(new_page);
+ vm_page_unlock_queues();
+
+ /*
+ * Release paging references and
+ * top-level placeholder page, if any.
+ */
+
+ vm_fault_cleanup(result_page->object,
+ top_page);
+
+ break;
+
+ case VM_FAULT_RETRY:
+ break;
+
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ break;
+
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ break;
+
+ case VM_FAULT_INTERRUPTED:
+ vm_page_free(new_page);
+ vm_object_deallocate(new_object);
+ vm_object_deallocate(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return MACH_SEND_INTERRUPTED;
+
+ case VM_FAULT_MEMORY_ERROR:
+ /*
+ * A policy choice:
+ * (a) ignore pages that we can't
+ * copy
+ * (b) return the null object if
+ * any page fails [chosen]
+ */
+
+ vm_page_free(new_page);
+ vm_object_deallocate(new_object);
+ vm_object_deallocate(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return KERN_MEMORY_ERROR;
+ }
+ } while (result != VM_FAULT_SUCCESS);
+ }
+
+ /*
+ * Lose the extra reference, and return our object.
+ */
+
+ vm_object_deallocate(src_object);
+ *_result_object = new_object;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_object_copy_temporary
+ *
+ * Purpose:
+ * Copy the specified range of the source virtual
+ * memory object, if it can be done without blocking.
+ *
+ * Results:
+ * If the copy is successful, the copy is returned in
+ * the arguments; otherwise, the arguments are not
+ * affected.
+ *
+ * In/out conditions:
+ * The object should be unlocked on entry and exit.
+ */
+
+vm_object_t vm_object_copy_delayed(); /* forward declaration */
+
+boolean_t vm_object_copy_temporary(
+ vm_object_t *_object, /* INOUT */
+ vm_offset_t *_offset, /* INOUT */
+ boolean_t *_src_needs_copy, /* OUT */
+ boolean_t *_dst_needs_copy) /* OUT */
+{
+ vm_object_t object = *_object;
+
+#ifdef lint
+ ++*_offset;
+#endif /* lint */
+
+ if (object == VM_OBJECT_NULL) {
+ *_src_needs_copy = FALSE;
+ *_dst_needs_copy = FALSE;
+ return TRUE;
+ }
+
+ /*
+ * If the object is temporary, we can perform
+ * a symmetric copy-on-write without asking.
+ */
+
+ vm_object_lock(object);
+ if (object->temporary) {
+
+ /*
+ * Shared objects use delayed copy
+ */
+ if (object->use_shared_copy) {
+
+ /*
+ * Asymmetric copy strategy. Destination
+ * must be copied (to allow copy object reuse).
+ * Source is unaffected.
+ */
+ vm_object_unlock(object);
+ object = vm_object_copy_delayed(object);
+ *_object = object;
+ *_src_needs_copy = FALSE;
+ *_dst_needs_copy = TRUE;
+ return TRUE;
+ }
+
+ /*
+ * Make another reference to the object.
+ *
+ * Leave object/offset unchanged.
+ */
+
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ object->shadowed = TRUE;
+ vm_object_unlock(object);
+
+ /*
+ * Both source and destination must make
+ * shadows, and the source must be made
+ * read-only if not already.
+ */
+
+ *_src_needs_copy = TRUE;
+ *_dst_needs_copy = TRUE;
+ return TRUE;
+ }
+
+ if (object->pager_ready &&
+ (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY)) {
+ /* XXX Do something intelligent (see temporary code above) */
+ }
+ vm_object_unlock(object);
+
+ return FALSE;
+}
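+
+/*
+ *	Example (illustrative sketch only): interpreting the results of
+ *	vm_object_copy_temporary.  Here object and offset describe the
+ *	source mapping (assumed from the caller); on success the two
+ *	"needs copy" flags tell the caller which mappings must be marked
+ *	copy-on-write.
+ */
+#if 0
+	boolean_t	src_needs_copy, dst_needs_copy;
+
+	if (!vm_object_copy_temporary(&object, &offset,
+				      &src_needs_copy, &dst_needs_copy)) {
+		/*
+		 * The quick copy could not be made; fall back to
+		 * vm_object_copy_strategically, which may block.
+		 */
+	} else {
+		if (src_needs_copy) {
+			/* mark the source mapping copy-on-write */
+		}
+		if (dst_needs_copy) {
+			/* mark the new (destination) mapping copy-on-write */
+		}
+	}
+#endif	/* 0 */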
+
+/*
+ * Routine: vm_object_copy_call [internal]
+ *
+ * Description:
+ * Copy the specified (src_offset, size) portion
+ * of the source object (src_object), using the
+ * user-managed copy algorithm.
+ *
+ * In/out conditions:
+ * The source object must be locked on entry. It
+ * will be *unlocked* on exit.
+ *
+ * Results:
+ * If the copy is successful, KERN_SUCCESS is returned.
+ * This routine is interruptible; if a wait for
+ * a user-generated event is interrupted, MACH_SEND_INTERRUPTED
+ * is returned. Other return values indicate hard errors
+ * in creating the user-managed memory object for the copy.
+ *
+ * A new object that represents the copied virtual
+ * memory is returned in a parameter (*_result_object).
+ * If the return value indicates an error, this parameter
+ * is not valid.
+ */
+kern_return_t vm_object_copy_call(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *_result_object) /* OUT */
+{
+ vm_offset_t src_end = src_offset + size;
+ ipc_port_t new_memory_object;
+ vm_object_t new_object;
+ vm_page_t p;
+
+ /*
+ * Set the backing object for the new
+ * temporary object.
+ */
+
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ vm_object_paging_begin(src_object);
+ vm_object_unlock(src_object);
+
+ /*
+ * Create a memory object port to be associated
+ * with this new vm_object.
+ *
+ * Since the kernel has the only rights to this
+ * port, we need not hold the cache lock.
+ *
+ * Since we have the only object reference, we
+ * need not be worried about collapse operations.
+ *
+ */
+
+ new_memory_object = ipc_port_alloc_kernel();
+ if (new_memory_object == IP_NULL) {
+ panic("vm_object_copy_call: allocate memory object port");
+ /* XXX Shouldn't panic here. */
+ }
+
+ /* we hold a naked receive right for new_memory_object */
+ (void) ipc_port_make_send(new_memory_object);
+ /* now we also hold a naked send right for new_memory_object */
+
+ /*
+ * Let the memory manager know that a copy operation
+ * is in progress. Note that we're using the old
+ * memory object's ports (for which we're holding
+ * a paging reference)... the memory manager cannot
+ * yet affect the new memory object.
+ */
+
+ (void) memory_object_copy(src_object->pager,
+ src_object->pager_request,
+ src_offset, size,
+ new_memory_object);
+ /* no longer hold the naked receive right for new_memory_object */
+
+ vm_object_lock(src_object);
+ vm_object_paging_end(src_object);
+
+ /*
+ * Remove write access from all of the pages of
+ * the old memory object that we can.
+ */
+
+ queue_iterate(&src_object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (src_offset <= p->offset) &&
+ (p->offset < src_end) &&
+ !(p->page_lock & VM_PROT_WRITE)) {
+ p->page_lock |= VM_PROT_WRITE;
+ pmap_page_protect(p->phys_addr, VM_PROT_ALL & ~p->page_lock);
+ }
+ }
+
+ vm_object_unlock(src_object);
+
+ /*
+ * Initialize the rest of the paging stuff
+ */
+
+ new_object = vm_object_enter(new_memory_object, size, FALSE);
+ new_object->shadow = src_object;
+ new_object->shadow_offset = src_offset;
+
+ /*
+ * Drop the reference for new_memory_object taken above.
+ */
+
+ ipc_port_release_send(new_memory_object);
+ /* no longer hold the naked send right for new_memory_object */
+
+ *_result_object = new_object;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_object_copy_delayed [internal]
+ *
+ * Description:
+ * Copy the specified virtual memory object, using
+ * the asymmetric copy-on-write algorithm.
+ *
+ * In/out conditions:
+ * The object must be unlocked on entry.
+ *
+ * This routine will not block waiting for user-generated
+ * events. It is not interruptible.
+ */
+vm_object_t vm_object_copy_delayed(
+ vm_object_t src_object)
+{
+ vm_object_t new_copy;
+ vm_object_t old_copy;
+ vm_page_t p;
+
+ /*
+ * The user-level memory manager wants to see
+ * all of the changes to this object, but it
+ * has promised not to make any changes on its own.
+ *
+ * Perform an asymmetric copy-on-write, as follows:
+ * Create a new object, called a "copy object"
+ * to hold pages modified by the new mapping
+ * (i.e., the copy, not the original mapping).
+ * Record the original object as the backing
+ * object for the copy object. If the
+ * original mapping does not change a page,
+ * it may be used read-only by the copy.
+ * Record the copy object in the original
+ * object. When the original mapping causes
+ * a page to be modified, it must be copied
+ * to a new page that is "pushed" to the
+ * copy object.
+ * Mark the new mapping (the copy object)
+ * copy-on-write. This makes the copy
+ * object itself read-only, allowing it
+ * to be reused if the original mapping
+ * makes no changes, and simplifying the
+ * synchronization required in the "push"
+ * operation described above.
+ *
+ * The copy-on-write is said to be asymmetric because
+ * the original object is *not* marked copy-on-write.
+ * A copied page is pushed to the copy object, regardless
+ * of which party attempted to modify the page.
+ *
+ * Repeated asymmetric copy operations may be done.
+ * If the original object has not been changed since
+ * the last copy, its copy object can be reused.
+ * Otherwise, a new copy object can be inserted
+ * between the original object and its previous
+ * copy object. Since any copy object is read-only,
+ * this cannot affect the contents of the previous copy
+ * object.
+ *
+ * Note that a copy object is higher in the object
+ * tree than the original object; therefore, use of
+ * the copy object recorded in the original object
+ * must be done carefully, to avoid deadlock.
+ */
+
+ /*
+ * Allocate a new copy object before locking, even
+ * though we may not need it later.
+ */
+
+ new_copy = vm_object_allocate(src_object->size);
+
+ vm_object_lock(src_object);
+
+ /*
+ * See whether we can reuse the result of a previous
+ * copy operation.
+ */
+ Retry:
+ old_copy = src_object->copy;
+ if (old_copy != VM_OBJECT_NULL) {
+ /*
+ * Try to get the locks (out of order)
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+
+ simple_lock_pause(); /* wait a bit */
+
+ vm_object_lock(src_object);
+ goto Retry;
+ }
+
+ /*
+ * Determine whether the old copy object has
+ * been modified.
+ */
+
+ if (old_copy->resident_page_count == 0 &&
+ !old_copy->pager_created) {
+ /*
+ * It has not been modified.
+ *
+ * Return another reference to
+ * the existing copy-object.
+ */
+ assert(old_copy->ref_count > 0);
+ old_copy->ref_count++;
+ vm_object_unlock(old_copy);
+ vm_object_unlock(src_object);
+
+ vm_object_deallocate(new_copy);
+
+ return old_copy;
+ }
+
+ /*
+ * The copy-object is always made large enough to
+ * completely shadow the original object, since
+ * it may have several users who want to shadow
+ * the original object at different points.
+ */
+
+ assert((old_copy->shadow == src_object) &&
+ (old_copy->shadow_offset == (vm_offset_t) 0));
+
+ /*
+ * Make the old copy-object shadow the new one.
+ * It will receive no more pages from the original
+ * object.
+ */
+
+ src_object->ref_count--; /* remove ref. from old_copy */
+ assert(src_object->ref_count > 0);
+ old_copy->shadow = new_copy;
+ assert(new_copy->ref_count > 0);
+ new_copy->ref_count++;
+ vm_object_unlock(old_copy); /* done with old_copy */
+ }
+
+ /*
+ * Point the new copy at the existing object.
+ */
+
+ new_copy->shadow = src_object;
+ new_copy->shadow_offset = 0;
+ new_copy->shadowed = TRUE; /* caller must set needs_copy */
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ src_object->copy = new_copy;
+
+ /*
+ * Mark all pages of the existing object copy-on-write.
+ * This object may have a shadow chain below it, but
+ * those pages will already be marked copy-on-write.
+ */
+
+ queue_iterate(&src_object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ (VM_PROT_ALL & ~VM_PROT_WRITE &
+ ~p->page_lock));
+ }
+
+ vm_object_unlock(src_object);
+
+ return new_copy;
+}
+
+/*
+ * Routine: vm_object_copy_strategically
+ *
+ * Purpose:
+ * Perform a copy according to the source object's
+ * declared strategy. This operation may block,
+ * and may be interrupted.
+ */
+kern_return_t vm_object_copy_strategically(
+ register
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *dst_object, /* OUT */
+ vm_offset_t *dst_offset, /* OUT */
+ boolean_t *dst_needs_copy) /* OUT */
+{
+ kern_return_t result = KERN_SUCCESS; /* to quiet gcc warnings */
+ boolean_t interruptible = TRUE; /* XXX */
+
+ assert(src_object != VM_OBJECT_NULL);
+
+ vm_object_lock(src_object);
+
+ /* XXX assert(!src_object->temporary); JSB FIXME */
+
+ /*
+ * The copy strategy is only valid if the memory manager
+ * is "ready".
+ */
+
+ while (!src_object->pager_ready) {
+ vm_object_wait( src_object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ if (interruptible &&
+ (current_thread()->wait_result != THREAD_AWAKENED)) {
+ *dst_object = VM_OBJECT_NULL;
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ return MACH_SEND_INTERRUPTED;
+ }
+ vm_object_lock(src_object);
+ }
+
+ /*
+ * The object may be temporary (even though it is external).
+ * If so, do a symmetric copy.
+ */
+
+ if (src_object->temporary) {
+ /*
+ * XXX
+ * This does not count as intelligent!
+ * This buys us the object->temporary optimizations,
+ * but we aren't using a symmetric copy,
+ * which may confuse the vm code. The correct thing
+ * to do here is to figure out what to call to get
+ * a temporary shadowing set up.
+ */
+ src_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+ }
+
+ /*
+ * The object is permanent. Use the appropriate copy strategy.
+ */
+
+ switch (src_object->copy_strategy) {
+ case MEMORY_OBJECT_COPY_NONE:
+ if ((result = vm_object_copy_slowly(
+ src_object,
+ src_offset,
+ size,
+ interruptible,
+ dst_object))
+ == KERN_SUCCESS) {
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ }
+ break;
+
+ case MEMORY_OBJECT_COPY_CALL:
+ if ((result = vm_object_copy_call(
+ src_object,
+ src_offset,
+ size,
+ dst_object))
+ == KERN_SUCCESS) {
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ }
+ break;
+
+ case MEMORY_OBJECT_COPY_DELAY:
+ vm_object_unlock(src_object);
+ *dst_object = vm_object_copy_delayed(src_object);
+ *dst_offset = src_offset;
+ *dst_needs_copy = TRUE;
+
+ result = KERN_SUCCESS;
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * vm_object_shadow:
+ *
+ * Create a new object which is backed by the
+ * specified existing object range. The source
+ * object reference is deallocated.
+ *
+ * The new object and offset into that object
+ * are returned in the source parameters.
+ */
+
+void vm_object_shadow(
+ vm_object_t *object, /* IN/OUT */
+ vm_offset_t *offset, /* IN/OUT */
+ vm_size_t length)
+{
+ register vm_object_t source;
+ register vm_object_t result;
+
+ source = *object;
+
+ /*
+ * Allocate a new object with the given length
+ */
+
+ if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
+ panic("vm_object_shadow: no object for shadowing");
+
+ /*
+ * The new object shadows the source object, adding
+ * a reference to it. Our caller changes his reference
+ * to point to the new object, removing a reference to
+ * the source object. Net result: no change of reference
+ * count.
+ */
+ result->shadow = source;
+
+ /*
+ * Store the offset into the source object,
+ * and fix up the offset into the new object.
+ */
+
+ result->shadow_offset = *offset;
+
+ /*
+ * Return the new things
+ */
+
+ *offset = 0;
+ *object = result;
+}
+
+/*
+ * The relationship between vm_object structures and
+ * the memory_object ports requires careful synchronization.
+ *
+ * All associations are created by vm_object_enter. All three
+ * port fields are filled in, as follows:
+ * pager: the memory_object port itself, supplied by
+ * the user requesting a mapping (or the kernel,
+ * when initializing internal objects); the
+ * kernel simulates holding send rights by keeping
+ * a port reference;
+ * pager_request:
+ * pager_name:
+ * the memory object control and name ports,
+ * created by the kernel; the kernel holds
+ * receive (and ownership) rights to these
+ * ports, but no other references.
+ * All of the ports are referenced by their global names.
+ *
+ * When initialization is complete, the "initialized" field
+ * is asserted. Other mappings using a particular memory object,
+ * and any references to the vm_object gained through the
+ * port association must wait for this initialization to occur.
+ *
+ * In order to allow the memory manager to set attributes before
+ * requests (notably virtual copy operations, but also data or
+ * unlock requests) are made, a "ready" attribute is made available.
+ * Only the memory manager may affect the value of this attribute.
+ * Its value does not affect critical kernel functions, such as
+ * internal object initialization or destruction. [Furthermore,
+ * memory objects created by the kernel are assumed to be ready
+ * immediately; the default memory manager need not explicitly
+ * set the "ready" attribute.]
+ *
+ * [Both the "initialized" and "ready" attribute wait conditions
+ * use the "pager" field as the wait event.]
+ *
+ * The port associations can be broken down by any of the
+ * following routines:
+ * vm_object_terminate:
+ * No references to the vm_object remain, and
+ * the object cannot (or will not) be cached.
+ * This is the normal case, and is done even
+ * though one of the other cases has already been
+ * done.
+ * vm_object_destroy:
+ * The memory_object port has been destroyed,
+ * meaning that the kernel cannot flush dirty
+ * pages or request new data or unlock existing
+ * data.
+ * memory_object_destroy:
+ * The memory manager has requested that the
+ * kernel relinquish rights to the memory object
+ * port. [The memory manager may not want to
+ * destroy the port, but may wish to refuse or
+ * tear down existing memory mappings.]
+ * Each routine that breaks an association must break all of
+ * them at once. At some later time, that routine must clear
+ * the vm_object port fields and release the port rights.
+ * [Furthermore, each routine must cope with the simultaneous
+ * or previous operations of the others.]
+ *
+ * In addition to the lock on the object, the vm_object_cache_lock
+ * governs the port associations. References gained through the
+ * port association require use of the cache lock.
+ *
+ * Because the port fields may be cleared spontaneously, they
+ * cannot be used to determine whether a memory object has
+ * ever been associated with a particular vm_object. [This
+ * knowledge is important to the shadow object mechanism.]
+ * For this reason, an additional "created" attribute is
+ * provided.
+ *
+ * During various paging operations, the port values found in the
+ * vm_object must be valid. To prevent these port rights from being
+ * released, and to prevent the port associations from changing
+ * (other than being removed, i.e., made null), routines may use
+ * the vm_object_paging_begin/end routines [actually, macros].
+ * The implementation uses the "paging_in_progress" and "wanted" fields.
+ * [Operations that alter the validity of the port values include the
+ * termination routines and vm_object_collapse.]
+ */
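+
+/*
+ *	Example (illustrative sketch only): bracketing a use of the port
+ *	values with a paging reference, so that the rights remain valid
+ *	while the object lock is dropped for a (possibly blocking)
+ *	message send.  Compare vm_object_copy_call above.
+ */
+#if 0
+	ipc_port_t	pager;
+	pager_request_t	pager_request;
+
+	vm_object_lock(object);
+	vm_object_paging_begin(object);		/* ports may not be freed */
+	pager = object->pager;
+	pager_request = object->pager_request;
+	vm_object_unlock(object);
+
+	/* ... send a memory_object request on (pager, pager_request) ... */
+
+	vm_object_lock(object);
+	vm_object_paging_end(object);		/* ports may now be freed */
+	vm_object_unlock(object);
+#endif	/* 0 */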
+
+vm_object_t vm_object_lookup(
+ ipc_port_t port)
+{
+ vm_object_t object = VM_OBJECT_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port) &&
+#if NORMA_VM
+ (ip_kotype(port) == IKOT_PAGER)) {
+#else /* NORMA_VM */
+ (ip_kotype(port) == IKOT_PAGING_REQUEST)) {
+#endif /* NORMA_VM */
+ vm_object_cache_lock();
+ object = (vm_object_t) port->ip_kobject;
+ vm_object_lock(object);
+
+ assert(object->alive);
+
+ if (object->ref_count == 0) {
+ queue_remove(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ vm_object_cached_count--;
+ }
+
+ object->ref_count++;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ }
+ ip_unlock(port);
+ }
+
+ return object;
+}
+
+vm_object_t vm_object_lookup_name(
+ ipc_port_t port)
+{
+ vm_object_t object = VM_OBJECT_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port) &&
+ (ip_kotype(port) == IKOT_PAGING_NAME)) {
+ vm_object_cache_lock();
+ object = (vm_object_t) port->ip_kobject;
+ vm_object_lock(object);
+
+ assert(object->alive);
+
+ if (object->ref_count == 0) {
+ queue_remove(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ vm_object_cached_count--;
+ }
+
+ object->ref_count++;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ }
+ ip_unlock(port);
+ }
+
+ return object;
+}
+
+void vm_object_destroy(
+ ipc_port_t pager)
+{
+ vm_object_t object;
+ pager_request_t old_request;
+ ipc_port_t old_name;
+
+ /*
+ * Perform essentially the same operations as in vm_object_lookup,
+ * except that this time we look up based on the memory_object
+ * port, not the control port.
+ */
+ vm_object_cache_lock();
+ if (ip_kotype(pager) != IKOT_PAGER) {
+ vm_object_cache_unlock();
+ return;
+ }
+
+ object = (vm_object_t) pager->ip_kobject;
+ vm_object_lock(object);
+ if (object->ref_count == 0) {
+ queue_remove(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ vm_object_cached_count--;
+ }
+ object->ref_count++;
+
+ object->can_persist = FALSE;
+
+ assert(object->pager == pager);
+
+ /*
+ * Remove the port associations.
+ *
+ * Note that the memory_object itself is dead, so
+ * we don't bother with it.
+ */
+
+ object->pager = IP_NULL;
+ vm_object_remove(object);
+
+ old_request = object->pager_request;
+ object->pager_request = PAGER_REQUEST_NULL;
+
+ old_name = object->pager_name;
+ object->pager_name = IP_NULL;
+
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+
+ /*
+ * Clean up the port references. Note that there's no
+ * point in trying the memory_object_terminate call
+ * because the memory_object itself is dead.
+ */
+
+ ipc_port_release_send(pager);
+#if !NORMA_VM
+ if (old_request != IP_NULL)
+ ipc_port_dealloc_kernel(old_request);
+#endif /* !NORMA_VM */
+ if (old_name != IP_NULL)
+#if NORMA_VM
+ ipc_port_release_send(old_name);
+#else /* NORMA_VM */
+ ipc_port_dealloc_kernel(old_name);
+#endif /* NORMA_VM */
+
+ /*
+ * Restart pending page requests
+ */
+
+ vm_object_abort_activity(object);
+
+ /*
+ * Lose the object reference.
+ */
+
+ vm_object_deallocate(object);
+}
+
+boolean_t vm_object_accept_old_init_protocol = FALSE;
+
+/*
+ * Routine: vm_object_enter
+ * Purpose:
+ * Find a VM object corresponding to the given
+ * pager; if no such object exists, create one,
+ * and initialize the pager.
+ */
+vm_object_t vm_object_enter(
+ ipc_port_t pager,
+ vm_size_t size,
+ boolean_t internal)
+{
+ register
+ vm_object_t object;
+ vm_object_t new_object;
+ boolean_t must_init;
+ ipc_kobject_type_t po;
+
+restart:
+ if (!IP_VALID(pager))
+ return vm_object_allocate(size);
+
+ new_object = VM_OBJECT_NULL;
+ must_init = FALSE;
+
+ /*
+ * Look for an object associated with this port.
+ */
+
+ vm_object_cache_lock();
+ for (;;) {
+ po = ip_kotype(pager);
+
+ /*
+ * If a previous object is being terminated,
+ * we must wait for the termination message
+ * to be queued.
+ *
+ * We set kobject to a non-null value to let the
+ * terminator know that someone is waiting.
+ * Among the possibilities is that the port
+ * could die while we're waiting. Must restart
+ * instead of continuing the loop.
+ */
+
+ if (po == IKOT_PAGER_TERMINATING) {
+ pager->ip_kobject = (ipc_kobject_t) pager;
+ assert_wait((event_t) pager, FALSE);
+ vm_object_cache_unlock();
+ thread_block((void (*)()) 0);
+ goto restart;
+ }
+
+ /*
+ * Bail if there is already a kobject associated
+ * with the pager port.
+ */
+ if (po != IKOT_NONE) {
+ break;
+ }
+
+ /*
+ * We must unlock to create a new object;
+ * if we do so, we must try the lookup again.
+ */
+
+ if (new_object == VM_OBJECT_NULL) {
+ vm_object_cache_unlock();
+ new_object = vm_object_allocate(size);
+ vm_object_cache_lock();
+ } else {
+ /*
+ * Lookup failed twice, and we have something
+ * to insert; set the object.
+ */
+
+ ipc_kobject_set(pager,
+ (ipc_kobject_t) new_object,
+ IKOT_PAGER);
+ new_object = VM_OBJECT_NULL;
+ must_init = TRUE;
+ }
+ }
+
+ if (internal)
+ must_init = TRUE;
+
+ /*
+ * It's only good if it's a VM object!
+ */
+
+ object = (po == IKOT_PAGER) ? (vm_object_t) pager->ip_kobject
+ : VM_OBJECT_NULL;
+
+ if ((object != VM_OBJECT_NULL) && !must_init) {
+ vm_object_lock(object);
+ if (object->ref_count == 0) {
+ queue_remove(&vm_object_cached_list, object,
+ vm_object_t, cached_list);
+ vm_object_cached_count--;
+ }
+ object->ref_count++;
+ vm_object_unlock(object);
+
+ vm_stat.hits++;
+ }
+ assert((object == VM_OBJECT_NULL) || (object->ref_count > 0) ||
+ ((object->paging_in_progress != 0) && internal));
+
+ vm_stat.lookups++;
+
+ vm_object_cache_unlock();
+
+ /*
+ * If we raced to create a vm_object but lost, let's
+ * throw away ours.
+ */
+
+ if (new_object != VM_OBJECT_NULL)
+ vm_object_deallocate(new_object);
+
+ if (object == VM_OBJECT_NULL)
+ return(object);
+
+ if (must_init) {
+ /*
+ * Copy the naked send right we were given.
+ */
+
+ pager = ipc_port_copy_send(pager);
+ if (!IP_VALID(pager))
+ panic("vm_object_enter: port died"); /* XXX */
+
+ object->pager_created = TRUE;
+ object->pager = pager;
+
+#if NORMA_VM
+
+ /*
+ * Let the xmm system know that we want to use the pager.
+ *
+ * Name port will be provided by the xmm system
+ * when set_attributes_common is called.
+ */
+
+ object->internal = internal;
+ object->pager_ready = internal;
+ if (internal) {
+ assert(object->temporary);
+ } else {
+ object->temporary = FALSE;
+ }
+ object->pager_name = IP_NULL;
+
+ (void) xmm_memory_object_init(object);
+#else /* NORMA_VM */
+
+ /*
+ * Allocate request port.
+ */
+
+ object->pager_request = ipc_port_alloc_kernel();
+ if (object->pager_request == IP_NULL)
+ panic("vm_object_enter: pager request alloc");
+
+ ipc_kobject_set(object->pager_request,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_REQUEST);
+
+ /*
+ * Let the pager know we're using it.
+ */
+
+ if (internal) {
+ /* acquire a naked send right for the DMM */
+ ipc_port_t DMM = memory_manager_default_reference();
+
+ /* mark the object internal */
+ object->internal = TRUE;
+ assert(object->temporary);
+
+ /* default-pager objects are ready immediately */
+ object->pager_ready = TRUE;
+
+ /* consumes the naked send right for DMM */
+ (void) memory_object_create(DMM,
+ pager,
+ object->size,
+ object->pager_request,
+ object->pager_name,
+ PAGE_SIZE);
+ } else {
+ /* the object is external and not temporary */
+ object->internal = FALSE;
+ object->temporary = FALSE;
+
+ /* user pager objects are not ready until marked so */
+ object->pager_ready = FALSE;
+
+ (void) memory_object_init(pager,
+ object->pager_request,
+ object->pager_name,
+ PAGE_SIZE);
+
+ }
+#endif /* NORMA_VM */
+
+ vm_object_lock(object);
+ object->pager_initialized = TRUE;
+
+ if (vm_object_accept_old_init_protocol)
+ object->pager_ready = TRUE;
+
+ vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
+ } else {
+ vm_object_lock(object);
+ }
+ /*
+ * [At this point, the object must be locked]
+ */
+
+ /*
+ * Wait for the work above to be done by the first
+ * thread to map this object.
+ */
+
+ while (!object->pager_initialized) {
+ vm_object_wait( object,
+ VM_OBJECT_EVENT_INITIALIZED,
+ FALSE);
+ vm_object_lock(object);
+ }
+ vm_object_unlock(object);
+
+ return object;
+}
+
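+/*
+ * Illustrative sketch (for exposition only, disabled with #if 0 so it
+ * has no effect on the build): how a caller holding a memory-object
+ * port might obtain the VM object backing it.  The helper name
+ * example_object_for_pager() is made up; only vm_object_enter() itself
+ * is real.
+ */
+#if 0
+static vm_object_t
+example_object_for_pager(
+	ipc_port_t	pager,
+	vm_size_t	size)
+{
+	vm_object_t	object;
+
+	/*
+	 * Look up (or create and initialize) the object for this pager.
+	 * The result carries a reference owned by the caller; a null
+	 * result means the port does not name a memory object.
+	 */
+	object = vm_object_enter(pager, size, FALSE);
+	return object;
+}
+#endif
+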
+/*
+ * Routine: vm_object_pager_create
+ * Purpose:
+ * Create a memory object for an internal object.
+ * In/out conditions:
+ * The object is locked on entry and exit;
+ * it may be unlocked within this call.
+ * Limitations:
+ * Only one thread may be performing a
+ * vm_object_pager_create on an object at
+ * a time. Presumably, only the pageout
+ * daemon will be using this routine.
+ */
+void vm_object_pager_create(
+ register
+ vm_object_t object)
+{
+ ipc_port_t pager;
+
+ if (object->pager_created) {
+ /*
+ * Someone else got to it first...
+ * wait for them to finish initializing
+ */
+
+ while (!object->pager_initialized) {
+ vm_object_wait( object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ FALSE);
+ vm_object_lock(object);
+ }
+ return;
+ }
+
+ /*
+ * Indicate that a memory object has been assigned
+ * before dropping the lock, to prevent a race.
+ */
+
+ object->pager_created = TRUE;
+
+ /*
+ * Prevent collapse or termination by
+ * holding a paging reference
+ */
+
+ vm_object_paging_begin(object);
+ vm_object_unlock(object);
+
+#if MACH_PAGEMAP
+ object->existence_info = vm_external_create(
+ object->size +
+ object->paging_offset);
+ assert((object->size + object->paging_offset) >=
+ object->size);
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Create the pager, and associate with it
+ * this object.
+ *
+ * Note that we only make the port association
+ * so that vm_object_enter can properly look up
+ * the object to complete the initialization...
+ * we do not expect any user to ever map this
+ * object.
+ *
+ * Since the kernel has the only rights to the
+ * port, it's safe to install the association
+ * without holding the cache lock.
+ */
+
+ pager = ipc_port_alloc_kernel();
+ if (pager == IP_NULL)
+ panic("vm_object_pager_create: allocate pager port");
+
+ (void) ipc_port_make_send(pager);
+ ipc_kobject_set(pager, (ipc_kobject_t) object, IKOT_PAGER);
+
+ /*
+ * Initialize the rest of the paging stuff
+ */
+
+ if (vm_object_enter(pager, object->size, TRUE) != object)
+ panic("vm_object_pager_create: mismatch");
+
+ /*
+ * Drop the naked send right taken above.
+ */
+
+ ipc_port_release_send(pager);
+
+ /*
+ * Release the paging reference
+ */
+
+ vm_object_lock(object);
+ vm_object_paging_end(object);
+}
+
+/*
+ * Routine: vm_object_remove
+ * Purpose:
+ * Eliminate the pager/object association
+ * for this pager.
+ * Conditions:
+ * The object cache must be locked.
+ */
+void vm_object_remove(
+ vm_object_t object)
+{
+ ipc_port_t port;
+
+ if ((port = object->pager) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGER)
+ ipc_kobject_set(port, IKO_NULL,
+ IKOT_PAGER_TERMINATING);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad object port");
+ }
+#if !NORMA_VM
+ if ((port = object->pager_request) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGING_REQUEST)
+ ipc_kobject_set(port, IKO_NULL, IKOT_NONE);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad request port");
+ }
+ if ((port = object->pager_name) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGING_NAME)
+ ipc_kobject_set(port, IKO_NULL, IKOT_NONE);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad name port");
+ }
+#endif /* !NORMA_VM */
+}
+
+/*
+ * Global variables for vm_object_collapse():
+ *
+ * Counts for normal collapses and bypasses.
+ * Debugging variables, to watch or disable collapse.
+ */
+long object_collapses = 0;
+long object_bypasses = 0;
+
+int vm_object_collapse_debug = 0;
+boolean_t vm_object_collapse_allowed = TRUE;
+boolean_t vm_object_collapse_bypass_allowed = TRUE;
+
+/*
+ * vm_object_collapse:
+ *
+ * Collapse an object with the object backing it.
+ * Pages in the backing object are moved into the
+ * parent, and the backing object is deallocated.
+ *
+ * Requires that the object be locked and the page
+ * queues be unlocked. May unlock/relock the object,
+ * so the caller should hold a reference for the object.
+ */
+void vm_object_collapse(
+ register vm_object_t object)
+{
+ register vm_object_t backing_object;
+ register vm_offset_t backing_offset;
+ register vm_size_t size;
+ register vm_offset_t new_offset;
+ register vm_page_t p, pp;
+ ipc_port_t old_name_port;
+
+ if (!vm_object_collapse_allowed)
+ return;
+
+ while (TRUE) {
+ /*
+ * Verify that the conditions are right for collapse:
+ *
+ * The object exists and no pages in it are currently
+ * being paged out (or have ever been paged out).
+ *
+ * This check is probably overkill -- if a memory
+ * object has not been created, the fault handler
+ * shouldn't release the object lock while paging
+ * is in progress or absent pages exist.
+ */
+ if (object == VM_OBJECT_NULL ||
+ object->pager_created ||
+ object->paging_in_progress != 0 ||
+ object->absent_count != 0)
+ return;
+
+ /*
+ * There is a backing object, and
+ */
+
+ if ((backing_object = object->shadow) == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(backing_object);
+ /*
+ * ...
+ * The backing object is not read_only,
+ * and no pages in the backing object are
+ * currently being paged out.
+ * The backing object is internal.
+ *
+ * XXX It may be sufficient for the backing
+ * XXX object to be temporary.
+ */
+
+ if (!backing_object->internal ||
+ backing_object->paging_in_progress != 0) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * The backing object can't be a copy-object:
+ * the shadow_offset for the copy-object must stay
+ * as 0. Furthermore (for the 'we have all the
+ * pages' case), if we bypass backing_object and
+ * just shadow the next object in the chain, old
+ * pages from that object would then have to be copied
+ * BOTH into the (former) backing_object and into the
+ * parent object.
+ */
+ if (backing_object->shadow != VM_OBJECT_NULL &&
+ backing_object->shadow->copy != VM_OBJECT_NULL) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * We know that we can either collapse the backing
+ * object (if the parent is the only reference to
+ * it) or (perhaps) remove the parent's reference
+ * to it.
+ */
+
+ backing_offset = object->shadow_offset;
+ size = object->size;
+
+ /*
+ * If there is exactly one reference to the backing
+ * object, we can collapse it into the parent.
+ */
+
+ if (backing_object->ref_count == 1) {
+ if (!vm_object_cache_lock_try()) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * We can collapse the backing object.
+ *
+ * Move all in-memory pages from backing_object
+ * to the parent. Pages that have been paged out
+ * will be overwritten by any of the parent's
+ * pages that shadow them.
+ */
+
+ while (!queue_empty(&backing_object->memq)) {
+
+ p = (vm_page_t)
+ queue_first(&backing_object->memq);
+
+ new_offset = (p->offset - backing_offset);
+
+ assert(!p->busy || p->absent);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * dispose of it.
+ *
+ * Otherwise, move it as planned.
+ */
+
+ if (p->offset < backing_offset ||
+ new_offset >= size) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ } else {
+ pp = vm_page_lookup(object, new_offset);
+ if (pp != VM_PAGE_NULL && !pp->absent) {
+ /*
+ * Parent object has a real page.
+ * Throw away the backing object's
+ * page.
+ */
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ else {
+ if (pp != VM_PAGE_NULL) {
+ /*
+ * Parent has an absent page...
+ * it's not being paged in, so
+ * it must really be missing from
+ * the parent.
+ *
+ * Throw out the absent page...
+ * any faults looking for that
+ * page will restart with the new
+ * one.
+ */
+
+ /*
+ * This should never happen -- the
+ * parent cannot have ever had an
+ * external memory object, and thus
+ * cannot have absent pages.
+ */
+ panic("vm_object_collapse: bad case");
+
+ vm_page_lock_queues();
+ vm_page_free(pp);
+ vm_page_unlock_queues();
+
+ /*
+ * Fall through to move the backing
+ * object's page up.
+ */
+ }
+ /*
+ * Parent now has no page.
+ * Move the backing object's page up.
+ */
+ vm_page_rename(p, object, new_offset);
+ }
+ }
+ }
+
+ /*
+ * Move the pager from backing_object to object.
+ *
+ * XXX We're only using part of the paging space
+ * for keeps now... we ought to discard the
+ * unused portion.
+ */
+
+ switch (vm_object_collapse_debug) {
+ case 0:
+ break;
+ case 1:
+ if ((backing_object->pager == IP_NULL) &&
+ (backing_object->pager_request ==
+ PAGER_REQUEST_NULL))
+ break;
+ /* Fall through to... */
+
+ default:
+ printf("vm_object_collapse: %#x (pager %#x, request %#x) up to %#x\n",
+ backing_object, backing_object->pager, backing_object->pager_request,
+ object);
+ if (vm_object_collapse_debug > 2)
+ Debugger("vm_object_collapse");
+ }
+
+ object->pager = backing_object->pager;
+ if (object->pager != IP_NULL)
+ ipc_kobject_set(object->pager,
+ (ipc_kobject_t) object,
+ IKOT_PAGER);
+ object->pager_initialized = backing_object->pager_initialized;
+ object->pager_ready = backing_object->pager_ready;
+ object->pager_created = backing_object->pager_created;
+
+ object->pager_request = backing_object->pager_request;
+#if NORMA_VM
+ old_name_port = object->pager_name;
+ object->pager_name = backing_object->pager_name;
+#else /* NORMA_VM */
+ if (object->pager_request != IP_NULL)
+ ipc_kobject_set(object->pager_request,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_REQUEST);
+ old_name_port = object->pager_name;
+ if (old_name_port != IP_NULL)
+ ipc_kobject_set(old_name_port,
+ IKO_NULL, IKOT_NONE);
+ object->pager_name = backing_object->pager_name;
+ if (object->pager_name != IP_NULL)
+ ipc_kobject_set(object->pager_name,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_NAME);
+#endif /* NORMA_VM */
+
+ vm_object_cache_unlock();
+
+ /*
+ * If there is no pager, leave paging-offset alone.
+ */
+ if (object->pager != IP_NULL)
+ object->paging_offset =
+ backing_object->paging_offset +
+ backing_offset;
+
+#if MACH_PAGEMAP
+ assert(object->existence_info == VM_EXTERNAL_NULL);
+ object->existence_info = backing_object->existence_info;
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Object now shadows whatever backing_object did.
+ * Note that the reference to backing_object->shadow
+ * moves from within backing_object to within object.
+ */
+
+ object->shadow = backing_object->shadow;
+ object->shadow_offset += backing_object->shadow_offset;
+ if (object->shadow != VM_OBJECT_NULL &&
+ object->shadow->copy != VM_OBJECT_NULL) {
+ panic("vm_object_collapse: we collapsed a copy-object!");
+ }
+ /*
+ * Discard backing_object.
+ *
+ * Since the backing object has no pages, no
+ * pager left, and no object references within it,
+ * all that is necessary is to dispose of it.
+ */
+
+ assert(
+ (backing_object->ref_count == 1) &&
+ (backing_object->resident_page_count == 0) &&
+ (backing_object->paging_in_progress == 0)
+ );
+
+ assert(backing_object->alive);
+ backing_object->alive = FALSE;
+ vm_object_unlock(backing_object);
+
+ vm_object_unlock(object);
+ if (old_name_port != IP_NULL)
+#if NORMA_VM
+ ipc_port_release_send(old_name_port);
+#else /* NORMA_VM */
+ ipc_port_dealloc_kernel(old_name_port);
+#endif /* NORMA_VM */
+ zfree(vm_object_zone, (vm_offset_t) backing_object);
+ vm_object_lock(object);
+
+ object_collapses++;
+ }
+ else {
+ if (!vm_object_collapse_bypass_allowed) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * If all of the pages in the backing object are
+ * shadowed by the parent object, the parent
+ * object no longer has to shadow the backing
+ * object; it can shadow the next one in the
+ * chain.
+ *
+ * The backing object must not be paged out - we'd
+ * have to check all of the paged-out pages, as
+ * well.
+ */
+
+ if (backing_object->pager_created) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * Should have a check for a 'small' number
+ * of pages here.
+ */
+
+ queue_iterate(&backing_object->memq, p,
+ vm_page_t, listq)
+ {
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * keep going.
+ *
+ * Otherwise, the backing_object must be
+ * left in the chain.
+ */
+
+ if (p->offset >= backing_offset &&
+ new_offset <= size &&
+ (pp = vm_page_lookup(object, new_offset))
+ == VM_PAGE_NULL) {
+ /*
+ * Page still needed.
+ * Can't go any further.
+ */
+ vm_object_unlock(backing_object);
+ return;
+ }
+ }
+
+ /*
+ * Make the parent shadow the next object
+ * in the chain. Deallocating backing_object
+ * will not remove it, since its reference
+ * count is at least 2.
+ */
+
+ vm_object_reference(object->shadow = backing_object->shadow);
+ object->shadow_offset += backing_object->shadow_offset;
+
+ /*
+ * Backing object might have had a copy pointer
+ * to us. If it did, clear it.
+ */
+ if (backing_object->copy == object)
+ backing_object->copy = VM_OBJECT_NULL;
+
+ /*
+ * Drop the reference count on backing_object.
+ * Since its ref_count was at least 2, it
+ * will not vanish; so we don't need to call
+ * vm_object_deallocate.
+ */
+ backing_object->ref_count--;
+ assert(backing_object->ref_count > 0);
+ vm_object_unlock(backing_object);
+
+ object_bypasses ++;
+
+ }
+
+ /*
+ * Try again with this object's new backing object.
+ */
+ }
+}
+
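+/*
+ * Worked scenario for the two cases above (illustrative only):
+ *
+ *	object --shadow--> backing_object --shadow--> X
+ *
+ * Collapse: if backing_object->ref_count is 1, the parent is its only
+ * user.  Its resident pages are moved into the parent (or freed where
+ * the parent already covers them), its pager, request and name ports
+ * are taken over, the parent shadows X directly, and backing_object is
+ * destroyed.
+ *
+ * Bypass: if backing_object has other references but every one of its
+ * pages visible to the parent is already shadowed by a parent page
+ * (and backing_object never had a pager), the parent simply takes a
+ * reference on X, adds the two shadow offsets, and drops its reference
+ * on backing_object, which stays alive for its other users.
+ */
+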
+/*
+ * Routine: vm_object_page_remove: [internal]
+ * Purpose:
+ * Removes all physical pages in the specified
+ * object range from the object's list of pages.
+ *
+ * In/out conditions:
+ * The object must be locked.
+ */
+unsigned int vm_object_page_remove_lookup = 0;
+unsigned int vm_object_page_remove_iterate = 0;
+
+void vm_object_page_remove(
+ register vm_object_t object,
+ register vm_offset_t start,
+ register vm_offset_t end)
+{
+ register vm_page_t p, next;
+
+ /*
+ * One and two page removals are most popular.
+ * The factor of 16 here is somewhat arbitrary.
+ * It balances vm_page_lookup vs iteration.
+ */
+
+ if (atop(end - start) < (unsigned)object->resident_page_count/16) {
+ vm_object_page_remove_lookup++;
+
+ for (; start < end; start += PAGE_SIZE) {
+ p = vm_page_lookup(object, start);
+ if (p != VM_PAGE_NULL) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ }
+ } else {
+ vm_object_page_remove_iterate++;
+
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ next = (vm_page_t) queue_next(&p->listq);
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ p = next;
+ }
+ }
+}
+
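+/*
+ * Worked example of the lookup-vs-iterate heuristic above (illustrative
+ * numbers only): with 4K pages and 1000 resident pages, removing an 8K
+ * range compares atop(8K) = 2 against 1000/16 = 62, so two calls to
+ * vm_page_lookup() win; removing a 1M range compares 256 against 62,
+ * so a single pass over the object's resident-page list is cheaper.
+ */
+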
+/*
+ * Routine: vm_object_coalesce
+ * Function: Coalesces two objects backing up adjoining
+ * regions of memory into a single object.
+ *
+ * returns TRUE if objects were combined.
+ *
+ * NOTE: Only works at the moment if the second object is NULL -
+ * if it's not, which object do we lock first?
+ *
+ * Parameters:
+ * prev_object First object to coalesce
+ * prev_offset Offset into prev_object
+ * next_object Second object to coalesce
+ * next_offset Offset into next_object
+ *
+ * prev_size Size of reference to prev_object
+ * next_size Size of reference to next_object
+ *
+ * Conditions:
+ * The object must *not* be locked.
+ */
+
+boolean_t vm_object_coalesce(
+ register vm_object_t prev_object,
+ vm_object_t next_object,
+ vm_offset_t prev_offset,
+ vm_offset_t next_offset,
+ vm_size_t prev_size,
+ vm_size_t next_size)
+{
+ vm_size_t newsize;
+
+#ifdef lint
+ next_offset++;
+#endif /* lint */
+
+ if (next_object != VM_OBJECT_NULL) {
+ return FALSE;
+ }
+
+ if (prev_object == VM_OBJECT_NULL) {
+ return TRUE;
+ }
+
+ vm_object_lock(prev_object);
+
+ /*
+ * Try to collapse the object first
+ */
+ vm_object_collapse(prev_object);
+
+ /*
+ * Can't coalesce if pages not mapped to
+ * prev_entry may be in use anyway:
+ * . more than one reference
+ * . paged out
+ * . shadows another object
+ * . has a copy elsewhere
+ * . paging references (pages might be in page-list)
+ */
+
+ if ((prev_object->ref_count > 1) ||
+ prev_object->pager_created ||
+ (prev_object->shadow != VM_OBJECT_NULL) ||
+ (prev_object->copy != VM_OBJECT_NULL) ||
+ (prev_object->paging_in_progress != 0)) {
+ vm_object_unlock(prev_object);
+ return FALSE;
+ }
+
+ /*
+ * Remove any pages that may still be in the object from
+ * a previous deallocation.
+ */
+
+ vm_object_page_remove(prev_object,
+ prev_offset + prev_size,
+ prev_offset + prev_size + next_size);
+
+ /*
+ * Extend the object if necessary.
+ */
+ newsize = prev_offset + prev_size + next_size;
+ if (newsize > prev_object->size)
+ prev_object->size = newsize;
+
+ vm_object_unlock(prev_object);
+ return TRUE;
+}
+
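+/*
+ * Illustrative sketch (for exposition only, disabled with #if 0): how a
+ * map layer might use vm_object_coalesce() when growing an existing
+ * anonymous allocation in place, so that no second object has to be
+ * created.  The helper name example_extend_region() and its parameters
+ * are made up.
+ */
+#if 0
+static boolean_t
+example_extend_region(
+	vm_object_t	prev_object,	/* object behind the existing entry */
+	vm_offset_t	prev_offset,	/* its starting offset in the object */
+	vm_size_t	prev_size,	/* current size of the entry */
+	vm_size_t	grow_size)	/* amount being appended */
+{
+	/* Only the next_object == VM_OBJECT_NULL case is supported. */
+	return vm_object_coalesce(prev_object, VM_OBJECT_NULL,
+				  prev_offset, (vm_offset_t) 0,
+				  prev_size, grow_size);
+}
+#endif
+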
+vm_object_t vm_object_request_object(
+ ipc_port_t p)
+{
+ return vm_object_lookup(p);
+}
+
+/*
+ * Routine: vm_object_name
+ * Purpose:
+ * Returns a naked send right to the "name" port associated
+ * with this object.
+ */
+ipc_port_t vm_object_name(
+ vm_object_t object)
+{
+ ipc_port_t p;
+
+ if (object == VM_OBJECT_NULL)
+ return IP_NULL;
+
+ vm_object_lock(object);
+
+ while (object->shadow != VM_OBJECT_NULL) {
+ vm_object_t new_object = object->shadow;
+ vm_object_lock(new_object);
+ vm_object_unlock(object);
+ object = new_object;
+ }
+
+ p = object->pager_name;
+ if (p != IP_NULL)
+#if NORMA_VM
+ p = ipc_port_copy_send(p);
+#else /* NORMA_VM */
+ p = ipc_port_make_send(p);
+#endif /* NORMA_VM */
+ vm_object_unlock(object);
+
+ return p;
+}
+
+/*
+ * Attach a set of physical pages to an object, so that they can
+ * be mapped by mapping the object. Typically used to map IO memory.
+ *
+ * The mapping function and its private data are used to obtain the
+ * physical addresses for each page to be mapped.
+ */
+void
+vm_object_page_map(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ vm_offset_t (*map_fn)(void *, vm_offset_t),
+ void * map_fn_data) /* private to map_fn */
+{
+ int num_pages;
+ int i;
+ vm_page_t m;
+ vm_page_t old_page;
+ vm_offset_t addr;
+
+ num_pages = atop(size);
+
+ for (i = 0; i < num_pages; i++, offset += PAGE_SIZE) {
+
+ addr = (*map_fn)(map_fn_data, offset);
+
+ while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
+ vm_page_more_fictitious();
+
+ vm_object_lock(object);
+ if ((old_page = vm_page_lookup(object, offset))
+ != VM_PAGE_NULL)
+ {
+ vm_page_lock_queues();
+ vm_page_free(old_page);
+ vm_page_unlock_queues();
+ }
+
+ vm_page_init(m, addr);
+ m->private = TRUE; /* don`t free page */
+ m->wire_count = 1;
+ vm_page_lock_queues();
+ vm_page_insert(m, object, offset);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_unlock(object);
+ }
+}
+
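+/*
+ * Illustrative sketch (for exposition only, disabled with #if 0): a
+ * map_fn for a physically contiguous device region such as a frame
+ * buffer.  vm_object_page_map() calls it once per page to learn the
+ * physical address to install.  The names example_fb_base,
+ * example_fb_map_fn and example_fb_attach are made up for this example.
+ */
+#if 0
+static vm_offset_t example_fb_base;	/* physical base of the device */
+
+static vm_offset_t
+example_fb_map_fn(
+	void		*data,		/* private data (unused here) */
+	vm_offset_t	offset)		/* offset within the object */
+{
+	return example_fb_base + offset;
+}
+
+static void
+example_fb_attach(
+	vm_object_t	object,
+	vm_size_t	size)
+{
+	vm_object_page_map(object, (vm_offset_t) 0, size,
+			   example_fb_map_fn, (void *) 0);
+}
+#endif
+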
+#include <mach_kdb.h>
+
+
+#if MACH_KDB
+#define printf kdbprintf
+
+boolean_t vm_object_print_pages = FALSE;
+
+/*
+ * vm_object_print: [ debug ]
+ */
+void vm_object_print(
+ vm_object_t object)
+{
+ register vm_page_t p;
+ extern int indent;
+
+ register int count;
+
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ iprintf("Object 0x%X: size=0x%X",
+ (vm_offset_t) object, (vm_offset_t) object->size);
+ printf(", %d references, %d resident pages,", object->ref_count,
+ object->resident_page_count);
+ printf(" %d absent pages,", object->absent_count);
+ printf(" %d paging ops\n", object->paging_in_progress);
+ indent += 2;
+ iprintf("memory object=0x%X (offset=0x%X),",
+ (vm_offset_t) object->pager, (vm_offset_t) object->paging_offset);
+ printf("control=0x%X, name=0x%X\n",
+ (vm_offset_t) object->pager_request, (vm_offset_t) object->pager_name);
+ iprintf("%s%s",
+ object->pager_ready ? " ready" : "",
+ object->pager_created ? " created" : "");
+ printf("%s,%s ",
+ object->pager_initialized ? "" : "uninitialized",
+ object->temporary ? "temporary" : "permanent");
+ printf("%s%s,",
+ object->internal ? "internal" : "external",
+ object->can_persist ? " cacheable" : "");
+ printf("copy_strategy=%d\n", (vm_offset_t)object->copy_strategy);
+ iprintf("shadow=0x%X (offset=0x%X),",
+ (vm_offset_t) object->shadow, (vm_offset_t) object->shadow_offset);
+ printf("copy=0x%X\n", (vm_offset_t) object->copy);
+
+ indent += 2;
+
+ if (vm_object_print_pages) {
+ count = 0;
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ if (count == 0) iprintf("memory:=");
+ else if (count == 4) {printf("\n"); iprintf(" ..."); count = 0;}
+ else printf(",");
+ count++;
+
+ printf("(off=0x%X,page=0x%X)", p->offset, (vm_offset_t) p);
+ p = (vm_page_t) queue_next(&p->listq);
+ }
+ if (count != 0)
+ printf("\n");
+ }
+ indent -= 4;
+}
+
+#endif /* MACH_KDB */
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 00000000..d3d050a0
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,374 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm_object.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory object module definitions.
+ */
+
+#ifndef _VM_VM_OBJECT_H_
+#define _VM_VM_OBJECT_H_
+
+#include <mach_pagemap.h>
+#include <norma_vm.h>
+
+#include <mach/kern_return.h>
+#include <mach/boolean.h>
+#include <mach/memory_object.h>
+#include <mach/port.h>
+#include <mach/vm_prot.h>
+#include <mach/machine/vm_types.h>
+#include <kern/queue.h>
+#include <kern/lock.h>
+#include <kern/assert.h>
+#include <kern/macro_help.h>
+#include <vm/pmap.h>
+
+#if MACH_PAGEMAP
+#include <vm/vm_external.h>
+#endif /* MACH_PAGEMAP */
+
+#if NORMA_VM
+typedef struct xmm_obj * pager_request_t;
+#else /* NORMA_VM */
+typedef struct ipc_port * pager_request_t;
+#endif /* NORMA_VM */
+#define PAGER_REQUEST_NULL ((pager_request_t) 0)
+
+/*
+ * Types defined:
+ *
+ * vm_object_t Virtual memory object.
+ *
+ * We use "struct ipc_port *" instead of "ipc_port_t"
+ * to avoid include file circularities.
+ */
+
+struct vm_object {
+ queue_chain_t memq; /* Resident memory */
+ decl_simple_lock_data(, Lock) /* Synchronization */
+#if VM_OBJECT_DEBUG
+ thread_t LockHolder; /* Thread holding Lock */
+#endif /* VM_OBJECT_DEBUG */
+ vm_size_t size; /* Object size (only valid
+ * if internal)
+ */
+
+ short ref_count; /* Number of references */
+ short resident_page_count;
+ /* number of resident pages */
+
+ struct vm_object *copy; /* Object that should receive
+ * a copy of my changed pages
+ */
+ struct vm_object *shadow; /* My shadow */
+ vm_offset_t shadow_offset; /* Offset into shadow */
+
+ struct ipc_port *pager; /* Where to get data */
+ vm_offset_t paging_offset; /* Offset into memory object */
+ pager_request_t pager_request; /* Where data comes back */
+ struct ipc_port *pager_name; /* How to identify region */
+
+ memory_object_copy_strategy_t
+ copy_strategy; /* How to handle data copy */
+
+ unsigned int
+ absent_count; /* The number of pages that
+ * have been requested but
+ * not filled. That is, the
+ * number of pages for which
+ * the "absent" attribute is
+ * asserted.
+ */
+
+ unsigned int /* boolean_t array */
+ all_wanted; /* Bit array of "want to be
+ * awakened" notations. See
+ * VM_OBJECT_EVENT_* items
+ * below
+ */
+
+ unsigned int
+ paging_in_progress:16,
+ /* The memory object ports are
+ * being used (e.g., for pagein
+ * or pageout) -- don't change any
+ * of these fields (i.e., don't
+ * collapse, destroy or terminate)
+ */
+ /* boolean_t */ pager_created:1,/* Has pager ever been created? */
+ /* boolean_t */ pager_initialized:1,/* Are fields ready to use? */
+ /* boolean_t */ pager_ready:1, /* Will manager take requests? */
+
+ /* boolean_t */ can_persist:1, /* The kernel may keep the data
+ * for this object (and rights to
+ * the memory object) after all
+ * address map references are
+ * deallocated?
+ */
+ /* boolean_t */ internal:1, /* Created by the kernel (and
+ * therefore, managed by the
+ * default memory manager)
+ */
+ /* boolean_t */ temporary:1, /* Permanent objects may be changed
+ * externally by the memory manager,
+ * and changes made in memory must
+ * be reflected back to the memory
+ * manager. Temporary objects lack
+ * both of these characteristics.
+ */
+ /* boolean_t */ alive:1, /* Not yet terminated (debug) */
+ /* boolean_t */ lock_in_progress : 1,
+ /* Is a multi-page lock
+ * request in progress?
+ */
+ /* boolean_t */ lock_restart : 1,
+ /* Should lock request in
+ * progress restart search?
+ */
+ /* boolean_t */ use_old_pageout : 1,
+ /* Use old pageout primitives?
+ */
+ /* boolean_t */ use_shared_copy : 1,/* Use shared (i.e.,
+ * delayed) copy on write */
+ /* boolean_t */ shadowed: 1; /* Shadow may exist */
+
+ queue_chain_t cached_list; /* Attachment point for the list
+ * of objects cached as a result
+ * of their can_persist value
+ */
+ vm_offset_t last_alloc; /* last allocation offset */
+#if MACH_PAGEMAP
+ vm_external_t existence_info;
+#endif /* MACH_PAGEMAP */
+};
+
+typedef struct vm_object *vm_object_t;
+#define VM_OBJECT_NULL ((vm_object_t) 0)
+
+extern
+vm_object_t kernel_object; /* the single kernel object */
+
+/*
+ * Declare procedures that operate on VM objects.
+ */
+
+extern void vm_object_bootstrap(void);
+extern void vm_object_init(void);
+extern void vm_object_terminate(vm_object_t);
+extern vm_object_t vm_object_allocate(vm_size_t);
+extern void vm_object_reference(vm_object_t);
+extern void vm_object_deallocate(vm_object_t);
+extern void vm_object_pmap_protect(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ pmap_t pmap,
+ vm_offset_t pmap_start,
+ vm_prot_t prot);
+extern void vm_object_pmap_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end);
+extern void vm_object_page_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end);
+extern void vm_object_shadow(
+ vm_object_t *object, /* in/out */
+ vm_offset_t *offset, /* in/out */
+ vm_size_t length);
+extern void vm_object_collapse(vm_object_t);
+extern vm_object_t vm_object_lookup(struct ipc_port *);
+extern vm_object_t vm_object_lookup_name(struct ipc_port *);
+extern struct ipc_port *vm_object_name(vm_object_t);
+extern void vm_object_remove(vm_object_t);
+
+extern boolean_t vm_object_copy_temporary(
+ vm_object_t *_object, /* in/out */
+ vm_offset_t *_offset, /* in/out */
+ boolean_t *_src_needs_copy, /* out */
+ boolean_t *_dst_needs_copy); /* out */
+extern kern_return_t vm_object_copy_strategically(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *dst_object, /* out */
+ vm_offset_t *dst_offset, /* out */
+ boolean_t *dst_needs_copy); /* out */
+extern kern_return_t vm_object_copy_slowly(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ boolean_t interruptible,
+ vm_object_t *_result_object); /* out */
+
+extern vm_object_t vm_object_enter(
+ struct ipc_port *pager,
+ vm_size_t size,
+ boolean_t internal);
+extern void vm_object_pager_create(
+ vm_object_t object);
+extern void vm_object_destroy(
+ struct ipc_port *pager);
+
+extern void vm_object_page_map(
+ vm_object_t,
+ vm_offset_t,
+ vm_size_t,
+ vm_offset_t (*)(void *, vm_offset_t),
+ void *);
+
+extern void vm_object_print(vm_object_t);
+
+extern vm_object_t vm_object_request_object(struct ipc_port *);
+
+/*
+ * Event waiting handling
+ */
+
+#define VM_OBJECT_EVENT_INITIALIZED 0
+#define VM_OBJECT_EVENT_PAGER_READY 1
+#define VM_OBJECT_EVENT_PAGING_IN_PROGRESS 2
+#define VM_OBJECT_EVENT_ABSENT_COUNT 3
+#define VM_OBJECT_EVENT_LOCK_IN_PROGRESS 4
+
+#define vm_object_wait(object, event, interruptible) \
+ MACRO_BEGIN \
+ (object)->all_wanted |= 1 << (event); \
+ vm_object_sleep(((vm_offset_t) object) + (event), \
+ (object), \
+ (interruptible)); \
+ MACRO_END
+
+#define vm_object_assert_wait(object, event, interruptible) \
+ MACRO_BEGIN \
+ (object)->all_wanted |= 1 << (event); \
+ assert_wait((event_t)(((vm_offset_t) object) + (event)), (interruptible)); \
+ MACRO_END
+
+#define vm_object_wakeup(object, event) \
+ MACRO_BEGIN \
+ if ((object)->all_wanted & (1 << (event))) \
+ thread_wakeup((event_t)(((vm_offset_t) object) + (event))); \
+ (object)->all_wanted &= ~(1 << (event)); \
+ MACRO_END
+
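+/*
+ * Illustrative usage of the event macros above (for exposition only),
+ * following the pattern in vm_object_enter(): the waiter sleeps with
+ * the object locked and must re-lock after waking, since the wait
+ * releases the object lock; the thread that makes the condition true
+ * calls vm_object_wakeup() with the same event.
+ *
+ *	vm_object_lock(object);
+ *	while (!object->pager_initialized) {
+ *		vm_object_wait(object,
+ *			       VM_OBJECT_EVENT_INITIALIZED,
+ *			       FALSE);
+ *		vm_object_lock(object);
+ *	}
+ *	vm_object_unlock(object);
+ */
+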
+/*
+ * Routines implemented as macros
+ */
+
+#define vm_object_paging_begin(object) \
+ ((object)->paging_in_progress++)
+
+#define vm_object_paging_end(object) \
+ MACRO_BEGIN \
+ assert((object)->paging_in_progress != 0); \
+ if (--(object)->paging_in_progress == 0) { \
+ vm_object_wakeup(object, \
+ VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \
+ } \
+ MACRO_END
+
+#define vm_object_paging_wait(object, interruptible) \
+ MACRO_BEGIN \
+ while ((object)->paging_in_progress != 0) { \
+ vm_object_wait( (object), \
+ VM_OBJECT_EVENT_PAGING_IN_PROGRESS, \
+ (interruptible)); \
+ vm_object_lock(object); \
+ \
+ /*XXX if ((interruptible) && */ \
+ /*XXX (current_thread()->wait_result != THREAD_AWAKENED))*/ \
+ /*XXX break; */ \
+ } \
+ MACRO_END
+
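+/*
+ * Illustrative usage (for exposition only): the paging-reference
+ * discipline used by vm_object_pager_create() and vm_pageout_page().
+ * A thread that must drop the object lock while the memory-object
+ * ports are in use brackets the unlocked region so the object cannot
+ * be collapsed or terminated underneath it:
+ *
+ *	vm_object_lock(object);
+ *	vm_object_paging_begin(object);
+ *	vm_object_unlock(object);
+ *	... send messages to the memory manager ...
+ *	vm_object_lock(object);
+ *	vm_object_paging_end(object);
+ *	vm_object_unlock(object);
+ */
+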
+#define vm_object_absent_assert_wait(object, interruptible) \
+ MACRO_BEGIN \
+ vm_object_assert_wait( (object), \
+ VM_OBJECT_EVENT_ABSENT_COUNT, \
+ (interruptible)); \
+ MACRO_END
+
+
+#define vm_object_absent_release(object) \
+ MACRO_BEGIN \
+ (object)->absent_count--; \
+ vm_object_wakeup((object), \
+ VM_OBJECT_EVENT_ABSENT_COUNT); \
+ MACRO_END
+
+/*
+ * Object locking macros (with and without debugging)
+ */
+
+#if VM_OBJECT_DEBUG
+#define vm_object_lock_init(object) \
+MACRO_BEGIN \
+ simple_lock_init(&(object)->Lock); \
+ (object)->LockHolder = 0; \
+MACRO_END
+#define vm_object_lock(object) \
+MACRO_BEGIN \
+ simple_lock(&(object)->Lock); \
+ (object)->LockHolder = current_thread(); \
+MACRO_END
+#define vm_object_unlock(object) \
+MACRO_BEGIN \
+ if ((object)->LockHolder != current_thread()) \
+ panic("vm_object_unlock 0x%x", (object)); \
+ (object)->LockHolder = 0; \
+ simple_unlock(&(object)->Lock); \
+MACRO_END
+#define vm_object_lock_try(object) \
+ (simple_lock_try(&(object)->Lock) \
+ ? ( ((object)->LockHolder = current_thread()) , TRUE) \
+ : FALSE)
+#define vm_object_sleep(event, object, interruptible) \
+MACRO_BEGIN \
+ if ((object)->LockHolder != current_thread()) \
+ panic("vm_object_sleep %#x", (object)); \
+ (object)->LockHolder = 0; \
+ thread_sleep((event_t)(event), simple_lock_addr((object)->Lock), \
+ (interruptible)); \
+MACRO_END
+#define vm_object_lock_taken(object) \
+ ((object)->LockHolder == current_thread())
+#else /* VM_OBJECT_DEBUG */
+#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock)
+#define vm_object_lock(object) simple_lock(&(object)->Lock)
+#define vm_object_unlock(object) simple_unlock(&(object)->Lock)
+#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock)
+#define vm_object_sleep(event, object, interruptible) \
+ thread_sleep((event_t)(event), simple_lock_addr((object)->Lock), \
+ (interruptible))
+#define vm_object_lock_taken(object) simple_lock_taken(&(object)->Lock)
+#endif /* VM_OBJECT_DEBUG */
+
+#endif /* _VM_VM_OBJECT_H_ */
diff --git a/vm/vm_page.h b/vm/vm_page.h
new file mode 100644
index 00000000..f7fa80a3
--- /dev/null
+++ b/vm/vm_page.h
@@ -0,0 +1,322 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_page.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Resident memory system definitions.
+ */
+
+#ifndef _VM_VM_PAGE_H_
+#define _VM_VM_PAGE_H_
+
+#include <mach_vm_debug.h>
+
+#include <mach/boolean.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_param.h>
+#include <vm/vm_object.h>
+#include <kern/queue.h>
+#include <kern/lock.h>
+#include <kern/zalloc.h>
+
+#include <kern/macro_help.h>
+#include <kern/sched_prim.h> /* definitions of wait/wakeup */
+
+#if MACH_VM_DEBUG
+#include <mach_debug/hash_info.h>
+#endif
+
+/*
+ * Management of resident (logical) pages.
+ *
+ * A small structure is kept for each resident
+ * page, indexed by page number. Each structure
+ * is an element of several lists:
+ *
+ * A hash table bucket used to quickly
+ * perform object/offset lookups
+ *
+ * A list of all pages for a given object,
+ * so they can be quickly deactivated at
+ * time of deallocation.
+ *
+ * An ordered list of pages due for pageout.
+ *
+ * In addition, the structure contains the object
+ * and offset to which this page belongs (for pageout),
+ * and sundry status bits.
+ *
+ * Fields in this structure are locked either by the lock on the
+ * object that the page belongs to (O) or by the lock on the page
+ * queues (P). [Some fields require that both locks be held to
+ * change that field; holding either lock is sufficient to read.]
+ */
+
+struct vm_page {
+ queue_chain_t pageq; /* queue info for FIFO
+ * queue or free list (P) */
+ queue_chain_t listq; /* all pages in same object (O) */
+ struct vm_page *next; /* VP bucket link (O) */
+
+ vm_object_t object; /* which object am I in (O,P) */
+ vm_offset_t offset; /* offset into that object (O,P) */
+
+ unsigned int wire_count:16, /* how many wired down maps use me?
+ (O&P) */
+ /* boolean_t */ inactive:1, /* page is in inactive list (P) */
+ active:1, /* page is in active list (P) */
+ laundry:1, /* page is being cleaned now (P)*/
+ free:1, /* page is on free list (P) */
+ reference:1, /* page has been used (P) */
+ :0; /* (force to 'long' boundary) */
+#ifdef ns32000
+ int pad; /* extra space for ns32000 bit ops */
+#endif /* ns32000 */
+
+ unsigned int
+ /* boolean_t */ busy:1, /* page is in transit (O) */
+ wanted:1, /* someone is waiting for page (O) */
+ tabled:1, /* page is in VP table (O) */
+ fictitious:1, /* Physical page doesn't exist (O) */
+ private:1, /* Page should not be returned to
+ * the free list (O) */
+ absent:1, /* Data has been requested, but is
+ * not yet available (O) */
+ error:1, /* Data manager was unable to provide
+ * data due to error (O) */
+ dirty:1, /* Page must be cleaned (O) */
+ precious:1, /* Page is precious; data must be
+ * returned even if clean (O) */
+ overwriting:1, /* Request to unlock has been made
+ * without having data. (O)
+ * [See vm_object_overwrite] */
+ :0;
+
+ vm_offset_t phys_addr; /* Physical address of page, passed
+ * to pmap_enter (read-only) */
+ vm_prot_t page_lock; /* Uses prohibited by data manager (O) */
+ vm_prot_t unlock_request; /* Outstanding unlock request (O) */
+};
+
+typedef struct vm_page *vm_page_t;
+
+#define VM_PAGE_NULL ((vm_page_t) 0)
+
+/*
+ * For debugging, this macro can be defined to perform
+ * some useful check on a page structure.
+ */
+
+#define VM_PAGE_CHECK(mem)
+
+/*
+ * Each pageable resident page falls into one of three lists:
+ *
+ * free
+ * Available for allocation now.
+ * inactive
+ * Not referenced in any map, but still has an
+ * object/offset-page mapping, and may be dirty.
+ * This is the list of pages that should be
+ * paged out next.
+ * active
+ * A list of pages which have been placed in
+ * at least one physical map. This list is
+ * ordered, in LRU-like fashion.
+ */
+
+extern
+vm_page_t vm_page_queue_free; /* memory free queue */
+extern
+vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
+extern
+queue_head_t vm_page_queue_active; /* active memory queue */
+extern
+queue_head_t vm_page_queue_inactive; /* inactive memory queue */
+
+extern
+vm_offset_t first_phys_addr; /* physical address for first_page */
+extern
+vm_offset_t last_phys_addr; /* physical address for last_page */
+
+extern
+int vm_page_free_count; /* How many pages are free? */
+extern
+int vm_page_fictitious_count;/* How many fictitious pages are free? */
+extern
+int vm_page_active_count; /* How many pages are active? */
+extern
+int vm_page_inactive_count; /* How many pages are inactive? */
+extern
+int vm_page_wire_count; /* How many pages are wired? */
+extern
+int vm_page_free_target; /* How many do we want free? */
+extern
+int vm_page_free_min; /* When to wakeup pageout */
+extern
+int vm_page_inactive_target;/* How many do we want inactive? */
+extern
+int vm_page_free_reserved; /* How many pages reserved to do pageout */
+extern
+int vm_page_laundry_count; /* How many pages being laundered? */
+
+decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
+ page queues */
+decl_simple_lock_data(extern,vm_page_queue_free_lock)
+ /* lock on free page queue */
+
+extern unsigned int vm_page_free_wanted;
+ /* how many threads are waiting for memory */
+
+extern vm_offset_t vm_page_fictitious_addr;
+ /* (fake) phys_addr of fictitious pages */
+
+extern void vm_page_bootstrap(
+ vm_offset_t *startp,
+ vm_offset_t *endp);
+extern void vm_page_module_init(void);
+
+extern void vm_page_create(
+ vm_offset_t start,
+ vm_offset_t end);
+extern vm_page_t vm_page_lookup(
+ vm_object_t object,
+ vm_offset_t offset);
+extern vm_page_t vm_page_grab_fictitious(void);
+extern void vm_page_release_fictitious(vm_page_t);
+extern boolean_t vm_page_convert(vm_page_t);
+extern void vm_page_more_fictitious(void);
+extern vm_page_t vm_page_grab(void);
+extern void vm_page_release(vm_page_t);
+extern void vm_page_wait(void (*)(void));
+extern vm_page_t vm_page_alloc(
+ vm_object_t object,
+ vm_offset_t offset);
+extern void vm_page_init(
+ vm_page_t mem,
+ vm_offset_t phys_addr);
+extern void vm_page_free(vm_page_t);
+extern void vm_page_activate(vm_page_t);
+extern void vm_page_deactivate(vm_page_t);
+extern void vm_page_rename(
+ vm_page_t mem,
+ vm_object_t new_object,
+ vm_offset_t new_offset);
+extern void vm_page_insert(
+ vm_page_t mem,
+ vm_object_t object,
+ vm_offset_t offset);
+extern void vm_page_remove(
+ vm_page_t mem);
+
+extern void vm_page_zero_fill(vm_page_t);
+extern void vm_page_copy(vm_page_t src_m, vm_page_t dest_m);
+
+extern void vm_page_wire(vm_page_t);
+extern void vm_page_unwire(vm_page_t);
+
+extern void vm_set_page_size(void);
+
+#if MACH_VM_DEBUG
+extern unsigned int vm_page_info(
+ hash_info_bucket_t *info,
+ unsigned int count);
+#endif
+
+/*
+ * Functions implemented as macros
+ */
+
+#define PAGE_ASSERT_WAIT(m, interruptible) \
+ MACRO_BEGIN \
+ (m)->wanted = TRUE; \
+ assert_wait((event_t) (m), (interruptible)); \
+ MACRO_END
+
+#define PAGE_WAKEUP_DONE(m) \
+ MACRO_BEGIN \
+ (m)->busy = FALSE; \
+ if ((m)->wanted) { \
+ (m)->wanted = FALSE; \
+ thread_wakeup(((event_t) m)); \
+ } \
+ MACRO_END
+
+#define PAGE_WAKEUP(m) \
+ MACRO_BEGIN \
+ if ((m)->wanted) { \
+ (m)->wanted = FALSE; \
+ thread_wakeup((event_t) (m)); \
+ } \
+ MACRO_END
+
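+/*
+ * Illustrative usage (for exposition only): the busy-page handshake
+ * built from the macros above.  A thread that finds a page busy records
+ * its interest and sleeps; the thread that owns the page clears busy
+ * and wakes any waiters when it is done with it.
+ *
+ *	Waiter (object locked):
+ *		while (m->busy) {
+ *			PAGE_ASSERT_WAIT(m, FALSE);
+ *			vm_object_unlock(object);
+ *			thread_block((void (*)()) 0);
+ *			vm_object_lock(object);
+ *		}
+ *
+ *	Owner, when finished with the page (object locked):
+ *		PAGE_WAKEUP_DONE(m);
+ */
+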
+#define VM_PAGE_FREE(p) \
+ MACRO_BEGIN \
+ vm_page_lock_queues(); \
+ vm_page_free(p); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+/*
+ * Macro to be used in place of pmap_enter()
+ */
+
+#define PMAP_ENTER(pmap, virtual_address, page, protection, wired) \
+ MACRO_BEGIN \
+ pmap_enter( \
+ (pmap), \
+ (virtual_address), \
+ (page)->phys_addr, \
+ (protection) & ~(page)->page_lock, \
+ (wired) \
+ ); \
+ MACRO_END
+
+#define VM_PAGE_WAIT(continuation) vm_page_wait(continuation)
+
+#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
+#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
+
+#define VM_PAGE_QUEUES_REMOVE(mem) \
+ MACRO_BEGIN \
+ if (mem->active) { \
+ queue_remove(&vm_page_queue_active, \
+ mem, vm_page_t, pageq); \
+ mem->active = FALSE; \
+ vm_page_active_count--; \
+ } \
+ \
+ if (mem->inactive) { \
+ queue_remove(&vm_page_queue_inactive, \
+ mem, vm_page_t, pageq); \
+ mem->inactive = FALSE; \
+ vm_page_inactive_count--; \
+ } \
+ MACRO_END
+
+#endif /* _VM_VM_PAGE_H_ */
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
new file mode 100644
index 00000000..411531bb
--- /dev/null
+++ b/vm/vm_pageout.c
@@ -0,0 +1,924 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_pageout.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * The proverbial page-out daemon.
+ */
+
+#include <mach_pagemap.h>
+#include <norma_vm.h>
+
+#include <mach/mach_types.h>
+#include <mach/memory_object.h>
+#include "memory_object_default.h"
+#include "memory_object_user.h"
+#include <mach/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <kern/counters.h>
+#include <kern/thread.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <machine/vm_tuning.h>
+
+
+
+#ifndef VM_PAGEOUT_BURST_MAX
+#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
+#endif VM_PAGEOUT_BURST_MAX
+
+#ifndef VM_PAGEOUT_BURST_MIN
+#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
+#endif VM_PAGEOUT_BURST_MIN
+
+#ifndef VM_PAGEOUT_BURST_WAIT
+#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
+#endif VM_PAGEOUT_BURST_WAIT
+
+#ifndef VM_PAGEOUT_EMPTY_WAIT
+#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
+#endif VM_PAGEOUT_EMPTY_WAIT
+
+#ifndef VM_PAGEOUT_PAUSE_MAX
+#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
+#endif VM_PAGEOUT_PAUSE_MAX
+
+/*
+ * To obtain a reasonable LRU approximation, the inactive queue
+ * needs to be large enough to give pages on it a chance to be
+ * referenced a second time. This macro defines the fraction
+ * of active+inactive pages that should be inactive.
+ * The pageout daemon uses it to update vm_page_inactive_target.
+ *
+ * If vm_page_free_count falls below vm_page_free_target and
+ * vm_page_inactive_count is below vm_page_inactive_target,
+ * then the pageout daemon starts running.
+ */
+
+#ifndef VM_PAGE_INACTIVE_TARGET
+#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
+#endif VM_PAGE_INACTIVE_TARGET
+
+/*
+ * Once the pageout daemon starts running, it keeps going
+ * until vm_page_free_count meets or exceeds vm_page_free_target.
+ */
+
+#ifndef VM_PAGE_FREE_TARGET
+#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
+#endif VM_PAGE_FREE_TARGET
+
+/*
+ * The pageout daemon always starts running once vm_page_free_count
+ * falls below vm_page_free_min.
+ */
+
+#ifndef VM_PAGE_FREE_MIN
+#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
+#endif VM_PAGE_FREE_MIN
+
+/*
+ * When vm_page_free_count falls below vm_page_free_reserved,
+ * only vm-privileged threads can allocate pages. vm-privilege
+ * allows the pageout daemon and default pager (and any other
+ * associated threads needed for default pageout) to continue
+ * operation by dipping into the reserved pool of pages.
+ */
+
+#ifndef VM_PAGE_FREE_RESERVED
+#define VM_PAGE_FREE_RESERVED 15
+#endif VM_PAGE_FREE_RESERVED
+
+/*
+ * When vm_page_free_count falls below vm_pageout_reserved_internal,
+ * the pageout daemon no longer trusts external pagers to clean pages.
+ * External pagers are probably all wedged waiting for a free page.
+ * It forcibly double-pages dirty pages belonging to external objects,
+ * getting the pages to the default pager to clean.
+ */
+
+#ifndef VM_PAGEOUT_RESERVED_INTERNAL
+#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 5)
+#endif VM_PAGEOUT_RESERVED_INTERNAL
+
+/*
+ * When vm_page_free_count falls below vm_pageout_reserved_really,
+ * the pageout daemon stops work entirely to let the default pager
+ * catch up (assuming the default pager has pages to clean).
+ * Beyond this point, it is too dangerous to consume memory
+ * even for memory_object_data_write messages to the default pager.
+ */
+
+#ifndef VM_PAGEOUT_RESERVED_REALLY
+#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 10)
+#endif VM_PAGEOUT_RESERVED_REALLY
+
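+/*
+ * Worked example of the macros above (illustrative figures only): on a
+ * machine that boots with 2000 free pages,
+ *
+ *	VM_PAGE_FREE_TARGET(2000)	= 15 + 2000/80	= 40 pages
+ *	VM_PAGE_FREE_MIN(2000)		= 10 + 2000/100	= 30 pages
+ *	VM_PAGE_FREE_RESERVED				= 15 pages
+ *	VM_PAGEOUT_RESERVED_INTERNAL(15)	= 15 - 5	= 10 pages
+ *	VM_PAGEOUT_RESERVED_REALLY(15)		= 15 - 10	= 5 pages
+ *
+ * and with 600 active plus 300 inactive pages,
+ *
+ *	VM_PAGE_INACTIVE_TARGET(900)	= 900 * 2 / 3	= 600 pages.
+ */
+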
+extern void vm_pageout_continue();
+extern void vm_pageout_scan_continue();
+
+unsigned int vm_pageout_reserved_internal = 0;
+unsigned int vm_pageout_reserved_really = 0;
+
+unsigned int vm_pageout_burst_max = 0;
+unsigned int vm_pageout_burst_min = 0;
+unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
+unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
+unsigned int vm_pageout_pause_count = 0;
+unsigned int vm_pageout_pause_max = 0;
+
+/*
+ * These variables record the pageout daemon's actions:
+ * how many pages it looks at and what happens to those pages.
+ * No locking needed because only one thread modifies the variables.
+ */
+
+unsigned int vm_pageout_active = 0; /* debugging */
+unsigned int vm_pageout_inactive = 0; /* debugging */
+unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
+unsigned int vm_pageout_inactive_busy = 0; /* debugging */
+unsigned int vm_pageout_inactive_absent = 0; /* debugging */
+unsigned int vm_pageout_inactive_used = 0; /* debugging */
+unsigned int vm_pageout_inactive_clean = 0; /* debugging */
+unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
+unsigned int vm_pageout_inactive_double = 0; /* debugging */
+
+#if NORMA_VM
+/*
+ * Define them here, since they won't be defined by memory_object_user.h.
+ */
+extern kern_return_t memory_object_data_initialize();
+extern kern_return_t memory_object_data_write();
+#endif NORMA_VM
+
+/*
+ * Routine: vm_pageout_setup
+ * Purpose:
+ * Set up a page for pageout.
+ *
+ * Move or copy the page to a new object, as part
+ * of which it will be sent to its memory manager
+ * in a memory_object_data_write or memory_object_data_initialize
+ * message.
+ *
+ * The "paging_offset" argument specifies the offset
+ * of the page within its external memory object.
+ *
+ * The "new_object" and "new_offset" arguments
+ * indicate where the page should be moved.
+ *
+ * The "flush" argument specifies whether the page
+ * should be flushed from its object. If not, a
+ * copy of the page is moved to the new object.
+ *
+ * In/Out conditions:
+ * The page in question must not be on any pageout queues,
+ * and must be busy. The object to which it belongs
+ * must be unlocked, and the caller must hold a paging
+ * reference to it. The new_object must not be locked.
+ *
+ * If the page is flushed from its original object,
+ * this routine returns a pointer to a place-holder page,
+ * inserted at the same offset, to block out-of-order
+ * requests for the page. The place-holder page must
+ * be freed after the data_write or initialize message
+ * has been sent. If the page is copied,
+ * the holding page is VM_PAGE_NULL.
+ *
+ * The original page is put on a paging queue and marked
+ * not busy on exit.
+ */
+vm_page_t
+vm_pageout_setup(m, paging_offset, new_object, new_offset, flush)
+ register vm_page_t m;
+ vm_offset_t paging_offset;
+ register vm_object_t new_object;
+ vm_offset_t new_offset;
+ boolean_t flush;
+{
+ register vm_object_t old_object = m->object;
+ register vm_page_t holding_page = 0; /* '=0' to quiet gcc warnings */
+ register vm_page_t new_m;
+
+ assert(m->busy && !m->absent && !m->fictitious);
+
+ /*
+ * If we are not flushing the page, allocate a
+ * page in the object. If we cannot get the
+ * page, flush instead.
+ */
+ if (!flush) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ if (new_m == VM_PAGE_NULL)
+ flush = TRUE;
+ vm_object_unlock(new_object);
+ }
+
+ if (flush) {
+ /*
+ * Create a place-holder page where the old one was,
+ * to prevent anyone from attempting to page in this
+ * page while we`re unlocked.
+ */
+ while ((holding_page = vm_page_grab_fictitious())
+ == VM_PAGE_NULL)
+ vm_page_more_fictitious();
+
+ vm_object_lock(old_object);
+ vm_page_lock_queues();
+ vm_page_remove(m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_insert(holding_page, old_object, m->offset);
+ vm_page_unlock_queues();
+
+ /*
+ * Record that this page has been written out
+ */
+#if MACH_PAGEMAP
+ vm_external_state_set(old_object->existence_info,
+ paging_offset,
+ VM_EXTERNAL_STATE_EXISTS);
+#endif MACH_PAGEMAP
+
+ vm_object_unlock(old_object);
+
+ vm_object_lock(new_object);
+
+ /*
+ * Move this page into the new object
+ */
+
+ vm_page_lock_queues();
+ vm_page_insert(m, new_object, new_offset);
+ vm_page_unlock_queues();
+
+ m->dirty = TRUE;
+ m->precious = FALSE;
+ m->page_lock = VM_PROT_NONE;
+ m->unlock_request = VM_PROT_NONE;
+ }
+ else {
+ /*
+ * Copy the data into the new page,
+ * and mark the new page as clean.
+ */
+ vm_page_copy(m, new_m);
+
+ vm_object_lock(old_object);
+ m->dirty = FALSE;
+ pmap_clear_modify(m->phys_addr);
+
+ /*
+ * Deactivate old page.
+ */
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+
+ /*
+ * Record that this page has been written out
+ */
+
+#if MACH_PAGEMAP
+ vm_external_state_set(old_object->existence_info,
+ paging_offset,
+ VM_EXTERNAL_STATE_EXISTS);
+#endif MACH_PAGEMAP
+
+ vm_object_unlock(old_object);
+
+ vm_object_lock(new_object);
+
+ /*
+ * Use the new page below.
+ */
+ m = new_m;
+ m->dirty = TRUE;
+ assert(!m->precious);
+ PAGE_WAKEUP_DONE(m);
+ }
+
+ /*
+ * Make the old page eligible for replacement again; if a
+ * user-supplied memory manager fails to release the page,
+ * it will be paged out again to the default memory manager.
+ *
+ * Note that pages written to the default memory manager
+ * must be wired down -- in return, it guarantees to free
+ * this page, rather than reusing it.
+ */
+
+ vm_page_lock_queues();
+ vm_stat.pageouts++;
+ if (m->laundry) {
+ /*
+ * vm_pageout_scan is telling us to put this page
+ * at the front of the inactive queue, so it will
+ * be immediately paged out to the default pager.
+ */
+
+ assert(!old_object->internal);
+ m->laundry = FALSE;
+
+ queue_enter_first(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
+ m->inactive = TRUE;
+ vm_page_inactive_count++;
+ } else if (old_object->internal) {
+ m->laundry = TRUE;
+ vm_page_laundry_count++;
+
+ vm_page_wire(m);
+ } else
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+
+ /*
+ * Since IPC operations may block, we drop locks now.
+ * [The placeholder page is busy, and we still have
+ * paging_in_progress incremented.]
+ */
+
+ vm_object_unlock(new_object);
+
+ /*
+ * Return the placeholder page to simplify cleanup.
+ */
+ return (flush ? holding_page : VM_PAGE_NULL);
+}
+
+/*
+ * Routine: vm_pageout_page
+ * Purpose:
+ * Causes the specified page to be written back to
+ * the appropriate memory object.
+ *
+ * The "initial" argument specifies whether this
+ * data is an initialization only, and should use
+ * memory_object_data_initialize instead of
+ * memory_object_data_write.
+ *
+ * The "flush" argument specifies whether the page
+ * should be flushed from the object. If not, a
+ * copy of the data is sent to the memory object.
+ *
+ * In/out conditions:
+ * The page in question must not be on any pageout queues.
+ * The object to which it belongs must be locked.
+ * Implementation:
+ * Move this page to a completely new object, if flushing;
+ * copy to a new page in a new object, if not.
+ */
+void
+vm_pageout_page(m, initial, flush)
+ register vm_page_t m;
+ boolean_t initial;
+ boolean_t flush;
+{
+ vm_map_copy_t copy;
+ register vm_object_t old_object;
+ register vm_object_t new_object;
+ register vm_page_t holding_page;
+ vm_offset_t paging_offset;
+ kern_return_t rc;
+ boolean_t precious_clean;
+
+ assert(m->busy);
+
+ /*
+ * Cleaning but not flushing a clean precious page is a
+ * no-op. Remember whether page is clean and precious now
+ * because vm_pageout_setup will mark it dirty and not precious.
+ *
+ * XXX Check if precious_clean && !flush can really happen.
+ */
+ precious_clean = (!m->dirty) && m->precious;
+ if (precious_clean && !flush) {
+ PAGE_WAKEUP_DONE(m);
+ return;
+ }
+
+ /*
+ * Verify that we really want to clean this page.
+ */
+ if (m->absent || m->error || (!m->dirty && !m->precious)) {
+ VM_PAGE_FREE(m);
+ return;
+ }
+
+ /*
+ * Create a paging reference to let us play with the object.
+ */
+ old_object = m->object;
+ paging_offset = m->offset + old_object->paging_offset;
+ vm_object_paging_begin(old_object);
+ vm_object_unlock(old_object);
+
+ /*
+ * Allocate a new object into which we can put the page.
+ */
+ new_object = vm_object_allocate(PAGE_SIZE);
+
+ /*
+ * Move the page into the new object.
+ */
+ holding_page = vm_pageout_setup(m,
+ paging_offset,
+ new_object,
+ 0, /* new offset */
+ flush); /* flush */
+
+ rc = vm_map_copyin_object(new_object, 0, PAGE_SIZE, &copy);
+ assert(rc == KERN_SUCCESS);
+
+ if (initial || old_object->use_old_pageout) {
+ rc = (*(initial ? memory_object_data_initialize
+ : memory_object_data_write))
+ (old_object->pager,
+ old_object->pager_request,
+ paging_offset, (pointer_t) copy, PAGE_SIZE);
+ }
+ else {
+ rc = memory_object_data_return(
+ old_object->pager,
+ old_object->pager_request,
+ paging_offset, (pointer_t) copy, PAGE_SIZE,
+ !precious_clean, !flush);
+ }
+
+ if (rc != KERN_SUCCESS)
+ vm_map_copy_discard(copy);
+
+ /*
+ * Clean up.
+ */
+ vm_object_lock(old_object);
+ if (holding_page != VM_PAGE_NULL)
+ VM_PAGE_FREE(holding_page);
+ vm_object_paging_end(old_object);
+}
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ * It returns with vm_page_queue_free_lock held and
+ * vm_page_free_wanted == 0.
+ */
+
+void vm_pageout_scan()
+{
+ unsigned int burst_count;
+
+ /*
+ * We want to gradually dribble pages from the active queue
+ * to the inactive queue. If we let the inactive queue get
+ * very small, and then suddenly dump many pages into it,
+ * those pages won't get a sufficient chance to be referenced
+ * before we start taking them from the inactive queue.
+ *
+ * We must limit the rate at which we send pages to the pagers.
+ * data_write messages consume memory, for message buffers and
+ * for map-copy objects. If we get too far ahead of the pagers,
+ * we can potentially run out of memory.
+ *
+ * We can use the laundry count to limit directly the number
+ * of pages outstanding to the default pager. A similar
+ * strategy for external pagers doesn't work, because
+ * external pagers don't have to deallocate the pages sent them,
+ * and because we might have to send pages to external pagers
+ * even if they aren't processing writes. So we also
+ * use a burst count to limit writes to external pagers.
+ *
+ * When memory is very tight, we can't rely on external pagers to
+ * clean pages. They probably aren't running, because they
+ * aren't vm-privileged. If we kept sending dirty pages to them,
+ * we could exhaust the free list. However, we can't just ignore
+ * pages belonging to external objects, because there might be no
+ * pages belonging to internal objects. Hence, we get the page
+ * into an internal object and then immediately double-page it,
+ * sending it to the default pager.
+ *
+ * consider_zone_gc should be last, because the other operations
+ * might return memory to zones. When we pause we use
+ * vm_pageout_scan_continue as our continuation, so we will
+ * reenter vm_pageout_scan periodically and attempt to reclaim
+ * internal memory even if we never reach vm_page_free_target.
+ */
+
+ Restart:
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+ consider_thread_collect();
+ consider_zone_gc();
+
+ for (burst_count = 0;;) {
+ register vm_page_t m;
+ register vm_object_t object;
+ unsigned int free_count;
+
+ /*
+		 * Recalculate vm_page_inactive_target.
+ */
+
+ vm_page_lock_queues();
+ vm_page_inactive_target =
+ VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count);
+
+ /*
+ * Move pages from active to inactive.
+ */
+
+ while ((vm_page_inactive_count < vm_page_inactive_target) &&
+ !queue_empty(&vm_page_queue_active)) {
+ register vm_object_t obj;
+
+ vm_pageout_active++;
+ m = (vm_page_t) queue_first(&vm_page_queue_active);
+ assert(m->active && !m->inactive);
+
+ obj = m->object;
+ if (!vm_object_lock_try(obj)) {
+ /*
+ * Move page to end and continue.
+ */
+
+ queue_remove(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+ vm_page_unlock_queues();
+ vm_page_lock_queues();
+ continue;
+ }
+
+ /*
+ * If the page is busy, then we pull it
+ * off the active queue and leave it alone.
+ */
+
+ if (m->busy) {
+ vm_object_unlock(obj);
+ queue_remove(&vm_page_queue_active, m,
+ vm_page_t, pageq);
+ m->active = FALSE;
+ vm_page_active_count--;
+ continue;
+ }
+
+ /*
+ * Deactivate the page while holding the object
+ * locked, so we know the page is still not busy.
+ * This should prevent races between pmap_enter
+ * and pmap_clear_reference. The page might be
+ * absent or fictitious, but vm_page_deactivate
+ * can handle that.
+ */
+
+ vm_page_deactivate(m);
+ vm_object_unlock(obj);
+ }
+
+ /*
+ * We are done if we have met our target *and*
+ * nobody is still waiting for a page.
+ */
+
+ simple_lock(&vm_page_queue_free_lock);
+ free_count = vm_page_free_count;
+		if ((free_count >= vm_page_free_target) &&
+ (vm_page_free_wanted == 0)) {
+ vm_page_unlock_queues();
+ break;
+ }
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * Sometimes we have to pause:
+ * 1) No inactive pages - nothing to do.
+ * 2) Flow control - wait for pagers to catch up.
+ * 3) Extremely low memory - sending out dirty pages
+ * consumes memory. We don't take the risk of doing
+ * this if the default pager already has work to do.
+ */
+
+ if (queue_empty(&vm_page_queue_inactive) ||
+ (burst_count >= vm_pageout_burst_max) ||
+ (vm_page_laundry_count >= vm_pageout_burst_max) ||
+ ((free_count < vm_pageout_reserved_really) &&
+ (vm_page_laundry_count > 0))) {
+ unsigned int pages, msecs;
+
+ /*
+ * vm_pageout_burst_wait is msecs/page.
+ * If there is nothing for us to do, we wait
+ * at least vm_pageout_empty_wait msecs.
+ */
+
+ if (vm_page_laundry_count > burst_count)
+ pages = vm_page_laundry_count;
+ else
+ pages = burst_count;
+ msecs = pages * vm_pageout_burst_wait;
+
+ if (queue_empty(&vm_page_queue_inactive) &&
+ (msecs < vm_pageout_empty_wait))
+ msecs = vm_pageout_empty_wait;
+ vm_page_unlock_queues();
+
+ thread_will_wait_with_timeout(current_thread(), msecs);
+ counter(c_vm_pageout_scan_block++);
+ thread_block(vm_pageout_scan_continue);
+#ifndef CONTINUATIONS
+ /*
+ * Unfortunately, we don't have call_continuation
+ * so we can't rely on tail-recursion.
+ */
+
+ vm_pageout_scan_continue();
+ goto Restart;
+#else /* CONTINUATIONS */
+ call_continuation(vm_pageout_scan_continue);
+ /*NOTREACHED*/
+#endif /* CONTINUATIONS */
+ }
+
+ vm_pageout_inactive++;
+ m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+ assert(!m->active && m->inactive);
+ object = m->object;
+
+ /*
+ * Try to lock object; since we've got the
+ * page queues lock, we can only try for this one.
+ */
+
+ if (!vm_object_lock_try(object)) {
+ /*
+ * Move page to end and continue.
+ */
+
+ queue_remove(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
+ queue_enter(&vm_page_queue_inactive, m,
+ vm_page_t, pageq);
+ vm_page_unlock_queues();
+ vm_pageout_inactive_nolock++;
+ continue;
+ }
+
+ /*
+ * Remove the page from the inactive list.
+ */
+
+ queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
+ vm_page_inactive_count--;
+ m->inactive = FALSE;
+
+ if (m->busy || !object->alive) {
+ /*
+ * Somebody is already playing with this page.
+ * Leave it off the pageout queues.
+ */
+
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+ vm_pageout_inactive_busy++;
+ continue;
+ }
+
+ /*
+ * If it's absent, we can reclaim the page.
+ */
+
+ if (m->absent) {
+ vm_pageout_inactive_absent++;
+ reclaim_page:
+ vm_page_free(m);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+ continue;
+ }
+
+ /*
+ * If it's being used, reactivate.
+ * (Fictitious pages are either busy or absent.)
+ */
+
+ assert(!m->fictitious);
+ if (m->reference || pmap_is_referenced(m->phys_addr)) {
+ vm_object_unlock(object);
+ vm_page_activate(m);
+ vm_stat.reactivations++;
+ vm_page_unlock_queues();
+ vm_pageout_inactive_used++;
+ continue;
+ }
+
+ /*
+ * Eliminate all mappings.
+ */
+
+ m->busy = TRUE;
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ if (!m->dirty)
+ m->dirty = pmap_is_modified(m->phys_addr);
+
+ /*
+ * If it's clean and not precious, we can free the page.
+ */
+
+ if (!m->dirty && !m->precious) {
+ vm_pageout_inactive_clean++;
+ goto reclaim_page;
+ }
+
+ /*
+ * If we are very low on memory, then we can't
+ * rely on an external pager to clean a dirty page,
+ * because external pagers are not vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup to
+ * put the page back at the front of the inactive
+ * queue instead of activating the page. Hence,
+ * we will pick the page up again immediately and
+ * resend it to the default pager.
+ */
+
+ assert(!m->laundry);
+ if ((free_count < vm_pageout_reserved_internal) &&
+ !object->internal) {
+ m->laundry = TRUE;
+ vm_pageout_inactive_double++;
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create
+ * one and hand it to the default pager.
+ * [First try to collapse, so we don't create
+ * one unnecessarily.]
+ */
+
+ if (!object->pager_initialized)
+ vm_object_collapse(object);
+ if (!object->pager_initialized)
+ vm_object_pager_create(object);
+ if (!object->pager_initialized)
+ panic("vm_pageout_scan");
+
+ vm_pageout_inactive_dirty++;
+ vm_pageout_page(m, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+ burst_count++;
+ }
+}
+
+void vm_pageout_scan_continue()
+{
+ /*
+ * We just paused to let the pagers catch up.
+ * If vm_page_laundry_count is still high,
+ * then we aren't waiting long enough.
+ * If we have paused some vm_pageout_pause_max times without
+ * adjusting vm_pageout_burst_wait, it might be too big,
+ * so we decrease it.
+ */
+
+ vm_page_lock_queues();
+ if (vm_page_laundry_count > vm_pageout_burst_min) {
+ vm_pageout_burst_wait++;
+ vm_pageout_pause_count = 0;
+ } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
+ vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
+ if (vm_pageout_burst_wait < 1)
+ vm_pageout_burst_wait = 1;
+ vm_pageout_pause_count = 0;
+ }
+ vm_page_unlock_queues();
+
+#ifdef CONTINUATIONS
+ vm_pageout_continue();
+ /*NOTREACHED*/
+#endif /* CONTINUATIONS */
+}
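+
+/*
+ *	Editorial sketch, not part of the original Mach source: the feedback
+ *	rule above, rewritten as a standalone function so the tuning policy
+ *	can be read (or experimented with) in isolation.  All names here are
+ *	made up; the numeric policy (grow by one, shrink by 3/4, floor of 1)
+ *	is the one used above.  Guarded with #if 0 so it is never compiled.
+ */
+#if 0
+struct pause_tuning {
+	unsigned int burst_wait;	/* msecs to wait per laundered page */
+	unsigned int pause_count;	/* pauses since the last adjustment */
+};
+
+static void
+pause_feedback(struct pause_tuning *t, unsigned int laundry_count,
+	       unsigned int burst_min, unsigned int pause_max)
+{
+	if (laundry_count > burst_min) {
+		/* the pagers are still behind: wait longer per page */
+		t->burst_wait++;
+		t->pause_count = 0;
+	} else if (++t->pause_count > pause_max) {
+		/* we keep pausing with little laundry: shrink the wait */
+		t->burst_wait = (t->burst_wait * 3) / 4;
+		if (t->burst_wait < 1)
+			t->burst_wait = 1;
+		t->pause_count = 0;
+	}
+}
+#endif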
+
+/*
+ * vm_pageout is the high level pageout daemon.
+ */
+
+void vm_pageout_continue()
+{
+ /*
+ * The pageout daemon is never done, so loop forever.
+ * We should call vm_pageout_scan at least once each
+ * time we are woken, even if vm_page_free_wanted is
+ * zero, to check vm_page_free_target and
+ * vm_page_inactive_target.
+ */
+
+ for (;;) {
+ vm_pageout_scan();
+ /* we hold vm_page_queue_free_lock now */
+ assert(vm_page_free_wanted == 0);
+
+ assert_wait(&vm_page_free_wanted, FALSE);
+ simple_unlock(&vm_page_queue_free_lock);
+ counter(c_vm_pageout_block++);
+ thread_block(vm_pageout_continue);
+ }
+}
+
+void vm_pageout()
+{
+ int free_after_reserve;
+
+ current_thread()->vm_privilege = TRUE;
+ stack_privilege(current_thread());
+
+ /*
+ * Initialize some paging parameters.
+ */
+
+ if (vm_pageout_burst_max == 0)
+ vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
+
+ if (vm_pageout_burst_min == 0)
+ vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
+
+ if (vm_pageout_burst_wait == 0)
+ vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
+
+ if (vm_pageout_empty_wait == 0)
+ vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
+
+ if (vm_page_free_reserved == 0)
+ vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
+
+ if (vm_pageout_pause_max == 0)
+ vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
+
+ if (vm_pageout_reserved_internal == 0)
+ vm_pageout_reserved_internal =
+ VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
+
+ if (vm_pageout_reserved_really == 0)
+ vm_pageout_reserved_really =
+ VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
+
+ free_after_reserve = vm_page_free_count - vm_page_free_reserved;
+
+ if (vm_page_free_min == 0)
+ vm_page_free_min = vm_page_free_reserved +
+ VM_PAGE_FREE_MIN(free_after_reserve);
+
+ if (vm_page_free_target == 0)
+ vm_page_free_target = vm_page_free_reserved +
+ VM_PAGE_FREE_TARGET(free_after_reserve);
+
+ if (vm_page_free_target < vm_page_free_min + 5)
+ vm_page_free_target = vm_page_free_min + 5;
+
+ /*
+ * vm_pageout_scan will set vm_page_inactive_target.
+ */
+
+ vm_pageout_continue();
+ /*NOTREACHED*/
+}
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
new file mode 100644
index 00000000..5b47a5e0
--- /dev/null
+++ b/vm/vm_pageout.h
@@ -0,0 +1,46 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_pageout.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1986
+ *
+ * Declarations for the pageout daemon interface.
+ */
+
+#ifndef _VM_VM_PAGEOUT_H_
+#define _VM_VM_PAGEOUT_H_
+
+#include <vm/vm_page.h>
+
+/*
+ * Exported routines.
+ */
+
+extern vm_page_t vm_pageout_setup();
+extern void vm_pageout_page();
+
+#endif	/* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
new file mode 100644
index 00000000..5c4f2822
--- /dev/null
+++ b/vm/vm_resident.c
@@ -0,0 +1,1505 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_page.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Resident memory management module.
+ */
+#include <cpus.h>
+
+#include <mach/vm_prot.h>
+#include <kern/counters.h>
+#include <kern/sched_prim.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <mach/vm_statistics.h>
+#include "vm_param.h"
+#include <kern/xpr.h>
+#include <kern/zalloc.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+
+#include <mach_vm_debug.h>
+#if MACH_VM_DEBUG
+#include <mach/kern_return.h>
+#include <mach_debug/hash_info.h>
+#include <vm/vm_user.h>
+#endif
+
+/* in zalloc.c XXX */
+extern vm_offset_t zdata;
+extern vm_size_t zdata_size;
+
+/*
+ *	Associated with each page of user-allocatable memory is a
+ * page structure.
+ */
+
+/*
+ * These variables record the values returned by vm_page_bootstrap,
+ * for debugging purposes. The implementation of pmap_steal_memory
+ * and pmap_startup here also uses them internally.
+ */
+
+vm_offset_t virtual_space_start;
+vm_offset_t virtual_space_end;
+
+/*
+ * The vm_page_lookup() routine, which provides for fast
+ * (virtual memory object, offset) to page lookup, employs
+ * the following hash table. The vm_page_{insert,remove}
+ * routines install and remove associations in the table.
+ * [This table is often called the virtual-to-physical,
+ * or VP, table.]
+ */
+typedef struct {
+ decl_simple_lock_data(,lock)
+ vm_page_t pages;
+} vm_page_bucket_t;
+
+vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
+unsigned int vm_page_bucket_count = 0; /* How big is array? */
+unsigned int vm_page_hash_mask; /* Mask for hash function */
+
+/*
+ * Resident page structures are initialized from
+ * a template (see vm_page_alloc).
+ *
+ * When adding a new field to the virtual memory
+ * object structure, be sure to add initialization
+ * (see vm_page_bootstrap).
+ */
+struct vm_page vm_page_template;
+
+/*
+ * Resident pages that represent real memory
+ * are allocated from a free list.
+ */
+vm_page_t vm_page_queue_free;
+vm_page_t vm_page_queue_fictitious;
+decl_simple_lock_data(,vm_page_queue_free_lock)
+unsigned int vm_page_free_wanted;
+int vm_page_free_count;
+int vm_page_fictitious_count;
+
+unsigned int vm_page_free_count_minimum; /* debugging */
+
+/*
+ * Occasionally, the virtual memory system uses
+ * resident page structures that do not refer to
+ * real pages, for example to leave a page with
+ * important state information in the VP table.
+ *
+ * These page structures are allocated the way
+ * most other kernel structures are.
+ */
+zone_t vm_page_zone;
+
+/*
+ * Fictitious pages don't have a physical address,
+ * but we must initialize phys_addr to something.
+ * For debugging, this should be a strange value
+ * that the pmap module can recognize in assertions.
+ */
+vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
+
+/*
+ * Resident page structures are also chained on
+ * queues that are used by the page replacement
+ * system (pageout daemon). These queues are
+ * defined here, but are shared by the pageout
+ * module.
+ */
+queue_head_t vm_page_queue_active;
+queue_head_t vm_page_queue_inactive;
+decl_simple_lock_data(,vm_page_queue_lock)
+int vm_page_active_count;
+int vm_page_inactive_count;
+int vm_page_wire_count;
+
+/*
+ * Several page replacement parameters are also
+ * shared with this module, so that page allocation
+ * (done here in vm_page_alloc) can trigger the
+ * pageout daemon.
+ */
+int vm_page_free_target = 0;
+int vm_page_free_min = 0;
+int vm_page_inactive_target = 0;
+int vm_page_free_reserved = 0;
+int vm_page_laundry_count = 0;
+
+/*
+ * The VM system has a couple of heuristics for deciding
+ * that pages are "uninteresting" and should be placed
+ * on the inactive queue as likely candidates for replacement.
+ * These variables let the heuristics be controlled at run-time
+ * to make experimentation easier.
+ */
+
+boolean_t vm_page_deactivate_behind = TRUE;
+boolean_t vm_page_deactivate_hint = TRUE;
+
+/*
+ * vm_page_bootstrap:
+ *
+ * Initializes the resident memory module.
+ *
+ * Allocates memory for the page cells, and
+ * for the object/offset-to-page hash table headers.
+ * Each page cell is initialized and placed on the free list.
+ * Returns the range of available kernel virtual memory.
+ */
+
+void vm_page_bootstrap(
+ vm_offset_t *startp,
+ vm_offset_t *endp)
+{
+ register vm_page_t m;
+ int i;
+
+ /*
+ * Initialize the vm_page template.
+ */
+
+ m = &vm_page_template;
+ m->object = VM_OBJECT_NULL; /* reset later */
+ m->offset = 0; /* reset later */
+ m->wire_count = 0;
+
+ m->inactive = FALSE;
+ m->active = FALSE;
+ m->laundry = FALSE;
+ m->free = FALSE;
+
+ m->busy = TRUE;
+ m->wanted = FALSE;
+ m->tabled = FALSE;
+ m->fictitious = FALSE;
+ m->private = FALSE;
+ m->absent = FALSE;
+ m->error = FALSE;
+ m->dirty = FALSE;
+ m->precious = FALSE;
+ m->reference = FALSE;
+
+ m->phys_addr = 0; /* reset later */
+
+ m->page_lock = VM_PROT_NONE;
+ m->unlock_request = VM_PROT_NONE;
+
+ /*
+ * Initialize the page queues.
+ */
+
+ simple_lock_init(&vm_page_queue_free_lock);
+ simple_lock_init(&vm_page_queue_lock);
+
+ vm_page_queue_free = VM_PAGE_NULL;
+ vm_page_queue_fictitious = VM_PAGE_NULL;
+ queue_init(&vm_page_queue_active);
+ queue_init(&vm_page_queue_inactive);
+
+ vm_page_free_wanted = 0;
+
+ /*
+ * Steal memory for the zone system.
+ */
+
+ kentry_data_size = kentry_count * sizeof(struct vm_map_entry);
+ kentry_data = pmap_steal_memory(kentry_data_size);
+
+ zdata = pmap_steal_memory(zdata_size);
+
+ /*
+ * Allocate (and initialize) the virtual-to-physical
+ * table hash buckets.
+ *
+ * The number of buckets should be a power of two to
+ * get a good hash function. The following computation
+ * chooses the first power of two that is greater
+ * than the number of physical pages in the system.
+ */
+
+ if (vm_page_bucket_count == 0) {
+ unsigned int npages = pmap_free_pages();
+
+ vm_page_bucket_count = 1;
+ while (vm_page_bucket_count < npages)
+ vm_page_bucket_count <<= 1;
+ }
+
+ vm_page_hash_mask = vm_page_bucket_count - 1;
+
+ if (vm_page_hash_mask & vm_page_bucket_count)
+ printf("vm_page_bootstrap: WARNING -- strange page hash\n");
+
+ vm_page_buckets = (vm_page_bucket_t *)
+ pmap_steal_memory(vm_page_bucket_count *
+ sizeof(vm_page_bucket_t));
+
+ for (i = 0; i < vm_page_bucket_count; i++) {
+ register vm_page_bucket_t *bucket = &vm_page_buckets[i];
+
+ bucket->pages = VM_PAGE_NULL;
+ simple_lock_init(&bucket->lock);
+ }
+
+ /*
+ * Machine-dependent code allocates the resident page table.
+ * It uses vm_page_init to initialize the page frames.
+ * The code also returns to us the virtual space available
+ * to the kernel. We don't trust the pmap module
+ * to get the alignment right.
+ */
+
+ pmap_startup(&virtual_space_start, &virtual_space_end);
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
+
+ *startp = virtual_space_start;
+ *endp = virtual_space_end;
+
+ printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
+ vm_page_free_count_minimum = vm_page_free_count;
+}
+
+#ifndef MACHINE_PAGES
+/*
+ * We implement pmap_steal_memory and pmap_startup with the help
+ * of two simpler functions, pmap_virtual_space and pmap_next_page.
+ */
+
+vm_offset_t pmap_steal_memory(
+ vm_size_t size)
+{
+ vm_offset_t addr, vaddr, paddr;
+
+ /*
+ * We round the size to an integer multiple.
+	 *	Round the size up to a multiple of four bytes, the alignment hard-coded below.
+
+ size = (size + 3) &~ 3;
+
+ /*
+ * If this is the first call to pmap_steal_memory,
+ * we have to initialize ourself.
+ */
+
+ if (virtual_space_start == virtual_space_end) {
+ pmap_virtual_space(&virtual_space_start, &virtual_space_end);
+
+ /*
+ * The initial values must be aligned properly, and
+ * we don't trust the pmap module to do it right.
+ */
+
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
+ }
+
+ /*
+ * Allocate virtual memory for this request.
+ */
+
+ addr = virtual_space_start;
+ virtual_space_start += size;
+
+ /*
+ * Allocate and map physical pages to back new virtual pages.
+ */
+
+ for (vaddr = round_page(addr);
+ vaddr < addr + size;
+ vaddr += PAGE_SIZE) {
+ if (!pmap_next_page(&paddr))
+ panic("pmap_steal_memory");
+
+ /*
+ * XXX Logically, these mappings should be wired,
+ * but some pmap modules barf if they are.
+ */
+
+ pmap_enter(kernel_pmap, vaddr, paddr,
+ VM_PROT_READ|VM_PROT_WRITE, FALSE);
+ }
+
+ return addr;
+}
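+
+/*
+ *	Editorial sketch, not part of the original source: with the pmap
+ *	calls stripped away, pmap_steal_memory is a simple bump allocator
+ *	over the [virtual_space_start, virtual_space_end) range.  The names
+ *	below are invented for illustration; guarded with #if 0.
+ */
+#if 0
+static unsigned long bump_next, bump_limit;	/* set up by bootstrap code */
+
+static unsigned long
+bump_alloc(unsigned long size)
+{
+	unsigned long addr;
+
+	size = (size + 3) & ~3UL;		/* same 4-byte rounding as above */
+	if (bump_limit - bump_next < size)
+		return 0;			/* out of early memory */
+	addr = bump_next;
+	bump_next += size;
+	return addr;
+}
+#endif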
+
+void pmap_startup(
+ vm_offset_t *startp,
+ vm_offset_t *endp)
+{
+ unsigned int i, npages, pages_initialized;
+ vm_page_t pages;
+ vm_offset_t paddr;
+
+ /*
+ * We calculate how many page frames we will have
+ * and then allocate the page structures in one chunk.
+ */
+
+ npages = ((PAGE_SIZE * pmap_free_pages() +
+ (round_page(virtual_space_start) - virtual_space_start)) /
+ (PAGE_SIZE + sizeof *pages));
+
+ pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
+
+ /*
+ * Initialize the page frames.
+ */
+
+ for (i = 0, pages_initialized = 0; i < npages; i++) {
+ if (!pmap_next_page(&paddr))
+ break;
+
+ vm_page_init(&pages[i], paddr);
+ pages_initialized++;
+ }
+
+ /*
+ * Release pages in reverse order so that physical pages
+ * initially get allocated in ascending addresses. This keeps
+ * the devices (which must address physical memory) happy if
+ * they require several consecutive pages.
+ */
+
+ for (i = pages_initialized; i > 0; i--) {
+ vm_page_release(&pages[i - 1]);
+ }
+
+ /*
+ * We have to re-align virtual_space_start,
+ * because pmap_steal_memory has been using it.
+ */
+
+ virtual_space_start = round_page(virtual_space_start);
+
+ *startp = virtual_space_start;
+ *endp = virtual_space_end;
+}
+#endif /* MACHINE_PAGES */
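+
+/*
+ *	Editorial sketch, not part of the original source: the npages
+ *	computation in pmap_startup above divides the remaining memory by
+ *	(PAGE_SIZE + sizeof(struct vm_page)), because every managed frame
+ *	must pay for itself plus its vm_page cell out of the same pool.
+ *	The constants below are invented example values; the alignment
+ *	slack term is ignored for simplicity.  Guarded with #if 0.
+ */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+	unsigned long page_size  = 4096;		/* assumed PAGE_SIZE */
+	unsigned long cell_size  = 64;			/* assumed sizeof(struct vm_page) */
+	unsigned long free_bytes = 32UL << 20;		/* 32 MB left to manage */
+	unsigned long npages     = free_bytes / (page_size + cell_size);
+
+	printf("%lu frames managed, %lu bytes spent on vm_page cells\n",
+	       npages, npages * cell_size);
+	return 0;
+}
+#endif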
+
+/*
+ * Routine: vm_page_module_init
+ * Purpose:
+ * Second initialization pass, to be done after
+ * the basic VM system is ready.
+ */
+void vm_page_module_init(void)
+{
+ vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ PAGE_SIZE,
+ 0, "vm pages");
+}
+
+/*
+ * Routine: vm_page_create
+ * Purpose:
+ * After the VM system is up, machine-dependent code
+ * may stumble across more physical memory. For example,
+ * memory that it was reserving for a frame buffer.
+ * vm_page_create turns this memory into available pages.
+ */
+
+void vm_page_create(
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_offset_t paddr;
+ vm_page_t m;
+
+ for (paddr = round_page(start);
+ paddr < trunc_page(end);
+ paddr += PAGE_SIZE) {
+ m = (vm_page_t) zalloc(vm_page_zone);
+ if (m == VM_PAGE_NULL)
+ panic("vm_page_create");
+
+ vm_page_init(m, paddr);
+ vm_page_release(m);
+ }
+}
+
+/*
+ * vm_page_hash:
+ *
+ * Distributes the object/offset key pair among hash buckets.
+ *
+ * NOTE: To get a good hash function, the bucket count should
+ * be a power of two.
+ */
+#define vm_page_hash(object, offset) \
+ (((unsigned int)(vm_offset_t)object + (unsigned int)atop(offset)) \
+ & vm_page_hash_mask)
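+
+/*
+ *	Editorial sketch, not part of the original source: when the bucket
+ *	count is a power of two, the "& vm_page_hash_mask" above is the same
+ *	as taking the sum modulo the bucket count, which is why
+ *	vm_page_bootstrap rounds the count up to a power of two.  Names and
+ *	values below are invented; guarded with #if 0.
+ */
+#if 0
+#include <assert.h>
+
+static unsigned int
+example_page_hash(unsigned long object_ptr, unsigned long page_index,
+		  unsigned int bucket_count)
+{
+	unsigned int mask = bucket_count - 1;
+
+	assert(bucket_count != 0 && (bucket_count & mask) == 0); /* power of two */
+	return (unsigned int)(object_ptr + page_index) & mask;
+}
+#endif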
+
+/*
+ * vm_page_insert: [ internal use only ]
+ *
+ * Inserts the given mem entry into the object/object-page
+ * table and object list.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_insert(
+ register vm_page_t mem,
+ register vm_object_t object,
+ register vm_offset_t offset)
+{
+ register vm_page_bucket_t *bucket;
+
+ VM_PAGE_CHECK(mem);
+
+ if (mem->tabled)
+ panic("vm_page_insert");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+ * Insert it into the object_object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ simple_lock(&bucket->lock);
+ mem->next = bucket->pages;
+ bucket->pages = mem;
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ queue_enter(&object->memq, mem, vm_page_t, listq);
+ mem->tabled = TRUE;
+
+ /*
+ * Show that the object has one more resident page.
+ */
+
+ object->resident_page_count++;
+
+ /*
+	 *	Detect sequential access and deactivate the previous page.
+ * We ignore busy pages.
+ */
+
+ if (vm_page_deactivate_behind &&
+ (offset == object->last_alloc + PAGE_SIZE)) {
+ vm_page_t last_mem;
+
+ last_mem = vm_page_lookup(object, object->last_alloc);
+ if ((last_mem != VM_PAGE_NULL) && !last_mem->busy)
+ vm_page_deactivate(last_mem);
+ }
+ object->last_alloc = offset;
+}
+
+/*
+ * vm_page_replace:
+ *
+ * Exactly like vm_page_insert, except that we first
+ * remove any existing page at the given offset in object
+ * and we don't do deactivate-behind.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_replace(
+ register vm_page_t mem,
+ register vm_object_t object,
+ register vm_offset_t offset)
+{
+ register vm_page_bucket_t *bucket;
+
+ VM_PAGE_CHECK(mem);
+
+ if (mem->tabled)
+ panic("vm_page_replace");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+ * Insert it into the object_object/offset hash table,
+ * replacing any page that might have been there.
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ simple_lock(&bucket->lock);
+ if (bucket->pages) {
+ vm_page_t *mp = &bucket->pages;
+ register vm_page_t m = *mp;
+ do {
+ if (m->object == object && m->offset == offset) {
+ /*
+ * Remove page from bucket and from object,
+ * and return it to the free list.
+ */
+ *mp = m->next;
+ queue_remove(&object->memq, m, vm_page_t,
+ listq);
+ m->tabled = FALSE;
+ object->resident_page_count--;
+
+ /*
+ * Return page to the free list.
+ * Note the page is not tabled now, so this
+ * won't self-deadlock on the bucket lock.
+ */
+
+ vm_page_free(m);
+ break;
+ }
+ mp = &m->next;
+ } while ((m = *mp) != 0);
+ mem->next = bucket->pages;
+ } else {
+ mem->next = VM_PAGE_NULL;
+ }
+ bucket->pages = mem;
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ queue_enter(&object->memq, mem, vm_page_t, listq);
+ mem->tabled = TRUE;
+
+ /*
+ * And show that the object has one more resident
+ * page.
+ */
+
+ object->resident_page_count++;
+}
+
+/*
+ * vm_page_remove: [ internal use only ]
+ *
+ * Removes the given mem entry from the object/offset-page
+ * table and the object page list.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_remove(
+ register vm_page_t mem)
+{
+ register vm_page_bucket_t *bucket;
+ register vm_page_t this;
+
+ assert(mem->tabled);
+ VM_PAGE_CHECK(mem);
+
+ /*
+ * Remove from the object_object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
+ simple_lock(&bucket->lock);
+ if ((this = bucket->pages) == mem) {
+ /* optimize for common case */
+
+ bucket->pages = mem->next;
+ } else {
+ register vm_page_t *prev;
+
+ for (prev = &this->next;
+ (this = *prev) != mem;
+ prev = &this->next)
+ continue;
+ *prev = this->next;
+ }
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now remove from the object's list of backed pages.
+ */
+
+ queue_remove(&mem->object->memq, mem, vm_page_t, listq);
+
+ /*
+ * And show that the object has one fewer resident
+ * page.
+ */
+
+ mem->object->resident_page_count--;
+
+ mem->tabled = FALSE;
+}
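+
+/*
+ *	Editorial sketch, not part of the original source: the bucket unlink
+ *	above walks a pointer-to-a-pointer so that removing the first element
+ *	and removing a middle element use the same splice.  The same pattern,
+ *	over a generic singly linked list; guarded with #if 0.
+ */
+#if 0
+struct slist_node {
+	struct slist_node *next;
+};
+
+static void
+slist_unlink(struct slist_node **head, struct slist_node *victim)
+{
+	struct slist_node **prev;
+
+	for (prev = head; *prev != 0; prev = &(*prev)->next) {
+		if (*prev == victim) {
+			*prev = victim->next;	/* splice the victim out */
+			return;
+		}
+	}
+}
+#endif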
+
+/*
+ * vm_page_lookup:
+ *
+ * Returns the page associated with the object/offset
+ * pair specified; if none is found, VM_PAGE_NULL is returned.
+ *
+ * The object must be locked. No side effects.
+ */
+
+vm_page_t vm_page_lookup(
+ register vm_object_t object,
+ register vm_offset_t offset)
+{
+ register vm_page_t mem;
+ register vm_page_bucket_t *bucket;
+
+ /*
+ * Search the hash table for this object/offset pair
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+
+ simple_lock(&bucket->lock);
+ for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
+ VM_PAGE_CHECK(mem);
+ if ((mem->object == object) && (mem->offset == offset))
+ break;
+ }
+ simple_unlock(&bucket->lock);
+ return mem;
+}
+
+/*
+ * vm_page_rename:
+ *
+ * Move the given memory entry from its
+ * current object to the specified target object/offset.
+ *
+ * The object must be locked.
+ */
+void vm_page_rename(
+ register vm_page_t mem,
+ register vm_object_t new_object,
+ vm_offset_t new_offset)
+{
+ /*
+ * Changes to mem->object require the page lock because
+ * the pageout daemon uses that lock to get the object.
+ */
+
+ vm_page_lock_queues();
+ vm_page_remove(mem);
+ vm_page_insert(mem, new_object, new_offset);
+ vm_page_unlock_queues();
+}
+
+/*
+ * vm_page_init:
+ *
+ * Initialize the fields in a new page.
+ * This takes a structure with random values and initializes it
+ * so that it can be given to vm_page_release or vm_page_insert.
+ */
+void vm_page_init(
+ vm_page_t mem,
+ vm_offset_t phys_addr)
+{
+ *mem = vm_page_template;
+ mem->phys_addr = phys_addr;
+}
+
+/*
+ * vm_page_grab_fictitious:
+ *
+ * Remove a fictitious page from the free list.
+ * Returns VM_PAGE_NULL if there are no free pages.
+ */
+
+vm_page_t vm_page_grab_fictitious(void)
+{
+ register vm_page_t m;
+
+ simple_lock(&vm_page_queue_free_lock);
+ m = vm_page_queue_fictitious;
+ if (m != VM_PAGE_NULL) {
+ vm_page_fictitious_count--;
+ vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
+ m->free = FALSE;
+ }
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return m;
+}
+
+/*
+ * vm_page_release_fictitious:
+ *
+ * Release a fictitious page to the free list.
+ */
+
+void vm_page_release_fictitious(
+ register vm_page_t m)
+{
+ simple_lock(&vm_page_queue_free_lock);
+ if (m->free)
+ panic("vm_page_release_fictitious");
+ m->free = TRUE;
+ m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
+ vm_page_queue_fictitious = m;
+ vm_page_fictitious_count++;
+ simple_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_more_fictitious:
+ *
+ * Add more fictitious pages to the free list.
+ * Allowed to block.
+ */
+
+int vm_page_fictitious_quantum = 5;
+
+void vm_page_more_fictitious(void)
+{
+ register vm_page_t m;
+ int i;
+
+ for (i = 0; i < vm_page_fictitious_quantum; i++) {
+ m = (vm_page_t) zalloc(vm_page_zone);
+ if (m == VM_PAGE_NULL)
+ panic("vm_page_more_fictitious");
+
+ vm_page_init(m, vm_page_fictitious_addr);
+ m->fictitious = TRUE;
+ vm_page_release_fictitious(m);
+ }
+}
+
+/*
+ * vm_page_convert:
+ *
+ * Attempt to convert a fictitious page into a real page.
+ */
+
+boolean_t vm_page_convert(
+ register vm_page_t m)
+{
+ register vm_page_t real_m;
+
+ real_m = vm_page_grab();
+ if (real_m == VM_PAGE_NULL)
+ return FALSE;
+
+ m->phys_addr = real_m->phys_addr;
+ m->fictitious = FALSE;
+
+ real_m->phys_addr = vm_page_fictitious_addr;
+ real_m->fictitious = TRUE;
+
+ vm_page_release_fictitious(real_m);
+ return TRUE;
+}
+
+/*
+ * vm_page_grab:
+ *
+ * Remove a page from the free list.
+ * Returns VM_PAGE_NULL if the free list is too small.
+ */
+
+vm_page_t vm_page_grab(void)
+{
+ register vm_page_t mem;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Only let privileged threads (involved in pageout)
+ * dip into the reserved pool.
+ */
+
+ if ((vm_page_free_count < vm_page_free_reserved) &&
+ !current_thread()->vm_privilege) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return VM_PAGE_NULL;
+ }
+
+ if (vm_page_queue_free == VM_PAGE_NULL)
+ panic("vm_page_grab");
+
+ if (--vm_page_free_count < vm_page_free_count_minimum)
+ vm_page_free_count_minimum = vm_page_free_count;
+ mem = vm_page_queue_free;
+ vm_page_queue_free = (vm_page_t) mem->pageq.next;
+ mem->free = FALSE;
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * Decide if we should poke the pageout daemon.
+ * We do this if the free count is less than the low
+ * water mark, or if the free count is less than the high
+ * water mark (but above the low water mark) and the inactive
+ * count is less than its target.
+ *
+ * We don't have the counts locked ... if they change a little,
+ * it doesn't really matter.
+ */
+
+ if ((vm_page_free_count < vm_page_free_min) ||
+ ((vm_page_free_count < vm_page_free_target) &&
+ (vm_page_inactive_count < vm_page_inactive_target)))
+ thread_wakeup((event_t) &vm_page_free_wanted);
+
+ return mem;
+}
+
+vm_offset_t vm_page_grab_phys_addr(void)
+{
+ vm_page_t p = vm_page_grab();
+ if (p == VM_PAGE_NULL)
+ return -1;
+ else
+ return p->phys_addr;
+}
+
+/*
+ * vm_page_grab_contiguous_pages:
+ *
+ * Take N pages off the free list, the pages should
+ * cover a contiguous range of physical addresses.
+ * [Used by device drivers to cope with DMA limitations]
+ *
+ * Returns the page descriptors in ascending order, or
+ *	KERN_RESOURCE_SHORTAGE if it could not.
+ */
+
+/* Biggest phys page number for the pages we handle in VM */
+
+vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */
+
+kern_return_t
+vm_page_grab_contiguous_pages(
+ int npages,
+ vm_page_t pages[],
+ natural_t *bits)
+{
+ register int first_set;
+ int size, alloc_size;
+ kern_return_t ret;
+ vm_page_t mem, prevmem;
+
+#ifndef NBBY
+#define	NBBY	8	/* number of bits in a byte */
+#endif
+
+#define NBPEL (sizeof(natural_t)*NBBY)
+
+ size = (vm_page_big_pagenum + NBPEL - 1)
+ & ~(NBPEL - 1); /* in bits */
+
+ size = size / NBBY; /* in bytes */
+
+ /*
+ * If we are called before the VM system is fully functional
+ * the invoker must provide us with the work space. [one bit
+ * per page starting at phys 0 and up to vm_page_big_pagenum]
+ */
+ if (bits == 0) {
+ alloc_size = round_page(size);
+ if (kmem_alloc_wired(kernel_map,
+ (vm_offset_t *)&bits,
+ alloc_size)
+ != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+ } else
+ alloc_size = 0;
+
+ bzero(bits, size);
+
+ /*
+	 * A very coarse-grained lock hold, but the call is rare, so that is OK.
+ */
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Do not dip into the reserved pool.
+ */
+
+ if (vm_page_free_count < vm_page_free_reserved) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ /*
+ * First pass through, build a big bit-array of
+ * the pages that are free. It is not going to
+	 * be too large anyway; in 4k bytes we can fit bits
+ * for 32k pages.
+ */
+ mem = vm_page_queue_free;
+ while (mem) {
+ register int word_index, bit_index;
+
+ bit_index = (mem->phys_addr >> PAGE_SHIFT);
+ word_index = bit_index / NBPEL;
+ bit_index = bit_index - (word_index * NBPEL);
+ bits[word_index] |= 1 << bit_index;
+
+ mem = (vm_page_t) mem->pageq.next;
+ }
+
+ /*
+ * Second loop. Scan the bit array for NPAGES
+ * contiguous bits. That gives us, if any,
+ * the range of pages we will be grabbing off
+ * the free list.
+ */
+ {
+ register int bits_so_far = 0, i;
+
+ first_set = 0;
+
+ for (i = 0; i < size; i += sizeof(natural_t)) {
+
+ register natural_t v = bits[i / sizeof(natural_t)];
+ register int bitpos;
+
+ /*
+ * Bitscan this one word
+ */
+ if (v) {
+ /*
+ * keep counting them beans ?
+ */
+ bitpos = 0;
+
+ if (bits_so_far) {
+count_ones:
+ while (v & 1) {
+ bitpos++;
+ /*
+ * got enough beans ?
+ */
+ if (++bits_so_far == npages)
+ goto found_em;
+ v >>= 1;
+ }
+ /* if we are being lucky, roll again */
+ if (bitpos == NBPEL)
+ continue;
+ }
+
+ /*
+ * search for beans here
+ */
+ bits_so_far = 0;
+count_zeroes:
+ while ((bitpos < NBPEL) && ((v & 1) == 0)) {
+ bitpos++;
+ v >>= 1;
+ }
+ if (v & 1) {
+ first_set = (i * NBBY) + bitpos;
+ goto count_ones;
+ }
+ }
+ /*
+ * No luck
+ */
+ bits_so_far = 0;
+ }
+ }
+
+ /*
+ * We could not find enough contiguous pages.
+ */
+not_found_em:
+ simple_unlock(&vm_page_queue_free_lock);
+
+ ret = KERN_RESOURCE_SHORTAGE;
+ goto out;
+
+ /*
+ * Final pass. Now we know which pages we want.
+ * Scan the list until we find them all, grab
+ * pages as we go. FIRST_SET tells us where
+ * in the bit-array our pages start.
+ */
+found_em:
+ vm_page_free_count -= npages;
+ if (vm_page_free_count < vm_page_free_count_minimum)
+ vm_page_free_count_minimum = vm_page_free_count;
+
+ {
+ register vm_offset_t first_phys, last_phys;
+
+ /* cache values for compare */
+ first_phys = first_set << PAGE_SHIFT;
+ last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */
+
+ /* running pointers */
+ mem = vm_page_queue_free;
+ prevmem = VM_PAGE_NULL;
+
+ while (mem) {
+
+ register vm_offset_t addr;
+
+ addr = mem->phys_addr;
+
+ if ((addr >= first_phys) &&
+ (addr < last_phys)) {
+ if (prevmem)
+ prevmem->pageq.next = mem->pageq.next;
+ pages[(addr - first_phys) >> PAGE_SHIFT] = mem;
+ mem->free = FALSE;
+ /*
+ * Got them all ?
+ */
+ if (--npages == 0) break;
+ } else
+ prevmem = mem;
+
+ mem = (vm_page_t) mem->pageq.next;
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * Decide if we should poke the pageout daemon.
+ * We do this if the free count is less than the low
+ * water mark, or if the free count is less than the high
+ * water mark (but above the low water mark) and the inactive
+ * count is less than its target.
+ *
+ * We don't have the counts locked ... if they change a little,
+ * it doesn't really matter.
+ */
+
+ if ((vm_page_free_count < vm_page_free_min) ||
+ ((vm_page_free_count < vm_page_free_target) &&
+ (vm_page_inactive_count < vm_page_inactive_target)))
+ thread_wakeup(&vm_page_free_wanted);
+
+ ret = KERN_SUCCESS;
+out:
+ if (alloc_size)
+ kmem_free(kernel_map, (vm_offset_t) bits, alloc_size);
+
+ return ret;
+}
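+
+/*
+ *	Editorial sketch, not part of the original source: the word-at-a-time
+ *	scan above is looking for `npages' consecutive set bits in a bitmap of
+ *	free frames.  A simpler bit-at-a-time version of that search, for
+ *	reference only; guarded with #if 0.
+ */
+#if 0
+static long
+find_contiguous_run(const unsigned int *bits, unsigned long nbits,
+		    unsigned long npages)
+{
+	unsigned long i, run = 0;
+	unsigned long wbits = 8 * sizeof *bits;
+
+	for (i = 0; i < nbits; i++) {
+		if (bits[i / wbits] & (1U << (i % wbits))) {
+			if (++run == npages)
+				return (long)(i - npages + 1);	/* start of the run */
+		} else
+			run = 0;
+	}
+	return -1;	/* no run of npages free frames */
+}
+#endif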
+
+/*
+ * vm_page_release:
+ *
+ * Return a page to the free list.
+ */
+
+void vm_page_release(
+ register vm_page_t mem)
+{
+ simple_lock(&vm_page_queue_free_lock);
+ if (mem->free)
+ panic("vm_page_release");
+ mem->free = TRUE;
+ mem->pageq.next = (queue_entry_t) vm_page_queue_free;
+ vm_page_queue_free = mem;
+ vm_page_free_count++;
+
+ /*
+	 *	Check if we should wake up someone waiting for a page.
+ * But don't bother waking them unless they can allocate.
+ *
+	 *	We wake up only one thread, to prevent starvation.
+	 *	Because the scheduling system handles wait queues FIFO,
+	 *	if we wake up all waiting threads, one greedy thread
+	 *	can starve multiple well-behaved threads.  When the threads
+	 *	all wake up, the greedy thread runs first, grabs the page,
+ * and waits for another page. It will be the first to run
+ * when the next page is freed.
+ *
+ * However, there is a slight danger here.
+ * The thread we wake might not use the free page.
+ * Then the other threads could wait indefinitely
+ * while the page goes unused. To forestall this,
+ * the pageout daemon will keep making free pages
+ * as long as vm_page_free_wanted is non-zero.
+ */
+
+ if ((vm_page_free_wanted > 0) &&
+ (vm_page_free_count >= vm_page_free_reserved)) {
+ vm_page_free_wanted--;
+ thread_wakeup_one((event_t) &vm_page_free_count);
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_wait:
+ *
+ * Wait for a page to become available.
+ * If there are plenty of free pages, then we don't sleep.
+ */
+
+void vm_page_wait(
+ void (*continuation)(void))
+{
+
+#ifndef CONTINUATIONS
+ assert (continuation == 0);
+#endif
+
+ /*
+ * We can't use vm_page_free_reserved to make this
+ * determination. Consider: some thread might
+ * need to allocate two pages. The first allocation
+ * succeeds, the second fails. After the first page is freed,
+ * a call to vm_page_wait must really block.
+ */
+
+ simple_lock(&vm_page_queue_free_lock);
+ if (vm_page_free_count < vm_page_free_target) {
+ if (vm_page_free_wanted++ == 0)
+ thread_wakeup((event_t)&vm_page_free_wanted);
+ assert_wait((event_t)&vm_page_free_count, FALSE);
+ simple_unlock(&vm_page_queue_free_lock);
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+ } else
+ simple_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_alloc:
+ *
+ * Allocate and return a memory cell associated
+ * with this VM object/offset pair.
+ *
+ * Object must be locked.
+ */
+
+vm_page_t vm_page_alloc(
+ vm_object_t object,
+ vm_offset_t offset)
+{
+ register vm_page_t mem;
+
+ mem = vm_page_grab();
+ if (mem == VM_PAGE_NULL)
+ return VM_PAGE_NULL;
+
+ vm_page_lock_queues();
+ vm_page_insert(mem, object, offset);
+ vm_page_unlock_queues();
+
+ return mem;
+}
+
+/*
+ * vm_page_free:
+ *
+ * Returns the given page to the free list,
+ * disassociating it with any VM object.
+ *
+ * Object and page queues must be locked prior to entry.
+ */
+void vm_page_free(
+ register vm_page_t mem)
+{
+ if (mem->free)
+ panic("vm_page_free");
+
+ if (mem->tabled)
+ vm_page_remove(mem);
+ VM_PAGE_QUEUES_REMOVE(mem);
+
+ if (mem->wire_count != 0) {
+ if (!mem->private && !mem->fictitious)
+ vm_page_wire_count--;
+ mem->wire_count = 0;
+ }
+
+ if (mem->laundry) {
+ vm_page_laundry_count--;
+ mem->laundry = FALSE;
+ }
+
+ PAGE_WAKEUP_DONE(mem);
+
+ if (mem->absent)
+ vm_object_absent_release(mem->object);
+
+ /*
+ * XXX The calls to vm_page_init here are
+ * really overkill.
+ */
+
+ if (mem->private || mem->fictitious) {
+ vm_page_init(mem, vm_page_fictitious_addr);
+ mem->fictitious = TRUE;
+ vm_page_release_fictitious(mem);
+ } else {
+ vm_page_init(mem, mem->phys_addr);
+ vm_page_release(mem);
+ }
+}
+
+/*
+ * vm_page_wire:
+ *
+ * Mark this page as wired down by yet
+ * another map, removing it from paging queues
+ * as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void vm_page_wire(
+ register vm_page_t mem)
+{
+ VM_PAGE_CHECK(mem);
+
+ if (mem->wire_count == 0) {
+ VM_PAGE_QUEUES_REMOVE(mem);
+ if (!mem->private && !mem->fictitious)
+ vm_page_wire_count++;
+ }
+ mem->wire_count++;
+}
+
+/*
+ * vm_page_unwire:
+ *
+ * Release one wiring of this page, potentially
+ * enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void vm_page_unwire(
+ register vm_page_t mem)
+{
+ VM_PAGE_CHECK(mem);
+
+ if (--mem->wire_count == 0) {
+ queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
+ vm_page_active_count++;
+ mem->active = TRUE;
+ if (!mem->private && !mem->fictitious)
+ vm_page_wire_count--;
+ }
+}
+
+/*
+ * vm_page_deactivate:
+ *
+ * Returns the given page to the inactive list,
+ * indicating that no physical maps have access
+ * to this page. [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void vm_page_deactivate(
+ register vm_page_t m)
+{
+ VM_PAGE_CHECK(m);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (m->active || (m->inactive && m->reference)) {
+ if (!m->fictitious && !m->absent)
+ pmap_clear_reference(m->phys_addr);
+ m->reference = FALSE;
+ VM_PAGE_QUEUES_REMOVE(m);
+ }
+ if (m->wire_count == 0 && !m->inactive) {
+ queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
+ m->inactive = TRUE;
+ vm_page_inactive_count++;
+ }
+}
+
+/*
+ * vm_page_activate:
+ *
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+
+void vm_page_activate(
+ register vm_page_t m)
+{
+ VM_PAGE_CHECK(m);
+
+ if (m->inactive) {
+ queue_remove(&vm_page_queue_inactive, m, vm_page_t,
+ pageq);
+ vm_page_inactive_count--;
+ m->inactive = FALSE;
+ }
+ if (m->wire_count == 0) {
+ if (m->active)
+ panic("vm_page_activate: already active");
+
+ queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
+ m->active = TRUE;
+ vm_page_active_count++;
+ }
+}
+
+/*
+ * vm_page_zero_fill:
+ *
+ * Zero-fill the specified page.
+ */
+void vm_page_zero_fill(
+ vm_page_t m)
+{
+ VM_PAGE_CHECK(m);
+
+ pmap_zero_page(m->phys_addr);
+}
+
+/*
+ * vm_page_copy:
+ *
+ * Copy one page to another
+ */
+
+void vm_page_copy(
+ vm_page_t src_m,
+ vm_page_t dest_m)
+{
+ VM_PAGE_CHECK(src_m);
+ VM_PAGE_CHECK(dest_m);
+
+ pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
+}
+
+#if MACH_VM_DEBUG
+/*
+ * Routine: vm_page_info
+ * Purpose:
+ * Return information about the global VP table.
+ * Fills the buffer with as much information as possible
+ * and returns the desired size of the buffer.
+ * Conditions:
+ * Nothing locked. The caller should provide
+ * possibly-pageable memory.
+ */
+
+unsigned int
+vm_page_info(
+ hash_info_bucket_t *info,
+ unsigned int count)
+{
+ int i;
+
+ if (vm_page_bucket_count < count)
+ count = vm_page_bucket_count;
+
+ for (i = 0; i < count; i++) {
+ vm_page_bucket_t *bucket = &vm_page_buckets[i];
+ unsigned int bucket_count = 0;
+ vm_page_t m;
+
+ simple_lock(&bucket->lock);
+ for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
+ bucket_count++;
+ simple_unlock(&bucket->lock);
+
+ /* don't touch pageable memory while holding locks */
+ info[i].hib_count = bucket_count;
+ }
+
+ return vm_page_bucket_count;
+}
+#endif /* MACH_VM_DEBUG */
+
+#include <mach_kdb.h>
+#if MACH_KDB
+#define printf kdbprintf
+
+/*
+ * Routine: vm_page_print [exported]
+ */
+void vm_page_print(p)
+ vm_page_t p;
+{
+ iprintf("Page 0x%X: object 0x%X,", (vm_offset_t) p, (vm_offset_t) p->object);
+	printf(" offset 0x%X,", (vm_offset_t) p->offset);
+	printf(" wire_count %d,", p->wire_count);
+ printf(" %s",
+ (p->active ? "active" : (p->inactive ? "inactive" : "loose")));
+ printf("%s",
+ (p->free ? " free" : ""));
+ printf("%s ",
+ (p->laundry ? " laundry" : ""));
+ printf("%s",
+ (p->dirty ? "dirty" : "clean"));
+ printf("%s",
+ (p->busy ? " busy" : ""));
+ printf("%s",
+ (p->absent ? " absent" : ""));
+ printf("%s",
+ (p->error ? " error" : ""));
+ printf("%s",
+ (p->fictitious ? " fictitious" : ""));
+ printf("%s",
+ (p->private ? " private" : ""));
+ printf("%s",
+ (p->wanted ? " wanted" : ""));
+ printf("%s,",
+ (p->tabled ? "" : "not_tabled"));
+ printf("phys_addr = 0x%X, lock = 0x%X, unlock_request = 0x%X\n",
+ (vm_offset_t) p->phys_addr,
+ (vm_offset_t) p->page_lock,
+ (vm_offset_t) p->unlock_request);
+}
+#endif /* MACH_KDB */
diff --git a/vm/vm_user.c b/vm/vm_user.c
new file mode 100644
index 00000000..ebe98449
--- /dev/null
+++ b/vm/vm_user.c
@@ -0,0 +1,397 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_user.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * User-exported virtual memory functions.
+ */
+
+#include <mach/boolean.h>
+#include <mach/kern_return.h>
+#include <mach/mach_types.h> /* to get vm_address_t */
+#include <mach/memory_object.h>
+#include <mach/std_types.h> /* to get pointer_t */
+#include <mach/vm_attributes.h>
+#include <mach/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <kern/host.h>
+#include <kern/task.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+
+
+
+vm_statistics_data_t vm_stat;
+
+/*
+ *	vm_allocate allocates "zero fill" memory in the specified
+ * map.
+ */
+kern_return_t vm_allocate(map, addr, size, anywhere)
+ register vm_map_t map;
+ register vm_offset_t *addr;
+ register vm_size_t size;
+ boolean_t anywhere;
+{
+ kern_return_t result;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+ if (size == 0) {
+ *addr = 0;
+ return(KERN_SUCCESS);
+ }
+
+ if (anywhere)
+ *addr = vm_map_min(map);
+ else
+ *addr = trunc_page(*addr);
+ size = round_page(size);
+
+ result = vm_map_enter(
+ map,
+ addr,
+ size,
+ (vm_offset_t)0,
+ anywhere,
+ VM_OBJECT_NULL,
+ (vm_offset_t)0,
+ FALSE,
+ VM_PROT_DEFAULT,
+ VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+
+ return(result);
+}
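+
+/*
+ *	Editorial sketch, not part of the original source: trunc_page and
+ *	round_page above are the usual power-of-two alignment idioms.  With
+ *	an assumed 4096-byte page they expand to roughly the following;
+ *	guarded with #if 0.
+ */
+#if 0
+#define EXAMPLE_PAGE_SIZE	4096UL
+#define example_trunc_page(x)	((x) & ~(EXAMPLE_PAGE_SIZE - 1))
+#define example_round_page(x)	(((x) + EXAMPLE_PAGE_SIZE - 1) & ~(EXAMPLE_PAGE_SIZE - 1))
+
+/*
+ * example_trunc_page(0x12345) == 0x12000
+ * example_round_page(0x12345) == 0x13000
+ * example_round_page(0x12000) == 0x12000	(already page aligned)
+ */
+#endif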
+
+/*
+ * vm_deallocate deallocates the specified range of addresses in the
+ * specified address map.
+ */
+kern_return_t vm_deallocate(map, start, size)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size == (vm_offset_t) 0)
+ return(KERN_SUCCESS);
+
+ return(vm_map_remove(map, trunc_page(start), round_page(start+size)));
+}
+
+/*
+ * vm_inherit sets the inheritance of the specified range in the
+ * specified map.
+ */
+kern_return_t vm_inherit(map, start, size, new_inheritance)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+ vm_inherit_t new_inheritance;
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ switch (new_inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+	/* Check if range includes projected buffer;
+	   user is not allowed direct manipulation in that case. */
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_inherit(map,
+ trunc_page(start),
+ round_page(start+size),
+ new_inheritance));
+}
+
+/*
+ * vm_protect sets the protection of the specified range in the
+ * specified map.
+ */
+
+kern_return_t vm_protect(map, start, size, set_maximum, new_protection)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+ boolean_t set_maximum;
+ vm_prot_t new_protection;
+{
+ if ((map == VM_MAP_NULL) ||
+ (new_protection & ~(VM_PROT_ALL|VM_PROT_NOTIFY)))
+ return(KERN_INVALID_ARGUMENT);
+
+	/* Check if range includes projected buffer;
+	   user is not allowed direct manipulation in that case. */
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_protect(map,
+ trunc_page(start),
+ round_page(start+size),
+ new_protection,
+ set_maximum));
+}
+
+kern_return_t vm_statistics(map, stat)
+ vm_map_t map;
+ vm_statistics_data_t *stat;
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ *stat = vm_stat;
+
+ stat->pagesize = PAGE_SIZE;
+ stat->free_count = vm_page_free_count;
+ stat->active_count = vm_page_active_count;
+ stat->inactive_count = vm_page_inactive_count;
+ stat->wire_count = vm_page_wire_count;
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Handle machine-specific attributes for a mapping, such
+ * as cachability, migrability, etc.
+ */
+kern_return_t vm_machine_attribute(map, address, size, attribute, value)
+ vm_map_t map;
+ vm_address_t address;
+ vm_size_t size;
+ vm_machine_attribute_t attribute;
+ vm_machine_attribute_val_t* value; /* IN/OUT */
+{
+ extern kern_return_t vm_map_machine_attribute();
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+	/* Check if range includes projected buffer;
+	   user is not allowed direct manipulation in that case. */
+ if (projected_buffer_in_range(map, address, address+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return vm_map_machine_attribute(map, address, size, attribute, value);
+}
+
+kern_return_t vm_read(map, address, size, data, data_size)
+ vm_map_t map;
+ vm_address_t address;
+ vm_size_t size;
+ pointer_t *data;
+ vm_size_t *data_size;
+{
+ kern_return_t error;
+ vm_map_copy_t ipc_address;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if ((error = vm_map_copyin(map,
+ address,
+ size,
+ FALSE, /* src_destroy */
+ &ipc_address)) == KERN_SUCCESS) {
+ *data = (pointer_t) ipc_address;
+ *data_size = size;
+ }
+ return(error);
+}
+
+kern_return_t vm_write(map, address, data, size)
+ vm_map_t map;
+ vm_address_t address;
+ pointer_t data;
+ vm_size_t size;
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ return vm_map_copy_overwrite(map, address, (vm_map_copy_t) data,
+ FALSE /* interruptible XXX */);
+}
+
+kern_return_t vm_copy(map, source_address, size, dest_address)
+ vm_map_t map;
+ vm_address_t source_address;
+ vm_size_t size;
+ vm_address_t dest_address;
+{
+ vm_map_copy_t copy;
+ kern_return_t kr;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ kr = vm_map_copyin(map, source_address, size,
+ FALSE, &copy);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ kr = vm_map_copy_overwrite(map, dest_address, copy,
+ FALSE /* interruptible XXX */);
+ if (kr != KERN_SUCCESS) {
+ vm_map_copy_discard(copy);
+ return kr;
+ }
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_map
+ */
+kern_return_t vm_map(
+ target_map,
+ address, size, mask, anywhere,
+ memory_object, offset,
+ copy,
+ cur_protection, max_protection, inheritance)
+ vm_map_t target_map;
+ vm_offset_t *address;
+ vm_size_t size;
+ vm_offset_t mask;
+ boolean_t anywhere;
+ ipc_port_t memory_object;
+ vm_offset_t offset;
+ boolean_t copy;
+ vm_prot_t cur_protection;
+ vm_prot_t max_protection;
+ vm_inherit_t inheritance;
+{
+ register
+ vm_object_t object;
+ register
+ kern_return_t result;
+
+ if ((target_map == VM_MAP_NULL) ||
+ (cur_protection & ~VM_PROT_ALL) ||
+ (max_protection & ~VM_PROT_ALL))
+ return(KERN_INVALID_ARGUMENT);
+
+ switch (inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ *address = trunc_page(*address);
+ size = round_page(size);
+
+ if (!IP_VALID(memory_object)) {
+ object = VM_OBJECT_NULL;
+ offset = 0;
+ copy = FALSE;
+ } else if ((object = vm_object_enter(memory_object, size, FALSE))
+ == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*
+ * Perform the copy if requested
+ */
+
+ if (copy) {
+ vm_object_t new_object;
+ vm_offset_t new_offset;
+
+ result = vm_object_copy_strategically(object, offset, size,
+ &new_object, &new_offset,
+ &copy);
+
+ /*
+ * Throw away the reference to the
+ * original object, as it won't be mapped.
+ */
+
+ vm_object_deallocate(object);
+
+ if (result != KERN_SUCCESS)
+ return (result);
+
+ object = new_object;
+ offset = new_offset;
+ }
+
+ if ((result = vm_map_enter(target_map,
+ address, size, mask, anywhere,
+ object, offset,
+ copy,
+ cur_protection, max_protection, inheritance
+ )) != KERN_SUCCESS)
+ vm_object_deallocate(object);
+ return(result);
+}
+
+/*
+ * Specify that the range of the virtual address space
+ * of the target task must not cause page faults for
+ * the indicated accesses.
+ *
+ * [ To unwire the pages, specify VM_PROT_NONE. ]
+ */
+kern_return_t vm_wire(host, map, start, size, access)
+ host_t host;
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+ vm_prot_t access;
+{
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ if (access & ~VM_PROT_ALL)
+ return KERN_INVALID_ARGUMENT;
+
+	/* Check if range includes projected buffer;
+	   user is not allowed direct manipulation in that case. */
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return vm_map_pageable_user(map,
+ trunc_page(start),
+ round_page(start+size),
+ access);
+}
diff --git a/vm/vm_user.h b/vm/vm_user.h
new file mode 100644
index 00000000..f8740107
--- /dev/null
+++ b/vm/vm_user.h
@@ -0,0 +1,50 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_user.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1986
+ *
+ * Declarations of user-visible virtual address space
+ * management functionality.
+ */
+
+#ifndef _VM_VM_USER_H_
+#define _VM_VM_USER_H_
+
+#include <mach/kern_return.h>
+
+extern kern_return_t vm_allocate();
+extern kern_return_t vm_deallocate();
+extern kern_return_t vm_inherit();
+extern kern_return_t vm_protect();
+extern kern_return_t vm_statistics();
+extern kern_return_t vm_read();
+extern kern_return_t vm_write();
+extern kern_return_t vm_copy();
+extern kern_return_t vm_map();
+
+#endif	/* _VM_VM_USER_H_ */