From 7b87e10b89a190bca0d9e6c5e183984411840dc3 Mon Sep 17 00:00:00 2001 From: Sergey Bugaev Date: Fri, 5 Apr 2024 18:18:48 +0300 Subject: vm: Fix use-after-free in vm_map_pageable_scan() When operating on the kernel map, vm_map_pageable_scan() does what the code itself describes as "HACK HACK HACK HACK": it unlocks the map, and calls vm_fault_wire() with the map unlocked. This hack is required to avoid a deadlock in case vm_fault or one of its callees (perhaps, a pager) needs to allocate memory in the kernel map. The hack relies on other kernel code being "well-behaved", in particular on that nothing will do any serious changes to this region of memory while the map is unlocked, since this region of memory is "owned" by the caller. This reasoning doesn't apply to the validity of the 'end' entry (the first entry after the region to be wired), since it's not a part of the region, and is "owned" by someone else. Once the map is unlocked, the 'end' entry could get deallocated. Alternatively, a different entry could get inserted after the VM region in front of 'end', which would break the 'for (entry = start; entry != end; entry = entry->vme_next)' loop condition. This was not an issue in the original Mach 3 kernel, since it used an address range check for the loop condition, but got broken in commit 023401c5b97023670a44059a60eb2a3a11c8a929 "VM: rework map entry wiring". Fix this by switching the iteration back to use an address check. This partly fixes a deadlock with concurrent mach_port_names() calls on SMP, which was Reported-by: Damien Zammit Message-ID: <20240405151850.41633-1-bugaevc@gmail.com> --- vm/vm_map.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'vm/vm_map.c') diff --git a/vm/vm_map.c b/vm/vm_map.c index 7db76b7b..6c06f064 100644 --- a/vm/vm_map.c +++ b/vm/vm_map.c @@ -1419,13 +1419,13 @@ vm_map_entry_reset_wired(vm_map_t map, vm_map_entry_t entry) * is downgraded to a read lock. The caller should always consider * the map read locked on return. */ -static void -vm_map_pageable_scan(struct vm_map *map, - struct vm_map_entry *start, - struct vm_map_entry *end) +static void vm_map_pageable_scan( + vm_map_t map, + vm_map_entry_t start_entry, + vm_offset_t end) { - struct vm_map_entry *entry; - boolean_t do_wire_faults; + vm_map_entry_t entry; + boolean_t do_wire_faults; /* * Pass 1. Update counters and prepare wiring faults. @@ -1433,7 +1433,10 @@ vm_map_pageable_scan(struct vm_map *map, do_wire_faults = FALSE; - for (entry = start; entry != end; entry = entry->vme_next) { + for (entry = start_entry; + (entry != vm_map_to_entry(map)) && + (entry->vme_start < end); + entry = entry->vme_next) { /* * Unwiring. @@ -1558,7 +1561,10 @@ vm_map_pageable_scan(struct vm_map *map, vm_map_lock_write_to_read(map); } - for (entry = start; entry != end; entry = entry->vme_next) { + for (entry = start_entry; + (entry != vm_map_to_entry(map)) && + (entry->vme_end <= end); + entry = entry->vme_next) { /* * The wiring count can only be 1 if it was * incremented by this function right before @@ -1683,7 +1689,7 @@ kern_return_t vm_map_protect( current = next; /* Returns with the map read-locked if successful */ - vm_map_pageable_scan(map, entry, current); + vm_map_pageable_scan(map, entry, end); vm_map_unlock(map); return(KERN_SUCCESS); @@ -1822,7 +1828,7 @@ kern_return_t vm_map_pageable( } /* Returns with the map read-locked */ - vm_map_pageable_scan(map, start_entry, end_entry); + vm_map_pageable_scan(map, start_entry, end); if (lock_map) { vm_map_unlock(map); -- cgit v1.2.3