about | summary | refs | log | tree | commit | diff
path: root/ext2fs/pager.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext2fs/pager.c')
-rw-r--r-- ext2fs/pager.c 301
1 files changed, 199 insertions, 102 deletions
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index dfc43a9f..0136f9b1 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -1,8 +1,8 @@
/* Pager for ext2fs
- Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+ Copyright (C) 1994,95,96,97,98,99,2000,02 Free Software Foundation, Inc.
- Converted for ext2fs by Miles Bader <miles@gnu.ai.mit.edu>
+ Converted for ext2fs by Miles Bader <miles@gnu.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -18,12 +18,17 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-#include <strings.h>
+#include <string.h>
+#include <errno.h>
+#include <hurd/store.h>
#include "ext2fs.h"
/* A ports bucket to hold pager ports. */
struct port_bucket *pager_bucket;
+/* Mapped image of the disk. */
+void *disk_image;
+
spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
#ifdef DONT_CACHE_MEMORY_OBJECTS
@@ -32,13 +37,91 @@ spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
#define MAY_CACHE 1
#endif
-/* ---------------------------------------------------------------- */
+#define STATS /* Compile in the pager event counters declared below. */
+
+#ifdef STATS
+struct ext2fs_pager_stats /* Counters of pager events; each is bumped through STAT_INC. */
+{
+ spin_lock_t lock; /* Taken by STAT_INC around every increment. */
+ /* Whole-disk pager traffic. */
+ unsigned long disk_pageins; /* NOTE(review): no STAT_INC of this field is visible in this diff -- confirm it is counted somewhere. */
+ unsigned long disk_pageouts; /* Calls to disk_pager_write_page. */
+ /* File pager: page-in side. */
+ unsigned long file_pageins; /* Calls to file_pager_read_page. */
+ unsigned long file_pagein_reads; /* store_read calls issued while paging in a file page. */
+ unsigned long file_pagein_freed_bufs; /* Buffers returned by store_read that were copied from and then freed. */
+ unsigned long file_pagein_alloced_bufs; /* Pages taken from get_page_buf during file pagein. */
+ /* File pager: page-out side. */
+ unsigned long file_pageouts; /* Calls to file_pager_write_page. */
+ /* Allocation-related events. */
+ unsigned long file_page_unlocks; /* Bumped in pager_unlock_page. */
+ unsigned long file_grows; /* Bumped in diskfs_grow. */
+};
+
+static struct ext2fs_pager_stats ext2s_pager_stats; /* The one file-scope instance that STAT_INC updates. */
+/* Increment counter FIELD of ext2s_pager_stats while holding its spin lock; usable as a single statement. */
+#define STAT_INC(field) \
+do { spin_lock (&ext2s_pager_stats.lock); \
+ ext2s_pager_stats.field++; \
+ spin_unlock (&ext2s_pager_stats.lock); } while (0)
+#else /* !STATS */
+#define STAT_INC(field) /* nop: expands to a bare 0 so that `STAT_INC (x);' remains a valid statement */0
+#endif /* STATS */
+
+#define FREE_PAGE_BUFS 24 /* Number of pages mapped at once when get_page_buf refills its pool. */
+
+/* Returns a single page page-aligned buffer, or 0 if a new pool could not be mapped; pages come from a lock-protected pool that is refilled FREE_PAGE_BUFS pages at a time. */
+static void *
+get_page_buf ()
+{
+ static struct mutex free_page_bufs_lock = MUTEX_INITIALIZER; /* Guards the two pool variables below. */
+ static void *free_page_bufs; /* Next unused page of the current pool mapping. */
+ static int num_free_page_bufs; /* How many unused pages remain starting at FREE_PAGE_BUFS. */
+ void *buf; /* The page handed back to the caller. */
+
+ mutex_lock (&free_page_bufs_lock);
+ if (num_free_page_bufs > 0)
+ { /* Pool is non-empty: hand out its first page. */
+ buf = free_page_bufs;
+ num_free_page_bufs --;
+ if (num_free_page_bufs > 0)
+ free_page_bufs += vm_page_size; /* Step to the next page of the mapping. */
+#ifndef NDEBUG
+ else
+ free_page_bufs = 0; /* Pool exhausted; cleared only in debug builds so the assert in the refill path holds. */
+#endif /* ! NDEBUG */
+ }
+ else
+ { /* Pool is empty: map a fresh run of FREE_PAGE_BUFS pages. */
+ assert (free_page_bufs == 0);
+ buf = mmap (0, vm_page_size * FREE_PAGE_BUFS,
+ PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+ if (buf == MAP_FAILED)
+ buf = 0; /* Report failure as a null pointer; callers must check for it. */
+ else
+ { /* The first page goes to the caller; the remaining ones become the pool. */
+ free_page_bufs = buf + vm_page_size;
+ num_free_page_bufs = FREE_PAGE_BUFS - 1;
+ }
+ }
+
+ mutex_unlock (&free_page_bufs_lock);
+ return buf;
+}
+
+/* Frees a block returned by get_page_buf by unmapping that one page; the page is not put back into get_page_buf's pool. */
+static inline void
+free_page_buf (void *buf)
+{
+ munmap (buf, vm_page_size); /* The result of munmap is not checked. */
+}
+
/* Find the location on disk of page OFFSET in NODE. Return the disk block
- in BLOCK (if unallocated, then return 0). If *LOCK is 0, then it a reader
- lock is aquired on NODE's ALLOC_LOCK before doing anything, and left
- locked after return -- even if an error is returned. 0 on success or an
- error code otherwise is returned. */
+ in BLOCK (if unallocated, then return 0). If *LOCK is 0, then a reader
+ lock is acquired on NODE's ALLOC_LOCK before doing anything, and left
+ locked after the return -- even if an error is returned. Returns 0 on
+ success, or an error code otherwise. */
static error_t
find_block (struct node *node, vm_offset_t offset,
block_t *block, struct rwlock **lock)
@@ -65,14 +148,12 @@ find_block (struct node *node, vm_offset_t offset,
return err;
}
-/* ---------------------------------------------------------------- */
-
/* Read one page for the pager backing NODE at offset PAGE, into BUF. This
may need to read several filesystem blocks to satisfy one page, and tries
to consolidate the i/o if possible. */
static error_t
file_pager_read_page (struct node *node, vm_offset_t page,
- vm_address_t *buf, int *writelock)
+ void **buf, int *writelock)
{
error_t err;
int offs = 0;
@@ -91,30 +172,47 @@ file_pager_read_page (struct node *node, vm_offset_t page,
if (num_pending_blocks > 0)
{
block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block;
- int length = num_pending_blocks << log2_block_size;
- vm_address_t new_buf;
+ size_t amount = num_pending_blocks << log2_block_size;
+ /* The buffer we try to read into; on the first read, we pass in a
+ size of zero, so that the read is guaranteed to allocate a new
+ buffer, otherwise, we try to read directly into the tail of the
+ buffer we've already got. */
+ void *new_buf = *buf + offs;
+ size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+
+ STAT_INC (file_pagein_reads);
- err = diskfs_device_read_sync (dev_block, &new_buf, length);
+ err = store_read (store, dev_block, amount, &new_buf, &new_len);
if (err)
return err;
+ else if (amount != new_len)
+ return EIO;
- if (offs == 0)
- /* First read, make the returned page be our buffer. */
- *buf = new_buf;
- else
+ if (new_buf != *buf + offs)
{
- /* We've already got some buffer, so copy into it. */
- bcopy ((char *)new_buf, (char *)*buf + offs, length);
- vm_deallocate (mach_task_self (), new_buf, length);
+ /* The read went into a different buffer than the one we
+ passed. */
+ if (offs == 0)
+ /* First read, make the returned page be our buffer. */
+ *buf = new_buf;
+ else
+ /* We've already got some buffer, so copy into it. */
+ {
+ bcopy (new_buf, *buf + offs, new_len);
+ free_page_buf (new_buf); /* Return NEW_BUF to our pool. */
+ STAT_INC (file_pagein_freed_bufs);
+ }
}
- offs += length;
+ offs += new_len;
num_pending_blocks = 0;
}
return 0;
}
+ STAT_INC (file_pageins);
+
*writelock = 0;
if (page >= node->allocsize)
@@ -151,11 +249,12 @@ file_pager_read_page (struct node *node, vm_offset_t page,
if (offs == 0)
/* No page allocated to read into yet. */
{
- err = vm_allocate (mach_task_self (), buf, vm_page_size, 1);
- if (err)
+ *buf = get_page_buf ();
+ if (! *buf)
break;
+ STAT_INC (file_pagein_alloced_bufs);
}
- bzero ((char *)*buf + offs, block_size);
+ bzero (*buf + offs, block_size);
offs += block_size;
}
else
@@ -177,16 +276,14 @@ file_pager_read_page (struct node *node, vm_offset_t page,
return err;
}
-/* ---------------------------------------------------------------- */
-
struct pending_blocks
{
/* The block number of the first of the blocks. */
block_t block;
/* How many blocks we have. */
- int num;
+ off_t num;
/* A (page-aligned) buffer pointing to the data we're dealing with. */
- vm_address_t buf;
+ void *buf;
/* And an offset into BUF. */
int offs;
};
@@ -199,22 +296,24 @@ pending_blocks_write (struct pending_blocks *pb)
{
error_t err;
block_t dev_block = pb->block << log2_dev_blocks_per_fs_block;
- int length = pb->num << log2_block_size;
+ size_t length = pb->num << log2_block_size, amount;
- ext2_debug ("writing block %lu[%d]", pb->block, pb->num);
+ ext2_debug ("writing block %u[%ld]", pb->block, pb->num);
if (pb->offs > 0)
/* Put what we're going to write into a page-aligned buffer. */
{
- vm_address_t page_buf = get_page_buf ();
- bcopy ((char *)pb->buf + pb->offs, (void *)page_buf, length);
- err = diskfs_device_write_sync (dev_block, page_buf, length);
+ void *page_buf = get_page_buf ();
+ bcopy (pb->buf + pb->offs, (void *)page_buf, length);
+ err = store_write (store, dev_block, page_buf, length, &amount);
free_page_buf (page_buf);
}
else
- err = diskfs_device_write_sync (dev_block, pb->buf, length);
+ err = store_write (store, dev_block, pb->buf, length, &amount);
if (err)
return err;
+ else if (amount != length)
+ return EIO;
pb->offs += length;
pb->num = 0;
@@ -224,7 +323,7 @@ pending_blocks_write (struct pending_blocks *pb)
}
static void
-pending_blocks_init (struct pending_blocks *pb, vm_address_t buf)
+pending_blocks_init (struct pending_blocks *pb, void *buf)
{
pb->buf = buf;
pb->block = 0;
@@ -258,32 +357,34 @@ pending_blocks_add (struct pending_blocks *pb, block_t block)
return 0;
}
-/* ---------------------------------------------------------------- */
-
/* Write one page for the pager backing NODE, at offset PAGE, into BUF. This
may need to write several filesystem blocks to satisfy one page, and tries
to consolidate the i/o if possible. */
static error_t
-file_pager_write_page (struct node *node, vm_offset_t offset, vm_address_t buf)
+file_pager_write_page (struct node *node, vm_offset_t offset, void *buf)
{
error_t err = 0;
struct pending_blocks pb;
- struct rwlock *lock = 0;
+ struct rwlock *lock = &node->dn->alloc_lock;
block_t block;
int left = vm_page_size;
pending_blocks_init (&pb, buf);
+ /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+ at least for the cases we care about: pager_unlock_page,
+ diskfs_grow and diskfs_truncate. */
+ rwlock_reader_lock (&node->dn->alloc_lock);
+
if (offset >= node->allocsize)
- {
- err = EIO;
- left = 0;
- }
+ left = 0;
else if (offset + left > node->allocsize)
left = node->allocsize - offset;
ext2_debug ("writing inode %d page %d[%d]", node->cache_id, offset, left);
+ STAT_INC (file_pageouts);
+
while (left > 0)
{
err = find_block (node, offset, &block, &lock);
@@ -298,26 +399,24 @@ file_pager_write_page (struct node *node, vm_offset_t offset, vm_address_t buf)
if (!err)
pending_blocks_write (&pb);
- if (lock)
- rwlock_reader_unlock (lock);
+ rwlock_reader_unlock (&node->dn->alloc_lock);
return err;
}
-/* ---------------------------------------------------------------- */
-
static error_t
-disk_pager_read_page (vm_offset_t page, vm_address_t *buf, int *writelock)
+disk_pager_read_page (vm_offset_t page, void **buf, int *writelock)
{
error_t err;
- int length = vm_page_size;
- vm_size_t dev_end = diskfs_device_size << diskfs_log2_device_block_size;
+ size_t length = vm_page_size, read = 0;
+ vm_size_t dev_end = store->size;
if (page + vm_page_size > dev_end)
length = dev_end - page;
- err = diskfs_device_read_sync (page >> diskfs_log2_device_block_size,
- (void *)buf, length);
+ err = store_read (store, page >> store->log2_block_size, length, buf, &read);
+ if (read != length)
+ return EIO;
if (!err && length != vm_page_size)
bzero ((void *)(*buf + length), vm_page_size - length);
@@ -327,17 +426,19 @@ disk_pager_read_page (vm_offset_t page, vm_address_t *buf, int *writelock)
}
static error_t
-disk_pager_write_page (vm_offset_t page, vm_address_t buf)
+disk_pager_write_page (vm_offset_t page, void *buf)
{
error_t err = 0;
- int length = vm_page_size;
- vm_size_t dev_end = diskfs_device_size << diskfs_log2_device_block_size;
+ size_t length = vm_page_size, amount;
+ vm_size_t dev_end = store->size;
if (page + vm_page_size > dev_end)
length = dev_end - page;
ext2_debug ("writing disk page %d[%d]", page, length);
+ STAT_INC (disk_pageouts);
+
if (modified_global_blocks)
/* Be picky about which blocks in a page that we write. */
{
@@ -374,26 +475,27 @@ disk_pager_write_page (vm_offset_t page, vm_address_t buf)
err = pending_blocks_write (&pb);
}
else
- err =
- diskfs_device_write_sync (page >> diskfs_log2_device_block_size,
- buf, length);
+ {
+ err = store_write (store, page >> store->log2_block_size,
+ buf, length, &amount);
+ if (!err && length != amount)
+ err = EIO;
+ }
return err;
}
-/* ---------------------------------------------------------------- */
-
/* Satisfy a pager read request for either the disk pager or file pager
PAGER, to the page at offset PAGE into BUF. WRITELOCK should be set if
the pager should make the page writeable. */
error_t
pager_read_page (struct user_pager_info *pager, vm_offset_t page,
- vm_address_t *buf, int *writelock)
+ vm_address_t *buf, int *writelock)
{
if (pager->type == DISK)
- return disk_pager_read_page (page, buf, writelock);
+ return disk_pager_read_page (page, (void **)buf, writelock);
else
- return file_pager_read_page (pager->node, page, buf, writelock);
+ return file_pager_read_page (pager->node, page, (void **)buf, writelock);
}
/* Satisfy a pager write request for either the disk pager or file pager
@@ -403,13 +505,11 @@ pager_write_page (struct user_pager_info *pager, vm_offset_t page,
vm_address_t buf)
{
if (pager->type == DISK)
- return disk_pager_write_page (page, buf);
+ return disk_pager_write_page (page, (void *)buf);
else
- return file_pager_write_page (pager->node, page, buf);
+ return file_pager_write_page (pager->node, page, (void *)buf);
}
-/* ---------------------------------------------------------------- */
-
/* Make page PAGE writable, at least up to ALLOCSIZE. This function and
diskfs_grow are the only places that blocks are actually added to the
file. */
@@ -465,20 +565,20 @@ pager_unlock_page (struct user_pager_info *pager, vm_offset_t page)
page, vm_page_size, node->cache_id);
#endif
+ STAT_INC (file_page_unlocks);
+
rwlock_writer_unlock (&dn->alloc_lock);
if (err == ENOSPC)
ext2_warning ("This filesystem is out of space, and will now crash. Bye!");
else if (err)
- ext2_warning ("inode=%d, page=0x%x: %s",
+ ext2_warning ("inode=%Ld, page=0x%zx: %s",
node->cache_id, page, strerror (err));
return err;
}
}
-/* ---------------------------------------------------------------- */
-
/* Grow the disk allocated to locked node NODE to be at least SIZE bytes, and
set NODE->allocsize to the actual allocated size. (If the allocated size
is already SIZE bytes, do nothing.) CRED identifies the user responsible
@@ -530,8 +630,8 @@ diskfs_grow (struct node *node, off_t size, struct protid *cred)
? new_end_block
: old_page_end_block);
- ext2_debug ("extending writable page %u by %ld blocks"
- "; first new block = %lu",
+ ext2_debug ("extending writable page %u by %d blocks"
+ "; first new block = %u",
trunc_page (old_size),
writable_end - end_block,
end_block);
@@ -544,9 +644,9 @@ diskfs_grow (struct node *node, off_t size, struct protid *cred)
}
diskfs_end_catch_exception ();
- if (err)
+ if (! err)
/* Reflect how much we allocated successfully. */
- new_size = (end_block - 1) << log2_block_size;
+ new_size = end_block << log2_block_size;
else
/* See if it's still valid to say this. */
dn->last_page_partially_writable =
@@ -554,11 +654,13 @@ diskfs_grow (struct node *node, off_t size, struct protid *cred)
}
}
+ STAT_INC (file_grows);
+
ext2_debug ("new size: %ld%s.", new_size,
dn->last_page_partially_writable
? " (last page writable)": "");
if (err)
- ext2_warning ("inode=%d, target=%ld: %s",
+ ext2_warning ("inode=%Ld, target=%Ld: %s",
node->cache_id, new_size, strerror (err));
node->allocsize = new_size;
@@ -571,8 +673,6 @@ diskfs_grow (struct node *node, off_t size, struct protid *cred)
return 0;
}
-/* ---------------------------------------------------------------- */
-
/* This syncs a single file (NODE) to disk. Wait for all I/O to complete
if WAIT is set. NODE->lock must be held. */
void
@@ -618,8 +718,6 @@ flush_node_pager (struct node *node)
}
-/* ---------------------------------------------------------------- */
-
/* Return in *OFFSET and *SIZE the minimum valid address the pager will
accept and the size of the object. */
inline error_t
@@ -631,7 +729,7 @@ pager_report_extent (struct user_pager_info *pager,
*offset = 0;
if (pager->type == DISK)
- *size = diskfs_device_size << diskfs_log2_device_block_size;
+ *size = store->size;
else
*size = pager->node->allocsize;
@@ -667,27 +765,17 @@ pager_dropweak (struct user_pager_info *p __attribute__ ((unused)))
{
}
-/* ---------------------------------------------------------------- */
-
-/* A top-level function for the paging thread that just services paging
- requests. */
-static void
-service_paging_requests (any_t foo __attribute__ ((unused)))
-{
- for (;;)
- ports_manage_port_operations_multithread (pager_bucket, pager_demuxer,
- 1000 * 60 * 2, 1000 * 60 * 10,
- 1, MACH_PORT_NULL);
-}
-
/* Create the DISK pager. */
void
create_disk_pager (void)
{
struct user_pager_info *upi = malloc (sizeof (struct user_pager_info));
-
+ if (!upi)
+ ext2_panic ("can't create disk pager: %s", strerror (errno));
upi->type = DISK;
- disk_pager_setup (upi, MAY_CACHE);
+ pager_bucket = ports_create_bucket ();
+ diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, store->size,
+ &disk_image);
}
/* Call this to create a FILE_DATA pager and return a send right.
@@ -723,11 +811,19 @@ diskfs_get_filemap (struct node *node, vm_prot_t prot)
malloc (sizeof (struct user_pager_info));
upi->type = FILE_DATA;
upi->node = node;
- upi->max_prot = 0;
+ upi->max_prot = prot;
diskfs_nref_light (node);
node->dn->pager =
pager_create (upi, pager_bucket, MAY_CACHE,
MEMORY_OBJECT_COPY_DELAY);
+ if (node->dn->pager == 0)
+ {
+ diskfs_nrele_light (node);
+ free (upi);
+ spin_unlock (&node_to_page_lock);
+ return MACH_PORT_NULL;
+ }
+
right = pager_get_port (node->dn->pager);
ports_port_deref (node->dn->pager);
}
@@ -755,7 +851,10 @@ drop_pager_softrefs (struct node *node)
spin_unlock (&node_to_page_lock);
if (MAY_CACHE && pager)
- pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0);
+ {
+ pager_sync (pager, 0);
+ pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0);
+ }
if (pager)
ports_port_deref (pager);
}
@@ -798,7 +897,7 @@ diskfs_shutdown_pager ()
error_t shutdown_one (void *v_p)
{
struct pager *p = v_p;
- if (p != disk_pager)
+ if (p != diskfs_disk_pager)
pager_shutdown (p);
return 0;
}
@@ -821,7 +920,7 @@ diskfs_sync_everything (int wait)
error_t sync_one (void *v_p)
{
struct pager *p = v_p;
- if (p != disk_pager)
+ if (p != diskfs_disk_pager)
pager_sync (p, wait);
return 0;
}
@@ -833,8 +932,6 @@ diskfs_sync_everything (int wait)
sync_global (wait);
}
-/* ---------------------------------------------------------------- */
-
static void
disable_caching ()
{