Diffstat (limited to 'storeio')
-rw-r--r--  storeio/Makefile     29
-rw-r--r--  storeio/dev.c       473
-rw-r--r--  storeio/dev.h       127
-rw-r--r--  storeio/io.c        365
-rw-r--r--  storeio/open.c      127
-rw-r--r--  storeio/open.h       68
-rw-r--r--  storeio/pager.c     265
-rw-r--r--  storeio/storeio.c   425
8 files changed, 1879 insertions, 0 deletions
diff --git a/storeio/Makefile b/storeio/Makefile
new file mode 100644
index 00000000..f027ebd4
--- /dev/null
+++ b/storeio/Makefile
@@ -0,0 +1,29 @@
+# Makefile for storeio
+#
+# Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+dir := storeio
+makemode := server
+
+target = storeio
+SRCS = dev.c storeio.c open.c pager.c io.c
+LCLHDRS = dev.h open.h
+
+OBJS = $(SRCS:.c=.o)
+HURDLIBS = trivfs pager fshelp iohelp store threads ports ihash shouldbeinlibc
+
+include ../Makeconf
diff --git a/storeio/dev.c b/storeio/dev.c
new file mode 100644
index 00000000..31b084f9
--- /dev/null
+++ b/storeio/dev.c
@@ -0,0 +1,473 @@
+/* store `device' I/O
+
+ Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2008
+ Free Software Foundation, Inc.
+ Written by Miles Bader <miles@gnu.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <assert.h>
+#include <string.h>
+#include <hurd/pager.h>
+#include <hurd/store.h>
+#include <sys/mman.h>
+
+#include "dev.h"
+
+/* These functions deal with the buffer used for doing non-block-aligned I/O. */
+
+static inline int
+dev_buf_is_active (struct dev *dev)
+{
+ return dev->buf_offs >= 0;
+}
+
+/* Invalidate DEV's buffer, writing it to disk if necessary. */
+static error_t
+dev_buf_discard (struct dev *dev)
+{
+ if (dev_buf_is_active (dev))
+ {
+ if (dev->buf_dirty)
+ {
+ size_t amount;
+ struct store *store = dev->store;
+ error_t err =
+ store_write (store, dev->buf_offs >> store->log2_block_size,
+ dev->buf, store->block_size, &amount);
+ if (!err && amount < store->block_size)
+ err = EIO;
+ if (err)
+ return err;
+ dev->buf_dirty = 0;
+ }
+ dev->buf_offs = -1;
+ }
+ return 0;
+}
+
+/* Make DEV's buffer active, reading the block from DEV's store which
+ contains OFFS. */
+static error_t
+dev_buf_fill (struct dev *dev, off_t offs)
+{
+ error_t err;
+ unsigned block_mask = dev->block_mask;
+ void *buf = dev->buf;
+ struct store *store = dev->store;
+ size_t buf_len = store->block_size;
+
+ if (dev_buf_is_active (dev))
+ {
+ if ((dev->buf_offs & ~block_mask) == (offs & ~block_mask))
+ return 0; /* Correct block already in buffer. */
+ else
+ {
+ err = dev_buf_discard (dev);
+ if (err)
+ return err;
+ }
+ }
+
+ err = store_read (store, offs >> store->log2_block_size, store->block_size,
+ &buf, &buf_len);
+ if (err)
+ return err;
+
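+ /* store_read may hand back the data in a newly allocated buffer instead
+ of filling the one we passed; if so, adopt it and drop our old one. */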
+ if (buf != dev->buf)
+ {
+ munmap (dev->buf, store->block_size);
+ dev->buf = buf;
+ }
+
+ dev->buf_offs = offs & ~block_mask;
+
+ return 0;
+}
+
+/* Do an in-buffer partial-block I/O operation. */
+static error_t
+dev_buf_rw (struct dev *dev, size_t buf_offs, size_t *io_offs, size_t *len,
+ error_t (*const buf_rw) (size_t buf_offs,
+ size_t io_offs, size_t len))
+{
+ size_t block_size = dev->store->block_size;
+
+ assert (dev_buf_is_active (dev));
+
+ if (buf_offs + *len >= block_size)
+ /* Only part of BUF lies within the buffer (or everything up
+ to the end of the block, in which case we want to flush
+ the buffer anyway). */
+ {
+ size_t buf_len = block_size - buf_offs;
+ error_t err = (*buf_rw) (buf_offs, *io_offs, buf_len);
+ if (err)
+ return err;
+ *io_offs += buf_len;
+ *len -= buf_len;
+ return dev_buf_discard (dev);
+ }
+ else
+ /* All I/O is within the block. */
+ {
+ error_t err = (*buf_rw) (buf_offs, *io_offs, *len);
+ if (err)
+ return err;
+ *io_offs += *len;
+ *len = 0;
+ return 0;
+ }
+}
+
+/* Called with DEV->lock held. Try to open the store underlying DEV. */
+error_t
+dev_open (struct dev *dev)
+{
+ error_t err;
+ const int flags = ((dev->readonly ? STORE_READONLY : 0)
+ | (dev->no_fileio ? STORE_NO_FILEIO : 0));
+
+ assert (dev->store == 0);
+
+ if (dev->store_name == 0)
+ {
+ /* This means we had no store arguments.
+ We are to operate on our underlying node. */
+ err = store_create (storeio_fsys->underlying, flags, 0, &dev->store);
+ }
+ else
+ /* Open based on the previously parsed store arguments. */
+ err = store_parsed_open (dev->store_name, flags, &dev->store);
+ if (err)
+ return err;
+
+ /* Inactivate the store; it will be activated at first access.
+ We ignore a possible EINVAL here. XXX Pass STORE_INACTIVE to
+ store_create/store_parsed_open instead when libstore is fixed
+ to support this. */
+ store_set_flags (dev->store, STORE_INACTIVE);
+
+ dev->buf = mmap (0, dev->store->block_size, PROT_READ|PROT_WRITE,
+ MAP_ANON, 0, 0);
+ if (dev->buf == MAP_FAILED)
+ {
+ store_free (dev->store);
+ dev->store = 0;
+ return ENOMEM;
+ }
+
+ if (!dev->inhibit_cache)
+ {
+ dev->buf_offs = -1;
+ rwlock_init (&dev->io_lock);
+ dev->block_mask = (1 << dev->store->log2_block_size) - 1;
+ dev->pager = 0;
+ mutex_init (&dev->pager_lock);
+ }
+
+ return 0;
+}
+
+/* Shut down the store underlying DEV and free any resources it consumes.
+ DEV itself remains intact so that dev_open can be called again.
+ This should be called with DEV->lock held. */
+void
+dev_close (struct dev *dev)
+{
+ assert (dev->store);
+
+ if (!dev->inhibit_cache)
+ {
+ if (dev->pager != NULL)
+ pager_shutdown (dev->pager);
+
+ dev_buf_discard (dev);
+
+ munmap (dev->buf, dev->store->block_size);
+ }
+
+ store_free (dev->store);
+ dev->store = 0;
+}
+
+/* Try and write out any pending writes to DEV. If WAIT is true, will wait
+ for any paging activity to cease. */
+error_t
+dev_sync (struct dev *dev, int wait)
+{
+ error_t err;
+
+ if (dev->inhibit_cache)
+ return 0;
+
+ /* Sync any paged backing store. */
+ if (dev->pager != NULL)
+ pager_sync (dev->pager, wait);
+
+ rwlock_writer_lock (&dev->io_lock);
+ err = dev_buf_discard (dev);
+ rwlock_writer_unlock (&dev->io_lock);
+
+ return err;
+}
+
+/* Takes care of buffering I/O to/from DEV for a transfer at position OFFS,
+ length LEN; the amount of I/O successfully done is returned in AMOUNT.
+ BUF_RW is called to do I/O that's entirely inside DEV's internal buffer,
+ and RAW_RW to do I/O directly to DEV's store. */
+static inline error_t
+buffered_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+ error_t (* const buf_rw) (size_t buf_offs,
+ size_t io_offs, size_t len),
+ error_t (* const raw_rw) (off_t offs,
+ size_t io_offs, size_t len,
+ size_t *amount))
+{
+ error_t err = 0;
+ unsigned block_mask = dev->block_mask;
+ unsigned block_size = dev->store->block_size;
+ size_t io_offs = 0; /* Offset within this I/O operation. */
+ unsigned block_offs = offs & block_mask; /* Offset within a block. */
+
+ rwlock_writer_lock (&dev->io_lock);
+
+ if (block_offs != 0)
+ /* The start of the I/O isn't block aligned. */
+ {
+ err = dev_buf_fill (dev, offs);
+ if (! err)
+ err = dev_buf_rw (dev, block_offs, &io_offs, &len, buf_rw);
+ }
+
+ if (!err && len > 0)
+ /* Now the I/O should be block aligned. */
+ {
+ if (len >= block_size)
+ {
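+ /* Flush our buffer, then transfer all the whole blocks directly. */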
+ size_t amount;
+ err = dev_buf_discard (dev);
+ if (! err)
+ err =
+ (*raw_rw) (offs + io_offs, io_offs, len & ~block_mask, &amount);
+ if (! err)
+ {
+ io_offs += amount;
+ len -= amount;
+ }
+ }
+ if (len > 0 && len < block_size)
+ /* All full blocks were written successfully, so write
+ the tail end into the buffer. */
+ {
+ err = dev_buf_fill (dev, offs + io_offs);
+ if (! err)
+ err = dev_buf_rw (dev, 0, &io_offs, &len, buf_rw);
+ }
+ }
+
+ if (! err)
+ *amount = io_offs;
+
+ rwlock_writer_unlock (&dev->io_lock);
+
+ return err;
+}
+
+/* Takes care of buffering I/O to/from DEV for a transfer at position OFFS,
+ length LEN, and direction DIR. BUF_RW is called to do I/O to/from data
+ buffered in DEV, and RAW_RW to do I/O directly to DEV's store. */
+static inline error_t
+dev_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+ error_t (* const buf_rw) (size_t buf_offs,
+ size_t io_offs, size_t len),
+ error_t (* const raw_rw) (off_t offs,
+ size_t io_offs, size_t len,
+ size_t *amount))
+{
+ error_t err;
+ unsigned block_mask = dev->block_mask;
+
+ if (offs < 0 || offs > dev->store->size)
+ return EINVAL;
+ else if (offs + len > dev->store->size)
+ len = dev->store->size - offs;
+
+ rwlock_reader_lock (&dev->io_lock);
+ if (dev_buf_is_active (dev)
+ || (offs & block_mask) != 0 || (len & block_mask) != 0)
+ /* Some non-aligned I/O has been done, or is needed, so we need to deal
+ with DEV's buffer, which means getting an exclusive lock. */
+ {
+ /* Acquire a writer lock instead of a reader lock. Note that other
+ writers may have acquired the lock by the time we get it. */
+ rwlock_reader_unlock (&dev->io_lock);
+ err = buffered_rw (dev, offs, len, amount, buf_rw, raw_rw);
+ }
+ else
+ /* Only block-aligned I/O is being done, so things are easy. */
+ {
+ err = (*raw_rw) (offs, 0, len, amount);
+ rwlock_reader_unlock (&dev->io_lock);
+ }
+
+ return err;
+}
+
+/* Write LEN bytes from BUF to DEV, returning the amount actually written in
+ AMOUNT. If successful, 0 is returned, otherwise an error code is
+ returned. */
+error_t
+dev_write (struct dev *dev, off_t offs, void *buf, size_t len,
+ size_t *amount)
+{
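+ /* buf_write and raw_write below are nested helper functions (a GNU C
+ extension) that dev_rw calls back for buffered and direct transfers. */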
+ error_t buf_write (size_t buf_offs, size_t io_offs, size_t len)
+ {
+ bcopy (buf + io_offs, dev->buf + buf_offs, len);
+ dev->buf_dirty = 1;
+ return 0;
+ }
+ error_t raw_write (off_t offs, size_t io_offs, size_t len, size_t *amount)
+ {
+ struct store *store = dev->store;
+ return
+ store_write (store, offs >> store->log2_block_size,
+ buf + io_offs, len, amount);
+ }
+
+ if (dev->inhibit_cache)
+ {
+ /* Under --no-cache, we permit only whole-block writes.
+ Note that in this case we handle non-power-of-two block sizes. */
+
+ struct store *store = dev->store;
+
+ if (store->block_size == 0)
+ /* We don't know the block size, so let the device enforce it. */
+ return store_write (dev->store, offs, buf, len, amount);
+
+ if ((offs & (store->block_size - 1)) != 0
+ || (len & (store->block_size - 1)) != 0)
+ /* Not whole blocks. No can do. */
+ return EINVAL; /* EIO? */
+
+ /* Do a direct write to the store. */
+ return store_write (dev->store, offs >> store->log2_block_size,
+ buf, len, amount);
+ }
+
+ return dev_rw (dev, offs, len, amount, buf_write, raw_write);
+}
+
+/* Read up to WHOLE_AMOUNT bytes from DEV, returned in BUF and LEN with the
+ usual mach memory result semantics. If successful, 0 is
+ returned, otherwise an error code is returned. */
+error_t
+dev_read (struct dev *dev, off_t offs, size_t whole_amount,
+ void **buf, size_t *len)
+{
+ error_t err;
+ int allocated_buf = 0;
+ error_t ensure_buf ()
+ {
+ if (*len < whole_amount)
+ {
+ void *new = mmap (0, whole_amount, PROT_READ|PROT_WRITE,
+ MAP_ANON, 0, 0);
+ if (new == (void *) -1)
+ return errno;
+ *buf = new;
+ allocated_buf = 1;
+ }
+ return 0;
+ }
+ error_t buf_read (size_t buf_offs, size_t io_offs, size_t len)
+ {
+ error_t err = ensure_buf ();
+ if (! err)
+ bcopy (dev->buf + buf_offs, *buf + io_offs, len);
+ return err;
+ }
+ error_t raw_read (off_t offs, size_t io_offs, size_t len, size_t *amount)
+ {
+ struct store *store = dev->store;
+ off_t addr = offs >> store->log2_block_size;
+ if (len == whole_amount)
+ /* Just return whatever the device does. */
+ return store_read (store, addr, len, buf, amount);
+ else
+ /* This read is returning less than the whole request, so we allocate
+ a buffer big enough to hold everything, in case we have to
+ coalesce multiple reads into a single return buffer. */
+ {
+ error_t err = ensure_buf ();
+ if (! err)
+ {
+ void *_req_buf = *buf + io_offs, *req_buf = _req_buf;
+ size_t req_len = len;
+ err = store_read (store, addr, len, &req_buf, &req_len);
+ if (! err)
+ {
+ if (req_buf != _req_buf)
+ /* Copy from wherever the read put it. */
+ {
+ bcopy (req_buf, _req_buf, req_len);
+ munmap (req_buf, req_len);
+ }
+ *amount = req_len;
+ }
+ }
+ return err;
+ }
+ }
+
+ if (dev->store->size > 0 && offs == dev->store->size)
+ {
+ /* Reading end of file. */
+ *len = 0;
+ return 0;
+ }
+
+ if (dev->inhibit_cache)
+ {
+ /* Under --no-cache, we permit only whole-block reads.
+ Note that in this case we handle non-power-of-two block sizes.
+ We could, that is, but libstore won't have it (see libstore/make.c).
+ If the device does not report a block size, we let any attempt
+ through on the assumption the device will enforce its own limits. */
+
+ struct store *store = dev->store;
+
+ if (store->block_size == 0)
+ /* We don't know the block size, so let the device enforce it. */
+ return store_read (dev->store, offs, whole_amount, buf, len);
+
+ if ((offs & (store->block_size - 1)) != 0
+ || (whole_amount & (store->block_size - 1)) != 0)
+ /* Not whole blocks. No can do. */
+ return EINVAL;
+
+ /* Do a direct read from the store. */
+ return store_read (dev->store, offs >> store->log2_block_size,
+ whole_amount, buf, len);
+ }
+
+ err = dev_rw (dev, offs, whole_amount, len, buf_read, raw_read);
+ if (err && allocated_buf)
+ munmap (*buf, whole_amount);
+
+ return err;
+}
diff --git a/storeio/dev.h b/storeio/dev.h
new file mode 100644
index 00000000..23924ca5
--- /dev/null
+++ b/storeio/dev.h
@@ -0,0 +1,127 @@
+/* store `device' I/O
+
+ Copyright (C) 1995,96,97,99,2000,2001 Free Software Foundation, Inc.
+ Written by Miles Bader <miles@gnu.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __DEV_H__
+#define __DEV_H__
+
+#include <mach.h>
+#include <device/device.h>
+#include <rwlock.h>
+#include <hurd/store.h>
+#include <hurd/trivfs.h>
+
+extern struct trivfs_control *storeio_fsys;
+
+/* Information about the backend store, which we presumptively call a "device". */
+struct dev
+{
+ /* The argument specification that we use to open the store. */
+ struct store_parsed *store_name;
+
+ /* The device to which we're doing io. This is null when the
+ device is closed, in which case we will open from `store_name'. */
+ struct store *store;
+
+ int readonly; /* Nonzero if user gave --readonly flag. */
+ int enforced; /* Nonzero if user gave --enforced flag. */
+ int no_fileio; /* Nonzero if user gave --no-fileio flag. */
+ dev_t rdev; /* A unixy device number for st_rdev. */
+
+ /* The current owner of the open device. For terminals, this affects
+ controlling terminal behavior (see term_become_ctty). For all objects
+ this affects old-style async IO. Negative values represent pgrps. This
+ has nothing to do with the owner of a file (as returned by io_stat, and
+ as used for various permission checks by filesystems). An owner of 0
+ indicates that there is no owner. */
+ pid_t owner;
+
+ /* The number of active opens. */
+ int nperopens;
+
+ /* This lock protects `store', `owner' and `nperopens'. The other
+ members never change after creation, except for those locked by
+ io_lock (below). */
+ struct mutex lock;
+
+ /* Nonzero iff the --no-cache flag was given.
+ If this is set, the remaining members are not used at all
+ and don't need to be initialized or cleaned up. */
+ int inhibit_cache;
+
+ /* A bitmask corresponding to the part of an offset that lies within a
+ device block. */
+ unsigned block_mask;
+
+ /* Lock to arbitrate I/O through this device. Block I/O can occur in
+ parallel, and requires only a reader-lock.
+ Non-block I/O is always serialized, and requires a writer-lock. */
+ struct rwlock io_lock;
+
+ /* Non-block I/O is buffered through BUF. BUF_OFFS is the device offset
+ corresponding to the start of BUF (which holds one block); if it is -1,
+ then BUF is inactive. */
+ void *buf;
+ off_t buf_offs;
+ int buf_dirty;
+
+ struct pager *pager;
+ struct mutex pager_lock;
+};
+
+static inline int
+dev_is_readonly (const struct dev *dev)
+{
+ return dev->readonly || (dev->store && (dev->store->flags & STORE_READONLY));
+}
+
+/* Called with DEV->lock held. Try to open the store underlying DEV. */
+error_t dev_open (struct dev *dev);
+
+/* Shut down the store underlying DEV and free any resources it consumes.
+ DEV itself remains intact so that dev_open can be called again.
+ This should be called with DEV->lock held. */
+void dev_close (struct dev *dev);
+
+/* Returns in MEMOBJ the port for a memory object backed by the storage on
+ DEV. Returns 0 or the error code if an error occurred. */
+error_t dev_get_memory_object (struct dev *dev, vm_prot_t prot,
+ memory_object_t *memobj);
+
+/* Try to stop all paging activity on DEV, returning true if we were
+ successful. If NOSYNC is true, then we won't write back any (kernel)
+ cached pages to the device. */
+int dev_stop_paging (struct dev *dev, int nosync);
+
+/* Try and write out any pending writes to DEV. If WAIT is true, will wait
+ for any paging activity to cease. */
+error_t dev_sync (struct dev *dev, int wait);
+
+/* Write LEN bytes from BUF to DEV, returning the amount actually written in
+ AMOUNT. If successful, 0 is returned, otherwise an error code is
+ returned. */
+error_t dev_write (struct dev *dev, off_t offs, void *buf, size_t len,
+ size_t *amount);
+
+/* Read up to AMOUNT bytes from DEV, returned in BUF and LEN with the
+ usual mach memory result semantics. If successful, 0 is returned,
+ otherwise an error code is returned. */
+error_t dev_read (struct dev *dev, off_t offs, size_t amount,
+ void **buf, size_t *len);
+
+#endif /* !__DEV_H__ */
diff --git a/storeio/io.c b/storeio/io.c
new file mode 100644
index 00000000..508df77a
--- /dev/null
+++ b/storeio/io.c
@@ -0,0 +1,365 @@
+/* The hurd io interface to storeio
+
+ Copyright (C) 1995,96,97,99,2000,02 Free Software Foundation, Inc.
+ Written by Miles Bader <miles@gnu.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd/trivfs.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "open.h"
+#include "dev.h"
+
+/* Return objects mapping the data underlying this memory object. If
+ the object can be read then memobjrd will be provided; if the
+ object can be written then memobjwr will be provided. For objects
+ where read data and write data are the same, these objects will be
+ equal, otherwise they will be disjoint. Servers are permitted to
+ implement io_map but not io_map_cntl. Some objects do not provide
+ mapping; they will set none of the ports and return an error. Such
+ objects can still be accessed by io_read and io_write. */
+error_t
+trivfs_S_io_map (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ memory_object_t *rd_obj, mach_msg_type_name_t *rd_type,
+ memory_object_t *wr_obj, mach_msg_type_name_t *wr_type)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & (O_READ|O_WRITE)))
+ return EBADF;
+ else
+ {
+ mach_port_t memobj;
+ int flags = cred->po->openmodes;
+ vm_prot_t prot =
+ ((flags & O_READ) ? VM_PROT_READ : 0)
+ | ((flags & O_WRITE) ? VM_PROT_WRITE : 0);
+ struct open *open = (struct open *)cred->po->hook;
+ error_t err = dev_get_memory_object (open->dev, prot, &memobj);
+
+ if (!err)
+ {
+ if (flags & O_READ)
+ *rd_obj = memobj;
+ else
+ *rd_obj = MACH_PORT_NULL;
+ if (flags & O_WRITE)
+ *wr_obj = memobj;
+ else
+ *wr_obj = MACH_PORT_NULL;
+
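+ /* Both reply slots carry MACH_MSG_TYPE_MOVE_SEND, so if the same memory
+ object is being returned twice we need an extra send right. */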
+ if ((flags & (O_READ|O_WRITE)) == (O_READ|O_WRITE)
+ && memobj != MACH_PORT_NULL)
+ mach_port_mod_refs (mach_task_self (), memobj,
+ MACH_PORT_RIGHT_SEND, 1);
+ }
+
+ *rd_type = *wr_type = MACH_MSG_TYPE_MOVE_SEND;
+
+ return err;
+ }
+}
+
+/* Read data from an IO object. If offset is -1, read from the object-
+ maintained file pointer. If the object is not seekable, offset is
+ ignored. The amount desired to be read is in AMOUNT. */
+error_t
+trivfs_S_io_read (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ char **data, mach_msg_type_number_t *data_len,
+ loff_t offs, mach_msg_type_number_t amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_READ))
+ return EBADF;
+ else
+ return open_read ((struct open *)cred->po->hook,
+ offs, amount, (void **)data, data_len);
+}
+
+/* Tell how much data can be read from the object without blocking for
+ a "long time" (this should be the same meaning of "long time" used
+ by the nonblocking flag. */
+error_t
+trivfs_S_io_readable (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ mach_msg_type_number_t *amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_READ))
+ return EBADF;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ *amount = open->dev->store->size - open->offs;
+ return 0;
+ }
+}
+
+/* Write data to an IO object. If offset is -1, write at the object-
+ maintained file pointer. If the object is not seekable, offset is
+ ignored. The amount successfully written is returned in amount. A
+ given user should not have more than one outstanding io_write on an
+ object at a time; servers implement congestion control by delaying
+ responses to io_write. Servers may drop data (returning ENOBUFS)
+ if they receive more than one write when not prepared for it. */
+error_t
+trivfs_S_io_write (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ char *data, mach_msg_type_number_t data_len,
+ loff_t offs, mach_msg_type_number_t *amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_WRITE))
+ return EBADF;
+ else
+ return open_write ((struct open *)cred->po->hook,
+ offs, (void *)data, data_len, amount);
+}
+
+/* Change current read/write offset */
+error_t
+trivfs_S_io_seek (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ off_t offs, int whence, off_t *new_offs)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return open_seek ((struct open *)cred->po->hook, offs, whence, new_offs);
+}
+
+/* SELECT_TYPE is the bitwise OR of SELECT_READ, SELECT_WRITE, and SELECT_URG.
+ Block until one of the indicated types of i/o can be done "quickly", and
+ return the types that are then available. */
+error_t
+trivfs_S_io_select (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int *type)
+{
+ if (! cred)
+ return EOPNOTSUPP;
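+ /* A store is always ready for reading and writing, so leave those bits
+ in *TYPE set; urgent data never applies here. */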
+ *type &= ~SELECT_URG;
+ return 0;
+}
+
+/* Truncate file. */
+error_t
+trivfs_S_file_set_size (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ off_t size)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0;
+}
+
+/* These four routines modify the O_APPEND, O_ASYNC, O_FSYNC, and
+ O_NONBLOCK bits for the IO object. In addition, io_get_openmodes
+ will tell you which of O_READ, O_WRITE, and O_EXEC the object can
+ be used for. The O_ASYNC bit affects icky async I/O; good async
+ I/O is done through io_async which is orthogonal to these calls. */
+
+error_t
+trivfs_S_io_get_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int *bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ {
+ *bits = cred->po->openmodes;
+ return 0;
+ }
+}
+
+error_t
+trivfs_S_io_set_all_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int mode)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0;
+}
+
+error_t
+trivfs_S_io_set_some_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0;
+}
+
+error_t
+trivfs_S_io_clear_some_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0;
+}
+
+/* Get/set the owner of the IO object. For terminals, this affects
+ controlling terminal behavior (see term_become_ctty). For all
+ objects this affects old-style async IO. Negative values represent
+ pgrps. This has nothing to do with the owner of a file (as
+ returned by io_stat, and as used for various permission checks by
+ filesystems). An owner of 0 indicates that there is no owner. */
+error_t
+trivfs_S_io_get_owner (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ pid_t *owner)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ *owner = open->dev->owner; /* atomic word fetch */
+ return 0;
+ }
+}
+
+error_t
+trivfs_S_io_mod_owner (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ pid_t owner)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ open->dev->owner = owner; /* atomic word store */
+ return 0;
+ }
+}
+
+/* File syncing operations; these all do the same thing: sync the underlying
+ device. */
+
+error_t
+trivfs_S_file_sync (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int wait, int omit_metadata)
+{
+ if (cred)
+ return dev_sync (((struct open *)cred->po->hook)->dev, wait);
+ else
+ return EOPNOTSUPP;
+}
+
+error_t
+trivfs_S_file_syncfs (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int wait, int dochildren)
+{
+ if (cred)
+ return dev_sync (((struct open *)cred->po->hook)->dev, wait);
+ else
+ return EOPNOTSUPP;
+}
+
+error_t
+trivfs_S_file_get_storage_info (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ mach_port_t **ports,
+ mach_msg_type_name_t *ports_type,
+ mach_msg_type_number_t *num_ports,
+ int **ints, mach_msg_type_number_t *num_ints,
+ off_t **offsets,
+ mach_msg_type_number_t *num_offsets,
+ char **data, mach_msg_type_number_t *data_len)
+{
+ *ports_type = MACH_MSG_TYPE_COPY_SEND;
+
+ if (! cred || ! cred->po->hook)
+ return EOPNOTSUPP;
+ else
+ {
+ error_t err;
+ struct dev *dev = ((struct open *)cred->po->hook)->dev;
+ struct store *store = dev->store;
+
+ if (dev->enforced && !(store->flags & STORE_ENFORCED))
+ {
+ /* The --enforced switch tells us not to let anyone
+ get at the device, no matter how trustable they are. */
+ size_t name_len = (store->name ? strlen (store->name) + 1 : 0);
+ int i;
+ *num_ports = 0;
+ i = 0;
+ (*ints)[i++] = STORAGE_OTHER;
+ (*ints)[i++] = store->flags;
+ (*ints)[i++] = store->block_size;
+ (*ints)[i++] = 1; /* num_runs */
+ (*ints)[i++] = name_len;
+ (*ints)[i++] = 0; /* misc_len */
+ *num_ints = i;
+ i = 0;
+ (*offsets)[i++] = 0;
+ (*offsets)[i++] = store->size;
+ *num_offsets = i;
+ if (store->name)
+ memcpy (*data, store->name, name_len);
+ *data_len = name_len;
+ return 0;
+ }
+
+ if (!cred->isroot
+ && !store_is_securely_returnable (store, cred->po->openmodes))
+ {
+ struct store *clone;
+ err = store_clone (store, &clone);
+ if (! err)
+ {
+ err = store_set_flags (clone, STORE_INACTIVE);
+ if (err == EINVAL)
+ err = EACCES;
+ else
+ err = store_return (clone,
+ ports, num_ports, ints, num_ints,
+ offsets, num_offsets, data, data_len);
+ store_free (clone);
+ }
+ }
+ else
+ err = store_return (store,
+ ports, num_ports, ints, num_ints,
+ offsets, num_offsets, data, data_len);
+
+ return err;
+ }
+}
diff --git a/storeio/open.c b/storeio/open.c
new file mode 100644
index 00000000..805115ce
--- /dev/null
+++ b/storeio/open.c
@@ -0,0 +1,127 @@
+/* Per-open information for storeio
+
+ Copyright (C) 1995, 1996, 2006 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <stdio.h>
+
+#include "open.h"
+#include "dev.h"
+
+/* Returns a new per-open structure for the device DEV in OPEN. If an error
+ occurs, the error-code is returned, otherwise 0. */
+error_t
+open_create (struct dev *dev, struct open **open)
+{
+ *open = malloc (sizeof (struct open));
+ if (*open == NULL)
+ return ENOMEM;
+
+ (*open)->dev = dev;
+ (*open)->offs = 0;
+ mutex_init (&(*open)->lock);
+
+ return 0;
+}
+
+/* Free OPEN and any resources it holds. */
+void
+open_free (struct open *open)
+{
+ free (open);
+}
+
+/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS
+ (which may be ignored if the device doesn't support random access),
+ and returns the number of bytes written in AMOUNT. If no error occurs,
+ zero is returned, otherwise the error code is returned. */
+error_t
+open_write (struct open *open, off_t offs, void *buf, size_t len,
+ vm_size_t *amount)
+{
+ error_t err;
+ if (offs < 0)
+ /* Use OPEN's offset. */
+ {
+ mutex_lock (&open->lock);
+ err = dev_write (open->dev, open->offs, buf, len, amount);
+ if (! err)
+ open->offs += *amount;
+ mutex_unlock (&open->lock);
+ }
+ else
+ err = dev_write (open->dev, offs, buf, len, amount);
+ return err;
+}
+
+/* Reads up to AMOUNT bytes from the device into BUF and LEN using the
+ standard mach out-array convention. If no error occurs, zero is returned,
+ otherwise the error code is returned. */
+error_t
+open_read (struct open *open, off_t offs, size_t amount,
+ void **buf, vm_size_t *len)
+{
+ error_t err;
+ if (offs < 0)
+ /* Use OPEN's offset. */
+ {
+ mutex_lock (&open->lock);
+ err = dev_read (open->dev, open->offs, amount, buf, len);
+ if (! err)
+ open->offs += *len;
+ mutex_unlock (&open->lock);
+ }
+ else
+ err = dev_read (open->dev, offs, amount, buf, len);
+ return err;
+}
+
+/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek.
+ The new absolute location is returned in NEW_OFFS (and may not be the same
+ as OFFS). If no error occurs, zero is returned, otherwise the error code
+ is returned. */
+error_t
+open_seek (struct open *open, off_t offs, int whence, off_t *new_offs)
+{
+ error_t err = 0;
+
+ mutex_lock (&open->lock);
+
+ switch (whence)
+ {
+ case SEEK_CUR:
+ offs += open->offs;
+ goto check;
+ case SEEK_END:
+ offs += open->dev->store->size;
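+ /* Fall through to the range check shared with SEEK_SET. */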
+ case SEEK_SET:
+ check:
+ if (offs >= 0)
+ {
+ *new_offs = open->offs = offs;
+ break;
+ }
+ default:
+ err = EINVAL;
+ }
+
+ mutex_unlock (&open->lock);
+
+ return err;
+}
diff --git a/storeio/open.h b/storeio/open.h
new file mode 100644
index 00000000..cbac2a37
--- /dev/null
+++ b/storeio/open.h
@@ -0,0 +1,68 @@
+/* Per-open information for storeio
+
+ Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __OPEN_H__
+#define __OPEN_H__
+
+#include "dev.h"
+
+/* ---------------------------------------------------------------- */
+
+/* A structure describing a particular i/o stream on this device. */
+struct open
+{
+ /* The device that this is an open on. */
+ struct dev *dev;
+
+ /* The per-open offset used for I/O operations that don't specify an
+ explicit offset. */
+ off_t offs;
+
+ /* A lock used to control write access to OFFS. */
+ struct mutex lock;
+};
+
+/* Returns a new per-open structure for the device DEV in OPEN. If an error
+ occurs, the error-code is returned, otherwise 0. */
+error_t open_create (struct dev *dev, struct open **open);
+
+/* Free OPEN and any resources it holds. */
+void open_free (struct open *open);
+
+/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS
+ (which may be ignored if the device doesn't support random access),
+ and returns the number of bytes written in AMOUNT. If no error occurs,
+ zero is returned, otherwise the error code is returned. */
+error_t open_write (struct open *open, off_t offs, void *buf, size_t len,
+ size_t *amount);
+
+/* Reads up to AMOUNT bytes from the device into BUF and BUF_LEN using the
+ standard mach out-array convention. If no error occurs, zero is returned,
+ otherwise the error code is returned. */
+error_t open_read (struct open *open, off_t offs, size_t amount,
+ void **buf, size_t *buf_len);
+
+/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek.
+ The new absolute location is returned in NEW_OFFS (and may not be the same
+ as OFFS). If no error occurs, zero is returned, otherwise the error code
+ is returned. */
+error_t open_seek (struct open *open, off_t offs, int whence, off_t *new_offs);
+
+#endif /* !__OPEN_H__ */
diff --git a/storeio/pager.c b/storeio/pager.c
new file mode 100644
index 00000000..1fb1d07e
--- /dev/null
+++ b/storeio/pager.c
@@ -0,0 +1,265 @@
+/* Paging interface for storeio devices
+
+ Copyright (C) 1995,96,97,99,2002 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <hurd/pager.h>
+#include <assert.h>
+#include <strings.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#include "dev.h"
+
+/* ---------------------------------------------------------------- */
+/* Pager library callbacks; see <hurd/pager.h> for more info. */
+
+/* For pager PAGER, read one page from offset PAGE. Set *BUF to be the
+ address of the page, and set *WRITE_LOCK if the page must be provided
+ read-only. The only permissible error returns are EIO, EDQUOT, and
+ ENOSPC. */
+error_t
+pager_read_page (struct user_pager_info *upi,
+ vm_offset_t page, vm_address_t *buf, int *writelock)
+{
+ error_t err;
+ size_t read = 0; /* bytes actually read */
+ int want = vm_page_size; /* bytes we want to read */
+ struct dev *dev = (struct dev *)upi;
+ struct store *store = dev->store;
+
+ if (page + want > store->size)
+ /* Read a partial page if necessary to avoid reading off the end. */
+ want = store->size - page;
+
+ err = dev_read (dev, page, want, (void **)buf, &read);
+
+ if (!err && want < vm_page_size)
+ /* Zero anything we didn't read. Allocation only happens in page-size
+ multiples, so we know we can write there. */
+ memset ((char *)*buf + want, '\0', vm_page_size - want);
+
+ *writelock = (store->flags & STORE_READONLY);
+
+ if (err || read < want)
+ return EIO;
+ else
+ return 0;
+}
+
+/* For pager PAGER, synchronously write one page from BUF to offset PAGE. In
+ addition, vm_deallocate (or equivalent) BUF. The only permissible error
+ returns are EIO, EDQUOT, and ENOSPC. */
+error_t
+pager_write_page (struct user_pager_info *upi,
+ vm_offset_t page, vm_address_t buf)
+{
+ struct dev *dev = (struct dev *)upi;
+ struct store *store = dev->store;
+
+ if (store->flags & STORE_READONLY)
+ return EROFS;
+ else
+ {
+ error_t err;
+ size_t written;
+ int want = vm_page_size;
+
+ if (page + want > store->size)
+ /* Write a partial page if necessary to avoid writing off the end. */
+ want = store->size - page;
+
+ err = dev_write (dev, page, (char *)buf, want, &written);
+
+ munmap ((caddr_t) buf, vm_page_size);
+
+ if (err || written < want)
+ return EIO;
+ else
+ return 0;
+ }
+}
+
+/* A page should be made writable. */
+error_t
+pager_unlock_page (struct user_pager_info *upi, vm_offset_t address)
+{
+ struct dev *dev = (struct dev *)upi;
+
+ if (dev->store->flags & STORE_READONLY)
+ return EROFS;
+ else
+ return 0;
+}
+
+/* The user must define this function. It should report back (in
+ *OFFSET and *SIZE) the minimum valid address the pager will accept
+ and the size of the object. */
+error_t
+pager_report_extent (struct user_pager_info *upi,
+ vm_address_t *offset, vm_size_t *size)
+{
+ *offset = 0;
+ *size = ((struct dev *)upi)->store->size;
+ return 0;
+}
+
+/* This is called when a pager is being deallocated after all extant send
+ rights have been destroyed. */
+void
+pager_clear_user_data (struct user_pager_info *upi)
+{
+ struct dev *dev = (struct dev *)upi;
+ mutex_lock (&dev->pager_lock);
+ dev->pager = 0;
+ mutex_unlock (&dev->pager_lock);
+}
+
+static struct port_bucket *pager_port_bucket = 0;
+
+/* A top-level function for the paging thread that just services paging
+ requests. */
+static void
+service_paging_requests (any_t arg)
+{
+ for (;;)
+ ports_manage_port_operations_multithread (pager_port_bucket,
+ pager_demuxer,
+ 1000 * 30, 1000 * 60 * 5, 0);
+}
+
+/* Initialize paging for this device. */
+static void
+init_dev_paging ()
+{
+ if (! pager_port_bucket)
+ {
+ static struct mutex pager_global_lock = MUTEX_INITIALIZER;
+
+ mutex_lock (&pager_global_lock);
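+ /* Re-check under the lock; another thread may have created the bucket
+ in the meantime (double-checked initialization). */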
+ if (pager_port_bucket == NULL)
+ {
+ pager_port_bucket = ports_create_bucket ();
+
+ /* Make a thread to service paging requests. */
+ cthread_detach (cthread_fork ((cthread_fn_t)service_paging_requests,
+ (any_t)0));
+ }
+ mutex_unlock (&pager_global_lock);
+ }
+}
+
+void
+pager_dropweak (struct user_pager_info *upi __attribute__ ((unused)))
+{
+}
+
+/* Try to stop all paging activity on DEV, returning true if we were
+ successful. If NOSYNC is true, then we won't write back any (kernel)
+ cached pages to the device. */
+int
+dev_stop_paging (struct dev *dev, int nosync)
+{
+ size_t num_pagers = (pager_port_bucket ?
+ ports_count_bucket (pager_port_bucket) : 0);
+
+ if (num_pagers > 0 && !nosync)
+ {
+ error_t block_cache (void *arg)
+ {
+ struct pager *p = arg;
+ pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1);
+ return 0;
+ }
+ error_t enable_cache (void *arg)
+ {
+ struct pager *p = arg;
+ pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+ return 0;
+ }
+
+ /* Loop through the pagers and turn off caching one by one,
+ synchronously. That should cause termination of each pager. */
+ ports_bucket_iterate (pager_port_bucket, block_cache);
+
+ /* Give it a second; the kernel doesn't actually shut down
+ immediately. XXX */
+ sleep (1);
+
+ num_pagers = ports_count_bucket (pager_port_bucket);
+ if (num_pagers > 0)
+ /* Darn, there are actual honest users. Turn caching back on,
+ and return failure. */
+ ports_bucket_iterate (pager_port_bucket, enable_cache);
+ }
+
+ return num_pagers == 0;
+}
+
+/* Returns in MEMOBJ the port for a memory object backed by the storage on
+ DEV. Returns 0 or the error code if an error occurred. */
+error_t
+dev_get_memory_object (struct dev *dev, vm_prot_t prot, memory_object_t *memobj)
+{
+ error_t err = store_map (dev->store, prot, memobj);
+
+ if (err == EOPNOTSUPP && !dev->inhibit_cache)
+ {
+ int created = 0;
+
+ init_dev_paging ();
+
+ mutex_lock (&dev->pager_lock);
+
+ if (dev->pager == NULL)
+ {
+ dev->pager =
+ pager_create ((struct user_pager_info *)dev, pager_port_bucket,
+ 1, MEMORY_OBJECT_COPY_DELAY);
+ if (dev->pager == NULL)
+ {
+ mutex_unlock (&dev->pager_lock);
+ return errno;
+ }
+ created = 1;
+ }
+
+ *memobj = pager_get_port (dev->pager);
+
+ if (*memobj == MACH_PORT_NULL)
+ /* Pager is currently being destroyed, try again. */
+ {
+ dev->pager = 0;
+ mutex_unlock (&dev->pager_lock);
+ return dev_get_memory_object (dev, prot, memobj);
+ }
+ else
+ err =
+ mach_port_insert_right (mach_task_self (),
+ *memobj, *memobj, MACH_MSG_TYPE_MAKE_SEND);
+
+ if (created)
+ ports_port_deref (dev->pager);
+
+ mutex_unlock (&dev->pager_lock);
+ }
+
+ return err;
+}
diff --git a/storeio/storeio.c b/storeio/storeio.c
new file mode 100644
index 00000000..a88c8e43
--- /dev/null
+++ b/storeio/storeio.c
@@ -0,0 +1,425 @@
+/* A translator for doing I/O to stores
+
+ Copyright (C) 1995,96,97,98,99,2000,01,02 Free Software Foundation, Inc.
+ Written by Miles Bader <miles@gnu.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <error.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <argp.h>
+#include <argz.h>
+
+#include <hurd.h>
+#include <hurd/ports.h>
+#include <hurd/trivfs.h>
+#include <version.h>
+
+#include "open.h"
+#include "dev.h"
+
+static struct argp_option options[] =
+{
+ {"readonly", 'r', 0, 0,"Disallow writing"},
+ {"writable", 'w', 0, 0,"Allow writing"},
+ {"no-cache", 'c', 0, 0,"Never cache data--user io does direct device io"},
+ {"no-file-io", 'F', 0, 0,"Never perform io via plain file io RPCs"},
+ {"no-fileio", 0, 0, OPTION_ALIAS | OPTION_HIDDEN},
+ {"enforced", 'e', 0, 0,"Never reveal underlying devices, even to root"},
+ {"rdev", 'n', "ID", 0,
+ "The stat rdev number for this node; may be either a"
+ " single integer, or of the form MAJOR,MINOR"},
+ {0}
+};
+static const char doc[] = "Translator for devices and other stores";
+
+const char *argp_program_version = STANDARD_HURD_VERSION (storeio);
+
+/* Desired store parameters specified by the user. */
+struct storeio_argp_params
+{
+ struct store_argp_params store_params; /* Filled in by store_argp parser. */
+ struct dev *dev; /* We fill in its flag members. */
+};
+
+/* Parse a single option. */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+ struct storeio_argp_params *params = state->input;
+
+ switch (key)
+ {
+
+ case 'r': params->dev->readonly = 1; break;
+ case 'w': params->dev->readonly = 0; break;
+
+ case 'c': params->dev->inhibit_cache = 1; break;
+ case 'e': params->dev->enforced = 1; break;
+ case 'F': params->dev->no_fileio = 1; break;
+
+ case 'n':
+ {
+ char *start = arg, *end;
+ dev_t rdev;
+
+ rdev = strtoul (start, &end, 0);
+ if (*end == ',')
+ /* MAJOR,MINOR form */
+ {
+ start = end + 1;
+ rdev = makedev (rdev, strtoul (start, &end, 0));
+ }
+
+ if (end == start || *end != '\0')
+ {
+ argp_error (state, "%s: Invalid argument to --rdev", arg);
+ return EINVAL;
+ }
+
+ params->dev->rdev = rdev;
+ }
+ break;
+
+ case ARGP_KEY_INIT:
+ /* Now store_argp's parser will get to initialize its state.
+ The default_type member is our input parameter to it. */
+ bzero (&params->store_params, sizeof params->store_params);
+ params->store_params.default_type = "device";
+ params->store_params.store_optional = 1;
+ state->child_inputs[0] = &params->store_params;
+ break;
+
+ case ARGP_KEY_SUCCESS:
+ params->dev->store_name = params->store_params.result;
+ break;
+
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static const struct argp_child argp_kids[] = { { &store_argp }, {0} };
+static const struct argp argp = { options, parse_opt, 0, doc, argp_kids };
+
+struct trivfs_control *storeio_fsys;
+
+int
+main (int argc, char *argv[])
+{
+ error_t err;
+ mach_port_t bootstrap;
+ struct dev device;
+ struct storeio_argp_params params;
+
+ bzero (&device, sizeof device);
+ mutex_init (&device.lock);
+
+ params.dev = &device;
+ argp_parse (&argp, argc, argv, 0, 0, &params);
+
+ task_get_bootstrap_port (mach_task_self (), &bootstrap);
+ if (bootstrap == MACH_PORT_NULL)
+ error (2, 0, "Must be started as a translator");
+
+ /* Reply to our parent */
+ err = trivfs_startup (bootstrap, 0, 0, 0, 0, 0, &storeio_fsys);
+ if (err)
+ error (3, err, "trivfs_startup");
+
+ storeio_fsys->hook = &device;
+
+ /* Launch. */
+ ports_manage_port_operations_multithread (storeio_fsys->pi.bucket,
+ trivfs_demuxer,
+ 30*1000, 5*60*1000, 0);
+
+ return 0;
+}
+
+error_t
+trivfs_append_args (struct trivfs_control *trivfs_control,
+ char **argz, size_t *argz_len)
+{
+ struct dev *const dev = trivfs_control->hook;
+ error_t err = 0;
+
+ if (dev->rdev != (dev_t) 0)
+ {
+ char buf[40];
+ snprintf (buf, sizeof buf, "--rdev=%d,%d",
+ major (dev->rdev), minor (dev->rdev));
+ err = argz_add (argz, argz_len, buf);
+ }
+
+ if (!err && dev->inhibit_cache)
+ err = argz_add (argz, argz_len, "--no-cache");
+
+ if (!err && dev->enforced)
+ err = argz_add (argz, argz_len, "--enforced");
+
+ if (!err && dev->no_fileio)
+ err = argz_add (argz, argz_len, "--no-file-io");
+
+ if (! err)
+ err = argz_add (argz, argz_len,
+ dev->readonly ? "--readonly" : "--writable");
+
+ if (! err)
+ err = store_parsed_append_args (dev->store_name, argz, argz_len);
+
+ return err;
+}
+
+/* Called whenever a new lookup is done of our node. The only reason we
+ set this hook is to duplicate the check normally done against
+ trivfs_allow_open in trivfs_S_fsys_getroot, but looking at the
+ per-device state. This gets checked again in check_open_hook, but this
+ hook runs before a little bit more overhead gets incurred. In the
+ success case, we just return EAGAIN to have trivfs_S_fsys_getroot
+ continue with its generic processing. */
+static error_t
+getroot_hook (struct trivfs_control *cntl,
+ mach_port_t reply_port,
+ mach_msg_type_name_t reply_port_type,
+ mach_port_t dotdot,
+ uid_t *uids, u_int nuids, uid_t *gids, u_int ngids,
+ int flags,
+ retry_type *do_retry, char *retry_name,
+ mach_port_t *node, mach_msg_type_name_t *node_type)
+{
+ struct dev *const dev = cntl->hook;
+ return (dev_is_readonly (dev) && (flags & O_WRITE)) ? EROFS : EAGAIN;
+}
+
+/* Called whenever someone tries to open our node (even for a stat). We
+ delay opening the kernel device until this point, as we can usefully
+ return errors from here. */
+static error_t
+check_open_hook (struct trivfs_control *trivfs_control,
+ struct iouser *user,
+ int flags)
+{
+ struct dev *const dev = trivfs_control->hook;
+ error_t err = 0;
+
+ if (!err && dev_is_readonly (dev) && (flags & O_WRITE))
+ return EROFS;
+
+ mutex_lock (&dev->lock);
+ if (dev->store == NULL)
+ {
+ /* Try and open the store. */
+ err = dev_open (dev);
+ if (err && (flags & (O_READ|O_WRITE)) == 0)
+ /* If we're not opening for read or write, then just ignore the
+ error, as this allows stat to work correctly. XXX */
+ err = 0;
+ }
+ mutex_unlock (&dev->lock);
+
+ return err;
+}
+
+static error_t
+open_hook (struct trivfs_peropen *peropen)
+{
+ error_t err = 0;
+ struct dev *const dev = peropen->cntl->hook;
+
+ if (dev->store)
+ {
+ mutex_lock (&dev->lock);
+ if (dev->nperopens++ == 0)
+ err = store_clear_flags (dev->store, STORE_INACTIVE);
+ mutex_unlock (&dev->lock);
+ if (!err)
+ err = open_create (dev, (struct open **)&peropen->hook);
+ }
+ return err;
+}
+
+static void
+close_hook (struct trivfs_peropen *peropen)
+{
+ struct dev *const dev = peropen->cntl->hook;
+
+ if (peropen->hook)
+ {
+ mutex_lock (&dev->lock);
+ if (--dev->nperopens == 0)
+ store_set_flags (dev->store, STORE_INACTIVE);
+ mutex_unlock (&dev->lock);
+ open_free (peropen->hook);
+ }
+}
+
+/* ---------------------------------------------------------------- */
+/* Trivfs hooks */
+
+int trivfs_fstype = FSTYPE_DEV;
+int trivfs_fsid = 0;
+
+int trivfs_support_read = 1;
+int trivfs_support_write = 1;
+int trivfs_support_exec = 0;
+
+int trivfs_allow_open = O_READ | O_WRITE;
+
+void
+trivfs_modify_stat (struct trivfs_protid *cred, struct stat *st)
+{
+ struct dev *const dev = cred->po->cntl->hook;
+ struct open *open = cred->po->hook;
+
+ st->st_mode &= ~S_IFMT;
+
+ if (open)
+ /* An open device. */
+ {
+ struct store *store = open->dev->store;
+ store_offset_t size = store->size;
+
+ if (store->block_size > 1)
+ st->st_blksize = store->block_size;
+
+ st->st_size = size;
+ st->st_mode |= ((dev->inhibit_cache || store->block_size == 1)
+ ? S_IFCHR : S_IFBLK);
+ }
+ else
+ /* Try and do things without an open device... */
+ {
+ st->st_blksize = 0;
+ st->st_size = 0;
+
+ st->st_mode |= dev->inhibit_cache ? S_IFCHR : S_IFBLK;
+ }
+
+ st->st_rdev = dev->rdev;
+ if (dev_is_readonly (dev))
+ st->st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+}
+
+error_t
+trivfs_goaway (struct trivfs_control *fsys, int flags)
+{
+ struct dev *const device = fsys->hook;
+ error_t err;
+ int force = (flags & FSYS_GOAWAY_FORCE);
+ int nosync = (flags & FSYS_GOAWAY_NOSYNC);
+ struct port_class *root_port_class = fsys->protid_class;
+
+ mutex_lock (&device->lock);
+
+ if (device->store == NULL)
+ /* The device is not actually open.
+ XXX note that exiting here nukes non-io users, like someone
+ in the middle of a stat who will get SIGLOST or something. */
+ exit (0);
+
+ /* Wait until all pending rpcs are done. */
+ err = ports_inhibit_class_rpcs (root_port_class);
+ if (err == EINTR || (err && !force))
+ {
+ mutex_unlock (&device->lock);
+ return err;
+ }
+
+ if (force && nosync)
+ /* Exit with extreme prejudice. */
+ exit (0);
+
+ if (!force && ports_count_class (root_port_class) > 0)
+ /* Still users, so don't exit. */
+ goto busy;
+
+ if (! nosync)
+ /* Sync the device here, if necessary, so that closing it won't result in
+ any I/O (which could get hung up trying to use one of our pagers). */
+ dev_sync (device, 1);
+
+ /* devpager_shutdown may sync the pagers as a side effect (if NOSYNC is 0),
+ so we put that first in this test. */
+ if (dev_stop_paging (device, nosync) || force)
+ /* Bye-bye. */
+ {
+ if (! nosync)
+ /* If NOSYNC is true, we don't close DEV, as that could cause data to
+ be written back. */
+ dev_close (device);
+ exit (0);
+ }
+
+ busy:
+ /* Allow normal operations to proceed. */
+ ports_enable_class (root_port_class);
+ ports_resume_class_rpcs (root_port_class);
+ mutex_unlock (&device->lock);
+
+ /* Complain that there are still users. */
+ return EBUSY;
+}
+
+/* If this variable is set, it is called by trivfs_S_fsys_getroot before any
+ other processing takes place; if the return value is EAGAIN, normal trivfs
+ getroot processing continues, otherwise the rpc returns with that return
+ value. */
+error_t (*trivfs_getroot_hook) (struct trivfs_control *cntl,
+ mach_port_t reply_port,
+ mach_msg_type_name_t reply_port_type,
+ mach_port_t dotdot,
+ uid_t *uids, u_int nuids, uid_t *gids, u_int ngids,
+ int flags,
+ retry_type *do_retry, char *retry_name,
+ mach_port_t *node, mach_msg_type_name_t *node_type)
+ = getroot_hook;
+
+/* If this variable is set, it is called every time an open happens.
+ USER and FLAGS are from the open; CNTL identifies the
+ node being opened. This call need not check permissions on the underlying
+ node. If the open call should block, then return EWOULDBLOCK. Other
+ errors are immediately reflected to the user. If O_NONBLOCK
+ is not set in FLAGS and EWOULDBLOCK is returned, then call
+ trivfs_complete_open when all pending open requests for this
+ file can complete. */
+error_t (*trivfs_check_open_hook)(struct trivfs_control *trivfs_control,
+ struct iouser *user,
+ int flags)
+ = check_open_hook;
+
+/* If this variable is set, it is called every time a new peropen
+ structure is created and initialized. */
+error_t (*trivfs_peropen_create_hook)(struct trivfs_peropen *) = open_hook;
+
+/* If this variable is set, it is called every time a peropen structure
+ is about to be destroyed. */
+void (*trivfs_peropen_destroy_hook) (struct trivfs_peropen *) = close_hook;
+
+/* Sync this filesystem. */
+kern_return_t
+trivfs_S_fsys_syncfs (struct trivfs_control *cntl,
+ mach_port_t reply, mach_msg_type_name_t replytype,
+ int wait, int dochildren)
+{
+ struct dev *dev = cntl->hook;
+ if (dev)
+ return dev_sync (dev, wait);
+ else
+ return 0;
+}