aboutsummaryrefslogtreecommitdiff
path: root/storeio
diff options
context:
space:
mode:
Diffstat (limited to 'storeio')
-rw-r--r--storeio/ChangeLog230
-rw-r--r--storeio/Makefile29
-rw-r--r--storeio/dev.c459
-rw-r--r--storeio/dev.h107
-rw-r--r--storeio/io.c366
-rw-r--r--storeio/open.c124
-rw-r--r--storeio/open.h68
-rw-r--r--storeio/pager.c264
-rw-r--r--storeio/storeio.c363
9 files changed, 2010 insertions, 0 deletions
diff --git a/storeio/ChangeLog b/storeio/ChangeLog
new file mode 100644
index 00000000..0b351f50
--- /dev/null
+++ b/storeio/ChangeLog
@@ -0,0 +1,230 @@
+1999-11-24 Roland McGrath <roland@baalperazim.frob.com>
+
+ * storeio.c (trivfs_modify_stat): Clear writable bits if open store is
+ readonly, not just if we got the --readonly switch.
+
+1999-11-20 Roland McGrath <roland@baalperazim.frob.com>
+
+ * dev.c (dev_read, dev_write): In DEV->inhibit_cache case,
+ handle zero (i.e. unknown) block_size by treating it as 1.
+ Use shift and bitwise-and rather than multiply and modulus
+ for block size arithmetic, since it's a known power of two.
+
+1999-11-14 Roland McGrath <roland@baalperazim.frob.com>
+
+ * io.c (trivfs_S_file_get_storage_info): Fail with EOPNOTSUPP when
+ there is no store open.
+
+ * storeio.c (check_open_hook): Don't check for D_NO_SUCH_DEVICE here.
+ Translation to ENXIO now done in libstore.
+
+ * dev.h (struct dev): New member `enforced'.
+ * storeio.c (enforce_store): New variable.
+ (options, parse_opt, trivfs_append_args): Grok new option
+ --enforced/-e to set enforce_store.
+ (check_open_hook): Set DEVICE->enforced from enforce_store.
+ * io.c (trivfs_S_file_get_storage_info): If DEV->enforced is set
+ and the store flags lack STORE_ENFORCED, then return STORAGE_OTHER.
+
+1999-09-13 Roland McGrath <roland@baalperazim.frob.com>
+
+ * io.c: Reverted changes related to io_map_segment.
+
+1999-09-07 Thomas Bushnell, BSG <tb@mit.edu>
+
+ * io.c (trivfs_S_io_map): Renamed to ...
+ (trivfs_S_io_map_segment): ... here, and accept parameter `index'.
+
+1999-07-11 Roland McGrath <roland@baalperazim.frob.com>
+
+ * dev.c (dev_read: ensure_buf): Fix sloppy bugs in last change.
+
+1999-07-09 Thomas Bushnell, BSG <tb@mit.edu>
+
+ * dev.c (dev_open): Use mmap instead of vm_allocate.
+ (dev_read): Likewise.
+
+1999-07-10 Roland McGrath <roland@baalperazim.frob.com>
+
+ * dev.c: Add #include <sys/mman.h> for munmap decl.
+ * pager.c: Likewise.
+
+1999-07-03 Thomas Bushnell, BSG <tb@mit.edu>
+
+ * dev.c (dev_buf_fill): Use munmap instead of vm_deallocate.
+ (dev_close): Likewise.
+ (dev_read): Likewise.
+ * pager.c (pager_write_page): Likewise.
+
+1999-05-23 Roland McGrath <roland@baalperazim.frob.com>
+
+ * storeio.c (check_open_hook): Translate D_NO_SUCH_DEVICE to ENXIO.
+
+1999-05-09 Mark Kettenis <kettenis@gnu.org>
+
+ * pager.c (dev_stop_paging): Do not try to count ports in
+ PAGER_PORT_BUCKET if it has not been created.
+
+1999-04-30 Mark Kettenis <kettenis@gnu.org>
+
+ * storeio.c (trivfs_modify_stat): Do not assume that we have the
+ device open in the calculation of st_mode. If the device is not
+ open, only return S_IFCHR if inhibit_cache is set.
+
+1999-01-31 Roland McGrath <roland@baalperazim.frob.com>
+
+ * storeio.c (trivfs_modify_stat): Return S_IFCHR if block size is 1.
+
+ * storeio.c (trivfs_modify_stat): Return S_IFCHR iff inhibit_cache
+ set, otherwise S_IFBLK. For a block-addressed device, S_IFCHR means
+ "must do whole-block i/o".
+
+1999-01-28 Roland McGrath <roland@baalperazim.frob.com>
+
+ * io.c (trivfs_S_file_syncfs): Fix inverted test.
+ Reported by OKUJI Yoshinori <okuji@kuicr.kyoto-u.ac.jp>.
+
+1999-01-27 Roland McGrath <roland@baalperazim.frob.com>
+
+ * storeio.c (options): New option -c/--no-cache.
+ (inhibit_cache): New variable.
+ (parse_opt): Make -c set it.
+ (trivfs_append_args): Report --no-cache if set.
+ (check_open_hook): Pass inhibit_cache flag to dev_open.
+ * dev.h (struct dev): New member `inhibit_cache'.
+ (dev_open): Update decl.
+ * dev.c (dev_open): Take new arg inhibit_cache, store in new dev.
+ If set, don't initialize buf_offs, io_lock, pager, pager_lock.
+ (dev_read, dev_write): If DEV->inhibit_cache is set, allow only
+ whole-block i/o: EINVAL for non-whole-block attempts.
+ * pager.c (dev_get_memory_object): If DEV->inhibit_cache is set, don't
+ make our own pager; if store_map returns EOPNOTSUPP, so do we.
+
+1998-10-20 Roland McGrath <roland@baalperazim.frob.com>
+
+ * dev.c (dev_buf_fill): Add braces to silence gcc warning.
+
+1998-07-20 Roland McGrath <roland@baalperazim.frob.com>
+
+ * storeio.c (main): Fix return type to int, and use return.
+
+Wed Aug 20 14:07:05 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * pager.c (dev_get_memory_object): Deal with errors from
+ pager_create properly.
+
+ * pager.c (service_paging_requests): New args for
+ ports_manage_port_operations_multithread.
+ * storeio.c (main): Likewise.
+
+1997-07-23 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * io.c (trivfs_S_io_readable, trivfs_S_io_read, trivfs_S_io_map):
+ Return EBADF instead of EINVAL.
+ (trivfs_S_io_select): Don't bother returning EBADF.
+
+1997-07-22 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * io.c (trivfs_S_io_read): Return EINVAL instead of EBADF.
+ (trivfs_S_io_map): Return EINVAL if CRED cannot be read or written.
+
+1997-07-21 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * Makefile (HURDLIBS): Add iohelp.
+
+1997-07-17 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * io.c (trivfs_S_io_map): Pass PROT argument to dev_get_memory_object.
+ * pager.c (dev_get_memory_object): Add PROT arg. Use store_map.
+ * dev.h (dev_get_memory_object): Add PROT arg.
+
+1997-07-16 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * pager.c (pager_read_page): Initialize READ to 0.
+
+ * pager.c (dev_get_memory_object): Implement correctly.
+ (dev_stop_paging): Work entirely by flushing user pagers; don't kill pager.
+ (pager_clear_user_data): Zero our pointer to the pager.
+
+ * io.c (trivfs_S_io_map): Make sure there are enough references to
+ MEMOBJ for both read & write ports.
+
+Wed Feb 19 21:42:45 1997 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (fsys_port_class, root_port_class, port_bucket,
+ trivfs_protid_portclasses, trivfs_protid_nportclasses,
+ trivfs_cntl_portclasses, trivfs_cntl_nportclasses): Variables removed.
+ (main): Don't set or use port class/bucket variables, let
+ trivfs_startup do it.
+ Add FSYS variable.
+ (doc): Make an initialized array.
+ (parse_opt): New function (was nested in main).
+ (argp, argp_kids): New variables (were nested in main).
+ (trivfs_goaway): Get ROOT_PORT_CLASS from FSYS.
+
+Fri Nov 15 17:54:10 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * storeio.c (trivfs_check_open_hook): New arg syntax.
+ (check_open_hook): Likewise.
+
+Thu Oct 24 16:24:53 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (main): Use elements of type `struct argp_child' for KIDS.
+
+Mon Oct 7 16:38:06 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * io.c (trivfs_S_file_get_storage_info): If necessary to enforce
+ security, make a copy of STORE and inactivate it before returning.
+
+Thu Sep 26 14:26:17 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (trivfs_S_file_check_access): Function removed (trivfs
+ default is now sufficient).
+
+Tue Sep 24 15:52:04 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (trivfs_S_file_check_access): New function.
+ (check_open_hook): Return EROFS if O_WRITE on a readonly device.
+
+Mon Sep 23 17:21:23 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (trivfs_append_args): Make ARGZ_LEN of type `size_t *'.
+ Narrow scope of BUF.
+ <argz.h>: New include.
+
+Thu Sep 19 18:12:48 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (trivfs_append_args): New function.
+
+Wed Sep 18 15:16:27 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * io.c (trivfs_S_io_read, trivfs_S_io_write): Use void * buffers.
+ * open.h (open_read, open_write): Likewise.
+ * open.c (open_read, open_write): Likewise.
+ * dev.h (struct dev, dev_read, dev_write): Likewise.
+ * dev.c (dev_buf_fill, dev_read, dev_write): Likewise.
+ * pager.c (pager_read_page): Cast BUF into void ** for dev_read.
+
+ * storeio.c (main): Use STORE_PARAMS variable to get result from
+ parsing STORE_ARGP.
+
+ * dev.c (dev_open): Remove CLASSES argument to store_parsed_open.
+
+Thu Sep 19 17:18:11 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * Makefile (HURDLIBS): New variable.
+ (storeio): Deleted special dependencies.
+
+Fri Sep 13 11:05:06 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * storeio.c (argp_program_version): New variable.
+ <version.h>: New include.
+
+Tue Sep 10 17:13:55 1996 Miles Bader <miles@gnu.ai.mit.edu>
+
+ * dev.c (dev_rw): Return EINVAL instead of EIO for too-large offsets.
+
+ * dev.c (dev_open, dev_close): Remove ifdef'd-out code for allocating
+ DEV's buffer with valloc; other things depend on it being vm_allocated.
+
+ * io.c (trivfs_S_file_get_storage_info): Use store_return.
diff --git a/storeio/Makefile b/storeio/Makefile
new file mode 100644
index 00000000..a3e6227b
--- /dev/null
+++ b/storeio/Makefile
@@ -0,0 +1,29 @@
+# Makefile for storeio
+#
+# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# NOTE(review): dir, makemode, target, SRCS, LCLHDRS, OBJS and HURDLIBS are
+# presumably consumed by ../Makeconf (included at the bottom); confirm there.
+dir := storeio
+makemode := server
+
+target = storeio
+# C sources and local headers that make up the storeio translator.
+SRCS = dev.c storeio.c open.c pager.c io.c
+LCLHDRS = dev.h open.h
+
+# One object file per source file (.c suffix replaced by .o).
+OBJS = $(SRCS:.c=.o)
+HURDLIBS = trivfs pager ports fshelp iohelp store threads ihash shouldbeinlibc
+
+include ../Makeconf
diff --git a/storeio/dev.c b/storeio/dev.c
new file mode 100644
index 00000000..0a713253
--- /dev/null
+++ b/storeio/dev.c
@@ -0,0 +1,459 @@
+/* store `device' I/O
+
+ Copyright (C) 1995, 1996, 1998, 1999 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <assert.h>
+#include <string.h>
+#include <hurd/pager.h>
+#include <hurd/store.h>
+#include <sys/mman.h>
+
+#include "dev.h"
+
+/* These functions deal with the buffer used for doing non-block-aligned I/O. */
+
+/* Return nonzero iff DEV's partial-block buffer currently holds a block;
+   an inactive buffer is marked by a negative BUF_OFFS.  */
+static inline int
+dev_buf_is_active (struct dev *dev)
+{
+  off_t cached_offs = dev->buf_offs;
+
+  return !(cached_offs < 0);
+}
+
+/* Invalidate DEV's buffer, writing it back to the store first if it is
+   dirty.  Returns 0 on success (including when the buffer was already
+   inactive).  If the write-back fails, the store's error code is returned
+   (EIO for a short write) and the buffer is left active and dirty, so the
+   pending data is not silently dropped.  */
+static error_t
+dev_buf_discard (struct dev *dev)
+{
+  if (dev_buf_is_active (dev))
+    {
+      if (dev->buf_dirty)
+        {
+          size_t amount = 0;
+          struct store *store = dev->store;
+          error_t err =
+            store_write (store, dev->buf_offs >> store->log2_block_size,
+                         dev->buf, store->block_size, &amount);
+
+          /* Only look at AMOUNT when the write itself succeeded: on
+             failure it need not have been set, and the store's own error
+             is more useful to the caller than a generic EIO.  */
+          if (!err && amount < store->block_size)
+            err = EIO;
+          if (err)
+            return err;
+          dev->buf_dirty = 0;
+        }
+      /* Mark the buffer inactive.  */
+      dev->buf_offs = -1;
+    }
+  return 0;
+}
+
+/* Activate DEV's buffer with the store block that contains byte offset
+   OFFS.  If that block is already buffered nothing is done; if another
+   block is buffered it is discarded first (and written back when dirty).
+   Returns 0, or the error from the discard or the read.  */
+static error_t
+dev_buf_fill (struct dev *dev, off_t offs)
+{
+  struct store *store = dev->store;
+  unsigned block_mask = dev->block_mask;
+  void *data = dev->buf;
+  size_t data_len = store->block_size;
+  error_t err;
+
+  if (dev_buf_is_active (dev))
+    {
+      /* Correct block already in buffer?  */
+      if ((dev->buf_offs & ~block_mask) == (offs & ~block_mask))
+        return 0;
+
+      /* No: evict whatever is buffered now.  */
+      err = dev_buf_discard (dev);
+      if (err)
+        return err;
+    }
+
+  /* DATA starts out pointing at our own buffer; store_read may instead
+     hand back a different region.  */
+  err = store_read (store, offs >> store->log2_block_size, store->block_size,
+                    &data, &data_len);
+  if (err)
+    return err;
+
+  if (data != dev->buf)
+    {
+      /* Adopt the region the store returned and release the old one.  */
+      munmap (dev->buf, store->block_size);
+      dev->buf = data;
+    }
+
+  /* Remember the block-aligned offset the buffer now corresponds to.  */
+  dev->buf_offs = offs & ~block_mask;
+  return 0;
+}
+
+/* Perform a partial-block transfer through DEV's (already active) buffer.
+   BUF_OFFS is the starting position inside the buffered block.  *IO_OFFS
+   and *LEN describe the caller's overall transfer and are advanced /
+   reduced by the number of bytes handled here.  BUF_RW does the actual
+   copy.  When the transfer reaches the end of the block, the buffer is
+   discarded afterwards.  */
+static error_t
+dev_buf_rw (struct dev *dev, size_t buf_offs, size_t *io_offs, size_t *len,
+            inline error_t (*const buf_rw) (size_t buf_offs,
+                                            size_t io_offs, size_t len))
+{
+  size_t block_size = dev->store->block_size;
+  int reaches_block_end;
+  size_t chunk;
+  error_t err;
+
+  assert (dev_buf_is_active (dev));
+
+  /* Either only part of the request lies within the buffer, or it runs
+     exactly up to the end of the block; in both cases we stop at the
+     block boundary and want the buffer flushed afterwards.  */
+  reaches_block_end = (buf_offs + *len >= block_size);
+  chunk = reaches_block_end ? block_size - buf_offs : *len;
+
+  err = (*buf_rw) (buf_offs, *io_offs, chunk);
+  if (err)
+    return err;
+
+  *io_offs += chunk;
+  *len -= chunk;
+
+  return reaches_block_end ? dev_buf_discard (dev) : 0;
+}
+
+/* Returns a pointer to a new device structure in DEV for the store
+   described by NAME, opened with the given FLAGS.  If INHIBIT_CACHE is
+   nonzero, the buffering/paging members of the new structure are left
+   unused: no block buffer is allocated and no locks are initialized.
+   If an error occurs, the error code is returned (ENOMEM when the
+   structure or its buffer cannot be allocated, otherwise the error from
+   store_parsed_open) and *DEV is not modified; otherwise 0.  */
+error_t
+dev_open (struct store_parsed *name, int flags, int inhibit_cache,
+          struct dev **dev)
+{
+  error_t err;
+  struct dev *new = malloc (sizeof *new);
+
+  if (! new)
+    return ENOMEM;
+
+  err = store_parsed_open (name, flags, &new->store);
+  if (err)
+    {
+      free (new);
+      return err;
+    }
+
+  new->inhibit_cache = inhibit_cache;
+  new->owner = 0;
+  new->enforced = 0;
+  new->buf = NULL;
+
+  if (!inhibit_cache)
+    {
+      /* The block buffer is only needed for cached operation; dev_close
+         likewise only unmaps it in that case, so allocating it under
+         INHIBIT_CACHE would leak it.  */
+      new->buf = mmap (0, new->store->block_size, PROT_READ|PROT_WRITE,
+                       MAP_ANON, 0, 0);
+      if (new->buf == MAP_FAILED)
+        {
+          store_free (new->store);
+          free (new);
+          return ENOMEM;
+        }
+
+      new->buf_offs = -1;      /* Buffer starts out inactive...  */
+      new->buf_dirty = 0;      /* ...and clean; malloc does not zero it.  */
+      rwlock_init (&new->io_lock);
+      new->block_mask = (1 << new->store->log2_block_size) - 1;
+      new->pager = 0;
+      mutex_init (&new->pager_lock);
+    }
+
+  *dev = new;
+
+  return 0;
+}
+
+/* Release DEV together with everything it owns.  For a cached device the
+   pager (if any) is shut down, the block buffer is discarded (any error
+   from the write-back is ignored) and unmapped.  The store and the
+   structure itself are always freed.  */
+void
+dev_close (struct dev *dev)
+{
+  int cached = !dev->inhibit_cache;
+
+  if (cached)
+    {
+      struct pager *pager = dev->pager;
+
+      if (pager != NULL)
+        pager_shutdown (pager);
+
+      dev_buf_discard (dev);
+
+      munmap (dev->buf, dev->store->block_size);
+    }
+
+  store_free (dev->store);
+
+  free (dev);
+}
+
+/* Try to write out any pending writes to DEV.  WAIT is passed on to
+   pager_sync for the paged backing store.  Nothing is done for a device
+   opened with caching inhibited.  Returns the result of flushing the
+   block buffer.  */
+error_t
+dev_sync(struct dev *dev, int wait)
+{
+  error_t result = 0;
+
+  if (! dev->inhibit_cache)
+    {
+      /* Sync any paged backing store.  */
+      if (dev->pager != NULL)
+        pager_sync (dev->pager, wait);
+
+      /* Flushing the buffer needs exclusive access to it.  */
+      rwlock_writer_lock (&dev->io_lock);
+      result = dev_buf_discard (dev);
+      rwlock_writer_unlock (&dev->io_lock);
+    }
+
+  return result;
+}
+
+/* Takes care of buffering I/O to/from DEV for a transfer at position OFFS,
+ length LEN; the amount of I/O successfully done is returned in AMOUNT.
+ BUF_RW is called to do I/O that's entirely inside DEV's internal buffer,
+ and RAW_RW to do I/O directly to DEV's store.
+
+ The whole operation runs with DEV's io_lock held for writing. The
+ transfer is split into up to three pieces: an unaligned head (through
+ the buffer), a run of whole blocks (raw), and a partial tail (through
+ the buffer). On error *AMOUNT is left untouched. */
+static inline error_t
+buffered_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+ inline error_t (* const buf_rw) (size_t buf_offs,
+ size_t io_offs, size_t len),
+ inline error_t (* const raw_rw) (off_t offs,
+ size_t io_offs, size_t len,
+ size_t *amount))
+{
+ error_t err = 0;
+ unsigned block_mask = dev->block_mask;
+ unsigned block_size = dev->store->block_size;
+ size_t io_offs = 0; /* Offset within this I/O operation. */
+ unsigned block_offs = offs & block_mask; /* Offset within a block. */
+
+ rwlock_writer_lock (&dev->io_lock);
+
+ if (block_offs != 0)
+ /* The start of the I/O isn't block aligned. */
+ {
+ err = dev_buf_fill (dev, offs);
+ if (! err)
+ err = dev_buf_rw (dev, block_offs, &io_offs, &len, buf_rw);
+ }
+
+ if (!err && len > 0)
+ /* Now the I/O should be block aligned. */
+ {
+ if (len >= block_size)
+ {
+ /* NOTE: this local deliberately shadows the AMOUNT parameter; it
+ only receives the byte count of the raw transfer below. */
+ size_t amount;
+ /* Flush the buffer first so the raw transfer cannot conflict with
+ a block that is still buffered. */
+ err = dev_buf_discard (dev);
+ if (! err)
+ /* LEN & ~BLOCK_MASK rounds the remaining length down to whole
+ blocks. */
+ err =
+ (*raw_rw) (offs + io_offs, io_offs, len & ~block_mask, &amount);
+ if (! err)
+ {
+ io_offs += amount;
+ len -= amount;
+ }
+ }
+ if (len > 0 && len < block_size)
+ /* All full blocks were written successfully, so write
+ the tail end into the buffer. */
+ {
+ err = dev_buf_fill (dev, offs + io_offs);
+ if (! err)
+ err = dev_buf_rw (dev, 0, &io_offs, &len, buf_rw);
+ }
+ }
+
+ if (! err)
+ *amount = io_offs;
+
+ rwlock_writer_unlock (&dev->io_lock);
+
+ return err;
+}
+
+/* Common driver for reads and writes on DEV: transfer LEN bytes at byte
+   offset OFFS, returning the amount done in AMOUNT.  Offsets outside the
+   store yield EINVAL; a transfer running past the end of the store is
+   truncated.  BUF_RW is called to do I/O to/from data buffered in DEV,
+   and RAW_RW to do I/O directly to DEV's store.  */
+static inline error_t
+dev_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+        inline error_t (* const buf_rw) (size_t buf_offs,
+                                         size_t io_offs, size_t len),
+        inline error_t (* const raw_rw) (off_t offs,
+                                         size_t io_offs, size_t len,
+                                         size_t *amount))
+{
+  unsigned block_mask = dev->block_mask;
+  int block_aligned_only;
+  error_t err;
+
+  if (offs < 0 || offs > dev->store->size)
+    return EINVAL;
+  if (offs + len > dev->store->size)
+    len = dev->store->size - offs;
+
+  rwlock_reader_lock (&dev->io_lock);
+
+  /* The easy case: no partial block is buffered and both ends of the
+     transfer fall on block boundaries.  */
+  block_aligned_only = (!dev_buf_is_active (dev)
+                        && (offs & block_mask) == 0
+                        && (len & block_mask) == 0);
+
+  if (block_aligned_only)
+    {
+      /* Block I/O may proceed in parallel under the reader lock.  */
+      err = (*raw_rw) (offs, 0, len, amount);
+      rwlock_reader_unlock (&dev->io_lock);
+      return err;
+    }
+
+  /* Some non-aligned I/O has been done, or is needed, so DEV's buffer is
+     involved and an exclusive lock is required.  Drop the reader lock and
+     let buffered_rw acquire the writer lock; note that other writers may
+     have acquired the lock by the time we get it.  */
+  rwlock_reader_unlock (&dev->io_lock);
+  return buffered_rw (dev, offs, len, amount, buf_rw, raw_rw);
+}
+
+/* Write LEN bytes from BUF to DEV at byte offset OFFS, returning the amount
+   actually written in AMOUNT.  If successful, 0 is returned, otherwise an
+   error code is returned.
+
+   When DEV->inhibit_cache is set only whole-block writes are accepted
+   (EINVAL otherwise) and they go straight to the store; if the store
+   reports no block size (0), the request is handed to the store as is.  */
+error_t
+dev_write (struct dev *dev, off_t offs, void *buf, size_t len,
+           size_t *amount)
+{
+  /* Copy LEN bytes of the caller's data (starting IO_OFFS bytes into BUF)
+     into DEV's block buffer at BUF_OFFS and mark the buffer dirty.  */
+  error_t buf_write (size_t buf_offs, size_t io_offs, size_t len)
+    {
+      bcopy (buf + io_offs, dev->buf + buf_offs, len);
+      dev->buf_dirty = 1;
+      return 0;
+    }
+  /* Write LEN bytes of the caller's data directly to the store at byte
+     offset OFFS (converted to a block number).  */
+  error_t raw_write (off_t offs, size_t io_offs, size_t len, size_t *amount)
+    {
+      struct store *store = dev->store;
+      return
+        store_write (store, offs >> store->log2_block_size,
+                     buf + io_offs, len, amount);
+    }
+
+  if (dev->inhibit_cache)
+    {
+      /* Under --no-cache, we permit only whole-block writes.  The mask
+         and shift arithmetic below relies on the block size being a power
+         of two.  */
+
+      struct store *store = dev->store;
+
+      if (store->block_size == 0)
+        /* We don't know the block size, so let the device enforce it.  */
+        return store_write (dev->store, offs, buf, len, amount);
+
+      if ((offs & (store->block_size - 1)) != 0
+          || (len & (store->block_size - 1)) != 0)
+        /* Not whole blocks.  No can do.  */
+        return EINVAL;          /* EIO? */
+
+      /* Do a direct write to the store.  OFFS is a byte offset and must be
+         turned into a block number by shifting right, exactly as
+         raw_write above does; shifting left addressed the wrong block.  */
+      return store_write (dev->store, offs >> store->log2_block_size,
+                          buf, len, amount);
+    }
+
+  return dev_rw (dev, offs, len, amount, buf_write, raw_write);
+}
+
+/* Read up to WHOLE_AMOUNT bytes from DEV starting at byte offset OFFS.  The
+   data is returned in *BUF and its length in *LEN: on entry they describe
+   a caller-supplied buffer, and if that is too small a freshly mmapped
+   region is returned in *BUF instead.  If successful, 0 is returned,
+   otherwise an error code is returned (and any region allocated here is
+   unmapped again).
+
+   When DEV->inhibit_cache is set only whole-block reads are accepted
+   (EINVAL otherwise) and they go straight to the store; if the store
+   reports no block size (0), the request is handed to the store as is.  */
+error_t
+dev_read (struct dev *dev, off_t offs, size_t whole_amount,
+          void **buf, size_t *len)
+{
+  error_t err;
+  int allocated_buf = 0;
+
+  /* Make sure *BUF can hold WHOLE_AMOUNT bytes, allocating a region if the
+     caller's buffer is too small.  This is called once per piece of a
+     multi-part read while *LEN still holds the caller's original size, so
+     it must allocate at most once; otherwise the pieces already copied
+     would be lost and the earlier region leaked.  */
+  error_t ensure_buf ()
+    {
+      if (!allocated_buf && *len < whole_amount)
+        {
+          void *new = mmap (0, whole_amount, PROT_READ|PROT_WRITE,
+                            MAP_ANON, 0, 0);
+          if (new == MAP_FAILED)
+            return errno;
+          *buf = new;
+          allocated_buf = 1;
+        }
+      return 0;
+    }
+  /* Copy LEN bytes from DEV's block buffer at BUF_OFFS to IO_OFFS bytes
+     into the result buffer.  */
+  error_t buf_read (size_t buf_offs, size_t io_offs, size_t len)
+    {
+      error_t err = ensure_buf ();
+      if (! err)
+        bcopy (dev->buf + buf_offs, *buf + io_offs, len);
+      return err;
+    }
+  /* Read LEN bytes directly from the store at byte offset OFFS, placing
+     them IO_OFFS bytes into the result buffer.  */
+  error_t raw_read (off_t offs, size_t io_offs, size_t len, size_t *amount)
+    {
+      struct store *store = dev->store;
+      off_t addr = offs >> store->log2_block_size;
+      if (len == whole_amount)
+        /* Just return whatever the device does.  */
+        return store_read (store, addr, len, buf, amount);
+      else
+        /* This read is returning less than the whole request, so we allocate
+           a buffer big enough to hold everything, in case we have to
+           coalesce multiple reads into a single return buffer.  */
+        {
+          error_t err = ensure_buf ();
+          if (! err)
+            {
+              void *_req_buf = *buf + io_offs, *req_buf = _req_buf;
+              size_t req_len = len;
+              err = store_read (store, addr, len, &req_buf, &req_len);
+              if (! err)
+                {
+                  if (req_buf != _req_buf)
+                    /* Copy from wherever the read put it.  */
+                    {
+                      bcopy (req_buf, _req_buf, req_len);
+                      munmap (req_buf, req_len);
+                    }
+                  *amount = req_len;
+                }
+            }
+          return err;
+        }
+    }
+
+  if (dev->inhibit_cache)
+    {
+      /* Under --no-cache, we permit only whole-block reads.  The mask and
+         shift arithmetic below relies on the block size being a power of
+         two.  If the device does not report a block size, we let any
+         attempt through on the assumption the device will enforce its own
+         limits.  */
+
+      struct store *store = dev->store;
+
+      if (store->block_size == 0)
+        /* We don't know the block size, so let the device enforce it.  */
+        return store_read (dev->store, offs, whole_amount, buf, len);
+
+      if ((offs & (store->block_size - 1)) != 0
+          || (whole_amount & (store->block_size - 1)) != 0)
+        /* Not whole blocks.  No can do.  */
+        return EINVAL;
+
+      /* Do a direct read from the store.  OFFS is a byte offset and must be
+         turned into a block number by shifting right, exactly as raw_read
+         above does; shifting left addressed the wrong block.  */
+      return store_read (dev->store, offs >> store->log2_block_size,
+                         whole_amount, buf, len);
+    }
+
+  err = dev_rw (dev, offs, whole_amount, len, buf_read, raw_read);
+  if (err && allocated_buf)
+    munmap (*buf, whole_amount);
+
+  return err;
+}
diff --git a/storeio/dev.h b/storeio/dev.h
new file mode 100644
index 00000000..d6e50102
--- /dev/null
+++ b/storeio/dev.h
@@ -0,0 +1,107 @@
+/* store `device' I/O
+
+ Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __DEV_H__
+#define __DEV_H__
+
+#include <mach.h>
+#include <device/device.h>
+#include <rwlock.h>
+#include <hurd/store.h>
+
+/* Information about a kernel device. */
+struct dev
+{
+ /* The device to which we're doing io. */
+ struct store *store;
+
+ /* The current owner of the open device. For terminals, this affects
+ controlling terminal behavior (see term_become_ctty). For all objects
+ this affects old-style async IO. Negative values represent pgrps. This
+ has nothing to do with the owner of a file (as returned by io_stat, and
+ as used for various permission checks by filesystems). An owner of 0
+ indicates that there is no owner. */
+ pid_t owner;
+
+ int enforced; /* Nonzero iff --enforced flag was given. */
+
+ /* Nonzero iff the --no-cache flag was given.
+ If this is set, the remaining members are not used at all
+ and don't need to be initialized or cleaned up. */
+ int inhibit_cache;
+
+
+ /* A bitmask corresponding to the part of an offset that lies within a
+ device block. */
+ unsigned block_mask;
+
+ /* Lock to arbitrate I/O through this device. Block I/O can occur in
+ parallel, and requires only a reader-lock.
+ Non-block I/O is always serialized, and requires a writer-lock. */
+ struct rwlock io_lock;
+
+ /* Non-block I/O is buffered through BUF. BUF_OFFS is the device offset
+ corresponding to the start of BUF (which holds one block); if it is -1,
+ then BUF is inactive. BUF_DIRTY is nonzero when BUF holds data that
+ has not yet been written back to the store. */
+ void *buf;
+ off_t buf_offs;
+ int buf_dirty;
+
+ /* The pager backing memory objects for this device, or 0 if there is
+ none. NOTE(review): PAGER_LOCK presumably guards PAGER; its users are
+ in pager.c -- confirm there. */
+ struct pager *pager;
+ struct mutex pager_lock;
+};
+
+/* Returns a pointer to a new device structure in DEV for the device
+ NAME, with the given FLAGS. INHIBIT_CACHE is recorded in the new
+ structure's inhibit_cache member (see struct dev).
+ If an error occurs, the error code is returned, otherwise 0. */
+error_t dev_open (struct store_parsed *name, int flags, int inhibit_cache,
+ struct dev **dev);
+
+/* Free DEV and any resources it consumes. */
+void dev_close (struct dev *dev);
+
+/* Returns in MEMOBJ the port for a memory object backed by the storage on
+ DEV. Returns 0 or the error code if an error occurred. */
+error_t dev_get_memory_object(struct dev *dev, vm_prot_t prot,
+ memory_object_t *memobj);
+
+/* Try to stop all paging activity on DEV, returning true if we were
+ successful. If NOSYNC is true, then we won't write back any (kernel)
+ cached pages to the device. */
+int dev_stop_paging (struct dev *dev, int nosync);
+
+/* Try and write out any pending writes to DEV. If WAIT is true, will wait
+ for any paging activity to cease. */
+error_t dev_sync (struct dev *dev, int wait);
+
+/* Write LEN bytes from BUF to DEV at offset OFFS, returning the amount
+ actually written in AMOUNT. If successful, 0 is returned, otherwise an
+ error code is returned. */
+error_t dev_write (struct dev *dev, off_t offs, void *buf, size_t len,
+ size_t *amount);
+
+/* Read up to AMOUNT bytes from DEV at offset OFFS, returned in BUF and LEN
+ with the usual mach memory result semantics. If successful, 0 is
+ returned, otherwise an error code is returned. */
+error_t dev_read (struct dev *dev, off_t offs, size_t amount,
+ void **buf, size_t *len);
+
+#endif /* !__DEV_H__ */
diff --git a/storeio/io.c b/storeio/io.c
new file mode 100644
index 00000000..cffe917a
--- /dev/null
+++ b/storeio/io.c
@@ -0,0 +1,366 @@
+/* The hurd io interface to storeio
+
+ Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd/trivfs.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "open.h"
+#include "dev.h"
+
+/* Return objects mapping the data underlying this memory object. If
+ the object can be read then memobjrd will be provided; if the
+ object can be written then memobjwr will be provided. For objects
+ where read data and write data are the same, these objects will be
+ equal, otherwise they will be disjoint. Servers are permitted to
+ implement io_map but not io_map_cntl. Some objects do not provide
+ mapping; they will set none of the ports and return an error. Such
+ objects can still be accessed by io_read and io_write. */
+error_t
+trivfs_S_io_map (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ memory_object_t *rd_obj, mach_msg_type_name_t *rd_type,
+ memory_object_t *wr_obj, mach_msg_type_name_t *wr_type)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & (O_READ|O_WRITE)))
+ return EBADF; /* Mapping needs the open to allow reading or writing. */
+ else
+ {
+ mach_port_t memobj;
+ int flags = cred->po->openmodes;
+ vm_prot_t prot = /* Protection mirrors the open modes. */
+ ((flags & O_READ) ? VM_PROT_READ : 0)
+ | ((flags & O_WRITE) ? VM_PROT_WRITE : 0);
+ struct open *open = (struct open *)cred->po->hook;
+ error_t err = dev_get_memory_object (open->dev, prot, &memobj);
+
+ if (!err)
+ {
+ if (flags & O_READ)
+ *rd_obj = memobj;
+ else
+ *rd_obj = MACH_PORT_NULL;
+ if (flags & O_WRITE)
+ *wr_obj = memobj;
+ else
+ *wr_obj = MACH_PORT_NULL;
+
+ if ((flags & (O_READ|O_WRITE)) == (O_READ|O_WRITE)
+ && memobj != MACH_PORT_NULL)
+ mach_port_mod_refs (mach_task_self (), memobj,
+ MACH_PORT_RIGHT_SEND, 1); /* Same port is returned twice, so add a second send right. */
+ }
+
+ *rd_type = *wr_type = MACH_MSG_TYPE_MOVE_SEND;
+
+ return err;
+ }
+}
+
+/* Read data from an IO object. If offset is -1, read from the object
+ maintained file pointer. If the object is not seekable, offset is
+ ignored. The amount desired to be read is in AMOUNT. */
+error_t
+trivfs_S_io_read (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ vm_address_t *data, mach_msg_type_number_t *data_len,
+ off_t offs, mach_msg_type_number_t amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_READ))
+ return EBADF; /* Not opened for reading. */
+ else
+ return open_read ((struct open *)cred->po->hook,
+ offs, amount, (void **)data, data_len);
+}
+
+/* Tell how much data can be read from the object without blocking for a
+ "long time" (the same meaning of "long time" as the nonblocking flag).
+ Reports zero when the per-open offset is at or past the end. */
+error_t
+trivfs_S_io_readable (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ mach_msg_type_number_t *amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_READ))
+ return EBADF;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ off_t left = open->dev->store->size - open->offs;
+ *amount = left > 0 ? left : 0; return 0; /* Never report a wrapped count. */
+ }
+}
+
+/* Write data to an IO object. If offset is -1, write at the object
+ maintained file pointer. If the object is not seekable, offset is
+ ignored. The amount successfully written is returned in amount. A
+ given user should not have more than one outstanding io_write on an
+ object at a time; servers implement congestion control by delaying
+ responses to io_write. Servers may drop data (returning ENOBUFS)
+ if they receive more than one write when not prepared for it. */
+error_t
+trivfs_S_io_write (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ vm_address_t data, mach_msg_type_number_t data_len,
+ off_t offs, mach_msg_type_number_t *amount)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else if (! (cred->po->openmodes & O_WRITE))
+ return EBADF; /* Not opened for writing. */
+ else
+ return open_write ((struct open *)cred->po->hook,
+ offs, (void *)data, data_len, amount);
+}
+
+/* Change current read/write offset. Fails with EOPNOTSUPP if no device is open. */
+error_t
+trivfs_S_io_seek (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ off_t offs, int whence, off_t *new_offs)
+{
+ if (! cred || ! cred->po->hook) /* The hook is unset when no device was opened. */
+ return EOPNOTSUPP;
+ else
+ return open_seek ((struct open *)cred->po->hook, offs, whence, new_offs);
+}
+
+/* SELECT_TYPE is the bitwise OR of SELECT_READ, SELECT_WRITE, and SELECT_URG.
+ Block until one of the indicated types of i/o can be done "quickly", and
+ return the types that are then available. */
+error_t
+trivfs_S_io_select (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int *type)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ *type &= ~SELECT_URG; /* Urgent data is never reported; other requested types stay set. */
+ return 0; /* Returns at once; this implementation never blocks. */
+}
+
+/* Truncate file. SIZE is ignored here: the call succeeds without changing anything. */
+error_t
+trivfs_S_file_set_size (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ off_t size)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0; /* Nothing to resize. */
+}
+
+/* These four routines modify the O_APPEND, O_ASYNC, O_FSYNC, and
+ O_NONBLOCK bits for the IO object. In addition, io_get_openmodes
+ will tell you which of O_READ, O_WRITE, and O_EXEC the object can
+ be used for. The O_ASYNC bit affects icky async I/O; good async
+ I/O is done through io_async which is orthogonal to these calls. */
+
+error_t
+trivfs_S_io_get_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int *bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ {
+ *bits = cred->po->openmodes; /* The modes recorded for this open. */
+ return 0;
+ }
+}
+
+error_t
+trivfs_S_io_set_all_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int mode)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0; /* MODE is accepted but not stored. */
+}
+
+error_t
+trivfs_S_io_set_some_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0; /* BITS is accepted but not stored. */
+}
+
+error_t
+trivfs_S_io_clear_some_openmodes (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ int bits)
+{
+ if (! cred)
+ return EOPNOTSUPP;
+ else
+ return 0; /* BITS is accepted but nothing is cleared. */
+}
+
+/* Get/set the owner of the IO object. For terminals, this affects
+ controlling terminal behavior (see term_become_ctty). For all
+ objects this affects old-style async IO. Negative values represent
+ pgrps. This has nothing to do with the owner of a file (as
+ returned by io_stat, and as used for various permission checks by
+ filesystems). An owner of 0 indicates that there is no owner. */
+error_t
+trivfs_S_io_get_owner (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ pid_t *owner)
+{
+ if (! cred || ! cred->po->hook) /* The hook is unset when no device was opened. */
+ return EOPNOTSUPP;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ *owner = open->dev->owner; /* The owner is kept per device, not per open. */
+ return 0;
+ }
+}
+
+error_t
+trivfs_S_io_mod_owner (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ pid_t owner)
+{
+ if (! cred || ! cred->po->hook) /* The hook is unset when no device was opened. */
+ return EOPNOTSUPP;
+ else
+ {
+ struct open *open = (struct open *)cred->po->hook;
+ open->dev->owner = owner; /* The owner is kept per device, not per open. */
+ return 0;
+ }
+}
+
+/* File syncing operations; these all do the same thing, sync the underlying
+ device. */
+
+error_t
+trivfs_S_file_sync (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int wait)
+{
+ if (cred && cred->po->hook) /* The hook is unset when no device was opened. */
+ return dev_sync (((struct open *)cred->po->hook)->dev, wait);
+ else
+ return EOPNOTSUPP;
+}
+
+error_t
+trivfs_S_file_syncfs (struct trivfs_protid *cred,
+ mach_port_t reply, mach_msg_type_name_t reply_type,
+ int wait, int dochildren)
+{
+ if (cred && cred->po->hook) /* The hook is unset when no device was opened. */
+ return dev_sync (((struct open *)cred->po->hook)->dev, wait); /* DOCHILDREN is unused. */
+ else
+ return EOPNOTSUPP;
+}
+
+error_t
+trivfs_S_file_get_storage_info (struct trivfs_protid *cred,
+ mach_port_t reply,
+ mach_msg_type_name_t reply_type,
+ mach_port_t **ports,
+ mach_msg_type_name_t *ports_type,
+ mach_msg_type_number_t *num_ports,
+ int **ints, mach_msg_type_number_t *num_ints,
+ off_t **offsets,
+ mach_msg_type_number_t *num_offsets,
+ char **data, mach_msg_type_number_t *data_len)
+{
+ *ports_type = MACH_MSG_TYPE_COPY_SEND;
+
+ if (! cred || ! cred->po->hook)
+ return EOPNOTSUPP;
+ else
+ {
+ error_t err;
+ struct dev *dev = ((struct open *)cred->po->hook)->dev;
+ struct store *store = dev->store;
+
+ if (dev->enforced && !(store->flags & STORE_ENFORCED))
+ {
+ /* The --enforced switch tells us not to let anyone
+ get at the device, no matter how trustable they are. */
+ size_t name_len = (store->name ? strlen (store->name) + 1 : 0);
+ int i; /* NOTE(review): assumes *INTS, *OFFSETS and *DATA are large enough -- confirm. */
+ *num_ports = 0;
+ i = 0;
+ (*ints)[i++] = STORAGE_OTHER;
+ (*ints)[i++] = store->flags;
+ (*ints)[i++] = store->block_size;
+ (*ints)[i++] = 1; /* num_runs */
+ (*ints)[i++] = name_len;
+ (*ints)[i++] = 0; /* misc_len */
+ *num_ints = i;
+ i = 0;
+ (*offsets)[i++] = 0;
+ (*offsets)[i++] = store->size;
+ *num_offsets = i;
+ if (store->name)
+ memcpy (*data, store->name, name_len);
+ *data_len = name_len;
+ return 0;
+ }
+
+ if (!cred->isroot
+ && !store_is_securely_returnable (store, cred->po->openmodes))
+ {
+ struct store *clone;
+ err = store_clone (store, &clone);
+ if (! err)
+ {
+ err = store_set_flags (clone, STORE_INACTIVE);
+ if (err == EINVAL)
+ err = EACCES;
+ else if (! err) /* Only hand out the clone once it really is inactive. */
+ err = store_return (clone,
+ ports, num_ports, ints, num_ints,
+ offsets, num_offsets, data, data_len);
+ store_free (clone);
+ }
+ }
+ else
+ err = store_return (store,
+ ports, num_ports, ints, num_ints,
+ offsets, num_offsets, data, data_len);
+
+ return err;
+ }
+}
diff --git a/storeio/open.c b/storeio/open.c
new file mode 100644
index 00000000..b1013a94
--- /dev/null
+++ b/storeio/open.c
@@ -0,0 +1,124 @@
+/* Per-open information for storeio
+
+ Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <stdio.h>
+
+#include "open.h"
+#include "dev.h"
+
+/* Returns a new per-open structure for the device DEV in OPEN. If an error
+ occurs, the error-code is returned, otherwise 0. */
+error_t
+open_create (struct dev *dev, struct open **open)
+{
+ *open = malloc (sizeof (struct open));
+ if (*open == NULL)
+ return ENOMEM;
+
+ (*open)->dev = dev; /* DEV is only referenced, not copied. */
+ (*open)->offs = 0; /* Each open starts at offset zero. */
+ mutex_init (&(*open)->lock);
+
+ return 0;
+}
+
+/* Free OPEN and any resources it holds. */
+void
+open_free (struct open *open)
+{
+ free (open); /* Only the per-open structure is released; its device is left alone. */
+}
+
+/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS
+ (which may be ignored if the device doesn't support random access),
+ and returns the number of bytes written in AMOUNT. If no error occurs,
+ zero is returned, otherwise the error code is returned. */
+error_t
+open_write (struct open *open, off_t offs, void *buf, size_t len,
+ size_t *amount)
+{
+ error_t err;
+ if (offs < 0)
+ /* Use OPEN's offset. */
+ {
+ mutex_lock (&open->lock); /* Held across the write so the offset update is atomic. */
+ err = dev_write (open->dev, open->offs, buf, len, amount);
+ if (! err)
+ open->offs += *amount;
+ mutex_unlock (&open->lock);
+ }
+ else
+ err = dev_write (open->dev, offs, buf, len, amount); /* Explicit offset: OPEN's own offset is untouched. */
+ return err;
+}
+
+/* Reads up to AMOUNT bytes from the device into BUF and LEN using the
+ standard mach out-array convention. If no error occurs, zero is returned,
+ otherwise the error code is returned. */
+error_t
+open_read (struct open *open, off_t offs, size_t amount,
+ void **buf, size_t *len)
+{
+ error_t err;
+ if (offs < 0)
+ /* Use OPEN's offset. */
+ {
+ mutex_lock (&open->lock); /* Held across the read so the offset update is atomic. */
+ err = dev_read (open->dev, open->offs, amount, buf, len);
+ if (! err)
+ open->offs += *len;
+ mutex_unlock (&open->lock);
+ }
+ else
+ err = dev_read (open->dev, offs, amount, buf, len); /* Explicit offset: OPEN's own offset is untouched. */
+ return err;
+}
+
+/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek.
+ The new absolute location is returned in NEW_OFFS (and may not be the same
+ as OFFS). If no error occurs, zero is returned, otherwise the error code
+ is returned. */
+error_t
+open_seek (struct open *open, off_t offs, int whence, off_t *new_offs)
+{
+ error_t err = 0;
+
+ mutex_lock (&open->lock);
+
+ switch (whence)
+ {
+ case SEEK_SET:
+ open->offs = offs; break;
+ case SEEK_CUR:
+ open->offs += offs; break;
+ case SEEK_END:
+ open->offs = open->dev->store->size + offs; break; /* OFFS is added to the size, as for lseek. */
+ default:
+ err = EINVAL; /* Unknown WHENCE; the offset is left unchanged. */
+ }
+
+ if (! err)
+ *new_offs = open->offs;
+
+ mutex_unlock (&open->lock);
+
+ return err;
+}
diff --git a/storeio/open.h b/storeio/open.h
new file mode 100644
index 00000000..cbac2a37
--- /dev/null
+++ b/storeio/open.h
@@ -0,0 +1,68 @@
+/* Per-open information for storeio
+
+ Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __OPEN_H__
+#define __OPEN_H__
+
+#include "dev.h"
+
+/* ---------------------------------------------------------------- */
+
+/* A structure describing a particular i/o stream on this device. */
+struct open
+{
+ /* The device that this is an open on. */
+ struct dev *dev;
+
+ /* The per-open offset used for I/O operations that don't specify an
+ explicit offset. */
+ off_t offs;
+
+ /* A lock used to control write access to OFFS. */
+ struct mutex lock;
+};
+
+/* Returns a new per-open structure for the device DEV in OPEN. If an error
+ occurs, the error-code is returned, otherwise 0. */
+error_t open_create (struct dev *dev, struct open **open);
+
+/* Free OPEN and any resources it holds. */
+void open_free (struct open *open);
+
+/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS
+ (which may be ignored if the device doesn't support random access),
+ and returns the number of bytes written in AMOUNT. If no error occurs,
+ zero is returned, otherwise the error code is returned. */
+error_t open_write (struct open *open, off_t offs, void *buf, size_t len,
+ size_t *amount);
+
+/* Reads up to AMOUNT bytes from the device into BUF and BUF_LEN using the
+ standard mach out-array convention. If no error occurs, zero is returned,
+ otherwise the error code is returned. */
+error_t open_read (struct open *open, off_t offs, size_t amount,
+ void **buf, size_t *buf_len);
+
+/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek.
+ The new absolute location is returned in NEW_OFFS (and may not be the same
+ as OFFS). If no error occurs, zero is returned, otherwise the error code
+ is returned. */
+error_t open_seek (struct open *open, off_t offs, int whence, off_t *new_offs);
+
+#endif /* !__OPEN_H__ */
diff --git a/storeio/pager.c b/storeio/pager.c
new file mode 100644
index 00000000..75456b83
--- /dev/null
+++ b/storeio/pager.c
@@ -0,0 +1,264 @@
+/* Paging interface for storeio devices
+
+ Copyright (C) 1995, 96, 97, 99 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <hurd.h>
+#include <hurd/pager.h>
+#include <assert.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "dev.h"
+
+/* ---------------------------------------------------------------- */
+/* Pager library callbacks; see <hurd/pager.h> for more info. */
+
+/* For pager PAGER, read one page from offset PAGE. Set *BUF to be the
+ address of the page, and set *WRITE_LOCK if the page must be provided
+ read-only. The only permissible error returns are EIO, EDQUOT, and
+ ENOSPC. */
+error_t
+pager_read_page (struct user_pager_info *upi,
+ vm_offset_t page, vm_address_t *buf, int *writelock)
+{
+ error_t err;
+ size_t read = 0; /* bytes actually read; size_t to match dev_read */
+ int want = vm_page_size; /* bytes we want to read */
+ struct dev *dev = (struct dev *)upi;
+ struct store *store = dev->store;
+
+ if (page + want > store->size)
+ /* Read a partial page if necessary to avoid reading off the end. */
+ want = store->size - page;
+
+ err = dev_read (dev, page, want, (void **)buf, &read);
+
+ if (!err && want < vm_page_size)
+ /* Zero anything we didn't read. Allocation only happens in page-size
+ multiples, so we know we can write there. */
+ bzero ((char *)*buf + want, vm_page_size - want);
+
+ *writelock = (store->flags & STORE_READONLY);
+
+ if (err || read < want)
+ return EIO; /* Any failure or short read is reported as EIO. */
+ else
+ return 0;
+}
+
+/* For pager PAGER, synchronously write one page from BUF to offset PAGE. In
+ addition, vm_deallocate (or equivalent) BUF. The only permissible error
+ returns are EIO, EDQUOT, and ENOSPC. */
+error_t
+pager_write_page (struct user_pager_info *upi,
+ vm_offset_t page, vm_address_t buf)
+{
+ struct dev *dev = (struct dev *)upi;
+ struct store *store = dev->store;
+
+ if (store->flags & STORE_READONLY)
+ return EROFS; /* NOTE(review): BUF is not deallocated on this path, and EROFS is not in the list above -- confirm. */
+ else
+ {
+ error_t err;
+ size_t written; /* size_t to match dev_write */
+ int want = vm_page_size;
+
+ if (page + want > store->size)
+ /* Write a partial page if necessary to avoid writing off the end. */
+ want = store->size - page;
+
+ err = dev_write (dev, page, (char *)buf, want, &written);
+
+ munmap ((caddr_t) buf, vm_page_size);
+
+ if (err || written < want)
+ return EIO; /* Any failure or short write is reported as EIO. */
+ else
+ return 0;
+ }
+}
+
+/* A page should be made writable. */
+error_t
+pager_unlock_page (struct user_pager_info *upi, vm_offset_t address)
+{
+ struct dev *dev = (struct dev *)upi;
+
+ if (dev->store->flags & STORE_READONLY)
+ return EROFS; /* Pages of a read-only store never become writable. */
+ else
+ return 0; /* ADDRESS is not examined; any page may be unlocked. */
+}
+
+/* The user must define this function. It should report back (in
+ *OFFSET and *SIZE) the minimum valid address the pager will accept
+ and the size of the object. */
+error_t
+pager_report_extent (struct user_pager_info *upi,
+ vm_address_t *offset, vm_size_t *size)
+{
+ *offset = 0;
+ *size = ((struct dev *)upi)->store->size; /* The object covers the whole store. */
+ return 0;
+}
+
+/* This is called when a pager is being deallocated after all extant send
+ rights have been destroyed. */
+void
+pager_clear_user_data (struct user_pager_info *upi)
+{
+ struct dev *dev = (struct dev *)upi; /* UPI is the device itself, as passed to pager_create. */
+ mutex_lock (&dev->pager_lock);
+ dev->pager = 0; /* dev_get_memory_object creates a new pager when this is null. */
+ mutex_unlock (&dev->pager_lock);
+}
+
+static struct port_bucket *pager_port_bucket = 0;
+
+/* A top-level function for the paging thread that just services paging
+ requests. */
+static void
+service_paging_requests (any_t arg)
+{
+ for (;;) /* Restart the server loop whenever it returns. */
+ ports_manage_port_operations_multithread (pager_port_bucket,
+ pager_demuxer,
+ 1000 * 30, 1000 * 60 * 5, 0); /* presumably timeouts in milliseconds -- confirm against libports */
+}
+
+/* Initialize paging: on first use, create the pager port bucket and start the thread that services it. */
+static void
+init_dev_paging ()
+{
+ if (! pager_port_bucket)
+ {
+ static struct mutex pager_global_lock = MUTEX_INITIALIZER;
+
+ mutex_lock (&pager_global_lock);
+ if (pager_port_bucket == NULL) /* Re-check under the lock in case another thread got here first. */
+ {
+ pager_port_bucket = ports_create_bucket ();
+
+ /* Make a thread to service paging requests. */
+ cthread_detach (cthread_fork ((cthread_fn_t)service_paging_requests,
+ (any_t)0));
+ }
+ mutex_unlock (&pager_global_lock);
+ }
+}
+
+void
+pager_dropweak (struct user_pager_info *upi __attribute__ ((unused)))
+{ /* Nothing to do here. */
+}
+
+/* Try to stop all paging activity on DEV, returning true if we were
+ successful. If NOSYNC is true, then we won't write back any (kernel)
+ cached pages to the device. */
+int
+dev_stop_paging (struct dev *dev, int nosync)
+{
+ size_t num_pagers = (pager_port_bucket ?
+ ports_count_bucket (pager_port_bucket) : 0); /* DEV is not consulted; the whole bucket is counted. */
+
+ if (num_pagers > 0 && !nosync)
+ {
+ error_t block_cache (void *arg) /* Iterator callback: turn caching off for one pager. */
+ {
+ struct pager *p = arg;
+ pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1);
+ return 0;
+ }
+ error_t enable_cache (void *arg) /* Iterator callback: turn caching back on for one pager. */
+ {
+ struct pager *p = arg;
+ pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+ return 0;
+ }
+
+ /* Loop through the pagers and turn off caching one by one,
+ synchronously. That should cause termination of each pager. */
+ ports_bucket_iterate (pager_port_bucket, block_cache);
+
+ /* Give it a second; the kernel doesn't actually shutdown
+ immediately. XXX */
+ sleep (1);
+
+ num_pagers = ports_count_bucket (pager_port_bucket);
+ if (num_pagers > 0)
+ /* Darn, there are actual honest users. Turn caching back on,
+ and return failure. */
+ ports_bucket_iterate (pager_port_bucket, enable_cache);
+ }
+
+ return num_pagers == 0; /* With NOSYNC set this is true only if there were no pagers to begin with. */
+}
+
+/* Returns in MEMOBJ the port for a memory object backed by the storage on
+ DEV. Returns 0 or the error code if an error occurred. */
+error_t
+dev_get_memory_object (struct dev *dev, vm_prot_t prot, memory_object_t *memobj)
+{
+ error_t err = store_map (dev->store, prot, memobj); /* Let the store supply the object first. */
+
+ if (err == EOPNOTSUPP && !dev->inhibit_cache) /* Otherwise fall back to our own pager, unless caching is inhibited. */
+ {
+ int created = 0;
+
+ init_dev_paging ();
+
+ mutex_lock (&dev->pager_lock);
+
+ if (dev->pager == NULL)
+ {
+ dev->pager =
+ pager_create ((struct user_pager_info *)dev, pager_port_bucket,
+ 1, MEMORY_OBJECT_COPY_DELAY);
+ if (dev->pager == NULL)
+ {
+ mutex_unlock (&dev->pager_lock);
+ return errno;
+ }
+ created = 1;
+ }
+
+ *memobj = pager_get_port (dev->pager);
+
+ if (*memobj == MACH_PORT_NULL)
+ /* Pager is currently being destroyed, try again. */
+ {
+ dev->pager = 0;
+ mutex_unlock (&dev->pager_lock);
+ return dev_get_memory_object (dev, prot, memobj);
+ }
+ else
+ err =
+ mach_port_insert_right (mach_task_self (),
+ *memobj, *memobj, MACH_MSG_TYPE_MAKE_SEND);
+
+ if (created)
+ ports_port_deref (dev->pager); /* presumably drops the reference pager_create returned -- confirm */
+
+ mutex_unlock (&dev->pager_lock);
+ }
+
+ return err;
+}
diff --git a/storeio/storeio.c b/storeio/storeio.c
new file mode 100644
index 00000000..8816f4cd
--- /dev/null
+++ b/storeio/storeio.c
@@ -0,0 +1,363 @@
+/* A translator for doing I/O to stores
+
+ Copyright (C) 1995, 96, 97, 98, 99 Free Software Foundation, Inc.
+
+ Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <error.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <argp.h>
+#include <argz.h>
+
+#include <hurd.h>
+#include <hurd/ports.h>
+#include <hurd/trivfs.h>
+#include <version.h>
+
+#include "open.h"
+#include "dev.h"
+
+static struct argp_option options[] =
+{
+ {"readonly", 'r', 0, 0,"Disallow writing"},
+ {"writable", 'w', 0, 0,"Allow writing"},
+ {"no-cache", 'c', 0, 0,"Never cache data--user io does direct device io"},
+ {"enforced", 'e', 0, 0,"Never reveal underlying devices, even to root"},
+ {"rdev", 'n', "ID", 0,
+ "The stat rdev number for this node; may be either a"
+ " single integer, or of the form MAJOR,MINOR"},
+ {0}
+};
+static const char doc[] = "Translator for devices and other stores";
+
+const char *argp_program_version = STANDARD_HURD_VERSION (storeio);
+
+/* The open store. */
+static struct dev *device = NULL;
+/* And a lock to arbitrate changes to it. */
+static struct mutex device_lock;
+
+/* Desired store parameters specified by the user. */
+struct store_parsed *store_name;
+static int readonly;
+
+/* Nonzero if user gave --no-cache flag. */
+static int inhibit_cache;
+
+/* Nonzero if user gave --enforced flag. */
+static int enforce_store;
+
+/* A unixy device number to return when the device is stat'd. */
+static int rdev;
+
+/* Parse a single option. Returns EINVAL for a malformed --rdev argument. */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+ switch (key)
+ {
+ case 'r': readonly = 1; break;
+ case 'w': readonly = 0; break;
+
+ case 'c': inhibit_cache = 1; break;
+ case 'e': enforce_store = 1; break;
+
+ case 'n':
+ {
+ char *start = arg, *end;
+
+ rdev = strtoul (start, &end, 0);
+ if (*end == ',')
+ /* MAJOR,MINOR form */
+ {
+ start = end + 1; /* Skip the comma so the minor number is parsed. */
+ rdev = (rdev << 8) + strtoul (start, &end, 0);
+ }
+
+ if (end == start || *end != '\0') /* No digits consumed, or trailing junk. */
+ {
+ argp_error (state, "%s: Invalid argument to --rdev", arg);
+ return EINVAL;
+ }
+ }
+ break;
+
+ case ARGP_KEY_INIT:
+ state->child_inputs[0] = state->input; break; /* Pass our input on to the child parser. */
+
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static const struct argp_child argp_kids[] = { { &store_argp }, {0} };
+static const struct argp argp = { options, parse_opt, 0, doc, argp_kids };
+
+int
+main (int argc, char *argv[])
+{
+ error_t err;
+ mach_port_t bootstrap;
+ struct trivfs_control *fsys;
+ struct store_argp_params store_params = { default_type: "device" };
+
+ argp_parse (&argp, argc, argv, 0, 0, &store_params);
+ store_name = store_params.result; /* Kept for opening the store later and for trivfs_append_args. */
+
+ if (readonly)
+ /* Catch illegal writes at the point of open. */
+ trivfs_allow_open &= ~O_WRITE;
+
+ task_get_bootstrap_port (mach_task_self (), &bootstrap);
+ if (bootstrap == MACH_PORT_NULL)
+ error (2, 0, "Must be started as a translator");
+
+ /* Reply to our parent */
+ err = trivfs_startup (bootstrap, 0, 0, 0, 0, 0, &fsys);
+ if (err)
+ error (3, err, "trivfs_startup");
+
+ /* Open the device only when necessary. */
+ device = NULL;
+ mutex_init (&device_lock);
+
+ /* Launch. */
+ ports_manage_port_operations_multithread (fsys->pi.bucket, trivfs_demuxer,
+ 30*1000, 5*60*1000, 0);
+
+ return 0;
+}
+
+error_t
+trivfs_append_args (struct trivfs_control *trivfs_control,
+ char **argz, size_t *argz_len)
+{
+ error_t err = 0;
+
+ if (rdev) /* A zero rdev is not reported. */
+ {
+ char buf[40];
+ snprintf (buf, sizeof buf, "--rdev=%d,%d", (rdev >> 8), rdev & 0xFF); /* The MAJOR,MINOR form accepted by --rdev. */
+ err = argz_add (argz, argz_len, buf);
+ }
+
+ if (!err && inhibit_cache)
+ err = argz_add (argz, argz_len, "--no-cache");
+
+ if (!err && enforce_store)
+ err = argz_add (argz, argz_len, "--enforced");
+
+ if (! err)
+ err = argz_add (argz, argz_len, readonly ? "--readonly" : "--writable");
+
+ if (! err)
+ err = store_parsed_append_args (store_name, argz, argz_len);
+
+ return err; /* The first failure stops further additions. */
+}
+
+/* Called whenever someone tries to open our node (even for a stat). We
+ delay opening the kernel device until this point, as we can usefully
+ return errors from here. */
+static error_t
+check_open_hook (struct trivfs_control *trivfs_control,
+ struct iouser *user,
+ int flags)
+{
+ error_t err = 0;
+
+ if (!err && readonly && (flags & O_WRITE))
+ return EROFS;
+
+ mutex_lock (&device_lock);
+ if (device == NULL)
+ /* Try and open the device. */
+ {
+ err = dev_open (store_name, readonly ? STORE_READONLY : 0, inhibit_cache,
+ &device);
+ if (err)
+ device = NULL;
+ else
+ device->enforced = enforce_store;
+ if (err && (flags & (O_READ|O_WRITE)) == 0)
+ /* If we're not opening for read or write, then just ignore the
+ error, as this allows stat to work correctly. XXX */
+ err = 0;
+ }
+ mutex_unlock (&device_lock);
+
+ return err;
+}
+
+static error_t
+open_hook (struct trivfs_peropen *peropen)
+{
+ struct dev *dev = device; /* Read without taking device_lock. */
+ if (dev)
+ return open_create (dev, (struct open **)&peropen->hook);
+ else
+ return 0; /* No device is open, so the hook is not set here. */
+}
+
+static void
+close_hook (struct trivfs_peropen *peropen)
+{
+ if (peropen->hook) /* Opens made without a device carry no hook. */
+ open_free (peropen->hook);
+}
+
+/* ---------------------------------------------------------------- */
+/* Trivfs hooks */
+
+int trivfs_fstype = FSTYPE_DEV;
+int trivfs_fsid = 0;
+
+int trivfs_support_read = 1;
+int trivfs_support_write = 1;
+int trivfs_support_exec = 0;
+
+int trivfs_allow_open = O_READ | O_WRITE;
+
+void
+trivfs_modify_stat (struct trivfs_protid *cred, struct stat *st)
+{
+ struct open *open = cred->po->hook;
+
+ st->st_mode &= ~S_IFMT; /* Clear the file type; it is set again below. */
+
+ if (open)
+ /* An open device. */
+ {
+ struct store *store = open->dev->store;
+ vm_size_t size = store->size;
+
+ if (store->block_size > 1)
+ st->st_blksize = store->block_size;
+
+ st->st_size = size;
+ st->st_blocks = size / 512; /* Size expressed in 512-byte units. */
+
+ st->st_mode |= ((inhibit_cache || store->block_size == 1)
+ ? S_IFCHR : S_IFBLK); /* Uncached or byte-granular stores show up as character devices. */
+ }
+ else
+ /* Try and do things without an open device... */
+ {
+ st->st_blksize = 0;
+ st->st_size = 0;
+ st->st_blocks = 0;
+
+ st->st_mode |= inhibit_cache ? S_IFCHR : S_IFBLK;
+ }
+
+ st->st_rdev = rdev;
+ if (readonly || (open && (open->dev->store->flags & STORE_READONLY)))
+ st->st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); /* Hide write permission on read-only stores. */
+}
+
+error_t
+trivfs_goaway (struct trivfs_control *fsys, int flags)
+{
+ error_t err;
+ int force = (flags & FSYS_GOAWAY_FORCE);
+ int nosync = (flags & FSYS_GOAWAY_NOSYNC);
+ struct port_class *root_port_class = fsys->protid_class;
+
+ mutex_lock (&device_lock);
+
+ if (device == NULL)
+ exit (0); /* Nothing was ever opened, so there is nothing to shut down. */
+
+ /* Wait until all pending rpcs are done. */
+ err = ports_inhibit_class_rpcs (root_port_class);
+ if (err == EINTR || (err && !force))
+ {
+ mutex_unlock (&device_lock);
+ return err;
+ }
+
+ if (force && nosync)
+ /* Exit with extreme prejudice. */
+ exit (0);
+
+ if (!force && ports_count_class (root_port_class) > 0)
+ /* Still users, so don't exit. */
+ goto busy;
+
+ if (! nosync)
+ /* Sync the device here, if necessary, so that closing it won't result in
+ any I/O (which could get hung up trying to use one of our pagers). */
+ dev_sync (device, 1);
+
+ /* dev_stop_paging may sync the pagers as side-effect (if NOSYNC is 0),
+ so we put that first in this test. */
+ if (dev_stop_paging (device, nosync) || force)
+ /* Bye-bye. */
+ {
+ if (! nosync)
+ /* If NOSYNC is true, we don't close DEV, as that could cause data to
+ be written back. */
+ dev_close (device);
+ exit (0);
+ }
+
+ busy:
+ /* Allow normal operations to proceed. */
+ ports_enable_class (root_port_class);
+ ports_resume_class_rpcs (root_port_class);
+ mutex_unlock (&device_lock);
+
+ /* Complain that there are still users. */
+ return EBUSY;
+}
+
+/* If this variable is set, it is called every time an open happens.
+ USER and FLAGS are from the open; CNTL identifies the
+ node being opened. This call need not check permissions on the underlying
+ node. If the open call should block, then return EWOULDBLOCK. Other
+ errors are immediately reflected to the user. If O_NONBLOCK
+ is not set in FLAGS and EWOULDBLOCK is returned, then call
+ trivfs_complete_open when all pending open requests for this
+ file can complete. */
+error_t (*trivfs_check_open_hook)(struct trivfs_control *trivfs_control,
+ struct iouser *user,
+ int flags)
+ = check_open_hook;
+
+/* If this variable is set, it is called every time a new peropen
+ structure is created and initialized. */
+error_t (*trivfs_peropen_create_hook)(struct trivfs_peropen *) = open_hook;
+
+/* If this variable is set, it is called every time a peropen structure
+ is about to be destroyed. */
+void (*trivfs_peropen_destroy_hook) (struct trivfs_peropen *) = close_hook;
+
+/* Sync this filesystem. */
+kern_return_t
+trivfs_S_fsys_syncfs (struct trivfs_control *cntl,
+ mach_port_t reply, mach_msg_type_name_t replytype,
+ int wait, int dochildren)
+{
+ struct dev *dev = device; /* Read without taking device_lock. */
+ if (dev)
+ return dev_sync (dev, wait); /* DOCHILDREN is unused. */
+ else
+ return 0; /* No device open, so nothing to sync. */
+}