diff options
Diffstat (limited to 'linux/dev/glue/block.c')
-rw-r--r-- | linux/dev/glue/block.c | 1691 |
1 files changed, 1691 insertions, 0 deletions
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c new file mode 100644 index 00000000..9d6bd6dd --- /dev/null +++ b/linux/dev/glue/block.c @@ -0,0 +1,1691 @@ +/* + * Linux block driver support. + * + * Copyright (C) 1996 The University of Utah and the Computer Systems + * Laboratory at the University of Utah (CSL) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Shantanu Goel, University of Utah CSL + */ + +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + */ + +/* + * linux/fs/block_dev.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * linux/fs/buffer.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <sys/types.h> +#include <machine/spl.h> +#include <mach/mach_types.h> +#include <mach/kern_return.h> +#include <mach/mig_errors.h> +#include <mach/port.h> +#include <mach/vm_param.h> +#include <mach/notify.h> + +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> + +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <device/device_types.h> +#include <device/device_port.h> +#include <device/disk_status.h> +#include "device_reply.h" + +#include <linux_emul.h> + +#define MACH_INCLUDE +#include <linux/fs.h> +#include <linux/blk.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/major.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/malloc.h> +#include <linux/hdreg.h> +#include <asm/io.h> + +extern int linux_auto_config; +extern int linux_intr_pri; +extern int linux_to_mach_error (int); + +/* This task queue is not used in Mach: just for fixing undefined symbols. */ +DECLARE_TASK_QUEUE (tq_disk); + +/* Location of VTOC in units for sectors (512 bytes). */ +#define PDLOCATION 29 + +/* Linux kernel variables. */ + +/* Temporary data allocated on the stack. */ +struct temp_data +{ + struct inode inode; + struct file file; + struct request req; + queue_head_t pages; +}; + +/* One of these exists for each + driver associated with a major number. */ +struct device_struct +{ + const char *name; /* device name */ + struct file_operations *fops; /* operations vector */ + int busy:1; /* driver is being opened/closed */ + int want:1; /* someone wants to open/close driver */ + struct gendisk *gd; /* DOS partition information */ + int default_slice; /* what slice to use when none is given */ + struct disklabel **labels; /* disklabels for each DOS partition */ +}; + +/* An entry in the Mach name to Linux major number conversion table. */ +struct name_map +{ + const char *name; /* Mach name for device */ + unsigned major; /* Linux major number */ + unsigned unit; /* Linux unit number */ + int read_only; /* 1 if device is read only */ +}; + +/* Driver operation table. */ +static struct device_struct blkdevs[MAX_BLKDEV]; + +/* Driver request function table. */ +struct blk_dev_struct blk_dev[MAX_BLKDEV] = +{ + { NULL, NULL }, /* 0 no_dev */ + { NULL, NULL }, /* 1 dev mem */ + { NULL, NULL }, /* 2 dev fd */ + { NULL, NULL }, /* 3 dev ide0 or hd */ + { NULL, NULL }, /* 4 dev ttyx */ + { NULL, NULL }, /* 5 dev tty */ + { NULL, NULL }, /* 6 dev lp */ + { NULL, NULL }, /* 7 dev pipes */ + { NULL, NULL }, /* 8 dev sd */ + { NULL, NULL }, /* 9 dev st */ + { NULL, NULL }, /* 10 */ + { NULL, NULL }, /* 11 */ + { NULL, NULL }, /* 12 */ + { NULL, NULL }, /* 13 */ + { NULL, NULL }, /* 14 */ + { NULL, NULL }, /* 15 */ + { NULL, NULL }, /* 16 */ + { NULL, NULL }, /* 17 */ + { NULL, NULL }, /* 18 */ + { NULL, NULL }, /* 19 */ + { NULL, NULL }, /* 20 */ + { NULL, NULL }, /* 21 */ + { NULL, NULL } /* 22 dev ide1 */ +}; + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int *blk_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. + */ +int *blksize_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi device and read by the msdos fs driver + * This might be a some uses later. + */ +int *hardsect_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* This specifies how many sectors to read ahead on the disk. + This is unused in Mach. It is here to make drivers compile. */ +int read_ahead[MAX_BLKDEV] = {0, }; + +/* Use to wait on when there are no free requests. + This is unused in Mach. It is here to make drivers compile. */ +struct wait_queue *wait_for_request = NULL; + +/* Map for allocating device memory. */ +extern vm_map_t device_io_map; + +/* Initialize block drivers. */ +int +blk_dev_init () +{ +#ifdef CONFIG_BLK_DEV_IDE + ide_init (); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init (); +#else + outb_p (0xc, 0x3f2); +#endif + return 0; +} + +/* Return 1 if major number MAJOR corresponds to a disk device. */ +static inline int +disk_major (int major) +{ + return (major == IDE0_MAJOR + || major == IDE1_MAJOR + || major == IDE2_MAJOR + || major == IDE3_MAJOR + || major == SCSI_DISK_MAJOR); +} + +/* Linux kernel block support routines. */ + +/* Register a driver for major number MAJOR, + with name NAME, and operations vector FOPS. */ +int +register_blkdev (unsigned major, const char *name, + struct file_operations *fops) +{ + int err = 0; + + if (major == 0) + { + for (major = MAX_BLKDEV - 1; major > 0; major--) + if (blkdevs[major].fops == NULL) + goto out; + return -LINUX_EBUSY; + } + if (major >= MAX_BLKDEV) + return -LINUX_EINVAL; + if (blkdevs[major].fops && blkdevs[major].fops != fops) + return -LINUX_EBUSY; + +out: + blkdevs[major].name = name; + blkdevs[major].fops = fops; + blkdevs[major].busy = 0; + blkdevs[major].want = 0; + blkdevs[major].gd = NULL; + blkdevs[major].default_slice = 0; + blkdevs[major].labels = NULL; + return 0; +} + +/* Unregister the driver associated with + major number MAJOR and having the name NAME. */ +int +unregister_blkdev (unsigned major, const char *name) +{ + int err; + + if (major >= MAX_BLKDEV) + return -LINUX_EINVAL; + if (! blkdevs[major].fops || strcmp (blkdevs[major].name, name)) + return -LINUX_EINVAL; + blkdevs[major].fops = NULL; + if (blkdevs[major].labels) + { + assert (blkdevs[major].gd); + kfree ((vm_offset_t) blkdevs[major].labels, + (sizeof (struct disklabel *) + * blkdevs[major].gd->max_p * blkdevs[major].gd->max_nr)); + } + return 0; +} + +void +set_blocksize (kdev_t dev, int size) +{ + extern int *blksize_size[]; + + if (! blksize_size[MAJOR (dev)]) + return; + + switch (size) + { + case 512: + case 1024: + case 2048: + case 4096: + break; + default: + panic ("Invalid blocksize passed to set_blocksize"); + break; + } + blksize_size[MAJOR (dev)][MINOR (dev)] = size; +} + +/* Allocate a buffer SIZE bytes long. */ +static void * +alloc_buffer (int size) +{ + vm_page_t m; + struct temp_data *d; + + assert (size <= PAGE_SIZE); + + if (! linux_auto_config) + { + while ((m = vm_page_grab ()) == 0) + VM_PAGE_WAIT (0); + d = current_thread ()->pcb->data; + assert (d); + queue_enter (&d->pages, m, vm_page_t, pageq); + return (void *) m->phys_addr; + } + return (void *) __get_free_pages (GFP_KERNEL, 0, ~0UL); +} + +/* Free buffer P which is SIZE bytes long. */ +static void +free_buffer (void *p, int size) +{ + int i; + struct temp_data *d; + vm_page_t m; + + assert (size <= PAGE_SIZE); + + if (! linux_auto_config) + { + d = current_thread ()->pcb->data; + assert (d); + queue_iterate (&d->pages, m, vm_page_t, pageq) + { + if (m->phys_addr == (vm_offset_t) p) + { + queue_remove (&d->pages, m, vm_page_t, pageq); + vm_page_lock_queues (); + vm_page_free (m); + vm_page_lock_queues (); + return; + } + } + panic ("free_buffer"); + } + free_pages ((unsigned long) p, 0); +} + +/* Allocate a buffer of SIZE bytes and + associate it with block number BLOCK of device DEV. */ +struct buffer_head * +getblk (kdev_t dev, int block, int size) +{ + struct buffer_head *bh; + static struct buffer_head bhead; + + assert (size <= PAGE_SIZE); + + if (! linux_auto_config) + bh = (struct buffer_head *) kalloc (sizeof (struct buffer_head)); + else + bh = &bhead; + if (bh) + { + memset (bh, 0, sizeof (struct buffer_head)); + bh->b_data = alloc_buffer (size); + if (! bh->b_data) + { + if (! linux_auto_config) + kfree ((vm_offset_t) bh, sizeof (struct buffer_head)); + return NULL; + } + bh->b_dev = dev; + bh->b_size = size; + bh->b_state = 1 << BH_Lock; + bh->b_blocknr = block; + } + return bh; +} + +/* Release buffer BH previously allocated by getblk. */ +void +__brelse (struct buffer_head *bh) +{ + free_buffer (bh->b_data, bh->b_size); + if (! linux_auto_config) + kfree ((vm_offset_t) bh, sizeof (*bh)); +} + +/* Allocate a buffer of SIZE bytes and fill it with data + from device DEV starting at block number BLOCK. */ +struct buffer_head * +bread (kdev_t dev, int block, int size) +{ + int err; + struct buffer_head *bh; + + bh = getblk (dev, block, size); + if (bh) + { + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (! buffer_uptodate (bh)) + { + __brelse (bh); + return NULL; + } + } + return bh; +} + +/* Return the block size for device DEV in *BSIZE and + log2(block size) in *BSHIFT. */ +static void +get_block_size (kdev_t dev, int *bsize, int *bshift) +{ + int i; + + *bsize = BLOCK_SIZE; + if (blksize_size[MAJOR (dev)] + && blksize_size[MAJOR (dev)][MINOR (dev)]) + *bsize = blksize_size[MAJOR (dev)][MINOR (dev)]; + for (i = *bsize, *bshift = 0; i != 1; i >>= 1, (*bshift)++) + ; +} + +/* Enqueue request REQ on a driver's queue. */ +static inline void +enqueue_request (struct request *req) +{ + struct request *tmp; + struct blk_dev_struct *dev; + + dev = blk_dev + MAJOR (req->rq_dev); + cli (); + tmp = dev->current_request; + if (! tmp) + { + dev->current_request = req; + (*dev->request_fn) (); + sti (); + return; + } + while (tmp->next) + { + if ((IN_ORDER (tmp, req) || ! IN_ORDER (tmp, tmp->next)) + && IN_ORDER (req, tmp->next)) + break; + tmp = tmp->next; + } + req->next = tmp->next; + tmp->next = req; + if (scsi_blk_major (MAJOR (req->rq_dev))) + (*dev->request_fn) (); + sti (); +} + +/* Perform the I/O operation RW on the buffer list BH + containing NR buffers. */ +void +ll_rw_block (int rw, int nr, struct buffer_head **bh) +{ + int i, bshift, bsize; + unsigned major; + struct request *r; + static struct request req; + + major = MAJOR (bh[0]->b_dev); + assert (major < MAX_BLKDEV); + + get_block_size (bh[0]->b_dev, &bsize, &bshift); + + if (! linux_auto_config) + { + assert (current_thread ()->pcb->data); + r = &((struct temp_data *) current_thread ()->pcb->data)->req; + } + else + r = &req; + + for (i = 0, r->nr_sectors = 0; i < nr - 1; i++) + { + r->nr_sectors += bh[i]->b_size >> 9; + bh[i]->b_reqnext = bh[i + 1]; + } + r->nr_sectors += bh[i]->b_size >> 9; + bh[i]->b_reqnext = NULL; + + r->rq_status = RQ_ACTIVE; + r->rq_dev = bh[0]->b_dev; + r->cmd = rw; + r->errors = 0; + r->sector = bh[0]->b_blocknr << (bshift - 9); + r->current_nr_sectors = bh[0]->b_size >> 9; + r->buffer = bh[0]->b_data; + r->bh = bh[0]; + r->bhtail = bh[nr - 1]; + r->sem = NULL; + r->next = NULL; + + enqueue_request (r); +} + +#define BSIZE (1 << bshift) +#define BMASK (BSIZE - 1) + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer *BUF of size *RESID. + The device block size is given by BSHIFT. *OFF and + *RESID may be non-multiples of the block size. + *OFF, *BUF and *RESID are updated if the operation + completed successfully. */ +static int +rdwr_partial (int rw, kdev_t dev, loff_t *off, + char **buf, int *resid, int bshift) +{ + int c, err = 0, o; + long sect, nsect; + struct buffer_head bhead, *bh = &bhead; + struct gendisk *gd; + + memset (bh, 0, sizeof (struct buffer_head)); + bh->b_state = 1 << BH_Lock; + bh->b_dev = dev; + bh->b_blocknr = *off >> bshift; + bh->b_size = BSIZE; + + /* Check if this device has non even number of blocks. */ + for (gd = gendisk_head, nsect = -1; gd; gd = gd->next) + if (gd->major == MAJOR (dev)) + { + nsect = gd->part[MINOR (dev)].nr_sects; + break; + } + if (nsect > 0) + { + sect = bh->b_blocknr << (bshift - 9); + assert ((nsect - sect) > 0); + if (nsect - sect < (BSIZE >> 9)) + bh->b_size = (nsect - sect) << 9; + } + bh->b_data = alloc_buffer (bh->b_size); + if (! bh->b_data) + return -LINUX_ENOMEM; + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (buffer_uptodate (bh)) + { + o = *off & BMASK; + c = bh->b_size - o; + assert (*resid <= c); + if (c > *resid) + c = *resid; + if (rw == READ) + memcpy (*buf, bh->b_data + o, c); + else + { + memcpy (bh->b_data + o, *buf, c); + bh->b_state = (1 << BH_Dirty) | (1 << BH_Lock); + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + if (! buffer_uptodate (bh)) + { + err = -LINUX_EIO; + goto out; + } + } + *buf += c; + *resid -= c; + *off += c; + } + else + err = -LINUX_EIO; +out: + free_buffer (bh->b_data, bh->b_size); + return err; +} + +#define BH_Bounce 16 +#define MAX_BUF VM_MAP_COPY_PAGE_LIST_MAX + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer *BUF of size *RESID. + The device block size is given by BSHIFT. *OFF and + *RESID must be multiples of the block size. + *OFF, *BUF and *RESID are updated if the operation + completed successfully. */ +static int +rdwr_full (int rw, kdev_t dev, loff_t *off, char **buf, int *resid, int bshift) +{ + int cc, err = 0, i, j, nb, nbuf; + long blk; + struct buffer_head bhead[MAX_BUF], *bh, *bhp[MAX_BUF]; + + assert ((*off & BMASK) == 0); + assert (*resid >= bsize); + + nbuf = *resid >> bshift; + blk = *off >> bshift; + for (i = nb = 0, bh = bhead; nb < nbuf; bh++) + { + memset (bh, 0, sizeof (*bh)); + bh->b_dev = dev; + bh->b_blocknr = blk; + set_bit (BH_Lock, &bh->b_state); + if (rw == WRITE) + set_bit (BH_Dirty, &bh->b_state); + cc = PAGE_SIZE - (((int) *buf) & PAGE_MASK); + if (cc >= BSIZE && ((int) *buf & 511) == 0) + cc &= ~BMASK; + else + { + cc = PAGE_SIZE; + set_bit (BH_Bounce, &bh->b_state); + } + if (cc > ((nbuf - nb) << bshift)) + cc = (nbuf - nb) << bshift; + if (! test_bit (BH_Bounce, &bh->b_state)) + bh->b_data = (char *) pmap_extract (vm_map_pmap (device_io_map), + (((vm_offset_t) *buf) + + (nb << bshift))); + else + { + bh->b_data = alloc_buffer (cc); + if (! bh->b_data) + { + err = -LINUX_ENOMEM; + break; + } + if (rw == WRITE) + memcpy (bh->b_data, *buf + (nb << bshift), cc); + } + bh->b_size = cc; + bhp[i] = bh; + nb += cc >> bshift; + blk += nb; + if (++i == MAX_BUF) + break; + } + if (! err) + { + ll_rw_block (rw, i, bhp); + wait_on_buffer (bhp[i - 1]); + } + for (bh = bhead, cc = 0, j = 0; j < i; cc += bh->b_size, bh++, j++) + { + if (! err && buffer_uptodate (bh) + && rw == READ && test_bit (BH_Bounce, &bh->b_state)) + memcpy (*buf + cc, bh->b_data, bh->b_size); + else if (! err && ! buffer_uptodate (bh)) + err = -LINUX_EIO; + if (test_bit (BH_Bounce, &bh->b_state)) + free_buffer (bh->b_data, bh->b_size); + } + if (! err) + { + *buf += cc; + *resid -= cc; + *off += cc; + } + return err; +} + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer BUF of size COUNT. + *OFF is updated if the operation completed successfully. */ +static int +do_rdwr (int rw, kdev_t dev, loff_t *off, char *buf, int count) +{ + int bsize, bshift, err = 0, resid = count; + + get_block_size (dev, &bsize, &bshift); + if (*off & BMASK) + err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); + while (resid >= bsize && ! err) + err = rdwr_full (rw, dev, off, &buf, &resid, bshift); + if (! err && resid) + err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); + return err ? err : count - resid; +} + +int +block_write (struct inode *inode, struct file *filp, + const char *buf, int count) +{ + return do_rdwr (WRITE, inode->i_rdev, &filp->f_pos, (char *) buf, count); +} + +int +block_read (struct inode *inode, struct file *filp, char *buf, int count) +{ + return do_rdwr (READ, inode->i_rdev, &filp->f_pos, buf, count); +} + +/* + * This routine checks whether a removable media has been changed, + * and invalidates all buffer-cache-entries in that case. This + * is a relatively slow routine, so we have to try to minimize using + * it. Thus it is called only upon a 'mount' or 'open'. This + * is the best way of combining speed and utility, I think. + * People changing diskettes in the middle of an operation deserve + * to loose :-) + */ +int +check_disk_change (kdev_t dev) +{ + unsigned i; + struct file_operations * fops; + + i = MAJOR(dev); + if (i >= MAX_BLKDEV || (fops = blkdevs[i].fops) == NULL) + return 0; + if (fops->check_media_change == NULL) + return 0; + if (! (*fops->check_media_change) (dev)) + return 0; + + /* printf ("Disk change detected on device %s\n", kdevname(dev));*/ + + if (fops->revalidate) + (*fops->revalidate) (dev); + + return 1; +} + +/* Mach device interface routines. */ + +/* Mach name to Linux major/minor number mapping table. */ +static struct name_map name_to_major[] = +{ + /* IDE disks */ + { "hd0", IDE0_MAJOR, 0, 0 }, + { "hd1", IDE0_MAJOR, 1, 0 }, + { "hd2", IDE1_MAJOR, 0, 0 }, + { "hd3", IDE1_MAJOR, 1, 0 }, + { "hd4", IDE2_MAJOR, 0, 0 }, + { "hd5", IDE2_MAJOR, 1, 0 }, + { "hd6", IDE3_MAJOR, 0, 0 }, + { "hd7", IDE3_MAJOR, 1, 0 }, + + /* IDE CDROMs */ + { "wcd0", IDE0_MAJOR, 0, 1 }, + { "wcd1", IDE0_MAJOR, 1, 1 }, + { "wcd2", IDE1_MAJOR, 0, 1 }, + { "wcd3", IDE1_MAJOR, 1, 1 }, + { "wcd4", IDE2_MAJOR, 0, 1 }, + { "wcd5", IDE2_MAJOR, 1, 1 }, + { "wcd6", IDE3_MAJOR, 0, 1 }, + { "wcd7", IDE3_MAJOR, 1, 1 }, + + /* SCSI disks */ + { "sd0", SCSI_DISK_MAJOR, 0, 0 }, + { "sd1", SCSI_DISK_MAJOR, 1, 0 }, + { "sd2", SCSI_DISK_MAJOR, 2, 0 }, + { "sd3", SCSI_DISK_MAJOR, 3, 0 }, + { "sd4", SCSI_DISK_MAJOR, 4, 0 }, + { "sd5", SCSI_DISK_MAJOR, 5, 0 }, + { "sd6", SCSI_DISK_MAJOR, 6, 0 }, + { "sd7", SCSI_DISK_MAJOR, 7, 0 }, + + /* SCSI CDROMs */ + { "cd0", SCSI_CDROM_MAJOR, 0, 1 }, + { "cd1", SCSI_CDROM_MAJOR, 1, 1 }, + + /* Floppy disks */ + { "fd0", FLOPPY_MAJOR, 0, 0 }, + { "fd1", FLOPPY_MAJOR, 1, 0 }, +}; + +#define NUM_NAMES (sizeof (name_to_major) / sizeof (name_to_major[0])) + +/* One of these is associated with each open instance of a device. */ +struct block_data +{ + const char *name; /* Mach name for device */ + int want:1; /* someone is waiting for I/O to complete */ + int open_count; /* number of opens */ + int iocount; /* number of pending I/O operations */ + int part; /* BSD partition number (-1 if none) */ + int flags; /* Linux file flags */ + int mode; /* Linux file mode */ + kdev_t dev; /* Linux device number */ + ipc_port_t port; /* port representing device */ + struct device_struct *ds; /* driver operation table entry */ + struct device device; /* generic device header */ + struct name_map *np; /* name to inode map */ + struct block_data *next; /* forward link */ +}; + +/* List of open devices. */ +static struct block_data *open_list; + +/* Forward declarations. */ + +extern struct device_emulation_ops linux_block_emulation_ops; + +static io_return_t device_close (void *); + +/* Return a send right for block device BD. */ +static ipc_port_t +dev_to_port (void *bd) +{ + return (bd + ? ipc_port_make_send (((struct block_data *) bd)->port) + : IP_NULL); +} + +/* Return 1 if C is a letter of the alphabet. */ +static inline int +isalpha (int c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +/* Return 1 if C is a digit. */ +static inline int +isdigit (int c) +{ + return c >= '0' && c <= '9'; +} + +/* Find the name map entry for device NAME. + Set *SLICE to be the DOS partition and + *PART the BSD/Mach partition, if any. */ +static struct name_map * +find_name (char *name, int *slice, int *part) +{ + char *p, *q; + int i, len; + struct name_map *np; + + /* Parse name into name, unit, DOS partition (slice) and partition. */ + for (*slice = 0, *part = -1, p = name; isalpha (*p); p++) + ; + if (p == name || ! isdigit (*p)) + return NULL; + do + p++; + while (isdigit (*p)); + if (*p) + { + q = p; + if (*q == 's' && isdigit (*(q + 1))) + { + q++; + do + *slice = *slice * 10 + *q++ - '0'; + while (isdigit (*q)); + if (! *q) + goto find_major; + } + if (! isalpha (*q) || *(q + 1)) + return NULL; + *part = *q - 'a'; + } + +find_major: + /* Convert name to major number. */ + for (i = 0, np = name_to_major; i < NUM_NAMES; i++, np++) + { + len = strlen (np->name); + if (len == (p - name) && ! strncmp (np->name, name, len)) + return np; + } + return NULL; +} + +/* Attempt to read a BSD disklabel from device DEV. */ +static struct disklabel * +read_bsd_label (kdev_t dev) +{ + int bsize, bshift; + struct buffer_head *bh; + struct disklabel *dlp, *lp = NULL; + + get_block_size (dev, &bsize, &bshift); + bh = bread (dev, LBLLOC >> (bshift - 9), bsize); + if (bh) + { + dlp = (struct disklabel *) (bh->b_data + ((LBLLOC << 9) & (bsize - 1))); + if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC) + { + lp = (struct disklabel *) kalloc (sizeof (*lp)); + assert (lp); + memcpy (lp, dlp, sizeof (*lp)); + } + __brelse (bh); + } + return lp; +} + +/* Attempt to read a VTOC from device DEV. */ +static struct disklabel * +read_vtoc (kdev_t dev) +{ + int bshift, bsize, i; + struct buffer_head *bh; + struct evtoc *evp; + struct disklabel *lp = NULL; + + get_block_size (dev, &bsize, &bshift); + bh = bread (dev, PDLOCATION >> (bshift - 9), bsize); + if (bh) + { + evp = (struct evtoc *) (bh->b_data + ((PDLOCATION << 9) & (bsize - 1))); + if (evp->sanity == VTOC_SANE) + { + lp = (struct disklabel *) kalloc (sizeof (*lp)); + assert (lp); + lp->d_npartitions = evp->nparts; + if (lp->d_npartitions > MAXPARTITIONS) + lp->d_npartitions = MAXPARTITIONS; + for (i = 0; i < lp->d_npartitions; i++) + { + lp->d_partitions[i].p_size = evp->part[i].p_size; + lp->d_partitions[i].p_offset = evp->part[i].p_start; + lp->d_partitions[i].p_fstype = FS_BSDFFS; + } + } + __brelse (bh); + } + return lp; +} + +/* Initialize BSD/Mach partition table for device + specified by NP, DS and *DEV. Check SLICE and *PART for validity. */ +static kern_return_t +init_partition (struct name_map *np, kdev_t *dev, + struct device_struct *ds, int slice, int *part) +{ + int err, i, j; + struct disklabel *lp; + struct gendisk *gd = ds->gd; + struct partition *p; + struct temp_data *d = current_thread ()->pcb->data; + + if (! gd) + { + *part = -1; + return 0; + } + if (ds->labels) + goto check; + ds->labels = (struct disklabel **) kalloc (sizeof (struct disklabel *) + * gd->max_nr * gd->max_p); + if (! ds->labels) + return D_NO_MEMORY; + memset ((void *) ds->labels, 0, + sizeof (struct disklabel *) * gd->max_nr * gd->max_p); + for (i = 1; i < gd->max_p; i++) + { + d->inode.i_rdev = *dev | i; + if (gd->part[MINOR (d->inode.i_rdev)].nr_sects <= 0 + || gd->part[MINOR (d->inode.i_rdev)].start_sect < 0) + continue; + linux_intr_pri = SPL5; + d->file.f_flags = 0; + d->file.f_mode = O_RDONLY; + if (ds->fops->open && (*ds->fops->open) (&d->inode, &d->file)) + continue; + lp = read_bsd_label (d->inode.i_rdev); + if (! lp) + lp = read_vtoc (d->inode.i_rdev); + if (ds->fops->release) + (*ds->fops->release) (&d->inode, &d->file); + if (lp) + { + if (ds->default_slice == 0) + ds->default_slice = i; + for (j = 0, p = lp->d_partitions; j < lp->d_npartitions; j++, p++) + { + if (p->p_offset < 0 || p->p_size <= 0) + continue; + + /* Sanity check. */ + if (p->p_size > gd->part[MINOR (d->inode.i_rdev)].nr_sects) + p->p_size = gd->part[MINOR (d->inode.i_rdev)].nr_sects; + } + } + ds->labels[MINOR (d->inode.i_rdev)] = lp; + } + +check: + if (*part >= 0 && slice == 0) + slice = ds->default_slice; + if (*part >= 0 && slice == 0) + return D_NO_SUCH_DEVICE; + *dev = MKDEV (MAJOR (*dev), MINOR (*dev) | slice); + if (slice >= gd->max_p + || gd->part[MINOR (*dev)].start_sect < 0 + || gd->part[MINOR (*dev)].nr_sects <= 0) + return D_NO_SUCH_DEVICE; + if (*part >= 0) + { + lp = ds->labels[MINOR (*dev)]; + if (! lp + || *part >= lp->d_npartitions + || lp->d_partitions[*part].p_offset < 0 + || lp->d_partitions[*part].p_size <= 0) + return D_NO_SUCH_DEVICE; + } + return 0; +} + +#define DECL_DATA struct temp_data td +#define INIT_DATA() \ +{ \ + queue_init (&td.pages); \ + td.inode.i_rdev = bd->dev; \ + td.file.f_mode = bd->mode; \ + td.file.f_flags = bd->flags; \ + current_thread ()->pcb->data = &td; \ +} + +static io_return_t +device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, + dev_mode_t mode, char *name, device_t *devp) +{ + int part, slice, err; + unsigned major, minor; + kdev_t dev; + ipc_port_t notify; + struct block_data *bd = NULL, *bdp; + struct device_struct *ds; + struct gendisk *gd; + struct name_map *np; + DECL_DATA; + + np = find_name (name, &slice, &part); + if (! np) + return D_NO_SUCH_DEVICE; + major = np->major; + ds = &blkdevs[major]; + + /* Check that driver exists. */ + if (! ds->fops) + return D_NO_SUCH_DEVICE; + + /* Wait for any other open/close calls to finish. */ + ds = &blkdevs[major]; + while (ds->busy) + { + ds->want = 1; + assert_wait ((event_t) ds, FALSE); + schedule (); + } + ds->busy = 1; + + /* Compute minor number. */ + if (! ds->gd) + { + for (gd = gendisk_head; gd && gd->major != major; gd = gd->next) + ; + ds->gd = gd; + } + minor = np->unit; + gd = ds->gd; + if (gd) + minor <<= gd->minor_shift; + dev = MKDEV (major, minor); + + queue_init (&td.pages); + current_thread ()->pcb->data = &td; + + /* Check partition. */ + err = init_partition (np, &dev, ds, slice, &part); + if (err) + goto out; + + /* Initialize file structure. */ + switch (mode & (D_READ|D_WRITE)) + { + case D_WRITE: + td.file.f_mode = O_WRONLY; + break; + + case D_READ|D_WRITE: + td.file.f_mode = O_RDWR; + break; + + default: + td.file.f_mode = O_RDONLY; + break; + } + td.file.f_flags = (mode & D_NODELAY) ? O_NDELAY : 0; + + /* Check if the device is currently open. */ + for (bdp = open_list; bdp; bdp = bdp->next) + if (bdp->dev == dev + && bdp->part == part + && bdp->mode == td.file.f_mode + && bdp->flags == td.file.f_flags) + { + bd = bdp; + goto out; + } + + /* Open the device. */ + if (ds->fops->open) + { + td.inode.i_rdev = dev; + linux_intr_pri = SPL5; + err = (*ds->fops->open) (&td.inode, &td.file); + if (err) + { + err = linux_to_mach_error (err); + goto out; + } + } + + /* Allocate and initialize device data. */ + bd = (struct block_data *) kalloc (sizeof (struct block_data)); + if (! bd) + { + err = D_NO_MEMORY; + goto bad; + } + bd->want = 0; + bd->open_count = 0; + bd->iocount = 0; + bd->part = part; + bd->ds = ds; + bd->device.emul_data = bd; + bd->device.emul_ops = &linux_block_emulation_ops; + bd->dev = dev; + bd->mode = td.file.f_mode; + bd->flags = td.file.f_flags; + bd->port = ipc_port_alloc_kernel (); + if (bd->port == IP_NULL) + { + err = KERN_RESOURCE_SHORTAGE; + goto bad; + } + ipc_kobject_set (bd->port, (ipc_kobject_t) &bd->device, IKOT_DEVICE); + notify = ipc_port_make_sonce (bd->port); + ip_lock (bd->port); + ipc_port_nsrequest (bd->port, 1, notify, ¬ify); + assert (notify == IP_NULL); + goto out; + +bad: + if (ds->fops->release) + (*ds->fops->release) (&td.inode, &td.file); + +out: + ds->busy = 0; + if (ds->want) + { + ds->want = 0; + thread_wakeup ((event_t) ds); + } + + if (bd && bd->open_count > 0) + { + if (err) + *devp = NULL; + else + { + *devp = &bd->device; + bd->open_count++; + } + return err; + } + + if (err) + { + if (bd) + { + if (bd->port != IP_NULL) + { + ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); + ipc_port_dealloc_kernel (bd->port); + } + kfree ((vm_offset_t) bd, sizeof (struct block_data)); + bd = NULL; + } + } + else + { + bd->open_count = 1; + bd->next = open_list; + open_list = bd; + } + + if (IP_VALID (reply_port)) + ds_device_open_reply (reply_port, reply_port_type, err, dev_to_port (bd)); + else if (! err) + device_close (bd); + + return MIG_NO_REPLY; +} + +static io_return_t +device_close (void *d) +{ + struct block_data *bd = d, *bdp, **prev; + struct device_struct *ds = bd->ds; + DECL_DATA; + + INIT_DATA (); + + /* Wait for any other open/close to complete. */ + while (ds->busy) + { + ds->want = 1; + assert_wait ((event_t) ds, FALSE); + schedule (); + } + ds->busy = 1; + + if (--bd->open_count == 0) + { + /* Wait for pending I/O to complete. */ + while (bd->iocount > 0) + { + bd->want = 1; + assert_wait ((event_t) bd, FALSE); + schedule (); + } + + /* Remove device from open list. */ + prev = &open_list; + bdp = open_list; + while (bdp) + { + if (bdp == bd) + { + *prev = bdp->next; + break; + } + prev = &bdp->next; + bdp = bdp->next; + } + + assert (bdp == bd); + + if (ds->fops->release) + (*ds->fops->release) (&td.inode, &td.file); + + ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); + ipc_port_dealloc_kernel (bd->port); + kfree ((vm_offset_t) bd, sizeof (struct block_data)); + } + + ds->busy = 0; + if (ds->want) + { + ds->want = 0; + thread_wakeup ((event_t) ds); + } + return D_SUCCESS; +} + +#define MAX_COPY (VM_MAP_COPY_PAGE_LIST_MAX << PAGE_SHIFT) + +/* Check block BN and size COUNT for I/O validity + to from device BD. Set *OFF to the byte offset + where I/O is to begin and return the size of transfer. */ +static int +check_limit (struct block_data *bd, loff_t *off, long bn, int count) +{ + int major, minor; + long maxsz, sz; + struct disklabel *lp = NULL; + + if (count <= 0) + return count; + + major = MAJOR (bd->dev); + minor = MINOR (bd->dev); + + if (bd->ds->gd) + { + if (bd->part >= 0) + { + assert (bd->ds->labels); + assert (bd->ds->labels[minor]); + lp = bd->ds->labels[minor]; + maxsz = lp->d_partitions[bd->part].p_size; + } + else + maxsz = bd->ds->gd->part[minor].nr_sects; + } + else + { + assert (blk_size[major]); + maxsz = blk_size[major][minor] << (BLOCK_SIZE_BITS - 9); + } + assert (maxsz > 0); + sz = maxsz - bn; + if (sz <= 0) + return sz; + if (sz < ((count + 511) >> 9)) + count = sz << 9; + if (lp) + bn += (lp->d_partitions[bd->part].p_offset + - bd->ds->gd->part[minor].start_sect); + *off = (loff_t) bn << 9; + bd->iocount++; + return count; +} + +static io_return_t +device_write (void *d, ipc_port_t reply_port, + mach_msg_type_name_t reply_port_type, dev_mode_t mode, + recnum_t bn, io_buf_ptr_t data, unsigned int orig_count, + int *bytes_written) +{ + int resid, amt, i; + int count = (int) orig_count; + io_return_t err = 0; + vm_map_copy_t copy; + vm_offset_t addr, uaddr; + vm_size_t len, size; + struct block_data *bd = d; + DECL_DATA; + + INIT_DATA (); + + *bytes_written = 0; + + if (bd->mode == O_RDONLY) + return D_INVALID_OPERATION; + if (! bd->ds->fops->write) + return D_READ_ONLY; + count = check_limit (bd, &td.file.f_pos, bn, count); + if (count < 0) + return D_INVALID_SIZE; + if (count == 0) + { + vm_map_copy_discard (copy); + return 0; + } + + resid = count; + copy = (vm_map_copy_t) data; + uaddr = copy->offset; + + /* Allocate a kernel buffer. */ + size = round_page (uaddr + count) - trunc_page (uaddr); + if (size > MAX_COPY) + size = MAX_COPY; + addr = vm_map_min (device_io_map); + err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, + NULL, 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); + if (err) + { + vm_map_copy_discard (copy); + goto out; + } + + /* Determine size of I/O this time around. */ + len = size - (uaddr & PAGE_MASK); + if (len > resid) + len = resid; + + while (1) + { + /* Map user pages. */ + for (i = 0; i < copy->cpy_npages; i++) + pmap_enter (vm_map_pmap (device_io_map), + addr + (i << PAGE_SHIFT), + copy->cpy_page_list[i]->phys_addr, + VM_PROT_READ|VM_PROT_WRITE, TRUE); + + /* Do the write. */ + amt = (*bd->ds->fops->write) (&td.inode, &td.file, + (char *) addr + (uaddr & PAGE_MASK), len); + + /* Unmap pages and deallocate copy. */ + pmap_remove (vm_map_pmap (device_io_map), + addr, addr + (copy->cpy_npages << PAGE_SHIFT)); + vm_map_copy_discard (copy); + + /* Check result of write. */ + if (amt > 0) + { + resid -= amt; + if (resid == 0) + break; + uaddr += amt; + } + else + { + if (amt < 0) + err = linux_to_mach_error (amt); + break; + } + + /* Determine size of I/O this time around and copy in pages. */ + len = round_page (uaddr + resid) - trunc_page (uaddr); + if (len > MAX_COPY) + len = MAX_COPY; + len -= uaddr & PAGE_MASK; + if (len > resid) + len = resid; + err = vm_map_copyin_page_list (current_map (), uaddr, len, + FALSE, FALSE, ©, FALSE); + if (err) + break; + } + + /* Delete kernel buffer. */ + vm_map_remove (device_io_map, addr, addr + size); + +out: + if (--bd->iocount == 0 && bd->want) + { + bd->want = 0; + thread_wakeup ((event_t) bd); + } + if (IP_VALID (reply_port)) + ds_device_write_reply (reply_port, reply_port_type, err, count - resid); + return MIG_NO_REPLY; +} + +static io_return_t +device_read (void *d, ipc_port_t reply_port, + mach_msg_type_name_t reply_port_type, dev_mode_t mode, + recnum_t bn, int count, io_buf_ptr_t *data, + unsigned *bytes_read) +{ + boolean_t dirty; + int resid, amt; + io_return_t err = 0; + queue_head_t pages; + vm_map_copy_t copy; + vm_offset_t addr, offset, alloc_offset, o; + vm_object_t object; + vm_page_t m; + vm_size_t len, size; + struct block_data *bd = d; + DECL_DATA; + + INIT_DATA (); + + *data = 0; + *bytes_read = 0; + + if (! bd->ds->fops->read) + return D_INVALID_OPERATION; + count = check_limit (bd, &td.file.f_pos, bn, count); + if (count < 0) + return D_INVALID_SIZE; + if (count == 0) + return 0; + + /* Allocate an object to hold the data. */ + size = round_page (count); + object = vm_object_allocate (size); + if (! object) + { + err = D_NO_MEMORY; + goto out; + } + alloc_offset = offset = 0; + resid = count; + + /* Allocate a kernel buffer. */ + addr = vm_map_min (device_io_map); + if (size > MAX_COPY) + size = MAX_COPY; + err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, NULL, + 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); + if (err) + goto out; + + queue_init (&pages); + + while (resid) + { + /* Determine size of I/O this time around. */ + len = round_page (offset + resid) - trunc_page (offset); + if (len > MAX_COPY) + len = MAX_COPY; + + /* Map any pages left from previous operation. */ + o = trunc_page (offset); + queue_iterate (&pages, m, vm_page_t, pageq) + { + pmap_enter (vm_map_pmap (device_io_map), + addr + o - trunc_page (offset), + m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); + o += PAGE_SIZE; + } + assert (o == alloc_offset); + + /* Allocate and map pages. */ + while (alloc_offset < trunc_page (offset) + len) + { + while ((m = vm_page_grab ()) == 0) + VM_PAGE_WAIT (0); + assert (! m->active && ! m->inactive); + m->busy = TRUE; + queue_enter (&pages, m, vm_page_t, pageq); + pmap_enter (vm_map_pmap (device_io_map), + addr + alloc_offset - trunc_page (offset), + m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); + alloc_offset += PAGE_SIZE; + } + + /* Do the read. */ + amt = len - (offset & PAGE_MASK); + if (amt > resid) + amt = resid; + amt = (*bd->ds->fops->read) (&td.inode, &td.file, + (char *) addr + (offset & PAGE_MASK), amt); + + /* Compute number of pages to insert in object. */ + o = trunc_page (offset); + if (amt > 0) + { + dirty = TRUE; + resid -= amt; + if (resid == 0) + { + /* Zero any unused space. */ + if (offset + amt < o + len) + memset ((void *) (addr + offset - o + amt), + 0, o + len - offset - amt); + offset = o + len; + } + else + offset += amt; + } + else + { + dirty = FALSE; + offset = o + len; + } + + /* Unmap pages and add them to the object. */ + pmap_remove (vm_map_pmap (device_io_map), addr, addr + len); + vm_object_lock (object); + while (o < trunc_page (offset)) + { + m = (vm_page_t) queue_first (&pages); + assert (! queue_end (&pages, (queue_entry_t) m)); + queue_remove (&pages, m, vm_page_t, pageq); + assert (m->busy); + vm_page_lock_queues (); + if (dirty) + { + PAGE_WAKEUP_DONE (m); + m->dirty = TRUE; + vm_page_insert (m, object, o); + } + else + vm_page_free (m); + vm_page_unlock_queues (); + o += PAGE_SIZE; + } + vm_object_unlock (object); + if (amt <= 0) + { + if (amt < 0) + err = linux_to_mach_error (amt); + break; + } + } + + /* Delete kernel buffer. */ + vm_map_remove (device_io_map, addr, addr + size); + + assert (queue_empty (&pages)); + +out: + if (! err) + err = vm_map_copyin_object (object, 0, round_page (count), ©); + if (! err) + { + *data = (io_buf_ptr_t) copy; + *bytes_read = count - resid; + } + else + vm_object_deallocate (object); + if (--bd->iocount == 0 && bd->want) + { + bd->want = 0; + thread_wakeup ((event_t) bd); + } + return err; +} + +static io_return_t +device_get_status (void *d, dev_flavor_t flavor, dev_status_t status, + mach_msg_type_number_t *status_count) +{ + struct block_data *bd = d; + + switch (flavor) + { + case DEV_GET_SIZE: + if (*status_count != DEV_GET_SIZE_COUNT) + return D_INVALID_SIZE; + if (disk_major (MAJOR (bd->dev))) + { + assert (bd->ds->gd); + + if (bd->part >= 0) + { + struct disklabel *lp; + + assert (bd->ds->labels); + lp = bd->ds->labels[MINOR (bd->dev)]; + assert (lp); + (status[DEV_GET_SIZE_DEVICE_SIZE] + = lp->d_partitions[bd->part].p_size << 9); + } + else + (status[DEV_GET_SIZE_DEVICE_SIZE] + = bd->ds->gd->part[MINOR (bd->dev)].nr_sects << 9); + } + else + { + assert (blk_size[MAJOR (bd->dev)]); + (status[DEV_GET_SIZE_DEVICE_SIZE] + = (blk_size[MAJOR (bd->dev)][MINOR (bd->dev)] + << BLOCK_SIZE_BITS)); + } + /* It would be nice to return the block size as reported by + the driver, but a lot of user level code assumes the sector + size to be 512. */ + status[DEV_GET_SIZE_RECORD_SIZE] = 512; + break; + + case V_GETPARMS: + if (*status_count < (sizeof (struct disk_parms) / sizeof (int))) + return D_INVALID_OPERATION; + else + { + struct disk_parms *dp = status; + struct hd_geometry hg; + DECL_DATA; + + INIT_DATA(); + + if ((*bd->ds->fops->ioctl) (&td.inode, &td.file, + HDIO_GETGEO, &hg)) + return D_INVALID_OPERATION; + + dp->dp_type = DPT_WINI; /* XXX: It may be a floppy... */ + dp->dp_heads = hg.heads; + dp->dp_cyls = hg.cylinders; + dp->dp_sectors = hg.sectors; + dp->dp_dosheads = hg.heads; + dp->dp_doscyls = hg.cylinders; + dp->dp_dossectors = hg.sectors; + dp->dp_secsiz = 512; /* XXX */ + dp->dp_ptag = 0; + dp->dp_pflag = 0; + + /* XXX */ + dp->dp_pstartsec = -1; + dp->dp_pnumsec = -1; + + *status_count = sizeof (struct disk_parms) / sizeof (int); + } + + break; + + default: + return D_INVALID_OPERATION; + } + + return D_SUCCESS; +} + +struct device_emulation_ops linux_block_emulation_ops = +{ + NULL, + NULL, + dev_to_port, + device_open, + device_close, + device_write, + NULL, + device_read, + NULL, + NULL, + device_get_status, + NULL, + NULL, + NULL, + NULL, + NULL +}; |