diff options
Diffstat (limited to 'ext2fs')
-rw-r--r-- | ext2fs/ChangeLog | 1145 | ||||
-rw-r--r-- | ext2fs/Makefile | 29 | ||||
-rw-r--r-- | ext2fs/balloc.c | 490 | ||||
-rw-r--r-- | ext2fs/bitmap.c | 106 | ||||
-rw-r--r-- | ext2fs/devio.c | 51 | ||||
-rw-r--r-- | ext2fs/dir.c | 1082 | ||||
-rw-r--r-- | ext2fs/ext2_fs.h | 628 | ||||
-rw-r--r-- | ext2fs/ext2_fs_i.h | 42 | ||||
-rw-r--r-- | ext2fs/ext2fs.c | 216 | ||||
-rw-r--r-- | ext2fs/ext2fs.h | 445 | ||||
-rw-r--r-- | ext2fs/getblk.c | 303 | ||||
-rw-r--r-- | ext2fs/hyper.c | 210 | ||||
-rw-r--r-- | ext2fs/ialloc.c | 405 | ||||
-rw-r--r-- | ext2fs/inode.c | 796 | ||||
-rw-r--r-- | ext2fs/msg.c | 88 | ||||
-rw-r--r-- | ext2fs/pager.c | 1044 | ||||
-rw-r--r-- | ext2fs/pokel.c | 172 | ||||
-rw-r--r-- | ext2fs/sblock.words | 6 | ||||
-rw-r--r-- | ext2fs/storeinfo.c | 131 | ||||
-rw-r--r-- | ext2fs/truncate.c | 365 |
20 files changed, 7754 insertions, 0 deletions
diff --git a/ext2fs/ChangeLog b/ext2fs/ChangeLog new file mode 100644 index 00000000..42ec867e --- /dev/null +++ b/ext2fs/ChangeLog @@ -0,0 +1,1145 @@ +2000-02-05 Roland McGrath <roland@baalperazim.frob.com> + + * ext2_fs.h: Update from Linux 2.3.42 version (ext2_fs_i.h unchanged). + * inode.c (check_high_bits): In accordance with Linux 2.3.42 behavior, + permit 32-bit uids on non-hurd filesystems unless i_dtime is zero + (which indicates an extra old Linux ext2 implementation I guess). + +2000-01-16 Mark Kettenis <kettenis@gnu.org> + + * pager.c (file_pager_write_page): Lock NODE->dn->alloc_lock + before accessing NODE->allocsize. Fixes Debian bug #40302. + +1999-10-13 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.c (diskfs_name_max): New variable. + +1999-10-06 Roland McGrath <roland@baalperazim.frob.com> + + * hyper.c (get_hypermetadata): Avoid overflow in calculation of disk + size vs superblock-specified requirement. + Add a warning for wasted disk blocks after last filesystem block. + +1999-10-03 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c (ext2_file_type): #if 0 out this variable. + (diskfs_get_directs): #if 0 out code to interpret file_type field + and set d_type. Instead, always return DT_UNKNOWN for now. + + * dir.c (file_type_ext2): #if 0 out. + (diskfs_direnter_hard): #if 0 out code to set file_type field in + directory entries. Instead, always set it to zero. + +1999-10-03 Roland McGrath <roland@baalperazim.frob.com> + + * bitmap.c (ffz): Subtract one from (1-origin) result of ffs to get + our (0-origin) result. + + * Makefile (LCLHDRS): bitmap.h -> bitmap.c + + * ext2fs.h (test_bit, set_bit, clear_bit): Rewritten to operate on + 32-bit words instead of bytes. + + * truncate.c (diskfs_truncate): Add missing call to + diskfs_end_catch_exception. + + * ext2fs.h (group_desc): Inline function replaced with macro. + (group_desc_image): New variable. + * hyper.c (get_hypermetadata): Initialize it. 
+ + * ext2fs.h (sblock_block): Declare new variable. + (SBLOCK_LBLOCK): Macro removed. + (SBLOCK_OFFS): Define in terms of sblock_block. + * ext2fs.c (options): Add --sblock/-S. + (parse_opt): Parse it to set sblock_block. + * hyper.c (sblock_block): New variable. + (get_hypermetadata): Use sblock_block instead of constant SBLOCK_BLOCK. + + * ext2fs.c (options): List --debug/-D unconditionally, adding to help + text #ifndef EXT2FS_DEBUG + (parse_opt): Always grok -D. #ifndef EXT2FS_DEBUG, reject it with + message saying debugging support not compiled in. + + * hyper.c (get_hypermetadata): Use EXT2_MAX_BLOCK_SIZE instead of + hard-wired 8192. Don't use ffs to compute log2_block_size, and don't + check for the impossible case of non-power-of-two block size (the + block size specification we start with is given as a power of two!). + * ext2fs.h (block_size): Change type to unsigned int. + (BLOCKSIZE_SCALE): Just use SBLOCK->s_log_block_size directly. + * hyper.c (get_hypermetadata): Fix printf formats to silence warning. + * dir.c (dirscanblock): Likewise. + + * dir.c (file_type_ext2): New const variable, map DT_* -> EXT2_FT_*. + (diskfs_direnter_hard): Move initialization of directory entry content + fields out of switch; use memcpy or memmove as appropriate, instead of + bcopy. Set file_type field in new directory entry to appropriate + type for the node, or to zero if the filesystem doesn't have the + EXT2_FEATURE_INCOMPAT_FILETYPE flag set. + +1999-10-02 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.h (SBLOCK_LBLOCK): New macro, filesystem block number of sb. + (BLOCKSIZE_SCALE): New macro for converting min-blocks to fs blocks. + (group_desc): Fix calculation of offset from superblock, so it works + properly with block_size != EXT2_MIN_BLOCK_SIZE. + + * ext2fs.h (EXT2FS_EI): New macro, use it for all extern inlines. + + * bitmap.c (count_free, find_next_zero_bit, find_first_zero_bit): + Make these all static inline. 
+ (ffz, ffz_nibble_map): Function and variable removed. + (ffz): Replace decl with macro defined in terms of ffs. + * ext2fs.h (count_free, find_next_zero_bit, find_first_zero_bit, ffz): + Remove these declarations. + * Makefile (SRCS): Remove bitmap.c. + (LCLHDRS): Add bitmap.c here instead. + * balloc.c, ialloc.c: #include "bitmap.c" here. + +1999-10-01 Roland McGrath <roland@baalperazim.frob.com> + + * bitmap.c (memscan): Function removed. + * ext2fs.h: Removed its decl. + * balloc.c (memscan): New static function, defined using memchr. + +1999-09-13 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c, truncate.c, pager.c: Reverted changes related to + io_map_segment. + +1999-09-09 Roland McGrath <roland@baalperazim.frob.com> + + * Makefile (makemode): server -> servers. + (targets): Replaced with target; remove ext2fs.static. + (ext2fs.static-LDFLAGS): Variable removed. + (ext2fs, ext2fs.static): Remove deps. + +1999-09-07 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_lookup_hard): Pass additional parameter to + diskfs_get_filemap. + (diskfs_dirempty): Likewise. + * truncate.c (force_delayed_copies): Likewise. + * pager.c (diskfs_get_filemap): Accept additional parameter. + +1999-09-04 Thomas Bushnell, BSG <tb@mit.edu> + + * pager.c (file_pager_write_page): Don't report errors on writes + that extend past NODE->allocsize. This avoids a race between sync + and truncate. Reported by Mark Kettenis <kettenis@wins.uva.nl>. + +1999-08-23 Roland McGrath <roland@baalperazim.frob.com> + + * ext2_fs.h, ext2_fs_i.h: Replaced with Linux 2.3.14 versions. + * ext2fs.h (i_mode_high): New macro, missing from ext2_fs.h. + + * inode.c (read_node): Don't set INFO->i_version. + Extract INFO->i_dir_acl value only for a directory; + otherwise use zero and instead extract INFO->i_high_size. + Fail with EFBIG if INFO->i_high_size is nonzero. + + * ialloc.c (diskfs_alloc_node): Propagate initial value of i_flags + from directory, as Linux 2.3.14 does. 
+ + * inode.c (write_node): Get i_flags from NP->dn->info instead of from + the disk inode, so we can have modified the in-core version. + * dir.c (diskfs_direnter_hard, diskfs_dirremove_hard, + diskfs_dirrewrite_hard): Clear EXT2_BTREE_FL flag bit from + DP->dn->info.i_flags after modifying the directory (this is what + Linux 2.3.14 does). + + * inode.c (read_node): i_version -> i_generation + * inode.c (write_node): Likewise. + + * balloc.c (ext2_free_blocks): Handle freeing across group boundary, + as Linux 2.3.14 does. + + * balloc.c (ext2_check_blocks_bitmap): If RO_COMPAT_SPARSE_SUPER + feature flag is set, or if group number is not a power of 3, 5, or 7 + (I don't know why; this is what Linux 2.3.14 does), skip tests for + superblocks and descriptor blocks being free in bitmap. + + * dir.c: Replace `struct ext2_dir_entry' with `struct ext2_dir_entry_2' + in all uses. + (ext2_file_type): New static const variable. + (diskfs_get_directs): Set d_type member based on file_type field in + directory entry. + + * balloc.c (ext2_new_block): Take new arg PREALLOC_GOAL. + Use that instead of hard-coded 8 as maximum of blocks to preallocate. + Also test that instead of PREALLOC_COUNT to decide whether to + try any preallocation at all. + * getblk.c (ext2_alloc_block): Pass new arg to ext2_new_block. + Use EXT2_DEFAULT_PREALLOC_BLOCKS as default (replaces hard-coded 8); + For a regular file, use SBLOCK->s_prealloc_blocks before default. + For a directory, use SBLOCK->s_dir_prealloc_blocks if the + EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is set, otherwise zero. + * inode.c (diskfs_set_translator): Pass new arg (zero). + +1999-07-11 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c (diskfs_get_directs): Fix sloppy bugs in last change. + * hyper.c (allocate_mod_map): Likewise. + * pager.c (get_page_buf): Likewise. + +1999-07-10 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.h: Add #include <sys/mman.h> for munmap decl. 
+ +1999-07-06 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_get_directs): Use mmap instead of vm_allocate. + * hyper.c (allocate_mod_map): Likewise. + (get_hypermetadata): Likewise. + * pager.c (get_page_buf): Likewise. + + * hyper.c (diskfs_readonly_changed): Use mprotect instead of + vm_protect. + +1999-07-03 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_lookup_hard): Use munmap instead of vm_deallocate. + (diskfs_direnter_hard): Likewise. + (diskfs_dirremove_hard): Likewise. + (diskfs_dirrewrite_hard): Likewise. + (diskfs_dirempty): Likewise. + (diskfs_drop_dirstat): Likewise. + (diskfs_get_directs): Likewise. + * hyper.c (allocate_mod_map): Likewise. + (get_hypermetadata): Likewise. + * pager.c (free_page_buf): Likewise. + * truncate.c (poke_pages): Likewise. + +1999-06-29 Thomas Bushnell, BSG <tb@mit.edu> + + * hyper.c (diskfs_readonly_changed): Adjust whether the store + should permit writes too. + +1999-06-19 Roland McGrath <roland@baalperazim.frob.com> + + * pager.c (free_page_buf): Fix type cast. + +Tue Jun 15 21:51:58 1999 Thomas Bushnell, BSG <tb@mit.edu> + + * pager.c: Clamp the number of free pages we keep around to some + reasonably small value. Patch from Mark Kettenis + <kettenis@wins.uva.nl>. + +1999-06-15 Thomas Bushnell, BSG <tb@mit.edu> + + * inode.c (diskfs_validate_flags_change): Invert sense of test wrt + bits that haven't yet been defined. Reported by Kalle Olavi + Niemitalo <tosi@ees2.oulu.fi>. + +1999-05-23 Roland McGrath <roland@baalperazim.frob.com> + + * ialloc.c (diskfs_alloc_node): Frob printf format to suppress warning. + * getblk.c (ext2_getblk): Likewise. + * balloc.c (ext2_free_blocks): Likewise. + * hyper.c (get_hypermetadata): Likewise. + + * ialloc.c (diskfs_alloc_node): If creator_os is not EXT2_OS_HURD, + suppress warnings about stale nonzero st_blocks or st_size. + +1999-05-19 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.c (main): Include store size in panic msg when it's too small. 
+ +1999-01-23 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.c (main): Use diskfs_init_main. + +1998-12-27 Roland McGrath <roland@baalperazim.frob.com> + + * inode.c (diskfs_set_statfs): Remove __ from struct members. + +1998-12-21 Mark Kettenis <kettenis@phys.uva.nl> + + * inode.c (diskfs_set_statfs): Fill in statfs members that are + used to implement statvfs. + +1998-12-27 Roland McGrath <roland@baalperazim.frob.com> + + * ext2fs.c (main): Pass ARGP_IN_ORDER flag to argp_parse because + diskfs options need it. + +1998-12-20 Roland McGrath <roland@baalperazim.frob.com> + + * inode.c (diskfs_write_disknode): Add braces to silence warning. + * pager.c (file_pager_read_page): Likewise. + + * storeinfo.c: Don't include <netinet/in.h>. + +1998-09-04 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c (dirscanblock): Fix defn with `const'. + (diskfs_direnter_hard): Likewise. + * inode.c (diskfs_create_symlink_hook): Likewise. + (write_symlink): Likewise. + + * pager.c: strings.h -> string.h; gets strerror decl. + + * ext2fs.c (main): Fix return type to int. + + * dir.c (diskfs_lookup_hard): Fix defn with `const'. + * inode.c (diskfs_set_translator): Likewise. + +Wed Aug 20 14:28:00 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): Cope with error return from + diskfs_get_filemap. + (diskfs_dirempty): Cope (poorly) with error return from + diskfs_get_filemap. + * truncate.c (force_delayed_copies): Likewise. + + * pager.c (diskfs_get_filemap): If pager_create fails, return + error to caller. + +Mon Jun 30 17:34:27 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_readonly): Delete variable definition. + (main): If the store cannot be made writable, then set + diskfs_hard_readonly and diskfs_readonly. + +1997-06-20 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (disk_pager_read_page): Initialize READ to 0 to force + store_read to allocate a buffer. 
+ +1997-06-18 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Get rid of device-block-size-is-power-of-2 check. + +Tue Jun 3 17:18:35 1997 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_node): Don't assert that st_blocks is zero for + non-dir/file/long-symlink inodes. + +Tue Nov 19 18:30:37 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_node): If SBLOCK->s_creator_os != EXT2_OS_HURD, + set NP->author_tracks_uid to true. + +Mon Nov 18 17:14:31 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ialloc.c (diskfs_alloc_node): Clear S_IPTRANS bit in ST->st_mode + instead of NP->istranslated. + * inode.c (read_node): Set S_IPTRANS bit in ST->st_mode (and clear + S_ITRANS) rather than NP->istranslated. + (write_node): Don't write S_IPTRANS to disk. + (diskfs_set_translator): Frob S_IPTRANS bit rather than + istranslated field. + +Sat Nov 16 17:26:20 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): diskfs_isuid -> + idvec_contains. + +Tue Nov 12 17:53:08 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_validate_flags_change): New function. + (write_node): Convert generic flags in ST->st_flags to ext2- + specific flags in DI->i_flags. + (read_node): Renamed from read_disknode; all callers changed. + Convert ext2-specific flags on disk to generic flags in ST. + [!UF_APPEND] (UF_APPEND): New macro (temporary). + [!UF_NODUMP] (UF_NODUMP): New macro (temporary). + [!UF_IMMUTABLE] (UF_IMMUTABLE): New macro (temporary). + <sys/stat.h>: New include. + +Thu Oct 24 16:24:05 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (startup_children, runtime_children): New variables. + (startup_parents, runtime_parents): Variables removed. + (startup_argp, runtime_argp): Use new *_CHILDREN variables instead of + corresponding *_PARENT ones. 
+ +Wed Oct 9 13:30:15 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Return ports with + MACH_MSG_TYPE_COPY_SEND, not MACH_MSG_TYPE_MAKE_SEND. + +Tue Oct 8 23:25:53 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): If st_size + indicates a lower value for NUM_FS_BLOCK, use it instead of the + value derived from st_blocks. + +Mon Oct 7 15:48:49 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Typo. + +Fri Oct 4 23:51:05 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Add security + check, and deactivate FILE_STORE if necessary. + +Mon Sep 30 15:40:53 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Set *PORTS_TYPE. + +Thu Sep 19 17:57:07 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Add store. + +Wed Sep 18 15:28:32 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Remove CLASSES argument to store_parsed_open. + Use STORE_PARAMS variable to get result from parsing STORE_ARGP. + (diskfs_extra_version): Put `GNU Hurd' in here. + +Fri Sep 13 00:15:56 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_append_args): Renamed from diskfs_get_options. + Don't initialize *ARGZ & *ARGZ_LEN anymore, or deallocate on errors. + Append store args too. + + * pager.c (file_pager_read_page:do_pending_reads, pager_unlock_page, + pending_blocks_write, diskfs_grow, find_block): Use filesystem blocks, + not device blocks, in block<->offset/size conversions. + + * Makefile (ext2fs ext2fs.static): Add ../libstore/libstore.a. + +1996-09-12 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_disk_name): Renamed from STORE_NAME. + +Wed Sep 11 12:59:28 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (create_disk_pager): Create PAGER_BUCKET. Pass in new + args to diskfs_pager_setup (renamed from disk_pager_setup). 
+ (disk_image): New variable. + (service_paging_request): Function removed. + (diskfs_shutdown_pager): Use DISKFS_DISK_PAGER instead of DISK_PAGER. + * ext2fs.h (disk_image): New declaration. + (sync_global_ptr): Use DISKFS_DISK_PAGER instead of DISK_PAGER. + * ext2fs.c (main, diskfs_reload_global_state): Use + DISKFS_DISK_PAGER instead of DISK_PAGER. + + * ext2fs.c (main): Change store_parsed_get_name to store_parsed_name. + + * storeinfo.c (diskfs_S_file_get_storage_info): Clone STORE before + remapping it. + +Tue Sep 10 17:12:16 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Rewritten to use + store functions. + +Mon Sep 9 11:10:11 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Use store fields instead of diskfs variables. + (store, store_parsed, store_name): New variables. + (free_page_bufs, free_page_bufs_lock): Variables removed. + (get_page_buf, free_page_buf): Functions removed. + <argp.h>, <hurd/store.h>: New includes. + * ext2fs.h (store_parsed, store_name): New declarations. + (get_page_buf, free_page_buf): Declarations removed. + * hyper.c <hurd/store.h>: New include. + * pager.c <hurd/store.h>: New include. + (file_pager_write_page): Make BUF void *. + (file_pager_read_page): Make BUF void **, NEW_BUF void *, and + LENGTH size_t. Pass &LENGTH to store_read, not LENGTH. + (pending_blocks_write): Make PAGE_BUF void *, and LENGTH size_t. + Check amount written, and return EIO if it's wrong. + (disk_pager_read_page): Make BUF void **, and LENGTH size_t. + Check amount read, and return EIO if it's wrong. + (disk_pager_write_page): Make BUF void *, and LENGTH size_t. + Check amount written, and return EIO if it's wrong. + (pager_read_page, pager_write_page): Convert BUF to a pointer when + calling work functions. + (struct pending_blocks): Make BUF void *. + (free_page_bufs, free_page_bufs_lock): New variables. + (get_page_buf, free_page_buf): New functions. 
+ +Sun Sep 8 18:47:10 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * hyper.c (diskfs_readonly_changed): Use STORE->size. + (get_hypermetadata): Use STORE->size & STORE->block_size. + + * pager.c (file_pager_read_page, pending_blocks_write, + disk_pager_read_page, disk_pager_write_page, + pager_report_extent, find_block, pager_unlock_page, diskfs_grow): + Use store_ operations instead of the old device ones, and some + store fields instead of globals. + * inode.c (read_disknode): Use STORE->log2_block_size instead of + LOG2_BLOCK_SIZE. + * ext2fs.h (store): New declaration. + * ext2fs.c (startup_parents): Use diskfs_store_startup_argp + instead of diskfs_std_device_startup_argp. + (startup_parents, startup_argp, runtime_parents, runtime_argp, + options, parse_opt, diskfs_get_options): + Define always, not just when EXT2FS_DEBUG is defined. + (parse_opt): Propagate our input to the first child argp. + (parse_opt, diskfs_get_options): Guard debug-specific bits with + #ifdef EXT2FS_DEBUG. + +Thu Sep 12 16:41:20 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): New variable. + (ext2fs ext2fs.static): Depend on $(library_deps) instead of + explicit list. + +Fri Sep 6 16:03:11 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * ext2fs.c: Include <version.h>. + (diskfs_major_version, diskfs_minor_version, diskfs_edit_version): + Deleted variables. + (diskfs_server_version): New variable. + +Thu Aug 29 16:59:51 1996 Miles Bader <miles@gnu.ai.mit.edu> + + Changes from ufs/dir.c: + * dir.c (diskfs_lookup_hard): When setting ds->stat to EXTEND, set + ds->idx by looking at the size of the file. After successful + dirscanblock, record index where we finished in DP->dn->dir_idx. + Start searches at that index. + (dirscanblock): Size dirents correctly when mallocing it. + (diskfs_direnter_hard): Be more careful when sizing or resizing + dirents. Correctly set to -1 all the new entries we create after + realloc call. 
+ * ext2fs.h (struct disknode): New member `dir_idx'. + * inode.c (diskfs_cached_lookup): Initialize DN->dir_idx. + + * dir.c (diskfs_direnter_hard): Initialize OLDSIZE to quiet gcc. + +Sun Aug 18 01:45:42 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (file_pager_read_page): Use get_page_buf to get a free page. + (file_pager_read_page): Use free_page_buf to deallocate pages. + * ext2fs.c (get_page_buf): Return 0 if we can't allocate. + +Thu Aug 15 14:55:01 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_edit_version): Change to `1'. + (diskfs_version_extra): New variable. + * inode.c (read_disknode): Change assertion to allow non-zero + st_size for anything, but assert that st_blocks == 0 for any case + where we set allocsize to 0. + * truncate.c (diskfs_truncate): Allow any sort of node to have a + size without any blocks (as linux apparently does this sometimes + with devices). + +Wed Aug 14 14:03:33 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_minor_version): Change to `2'. + +Tue Aug 13 15:11:42 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2_fs.h: Update to version from linux-2.0.12. + * ialloc.c (ext2_alloc_inode, diskfs_free_node): Pass SBLOCK as a + parameter to EXT2_FIRST_INO (v2.x change). + * hyper.c (get_hypermetadata): Use EXT2_INODE_SIZE instead of + sizeof (struct ext2_inode). + Deal with various version 2.x features. + * ext2fs.h (dino): Don't recalculate INODES_PER_BLOCK here. + (ext2_debug): Redefine macro after including ext2_fs.h. + +Mon Aug 12 13:48:17 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (diskfs_minor_version): Changed to `1'. + + * inode.c (read_disknode): Don't set allocsize for in-inode symlinks. + (MAX_INODE_SYMLINK): New macro. + (write_symlink, read_symlink): New functions. + (diskfs_create_symlink_hook, diskfs_read_symlink_hook): New variables. + * truncate.c (diskfs_truncate): For in-inode symlinks, just frob + the size. 
+ + * ext2fs.h (__u32, __u16, __u8, __s32, __s16, __s8): New types. + * ext2_fs.h (u32, u16, u8, s32, s16, s8): All uses of these types + changed to have a leading `__', and the definitions removed. + * ext2_fs_i.h (u32, u16, u8, s32, s16, s8): All uses of these types + changed to have a leading `__'. + +Mon Aug 12 11:18:37 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * hyper.c (diskfs_set_hypermetadata): Return an error code now. + +Fri Aug 2 12:10:40 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_write_disknode): If WAIT is false, still record + the write for later, using record_global_poke. + +Thu Aug 1 16:18:59 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2_fs.h (ext2_debug_flag): New decl. + (ext2_debug): Pay attention to EXT2_DEBUG_FLAG. + * ext2fs.c [EXT2FS_DEBUG] (options, ext2_debug_flag): New variables. + [EXT2FS_DEBUG] (parse_opt, diskfs_get_options): New functions. + [EXT2FS_DEBUG] (startup_parents, startup_argp, runtime_parents, + diskfs_runtime_argp): New variables. + [!EXT2FS_DEBUG] (startup_argp): New macro. + (main): Use STARTUP_ARGP instead of DISKFS_STD_DEVICE_STARTUP_ARGP. + * pager.c (diskfs_grow): Fix ext2_debug format strings. + * truncate.c (trunc_direct): Fix ext2_debug call. + +Sat Jul 20 00:58:44 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (struct disknode): Remove debugging info. + (RECORD_LAST): Function removed. + (LAST_BUFSZ): Macro removed. + (enum last_act): Type removed. + * inode.c (diskfs_cached_lookup): Don't initialize debugging info. + * pager.c (file_pager_read_page, file_pager_write_page, + pager_unlock_page, diskfs_grow): Don't record debugging info. + * truncate.c (diskfs_truncate): Likewise. + + * pager.c (file_pager_read_page): Set + NODE->dn->last_page_partially_writable if we return such a page. + +Fri Jul 19 15:02:24 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (diskfs_grow): Rename OLD_END_BLOCK to END_BLOCK. 
+ Correctly determine whether to set DN->last_page_partially_writable + after allocating new blocks. + + * pager.c (file_pager_read_page, file_pager_write_page): Pass + NODE->dn, not &NODE->dn to RECORD_LAST. + +Mon Jul 15 18:00:26 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (struct disknode): Add debugging info. + (RECORD_LAST): New function. + (LAST_BUFSZ): New macro. + (enum last_act): New type. + * pager.c (pager_unlock_page, diskfs_grow, file_pager_read_page, + file_pager_write_page): Record debugging info. + * truncate.c (diskfs_truncate): Likewise. + * inode.c (diskfs_cached_lookup): Initialize debugging info. + + * pager.c (file_pager_read_page): Initialize *WRITELOCK to 0. + +Tue Jun 25 12:22:21 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (sync_global): Renamed from sync_global_data. Add WAIT + flag. Don't call diskfs_set_hypermetadata. + (alloc_sync): Call diskfs_set_hypermetadata instead of sync_global_data. + (sync_super_block): Function removed. + * hyper.c (diskfs_readonly_changed): No longer clear the clean bit. + (diskfs_set_hypermetadata): Work correctly. + * truncate.c (diskfs_truncate): Add call diskfs_check_readonly to + clear clean bit. + * inode.c (diskfs_cached_lookup): Use diskfs_check_readonly + instead of diskfs_readonly. + * dir.c (diskfs_lookup_hard, diskfs_dirempty): Likewise. + * pager.c (diskfs_shutdown_pager): Don't shutdown the disk pager, + just sync it. + (diskfs_sync_everything): Call sync_global instead of pokel_sync. + (final_sblock): Variable removed. + (diskfs_grow): Add call diskfs_check_readonly to clear clean bit. + +Mon Jun 24 17:14:25 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (check_high_bits, diskfs_validate_owner_change, + diskfs_validate_group_change, diskfs_validate_mode_change, + diskfs_validate_author_change): New functions. + (write_node): For non-hurd filesystems, assert that no hurd + extensions should be used. 
+ +Thu Jun 20 22:36:23 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Rename diskfs_device_startup_argp to + diskfs_std_device_startup_argp. + +Sat Jun 15 15:56:01 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_disknode, write_inode): Use hurd-specific fields + only on a hurd filesystem. + +Fri May 10 09:32:43 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): Fix one reference to old name of ST + member. + +Thu May 9 11:52:20 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): Expect and fill in new format + statfs buffer. + + * Makefile (ext2fs ext2fs.static): s/ioserver/iohelp/g. + * ext2fs.h: ioserver.h -> iohelp.h. + +Tue May 7 16:22:56 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Rewrite for new + interface. + +Tue Apr 30 12:51:09 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (targets): Renamed from `target'. + +Fri Apr 26 16:10:19 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (makemode): Now `servers'. + (targets): Renamed from `target'; now include ext2fs.static. + (ext2fs.static-LDFLAGS): Renamed from `LDFLAGS'. + (ext2fs.static): Depend on same things as `ext2fs'. + (include ../Makeconf): Must come before dependency information. + +Wed Apr 17 13:30:49 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_write_disknode): Only sync DI if WAIT. + + * dir.c (diskfs_lookup_hard): Set atime appropriately, and sync + the new atime if we are running synchronously (!). + (diskfs_dirempty): Likewise. + (diskfs_direnter_hard): Set mtime appropriately. + (diskfs_dirremove_hard): Likewise. + (diskfs_dirrewrite_hard): Likewise. + +Thu Apr 4 18:51:19 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_cached_lookup): Renamed from iget; all uses updated. + Initialize the CACHE_ID field in the new node. 
+ * ext2fs.h (struct disknode): Get rid of NUMBER field; all references + replaced by references to the CACHE_ID field in the corresponding node. + +Fri Mar 29 11:03:58 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * dir.c (diskfs_null_dirstat): New function. + (diskfs_lookup_hard, diskfs_direnter, diskfs_dirremove_hard, + diskfs_dirrewrite_hard): Renamed from versions without `_hard' suffix. + Get rid of stuff now done by diskfs. + + * ext2fs.c (main): Pass new argument to argp_parse. + +Tue Mar 19 17:52:04 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (pager_unlock_page, diskfs_grow): Try to make the logic + a bit simpler and more robust. + +Fri Feb 16 17:05:01 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Check error return from diskfs_init_diskfs. + +Tue Feb 6 14:49:40 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (ext2_warning): Make a declaration, not a macro. + * msg.c (ext2_warning): Rename from _ext2_warning; don't take (or + print) a function argument any more. + + * dir.c (diskfs_get_directs): When BUFSIZ is 0, allocate enough + extra space over the directory size to account for the worst case + difference between the ext2 and canonical formats. + +Sat Feb 3 11:27:07 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * hyper.c (get_hypermetadata, diskfs_readonly_changed): Use + ext2_warning to print warnings instead of error(). + * msg.c (_ext2_warning): Include `warning:' in message. + +Sat Feb 3 06:10:43 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * hyper.c: Fixed handling of the filesystem `clean bit'. + (ext2fs_clean): New boolean variable. + (get_hypermetadata): Set it iff the clean bit is set on entry. + If not clean, complain and force read-only. + (diskfs_set_hypermetadata): Set clean bit only if ext2fs_clean is set. + (diskfs_readonly_changed): Complain if going writable and clean + bit clear. + + * ext2fs.c: Include string.h for strerror decl. 
+ +Tue Jan 30 22:25:19 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * hyper.c (get_hypermetadata): Don't return any error value, just + panic if we can't read the superblock. + * ext2fs.c (main): Move warp_inode() inline. Make sure root inode + is really there. Don't check return value from get_hypermetadata. + (warp_inode): Function removed. + * ext2fs.h (get_hypermetadata): Returns void now. + +Tue Jan 30 17:04:41 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * pager.c (file_pager_read_page, file_pager_write_page): Check for + a page offset beyond the allocsize and return EIO. + +Wed Jan 17 15:11:55 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Calculate the + right value for *RUNS_LEN. + +Tue Jan 16 17:37:00 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (diskfs_file_update): Ext2fs doesn't require that the + last block in the file always be allocated, so don't. + (diskfs_grow, pager_unlock_page): Don't set last_block_allocated. + * ext2fs.h (struct disknode): Get rid of last_block_allocated field. + * inode.c (read_disknode): Don't set last_block_allocated. + * truncate.c (diskfs_truncate): Likewise. + + * Makefile (LDFLAGS): Add -static. + +Mon Jan 15 10:25:19 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (pager_unlock_page, diskfs_grow): Leave things in a + slightly more consistent state when block allocation errors happen. + + * dir.c (diskfs_direnter): Don't include the terminating '\0' in + on-disk directory entry names. + + * inode.c (diskfs_node_norefs): When losing our in-core copy of an + inode, remember which indirect blocks still have to be written. + * pokel.c (pokel_inherit, pokel_finalize): New functions. + * ext2fs.h (pokel_inherit, pokel_finalize): New declarations. + + * dir.c (diskfs_lookup): Patch from ufs/dir.c: If we are returning + an error, then set the dirstat to be ignored by drop_dirstat. 
+ +Sun Jan 14 13:17:33 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ialloc.c (diskfs_alloc_node): Set NP->istranslated to 0. + * inode.c (write_node): If NP isn't translated, force + DI->i_translator to 0. + + * getblk.c (inode_getblk, block_getblk): Set dn_set_mtime too. + +Sat Jan 6 11:57:26 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * pager.c (pager_bucket): Made global. + (create_disk_pager): Pass MAY_CACHE to disk_pager_setup. + * ext2fs.c (main): Don't map in disk image here; create_disk_pager + now does it. + +Fri Jan 5 16:57:54 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * ext2fs.h: Declare create_disk_pager. + +Thu Jan 4 18:46:40 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * ext2fs.h (disk_pager, disk_pager_port, disk_image, + create_disk_pager): Decls removed. + Include hurd/diskfs-pager.h instead. + + * pager.c (create_disk_pager): Use disk_pager_setup. + +Tue Nov 14 14:59:32 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * dir.c (dirscanblock): Apply mib's changes to ufs/dir.c. + +Sat Nov 4 20:01:01 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Add FLAGS argument. + +Wed Nov 1 20:09:59 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Add FLAGS arg to diskfs_startup_diskfs call. + +Mon Oct 23 17:49:16 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_get_translator, diskfs_set_translator): Only + support these if the filesystem's creator-os is `hurd'. + (read_disknode): Only check the i_translator field if the + filesystem's creator-os is `hurd'. + +Fri Oct 20 19:18:16 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (MS_RDONLY, MS_NOSUID, MS_NODEV, MS_NOEXEC, + MS_SYNCHRONOUS, MS_REMOUNT, S_APPEND, S_IMMUTABLE, IS_APPEND, + IS_IMMUTABLE): Macros deleted. + +Thu Oct 19 19:15:15 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (create_disk_pager, diskfs_get_filemap, + pager_clear_user_data): Don't use the p field in a upi. 
+ (diskfs_get_filemap): Update/initialize the max_prot field. + Add the prot arg. + + * ext2fs.h (struct user_pager_info): Add max_prot field, remove p. + * pager.c (drop_pager_softrefs): Declare PAGER, not UPI. + (enable_caching): The disk node is upi->node, not upi->np. + (diskfs_enable_pagers): Function removed. + * inode.c (read_disknode): Add DN and OFFSET variables. Use + log2_block_size to mask instead of doing a modulo with block_size. + * hyper.c (diskfs_readonly_changed): Typo. + (allocate_mod_map): Declare ERR; OLD_MOD_MAP_SIZE --> MOD_MAP_SIZE. + * dir.c (diskfs_lookup, diskfs_dirempty): Give diskfs_get_filemap + a protection arg. + * truncate.c (force_delayed_copies): Ditto. + +Wed Oct 18 21:00:28 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.h (struct disknode): Replace fileinfo field with pager. + * inode.c (diskfs_node_norefs, iget): Use pager field, not fileinfo. + * pager.c (diskfs_get_filemap_pager_struct, flush_node_pager, + diskfs_file_update, pager_clear_user_data, drop_pager_softrefs): Ditto. + * truncate.c (enable_delayed_copies, force_delayed_copies): Ditto. + + * ext2fs.c (main): Always include VM_PROT_WRITE in max prot. + * hyper.c (diskfs_readonly_changed): Change the protection of + DISK_IMAGE to reflect the new state. + * pager.c (diskfs_enable_pagers): New function. + +Tue Oct 17 21:16:04 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * pokel.c (_pokel_exec, pokel_flush): New functions. + (pokel_sync): Use _pokel_exec to do the work. + * pager.c (flush_node_pager): New function. + * ext2fs.h (pokel_flush, flush_node_pager): New declarations. + + * hyper.c (allocate_mod_map): New function (from get_hypermetadata). + (zeroblock, modified_global_blocks): Define (were common). + (get_hypermetadata): Deallocate ZEROBLOCK if necessary. Use + allocate_mod_map to allocate MODIFIED_GLOBAL_BLOCKS. + (diskfs_readonly_changed): New function. + * main.c (main): Move stuff into get_hypermetadata. 
+ Writable init code moved to diskfs_readonly_changed. + + * inode.c (diskfs_node_reload): New function. + (iget, read_disknode): Code to set allocsize and the last_* fields + moved from iget to read_disknode. + + * ext2fs.h (disk_pager): Type changed to struct pager. + (sync_global_ptr): Use DISK_PAGER, not DISK_PAGER->p. + * pager.c (create_disk_pager): Store the actual pager into DISK_PAGER. + * ext2fs.c (main): Use DISK_PAGER directly, not ->p. + * inode.c (iget): Ditto. + * pager.c (diskfs_shutdown_pager, diskfs_sync_everything): Ditto. + +Mon Oct 16 15:23:25 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_lost_hardrefs): #ifdef'd out contents removed. + +Fri Oct 13 17:50:23 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Use argp for parsing. + (usage, USAGE, SHORT_OPTS, long_opts, console_stdio): Removed + +Thu Oct 12 18:16:00 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * hyper.c (get_hypermetadata): Use diskfs device functions & + variables instead of our own. + * ext2fs.h (device_arg, device_name, device_port, device_start, + device_size, device_block_size): Declarations removed. + + * ext2fs.c (printf, _ext2_error, _ext2_panic, _ext2_warning): + Functions moved to msg.c + * msg.c: New file. + (printf, _ext2_error, _ext2_panic, _ext2_warning): Funcs from ext2fs.c. + * Makefile (SRCS): Remove devio.c, add msg.c. + +Sat Oct 7 20:47:19 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): run_elem_t --> off_t. + + * ext2fs.c (diskfs_init_completed): Func deleted (now in libdiskfs). + +Fri Oct 6 17:24:57 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): Change type of + ADDRESSES to off_t **, and add BLOCK_SIZE parameter. + +Wed Oct 4 20:02:34 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): fsys_stb_bsize -> fsys_stb_iosize. + fsys_stb_fsize -> fsys_stb_bsize. 
+ +Wed Sep 27 20:07:53 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ext2fs.c (main): Use diskfs routines to open the device. + Support both file and mach devices. Move the parse function here. + (parse_opt): Move into main (as a nested function). + * ext2fs.h (device_arg, device_start): New declarations. + * devio.c (dev_read_sync, dev_write_sync): Offset the address to + which we're doing i/o with DEVICE_START. + +Tue Sep 26 18:39:58 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * storeinfo.c (diskfs_S_file_get_storage_info): New function. + * Makefile (SRCS): Add storeinfo.c. + +Fri Sep 15 14:21:18 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * truncate.c (trunc_indirect): Only record an indirect block as + being modified when it actually is. + + * truncate.c (diskfs_truncate): Use the new truncate functions. + (poke_pages): Gratuitous cosmetic changes. + (trunc_direct): Rewritten, new args. + (trunc_indirect): Rewritten, new args, now more general to support + all the various indirection levels. + (trunc_triple_indirect, trunc_double_indirect, + trunc_single_indirect): New functions. + (struct free_block_run): New structure. + (free_block_run_finish, free_block_run_free_ptr, + free_block_run_add, free_block_run_init, _free_block_run_flush): + New functions. + (trunc_dindirect, trunc_tindirect): Functions deleted. + (DIRECT_BLOCK, INDIRECT_BLOCK, DINDIRECT_BLOCK, TINDIRECT_BLOCK): + Macros deleted. + + * getblk.c (block_getblk, ext2_getblk): u32 --> block_t. + * balloc.c (ext2_new_block): Ditto. + * hyper.c (get_hypermetadata): Ditto. + * pager.c (file_pager_write_page): Ditto. + +Wed Sep 13 12:30:23 1995 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup): Don't attempt to lock NP if NPP is not + set. Don't even set NP if NPP is not set; use INUM as "lookup + succeeded flag" instead. Lookups for REMOVE and RENAME now *must* + set NPP. 
+ +Tue Sep 12 11:03:19 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * pokel.c (pokel_init): Initialize the free_pokes field. + (pokel_add): Assert that this malloc should succeed. + (pokel_sync): Don't hold POKEL's spin lock while syncing. + + * ialloc.c (diskfs_alloc_node): Check for a non-zero ALLOCSIZE. + +Tue Sep 5 16:59:40 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * pager.c (diskfs_pager_users): Ignore the disk pager when seeing + if there are any active pagers. + (diskfs_shutdown_pager): shutdown_one gets passed a pager, not a upi. + (diskfs_sync_everything): sync_one gets passed a pager, not a upi. + +Sun Sep 3 17:28:13 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * ext2fs.c (thread_cancel): Removed. + +Fri Aug 25 14:37:32 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (ext2fs): Put libports in the right place in the + linking order. + +Thu Aug 24 10:34:15 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (ext2fs): Put all dependencies here. + (HURDLIBS): Removed. + +Tue Aug 22 19:39:06 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Add libshouldbeinlibc. + Remove rules for error.o. + +Fri Jul 21 17:51:33 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * pager.c (diskfs_get_filemap): Free initial reference created by + pager_create. + + * pager.c (diskfs_get_filemap): Avoid race with simultaneous + termination by looping until we win. + (pager_clear_user_data): Only clear UPI->np->dn->fileinfo if it + still points to us. + +Thu Jul 6 15:33:24 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * Makefile: Removed dependencies that are now automatically + generated. + +Thu Jul 6 13:36:25 1995 Miles Bader <miles@geech.gnu.ai.mit.edu> + + * pager.c (diskfs_pager_users): New function. + +Tue Jun 27 13:08:33 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * pager.c (pager_unlock_page): Declare BLOCK volatile. 
+ +Sat Jun 24 17:59:36 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Add libihash. + + * ext2fs.c (thread_cancel): Dummy function. + + * dir.c (diskfs_direnter): Move assignment out of test. + (diskfs_get_directs): Fix print-format types. + * ialloc.c (diskfs_free_node): Fix print-format types. + + * pager.c (thread_function): New function. + (create_disk_pager): Make a new thread to service paging requests. + * ext2fs.c (main): Have the initial thread die when it's done, leaving + other thread to do the work. + + * pager.c (pager_bucket): New variable. + (pager_list_lock, file_pager_list): Variables deleted. + (create_disk_pager): Create pager_bucket. + (create_disk_pager, diskfs_get_filemap): Pass pager_bucket to + pager_create. + (pager_traverse): Function deleted. + (diskfs_get_filemap, pager_clear_user_data): Don't add/remove UPI + to/from the pager list, as there isn't any. + (diskfs_shutdown_pager, diskfs_sync_everything): Use + ports_bucket_iterate on pager_bucket to go through all the pagers, + instead of pager_traverse. + (diskfs_file_update, pager_traverse, allow_pager_softrefs, + drop_pager_softrefs): Change pager [un]ref calls to use the new ports + ref calls directly instead. + (pager_dropweak): New function (does nothing). + * ext2fs.h (struct user_pager_info): Remove the next & prevp fields. + + * truncate.c (force_delayed_copies, enable_delayed_copies): Change + pager [un]ref calls to use the new ports ref calls directly instead. + * inode.c (diskfs_lost_hardrefs): Ditto. + + * inode.c (diskfs_node_iterate): New function. + (write_all_disknodes): Re-implemented using diskfs_node_iterate. + +Wed Jun 14 16:19:49 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * inode.c (diskfs_get_translator): Conform to new memory usage + semantic. + +Fri May 19 20:56:51 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * ext2fs.c (main): Use options_parse & diskfs_standard_startup_options + to do command line options parsing. 
+ (long_opts): Was `options'. Most things removed, as + they're now handled by libdiskfs. + (parse_opt): New routine to deal with our few meagre remaining + options in the approved options_parse manner. + +Mon May 15 15:55:49 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * ext2fs.c (main, usage, options): Add --writable & --nosync options. + +Sat May 13 20:04:55 1995 Miles Bader <miles@duality.gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): Set st->fsys_stb_bsize, not _fsize, + to the block size. + +Sat May 13 05:02:59 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * Makefile (OBJS): Remove exec_server_image.o. + (exec_server_image.o): Rule removed. + +Fri May 12 15:23:02 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * ext2fs.c (main): Add an optional argument to the --sync option + that lets the user specify an initial sync interval. + +Thu May 11 13:30:06 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * pager.c (pager_unlock_page): Give an explanation of why the file + system will shortly crash. + + * balloc.c (ext2_free_blocks, ext2_new_block): Get rid of the + CHECK_STRICT variable, and just always do the tests it controlled. + * ext2fs.h: Get rid of the CHECK_STRICT variable. + + * ext2fs.h (ext2_error, ext2_warning, ext2_panic, all callers changed): + Make these into macros that automagically supply the caller's + function name, and rename the original functions (which these + macros call) to have an underline prefix. + * ext2fs.c (ext2_error, ext2_warning, ext2_panic): Rename to add + the underline prefix. Also rearrange a bit to hold the lock + around the use of the global message buffer. + + * ext2fs.c (main): Enable the bootstrap code. + + * inode.c (read_disknode): Make st_blksize larger: 2 * pagesize. + +Wed May 10 14:03:34 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * getblk.c (block_getblk, inode_getblk): Return ENOSPC instead of + EIO when we can't allocate a new block. 
+ + * bitmap.c (find_next_zero_bit): Fix stupid typos (present in the + original linux source I copied this function from!) which were + causing occasional garbage results. + +Tue May 9 18:08:41 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * ext2fs.h (DONT_CACHE_MEMORY_OBJECTS): Don't define this any + more, as the bugs we were using it to catch are supposedly gone :-| diff --git a/ext2fs/Makefile b/ext2fs/Makefile new file mode 100644 index 00000000..05a8edfc --- /dev/null +++ b/ext2fs/Makefile @@ -0,0 +1,29 @@ +# Makefile for ext2fs +# +# Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +dir := ext2fs +makemode := server + +target = ext2fs +SRCS = balloc.c dir.c ext2fs.c getblk.c hyper.c ialloc.c \ + inode.c pager.c pokel.c truncate.c storeinfo.c msg.c +OBJS = $(SRCS:.c=.o) +LCLHDRS = ext2fs.h ext2_fs.h ext2_fs_i.h bitmap.c +HURDLIBS=diskfs pager iohelp fshelp store ports threads ihash shouldbeinlibc + +include ../Makeconf diff --git a/ext2fs/balloc.c b/ext2fs/balloc.c new file mode 100644 index 00000000..437febaa --- /dev/null +++ b/ext2fs/balloc.c @@ -0,0 +1,490 @@ +/* Block allocation routines + + Copyright (C) 1995, 1999 Free Software Foundation, Inc. 
+ + Converted to work under the hurd by Miles Bader <miles@gnu.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the group. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + +#include <string.h> +#include "ext2fs.h" +#include "bitmap.c" + +/* Returns a pointer to the first occurrence of CH in the buffer BUF of len + LEN, or BUF + LEN if CH doesn't occur. 
*/ +static inline void * +memscan (void *buf, unsigned char ch, size_t len) +{ + return memchr (buf, ch, len) ?: buf + len; +} + +/* True iff B lies within the LEN consecutive values starting at FIRST + (both ends inclusive). The arguments are evaluated more than once. */ +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/* Free the COUNT consecutive blocks starting at BLOCK. + + With global_lock held, each block's bit in its group's block bitmap is + cleared and, for every bit that was actually set, the free-block counts + of the group and of the superblock are incremented; a run that crosses + a group boundary is handled one group at a time. A request reaching + outside the data zone is reported with ext2_error and ignored, and an + attempt to free a bitmap or inode-table block is reported with + ext2_panic. */ +void +ext2_free_blocks (block_t block, unsigned long count) +{ + char *bh; + unsigned long block_group; + unsigned long bit; + unsigned long i; + struct ext2_group_desc *gdp; + + spin_lock (&global_lock); + + if (block < sblock->s_first_data_block || + (block + count) > sblock->s_blocks_count) + { + ext2_error ("freeing blocks not in datazone - " + "block = %u, count = %lu", block, count); + spin_unlock (&global_lock); + return; + } + + /* BLOCK is printed with %u here, as in every other message of this + function. */ + ext2_debug ("freeing block %u[%lu]", block, count); + + do + { + /* Number of blocks to free in the current group. */ + unsigned long int gcount = count; + + block_group = ((block - sblock->s_first_data_block) + / sblock->s_blocks_per_group); + bit = (block - sblock->s_first_data_block) % sblock->s_blocks_per_group; + if (bit + count > sblock->s_blocks_per_group) + { + /* The run continues into the next group; the part beyond this + group is left for the next iteration. */ + unsigned long overflow = bit + count - sblock->s_blocks_per_group; + gcount -= overflow; + ext2_debug ("freeing blocks across group boundary - " + "block = %u, count = %lu", + block, count); + } + gdp = group_desc (block_group); + bh = bptr (gdp->bg_block_bitmap); + + if (in_range (gdp->bg_block_bitmap, block, gcount) || + in_range (gdp->bg_inode_bitmap, block, gcount) || + in_range (block, gdp->bg_inode_table, itb_per_group) || + in_range (block + gcount - 1, gdp->bg_inode_table, itb_per_group)) + ext2_panic ("freeing blocks in system zones - " + "block = %u, count = %lu", + block, count); + + for (i = 0; i < gcount; i++) + { + if (!clear_bit (bit + i, bh)) + ext2_warning ("bit already cleared for block %lu", block + i); + else + { + gdp->bg_free_blocks_count++; + sblock->s_free_blocks_count++; + } + } + + record_global_poke (bh); + record_global_poke (gdp); + + block += gcount; + count -= gcount; + } while (count > 0); + + sblock_dirty = 1; + + spin_unlock (&global_lock); + + alloc_sync (0); +} + +/* + * 
ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. + * + * Returns the number of the allocated block, or 0 if no block could be + * allocated. The PREALLOC_* arguments are only used when EXT2_PREALLOCATE + * is defined. + */ +block_t +ext2_new_block (block_t goal, + block_t prealloc_goal, + block_t *prealloc_count, block_t *prealloc_block) +{ + char *bh; + char *p, *r; + int i, j, k, tmp; + unsigned long lmap; + struct ext2_group_desc *gdp; + +#ifdef EXT2FS_DEBUG + static int goal_hits = 0, goal_attempts = 0; +#endif + + spin_lock (&global_lock); + +#ifdef XXX /* Auth check to use reserved blocks */ + if (sblock->s_free_blocks_count <= sblock->s_r_blocks_count && + (!fsuser () && (sb->u.ext2_sb.s_resuid != current->fsuid) && + (sb->u.ext2_sb.s_resgid == 0 || + !in_group_p (sb->u.ext2_sb.s_resgid)))) + { + spin_unlock (&global_lock); + return 0; + } +#endif + + ext2_debug ("goal=%lu", goal); + +repeat: + /* + * First, test whether the goal block is free. 
*/ + if (goal < sblock->s_first_data_block || goal >= sblock->s_blocks_count) + goal = sblock->s_first_data_block; + i = (goal - sblock->s_first_data_block) / sblock->s_blocks_per_group; + gdp = group_desc (i); + if (gdp->bg_free_blocks_count > 0) + { + j = ((goal - sblock->s_first_data_block) % sblock->s_blocks_per_group); +#ifdef EXT2FS_DEBUG + if (j) + goal_attempts++; +#endif + bh = bptr (gdp->bg_block_bitmap); + + ext2_debug ("goal is at %d:%d", i, j); + + if (!test_bit (j, bh)) + { +#ifdef EXT2FS_DEBUG + goal_hits++; + ext2_debug ("goal bit allocated!"); +#endif + goto got_block; + } + if (j) + { + /* + * The goal was occupied; search forward for a free + * block within the next 32 blocks + */ + lmap = ((((unsigned long *) bh)[j >> 5]) >> + ((j & 31) + 1)); + if (j < sblock->s_blocks_per_group - 32) + lmap |= (((unsigned long *) bh)[(j >> 5) + 1]) << + (31 - (j & 31)); + else + lmap |= 0xffffffff << (31 - (j & 31)); + if (lmap != 0xffffffffl) + { + k = ffz (lmap) + 1; + if ((j + k) < sblock->s_blocks_per_group) + { + j += k; + goto got_block; + } + } + } + + ext2_debug ("bit not found near goal"); + + /* + * There has been no free block found in the near vicinity + * of the goal: do a search forward through the block groups, + * searching in each group first for an entire free byte in + * the bitmap and then for any free bit. + * + * Search first in the remainder of the current group; then, + * cyclically search through the rest of the groups. + */ + p = ((char *) bh) + (j >> 3); + r = memscan (p, 0, (sblock->s_blocks_per_group - j + 7) >> 3); + k = (r - ((char *) bh)) << 3; + if (k < sblock->s_blocks_per_group) + { + j = k; + goto search_back; + } + k = find_next_zero_bit ((unsigned long *) bh, + sblock->s_blocks_per_group, + j); + if (k < sblock->s_blocks_per_group) + { + j = k; + goto got_block; + } + } + + ext2_debug ("bit not found in block group %d", i); + + /* + * Now search the rest of the groups. 
We assume that + * i and gdp correctly point to the last group visited. + */ + for (k = 0; k < groups_count; k++) + { + i++; + if (i >= groups_count) + i = 0; + gdp = group_desc (i); + if (gdp->bg_free_blocks_count > 0) + break; + } + if (k >= groups_count) + { + spin_unlock (&global_lock); + return 0; + } + bh = bptr (gdp->bg_block_bitmap); + r = memscan (bh, 0, sblock->s_blocks_per_group >> 3); + j = (r - bh) << 3; + if (j < sblock->s_blocks_per_group) + goto search_back; + else + j = find_first_zero_bit ((unsigned long *) bh, + sblock->s_blocks_per_group); + if (j >= sblock->s_blocks_per_group) + { + ext2_error ("free blocks count corrupted for block group %d", i); + spin_unlock (&global_lock); + return 0; + } + +search_back: + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards up to 7 bits to find the + * start of this group of free blocks. + */ + for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh); k++, j--); + +got_block: + + ext2_debug ("using block group %d (%d)", i, gdp->bg_free_blocks_count); + + tmp = j + i * sblock->s_blocks_per_group + sblock->s_first_data_block; + + if (tmp == gdp->bg_block_bitmap || + tmp == gdp->bg_inode_bitmap || + in_range (tmp, gdp->bg_inode_table, itb_per_group)) + ext2_panic ("allocating block in system zone; block = %u", tmp); + + if (set_bit (j, bh)) + { + ext2_warning ("bit already set for block %d", j); + goto repeat; + } + + /* Since due to bletcherousness block-modified bits are never turned off + when writing disk-pager pages, make sure they are here, in case this + block is being allocated to a file (see pager.c). */ + if (modified_global_blocks) + { + spin_lock (&modified_global_blocks_lock); + clear_bit (tmp, modified_global_blocks); + spin_unlock (&modified_global_blocks_lock); + } + + ext2_debug ("found bit %d", j); + + /* + * Do block preallocation now if required. 
*/ +#ifdef EXT2_PREALLOCATE + if (prealloc_goal) + { + *prealloc_count = 0; + *prealloc_block = tmp + 1; + for (k = 1; + k < prealloc_goal && (j + k) < sblock->s_blocks_per_group; k++) + { + if (set_bit (j + k, bh)) + break; + (*prealloc_count)++; + + /* (See comment before the clear_bit above) */ + if (modified_global_blocks) + { + spin_lock (&modified_global_blocks_lock); + clear_bit (tmp + k, modified_global_blocks); + spin_unlock (&modified_global_blocks_lock); + } + } + gdp->bg_free_blocks_count -= *prealloc_count; + sblock->s_free_blocks_count -= *prealloc_count; + ext2_debug ("preallocated a further %lu bits", *prealloc_count); + } +#endif + + j = tmp; + + record_global_poke (bh); + + if (j >= sblock->s_blocks_count) + { + ext2_error ("block >= blocks count - block_group = %d, block=%d", i, j); + j = 0; + goto sync_out; + } + + ext2_debug ("allocating block %d; goal hits %d of %d", + j, goal_hits, goal_attempts); + + gdp->bg_free_blocks_count--; + record_global_poke (gdp); + + sblock->s_free_blocks_count--; + sblock_dirty = 1; + + sync_out: + spin_unlock (&global_lock); + alloc_sync (0); + + return j; +} + +/* Return the number of free blocks in the filesystem. Normally this is + the count stored in the superblock. With EXT2FS_DEBUG defined, the zero + bits of every group's block bitmap are counted instead, and the stored + and counted values are printed along the way. */ +unsigned long +ext2_count_free_blocks () +{ +#ifdef EXT2FS_DEBUG + unsigned long desc_count, bitmap_count, x; + struct ext2_group_desc *gdp; + int i; + + spin_lock (&global_lock); + + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < groups_count; i++) + { + gdp = group_desc (i); + desc_count += gdp->bg_free_blocks_count; + x = count_free (bptr (gdp->bg_block_bitmap), block_size); + printf ("group %d: stored = %d, counted = %lu", + i, gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + printf ("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu", + sblock->s_free_blocks_count, desc_count, bitmap_count); + spin_unlock (&global_lock); + return bitmap_count; +#else + return sblock->s_free_blocks_count; +#endif +} + +/* Test the bit for BLOCK in MAP, which should be the block bitmap of the + group that BLOCK belongs to; returns test_bit's result. */ +static inline int +block_in_use (block_t block, unsigned char *map) +{ + return test_bit ((block - 
sblock->s_first_data_block) % + sblock->s_blocks_per_group, map); +} + +/* Consistency check of the block bitmaps. For every group, report with + ext2_error if the superblock and group descriptor blocks (in the groups + that are checked for them), the block bitmap, the inode bitmap or any + inode table block is marked free, or if the group's stored free-block + count differs from the number of zero bits in its bitmap. Finally + compare the superblock's free-block count with the total counted. */ +void +ext2_check_blocks_bitmap () +{ + char *bh; + unsigned long desc_count, bitmap_count, x; + unsigned long desc_blocks; + struct ext2_group_desc *gdp; + int i, j; + + spin_lock (&global_lock); + + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + + desc_blocks = (groups_count + desc_per_block - 1) / desc_per_block; + + for (i = 0; i < groups_count; i++) + { + /* Nonzero iff A is 0 or a power of B (including B^0 == 1). */ + inline int test_root (int a, int b) + { + if (a == 0) + return 1; + while (1) + { + if (a == 1) + return 1; + if (a % b) + return 0; + a = a / b; + } + } + /* Nonzero iff GROUP is 0, 1 or a power of 3, 5 or 7. */ + inline int ext2_group_sparse (int group) + { + return (test_root (group, 3) || test_root (group, 5) + || test_root (group, 7)); + } + + gdp = group_desc (i); + desc_count += gdp->bg_free_blocks_count; + bh = bptr (gdp->bg_block_bitmap); + + /* Without the sparse-superblock feature every group is checked for + a superblock and descriptors; with it, only the groups selected + by ext2_group_sparse. */ + if (!EXT2_HAS_RO_COMPAT_FEATURE (sblock, + EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) + || ext2_group_sparse (i)) + { + if (!test_bit (0, bh)) + ext2_error ("superblock in group %d is marked free", i); + + for (j = 0; j < desc_blocks; j++) + if (!test_bit (j + 1, bh)) + ext2_error ("descriptor block #%d in group %d is marked free", + j, i); + } + + if (!block_in_use (gdp->bg_block_bitmap, bh)) + ext2_error ("block bitmap for group %d is marked free", i); + + if (!block_in_use (gdp->bg_inode_bitmap, bh)) + ext2_error ("inode bitmap for group %d is marked free", i); + + for (j = 0; j < itb_per_group; j++) + if (!block_in_use (gdp->bg_inode_table + j, bh)) + ext2_error ("block #%d of the inode table in group %d is marked free", j, i); + + x = count_free (bh, block_size); + if (gdp->bg_free_blocks_count != x) + ext2_error ("wrong free blocks count for group %d," + " stored = %d, counted = %lu", + i, gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + if (sblock->s_free_blocks_count != bitmap_count) + ext2_error ("wrong free blocks count in super block," + " stored = %lu, counted = %lu", + (unsigned long) sblock->s_free_blocks_count, bitmap_count); + 
spin_unlock (&global_lock); +} diff --git a/ext2fs/bitmap.c b/ext2fs/bitmap.c new file mode 100644 index 00000000..e512d011 --- /dev/null +++ b/ext2fs/bitmap.c @@ -0,0 +1,106 @@ +/* Bitmap perusing routines + + Copyright (C) 1995 Free Software Foundation, Inc. + + Converted to work under the hurd by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* 0-origin index of the least significant zero bit of WORD (taken as an + unsigned int); -1 if WORD has no zero bit, since ffs then returns 0. */ +#define ffz(word) (ffs (~(unsigned int) (word)) - 1) + +/* + * linux/fs/ext2/bitmap.c (&c) + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +/* nibblemap[N] is the number of zero bits in the 4-bit value N. */ +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +/* Return the number of zero bits in the first NUMCHARS bytes of MAP, or 0 + if MAP is null. Each byte is looked up one nibble at a time. */ +static inline +unsigned long count_free (char * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map[i] & 0xf] + + nibblemap[(map[i] >> 4) & 0xf]; + return (sum); +} + +/* ---------------------------------------------------------------- */ + +/* + * Copyright 1994, David S. Miller (davem@caip.rutgers.edu). + */ + +/* find_next_zero_bit() finds the first zero bit in a bit string of length + * 'size' bits, starting the search at bit 'offset'. This is largely based + * on Linus's ALPHA routines, which are pretty portable BTW. 
*/ + +/* Returns the index of the first zero bit at or after 'offset', or 'size' + if there is no such bit. The bit string is read as an array of 32-bit + words, which is the unit the `>> 5' and `& 31' arithmetic below assumes + (hence `unsigned int', not `unsigned long', for the words). */ +static inline unsigned long +find_next_zero_bit(void *addr, unsigned long size, unsigned long offset) +{ + unsigned int *p = ((unsigned int *) addr) + (offset >> 5); + unsigned long result = offset & ~31UL; /* Bit index of the word at P. */ + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; /* SIZE now counts from RESULT. */ + offset &= 31UL; + if (offset) + { + /* Partial first word: treat the bits below OFFSET as set. */ + tmp = *(p++); + tmp |= ~0U >> (32-offset); + if (size < 32) + goto found_first; + if (~tmp) + goto found_middle; + size -= 32; + result += 32; + } + while (size & ~31UL) + { + if (~(tmp = *(p++))) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = *p; + +found_first: + /* Partial last word: treat the bits at and above SIZE as set. */ + tmp |= ~0U << size; + if (tmp == ~0U) + /* No zero bit is left; ffz would yield -1 here, so return the + length of the bit string (RESULT + SIZE) instead. */ + return result + size; +found_middle: + return result + ffz(tmp); +} + +/* Linus sez that gcc can optimize the following correctly, we'll see if this + * holds on the Sparc as it does for the ALPHA. + */ + +/* Returns the index of the first zero bit in the LEN-bit string at BUF, + or LEN if every bit is set. */ +static inline int +find_first_zero_bit(void *buf, unsigned len) +{ + return find_next_zero_bit(buf, len, 0); +} diff --git a/ext2fs/devio.c b/ext2fs/devio.c new file mode 100644 index 00000000..3e97fcb6 --- /dev/null +++ b/ext2fs/devio.c @@ -0,0 +1,51 @@ +/* Device input and output + Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU Hurd. + +The GNU Hurd is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +The GNU Hurd is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the GNU Hurd; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Michael I. Bushnell. 
*/ + +#include "ext2fs.h" +#include <device/device.h> +#include <device/device_request.h> + +/* Write disk block ADDR with DATA of LEN bytes, waiting for completion. */ +error_t +dev_write_sync (block_t addr, vm_address_t data, long len) +{ + int written; + assert (!diskfs_readonly); + if (device_write (device_port, 0, device_start + addr, + (io_buf_ptr_t) data, len, &written) + || written != len) + return EIO; + return 0; +} + +/* Read disk block ADDR; put the address of the data in DATA; read LEN + bytes. Always *DATA should be a full page no matter what. */ +error_t +dev_read_sync (block_t addr, vm_address_t *data, long len) +{ + u_int read; + if (device_read (device_port, 0, device_start + addr, len, + (io_buf_ptr_t *)data, &read) + || read != len) + return EIO; + return 0; +} + diff --git a/ext2fs/dir.c b/ext2fs/dir.c new file mode 100644 index 00000000..9d341b17 --- /dev/null +++ b/ext2fs/dir.c @@ -0,0 +1,1082 @@ +/* Directory management routines + + Copyright (C) 1994,95,96,97,98,99 Free Software Foundation, Inc. + + Converted for ext2fs by Miles Bader <miles@gnu.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include "ext2fs.h" + +#include <string.h> +#include <stdio.h> +#include <dirent.h> +#include <stddef.h> + +/* This isn't quite right because a file system block may straddle several + device blocks, and so a write failure between writing two device blocks + may scramble things up a bit. But the linux doesn't do this. We could + try and make sure that we never wrote any modified directories with + entries that straddle device blocks (but read those that do)... */ +#define DIRBLKSIZ block_size + +enum slot_status +{ + /* This means we haven't yet found room for a new entry. */ + LOOKING, + + /* This means that the specified entry is free and should be used. */ + TAKE, + + /* This means that the specified entry has enough room at the end + to hold the new entry. */ + SHRINK, + + /* This means that there is enough space in the block, but not in + any one single entry, so they all have to be shifted to make + room. */ + COMPRESS, + + /* This means that the directory will have to be grown to hold the + entry. */ + EXTEND, + + /* For removal and rename, this means that this is the location + of the entry found. */ + HERE_TIS, +}; + +struct dirstat +{ + /* Type of followp operation expected */ + enum lookup_type type; + + /* One of the statuses above */ + enum slot_status stat; + + /* Mapped address and length of directory */ + vm_address_t mapbuf; + vm_size_t mapextent; + + /* Index of this directory block. */ + int idx; + + /* For stat COMPRESS, this is the address (inside mapbuf) + of the first direct in the directory block to be compressed. */ + /* For stat HERE_TIS, SHRINK, and TAKE, this is the entry referenced. */ + struct ext2_dir_entry_2 *entry; + + /* For stat HERE_TIS, type REMOVE, this is the address of the immediately + previous direct in this directory block, or zero if this is the first. */ + struct ext2_dir_entry_2 *preventry; + + /* For stat COMPRESS, this is the number of bytes needed to be copied + in order to undertake the compression. 
*/ + size_t nbytes; +}; + +size_t diskfs_dirstat_size = sizeof (struct dirstat); + +/* Initialize DS such that diskfs_drop_dirstat will ignore it. */ +void +diskfs_null_dirstat (struct dirstat *ds) +{ + ds->type = LOOKUP; +} + +static error_t +dirscanblock (vm_address_t blockoff, struct node *dp, int idx, + const char *name, int namelen, enum lookup_type type, + struct dirstat *ds, ino_t *inum); + + +#if 0 /* XXX unused for now */ +static const unsigned char ext2_file_type[EXT2_FT_MAX] = +{ + [EXT2_FT_UNKNOWN] = DT_UNKNOWN, + [EXT2_FT_REG_FILE] = DT_REG, + [EXT2_FT_DIR] = DT_DIR, + [EXT2_FT_CHRDEV] = DT_CHR, + [EXT2_FT_BLKDEV] = DT_BLK, + [EXT2_FT_FIFO] = DT_FIFO, + [EXT2_FT_SOCK] = DT_SOCK, + [EXT2_FT_SYMLINK] = DT_LNK, +}; + +static const unsigned char file_type_ext2[] = +{ + [DT_UNKNOWN] = EXT2_FT_UNKNOWN, + [DT_REG] = EXT2_FT_REG_FILE, + [DT_DIR] = EXT2_FT_DIR, + [DT_CHR] = EXT2_FT_CHRDEV, + [DT_BLK] = EXT2_FT_BLKDEV, + [DT_FIFO] = EXT2_FT_FIFO, + [DT_SOCK] = EXT2_FT_SOCK, + [DT_LNK] = EXT2_FT_SYMLINK, +}; +#endif + +/* Implement the diskfs_lookup from the diskfs library. See + <hurd/diskfs.h> for the interface specification. */ +error_t +diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, + struct node **npp, struct dirstat *ds, struct protid *cred) +{ + error_t err; + ino_t inum; + int namelen; + int spec_dotdot; + struct node *np = 0; + int retry_dotdot = 0; + vm_prot_t prot = + (type == LOOKUP) ? 
VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE); + memory_object_t memobj; + vm_address_t buf = 0; + vm_size_t buflen = 0; + int blockaddr; + int idx, lastidx; + int looped; + + if ((type == REMOVE) || (type == RENAME)) + assert (npp); + + if (npp) + *npp = 0; + + spec_dotdot = type & SPEC_DOTDOT; + type &= ~SPEC_DOTDOT; + + namelen = strlen (name); + + if (namelen > EXT2_NAME_LEN) + return ENAMETOOLONG; + + try_again: + if (ds) + { + ds->type = LOOKUP; + ds->mapbuf = 0; + ds->mapextent = 0; + } + if (buf) + { + munmap ((caddr_t) buf, buflen); + buf = 0; + } + if (ds && (type == CREATE || type == RENAME)) + ds->stat = LOOKING; + + /* Map in the directory contents. */ + memobj = diskfs_get_filemap (dp, prot); + + if (memobj == MACH_PORT_NULL) + return errno; + + buf = 0; + /* We allow extra space in case we have to do an EXTEND. */ + buflen = round_page (dp->dn_stat.st_size + DIRBLKSIZ); + err = vm_map (mach_task_self (), + &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0); + mach_port_deallocate (mach_task_self (), memobj); + + inum = 0; + + if (!diskfs_check_readonly ()) + dp->dn_set_atime = 1; + + /* Start the lookup at DP->dn->dir_idx. 
*/ + idx = dp->dn->dir_idx; + if (idx * DIRBLKSIZ > dp->dn_stat.st_size) + idx = 0; /* just in case */ + blockaddr = buf + idx * DIRBLKSIZ; + looped = (idx == 0); + lastidx = idx; + if (lastidx == 0) + lastidx = dp->dn_stat.st_size / DIRBLKSIZ; + + while (!looped || idx < lastidx) + { + err = dirscanblock (blockaddr, dp, idx, name, namelen, type, ds, &inum); + if (!err) + { + dp->dn->dir_idx = idx; + break; + } + if (err != ENOENT) + { + munmap ((caddr_t) buf, buflen); + return err; + } + + blockaddr += DIRBLKSIZ; + idx++; + if (blockaddr - buf >= dp->dn_stat.st_size && !looped) + { + /* We've gotten to the end; start back at the beginning */ + looped = 1; + blockaddr = buf; + idx = 0; + } + } + + if (!diskfs_check_readonly ()) + dp->dn_set_atime = 1; + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + + /* If err is set here, it's ENOENT, and we don't want to + think about that as an error yet. */ + err = 0; + + if (inum && npp) + { + if (namelen != 2 || name[0] != '.' || name[1] != '.') + { + if (inum == dp->cache_id) + { + np = dp; + diskfs_nref (np); + } + else + { + err = diskfs_cached_lookup (inum, &np); + if (err) + goto out; + } + } + + /* We are looking up .. */ + /* Check to see if this is the root of the filesystem. */ + else if (dp->cache_id == 2) + { + err = EAGAIN; + goto out; + } + + /* We can't just do diskfs_cached_lookup, because we would then deadlock. + So we do this. Ick. */ + else if (retry_dotdot) + { + /* Check to see that we got the same answer as last time. */ + if (inum != retry_dotdot) + { + /* Drop what we *thought* was .. (but isn't any more) and + try *again*. */ + diskfs_nput (np); + mutex_unlock (&dp->lock); + err = diskfs_cached_lookup (inum, &np); + mutex_lock (&dp->lock); + if (err) + goto out; + retry_dotdot = inum; + goto try_again; + } + /* Otherwise, we got it fine and np is already set properly. 
*/ + } + else if (!spec_dotdot) + { + /* Lock them in the proper order, and then + repeat the directory scan to see if this is still + right. */ + mutex_unlock (&dp->lock); + err = diskfs_cached_lookup (inum, &np); + mutex_lock (&dp->lock); + if (err) + goto out; + retry_dotdot = inum; + goto try_again; + } + + /* Here below are the spec dotdot cases. */ + else if (type == RENAME || type == REMOVE) + np = ifind (inum); + + else if (type == LOOKUP) + { + diskfs_nput (dp); + err = diskfs_cached_lookup (inum, &np); + if (err) + goto out; + } + else + assert (0); + } + + if ((type == CREATE || type == RENAME) && !inum && ds && ds->stat == LOOKING) + { + /* We didn't find any room, so mark ds to extend the dir */ + ds->type = CREATE; + ds->stat = EXTEND; + ds->idx = dp->dn_stat.st_size / DIRBLKSIZ; + } + + /* Return to the user; if we can't, release the reference + (and lock) we acquired above. */ + out: + /* Deallocate or save the mapping. */ + if ((err && err != ENOENT) + || !ds + || ds->type == LOOKUP) + { + munmap ((caddr_t) buf, buflen); + if (ds) + ds->type = LOOKUP; /* set to be ignored by drop_dirstat */ + } + else + { + ds->mapbuf = buf; + ds->mapextent = buflen; + } + + if (np) + { + assert (npp); + if (err) + { + if (!spec_dotdot) + { + /* Normal case */ + if (np == dp) + diskfs_nrele (np); + else + diskfs_nput (np); + } + else if (type == RENAME || type == REMOVE) + /* We just did ifind to get np; that allocates + no new references, so we don't have anything to do */ + ; + else if (type == LOOKUP) + /* We did diskfs_cached_lookup */ + diskfs_nput (np); + } + else + *npp = np; + } + + return err ? : inum ? 0 : ENOENT; +} + +/* Scan block at address BLKADDR (of node DP; block index IDX), for + name NAME of length NAMELEN. Args TYPE, DS are as for + diskfs_lookup. If found, set *INUM to the inode number, else + return ENOENT. 
*/
/* While scanning, when DS is non-null and still LOOKING (or holding a
   COMPRESS candidate), also record in DS the best place found so far for
   a new entry of NAMELEN bytes: a free slot (TAKE), slack at the end of a
   live entry (SHRINK), or this whole block if compacting it would free
   enough room (COMPRESS).  A malformed entry is reported with
   ext2_warning and ends the scan of this block with ENOENT.  */
static error_t
dirscanblock (vm_address_t blockaddr, struct node *dp, int idx,
              const char *name, int namelen, enum lookup_type type,
              struct dirstat *ds, ino_t *inum)
{
  int nfree = 0;
  int needed = 0;
  vm_address_t currentoff, prevoff;
  struct ext2_dir_entry_2 *entry = 0;
  int nentries = 0;
  size_t nbytes = 0;
  int looking = 0;
  int countcopies = 0;
  int consider_compress = 0;

  if (ds && (ds->stat == LOOKING
             || ds->stat == COMPRESS))
    {
      looking = 1;
      countcopies = 1;
      needed = EXT2_DIR_REC_LEN (namelen);
    }

  for (currentoff = blockaddr, prevoff = 0;
       currentoff < blockaddr + DIRBLKSIZ;
       prevoff = currentoff, currentoff += entry->rec_len)
    {
      entry = (struct ext2_dir_entry_2 *)currentoff;

      /* Sanity-check the entry before trusting its lengths.  */
      if (!entry->rec_len
          || entry->rec_len % EXT2_DIR_PAD
          || entry->name_len > EXT2_NAME_LEN
          || currentoff + entry->rec_len > blockaddr + DIRBLKSIZ
          || EXT2_DIR_REC_LEN (entry->name_len) > entry->rec_len
          || memchr (entry->name, '\0', entry->name_len))
        {
          ext2_warning ("bad directory entry: inode: %d offset: %d",
                        dp->cache_id,
                        currentoff - blockaddr + idx * DIRBLKSIZ);
          return ENOENT;
        }

      if (looking || countcopies)
        {
          int thisfree;

          /* Count how much free space this entry has in it. */
          if (entry->inode == 0)
            thisfree = entry->rec_len;
          else
            thisfree = entry->rec_len - EXT2_DIR_REC_LEN (entry->name_len);

          /* If this isn't at the front of the block, then it will
             have to be copied if we do a compression; count the
             number of bytes there too. */
          if (countcopies && currentoff != blockaddr)
            nbytes += EXT2_DIR_REC_LEN (entry->name_len);

          if (ds->stat == COMPRESS && nbytes > ds->nbytes)
            /* The previously found compress is better than
               this one, so don't bother counting any more. */
            countcopies = 0;

          if (thisfree >= needed)
            {
              ds->type = CREATE;
              ds->stat = entry->inode == 0 ? TAKE : SHRINK;
              ds->entry = entry;
              ds->idx = idx;
              looking = countcopies = 0;
            }
          else
            {
              nfree += thisfree;
              if (nfree >= needed)
                consider_compress = 1;
            }
        }

      if (entry->inode)
        nentries++;

      if (entry->name_len == namelen
          && entry->name[0] == name[0]
          && entry->inode
          && !bcmp (entry->name, name, namelen))
        break;
    }

  /* Record this block as a compression candidate if nothing better is
     known yet, or if it is cheaper than the candidate already recorded.
     The slot status lives in DS->stat; this test used to compare
     DS->type (an enum lookup_type) against the slot_status constants.  */
  if (consider_compress
      && (ds->stat == LOOKING
          || (ds->stat == COMPRESS && ds->nbytes > nbytes)))
    {
      ds->type = CREATE;
      ds->stat = COMPRESS;
      ds->entry = (struct ext2_dir_entry_2 *) blockaddr;
      ds->idx = idx;
      ds->nbytes = nbytes;
    }

  if (currentoff >= blockaddr + DIRBLKSIZ)
    {
      int i;
      /* The name is not in this block. */

      /* Because we scanned the entire block, we should write
         down how many entries there were. */
      if (!dp->dn->dirents)
        {
          dp->dn->dirents = malloc ((dp->dn_stat.st_size / DIRBLKSIZ)
                                    * sizeof (int));
          if (dp->dn->dirents)
            for (i = 0; i < dp->dn_stat.st_size/DIRBLKSIZ; i++)
              dp->dn->dirents[i] = -1;
        }
      /* The count array is only a cache; if it could not be allocated
         just skip recording instead of writing through a null pointer.  */
      if (dp->dn->dirents)
        {
          /* Make sure the count is correct if there is one now. */
          assert (dp->dn->dirents[idx] == -1
                  || dp->dn->dirents[idx] == nentries);
          dp->dn->dirents[idx] = nentries;
        }

      return ENOENT;
    }

  /* We have found the required name. */

  if (ds && type == CREATE)
    ds->type = LOOKUP;          /* it's invalid now */
  else if (ds && (type == REMOVE || type == RENAME))
    {
      ds->type = type;
      ds->stat = HERE_TIS;
      ds->entry = entry;
      ds->idx = idx;
      ds->preventry = (struct ext2_dir_entry_2 *) prevoff;
    }

  *inum = entry->inode;
  return 0;
}

/* Following a lookup call for CREATE, this adds a node to a directory.
   DP is the directory to be modified; NAME is the name to be entered;
   NP is the node being linked in; DS is the cached information returned
   by lookup; CRED describes the user making the call.  This call may
   only be made if the directory has been held locked continuously since
   the preceding lookup call, and only if that call returned ENOENT. 
*/ +error_t +diskfs_direnter_hard (struct node *dp, const char *name, struct node *np, + struct dirstat *ds, struct protid *cred) +{ + struct ext2_dir_entry_2 *new; + int namelen = strlen (name); + int needed = EXT2_DIR_REC_LEN (namelen); + int oldneeded; + vm_address_t fromoff, tooff; + int totfreed; + error_t err; + off_t oldsize = 0; + + assert (ds->type == CREATE); + + assert (!diskfs_readonly); + + dp->dn_set_mtime = 1; + + /* Select a location for the new directory entry. Each branch of this + switch is responsible for setting NEW to point to the on-disk + directory entry being written, and setting NEW->rec_len appropriately. */ + + switch (ds->stat) + { + case TAKE: + /* We are supposed to consume this slot. */ + assert (ds->entry->inode == 0 && ds->entry->rec_len >= needed); + + new = ds->entry; + break; + + case SHRINK: + /* We are supposed to take the extra space at the end + of this slot. */ + oldneeded = EXT2_DIR_REC_LEN (ds->entry->name_len); + assert (ds->entry->rec_len - oldneeded >= needed); + + new = (struct ext2_dir_entry_2 *) ((vm_address_t) ds->entry + oldneeded); + + new->rec_len = ds->entry->rec_len - oldneeded; + ds->entry->rec_len = oldneeded; + break; + + case COMPRESS: + /* We are supposed to move all the entries to the + front of the block, giving each the minimum + necessary room. This should free up enough space + for the new entry. 
*/ + fromoff = tooff = (vm_address_t) ds->entry; + + while (fromoff < (vm_address_t) ds->entry + DIRBLKSIZ) + { + struct ext2_dir_entry_2 *from = (struct ext2_dir_entry_2 *)fromoff; + struct ext2_dir_entry_2 *to = (struct ext2_dir_entry_2 *) tooff; + int fromreclen = from->rec_len; + + if (from->inode != 0) + { + assert (fromoff >= tooff); + + memmove (to, from, fromreclen); + to->rec_len = EXT2_DIR_REC_LEN (to->name_len); + + tooff += to->rec_len; + } + fromoff += fromreclen; + } + + totfreed = (vm_address_t) ds->entry + DIRBLKSIZ - tooff; + assert (totfreed >= needed); + + new = (struct ext2_dir_entry_2 *) tooff; + new->rec_len = totfreed; + break; + + case EXTEND: + /* Extend the file. */ + assert (needed <= DIRBLKSIZ); + + oldsize = dp->dn_stat.st_size; + while (oldsize + DIRBLKSIZ > dp->allocsize) + { + err = diskfs_grow (dp, oldsize + DIRBLKSIZ, cred); + if (err) + { + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + return err; + } + } + + new = (struct ext2_dir_entry_2 *) (ds->mapbuf + oldsize); + + dp->dn_stat.st_size = oldsize + DIRBLKSIZ; + dp->dn_set_ctime = 1; + + new->rec_len = DIRBLKSIZ; + break; + + default: + new = 0; + assert (! "impossible: bogus status field in dirstat"); + } + + /* NEW points to the directory entry being written, and its + rec_len field is already filled in. Now fill in the rest. */ + + new->inode = np->cache_id; +#if 0 + /* XXX We cannot enable this code because file types can change + (and conceivably quite often) with translator settings. + There is no way for the translator that determines the type of + the virtual node to cause all the directory entries linked to + its underlying inode to reflect the proper type. */ + new->file_type = (EXT2_HAS_INCOMPAT_FEATURE (sblock, + EXT2_FEATURE_INCOMPAT_FILETYPE) + ? file_type_ext2[IFTODT (np->dn_stat.st_mode & S_IFMT)] + : 0); +#else + new->file_type = 0; +#endif + new->name_len = namelen; + memcpy (new->name, name, namelen); + + /* Mark the directory inode has having been written. 
*/ + dp->dn->info.i_flags &= ~EXT2_BTREE_FL; + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + if (ds->stat != EXTEND) + { + /* If we are keeping count of this block, then keep the count up + to date. */ + if (dp->dn->dirents && dp->dn->dirents[ds->idx] != -1) + dp->dn->dirents[ds->idx]++; + } + else + { + int i; + /* It's cheap, so start a count here even if we aren't counting + anything at all. */ + if (dp->dn->dirents) + { + dp->dn->dirents = realloc (dp->dn->dirents, + (dp->dn_stat.st_size / DIRBLKSIZ + * sizeof (int))); + for (i = oldsize / DIRBLKSIZ; + i < dp->dn_stat.st_size / DIRBLKSIZ; + i++) + dp->dn->dirents[i] = -1; + + dp->dn->dirents[ds->idx] = 1; + } + else + { + dp->dn->dirents = malloc (dp->dn_stat.st_size / DIRBLKSIZ + * sizeof (int)); + for (i = 0; i < dp->dn_stat.st_size / DIRBLKSIZ; i++) + dp->dn->dirents[i] = -1; + dp->dn->dirents[ds->idx] = 1; + } + } + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Following a lookup call for REMOVE, this removes the link from the + directory. DP is the directory being changed and DS is the cached + information returned from lookup. This call is only valid if the + directory has been locked continously since the call to lookup, and + only if that call succeeded. */ +error_t +diskfs_dirremove_hard (struct node *dp, struct dirstat *ds) +{ + assert (ds->type == REMOVE); + assert (ds->stat == HERE_TIS); + + assert (!diskfs_readonly); + + dp->dn_set_mtime = 1; + + if (ds->preventry == 0) + ds->entry->inode = 0; + else + { + assert ((vm_address_t) ds->entry - (vm_address_t) ds->preventry + == ds->preventry->rec_len); + ds->preventry->rec_len += ds->entry->rec_len; + } + + dp->dn_set_mtime = 1; + dp->dn->info.i_flags &= ~EXT2_BTREE_FL; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + /* If we are keeping count of this block, then keep the count up + to date. 
*/ + if (dp->dn->dirents && dp->dn->dirents[ds->idx] != -1) + dp->dn->dirents[ds->idx]--; + + diskfs_file_update (dp, 1); + + return 0; +} + + +/* Following a lookup call for RENAME, this changes the inode number + on a directory entry. DP is the directory being changed; NP is + the new node being linked in; DP is the cached information returned + by lookup. This call is only valid if the directory has been locked + continuously since the call to lookup, and only if that call + succeeded. */ +error_t +diskfs_dirrewrite_hard (struct node *dp, struct node *np, struct dirstat *ds) +{ + assert (ds->type == RENAME); + assert (ds->stat == HERE_TIS); + + assert (!diskfs_readonly); + + ds->entry->inode = np->cache_id; + dp->dn_set_mtime = 1; + dp->dn->info.i_flags &= ~EXT2_BTREE_FL; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Tell if DP is an empty directory (has only "." and ".." entries). + This routine must be called from inside a catch_exception (). */ +int +diskfs_dirempty (struct node *dp, struct protid *cred) +{ + error_t err; + vm_address_t buf = 0, curoff; + struct ext2_dir_entry_2 *entry; + int hit = 0; /* Found something in the directory. */ + memory_object_t memobj = diskfs_get_filemap (dp, VM_PROT_READ); + + if (memobj == MACH_PORT_NULL) + /* XXX should reflect error properly. */ + return 0; + + err = vm_map (mach_task_self (), &buf, dp->dn_stat.st_size, 0, + 1, memobj, 0, 0, VM_PROT_READ, VM_PROT_READ, 0); + mach_port_deallocate (mach_task_self (), memobj); + assert (!err); + + if (! diskfs_check_readonly ()) + dp->dn_set_atime = 1; + + for (curoff = buf; + !hit && curoff < buf + dp->dn_stat.st_size; + curoff += entry->rec_len) + { + entry = (struct ext2_dir_entry_2 *) curoff; + + if (entry->inode != 0 + && (entry->name_len > 2 + || entry->name[0] != '.' + || (entry->name[1] != '.' + && entry->name[1] != '\0'))) + hit = 1; + } + + if (! 
diskfs_check_readonly ()) + dp->dn_set_atime = 1; + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + + munmap ((caddr_t) buf, dp->dn_stat.st_size); + + return !hit; +} + +/* Make DS an invalid dirstat. */ +error_t +diskfs_drop_dirstat (struct node *dp, struct dirstat *ds) +{ + if (ds->type != LOOKUP) + { + assert (ds->mapbuf); + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + ds->type = LOOKUP; + } + return 0; +} + + +/* Count the entries in directory block NB for directory DP and + write the answer down in its dirents array. As a side affect + fill BUF with the block. */ +static error_t +count_dirents (struct node *dp, int nb, char *buf) +{ + int amt; + char *offinblk; + struct ext2_dir_entry_2 *entry; + int count = 0; + error_t err; + + assert (dp->dn->dirents); + assert ((nb + 1) * DIRBLKSIZ <= dp->dn_stat.st_size); + + err = diskfs_node_rdwr (dp, buf, nb * DIRBLKSIZ, DIRBLKSIZ, 0, 0, &amt); + if (err) + return err; + assert (amt == DIRBLKSIZ); + + for (offinblk = buf; + offinblk < buf + DIRBLKSIZ; + offinblk += entry->rec_len) + { + entry = (struct ext2_dir_entry_2 *) offinblk; + if (entry->inode) + count++; + } + + assert (dp->dn->dirents[nb] == -1 || dp->dn->dirents[nb] == count); + dp->dn->dirents[nb] = count; + return 0; +} + +/* Returned directory entries are aligned to blocks this many bytes long. + Must be a power of two. */ +#define DIRENT_ALIGN 4 + +/* Implement the disikfs_get_directs callback as described in + <hurd/diskfs.h>. 
*/ +error_t +diskfs_get_directs (struct node *dp, + int entry, + int nentries, + char **data, + u_int *datacnt, + vm_size_t bufsiz, + int *amt) +{ + int blkno; + int nblks; + int curentry; + char buf[DIRBLKSIZ]; + char *bufp; + int bufvalid; + error_t err; + int i; + char *datap; + struct ext2_dir_entry_2 *entryp; + int allocsize; + int checklen; + struct dirent *userp; + + nblks = dp->dn_stat.st_size/DIRBLKSIZ; + + if (!dp->dn->dirents) + { + dp->dn->dirents = malloc (nblks * sizeof (int)); + for (i = 0; i < nblks; i++) + dp->dn->dirents[i] = -1; + } + + /* Allocate enough space to hold the maximum we might return */ + if (!bufsiz || bufsiz > dp->dn_stat.st_size) + /* Allocate enough to return the entire directory. Since ext2's + directory format is different than the format used to return the + entries, we allocate enough to hold the on disk directory plus + whatever extra would be necessary in the worst-case. */ + { + /* The minimum size of an ext2fs directory entry. */ + size_t min_entry_size = EXT2_DIR_REC_LEN (0); + /* The minimum size of a returned dirent entry. The +1 is for '\0'. */ + size_t min_dirent_size = offsetof (struct dirent, d_name) + 1; + /* The maximum possible number of ext2fs dir entries in this dir. */ + size_t max_entries = dp->dn_stat.st_size / min_entry_size; + /* The maximum difference in size per directory entry. */ + size_t entry_extra = + DIRENT_ALIGN + + (min_dirent_size > min_entry_size + ? min_dirent_size - min_entry_size : 0); + + allocsize = round_page (dp->dn_stat.st_size + max_entries * entry_extra); + } + else + allocsize = round_page (bufsiz); + + if (allocsize > *datacnt) + *data = mmap (0, allocsize, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0); + + /* Scan through the entries to find ENTRY. If we encounter + a -1 in the process then stop to fill it. When we run + off the end, ENTRY is too big. 
*/ + curentry = 0; + bufvalid = 0; + for (blkno = 0; blkno < nblks; blkno++) + { + if (dp->dn->dirents[blkno] == -1) + { + err = count_dirents (dp, blkno, buf); + if (err) + return err; + bufvalid = 1; + } + + if (curentry + dp->dn->dirents[blkno] > entry) + /* ENTRY starts in this block. */ + break; + + curentry += dp->dn->dirents[blkno]; + + bufvalid = 0; + } + + if (blkno == nblks) + { + *datacnt = 0; + *amt = 0; + return 0; + } + + /* Set bufp appropriately */ + bufp = buf; + if (curentry != entry) + { + /* Look through the block to find out where to start, + setting bufp appropriately. */ + if (!bufvalid) + { + err = diskfs_node_rdwr (dp, buf, blkno * DIRBLKSIZ, DIRBLKSIZ, + 0, 0, &checklen); + if (err) + return err; + assert (checklen == DIRBLKSIZ); + bufvalid = 1; + } + for (i = 0, bufp = buf; + i < entry - curentry && bufp - buf < DIRBLKSIZ; + bufp += ((struct ext2_dir_entry_2 *)bufp)->rec_len, i++) + ; + /* Make sure we didn't run off the end. */ + assert (bufp - buf < DIRBLKSIZ); + } + + i = 0; + datap = *data; + + /* Copy the entries, one at a time. */ + while (((nentries == -1) || (i < nentries)) + && (!bufsiz || (datap - *data < bufsiz) ) + && blkno < nblks) + { + if (!bufvalid) + { + err = diskfs_node_rdwr (dp, buf, blkno * DIRBLKSIZ, DIRBLKSIZ, + 0, 0, &checklen); + if (err) + return err; + assert (checklen == DIRBLKSIZ); + bufvalid = 1; + bufp = buf; + } + + entryp = (struct ext2_dir_entry_2 *)bufp; + + if (entryp->inode) + { + int rec_len; + int name_len = entryp->name_len; + + userp = (struct dirent *) datap; + + /* Length is structure before the name + the name + '\0', all + padded to a four-byte alignment. */ + rec_len = + ((offsetof (struct dirent, d_name) + + name_len + 1 + + (DIRENT_ALIGN - 1)) + & ~(DIRENT_ALIGN - 1)); + + /* See if this record would run over the end of the return buffer. */ + if (bufsiz == 0) + /* It shouldn't ever, as we calculated the worst case size. 
*/ + assert (datap + rec_len <= *data + allocsize); + else + /* It's ok if it does, just leave off returning this entry. */ + if (datap + rec_len > *data + allocsize) + break; + + userp->d_fileno = entryp->inode; + userp->d_reclen = rec_len; + userp->d_namlen = name_len; + +#if 0 + /* We don't bother to check the EXT2_FEATURE_INCOMPAT_FILETYPE + flag in the superblock, because in old filesystems the + file_type field is the high byte of the length field and is + always zero because names cannot be that long. */ + if (entryp->file_type < EXT2_FT_MAX) + userp->d_type = ext2_file_type[entryp->file_type]; + else + { + ext2_warning ("bad type %d in directory entry: " + "inode: %d offset: %d", + entryp->file_type, + dp->cache_id, + blkno * DIRBLKSIZ + bufp - buf); + userp->d_type = DT_UNKNOWN; + } +#else + /* XXX + For complex reasons it might not be correct to return + the filesystem's d_type value to the user. */ + userp->d_type = DT_UNKNOWN; +#endif + memcpy (userp->d_name, entryp->name, name_len); + userp->d_name[name_len] = '\0'; + + datap += rec_len; + i++; + } + + if (entryp->rec_len == 0) + { + ext2_warning ("zero length directory entry: inode: %d offset: %d", + dp->cache_id, + blkno * DIRBLKSIZ + bufp - buf); + return EIO; + } + + bufp += entryp->rec_len; + if (bufp - buf == DIRBLKSIZ) + { + blkno++; + bufvalid = 0; + } + else if (bufp - buf > DIRBLKSIZ) + { + ext2_warning ("directory entry too long: inode: %d offset: %d", + dp->cache_id, + blkno * DIRBLKSIZ + bufp - buf - entryp->rec_len); + return EIO; + } + } + + /* We've copied all we can. If we allocated our own array + but didn't fill all of it, then free whatever memory we didn't use. 
*/ + if (allocsize > *datacnt) + { + if (round_page (datap - *data) < allocsize) + munmap ((caddr_t) (*data + round_page (datap - *data)), + allocsize - round_page (datap - *data)); + } + + /* Set variables for return */ + *datacnt = datap - *data; + *amt = i; + return 0; +} diff --git a/ext2fs/ext2_fs.h b/ext2fs/ext2_fs.h new file mode 100644 index 00000000..a5a19317 --- /dev/null +++ b/ext2fs/ext2_fs.h @@ -0,0 +1,628 @@ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_H +#define _LINUX_EXT2_FS_H + +/* #include <linux/types.h> */ + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files + */ +#define EXT2_PREALLOCATE +#define EXT2_DEFAULT_PREALLOC_BLOCKS 8 + +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/08/09" +#define EXT2FS_VERSION "0.5b" + +/* + * Debug code + */ +#ifdef EXT2FS_DEBUG +# define ext2_debug(f, a...) { \ + printk ("EXT2-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (f, ## a); \ + } +#else +# define ext2_debug(f, a...) 
/**/ +#endif + +/* + * Special inodes numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +#define EXT2_ACL_DATA_INO 4 /* ACL inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ + +/* First non-reserved inode for old ext2 filesystems */ +#define EXT2_GOOD_OLD_FIRST_INO 11 + +/* + * The second extended file system magic number + */ +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT2_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext2_sb.s_addr_per_block_bits) +#define EXT2_INODE_SIZE(s) ((s)->u.ext2_sb.s_inode_size) +#define EXT2_FIRST_INO(s) ((s)->u.ext2_sb.s_first_ino) +#else +#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \ + EXT2_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? 
\ + EXT2_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT2_FRAG_SIZE(s) ((s)->u.ext2_sb.s_frag_size) +# define EXT2_FRAGS_PER_BLOCK(s) ((s)->u.ext2_sb.s_frags_per_block) +#else +# define EXT2_FRAG_SIZE(s) (EXT2_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) +#endif + +/* + * ACL structures + */ +struct ext2_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext2_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext2_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __u32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#ifdef __KERNEL__ +# define EXT2_BLOCKS_PER_GROUP(s) ((s)->u.ext2_sb.s_blocks_per_group) +# define EXT2_DESC_PER_BLOCK(s) ((s)->u.ext2_sb.s_desc_per_block) +# define EXT2_INODES_PER_GROUP(s) ((s)->u.ext2_sb.s_inodes_per_group) +# define EXT2_DESC_PER_BLOCK_BITS(s) ((s)->u.ext2_sb.s_desc_per_block_bits) +#else +# define EXT2_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct 
ext2_group_desc)) +# define EXT2_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT2_UNRM_FL 0x00000002 /* Undelete */ +#define EXT2_COMPR_FL 0x00000004 /* Compress file */ +#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT2_DIRTY_FL 0x00000100 +#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */ +#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT2_BTREE_FL 0x00001000 /* btree format dir */ +#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define EXT2_FL_USER_VISIBLE 0x00001FFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ + +/* + * ioctl commands + */ +#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT2_IOC_GETVERSION _IOR('v', 1, long) +#define EXT2_IOC_SETVERSION _IOW('v', 2, long) + +/* + * Structure of an inode on the disk + */ +struct ext2_inode { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Low 16 bits of Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Creation time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Low 16 
bits of Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __u32 i_generation; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __u16 l_i_uid_high; /* these 2 fields */ + __u16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +}; + +#define i_size_high i_dir_acl + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 +#endif + +/* Hurd aliases for the OS-dependent inode fields. These names expand in + the middle of expressions (e.g. `di->i_frag = 0'), so the replacement + text must be a bare member path with no trailing `;'. */ +#ifdef __hurd__ +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag +#define i_fsize osd2.hurd2.h_i_fsize +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author +#endif + +#ifdef __masix__ +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_frag osd2.masix2.m_i_frag +#define i_fsize osd2.masix2.m_i_fsize +#define i_reserved2 
osd2.masix2.m_i_reserved2 +#endif + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK_NORMAL 0x0001 /* Do some more checks */ +#define EXT2_MOUNT_CHECK_STRICT 0x0002 /* Do again more checks */ +#define EXT2_MOUNT_CHECK (EXT2_MOUNT_CHECK_NORMAL | \ + EXT2_MOUNT_CHECK_STRICT) +#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ +#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ + +#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +#define set_opt(o, opt) o |= EXT2_MOUNT_##opt +#define test_opt(sb, opt) ((sb)->u.ext2_sb.s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext2_super_block { + __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + __u32 s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + __u32 s_blocks_per_group; /* # Blocks per group */ + 
__u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_minor_rev_level; /* minor revision level */ + __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ + __u32 s_feature_incompat; /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __u32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. 
+ */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + __u32 s_reserved[204]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +#define EXT2_SB(sb) (&((sb)->u.ext2_sb)) +#else +/* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT2_SB(sb) (sb) +#endif + +/* + * Codes for operating systems + */ +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OS_MASIX 2 +#define EXT2_OS_FREEBSD 3 +#define EXT2_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT2_CURRENT_REV EXT2_GOOD_OLD_REV +#define EXT2_MAX_SUPP_REV EXT2_DYNAMIC_REV + +#define EXT2_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_compat & (mask) ) +#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_ro_compat & (mask) ) +#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_incompat & (mask) ) + +#define EXT2_FEATURE_COMPAT_DIR_PREALLOC 0x0001 + +#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 + +#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002 + +#define EXT2_FEATURE_COMPAT_SUPP 0 +#define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Structure of 
a directory entry + */ +#define EXT2_NAME_LEN 255 + +struct ext2_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT2 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext2_dir_entry_2 { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * Ext2 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT2_FT_UNKNOWN 0 +#define EXT2_FT_REG_FILE 1 +#define EXT2_FT_DIR 2 +#define EXT2_FT_CHRDEV 3 +#define EXT2_FT_BLKDEV 4 +#define EXT2_FT_FIFO 5 +#define EXT2_FT_SOCK 6 +#define EXT2_FT_SYMLINK 7 + +#define EXT2_FT_MAX 8 + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) + +#ifdef __KERNEL__ +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext2 source programs needs to include it so they are duplicated here. 
+ */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* acl.c */ +extern int ext2_permission (struct inode *, int); + +/* balloc.c */ +extern int ext2_group_sparse(int group); +extern int ext2_new_block (const struct inode *, unsigned long, + __u32 *, __u32 *, int *); +extern void ext2_free_blocks (const struct inode *, unsigned long, + unsigned long); +extern unsigned long ext2_count_free_blocks (struct super_block *); +extern void ext2_check_blocks_bitmap (struct super_block *); +extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); + +/* bitmap.c */ +extern unsigned long ext2_count_free (struct buffer_head *, unsigned); + +/* dir.c */ +extern int ext2_check_dir_entry (const char *, struct inode *, + struct ext2_dir_entry_2 *, struct buffer_head *, + unsigned long); + +/* file.c */ +extern int ext2_read (struct inode *, struct file *, char *, int); +extern int ext2_write (struct inode *, struct file *, char *, int); + +/* fsync.c */ +extern int ext2_sync_file (struct file *, struct dentry *); + +/* ialloc.c */ +extern struct inode * ext2_new_inode (const struct inode *, int, int *); +extern void ext2_free_inode (struct inode *); +extern unsigned long ext2_count_free_inodes (struct super_block *); +extern void ext2_check_inodes_bitmap (struct super_block *); + +/* inode.c */ +extern long ext2_bmap (struct inode *, long); +extern int ext2_get_block (struct inode *, long, struct buffer_head *, int); + +extern struct buffer_head * ext2_getblk (struct inode *, long, int, int *); +extern int ext2_getblk_block (struct inode *, long, int, int *, int *); +extern struct buffer_head * ext2_bread (struct inode *, int, int, int *); + +extern int ext2_getcluster (struct inode * inode, long block); +extern void ext2_read_inode (struct inode *); +extern void ext2_write_inode (struct inode *); +extern void ext2_put_inode (struct inode *); 
+extern void ext2_delete_inode (struct inode *); +extern int ext2_sync_inode (struct inode *); +extern void ext2_discard_prealloc (struct inode *); + +/* ioctl.c */ +extern int ext2_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); + +/* namei.c */ +extern void ext2_release (struct inode *, struct file *); +extern struct dentry *ext2_lookup (struct inode *, struct dentry *); +extern int ext2_create (struct inode *,struct dentry *,int); +extern int ext2_mkdir (struct inode *,struct dentry *,int); +extern int ext2_rmdir (struct inode *,struct dentry *); +extern int ext2_unlink (struct inode *,struct dentry *); +extern int ext2_symlink (struct inode *,struct dentry *,const char *); +extern int ext2_link (struct dentry *, struct inode *, struct dentry *); +extern int ext2_mknod (struct inode *, struct dentry *, int, int); +extern int ext2_rename (struct inode *, struct dentry *, + struct inode *, struct dentry *); + +/* super.c */ +extern void ext2_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern NORET_TYPE void ext2_panic (struct super_block *, const char *, + const char *, ...) + __attribute__ ((NORET_AND format (printf, 3, 4))); +extern void ext2_warning (struct super_block *, const char *, const char *, ...) 
+ __attribute__ ((format (printf, 3, 4))); +extern void ext2_put_super (struct super_block *); +extern void ext2_write_super (struct super_block *); +extern int ext2_remount (struct super_block *, int *, char *); +extern struct super_block * ext2_read_super (struct super_block *,void *,int); +extern int ext2_statfs (struct super_block *, struct statfs *, int); + +/* truncate.c */ +extern void ext2_truncate (struct inode *); + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern struct inode_operations ext2_dir_inode_operations; + +/* file.c */ +extern struct inode_operations ext2_file_inode_operations; + +/* symlink.c */ +extern struct inode_operations ext2_symlink_inode_operations; +extern struct inode_operations ext2_fast_symlink_inode_operations; + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EXT2_FS_H */ diff --git a/ext2fs/ext2_fs_i.h b/ext2fs/ext2_fs_i.h new file mode 100644 index 00000000..72bcd5c0 --- /dev/null +++ b/ext2fs/ext2_fs_i.h @@ -0,0 +1,42 @@ +/* + * linux/include/linux/ext2_fs_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_I +#define _LINUX_EXT2_FS_I + +/* + * second extended file system inode data in memory + */ +struct ext2_inode_info { + __u32 i_data[15]; + __u32 i_flags; + __u32 i_faddr; + __u8 i_frag_no; + __u8 i_frag_size; + __u16 i_osync; + __u32 i_file_acl; + __u32 i_dir_acl; + __u32 i_dtime; + __u32 not_used_1; /* FIX: not used/ 2.2 placeholder */ + __u32 i_block_group; + __u32 i_next_alloc_block; + __u32 i_next_alloc_goal; + __u32 i_prealloc_block; + __u32 i_prealloc_count; + __u32 i_high_size; + int i_new_inode:1; /* Is a freshly allocated inode */ +}; + +#endif /* _LINUX_EXT2_FS_I */ diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c new file mode 100644 index 
00000000..ef22a940 --- /dev/null +++ b/ext2fs/ext2fs.c @@ -0,0 +1,216 @@ +/* Main entry point for the ext2 file system translator + + Copyright (C) 1994, 95, 96, 97, 98, 99 Free Software Foundation, Inc. + + Converted for ext2fs by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <stdarg.h> +#include <stdio.h> +#include <device/device.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <error.h> +#include <argz.h> +#include <argp.h> +#include <hurd/store.h> +#include <version.h> +#include "ext2fs.h" + +/* ---------------------------------------------------------------- */ + +int diskfs_link_max = EXT2_LINK_MAX; +int diskfs_name_max = EXT2_NAME_LEN; +int diskfs_maxsymlinks = 8; +int diskfs_shortcut_symlink = 1; +int diskfs_shortcut_chrdev = 1; +int diskfs_shortcut_blkdev = 1; +int diskfs_shortcut_fifo = 1; +int diskfs_shortcut_ifsock = 1; + +char *diskfs_server_name = "ext2fs"; +char *diskfs_server_version = HURD_VERSION; +char *diskfs_extra_version = "GNU Hurd; ext2 " EXT2FS_VERSION; + +int diskfs_synchronous = 0; + +struct node *diskfs_root_node; + +struct store *store = 0; +struct store_parsed *store_parsed = 0; + +char *diskfs_disk_name = 0; + +#ifdef EXT2FS_DEBUG +int ext2_debug_flag = 0; +#endif + +/* Ext2fs-specific options. 
*/ +static const struct argp_option +options[] = +{ + {"debug", 'D', 0, 0, "Toggle debugging output" +#ifndef EXT2FS_DEBUG + " (not compiled in)" +#endif + }, + {"sblock", 'S', "BLOCKNO", 0, + "Use alternate superblock location (1kb blocks)"}, + {0} +}; + +/* Parse a command line option. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + /* We save our parsed values in this structure, hung off STATE->hook. + Only after parsing all options successfully will we use these values. */ + struct + { + int debug_flag; + unsigned int sb_block; + } *values = state->hook; + + switch (key) + { + case 'D': + values->debug_flag = 1; + break; + case 'S': + values->sb_block = strtoul (arg, &arg, 0); + if (!arg || *arg != '\0') + { + argp_error (state, "invalid number for --sblock"); + return EINVAL; + } + break; + + case ARGP_KEY_INIT: + state->child_inputs[0] = state->input; + values = malloc (sizeof *values); + if (values == 0) + return ENOMEM; + state->hook = values; + bzero (values, sizeof *values); + values->sb_block = SBLOCK_BLOCK; + break; + + case ARGP_KEY_SUCCESS: + /* All options parse successfully, so implement ours if possible. */ + if (values->debug_flag) + { +#ifdef EXT2FS_DEBUG + ext2_debug_flag = !ext2_debug_flag; +#else + argp_failure (state, 2, 0, "debugging support not compiled in"); + return EINVAL; +#endif + } + + break; + + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* Override the standard diskfs routine so we can add our own output. */ +error_t +diskfs_append_args (char **argz, unsigned *argz_len) +{ + error_t err; + + /* Get the standard things. */ + err = diskfs_append_std_options (argz, argz_len); + +#ifdef EXT2FS_DEBUG + if (!err && ext2_debug_flag) + err = argz_add (argz, argz_len, "--debug"); +#endif + if (! err) + err = store_parsed_append_args (store_parsed, argz, argz_len); + + return err; +} + +/* Add our startup arguments to the standard diskfs set. 
*/ +static const struct argp_child startup_children[] = + {{&diskfs_store_startup_argp}, {0}}; +static struct argp startup_argp = {options, parse_opt, 0, 0, startup_children}; + +/* Similarly at runtime. */ +static const struct argp_child runtime_children[] = + {{&diskfs_std_runtime_argp}, {0}}; +static struct argp runtime_argp = {options, parse_opt, 0, 0, runtime_children}; + +struct argp *diskfs_runtime_argp = (struct argp *)&runtime_argp; + +int +main (int argc, char **argv) +{ + error_t err; + mach_port_t bootstrap; + + /* Initialize the diskfs library, parse arguments, and open the store. + This starts the first diskfs thread for us. */ + store = diskfs_init_main (&startup_argp, argc, argv, + &store_parsed, &bootstrap); + + if (store->size < SBLOCK_OFFS + SBLOCK_SIZE) + ext2_panic ("device too small for superblock (%ld bytes)", store->size); + if (store->log2_blocks_per_page < 0) + ext2_panic ("device block size (%u) greater than page size (%d)", + store->block_size, vm_page_size); + + /* Map the entire disk. */ + create_disk_pager (); + + pokel_init (&global_pokel, diskfs_disk_pager, disk_image); + + get_hypermetadata(); + + inode_init (); + + /* Set diskfs_root_node to the root inode. */ + err = diskfs_cached_lookup (EXT2_ROOT_INO, &diskfs_root_node); + if (err) + ext2_panic ("can't get root: %s", strerror (err)); + else if ((diskfs_root_node->dn_stat.st_mode & S_IFMT) == 0) + ext2_panic ("no root node!"); + mutex_unlock (&diskfs_root_node->lock); + + /* Now that we are all set up to handle requests, and diskfs_root_node is + set properly, it is safe to export our fsys control port to the + outside world. */ + diskfs_startup_diskfs (bootstrap, 0); + + /* and so we die, leaving others to do the real work. 
*/ + cthread_exit (0); + /* NOTREACHED */ + return 0; +} + +error_t +diskfs_reload_global_state () +{ + pokel_flush (&global_pokel); + pager_flush (diskfs_disk_pager, 1); + get_hypermetadata (); + return 0; +} diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h new file mode 100644 index 00000000..8e73ae0a --- /dev/null +++ b/ext2fs/ext2fs.h @@ -0,0 +1,445 @@ +/* Common definitions for the ext2 filesystem translator + + Copyright (C) 1995, 1996, 1999 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <mach.h> +#include <hurd.h> +#include <hurd/ports.h> +#include <hurd/pager.h> +#include <hurd/fshelp.h> +#include <hurd/iohelp.h> +#include <hurd/diskfs.h> +#include <assert.h> +#include <rwlock.h> +#include <sys/mman.h> + +#define __hurd__ /* Enable some hurd-specific fields. */ + +/* Types used by the ext2 header files. */ +typedef u_int32_t __u32; +typedef int32_t __s32; +typedef u_int16_t __u16; +typedef int16_t __s16; +typedef u_int8_t __u8; +typedef int8_t __s8; + +#include "ext2_fs.h" +#include "ext2_fs_i.h" + +#define i_mode_high osd2.hurd2.h_i_mode_high /* missing from ext2_fs.h */ + + +/* If ext2_fs.h defined a debug routine, undef it and use our own. */ +#undef ext2_debug + +#ifdef EXT2FS_DEBUG +extern int ext2_debug_flag; +#define ext2_debug(f, a...) 
\ + do { if (ext2_debug_flag) printf ("ext2fs: (debug) %s: " f "\n", __FUNCTION__ , ## a); } while (0) +#else +#define ext2_debug(f, a...) (void)0 +#endif + +#undef __hurd__ + +/* Define this if memory objects should not be cached by the kernel. + Normally, don't define it, but defining it causes a much greater rate + of paging requests, which may be helpful in catching bugs. */ + +#undef DONT_CACHE_MEMORY_OBJECTS + +int printf (const char *fmt, ...); + +/* A block number. */ +typedef __u32 block_t; + +/* ---------------------------------------------------------------- */ + +struct poke +{ + vm_offset_t offset; + vm_size_t length; + struct poke *next; +}; + +struct pokel +{ + struct poke *pokes, *free_pokes; + spin_lock_t lock; + struct pager *pager; + void *image; +}; + +void pokel_init (struct pokel *pokel, struct pager *pager, void *image); +/* Clean up any state associated with POKEL (but don't free POKEL). */ +void pokel_finalize (struct pokel *pokel); + +/* Remember that data here on the disk has been modified. */ +void pokel_add (struct pokel *pokel, void *loc, vm_size_t length); + +/* Sync all the modified pieces of disk */ +void pokel_sync (struct pokel *pokel, int wait); + +/* Flush (that is, drop on the ground) all pending pokes in POKEL. */ +void pokel_flush (struct pokel *pokel); + +/* Transfer all regions from FROM to POKEL, which must have the same pager. */ +void pokel_inherit (struct pokel *pokel, struct pokel *from); + +#ifndef EXT2FS_EI +#define EXT2FS_EI extern inline +#endif + +/* ---------------------------------------------------------------- */ +/* Bitmap routines. */ + +#include <stdint.h> + +/* Returns TRUE if bit NUM is set in BITMAP. */ +EXT2FS_EI int +test_bit (unsigned num, char *bitmap) +{ + const uint32_t *const bw = (uint32_t *) bitmap + (num >> 5); + const uint_fast32_t mask = 1 << (num & 31); + return *bw & mask; +} + +/* Sets bit NUM in BITMAP, and returns the previous state of the bit. 
Unlike + the linux version, this function is NOT atomic! */ +EXT2FS_EI int +set_bit (unsigned num, char *bitmap) +{ + uint32_t *const bw = (uint32_t *) bitmap + (num >> 5); + const uint_fast32_t mask = 1 << (num & 31); + return (*bw & mask) ?: (*bw |= mask, 0); +} + +/* Clears bit NUM in BITMAP, and returns the previous state of the bit. + Unlike the linux version, this function is NOT atomic! */ +EXT2FS_EI int +clear_bit (unsigned num, char *bitmap) +{ + uint32_t *const bw = (uint32_t *) bitmap + (num >> 5); + const uint_fast32_t mask = 1 << (num & 31); + return (*bw & mask) ? (*bw &= ~mask, mask) : 0; +} + +/* ---------------------------------------------------------------- */ + +/* ext2fs specific per-file data. */ +struct disknode +{ + /* For a directory, this array holds the number of directory entries in + each DIRBLKSIZE piece of the directory. */ + int *dirents; + + /* Links on hash list. */ + struct node *hnext, **hprevp; + + /* Lock to lock while fiddling with this inode's block allocation info. */ + struct rwlock alloc_lock; + + /* Where changes to our indirect blocks are added. */ + struct pokel indir_pokel; + + /* Random extra info used by the ext2 routines. */ + struct ext2_inode_info info; + + /* This file's pager. */ + struct pager *pager; + + /* True if the last page of the file has been made writable, but is only + partially allocated. */ + int last_page_partially_writable; + + /* Index to start a directory lookup at. */ + int dir_idx; +}; + +struct user_pager_info +{ + enum pager_type + { + DISK, + FILE_DATA, + } type; + struct node *node; + vm_prot_t max_prot; +}; + +/* ---------------------------------------------------------------- */ +/* pager.c */ + +#include <hurd/diskfs-pager.h> + +/* Set up the disk pager. */ +void create_disk_pager (void); + +/* Call this when we should turn off caching so that unused memory object + ports get freed. 
*/ +void drop_pager_softrefs (struct node *node); + +/* Call this when we should turn on caching because it's no longer + important for unused memory object ports to get freed. */ +void allow_pager_softrefs (struct node *node); + +/* Invalidate any pager data associated with NODE. */ +void flush_node_pager (struct node *node); + +/* ---------------------------------------------------------------- */ + +/* The physical media. */ +extern struct store *store; +/* What the user specified. */ +extern struct store_parsed *store_parsed; + +/* Mapped image of the disk. */ +extern void *disk_image; + +/* Our in-core copy of the super-block (pointer into the disk_image). */ +struct ext2_super_block *sblock; +/* True if sblock has been modified. */ +int sblock_dirty; + +/* Where the super-block is located on disk (at min-block 1). */ +#define SBLOCK_BLOCK 1 /* Default location, second 1k block. */ +#define SBLOCK_SIZE (sizeof (struct ext2_super_block)) +extern unsigned int sblock_block; /* Specified location (in 1k blocks). */ +#define SBLOCK_OFFS (sblock_block << 10) /* Byte offset of superblock. */ + +/* The filesystem block-size. */ +unsigned int block_size; +/* The log base 2 of BLOCK_SIZE. */ +unsigned int log2_block_size; + +/* The number of bits to scale min-blocks to get filesystem blocks. */ +#define BLOCKSIZE_SCALE (sblock->s_log_block_size) + +/* log2 of the number of device blocks in a filesystem block. */ +unsigned log2_dev_blocks_per_fs_block; + +/* log2 of the number of stat blocks (512 bytes) in a filesystem block. */ +unsigned log2_stat_blocks_per_fs_block; + +/* A handy page of page-aligned zeros. */ +vm_address_t zeroblock; + +/* Get the superblock from the disk, & setup various global info from it. */ +void get_hypermetadata (); + +/* ---------------------------------------------------------------- */ +/* Random stuff calculated from the super block. 
*/ + +unsigned long frag_size; /* Size of a fragment in bytes */ +unsigned long frags_per_block; /* Number of fragments per block */ +unsigned long inodes_per_block; /* Number of inodes per block */ + +unsigned long itb_per_group; /* Number of inode table blocks per group */ +unsigned long db_per_group; /* Number of descriptor blocks per group */ +unsigned long desc_per_block; /* Number of group descriptors per block */ +unsigned long addr_per_block; /* Number of disk addresses per block */ + +unsigned long groups_count; /* Number of groups in the fs */ + +/* ---------------------------------------------------------------- */ + +spin_lock_t node_to_page_lock; + +spin_lock_t generation_lock; +unsigned long next_generation; + +/* ---------------------------------------------------------------- */ +/* Functions for looking inside disk_image */ + +#define trunc_block(offs) (((offs) >> log2_block_size) << log2_block_size) +#define round_block(offs) \ + ((((offs) + block_size - 1) >> log2_block_size) << log2_block_size) + +/* block num --> byte offset on disk */ +#define boffs(block) ((block) << log2_block_size) +/* byte offset on disk --> block num */ +#define boffs_block(offs) ((offs) >> log2_block_size) + +/* byte offset on disk --> pointer to in-memory block */ +#define boffs_ptr(offs) (((char *)disk_image) + (offs)) +/* pointer to in-memory block --> byte offset on disk */ +#define bptr_offs(ptr) ((char *)(ptr) - ((char *)disk_image)) + +/* block num --> pointer to in-memory block */ +#define bptr(block) boffs_ptr(boffs(block)) +/* pointer to in-memory block --> block num */ +#define bptr_block(ptr) boffs_block(bptr_offs(ptr)) + +/* Get the descriptor for block group NUM. The block group descriptors are + stored starting in the filesystem block following the super block. + We cache a pointer into the disk image for easy lookup. 
*/ +#define group_desc(num) (&group_desc_image[num]) +struct ext2_group_desc *group_desc_image; + +#define inode_group_num(inum) (((inum) - 1) / sblock->s_inodes_per_group) + +/* Convert an inode number to the dinode on disk. */ +EXT2FS_EI struct ext2_inode * +dino (ino_t inum) +{ + unsigned long inodes_per_group = sblock->s_inodes_per_group; + unsigned long bg_num = (inum - 1) / inodes_per_group; + unsigned long group_inum = (inum - 1) % inodes_per_group; + struct ext2_group_desc *bg = group_desc(bg_num); + block_t block = bg->bg_inode_table + (group_inum / inodes_per_block); + return ((struct ext2_inode *)bptr(block)) + group_inum % inodes_per_block; +} + +/* ---------------------------------------------------------------- */ +/* inode.c */ + +/* Write all active disknodes into the inode pager. */ +void write_all_disknodes (); + +/* Lookup node INUM (which must have a reference already) and return it + without allocating any new references. */ +struct node *ifind (ino_t inum); + +void inode_init (void); + +/* ---------------------------------------------------------------- */ + +/* What to lock if changing global data data (e.g., the superblock or block + group descriptors or bitmaps). */ +spin_lock_t global_lock; + +/* Where to record such changes. */ +struct pokel global_pokel; + +/* If the block size is less than the page size, then this bitmap is used to + record which disk blocks are actually modified, so we don't stomp on parts + of the disk which are backed by file pagers. */ +char *modified_global_blocks; +spin_lock_t modified_global_blocks_lock; + +/* Marks the global block BLOCK as being modified, and returns true if we + think it may have been clean before (but we may not be sure). Note that + this isn't enough to cause the block to be synced; you must call + record_global_poke to do that. 
*/ +EXT2FS_EI int +global_block_modified (block_t block) +{ + if (modified_global_blocks) + { + int was_clean; + spin_lock (&modified_global_blocks_lock); + was_clean = !set_bit(block, modified_global_blocks); + spin_unlock (&modified_global_blocks_lock); + return was_clean; + } + else + return 1; +} + +/* This records a modification to a non-file block. */ +EXT2FS_EI void +record_global_poke (void *ptr) +{ + int boffs = trunc_block (bptr_offs (ptr)); + global_block_modified (boffs_block (boffs)); + pokel_add (&global_pokel, boffs_ptr(boffs), block_size); +} + +/* This syncs a modification to a non-file block. */ +EXT2FS_EI void +sync_global_ptr (void *bptr, int wait) +{ + vm_offset_t boffs = trunc_block (bptr_offs (bptr)); + global_block_modified (boffs_block (boffs)); + pager_sync_some (diskfs_disk_pager, trunc_page (boffs), vm_page_size, wait); +} + +/* This records a modification to one of a file's indirect blocks. */ +EXT2FS_EI void +record_indir_poke (struct node *node, void *ptr) +{ + int boffs = trunc_block (bptr_offs (ptr)); + global_block_modified (boffs_block (boffs)); + pokel_add (&node->dn->indir_pokel, boffs_ptr(boffs), block_size); +} + +/* ---------------------------------------------------------------- */ + +EXT2FS_EI void +sync_global (int wait) +{ + pokel_sync (&global_pokel, wait); +} + +/* Sync all allocation information and node NP if diskfs_synchronous. */ +EXT2FS_EI void +alloc_sync (struct node *np) +{ + if (diskfs_synchronous) + { + if (np) + { + diskfs_node_update (np, 1); + pokel_sync (&np->dn->indir_pokel, 1); + } + diskfs_set_hypermetadata (1, 0); + } +} + +/* ---------------------------------------------------------------- */ +/* getblk.c */ + +void ext2_discard_prealloc (struct node *node); + +/* Returns in DISK_BLOCK the disk block correspding to BLOCK in NODE. If + there is no such block yet, but CREATE is true, then it is created, + otherwise EINVAL is returned. 
*/ +error_t ext2_getblk (struct node *node, block_t block, int create, block_t *disk_block); + +block_t ext2_new_block (block_t goal, + block_t prealloc_goal, + block_t *prealloc_count, block_t *prealloc_block); + +void ext2_free_blocks (block_t block, unsigned long count); + +/* ---------------------------------------------------------------- */ + +/* Write disk block ADDR with DATA of LEN bytes, waiting for completion. */ +error_t dev_write_sync (block_t addr, vm_address_t data, long len); + +/* Write diskblock ADDR with DATA of LEN bytes; don't bother waiting + for completion. */ +error_t dev_write (block_t addr, vm_address_t data, long len); + +/* Read disk block ADDR; put the address of the data in DATA; read LEN + bytes. Always *DATA should be a full page no matter what. */ +error_t dev_read_sync (block_t addr, vm_address_t *data, long len); + +/* ---------------------------------------------------------------- */ + +#define ext2_error(fmt, args...) _ext2_error (__FUNCTION__, fmt , ##args) +extern void _ext2_error (const char *, const char *, ...) + __attribute__ ((format (printf, 2, 3))); + +#define ext2_panic(fmt, args...) _ext2_panic (__FUNCTION__, fmt , ##args) +extern void _ext2_panic (const char *, const char *, ...) + __attribute__ ((format (printf, 2, 3))); + +extern void ext2_warning (const char *, ...) + __attribute__ ((format (printf, 1, 2))); diff --git a/ext2fs/getblk.c b/ext2fs/getblk.c new file mode 100644 index 00000000..5dea8f30 --- /dev/null +++ b/ext2fs/getblk.c @@ -0,0 +1,303 @@ +/* File block to disk block mapping routines + + Copyright (C) 1995,96,99 Free Software Foundation, Inc. + + Converted to work under the hurd by Miles Bader <miles@gnu.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * linux/fs/ext2/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +#include <string.h> +#include "ext2fs.h" + +/* + * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the + * superblock in the same manner as are ext2_free_blocks and + * ext2_new_block. We just wait on the super rather than locking it + * here, since ext2_new_block will do the necessary locking and we + * can't block until then. + */ +void +ext2_discard_prealloc (struct node *node) +{ +#ifdef EXT2_PREALLOCATE + if (node->dn->info.i_prealloc_count) + { + int i = node->dn->info.i_prealloc_count; + ext2_debug ("discarding %d prealloced blocks for inode %d", + i, node->cache_id); + node->dn->info.i_prealloc_count = 0; + ext2_free_blocks (node->dn->info.i_prealloc_block, i); + } +#endif +} + +/* Allocate a new block for the file NODE, as close to block GOAL as + possible, and return it, or 0 if none could be had. If ZERO is true, then + zero the block (and add it to NODE's list of modified indirect blocks). 
*/ +static block_t +ext2_alloc_block (struct node *node, block_t goal, int zero) +{ +#ifdef EXT2FS_DEBUG + static unsigned long alloc_hits = 0, alloc_attempts = 0; +#endif + block_t result; + +#ifdef EXT2_PREALLOCATE + if (node->dn->info.i_prealloc_count && + (goal == node->dn->info.i_prealloc_block || + goal + 1 == node->dn->info.i_prealloc_block)) + { + result = node->dn->info.i_prealloc_block++; + node->dn->info.i_prealloc_count--; + ext2_debug ("preallocation hit (%lu/%lu) => %lu", + ++alloc_hits, ++alloc_attempts, result); + } + else + { + ext2_debug ("preallocation miss (%lu/%lu)", + alloc_hits, ++alloc_attempts); + ext2_discard_prealloc (node); + result = ext2_new_block + (goal, + S_ISREG (node->dn_stat.st_mode) + ? (sblock->s_prealloc_blocks ?: EXT2_DEFAULT_PREALLOC_BLOCKS) + : (S_ISDIR (node->dn_stat.st_mode) + && EXT2_HAS_COMPAT_FEATURE(sblock, + EXT2_FEATURE_COMPAT_DIR_PREALLOC)) + ? sblock->s_prealloc_dir_blocks + : 0, + &node->dn->info.i_prealloc_count, + &node->dn->info.i_prealloc_block); + } +#else + result = ext2_new_block (goal, 0, 0); +#endif + + if (result && zero) + { + char *bh = bptr (result); + bzero (bh, block_size); + record_indir_poke (node, bh); + } + + return result; +} + +static error_t +inode_getblk (struct node *node, int nr, int create, int zero, + block_t new_block, block_t *result) +{ + int i; + block_t goal = 0; +#ifdef EXT2FS_DEBUG + block_t hint; +#endif + + *result = node->dn->info.i_data[nr]; + if (*result) + return 0; + + if (!create) + return EINVAL; + + if (node->dn->info.i_next_alloc_block == new_block) + goal = node->dn->info.i_next_alloc_goal; + +#ifdef EXT2FS_DEBUG + hint = goal; +#endif + + if (!goal) + { + for (i = nr - 1; i >= 0; i--) + { + if (node->dn->info.i_data[i]) + { + goal = node->dn->info.i_data[i]; + break; + } + } + if (!goal) + goal = + (node->dn->info.i_block_group * EXT2_BLOCKS_PER_GROUP (sblock)) + + sblock->s_first_data_block; + } + + *result = ext2_alloc_block (node, goal, zero); + + ext2_debug 
("%screate, hint = %lu, goal = %lu => %lu", + create ? "" : "no", hint, goal, *result); + + if (!*result) + return ENOSPC; + + node->dn->info.i_data[nr] = *result; + + node->dn->info.i_next_alloc_block = new_block; + node->dn->info.i_next_alloc_goal = *result; + node->dn_set_ctime = node->dn_set_mtime = 1; + node->dn_stat.st_blocks += 1 << log2_stat_blocks_per_fs_block; + node->dn_stat_dirty = 1; + + if (diskfs_synchronous || node->dn->info.i_osync) + diskfs_node_update (node, 1); + + return 0; +} + +error_t +block_getblk (struct node *node, block_t block, int nr, int create, int zero, + block_t new_block, block_t *result) +{ + int i; + block_t goal = 0; + block_t *bh = (block_t *)bptr (block); + + *result = bh[nr]; + if (*result) + return 0; + + if (!create) + return EINVAL; + + if (node->dn->info.i_next_alloc_block == new_block) + goal = node->dn->info.i_next_alloc_goal; + if (!goal) + { + for (i = nr - 1; i >= 0; i--) + { + if (bh[i]) + { + goal = bh[i]; + break; + } + } + if (!goal) + goal = block; + } + + *result = ext2_alloc_block (node, goal, zero); + if (!*result) + return ENOSPC; + + bh[nr] = *result; + + if (diskfs_synchronous || node->dn->info.i_osync) + sync_global_ptr (bh, 1); + else + record_indir_poke (node, bh); + + node->dn->info.i_next_alloc_block = new_block; + node->dn->info.i_next_alloc_goal = *result; + node->dn_set_ctime = node->dn_set_mtime = 1; + node->dn_stat.st_blocks += 1 << log2_stat_blocks_per_fs_block; + node->dn_stat_dirty = 1; + + return 0; +} + +/* Returns in DISK_BLOCK the disk block correspding to BLOCK in NODE. If + there is no such block yet, but CREATE is true, then it is created, + otherwise EINVAL is returned. 
*/ +error_t +ext2_getblk (struct node *node, block_t block, int create, block_t *disk_block) +{ + error_t err; + block_t indir, b; + unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK (sblock); + + if (block > EXT2_NDIR_BLOCKS + addr_per_block + + addr_per_block * addr_per_block + + addr_per_block * addr_per_block * addr_per_block) + { + ext2_warning ("block > big: %u", block); + return EIO; + } + /* + * If this is a sequential block allocation, set the next_alloc_block + * to this block now so that all the indblock and data block + * allocations use the same goal zone + */ + + ext2_debug ("block = %lu, next = %lu, goal = %lu", block, + node->dn->info.i_next_alloc_block, + node->dn->info.i_next_alloc_goal); + + if (block == node->dn->info.i_next_alloc_block + 1) + { + node->dn->info.i_next_alloc_block++; + node->dn->info.i_next_alloc_goal++; + } + + b = block; + + if (block < EXT2_NDIR_BLOCKS) + return inode_getblk (node, block, create, 0, b, disk_block); + + block -= EXT2_NDIR_BLOCKS; + if (block < addr_per_block) + { + err = inode_getblk (node, EXT2_IND_BLOCK, create, 1, b, &indir); + if (!err) + err = block_getblk (node, indir, block, create, 0, b, disk_block); + return err; + } + + block -= addr_per_block; + if (block < addr_per_block * addr_per_block) + { + err = inode_getblk (node, EXT2_DIND_BLOCK, create, 1, b, &indir); + if (!err) + err = block_getblk (node, indir, block / addr_per_block, create, 1, + b, &indir); + if (!err) + err = block_getblk (node, indir, block & (addr_per_block - 1), + create, 0, b, disk_block); + return err; + } + + block -= addr_per_block * addr_per_block; + err = inode_getblk (node, EXT2_TIND_BLOCK, create, 1, b, &indir); + if (!err) + err = block_getblk (node, indir, block / (addr_per_block * addr_per_block), + create, 1, b, &indir); + if (!err) + err = + block_getblk (node, indir, + (block / addr_per_block) & (addr_per_block - 1), + create, 1, b, &indir); + if (!err) + err = block_getblk (node, indir, block & (addr_per_block - 1), 
create, 0, + b, disk_block); + + return err; +} diff --git a/ext2fs/hyper.c b/ext2fs/hyper.c new file mode 100644 index 00000000..4012f9f7 --- /dev/null +++ b/ext2fs/hyper.c @@ -0,0 +1,210 @@ +/* Fetching and storing the hypermetadata (superblock and bg summary info) + + Copyright (C) 1994,95,96,99, 1999 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <string.h> +#include <stdio.h> +#include <error.h> +#include <hurd/store.h> +#include "ext2fs.h" + +vm_address_t zeroblock = 0; +char *modified_global_blocks = 0; + +static void +allocate_mod_map (void) +{ + static vm_size_t mod_map_size = 0; + + if (modified_global_blocks && mod_map_size) + /* Get rid of the old one. */ + munmap (modified_global_blocks, mod_map_size); + + if (!diskfs_readonly && block_size < vm_page_size) + /* If the block size is too small, we have to take extra care when + writing out pages from the global pager, to make sure we don't stomp + on any file pager blocks. In this case use a bitmap to record which + global blocks are actually modified so the pager can write only them. */ + { + /* One bit per filesystem block. 
*/ + mod_map_size = sblock->s_blocks_count >> 3; + modified_global_blocks = mmap (0, mod_map_size, PROT_READ|PROT_WRITE, + MAP_ANON, 0, 0); + assert (modified_global_blocks != (void *) -1); + } + else + modified_global_blocks = 0; +} + +unsigned int sblock_block = SBLOCK_BLOCK; /* in 1k blocks */ + +static int ext2fs_clean; /* fs clean before we started writing? */ + +void +get_hypermetadata (void) +{ + error_t err = diskfs_catch_exception (); + if (err) + ext2_panic ("can't read superblock: %s", strerror (err)); + + if (zeroblock) + munmap ((caddr_t) zeroblock, block_size); + + sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS); + + if (sblock->s_magic != EXT2_SUPER_MAGIC +#ifdef EXT2FS_PRE_02B_COMPAT + && sblock->s_magic != EXT2_PRE_02B_MAGIC +#endif + ) + ext2_panic ("bad magic number %#x (should be %#x)", + sblock->s_magic, EXT2_SUPER_MAGIC); + + log2_block_size = EXT2_MIN_BLOCK_LOG_SIZE + sblock->s_log_block_size; + block_size = 1 << log2_block_size; + + if (block_size > EXT2_MAX_BLOCK_SIZE) + ext2_panic ("block size %d is too big (max is %d bytes)", + block_size, EXT2_MAX_BLOCK_SIZE); + + log2_dev_blocks_per_fs_block = log2_block_size - store->log2_block_size; + if (log2_dev_blocks_per_fs_block < 0) + ext2_panic ("block size %d isn't a power-of-two multiple of the device" + " block size (%d)!", + block_size, store->block_size); + + log2_stat_blocks_per_fs_block = 0; + while ((512 << log2_stat_blocks_per_fs_block) < block_size) + log2_stat_blocks_per_fs_block++; + if ((512 << log2_stat_blocks_per_fs_block) != block_size) + ext2_panic ("block size %d isn't a power-of-two multiple of 512!", + block_size); + + if ((store->size >> log2_block_size) < sblock->s_blocks_count) + ext2_panic ("disk size (%qd bytes) too small; superblock says we need %qd", + (long long int) store->size, + (long long int) sblock->s_blocks_count << log2_block_size); + if (log2_dev_blocks_per_fs_block != 0 + && (store->size & ((1 << log2_dev_blocks_per_fs_block) - 1)) != 0) + 
ext2_warning ("%ld (%d byte) device blocks " + " unused after last filesystem (%d byte) block", + (store->size & ((1 << log2_dev_blocks_per_fs_block) - 1)), + store->block_size, block_size); + + /* Set these handy variables. */ + inodes_per_block = block_size / EXT2_INODE_SIZE (sblock); + + frag_size = EXT2_MIN_FRAG_SIZE << sblock->s_log_frag_size; + if (frag_size) + frags_per_block = block_size / frag_size; + else + ext2_panic ("frag size is zero!"); + + if (sblock->s_rev_level > EXT2_GOOD_OLD_REV) + { + if (sblock->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) + ext2_panic ("could not mount because of unsupported optional features" + " (0x%x)", + sblock->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP); + if (sblock->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP) + { + ext2_warning ("mounted readonly because of" + " unsupported optional features (0x%x)", + sblock->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP); + diskfs_readonly = 1; + } + if (sblock->s_inode_size != EXT2_GOOD_OLD_INODE_SIZE) + ext2_panic ("inode size %d isn't supported", sblock->s_inode_size); + } + + groups_count = + ((sblock->s_blocks_count - sblock->s_first_data_block + + sblock->s_blocks_per_group - 1) + / sblock->s_blocks_per_group); + + itb_per_group = sblock->s_inodes_per_group / inodes_per_block; + desc_per_block = block_size / sizeof (struct ext2_group_desc); + addr_per_block = block_size / sizeof (block_t); + db_per_group = (groups_count + desc_per_block - 1) / desc_per_block; + + ext2fs_clean = sblock->s_state & EXT2_VALID_FS; + if (! ext2fs_clean) + { + ext2_warning ("FILESYSTEM NOT UNMOUNTED CLEANLY; PLEASE fsck"); + if (! diskfs_readonly) + { + diskfs_readonly = 1; + ext2_warning ("MOUNTED READ-ONLY; MUST USE `fsysopts --writable'"); + } + } + + allocate_mod_map (); + + diskfs_end_catch_exception (); + + /* Cache a convenient pointer to the block group descriptors for allocation. + These are stored in the filesystem blocks following the superblock. 
*/ + group_desc_image = (struct ext2_group_desc *) bptr (bptr_block (sblock) + 1); + + /* A handy source of page-aligned zeros. */ + zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ|PROT_WRITE, + MAP_ANON, 0, 0); +} + +error_t +diskfs_set_hypermetadata (int wait, int clean) +{ + if (clean && ext2fs_clean && !(sblock->s_state & EXT2_VALID_FS)) + /* The filesystem is clean, so we need to set the clean flag. */ + { + sblock->s_state |= EXT2_VALID_FS; + sblock_dirty = 1; + } + else if (!clean && (sblock->s_state & EXT2_VALID_FS)) + /* The filesystem just became dirty, so clear the clean flag. */ + { + sblock->s_state &= ~EXT2_VALID_FS; + sblock_dirty = 1; + wait = 1; + } + + if (sblock_dirty) + { + sblock_dirty = 0; + record_global_poke (sblock); + } + + sync_global (wait); + + /* Should check writability here and return EROFS if necessary. XXX */ + return 0; +} + +void +diskfs_readonly_changed (int readonly) +{ + allocate_mod_map (); + + (*(readonly ? store_set_flags : store_clear_flags)) (store, STORE_READONLY); + + mprotect (disk_image, store->size, PROT_READ | (readonly ? 0 : PROT_WRITE)); + + if (!readonly && !(sblock->s_state & EXT2_VALID_FS)) + ext2_warning ("UNCLEANED FILESYSTEM NOW WRITABLE"); +} diff --git a/ext2fs/ialloc.c b/ext2fs/ialloc.c new file mode 100644 index 00000000..2ccfb08d --- /dev/null +++ b/ext2fs/ialloc.c @@ -0,0 +1,405 @@ +/* Inode allocation routines. + + Copyright (C) 1995, 1996, 1999 Free Software Foundation, Inc. + + Converted to work under the hurd by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * linux/fs/ext2/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + +#include "ext2fs.h" +#include "bitmap.c" + +/* ---------------------------------------------------------------- */ + +/* Free node NP; the on disk copy has already been synced with + diskfs_node_update (where NP->dn_stat.st_mode was 0). Its + mode used to be OLD_MODE. 
*/ +void +diskfs_free_node (struct node *np, mode_t old_mode) +{ + char *bh; + unsigned long block_group; + unsigned long bit; + struct ext2_group_desc *gdp; + ino_t inum = np->cache_id; + + assert (!diskfs_readonly); + + ext2_debug ("freeing inode %u", inum); + + spin_lock (&global_lock); + + if (inum < EXT2_FIRST_INO (sblock) || inum > sblock->s_inodes_count) + { + ext2_error ("reserved inode or nonexistent inode: %u", inum); + spin_unlock (&global_lock); + return; + } + + block_group = (inum - 1) / sblock->s_inodes_per_group; + bit = (inum - 1) % sblock->s_inodes_per_group; + + gdp = group_desc (block_group); + bh = bptr (gdp->bg_inode_bitmap); + + if (!clear_bit (bit, bh)) + ext2_warning ("bit already cleared for inode %u", inum); + else + { + record_global_poke (bh); + + gdp->bg_free_inodes_count++; + if (S_ISDIR (old_mode)) + gdp->bg_used_dirs_count--; + record_global_poke (gdp); + + sblock->s_free_inodes_count++; + } + + sblock_dirty = 1; + spin_unlock (&global_lock); + alloc_sync(0); +} + +/* ---------------------------------------------------------------- */ + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory\'s block + * group to find a free inode. + */ +ino_t +ext2_alloc_inode (ino_t dir_inum, mode_t mode) +{ + char *bh; + int i, j, inum, avefreei; + struct ext2_group_desc *gdp; + struct ext2_group_desc *tmp; + + spin_lock (&global_lock); + +repeat: + gdp = NULL; + i = 0; + + if (S_ISDIR (mode)) + { + avefreei = sblock->s_free_inodes_count / groups_count; + +/* I am not yet convinced that this next bit is necessary. 
+ i = inode_group_num(dir_inum); + for (j = 0; j < groups_count; j++) + { + tmp = group_desc (i); + if ((tmp->bg_used_dirs_count << 8) < tmp->bg_free_inodes_count) + { + gdp = tmp; + break; + } + else + i = ++i % groups_count; + } + */ + + if (!gdp) + { + for (j = 0; j < groups_count; j++) + { + tmp = group_desc (j); + if (tmp->bg_free_inodes_count + && tmp->bg_free_inodes_count >= avefreei) + { + if (!gdp || + (tmp->bg_free_blocks_count > gdp->bg_free_blocks_count)) + { + i = j; + gdp = tmp; + } + } + } + } + } + else + { + /* + * Try to place the inode in its parent directory + */ + i = inode_group_num(dir_inum); + tmp = group_desc (i); + if (tmp->bg_free_inodes_count) + gdp = tmp; + else + { + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (j = 1; j < groups_count; j <<= 1) + { + i += j; + if (i >= groups_count) + i -= groups_count; + tmp = group_desc (i); + if (tmp->bg_free_inodes_count) + { + gdp = tmp; + break; + } + } + } + if (!gdp) + { + /* + * That failed: try linear search for a free inode + */ + i = inode_group_num(dir_inum) + 1; + for (j = 2; j < groups_count; j++) + { + if (++i >= groups_count) + i = 0; + tmp = group_desc (i); + if (tmp->bg_free_inodes_count) + { + gdp = tmp; + break; + } + } + } + } + + if (!gdp) + { + spin_unlock (&global_lock); + return 0; + } + + bh = bptr (gdp->bg_inode_bitmap); + if ((inum = + find_first_zero_bit ((unsigned long *) bh, sblock->s_inodes_per_group)) + < sblock->s_inodes_per_group) + { + if (set_bit (inum, bh)) + { + ext2_warning ("bit already set for inode %d", inum); + goto repeat; + } + record_global_poke (bh); + } + else + { + if (gdp->bg_free_inodes_count != 0) + { + ext2_error ("free inodes count corrupted in group %d", i); + inum = 0; + goto sync_out; + } + goto repeat; + } + + inum += i * sblock->s_inodes_per_group + 1; + if (inum < EXT2_FIRST_INO (sblock) || inum > sblock->s_inodes_count) + { + ext2_error ("reserved inode or inode > inodes count - " + "block_group = 
%d,inode=%d", i, inum); + inum = 0; + goto sync_out; + } + + gdp->bg_free_inodes_count--; + if (S_ISDIR (mode)) + gdp->bg_used_dirs_count++; + record_global_poke (gdp); + + sblock->s_free_inodes_count--; + sblock_dirty = 1; + + sync_out: + spin_unlock (&global_lock); + alloc_sync (0); + + return inum; +} + +/* ---------------------------------------------------------------- */ + +/* The user must define this function. Allocate a new node to be of + mode MODE in locked directory DP (don't actually set the mode or + modify the dir, that will be done by the caller); the user + responsible for the request can be identified with CRED. Set *NP + to be the newly allocated node. */ +error_t +diskfs_alloc_node (struct node *dir, mode_t mode, struct node **node) +{ + error_t err; + int sex, block; + struct node *np; + struct stat *st; + ino_t inum; + + assert (!diskfs_readonly); + + inum = ext2_alloc_inode (dir->cache_id, mode); + + if (inum == 0) + return ENOSPC; + + err = diskfs_cached_lookup (inum, &np); + if (err) + return err; + + st = &np->dn_stat; + + if (st->st_blocks) + { + if (sblock->s_creator_os == EXT2_OS_HURD) + ext2_warning ("Free inode %d had %ld blocks", inum, st->st_blocks); + st->st_blocks = 0; + np->dn_set_ctime = 1; + } + /* Zero out the block pointers in case there's some noise left on disk. */ + for (block = 0; block < EXT2_N_BLOCKS; block++) + if (np->dn->info.i_data[block] != 0) + { + np->dn->info.i_data[block] = 0; + np->dn_set_ctime = 1; + } + st->st_mode &= ~S_IPTRANS; + if (np->allocsize) + { + if (sblock->s_creator_os == EXT2_OS_HURD) + ext2_warning ("Free inode %d had a size of %ld", inum, st->st_size); + st->st_size = 0; + np->allocsize = 0; + np->dn_set_ctime = 1; + } + + /* Propagate initial inode flags from the directory, as Linux does. 
*/ + np->dn->info.i_flags = dir->dn->info.i_flags; + if (S_ISLNK (mode)) + np->dn->info.i_flags &= ~(EXT2_IMMUTABLE_FL | EXT2_APPEND_FL); + + st->st_flags = 0; + + /* + * Set up a new generation number for this inode. + */ + spin_lock (&generation_lock); + sex = diskfs_mtime->seconds; + if (++next_generation < (u_long)sex) + next_generation = sex; + st->st_gen = next_generation; + spin_unlock (&generation_lock); + + alloc_sync (np); + + *node = np; + return 0; +} + +/* ---------------------------------------------------------------- */ + +unsigned long +ext2_count_free_inodes () +{ +#ifdef EXT2FS_DEBUG + unsigned long desc_count, bitmap_count, x; + struct ext2_group_desc *gdp; + int i; + + spin_lock (&global_lock); + + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < groups_count; i++) + { + gdp = group_desc (i); + desc_count += gdp->bg_free_inodes_count; + x = count_free (bptr (gdp->bg_inode_bitmap), + sblock->s_inodes_per_group / 8); + ext2_debug ("group %d: stored = %d, counted = %lu", + i, gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + ext2_debug ("stored = %lu, computed = %lu, %lu", + sblock->s_free_inodes_count, desc_count, bitmap_count); + spin_unlock (&global_lock); + return desc_count; +#else + return sblock->s_free_inodes_count; +#endif +} + +/* ---------------------------------------------------------------- */ + +void +ext2_check_inodes_bitmap () +{ + int i; + struct ext2_group_desc *gdp; + unsigned long desc_count, bitmap_count, x; + + spin_lock (&global_lock); + + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < groups_count; i++) + { + gdp = group_desc (i); + desc_count += gdp->bg_free_inodes_count; + x = count_free (bptr (gdp->bg_inode_bitmap), + sblock->s_inodes_per_group / 8); + if (gdp->bg_free_inodes_count != x) + ext2_error ("wrong free inodes count in group %d, " + "stored = %d, counted = %lu", + i, gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + if (sblock->s_free_inodes_count != 
bitmap_count) + ext2_error ("wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + (unsigned long) sblock->s_free_inodes_count, bitmap_count); + + spin_unlock (&global_lock); +} diff --git a/ext2fs/inode.c b/ext2fs/inode.c new file mode 100644 index 00000000..b3f07df4 --- /dev/null +++ b/ext2fs/inode.c @@ -0,0 +1,796 @@ +/* Inode management routines + + Copyright (C) 1994,95,96,97,98,99 Free Software Foundation, Inc. + + Converted for ext2fs by Miles Bader <miles@gnu.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ext2fs.h" +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/statvfs.h> + +/* these flags aren't actually defined by a header file yet, so temporarily + disable them if necessary. */ +#ifndef UF_APPEND +#define UF_APPEND 0 +#endif +#ifndef UF_NODUMP +#define UF_NODUMP 0 +#endif +#ifndef UF_IMMUTABLE +#define UF_IMMUTABLE 0 +#endif + +#define INOHSZ 512 +#if ((INOHSZ&(INOHSZ-1)) == 0) +#define INOHASH(ino) ((ino)&(INOHSZ-1)) +#else +#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) +#endif + +static struct node *nodehash[INOHSZ]; + +static error_t read_node (struct node *np); + +spin_lock_t generation_lock = SPIN_LOCK_INITIALIZER; + +/* Initialize the inode hash table. 
*/ +void +inode_init () +{ + int n; + for (n = 0; n < INOHSZ; n++) + nodehash[n] = 0; +} + +/* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. */ +error_t +diskfs_cached_lookup (int inum, struct node **npp) +{ + error_t err; + struct node *np; + struct disknode *dn; + + spin_lock (&diskfs_node_refcnt_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + if (np->cache_id == inum) + { + np->references++; + spin_unlock (&diskfs_node_refcnt_lock); + mutex_lock (&np->lock); + *npp = np; + return 0; + } + + /* Format specific data for the new node. */ + dn = malloc (sizeof (struct disknode)); + if (! dn) + { + spin_unlock (&diskfs_node_refcnt_lock); + return ENOMEM; + } + dn->dirents = 0; + dn->dir_idx = 0; + dn->pager = 0; + rwlock_init (&dn->alloc_lock); + pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_image); + + /* Create the new node. */ + np = diskfs_make_node (dn); + np->cache_id = inum; + + mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ + dn->hnext = nodehash[INOHASH(inum)]; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; + + spin_unlock (&diskfs_node_refcnt_lock); + + /* Get the contents of NP off disk. */ + err = read_node (np); + + if (!diskfs_check_readonly () && !np->dn_stat.st_gen) + { + spin_lock (&generation_lock); + if (++next_generation < diskfs_mtime->seconds) + next_generation = diskfs_mtime->seconds; + np->dn_stat.st_gen = next_generation; + spin_unlock (&generation_lock); + np->dn_set_ctime = 1; + } + + if (err) + return err; + else + { + *npp = np; + return 0; + } +} + +/* Lookup node INUM (which must have a reference already) and return it + without allocating any new references. 
*/
+struct node *
+ifind (ino_t inum)
+{
+  struct node *np;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+  for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext)
+    if (np->cache_id == inum)
+      {
+        /* The caller promised a live reference, so the count is non-zero.  */
+        assert (np->references);
+        spin_unlock (&diskfs_node_refcnt_lock);
+        return np;
+      }
+
+  /* Reaching the end of the chain means the caller broke the contract.  */
+  assert (0);
+}
+
+/* NP has lost its final reference: unlink it from its hash chain and
+   release everything hanging off its disknode, then NP itself.  */
+void
+diskfs_node_norefs (struct node *np)
+{
+  struct disknode *dn = np->dn;
+
+  /* Splice NP out of the chain through its back-pointer.  */
+  *dn->hprevp = dn->hnext;
+  if (dn->hnext)
+    dn->hnext->dn->hprevp = dn->hprevp;
+
+  if (dn->dirents)
+    free (dn->dirents);
+  assert (!dn->pager);
+
+  /* Hand any still-pending indirect-block pokes to the global pokel
+     before this node's pokel goes away.  */
+  pokel_inherit (&global_pokel, &dn->indir_pokel);
+  pokel_finalize (&dn->indir_pokel);
+
+  free (dn);
+  free (np);
+}
+
+/* Called when NP's last hard reference is gone: ask the pager layer to
+   drop whatever soft references it can.  */
+void
+diskfs_try_dropping_softrefs (struct node *np)
+{
+  drop_pager_softrefs (np);
+}
+
+/* Called when NP's last hard reference is gone; nothing to do here.  */
+void
+diskfs_lost_hardrefs (struct node *np)
+{
+}
+
+/* Called when NP gains a hard reference again: the pager layer may keep
+   otherwise unused soft references once more.  */
+void
+diskfs_new_hardrefs (struct node *np)
+{
+  allow_pager_softrefs (np);
+}
+
+/* Read stat information out of the ext2_inode. 
*/
+/* Fills in NP->dn_stat, NP->dn->info and NP->allocsize from the on-disk
+   inode of NP->cache_id.  Returns 0 on success, the error reported by
+   diskfs_catch_exception, or EFBIG for a non-directory inode whose high
+   size word is set.  The exception region entered here is left again on
+   every return path.  */
+static error_t
+read_node (struct node *np)
+{
+  error_t err;
+  static int fsid, fsidset;
+  struct stat *st = &np->dn_stat;
+  struct disknode *dn = np->dn;
+  struct ext2_inode *di = dino (np->cache_id);
+  struct ext2_inode_info *info = &dn->info;
+
+  err = diskfs_catch_exception ();
+  if (err)
+    return err;
+
+  /* The filesystem id is our pid, looked up once and then cached.  */
+  if (!fsidset)
+    {
+      fsid = getpid ();
+      fsidset = 1;
+    }
+
+  st->st_fstype = FSTYPE_EXT2FS;
+  st->st_fsid = fsid;
+  st->st_ino = np->cache_id;
+  st->st_blksize = vm_page_size * 2;
+
+  st->st_nlink = di->i_links_count;
+  st->st_size = di->i_size;
+  st->st_gen = di->i_generation;
+
+  st->st_atime = di->i_atime;
+  st->st_mtime = di->i_mtime;
+  st->st_ctime = di->i_ctime;
+
+#ifdef XXX
+  st->st_atime_usec = di->i_atime.ts_nsec / 1000;
+  st->st_mtime_usec = di->i_mtime.ts_nsec / 1000;
+  st->st_ctime_usec = di->i_ctime.ts_nsec / 1000;
+#endif
+
+  st->st_blocks = di->i_blocks;
+
+  /* Translate the ext2 inode flags we understand into stat flags.  */
+  st->st_flags = 0;
+  if (di->i_flags & EXT2_APPEND_FL)
+    st->st_flags |= UF_APPEND;
+  if (di->i_flags & EXT2_NODUMP_FL)
+    st->st_flags |= UF_NODUMP;
+  if (di->i_flags & EXT2_IMMUTABLE_FL)
+    st->st_flags |= UF_IMMUTABLE;
+
+  if (sblock->s_creator_os == EXT2_OS_HURD)
+    {
+      /* Hurd-created filesystems carry an extra 16 bits of mode, uid and
+         gid, a separate author, and a translator block.  */
+      st->st_mode = di->i_mode | (di->i_mode_high << 16);
+      st->st_mode &= ~S_ITRANS;
+      if (di->i_translator)
+        st->st_mode |= S_IPTRANS;
+
+      st->st_uid = di->i_uid | (di->i_uid_high << 16);
+      st->st_gid = di->i_gid | (di->i_gid_high << 16);
+
+      st->st_author = di->i_author;
+      if (st->st_author == -1)
+        st->st_author = st->st_uid;
+    }
+  else
+    {
+      /* Elsewhere only the low 16 bits are used and the author simply
+         follows the owner.  */
+      st->st_mode = di->i_mode & ~S_ITRANS;
+      st->st_uid = di->i_uid;
+      st->st_gid = di->i_gid;
+      st->st_author = st->st_uid;
+      np->author_tracks_uid = 1;
+    }
+
+  /* Setup the ext2fs auxiliary inode info.  */
+  info->i_dtime = di->i_dtime;
+  info->i_flags = di->i_flags;
+  info->i_faddr = di->i_faddr;
+  info->i_frag_no = di->i_frag;
+  info->i_frag_size = di->i_fsize;
+  info->i_osync = 0;
+  info->i_file_acl = di->i_file_acl;
+  if (S_ISDIR (st->st_mode))
+    info->i_dir_acl = di->i_dir_acl;
+  else
+    {
+      info->i_dir_acl = 0;
+      info->i_high_size = di->i_size_high;
+      if (info->i_high_size)    /* XXX */
+        {
+          /* Leave the exception region entered at the top of this function
+             before bailing out, just as the normal path below does;
+             returning without doing so left it active.  */
+          diskfs_end_catch_exception ();
+          ext2_warning ("cannot handle large file inode %d", np->cache_id);
+          return EFBIG;
+        }
+    }
+  info->i_block_group = inode_group_num (np->cache_id);
+  info->i_next_alloc_block = 0;
+  info->i_next_alloc_goal = 0;
+  info->i_prealloc_count = 0;
+
+  /* Set to a conservative value.  */
+  dn->last_page_partially_writable = 0;
+
+  /* Device nodes keep their device number in the first block slot; for
+     everything else the slots are copied into INFO.  */
+  if (S_ISCHR (st->st_mode) || S_ISBLK (st->st_mode))
+    st->st_rdev = di->i_block[0];
+  else
+    {
+      memcpy (info->i_data, di->i_block,
+              EXT2_N_BLOCKS * sizeof info->i_data[0]);
+      st->st_rdev = 0;
+    }
+
+  diskfs_end_catch_exception ();
+
+  if (S_ISREG (st->st_mode) || S_ISDIR (st->st_mode)
+      || (S_ISLNK (st->st_mode) && st->st_blocks))
+    {
+      unsigned offset;
+
+      np->allocsize = np->dn_stat.st_size;
+
+      /* Round up to a block multiple.  */
+      offset = np->allocsize & ((1 << log2_block_size) - 1);
+      if (offset > 0)
+        np->allocsize += block_size - offset;
+    }
+  else
+    /* Allocsize should be zero for anything except directories, files, and
+       long symlinks.  These are the only things allowed to have any blocks
+       allocated as well, although st_size may be zero for any type (cases
+       where st_blocks=0 and st_size>0 include fast symlinks, and, under
+       linux, some devices).  */
+    np->allocsize = 0;
+
+  return 0;
+}
+
+/* Return EINVAL if this is not a hurd filesystem and any bits are set in L
+   except the low 16 bits, else 0. 
*/
+static inline error_t
+check_high_bits (struct node *np, long l)
+{
+  /* Hurd filesystems store the high bits, so anything goes there.
+
+     Linux 2.3.42 has a mount-time option (not a bit stored on disk)
+     NO_UID32 to ignore the high 16 bits of uid and gid, but by default
+     allows them.  It also does this check for "interoperability with old
+     kernels".  Note that our check refuses to change the values, while
+     Linux 2.3.42 just silently clears the high bits in an inode it updates,
+     even if it was updating it for an unrelated reason.  Hence a non-zero
+     i_dtime also lets the value through.  */
+  if (sblock->s_creator_os != EXT2_OS_HURD
+      && np->dn->info.i_dtime == 0
+      && (l & ~0xFFFF) != 0)
+    return EINVAL;
+
+  return 0;
+}
+
+/* Decide whether NP may be given owner UID: 0 if so, else an error
+   code.  */
+error_t
+diskfs_validate_owner_change (struct node *np, uid_t uid)
+{
+  return check_high_bits (np, uid);
+}
+
+/* Decide whether NP may be given group GID: 0 if so, else an error
+   code.  */
+error_t
+diskfs_validate_group_change (struct node *np, gid_t gid)
+{
+  return check_high_bits (np, gid);
+}
+
+/* Decide whether NP may be given mode MODE: 0 if so, else an error code.
+   Clearing the mode must always work; diskfs does not ask before doing
+   that.  */
+error_t
+diskfs_validate_mode_change (struct node *np, mode_t mode)
+{
+  return check_high_bits (np, mode);
+}
+
+/* Decide whether NP may be given author AUTHOR: 0 if so, else an error
+   code.  */
+error_t
+diskfs_validate_author_change (struct node *np, uid_t author)
+{
+  /* Only hurd filesystems store an author; everywhere else it has to stay
+     equal to the owner.  */
+  if (sblock->s_creator_os != EXT2_OS_HURD
+      && author != np->dn_stat.st_uid)
+    return EINVAL;
+
+  return 0;
+}
+
+/* The user may define this function.  Return 0 if NP's flags can be
+   changed to FLAGS; otherwise return an error code.  It must always
+   be possible to clear the flags. 
*/
+error_t
+diskfs_validate_flags_change (struct node *np, int flags)
+{
+  /* Only the three flags that write_node maps onto ext2 inode flags are
+     accepted.  (When a UF_* macro is stubbed out to 0 at the top of this
+     file it contributes nothing to the mask.)  */
+  if (flags & ~(UF_NODUMP | UF_IMMUTABLE | UF_APPEND))
+    return EINVAL;
+  else
+    return 0;
+}
+
+/* Writes everything from NP's inode to the disk image, and returns a pointer
+   to it, or NULL if nothing need be done.
+
+   NULL is also returned when diskfs_catch_exception reports an error, so
+   callers cannot tell that case from a clean inode.  The caller is
+   responsible for getting the returned inode image to disk.  */
+static struct ext2_inode *
+write_node (struct node *np)
+{
+  error_t err;
+  struct stat *st = &np->dn_stat;
+  struct ext2_inode *di = dino (np->cache_id);
+
+  /* presumably gives back unused preallocated blocks -- confirm against
+     ext2_discard_prealloc.  */
+  if (np->dn->info.i_prealloc_count)
+    ext2_discard_prealloc (np);
+
+  /* Pending time updates must already have been applied to dn_stat.  */
+  assert (!np->dn_set_ctime && !np->dn_set_atime && !np->dn_set_mtime);
+  if (np->dn_stat_dirty)
+    {
+      struct ext2_inode_info *info = &np->dn->info;
+
+      assert (!diskfs_readonly);
+
+      ext2_debug ("writing inode %d to disk", np->cache_id);
+
+      err = diskfs_catch_exception ();
+      if (err)
+        return NULL;
+
+      di->i_generation = st->st_gen;
+
+      /* We happen to know that the stat mode bits are the same
+         as the ext2fs mode bits. */
+      /* XXX? */
+
+      /* Only the low 16 bits of these fields are standard across all ext2
+         implementations. */
+      di->i_mode = st->st_mode & 0xFFFF & ~S_ITRANS;
+      di->i_uid = st->st_uid & 0xFFFF;
+      di->i_gid = st->st_gid & 0xFFFF;
+
+      if (sblock->s_creator_os == EXT2_OS_HURD)
+        /* If this is a hurd-compatible filesystem, write the high bits too. */
+        {
+          di->i_mode_high = (st->st_mode >> 16) & 0xffff & ~S_ITRANS;
+          di->i_uid_high = st->st_uid >> 16;
+          di->i_gid_high = st->st_gid >> 16;
+          di->i_author = st->st_author;
+        }
+      else
+        /* No hurd extensions should be turned on.  */
+        /* NOTE(review): check_high_bits accepts values with high bits set
+           when i_dtime is non-zero; such a value would trip these
+           assertions -- confirm the two agree.  */
+        {
+          assert ((st->st_uid & ~0xFFFF) == 0);
+          assert ((st->st_gid & ~0xFFFF) == 0);
+          assert ((st->st_mode & ~0xFFFF) == 0);
+          assert (np->author_tracks_uid && st->st_author == st->st_uid);
+        }
+
+      di->i_links_count = st->st_nlink;
+      di->i_size = st->st_size;
+
+      di->i_atime = st->st_atime;
+      di->i_mtime = st->st_mtime;
+      di->i_ctime = st->st_ctime;
+#ifdef XXX
+      di->i_atime.ts_nsec = st->st_atime_usec * 1000;
+      di->i_mtime.ts_nsec = st->st_mtime_usec * 1000;
+      di->i_ctime.ts_nsec = st->st_ctime_usec * 1000;
+#endif
+
+      di->i_blocks = st->st_blocks;
+
+      /* Convert generic flags in ST->st_flags to ext2-specific flags in DI
+         (but don't mess with ext2 flags we don't know about).  The original
+         set was copied from DI into INFO by read_node, but might have been
+         modified for ext2fs-specific reasons; so we use INFO->i_flags
+         to start with, and then apply the flags in ST->st_flags.  */
+      info->i_flags &= ~(EXT2_APPEND_FL | EXT2_NODUMP_FL | EXT2_IMMUTABLE_FL);
+      if (st->st_flags & UF_APPEND)
+        info->i_flags |= EXT2_APPEND_FL;
+      if (st->st_flags & UF_NODUMP)
+        info->i_flags |= EXT2_NODUMP_FL;
+      if (st->st_flags & UF_IMMUTABLE)
+        info->i_flags |= EXT2_IMMUTABLE_FL;
+      di->i_flags = info->i_flags;
+
+      /* On a hurd filesystem, a node without a passive translator must not
+         keep a translator block number in its inode.  */
+      if (!(st->st_mode & S_IPTRANS) && sblock->s_creator_os == EXT2_OS_HURD)
+        di->i_translator = 0;
+
+      /* Set dtime non-zero to indicate a deleted file.  */
+      /* A zero mode marks the node as deleted; the mtime written just
+         above is reused as the deletion time.  */
+      di->i_dtime = (st->st_mode ? 0 : di->i_mtime);
+
+      /* Mirror of read_node: device nodes keep their device number in
+         slot 0, everything else writes back all block slots.  */
+      if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode))
+        di->i_block[0] = st->st_rdev;
+      else
+        {
+          int block;
+          for (block = 0; block < EXT2_N_BLOCKS; block++)
+            di->i_block[block] = np->dn->info.i_data[block];
+        }
+
+      diskfs_end_catch_exception ();
+      np->dn_stat_dirty = 0;
+
+      return di;
+    }
+  else
+    return NULL;
+}
+
+/* Reload all data specific to NODE from disk, without writing anything.
+   Always called with DISKFS_READONLY true. 
*/
+error_t
+diskfs_node_reload (struct node *node)
+{
+  struct disknode *dn = node->dn;
+
+  /* Throw away the cached directory data; it is rebuilt on demand.  */
+  if (dn->dirents)
+    {
+      free (dn->dirents);
+      dn->dirents = 0;
+    }
+  pokel_flush (&dn->indir_pokel);
+  flush_node_pager (node);
+  /* NOTE(review): a failure of read_node is not reported to the caller;
+     confirm whether it should be.  */
+  read_node (node);
+
+  return 0;
+}
+
+/* For each active node, call FUN.  The node is to be locked around the call
+   to FUN.  If FUN returns non-zero for any node, then immediately stop, and
+   return that value.  ENOMEM is returned, without calling FUN at all, if
+   the temporary list of nodes cannot be allocated.  */
+error_t
+diskfs_node_iterate (error_t (*fun)(struct node *))
+{
+  error_t err = 0;
+  int n, num_nodes = 0;
+  struct node *node, **node_list, **p;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+
+  /* We must copy everything from the hash table into another data structure
+     to avoid running into any problems with the hash-table being modified
+     during processing (normally we delegate access to hash-table with
+     diskfs_node_refcnt_lock, but we can't hold this while locking the
+     individual node locks).  */
+
+  for (n = 0; n < INOHSZ; n++)
+    for (node = nodehash[n]; node; node = node->dn->hnext)
+      num_nodes++;
+
+  if (num_nodes == 0)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return 0;
+    }
+
+  /* The list grows with the number of cached nodes, so it lives on the
+     heap rather than on this thread's stack.  */
+  node_list = malloc (num_nodes * sizeof (struct node *));
+  if (! node_list)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return ENOMEM;
+    }
+
+  p = node_list;
+  for (n = 0; n < INOHSZ; n++)
+    for (node = nodehash[n]; node; node = node->dn->hnext)
+      {
+        /* Hold a reference so the node outlives the unlocked phase.  */
+        *p++ = node;
+        node->references++;
+      }
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+
+  p = node_list;
+  while (num_nodes-- > 0)
+    {
+      node = *p++;
+      if (!err)
+        {
+          mutex_lock (&node->lock);
+          err = (*fun)(node);
+          mutex_unlock (&node->lock);
+        }
+      /* Every reference taken above is released, even after an error.  */
+      diskfs_nrele (node);
+    }
+
+  free (node_list);
+  return err;
+}
+
+/* Write all active disknodes into the ext2_inode pager. */
+void
+write_all_disknodes ()
+{
+  error_t write_one_disknode (struct node *node)
+    {
+      struct ext2_inode *di;
+
+      diskfs_set_node_times (node);
+
+      /* Sync the indirect blocks here; they'll all be done before any
+         inodes.  Waiting for them shouldn't be too bad.  */
+      pokel_sync (&node->dn->indir_pokel, 1);
+
+      /* Update the inode image.  */
+      di = write_node (node);
+      if (di)
+        record_global_poke (di);
+
+      return 0;
+    }
+
+  diskfs_node_iterate (write_one_disknode);
+}
+
+/* Sync the info in NP->dn_stat and any associated format-specific
+   information to disk.  If WAIT is true, then return only after the
+   physical media has been completely updated.  */
+void
+diskfs_write_disknode (struct node *np, int wait)
+{
+  struct ext2_inode *di = write_node (np);
+  if (di)
+    {
+      if (wait)
+        sync_global_ptr (di, 1);
+      else
+        record_global_poke (di);
+    }
+}
+
+/* Set *ST with appropriate values to reflect the current state of the
+   filesystem.  Always returns 0.  */
+error_t
+diskfs_set_statfs (struct statfs *st)
+{
+  st->f_type = FSTYPE_EXT2FS;
+  st->f_bsize = block_size;
+  st->f_blocks = sblock->s_blocks_count;
+  st->f_bfree = sblock->s_free_blocks_count;
+  /* Blocks available to ordinary users exclude the reserved ones.  Once
+     fewer blocks are free than are reserved, report none rather than
+     letting the subtraction wrap around.  */
+  if (sblock->s_free_blocks_count > sblock->s_r_blocks_count)
+    st->f_bavail = st->f_bfree - sblock->s_r_blocks_count;
+  else
+    st->f_bavail = 0;
+  st->f_files = sblock->s_inodes_count;
+  st->f_ffree = sblock->s_free_inodes_count;
+  st->f_fsid = getpid ();
+  st->f_namelen = 0;
+  st->f_favail = st->f_ffree;
+  st->f_frsize = frag_size;
+  return 0;
+}
+
+/* Implement the diskfs_set_translator callback from the diskfs
+   library; see <hurd/diskfs.h> for the interface description. 
*/
+/* The translator record occupies one filesystem block: a two-byte
+   little-endian length followed by NAMELEN bytes of NAME.  A zero NAMELEN
+   removes the record.  Returns EOPNOTSUPP on non-hurd filesystems,
+   ENAMETOOLONG if the record does not fit in a block, ENOSPC if no block
+   can be allocated, or the error from diskfs_catch_exception.  */
+error_t
+diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
+                       struct protid *cred)
+{
+  daddr_t blkno;
+  error_t err;
+  char buf[block_size];
+  struct ext2_inode *di;
+
+  assert (!diskfs_readonly);
+
+  if (sblock->s_creator_os != EXT2_OS_HURD)
+    return EOPNOTSUPP;
+
+  /* Compare against the room left after the two length bytes; adding 2 to
+     NAMELEN instead could wrap around for huge values.  */
+  if (namelen > block_size - 2)
+    return ENAMETOOLONG;
+
+  err = diskfs_catch_exception ();
+  if (err)
+    return err;
+
+  di = dino (np->cache_id);
+  blkno = di->i_translator;
+
+  if (namelen && !blkno)
+    {
+      /* Allocate block for translator */
+      blkno =
+        ext2_new_block ((np->dn->info.i_block_group
+                         * EXT2_BLOCKS_PER_GROUP (sblock))
+                        + sblock->s_first_data_block,
+                        0, 0, 0);
+      if (blkno == 0)
+        {
+          diskfs_end_catch_exception ();
+          return ENOSPC;
+        }
+
+      di->i_translator = blkno;
+      record_global_poke (di);
+
+      np->dn_stat.st_blocks += 1 << log2_stat_blocks_per_fs_block;
+      np->dn_set_ctime = 1;
+    }
+  else if (!namelen && blkno)
+    {
+      /* Clear block for translator going away. */
+      di->i_translator = 0;
+      record_global_poke (di);
+      ext2_free_blocks (blkno, 1);
+
+      np->dn_stat.st_blocks -= 1 << log2_stat_blocks_per_fs_block;
+      np->dn_stat.st_mode &= ~S_IPTRANS;
+      np->dn_set_ctime = 1;
+    }
+
+  if (namelen)
+    {
+      buf[0] = namelen & 0xFF;
+      buf[1] = (namelen >> 8) & 0xFF;
+      bcopy (name, buf + 2, namelen);
+      /* The whole block is copied to disk below, so clear the part of BUF
+         behind the name instead of writing out stale stack contents.  */
+      bzero (buf + 2 + namelen, block_size - 2 - namelen);
+
+      bcopy (buf, bptr (blkno), block_size);
+      record_global_poke (bptr (blkno));
+
+      np->dn_stat.st_mode |= S_IPTRANS;
+      np->dn_set_ctime = 1;
+    }
+
+  diskfs_end_catch_exception ();
+  return err;
+}
+
+/* Implement the diskfs_get_translator callback from the diskfs library.
+   See <hurd/diskfs.h> for the interface description. 
*/
+/* On success *NAMEP is a malloc'd copy of the translator record's
+   contents and *NAMELEN its length; neither is written on failure.
+   Returns the error from diskfs_catch_exception, EIO if the stored length
+   cannot fit in one block, or ENOMEM if the copy cannot be allocated.  */
+error_t
+diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
+{
+  error_t err;
+  daddr_t blkno;
+  unsigned datalen;
+  void *transloc;
+
+  assert (sblock->s_creator_os == EXT2_OS_HURD);
+
+  err = diskfs_catch_exception ();
+  if (err)
+    return err;
+
+  blkno = (dino (np->cache_id))->i_translator;
+  assert (blkno);
+  transloc = bptr (blkno);
+
+  /* The record starts with its length as two little-endian bytes.  */
+  datalen =
+    ((unsigned char *)transloc)[0] + (((unsigned char *)transloc)[1] << 8);
+
+  if (datalen > block_size - 2)
+    /* The length comes straight off the disk; never read past the end of
+       the translator block because of a damaged record.  */
+    err = EIO;
+  else
+    {
+      char *name = malloc (datalen);
+
+      if (! name && datalen)
+        err = ENOMEM;
+      else
+        {
+          if (datalen)
+            bcopy (transloc + 2, name, datalen);
+          *namep = name;
+          *namelen = datalen;
+        }
+    }
+
+  diskfs_end_catch_exception ();
+
+  return err;
+}
+
+/* The maximum size of a symlink stored in the inode (including '\0').  */
+#define MAX_INODE_SYMLINK \
+  (EXT2_N_BLOCKS * sizeof (((struct ext2_inode *)0)->i_block[0]))
+
+/* Write an in-inode symlink, or return EINVAL if we can't.  TARGET and its
+   terminating '\0' are stored in NODE's block-pointer array, so it must
+   fit in MAX_INODE_SYMLINK bytes.  */
+static error_t
+write_symlink (struct node *node, const char *target)
+{
+  size_t len = strlen (target) + 1;
+
+  if (len > MAX_INODE_SYMLINK)
+    return EINVAL;
+
+  /* The block pointers are about to be overwritten with text, so the node
+     must not own any blocks.  */
+  assert (node->dn_stat.st_blocks == 0);
+
+  bcopy (target, node->dn->info.i_data, len);
+  node->dn_stat.st_size = len - 1;
+  node->dn_set_ctime = 1;
+  node->dn_set_mtime = 1;
+
+  return 0;
+}
+
+/* Read an in-inode symlink, or return EINVAL if we can't.  st_size bytes
+   are copied into TARGET; no terminating '\0' is added.  */
+static error_t
+read_symlink (struct node *node, char *target)
+{
+  /* A symlink that owns blocks keeps its text in them, not in the inode.  */
+  if (node->dn_stat.st_blocks)
+    return EINVAL;
+
+  assert (node->dn_stat.st_size < MAX_INODE_SYMLINK);
+
+  bcopy (node->dn->info.i_data, target, node->dn_stat.st_size);
+  return 0;
+}
+
+/* If this function is nonzero (and diskfs_shortcut_symlink is set) it
+   is called to set a symlink.  If it returns EINVAL or isn't set,
+   then the normal method (writing the contents into the file data) is
+   used.  If it returns any other error, it is returned to the user. 
*/
+error_t (*diskfs_create_symlink_hook)(struct node *np, const char *target) =
+  write_symlink;
+
+/* If this function is nonzero (and diskfs_shortcut_symlink is set) it
+   is called to read the contents of a symlink.  If it returns EINVAL or
+   isn't set, then the normal method (reading from the file data) is
+   used.  If it returns any other error, it is returned to the user.  */
+error_t (*diskfs_read_symlink_hook)(struct node *np, char *target) =
+  read_symlink;
+
+/* Called when all hard ports have gone away. */
+void
+diskfs_shutdown_soft_ports ()
+{
+  /* Should initiate termination of internally held pager ports
+     (the only things that should be soft) XXX */
+}
diff --git a/ext2fs/msg.c b/ext2fs/msg.c
new file mode 100644
index 00000000..727d926d
--- /dev/null
+++ b/ext2fs/msg.c
@@ -0,0 +1,88 @@
+/* Message printing functions
+
+   Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+   Converted for ext2fs by Miles Bader <miles@gnu.ai.mit.edu>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "ext2fs.h"
+
+/* Serializes all output produced by the functions in this file, and
+   guards ERROR_BUF below.  */
+struct mutex printf_lock = MUTEX_INITIALIZER; /* XXX */
+
+/* Replacement for printf that takes PRINTF_LOCK around the actual output.
+   Returns whatever vprintf returns.  */
+int printf (const char *fmt, ...)
+{
+  va_list arg;
+  int done;
+  va_start (arg, fmt);
+  mutex_lock (&printf_lock);
+  done = vprintf (fmt, arg);
+  mutex_unlock (&printf_lock);
+  va_end (arg);
+  return done;
+}
+
+/* Scratch space in which the message functions below format their text;
+   only touched with PRINTF_LOCK held.  Text that does not fit is
+   truncated.  */
+static char error_buf[1024];
+
+/* Print on stderr an error message, formatted from FMT and the following
+   arguments, tagged with the disk name and FUNCTION.  */
+void _ext2_error (const char * function, const char * fmt, ...)
+{
+  va_list args;
+
+  mutex_lock(&printf_lock);
+
+  va_start (args, fmt);
+  /* Bounded, so that a long message cannot run off the end of
+     ERROR_BUF.  */
+  vsnprintf (error_buf, sizeof error_buf, fmt, args);
+  va_end (args);
+
+  fprintf (stderr, "ext2fs: %s: %s: %s\n", diskfs_disk_name, function, error_buf);
+
+  mutex_unlock(&printf_lock);
+}
+
+/* Like _ext2_error, but mark the message as a panic and then terminate
+   the program with exit status 1.  */
+void _ext2_panic (const char * function, const char * fmt, ...)
+{
+  va_list args;
+
+  mutex_lock(&printf_lock);
+
+  va_start (args, fmt);
+  vsnprintf (error_buf, sizeof error_buf, fmt, args);
+  va_end (args);
+
+  fprintf(stderr, "ext2fs: %s: panic: %s: %s\n",
+          diskfs_disk_name, function, error_buf);
+
+  mutex_unlock(&printf_lock);
+
+  exit (1);
+}
+
+/* Print on stderr a warning formatted from FMT and the following
+   arguments, tagged with the disk name.  */
+void ext2_warning (const char * fmt, ...)
+{
+  va_list args;
+
+  mutex_lock(&printf_lock);
+
+  va_start (args, fmt);
+  vsnprintf (error_buf, sizeof error_buf, fmt, args);
+  va_end (args);
+
+  fprintf (stderr, "ext2fs: %s: warning: %s\n", diskfs_disk_name, error_buf);
+
+  mutex_unlock(&printf_lock);
+}
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
new file mode 100644
index 00000000..bf57d9ed
--- /dev/null
+++ b/ext2fs/pager.c
@@ -0,0 +1,1044 @@
+/* Pager for ext2fs
+
+   Copyright (C) 1994, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
+
+   Converted for ext2fs by Miles Bader <miles@gnu.ai.mit.edu>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <string.h>
+#include <hurd/store.h>
+#include "ext2fs.h"
+
+/* A ports bucket to hold pager ports.  */
+struct port_bucket *pager_bucket;
+
+/* Mapped image of the disk.  */
+void *disk_image;
+
+spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
+
+#ifdef DONT_CACHE_MEMORY_OBJECTS
+#define MAY_CACHE 0
+#else
+#define MAY_CACHE 1
+#endif
+
+/* Statistics gathering is compiled in unconditionally.  */
+#define STATS
+
+#ifdef STATS
+/* Event counters for the pagers; each one is bumped through STAT_INC.  */
+struct ext2fs_pager_stats
+{
+  spin_lock_t lock;             /* Taken around every counter update.  */
+
+  unsigned long disk_pageins;
+  unsigned long disk_pageouts;
+
+  unsigned long file_pageins;
+  unsigned long file_pagein_reads; /* Device reads done by file pagein */
+  unsigned long file_pagein_freed_bufs; /* Discarded pages */
+  unsigned long file_pagein_alloced_bufs; /* Allocated pages */
+
+  unsigned long file_pageouts;
+
+  unsigned long file_page_unlocks;
+  unsigned long file_grows;
+};
+
+static struct ext2fs_pager_stats ext2s_pager_stats;
+
+/* Increment counter FIELD of ext2s_pager_stats under its lock.  */
+#define STAT_INC(field) \
+do { spin_lock (&ext2s_pager_stats.lock); \
+     ext2s_pager_stats.field++; \
+     spin_unlock (&ext2s_pager_stats.lock); } while (0)
+
+#else /* !STATS */
+#define STAT_INC(field) /* nop */0
+#endif /* STATS */
+
+/* The most page buffers free_page_buf keeps around for reuse.  */
+#define MAX_FREE_PAGE_BUFS 32
+
+/* A pool of spare page buffers, kept as a singly linked list: the first
+   word of each free buffer points to the next one.  The list and its
+   length are protected by FREE_PAGE_BUFS_LOCK.  */
+static spin_lock_t free_page_bufs_lock = SPIN_LOCK_INITIALIZER;
+static void *free_page_bufs = 0;
+static int num_free_page_bufs = 0;
+
+/* Returns a single page page-aligned buffer.
+   The buffer comes off the free list if there is one, else from mmap;
+   0 is returned if mmap fails.  */
+static void *
+get_page_buf ()
+{
+  void *buf;
+
+  spin_lock (&free_page_bufs_lock);
+
+  buf = free_page_bufs;
+  if (buf == 0)
+    {
+      /* Pool is empty: drop the lock before mapping a new page.  */
+      spin_unlock (&free_page_bufs_lock);
+      buf = mmap (0, vm_page_size, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+      if (buf == (void *) -1)
+        buf = 0;
+    }
+  else
+    {
+      /* Pop the head of the list; its first word is the next buffer.  */
+      free_page_bufs = *(void **)buf;
+      num_free_page_bufs--;
+      spin_unlock (&free_page_bufs_lock);
+    }
+
+  return buf;
+}
+
+/* Frees a block returned by get_page_buf. 
*/
+static void
+free_page_buf (void *buf)
+{
+  spin_lock (&free_page_bufs_lock);
+  if (num_free_page_bufs < MAX_FREE_PAGE_BUFS)
+    {
+      /* Keep BUF for reuse: push it on the free list, using its first
+         word as the link.  */
+      *(void **)buf = free_page_bufs;
+      free_page_bufs = buf;
+      num_free_page_bufs++;
+      spin_unlock (&free_page_bufs_lock);
+    }
+  else
+    {
+      /* The pool is full; give the page back.  */
+      spin_unlock (&free_page_bufs_lock);
+      munmap (buf, vm_page_size);
+    }
+}
+
+/* Find the location on disk of page OFFSET in NODE.  Return the disk block
+   in BLOCK (if unallocated, then return 0).  If *LOCK is 0, then a reader
+   lock is acquired on NODE's ALLOC_LOCK before doing anything, and left
+   locked after return -- even if an error is returned.  0 on success or an
+   error code otherwise is returned; EIO means the block at OFFSET would
+   end beyond NODE's allocsize.  */
+static error_t
+find_block (struct node *node, vm_offset_t offset,
+            block_t *block, struct rwlock **lock)
+{
+  error_t err;
+
+  if (!*lock)
+    {
+      *lock = &node->dn->alloc_lock;
+      rwlock_reader_lock (*lock);
+    }
+
+  if (offset + block_size > node->allocsize)
+    return EIO;
+
+  err = ext2_getblk (node, offset >> log2_block_size, 0, block);
+  if (err == EINVAL)
+    /* Don't barf yet if the node is unallocated.  */
+    {
+      *block = 0;
+      err = 0;
+    }
+
+  return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into BUF.  This
+   may need to read several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.  *WRITELOCK is set to 1 if any block
+   of the page is unallocated, and to 0 otherwise.  Returns 0 or an error
+   code; ENOMEM means that no page buffer could be obtained.  */
+static error_t
+file_pager_read_page (struct node *node, vm_offset_t page,
+                      void **buf, int *writelock)
+{
+  error_t err = 0;
+  int offs = 0;
+  int partial = 0;              /* A page truncated by the EOF.  */
+  struct rwlock *lock = NULL;
+  int left = vm_page_size;
+  block_t pending_blocks = 0;
+  int num_pending_blocks = 0;
+
+  /* Read the NUM_PENDING_BLOCKS blocks in PENDING_BLOCKS, into the buffer
+     pointed to by BUF (allocating it if necessary) at offset OFFS.  OFFS is
+     adjusted by the amount read, and NUM_PENDING_BLOCKS is zeroed.  Any read
+     error is returned.  */
+  error_t do_pending_reads ()
+    {
+      if (num_pending_blocks > 0)
+        {
+          block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block;
+          size_t amount = num_pending_blocks << log2_block_size;
+          /* The buffer we try to read into; on the first read, we pass in a
+             size of zero, so that the read is guaranteed to allocate a new
+             buffer, otherwise, we try to read directly into the tail of the
+             buffer we've already got.  */
+          void *new_buf = *buf + offs;
+          size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+
+          STAT_INC (file_pagein_reads);
+
+          err = store_read (store, dev_block, amount, &new_buf, &new_len);
+          if (err)
+            return err;
+          else if (amount != new_len)
+            return EIO;
+
+          if (new_buf != *buf + offs)
+            {
+              /* The read went into a different buffer than the one we
+                 passed. */
+              if (offs == 0)
+                /* First read, make the returned page be our buffer.  */
+                *buf = new_buf;
+              else
+                /* We've already got some buffer, so copy into it.  */
+                {
+                  bcopy (new_buf, *buf + offs, new_len);
+                  free_page_buf (new_buf); /* Return NEW_BUF to our pool.  */
+                  STAT_INC (file_pagein_freed_bufs);
+                }
+            }
+
+          offs += new_len;
+          num_pending_blocks = 0;
+        }
+
+      return 0;
+    }
+
+  STAT_INC (file_pageins);
+
+  *writelock = 0;
+
+  if (page >= node->allocsize)
+    {
+      err = EIO;
+      left = 0;
+    }
+  else if (page + left > node->allocsize)
+    {
+      left = node->allocsize - page;
+      partial = 1;
+    }
+
+  while (left > 0)
+    {
+      block_t block;
+
+      err = find_block (node, page, &block, &lock);
+      if (err)
+        break;
+
+      /* A block that does not directly follow the pending run ends it.  */
+      if (block != pending_blocks + num_pending_blocks)
+        {
+          err = do_pending_reads ();
+          if (err)
+            break;
+          pending_blocks = block;
+        }
+
+      if (block == 0)
+        /* Reading unallocated block, just make a zero-filled one.  */
+        {
+          *writelock = 1;
+          if (offs == 0)
+            /* No page allocated to read into yet.  */
+            {
+              *buf = get_page_buf ();
+              if (! *buf)
+                {
+                  /* Report the failure instead of returning success with
+                     no page to hand back.  */
+                  err = ENOMEM;
+                  break;
+                }
+              STAT_INC (file_pagein_alloced_bufs);
+            }
+          bzero (*buf + offs, block_size);
+          offs += block_size;
+        }
+      else
+        num_pending_blocks++;
+
+      page += block_size;
+      left -= block_size;
+    }
+
+  if (!err && num_pending_blocks > 0)
+    err = do_pending_reads();
+
+  if (!err && partial && !*writelock)
+    node->dn->last_page_partially_writable = 1;
+
+  /* find_block took the reader lock on the first call, if it was called.  */
+  if (lock)
+    rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+/* A run of consecutive disk blocks waiting to be written from
+   consecutive positions in a buffer.  */
+struct pending_blocks
+{
+  /* The block number of the first of the blocks.  */
+  block_t block;
+  /* How many blocks we have.  */
+  off_t num;
+  /* A (page-aligned) buffer pointing to the data we're dealing with.  */
+  void *buf;
+  /* And an offset into BUF.  */
+  int offs;
+};
+
+/* Write any pending blocks in PB, advancing PB's buffer offset past them.
+   Returns 0, the error from store_write, EIO on a short write, or ENOMEM
+   if no bounce buffer could be obtained.  */
+static error_t
+pending_blocks_write (struct pending_blocks *pb)
+{
+  if (pb->num > 0)
+    {
+      error_t err;
+      block_t dev_block = pb->block << log2_dev_blocks_per_fs_block;
+      size_t length = pb->num << log2_block_size, amount;
+
+      ext2_debug ("writing block %lu[%d]", pb->block, pb->num);
+
+      if (pb->offs > 0)
+        /* Put what we're going to write into a page-aligned buffer.  */
+        {
+          void *page_buf = get_page_buf ();
+          if (! page_buf)
+            /* Don't copy through a null pointer when no buffer is to be
+               had.  */
+            return ENOMEM;
+          bcopy (pb->buf + pb->offs, (void *)page_buf, length);
+          err = store_write (store, dev_block, page_buf, length, &amount);
+          free_page_buf (page_buf);
+        }
+      else
+        err = store_write (store, dev_block, pb->buf, length, &amount);
+      if (err)
+        return err;
+      else if (amount != length)
+        return EIO;
+
+      pb->offs += length;
+      pb->num = 0;
+    }
+
+  return 0;
+}
+
+/* Set up PB as an empty run that takes its data from the start of BUF.  */
+static void
+pending_blocks_init (struct pending_blocks *pb, void *buf)
+{
+  pb->buf = buf;
+  pb->block = 0;
+  pb->num = 0;
+  pb->offs = 0;
+}
+
+/* Skip writing the next block in PB's buffer (writing out any previous
+   blocks if necessary). 
*/
+static error_t
+pending_blocks_skip (struct pending_blocks *pb)
+{
+  error_t err = pending_blocks_write (pb);
+  /* The buffer position moves on even if the write failed.  */
+  pb->offs += block_size;
+  return err;
+}
+
+/* Add the disk block BLOCK to the list of destination disk blocks pending in
+   PB.  A block that does not directly follow the current run causes the
+   run to be written out first; an error from that write is returned and
+   BLOCK is then not added.  */
+static error_t
+pending_blocks_add (struct pending_blocks *pb, block_t block)
+{
+  if (block != pb->block + pb->num)
+    {
+      error_t err = pending_blocks_write (pb);
+      if (err)
+        return err;
+      pb->block = block;
+    }
+  pb->num++;
+  return 0;
+}
+
+/* Write one page for the pager backing NODE, at offset OFFSET, from BUF.
+   This may need to write several filesystem blocks to satisfy one page, and
+   tries to consolidate the i/o if possible.  Only the part of the page
+   below NODE's allocsize is written.  Returns 0, or the first error
+   encountered while locating or writing the blocks.  */
+static error_t
+file_pager_write_page (struct node *node, vm_offset_t offset, void *buf)
+{
+  error_t err = 0;
+  struct pending_blocks pb;
+  /* Already non-null, so find_block will not take the lock a second
+     time.  */
+  struct rwlock *lock = &node->dn->alloc_lock;
+  block_t block;
+  int left = vm_page_size;
+
+  pending_blocks_init (&pb, buf);
+
+  /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+     at least for the cases we care about: pager_unlock_page,
+     diskfs_grow and diskfs_truncate.  */
+  rwlock_reader_lock (&node->dn->alloc_lock);
+
+  if (offset >= node->allocsize)
+    left = 0;
+  else if (offset + left > node->allocsize)
+    left = node->allocsize - offset;
+
+  ext2_debug ("writing inode %d page %d[%d]", node->cache_id, offset, left);
+
+  STAT_INC (file_pageouts);
+
+  while (left > 0)
+    {
+      err = find_block (node, offset, &block, &lock);
+      if (err)
+        break;
+      assert (block);
+      /* Adding a block may flush the previous run to disk; a failure
+         there must not be lost.  */
+      err = pending_blocks_add (&pb, block);
+      if (err)
+        break;
+      offset += block_size;
+      left -= block_size;
+    }
+
+  if (!err)
+    /* Report a failure of the final write as well, as
+       disk_pager_write_page does.  */
+    err = pending_blocks_write (&pb);
+
+  rwlock_reader_unlock (&node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Read the page at offset PAGE of the disk into *BUF.  A page that extends
+   past the end of the store is read up to the end and zero-filled after
+   it.  *WRITELOCK is always cleared.  Returns 0, the error from
+   store_read, or EIO if less was read than asked for.  */
+static error_t
+disk_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+{
+  error_t err;
+  size_t length = vm_page_size, read = 0;
+  vm_size_t dev_end = store->size;
+
+  *writelock = 0;
+
+  if (page + vm_page_size > dev_end)
+    length = dev_end - page;
+
+  STAT_INC (disk_pageins);
+
+  err = store_read (store, page >> store->log2_block_size, length, buf, &read);
+  if (err)
+    /* Pass the real error on instead of hiding it behind EIO.  */
+    return err;
+  if (read != length)
+    return EIO;
+  if (length != vm_page_size)
+    bzero ((void *)(*buf + length), vm_page_size - length);
+
+  return 0;
+}
+
+/* Write the page at offset PAGE of the disk from BUF, trimmed to the end
+   of the store.  If MODIFIED_GLOBAL_BLOCKS is set, only the blocks marked
+   in it are written.  Returns 0, the error from the store, or EIO on a
+   short write.  */
+static error_t
+disk_pager_write_page (vm_offset_t page, void *buf)
+{
+  error_t err = 0;
+  size_t length = vm_page_size, amount;
+  vm_size_t dev_end = store->size;
+
+  if (page + vm_page_size > dev_end)
+    length = dev_end - page;
+
+  ext2_debug ("writing disk page %d[%d]", page, length);
+
+  STAT_INC (disk_pageouts);
+
+  if (modified_global_blocks)
+    /* Be picky about which blocks in a page that we write.  */
+    {
+      vm_offset_t offs = page;
+      struct pending_blocks pb;
+
+      pending_blocks_init (&pb, buf);
+
+      while (length > 0 && !err)
+        {
+          block_t block = boffs_block (offs);
+
+          /* We don't clear the block modified bit here because this paging
+             write request may not be the same one that actually set the bit,
+             and our copy of the page may be out of date; we have to leave
+             the bit on in case a paging write request corresponding to the
+             modification comes along later.  The bit is only actually ever
+             cleared if the block is allocated to a file, so this results in
+             excess writes of blocks from modified pages.  Unfortunately I
+             know of no way to get around this given the current external
+             paging interface.  XXXX  */
+          if (test_bit (block, modified_global_blocks))
+            /* This block may have been modified, so write it out.  */
+            err = pending_blocks_add (&pb, block);
+          else
+            /* Otherwise just skip it.  */
+            err = pending_blocks_skip (&pb);
+
+          offs += block_size;
+          length -= block_size;
+        }
+
+      if (!err)
+        err = pending_blocks_write (&pb);
+    }
+  else
+    {
+      err = store_write (store, page >> store->log2_block_size,
+                         buf, length, &amount);
+      if (!err && length != amount)
+        err = EIO;
+    }
+
+  return err;
+}
+
+/* Satisfy a pager read request for either the disk pager or file pager
+   PAGER, to the page at offset PAGE into BUF.  WRITELOCK should be set if
+   the pager should make the page writeable.  */
+error_t
+pager_read_page (struct user_pager_info *pager, vm_offset_t page,
+                 vm_address_t *buf, int *writelock)
+{
+  if (pager->type == DISK)
+    return disk_pager_read_page (page, (void **)buf, writelock);
+  else
+    return file_pager_read_page (pager->node, page, (void **)buf, writelock);
+}
+
+/* Satisfy a pager write request for either the disk pager or file pager
+   PAGER, from the page at offset PAGE from BUF.  */
+error_t
+pager_write_page (struct user_pager_info *pager, vm_offset_t page,
+                  vm_address_t buf)
+{
+  if (pager->type == DISK)
+    return disk_pager_write_page (page, (void *)buf);
+  else
+    return file_pager_write_page (pager->node, page, (void *)buf);
+}
+
+/* Make page PAGE writable, at least up to ALLOCSIZE.  This function and
+   diskfs_grow are the only places that blocks are actually added to the
+   file. 
*/ +error_t +pager_unlock_page (struct user_pager_info *pager, vm_offset_t page) +{ + if (pager->type == DISK) + return 0; + else + { + error_t err; + volatile int partial_page; + struct node *node = pager->node; + struct disknode *dn = node->dn; + + rwlock_writer_lock (&dn->alloc_lock); + + partial_page = (page + vm_page_size > node->allocsize); + + err = diskfs_catch_exception (); + if (!err) + { + block_t block = page >> log2_block_size; + int left = (partial_page ? node->allocsize - page : vm_page_size); + + while (left > 0) + { + block_t disk_block; + err = ext2_getblk (node, block++, 1, &disk_block); + if (err) + break; + left -= block_size; + } + } + diskfs_end_catch_exception (); + + if (partial_page) + /* If an error occurred, this page still isn't writable; otherwise, + since it's at the end of the file, it's now partially writable. */ + dn->last_page_partially_writable = !err; + else if (page + vm_page_size == node->allocsize) + /* This makes the last page writable, which ends exactly at the end + of the file. If any error occurred, the page still isn't + writable, and if not, then the whole thing is writable. */ + dn->last_page_partially_writable = 0; + +#ifdef EXT2FS_DEBUG + if (dn->last_page_partially_writable) + ext2_debug ("made page %u[%lu] in inode %d partially writable", + page, node->allocsize - page, node->cache_id); + else + ext2_debug ("made page %u[%u] in inode %d writable", + page, vm_page_size, node->cache_id); +#endif + + STAT_INC (file_page_unlocks); + + rwlock_writer_unlock (&dn->alloc_lock); + + if (err == ENOSPC) + ext2_warning ("This filesystem is out of space, and will now crash. Bye!"); + else if (err) + ext2_warning ("inode=%d, page=0x%x: %s", + node->cache_id, page, strerror (err)); + + return err; + } +} + +/* Grow the disk allocated to locked node NODE to be at least SIZE bytes, and + set NODE->allocsize to the actual allocated size. (If the allocated size + is already SIZE bytes, do nothing.) 
CRED identifies the user responsible + for the call. */ +error_t +diskfs_grow (struct node *node, off_t size, struct protid *cred) +{ + diskfs_check_readonly (); + assert (!diskfs_readonly); + + if (size > node->allocsize) + { + error_t err = 0; + off_t old_size; + volatile off_t new_size; + volatile block_t end_block; + block_t new_end_block; + struct disknode *dn = node->dn; + + rwlock_writer_lock (&dn->alloc_lock); + + old_size = node->allocsize; + new_size = round_block (size); + + /* The first unallocated blocks after the old and new ends of the + file, respectively. */ + end_block = old_size >> log2_block_size; + new_end_block = new_size >> log2_block_size; + + if (new_end_block > end_block) + { + /* The first block of the first unallocate page after the old end + of the file. If LAST_PAGE_PARTIALLY_WRITABLE is true, any + blocks between this and END_BLOCK were unallocated, but are + considered `unlocked' -- that is pager_unlock_page has been + called on the page they're in. Since after this grow the pager + will expect them to be writable, we'd better allocate them. */ + block_t old_page_end_block = + round_page (old_size) >> log2_block_size; + + ext2_debug ("growing inode %d to %lu bytes (from %lu)", node->cache_id, + new_size, old_size); + + if (dn->last_page_partially_writable + && old_page_end_block > end_block) + { + volatile block_t writable_end = + (old_page_end_block > new_end_block + ? new_end_block + : old_page_end_block); + + ext2_debug ("extending writable page %u by %ld blocks" + "; first new block = %lu", + trunc_page (old_size), + writable_end - end_block, + end_block); + + err = diskfs_catch_exception (); + while (!err && end_block < writable_end) + { + block_t disk_block; + err = ext2_getblk (node, end_block++, 1, &disk_block); + } + diskfs_end_catch_exception (); + + if (err) + /* Reflect how much we allocated successfully. */ + new_size = (end_block - 1) << log2_block_size; + else + /* See if it's still valid to say this. 
*/ + dn->last_page_partially_writable = + (old_page_end_block > end_block); + } + } + + STAT_INC (file_grows); + + ext2_debug ("new size: %ld%s.", new_size, + dn->last_page_partially_writable + ? " (last page writable)": ""); + if (err) + ext2_warning ("inode=%d, target=%ld: %s", + node->cache_id, new_size, strerror (err)); + + node->allocsize = new_size; + + rwlock_writer_unlock (&dn->alloc_lock); + + return err; + } + else + return 0; +} + +/* This syncs a single file (NODE) to disk. Wait for all I/O to complete + if WAIT is set. NODE->lock must be held. */ +void +diskfs_file_update (struct node *node, int wait) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + pager_sync (pager, wait); + ports_port_deref (pager); + } + + pokel_sync (&node->dn->indir_pokel, wait); + + diskfs_node_update (node, wait); +} + +/* Invalidate any pager data associated with NODE. */ +void +flush_node_pager (struct node *node) +{ + struct pager *pager; + struct disknode *dn = node->dn; + + spin_lock (&node_to_page_lock); + pager = dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + pager_flush (pager, 1); + ports_port_deref (pager); + } +} + + +/* Return in *OFFSET and *SIZE the minimum valid address the pager will + accept and the size of the object. */ +inline error_t +pager_report_extent (struct user_pager_info *pager, + vm_address_t *offset, vm_size_t *size) +{ + assert (pager->type == DISK || pager->type == FILE_DATA); + + *offset = 0; + + if (pager->type == DISK) + *size = store->size; + else + *size = pager->node->allocsize; + + return 0; +} + +/* This is called when a pager is being deallocated after all extant send + rights have been destroyed. 
*/ +void +pager_clear_user_data (struct user_pager_info *upi) +{ + if (upi->type == FILE_DATA) + { + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = upi->node->dn->pager; + if (pager && pager_get_upi (pager) == upi) + upi->node->dn->pager = 0; + spin_unlock (&node_to_page_lock); + + diskfs_nrele_light (upi->node); + } + + free (upi); +} + +/* This will be called when the ports library wants to drop weak references. + The pager library creates no weak references itself. If the user doesn't + either, then it's OK for this function to do nothing. */ +void +pager_dropweak (struct user_pager_info *p __attribute__ ((unused))) +{ +} + +/* Create the DISK pager. */ +void +create_disk_pager (void) +{ + struct user_pager_info *upi = malloc (sizeof (struct user_pager_info)); + upi->type = DISK; + pager_bucket = ports_create_bucket (); + diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, store->size, + &disk_image); +} + +/* Call this to create a FILE_DATA pager and return a send right. + NODE must be locked. */ +mach_port_t +diskfs_get_filemap (struct node *node, vm_prot_t prot) +{ + mach_port_t right; + + assert (S_ISDIR (node->dn_stat.st_mode) + || S_ISREG (node->dn_stat.st_mode) + || (S_ISLNK (node->dn_stat.st_mode))); + + spin_lock (&node_to_page_lock); + do + { + struct pager *pager = node->dn->pager; + if (pager) + { + /* Because PAGER is not a real reference, + this might be nearly deallocated. If that's so, then + the port right will be null. In that case, clear here + and loop. The deallocation will complete separately. 
*/ + right = pager_get_port (pager); + if (right == MACH_PORT_NULL) + node->dn->pager = 0; + else + pager_get_upi (pager)->max_prot |= prot; + } + else + { + struct user_pager_info *upi = + malloc (sizeof (struct user_pager_info)); + upi->type = FILE_DATA; + upi->node = node; + upi->max_prot = 0; + diskfs_nref_light (node); + node->dn->pager = + pager_create (upi, pager_bucket, MAY_CACHE, + MEMORY_OBJECT_COPY_DELAY); + if (node->dn->pager == 0) + { + diskfs_nrele_light (node); + free (upi); + spin_unlock (&node_to_page_lock); + return MACH_PORT_NULL; + } + + right = pager_get_port (node->dn->pager); + ports_port_deref (node->dn->pager); + } + } + while (right == MACH_PORT_NULL); + spin_unlock (&node_to_page_lock); + + mach_port_insert_right (mach_task_self (), right, right, + MACH_MSG_TYPE_MAKE_SEND); + + return right; +} + +/* Call this when we should turn off caching so that unused memory object + ports get freed. */ +void +drop_pager_softrefs (struct node *node) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (MAY_CACHE && pager) + pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0); + if (pager) + ports_port_deref (pager); +} + +/* Call this when we should turn on caching because it's no longer + important for unused memory object ports to get freed. */ +void +allow_pager_softrefs (struct node *node) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (MAY_CACHE && pager) + pager_change_attributes (pager, 1, MEMORY_OBJECT_COPY_DELAY, 0); + if (pager) + ports_port_deref (pager); +} + +/* Call this to find out the struct pager * corresponding to the + FILE_DATA pager of inode IP. 
This should be used *only* as a subsequent + argument to register_memory_fault_area, and will be deleted when + the kernel interface is fixed. NODE must be locked. */ +struct pager * +diskfs_get_filemap_pager_struct (struct node *node) +{ + /* This is safe because pager can't be cleared; there must be + an active mapping for this to be called. */ + return node->dn->pager; +} + +/* Shutdown all the pagers (except the disk pager). */ +void +diskfs_shutdown_pager () +{ + error_t shutdown_one (void *v_p) + { + struct pager *p = v_p; + if (p != diskfs_disk_pager) + pager_shutdown (p); + return 0; + } + + write_all_disknodes (); + + ports_bucket_iterate (pager_bucket, shutdown_one); + + /* Sync everything on the the disk pager. */ + sync_global (1); + + /* Despite the name of this function, we never actually shutdown the disk + pager, just make sure it's synced. */ +} + +/* Sync all the pagers. */ +void +diskfs_sync_everything (int wait) +{ + error_t sync_one (void *v_p) + { + struct pager *p = v_p; + if (p != diskfs_disk_pager) + pager_sync (p, wait); + return 0; + } + + write_all_disknodes (); + ports_bucket_iterate (pager_bucket, sync_one); + + /* Do things on the the disk pager. */ + sync_global (wait); +} + +static void +disable_caching () +{ + error_t block_cache (void *arg) + { + struct pager *p = arg; + + pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1); + return 0; + } + + /* Loop through the pagers and turn off caching one by one, + synchronously. That should cause termination of each pager. 
*/ + ports_bucket_iterate (pager_bucket, block_cache); +} + +static void +enable_caching () +{ + error_t enable_cache (void *arg) + { + struct pager *p = arg; + struct user_pager_info *upi = pager_get_upi (p); + + pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0); + + /* It's possible that we didn't have caching on before, because + the user here is the only reference to the underlying node + (actually, that's quite likely inside this particular + routine), and if that node has no links. So dinkle the node + ref counting scheme here, which will cause caching to be + turned off, if that's really necessary. */ + if (upi->type == FILE_DATA) + { + diskfs_nref (upi->node); + diskfs_nrele (upi->node); + } + + return 0; + } + + ports_bucket_iterate (pager_bucket, enable_cache); +} + +/* Tell diskfs if there are pagers exported, and if none, then + prevent any new ones from showing up. */ +int +diskfs_pager_users () +{ + int npagers = ports_count_bucket (pager_bucket); + + if (npagers <= 1) + return 0; + + if (MAY_CACHE) + { + disable_caching (); + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + npagers = ports_count_bucket (pager_bucket); + if (npagers <= 1) + return 0; + + /* Darn, there are actual honest users. Turn caching back on, + and return failure. */ + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return 1; +} + +/* Return the bitwise or of the maximum prot parameter (the second arg to + diskfs_get_filemap) for all active user pagers. */ +vm_prot_t +diskfs_max_user_pager_prot () +{ + vm_prot_t max_prot = 0; + int npagers = ports_count_bucket (pager_bucket); + + if (npagers > 1) + /* More than just the disk pager. */ + { + error_t add_pager_max_prot (void *v_p) + { + struct pager *p = v_p; + struct user_pager_info *upi = pager_get_upi (p); + if (upi->type == FILE_DATA) + max_prot |= upi->max_prot; + /* Stop iterating if MAX_PROT is as filled as it's going to get. 
*/ + return + (max_prot == (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ? 1 : 0; + } + + disable_caching (); /* Make any silly pagers go away. */ + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + ports_bucket_iterate (pager_bucket, add_pager_max_prot); + + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return max_prot; +} diff --git a/ext2fs/pokel.c b/ext2fs/pokel.c new file mode 100644 index 00000000..85b4d2d1 --- /dev/null +++ b/ext2fs/pokel.c @@ -0,0 +1,172 @@ +/* A data structure to remember modifications to a memory region + + Copyright (C) 1995 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ext2fs.h" + +void +pokel_init (struct pokel *pokel, struct pager *pager, void *image) +{ + pokel->lock = SPIN_LOCK_INITIALIZER; + pokel->pokes = NULL; + pokel->free_pokes = NULL; + pokel->pager = pager; + pokel->image = image; +} + +/* Clean up any state associated with POKEL (but don't free POKEL). 
*/ +void +pokel_finalize (struct pokel *pokel) +{ + struct poke *pl, *next; + for (pl = pokel->pokes; pl; pl = next) + { + next = pl->next; + free (pl); + } + for (pl = pokel->free_pokes; pl; pl = next) + { + next = pl->next; + free (pl); + } +} + +/* Remember that data here on the disk has been modified. */ +void +pokel_add (struct pokel *pokel, void *loc, vm_size_t length) +{ + struct poke *pl; + vm_offset_t offset = trunc_page (loc - pokel->image); + vm_offset_t end = round_page (loc + length - pokel->image); + + ext2_debug ("adding %p[%ul] (range 0x%x to 0x%x)", loc, length, offset, end); + + spin_lock (&pokel->lock); + + pl = pokel->pokes; + while (pl != NULL) + { + vm_offset_t p_offs = pl->offset; + vm_size_t p_end = p_offs + pl->length; + + if (p_offs == offset && p_end == end) + break; + else if (p_end >= offset && end >= p_offs) + { + pl->offset = offset < p_offs ? offset : p_offs; + pl->length = (end > p_end ? end : p_end) - pl->offset; + ext2_debug ("extended 0x%x[%ul] to 0x%x[%ul]", + p_offs, p_end - p_offs, pl->offset, pl->length); + break; + } + + pl = pl->next; + } + + if (pl == NULL) + { + pl = pokel->free_pokes; + if (pl == NULL) + { + pl = malloc (sizeof (struct poke)); + assert (pl); + } + else + pokel->free_pokes = pl->next; + pl->offset = offset; + pl->length = end - offset; + pl->next = pokel->pokes; + pokel->pokes = pl; + } + + spin_unlock (&pokel->lock); +} + +/* Move all pending pokes from POKEL into its free list. If SYNC is true, + otherwise do nothing. 
*/ +void +_pokel_exec (struct pokel *pokel, int sync, int wait) +{ + struct poke *pl, *pokes, *last = NULL; + + spin_lock (&pokel->lock); + pokes = pokel->pokes; + pokel->pokes = NULL; + spin_unlock (&pokel->lock); + + for (pl = pokes; pl; last = pl, pl = pl->next) + if (sync) + { + ext2_debug ("syncing 0x%x[%ul]", pl->offset, pl->length); + pager_sync_some (pokel->pager, pl->offset, pl->length, wait); + } + + if (last) + { + spin_lock (&pokel->lock); + last->next = pokel->free_pokes; + pokel->free_pokes = pokes; + spin_unlock (&pokel->lock); + } +} + +/* Sync all the modified pieces of disk */ +void +pokel_sync (struct pokel *pokel, int wait) +{ + _pokel_exec (pokel, 1, wait); +} + +/* Flush (that is, drop on the ground) all pending pokes in POKEL. */ +void +pokel_flush (struct pokel *pokel) +{ + _pokel_exec (pokel, 0, 0); +} + +/* Transfer all regions from FROM to POKEL, which must have the same pager. */ +void +pokel_inherit (struct pokel *pokel, struct pokel *from) +{ + struct poke *pokes, *last; + + assert (pokel->pager == from->pager); + assert (pokel->image == from->image); + + /* Take all pokes from FROM... */ + spin_lock (&from->lock); + pokes = from->pokes; + from->pokes = NULL; + spin_unlock (&from->lock); + + /* And put them in POKEL. 
*/ + spin_lock (&pokel->lock); + last = pokel->pokes; + if (last) + { + while (last->next) + last = last->next; + last->next = pokes; + } + else + pokel->pokes = pokes; + spin_unlock (&pokel->lock); +} diff --git a/ext2fs/sblock.words b/ext2fs/sblock.words new file mode 100644 index 00000000..e17b8fa3 --- /dev/null +++ b/ext2fs/sblock.words @@ -0,0 +1,6 @@ +inodes blocks r_blocks free_blocks +free_inodes first_dblock log_block_size log_frag_size +blocks/group frags/group inodes/group mtime +wtime mnt_cnt;max magic;state errors;pad +lastcheck check_int creator_os rev_level +res_uid;gid diff --git a/ext2fs/storeinfo.c b/ext2fs/storeinfo.c new file mode 100644 index 00000000..ce7bc534 --- /dev/null +++ b/ext2fs/storeinfo.c @@ -0,0 +1,131 @@ +/* Access to file layout information + + Copyright (C) 1996 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include <string.h> +#include <hurd/store.h> + +#include "ext2fs.h" + +error_t +diskfs_S_file_get_storage_info (struct protid *cred, + mach_port_t **ports, + mach_msg_type_name_t *ports_type, + mach_msg_type_number_t *num_ports, + int **ints, mach_msg_type_number_t *num_ints, + off_t **offsets, + mach_msg_type_number_t *num_offsets, + char **data, mach_msg_type_number_t *data_len) +{ + error_t err = 0; + unsigned num_fs_blocks; + struct store *file_store; + struct store_run *runs, *run = 0; + block_t index = 0; + size_t num_runs = 0, runs_alloced = 10; + struct node *node = cred->po->np; + + runs = malloc (runs_alloced * sizeof (struct store_run)); + if (! runs) + return ENOMEM; + + mutex_lock (&node->lock); + + /* NUM_FS_BLOCKS counts down the blocks in the file that we've not + enumerated yet; when it hits zero, we can stop. */ + if (node->dn_stat.st_size < node->dn_stat.st_blocks * 512) + /* The value indicated by st_blocks is too big (because it includes + indirect blocks), so use the size of the file. */ + num_fs_blocks = + (node->dn_stat.st_size + block_size - 1) >> log2_block_size; + else + num_fs_blocks = node->dn_stat.st_blocks >> log2_stat_blocks_per_fs_block; + + while (num_fs_blocks-- > 0) + { + block_t block; + + err = ext2_getblk (node, index++, 0, &block); + if (err == EINVAL) + /* Either a hole, or past the end of the file. */ + { + block = 0; + err = 0; + } + else if (err) + break; + + block <<= log2_dev_blocks_per_fs_block; + if (num_runs == 0 + || ((block && run->start >= 0) /* Neither is a hole and... */ + ? (block != run->start + run->length) /* BLOCK doesn't follow RUN */ + : (block || run->start >= 0))) /* or one is, but not both */ + /* Add a new run. */ + { + if (num_runs == runs_alloced) + /* Make some more space in RUNS. */ + { + struct store_run *new; + runs_alloced *= 2; + new = realloc (runs, runs_alloced * sizeof (struct store_run)); + if (! 
new) + { + err = ENOMEM; + break; + } + runs = new; + } + + run = runs + num_runs++; + run->start = block ?: -1; /* -1 means a hole in OFFSETS */ + run->length = 0; /* will get extended just below */ + } + + /* Increase the size of the current run by one filesystem block. */ + run->length += 1 << log2_dev_blocks_per_fs_block; + } + + mutex_unlock (&node->lock); + + if (! err) + err = store_clone (store, &file_store); + if (! err) + { + err = store_remap (file_store, runs, num_runs, &file_store); + if (!err + && !idvec_contains (cred->user->uids, 0) + && !store_is_securely_returnable (file_store, cred->po->openstat)) + { + err = store_set_flags (file_store, STORE_INACTIVE); + if (err == EINVAL) + err = EACCES; + } + if (! err) + { + *ports_type = MACH_MSG_TYPE_COPY_SEND; + err = store_return (file_store, ports, num_ports, ints, num_ints, + offsets, num_offsets, data, data_len); + } + store_free (file_store); + } + + free (runs); + + return err; +} diff --git a/ext2fs/truncate.c b/ext2fs/truncate.c new file mode 100644 index 00000000..336981cc --- /dev/null +++ b/ext2fs/truncate.c @@ -0,0 +1,365 @@ +/* File truncation + + Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include "ext2fs.h" + +#ifdef DONT_CACHE_MEMORY_OBJECTS +#define MAY_CACHE 0 +#else +#define MAY_CACHE 1 +#endif + +/* ---------------------------------------------------------------- */ + +/* A sequence of blocks to be freed in NODE. */ +struct free_block_run +{ + block_t first_block; + unsigned long num_blocks; + struct node *node; +}; + +/* Initialize FBR, pointing to NODE. */ +static inline void +free_block_run_init (struct free_block_run *fbr, struct node *node) +{ + fbr->num_blocks = 0; + fbr->node = node; +} + +static inline void +_free_block_run_flush (struct free_block_run *fbr, unsigned long count) +{ + fbr->node->dn_stat.st_blocks -= count << log2_stat_blocks_per_fs_block; + fbr->node->dn_stat_dirty = 1; + ext2_free_blocks (fbr->first_block, count); +} + +/* Add BLOCK to the list of blocks to be freed in FBR. */ +static inline void +free_block_run_add (struct free_block_run *fbr, block_t block) +{ + unsigned long count = fbr->num_blocks; + if (count == 0) + { + fbr->first_block = block; + fbr->num_blocks++; + } + else if (count > 0 && fbr->first_block == block - count) + fbr->num_blocks++; + else + { + _free_block_run_flush (fbr, count); + fbr->first_block = block; + fbr->num_blocks = 1; + } +} + +/* If *P is non-zero, set it to zero, and add the block it pointed to the + list of blocks to be freed in FBR. */ +static inline void +free_block_run_free_ptr (struct free_block_run *fbr, block_t *p) +{ + block_t block = *p; + if (block) + { + *p = 0; + free_block_run_add (fbr, block); + } +} + +/* Free any blocks left in FBR, and cleanup any resources it's using. */ +static inline void +free_block_run_finish (struct free_block_run *fbr) +{ + unsigned long count = fbr->num_blocks; + if (count > 0) + _free_block_run_flush (fbr, count); +} + +/* ---------------------------------------------------------------- */ + +/* Free any direct blocks starting with block END. 
*/ +static void +trunc_direct (struct node *node, block_t end, struct free_block_run *fbr) +{ + block_t *blocks = node->dn->info.i_data; + + ext2_debug ("truncating direct blocks from %ld", end); + + while (end < EXT2_NDIR_BLOCKS) + free_block_run_free_ptr (fbr, blocks + end++); +} + +/* Free any blocks in NODE greater than or equal to END that are rooted in + the indirect block *P; OFFSET should be the block position that *P + corresponds to. For each block pointer in *P that should be freed, + FREE_BLOCK is called with a pointer to the entry for that block, and the + index of the entry within *P. If every block in *P is freed, then *P is + set to 0, otherwise it is left alone. */ +static void +trunc_indirect (struct node *node, block_t end, + block_t *p, block_t offset, + void (*free_block)(block_t *p, unsigned index), + struct free_block_run *fbr) +{ + if (*p) + { + unsigned index; + int modified = 0, all_freed = 1; + block_t *ind_bh = (block_t *)bptr (*p); + unsigned first = end < offset ? 0 : end - offset; + + for (index = first; index < addr_per_block; index++) + if (ind_bh[index]) + { + (*free_block)(ind_bh + index, index); + if (ind_bh[index]) + all_freed = 0; /* Some descendent hasn't been freed. 
*/ + else + modified = 1; + } + + if (first == 0 && all_freed) + free_block_run_free_ptr (fbr, p); + else if (modified) + record_indir_poke (node, ind_bh); + } +} + +static void +trunc_single_indirect (struct node *node, block_t end, + block_t *p, block_t offset, + struct free_block_run *fbr) +{ + void free_block (block_t *p, unsigned index) + { + free_block_run_free_ptr (fbr, p); + } + trunc_indirect (node, end, p, offset, free_block, fbr); +} + +static void +trunc_double_indirect (struct node *node, block_t end, + block_t *p, block_t offset, + struct free_block_run *fbr) +{ + void free_block (block_t *p, unsigned index) + { + block_t entry_offs = offset + (index * addr_per_block); + trunc_single_indirect (node, end, p, entry_offs, fbr); + } + trunc_indirect (node, end, p, offset, free_block, fbr); +} + +static void +trunc_triple_indirect (struct node *node, block_t end, + block_t *p, block_t offset, + struct free_block_run *fbr) +{ + void free_block (block_t *p, unsigned index) + { + block_t entry_offs = offset + (index * addr_per_block * addr_per_block); + trunc_double_indirect (node, end, p, entry_offs, fbr); + } + trunc_indirect (node, end, p, offset, free_block, fbr); +} + +/* ---------------------------------------------------------------- */ + +/* Write something to each page from START to END inclusive of memory + object OBJ, but make sure the data doesns't actually change. 
*/ +static void +poke_pages (memory_object_t obj, vm_offset_t start, vm_offset_t end) +{ + while (start < end) + { + error_t err; + vm_size_t len = 8 * vm_page_size; + vm_address_t addr = 0; + + if (len > end - start) + len = end - start; + + err = vm_map (mach_task_self (), &addr, len, 0, 1, obj, start, 0, + VM_PROT_WRITE|VM_PROT_READ, VM_PROT_READ|VM_PROT_WRITE, 0); + if (!err) + { + vm_address_t poke; + for (poke = addr; poke < addr + len; poke += vm_page_size) + *(volatile int *)poke = *(volatile int *)poke; + munmap ((caddr_t) addr, len); + } + + start += len; + } +} + +/* Flush all the data past the new size from the kernel. Also force any + delayed copies of this data to take place immediately. (We are implicitly + changing the data to zeros and doing it without the kernel's immediate + knowledge; accordingl we must help out the kernel thusly.) */ +static void +force_delayed_copies (struct node *node, off_t length) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + mach_port_t obj; + + pager_change_attributes (pager, MAY_CACHE, MEMORY_OBJECT_COPY_NONE, 1); + obj = diskfs_get_filemap (node, VM_PROT_READ); + if (obj != MACH_PORT_NULL) + { + /* XXX should cope with errors from diskfs_get_filemap */ + poke_pages (obj, round_page (length), round_page (node->allocsize)); + mach_port_deallocate (mach_task_self (), obj); + pager_flush_some (pager, round_page(length), + node->allocsize - length, 1); + } + + ports_port_deref (pager); + } +} + +static void +enable_delayed_copies (struct node *node) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + pager_change_attributes (pager, MAY_CACHE, MEMORY_OBJECT_COPY_DELAY, 0); + ports_port_deref (pager); + } +} + +/* 
---------------------------------------------------------------- */ + +/* The user must define this function. Truncate locked node NODE to be SIZE + bytes long. (If NODE is already less than or equal to SIZE bytes + long, do nothing.) If this is a symlink (and diskfs_shortcut_symlink + is set) then this should clear the symlink, even if + diskfs_create_symlink_hook stores the link target elsewhere. */ +error_t +diskfs_truncate (struct node *node, off_t length) +{ + error_t err; + off_t offset; + + diskfs_check_readonly (); + assert (!diskfs_readonly); + + if (length >= node->dn_stat.st_size) + return 0; + + if (! node->dn_stat.st_blocks) + /* There aren't really any blocks allocated, so just frob the size. This + is true for fast symlinks, and also apparently for some device nodes + in linux. */ + { + node->dn_stat.st_size = length; + node->dn_set_mtime = 1; + node->dn_set_ctime = 1; + diskfs_node_update (node, 1); + return 0; + } + + /* + * If the file is not being truncated to a block boundary, the + * contents of the partial block following the end of the file must be + * zeroed in case it ever becomes accessible again because of + * subsequent file growth. + */ + offset = length % block_size; + if (offset > 0) + { + diskfs_node_rdwr (node, (void *)zeroblock, length, block_size - offset, + 1, 0, 0); + diskfs_file_update (node, 1); + } + + ext2_discard_prealloc(node); + + force_delayed_copies (node, length); + + rwlock_writer_lock (&node->dn->alloc_lock); + + /* Update the size on disk; fsck will finish freeing blocks if necessary + should we crash. 
*/ + node->dn_stat.st_size = length; + node->dn_set_mtime = 1; + node->dn_set_ctime = 1; + diskfs_node_update (node, 1); + + err = diskfs_catch_exception (); + if (!err) + { + block_t end = boffs_block (round_block (length)), offs; + block_t *bptrs = node->dn->info.i_data; + struct free_block_run fbr; + + free_block_run_init (&fbr, node); + + trunc_direct (node, end, &fbr); + + offs = EXT2_NDIR_BLOCKS; + trunc_single_indirect (node, end, bptrs + EXT2_IND_BLOCK, offs, &fbr); + offs += addr_per_block; + trunc_double_indirect (node, end, bptrs + EXT2_DIND_BLOCK, offs, &fbr); + offs += addr_per_block * addr_per_block; + trunc_triple_indirect (node, end, bptrs + EXT2_TIND_BLOCK, offs, &fbr); + + free_block_run_finish (&fbr); + + node->allocsize = round_block (length); + + /* Set our last_page_partially_writable to a pessimistic state -- it + won't hurt if is wrong. */ + node->dn->last_page_partially_writable = + trunc_page (node->allocsize) != node->allocsize; + + diskfs_end_catch_exception (); + } + + node->dn_set_mtime = 1; + node->dn_set_ctime = 1; + node->dn_stat_dirty = 1; + + /* Now we can permit delayed copies again. */ + enable_delayed_copies (node); + + rwlock_writer_unlock (&node->dn->alloc_lock); + + return err; +} |