root/kernel/fs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. readsb
  2. fsinit
  3. bzero
  4. balloc
  5. bfree
  6. iinit
  7. ialloc
  8. iupdate
  9. iget
  10. idup
  11. ilock
  12. iunlock
  13. iput
  14. iunlockput
  15. ireclaim
  16. bmap
  17. itrunc
  18. stati
  19. readi
  20. writei
  21. namecmp
  22. dirlookup
  23. dirlink
  24. skipelem
  25. namex
  26. namei
  27. nameiparent

   1 // File system implementation.  Five layers:
   2 //   + Blocks: allocator for raw disk blocks.
   3 //   + Log: crash recovery for multi-step updates.
   4 //   + Files: inode allocator, reading, writing, metadata.
   5 //   + Directories: inode with special contents (list of other inodes!)
   6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
   7 //
   8 // This file contains the low-level file system manipulation
   9 // routines.  The (higher-level) system call implementations
  10 // are in sysfile.c.
  11 
  12 #include "types.h"
  13 #include "riscv.h"
  14 #include "defs.h"
  15 #include "param.h"
  16 #include "stat.h"
  17 #include "spinlock.h"
  18 #include "proc.h"
  19 #include "sleeplock.h"
  20 #include "fs.h"
  21 #include "buf.h"
  22 #include "file.h"
  23 
  24 #define min(a, b) ((a) < (b) ? (a) : (b))
  25 // there should be one superblock per disk device, but we run with
  26 // only one device
  27 struct superblock sb; 
  28 
  29 // Read the super block.
  30 static void
  31 readsb(int dev, struct superblock *sb)
  32 {
  33   struct buf *bp;
  34 
  35   bp = bread(dev, 1);
  36   memmove(sb, bp->data, sizeof(*sb));
  37   brelse(bp);
  38 }
  39 
  40 // Init fs
  41 void
  42 fsinit(int dev) {
  43   readsb(dev, &sb);
  44   if(sb.magic != FSMAGIC)
  45     panic("invalid file system");
  46   initlog(dev, &sb);
  47   ireclaim(dev);
  48 }
  49 
  50 // Zero a block.
  51 static void
  52 bzero(int dev, int bno)
  53 {
  54   struct buf *bp;
  55 
  56   bp = bread(dev, bno);
  57   memset(bp->data, 0, BSIZE);
  58   log_write(bp);
  59   brelse(bp);
  60 }
  61 
  62 // Blocks.
  63 
  64 // Allocate a zeroed disk block.
  65 // returns 0 if out of disk space.
  66 static uint
  67 balloc(uint dev)
  68 {
  69   int b, bi, m;
  70   struct buf *bp;
  71 
  72   bp = 0;
  73   for(b = 0; b < sb.size; b += BPB){
  74     bp = bread(dev, BBLOCK(b, sb));
  75     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
  76       m = 1 << (bi % 8);
  77       if((bp->data[bi/8] & m) == 0){  // Is block free?
  78         bp->data[bi/8] |= m;  // Mark block in use.
  79         log_write(bp);
  80         brelse(bp);
  81         bzero(dev, b + bi);
  82         return b + bi;
  83       }
  84     }
  85     brelse(bp);
  86   }
  87   printf("balloc: out of blocks\n");
  88   return 0;
  89 }
  90 
  91 // Free a disk block.
  92 static void
  93 bfree(int dev, uint b)
  94 {
  95   struct buf *bp;
  96   int bi, m;
  97 
  98   bp = bread(dev, BBLOCK(b, sb));
  99   bi = b % BPB;
 100   m = 1 << (bi % 8);
 101   if((bp->data[bi/8] & m) == 0)
 102     panic("freeing free block");
 103   bp->data[bi/8] &= ~m;
 104   log_write(bp);
 105   brelse(bp);
 106 }
 107 
 108 // Inodes.
 109 //
 110 // An inode describes a single unnamed file.
 111 // The inode disk structure holds metadata: the file's type,
 112 // its size, the number of links referring to it, and the
 113 // list of blocks holding the file's content.
 114 //
 115 // The inodes are laid out sequentially on disk at block
 116 // sb.inodestart. Each inode has a number, indicating its
 117 // position on the disk.
 118 //
 119 // The kernel keeps a table of in-use inodes in memory
 120 // to provide a place for synchronizing access
 121 // to inodes used by multiple processes. The in-memory
 122 // inodes include book-keeping information that is
 123 // not stored on disk: ip->ref and ip->valid.
 124 //
 125 // An inode and its in-memory representation go through a
 126 // sequence of states before they can be used by the
 127 // rest of the file system code.
 128 //
 129 // * Allocation: an inode is allocated if its type (on disk)
 130 //   is non-zero. ialloc() allocates, and iput() frees if
 131 //   the reference and link counts have fallen to zero.
 132 //
 133 // * Referencing in table: an entry in the inode table
 134 //   is free if ip->ref is zero. Otherwise ip->ref tracks
 135 //   the number of in-memory pointers to the entry (open
 136 //   files and current directories). iget() finds or
 137 //   creates a table entry and increments its ref; iput()
 138 //   decrements ref.
 139 //
 140 // * Valid: the information (type, size, &c) in an inode
 141 //   table entry is only correct when ip->valid is 1.
 142 //   ilock() reads the inode from
 143 //   the disk and sets ip->valid, while iput() clears
 144 //   ip->valid if ip->ref has fallen to zero.
 145 //
 146 // * Locked: file system code may only examine and modify
 147 //   the information in an inode and its content if it
 148 //   has first locked the inode.
 149 //
 150 // Thus a typical sequence is:
 151 //   ip = iget(dev, inum)
 152 //   ilock(ip)
 153 //   ... examine and modify ip->xxx ...
 154 //   iunlock(ip)
 155 //   iput(ip)
 156 //
 157 // ilock() is separate from iget() so that system calls can
 158 // get a long-term reference to an inode (as for an open file)
 159 // and only lock it for short periods (e.g., in read()).
 160 // The separation also helps avoid deadlock and races during
 161 // pathname lookup. iget() increments ip->ref so that the inode
 162 // stays in the table and pointers to it remain valid.
 163 //
 164 // Many internal file system functions expect the caller to
 165 // have locked the inodes involved; this lets callers create
 166 // multi-step atomic operations.
 167 //
 168 // The itable.lock spin-lock protects the allocation of itable
 169 // entries. Since ip->ref indicates whether an entry is free,
 170 // and ip->dev and ip->inum indicate which i-node an entry
 171 // holds, one must hold itable.lock while using any of those fields.
 172 //
 173 // An ip->lock sleep-lock protects all ip-> fields other than ref,
 174 // dev, and inum.  One must hold ip->lock in order to
 175 // read or write that inode's ip->valid, ip->size, ip->type, &c.
 176 
 177 struct {
 178   struct spinlock lock;
 179   struct inode inode[NINODE];
 180 } itable;
 181 
 182 void
 183 iinit()
 184 {
 185   int i = 0;
 186   
 187   initlock(&itable.lock, "itable");
 188   for(i = 0; i < NINODE; i++) {
 189     initsleeplock(&itable.inode[i].lock, "inode");
 190   }
 191 }
 192 
 193 static struct inode* iget(uint dev, uint inum);
 194 
 195 // Allocate an inode on device dev.
 196 // Mark it as allocated by  giving it type type.
 197 // Returns an unlocked but allocated and referenced inode,
 198 // or NULL if there is no free inode.
 199 struct inode*
 200 ialloc(uint dev, short type)
 201 {
 202   int inum;
 203   struct buf *bp;
 204   struct dinode *dip;
 205 
 206   for(inum = 1; inum < sb.ninodes; inum++){
 207     bp = bread(dev, IBLOCK(inum, sb));
 208     dip = (struct dinode*)bp->data + inum%IPB;
 209     if(dip->type == 0){  // a free inode
 210       memset(dip, 0, sizeof(*dip));
 211       dip->type = type;
 212       log_write(bp);   // mark it allocated on the disk
 213       brelse(bp);
 214       return iget(dev, inum);
 215     }
 216     brelse(bp);
 217   }
 218   printf("ialloc: no inodes\n");
 219   return 0;
 220 }
 221 
 222 // Copy a modified in-memory inode to disk.
 223 // Must be called after every change to an ip->xxx field
 224 // that lives on disk.
 225 // Caller must hold ip->lock.
 226 void
 227 iupdate(struct inode *ip)
 228 {
 229   struct buf *bp;
 230   struct dinode *dip;
 231 
 232   bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 233   dip = (struct dinode*)bp->data + ip->inum%IPB;
 234   dip->type = ip->type;
 235   dip->major = ip->major;
 236   dip->minor = ip->minor;
 237   dip->nlink = ip->nlink;
 238   dip->size = ip->size;
 239   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
 240   log_write(bp);
 241   brelse(bp);
 242 }
 243 
 244 // Find the inode with number inum on device dev
 245 // and return the in-memory copy. Does not lock
 246 // the inode and does not read it from disk.
 247 static struct inode*
 248 iget(uint dev, uint inum)
 249 {
 250   struct inode *ip, *empty;
 251 
 252   acquire(&itable.lock);
 253 
 254   // Is the inode already in the table?
 255   empty = 0;
 256   for(ip = &itable.inode[0]; ip < &itable.inode[NINODE]; ip++){
 257     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
 258       ip->ref++;
 259       release(&itable.lock);
 260       return ip;
 261     }
 262     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
 263       empty = ip;
 264   }
 265 
 266   // Recycle an inode entry.
 267   if(empty == 0)
 268     panic("iget: no inodes");
 269 
 270   ip = empty;
 271   ip->dev = dev;
 272   ip->inum = inum;
 273   ip->ref = 1;
 274   ip->valid = 0;
 275   release(&itable.lock);
 276 
 277   return ip;
 278 }
 279 
 280 // Increment reference count for ip.
 281 // Returns ip to enable ip = idup(ip1) idiom.
 282 struct inode*
 283 idup(struct inode *ip)
 284 {
 285   acquire(&itable.lock);
 286   ip->ref++;
 287   release(&itable.lock);
 288   return ip;
 289 }
 290 
 291 // Lock the given inode.
 292 // Reads the inode from disk if necessary.
 293 void
 294 ilock(struct inode *ip)
 295 {
 296   struct buf *bp;
 297   struct dinode *dip;
 298 
 299   if(ip == 0 || ip->ref < 1)
 300     panic("ilock");
 301 
 302   acquiresleep(&ip->lock);
 303 
 304   if(ip->valid == 0){
 305     bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 306     dip = (struct dinode*)bp->data + ip->inum%IPB;
 307     ip->type = dip->type;
 308     ip->major = dip->major;
 309     ip->minor = dip->minor;
 310     ip->nlink = dip->nlink;
 311     ip->size = dip->size;
 312     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
 313     brelse(bp);
 314     ip->valid = 1;
 315     if(ip->type == 0)
 316       panic("ilock: no type");
 317   }
 318 }
 319 
 320 // Unlock the given inode.
 321 void
 322 iunlock(struct inode *ip)
 323 {
 324   if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
 325     panic("iunlock");
 326 
 327   releasesleep(&ip->lock);
 328 }
 329 
 330 // Drop a reference to an in-memory inode.
 331 // If that was the last reference, the inode table entry can
 332 // be recycled.
 333 // If that was the last reference and the inode has no links
 334 // to it, free the inode (and its content) on disk.
 335 // All calls to iput() must be inside a transaction in
 336 // case it has to free the inode.
 337 void
 338 iput(struct inode *ip)
 339 {
 340   acquire(&itable.lock);
 341 
 342   if(ip->ref == 1 && ip->valid && ip->nlink == 0){
 343     // inode has no links and no other references: truncate and free.
 344 
 345     // ip->ref == 1 means no other process can have ip locked,
 346     // so this acquiresleep() won't block (or deadlock).
 347     acquiresleep(&ip->lock);
 348 
 349     release(&itable.lock);
 350 
 351     itrunc(ip);
 352     ip->type = 0;
 353     iupdate(ip);
 354     ip->valid = 0;
 355 
 356     releasesleep(&ip->lock);
 357 
 358     acquire(&itable.lock);
 359   }
 360 
 361   ip->ref--;
 362   release(&itable.lock);
 363 }
 364 
 365 // Common idiom: unlock, then put.
 366 void
 367 iunlockput(struct inode *ip)
 368 {
 369   iunlock(ip);
 370   iput(ip);
 371 }
 372 
 373 void
 374 ireclaim(int dev)
 375 {
 376   for (int inum = 1; inum < sb.ninodes; inum++) {
 377     struct inode *ip = 0;
 378     struct buf *bp = bread(dev, IBLOCK(inum, sb));
 379     struct dinode *dip = (struct dinode *)bp->data + inum % IPB;
 380     if (dip->type != 0 && dip->nlink == 0) {  // is an orphaned inode
 381       printf("ireclaim: orphaned inode %d\n", inum);
 382       ip = iget(dev, inum);
 383     }
 384     brelse(bp);
 385     if (ip) {
 386       begin_op();
 387       ilock(ip);
 388       iunlock(ip);
 389       iput(ip);
 390       end_op();
 391     }
 392   }
 393 }
 394 
 395 // Inode content
 396 //
 397 // The content (data) associated with each inode is stored
 398 // in blocks on the disk. The first NDIRECT block numbers
 399 // are listed in ip->addrs[].  The next NINDIRECT blocks are
 400 // listed in block ip->addrs[NDIRECT].
 401 
 402 // Return the disk block address of the nth block in inode ip.
 403 // If there is no such block, bmap allocates one.
 404 // returns 0 if out of disk space.
 405 static uint
 406 bmap(struct inode *ip, uint bn)
 407 {
 408   uint addr, *a;
 409   struct buf *bp;
 410 
 411   if(bn < NDIRECT){
 412     if((addr = ip->addrs[bn]) == 0){
 413       addr = balloc(ip->dev);
 414       if(addr == 0)
 415         return 0;
 416       ip->addrs[bn] = addr;
 417     }
 418     return addr;
 419   }
 420   bn -= NDIRECT;
 421 
 422   if(bn < NINDIRECT){
 423     // Load indirect block, allocating if necessary.
 424     if((addr = ip->addrs[NDIRECT]) == 0){
 425       addr = balloc(ip->dev);
 426       if(addr == 0)
 427         return 0;
 428       ip->addrs[NDIRECT] = addr;
 429     }
 430     bp = bread(ip->dev, addr);
 431     a = (uint*)bp->data;
 432     if((addr = a[bn]) == 0){
 433       addr = balloc(ip->dev);
 434       if(addr){
 435         a[bn] = addr;
 436         log_write(bp);
 437       }
 438     }
 439     brelse(bp);
 440     return addr;
 441   }
 442 
 443   panic("bmap: out of range");
 444 }
 445 
 446 // Truncate inode (discard contents).
 447 // Caller must hold ip->lock.
 448 void
 449 itrunc(struct inode *ip)
 450 {
 451   int i, j;
 452   struct buf *bp;
 453   uint *a;
 454 
 455   for(i = 0; i < NDIRECT; i++){
 456     if(ip->addrs[i]){
 457       bfree(ip->dev, ip->addrs[i]);
 458       ip->addrs[i] = 0;
 459     }
 460   }
 461 
 462   if(ip->addrs[NDIRECT]){
 463     bp = bread(ip->dev, ip->addrs[NDIRECT]);
 464     a = (uint*)bp->data;
 465     for(j = 0; j < NINDIRECT; j++){
 466       if(a[j])
 467         bfree(ip->dev, a[j]);
 468     }
 469     brelse(bp);
 470     bfree(ip->dev, ip->addrs[NDIRECT]);
 471     ip->addrs[NDIRECT] = 0;
 472   }
 473 
 474   ip->size = 0;
 475   iupdate(ip);
 476 }
 477 
 478 // Copy stat information from inode.
 479 // Caller must hold ip->lock.
 480 void
 481 stati(struct inode *ip, struct stat *st)
 482 {
 483   st->dev = ip->dev;
 484   st->ino = ip->inum;
 485   st->type = ip->type;
 486   st->nlink = ip->nlink;
 487   st->size = ip->size;
 488 }
 489 
 490 // Read data from inode.
 491 // Caller must hold ip->lock.
 492 // If user_dst==1, then dst is a user virtual address;
 493 // otherwise, dst is a kernel address.
 494 int
 495 readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
 496 {
 497   uint tot, m;
 498   struct buf *bp;
 499 
 500   if(off > ip->size || off + n < off)
 501     return 0;
 502   if(off + n > ip->size)
 503     n = ip->size - off;
 504 
 505   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
 506     uint addr = bmap(ip, off/BSIZE);
 507     if(addr == 0)
 508       break;
 509     bp = bread(ip->dev, addr);
 510     m = min(n - tot, BSIZE - off%BSIZE);
 511     if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) {
 512       brelse(bp);
 513       tot = -1;
 514       break;
 515     }
 516     brelse(bp);
 517   }
 518   return tot;
 519 }
 520 
 521 // Write data to inode.
 522 // Caller must hold ip->lock.
 523 // If user_src==1, then src is a user virtual address;
 524 // otherwise, src is a kernel address.
 525 // Returns the number of bytes successfully written.
 526 // If the return value is less than the requested n,
 527 // there was an error of some kind.
 528 int
 529 writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
 530 {
 531   uint tot, m;
 532   struct buf *bp;
 533 
 534   if(off > ip->size || off + n < off)
 535     return -1;
 536   if(off + n > MAXFILE*BSIZE)
 537     return -1;
 538 
 539   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
 540     uint addr = bmap(ip, off/BSIZE);
 541     if(addr == 0)
 542       break;
 543     bp = bread(ip->dev, addr);
 544     m = min(n - tot, BSIZE - off%BSIZE);
 545     if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) {
 546       brelse(bp);
 547       break;
 548     }
 549     log_write(bp);
 550     brelse(bp);
 551   }
 552 
 553   if(off > ip->size)
 554     ip->size = off;
 555 
 556   // write the i-node back to disk even if the size didn't change
 557   // because the loop above might have called bmap() and added a new
 558   // block to ip->addrs[].
 559   iupdate(ip);
 560 
 561   return tot;
 562 }
 563 
 564 // Directories
 565 
 566 int
 567 namecmp(const char *s, const char *t)
 568 {
 569   return strncmp(s, t, DIRSIZ);
 570 }
 571 
 572 // Look for a directory entry in a directory.
 573 // If found, set *poff to byte offset of entry.
 574 struct inode*
 575 dirlookup(struct inode *dp, char *name, uint *poff)
 576 {
 577   uint off, inum;
 578   struct dirent de;
 579 
 580   if(dp->type != T_DIR)
 581     panic("dirlookup not DIR");
 582 
 583   for(off = 0; off < dp->size; off += sizeof(de)){
 584     if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
 585       panic("dirlookup read");
 586     if(de.inum == 0)
 587       continue;
 588     if(namecmp(name, de.name) == 0){
 589       // entry matches path element
 590       if(poff)
 591         *poff = off;
 592       inum = de.inum;
 593       return iget(dp->dev, inum);
 594     }
 595   }
 596 
 597   return 0;
 598 }
 599 
 600 // Write a new directory entry (name, inum) into the directory dp.
 601 // Returns 0 on success, -1 on failure (e.g. out of disk blocks).
 602 int
 603 dirlink(struct inode *dp, char *name, uint inum)
 604 {
 605   int off;
 606   struct dirent de;
 607   struct inode *ip;
 608 
 609   // Check that name is not present.
 610   if((ip = dirlookup(dp, name, 0)) != 0){
 611     iput(ip);
 612     return -1;
 613   }
 614 
 615   // Look for an empty dirent.
 616   for(off = 0; off < dp->size; off += sizeof(de)){
 617     if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
 618       panic("dirlink read");
 619     if(de.inum == 0)
 620       break;
 621   }
 622 
 623   strncpy(de.name, name, DIRSIZ);
 624   de.inum = inum;
 625   if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
 626     return -1;
 627 
 628   return 0;
 629 }
 630 
 631 // Paths
 632 
 633 // Copy the next path element from path into name.
 634 // Return a pointer to the element following the copied one.
 635 // The returned path has no leading slashes,
 636 // so the caller can check *path=='\0' to see if the name is the last one.
 637 // If no name to remove, return 0.
 638 //
 639 // Examples:
 640 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
 641 //   skipelem("///a//bb", name) = "bb", setting name = "a"
 642 //   skipelem("a", name) = "", setting name = "a"
 643 //   skipelem("", name) = skipelem("////", name) = 0
 644 //
 645 static char*
 646 skipelem(char *path, char *name)
 647 {
 648   char *s;
 649   int len;
 650 
 651   while(*path == '/')
 652     path++;
 653   if(*path == 0)
 654     return 0;
 655   s = path;
 656   while(*path != '/' && *path != 0)
 657     path++;
 658   len = path - s;
 659   if(len >= DIRSIZ)
 660     memmove(name, s, DIRSIZ);
 661   else {
 662     memmove(name, s, len);
 663     name[len] = 0;
 664   }
 665   while(*path == '/')
 666     path++;
 667   return path;
 668 }
 669 
 670 // Look up and return the inode for a path name.
 671 // If parent != 0, return the inode for the parent and copy the final
 672 // path element into name, which must have room for DIRSIZ bytes.
 673 // Must be called inside a transaction since it calls iput().
 674 static struct inode*
 675 namex(char *path, int nameiparent, char *name)
 676 {
 677   struct inode *ip, *next;
 678 
 679   if(*path == '/')
 680     ip = iget(ROOTDEV, ROOTINO);
 681   else
 682     ip = idup(myproc()->cwd);
 683 
 684   while((path = skipelem(path, name)) != 0){
 685     ilock(ip);
 686     if(ip->type != T_DIR){
 687       iunlockput(ip);
 688       return 0;
 689     }
 690     if(nameiparent && *path == '\0'){
 691       // Stop one level early.
 692       iunlock(ip);
 693       return ip;
 694     }
 695     if((next = dirlookup(ip, name, 0)) == 0){
 696       iunlockput(ip);
 697       return 0;
 698     }
 699     iunlockput(ip);
 700     ip = next;
 701   }
 702   if(nameiparent){
 703     iput(ip);
 704     return 0;
 705   }
 706   return ip;
 707 }
 708 
 709 struct inode*
 710 namei(char *path)
 711 {
 712   char name[DIRSIZ];
 713   return namex(path, 0, name);
 714 }
 715 
 716 struct inode*
 717 nameiparent(char *path, char *name)
 718 {
 719   return namex(path, 1, name);
 720 }

/* [<][>][^][v][top][bottom][index][help] */