root/kernel/virtio_disk.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. virtio_disk_init
  2. alloc_desc
  3. free_desc
  4. free_chain
  5. alloc3_desc
  6. virtio_disk_rw
  7. virtio_disk_intr

   1 //
   2 // driver for qemu's virtio disk device.
   3 // uses qemu's mmio interface to virtio.
   4 //
   5 // qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
   6 //
   7 
   8 #include "types.h"
   9 #include "riscv.h"
  10 #include "defs.h"
  11 #include "param.h"
  12 #include "memlayout.h"
  13 #include "spinlock.h"
  14 #include "sleeplock.h"
  15 #include "fs.h"
  16 #include "buf.h"
  17 #include "virtio.h"
  18 
   19 // R(r): pointer to the 32-bit virtio mmio register at address VIRTIO0 + r; volatile, so each *R(r) is a real load/store.
   20 #define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
   21 // all state of the virtio disk driver; there is exactly one static instance, named disk.
   22 static struct disk {
   23   // a set (not a ring) of DMA descriptors, with which the
   24   // driver tells the device where to read and write individual
   25   // disk operations. there are NUM descriptors.
   26   // virtio_disk_rw() builds each request as a "chain" (linked through
   27   // the .next field) of three of these descriptors.
   28   struct virtq_desc *desc;
   29 
   30   // a ring in which the driver writes descriptor numbers
   31   // that the driver would like the device to process.  it only
   32   // includes the head descriptor of each chain. the ring has
   33   // NUM elements.
   34   struct virtq_avail *avail;
   35 
   36   // a ring in which the device writes descriptor numbers that
   37   // the device has finished processing (just the head of each chain).
   38   // there are NUM used ring entries.
   39   struct virtq_used *used;
   40 
   41   // our own book-keeping.
   42   char free[NUM];  // free[i] != 0 means descriptor i is unallocated; &free[0] is also the sleep/wakeup channel for descriptor waiters.
   43   uint16 used_idx; // number of used-ring entries already handled; free-running, compared with used->idx and taken % NUM to index used->ring.
   44 
   45   // track info about in-flight operations,
   46   // for use when completion interrupt arrives.
   47   // indexed by first descriptor index of chain.
   48   struct {
   49     struct buf *b;  // buffer of the request; set in virtio_disk_rw(), cleared there once the request is done.
   50     char status;    // written by the device through the chain's third descriptor; preset to 0xff, 0 means success.
   51   } info[NUM];
   52 
   53   // disk command headers.
   54   // one-for-one with descriptors, for convenience.
   55   struct virtio_blk_req ops[NUM];
   56   
   57   struct spinlock vdisk_lock;  // held by virtio_disk_rw() and virtio_disk_intr() while they touch this struct.
   58   
   59 } disk;
   60 // find the virtio block device at VIRTIO0, negotiate features, set up virtqueue 0 and mark the driver ready; panics on any failure.
   61 void
   62 virtio_disk_init(void)
   63 {
   64   uint32 status = 0;  // local copy of the status bits; each step ORs in one bit and writes the whole value.
   65 
   66   initlock(&disk.vdisk_lock, "virtio_disk");
   67 
   68   if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||  // "virt" in little-endian ASCII
   69      *R(VIRTIO_MMIO_VERSION) != 2 ||               // only mmio interface version 2 is handled
   70      *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||             // device id 2 is a block device per the virtio spec
   71      *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){     // "QEMU" in little-endian ASCII
   72     panic("could not find virtio disk");
   73   }
   74   
   75   // reset device (status is still 0 here).
   76   *R(VIRTIO_MMIO_STATUS) = status;
   77 
   78   // set ACKNOWLEDGE status bit
   79   status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
   80   *R(VIRTIO_MMIO_STATUS) = status;
   81 
   82   // set DRIVER status bit
   83   status |= VIRTIO_CONFIG_S_DRIVER;
   84   *R(VIRTIO_MMIO_STATUS) = status;
   85 
   86   // negotiate features: read what the device offers, clear the bits this driver does not want, write the rest back.
   87   uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);  // the register read is 32 bits wide
   88   features &= ~(1 << VIRTIO_BLK_F_RO);
   89   features &= ~(1 << VIRTIO_BLK_F_SCSI);
   90   features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
   91   features &= ~(1 << VIRTIO_BLK_F_MQ);
   92   features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
   93   features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
   94   features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
   95   *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
   96 
   97   // tell device that feature negotiation is complete.
   98   status |= VIRTIO_CONFIG_S_FEATURES_OK;
   99   *R(VIRTIO_MMIO_STATUS) = status;
  100 
  101   // re-read status to ensure FEATURES_OK is set; a cleared bit means the device rejected our feature set.
  102   status = *R(VIRTIO_MMIO_STATUS);
  103   if(!(status & VIRTIO_CONFIG_S_FEATURES_OK))
  104     panic("virtio disk FEATURES_OK unset");
  105 
  106   // initialize queue 0: select it so the QUEUE_* registers below refer to it.
  107   *R(VIRTIO_MMIO_QUEUE_SEL) = 0;
  108 
  109   // ensure queue 0 is not in use.
  110   if(*R(VIRTIO_MMIO_QUEUE_READY))
  111     panic("virtio disk should not be ready");
  112 
  113   // check maximum queue size: it must be non-zero and at least NUM.
  114   uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
  115   if(max == 0)
  116     panic("virtio disk has no queue 0");
  117   if(max < NUM)
  118     panic("virtio disk max queue too short");
  119 
  120   // allocate and zero queue memory: one kalloc() each for the descriptor table, avail ring and used ring.
  121   disk.desc = kalloc();
  122   disk.avail = kalloc();
  123   disk.used = kalloc();
  124   if(!disk.desc || !disk.avail || !disk.used)
  125     panic("virtio disk kalloc");
  126   memset(disk.desc, 0, PGSIZE);
  127   memset(disk.avail, 0, PGSIZE);
  128   memset(disk.used, 0, PGSIZE);
  129 
  130   // set queue size.
  131   *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
  132 
  133   // write physical addresses; each 64-bit address is split into LOW (bits 0-31) and HIGH (bits 32-63) registers.
  134   *R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64)disk.desc;
  135   *R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64)disk.desc >> 32;
  136   *R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64)disk.avail;   // driver area = avail ring
  137   *R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64)disk.avail >> 32;
  138   *R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64)disk.used;    // device area = used ring
  139   *R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64)disk.used >> 32;
  140 
  141   // queue is ready.
  142   *R(VIRTIO_MMIO_QUEUE_READY) = 0x1;
  143 
  144   // all NUM descriptors start out unused.
  145   for(int i = 0; i < NUM; i++)
  146     disk.free[i] = 1;
  147 
  148   // tell device we're completely ready.
  149   status |= VIRTIO_CONFIG_S_DRIVER_OK;
  150   *R(VIRTIO_MMIO_STATUS) = status;
  151 
  152   // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
  153 }
 154 
 155 // find a free descriptor, mark it non-free, return its index.
 156 static int
 157 alloc_desc()
 158 {
 159   for(int i = 0; i < NUM; i++){
 160     if(disk.free[i]){
 161       disk.free[i] = 0;
 162       return i;
 163     }
 164   }
 165   return -1;
 166 }
 167 
 168 // mark a descriptor as free.
 169 static void
 170 free_desc(int i)
 171 {
 172   if(i >= NUM)
 173     panic("free_desc 1");
 174   if(disk.free[i])
 175     panic("free_desc 2");
 176   disk.desc[i].addr = 0;
 177   disk.desc[i].len = 0;
 178   disk.desc[i].flags = 0;
 179   disk.desc[i].next = 0;
 180   disk.free[i] = 1;
 181   wakeup(&disk.free[0]);
 182 }
 183 
 184 // free a chain of descriptors.
 185 static void
 186 free_chain(int i)
 187 {
 188   while(1){
 189     int flag = disk.desc[i].flags;
 190     int nxt = disk.desc[i].next;
 191     free_desc(i);
 192     if(flag & VRING_DESC_F_NEXT)
 193       i = nxt;
 194     else
 195       break;
 196   }
 197 }
 198 
 199 // allocate three descriptors (they need not be contiguous).
 200 // disk transfers always use three descriptors.
 201 static int
 202 alloc3_desc(int *idx)
 203 {
 204   for(int i = 0; i < 3; i++){
 205     idx[i] = alloc_desc();
 206     if(idx[i] < 0){
 207       for(int j = 0; j < i; j++)
 208         free_desc(idx[j]);
 209       return -1;
 210     }
 211   }
 212   return 0;
 213 }
  214 // transfer block b->blockno between b->data and the disk (write != 0: to disk, else from disk); sleeps until virtio_disk_intr() reports completion.
  215 void
  216 virtio_disk_rw(struct buf *b, int write)
  217 {
  218   uint64 sector = b->blockno * (BSIZE / 512);  // block number -> 512-byte sector number. NOTE(review): the multiply is done in b->blockno's type before widening; confirm it cannot overflow.
  219 
  220   acquire(&disk.vdisk_lock);
  221 
  222   // the spec's Section 5.2 says that legacy block operations use
  223   // three descriptors: one for type/reserved/sector, one for the
  224   // data, one for a 1-byte status result.
  225 
  226   // allocate the three descriptors; if fewer than three are free, sleep until free_desc() wakes us and retry.
  227   int idx[3];
  228   while(1){
  229     if(alloc3_desc(idx) == 0) {
  230       break;
  231     }
  232     sleep(&disk.free[0], &disk.vdisk_lock);
  233   }
  234 
  235   // format the three descriptors.
  236   // qemu's virtio-blk.c reads them.
  237 
  238   struct virtio_blk_req *buf0 = &disk.ops[idx[0]];  // request header slot that belongs to the head descriptor
  239 
  240   if(write)
  241     buf0->type = VIRTIO_BLK_T_OUT; // write the disk
  242   else
  243     buf0->type = VIRTIO_BLK_T_IN; // read the disk
  244   buf0->reserved = 0;
  245   buf0->sector = sector;
  246 
  247   disk.desc[idx[0]].addr = (uint64) buf0;  // descriptor 0: the request header
  248   disk.desc[idx[0]].len = sizeof(struct virtio_blk_req);
  249   disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
  250   disk.desc[idx[0]].next = idx[1];
  251 
  252   disk.desc[idx[1]].addr = (uint64) b->data;  // descriptor 1: the BSIZE-byte data buffer
  253   disk.desc[idx[1]].len = BSIZE;
  254   if(write)
  255     disk.desc[idx[1]].flags = 0; // device reads b->data
  256   else
  257     disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
  258   disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
  259   disk.desc[idx[1]].next = idx[2];
  260 
  261   disk.info[idx[0]].status = 0xff; // device writes 0 on success
  262   disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;  // descriptor 2: the 1-byte status, last in the chain
  263   disk.desc[idx[2]].len = 1;
  264   disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
  265   disk.desc[idx[2]].next = 0;
  266 
  267   // record struct buf for virtio_disk_intr(); b->disk == 1 marks the request as in flight.
  268   b->disk = 1;
  269   disk.info[idx[0]].b = b;
  270 
  271   // tell the device the first index in our chain of descriptors.
  272   disk.avail->ring[disk.avail->idx % NUM] = idx[0];
  273 
  274   __sync_synchronize();  // barrier: the ring entry must be written before the idx update below
  275 
  276   // tell the device another avail ring entry is available.
  277   disk.avail->idx += 1; // not % NUM: idx is free-running, only the ring subscript above is reduced
  278 
  279   __sync_synchronize();  // barrier: the idx update must be written before the notify below
  280 
  281   *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
  282 
  283   // Wait for virtio_disk_intr() to say request has finished (it clears b->disk and calls wakeup(b)).
  284   while(b->disk == 1) {
  285     sleep(b, &disk.vdisk_lock);
  286   }
  287 
  288   disk.info[idx[0]].b = 0;  // request is done; drop the buf reference and return the three descriptors.
  289   free_chain(idx[0]);
  290 
  291   release(&disk.vdisk_lock);
  292 }
  293 // disk interrupt handler: acknowledge the interrupt, then complete every request the device has added to the used ring since the last call.
  294 void
  295 virtio_disk_intr()
  296 {
  297   acquire(&disk.vdisk_lock);
  298 
  299   // the device won't raise another interrupt until we tell it
  300   // we've seen this interrupt, which the following line does.
  301   // this may race with the device writing new entries to
  302   // the "used" ring, in which case we may process the new
  303   // completion entries in this interrupt, and have nothing to do
  304   // in the next interrupt, which is harmless.
  305   *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
  306 
  307   __sync_synchronize();  // barrier between the acknowledge above and the used-ring reads below
  308 
  309   // the device increments disk.used->idx when it
  310   // adds an entry to the used ring.
  311 
  312   while(disk.used_idx != disk.used->idx){
  313     __sync_synchronize();  // barrier: read the ring entry only after the idx comparison above
  314     int id = disk.used->ring[disk.used_idx % NUM].id;  // head descriptor of the finished chain. NOTE(review): id comes from the device and indexes disk.info[] unchecked.
  315 
  316     if(disk.info[id].status != 0)  // status was preset to 0xff; the device writes 0 on success
  317       panic("virtio_disk_intr status");
  318 
  319     struct buf *b = disk.info[id].b;
  320     b->disk = 0;   // disk is done with buf
  321     wakeup(b);     // wakes virtio_disk_rw(), which sleeps on b
  322 
  323     disk.used_idx += 1;  // free-running uint16, like used->idx; only the ring subscript is taken % NUM
  324   }
  325 
  326   release(&disk.vdisk_lock);
  327 }

/* [<][>][^][v][top][bottom][index][help] */