OASIS Mailing List Archives
View the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

virtio-comment message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]


Subject: Re: [virtio] [OASIS Issue Tracker] Created: (VIRTIO-9) Change config space, ring and headers to always be little endian


OASIS Issues Tracker <workgroup_mailer@lists.oasis-open.org> writes:
> Change config space, ring and headers to always be little endian
> ----------------------------------------------------------------
>
>                  Key: VIRTIO-9
>                  URL: http://tools.oasis-open.org/issues/browse/VIRTIO-9
>              Project: OASIS Virtual I/O Device (VIRTIO) TC
>           Issue Type: Improvement
>             Reporter: Rusty Russell
>
>
> They are currently guest-endian, and it's been a source of confusion and pain (esp. for architectures which can switch endian!).
>
> There remains a question mark on virtio-ccw performance implications, but it's probably better to go LE everywhere than per-transport.

Here's a patch which does this.

Note that virtio-scsi defines the lun parameter as 'u8 lun[8];'.  SCSI
is BE, does this field need clarification?

Cheers,
Rusty.
PS.  Spec patch is easy.  Implementations are harder ;)

diff --git a/virtio-v1.0-wd01-part1-specification.txt b/virtio-v1.0-wd01-part1-specification.txt
index a3ee054..3e311ec 100644
--- a/virtio-v1.0-wd01-part1-specification.txt
+++ b/virtio-v1.0-wd01-part1-specification.txt
@@ -151,8 +151,7 @@ before accessing that part of the configuration space.
 Configuration space is generally used for rarely-changing or
 initialization-time parameters.
 
-Note that this space is generally the guest's native endian, 
-rather than PCI's little-endian.
+Like all virtio fields, configuration fields are little-endian.
 
 2.1.4 Virtqueues
 ----------------
@@ -206,14 +205,8 @@ writes the descriptor index into the available ring.  It then
 notifies the device. When the device has finished a buffer, it 
 writes the descriptor into the used ring, and sends an interrupt.
 
-2.1.4.1 A Note on Virtqueue Endianness
---------------------------------------
-
-Note that the endian of fields and in the virtqueue is the native
-endian of the guest, not little-endian as PCI normally is. This makes
-for simpler guest code, and it is assumed that the host already has to
-be deeply aware of the guest endian so such an “endian-aware” device
-is not a significant issue.
+All fields in the virtio ring are little-endian; to reinforce this
+the examples use typenames like "le16" instead of "uint16_t".
 
 2.1.4.2 Message Framing
 -----------------------
@@ -246,13 +239,18 @@ can be chained via the next field. Each descriptor describes a
 buffer which is read-only or write-only, but a chain of 
 descriptors can contain both read-only and write-only buffers.
 
+The actual contents of the memory offered to the device depend on the
+device type.  Most common is to begin the data with a header
+(containing little-endian fields) for the device to read, and postfix
+it with a status trailer for the device to write.
+
 No descriptor chain may be more than 2^32 bytes long in total.
 
 	struct vring_desc {
 		/* Address (guest-physical). */
-		u64 addr;
+		le64 addr;
 		/* Length. */
-		u32 len;
+		le32 len;
 	
 	/* This marks a buffer as continuing via the next field. */
 	#define VRING_DESC_F_NEXT   1
@@ -261,9 +259,9 @@ No descriptor chain may be more than 2^32 bytes long in total.
 	/* This means the buffer contains a list of buffer descriptors. */
 	#define VRING_DESC_F_INDIRECT   4 
 		/* The flags as indicated above. */
-		u16 flags;
+		le16 flags;
 		/* Next field if flags & NEXT */
-		u16 next;
+		le16 next;
 	};
 
 The number of descriptors in the table is defined by the queue size
@@ -320,10 +318,10 @@ entry (modulo the queue size). This starts at 0, and increases.
 
 	struct vring_avail {
 	#define VRING_AVAIL_F_NO_INTERRUPT      1
-		u16 flags;
-		u16 idx;
-		u16 ring[ /* Queue Size */ ];
-		u16 used_event;	/* Only if VIRTIO_RING_F_EVENT_IDX */
+		le16 flags;
+		le16 idx;
+		le16 ring[ /* Queue Size */ ];
+		le16 used_event;	/* Only if VIRTIO_RING_F_EVENT_IDX */
 	}; 
 
 2.1.4.5 The Virtqueue Used Ring
@@ -349,20 +347,20 @@ for guests using untrusted buffers: if you do not know exactly
 how much has been written by the device, you usually have to zero 
 the buffer to ensure no data leakage occurs.
 
-	/* u32 is used here for ids for padding reasons. */
+	/* le32 is used here for ids for padding reasons. */
 	struct vring_used_elem {
 		/* Index of start of used descriptor chain. */
-		u32 id;
+		le32 id;
 		/* Total length of the descriptor chain which was used (written to) */
-		u32 len;
+		le32 len;
 	};
 
 	struct vring_used {
 	#define VRING_USED_F_NO_NOTIFY  1 
-		u16 flags;
-		u16 idx;
+		le16 flags;
+		le16 idx;
 		struct vring_used_elem ring[ /* Queue Size */];
-		u16 avail_event; /* Only if VIRTIO_RING_F_EVENT_IDX */
+		le16 avail_event; /* Only if VIRTIO_RING_F_EVENT_IDX */
 	};
 
 2.1.4.6 Helpers for Operating Virtqueues
@@ -497,7 +495,8 @@ free descriptors before beginning the mappings.
 -------------------------------------
 
 The head of the buffer we mapped is the first d in the algorithm 
-above. A naive implementation would do the following:
+above.  A naive implementation would do the following (with the
+appropriate conversion to-and-from little-endian assumed):
 
 	avail->ring[avail->idx % qsz] = head;
 
@@ -585,15 +584,17 @@ suppressed by the device:
 	vring_disable_interrupts(vq);
 	
 	for (;;) {
-		if (vq->last_seen_used != vring->used.idx) {
+		if (vq->last_seen_used != le16_to_cpu(vring->used.idx)) {
 			vring_enable_interrupts(vq);
 			mb();
 	
-			if (vq->last_seen_used != vring->used.idx)
+			if (vq->last_seen_used != le16_to_cpu(vring->used.idx))
 				break;
 		}
 
-		struct vring_used_elem *e = vring.used->ring[vq->last_seen_used%vsz];
+		struct vring_used_elem *e;
+
+		e = vring.used->ring[vq->last_seen_used % vsz];
 		process_buffer(e);
 		vq->last_seen_used++;
 	}
@@ -605,7 +606,6 @@ For devices where the configuration information can be changed, an
 interrupt is delivered when a configuration change occurs.
 
 
-
 2.4 Virtio Transport Options
 ============================
 
@@ -642,11 +642,6 @@ used (i.e. 32-bit accesses for 32-bit fields, etc), but the
 device-specific region can be accessed using any width accesses, and
 should obtain the same results.
 
-Note that this is possible because while the virtio header is PCI 
-(i.e. little) endian, the device-specific region is encoded in 
-the native endian of the guest (where such distinction is 
-applicable).
-
 2.4.1.2.1 PCI Device Virtio Header
 ----------------------------------
 
@@ -993,10 +988,9 @@ Virtual queue size is the number of elements in the queue,
 therefore size of the descriptor table and both available and 
 used rings.
 
-The endianness of the registers follows the native endianness of 
-the Guest. Writing to registers described as “R” and reading from 
-registers described as “W” is not permitted and can cause 
-undefined behavior.
+The registers are little-endian encoded.  Writing to registers
+described as “R” and reading from registers described as “W” is not
+permitted and can cause undefined behavior.
 
 2.4.2.3 MMIO-specific Initialization And Device Operation
 ---------------------------------------------------------
@@ -1182,7 +1176,7 @@ features.
 
 	struct virtio_net_config {
 		u8 mac[6];
-		u16 status;
+		le16 status;
 	};
 
 2.5.1.4 Device Initialization
@@ -1245,12 +1239,12 @@ case, the packet itself is preceeded by a header:
 	#define VIRTIO_NET_HDR_GSO_TCPV6       4
 	#define VIRTIO_NET_HDR_GSO_ECN      0x80
 		u8 gso_type;
-		u16 hdr_len;
-		u16 gso_size;
-		u16 csum_start;
-		u16 csum_offset;
+		le16 hdr_len;
+		le16 gso_size;
+		le16 csum_start;
+		le16 csum_offset;
 	/* Only if VIRTIO_NET_F_MRG_RXBUF: */
-		u16 num_buffers;
+		le16 num_buffers;
 	};
 
 The controlq is used to control device features such as 
@@ -1419,7 +1413,7 @@ off. The command-specific-data is one byte containing 0 (off) or
 -----------------------------------------
 
 	struct virtio_net_ctrl_mac {
-		u32 entries;
+		le32 entries;
 		u8 macs[entries][ETH_ALEN];
 	};
 
@@ -1483,7 +1477,7 @@ send control commands for dynamic offloads state configuration.
 
 2.5.1.5.4.3.1 Setting Offloads State
 
-	u64 offloads;
+	le64 offloads;
 
 	#define VIRTIO_NET_F_GUEST_CSUM       1
 	#define VIRTIO_NET_F_GUEST_TSO4       7
@@ -1497,7 +1491,7 @@ send control commands for dynamic offloads state configuration.
 The class VIRTIO_NET_CTRL_GUEST_OFFLOADS has one command:
 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET applies the new offloads configuration.
 
-u64 value passed as command data is a bitmask, bits set define
+le64 value passed as command data is a bitmask, bits set define
 offloads to be enabled, bits cleared - offloads to be disabled.
 
 There is a corresponding device feature for each offload. Upon feature
@@ -1552,15 +1546,15 @@ device except where noted.
   as indicated above.
 
 	struct virtio_blk_config {
-		u64 capacity;
-		u32 size_max;
-		u32 seg_max;
+		le64 capacity;
+		le32 size_max;
+		le32 seg_max;
 		struct virtio_blk_geometry {
-			u16 cylinders;
+			le16 cylinders;
 			u8 heads;
 			u8 sectors;
 		} geometry;
-		u32 blk_size;
+		le32 blk_size;
 	};
 
 2.5.2.4 Device Initialization
@@ -1586,28 +1580,28 @@ The driver queues requests to the virtqueue, and they are used by
 the device (not necessarily in order). Each request is of form:
 
 	struct virtio_blk_req {
-		u32 type;
-		u32 ioprio;
-		u64 sector;
+		le32 type;
+		le32 ioprio;
+		le64 sector;
 		char data[][512];
 		u8 status;
 	};
 
 If the device has VIRTIO_BLK_F_SCSI feature, it can also support 
 scsi packet command requests, each of these requests is of form:
 
 	struct virtio_scsi_pc_req {
-		u32 type;
-		u32 ioprio;
-		u64 sector;
+		le32 type;
+		le32 ioprio;
+		le64 sector;
 		char cmd[];
 		char data[][512];
 #define SCSI_SENSE_BUFFERSIZE   96
 		u8 sense[SCSI_SENSE_BUFFERSIZE];
-		u32 errors;
-		u32 data_len;
-		u32 sense_len;
-		u32 residual;
+		le32 errors;
+		le32 data_len;
+		le32 sense_len;
+		le32 residual;
 		u8 status;
 	};
 
@@ -1731,9 +1725,9 @@ data and outgoing characters are placed in the transmit queue.
   be fetched.
 
 	struct virtio_console_config {
-		u16 cols;
-		u16 rows;
-		u32 max_nr_ports;
+		le16 cols;
+		le16 rows;
+		le32 max_nr_ports;
 	};
 
 2.5.3.5 Device Initialization
@@ -1788,9 +1782,9 @@ data and outgoing characters are placed in the transmit queue.
   buffer and the events associated are:
 
 	struct virtio_console_control {
-		uint32_t id;    /* Port number */
-		uint16_t event; /* The kind of control event */
-		uint16_t value; /* Extra information for the event */
+		le32 id;    /* Port number */
+		le16 event; /* The kind of control event */
+		le16 value; /* Extra information for the event */
 	};
 
 	/* Some events for the internal messages (control packets) */
@@ -1869,12 +1863,11 @@ guest memory statistics to the host.
 2.5.5.4 Device configuration layout
 -----------------------------------
   Both fields of this configuration 
-  are always available. Note that they are little endian, despite 
-  convention that device fields are guest endian:
+  are always available.
 
 	struct virtio_balloon_config {
-		u32 num_pages;
-		u32 actual;
+		le32 num_pages;
+		le32 actual;
 	};
 
 2.5.5.5 Device Initialization
@@ -1952,7 +1945,7 @@ as follows:
 
   Memory Statistics Format Each statistic consists of a 16 bit 
   tag and a 64 bit value. Both quantities are represented in the 
-  native endian of the guest. All statistics are optional and the 
+  little-endian format. All statistics are optional and the 
   driver may choose which ones to supply. To guarantee backwards 
   compatibility, unsupported statistics should be omitted.
 
@@ -1963,8 +1956,8 @@ as follows:
 	#define VIRTIO_BALLOON_S_MINFLT   3
 	#define VIRTIO_BALLOON_S_MEMFREE  4
 	#define VIRTIO_BALLOON_S_MEMTOT   5
-		u16 tag;
-		u64 val;
+		le16 tag;
+		le64 val;
 	} __attribute__((packed));
 
 2.5.5.6.2 Memory Statistics Tags
@@ -2035,16 +2028,16 @@ targets that receive and process the requests.
   and cdb_size are writable by the guest.
 
 	struct virtio_scsi_config {
-		u32 num_queues;
-		u32 seg_max;
-		u32 max_sectors;
-		u32 cmd_per_lun;
-		u32 event_info_size;
-		u32 sense_size;
-		u32 cdb_size;
-		u16 max_channel;
-		u16 max_target;
-		u32 max_lun;
+		le32 num_queues;
+		le32 seg_max;
+		le32 max_sectors;
+		le32 cmd_per_lun;
+		le32 event_info_size;
+		le32 sense_size;
+		le32 cdb_size;
+		le16 max_channel;
+		le16 max_target;
+		le32 max_lun;
 	};
 
   num_queues is the total number of request virtqueues exposed by 
@@ -2116,16 +2109,16 @@ Requests have the following format:
 	struct virtio_scsi_req_cmd {
 		// Read-only
 		u8 lun[8];
-		u64 id;
+		le64 id;
 		u8 task_attr;
 		u8 prio;
 		u8 crn;
 		char cdb[cdb_size];
 		char dataout[];
 		// Write-only part
-		u32 sense_len;
-		u32 residual;
-		u16 status_qualifier;
+		le32 sense_len;
+		le32 residual;
+		le16 status_qualifier;
 		u8 status;
 		u8 response;
 		u8 sense[sense_size];
@@ -2232,7 +2225,7 @@ The controlq is used for other SCSI transport operations.
 Requests have the following format:
 
 	struct virtio_scsi_ctrl {
-		u32 type;
+		le32 type;
 	...
 		u8 response;
 	};
@@ -2266,12 +2259,12 @@ The following commands are defined:
 	struct virtio_scsi_ctrl_tmf
 	{
 		// Read-only part
-		u32 type;
-		u32 subtype;
-		u8 lun[8];
-		u64 id;
+		le32 type;
+		le32 subtype;
+		u8   lun[8];
+		le64 id;
 		// Write-only part
-		u8 response;
+		u8   response;
 	}
 
 	/* command-specific response values */
@@ -2301,12 +2294,12 @@ The following commands are defined:
 
 	struct virtio_scsi_ctrl_an {
 	    // Read-only part
-	    u32 type;
-	    u8  lun[8];
-	    u32 event_requested;
+	    le32 type;
+	    u8   lun[8];
+	    le32 event_requested;
 	    // Write-only part
-	    u32 event_actual;
-	    u8  response;
+	    le32 event_actual;
+	    u8   response;
 	}
 
 	#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE  2
@@ -2334,12 +2327,12 @@ The following commands are defined:
 
 	struct virtio_scsi_ctrl_an {
 		// Read-only part
-		u32 type;
-		u8  lun[8];
-		u32 event_requested;
+		le32 type;
+		u8   lun[8];
+		le32 event_requested;
 		// Write-only part
-		u32 event_actual;
-		u8  response;
+		le32 event_actual;
+		u8   response;
 	}
 
   By sending this command, the driver asks the specified LUN to 
@@ -2384,7 +2377,7 @@ following format:
 
 	struct virtio_scsi_event {
 		// Write-only part
-		u32 event;
+		le32 event;
 		...
 	}
 
@@ -2419,9 +2412,9 @@ contents of the event field. The following events are defined:
 
 	struct virtio_scsi_event_reset {
 		// Write-only part
-		u32 event;
-		u8  lun[8];
-		u32 reason;
+		le32 event;
+		u8   lun[8];
+		le32 reason;
 	}
 
 	#define VIRTIO_SCSI_EVT_RESET_HARD         0
@@ -2484,9 +2477,9 @@ contents of the event field. The following events are defined:
 
 	struct virtio_scsi_event_an {
 		// Write-only part
-		u32 event;
-		u8  lun[8];
-		u32 reason;
+		le32 event;
+		u8   lun[8];
+		le32 reason;
 	}
 
   By sending this event, the device signals that an asynchronous 
@@ -2609,35 +2602,35 @@ and should not be used.
  * These can chain together via "next". */
 struct vring_desc {
         /* Address (guest-physical). */
-        uint64_t addr;
+        le64 addr;
         /* Length. */
-        uint32_t len;
+        le32 len;
         /* The flags as indicated above. */
-        uint16_t flags;
+        le16 flags;
         /* We chain unused descriptors via this, too */
-        uint16_t next;
+        le16 next;
 };
 
 struct vring_avail {
-        uint16_t flags;
-        uint16_t idx;
-        uint16_t ring[];
-        /* Only if VIRTIO_RING_F_EVENT_IDX: uint16_t used_event; */
+        le16 flags;
+        le16 idx;
+        le16 ring[];
+        /* Only if VIRTIO_RING_F_EVENT_IDX: le16 used_event; */
 };
 
-/* u32 is used here for ids for padding reasons. */
+/* le32 is used here for ids for padding reasons. */
 struct vring_used_elem {
         /* Index of start of used descriptor chain. */
-        uint32_t id;
+        le32 id;
         /* Total length of the descriptor chain which was written to. */
-        uint32_t len;
+        le32 len;
 };
 
 struct vring_used {
-        uint16_t flags;
-        uint16_t idx;
+        le16 flags;
+        le16 idx;
         struct vring_used_elem ring[];
-        /* Only if VIRTIO_RING_F_EVENT_IDX: uint16_t avail_event; */
+        /* Only if VIRTIO_RING_F_EVENT_IDX: le16 avail_event; */
 };
 
 struct vring {
@@ -2656,19 +2649,19 @@ struct vring {
  *      struct vring_desc desc[num];
  *
  *      // A ring of available descriptor heads with free-running index.
- *      __u16 avail_flags;
- *      __u16 avail_idx;
- *      __u16 available[num];
- *      __u16 used_event_idx; // Only if VIRTIO_RING_F_EVENT_IDX
+ *      le16 avail_flags;
+ *      le16 avail_idx;
+ *      le16 available[num];
+ *      le16 used_event_idx; // Only if VIRTIO_RING_F_EVENT_IDX
  *
  *      // Padding to the next align boundary.
  *      char pad[];
  *
  *      // A ring of used descriptor heads with free-running index.
- *      __u16 used_flags;
- *      __u16 used_idx;
+ *      le16 used_flags;
+ *      le16 used_idx;
  *      struct vring_used_elem used[num];
- *      __u16 avail_event_idx; // Only if VIRTIO_RING_F_EVENT_IDX
+ *      le16 avail_event_idx; // Only if VIRTIO_RING_F_EVENT_IDX
  * };
  * Note: for virtio PCI, align is 4096.
  */
@@ -2678,16 +2671,16 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
         vr->num = num;
         vr->desc = p;
         vr->avail = p + num*sizeof(struct vring_desc);
-        vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(uint16_t)
+        vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(le16)
                               + align-1)
                             & ~(align - 1));
 }
 
 static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
-        return ((sizeof(struct vring_desc)*num + sizeof(uint16_t)*(3+num)
+        return ((sizeof(struct vring_desc)*num + sizeof(le16)*(3+num)
                  + align - 1) & ~(align - 1))
-                + sizeof(uint16_t)*3 + sizeof(struct vring_used_elem)*num;
+                + sizeof(le16)*3 + sizeof(struct vring_used_elem)*num;
 }
 
 static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
@@ -2696,16 +2689,16 @@ static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_
 }
 
 /* Get location of event indices (only with VIRTIO_RING_F_EVENT_IDX) */
-static inline uint16_t *vring_used_event(struct vring *vr)
+static inline le16 *vring_used_event(struct vring *vr)
 {
         /* For backwards compat, used event index is at *end* of avail ring. */
         return &vr->avail->ring[vr->num];
 }
 
-static inline uint16_t *vring_avail_event(struct vring *vr)
+static inline le16 *vring_avail_event(struct vring *vr)
 {
         /* For backwards compat, avail event index is at *end* of used ring. */
-        return (uint16_t *)&vr->used->ring[vr->num];
+        return (le16 *)&vr->used->ring[vr->num];
 }
 #endif /* VIRTIO_RING_H */
 



[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]