OASIS Mailing List ArchivesView the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

virtio-dev message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]


Subject: [RFC PATCH 3/3] virtio_net: make lower netdevs for virtio_bypass hidden


We should move virtio_bypass to a 1-upper-with-2-hidden-lower
driver model for greater compatibility with regard to preserving
userspace API and ABI.

On the other hand, technically virtio_bypass should perform
stricter checks before automatically enslaving the corresponding
virtual function or passthrough device. It is more reasonable to
pair a virtio_bypass instance with a VF or passthrough device 1:1,
rather than rely on searching for a random non-virtio netdev with
the exact same MAC address. One possible way of doing this is to
bind virtio_bypass explicitly to a guest pci device by specifying
its <bus> and <slot>:<function> location. Change the BACKUP feature
to take these configs into account, such that verifying the target
device for auto-enslavement no longer relies on the MAC address.

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 drivers/net/virtio_net.c        | 159 ++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/virtio_net.h |   2 +
 2 files changed, 148 insertions(+), 13 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f850cf6..c54a5bd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -77,6 +77,8 @@ struct virtnet_stats {
 	u64 rx_packets;
 };
 
+static struct workqueue_struct *virtnet_bypass_wq;
+
 /* Internal representation of a send virtqueue */
 struct send_queue {
 	/* Virtqueue associated with this send _queue */
@@ -196,6 +198,13 @@ struct padded_vnet_hdr {
 	char padding[4];
 };
 
+struct virtnet_bypass_task {
+	struct work_struct	work;
+	unsigned long		event;		/* NETDEV_REGISTER or NETDEV_UNREGISTER */
+	struct net_device	*child_netdev;	/* netdev to (un)hide; held via dev_hold() */
+	struct net_device	*bypass_netdev;	/* owning virtnet_bypass upper dev */
+};
+
 /* Converting between virtqueue no. and kernel tx/rx queue no.
  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  */
@@ -2557,6 +2566,11 @@ struct virtnet_bypass_info {
 
 	/* spinlock while updating stats */
 	spinlock_t stats_lock;
+
+	int bus;
+	int slot;
+	int function;
+
 };
 
 static void virtnet_bypass_child_open(struct net_device *dev,
@@ -2822,10 +2836,13 @@ static void virtnet_bypass_ethtool_get_drvinfo(struct net_device *dev,
 	.get_link_ksettings     = virtnet_bypass_ethtool_get_link_ksettings,
 };
 
-static struct net_device *get_virtnet_bypass_bymac(struct net *net,
-						   const u8 *mac)
+static struct net_device *
+get_virtnet_bypass_bymac(struct net_device *child_netdev)
 {
+	struct net *net = dev_net(child_netdev);
 	struct net_device *dev;
+	struct virtnet_bypass_info *vbi;
+	int devfn;
 
 	ASSERT_RTNL();
 
@@ -2833,7 +2850,29 @@ static struct net_device *get_virtnet_bypass_bymac(struct net *net,
 		if (dev->netdev_ops != &virtnet_bypass_netdev_ops)
 			continue;       /* not a virtnet_bypass device */
 
-		if (ether_addr_equal(mac, dev->perm_addr))
+		if (!ether_addr_equal(child_netdev->dev_addr, dev->perm_addr))
+			continue;       /* not matching MAC address */
+
+		if (!child_netdev->dev.parent)
+			continue;
+
+		/* Is child_netdev a backup netdev ? */
+		if (child_netdev->dev.parent == dev->dev.parent)
+			return dev;
+
+		/* Avoid non pci devices as active netdev */
+		if (!dev_is_pci(child_netdev->dev.parent))
+			continue;
+
+		vbi = netdev_priv(dev);
+		devfn = PCI_DEVFN(vbi->slot, vbi->function);
+
+		netdev_info(dev, "bus %d slot %d func %d",
+			    vbi->bus, vbi->slot, vbi->function);
+
+		/* Need to match <bus>:<slot>.<function> */
+		if (pci_get_bus_and_slot(vbi->bus, devfn) ==
+		    to_pci_dev(child_netdev->dev.parent))
 			return dev;
 	}
 
@@ -2878,10 +2917,61 @@ static rx_handler_result_t virtnet_bypass_handle_frame(struct sk_buff **pskb)
 	return RX_HANDLER_ANOTHER;
 }
 
+static int virtnet_bypass_pregetname_child(struct net_device *child_netdev)
+{
+	struct net_device *dev;
+
+	if (child_netdev->addr_len != ETH_ALEN)
+		return NOTIFY_DONE;
+
+	/* We will use the MAC address to locate the virtnet_bypass netdev
+	 * to associate with the child netdev. If we don't find a matching
+	 * bypass netdev, move on.
+	 */
+	dev = get_virtnet_bypass_bymac(child_netdev);
+	if (!dev)
+		return NOTIFY_DONE;
+
+	/* Hide only a foreign (non-backup) child; no stray semicolon here. */
+	if (child_netdev->dev.parent &&
+	    child_netdev->dev.parent != dev->dev.parent)
+		netdev_set_hidden(child_netdev);
+
+	return NOTIFY_OK;
+}
+
+/* Deferred worker: (un)hides the child netdev outside notifier context,
+ * then drops the reference taken by the task submitter and frees the task.
+ */
+static void virtnet_bypass_task_fn(struct work_struct *work)
+{
+	struct virtnet_bypass_task *task;
+	struct net_device *child_netdev;
+	int rc;
+
+	task = container_of(work, struct virtnet_bypass_task, work);
+	child_netdev = task->child_netdev;
+
+	switch (task->event) {
+	case NETDEV_REGISTER:
+		rc = hide_netdevice(child_netdev);
+		if (rc)
+			netdev_err(child_netdev,
+				   "hide netdev %s failed with error %d\n",
+				   child_netdev->name, rc);
+		break;
+	case NETDEV_UNREGISTER:
+		unhide_netdevice(child_netdev);
+		break;
+	default:
+		break;
+	}
+	dev_put(child_netdev);
+	kfree(task);
+}
+
 static int virtnet_bypass_register_child(struct net_device *child_netdev)
 {
 	struct virtnet_bypass_info *vbi;
 	struct net_device *dev;
+	struct virtnet_bypass_task *task;
 	bool backup;
 	int ret;
 
@@ -2892,25 +2982,34 @@ static int virtnet_bypass_register_child(struct net_device *child_netdev)
 	 * to associate with the child netdev. If we don't find a matching
 	 * bypass netdev, move on.
 	 */
-	dev = get_virtnet_bypass_bymac(dev_net(child_netdev),
-				       child_netdev->perm_addr);
+	dev = get_virtnet_bypass_bymac(child_netdev);
 	if (!dev)
 		return NOTIFY_DONE;
 
 	vbi = netdev_priv(dev);
 	backup = (child_netdev->dev.parent == dev->dev.parent);
 	if (backup ? rtnl_dereference(vbi->backup_netdev) :
-			rtnl_dereference(vbi->active_netdev)) {
+		     rtnl_dereference(vbi->active_netdev)) {
 		netdev_info(dev,
 			    "%s attempting to join bypass dev when %s already present\n",
 			    child_netdev->name, backup ? "backup" : "active");
 		return NOTIFY_DONE;
 	}
 
-	/* Avoid non pci devices as active netdev */
-	if (!backup && (!child_netdev->dev.parent ||
-			!dev_is_pci(child_netdev->dev.parent)))
-		return NOTIFY_DONE;
+	/* Verify <bus>:<slot>.<function> info */
+	if (!backup && !(child_netdev->priv_flags & IFF_HIDDEN)) {
+		task = kzalloc(sizeof(*task), GFP_ATOMIC);
+		if (!task)
+			return NOTIFY_DONE;
+		task->event = NETDEV_REGISTER;
+		task->bypass_netdev = dev;
+		task->child_netdev = child_netdev;
+		INIT_WORK(&task->work, virtnet_bypass_task_fn);
+		queue_work(virtnet_bypass_wq, &task->work);
+		dev_hold(child_netdev);
+
+		return NOTIFY_OK;
+	}
 
 	ret = netdev_rx_handler_register(child_netdev,
 					 virtnet_bypass_handle_frame, dev);
@@ -2981,6 +3080,7 @@ static int virtnet_bypass_unregister_child(struct net_device *child_netdev)
 {
 	struct virtnet_bypass_info *vbi;
 	struct net_device *dev, *backup;
+	struct virtnet_bypass_task *task;
 
 	dev = get_virtnet_bypass_byref(child_netdev);
 	if (!dev)
@@ -3003,6 +3103,16 @@ static int virtnet_bypass_unregister_child(struct net_device *child_netdev)
 			dev->min_mtu = backup->min_mtu;
 			dev->max_mtu = backup->max_mtu;
 		}
+
+		task = kzalloc(sizeof(*task), GFP_ATOMIC);
+		if (task) {
+			task->event = NETDEV_UNREGISTER;
+			task->bypass_netdev = dev;
+			task->child_netdev = child_netdev;
+			INIT_WORK(&task->work, virtnet_bypass_task_fn);
+			queue_work(virtnet_bypass_wq, &task->work);
+			dev_hold(child_netdev);
+		}
 	}
 
 	dev_put(child_netdev);
@@ -3059,6 +3169,8 @@ static int virtnet_bypass_event(struct notifier_block *this,
 		return NOTIFY_DONE;
 
 	switch (event) {
+	case NETDEV_PRE_GETNAME:
+		return virtnet_bypass_pregetname_child(event_dev);
 	case NETDEV_REGISTER:
 		return virtnet_bypass_register_child(event_dev);
 	case NETDEV_UNREGISTER:
@@ -3076,11 +3188,12 @@ static int virtnet_bypass_event(struct notifier_block *this,
 	.notifier_call = virtnet_bypass_event,
 };
 
-static int virtnet_bypass_create(struct virtnet_info *vi)
+static int virtnet_bypass_create(struct virtnet_info *vi, int bsf)
 {
 	struct net_device *backup_netdev = vi->dev;
 	struct device *dev = &vi->vdev->dev;
 	struct net_device *bypass_netdev;
+	struct virtnet_bypass_info *vbi;
 	int res;
 
 	/* Alloc at least 2 queues, for now we are going with 16 assuming
@@ -3095,6 +3208,11 @@ static int virtnet_bypass_create(struct virtnet_info *vi)
 
 	dev_net_set(bypass_netdev, dev_net(backup_netdev));
 	SET_NETDEV_DEV(bypass_netdev, dev);
+	vbi = netdev_priv(bypass_netdev);
+
+	vbi->bus = (bsf >> 8) & 0xFF;
+	vbi->slot = (bsf >> 3) & 0x1F;
+	vbi->function = bsf & 0x7;
 
 	bypass_netdev->netdev_ops = &virtnet_bypass_netdev_ops;
 	bypass_netdev->ethtool_ops = &virtnet_bypass_ethtool_ops;
@@ -3183,7 +3301,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	struct net_device *dev;
 	struct virtnet_info *vi;
 	u16 max_queue_pairs;
-	int mtu;
+	int mtu, bsf;
 
 	/* Find if host supports multiqueue virtio_net device */
 	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
@@ -3339,8 +3457,12 @@ static int virtnet_probe(struct virtio_device *vdev)
 	virtnet_init_settings(dev);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_BACKUP)) {
-		if (virtnet_bypass_create(vi) != 0)
+		bsf = virtio_cread16(vdev,
+				     offsetof(struct virtio_net_config,
+					      bsf2backup));
+		if (virtnet_bypass_create(vi, bsf) != 0)
 			goto free_vqs;
+		netdev_set_hidden(dev); 
 	}
 
 	err = register_netdev(dev);
@@ -3384,6 +3506,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	unregister_netdev(dev);
 free_bypass:
 	virtnet_bypass_destroy(vi);
+
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
 	free_receive_page_frags(vi);
@@ -3517,6 +3640,12 @@ static __init int virtio_net_driver_init(void)
 	if (ret)
 		goto err_dead;
 
+	virtnet_bypass_wq = create_singlethread_workqueue("virtio_bypass");
+	if (!virtnet_bypass_wq) {
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
         ret = register_virtio_driver(&virtio_net_driver);
 	if (ret)
 		goto err_virtio;
@@ -3524,6 +3653,8 @@ static __init int virtio_net_driver_init(void)
 	register_netdevice_notifier(&virtnet_bypass_notifier);
 	return 0;
 err_virtio:
+	destroy_workqueue(virtnet_bypass_wq);
+err_wq:
 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
 err_dead:
 	cpuhp_remove_multi_state(virtionet_online);
@@ -3535,6 +3666,8 @@ static __init int virtio_net_driver_init(void)
 static __exit void virtio_net_driver_exit(void)
 {
 	unregister_netdevice_notifier(&virtnet_bypass_notifier);
+	if (virtnet_bypass_wq)
+		destroy_workqueue(virtnet_bypass_wq);
 	unregister_virtio_driver(&virtio_net_driver);
 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
 	cpuhp_remove_multi_state(virtionet_online);
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index aa40664..0827b7e 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -80,6 +80,8 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 	/* Default maximum transmit unit advice */
 	__u16 mtu;
+	/* Device at bus:slot.function backed up by virtio_net */
+	__u16 bsf2backup;
 } __attribute__((packed));
 
 /*
-- 
1.8.3.1



[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]