OASIS Mailing List Archives. View the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

virtio-dev message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]


Subject: Straw linux/lguest implementation of new "memballoon"


So, I finally got back to playing with a new virtio balloon API.

Linux + lguest patch below (implementation is horrible: DO NOT USE).
I will write up the spec from this, too.

I had some code lying around inside my scratch image, so I compiled it
10 times in a 128MB guest:

        real seconds with balloon: 80-126(99.9+/-14)
        number of M in balloon: 71-74(72.5+/-0.81)

vs without balloon, it takes about 45 seconds :)

(More memory makes compiles faster: film at 11)

Subject: [PATCH 1/2] virtio: permit finer feature control

Proposed new virtio memballoon uses feature bits for page sizes,
but driver is only allowed to accept one.  Our generic code simply
ANDs driver and device features, so we need a hook.

NOTE: This is a demonstration: the code is only needed for VIRTIO 1.0
which will require other changes!

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index fed0ce198ae3..26cf4b9ea48b 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -130,6 +130,13 @@ static int virtio_dev_probe(struct device *_d)
 	/* Figure out what features the device supports. */
 	device_features = dev->config->get_features(dev);
 
+	/* Let driver strip features it doesn't want (memballoon) */
+	if (drv->filter_features) {
+		err = drv->filter_features(dev, &device_features);
+		if (err)
+			goto failed;
+	}
+
 	/* Features supported by both device and driver into dev->features. */
 	memset(dev->features, 0, sizeof(dev->features));
 	for (i = 0; i < drv->feature_table_size; i++) {
@@ -147,9 +154,10 @@ static int virtio_dev_probe(struct device *_d)
 	dev->config->finalize_features(dev);
 
 	err = drv->probe(dev);
-	if (err)
+	if (err) {
+	failed:
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else {
+	} else {
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 		if (drv->scan)
 			drv->scan(dev);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b46671e28de2..b4527ad45b0f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -114,6 +114,7 @@ void virtio_break_device(struct virtio_device *dev);
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this driver.
  * @feature_table_size: number of entries in the feature table array.
+ * @filter_features: frob device features before ANDing with @feature_table.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @remove: the function to call when a device is removed.
  * @config_changed: optional function to call when the device configuration
@@ -124,6 +125,7 @@ struct virtio_driver {
 	const struct virtio_device_id *id_table;
 	const unsigned int *feature_table;
 	unsigned int feature_table_size;
+	int (*filter_features)(struct virtio_device *dev, u32 *dev_features);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);

Subject: [PATCH 2/2] memballoon: silly implementation (with lguest guest)

Part of discussion of a new virtio balloon standard.

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 9076635697bb..b8014c6f9b83 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
-obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+obj-$(CONFIG_VIRTIO_BALLOON) += virtio_memballoon.o virtio_balloon.o
diff --git a/drivers/virtio/virtio_memballoon.c b/drivers/virtio/virtio_memballoon.c
new file mode 100644
index 000000000000..fa3609e5bead
--- /dev/null
+++ b/drivers/virtio/virtio_memballoon.c
@@ -0,0 +1,584 @@
+/*
+ * Virtio balloon implementation, inspired by Dor Laor and Marcelo
+ * Tosatti's implementations.
+ *
+ *  Copyright 2008, 2014 Rusty Russell IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/virtio.h>
+#include <linux/virtio_memballoon.h>
+#include <linux/swap.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/balloon_compaction.h>
+#include <linux/mman.h>
+
+/* Hack to read bytes in balloon from /sys/module/virtio_memballoon/bytes */
+static unsigned long bytes;
+module_param(bytes, ulong, 0444);
+
+/* Don't bother sending less than 1 MB */
+#define BATCH_BYTES		(1024*1024)
+#define BATCH_PAGES		(BATCH_BYTES / PAGE_SIZE)
+
+struct gcmd_get_pages {
+	__le64 type; /* VIRTIO_MEMBALLOON_GCMD_GET_PAGES */
+	__le64 pages[BATCH_PAGES];
+};
+
+struct gcmd_give_pages {
+	__le64 type; /* VIRTIO_MEMBALLOON_GCMD_GIVE_PAGES */
+	__le64 pages[BATCH_PAGES];
+};
+
+struct gcmd_exchange_pages {
+	__le64 type; /* VIRTIO_MEMBALLOON_GCMD_EXCHANGE_PAGES */
+	__le64 from_balloon;
+	__le64 to_balloon;
+};
+
+struct hcmd_min_balloon {
+	__le64 type; /* VIRTIO_MEMBALLOON_HCMD_MIN_BALLOON */
+	__le64 minimum;
+};
+
+struct virtio_memballoon {
+	/* Protects contents of structure (kthread vs. compaction callback). */
+	struct mutex lock;
+
+	struct virtio_device *vdev;
+	struct virtqueue *gcmd_vq, *hcmd_vq;
+
+	/* The thread servicing the balloon. */
+	struct task_struct *thread;
+
+	/* For interrupt/suspend to wake balloon thread. */
+	wait_queue_head_t wait;
+
+	/* How many bytes are we supposed to have in balloon? */
+	s64 min_target;
+
+	/* How many bytes do we have in the balloon now? */
+	u64 num_bytes;
+
+	/* Previous number of swapped bytes. */
+	u64 prev_bytes_swapped;
+
+	/* How many times (in a row) did we have excess memory? */
+	unsigned long num_excess;
+
+	/* Anyone else remember Eeyore? */
+	bool broken;
+
+	/*
+	 * The pages we've told the Host we're not using are enqueued
+	 * at vb_dev_info->pages list.
+	 */
+	struct balloon_dev_info *vb_dev_info;
+
+	/* To avoid kmalloc, we use single hcmd and gcmd buffers. */
+	union gcmd {
+		__le64 type;
+		struct gcmd_get_pages get_pages;
+		struct gcmd_give_pages give_pages;
+		struct gcmd_exchange_pages exchange_pages;
+	} gcmd;
+
+	union hcmd {
+		__le64 type;
+		struct hcmd_min_balloon min_balloon;
+	} hcmd;
+};
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_MEMBALLOON, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static void wake_balloon(struct virtqueue *vq)
+{
+	struct virtio_memballoon *vb = vq->vdev->priv;
+
+	wake_up(&vb->wait);
+}
+
+/* Command is in vb->gcmd, lock is held. */
+static bool send_gcmd(struct virtio_memballoon *vb, size_t len)
+{
+	struct scatterlist sg;
+	unsigned int wlen;
+
+	BUG_ON(len > sizeof(vb->gcmd));
+	sg_init_one(&sg, &vb->gcmd, len);
+
+	/*
+	 * We should always be able to add one buffer to an empty queue.
+	 * If not, it's a broken device.
+	 */
+	if (virtqueue_add_outbuf(vb->gcmd_vq, &sg, 1, vb, GFP_KERNEL) != 0
+	    || !virtqueue_kick(vb->gcmd_vq)) {
+		vb->broken = true;
+		return false;
+	}
+
+	/* When host has read buffer, this completes via wake_balloon */
+	wait_event(vb->wait,
+		   virtqueue_get_buf(vb->gcmd_vq, &wlen)
+		   || (vb->broken = virtqueue_is_broken(vb->gcmd_vq)));
+
+	return !vb->broken;
+}
+
+/* Give a batch of pages to the balloon. */
+static void give_to_balloon(struct virtio_memballoon *vb)
+{
+	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
+	unsigned int i;
+
+	vb->gcmd.give_pages.type = cpu_to_le64(VIRTIO_MEMBALLOON_GCMD_GIVE_PAGES);
+	for (i = 0; i < BATCH_PAGES; i++) {
+		struct page *page = balloon_page_enqueue(vb_dev_info);
+
+		if (!page) {
+			dev_info_ratelimited(&vb->vdev->dev,
+					     "Out of puff! Can't get page\n");
+			break;
+		}
+
+		vb->gcmd.give_pages.pages[i] = page_to_pfn(page) << PAGE_SHIFT;
+		adjust_managed_page_count(page, -1);
+	}
+
+	/* Did we find any pages to give? */
+	if (i) {
+		send_gcmd(vb, offsetof(struct gcmd_give_pages, pages[i]));
+		vb->num_bytes += (u64)i * PAGE_SIZE;
+		bytes += (u64)i * PAGE_SIZE;
+	}
+}
+
+static void get_pages_from_balloon(struct virtio_memballoon *vb,
+				   size_t num_pages)
+{
+	while (num_pages) {
+		/* We can only do one array worth at a time. */
+		size_t num, i;
+		num = min(num_pages, ARRAY_SIZE(vb->gcmd.get_pages.pages));
+
+		vb->gcmd.get_pages.type
+			= cpu_to_le64(VIRTIO_MEMBALLOON_GCMD_GET_PAGES);
+		for (i = 0; i < num; i++) {
+			struct page *page = balloon_page_dequeue(vb->vb_dev_info);
+
+			/* In case we ran out of pages (compaction) */
+			if (!page)
+				break;
+
+			vb->gcmd.get_pages.pages[i]
+				= page_to_pfn(page) << PAGE_SHIFT;
+		}
+
+		/* No pages at all? */
+		num = i;
+		if (!num)
+			break;
+
+		send_gcmd(vb, offsetof(struct gcmd_get_pages, pages[num]));
+		vb->num_bytes -= num * PAGE_SIZE;
+		bytes -= num * PAGE_SIZE;
+
+		/* Now release those pages. */
+		for (i = 0; i < num; i++) {
+			struct page *page;
+
+			page = pfn_to_page(vb->gcmd.get_pages.pages[i]
+					   >> PAGE_SHIFT);
+			balloon_page_free(page);
+			adjust_managed_page_count(page, 1);
+		}
+		num_pages -= num;
+	}
+}
+
+static void get_from_balloon(struct virtio_memballoon *vb, u64 bytes)
+{
+	/* Can't have more than are in the balloon. */
+	if (bytes > vb->num_bytes)
+		bytes = vb->num_bytes;
+
+	/* Can't go under host-established minimum */
+	if (vb->num_bytes - bytes < vb->min_target) {
+		BUG_ON(vb->num_bytes < vb->min_target);
+		bytes = vb->num_bytes - vb->min_target;
+	}
+
+	get_pages_from_balloon(vb, bytes / PAGE_SIZE);
+}
+
+static bool process_hcmd(struct virtio_memballoon *vb)
+{
+	union hcmd *hcmd = NULL;
+	unsigned int cmdlen;
+	struct scatterlist sg;
+
+	if (vb->broken)
+		return false;
+
+	mutex_lock(&vb->lock);
+	hcmd = virtqueue_get_buf(vb->hcmd_vq, &cmdlen);
+	if (!hcmd) {
+		mutex_unlock(&vb->lock);
+		return false;
+	}
+
+	switch (hcmd->type) {
+	case cpu_to_le64(VIRTIO_MEMBALLOON_HCMD_MIN_BALLOON):
+		vb->min_target = le64_to_cpu(hcmd->min_balloon.minimum);
+		break;
+	default:
+		dev_err_ratelimited(&vb->vdev->dev, "Unknown hcmd %llu\n",
+				    le64_to_cpu(hcmd->type));
+		break;
+	}
+
+	/* Re-queue the hcmd for next time. */
+	sg_init_one(&sg, &vb->hcmd, sizeof(vb->hcmd));
+	virtqueue_add_inbuf(vb->hcmd_vq, &sg, 1, vb, GFP_KERNEL);
+
+	mutex_unlock(&vb->lock);
+	return true;
+}
+
+static u64 swap_used(void)
+{
+	struct sysinfo i;
+	si_swapinfo(&i);
+	return (u64)(i.totalswap - i.freeswap) * PAGE_SIZE;
+}
+
+extern unsigned long meminfo_available_pages(void);
+
+/* FIXME: WORST IMPLEMENTATION EVER.  DO NOT USE! */
+static int memballoon(void *_vballoon)
+{
+	struct virtio_memballoon *vb = _vballoon;
+
+	set_freezable();
+	while (!kthread_should_stop()) {
+		u64 swapped;
+
+		try_to_freeze();
+
+		/* Wait for (up to) a second. */
+		wait_event_interruptible_timeout(vb->wait,
+						 kthread_should_stop()
+						 || freezing(current)
+						 || process_hcmd(vb),
+						 HZ);
+
+		mutex_lock(&vb->lock);
+
+		/* Host insists we give up more memory? */
+		if (vb->num_bytes < vb->min_target) {
+			give_to_balloon(vb);
+			vb->num_excess = 0;
+			goto again;
+		}
+
+		swapped = swap_used();
+
+		/* We've swapped?  Get that much memory from balloon. */
+		if (swapped > vb->prev_bytes_swapped) {
+			get_from_balloon(vb, swapped - vb->prev_bytes_swapped);
+			vb->num_excess = 0;
+		} else {
+			/* Do we have spare memory? */
+			unsigned long avail_pages;
+
+			avail_pages = meminfo_available_pages();
+			if (avail_pages < BATCH_PAGES)
+				vb->num_excess = 0;
+			else {
+				vb->num_excess++;
+
+				/* 10 times in a row?  Give away memory. */
+				if (vb->num_excess > 10)
+					give_to_balloon(vb);
+			}
+		}
+		vb->prev_bytes_swapped = swapped;
+	again:
+		mutex_unlock(&vb->lock);
+	}
+
+	return 0;
+}
+
+static int init_vqs(struct virtio_memballoon *vb)
+{
+	struct virtqueue *vqs[2];
+	vq_callback_t *callbacks[] = { wake_balloon, wake_balloon };
+	const char *names[] = { "gcmd", "hcmd" };
+	struct scatterlist sg;
+	int err;
+
+	err = vb->vdev->config->find_vqs(vb->vdev, 2, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vb->gcmd_vq = vqs[0];
+	vb->hcmd_vq = vqs[1];
+
+	/*
+	 * Prime this virtqueue with one buffer so the hypervisor can
+	 * use it to signal us later (it can't be broken yet!).
+	 */
+	sg_init_one(&sg, &vb->hcmd, sizeof(vb->hcmd));
+	if (virtqueue_add_inbuf(vb->hcmd_vq, &sg, 1, vb, GFP_KERNEL) < 0)
+		BUG();
+	virtqueue_kick(vb->hcmd_vq);
+
+	return 0;
+}
+
+static const struct address_space_operations virtio_memballoon_aops;
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * virtmemballoon_migratepage - perform the balloon page migration on behalf of
+ *			     a compaction thread.     (called under page lock)
+ * @mapping: the page->mapping which will be assigned to the new migrated page.
+ * @newpage: page that will replace the isolated page after migration finishes.
+ * @page   : the isolated (old) page that is about to be migrated to newpage.
+ * @mode   : compaction mode -- not used for balloon page migration.
+ *
+ * After a ballooned page gets isolated by compaction procedures, this is the
+ * function that performs the page migration on behalf of a compaction thread
+ * The page migration for virtio balloon is done in a simple swap fashion which
+ * follows these two macro steps:
+ *  1) insert newpage into vb->pages list and update the host about it;
+ *  2) update the host about the old page removed from vb->pages list;
+ *
+ * This function performs the balloon page migration task.
+ * Called through balloon_mapping->a_ops->migratepage
+ */
+static int virtmemballoon_migratepage(struct address_space *mapping,
+		struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+	struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
+	struct virtio_memballoon *vb;
+	unsigned long flags;
+	int err;
+
+	BUG_ON(!vb_dev_info);
+
+	vb = vb_dev_info->balloon_device;
+
+	/*
+	 * In order to avoid lock contention while migrating pages concurrently
+	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
+	 * this turn, as it is easier to retry the page migration later.
+	 * This also prevents fill_balloon() getting stuck into a mutex
+	 * recursion in the case it ends up triggering memory compaction
+	 * while it is attempting to inflate the balloon.
+	 */
+	if (!mutex_trylock(&vb->lock))
+		return -EAGAIN;
+
+	vb->gcmd.exchange_pages.type =
+		cpu_to_le64(VIRTIO_MEMBALLOON_GCMD_EXCHANGE_PAGES);
+	vb->gcmd.exchange_pages.from_balloon = page_to_pfn(page) << PAGE_SHIFT;
+	vb->gcmd.exchange_pages.to_balloon = page_to_pfn(newpage) << PAGE_SHIFT;
+	if (!send_gcmd(vb, sizeof(vb->gcmd.exchange_pages))) {
+		err = -EIO;
+		goto unlock;
+	}
+
+	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
+	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
+	vb_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
+
+	/*
+	 * It's safe to delete page->lru here because this page is at
+	 * an isolated migration list, and this step is expected to happen here
+	 */
+	balloon_page_delete(page);
+	err = MIGRATEPAGE_BALLOON_SUCCESS;
+
+unlock:
+	mutex_unlock(&vb->lock);
+	return err;
+}
+
+/* define the balloon_mapping->a_ops callback to allow balloon page migration */
+static const struct address_space_operations virtio_memballoon_aops = {
+			.migratepage = virtmemballoon_migratepage,
+};
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+static int check_pagesize(struct virtio_device *vdev, u32 *dev_features)
+{
+	u32 pgsize_f = 1U << VIRTIO_MEMBALLOON_F_PAGESIZE(PAGE_SHIFT);
+
+	/* FIXME: Support large pages. */
+	if (!(*dev_features & pgsize_f)) {
+		dev_warn(&vdev->dev, "Need pagesize %u (got features 0x%x)\n",
+			 PAGE_SHIFT, *dev_features);
+		return -EINVAL;
+	}
+
+	/* We can't accept more than one, so make others vanish. */
+	*dev_features &= (1 << VIRTIO_MEMBALLOON_F_EXTRA_MEM) | pgsize_f;
+	return 0;
+}
+
+static int virtmemballoon_probe(struct virtio_device *vdev)
+{
+	struct virtio_memballoon *vb;
+	struct address_space *vb_mapping;
+	struct balloon_dev_info *vb_devinfo;
+	int err;
+
+	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
+	if (!vb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vb->min_target = 0;
+	vb->num_bytes = 0;
+	vb->num_excess = 0;
+	vb->prev_bytes_swapped = 0;
+	vb->broken = false;
+	mutex_init(&vb->lock);
+	init_waitqueue_head(&vb->wait);
+	vb->vdev = vdev;
+
+	vb_devinfo = balloon_devinfo_alloc(vb);
+	if (IS_ERR(vb_devinfo)) {
+		err = PTR_ERR(vb_devinfo);
+		goto out_free_vb;
+	}
+
+	vb_mapping = balloon_mapping_alloc(vb_devinfo,
+					   (balloon_compaction_check()) ?
+					   &virtio_memballoon_aops : NULL);
+	if (IS_ERR(vb_mapping)) {
+		/*
+		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
+		 * This means !CONFIG_BALLOON_COMPACTION; otherwise we bail out.
+		 */
+		err = PTR_ERR(vb_mapping);
+		if (err != -EOPNOTSUPP)
+			goto out_free_vb_devinfo;
+	}
+
+	vb->vb_dev_info = vb_devinfo;
+
+	err = init_vqs(vb);
+	if (err)
+		goto out_free_vb_mapping;
+
+	vb->thread = kthread_run(memballoon, vb, "vmemballoon");
+	if (IS_ERR(vb->thread)) {
+		err = PTR_ERR(vb->thread);
+		goto out_del_vqs;
+	}
+
+	return 0;
+
+out_del_vqs:
+	vdev->config->del_vqs(vdev);
+out_free_vb_mapping:
+	balloon_mapping_free(vb_mapping);
+out_free_vb_devinfo:
+	balloon_devinfo_free(vb_devinfo);
+out_free_vb:
+	kfree(vb);
+out:
+	return err;
+}
+
+/* FIXME: Leave pages alone during suspend, rather than taking them
+ * all back! */
+static void remove_common(struct virtio_memballoon *vb)
+{
+	/* There might be pages left in the balloon: free them. */
+	get_from_balloon(vb, vb->num_bytes);
+
+	/* Now we reset the device so we can clean up the queues. */
+	vb->vdev->config->reset(vb->vdev);
+	vb->vdev->config->del_vqs(vb->vdev);
+}
+
+static void virtmemballoon_remove(struct virtio_device *vdev)
+{
+	struct virtio_memballoon *vb = vdev->priv;
+
+	kthread_stop(vb->thread);
+	remove_common(vb);
+	balloon_mapping_free(vb->vb_dev_info->mapping);
+	balloon_devinfo_free(vb->vb_dev_info);
+	kfree(vb);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtmemballoon_freeze(struct virtio_device *vdev)
+{
+	struct virtio_memballoon *vb = vdev->priv;
+
+	/*
+	 * The kthread is already frozen by the PM core before this
+	 * function is called.
+	 */
+
+	remove_common(vb);
+	return 0;
+}
+
+static int virtmemballoon_restore(struct virtio_device *vdev)
+{
+	return init_vqs(vdev->priv);
+}
+#endif
+
+static unsigned int features[] = {
+	/* FIXME: Support VIRTIO_MEMBALLOON_F_EXTRA_MEM! */
+};
+
+static struct virtio_driver virtio_memballoon_driver = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
+	.filter_features = check_pagesize,
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table =	id_table,
+	.probe =	virtmemballoon_probe,
+	.remove =	virtmemballoon_remove,
+#ifdef CONFIG_PM_SLEEP
+	.freeze	=	virtmemballoon_freeze,
+	.restore =	virtmemballoon_restore,
+#endif
+};
+
+module_virtio_driver(virtio_memballoon_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio memballoon driver");
+MODULE_LICENSE("GPL");
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..9aaedaa1d9ca 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -20,6 +20,65 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 {
 }
 
+/* FIXME: Ick! */
+unsigned long meminfo_available_pages(void)
+{
+	struct sysinfo i;
+	unsigned long committed;
+	long cached;
+	long available;
+	unsigned long pagecache;
+	unsigned long wmark_low = 0;
+	unsigned long pages[NR_LRU_LISTS];
+	struct zone *zone;
+	int lru;
+
+	si_meminfo(&i);
+	committed = percpu_counter_read_positive(&vm_committed_as);
+
+	cached = global_page_state(NR_FILE_PAGES) -
+			total_swapcache_pages() - i.bufferram;
+	if (cached < 0)
+		cached = 0;
+
+	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+		pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
+	for_each_zone(zone)
+		wmark_low += zone->watermark[WMARK_LOW];
+
+	/*
+	 * Estimate the amount of memory available for userspace allocations,
+	 * without causing swapping.
+	 *
+	 * Free memory cannot be taken below the low watermark, before the
+	 * system starts swapping.
+	 */
+	available = i.freeram - wmark_low;
+
+	/*
+	 * Not all the page cache can be freed, otherwise the system will
+	 * start swapping. Assume at least half of the page cache, or the
+	 * low watermark worth of cache, needs to stay.
+	 */
+	pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+	pagecache -= min(pagecache / 2, wmark_low);
+	available += pagecache;
+
+	/*
+	 * Part of the reclaimable swap consists of items that are in use,
+	 * and cannot be freed. Cap this estimate at the low watermark.
+	 */
+	available += global_page_state(NR_SLAB_RECLAIMABLE) -
+		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+
+	if (available < 0)
+		available = 0;
+
+	return available;
+}
+EXPORT_SYMBOL_GPL(meminfo_available_pages);
+
 static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 284fc3a05f7b..fbd1263b65b7 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -33,11 +33,12 @@
 #define VIRTIO_ID_BLOCK		2 /* virtio block */
 #define VIRTIO_ID_CONSOLE	3 /* virtio console */
 #define VIRTIO_ID_RNG		4 /* virtio rng */
-#define VIRTIO_ID_BALLOON	5 /* virtio balloon */
+#define VIRTIO_ID_BALLOON	5 /* virtio balloon (legacy) */
 #define VIRTIO_ID_RPMSG		7 /* virtio remote processor messaging */
 #define VIRTIO_ID_SCSI		8 /* virtio scsi */
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF	       12 /* Virtio caif */
+#define VIRTIO_ID_MEMBALLOON   13 /* virtio memballoon */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_memballoon.h b/include/uapi/linux/virtio_memballoon.h
new file mode 100644
index 000000000000..e0bfbe3ce61a
--- /dev/null
+++ b/include/uapi/linux/virtio_memballoon.h
@@ -0,0 +1,88 @@
+#ifndef _LINUX_VIRTIO_MEMBALLOON_H
+#define _LINUX_VIRTIO_MEMBALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+
+/* This means the balloon can go negative (ie. add memory to system) */
+#define VIRTIO_MEMBALLOON_F_EXTRA_MEM	0
+/* Note: device MUST only accept one of these! */
+#define VIRTIO_MEMBALLOON_F_4K		1
+#define VIRTIO_MEMBALLOON_F_8K		2
+#define VIRTIO_MEMBALLOON_F_16K		3
+#define VIRTIO_MEMBALLOON_F_32K		4
+#define VIRTIO_MEMBALLOON_F_64K		5
+#define VIRTIO_MEMBALLOON_F_128K	6
+#define VIRTIO_MEMBALLOON_F_256K	7
+#define VIRTIO_MEMBALLOON_F_512K	8
+#define VIRTIO_MEMBALLOON_F_1M		9
+#define VIRTIO_MEMBALLOON_F_2M		10
+#define VIRTIO_MEMBALLOON_F_4M		11
+#define VIRTIO_MEMBALLOON_F_8M		12
+#define VIRTIO_MEMBALLOON_F_16M		13
+#define VIRTIO_MEMBALLOON_F_32M		14
+#define VIRTIO_MEMBALLOON_F_64M		15
+#define VIRTIO_MEMBALLOON_F_128M	16
+#define VIRTIO_MEMBALLOON_F_256M	17
+#define VIRTIO_MEMBALLOON_F_512M	18
+#define VIRTIO_MEMBALLOON_F_1G		19
+#define VIRTIO_MEMBALLOON_F_2G		20
+#define VIRTIO_MEMBALLOON_F_4G		21
+#define VIRTIO_MEMBALLOON_F_8G		22
+#define VIRTIO_MEMBALLOON_F_16G		23
+
+#define VIRTIO_MEMBALLOON_F_PAGESIZE(bits) ((bits) - 12 + 1)
+
+/* Guest->host command queue. */
+
+/*
+ * Ask the host for more pages.
+ * Followed by array of 1 or more device-readable le64 pageaddr's.
+ * Must be in normal guest memory unless VIRTIO_MEMBALLOON_F_EXTRA_MEM.
+ */
+#define VIRTIO_MEMBALLOON_GCMD_GET_PAGES	((__le64)0)
+/*
+ * Give the host more pages.
+ * Followed by array of 1 or more device-readable le64 pageaddr's
+ */
+#define VIRTIO_MEMBALLOON_GCMD_GIVE_PAGES	((__le64)1)
+/*
+ * Exchange pages in the balloon.
+ * Followed by array of 2N readable le64 pageaddr's.  First N: to extract from
+ * balloon, next N: to add to the balloon.
+ */
+#define VIRTIO_MEMBALLOON_GCMD_EXCHANGE_PAGES ((__le64)2)
+
+/* Host->guest command queue. */
+
+/*
+ * Followed by s64 of new balloon minimum (only negative if
+ * VIRTIO_MEMBALLOON_F_EXTRA_MEM).
+ */
+#define VIRTIO_MEMBALLOON_HCMD_MIN_BALLOON	((__le64)0x8000)
+
+#endif /* _LINUX_VIRTIO_MEMBALLOON_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6992b6..13e4918ae50a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2591,6 +2591,7 @@ void si_swapinfo(struct sysinfo *val)
 	val->totalswap = total_swap_pages + nr_to_be_unused;
 	spin_unlock(&swap_lock);
 }
+EXPORT_SYMBOL_GPL(si_swapinfo);
 
 /*
  * Verify that a swap entry is valid and increment its swap map count.
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 32cf2ce15d69..8b3cf658be81 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -69,6 +69,10 @@ typedef uint8_t u8;
 #include <linux/virtio_ring.h>
 #include <asm/bootparam.h>
 #include "../../include/linux/lguest_launcher.h"
+#include "../../include/uapi/linux/virtio_memballoon.h"
+#ifndef VIRTIO_ID_MEMBALLOON
+#define VIRTIO_ID_MEMBALLOON   13 /* virtio memballoon */
+#endif
 
 #define BRIDGE_PFX "bridge:"
 #ifndef SIOCBRADDIF
@@ -1127,6 +1131,8 @@ static void handle_output(unsigned long addr)
 		for (vq = i->vq; vq; vq = vq->next) {
 			if (addr != vq->config.pfn*getpagesize())
 				continue;
+			if (!vq->service)
+				return;
 			errx(1, "Notification on %s before setup!", i->name);
 		}
 	}
@@ -1792,6 +1798,110 @@ static void setup_rng(void)
 
 	verbose("device %u: rng\n", devices.device_num++);
 }
+
+/*
+ * Our balloon device just maps and unmaps pages.
+ */
+struct balloon_info {
+	unsigned long num_pages;
+};
+
+static void *iov_consume_pfn(struct iovec iov[], unsigned num_iov)
+{
+	u64 addr;
+
+	iov_consume(iov, num_iov, &addr, sizeof(addr));
+	addr = le64_to_cpu(addr);
+	if (addr >= guest_limit || addr + getpagesize() > guest_limit)
+		errx(1, "Page address %llu is outside limit %lu",
+		     addr, guest_limit);
+	if (addr % getpagesize())
+		errx(1, "Page address %llu is unaligned", addr);
+
+	return guest_base + addr;
+}
+
+static void release_balloon_page(void *pagestart)
+{
+	char marker[100];
+
+	sprintf(marker, "GIVEN PAGE %p", pagestart);
+	mprotect(pagestart, getpagesize(), PROT_READ|PROT_WRITE|PROT_EXEC);
+	if (strcmp(pagestart, marker) != 0)
+		errx(1, "Page %x does not have marker! Expect %s, got '%s'",
+		     pagestart - guest_base, marker, (char *)pagestart);
+	memset(pagestart, 77, strlen(marker));
+}
+
+static void accept_balloon_page(void *pagestart)
+{
+	sprintf(pagestart, "GIVEN PAGE %p", pagestart);
+	mprotect(pagestart, getpagesize(), PROT_NONE);
+}
+
+/* The balloon sends us pages, or asks for them back. */
+static void balloon_gcmd(struct virtqueue *vq)
+{
+	unsigned int head, in_num, out_num;
+	struct balloon_info *balloon_info = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
+	u64 cmd;
+
+	/* First we need a buffer from the Guest's virtqueue. */
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+	if (in_num)
+		errx(1, "Input buffers in balloon gcmd?");
+
+	iov_consume(iov, out_num, &cmd, sizeof(cmd));
+	switch (le64_to_cpu(cmd)) {
+	case VIRTIO_MEMBALLOON_GCMD_GET_PAGES:
+		while (!iov_empty(iov, out_num)) {
+			release_balloon_page(iov_consume_pfn(iov, out_num));
+			balloon_info->num_pages--;
+		}
+		break;
+	case VIRTIO_MEMBALLOON_GCMD_GIVE_PAGES:
+		while (!iov_empty(iov, out_num)) {
+			balloon_info->num_pages++;
+			accept_balloon_page(iov_consume_pfn(iov, out_num));
+		}
+		break;
+	case VIRTIO_MEMBALLOON_GCMD_EXCHANGE_PAGES:
+	default:
+		errx(1, "Unsupported op %llu", le64_to_cpu(cmd));
+	}
+
+	/* Tell the Guest we've done it. */
+	add_used(vq, head, 0);
+}
+
+/*
+ * This creates a "memballoon" device for the Guest, to give us its leftover
+ * pages.
+ */
+static void setup_balloon(void)
+{
+	struct device *dev;
+	struct balloon_info *balloon_info = malloc(sizeof(*balloon_info));
+
+	balloon_info->num_pages = 0;
+
+	/* Create the new device. */
+	dev = new_device("memballoon", VIRTIO_ID_MEMBALLOON);
+	dev->priv = balloon_info;
+
+	/*
+	 * The device has two virtqueues, but the second is only for us to
+	 * reduce its memory, which we don't do.
+	 */
+	add_virtqueue(dev, VIRTQUEUE_NUM, balloon_gcmd);
+	add_virtqueue(dev, VIRTQUEUE_NUM, NULL);
+
+	/* Offer our page size (2^12). */
+	add_feature(dev, VIRTIO_MEMBALLOON_F_PAGESIZE(ffs(getpagesize())-1));
+
+	verbose("device %u: memballoon\n", devices.device_num++);
+}
 /* That's the end of device setup. */
 
 /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
@@ -1861,13 +1971,14 @@ static struct option opts[] = {
 	{ "initrd", 1, NULL, 'i' },
 	{ "username", 1, NULL, 'u' },
 	{ "chroot", 1, NULL, 'c' },
+	{ "balloon", 0, NULL, 'a' },
 	{ NULL },
 };
 static void usage(void)
 {
 	errx(1, "Usage: lguest [--verbose] "
 	     "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n"
-	     "|--block=<filename>|--initrd=<filename>]...\n"
+	     "|--block=<filename>|--initrd=<filename>]|--balloon...\n"
 	     "<mem-in-mb> vmlinux [args...]");
 }
 
@@ -1953,6 +2064,9 @@ int main(int argc, char *argv[])
 		case 'c':
 			chroot_path = optarg;
 			break;
+		case 'a':
+			setup_balloon();
+			break;
 		default:
 			warnx("Unknown argument %s", argv[optind]);
 			usage();



[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [List Home]