On Thu, Sep 10, 2020 at 01:13:51PM +0200, Guennadi Liakhovetski wrote:
Linux supports running the RPMsg protocol over the VirtIO transport protocol, but currently there is only support for VirtIO clients and no support for VirtIO servers. This patch adds a vhost-based RPMsg server implementation, which makes it possible to use RPMsg over VirtIO between guest VMs and the host.
I now get the client/server concept you are describing above but that happened only after a lot of mental gymnastics. If you drop the whole client/server concept and concentrate on what this patch does, things will go better. I would personally go with what you have in the Kconfig:
Vhost RPMsg API allows vhost drivers to communicate with VirtIO
drivers on guest VMs, using the RPMsg over VirtIO protocol.
It is concise but describes exactly what this patch provide.
Signed-off-by: Guennadi Liakhovetski guennadi.liakhovetski@linux.intel.com
drivers/vhost/Kconfig | 7 + drivers/vhost/Makefile | 3 + drivers/vhost/rpmsg.c | 370 ++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost_rpmsg.h | 74 ++++++++ 4 files changed, 454 insertions(+) create mode 100644 drivers/vhost/rpmsg.c create mode 100644 drivers/vhost/vhost_rpmsg.h
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 587fbae06182..ee1a19b7ab3d 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -38,6 +38,13 @@ config VHOST_NET To compile this driver as a module, choose M here: the module will be called vhost_net.
+config VHOST_RPMSG
- tristate
- select VHOST
- help
Vhost RPMsg API allows vhost drivers to communicate with VirtIO
drivers on guest VMs, using the RPMsg over VirtIO protocol.
I suppose you intend this to be selectable from another config option?
config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index f3e1897cce85..9cf459d59f97 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -2,6 +2,9 @@ obj-$(CONFIG_VHOST_NET) += vhost_net.o vhost_net-y := net.o
+obj-$(CONFIG_VHOST_RPMSG) += vhost_rpmsg.o +vhost_rpmsg-y := rpmsg.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o
diff --git a/drivers/vhost/rpmsg.c b/drivers/vhost/rpmsg.c new file mode 100644 index 000000000000..0ddee5b5f017 --- /dev/null +++ b/drivers/vhost/rpmsg.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0-only +/*
- Copyright(c) 2020 Intel Corporation. All rights reserved.
- Author: Guennadi Liakhovetski guennadi.liakhovetski@linux.intel.com
- Vhost RPMsg VirtIO interface provides a set of functions to be used on the
- host side as a counterpart to the guest side RPMsg VirtIO API, provided by
- drivers/rpmsg/virtio_rpmsg_bus.c. This API can be used by any vhost driver to
- handle RPMsg specific virtqueue processing.
- Vhost drivers, using this API will use their own VirtIO device IDs, that
- should then also be added to the ID table in virtio_rpmsg_bus.c
- */
+#include <linux/compat.h> +#include <linux/file.h> +#include <linux/miscdevice.h>
As far as I can tell the above two are not needed.
+#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/rpmsg/virtio.h> +#include <linux/vhost.h> +#include <uapi/linux/rpmsg.h>
+#include "vhost.h" +#include "vhost_rpmsg.h"
+/*
- All virtio-rpmsg virtual queue kicks always come with just one buffer -
- either input or output, but we can also handle split messages
- */
+static int vhost_rpmsg_get_msg(struct vhost_virtqueue *vq, unsigned int *cnt) +{
- struct vhost_rpmsg *vr = container_of(vq->dev, struct vhost_rpmsg, dev);
- unsigned int out, in;
- int head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
- if (head < 0) {
vq_err(vq, "%s(): error %d getting buffer\n",
__func__, head);
return head;
- }
- /* Nothing new? */
- if (head == vq->num)
return head;
- if (vq == &vr->vq[VIRTIO_RPMSG_RESPONSE]) {
if (out) {
vq_err(vq, "%s(): invalid %d output in response queue\n",
__func__, out);
goto return_buf;
}
*cnt = in;
- }
- if (vq == &vr->vq[VIRTIO_RPMSG_REQUEST]) {
if (in) {
vq_err(vq, "%s(): invalid %d input in request queue\n",
__func__, in);
goto return_buf;
}
*cnt = out;
- }
- return head;
+return_buf:
- vhost_add_used(vq, head, 0);
- return -EINVAL;
+}
+static const struct vhost_rpmsg_ept *vhost_rpmsg_ept_find(struct vhost_rpmsg *vr, int addr) +{
- unsigned int i;
- for (i = 0; i < vr->n_epts; i++)
if (vr->ept[i].addr == addr)
return vr->ept + i;
- return NULL;
+}
+/*
- if len < 0, then for reading a request, the complete virtual queue buffer
- size is prepared, for sending a response, the length in the iterator is used
- */
+int vhost_rpmsg_start_lock(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter,
unsigned int qid, ssize_t len)
- __acquires(vq->mutex)
+{
- struct vhost_virtqueue *vq = vr->vq + qid;
- unsigned int cnt;
- ssize_t ret;
- size_t tmp;
- if (qid >= VIRTIO_RPMSG_NUM_OF_VQS)
return -EINVAL;
- iter->vq = vq;
- mutex_lock(&vq->mutex);
- vhost_disable_notify(&vr->dev, vq);
- iter->head = vhost_rpmsg_get_msg(vq, &cnt);
- if (iter->head == vq->num)
iter->head = -EAGAIN;
- if (iter->head < 0) {
ret = iter->head;
goto unlock;
- }
- tmp = iov_length(vq->iov, cnt);
- if (tmp < sizeof(iter->rhdr)) {
vq_err(vq, "%s(): size %zu too small\n", __func__, tmp);
ret = -ENOBUFS;
goto return_buf;
- }
- switch (qid) {
- case VIRTIO_RPMSG_REQUEST:
if (len >= 0) {
if (tmp < sizeof(iter->rhdr) + len) {
ret = -ENOBUFS;
goto return_buf;
}
tmp = len + sizeof(iter->rhdr);
}
/* len is now the size of the payload */
iov_iter_init(&iter->iov_iter, WRITE, vq->iov, cnt, tmp);
/* Read the RPMSG header with endpoint addresses */
tmp = copy_from_iter(&iter->rhdr, sizeof(iter->rhdr), &iter->iov_iter);
if (tmp != sizeof(iter->rhdr)) {
vq_err(vq, "%s(): got %zu instead of %zu\n", __func__,
tmp, sizeof(iter->rhdr));
ret = -EIO;
goto return_buf;
}
iter->ept = vhost_rpmsg_ept_find(vr, vhost32_to_cpu(vq, iter->rhdr.dst));
if (!iter->ept) {
vq_err(vq, "%s(): no endpoint with address %d\n",
__func__, vhost32_to_cpu(vq, iter->rhdr.dst));
ret = -ENOENT;
goto return_buf;
}
/* Let the endpoint read the payload */
if (iter->ept->read) {
ret = iter->ept->read(vr, iter);
if (ret < 0)
goto return_buf;
iter->rhdr.len = cpu_to_vhost16(vq, ret);
} else {
iter->rhdr.len = 0;
}
/* Prepare for the response phase */
iter->rhdr.dst = iter->rhdr.src;
iter->rhdr.src = cpu_to_vhost32(vq, iter->ept->addr);
I'm a little puzzled here - what will the response look like? And why is it prepared here? From what I can see doing so introduces coupling with function handle_rpmsg_req_single(). I think confirmation of reception should be handled by endpoints rather than in the core.
break;
- case VIRTIO_RPMSG_RESPONSE:
if (!iter->ept && iter->rhdr.dst != cpu_to_vhost32(vq, RPMSG_NS_ADDR)) {
/*
* Usually the iterator is configured when processing a
* message on the request queue, but it's also possible
* to send a message on the response queue without a
* preceding request, in that case the iterator must
* contain source and destination addresses.
*/
iter->ept = vhost_rpmsg_ept_find(vr, vhost32_to_cpu(vq, iter->rhdr.src));
if (!iter->ept) {
ret = -ENOENT;
goto return_buf;
}
}
if (len >= 0) {
if (tmp < sizeof(iter->rhdr) + len) {
ret = -ENOBUFS;
goto return_buf;
}
iter->rhdr.len = cpu_to_vhost16(vq, len);
tmp = len + sizeof(iter->rhdr);
}
/* len is now the size of the payload */
iov_iter_init(&iter->iov_iter, READ, vq->iov, cnt, tmp);
/* Write the RPMSG header with endpoint addresses */
tmp = copy_to_iter(&iter->rhdr, sizeof(iter->rhdr), &iter->iov_iter);
if (tmp != sizeof(iter->rhdr)) {
ret = -EIO;
goto return_buf;
}
/* Let the endpoint write the payload */
I would specifically mention that namespace payloads are taken care of by vhost_rpmsg_ns_announce(). That makes it easier for people to connect the dots.
if (iter->ept && iter->ept->write) {
ret = iter->ept->write(vr, iter);
if (ret < 0)
goto return_buf;
}
break;
- }
- return 0;
+return_buf:
- vhost_add_used(vq, iter->head, 0);
+unlock:
- vhost_enable_notify(&vr->dev, vq);
- mutex_unlock(&vq->mutex);
- return ret;
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_start_lock);
+size_t vhost_rpmsg_copy(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter,
void *data, size_t size)
+{
- /*
* We could check for excess data, but copy_{to,from}_iter() don't do
* that either
*/
- if (iter->vq == vr->vq + VIRTIO_RPMSG_RESPONSE)
return copy_to_iter(data, size, &iter->iov_iter);
- return copy_from_iter(data, size, &iter->iov_iter);
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_copy);
+int vhost_rpmsg_finish_unlock(struct vhost_rpmsg *vr,
struct vhost_rpmsg_iter *iter)
- __releases(vq->mutex)
+{
- if (iter->head >= 0)
vhost_add_used_and_signal(iter->vq->dev, iter->vq, iter->head,
vhost16_to_cpu(iter->vq, iter->rhdr.len) +
sizeof(iter->rhdr));
- vhost_enable_notify(&vr->dev, iter->vq);
- mutex_unlock(&iter->vq->mutex);
- return iter->head;
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_finish_unlock);
+/*
- Return false to terminate the external loop only if we fail to obtain either
- a request or a response buffer
- */
+static bool handle_rpmsg_req_single(struct vhost_rpmsg *vr,
struct vhost_virtqueue *vq)
+{
- struct vhost_rpmsg_iter iter;
- int ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_REQUEST, -EINVAL);
- if (!ret)
ret = vhost_rpmsg_finish_unlock(vr, &iter);
- if (ret < 0) {
if (ret != -EAGAIN)
vq_err(vq, "%s(): RPMSG processing failed %d\n",
__func__, ret);
return false;
- }
- if (!iter.ept->write)
return true;
- ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_RESPONSE, -EINVAL);
- if (!ret)
ret = vhost_rpmsg_finish_unlock(vr, &iter);
- if (ret < 0) {
vq_err(vq, "%s(): RPMSG finalising failed %d\n", __func__, ret);
return false;
- }
As I said before dealing with the "response" queue here seems to be introducing coupling with vhost_rpmsg_start_lock()... Endpoints should be doing that.
- return true;
+}
+static void handle_rpmsg_req_kick(struct vhost_work *work) +{
- struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
poll.work);
- struct vhost_rpmsg *vr = container_of(vq->dev, struct vhost_rpmsg, dev);
- while (handle_rpmsg_req_single(vr, vq))
;
+}
+/*
- initialise two virtqueues with an array of endpoints,
- request and response callbacks
- */
+void vhost_rpmsg_init(struct vhost_rpmsg *vr, const struct vhost_rpmsg_ept *ept,
unsigned int n_epts)
+{
- unsigned int i;
- for (i = 0; i < ARRAY_SIZE(vr->vq); i++)
vr->vq_p[i] = &vr->vq[i];
- /* vq[0]: host -> guest, vq[1]: host <- guest */
- vr->vq[VIRTIO_RPMSG_REQUEST].handle_kick = handle_rpmsg_req_kick;
- vr->vq[VIRTIO_RPMSG_RESPONSE].handle_kick = NULL;
The comment depicts vq[0] followed by vq[1] but the code initialise vq[1] before vq[0], which is wildly confusing. At the very least this should be:
vr->vq[VIRTIO_RPMSG_RESPONSE].handle_kick = NULL; vr->vq[VIRTIO_RPMSG_REQUEST].handle_kick = handle_rpmsg_req_kick;
And even better:
/* See configuration of *vq_cbs[] in rpmsg_probe() */ vr->vq[VIRTIO_RPMSG_TX].handle_kick = NULL; vr->vq[VIRTIO_RPMSG_RX].handle_kick = handle_rpmsg_req_kick;
- vr->ept = ept;
- vr->n_epts = n_epts;
- vhost_dev_init(&vr->dev, vr->vq_p, VIRTIO_RPMSG_NUM_OF_VQS,
UIO_MAXIOV, 0, 0, true, NULL);
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_init);
+void vhost_rpmsg_destroy(struct vhost_rpmsg *vr) +{
- if (vhost_dev_has_owner(&vr->dev))
vhost_poll_flush(&vr->vq[VIRTIO_RPMSG_REQUEST].poll);
- vhost_dev_cleanup(&vr->dev);
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_destroy);
+/* send namespace */ +int vhost_rpmsg_ns_announce(struct vhost_rpmsg *vr, const char *name, unsigned int src) +{
- struct vhost_virtqueue *vq = &vr->vq[VIRTIO_RPMSG_RESPONSE];
- struct vhost_rpmsg_iter iter = {
.rhdr = {
.src = 0,
.dst = cpu_to_vhost32(vq, RPMSG_NS_ADDR),
},
- };
- struct rpmsg_ns_msg ns = {
.addr = cpu_to_vhost32(vq, src),
.flags = cpu_to_vhost32(vq, RPMSG_NS_CREATE), /* for rpmsg_ns_cb() */
- };
Here we have to assume the source can be found in the endpoints registered in vhost_rpmsg_init(). I would put a check to make sure that is the case and return an error otherwise.
- int ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_RESPONSE, sizeof(ns));
- if (ret < 0)
return ret;
- strlcpy(ns.name, name, sizeof(ns.name));
- ret = vhost_rpmsg_copy(vr, &iter, &ns, sizeof(ns));
- if (ret != sizeof(ns))
vq_err(iter.vq, "%s(): added %d instead of %zu bytes\n",
__func__, ret, sizeof(ns));
- ret = vhost_rpmsg_finish_unlock(vr, &iter);
- if (ret < 0)
vq_err(iter.vq, "%s(): namespace announcement failed: %d\n",
__func__, ret);
- return ret;
+} +EXPORT_SYMBOL_GPL(vhost_rpmsg_ns_announce);
+MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel, Inc."); +MODULE_DESCRIPTION("Vhost RPMsg API"); diff --git a/drivers/vhost/vhost_rpmsg.h b/drivers/vhost/vhost_rpmsg.h new file mode 100644 index 000000000000..c020ea14cd16 --- /dev/null +++ b/drivers/vhost/vhost_rpmsg.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*
- Copyright(c) 2020 Intel Corporation. All rights reserved.
- Author: Guennadi Liakhovetski guennadi.liakhovetski@linux.intel.com
- */
+#ifndef VHOST_RPMSG_H +#define VHOST_RPMSG_H
+#include <linux/rpmsg/virtio.h> +#include <linux/uio.h>
+#include "vhost.h"
+/* RPMsg uses two VirtQueues: one for each direction */ +enum {
- VIRTIO_RPMSG_RESPONSE, /* RPMsg response (host->guest) buffers */
- VIRTIO_RPMSG_REQUEST, /* RPMsg request (guest->host) buffers */
As I said above things would be much clearer if this was VIRTIO_RPMSG_TX and VIRTIO_RPMSG_RX.
I won't be commenting on the mechanic needed to access and send information on the virtqueues as it is completely foreign to me. Other than the above I think this is going somewhere.
Thanks, Mathieu
- /* Keep last */
- VIRTIO_RPMSG_NUM_OF_VQS,
+};
+struct vhost_rpmsg_ept;
+struct vhost_rpmsg_iter {
- struct iov_iter iov_iter;
- struct rpmsg_hdr rhdr;
- struct vhost_virtqueue *vq;
- const struct vhost_rpmsg_ept *ept;
- int head;
- void *priv;
+};
+struct vhost_rpmsg {
- struct vhost_dev dev;
- struct vhost_virtqueue vq[VIRTIO_RPMSG_NUM_OF_VQS];
- struct vhost_virtqueue *vq_p[VIRTIO_RPMSG_NUM_OF_VQS];
- const struct vhost_rpmsg_ept *ept;
- unsigned int n_epts;
+};
+struct vhost_rpmsg_ept {
- ssize_t (*read)(struct vhost_rpmsg *, struct vhost_rpmsg_iter *);
- ssize_t (*write)(struct vhost_rpmsg *, struct vhost_rpmsg_iter *);
- int addr;
+};
+static inline size_t vhost_rpmsg_iter_len(const struct vhost_rpmsg_iter *iter) +{
- return iter->rhdr.len;
+}
+#define VHOST_RPMSG_ITER(_vq, _src, _dst) { \
- .rhdr = { \
.src = cpu_to_vhost32(_vq, _src), \
.dst = cpu_to_vhost32(_vq, _dst), \
}, \
- }
+void vhost_rpmsg_init(struct vhost_rpmsg *vr, const struct vhost_rpmsg_ept *ept,
unsigned int n_epts);
+void vhost_rpmsg_destroy(struct vhost_rpmsg *vr); +int vhost_rpmsg_ns_announce(struct vhost_rpmsg *vr, const char *name,
unsigned int src);
+int vhost_rpmsg_start_lock(struct vhost_rpmsg *vr,
struct vhost_rpmsg_iter *iter,
unsigned int qid, ssize_t len);
+size_t vhost_rpmsg_copy(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter,
void *data, size_t size);
+int vhost_rpmsg_finish_unlock(struct vhost_rpmsg *vr,
struct vhost_rpmsg_iter *iter);
+#endif
2.28.0