Diffstat (limited to 'drivers/vhost')
-rw-r--r--  drivers/vhost/Kconfig                                          |  12
-rw-r--r--  drivers/vhost/Kconfig.tcm                                      |   7
-rw-r--r--  drivers/vhost/Makefile                                         |   3
-rw-r--r--  drivers/vhost/net.c                                            | 322
-rw-r--r--  drivers/vhost/scsi.c (renamed from drivers/vhost/tcm_vhost.c)  | 707
-rw-r--r--  drivers/vhost/tcm_vhost.h                                      | 115
-rw-r--r--  drivers/vhost/test.c                                           |   9
-rw-r--r--  drivers/vhost/vhost.c                                          | 156
-rw-r--r--  drivers/vhost/vhost.h                                          |  32
9 files changed, 891 insertions, 472 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 85b773a93a5d..8b9226da3f54 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -10,13 +10,17 @@ config VHOST_NET
To compile this driver as a module, choose M here: the module will
be called vhost_net.
-if STAGING
-source "drivers/vhost/Kconfig.tcm"
-endif
+config VHOST_SCSI
+ tristate "VHOST_SCSI TCM fabric driver"
+ depends on TARGET_CORE && EVENTFD && m
+ select VHOST_RING
+ default n
+ ---help---
+ Say M here to enable the vhost_scsi TCM fabric module
+ for use with virtio-scsi guests
config VHOST_RING
tristate
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
-
diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm
deleted file mode 100644
index c3a8cfa1de72..000000000000
--- a/drivers/vhost/Kconfig.tcm
+++ /dev/null
@@ -1,7 +0,0 @@
-config TCM_VHOST
- tristate "TCM_VHOST fabric module"
- depends on TARGET_CORE && EVENTFD && m
- select VHOST_RING
- default n
- ---help---
- Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index 1d37f5e12be6..654e9afb11f5 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_VHOST_NET) += vhost_net.o
vhost_net-y := vhost.o net.o
-obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o
+obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
+vhost_scsi-y := scsi.o
obj-$(CONFIG_VHOST_RING) += vringh.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 959b1cd89e6a..a3645bd163d8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,36 @@ enum {
VHOST_NET_VQ_MAX = 2,
};
-enum vhost_net_poll_state {
- VHOST_NET_POLL_DISABLED = 0,
- VHOST_NET_POLL_STARTED = 1,
- VHOST_NET_POLL_STOPPED = 2,
+struct vhost_ubuf_ref {
+ struct kref kref;
+ wait_queue_head_t wait;
+ struct vhost_virtqueue *vq;
+};
+
+struct vhost_net_virtqueue {
+ struct vhost_virtqueue vq;
+ /* hdr is used to store the virtio header.
+ * Since each iovec has >= 1 byte length, we never need more than
+ * header length entries to store the header. */
+ struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
+ size_t vhost_hlen;
+ size_t sock_hlen;
+ /* vhost zerocopy support fields below: */
+ /* last used idx for outstanding DMA zerocopy buffers */
+ int upend_idx;
+ /* first used idx for DMA done zerocopy buffers */
+ int done_idx;
+ /* an array of userspace buffers info */
+ struct ubuf_info *ubuf_info;
+ /* Reference counting for outstanding ubufs.
+ * Protected by vq mutex. Writers must also take device mutex. */
+ struct vhost_ubuf_ref *ubufs;
};
struct vhost_net {
struct vhost_dev dev;
- struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+ struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
- /* Tells us whether we are polling a socket for TX.
- * We only do this when socket buffer fills up.
- * Protected by tx vq lock. */
- enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
* Protected by tx vq lock. */
unsigned tx_packets;
@@ -88,6 +104,90 @@ struct vhost_net {
bool tx_flush;
};
+static unsigned vhost_zcopy_mask __read_mostly;
+
+void vhost_enable_zcopy(int vq)
+{
+ vhost_zcopy_mask |= 0x1 << vq;
+}
+
+static void vhost_zerocopy_done_signal(struct kref *kref)
+{
+ struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
+ kref);
+ wake_up(&ubufs->wait);
+}
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
+ bool zcopy)
+{
+ struct vhost_ubuf_ref *ubufs;
+ /* No zero copy backend? Nothing to count. */
+ if (!zcopy)
+ return NULL;
+ ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
+ if (!ubufs)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&ubufs->kref);
+ init_waitqueue_head(&ubufs->wait);
+ ubufs->vq = vq;
+ return ubufs;
+}
+
+void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
+{
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
+
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+{
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+ wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+ kfree(ubufs);
+}
+
+int vhost_net_set_ubuf_info(struct vhost_net *n)
+{
+ bool zcopy;
+ int i;
+
+ for (i = 0; i < n->dev.nvqs; ++i) {
+ zcopy = vhost_zcopy_mask & (0x1 << i);
+ if (!zcopy)
+ continue;
+ n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
+ UIO_MAXIOV, GFP_KERNEL);
+ if (!n->vqs[i].ubuf_info)
+ goto err;
+ }
+ return 0;
+
+err:
+ while (i--) {
+ zcopy = vhost_zcopy_mask & (0x1 << i);
+ if (!zcopy)
+ continue;
+ kfree(n->vqs[i].ubuf_info);
+ }
+ return -ENOMEM;
+}
+
+void vhost_net_vq_reset(struct vhost_net *n)
+{
+ int i;
+
+ for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+ n->vqs[i].done_idx = 0;
+ n->vqs[i].upend_idx = 0;
+ n->vqs[i].ubufs = NULL;
+ kfree(n->vqs[i].ubuf_info);
+ n->vqs[i].ubuf_info = NULL;
+ n->vqs[i].vhost_hlen = 0;
+ n->vqs[i].sock_hlen = 0;
+ }
+}
+
static void vhost_net_tx_packet(struct vhost_net *net)
{
++net->tx_packets;
@@ -155,28 +255,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
}
}
-/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
-{
- if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
- return;
- vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
- net->tx_poll_state = VHOST_NET_POLL_STOPPED;
-}
-
-/* Caller must have TX VQ lock */
-static int tx_poll_start(struct vhost_net *net, struct socket *sock)
-{
- int ret;
-
- if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
- return 0;
- ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
- if (!ret)
- net->tx_poll_state = VHOST_NET_POLL_STARTED;
- return ret;
-}
-
/* In case of DMA done not in order in lower device driver for some reason.
* upend_idx is used to track end of used idx, done_idx is used to track head
* of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -185,10 +263,12 @@ static int tx_poll_start(struct vhost_net *net, struct socket *sock)
static int vhost_zerocopy_signal_used(struct vhost_net *net,
struct vhost_virtqueue *vq)
{
+ struct vhost_net_virtqueue *nvq =
+ container_of(vq, struct vhost_net_virtqueue, vq);
int i;
int j = 0;
- for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+ for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
vhost_net_tx_err(net);
if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
@@ -200,7 +280,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
break;
}
if (j)
- vq->done_idx = i;
+ nvq->done_idx = i;
return j;
}
@@ -230,7 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 * read-side critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
{
- struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+ struct vhost_virtqueue *vq = &nvq->vq;
unsigned out, in, s;
int head;
struct msghdr msg = {
@@ -242,7 +323,7 @@ static void handle_tx(struct vhost_net *net)
.msg_flags = MSG_DONTWAIT,
};
size_t len, total_len = 0;
- int err, wmem;
+ int err;
size_t hdr_size;
struct socket *sock;
struct vhost_ubuf_ref *uninitialized_var(ubufs);
@@ -253,21 +334,11 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
return;
- wmem = atomic_read(&sock->sk->sk_wmem_alloc);
- if (wmem >= sock->sk->sk_sndbuf) {
- mutex_lock(&vq->mutex);
- tx_poll_start(net, sock);
- mutex_unlock(&vq->mutex);
- return;
- }
-
mutex_lock(&vq->mutex);
vhost_disable_notify(&net->dev, vq);
- if (wmem < sock->sk->sk_sndbuf / 2)
- tx_poll_stop(net);
- hdr_size = vq->vhost_hlen;
- zcopy = vq->ubufs;
+ hdr_size = nvq->vhost_hlen;
+ zcopy = nvq->ubufs;
for (;;) {
/* Release DMAs done buffers first */
@@ -285,23 +356,15 @@ static void handle_tx(struct vhost_net *net)
if (head == vq->num) {
int num_pends;
- wmem = atomic_read(&sock->sk->sk_wmem_alloc);
- if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
- tx_poll_start(net, sock);
- set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
- break;
- }
/* If more outstanding DMAs, queue the work.
* Handle upend_idx wrap around
*/
- num_pends = likely(vq->upend_idx >= vq->done_idx) ?
- (vq->upend_idx - vq->done_idx) :
- (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
- if (unlikely(num_pends > VHOST_MAX_PEND)) {
- tx_poll_start(net, sock);
- set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
+ (nvq->upend_idx - nvq->done_idx) :
+ (nvq->upend_idx + UIO_MAXIOV -
+ nvq->done_idx);
+ if (unlikely(num_pends > VHOST_MAX_PEND))
break;
- }
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
vhost_disable_notify(&net->dev, vq);
continue;
@@ -314,44 +377,45 @@ static void handle_tx(struct vhost_net *net)
break;
}
/* Skip header. TODO: support TSO. */
- s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+ s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
msg.msg_iovlen = out;
len = iov_length(vq->iov, out);
/* Sanity check */
if (!len) {
vq_err(vq, "Unexpected header len for TX: "
"%zd expected %zd\n",
- iov_length(vq->hdr, s), hdr_size);
+ iov_length(nvq->hdr, s), hdr_size);
break;
}
zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
- vq->upend_idx != vq->done_idx);
+ nvq->upend_idx != nvq->done_idx);
/* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) {
- vq->heads[vq->upend_idx].id = head;
+ vq->heads[nvq->upend_idx].id = head;
if (!vhost_net_tx_select_zcopy(net) ||
len < VHOST_GOODCOPY_LEN) {
 /* copy doesn't need to wait for DMA done */
- vq->heads[vq->upend_idx].len =
+ vq->heads[nvq->upend_idx].len =
VHOST_DMA_DONE_LEN;
msg.msg_control = NULL;
msg.msg_controllen = 0;
ubufs = NULL;
} else {
- struct ubuf_info *ubuf = &vq->ubuf_info[head];
+ struct ubuf_info *ubuf;
+ ubuf = nvq->ubuf_info + nvq->upend_idx;
- vq->heads[vq->upend_idx].len =
+ vq->heads[nvq->upend_idx].len =
VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
- ubuf->ctx = vq->ubufs;
- ubuf->desc = vq->upend_idx;
+ ubuf->ctx = nvq->ubufs;
+ ubuf->desc = nvq->upend_idx;
msg.msg_control = ubuf;
msg.msg_controllen = sizeof(ubuf);
- ubufs = vq->ubufs;
+ ubufs = nvq->ubufs;
kref_get(&ubufs->kref);
}
- vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV;
+ nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
}
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(NULL, sock, &msg, len);
@@ -359,12 +423,10 @@ static void handle_tx(struct vhost_net *net)
if (zcopy_used) {
if (ubufs)
vhost_ubuf_put(ubufs);
- vq->upend_idx = ((unsigned)vq->upend_idx - 1) %
- UIO_MAXIOV;
+ nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+ % UIO_MAXIOV;
}
vhost_discard_vq_desc(vq, 1);
- if (err == -EAGAIN || err == -ENOBUFS)
- tx_poll_start(net, sock);
break;
}
if (err != len)
@@ -469,7 +531,8 @@ err:
 * read-side critical section for our kind of RCU. */
static void handle_rx(struct vhost_net *net)
{
- struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
+ struct vhost_virtqueue *vq = &nvq->vq;
unsigned uninitialized_var(in), log;
struct vhost_log *vq_log;
struct msghdr msg = {
@@ -497,8 +560,8 @@ static void handle_rx(struct vhost_net *net)
mutex_lock(&vq->mutex);
vhost_disable_notify(&net->dev, vq);
- vhost_hlen = vq->vhost_hlen;
- sock_hlen = vq->sock_hlen;
+ vhost_hlen = nvq->vhost_hlen;
+ sock_hlen = nvq->sock_hlen;
vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
@@ -528,11 +591,11 @@ static void handle_rx(struct vhost_net *net)
/* We don't need to be notified again. */
if (unlikely((vhost_hlen)))
/* Skip header. TODO: support TSO. */
- move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
+ move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
else
/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
* needed because recvmsg can modify msg_iov. */
- copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
+ copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
msg.msg_iovlen = in;
err = sock->ops->recvmsg(NULL, sock, &msg,
sock_len, MSG_DONTWAIT | MSG_TRUNC);
@@ -546,7 +609,7 @@ static void handle_rx(struct vhost_net *net)
continue;
}
if (unlikely(vhost_hlen) &&
- memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
+ memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
vhost_hlen)) {
vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
vq->iov->iov_base);
@@ -554,7 +617,7 @@ static void handle_rx(struct vhost_net *net)
}
/* TODO: Should check and handle checksum. */
if (likely(mergeable) &&
- memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
+ memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount,
offsetof(typeof(hdr), num_buffers),
sizeof hdr.num_buffers)) {
vq_err(vq, "Failed num_buffers write");
@@ -611,23 +674,39 @@ static int vhost_net_open(struct inode *inode, struct file *f)
{
struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
struct vhost_dev *dev;
- int r;
+ struct vhost_virtqueue **vqs;
+ int r, i;
if (!n)
return -ENOMEM;
+ vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ kfree(n);
+ return -ENOMEM;
+ }
dev = &n->dev;
- n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
- n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
- r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
+ vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
+ vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
+ n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
+ n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
+ for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+ n->vqs[i].ubufs = NULL;
+ n->vqs[i].ubuf_info = NULL;
+ n->vqs[i].upend_idx = 0;
+ n->vqs[i].done_idx = 0;
+ n->vqs[i].vhost_hlen = 0;
+ n->vqs[i].sock_hlen = 0;
+ }
+ r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
if (r < 0) {
kfree(n);
+ kfree(vqs);
return r;
}
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
- n->tx_poll_state = VHOST_NET_POLL_DISABLED;
f->private_data = n;
@@ -637,32 +716,28 @@ static int vhost_net_open(struct inode *inode, struct file *f)
static void vhost_net_disable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
+ struct vhost_net_virtqueue *nvq =
+ container_of(vq, struct vhost_net_virtqueue, vq);
+ struct vhost_poll *poll = n->poll + (nvq - n->vqs);
if (!vq->private_data)
return;
- if (vq == n->vqs + VHOST_NET_VQ_TX) {
- tx_poll_stop(n);
- n->tx_poll_state = VHOST_NET_POLL_DISABLED;
- } else
- vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+ vhost_poll_stop(poll);
}
static int vhost_net_enable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
+ struct vhost_net_virtqueue *nvq =
+ container_of(vq, struct vhost_net_virtqueue, vq);
+ struct vhost_poll *poll = n->poll + (nvq - n->vqs);
struct socket *sock;
- int ret;
sock = rcu_dereference_protected(vq->private_data,
lockdep_is_held(&vq->mutex));
if (!sock)
return 0;
- if (vq == n->vqs + VHOST_NET_VQ_TX) {
- n->tx_poll_state = VHOST_NET_POLL_STOPPED;
- ret = tx_poll_start(n, sock);
- } else
- ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
- return ret;
+ return vhost_poll_start(poll, sock->file);
}
static struct socket *vhost_net_stop_vq(struct vhost_net *n,
@@ -682,30 +757,30 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
struct socket **rx_sock)
{
- *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
- *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+ *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq);
+ *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
}
static void vhost_net_flush_vq(struct vhost_net *n, int index)
{
vhost_poll_flush(n->poll + index);
- vhost_poll_flush(&n->dev.vqs[index].poll);
+ vhost_poll_flush(&n->vqs[index].vq.poll);
}
static void vhost_net_flush(struct vhost_net *n)
{
vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
- if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) {
- mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+ if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
+ mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
n->tx_flush = true;
- mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+ mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
/* Wait for all lower device DMAs done. */
- vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs);
- mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+ vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
+ mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
n->tx_flush = false;
- kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref);
- mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+ kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
+ mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
}
}
@@ -719,6 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
vhost_net_flush(n);
vhost_dev_stop(&n->dev);
vhost_dev_cleanup(&n->dev, false);
+ vhost_net_vq_reset(n);
if (tx_sock)
fput(tx_sock->file);
if (rx_sock)
@@ -726,6 +802,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
vhost_net_flush(n);
+ kfree(n->dev.vqs);
kfree(n);
return 0;
}
@@ -799,6 +876,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{
struct socket *sock, *oldsock;
struct vhost_virtqueue *vq;
+ struct vhost_net_virtqueue *nvq;
struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
int r;
@@ -811,7 +889,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
r = -ENOBUFS;
goto err;
}
- vq = n->vqs + index;
+ vq = &n->vqs[index].vq;
+ nvq = &n->vqs[index];
mutex_lock(&vq->mutex);
/* Verify that ring has been setup correctly. */
@@ -844,8 +923,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
if (r)
goto err_used;
- oldubufs = vq->ubufs;
- vq->ubufs = ubufs;
+ oldubufs = nvq->ubufs;
+ nvq->ubufs = ubufs;
n->tx_packets = 0;
n->tx_zcopy_err = 0;
@@ -888,14 +967,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
+ struct vhost_memory *memory;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
+ memory = vhost_dev_reset_owner_prepare();
+ if (!memory) {
+ err = -ENOMEM;
+ goto done;
+ }
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
- err = vhost_dev_reset_owner(&n->dev);
+ vhost_dev_reset_owner(&n->dev, memory);
+ vhost_net_vq_reset(n);
done:
mutex_unlock(&n->dev.mutex);
if (tx_sock)
@@ -931,10 +1017,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
n->dev.acked_features = features;
smp_wmb();
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
- mutex_lock(&n->vqs[i].mutex);
+ mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
- mutex_unlock(&n->vqs[i].mutex);
+ mutex_unlock(&n->vqs[i].vq.mutex);
}
vhost_net_flush(n);
mutex_unlock(&n->dev.mutex);
@@ -971,11 +1057,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return vhost_net_reset_owner(n);
default:
mutex_lock(&n->dev.mutex);
+ if (ioctl == VHOST_SET_OWNER) {
+ r = vhost_net_set_ubuf_info(n);
+ if (r)
+ goto out;
+ }
r = vhost_dev_ioctl(&n->dev, ioctl, argp);
if (r == -ENOIOCTLCMD)
r = vhost_vring_ioctl(&n->dev, ioctl, argp);
else
vhost_net_flush(n);
+out:
mutex_unlock(&n->dev.mutex);
return r;
}
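
The vhost_ubuf_ref code added above is a bias-reference flush: the kref starts at 1, every outstanding zero-copy buffer takes an extra reference, and vhost_ubuf_put_and_wait() drops the initial reference and sleeps until the completion callbacks release the last one. A minimal sketch of the same pattern follows, with illustrative names rather than the driver's API:

/* Sketch of the refcount-and-wait flush used by vhost_ubuf_ref above.
 * All names are illustrative; only the shape matches the driver. */
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/wait.h>

struct flush_ref {
    struct kref kref;            /* 1 bias ref + one per in-flight buffer */
    wait_queue_head_t wait;
};

static void flush_ref_release(struct kref *kref)
{
    struct flush_ref *r = container_of(kref, struct flush_ref, kref);

    wake_up(&r->wait);           /* last reference gone: wake the flusher */
}

static struct flush_ref *flush_ref_alloc(void)
{
    struct flush_ref *r = kmalloc(sizeof(*r), GFP_KERNEL);

    if (!r)
        return NULL;
    kref_init(&r->kref);         /* starts at 1: the flush "bias" */
    init_waitqueue_head(&r->wait);
    return r;
}

/* Submission path: pin once per buffer handed to the lower device. */
static void buffer_submit(struct flush_ref *r)
{
    kref_get(&r->kref);
}

/* Completion callback: unpin when the lower device finishes the DMA. */
static void buffer_done(struct flush_ref *r)
{
    kref_put(&r->kref, flush_ref_release);
}

/* Flush: drop the bias, then wait for the count to reach zero. */
static void flush_ref_put_and_wait(struct flush_ref *r)
{
    kref_put(&r->kref, flush_ref_release);
    wait_event(r->wait, !atomic_read(&r->kref.refcount));
    kfree(r);
}
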
diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/scsi.c
index 9951297b2427..5179f7aa1b0b 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/scsi.c
@@ -45,14 +45,116 @@
#include <target/target_core_configfs.h>
#include <target/configfs_macros.h>
#include <linux/vhost.h>
-#include <linux/virtio_net.h> /* TODO vhost.h currently depends on this */
#include <linux/virtio_scsi.h>
#include <linux/llist.h>
#include <linux/bitmap.h>
#include "vhost.c"
#include "vhost.h"
-#include "tcm_vhost.h"
+
+#define TCM_VHOST_VERSION "v0.1"
+#define TCM_VHOST_NAMELEN 256
+#define TCM_VHOST_MAX_CDB_SIZE 32
+
+struct vhost_scsi_inflight {
+ /* Wait for the flush operation to finish */
+ struct completion comp;
+ /* Refcount for the inflight reqs */
+ struct kref kref;
+};
+
+struct tcm_vhost_cmd {
+ /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
+ int tvc_vq_desc;
+ /* virtio-scsi initiator task attribute */
+ int tvc_task_attr;
+ /* virtio-scsi initiator data direction */
+ enum dma_data_direction tvc_data_direction;
+ /* Expected data transfer length from virtio-scsi header */
+ u32 tvc_exp_data_len;
+ /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
+ u64 tvc_tag;
+ /* The number of scatterlists associated with this cmd */
+ u32 tvc_sgl_count;
+ /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
+ u32 tvc_lun;
+ /* Pointer to the SGL formatted memory from virtio-scsi */
+ struct scatterlist *tvc_sgl;
+ /* Pointer to response */
+ struct virtio_scsi_cmd_resp __user *tvc_resp;
+ /* Pointer to vhost_scsi for our device */
+ struct vhost_scsi *tvc_vhost;
+ /* Pointer to vhost_virtqueue for the cmd */
+ struct vhost_virtqueue *tvc_vq;
+ /* Pointer to vhost nexus memory */
+ struct tcm_vhost_nexus *tvc_nexus;
+ /* The TCM I/O descriptor that is accessed via container_of() */
+ struct se_cmd tvc_se_cmd;
+ /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
+ struct work_struct work;
+ /* Copy of the incoming SCSI command descriptor block (CDB) */
+ unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
+ /* Sense buffer that will be mapped into outgoing status */
+ unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
+ /* Completed commands list, serviced from vhost worker thread */
+ struct llist_node tvc_completion_list;
+ /* Used to track inflight cmd */
+ struct vhost_scsi_inflight *inflight;
+};
+
+struct tcm_vhost_nexus {
+ /* Pointer to TCM session for I_T Nexus */
+ struct se_session *tvn_se_sess;
+};
+
+struct tcm_vhost_nacl {
+ /* Binary World Wide unique Port Name for Vhost Initiator port */
+ u64 iport_wwpn;
+ /* ASCII formatted WWPN for Sas Initiator port */
+ char iport_name[TCM_VHOST_NAMELEN];
+ /* Returned by tcm_vhost_make_nodeacl() */
+ struct se_node_acl se_node_acl;
+};
+
+struct vhost_scsi;
+struct tcm_vhost_tpg {
+ /* Vhost port target portal group tag for TCM */
+ u16 tport_tpgt;
+ /* Used to track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
+ int tv_tpg_port_count;
+ /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
+ int tv_tpg_vhost_count;
+ /* list for tcm_vhost_list */
+ struct list_head tv_tpg_list;
+ /* Used to protect access for tpg_nexus */
+ struct mutex tv_tpg_mutex;
+ /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
+ struct tcm_vhost_nexus *tpg_nexus;
+ /* Pointer back to tcm_vhost_tport */
+ struct tcm_vhost_tport *tport;
+ /* Returned by tcm_vhost_make_tpg() */
+ struct se_portal_group se_tpg;
+ /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
+ struct vhost_scsi *vhost_scsi;
+};
+
+struct tcm_vhost_tport {
+ /* SCSI protocol the tport is providing */
+ u8 tport_proto_id;
+ /* Binary World Wide unique Port Name for Vhost Target port */
+ u64 tport_wwpn;
+ /* ASCII formatted WWPN for Vhost Target port */
+ char tport_name[TCM_VHOST_NAMELEN];
+ /* Returned by tcm_vhost_make_tport() */
+ struct se_wwn tport_wwn;
+};
+
+struct tcm_vhost_evt {
+ /* event to be sent to guest */
+ struct virtio_scsi_event event;
+ /* event list, serviced from vhost worker thread */
+ struct llist_node list;
+};
enum {
VHOST_SCSI_VQ_CTL = 0,
@@ -60,20 +162,51 @@ enum {
VHOST_SCSI_VQ_IO = 2,
};
+/*
+ * VIRTIO_RING_F_EVENT_IDX seems broken. Not sure whether the bug is in the
+ * kernel, but disabling it helps.
+ * TODO: debug and remove the workaround.
+ */
+enum {
+ VHOST_SCSI_FEATURES = (VHOST_FEATURES & (~VIRTIO_RING_F_EVENT_IDX)) |
+ (1ULL << VIRTIO_SCSI_F_HOTPLUG)
+};
+
#define VHOST_SCSI_MAX_TARGET 256
#define VHOST_SCSI_MAX_VQ 128
+#define VHOST_SCSI_MAX_EVENT 128
+
+struct vhost_scsi_virtqueue {
+ struct vhost_virtqueue vq;
+ /*
+ * Reference counting for inflight reqs, used for flush operation. At
+ * each time, one reference tracks new commands submitted, while we
+ * wait for another one to reach 0.
+ */
+ struct vhost_scsi_inflight inflights[2];
+ /*
+ * Indicate current inflight in use, protected by vq->mutex.
+ * Writers must also take dev mutex and flush under it.
+ */
+ int inflight_idx;
+};
struct vhost_scsi {
/* Protected by vhost_scsi->dev.mutex */
- struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
+ struct tcm_vhost_tpg **vs_tpg;
char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
- bool vs_endpoint;
struct vhost_dev dev;
- struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
+ struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
+
+ struct vhost_work vs_event_work; /* evt injection work item */
+ struct llist_head vs_event_list; /* evt injection queue */
+
+ bool vs_events_missed; /* any missed events, protected by vq->mutex */
+ int vs_events_nr; /* num of pending events, protected by vq->mutex */
};
/* Local pointer to allocated TCM configfs fabric module */
@@ -91,6 +224,59 @@ static int iov_num_pages(struct iovec *iov)
((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
}
+void tcm_vhost_done_inflight(struct kref *kref)
+{
+ struct vhost_scsi_inflight *inflight;
+
+ inflight = container_of(kref, struct vhost_scsi_inflight, kref);
+ complete(&inflight->comp);
+}
+
+static void tcm_vhost_init_inflight(struct vhost_scsi *vs,
+ struct vhost_scsi_inflight *old_inflight[])
+{
+ struct vhost_scsi_inflight *new_inflight;
+ struct vhost_virtqueue *vq;
+ int idx, i;
+
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ vq = &vs->vqs[i].vq;
+
+ mutex_lock(&vq->mutex);
+
+ /* store old inflight */
+ idx = vs->vqs[i].inflight_idx;
+ if (old_inflight)
+ old_inflight[i] = &vs->vqs[i].inflights[idx];
+
+ /* set up new inflight */
+ vs->vqs[i].inflight_idx = idx ^ 1;
+ new_inflight = &vs->vqs[i].inflights[idx ^ 1];
+ kref_init(&new_inflight->kref);
+ init_completion(&new_inflight->comp);
+
+ mutex_unlock(&vq->mutex);
+ }
+}
+
+static struct vhost_scsi_inflight *
+tcm_vhost_get_inflight(struct vhost_virtqueue *vq)
+{
+ struct vhost_scsi_inflight *inflight;
+ struct vhost_scsi_virtqueue *svq;
+
+ svq = container_of(vq, struct vhost_scsi_virtqueue, vq);
+ inflight = &svq->inflights[svq->inflight_idx];
+ kref_get(&inflight->kref);
+
+ return inflight;
+}
+
+static void tcm_vhost_put_inflight(struct vhost_scsi_inflight *inflight)
+{
+ kref_put(&inflight->kref, tcm_vhost_done_inflight);
+}
+
static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
{
return 1;
@@ -341,6 +527,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
return 0;
}
+static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
+{
+ vs->vs_events_nr--;
+ kfree(evt);
+}
+
+static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
+ u32 event, u32 reason)
+{
+ struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
+ struct tcm_vhost_evt *evt;
+
+ if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
+ vs->vs_events_missed = true;
+ return NULL;
+ }
+
+ evt = kzalloc(sizeof(*evt), GFP_KERNEL);
+ if (!evt) {
+ vq_err(vq, "Failed to allocate tcm_vhost_evt\n");
+ vs->vs_events_missed = true;
+ return NULL;
+ }
+
+ evt->event.event = event;
+ evt->event.reason = reason;
+ vs->vs_events_nr++;
+
+ return evt;
+}
+
static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
{
struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd;
@@ -356,9 +573,80 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
kfree(tv_cmd->tvc_sgl);
}
+ tcm_vhost_put_inflight(tv_cmd->inflight);
+
kfree(tv_cmd);
}
+static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
+ struct tcm_vhost_evt *evt)
+{
+ struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
+ struct virtio_scsi_event *event = &evt->event;
+ struct virtio_scsi_event __user *eventp;
+ unsigned out, in;
+ int head, ret;
+
+ if (!vq->private_data) {
+ vs->vs_events_missed = true;
+ return;
+ }
+
+again:
+ vhost_disable_notify(&vs->dev, vq);
+ head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov), &out, &in,
+ NULL, NULL);
+ if (head < 0) {
+ vs->vs_events_missed = true;
+ return;
+ }
+ if (head == vq->num) {
+ if (vhost_enable_notify(&vs->dev, vq))
+ goto again;
+ vs->vs_events_missed = true;
+ return;
+ }
+
+ if ((vq->iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
+ vq_err(vq, "Expecting virtio_scsi_event, got %zu bytes\n",
+ vq->iov[out].iov_len);
+ vs->vs_events_missed = true;
+ return;
+ }
+
+ if (vs->vs_events_missed) {
+ event->event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+ vs->vs_events_missed = false;
+ }
+
+ eventp = vq->iov[out].iov_base;
+ ret = __copy_to_user(eventp, event, sizeof(*event));
+ if (!ret)
+ vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+ else
+ vq_err(vq, "Faulted on tcm_vhost_send_event\n");
+}
+
+static void tcm_vhost_evt_work(struct vhost_work *work)
+{
+ struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
+ vs_event_work);
+ struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
+ struct tcm_vhost_evt *evt;
+ struct llist_node *llnode;
+
+ mutex_lock(&vq->mutex);
+ llnode = llist_del_all(&vs->vs_event_list);
+ while (llnode) {
+ evt = llist_entry(llnode, struct tcm_vhost_evt, list);
+ llnode = llist_next(llnode);
+ tcm_vhost_do_evt_work(vs, evt);
+ tcm_vhost_free_evt(vs, evt);
+ }
+ mutex_unlock(&vq->mutex);
+}
+
/* Fill in status and signal that we are done processing this command
*
* This is scheduled in the vhost work queue so we are called with the owner
@@ -395,8 +683,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
v_rsp.sense_len);
ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
if (likely(ret == 0)) {
+ struct vhost_scsi_virtqueue *q;
vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0);
- vq = tv_cmd->tvc_vq - vs->vqs;
+ q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
+ vq = q - vs->vqs;
__set_bit(vq, signal);
} else
pr_err("Faulted on virtio_scsi_cmd_resp\n");
@@ -407,10 +697,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vq = -1;
while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
< VHOST_SCSI_MAX_VQ)
- vhost_signal(&vs->dev, &vs->vqs[vq]);
+ vhost_signal(&vs->dev, &vs->vqs[vq].vq);
}
static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
+ struct vhost_virtqueue *vq,
struct tcm_vhost_tpg *tv_tpg,
struct virtio_scsi_cmd_req *v_req,
u32 exp_data_len,
@@ -435,6 +726,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
tv_cmd->tvc_exp_data_len = exp_data_len;
tv_cmd->tvc_data_direction = data_direction;
tv_cmd->tvc_nexus = tv_nexus;
+ tv_cmd->inflight = tcm_vhost_get_inflight(vq);
return tv_cmd;
}
@@ -570,9 +862,27 @@ static void tcm_vhost_submission_work(struct work_struct *work)
}
}
+static void vhost_scsi_send_bad_target(struct vhost_scsi *vs,
+ struct vhost_virtqueue *vq, int head, unsigned out)
+{
+ struct virtio_scsi_cmd_resp __user *resp;
+ struct virtio_scsi_cmd_resp rsp;
+ int ret;
+
+ memset(&rsp, 0, sizeof(rsp));
+ rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
+ resp = vq->iov[out].iov_base;
+ ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+ if (!ret)
+ vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+ else
+ pr_err("Faulted on virtio_scsi_cmd_resp\n");
+}
+
static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
struct vhost_virtqueue *vq)
{
+ struct tcm_vhost_tpg **vs_tpg;
struct virtio_scsi_cmd_req v_req;
struct tcm_vhost_tpg *tv_tpg;
struct tcm_vhost_cmd *tv_cmd;
@@ -581,8 +891,16 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
int head, ret;
u8 target;
- /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
- if (unlikely(!vs->vs_endpoint))
+ /*
+ * We can handle the vq only after the endpoint is setup by calling the
+ * VHOST_SCSI_SET_ENDPOINT ioctl.
+ *
+ * TODO: Check that we are running from vhost_worker which acts
+ * as read-side critical section for vhost kind of RCU.
+ * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
+ */
+ vs_tpg = rcu_dereference_check(vq->private_data, 1);
+ if (!vs_tpg)
return;
mutex_lock(&vq->mutex);
@@ -652,23 +970,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
/* Extract the tpgt */
target = v_req.lun[1];
- tv_tpg = vs->vs_tpg[target];
+ tv_tpg = ACCESS_ONCE(vs_tpg[target]);
/* Target does not exist, fail the request */
if (unlikely(!tv_tpg)) {
- struct virtio_scsi_cmd_resp __user *resp;
- struct virtio_scsi_cmd_resp rsp;
-
- memset(&rsp, 0, sizeof(rsp));
- rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
- resp = vq->iov[out].iov_base;
- ret = __copy_to_user(resp, &rsp, sizeof(rsp));
- if (!ret)
- vhost_add_used_and_signal(&vs->dev,
- vq, head, 0);
- else
- pr_err("Faulted on virtio_scsi_cmd_resp\n");
-
+ vhost_scsi_send_bad_target(vs, vq, head, out);
continue;
}
@@ -676,27 +982,18 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
for (i = 0; i < data_num; i++)
exp_data_len += vq->iov[data_first + i].iov_len;
- tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req,
+ tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req,
exp_data_len, data_direction);
if (IS_ERR(tv_cmd)) {
vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
PTR_ERR(tv_cmd));
- break;
+ goto err_cmd;
}
pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction"
": %d\n", tv_cmd, exp_data_len, data_direction);
tv_cmd->tvc_vhost = vs;
tv_cmd->tvc_vq = vq;
-
- if (unlikely(vq->iov[out].iov_len !=
- sizeof(struct virtio_scsi_cmd_resp))) {
- vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu"
- " bytes, out: %d, in: %d\n",
- vq->iov[out].iov_len, out, in);
- break;
- }
-
tv_cmd->tvc_resp = vq->iov[out].iov_base;
/*
@@ -716,7 +1013,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
scsi_command_size(tv_cmd->tvc_cdb),
TCM_VHOST_MAX_CDB_SIZE);
- break; /* TODO */
+ goto err_free;
}
tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
@@ -729,7 +1026,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
data_direction == DMA_TO_DEVICE);
if (unlikely(ret)) {
vq_err(vq, "Failed to map iov to sgl\n");
- break; /* TODO */
+ goto err_free;
}
}
@@ -750,6 +1047,13 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
}
mutex_unlock(&vq->mutex);
+ return;
+
+err_free:
+ vhost_scsi_free_cmd(tv_cmd);
+err_cmd:
+ vhost_scsi_send_bad_target(vs, vq, head, out);
+ mutex_unlock(&vq->mutex);
}
static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
@@ -757,9 +1061,46 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
pr_debug("%s: The handling func for control queue.\n", __func__);
}
+static void tcm_vhost_send_evt(struct vhost_scsi *vs, struct tcm_vhost_tpg *tpg,
+ struct se_lun *lun, u32 event, u32 reason)
+{
+ struct tcm_vhost_evt *evt;
+
+ evt = tcm_vhost_allocate_evt(vs, event, reason);
+ if (!evt)
+ return;
+
+ if (tpg && lun) {
+ /* TODO: share lun setup code with virtio-scsi.ko */
+ /*
+ * Note: evt->event is zeroed when we allocate it and
+ * lun[4-7] need to be zero according to virtio-scsi spec.
+ */
+ evt->event.lun[0] = 0x01;
+ evt->event.lun[1] = tpg->tport_tpgt & 0xFF;
+ if (lun->unpacked_lun >= 256)
+ evt->event.lun[2] = lun->unpacked_lun >> 8 | 0x40;
+ evt->event.lun[3] = lun->unpacked_lun & 0xFF;
+ }
+
+ llist_add(&evt->list, &vs->vs_event_list);
+ vhost_work_queue(&vs->dev, &vs->vs_event_work);
+}
+
static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
{
- pr_debug("%s: The handling func for event queue.\n", __func__);
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
+
+ mutex_lock(&vq->mutex);
+ if (!vq->private_data)
+ goto out;
+
+ if (vs->vs_events_missed)
+ tcm_vhost_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+out:
+ mutex_unlock(&vq->mutex);
}
static void vhost_scsi_handle_kick(struct vhost_work *work)
@@ -771,9 +1112,45 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
vhost_scsi_handle_vq(vs, vq);
}
+static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
+{
+ vhost_poll_flush(&vs->vqs[index].vq.poll);
+}
+
+/* Callers must hold dev mutex */
+static void vhost_scsi_flush(struct vhost_scsi *vs)
+{
+ struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
+ int i;
+
+ /* Init new inflight and remember the old inflight */
+ tcm_vhost_init_inflight(vs, old_inflight);
+
+ /*
+ * The inflight->kref was initialized to 1. We decrement it here to
+ * indicate the start of the flush operation so that it will reach 0
+ * when all the reqs are finished.
+ */
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
+ kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight);
+
+ /* Flush both the vhost poll and vhost work */
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
+ vhost_scsi_flush_vq(vs, i);
+ vhost_work_flush(&vs->dev, &vs->vs_completion_work);
+ vhost_work_flush(&vs->dev, &vs->vs_event_work);
+
+ /* Wait for all reqs issued before the flush to be finished */
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
+ wait_for_completion(&old_inflight[i]->comp);
+}
+
/*
* Called from vhost_scsi_ioctl() context to walk the list of available
* tcm_vhost_tpg with an active struct tcm_vhost_nexus
+ *
+ * The lock nesting rule is:
+ * tcm_vhost_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
*/
static int vhost_scsi_set_endpoint(
struct vhost_scsi *vs,
@@ -781,20 +1158,32 @@ static int vhost_scsi_set_endpoint(
{
struct tcm_vhost_tport *tv_tport;
struct tcm_vhost_tpg *tv_tpg;
+ struct tcm_vhost_tpg **vs_tpg;
+ struct vhost_virtqueue *vq;
+ int index, ret, i, len;
bool match = false;
- int index, ret;
+ mutex_lock(&tcm_vhost_mutex);
mutex_lock(&vs->dev.mutex);
+
/* Verify that ring has been setup correctly. */
for (index = 0; index < vs->dev.nvqs; ++index) {
/* Verify that ring has been setup correctly. */
- if (!vhost_vq_access_ok(&vs->vqs[index])) {
- mutex_unlock(&vs->dev.mutex);
- return -EFAULT;
+ if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
+ ret = -EFAULT;
+ goto out;
}
}
- mutex_lock(&tcm_vhost_mutex);
+ len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
+ vs_tpg = kzalloc(len, GFP_KERNEL);
+ if (!vs_tpg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (vs->vs_tpg)
+ memcpy(vs_tpg, vs->vs_tpg, len);
+
list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) {
mutex_lock(&tv_tpg->tv_tpg_mutex);
if (!tv_tpg->tpg_nexus) {
@@ -808,31 +1197,48 @@ static int vhost_scsi_set_endpoint(
tv_tport = tv_tpg->tport;
if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
- if (vs->vs_tpg[tv_tpg->tport_tpgt]) {
+ if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) {
+ kfree(vs_tpg);
mutex_unlock(&tv_tpg->tv_tpg_mutex);
- mutex_unlock(&tcm_vhost_mutex);
- mutex_unlock(&vs->dev.mutex);
- return -EEXIST;
+ ret = -EEXIST;
+ goto out;
}
tv_tpg->tv_tpg_vhost_count++;
- vs->vs_tpg[tv_tpg->tport_tpgt] = tv_tpg;
+ tv_tpg->vhost_scsi = vs;
+ vs_tpg[tv_tpg->tport_tpgt] = tv_tpg;
smp_mb__after_atomic_inc();
match = true;
}
mutex_unlock(&tv_tpg->tv_tpg_mutex);
}
- mutex_unlock(&tcm_vhost_mutex);
if (match) {
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn));
- vs->vs_endpoint = true;
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ vq = &vs->vqs[i].vq;
+ /* Flushing the vhost_work acts as synchronize_rcu */
+ mutex_lock(&vq->mutex);
+ rcu_assign_pointer(vq->private_data, vs_tpg);
+ vhost_init_used(vq);
+ mutex_unlock(&vq->mutex);
+ }
ret = 0;
} else {
ret = -EEXIST;
}
+ /*
+ * Act as synchronize_rcu to make sure access to
+ * old vs->vs_tpg is finished.
+ */
+ vhost_scsi_flush(vs);
+ kfree(vs->vs_tpg);
+ vs->vs_tpg = vs_tpg;
+
+out:
mutex_unlock(&vs->dev.mutex);
+ mutex_unlock(&tcm_vhost_mutex);
return ret;
}
@@ -842,28 +1248,37 @@ static int vhost_scsi_clear_endpoint(
{
struct tcm_vhost_tport *tv_tport;
struct tcm_vhost_tpg *tv_tpg;
+ struct vhost_virtqueue *vq;
+ bool match = false;
int index, ret, i;
u8 target;
+ mutex_lock(&tcm_vhost_mutex);
mutex_lock(&vs->dev.mutex);
/* Verify that ring has been setup correctly. */
for (index = 0; index < vs->dev.nvqs; ++index) {
- if (!vhost_vq_access_ok(&vs->vqs[index])) {
+ if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
ret = -EFAULT;
- goto err;
+ goto err_dev;
}
}
+
+ if (!vs->vs_tpg) {
+ ret = 0;
+ goto err_dev;
+ }
+
for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
target = i;
-
tv_tpg = vs->vs_tpg[target];
if (!tv_tpg)
continue;
+ mutex_lock(&tv_tpg->tv_tpg_mutex);
tv_tport = tv_tpg->tport;
if (!tv_tport) {
ret = -ENODEV;
- goto err;
+ goto err_tpg;
}
if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
@@ -872,37 +1287,97 @@ static int vhost_scsi_clear_endpoint(
tv_tport->tport_name, tv_tpg->tport_tpgt,
t->vhost_wwpn, t->vhost_tpgt);
ret = -EINVAL;
- goto err;
+ goto err_tpg;
}
tv_tpg->tv_tpg_vhost_count--;
+ tv_tpg->vhost_scsi = NULL;
vs->vs_tpg[target] = NULL;
- vs->vs_endpoint = false;
+ match = true;
+ mutex_unlock(&tv_tpg->tv_tpg_mutex);
}
+ if (match) {
+ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ vq = &vs->vqs[i].vq;
+ /* Flushing the vhost_work acts as synchronize_rcu */
+ mutex_lock(&vq->mutex);
+ rcu_assign_pointer(vq->private_data, NULL);
+ mutex_unlock(&vq->mutex);
+ }
+ }
+ /*
+ * Act as synchronize_rcu to make sure access to
+ * old vs->vs_tpg is finished.
+ */
+ vhost_scsi_flush(vs);
+ kfree(vs->vs_tpg);
+ vs->vs_tpg = NULL;
+ WARN_ON(vs->vs_events_nr);
mutex_unlock(&vs->dev.mutex);
+ mutex_unlock(&tcm_vhost_mutex);
return 0;
-err:
+err_tpg:
+ mutex_unlock(&tv_tpg->tv_tpg_mutex);
+err_dev:
mutex_unlock(&vs->dev.mutex);
+ mutex_unlock(&tcm_vhost_mutex);
return ret;
}
+static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
+{
+ if (features & ~VHOST_SCSI_FEATURES)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&vs->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&vs->dev)) {
+ mutex_unlock(&vs->dev.mutex);
+ return -EFAULT;
+ }
+ vs->dev.acked_features = features;
+ smp_wmb();
+ vhost_scsi_flush(vs);
+ mutex_unlock(&vs->dev.mutex);
+ return 0;
+}
+
static int vhost_scsi_open(struct inode *inode, struct file *f)
{
struct vhost_scsi *s;
+ struct vhost_virtqueue **vqs;
int r, i;
s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
+ vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ kfree(s);
+ return -ENOMEM;
+ }
+
vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work);
+ vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work);
+
+ s->vs_events_nr = 0;
+ s->vs_events_missed = false;
+
+ vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq;
+ vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq;
+ s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
+ s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
+ for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+ vqs[i] = &s->vqs[i].vq;
+ s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+ }
+ r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ);
+
+ tcm_vhost_init_inflight(s, NULL);
- s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick;
- s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick;
- for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++)
- s->vqs[i].handle_kick = vhost_scsi_handle_kick;
- r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ);
if (r < 0) {
+ kfree(vqs);
kfree(s);
return r;
}
@@ -922,41 +1397,13 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_scsi_clear_endpoint(s, &t);
vhost_dev_stop(&s->dev);
vhost_dev_cleanup(&s->dev, false);
+ /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
+ vhost_scsi_flush(s);
+ kfree(s->dev.vqs);
kfree(s);
return 0;
}
-static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
-{
- vhost_poll_flush(&vs->dev.vqs[index].poll);
-}
-
-static void vhost_scsi_flush(struct vhost_scsi *vs)
-{
- int i;
-
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
- vhost_scsi_flush_vq(vs, i);
-}
-
-static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
-{
- if (features & ~VHOST_FEATURES)
- return -EOPNOTSUPP;
-
- mutex_lock(&vs->dev.mutex);
- if ((features & (1 << VHOST_F_LOG_ALL)) &&
- !vhost_log_access_ok(&vs->dev)) {
- mutex_unlock(&vs->dev.mutex);
- return -EFAULT;
- }
- vs->dev.acked_features = features;
- smp_wmb();
- vhost_scsi_flush(vs);
- mutex_unlock(&vs->dev.mutex);
- return 0;
-}
-
static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
@@ -964,8 +1411,11 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
struct vhost_scsi_target backend;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
+ u32 __user *eventsp = argp;
+ u32 events_missed;
u64 features;
int r, abi_version = VHOST_SCSI_ABI_VERSION;
+ struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
switch (ioctl) {
case VHOST_SCSI_SET_ENDPOINT:
@@ -986,8 +1436,22 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
if (copy_to_user(argp, &abi_version, sizeof abi_version))
return -EFAULT;
return 0;
+ case VHOST_SCSI_SET_EVENTS_MISSED:
+ if (get_user(events_missed, eventsp))
+ return -EFAULT;
+ mutex_lock(&vq->mutex);
+ vs->vs_events_missed = events_missed;
+ mutex_unlock(&vq->mutex);
+ return 0;
+ case VHOST_SCSI_GET_EVENTS_MISSED:
+ mutex_lock(&vq->mutex);
+ events_missed = vs->vs_events_missed;
+ mutex_unlock(&vq->mutex);
+ if (put_user(events_missed, eventsp))
+ return -EFAULT;
+ return 0;
case VHOST_GET_FEATURES:
- features = VHOST_FEATURES;
+ features = VHOST_SCSI_FEATURES;
if (copy_to_user(featurep, &features, sizeof features))
return -EFAULT;
return 0;
@@ -1057,28 +1521,80 @@ static char *tcm_vhost_dump_proto_id(struct tcm_vhost_tport *tport)
return "Unknown";
}
+static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
+ struct se_lun *lun, bool plug)
+{
+ struct vhost_scsi *vs = tpg->vhost_scsi;
+ struct vhost_virtqueue *vq;
+ u32 reason;
+
+ if (!vs)
+ return;
+
+ mutex_lock(&vs->dev.mutex);
+ if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
+ mutex_unlock(&vs->dev.mutex);
+ return;
+ }
+
+ if (plug)
+ reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
+ else
+ reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
+
+ vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
+ mutex_lock(&vq->mutex);
+ tcm_vhost_send_evt(vs, tpg, lun,
+ VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
+ mutex_unlock(&vq->mutex);
+ mutex_unlock(&vs->dev.mutex);
+}
+
+static void tcm_vhost_hotplug(struct tcm_vhost_tpg *tpg, struct se_lun *lun)
+{
+ tcm_vhost_do_plug(tpg, lun, true);
+}
+
+static void tcm_vhost_hotunplug(struct tcm_vhost_tpg *tpg, struct se_lun *lun)
+{
+ tcm_vhost_do_plug(tpg, lun, false);
+}
+
static int tcm_vhost_port_link(struct se_portal_group *se_tpg,
struct se_lun *lun)
{
struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
struct tcm_vhost_tpg, se_tpg);
+ mutex_lock(&tcm_vhost_mutex);
+
mutex_lock(&tv_tpg->tv_tpg_mutex);
tv_tpg->tv_tpg_port_count++;
mutex_unlock(&tv_tpg->tv_tpg_mutex);
+ tcm_vhost_hotplug(tv_tpg, lun);
+
+ mutex_unlock(&tcm_vhost_mutex);
+
return 0;
}
static void tcm_vhost_port_unlink(struct se_portal_group *se_tpg,
- struct se_lun *se_lun)
+ struct se_lun *lun)
{
struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
struct tcm_vhost_tpg, se_tpg);
+ mutex_lock(&tcm_vhost_mutex);
+
mutex_lock(&tv_tpg->tv_tpg_mutex);
tv_tpg->tv_tpg_port_count--;
mutex_unlock(&tv_tpg->tv_tpg_mutex);
+
+ tcm_vhost_hotunplug(tv_tpg, lun);
+
+ mutex_unlock(&tcm_vhost_mutex);
}
static struct se_node_acl *tcm_vhost_make_nodeacl(
@@ -1620,7 +2136,8 @@ static void tcm_vhost_exit(void)
destroy_workqueue(tcm_vhost_workqueue);
};
-MODULE_DESCRIPTION("TCM_VHOST series fabric driver");
+MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
+MODULE_ALIAS("tcm_vhost");
MODULE_LICENSE("GPL");
module_init(tcm_vhost_init);
module_exit(tcm_vhost_exit);
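
The inflights[2]/inflight_idx fields introduced above form a double-buffered request counter: each command pins the current slot through tcm_vhost_get_inflight(), and vhost_scsi_flush() flips the index, drops the old slot's initial reference, and waits for its completion. A condensed sketch of that flip-and-drain scheme (illustrative names; locking reduced to a comment):

/* Sketch of the two-slot inflight drain behind vhost_scsi_flush().
 * Names are illustrative; the caller is assumed to serialize the
 * flip against new requests, as the driver does with vq->mutex. */
#include <linux/completion.h>
#include <linux/kref.h>

struct inflight {
    struct kref kref;
    struct completion comp;
};

struct queue {
    struct inflight slots[2];
    int idx;                             /* slot new requests pin */
};

static void inflight_done(struct kref *kref)
{
    struct inflight *in = container_of(kref, struct inflight, kref);

    complete(&in->comp);
}

static void slot_arm(struct inflight *in)
{
    kref_init(&in->kref);                /* starts at 1: the flush bias */
    init_completion(&in->comp);
}

/* Each new request pins whatever slot is current when it starts. */
static struct inflight *request_start(struct queue *q)
{
    struct inflight *in = &q->slots[q->idx];

    kref_get(&in->kref);
    return in;
}

static void request_end(struct inflight *in)
{
    kref_put(&in->kref, inflight_done);
}

/* Flush: arm the spare slot, flip, then drain the old one. */
static void queue_flush(struct queue *q)
{
    struct inflight *old = &q->slots[q->idx];

    slot_arm(&q->slots[q->idx ^ 1]);
    q->idx ^= 1;                         /* new requests pin the spare */
    kref_put(&old->kref, inflight_done); /* drop the old slot's bias */
    wait_for_completion(&old->comp);     /* all old requests finished */
}
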
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
deleted file mode 100644
index 1d2ae7a60e11..000000000000
--- a/drivers/vhost/tcm_vhost.h
+++ /dev/null
@@ -1,115 +0,0 @@
-#define TCM_VHOST_VERSION "v0.1"
-#define TCM_VHOST_NAMELEN 256
-#define TCM_VHOST_MAX_CDB_SIZE 32
-
-struct tcm_vhost_cmd {
- /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
- int tvc_vq_desc;
- /* virtio-scsi initiator task attribute */
- int tvc_task_attr;
- /* virtio-scsi initiator data direction */
- enum dma_data_direction tvc_data_direction;
- /* Expected data transfer length from virtio-scsi header */
- u32 tvc_exp_data_len;
- /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
- u64 tvc_tag;
- /* The number of scatterlists associated with this cmd */
- u32 tvc_sgl_count;
- /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
- u32 tvc_lun;
- /* Pointer to the SGL formatted memory from virtio-scsi */
- struct scatterlist *tvc_sgl;
- /* Pointer to response */
- struct virtio_scsi_cmd_resp __user *tvc_resp;
- /* Pointer to vhost_scsi for our device */
- struct vhost_scsi *tvc_vhost;
- /* Pointer to vhost_virtqueue for the cmd */
- struct vhost_virtqueue *tvc_vq;
- /* Pointer to vhost nexus memory */
- struct tcm_vhost_nexus *tvc_nexus;
- /* The TCM I/O descriptor that is accessed via container_of() */
- struct se_cmd tvc_se_cmd;
- /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
- struct work_struct work;
- /* Copy of the incoming SCSI command descriptor block (CDB) */
- unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
- /* Sense buffer that will be mapped into outgoing status */
- unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
- /* Completed commands list, serviced from vhost worker thread */
- struct llist_node tvc_completion_list;
-};
-
-struct tcm_vhost_nexus {
- /* Pointer to TCM session for I_T Nexus */
- struct se_session *tvn_se_sess;
-};
-
-struct tcm_vhost_nacl {
- /* Binary World Wide unique Port Name for Vhost Initiator port */
- u64 iport_wwpn;
- /* ASCII formatted WWPN for Sas Initiator port */
- char iport_name[TCM_VHOST_NAMELEN];
- /* Returned by tcm_vhost_make_nodeacl() */
- struct se_node_acl se_node_acl;
-};
-
-struct tcm_vhost_tpg {
- /* Vhost port target portal group tag for TCM */
- u16 tport_tpgt;
- /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
- int tv_tpg_port_count;
- /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
- int tv_tpg_vhost_count;
- /* list for tcm_vhost_list */
- struct list_head tv_tpg_list;
- /* Used to protect access for tpg_nexus */
- struct mutex tv_tpg_mutex;
- /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
- struct tcm_vhost_nexus *tpg_nexus;
- /* Pointer back to tcm_vhost_tport */
- struct tcm_vhost_tport *tport;
- /* Returned by tcm_vhost_make_tpg() */
- struct se_portal_group se_tpg;
-};
-
-struct tcm_vhost_tport {
- /* SCSI protocol the tport is providing */
- u8 tport_proto_id;
- /* Binary World Wide unique Port Name for Vhost Target port */
- u64 tport_wwpn;
- /* ASCII formatted WWPN for Vhost Target port */
- char tport_name[TCM_VHOST_NAMELEN];
- /* Returned by tcm_vhost_make_tport() */
- struct se_wwn tport_wwn;
-};
-
-/*
- * As per request from MST, keep TCM_VHOST related ioctl defines out of
- * linux/vhost.h (user-space) for now..
- */
-
-#include <linux/vhost.h>
-
-/*
- * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
- *
- * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
- * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
- * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
- * All the targets under vhost_wwpn can be seen and used by guset.
- */
-
-#define VHOST_SCSI_ABI_VERSION 1
-
-struct vhost_scsi_target {
- int abi_version;
- char vhost_wwpn[TRANSPORT_IQN_LEN];
- unsigned short vhost_tpgt;
- unsigned short reserved;
-};
-
-/* VHOST_SCSI specific defines */
-#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
-#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
-/* Changing this breaks userspace. */
-#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
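
For context, the ioctl ABI defined by this header (which, per the comment above, must stay stable for QEMU) is driven from userspace roughly as sketched below. The struct layout and ioctl numbers are copied from the header; the device path, the WWPN, the TRANSPORT_IQN_LEN value, and the elided owner/vring setup are illustrative assumptions:

/* Hypothetical userspace sketch of the vhost-scsi endpoint ioctls.
 * Everything not copied from the header above is an assumption. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>        /* VHOST_VIRTIO */

#define TRANSPORT_IQN_LEN 224   /* assumed, from target_core_base.h */
#define VHOST_SCSI_ABI_VERSION 1

struct vhost_scsi_target {
    int abi_version;
    char vhost_wwpn[TRANSPORT_IQN_LEN];
    unsigned short vhost_tpgt;
    unsigned short reserved;
};

#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)

int main(void)
{
    struct vhost_scsi_target t = { .abi_version = VHOST_SCSI_ABI_VERSION };
    int abi;
    int fd = open("/dev/vhost-scsi", O_RDWR);

    if (fd < 0)
        return 1;
    if (ioctl(fd, VHOST_SCSI_GET_ABI_VERSION, &abi) < 0 ||
        abi != VHOST_SCSI_ABI_VERSION)
        return 1;
    strncpy(t.vhost_wwpn, "naa.600140554cf3a18e", sizeof(t.vhost_wwpn) - 1);
    /* VHOST_SET_OWNER and vring setup elided. Since ABI rev 1 the
     * vhost_tpgt field is ignored: every TPG under the WWPN is used. */
    if (ioctl(fd, VHOST_SCSI_SET_ENDPOINT, &t) < 0)
        return 1;
    /* ... guest runs against the exposed targets ... */
    ioctl(fd, VHOST_SCSI_CLEAR_ENDPOINT, &t);
    return 0;
}
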
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 329d3021d059..1ee45bc85f67 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
{
void *priv = NULL;
long err;
+ struct vhost_memory *memory;
+
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
+ memory = vhost_dev_reset_owner_prepare();
+ if (!memory) {
+ err = -ENOMEM;
+ goto done;
+ }
vhost_test_stop(n, &priv);
vhost_test_flush(n);
- err = vhost_dev_reset_owner(&n->dev);
+ vhost_dev_reset_owner(&n->dev, memory);
done:
mutex_unlock(&n->dev.mutex);
return err;
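
Note the shape of the fix here: reset_owner is split into a fallible prepare step and an infallible commit. vhost_dev_reset_owner_prepare() allocates the replacement vhost_memory table before the device is stopped and flushed, so nothing can fail once teardown has begun. A generic, self-contained sketch of that prepare/commit pattern (illustrative names, not the vhost API):

/* Sketch of the allocate-before-teardown split used by
 * vhost_dev_reset_owner_prepare()/vhost_dev_reset_owner(). */
#include <linux/mutex.h>
#include <linux/slab.h>

struct dev_state {
    struct mutex mutex;
    void *table;
    size_t table_size;
};

static int dev_reset(struct dev_state *d)
{
    void *fresh;
    int err = 0;

    mutex_lock(&d->mutex);
    /* Prepare: every fallible step happens before teardown... */
    fresh = kzalloc(d->table_size, GFP_KERNEL);
    if (!fresh) {
        err = -ENOMEM;
        goto out;
    }
    /* ...stop queues and flush pending work: the point of no return. */
    /* Commit: cannot fail, so the device is never left half-reset. */
    kfree(d->table);
    d->table = fresh;
out:
    mutex_unlock(&d->mutex);
    return err;
}
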
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249e6d90..749b5ab5bfbb 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -33,8 +33,6 @@ enum {
VHOST_MEMORY_F_LOG = 0x1,
};
-static unsigned vhost_zcopy_mask __read_mostly;
-
#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
@@ -89,6 +87,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
unsigned long mask;
int ret = 0;
+ if (poll->wqh)
+ return 0;
+
mask = file->f_op->poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -178,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
- vq->vhost_hlen = 0;
- vq->sock_hlen = 0;
vq->private_data = NULL;
vq->log_base = NULL;
vq->error_ctx = NULL;
@@ -188,9 +187,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
- vq->upend_idx = 0;
- vq->done_idx = 0;
- vq->ubufs = NULL;
}
static int vhost_worker(void *data)
@@ -250,43 +246,29 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
vq->log = NULL;
kfree(vq->heads);
vq->heads = NULL;
- kfree(vq->ubuf_info);
- vq->ubuf_info = NULL;
-}
-
-void vhost_enable_zcopy(int vq)
-{
- vhost_zcopy_mask |= 0x1 << vq;
}
/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
int i;
- bool zcopy;
for (i = 0; i < dev->nvqs; ++i) {
- dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
+ dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect *
UIO_MAXIOV, GFP_KERNEL);
- dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV,
+ dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV,
GFP_KERNEL);
- dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
+ dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads *
UIO_MAXIOV, GFP_KERNEL);
- zcopy = vhost_zcopy_mask & (0x1 << i);
- if (zcopy)
- dev->vqs[i].ubuf_info =
- kmalloc(sizeof *dev->vqs[i].ubuf_info *
- UIO_MAXIOV, GFP_KERNEL);
- if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
- !dev->vqs[i].heads ||
- (zcopy && !dev->vqs[i].ubuf_info))
+ if (!dev->vqs[i]->indirect || !dev->vqs[i]->log ||
+ !dev->vqs[i]->heads)
goto err_nomem;
}
return 0;
err_nomem:
for (; i >= 0; --i)
- vhost_vq_free_iovecs(&dev->vqs[i]);
+ vhost_vq_free_iovecs(dev->vqs[i]);
return -ENOMEM;
}
@@ -295,11 +277,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
int i;
for (i = 0; i < dev->nvqs; ++i)
- vhost_vq_free_iovecs(&dev->vqs[i]);
+ vhost_vq_free_iovecs(dev->vqs[i]);
}
long vhost_dev_init(struct vhost_dev *dev,
- struct vhost_virtqueue *vqs, int nvqs)
+ struct vhost_virtqueue **vqs, int nvqs)
{
int i;
@@ -315,16 +297,15 @@ long vhost_dev_init(struct vhost_dev *dev,
dev->worker = NULL;
for (i = 0; i < dev->nvqs; ++i) {
- dev->vqs[i].log = NULL;
- dev->vqs[i].indirect = NULL;
- dev->vqs[i].heads = NULL;
- dev->vqs[i].ubuf_info = NULL;
- dev->vqs[i].dev = dev;
- mutex_init(&dev->vqs[i].mutex);
- vhost_vq_reset(dev, dev->vqs + i);
- if (dev->vqs[i].handle_kick)
- vhost_poll_init(&dev->vqs[i].poll,
- dev->vqs[i].handle_kick, POLLIN, dev);
+ dev->vqs[i]->log = NULL;
+ dev->vqs[i]->indirect = NULL;
+ dev->vqs[i]->heads = NULL;
+ dev->vqs[i]->dev = dev;
+ mutex_init(&dev->vqs[i]->mutex);
+ vhost_vq_reset(dev, dev->vqs[i]);
+ if (dev->vqs[i]->handle_kick)
+ vhost_poll_init(&dev->vqs[i]->poll,
+ dev->vqs[i]->handle_kick, POLLIN, dev);
}
return 0;
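vhost_dev_init() no longer touches ubuf_info or any other zerocopy state; that bookkeeping now belongs entirely to the net driver's per-queue wrapper. A hedged sketch of the initialization the net driver is presumably left to do for itself after the generic init (field names as in the moved structure, surrounding open() code assumed):

	struct vhost_net_virtqueue *nvq = &n->vqs[i];

	nvq->ubuf_info = NULL;	/* allocated later, only if zerocopy is used */
	nvq->upend_idx = 0;	/* no outstanding zerocopy DMA yet */
	nvq->done_idx = 0;
	nvq->ubufs = NULL;
	nvq->vhost_hlen = 0;	/* set when the backend is attached */
	nvq->sock_hlen = 0;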
@@ -405,21 +386,19 @@ err_mm:
return err;
}
-/* Caller should have device mutex */
-long vhost_dev_reset_owner(struct vhost_dev *dev)
+struct vhost_memory *vhost_dev_reset_owner_prepare(void)
{
- struct vhost_memory *memory;
-
- /* Restore memory to default empty mapping. */
- memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
- if (!memory)
- return -ENOMEM;
+ return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
+}
+/* Caller should have device mutex */
+void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
+{
vhost_dev_cleanup(dev, true);
+ /* Restore memory to default empty mapping. */
memory->nregions = 0;
RCU_INIT_POINTER(dev->memory, memory);
- return 0;
}
void vhost_dev_stop(struct vhost_dev *dev)
@@ -427,9 +406,9 @@ void vhost_dev_stop(struct vhost_dev *dev)
int i;
for (i = 0; i < dev->nvqs; ++i) {
- if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
- vhost_poll_stop(&dev->vqs[i].poll);
- vhost_poll_flush(&dev->vqs[i].poll);
+ if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
+ vhost_poll_stop(&dev->vqs[i]->poll);
+ vhost_poll_flush(&dev->vqs[i]->poll);
}
}
}
@@ -440,17 +419,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
int i;
for (i = 0; i < dev->nvqs; ++i) {
- if (dev->vqs[i].error_ctx)
- eventfd_ctx_put(dev->vqs[i].error_ctx);
- if (dev->vqs[i].error)
- fput(dev->vqs[i].error);
- if (dev->vqs[i].kick)
- fput(dev->vqs[i].kick);
- if (dev->vqs[i].call_ctx)
- eventfd_ctx_put(dev->vqs[i].call_ctx);
- if (dev->vqs[i].call)
- fput(dev->vqs[i].call);
- vhost_vq_reset(dev, dev->vqs + i);
+ if (dev->vqs[i]->error_ctx)
+ eventfd_ctx_put(dev->vqs[i]->error_ctx);
+ if (dev->vqs[i]->error)
+ fput(dev->vqs[i]->error);
+ if (dev->vqs[i]->kick)
+ fput(dev->vqs[i]->kick);
+ if (dev->vqs[i]->call_ctx)
+ eventfd_ctx_put(dev->vqs[i]->call_ctx);
+ if (dev->vqs[i]->call)
+ fput(dev->vqs[i]->call);
+ vhost_vq_reset(dev, dev->vqs[i]);
}
vhost_dev_free_iovecs(dev);
if (dev->log_ctx)
@@ -521,14 +500,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
for (i = 0; i < d->nvqs; ++i) {
int ok;
- mutex_lock(&d->vqs[i].mutex);
+ mutex_lock(&d->vqs[i]->mutex);
/* If ring is inactive, will check when it's enabled. */
- if (d->vqs[i].private_data)
- ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
+ if (d->vqs[i]->private_data)
+ ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
log_all);
else
ok = 1;
- mutex_unlock(&d->vqs[i].mutex);
+ mutex_unlock(&d->vqs[i]->mutex);
if (!ok)
return 0;
}
@@ -638,7 +617,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
if (idx >= d->nvqs)
return -ENOBUFS;
- vq = d->vqs + idx;
+ vq = d->vqs[idx];
mutex_lock(&vq->mutex);
@@ -849,7 +828,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
for (i = 0; i < d->nvqs; ++i) {
struct vhost_virtqueue *vq;
void __user *base = (void __user *)(unsigned long)p;
- vq = d->vqs + i;
+ vq = d->vqs[i];
mutex_lock(&vq->mutex);
/* If ring is inactive, will check when it's enabled. */
if (vq->private_data && !vq_log_access_ok(d, vq, base))
@@ -876,9 +855,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
} else
filep = eventfp;
for (i = 0; i < d->nvqs; ++i) {
- mutex_lock(&d->vqs[i].mutex);
- d->vqs[i].log_ctx = d->log_ctx;
- mutex_unlock(&d->vqs[i].mutex);
+ mutex_lock(&d->vqs[i]->mutex);
+ d->vqs[i]->log_ctx = d->log_ctx;
+ mutex_unlock(&d->vqs[i]->mutex);
}
if (ctx)
eventfd_ctx_put(ctx);
@@ -1548,38 +1527,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->used->flags, r);
}
}
-
-static void vhost_zerocopy_done_signal(struct kref *kref)
-{
- struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
- kref);
- wake_up(&ubufs->wait);
-}
-
-struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
- bool zcopy)
-{
- struct vhost_ubuf_ref *ubufs;
- /* No zero copy backend? Nothing to count. */
- if (!zcopy)
- return NULL;
- ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
- if (!ubufs)
- return ERR_PTR(-ENOMEM);
- kref_init(&ubufs->kref);
- init_waitqueue_head(&ubufs->wait);
- ubufs->vq = vq;
- return ubufs;
-}
-
-void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
-{
- kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
-}
-
-void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
-{
- kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
- wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
- kfree(ubufs);
-}
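The deleted helpers are a compact kref-with-waiter teardown: every outstanding zerocopy buffer holds a reference, the release callback does nothing but wake the waitqueue, and put_and_wait() drops the initial reference from kref_init() and sleeps until the count reaches zero before freeing. They leave the core along with the rest of the zerocopy state (their declarations disappear from vhost.h below); the lifetime they implement, sketched from the caller's side:

	/* Submission side: one reference per in-flight zerocopy buffer. */
	kref_get(&ubufs->kref);
	/* ...DMA completes asynchronously, completion path calls... */
	vhost_ubuf_put(ubufs);			/* last put fires the wakeup */

	/* Teardown side: drop the initial ref, wait out the stragglers. */
	vhost_ubuf_put_and_wait(ubufs);		/* frees ubufs on return */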
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 17261e277c02..b58f4ae82cb8 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -54,18 +54,6 @@ struct vhost_log {
struct vhost_virtqueue;
-struct vhost_ubuf_ref {
- struct kref kref;
- wait_queue_head_t wait;
- struct vhost_virtqueue *vq;
-};
-
-struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
-void vhost_ubuf_put(struct vhost_ubuf_ref *);
-void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
-
-struct ubuf_info;
-
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -114,10 +102,7 @@ struct vhost_virtqueue {
/* hdr is used to store the virtio header.
* Since each iovec has >= 1 byte length, we never need more than
* header length entries to store the header. */
- struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
struct iovec *indirect;
- size_t vhost_hlen;
- size_t sock_hlen;
struct vring_used_elem *heads;
/* We use a kind of RCU to access private pointer.
* All readers access it from worker, which makes it possible to
@@ -130,16 +115,6 @@ struct vhost_virtqueue {
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
- /* vhost zerocopy support fields below: */
- /* last used idx for outstanding DMA zerocopy buffers */
- int upend_idx;
- /* first used idx for DMA done zerocopy buffers */
- int done_idx;
- /* an array of userspace buffers info */
- struct ubuf_info *ubuf_info;
- /* Reference counting for outstanding ubufs.
- * Protected by vq mutex. Writers must also take device mutex. */
- struct vhost_ubuf_ref *ubufs;
};
struct vhost_dev {
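With the net-only fields gone, struct vhost_virtqueue is meant to be embedded: a driver wraps it in its own per-queue structure and recovers the wrapper with container_of(). A sketch, assuming the vhost_net_virtqueue wrapper this series introduces in net.c (the helper name is hypothetical):

	static struct vhost_net_virtqueue *to_net_vq(struct vhost_virtqueue *vq)
	{
		/* valid only for a vq embedded in struct vhost_net_virtqueue */
		return container_of(vq, struct vhost_net_virtqueue, vq);
	}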
@@ -150,7 +125,7 @@ struct vhost_dev {
struct mm_struct *mm;
struct mutex mutex;
unsigned acked_features;
- struct vhost_virtqueue *vqs;
+ struct vhost_virtqueue **vqs;
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
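Turning dev->vqs into an array of pointers is what makes the embedding above work: the core iterates over pointers while each driver keeps its queues inside larger per-queue structures. A hedged sketch of the open() side this implies (allocation and variable names assumed):

	struct vhost_virtqueue **vqs;
	int i;

	vqs = kmalloc(nvqs * sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		return -ENOMEM;
	for (i = 0; i < nvqs; i++)
		vqs[i] = &n->vqs[i].vq;	/* generic vq embedded in the wrapper */
	vhost_dev_init(&n->dev, vqs, nvqs);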
@@ -159,9 +134,10 @@ struct vhost_dev {
struct task_struct *worker;
};
-long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
long vhost_dev_check_owner(struct vhost_dev *);
-long vhost_dev_reset_owner(struct vhost_dev *);
+struct vhost_memory *vhost_dev_reset_owner_prepare(void);
+void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
void vhost_dev_cleanup(struct vhost_dev *, bool locked);
void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);