// SPDX-License-Identifier: BSD-3-Clause /* * Copyright (c) 2009-2012,2016,2023 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_bufring.h" /** * Compiler barrier. * * Guarantees that operation reordering does not occur at compile time * for operations directly before and after the barrier. */ #define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); }) #define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF #define ALIGN(val, align) ((typeof(val))((val) & (~((typeof(val))((align) - 1))))) void *vmbus_uio_map(int *fd, int size) { void *map; map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0); if (map == MAP_FAILED) return NULL; return map; } /* Increase bufring index by inc with wraparound */ static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz) { idx += inc; if (idx >= sz) idx -= sz; return idx; } void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen) { br->vbr = buf; br->windex = br->vbr->windex; br->dsize = blen - sizeof(struct vmbus_bufring); } static inline __always_inline void rte_smp_mb(void) { asm volatile("lock addl $0, -128(%%rsp); " ::: "memory"); } static inline int rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) { uint8_t res; asm volatile("lock ; " "cmpxchgl %[src], %[dst];" "sete %[res];" : [res] "=a" (res), /* output */ [dst] "=m" (*dst) : [src] "r" (src), /* input */ "a" (exp), "m" (*dst) : "memory"); /* no-clobber list */ return res; } static inline uint32_t vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex, const void *src0, uint32_t cplen) { uint8_t *br_data = tbr->vbr->data; uint32_t br_dsize = tbr->dsize; const uint8_t *src = src0; /* XXX use double mapping like Linux kernel? */ if (cplen > br_dsize - windex) { uint32_t fraglen = br_dsize - windex; /* Wrap-around detected */ memcpy(br_data + windex, src, fraglen); memcpy(br_data, src + fraglen, cplen - fraglen); } else { memcpy(br_data + windex, src, cplen); } return vmbus_br_idxinc(windex, cplen, br_dsize); } /* * Write scattered channel packet to TX bufring. * * The offset of this channel packet is written as a 64bits value * immediately after this channel packet. * * The write goes through three stages: * 1. Reserve space in ring buffer for the new data. * Writer atomically moves priv_write_index. * 2. Copy the new data into the ring. * 3. Update the tail of the ring (visible to host) that indicates * next read location. Writer updates write_index */ static int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen) { struct vmbus_bufring *vbr = tbr->vbr; uint32_t ring_size = tbr->dsize; uint32_t old_windex, next_windex, windex, total; uint64_t save_windex; int i; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); /* Reserve space in ring */ do { uint32_t avail; /* Get current free location */ old_windex = tbr->windex; /* Prevent compiler reordering this with calculation */ rte_compiler_barrier(); avail = vmbus_br_availwrite(tbr, old_windex); /* If not enough space in ring, then tell caller. */ if (avail <= total) return -EAGAIN; next_windex = vmbus_br_idxinc(old_windex, total, ring_size); /* Atomic update of next write_index for other threads */ } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex)); /* Space from old..new is now reserved */ windex = old_windex; for (i = 0; i < iovlen; i++) windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); /* Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* The region reserved should match region used */ if (windex != next_windex) return -EINVAL; /* Ensure that data is available before updating host index */ rte_compiler_barrier(); /* Checkin for our reservation. wait for our turn to update host */ while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex)) _mm_pause(); return 0; } int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data, uint32_t dlen, uint32_t flags) { struct vmbus_chanpkt pkt; unsigned int pktlen, pad_pktlen; const uint32_t hlen = sizeof(pkt); uint64_t pad = 0; struct iovec iov[3]; int error; pktlen = hlen + dlen; pad_pktlen = ALIGN(pktlen, sizeof(uint64_t)); pkt.hdr.type = type; pkt.hdr.flags = flags; pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT; pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT; pkt.hdr.xactid = VMBUS_RQST_ERROR; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(txbr, iov, 3); return error; } static inline uint32_t vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex, void *dst0, size_t cplen) { const uint8_t *br_data = rbr->vbr->data; uint32_t br_dsize = rbr->dsize; uint8_t *dst = dst0; if (cplen > br_dsize - rindex) { uint32_t fraglen = br_dsize - rindex; /* Wrap-around detected. */ memcpy(dst, br_data + rindex, fraglen); memcpy(dst + fraglen, br_data, cplen - fraglen); } else { memcpy(dst, br_data + rindex, cplen); } return vmbus_br_idxinc(rindex, cplen, br_dsize); } /* Copy data from receive ring but don't change index */ static int vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen) { uint32_t avail; /* * The requested data and the 64bits channel packet * offset should be there at least. */ avail = vmbus_br_availread(rbr); if (avail < dlen + sizeof(uint64_t)) return -EAGAIN; vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen); return 0; } /* * Copy data from receive ring and change index * NOTE: * We assume (dlen + skip) == sizeof(channel packet). */ static int vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip) { struct vmbus_bufring *vbr = rbr->vbr; uint32_t br_dsize = rbr->dsize; uint32_t rindex; if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t)) return -EAGAIN; /* Record where host was when we started read (for debug) */ rbr->windex = rbr->vbr->windex; /* * Copy channel packet from RX bufring. */ rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize); rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen); /* * Discard this channel packet's 64bits offset, which is useless to us. */ rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize); /* Update the read index _after_ the channel packet is fetched. */ rte_compiler_barrier(); vbr->rindex = rindex; return 0; } int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr, void *data, uint32_t *len) { struct vmbus_chanpkt_hdr pkt; uint32_t dlen, bufferlen = *len; int error; error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt)); if (error) return error; if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) /* XXX this channel is dead actually. */ return -EIO; if (unlikely(pkt.hlen > pkt.tlen)) return -EIO; /* Length are in quad words */ dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT; *len = dlen; /* If caller buffer is not large enough */ if (unlikely(dlen > bufferlen)) return -ENOBUFS; /* Read data and skip packet header */ error = vmbus_rxbr_read(rxbr, data, dlen, 0); if (error) return error; /* Return the number of bytes read */ return dlen + sizeof(uint64_t); }