mirror of
https://github.com/opnsense/src.git
synced 2026-06-08 16:22:46 -04:00
For requests that handoff queues from userspace to the kernel as well as the request to fetch reconnect parameters from the kernel, switch from using flat structures to nvlists. In particular, this will permit adding support for additional transports in the future without breaking the ABI of the structures. Note that this is an ABI break for the ioctls used by nvmf(4) and nvmft(4). Since this is only present in main I did not bother implementing compatability shims. Inspired by: imp (suggestion on a different review) Reviewed by: imp Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D48230
1479 lines
37 KiB
C
1479 lines
37 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*
|
|
* Copyright (c) 2022-2024 Chelsio Communications, Inc.
|
|
* Written by: John Baldwin <jhb@FreeBSD.org>
|
|
*/
|
|
|
|
#include <sys/endian.h>
|
|
#include <sys/gsb_crc32.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/uio.h>
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include "libnvmf.h"
|
|
#include "internal.h"
|
|
#include "nvmf_tcp.h"
|
|
|
|
struct nvmf_tcp_qpair;
|
|
|
|
struct nvmf_tcp_command_buffer {
|
|
struct nvmf_tcp_qpair *qp;
|
|
|
|
void *data;
|
|
size_t data_len;
|
|
size_t data_xfered;
|
|
uint32_t data_offset;
|
|
|
|
uint16_t cid;
|
|
uint16_t ttag;
|
|
|
|
LIST_ENTRY(nvmf_tcp_command_buffer) link;
|
|
};
|
|
|
|
LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer);
|
|
|
|
struct nvmf_tcp_association {
|
|
struct nvmf_association na;
|
|
|
|
uint32_t ioccsz;
|
|
};
|
|
|
|
struct nvmf_tcp_rxpdu {
|
|
struct nvme_tcp_common_pdu_hdr *hdr;
|
|
uint32_t data_len;
|
|
};
|
|
|
|
struct nvmf_tcp_capsule {
|
|
struct nvmf_capsule nc;
|
|
|
|
struct nvmf_tcp_rxpdu rx_pdu;
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
|
|
TAILQ_ENTRY(nvmf_tcp_capsule) link;
|
|
};
|
|
|
|
struct nvmf_tcp_qpair {
|
|
struct nvmf_qpair qp;
|
|
int s;
|
|
|
|
uint8_t txpda;
|
|
uint8_t rxpda;
|
|
bool header_digests;
|
|
bool data_digests;
|
|
uint32_t maxr2t;
|
|
uint32_t maxh2cdata;
|
|
uint32_t max_icd; /* Host only */
|
|
uint16_t next_ttag; /* Controller only */
|
|
|
|
struct nvmf_tcp_command_buffer_list tx_buffers;
|
|
struct nvmf_tcp_command_buffer_list rx_buffers;
|
|
TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules;
|
|
};
|
|
|
|
#define TASSOC(nc) ((struct nvmf_tcp_association *)(na))
|
|
#define TCAP(nc) ((struct nvmf_tcp_capsule *)(nc))
|
|
#define CTCAP(nc) ((const struct nvmf_tcp_capsule *)(nc))
|
|
#define TQP(qp) ((struct nvmf_tcp_qpair *)(qp))
|
|
|
|
static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET];
|
|
|
|
static uint32_t
|
|
compute_digest(const void *buf, size_t len)
|
|
{
|
|
return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff);
|
|
}
|
|
|
|
static struct nvmf_tcp_command_buffer *
|
|
tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data,
|
|
uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag,
|
|
bool receive)
|
|
{
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
|
|
cb = malloc(sizeof(*cb));
|
|
cb->qp = qp;
|
|
cb->data = data;
|
|
cb->data_offset = data_offset;
|
|
cb->data_len = data_len;
|
|
cb->data_xfered = 0;
|
|
cb->cid = cid;
|
|
cb->ttag = ttag;
|
|
|
|
if (receive)
|
|
LIST_INSERT_HEAD(&qp->rx_buffers, cb, link);
|
|
else
|
|
LIST_INSERT_HEAD(&qp->tx_buffers, cb, link);
|
|
return (cb);
|
|
}
|
|
|
|
static struct nvmf_tcp_command_buffer *
|
|
tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
|
|
bool receive)
|
|
{
|
|
struct nvmf_tcp_command_buffer_list *list;
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
|
|
list = receive ? &qp->rx_buffers : &qp->tx_buffers;
|
|
LIST_FOREACH(cb, list, link) {
|
|
if (cb->cid == cid && cb->ttag == ttag)
|
|
return (cb);
|
|
}
|
|
return (NULL);
|
|
}
|
|
|
|
static void
|
|
tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
|
|
bool receive)
|
|
{
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
|
|
cb = tcp_find_command_buffer(qp, cid, ttag, receive);
|
|
if (cb != NULL)
|
|
LIST_REMOVE(cb, link);
|
|
}
|
|
|
|
static void
|
|
tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
|
|
{
|
|
LIST_REMOVE(cb, link);
|
|
free(cb);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len)
|
|
{
|
|
ssize_t nwritten;
|
|
const char *cp;
|
|
|
|
cp = pdu;
|
|
while (len != 0) {
|
|
nwritten = write(qp->s, cp, len);
|
|
if (nwritten < 0)
|
|
return (errno);
|
|
len -= nwritten;
|
|
cp += nwritten;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov,
|
|
u_int iovcnt, size_t len)
|
|
{
|
|
ssize_t nwritten;
|
|
|
|
for (;;) {
|
|
nwritten = writev(qp->s, iov, iovcnt);
|
|
if (nwritten < 0)
|
|
return (errno);
|
|
|
|
len -= nwritten;
|
|
if (len == 0)
|
|
return (0);
|
|
|
|
while (iov->iov_len <= (size_t)nwritten) {
|
|
nwritten -= iov->iov_len;
|
|
iovcnt--;
|
|
iov++;
|
|
}
|
|
|
|
iov->iov_base = (char *)iov->iov_base + nwritten;
|
|
iov->iov_len -= nwritten;
|
|
}
|
|
}
|
|
|
|
static void
|
|
nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
|
|
uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen)
|
|
{
|
|
struct nvme_tcp_term_req_hdr hdr;
|
|
struct iovec iov[2];
|
|
|
|
if (hlen != 0) {
|
|
if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
|
|
hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
|
|
if (hlen > pdu_len)
|
|
hlen = pdu_len;
|
|
}
|
|
|
|
memset(&hdr, 0, sizeof(hdr));
|
|
hdr.common.pdu_type = na->na_controller ?
|
|
NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
|
|
hdr.common.hlen = sizeof(hdr);
|
|
hdr.common.plen = sizeof(hdr) + hlen;
|
|
hdr.fes = htole16(fes);
|
|
le32enc(hdr.fei, fei);
|
|
iov[0].iov_base = &hdr;
|
|
iov[0].iov_len = sizeof(hdr);
|
|
iov[1].iov_base = __DECONST(void *, rx_pdu);
|
|
iov[1].iov_len = hlen;
|
|
|
|
(void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen);
|
|
close(qp->s);
|
|
qp->s = -1;
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu,
|
|
size_t pdu_len)
|
|
{
|
|
const struct nvme_tcp_common_pdu_hdr *ch;
|
|
uint32_t data_len, fei, plen;
|
|
uint32_t digest, rx_digest;
|
|
u_int hlen;
|
|
int error;
|
|
uint16_t fes;
|
|
|
|
/* Determine how large of a PDU header to return for errors. */
|
|
ch = pdu->hdr;
|
|
hlen = ch->hlen;
|
|
plen = le32toh(ch->plen);
|
|
if (hlen < sizeof(*ch) || hlen > plen)
|
|
hlen = sizeof(*ch);
|
|
|
|
error = nvmf_tcp_validate_pdu_header(ch,
|
|
qp->qp.nq_association->na_controller, qp->header_digests,
|
|
qp->data_digests, qp->rxpda, &data_len, &fes, &fei);
|
|
if (error != 0) {
|
|
if (error == ECONNRESET) {
|
|
close(qp->s);
|
|
qp->s = -1;
|
|
} else {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
fes, fei, ch, pdu_len, hlen);
|
|
}
|
|
return (error);
|
|
}
|
|
|
|
/* Check header digest if present. */
|
|
if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
|
|
digest = compute_digest(ch, ch->hlen);
|
|
memcpy(&rx_digest, (const char *)ch + ch->hlen,
|
|
sizeof(rx_digest));
|
|
if (digest != rx_digest) {
|
|
printf("NVMe/TCP: Header digest mismatch\n");
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch,
|
|
pdu_len, hlen);
|
|
return (EBADMSG);
|
|
}
|
|
}
|
|
|
|
/* Check data digest if present. */
|
|
if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
|
|
digest = compute_digest((const char *)ch + ch->pdo, data_len);
|
|
memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest),
|
|
sizeof(rx_digest));
|
|
if (digest != rx_digest) {
|
|
printf("NVMe/TCP: Data digest mismatch\n");
|
|
return (EBADMSG);
|
|
}
|
|
}
|
|
|
|
pdu->data_len = data_len;
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Read data from a socket, retrying until the data has been fully
|
|
* read or an error occurs.
|
|
*/
|
|
static int
|
|
nvmf_tcp_read_buffer(int s, void *buf, size_t len)
|
|
{
|
|
ssize_t nread;
|
|
char *cp;
|
|
|
|
cp = buf;
|
|
while (len != 0) {
|
|
nread = read(s, cp, len);
|
|
if (nread < 0)
|
|
return (errno);
|
|
if (nread == 0)
|
|
return (ECONNRESET);
|
|
len -= nread;
|
|
cp += nread;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_common_pdu_hdr ch;
|
|
uint32_t plen;
|
|
int error;
|
|
|
|
memset(pdu, 0, sizeof(*pdu));
|
|
error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch));
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
plen = le32toh(ch.plen);
|
|
|
|
/*
|
|
* Validate a header with garbage lengths to trigger
|
|
* an error message without reading more.
|
|
*/
|
|
if (plen < sizeof(ch) || ch.hlen > plen) {
|
|
pdu->hdr = &ch;
|
|
error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch));
|
|
pdu->hdr = NULL;
|
|
assert(error != 0);
|
|
return (error);
|
|
}
|
|
|
|
/* Read the rest of the PDU. */
|
|
pdu->hdr = malloc(plen);
|
|
memcpy(pdu->hdr, &ch, sizeof(ch));
|
|
error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch));
|
|
if (error != 0)
|
|
return (error);
|
|
error = nvmf_tcp_validate_pdu(qp, pdu, plen);
|
|
if (error != 0) {
|
|
free(pdu->hdr);
|
|
pdu->hdr = NULL;
|
|
}
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
free(pdu->hdr);
|
|
pdu->hdr = NULL;
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_term_req_hdr *hdr;
|
|
|
|
hdr = (void *)pdu->hdr;
|
|
|
|
printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
|
|
le16toh(hdr->fes), le32dec(hdr->fei));
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (ECONNRESET);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
|
|
struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_cmd *cmd;
|
|
struct nvmf_capsule *nc;
|
|
struct nvmf_tcp_capsule *tc;
|
|
|
|
cmd = (void *)pdu->hdr;
|
|
|
|
nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe);
|
|
if (nc == NULL)
|
|
return (ENOMEM);
|
|
|
|
tc = TCAP(nc);
|
|
tc->rx_pdu = *pdu;
|
|
|
|
TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
|
|
struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_rsp *rsp;
|
|
struct nvmf_capsule *nc;
|
|
struct nvmf_tcp_capsule *tc;
|
|
|
|
rsp = (void *)pdu->hdr;
|
|
|
|
nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe);
|
|
if (nc == NULL)
|
|
return (ENOMEM);
|
|
|
|
nc->nc_sqhd_valid = true;
|
|
tc = TCAP(nc);
|
|
tc->rx_pdu = *pdu;
|
|
|
|
TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
|
|
|
|
/*
|
|
* Once the CQE has been received, no further transfers to the
|
|
* command buffer for the associated CID can occur.
|
|
*/
|
|
tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true);
|
|
tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Construct and send a PDU that contains an optional data payload.
|
|
* This includes dealing with digests and the length fields in the
|
|
* common header.
|
|
*/
|
|
static int
|
|
nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
|
|
void *data, uint32_t data_len)
|
|
{
|
|
struct nvme_tcp_common_pdu_hdr *ch;
|
|
struct iovec iov[5];
|
|
u_int iovcnt;
|
|
uint32_t header_digest, data_digest, pad, pdo, plen;
|
|
|
|
plen = hlen;
|
|
if (qp->header_digests)
|
|
plen += sizeof(header_digest);
|
|
if (data_len != 0) {
|
|
pdo = roundup(plen, qp->txpda);
|
|
pad = pdo - plen;
|
|
plen = pdo + data_len;
|
|
if (qp->data_digests)
|
|
plen += sizeof(data_digest);
|
|
} else {
|
|
assert(data == NULL);
|
|
pdo = 0;
|
|
pad = 0;
|
|
}
|
|
|
|
ch = hdr;
|
|
ch->hlen = hlen;
|
|
if (qp->header_digests)
|
|
ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
|
|
if (qp->data_digests && data_len != 0)
|
|
ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
|
|
ch->pdo = pdo;
|
|
ch->plen = htole32(plen);
|
|
|
|
/* CH + PSH */
|
|
iov[0].iov_base = hdr;
|
|
iov[0].iov_len = hlen;
|
|
iovcnt = 1;
|
|
|
|
/* HDGST */
|
|
if (qp->header_digests) {
|
|
header_digest = compute_digest(hdr, hlen);
|
|
iov[iovcnt].iov_base = &header_digest;
|
|
iov[iovcnt].iov_len = sizeof(header_digest);
|
|
iovcnt++;
|
|
}
|
|
|
|
if (pad != 0) {
|
|
/* PAD */
|
|
iov[iovcnt].iov_base = __DECONST(char *, zero_padding);
|
|
iov[iovcnt].iov_len = pad;
|
|
iovcnt++;
|
|
}
|
|
|
|
if (data_len != 0) {
|
|
/* DATA */
|
|
iov[iovcnt].iov_base = data;
|
|
iov[iovcnt].iov_len = data_len;
|
|
iovcnt++;
|
|
|
|
/* DDGST */
|
|
if (qp->data_digests) {
|
|
data_digest = compute_digest(data, data_len);
|
|
iov[iovcnt].iov_base = &data_digest;
|
|
iov[iovcnt].iov_len = sizeof(data_digest);
|
|
iovcnt++;
|
|
}
|
|
}
|
|
|
|
return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen));
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_h2c_data_hdr *h2c;
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
uint32_t data_len, data_offset;
|
|
const char *icd;
|
|
|
|
h2c = (void *)pdu->hdr;
|
|
if (le32toh(h2c->datal) > qp->maxh2cdata) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
|
|
pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true);
|
|
if (cb == NULL) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
|
|
offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
data_len = le32toh(h2c->datal);
|
|
if (data_len != pdu->data_len) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
|
|
offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
data_offset = le32toh(h2c->datao);
|
|
if (data_offset < cb->data_offset ||
|
|
data_offset + data_len > cb->data_offset + cb->data_len) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
|
|
pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
if (data_offset != cb->data_offset + cb->data_xfered) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
if ((cb->data_xfered + data_len == cb->data_len) !=
|
|
((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
cb->data_xfered += data_len;
|
|
data_offset -= cb->data_offset;
|
|
icd = (const char *)pdu->hdr + pdu->hdr->pdo;
|
|
memcpy((char *)cb->data + data_offset, icd, data_len);
|
|
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvme_tcp_c2h_data_hdr *c2h;
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
uint32_t data_len, data_offset;
|
|
const char *icd;
|
|
|
|
c2h = (void *)pdu->hdr;
|
|
|
|
cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true);
|
|
if (cb == NULL) {
|
|
/*
|
|
* XXX: Could be PDU sequence error if cccid is for a
|
|
* command that doesn't use a command buffer.
|
|
*/
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
|
|
offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
data_len = le32toh(c2h->datal);
|
|
if (data_len != pdu->data_len) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
|
|
offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
data_offset = le32toh(c2h->datao);
|
|
if (data_offset < cb->data_offset ||
|
|
data_offset + data_len > cb->data_offset + cb->data_len) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
|
|
pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
if (data_offset != cb->data_offset + cb->data_xfered) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
if ((cb->data_xfered + data_len == cb->data_len) !=
|
|
((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
cb->data_xfered += data_len;
|
|
data_offset -= cb->data_offset;
|
|
icd = (const char *)pdu->hdr + pdu->hdr->pdo;
|
|
memcpy((char *)cb->data + data_offset, icd, data_len);
|
|
|
|
if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
|
|
struct nvme_completion cqe;
|
|
struct nvmf_tcp_capsule *tc;
|
|
struct nvmf_capsule *nc;
|
|
|
|
memset(&cqe, 0, sizeof(cqe));
|
|
cqe.cid = cb->cid;
|
|
|
|
nc = nvmf_allocate_response(&qp->qp, &cqe);
|
|
if (nc == NULL) {
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (ENOMEM);
|
|
}
|
|
nc->nc_sqhd_valid = false;
|
|
|
|
tc = TCAP(nc);
|
|
TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
|
|
}
|
|
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (0);
|
|
}
|
|
|
|
/* NB: cid and ttag and little-endian already. */
|
|
static int
|
|
tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
|
|
uint32_t data_offset, void *buf, size_t len, bool last_pdu)
|
|
{
|
|
struct nvme_tcp_h2c_data_hdr h2c;
|
|
|
|
memset(&h2c, 0, sizeof(h2c));
|
|
h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
|
|
if (last_pdu)
|
|
h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
|
|
h2c.cccid = cid;
|
|
h2c.ttag = ttag;
|
|
h2c.datao = htole32(data_offset);
|
|
h2c.datal = htole32(len);
|
|
|
|
return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len));
|
|
}
|
|
|
|
/* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */
|
|
static int
|
|
tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
|
|
uint32_t data_offset, void *buf, size_t len, bool last_pdu)
|
|
{
|
|
char *p;
|
|
|
|
p = buf;
|
|
while (len != 0) {
|
|
size_t todo;
|
|
int error;
|
|
|
|
todo = len;
|
|
if (todo > qp->maxh2cdata)
|
|
todo = qp->maxh2cdata;
|
|
error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo,
|
|
last_pdu && todo == len);
|
|
if (error != 0)
|
|
return (error);
|
|
p += todo;
|
|
len -= todo;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
|
|
{
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
struct nvme_tcp_r2t_hdr *r2t;
|
|
uint32_t data_len, data_offset;
|
|
int error;
|
|
|
|
r2t = (void *)pdu->hdr;
|
|
|
|
cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false);
|
|
if (cb == NULL) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
|
|
offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
data_offset = le32toh(r2t->r2to);
|
|
if (data_offset != cb->data_xfered) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
|
|
le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
/*
|
|
* XXX: The spec does not specify how to handle R2T tranfers
|
|
* out of range of the original command.
|
|
*/
|
|
data_len = le32toh(r2t->r2tl);
|
|
if (data_offset + data_len > cb->data_len) {
|
|
nvmf_tcp_report_error(qp->qp.nq_association, qp,
|
|
NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
|
|
pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (EBADMSG);
|
|
}
|
|
|
|
cb->data_xfered += data_len;
|
|
|
|
/*
|
|
* Write out one or more H2C_DATA PDUs containing the
|
|
* requested data.
|
|
*/
|
|
error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag,
|
|
data_offset, (char *)cb->data + data_offset, data_len, true);
|
|
|
|
nvmf_tcp_free_pdu(pdu);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp)
|
|
{
|
|
struct nvmf_tcp_rxpdu pdu;
|
|
int error;
|
|
|
|
error = nvmf_tcp_read_pdu(qp, &pdu);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
switch (pdu.hdr->pdu_type) {
|
|
default:
|
|
__unreachable();
|
|
break;
|
|
case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
|
|
case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
|
|
return (nvmf_tcp_handle_term_req(&pdu));
|
|
case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
|
|
return (nvmf_tcp_save_command_capsule(qp, &pdu));
|
|
case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
|
|
return (nvmf_tcp_save_response_capsule(qp, &pdu));
|
|
case NVME_TCP_PDU_TYPE_H2C_DATA:
|
|
return (nvmf_tcp_handle_h2c_data(qp, &pdu));
|
|
case NVME_TCP_PDU_TYPE_C2H_DATA:
|
|
return (nvmf_tcp_handle_c2h_data(qp, &pdu));
|
|
case NVME_TCP_PDU_TYPE_R2T:
|
|
return (nvmf_tcp_handle_r2t(qp, &pdu));
|
|
}
|
|
}
|
|
|
|
static bool
|
|
nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
|
|
const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len)
|
|
{
|
|
const struct nvme_tcp_ic_req *pdu;
|
|
uint32_t plen;
|
|
u_int hlen;
|
|
|
|
/* Determine how large of a PDU header to return for errors. */
|
|
hlen = ch->hlen;
|
|
plen = le32toh(ch->plen);
|
|
if (hlen < sizeof(*ch) || hlen > plen)
|
|
hlen = sizeof(*ch);
|
|
|
|
/*
|
|
* Errors must be reported for the lowest incorrect field
|
|
* first, so validate fields in order.
|
|
*/
|
|
|
|
/* Validate pdu_type. */
|
|
|
|
/* Controllers only receive PDUs with a PDU direction of 0. */
|
|
if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) {
|
|
na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
switch (ch->pdu_type) {
|
|
case NVME_TCP_PDU_TYPE_IC_REQ:
|
|
case NVME_TCP_PDU_TYPE_IC_RESP:
|
|
break;
|
|
default:
|
|
na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
/* Validate flags. */
|
|
if (ch->flags != 0) {
|
|
na_error(na, "NVMe/TCP: Invalid PDU header flags %#x",
|
|
ch->flags);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
/* Validate hlen. */
|
|
if (ch->hlen != 128) {
|
|
na_error(na, "NVMe/TCP: Invalid PDU header length %u",
|
|
ch->hlen);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
/* Validate pdo. */
|
|
if (ch->pdo != 0) {
|
|
na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
/* Validate plen. */
|
|
if (plen != 128) {
|
|
na_error(na, "NVMe/TCP: Invalid PDU length %u", plen);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
/* Validate fields common to both ICReq and ICResp. */
|
|
pdu = (const struct nvme_tcp_ic_req *)ch;
|
|
if (le16toh(pdu->pfv) != 0) {
|
|
na_error(na, "NVMe/TCP: Unsupported PDU version %u",
|
|
le16toh(pdu->pfv));
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
|
|
8, ch, pdu_len, hlen);
|
|
return (false);
|
|
}
|
|
|
|
if (pdu->hpda > NVME_TCP_HPDA_MAX) {
|
|
na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
if (pdu->dgst.bits.reserved != 0) {
|
|
na_error(na, "NVMe/TCP: Invalid digest settings");
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len,
|
|
hlen);
|
|
return (false);
|
|
}
|
|
|
|
return (true);
|
|
}
|
|
|
|
static bool
|
|
nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
|
|
struct nvme_tcp_ic_req *pdu)
|
|
{
|
|
int error;
|
|
|
|
error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
|
|
if (error != 0) {
|
|
na_error(na, "NVMe/TCP: Failed to read IC request: %s",
|
|
strerror(error));
|
|
return (false);
|
|
}
|
|
|
|
return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
|
|
}
|
|
|
|
static bool
|
|
nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
|
|
struct nvme_tcp_ic_resp *pdu)
|
|
{
|
|
int error;
|
|
|
|
error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
|
|
if (error != 0) {
|
|
na_error(na, "NVMe/TCP: Failed to read IC response: %s",
|
|
strerror(error));
|
|
return (false);
|
|
}
|
|
|
|
return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
|
|
}
|
|
|
|
static struct nvmf_association *
|
|
tcp_allocate_association(bool controller,
|
|
const struct nvmf_association_params *params)
|
|
{
|
|
struct nvmf_tcp_association *ta;
|
|
|
|
if (controller) {
|
|
/* 7.4.10.3 */
|
|
if (params->tcp.maxh2cdata < 4096 ||
|
|
params->tcp.maxh2cdata % 4 != 0)
|
|
return (NULL);
|
|
}
|
|
|
|
ta = calloc(1, sizeof(*ta));
|
|
|
|
return (&ta->na);
|
|
}
|
|
|
|
static void
|
|
tcp_update_association(struct nvmf_association *na,
|
|
const struct nvme_controller_data *cdata)
|
|
{
|
|
struct nvmf_tcp_association *ta = TASSOC(na);
|
|
|
|
ta->ioccsz = le32toh(cdata->ioccsz);
|
|
}
|
|
|
|
static void
|
|
tcp_free_association(struct nvmf_association *na)
|
|
{
|
|
free(na);
|
|
}
|
|
|
|
static bool
|
|
tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin)
|
|
{
|
|
const struct nvmf_association_params *params = &na->na_params;
|
|
struct nvmf_tcp_association *ta = TASSOC(na);
|
|
struct nvme_tcp_ic_req ic_req;
|
|
struct nvme_tcp_ic_resp ic_resp;
|
|
uint32_t maxh2cdata;
|
|
int error;
|
|
|
|
if (!admin) {
|
|
if (ta->ioccsz == 0) {
|
|
na_error(na, "TCP I/O queues require cdata");
|
|
return (false);
|
|
}
|
|
if (ta->ioccsz < 4) {
|
|
na_error(na, "Invalid IOCCSZ %u", ta->ioccsz);
|
|
return (false);
|
|
}
|
|
}
|
|
|
|
memset(&ic_req, 0, sizeof(ic_req));
|
|
ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ;
|
|
ic_req.common.hlen = sizeof(ic_req);
|
|
ic_req.common.plen = htole32(sizeof(ic_req));
|
|
ic_req.pfv = htole16(0);
|
|
ic_req.hpda = params->tcp.pda;
|
|
if (params->tcp.header_digests)
|
|
ic_req.dgst.bits.hdgst_enable = 1;
|
|
if (params->tcp.data_digests)
|
|
ic_req.dgst.bits.ddgst_enable = 1;
|
|
ic_req.maxr2t = htole32(params->tcp.maxr2t);
|
|
|
|
error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req));
|
|
if (error != 0) {
|
|
na_error(na, "Failed to write IC request: %s", strerror(error));
|
|
return (false);
|
|
}
|
|
|
|
if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp))
|
|
return (false);
|
|
|
|
/* Ensure the controller didn't enable digests we didn't request. */
|
|
if ((!params->tcp.header_digests &&
|
|
ic_resp.dgst.bits.hdgst_enable != 0) ||
|
|
(!params->tcp.data_digests &&
|
|
ic_resp.dgst.bits.ddgst_enable != 0)) {
|
|
na_error(na, "Controller enabled unrequested digests");
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
|
|
11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp));
|
|
return (false);
|
|
}
|
|
|
|
/*
|
|
* XXX: Is there an upper-bound to enforce here? Perhaps pick
|
|
* some large value and report larger values as an unsupported
|
|
* parameter?
|
|
*/
|
|
maxh2cdata = le32toh(ic_resp.maxh2cdata);
|
|
if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) {
|
|
na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata);
|
|
nvmf_tcp_report_error(na, qp,
|
|
NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp,
|
|
sizeof(ic_resp), sizeof(ic_resp));
|
|
return (false);
|
|
}
|
|
|
|
qp->rxpda = (params->tcp.pda + 1) * 4;
|
|
qp->txpda = (ic_resp.cpda + 1) * 4;
|
|
qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
|
|
qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
|
|
qp->maxr2t = params->tcp.maxr2t;
|
|
qp->maxh2cdata = maxh2cdata;
|
|
if (admin)
|
|
/* 7.4.3 */
|
|
qp->max_icd = 8192;
|
|
else
|
|
qp->max_icd = (ta->ioccsz - 4) * 16;
|
|
|
|
return (0);
|
|
}
|
|
|
|
static bool
|
|
tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na)
|
|
{
|
|
const struct nvmf_association_params *params = &na->na_params;
|
|
struct nvme_tcp_ic_req ic_req;
|
|
struct nvme_tcp_ic_resp ic_resp;
|
|
int error;
|
|
|
|
if (!nvmf_tcp_read_ic_req(na, qp, &ic_req))
|
|
return (false);
|
|
|
|
memset(&ic_resp, 0, sizeof(ic_resp));
|
|
ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP;
|
|
ic_resp.common.hlen = sizeof(ic_req);
|
|
ic_resp.common.plen = htole32(sizeof(ic_req));
|
|
ic_resp.pfv = htole16(0);
|
|
ic_resp.cpda = params->tcp.pda;
|
|
if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0)
|
|
ic_resp.dgst.bits.hdgst_enable = 1;
|
|
if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0)
|
|
ic_resp.dgst.bits.ddgst_enable = 1;
|
|
ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata);
|
|
|
|
error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp));
|
|
if (error != 0) {
|
|
na_error(na, "Failed to write IC response: %s",
|
|
strerror(error));
|
|
return (false);
|
|
}
|
|
|
|
qp->rxpda = (params->tcp.pda + 1) * 4;
|
|
qp->txpda = (ic_req.hpda + 1) * 4;
|
|
qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
|
|
qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
|
|
qp->maxr2t = le32toh(ic_req.maxr2t);
|
|
qp->maxh2cdata = params->tcp.maxh2cdata;
|
|
qp->max_icd = 0; /* XXX */
|
|
return (0);
|
|
}
|
|
|
|
static struct nvmf_qpair *
|
|
tcp_allocate_qpair(struct nvmf_association *na,
|
|
const struct nvmf_qpair_params *qparams)
|
|
{
|
|
const struct nvmf_association_params *aparams = &na->na_params;
|
|
struct nvmf_tcp_qpair *qp;
|
|
int error;
|
|
|
|
if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) {
|
|
na_error(na, "Invalid PDA");
|
|
return (NULL);
|
|
}
|
|
|
|
qp = calloc(1, sizeof(*qp));
|
|
qp->s = qparams->tcp.fd;
|
|
LIST_INIT(&qp->rx_buffers);
|
|
LIST_INIT(&qp->tx_buffers);
|
|
TAILQ_INIT(&qp->rx_capsules);
|
|
if (na->na_controller)
|
|
error = tcp_accept(qp, na);
|
|
else
|
|
error = tcp_connect(qp, na, qparams->admin);
|
|
if (error != 0) {
|
|
free(qp);
|
|
return (NULL);
|
|
}
|
|
|
|
return (&qp->qp);
|
|
}
|
|
|
|
static void
|
|
tcp_free_qpair(struct nvmf_qpair *nq)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nq);
|
|
struct nvmf_tcp_capsule *ntc, *tc;
|
|
struct nvmf_tcp_command_buffer *ncb, *cb;
|
|
|
|
TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) {
|
|
TAILQ_REMOVE(&qp->rx_capsules, tc, link);
|
|
nvmf_free_capsule(&tc->nc);
|
|
}
|
|
LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) {
|
|
tcp_free_command_buffer(cb);
|
|
}
|
|
LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) {
|
|
tcp_free_command_buffer(cb);
|
|
}
|
|
free(qp);
|
|
}
|
|
|
|
static void
|
|
tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nq);
|
|
|
|
nvlist_add_number(nvl, "fd", qp->s);
|
|
nvlist_add_number(nvl, "rxpda", qp->rxpda);
|
|
nvlist_add_number(nvl, "txpda", qp->txpda);
|
|
nvlist_add_bool(nvl, "header_digests", qp->header_digests);
|
|
nvlist_add_bool(nvl, "data_digests", qp->data_digests);
|
|
nvlist_add_number(nvl, "maxr2t", qp->maxr2t);
|
|
nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata);
|
|
nvlist_add_number(nvl, "max_icd", qp->max_icd);
|
|
}
|
|
|
|
static struct nvmf_capsule *
|
|
tcp_allocate_capsule(struct nvmf_qpair *qp __unused)
|
|
{
|
|
struct nvmf_tcp_capsule *nc;
|
|
|
|
nc = calloc(1, sizeof(*nc));
|
|
return (&nc->nc);
|
|
}
|
|
|
|
static void
|
|
tcp_free_capsule(struct nvmf_capsule *nc)
|
|
{
|
|
struct nvmf_tcp_capsule *tc = TCAP(nc);
|
|
|
|
nvmf_tcp_free_pdu(&tc->rx_pdu);
|
|
if (tc->cb != NULL)
|
|
tcp_free_command_buffer(tc->cb);
|
|
free(tc);
|
|
}
|
|
|
|
static int
|
|
tcp_transmit_command(struct nvmf_capsule *nc)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
|
|
struct nvmf_tcp_capsule *tc = TCAP(nc);
|
|
struct nvme_tcp_cmd cmd;
|
|
struct nvme_sgl_descriptor *sgl;
|
|
int error;
|
|
bool use_icd;
|
|
|
|
use_icd = false;
|
|
if (nc->nc_data_len != 0 && nc->nc_send_data &&
|
|
nc->nc_data_len <= qp->max_icd)
|
|
use_icd = true;
|
|
|
|
memset(&cmd, 0, sizeof(cmd));
|
|
cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
|
|
cmd.ccsqe = nc->nc_sqe;
|
|
|
|
/* Populate SGL in SQE. */
|
|
sgl = &cmd.ccsqe.sgl;
|
|
memset(sgl, 0, sizeof(*sgl));
|
|
sgl->address = 0;
|
|
sgl->length = htole32(nc->nc_data_len);
|
|
if (use_icd) {
|
|
/* Use in-capsule data. */
|
|
sgl->type = NVME_SGL_TYPE_ICD;
|
|
} else {
|
|
/* Use a command buffer. */
|
|
sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
|
|
}
|
|
|
|
/* Send command capsule. */
|
|
error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ?
|
|
nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
/*
|
|
* If data will be transferred using a command buffer, allocate a
|
|
* buffer structure and queue it.
|
|
*/
|
|
if (nc->nc_data_len != 0 && !use_icd)
|
|
tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0,
|
|
nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
tcp_transmit_response(struct nvmf_capsule *nc)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
|
|
struct nvme_tcp_rsp rsp;
|
|
|
|
memset(&rsp, 0, sizeof(rsp));
|
|
rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
|
|
rsp.rccqe = nc->nc_cqe;
|
|
|
|
return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
|
|
}
|
|
|
|
static int
|
|
tcp_transmit_capsule(struct nvmf_capsule *nc)
|
|
{
|
|
if (nc->nc_qe_len == sizeof(struct nvme_command))
|
|
return (tcp_transmit_command(nc));
|
|
else
|
|
return (tcp_transmit_response(nc));
|
|
}
|
|
|
|
static int
|
|
tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nq);
|
|
struct nvmf_tcp_capsule *tc;
|
|
int error;
|
|
|
|
while (TAILQ_EMPTY(&qp->rx_capsules)) {
|
|
error = nvmf_tcp_receive_pdu(qp);
|
|
if (error != 0)
|
|
return (error);
|
|
}
|
|
tc = TAILQ_FIRST(&qp->rx_capsules);
|
|
TAILQ_REMOVE(&qp->rx_capsules, tc, link);
|
|
*ncp = &tc->nc;
|
|
return (0);
|
|
}
|
|
|
|
static uint8_t
|
|
tcp_validate_command_capsule(const struct nvmf_capsule *nc)
|
|
{
|
|
const struct nvmf_tcp_capsule *tc = CTCAP(nc);
|
|
const struct nvme_sgl_descriptor *sgl;
|
|
|
|
assert(tc->rx_pdu.hdr != NULL);
|
|
|
|
sgl = &nc->nc_sqe.sgl;
|
|
switch (sgl->type) {
|
|
case NVME_SGL_TYPE_ICD:
|
|
if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
|
|
printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
|
|
return (NVME_SC_DATA_SGL_LENGTH_INVALID);
|
|
}
|
|
break;
|
|
case NVME_SGL_TYPE_COMMAND_BUFFER:
|
|
if (tc->rx_pdu.data_len != 0) {
|
|
printf("NVMe/TCP: Command Buffer SGL with ICD\n");
|
|
return (NVME_SC_INVALID_FIELD);
|
|
}
|
|
break;
|
|
default:
|
|
printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
|
|
return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
|
|
}
|
|
|
|
if (sgl->address != 0) {
|
|
printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
|
|
return (NVME_SC_SGL_OFFSET_INVALID);
|
|
}
|
|
|
|
return (NVME_SC_SUCCESS);
|
|
}
|
|
|
|
static size_t
|
|
tcp_capsule_data_len(const struct nvmf_capsule *nc)
|
|
{
|
|
assert(nc->nc_qe_len == sizeof(struct nvme_command));
|
|
return (le32toh(nc->nc_sqe.sgl.length));
|
|
}
|
|
|
|
/* NB: cid and ttag are both little-endian already. */
|
|
static int
|
|
tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
|
|
uint32_t data_offset, uint32_t data_len)
|
|
{
|
|
struct nvme_tcp_r2t_hdr r2t;
|
|
|
|
memset(&r2t, 0, sizeof(r2t));
|
|
r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
|
|
r2t.cccid = cid;
|
|
r2t.ttag = ttag;
|
|
r2t.r2to = htole32(data_offset);
|
|
r2t.r2tl = htole32(data_len);
|
|
|
|
return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
|
|
}
|
|
|
|
static int
|
|
tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
|
|
void *buf, size_t len)
|
|
{
|
|
struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
|
|
struct nvmf_tcp_command_buffer *cb;
|
|
int error;
|
|
uint16_t ttag;
|
|
|
|
/*
|
|
* Don't bother byte-swapping ttag as it is just a cookie
|
|
* value returned by the other end as-is.
|
|
*/
|
|
ttag = qp->next_ttag++;
|
|
|
|
error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
|
|
nc->nc_sqe.cid, ttag, true);
|
|
|
|
/* Parse received PDUs until the data transfer is complete. */
|
|
while (cb->data_xfered < cb->data_len) {
|
|
error = nvmf_tcp_receive_pdu(qp);
|
|
if (error != 0)
|
|
break;
|
|
}
|
|
tcp_free_command_buffer(cb);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset,
|
|
void *buf, size_t len)
|
|
{
|
|
const struct nvmf_tcp_capsule *tc = CTCAP(nc);
|
|
const char *icd;
|
|
|
|
icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset;
|
|
memcpy(buf, icd, len);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset,
|
|
void *buf, size_t len)
|
|
{
|
|
struct nvmf_association *na = nc->nc_qpair->nq_association;
|
|
const struct nvme_sgl_descriptor *sgl;
|
|
size_t data_len;
|
|
|
|
if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
|
|
return (EINVAL);
|
|
|
|
sgl = &nc->nc_sqe.sgl;
|
|
data_len = le32toh(sgl->length);
|
|
if (data_offset + len > data_len)
|
|
return (EFBIG);
|
|
|
|
if (sgl->type == NVME_SGL_TYPE_ICD)
|
|
return (tcp_receive_icd_data(nc, data_offset, buf, len));
|
|
else
|
|
return (tcp_receive_r2t_data(nc, data_offset, buf, len));
|
|
}
|
|
|
|
/* NB: cid is little-endian already. */
|
|
static int
|
|
tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
|
|
uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
|
|
bool success)
|
|
{
|
|
struct nvme_tcp_c2h_data_hdr c2h;
|
|
|
|
memset(&c2h, 0, sizeof(c2h));
|
|
c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
|
|
if (last_pdu)
|
|
c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
|
|
if (success)
|
|
c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
|
|
c2h.cccid = cid;
|
|
c2h.datao = htole32(data_offset);
|
|
c2h.datal = htole32(len);
|
|
|
|
return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
|
|
__DECONST(void *, buf), len));
|
|
}
|
|
|
|
static int
|
|
tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
|
|
size_t len)
|
|
{
|
|
struct nvmf_association *na = nc->nc_qpair->nq_association;
|
|
struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
|
|
const struct nvme_sgl_descriptor *sgl;
|
|
const char *src;
|
|
size_t todo;
|
|
uint32_t data_len, data_offset;
|
|
int error;
|
|
bool last_pdu, send_success_flag;
|
|
|
|
if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
|
|
return (EINVAL);
|
|
|
|
sgl = &nc->nc_sqe.sgl;
|
|
data_len = le32toh(sgl->length);
|
|
if (len != data_len) {
|
|
nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
|
|
return (EFBIG);
|
|
}
|
|
|
|
if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
|
|
nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
|
|
return (EINVAL);
|
|
}
|
|
|
|
/* Use the SUCCESS flag if SQ flow control is disabled. */
|
|
send_success_flag = !qp->qp.nq_flow_control;
|
|
|
|
/*
|
|
* Write out one or more C2H_DATA PDUs containing the data.
|
|
* Each PDU is arbitrarily capped at 256k.
|
|
*/
|
|
data_offset = 0;
|
|
src = buf;
|
|
while (len > 0) {
|
|
if (len > 256 * 1024) {
|
|
todo = 256 * 1024;
|
|
last_pdu = false;
|
|
} else {
|
|
todo = len;
|
|
last_pdu = true;
|
|
}
|
|
error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
|
|
src, todo, last_pdu, last_pdu && send_success_flag);
|
|
if (error != 0) {
|
|
nvmf_send_generic_error(nc,
|
|
NVME_SC_TRANSIENT_TRANSPORT_ERROR);
|
|
return (error);
|
|
}
|
|
data_offset += todo;
|
|
src += todo;
|
|
len -= todo;
|
|
}
|
|
if (!send_success_flag)
|
|
nvmf_send_success(nc);
|
|
return (0);
|
|
}
|
|
|
|
struct nvmf_transport_ops tcp_ops = {
|
|
.allocate_association = tcp_allocate_association,
|
|
.update_association = tcp_update_association,
|
|
.free_association = tcp_free_association,
|
|
.allocate_qpair = tcp_allocate_qpair,
|
|
.free_qpair = tcp_free_qpair,
|
|
.kernel_handoff_params = tcp_kernel_handoff_params,
|
|
.allocate_capsule = tcp_allocate_capsule,
|
|
.free_capsule = tcp_free_capsule,
|
|
.transmit_capsule = tcp_transmit_capsule,
|
|
.receive_capsule = tcp_receive_capsule,
|
|
.validate_command_capsule = tcp_validate_command_capsule,
|
|
.capsule_data_len = tcp_capsule_data_len,
|
|
.receive_controller_data = tcp_receive_controller_data,
|
|
.send_controller_data = tcp_send_controller_data,
|
|
};
|