opnsense-src/sys/dev/nvmf/controller/nvmft_qpair.c
John Baldwin a15f7c96a2 nvmft: The in-kernel NVMe over Fabrics controller
This is the server (target in SCSI terms) for NVMe over Fabrics.
Userland is responsible for accepting a new queue pair and receiving
the initial Connect command before handing the queue pair off via an
ioctl to this CTL frontend.

This frontend exposes CTL LUNs as NVMe namespaces to remote hosts.
Users can add LUNs to CTL that can be shared via either iSCSI or
NVMeoF.

Reviewed by:	imp
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D44726
2024-05-02 16:38:30 -07:00

361 lines
8.3 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>
/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 *
 * NUM_CIDS provides one bit for every value a 16-bit CID can take.
 * In this file a CID's bit is set when a command capsule is accepted
 * by the receive path and cleared when the command is completed or
 * its response is sent.
 */
#define NUM_CIDS (UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);
/*
 * Per-queue-pair state of the NVMe over Fabrics controller.
 *
 * An instance is created by nvmft_qpair_init() and released by
 * nvmft_qpair_destroy().
 */
struct nvmft_qpair {
/* Owning controller; NULL until nvmft_finish_accept() assigns it. */
struct nvmft_controller *ctrlr;
/* Transport queue pair; set to NULL (under 'lock') on shutdown. */
struct nvmf_qpair *qp;
/* Bitset of command IDs that are currently busy on this queue. */
struct cidset *cids;
/* True for the admin queue; selects the admin vs. I/O command handler. */
bool admin;
/* When true, responses carry an updated SQHD value. */
bool sq_flow_control;
/* Queue ID supplied by the caller of nvmft_qpair_init(). */
uint16_t qid;
/* Queue size from the handoff; modulus used when advancing 'sqhd'. */
u_int qsize;
/* Submission queue head; advanced under 'lock' for each response. */
uint16_t sqhd;
/* Submission queue tail copied from the handoff parameters. */
uint16_t sqtail;
volatile u_int qp_refs; /* Internal references on 'qp'. */
/* Protects 'qp' and the 'sqhd' update. */
struct mtx lock;
/* Queue name used in log messages; truncated to fit by strlcpy(). */
char name[16];
};
/*
 * Forward declaration: the definition appears later in this file, but
 * nvmft_receive_capsule() needs it first.
 */
static int _nvmft_send_generic_error(struct nvmft_qpair *qp,
struct nvmf_capsule *nc, uint8_t sc_status);
static void
nvmft_qpair_error(void *arg, int error)
{
struct nvmft_qpair *qp = arg;
struct nvmft_controller *ctrlr = qp->ctrlr;
/*
* XXX: The Linux TCP initiator sends a RST immediately after
* the FIN, so treat ECONNRESET as plain EOF to avoid spurious
* errors on shutdown.
*/
if (error == ECONNRESET)
error = 0;
if (error != 0)
nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
nvmft_controller_error(ctrlr, qp, error);
}
/*
 * Transport receive callback: process one incoming command capsule.
 *
 * 'arg' is the nvmft_qpair registered with nvmf_allocate_qpair().
 * The capsule is freed here on every rejection path; otherwise it is
 * passed on to the admin or I/O command handler.
 */
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp;
	struct nvmft_controller *owner;
	const struct nvme_command *sqe;
	uint8_t sc_status;

	qp = arg;
	owner = qp->ctrlr;
	sqe = nvmf_capsule_sqe(nc);

	/* No controller has been attached to this queue yet. */
	if (owner == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(sqe->cid), sqe->opc);
		nvmf_free_capsule(nc);
		return;
	}

	/*
	 * Validate the capsule first; only a valid capsule may claim
	 * its CID.  Don't bother byte-swapping CID.
	 */
	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status == NVME_SC_SUCCESS &&
	    BIT_TEST_SET_ATOMIC(NUM_CIDS, sqe->cid, qp->cids))
		sc_status = NVME_SC_COMMAND_ID_CONFLICT;

	if (sc_status != NVME_SC_SUCCESS) {
		/*
		 * Use the variant that leaves qp->cids untouched: this
		 * capsule never claimed the CID.
		 */
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin) {
		nvmft_handle_admin_command(owner, nc);
		return;
	}
	nvmft_handle_io_command(qp, qp->qid, nc);
}
/*
 * Allocate and initialize a queue pair from handoff parameters.
 *
 * Copies the queue attributes out of 'handoff', records 'qid' and
 * 'name', allocates the busy-CID bitset and creates the transport
 * queue pair with this file's error and receive callbacks.
 *
 * Returns the new queue pair holding one reference on the transport
 * queue pair, or NULL if nvmf_allocate_qpair() fails (in which case
 * everything allocated here has been released).
 */
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype,
    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *new_qp;

	new_qp = malloc(sizeof(*new_qp), M_NVMFT, M_WAITOK | M_ZERO);

	/* Identity of the queue. */
	new_qp->qid = qid;
	strlcpy(new_qp->name, name, sizeof(new_qp->name));

	/* Attributes supplied by the handoff. */
	new_qp->admin = handoff->admin;
	new_qp->sq_flow_control = handoff->sq_flow_control;
	new_qp->qsize = handoff->qsize;
	new_qp->sqhd = handoff->sqhd;
	new_qp->sqtail = handoff->sqtail;

	new_qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	mtx_init(&new_qp->lock, "nvmft qp", NULL, MTX_DEF);

	/* Create the transport queue pair last, once all state is set up. */
	new_qp->qp = nvmf_allocate_qpair(trtype, true, handoff,
	    nvmft_qpair_error, new_qp, nvmft_receive_capsule, new_qp);
	if (new_qp->qp != NULL) {
		refcount_init(&new_qp->qp_refs, 1);
		return (new_qp);
	}

	/* Transport allocation failed: undo everything done above. */
	mtx_destroy(&new_qp->lock);
	free(new_qp->cids, M_NVMFT);
	free(new_qp, M_NVMFT);
	return (NULL);
}
/*
 * Detach the transport queue pair from 'qp' and drop the reference
 * taken at init time.
 *
 * The transport queue pair is freed here only if that was the last
 * reference; otherwise whichever sender drops the final reference
 * frees it.  Calling this on a queue pair that is already shut down
 * does nothing.
 */
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	struct nvmf_qpair *transport_qp;

	/* Clear qp->qp under the lock so new senders see ENOTCONN. */
	mtx_lock(&qp->lock);
	transport_qp = qp->qp;
	qp->qp = NULL;
	mtx_unlock(&qp->lock);

	if (transport_qp == NULL)
		return;

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(transport_qp);
}
/*
 * Shut down 'qp' and release all memory associated with it.
 *
 * NOTE(review): this presumably relies on the caller ensuring that no
 * response is still in flight on the queue pair -- confirm against
 * the callers.
 */
void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);

	free(qp->cids, M_NVMFT);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMFT);
}
/*
 * Return the controller associated with 'qp'.  Within this file the
 * field is only assigned by nvmft_finish_accept(), so it is NULL
 * before that point.
 */
struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	struct nvmft_controller *owner;

	owner = qp->ctrlr;
	return (owner);
}
/* Return the queue ID that was given to nvmft_qpair_init(). */
uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	uint16_t id;

	id = qp->qid;
	return (id);
}
/*
 * Return the queue pair's name.  The string lives inside 'qp' and is
 * therefore only valid for as long as 'qp' itself.
 */
const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	const char *label;

	label = qp->name;
	return (label);
}
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
struct nvme_completion cpl;
struct nvmf_qpair *nq;
struct nvmf_capsule *rc;
int error;
memcpy(&cpl, cqe, sizeof(cpl));
mtx_lock(&qp->lock);
nq = qp->qp;
if (nq == NULL) {
mtx_unlock(&qp->lock);
return (ENOTCONN);
}
refcount_acquire(&qp->qp_refs);
/* Set SQHD. */
if (qp->sq_flow_control) {
qp->sqhd = (qp->sqhd + 1) % qp->qsize;
cpl.sqhd = htole16(qp->sqhd);
} else
cpl.sqhd = 0;
mtx_unlock(&qp->lock);
rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
error = nvmf_transmit_capsule(rc);
nvmf_free_capsule(rc);
if (refcount_release(&qp->qp_refs))
nvmf_free_qpair(nq);
return (error);
}
/*
 * Mark the command carried by 'nc' as finished by releasing its CID
 * in the busy-CID bitset.  No response is sent from here.
 */
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *sqe;

	sqe = nvmf_capsule_sqe(nc);

	/*
	 * Don't bother byte-swapping CID: the receive path indexes the
	 * bitset with the unswapped value as well.
	 */
	KASSERT(BIT_ISSET(NUM_CIDS, sqe->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, sqe->cid));
	BIT_CLR_ATOMIC(NUM_CIDS, sqe->cid, qp->cids);
}
/*
 * Release the CID named in 'cqe' and transmit the completion.
 *
 * 'cqe' points to a struct nvme_completion.  Returns the result of
 * _nvmft_send_response().
 */
int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *completion;

	completion = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, completion->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, completion->cid));
	BIT_CLR_ATOMIC(NUM_CIDS, completion->cid, qp->cids);

	return (_nvmft_send_response(qp, cqe));
}
/*
 * Initialize the completion entry at 'cqe' for the command in 'nc'.
 *
 * The entry is zeroed, the CID is copied verbatim from the submission
 * queue entry and 'status' (host byte order) is stored little-endian.
 */
void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	const struct nvme_command *sqe;
	struct nvme_completion *out;

	sqe = nvmf_capsule_sqe(nc);
	out = cqe;

	memset(out, 0, sizeof(*out));
	out->status = htole16(status);
	out->cid = sqe->cid;
}
int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
uint8_t sc_type, uint8_t sc_status)
{
struct nvme_completion cpl;
uint16_t status;
status = NVMEF(NVME_STATUS_SCT, sc_type) |
NVMEF(NVME_STATUS_SC, sc_status);
nvmft_init_cqe(&cpl, nc, status);
return (nvmft_send_response(qp, &cpl));
}
int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
uint8_t sc_status)
{
return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}
/*
* This version doesn't clear CID in qp->cids and is used for errors
* before the CID is validated.
*/
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
uint8_t sc_status)
{
struct nvme_completion cpl;
uint16_t status;
status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
NVMEF(NVME_STATUS_SC, sc_status);
nvmft_init_cqe(&cpl, nc, status);
return (_nvmft_send_response(qp, &cpl));
}
int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}
/*
 * Initialize a Fabrics Connect response for the Connect command
 * 'cmd': zero the response, copy the CID verbatim and store 'status'
 * (host byte order) little-endian.
 */
static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));

	rsp->status = htole16(status);
	rsp->cid = cmd->cid;
}
/*
 * Transmit a Fabrics Connect response on 'qp'.
 *
 * Unlike _nvmft_send_response(), the response is sent as-is: SQHD is
 * not updated here.  A reference on the transport queue pair is held
 * for the duration of the transmit so that a concurrent
 * nvmft_qpair_shutdown() cannot free it underneath us.
 *
 * Returns ENOTCONN if the queue pair has been shut down, otherwise
 * the result of nvmf_transmit_capsule().
 */
static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/*
	 * Use the snapshot taken under the lock.  qp->qp must not be
	 * re-read here: nvmft_qpair_shutdown() may have set it to NULL
	 * since the lock was dropped, whereas 'nq' is kept alive by
	 * the reference acquired above.
	 */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	/* If shutdown raced with us, the last reference frees the qpair. */
	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}
/*
 * Reject the Connect command 'cmd' with the given status code type
 * and status code.  Sending is best effort: a transmit failure is not
 * reported to the caller.
 */
void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd, NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status));
	(void)nvmft_send_connect_response(qp, &rsp);
}
/*
 * Reject the Connect command 'cmd' with the command-specific
 * "invalid parameter" status.
 *
 * 'offset' is stored little-endian in the response's 'ipo' field and
 * 'data' sets the 'iattr' field to 1 (true) or 0 (false).  Sending is
 * best effort: a transmit failure is not reported to the caller.
 */
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM);
	nvmft_init_connect_rsp(&rsp, cmd, status);

	if (data)
		rsp.status_code_specific.invalid.iattr = 1;
	else
		rsp.status_code_specific.invalid.iattr = 0;
	rsp.status_code_specific.invalid.ipo = htole16(offset);

	(void)nvmft_send_connect_response(qp, &rsp);
}
/*
 * Bind 'qp' to 'ctrlr' and send the successful Connect response for
 * 'cmd'.
 *
 * The response reports the controller ID and either the current SQHD
 * (when SQ flow control is enabled) or 0xffff.  Returns the result of
 * nvmft_send_connect_response().
 */
int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t sqhd;

	/* From here on received capsules are dispatched to 'ctrlr'. */
	qp->ctrlr = ctrlr;

	sqhd = qp->sq_flow_control ? qp->sqhd : 0xffff;

	nvmft_init_connect_rsp(&rsp, cmd, 0);
	rsp.sqhd = htole16(sqhd);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);

	return (nvmft_send_connect_response(qp, &rsp));
}