opnsense-src/sys/dev/nvmf/controller/nvmft_controller.c
John Baldwin f5541f9f47 nvmfd/nvmft: Fix a typo "whiled" -> "while"
Sponsored by:	Chelsio Communications
2024-09-03 16:12:04 -04:00

1140 lines
29 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023-2024 Chelsio Communications, Inc.
* Written by: John Baldwin <jhb@FreeBSD.org>
*/
#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>
static void nvmft_controller_shutdown(void *arg, int pending);
static void nvmft_controller_terminate(void *arg, int pending);
int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
char buf[128];
struct sbuf sb;
va_list ap;
size_t retval;
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
sbuf_set_drain(&sb, sbuf_printf_drain, &retval);
sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);
va_start(ap, fmt);
sbuf_vprintf(&sb, fmt, ap);
va_end(ap);
sbuf_finish(&sb);
sbuf_delete(&sb);
return (retval);
}
static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
const struct nvmf_fabric_connect_data *data)
{
struct nvmft_controller *ctrlr;
ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
ctrlr->cntlid = cntlid;
nvmft_port_ref(np);
TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
ctrlr->np = np;
mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
callout_init(&ctrlr->ka_timer, 1);
TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
nvmft_controller_terminate, ctrlr);
ctrlr->cdata = np->cdata;
ctrlr->cdata.ctrlr_id = htole16(cntlid);
memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
ctrlr->hip.power_cycles[0] = 1;
ctrlr->create_time = sbinuptime();
ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
M_WAITOK | M_ZERO);
return (ctrlr);
}
static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
mtx_destroy(&ctrlr->lock);
MPASS(ctrlr->io_qpairs == NULL);
free(ctrlr->changed_ns, M_NVMFT);
free(ctrlr, M_NVMFT);
}
static void
nvmft_keep_alive_timer(void *arg)
{
struct nvmft_controller *ctrlr = arg;
int traffic;
if (ctrlr->shutdown)
return;
traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
if (traffic == 0) {
nvmft_printf(ctrlr,
"disconnecting due to KeepAlive timeout\n");
nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
return;
}
callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}
int
nvmft_handoff_admin_queue(struct nvmft_port *np,
const struct nvmf_handoff_controller_qpair *handoff,
const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data)
{
struct nvmft_controller *ctrlr;
struct nvmft_qpair *qp;
uint32_t kato;
int cntlid;
if (cmd->qid != htole16(0))
return (EINVAL);
qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0,
"admin queue");
if (qp == NULL) {
printf("NVMFT: Failed to setup admin queue from %.*s\n",
(int)sizeof(data->hostnqn), data->hostnqn);
return (ENXIO);
}
sx_xlock(&np->lock);
cntlid = alloc_unr(np->ids);
if (cntlid == -1) {
sx_xunlock(&np->lock);
printf("NVMFT: Unable to allocate controller for %.*s\n",
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
NVMF_FABRIC_SC_INVALID_HOST);
nvmft_qpair_destroy(qp);
return (ENOMEM);
}
#ifdef INVARIANTS
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
KASSERT(ctrlr->cntlid != cntlid,
("%s: duplicate controllers with id %d", __func__, cntlid));
}
#endif
ctrlr = nvmft_controller_alloc(np, cntlid, data);
nvmft_printf(ctrlr, "associated with %.*s\n",
(int)sizeof(data->hostnqn), data->hostnqn);
ctrlr->admin = qp;
ctrlr->trtype = handoff->trtype;
/*
* The spec requires a non-zero KeepAlive timer, but allow a
* zero KATO value to match Linux.
*/
kato = le32toh(cmd->kato);
if (kato != 0) {
/*
* Round up to 1 second matching granularity
* advertised in cdata.
*/
ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
}
nvmft_finish_accept(qp, cmd, ctrlr);
sx_xunlock(&np->lock);
return (0);
}
int
nvmft_handoff_io_queue(struct nvmft_port *np,
const struct nvmf_handoff_controller_qpair *handoff,
const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data)
{
struct nvmft_controller *ctrlr;
struct nvmft_qpair *qp;
char name[16];
uint16_t cntlid, qid;
qid = le16toh(cmd->qid);
if (qid == 0)
return (EINVAL);
cntlid = le16toh(data->cntlid);
snprintf(name, sizeof(name), "I/O queue %u", qid);
qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name);
if (qp == NULL) {
printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
return (ENXIO);
}
sx_slock(&np->lock);
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
if (ctrlr->cntlid == cntlid)
break;
}
if (ctrlr == NULL) {
sx_sunlock(&np->lock);
printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
ctrlr->cntlid, qid, (int)sizeof(data->hostnqn),
data->hostnqn);
nvmft_connect_invalid_parameters(qp, cmd, true,
offsetof(struct nvmf_fabric_connect_data, cntlid));
nvmft_qpair_destroy(qp);
return (ENOENT);
}
if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"hostid mismatch for I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_invalid_parameters(qp, cmd, true,
offsetof(struct nvmf_fabric_connect_data, hostid));
nvmft_qpair_destroy(qp);
return (EINVAL);
}
if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"hostnqn mismatch for I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_invalid_parameters(qp, cmd, true,
offsetof(struct nvmf_fabric_connect_data, hostnqn));
nvmft_qpair_destroy(qp);
return (EINVAL);
}
/* XXX: Require handoff->trtype == ctrlr->trtype? */
mtx_lock(&ctrlr->lock);
if (ctrlr->shutdown) {
mtx_unlock(&ctrlr->lock);
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create I/O queue %u on disabled controller from %.*s\n",
qid, (int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_invalid_parameters(qp, cmd, true,
offsetof(struct nvmf_fabric_connect_data, cntlid));
nvmft_qpair_destroy(qp);
return (EINVAL);
}
if (ctrlr->num_io_queues == 0) {
mtx_unlock(&ctrlr->lock);
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create I/O queue %u without enabled queues from %.*s\n",
qid, (int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
NVME_SC_COMMAND_SEQUENCE_ERROR);
nvmft_qpair_destroy(qp);
return (EINVAL);
}
if (cmd->qid > ctrlr->num_io_queues) {
mtx_unlock(&ctrlr->lock);
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create invalid I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_invalid_parameters(qp, cmd, false,
offsetof(struct nvmf_fabric_connect_cmd, qid));
nvmft_qpair_destroy(qp);
return (EINVAL);
}
if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
mtx_unlock(&ctrlr->lock);
sx_sunlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to re-create I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
NVME_SC_COMMAND_SEQUENCE_ERROR);
nvmft_qpair_destroy(qp);
return (EINVAL);
}
ctrlr->io_qpairs[qid - 1].qp = qp;
mtx_unlock(&ctrlr->lock);
nvmft_finish_accept(qp, cmd, ctrlr);
sx_sunlock(&np->lock);
return (0);
}
static void
nvmft_controller_shutdown(void *arg, int pending)
{
struct nvmft_controller *ctrlr = arg;
MPASS(pending == 1);
/*
* Shutdown all I/O queues to terminate pending datamoves and
* stop receiving new commands.
*/
mtx_lock(&ctrlr->lock);
for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
if (ctrlr->io_qpairs[i].qp != NULL) {
ctrlr->io_qpairs[i].shutdown = true;
mtx_unlock(&ctrlr->lock);
nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
mtx_lock(&ctrlr->lock);
}
}
mtx_unlock(&ctrlr->lock);
/* Terminate active CTL commands. */
nvmft_terminate_commands(ctrlr);
/* Wait for all pending CTL commands to complete. */
mtx_lock(&ctrlr->lock);
while (ctrlr->pending_commands != 0)
mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
hz / 100);
mtx_unlock(&ctrlr->lock);
/* Delete all of the I/O queues. */
for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
if (ctrlr->io_qpairs[i].qp != NULL)
nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
}
free(ctrlr->io_qpairs, M_NVMFT);
ctrlr->io_qpairs = NULL;
mtx_lock(&ctrlr->lock);
ctrlr->num_io_queues = 0;
/* Mark shutdown complete. */
if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
}
if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
ctrlr->shutdown = false;
}
mtx_unlock(&ctrlr->lock);
/*
* If the admin queue was closed while shutting down or a
* fatal controller error has occurred, terminate the
* association immediately, otherwise wait up to 2 minutes
* (NVMe-over-Fabrics 1.1 4.6).
*/
if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
nvmft_controller_terminate(ctrlr, 0);
else
taskqueue_enqueue_timeout(taskqueue_thread,
&ctrlr->terminate_task, hz * 60 * 2);
}
static void
nvmft_controller_terminate(void *arg, int pending)
{
struct nvmft_controller *ctrlr = arg;
struct nvmft_port *np;
bool wakeup_np;
/* If the controller has been re-enabled, nothing to do. */
mtx_lock(&ctrlr->lock);
if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
mtx_unlock(&ctrlr->lock);
if (ctrlr->ka_sbt != 0)
callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
C_HARDCLOCK);
return;
}
/* Disable updates to CC while destroying admin qpair. */
ctrlr->shutdown = true;
mtx_unlock(&ctrlr->lock);
nvmft_qpair_destroy(ctrlr->admin);
/* Remove association (CNTLID). */
np = ctrlr->np;
sx_xlock(&np->lock);
TAILQ_REMOVE(&np->controllers, ctrlr, link);
free_unr(np->ids, ctrlr->cntlid);
wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
sx_xunlock(&np->lock);
if (wakeup_np)
wakeup(np);
callout_drain(&ctrlr->ka_timer);
nvmft_printf(ctrlr, "association terminated\n");
nvmft_controller_free(ctrlr);
nvmft_port_rele(np);
}
void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
int error)
{
/*
* If a queue pair is closed, that isn't an error per se.
* That just means additional commands cannot be received on
* that queue pair.
*
* If the admin queue pair is closed while idle or while
* shutting down, terminate the association immediately.
*
* If an I/O queue pair is closed, just ignore it.
*/
if (error == 0) {
if (qp != ctrlr->admin)
return;
mtx_lock(&ctrlr->lock);
if (ctrlr->shutdown) {
ctrlr->admin_closed = true;
mtx_unlock(&ctrlr->lock);
return;
}
if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
MPASS(ctrlr->num_io_queues == 0);
mtx_unlock(&ctrlr->lock);
/*
* Ok to drop lock here since ctrlr->cc can't
* change if the admin queue pair has closed.
* This also means no new queues can be handed
* off, etc. Note that since there are no I/O
* queues, only the admin queue needs to be
* destroyed, so it is safe to skip
* nvmft_controller_shutdown and just schedule
* nvmft_controller_terminate. Note that we
* cannot call nvmft_controller_terminate from
* here directly as this is called from the
* transport layer and freeing the admin qpair
* might deadlock waiting for the current
* thread to exit.
*/
if (taskqueue_cancel_timeout(taskqueue_thread,
&ctrlr->terminate_task, NULL) == 0)
taskqueue_enqueue_timeout(taskqueue_thread,
&ctrlr->terminate_task, 0);
return;
}
/*
* Treat closing of the admin queue pair while enabled
* as a transport error. Note that the admin queue
* pair has been closed.
*/
ctrlr->admin_closed = true;
} else
mtx_lock(&ctrlr->lock);
/* Ignore transport errors if we are already shutting down. */
if (ctrlr->shutdown) {
mtx_unlock(&ctrlr->lock);
return;
}
ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
ctrlr->shutdown = true;
mtx_unlock(&ctrlr->lock);
callout_stop(&ctrlr->ka_timer);
taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}
/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
struct mbuf *m, *n;
m = m_getm2(NULL, len, how, MT_DATA, 0);
if (m == NULL)
return (NULL);
for (n = m; len > 0; n = n->m_next) {
n->m_len = M_SIZE(n);
if (n->m_len >= len) {
n->m_len = len;
MPASS(n->m_next == NULL);
}
len -= n->m_len;
}
return (m);
}
static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
u_int todo;
if (len == 0)
return;
while (m->m_len <= offset) {
offset -= m->m_len;
m = m->m_next;
}
todo = m->m_len - offset;
if (todo > len)
todo = len;
memset(mtodo(m, offset), 0, todo);
m = m->m_next;
len -= todo;
while (len > 0) {
todo = m->m_len;
if (todo > len)
todo = len;
memset(mtod(m, void *), 0, todo);
m = m->m_next;
len -= todo;
}
}
static void
handle_get_log_page(struct nvmft_controller *ctrlr,
struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
struct mbuf *m;
uint64_t offset;
uint32_t numd;
size_t len, todo;
u_int status;
uint8_t lid;
bool rae;
lid = le32toh(cmd->cdw10) & 0xff;
rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
if (offset % 3 != 0) {
status = NVME_SC_INVALID_FIELD;
goto done;
}
len = (numd + 1) * 4;
switch (lid) {
case NVME_LOG_ERROR:
todo = 0;
m = m_getml(len, M_WAITOK);
if (todo != len)
m_zero(m, todo, len - todo);
status = nvmf_send_controller_data(nc, 0, m, len);
MPASS(status != NVMF_MORE);
break;
case NVME_LOG_HEALTH_INFORMATION:
{
struct nvme_health_information_page hip;
if (offset >= sizeof(hip)) {
status = NVME_SC_INVALID_FIELD;
goto done;
}
todo = sizeof(hip) - offset;
if (todo > len)
todo = len;
mtx_lock(&ctrlr->lock);
hip = ctrlr->hip;
hip.controller_busy_time[0] =
sbintime_getsec(ctrlr->busy_total) / 60;
hip.power_on_hours[0] =
sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
mtx_unlock(&ctrlr->lock);
m = m_getml(len, M_WAITOK);
m_copyback(m, 0, todo, (char *)&hip + offset);
if (todo != len)
m_zero(m, todo, len - todo);
status = nvmf_send_controller_data(nc, 0, m, len);
MPASS(status != NVMF_MORE);
break;
}
case NVME_LOG_FIRMWARE_SLOT:
if (offset >= sizeof(ctrlr->np->fp)) {
status = NVME_SC_INVALID_FIELD;
goto done;
}
todo = sizeof(ctrlr->np->fp) - offset;
if (todo > len)
todo = len;
m = m_getml(len, M_WAITOK);
m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
if (todo != len)
m_zero(m, todo, len - todo);
status = nvmf_send_controller_data(nc, 0, m, len);
MPASS(status != NVMF_MORE);
break;
case NVME_LOG_CHANGED_NAMESPACE:
if (offset >= sizeof(*ctrlr->changed_ns)) {
status = NVME_SC_INVALID_FIELD;
goto done;
}
todo = sizeof(*ctrlr->changed_ns) - offset;
if (todo > len)
todo = len;
m = m_getml(len, M_WAITOK);
mtx_lock(&ctrlr->lock);
m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
memset(ctrlr->changed_ns, 0,
sizeof(*ctrlr->changed_ns));
if (!rae)
ctrlr->changed_ns_reported = false;
mtx_unlock(&ctrlr->lock);
if (todo != len)
m_zero(m, todo, len - todo);
status = nvmf_send_controller_data(nc, 0, m, len);
MPASS(status != NVMF_MORE);
break;
default:
nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
lid);
status = NVME_SC_INVALID_FIELD;
break;
}
done:
if (status == NVMF_SUCCESS_SENT)
nvmft_command_completed(ctrlr->admin, nc);
else
nvmft_send_generic_error(ctrlr->admin, nc, status);
nvmf_free_capsule(nc);
}
static void
m_free_nslist(struct mbuf *m)
{
free(m->m_ext.ext_arg1, M_NVMFT);
}
static void
handle_identify_command(struct nvmft_controller *ctrlr,
struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
struct mbuf *m;
size_t data_len;
u_int status;
uint8_t cns;
cns = le32toh(cmd->cdw10) & 0xFF;
data_len = nvmf_capsule_data_len(nc);
if (data_len != sizeof(ctrlr->cdata)) {
nvmft_printf(ctrlr,
"Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
cns);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_INVALID_OPCODE);
nvmf_free_capsule(nc);
return;
}
switch (cns) {
case 0: /* Namespace data. */
case 3: /* Namespace Identification Descriptor list. */
nvmft_dispatch_command(ctrlr->admin, nc, true);
return;
case 1:
/* Controller data. */
m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
status = nvmf_send_controller_data(nc, 0, m,
sizeof(ctrlr->cdata));
MPASS(status != NVMF_MORE);
break;
case 2:
{
/* Active namespace list. */
struct nvme_ns_list *nslist;
uint32_t nsid;
nsid = le32toh(cmd->nsid);
if (nsid >= 0xfffffffe) {
status = NVME_SC_INVALID_FIELD;
break;
}
nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
m = m_get(M_WAITOK, MT_DATA);
m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
nslist, NULL, 0, EXT_CTL);
m->m_len = sizeof(*nslist);
status = nvmf_send_controller_data(nc, 0, m, m->m_len);
MPASS(status != NVMF_MORE);
break;
}
default:
nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
status = NVME_SC_INVALID_FIELD;
break;
}
if (status == NVMF_SUCCESS_SENT)
nvmft_command_completed(ctrlr->admin, nc);
else
nvmft_send_generic_error(ctrlr->admin, nc, status);
nvmf_free_capsule(nc);
}
static void
handle_set_features(struct nvmft_controller *ctrlr,
struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
struct nvme_completion cqe;
uint8_t fid;
fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
switch (fid) {
case NVME_FEAT_NUMBER_OF_QUEUES:
{
uint32_t num_queues;
struct nvmft_io_qpair *io_qpairs;
num_queues = le32toh(cmd->cdw11) & 0xffff;
/* 5.12.1.7: 65535 is invalid. */
if (num_queues == 65535)
goto error;
/* Fabrics requires the same number of SQs and CQs. */
if (le32toh(cmd->cdw11) >> 16 != num_queues)
goto error;
/* Convert to 1's based */
num_queues++;
io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
M_NVMFT, M_WAITOK | M_ZERO);
mtx_lock(&ctrlr->lock);
if (ctrlr->num_io_queues != 0) {
mtx_unlock(&ctrlr->lock);
free(io_qpairs, M_NVMFT);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_COMMAND_SEQUENCE_ERROR);
nvmf_free_capsule(nc);
return;
}
ctrlr->num_io_queues = num_queues;
ctrlr->io_qpairs = io_qpairs;
mtx_unlock(&ctrlr->lock);
nvmft_init_cqe(&cqe, nc, 0);
cqe.cdw0 = cmd->cdw11;
nvmft_send_response(ctrlr->admin, &cqe);
nvmf_free_capsule(nc);
return;
}
case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
{
uint32_t aer_mask;
aer_mask = le32toh(cmd->cdw11);
/* Check for any reserved or unimplemented feature bits. */
if ((aer_mask & 0xffffc000) != 0)
goto error;
mtx_lock(&ctrlr->lock);
ctrlr->aer_mask = aer_mask;
mtx_unlock(&ctrlr->lock);
nvmft_send_success(ctrlr->admin, nc);
return;
}
default:
nvmft_printf(ctrlr,
"Unsupported feature ID %u for SET_FEATURES\n", fid);
goto error;
}
error:
nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
nvmf_free_capsule(nc);
}
static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
struct nvmft_port *np = ctrlr->np;
uint32_t changes;
*need_shutdown = false;
mtx_lock(&ctrlr->lock);
/* Don't allow any changes while shutting down. */
if (ctrlr->shutdown) {
mtx_unlock(&ctrlr->lock);
return (false);
}
if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
mtx_unlock(&ctrlr->lock);
return (false);
}
changes = ctrlr->cc ^ new_cc;
ctrlr->cc = new_cc;
/* Handle shutdown requests. */
if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
ctrlr->shutdown = true;
*need_shutdown = true;
nvmft_printf(ctrlr, "shutdown requested\n");
}
if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
/* Controller reset. */
nvmft_printf(ctrlr, "reset requested\n");
ctrlr->shutdown = true;
*need_shutdown = true;
} else
ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
}
mtx_unlock(&ctrlr->lock);
return (true);
}
static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
const struct nvmf_fabric_prop_get_cmd *pget)
{
struct nvmf_fabric_prop_get_rsp rsp;
nvmft_init_cqe(&rsp, nc, 0);
switch (le32toh(pget->ofst)) {
case NVMF_PROP_CAP:
if (pget->attrib.size != NVMF_PROP_SIZE_8)
goto error;
rsp.value.u64 = htole64(ctrlr->np->cap);
break;
case NVMF_PROP_VS:
if (pget->attrib.size != NVMF_PROP_SIZE_4)
goto error;
rsp.value.u32.low = ctrlr->cdata.ver;
break;
case NVMF_PROP_CC:
if (pget->attrib.size != NVMF_PROP_SIZE_4)
goto error;
rsp.value.u32.low = htole32(ctrlr->cc);
break;
case NVMF_PROP_CSTS:
if (pget->attrib.size != NVMF_PROP_SIZE_4)
goto error;
rsp.value.u32.low = htole32(ctrlr->csts);
break;
default:
goto error;
}
nvmft_send_response(ctrlr->admin, &rsp);
return;
error:
nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}
static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
const struct nvmf_fabric_prop_set_cmd *pset)
{
bool need_shutdown;
need_shutdown = false;
switch (le32toh(pset->ofst)) {
case NVMF_PROP_CC:
if (pset->attrib.size != NVMF_PROP_SIZE_4)
goto error;
if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
&need_shutdown))
goto error;
break;
default:
goto error;
}
nvmft_send_success(ctrlr->admin, nc);
if (need_shutdown) {
callout_stop(&ctrlr->ka_timer);
taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}
return;
error:
nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}
static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
switch (fc->fctype) {
case NVMF_FABRIC_COMMAND_PROPERTY_GET:
handle_property_get(ctrlr, nc,
(const struct nvmf_fabric_prop_get_cmd *)fc);
break;
case NVMF_FABRIC_COMMAND_PROPERTY_SET:
handle_property_set(ctrlr, nc,
(const struct nvmf_fabric_prop_set_cmd *)fc);
break;
case NVMF_FABRIC_COMMAND_CONNECT:
nvmft_printf(ctrlr,
"CONNECT command on connected admin queue\n");
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_COMMAND_SEQUENCE_ERROR);
break;
case NVMF_FABRIC_COMMAND_DISCONNECT:
nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
break;
default:
nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
fc->fctype);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_INVALID_OPCODE);
break;
}
nvmf_free_capsule(nc);
}
void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
struct nvmf_capsule *nc)
{
const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
/* Only permit Fabrics commands while a controller is disabled. */
if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
nvmft_printf(ctrlr,
"Unsupported admin opcode %#x while disabled\n", cmd->opc);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_COMMAND_SEQUENCE_ERROR);
nvmf_free_capsule(nc);
return;
}
atomic_store_int(&ctrlr->ka_active_traffic, 1);
switch (cmd->opc) {
case NVME_OPC_GET_LOG_PAGE:
handle_get_log_page(ctrlr, nc, cmd);
break;
case NVME_OPC_IDENTIFY:
handle_identify_command(ctrlr, nc, cmd);
break;
case NVME_OPC_SET_FEATURES:
handle_set_features(ctrlr, nc, cmd);
break;
case NVME_OPC_ASYNC_EVENT_REQUEST:
mtx_lock(&ctrlr->lock);
if (ctrlr->aer_pending == NVMFT_NUM_AER) {
mtx_unlock(&ctrlr->lock);
nvmft_send_error(ctrlr->admin, nc,
NVME_SCT_COMMAND_SPECIFIC,
NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
} else {
/* NB: Store the CID without byte-swapping. */
ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
ctrlr->aer_pending++;
ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
mtx_unlock(&ctrlr->lock);
}
nvmf_free_capsule(nc);
break;
case NVME_OPC_KEEP_ALIVE:
nvmft_send_success(ctrlr->admin, nc);
nvmf_free_capsule(nc);
break;
case NVME_OPC_FABRICS_COMMANDS:
handle_admin_fabrics_command(ctrlr, nc,
(const struct nvmf_fabric_cmd *)cmd);
break;
default:
nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_INVALID_OPCODE);
nvmf_free_capsule(nc);
break;
}
}
void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
struct nvmf_capsule *nc)
{
struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
atomic_store_int(&ctrlr->ka_active_traffic, 1);
switch (cmd->opc) {
case NVME_OPC_FLUSH:
if (cmd->nsid == htole32(0xffffffff)) {
nvmft_send_generic_error(qp, nc,
NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
nvmf_free_capsule(nc);
break;
}
/* FALLTHROUGH */
case NVME_OPC_WRITE:
case NVME_OPC_READ:
case NVME_OPC_WRITE_UNCORRECTABLE:
case NVME_OPC_COMPARE:
case NVME_OPC_WRITE_ZEROES:
case NVME_OPC_DATASET_MANAGEMENT:
case NVME_OPC_VERIFY:
nvmft_dispatch_command(qp, nc, false);
break;
default:
nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
nvmft_send_generic_error(qp, nc,
NVME_SC_INVALID_OPCODE);
nvmf_free_capsule(nc);
break;
}
}
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
u_int type, uint8_t info, uint8_t log_page_id)
{
struct nvme_completion cpl;
MPASS(type <= 7);
/* Drop events that are not enabled. */
mtx_lock(&ctrlr->lock);
if ((ctrlr->aer_mask & aer_mask) == 0) {
mtx_unlock(&ctrlr->lock);
return;
}
/*
* If there is no pending AER command, drop it.
* XXX: Should we queue these?
*/
if (ctrlr->aer_pending == 0) {
mtx_unlock(&ctrlr->lock);
nvmft_printf(ctrlr,
"dropping AER type %u, info %#x, page %#x\n",
type, info, log_page_id);
return;
}
memset(&cpl, 0, sizeof(cpl));
cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
ctrlr->aer_pending--;
ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
mtx_unlock(&ctrlr->lock);
cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
NVMEF(NVME_ASYNC_EVENT_INFO, info) |
NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));
nvmft_send_response(ctrlr->admin, &cpl);
}
void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
struct nvme_ns_list *nslist;
uint32_t new_nsid, nsid;
u_int i;
new_nsid = lun_id + 1;
mtx_lock(&ctrlr->lock);
nslist = ctrlr->changed_ns;
/* If the first entry is 0xffffffff, the list is already full. */
if (nslist->ns[0] != 0xffffffff) {
/* Find the insertion point for this namespace ID. */
for (i = 0; i < nitems(nslist->ns); i++) {
nsid = le32toh(nslist->ns[i]);
if (nsid == new_nsid) {
/* Already reported, nothing to do. */
mtx_unlock(&ctrlr->lock);
return;
}
if (nsid == 0 || nsid > new_nsid)
break;
}
if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
/* List is full. */
memset(ctrlr->changed_ns, 0,
sizeof(*ctrlr->changed_ns));
ctrlr->changed_ns->ns[0] = 0xffffffff;
} else if (nslist->ns[i] == htole32(0)) {
/*
* Optimize case where this ID is appended to
* the end.
*/
nslist->ns[i] = htole32(new_nsid);
} else {
memmove(&nslist->ns[i + 1], &nslist->ns[i],
(nitems(nslist->ns) - i - 1) *
sizeof(nslist->ns[0]));
nslist->ns[i] = htole32(new_nsid);
}
}
if (ctrlr->changed_ns_reported) {
mtx_unlock(&ctrlr->lock);
return;
}
ctrlr->changed_ns_reported = true;
mtx_unlock(&ctrlr->lock);
nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
NVME_LOG_CHANGED_NAMESPACE);
}