mirror of
https://github.com/opnsense/src.git
synced 2026-06-05 06:42:56 -04:00
1085 lines
24 KiB
C
1085 lines
24 KiB
C
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/eventhandler.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/memdesc.h>
|
|
#include <sys/module.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/reboot.h>
|
|
#include <sys/sx.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/taskqueue.h>
|
|
#include <dev/nvme/nvme.h>
|
|
#include <dev/nvmf/nvmf.h>
|
|
#include <dev/nvmf/nvmf_transport.h>
|
|
#include <dev/nvmf/host/nvmf_var.h>
|
|
|
|
static struct cdevsw nvmf_cdevsw;
|
|
|
|
bool nvmf_fail_disconnect = false;
|
|
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
|
|
&nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");
|
|
|
|
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
|
|
|
|
static void nvmf_disconnect_task(void *arg, int pending);
|
|
static void nvmf_shutdown_pre_sync(void *arg, int howto);
|
|
static void nvmf_shutdown_post_sync(void *arg, int howto);
|
|
|
|
void
|
|
nvmf_complete(void *arg, const struct nvme_completion *cqe)
|
|
{
|
|
struct nvmf_completion_status *status = arg;
|
|
struct mtx *mtx;
|
|
|
|
status->cqe = *cqe;
|
|
mtx = mtx_pool_find(mtxpool_sleep, status);
|
|
mtx_lock(mtx);
|
|
status->done = true;
|
|
mtx_unlock(mtx);
|
|
wakeup(status);
|
|
}
|
|
|
|
void
|
|
nvmf_io_complete(void *arg, size_t xfered, int error)
|
|
{
|
|
struct nvmf_completion_status *status = arg;
|
|
struct mtx *mtx;
|
|
|
|
status->io_error = error;
|
|
mtx = mtx_pool_find(mtxpool_sleep, status);
|
|
mtx_lock(mtx);
|
|
status->io_done = true;
|
|
mtx_unlock(mtx);
|
|
wakeup(status);
|
|
}
|
|
|
|
void
|
|
nvmf_wait_for_reply(struct nvmf_completion_status *status)
|
|
{
|
|
struct mtx *mtx;
|
|
|
|
mtx = mtx_pool_find(mtxpool_sleep, status);
|
|
mtx_lock(mtx);
|
|
while (!status->done || !status->io_done)
|
|
mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
|
|
mtx_unlock(mtx);
|
|
}
|
|
|
|
static int
|
|
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
|
|
uint64_t *value)
|
|
{
|
|
const struct nvmf_fabric_prop_get_rsp *rsp;
|
|
struct nvmf_completion_status status;
|
|
|
|
nvmf_status_init(&status);
|
|
if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
|
|
M_WAITOK))
|
|
return (ECONNABORTED);
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
if (status.cqe.status != 0) {
|
|
device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
|
|
le16toh(status.cqe.status));
|
|
return (EIO);
|
|
}
|
|
|
|
rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
|
|
if (size == 8)
|
|
*value = le64toh(rsp->value.u64);
|
|
else
|
|
*value = le32toh(rsp->value.u32.low);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
|
|
uint64_t value)
|
|
{
|
|
struct nvmf_completion_status status;
|
|
|
|
nvmf_status_init(&status);
|
|
if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete, &status,
|
|
M_WAITOK))
|
|
return (ECONNABORTED);
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
if (status.cqe.status != 0) {
|
|
device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
|
|
le16toh(status.cqe.status));
|
|
return (EIO);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
nvmf_shutdown_controller(struct nvmf_softc *sc)
|
|
{
|
|
uint64_t cc;
|
|
int error;
|
|
|
|
error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
|
|
if (error != 0) {
|
|
device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
|
|
return;
|
|
}
|
|
|
|
cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);
|
|
|
|
error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
|
|
if (error != 0)
|
|
device_printf(sc->dev,
|
|
"Failed to set CC to trigger shutdown\n");
|
|
}
|
|
|
|
static void
|
|
nvmf_check_keep_alive(void *arg)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
int traffic;
|
|
|
|
traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
|
|
if (traffic == 0) {
|
|
device_printf(sc->dev,
|
|
"disconnecting due to KeepAlive timeout\n");
|
|
nvmf_disconnect(sc);
|
|
return;
|
|
}
|
|
|
|
callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
|
|
}
|
|
|
|
static void
|
|
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
|
|
atomic_store_int(&sc->ka_active_rx_traffic, 1);
|
|
if (cqe->status != 0) {
|
|
device_printf(sc->dev,
|
|
"KeepAlive response reported status %#x\n",
|
|
le16toh(cqe->status));
|
|
}
|
|
}
|
|
|
|
static void
|
|
nvmf_send_keep_alive(void *arg)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
int traffic;
|
|
|
|
/*
|
|
* Don't bother sending a KeepAlive command if TKAS is active
|
|
* and another command has been sent during the interval.
|
|
*/
|
|
traffic = atomic_load_int(&sc->ka_active_tx_traffic);
|
|
if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
|
|
sc, M_NOWAIT))
|
|
device_printf(sc->dev,
|
|
"Failed to allocate KeepAlive command\n");
|
|
|
|
/* Clear ka_active_tx_traffic after sending the keep alive command. */
|
|
atomic_store_int(&sc->ka_active_tx_traffic, 0);
|
|
|
|
callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
|
|
}
|
|
|
|
int
|
|
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
|
|
{
|
|
size_t len;
|
|
u_int i;
|
|
int error;
|
|
|
|
memset(ivars, 0, sizeof(*ivars));
|
|
|
|
if (!hh->admin.admin || hh->num_io_queues < 1)
|
|
return (EINVAL);
|
|
|
|
ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
|
|
error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
|
|
if (error != 0)
|
|
goto out;
|
|
nvme_controller_data_swapbytes(ivars->cdata);
|
|
|
|
len = hh->num_io_queues * sizeof(*ivars->io_params);
|
|
ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
|
|
error = copyin(hh->io, ivars->io_params, len);
|
|
if (error != 0)
|
|
goto out;
|
|
for (i = 0; i < hh->num_io_queues; i++) {
|
|
if (ivars->io_params[i].admin) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/* Require all I/O queues to be the same size. */
|
|
if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
ivars->hh = hh;
|
|
return (0);
|
|
|
|
out:
|
|
free(ivars->io_params, M_NVMF);
|
|
free(ivars->cdata, M_NVMF);
|
|
return (error);
|
|
}
|
|
|
|
void
|
|
nvmf_free_ivars(struct nvmf_ivars *ivars)
|
|
{
|
|
free(ivars->io_params, M_NVMF);
|
|
free(ivars->cdata, M_NVMF);
|
|
}
|
|
|
|
static int
|
|
nvmf_probe(device_t dev)
|
|
{
|
|
struct nvmf_ivars *ivars = device_get_ivars(dev);
|
|
|
|
if (ivars == NULL)
|
|
return (ENXIO);
|
|
|
|
device_set_descf(dev, "Fabrics: %.256s", ivars->cdata->subnqn);
|
|
return (BUS_PROBE_DEFAULT);
|
|
}
|
|
|
|
static int
|
|
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
|
|
{
|
|
char name[16];
|
|
|
|
/* Setup the admin queue. */
|
|
sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
|
|
"admin queue");
|
|
if (sc->admin == NULL) {
|
|
device_printf(sc->dev, "Failed to setup admin queue\n");
|
|
return (ENXIO);
|
|
}
|
|
|
|
/* Setup I/O queues. */
|
|
sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
|
|
M_WAITOK | M_ZERO);
|
|
sc->num_io_queues = ivars->hh->num_io_queues;
|
|
for (u_int i = 0; i < sc->num_io_queues; i++) {
|
|
snprintf(name, sizeof(name), "I/O queue %u", i);
|
|
sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
|
|
&ivars->io_params[i], name);
|
|
if (sc->io[i] == NULL) {
|
|
device_printf(sc->dev, "Failed to setup I/O queue %u\n",
|
|
i + 1);
|
|
return (ENXIO);
|
|
}
|
|
}
|
|
|
|
/* Start KeepAlive timers. */
|
|
if (ivars->hh->kato != 0) {
|
|
sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
|
|
sc->cdata->ctratt) != 0;
|
|
sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
|
|
sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
|
|
callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
|
|
nvmf_check_keep_alive, sc, C_HARDCLOCK);
|
|
callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
|
|
nvmf_send_keep_alive, sc, C_HARDCLOCK);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Callback invoked for each active namespace found by a scan, with
 * the softc, the namespace ID, its identify data, and the caller's
 * opaque argument.  Returning false aborts the scan.
 */
typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);
|
|
|
|
static bool
|
|
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
|
|
struct nvme_namespace_data *data, uint32_t *nsidp,
|
|
nvmf_scan_active_ns_cb *cb, void *cb_arg)
|
|
{
|
|
struct nvmf_completion_status status;
|
|
uint32_t nsid;
|
|
|
|
nvmf_status_init(&status);
|
|
nvmf_status_wait_io(&status);
|
|
if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
|
|
nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
|
|
device_printf(sc->dev,
|
|
"failed to send IDENTIFY active namespaces command\n");
|
|
return (false);
|
|
}
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
if (status.cqe.status != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY active namespaces failed, status %#x\n",
|
|
le16toh(status.cqe.status));
|
|
return (false);
|
|
}
|
|
|
|
if (status.io_error != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY active namespaces failed with I/O error %d\n",
|
|
status.io_error);
|
|
return (false);
|
|
}
|
|
|
|
for (u_int i = 0; i < nitems(nslist->ns); i++) {
|
|
nsid = nslist->ns[i];
|
|
if (nsid == 0) {
|
|
*nsidp = 0;
|
|
return (true);
|
|
}
|
|
|
|
nvmf_status_init(&status);
|
|
nvmf_status_wait_io(&status);
|
|
if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
|
|
&status, nvmf_io_complete, &status, M_WAITOK)) {
|
|
device_printf(sc->dev,
|
|
"failed to send IDENTIFY namespace %u command\n",
|
|
nsid);
|
|
return (false);
|
|
}
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
if (status.cqe.status != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY namespace %u failed, status %#x\n", nsid,
|
|
le16toh(status.cqe.status));
|
|
return (false);
|
|
}
|
|
|
|
if (status.io_error != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY namespace %u failed with I/O error %d\n",
|
|
nsid, status.io_error);
|
|
return (false);
|
|
}
|
|
|
|
nvme_namespace_data_swapbytes(data);
|
|
if (!cb(sc, nsid, data, cb_arg))
|
|
return (false);
|
|
}
|
|
|
|
MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);
|
|
|
|
if (nsid >= 0xfffffffd)
|
|
*nsidp = 0;
|
|
else
|
|
*nsidp = nsid + 1;
|
|
return (true);
|
|
}
|
|
|
|
static bool
|
|
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
|
|
void *cb_arg)
|
|
{
|
|
struct nvme_namespace_data *data;
|
|
struct nvme_ns_list *nslist;
|
|
uint32_t nsid;
|
|
bool retval;
|
|
|
|
nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
|
|
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
|
|
|
|
nsid = 0;
|
|
retval = true;
|
|
for (;;) {
|
|
if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
|
|
cb_arg)) {
|
|
retval = false;
|
|
break;
|
|
}
|
|
if (nsid == 0)
|
|
break;
|
|
}
|
|
|
|
free(data, M_NVMF);
|
|
free(nslist, M_NVMF);
|
|
return (retval);
|
|
}
|
|
|
|
static bool
|
|
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
|
|
const struct nvme_namespace_data *data, void *arg __unused)
|
|
{
|
|
if (sc->ns[nsid - 1] != NULL) {
|
|
device_printf(sc->dev,
|
|
"duplicate namespace %u in active namespace list\n",
|
|
nsid);
|
|
return (false);
|
|
}
|
|
|
|
/*
|
|
* As in nvme_ns_construct, a size of zero indicates an
|
|
* invalid namespace.
|
|
*/
|
|
if (data->nsze == 0) {
|
|
device_printf(sc->dev,
|
|
"ignoring active namespace %u with zero size\n", nsid);
|
|
return (true);
|
|
}
|
|
|
|
sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
|
|
|
|
nvmf_sim_rescan_ns(sc, nsid);
|
|
return (true);
|
|
}
|
|
|
|
static bool
|
|
nvmf_add_namespaces(struct nvmf_softc *sc)
|
|
{
|
|
sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
|
|
M_WAITOK | M_ZERO);
|
|
return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
|
|
}
|
|
|
|
static int
|
|
nvmf_attach(device_t dev)
|
|
{
|
|
struct make_dev_args mda;
|
|
struct nvmf_softc *sc = device_get_softc(dev);
|
|
struct nvmf_ivars *ivars = device_get_ivars(dev);
|
|
uint64_t val;
|
|
u_int i;
|
|
int error;
|
|
|
|
if (ivars == NULL)
|
|
return (ENXIO);
|
|
|
|
sc->dev = dev;
|
|
sc->trtype = ivars->hh->trtype;
|
|
callout_init(&sc->ka_rx_timer, 1);
|
|
callout_init(&sc->ka_tx_timer, 1);
|
|
sx_init(&sc->connection_lock, "nvmf connection");
|
|
TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
|
|
|
|
/* Claim the cdata pointer from ivars. */
|
|
sc->cdata = ivars->cdata;
|
|
ivars->cdata = NULL;
|
|
|
|
nvmf_init_aer(sc);
|
|
|
|
/* TODO: Multiqueue support. */
|
|
sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;
|
|
|
|
error = nvmf_establish_connection(sc, ivars);
|
|
if (error != 0)
|
|
goto out;
|
|
|
|
error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
|
|
if (error != 0) {
|
|
device_printf(sc->dev, "Failed to fetch CAP\n");
|
|
error = ENXIO;
|
|
goto out;
|
|
}
|
|
|
|
error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
|
|
if (error != 0) {
|
|
device_printf(sc->dev, "Failed to fetch VS\n");
|
|
error = ENXIO;
|
|
goto out;
|
|
}
|
|
sc->vs = val;
|
|
|
|
/* Honor MDTS if it is set. */
|
|
sc->max_xfer_size = maxphys;
|
|
if (sc->cdata->mdts != 0) {
|
|
sc->max_xfer_size = ulmin(sc->max_xfer_size,
|
|
1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
|
|
NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
|
|
}
|
|
|
|
error = nvmf_init_sim(sc);
|
|
if (error != 0)
|
|
goto out;
|
|
|
|
error = nvmf_start_aer(sc);
|
|
if (error != 0) {
|
|
nvmf_destroy_sim(sc);
|
|
goto out;
|
|
}
|
|
|
|
if (!nvmf_add_namespaces(sc)) {
|
|
nvmf_destroy_sim(sc);
|
|
goto out;
|
|
}
|
|
|
|
make_dev_args_init(&mda);
|
|
mda.mda_devsw = &nvmf_cdevsw;
|
|
mda.mda_uid = UID_ROOT;
|
|
mda.mda_gid = GID_WHEEL;
|
|
mda.mda_mode = 0600;
|
|
mda.mda_si_drv1 = sc;
|
|
error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
|
|
if (error != 0) {
|
|
nvmf_destroy_sim(sc);
|
|
goto out;
|
|
}
|
|
|
|
sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
|
|
nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
|
|
sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
|
|
nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);
|
|
|
|
return (0);
|
|
out:
|
|
if (sc->ns != NULL) {
|
|
for (i = 0; i < sc->cdata->nn; i++) {
|
|
if (sc->ns[i] != NULL)
|
|
nvmf_destroy_ns(sc->ns[i]);
|
|
}
|
|
free(sc->ns, M_NVMF);
|
|
}
|
|
|
|
callout_drain(&sc->ka_tx_timer);
|
|
callout_drain(&sc->ka_rx_timer);
|
|
|
|
if (sc->admin != NULL)
|
|
nvmf_shutdown_controller(sc);
|
|
|
|
for (i = 0; i < sc->num_io_queues; i++) {
|
|
if (sc->io[i] != NULL)
|
|
nvmf_destroy_qp(sc->io[i]);
|
|
}
|
|
free(sc->io, M_NVMF);
|
|
if (sc->admin != NULL)
|
|
nvmf_destroy_qp(sc->admin);
|
|
|
|
nvmf_destroy_aer(sc);
|
|
|
|
taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
|
|
sx_destroy(&sc->connection_lock);
|
|
free(sc->cdata, M_NVMF);
|
|
return (error);
|
|
}
|
|
|
|
void
|
|
nvmf_disconnect(struct nvmf_softc *sc)
|
|
{
|
|
taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
|
|
}
|
|
|
|
static void
|
|
nvmf_disconnect_task(void *arg, int pending __unused)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
u_int i;
|
|
|
|
sx_xlock(&sc->connection_lock);
|
|
if (sc->admin == NULL) {
|
|
/*
|
|
* Ignore transport errors if there is no active
|
|
* association.
|
|
*/
|
|
sx_xunlock(&sc->connection_lock);
|
|
return;
|
|
}
|
|
|
|
if (sc->detaching) {
|
|
if (sc->admin != NULL) {
|
|
/*
|
|
* This unsticks the detach process if a
|
|
* transport error occurs during detach.
|
|
*/
|
|
nvmf_shutdown_qp(sc->admin);
|
|
}
|
|
sx_xunlock(&sc->connection_lock);
|
|
return;
|
|
}
|
|
|
|
if (sc->cdev == NULL) {
|
|
/*
|
|
* Transport error occurred during attach (nvmf_add_namespaces).
|
|
* Shutdown the admin queue.
|
|
*/
|
|
nvmf_shutdown_qp(sc->admin);
|
|
sx_xunlock(&sc->connection_lock);
|
|
return;
|
|
}
|
|
|
|
callout_drain(&sc->ka_tx_timer);
|
|
callout_drain(&sc->ka_rx_timer);
|
|
sc->ka_traffic = false;
|
|
|
|
/* Quiesce namespace consumers. */
|
|
nvmf_disconnect_sim(sc);
|
|
for (i = 0; i < sc->cdata->nn; i++) {
|
|
if (sc->ns[i] != NULL)
|
|
nvmf_disconnect_ns(sc->ns[i]);
|
|
}
|
|
|
|
/* Shutdown the existing qpairs. */
|
|
for (i = 0; i < sc->num_io_queues; i++) {
|
|
nvmf_destroy_qp(sc->io[i]);
|
|
}
|
|
free(sc->io, M_NVMF);
|
|
sc->io = NULL;
|
|
sc->num_io_queues = 0;
|
|
nvmf_destroy_qp(sc->admin);
|
|
sc->admin = NULL;
|
|
|
|
sx_xunlock(&sc->connection_lock);
|
|
}
|
|
|
|
static int
|
|
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
|
|
{
|
|
struct nvmf_ivars ivars;
|
|
u_int i;
|
|
int error;
|
|
|
|
/* XXX: Should we permit changing the transport type? */
|
|
if (sc->trtype != hh->trtype) {
|
|
device_printf(sc->dev,
|
|
"transport type mismatch on reconnect\n");
|
|
return (EINVAL);
|
|
}
|
|
|
|
error = nvmf_init_ivars(&ivars, hh);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_xlock(&sc->connection_lock);
|
|
if (sc->admin != NULL || sc->detaching) {
|
|
error = EBUSY;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Ensure this is for the same controller. Note that the
|
|
* controller ID can vary across associations if the remote
|
|
* system is using the dynamic controller model. This merely
|
|
* ensures the new association is connected to the same NVMe
|
|
* subsystem.
|
|
*/
|
|
if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
|
|
sizeof(ivars.cdata->subnqn)) != 0) {
|
|
device_printf(sc->dev,
|
|
"controller subsystem NQN mismatch on reconnect\n");
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* XXX: Require same number and size of I/O queues so that
|
|
* max_pending_io is still correct?
|
|
*/
|
|
|
|
error = nvmf_establish_connection(sc, &ivars);
|
|
if (error != 0)
|
|
goto out;
|
|
|
|
error = nvmf_start_aer(sc);
|
|
if (error != 0)
|
|
goto out;
|
|
|
|
device_printf(sc->dev,
|
|
"established new association with %u I/O queues\n",
|
|
sc->num_io_queues);
|
|
|
|
/* Restart namespace consumers. */
|
|
for (i = 0; i < sc->cdata->nn; i++) {
|
|
if (sc->ns[i] != NULL)
|
|
nvmf_reconnect_ns(sc->ns[i]);
|
|
}
|
|
nvmf_reconnect_sim(sc);
|
|
|
|
nvmf_rescan_all_ns(sc);
|
|
out:
|
|
sx_xunlock(&sc->connection_lock);
|
|
nvmf_free_ivars(&ivars);
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
nvmf_shutdown_pre_sync(void *arg, int howto)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
|
|
if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
|
|
return;
|
|
|
|
/*
|
|
* If this association is disconnected, abort any pending
|
|
* requests with an error to permit filesystems to unmount
|
|
* without hanging.
|
|
*/
|
|
sx_xlock(&sc->connection_lock);
|
|
if (sc->admin != NULL || sc->detaching) {
|
|
sx_xunlock(&sc->connection_lock);
|
|
return;
|
|
}
|
|
|
|
for (u_int i = 0; i < sc->cdata->nn; i++) {
|
|
if (sc->ns[i] != NULL)
|
|
nvmf_shutdown_ns(sc->ns[i]);
|
|
}
|
|
nvmf_shutdown_sim(sc);
|
|
sx_xunlock(&sc->connection_lock);
|
|
}
|
|
|
|
static void
|
|
nvmf_shutdown_post_sync(void *arg, int howto)
|
|
{
|
|
struct nvmf_softc *sc = arg;
|
|
|
|
if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
|
|
return;
|
|
|
|
/*
|
|
* If this association is connected, disconnect gracefully.
|
|
*/
|
|
sx_xlock(&sc->connection_lock);
|
|
if (sc->admin == NULL || sc->detaching) {
|
|
sx_xunlock(&sc->connection_lock);
|
|
return;
|
|
}
|
|
|
|
callout_drain(&sc->ka_tx_timer);
|
|
callout_drain(&sc->ka_rx_timer);
|
|
|
|
nvmf_shutdown_controller(sc);
|
|
for (u_int i = 0; i < sc->num_io_queues; i++) {
|
|
nvmf_destroy_qp(sc->io[i]);
|
|
}
|
|
nvmf_destroy_qp(sc->admin);
|
|
sc->admin = NULL;
|
|
sx_xunlock(&sc->connection_lock);
|
|
}
|
|
|
|
static int
|
|
nvmf_detach(device_t dev)
|
|
{
|
|
struct nvmf_softc *sc = device_get_softc(dev);
|
|
u_int i;
|
|
|
|
destroy_dev(sc->cdev);
|
|
|
|
sx_xlock(&sc->connection_lock);
|
|
sc->detaching = true;
|
|
sx_xunlock(&sc->connection_lock);
|
|
|
|
EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
|
|
EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_post_sync_eh);
|
|
|
|
nvmf_destroy_sim(sc);
|
|
for (i = 0; i < sc->cdata->nn; i++) {
|
|
if (sc->ns[i] != NULL)
|
|
nvmf_destroy_ns(sc->ns[i]);
|
|
}
|
|
free(sc->ns, M_NVMF);
|
|
|
|
callout_drain(&sc->ka_tx_timer);
|
|
callout_drain(&sc->ka_rx_timer);
|
|
|
|
if (sc->admin != NULL)
|
|
nvmf_shutdown_controller(sc);
|
|
|
|
for (i = 0; i < sc->num_io_queues; i++) {
|
|
nvmf_destroy_qp(sc->io[i]);
|
|
}
|
|
free(sc->io, M_NVMF);
|
|
|
|
taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
|
|
|
|
if (sc->admin != NULL)
|
|
nvmf_destroy_qp(sc->admin);
|
|
|
|
nvmf_destroy_aer(sc);
|
|
|
|
sx_destroy(&sc->connection_lock);
|
|
free(sc->cdata, M_NVMF);
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
|
|
const struct nvme_namespace_data *data)
|
|
{
|
|
struct nvmf_namespace *ns;
|
|
|
|
/* XXX: Needs locking around sc->ns[]. */
|
|
ns = sc->ns[nsid - 1];
|
|
if (data->nsze == 0) {
|
|
/* XXX: Needs locking */
|
|
if (ns != NULL) {
|
|
nvmf_destroy_ns(ns);
|
|
sc->ns[nsid - 1] = NULL;
|
|
}
|
|
} else {
|
|
/* XXX: Needs locking */
|
|
if (ns == NULL) {
|
|
sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
|
|
} else {
|
|
if (!nvmf_update_ns(ns, data)) {
|
|
nvmf_destroy_ns(ns);
|
|
sc->ns[nsid - 1] = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
nvmf_sim_rescan_ns(sc, nsid);
|
|
}
|
|
|
|
void
|
|
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
|
|
{
|
|
struct nvmf_completion_status status;
|
|
struct nvme_namespace_data *data;
|
|
|
|
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
|
|
|
|
nvmf_status_init(&status);
|
|
nvmf_status_wait_io(&status);
|
|
if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
|
|
&status, nvmf_io_complete, &status, M_WAITOK)) {
|
|
device_printf(sc->dev,
|
|
"failed to send IDENTIFY namespace %u command\n", nsid);
|
|
free(data, M_NVMF);
|
|
return;
|
|
}
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
if (status.cqe.status != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY namespace %u failed, status %#x\n", nsid,
|
|
le16toh(status.cqe.status));
|
|
free(data, M_NVMF);
|
|
return;
|
|
}
|
|
|
|
if (status.io_error != 0) {
|
|
device_printf(sc->dev,
|
|
"IDENTIFY namespace %u failed with I/O error %d\n",
|
|
nsid, status.io_error);
|
|
free(data, M_NVMF);
|
|
return;
|
|
}
|
|
|
|
nvme_namespace_data_swapbytes(data);
|
|
|
|
nvmf_rescan_ns_1(sc, nsid, data);
|
|
|
|
free(data, M_NVMF);
|
|
}
|
|
|
|
static void
|
|
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
|
|
uint32_t next_valid_nsid)
|
|
{
|
|
struct nvmf_namespace *ns;
|
|
|
|
for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++)
|
|
{
|
|
/* XXX: Needs locking around sc->ns[]. */
|
|
ns = sc->ns[nsid - 1];
|
|
if (ns != NULL) {
|
|
nvmf_destroy_ns(ns);
|
|
sc->ns[nsid - 1] = NULL;
|
|
|
|
nvmf_sim_rescan_ns(sc, nsid);
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool
|
|
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
|
|
const struct nvme_namespace_data *data, void *arg)
|
|
{
|
|
uint32_t *last_nsid = arg;
|
|
|
|
/* Check for any gaps prior to this namespace. */
|
|
nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
|
|
*last_nsid = nsid;
|
|
|
|
nvmf_rescan_ns_1(sc, nsid, data);
|
|
return (true);
|
|
}
|
|
|
|
void
|
|
nvmf_rescan_all_ns(struct nvmf_softc *sc)
|
|
{
|
|
uint32_t last_nsid;
|
|
|
|
last_nsid = 0;
|
|
if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
|
|
return;
|
|
|
|
/*
|
|
* Check for any namespace devices after the last active
|
|
* namespace.
|
|
*/
|
|
nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
|
|
}
|
|
|
|
int
|
|
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
|
|
bool admin)
|
|
{
|
|
struct nvmf_completion_status status;
|
|
struct nvme_command cmd;
|
|
struct memdesc mem;
|
|
struct nvmf_host_qpair *qp;
|
|
struct nvmf_request *req;
|
|
void *buf;
|
|
int error;
|
|
|
|
if (pt->len > sc->max_xfer_size)
|
|
return (EINVAL);
|
|
|
|
buf = NULL;
|
|
if (pt->len != 0) {
|
|
/*
|
|
* XXX: Depending on the size we may want to pin the
|
|
* user pages and use a memdesc with vm_page_t's
|
|
* instead.
|
|
*/
|
|
buf = malloc(pt->len, M_NVMF, M_WAITOK);
|
|
if (pt->is_read == 0) {
|
|
error = copyin(pt->buf, buf, pt->len);
|
|
if (error != 0) {
|
|
free(buf, M_NVMF);
|
|
return (error);
|
|
}
|
|
} else {
|
|
/* Ensure no kernel data is leaked to userland. */
|
|
memset(buf, 0, pt->len);
|
|
}
|
|
}
|
|
|
|
memset(&cmd, 0, sizeof(cmd));
|
|
cmd.opc = pt->cmd.opc;
|
|
cmd.fuse = pt->cmd.fuse;
|
|
cmd.nsid = pt->cmd.nsid;
|
|
cmd.cdw10 = pt->cmd.cdw10;
|
|
cmd.cdw11 = pt->cmd.cdw11;
|
|
cmd.cdw12 = pt->cmd.cdw12;
|
|
cmd.cdw13 = pt->cmd.cdw13;
|
|
cmd.cdw14 = pt->cmd.cdw14;
|
|
cmd.cdw15 = pt->cmd.cdw15;
|
|
|
|
if (admin)
|
|
qp = sc->admin;
|
|
else
|
|
qp = nvmf_select_io_queue(sc);
|
|
nvmf_status_init(&status);
|
|
req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
|
|
if (req == NULL) {
|
|
device_printf(sc->dev, "failed to send passthrough command\n");
|
|
error = ECONNABORTED;
|
|
goto error;
|
|
}
|
|
|
|
if (pt->len != 0) {
|
|
mem = memdesc_vaddr(buf, pt->len);
|
|
nvmf_capsule_append_data(req->nc, &mem, pt->len,
|
|
pt->is_read == 0, nvmf_io_complete, &status);
|
|
nvmf_status_wait_io(&status);
|
|
}
|
|
|
|
nvmf_submit_request(req);
|
|
nvmf_wait_for_reply(&status);
|
|
|
|
memset(&pt->cpl, 0, sizeof(pt->cpl));
|
|
pt->cpl.cdw0 = status.cqe.cdw0;
|
|
pt->cpl.status = status.cqe.status;
|
|
|
|
error = status.io_error;
|
|
if (error == 0 && pt->len != 0 && pt->is_read != 0)
|
|
error = copyout(buf, pt->buf, pt->len);
|
|
error:
|
|
free(buf, M_NVMF);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
|
|
struct thread *td)
|
|
{
|
|
struct nvmf_softc *sc = cdev->si_drv1;
|
|
struct nvme_get_nsid *gnsid;
|
|
struct nvme_pt_command *pt;
|
|
struct nvmf_reconnect_params *rp;
|
|
struct nvmf_handoff_host *hh;
|
|
|
|
switch (cmd) {
|
|
case NVME_PASSTHROUGH_CMD:
|
|
pt = (struct nvme_pt_command *)arg;
|
|
return (nvmf_passthrough_cmd(sc, pt, true));
|
|
case NVME_GET_NSID:
|
|
gnsid = (struct nvme_get_nsid *)arg;
|
|
strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
|
|
sizeof(gnsid->cdev));
|
|
gnsid->nsid = 0;
|
|
return (0);
|
|
case NVME_GET_MAX_XFER_SIZE:
|
|
*(uint64_t *)arg = sc->max_xfer_size;
|
|
return (0);
|
|
case NVMF_RECONNECT_PARAMS:
|
|
rp = (struct nvmf_reconnect_params *)arg;
|
|
if ((sc->cdata->fcatt & 1) == 0)
|
|
rp->cntlid = NVMF_CNTLID_DYNAMIC;
|
|
else
|
|
rp->cntlid = sc->cdata->ctrlr_id;
|
|
memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
|
|
return (0);
|
|
case NVMF_RECONNECT_HOST:
|
|
hh = (struct nvmf_handoff_host *)arg;
|
|
return (nvmf_reconnect_host(sc, hh));
|
|
default:
|
|
return (ENOTTY);
|
|
}
|
|
}
|
|
|
|
static struct cdevsw nvmf_cdevsw = {
|
|
.d_version = D_VERSION,
|
|
.d_ioctl = nvmf_ioctl
|
|
};
|
|
|
|
static int
|
|
nvmf_modevent(module_t mod, int what, void *arg)
|
|
{
|
|
switch (what) {
|
|
case MOD_LOAD:
|
|
return (nvmf_ctl_load());
|
|
case MOD_QUIESCE:
|
|
return (0);
|
|
case MOD_UNLOAD:
|
|
nvmf_ctl_unload();
|
|
destroy_dev_drain(&nvmf_cdevsw);
|
|
return (0);
|
|
default:
|
|
return (EOPNOTSUPP);
|
|
}
|
|
}
|
|
|
|
static device_method_t nvmf_methods[] = {
|
|
/* Device interface */
|
|
DEVMETHOD(device_probe, nvmf_probe),
|
|
DEVMETHOD(device_attach, nvmf_attach),
|
|
DEVMETHOD(device_detach, nvmf_detach),
|
|
DEVMETHOD_END
|
|
};
|
|
|
|
driver_t nvme_nvmf_driver = {
|
|
"nvme",
|
|
nvmf_methods,
|
|
sizeof(struct nvmf_softc),
|
|
};
|
|
|
|
DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
|
|
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);
|