opnsense-src/sys/dev/vmm/vmm_dev.c
Mark Johnston c76c2a19ae vmm: Consolidate code which manages guest memory regions
On all three platforms supported by vmm, we have mostly duplicated code
to manage guest physical memory regions.  Deduplicate much of this code
and move it into sys/dev/vmm/vmm_mem.c.

To avoid exporting struct vm outside of machdep vmm.c, add a new
struct vm_mem to contain the memory segment descriptors, and add a
vm_mem() accessor, akin to vm_vmspace().  This way vmm_mem.c can
implement its routines without needing to see the layout of struct vm.

The handling of the per-VM vmspace is also duplicated but will be moved
to vmm_mem.c in a follow-up patch.

On amd64, move the ppt_is_mmio() check out of vm_mem_allocated() to keep
the code MI, as PPT is only implemented on amd64.  There are only a
couple of callers, so this is not unreasonable.

No functional change intended.

Reviewed by:	jhb
Differential Revision:	https://reviews.freebsd.org/D48270
2025-02-18 16:00:07 +00:00

1110 lines
25 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2011 NetApp, Inc.
* Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
* All rights reserved.
*/
#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <machine/vmm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
/*
 * Compatibility variant of the memory segment ioctl argument, only built on
 * amd64 kernels with COMPAT_FREEBSD12.  It carries a 64-byte name buffer and
 * its total size is pinned to 80 bytes by the static assertion below.
 */
struct vm_memseg_12 {
int segid;
size_t len;
char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
/*
 * Compat ioctl command numbers: same IOCNUM_* values as the current
 * ALLOC/GET memseg ioctls but encoded with the size of struct vm_memseg_12.
 */
#define VM_ALLOC_MEMSEG_12 \
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif
/*
 * Per-device-memory-segment state.  One instance is created by
 * devmem_create_cdev() for each named (non-system) memory segment and is
 * linked onto the owning VM's 'devmem' list.
 */
struct devmem_softc {
int segid; /* memory segment id this cdev exposes */
char *name; /* M_VMMDEV-allocated segment name; freed in vmmdev_destroy() */
struct cdev *cdev; /* devmem character device; NULL once destroyed */
struct vmmdev_softc *sc; /* owning VM softc; cleared by devmem_destroy() */
SLIST_ENTRY(devmem_softc) link; /* entry in vmmdev_softc.devmem */
};
/*
 * Per-VM device state.  Allocated by vmmdev_alloc(), linked onto the global
 * 'head' list, and torn down by vmmdev_destroy().
 */
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
struct cdev *cdev; /* VM character device; NULL marks the VM as being destroyed */
struct ucred *ucred; /* creator's credential, held with crhold() */
SLIST_ENTRY(vmmdev_softc) link; /* entry in the global 'head' list */
SLIST_HEAD(, devmem_softc) devmem; /* devmem segments belonging to this VM */
int flags; /* NOTE(review): not referenced anywhere in this file's visible code */
};
/* All VM softcs; entries are inserted and removed with vmmdev_mtx held. */
static SLIST_HEAD(, vmmdev_softc) head;
/* Value returned by prison_add_allow() in vmmdev_init(); tested by vmm_priv_check(). */
static unsigned pr_allow_flag;
/* sx lock serializing VM creation, lookup and destruction. */
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");
/* malloc(9) type used for every allocation made in this file except VM_GET_CPUS. */
static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
/*
 * Forward declarations for the devmem helpers, which are defined at the end
 * of the file but used by alloc_memseg() and vmmdev_destroy().  Parameter
 * names match the definitions.
 */
static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int segid,
    char *devname);
/*
 * Decide whether the holder of 'ucred' may use vmm.
 *
 * Returns EPERM when the credential is jailed and its prison's pr_allow mask
 * does not include pr_allow_flag; returns 0 in every other case.
 */
static int
vmm_priv_check(struct ucred *ucred)
{
	if (!jailed(ucred))
		return (0);
	if ((ucred->cr_prison->pr_allow & pr_allow_flag) != 0)
		return (0);
	return (EPERM);
}
static int
vcpu_lock_one(struct vcpu *vcpu)
{
return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}
static void
vcpu_unlock_one(struct vcpu *vcpu)
{
enum vcpu_state state;
state = vcpu_get_state(vcpu, NULL);
if (state != VCPU_FROZEN) {
panic("vcpu %s(%d) has invalid state %d",
vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
}
vcpu_set_state(vcpu, VCPU_IDLE, false);
}
/*
 * Freeze every existing vcpu of the VM.
 *
 * Takes the shared vcpu lock via vm_slock_vcpus() and then freezes each
 * allocated vcpu in index order.  On success the shared vcpu lock remains
 * held (vcpu_unlock_all() drops it).  If freezing any vcpu fails, the vcpus
 * at lower indices are unfrozen again, the vcpu lock is dropped and the
 * error is returned.
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t idx, ncpus, undo;
	int error;

	vm_slock_vcpus(sc->vm);
	ncpus = vm_get_maxcpus(sc->vm);
	for (idx = 0; idx < ncpus; idx++) {
		vcpu = vm_vcpu(sc->vm, idx);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error == 0)
			continue;

		/* Roll back: thaw everything frozen before index 'idx'. */
		for (undo = 0; undo < idx; undo++) {
			vcpu = vm_vcpu(sc->vm, undo);
			if (vcpu != NULL)
				vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
		return (error);
	}

	return (0);
}
/*
 * Undo a successful vcpu_lock_all(): unfreeze every allocated vcpu and then
 * drop the vcpu lock with vm_unlock_vcpus().
 */
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t idx, ncpus;

	ncpus = vm_get_maxcpus(sc->vm);
	for (idx = 0; idx < ncpus; idx++) {
		vcpu = vm_vcpu(sc->vm, idx);
		if (vcpu != NULL)
			vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}
/*
 * Find the softc of the VM called 'name'.
 *
 * The caller must hold vmmdev_mtx exclusively.  Only the first list entry
 * whose VM name matches is considered; NULL is returned when there is no
 * such entry or when cr_cansee() reports that 'cred' may not see the
 * credential that created it.
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *cand;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(cand, &head, link) {
		if (strcmp(name, vm_name(cand->vm)) != 0)
			continue;
		if (cr_cansee(cred, cand->ucred) != 0)
			return (NULL);
		return (cand);
	}

	return (NULL);
}
/*
 * Return the softc stored in the cdev's si_drv1 field (set through
 * mda_si_drv1 in vmmdev_create()).
 */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	struct vmmdev_softc *sc;

	sc = cdev->si_drv1;
	return (sc);
}
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
int error, off, c, prot;
vm_paddr_t gpa, maxaddr;
void *hpa, *cookie;
struct vmmdev_softc *sc;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
return (ENXIO);
/*
* Get a read lock on the guest memory map.
*/
vm_slock_memsegs(sc->vm);
error = 0;
prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
maxaddr = vmm_sysmem_maxaddr(sc->vm);
while (uio->uio_resid > 0 && error == 0) {
gpa = uio->uio_offset;
off = gpa & PAGE_MASK;
c = min(uio->uio_resid, PAGE_SIZE - off);
/*
* The VM has a hole in its physical memory map. If we want to
* use 'dd' to inspect memory beyond the hole we need to
* provide bogus data for memory that lies in the hole.
*
* Since this device does not support lseek(2), dd(1) will
* read(2) blocks of data to simulate the lseek(2).
*/
hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
if (hpa == NULL) {
if (uio->uio_rw == UIO_READ && gpa < maxaddr)
error = uiomove(__DECONST(void *, zero_region),
c, uio);
else
error = EFAULT;
} else {
error = uiomove(hpa, c, uio);
vm_gpa_release(cookie);
}
}
vm_unlock_memsegs(sc->vm);
return (error);
}
CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
struct devmem_softc *dsc;
int error;
bool sysmem;
error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
if (error || mseg->len == 0)
return (error);
if (!sysmem) {
SLIST_FOREACH(dsc, &sc->devmem, link) {
if (dsc->segid == mseg->segid)
break;
}
KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
__func__, mseg->segid));
error = copystr(dsc->name, mseg->name, len, NULL);
} else {
bzero(mseg->name, len);
}
return (error);
}
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
char *name;
int error;
bool sysmem;
error = 0;
name = NULL;
sysmem = true;
/*
* The allocation is lengthened by 1 to hold a terminating NUL. It'll
* by stripped off when devfs processes the full string.
*/
if (VM_MEMSEG_NAME(mseg)) {
sysmem = false;
name = malloc(len, M_VMMDEV, M_WAITOK);
error = copystr(mseg->name, name, len, NULL);
if (error)
goto done;
}
error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
if (error)
goto done;
if (VM_MEMSEG_NAME(mseg)) {
error = devmem_create_cdev(sc, mseg->segid, name);
if (error)
vm_free_memseg(sc->vm, mseg->segid);
else
name = NULL; /* freed when 'cdev' is destroyed */
}
done:
free(name, M_VMMDEV);
return (error);
}
/*
 * Read 'count' registers of 'vcpu'.
 *
 * regnum[i] names the register and regval[i] receives its value.  Stops at
 * the first vm_get_register() failure and returns that error; returns 0 when
 * every register was read (including when count is 0).
 *
 * The index is unsigned so that it has the same type as 'count' and the
 * loop condition does not compare a signed value with an unsigned one.
 */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	unsigned int i;
	int error;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error != 0)
			break;
	}
	return (error);
}
/*
 * Write 'count' registers of 'vcpu'.
 *
 * regnum[i] names the register and regval[i] is the value to store.  Stops
 * at the first vm_set_register() failure and returns that error; registers
 * written before the failure are not rolled back.  Returns 0 when every
 * register was written (including when count is 0).
 *
 * The index is unsigned so that it has the same type as 'count' and the
 * loop condition does not compare a signed value with an unsigned one.
 */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	unsigned int i;
	int error;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error != 0)
			break;
	}
	return (error);
}
/*
 * open(2) handler for a VM device.  The only check performed is
 * vmm_priv_check() on the opening thread's credential.
 */
static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	return (vmm_priv_check(td->td_ucred));
}
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
VMMDEV_IOCTL(VM_SUSPEND, 0),
VMMDEV_IOCTL(VM_GET_CPUS, 0),
VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
/*
 * ioctl(2) handler for a VM device.
 *
 * Steps, in order:
 *  1. Resolve the softc from the cdev (ENXIO if there is none).
 *  2. Find the command's descriptor, first in vmmdev_ioctls[] and then in
 *     vmmdev_machdep_ioctls[]; unknown commands fail with ENOTTY.
 *  3. Take the locks requested by the descriptor's flags: the memory segment
 *     lock (exclusive or shared), then either one vcpu or all vcpus.
 *  4. Dispatch on 'cmd'; commands not handled here are passed to
 *     vmmdev_machdep_ioctl().
 *  5. Drop the locks and return the handler's error.
 *
 * 'data' points at the ioctl argument structure for 'cmd'.  For commands
 * flagged LOCK_ONE_VCPU, ALLOC_VCPU or MAYBE_ALLOC_VCPU its first int is
 * read as the vcpu id.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
{
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
return (ENXIO);
/* Look the command up in the MI table, then in the MD table. */
ioctl = NULL;
for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
if (vmmdev_ioctls[i].cmd == cmd) {
ioctl = &vmmdev_ioctls[i];
break;
}
}
if (ioctl == NULL) {
for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
if (vmmdev_machdep_ioctls[i].cmd == cmd) {
ioctl = &vmmdev_machdep_ioctls[i];
break;
}
}
}
if (ioctl == NULL)
return (ENOTTY);
/* The memory segment lock is taken before any vcpu is frozen. */
if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
vm_xlock_memsegs(sc->vm);
else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
vm_slock_memsegs(sc->vm);
vcpu = NULL;
vcpuid = -1;
if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
vcpuid = *(int *)data;
if (vcpuid == -1) {
/* Only MAYBE_ALLOC_VCPU commands may omit the vcpu. */
if ((ioctl->flags &
VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
error = EINVAL;
goto lockfail;
}
} else {
vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
if (vcpu == NULL) {
error = EINVAL;
goto lockfail;
}
if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
error = vcpu_lock_one(vcpu);
if (error)
goto lockfail;
}
}
}
if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
error = vcpu_lock_all(sc);
if (error)
goto lockfail;
}
switch (cmd) {
case VM_SUSPEND: {
struct vm_suspend *vmsuspend;
vmsuspend = (struct vm_suspend *)data;
error = vm_suspend(sc->vm, vmsuspend->how);
break;
}
case VM_REINIT:
error = vm_reinit(sc->vm);
break;
case VM_STAT_DESC: {
struct vm_stat_desc *statdesc;
statdesc = (struct vm_stat_desc *)data;
error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
sizeof(statdesc->desc));
break;
}
case VM_STATS: {
struct vm_stats *vmstats;
vmstats = (struct vm_stats *)data;
getmicrotime(&vmstats->tv);
error = vmm_stat_copy(vcpu, vmstats->index,
nitems(vmstats->statbuf), &vmstats->num_entries,
vmstats->statbuf);
break;
}
case VM_MMAP_GETNEXT: {
struct vm_memmap *mm;
mm = (struct vm_memmap *)data;
error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
&mm->segoff, &mm->len, &mm->prot, &mm->flags);
break;
}
case VM_MMAP_MEMSEG: {
struct vm_memmap *mm;
mm = (struct vm_memmap *)data;
error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
mm->len, mm->prot, mm->flags);
break;
}
case VM_MUNMAP_MEMSEG: {
struct vm_munmap *mu;
mu = (struct vm_munmap *)data;
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
/* Compat variants pass the size of the smaller vm_memseg_12 name buffer. */
case VM_ALLOC_MEMSEG_12:
error = alloc_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
case VM_GET_MEMSEG_12:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
#endif
case VM_ALLOC_MEMSEG:
error = alloc_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
break;
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
break;
case VM_GET_REGISTER: {
struct vm_register *vmreg;
vmreg = (struct vm_register *)data;
error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
break;
}
case VM_SET_REGISTER: {
struct vm_register *vmreg;
vmreg = (struct vm_register *)data;
error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
break;
}
case VM_GET_REGISTER_SET: {
struct vm_register_set *vmregset;
uint64_t *regvals;
int *regnums;
vmregset = (struct vm_register_set *)data;
/* Bound the user-supplied count before sizing the allocations. */
if (vmregset->count > VM_REG_LAST) {
error = EINVAL;
break;
}
regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
M_WAITOK);
regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
M_WAITOK);
/* Register numbers come from user memory; values are copied back out. */
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
error = vm_get_register_set(vcpu,
vmregset->count, regnums, regvals);
if (error == 0)
error = copyout(regvals, vmregset->regvals,
sizeof(regvals[0]) * vmregset->count);
free(regvals, M_VMMDEV);
free(regnums, M_VMMDEV);
break;
}
case VM_SET_REGISTER_SET: {
struct vm_register_set *vmregset;
uint64_t *regvals;
int *regnums;
vmregset = (struct vm_register_set *)data;
/* Bound the user-supplied count before sizing the allocations. */
if (vmregset->count > VM_REG_LAST) {
error = EINVAL;
break;
}
regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
M_WAITOK);
regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
M_WAITOK);
/* Both the register numbers and the values come from user memory. */
error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
vmregset->count);
if (error == 0)
error = copyin(vmregset->regvals, regvals,
sizeof(regvals[0]) * vmregset->count);
if (error == 0)
error = vm_set_register_set(vcpu,
vmregset->count, regnums, regvals);
free(regvals, M_VMMDEV);
free(regnums, M_VMMDEV);
break;
}
case VM_GET_CAPABILITY: {
struct vm_capability *vmcap;
vmcap = (struct vm_capability *)data;
error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
break;
}
case VM_SET_CAPABILITY: {
struct vm_capability *vmcap;
vmcap = (struct vm_capability *)data;
error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
break;
}
case VM_ACTIVATE_CPU:
error = vm_activate_cpu(vcpu);
break;
case VM_GET_CPUS: {
struct vm_cpuset *vm_cpuset;
cpuset_t *cpuset;
int size;
error = 0;
vm_cpuset = (struct vm_cpuset *)data;
size = vm_cpuset->cpusetsize;
if (size < 1 || size > CPU_MAXSIZE / NBBY) {
error = ERANGE;
break;
}
/* Allocate at least a full cpuset_t so the struct assignments below fit. */
cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
M_WAITOK | M_ZERO);
if (vm_cpuset->which == VM_ACTIVE_CPUS)
*cpuset = vm_active_cpus(sc->vm);
else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
*cpuset = vm_suspended_cpus(sc->vm);
else if (vm_cpuset->which == VM_DEBUG_CPUS)
*cpuset = vm_debug_cpus(sc->vm);
else
error = EINVAL;
/* Fail rather than truncate if the caller's buffer cannot hold the highest set bit. */
if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
error = ERANGE;
if (error == 0)
error = copyout(cpuset, vm_cpuset->cpus, size);
free(cpuset, M_TEMP);
break;
}
case VM_SUSPEND_CPU:
error = vm_suspend_cpu(sc->vm, vcpu);
break;
case VM_RESUME_CPU:
error = vm_resume_cpu(sc->vm, vcpu);
break;
case VM_SET_TOPOLOGY: {
struct vm_cpu_topology *topology;
topology = (struct vm_cpu_topology *)data;
error = vm_set_topology(sc->vm, topology->sockets,
topology->cores, topology->threads, topology->maxcpus);
break;
}
case VM_GET_TOPOLOGY: {
struct vm_cpu_topology *topology;
topology = (struct vm_cpu_topology *)data;
vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
&topology->threads, &topology->maxcpus);
error = 0;
break;
}
default:
/* Everything else is handled by the machine-dependent code. */
error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
td);
break;
}
/* Normal exit: release the memseg lock, then whichever vcpu lock was taken. */
if ((ioctl->flags &
(VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
vm_unlock_memsegs(sc->vm);
if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
vcpu_unlock_all(sc);
else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
vcpu_unlock_one(vcpu);
/*
 * Make sure that no handler returns a kernel-internal
 * error value to userspace.
 */
KASSERT(error == ERESTART || error >= 0,
("vmmdev_ioctl: invalid error return %d", error));
return (error);
/*
 * Reached when acquiring a vcpu lock failed or the vcpu id was invalid; at
 * that point no vcpu is frozen, so only the memseg lock needs dropping.
 */
lockfail:
if ((ioctl->flags &
(VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
vm_unlock_memsegs(sc->vm);
return (error);
}
/*
 * mmap(2) handler for a VM device.
 *
 * Walks the guest memory mappings with vm_mmap_getnext() looking for one
 * that fully contains [*offset, *offset + mapsize).  If that mapping is
 * backed by a system memory segment, a reference is taken on the segment's
 * VM object (returned through 'objp') and *offset is rewritten to the
 * corresponding offset within the segment.
 *
 * Returns EINVAL for executable mappings, a negative or wrapping range, a
 * cdev without a softc, or a mapping backed by a devmem segment; returns the
 * vm_mmap_getnext() error when no containing mapping exists.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_ooffset_t first, last, segoff;
	vm_paddr_t gpa;
	size_t len;
	int error, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) != 0 || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	/* Step from mapping to mapping until one covers the whole range. */
	for (gpa = 0;; gpa += len) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error != 0)
			goto out;
		if (first >= gpa && last <= gpa + len)
			break;
	}

	error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
	KASSERT(error == 0 && *objp != NULL,
	    ("%s: invalid memory segment %d", __func__, segid));
	if (sysmem) {
		vm_object_reference(*objp);
		*offset = segoff + (first - gpa);
	} else {
		error = EINVAL;
	}
out:
	vm_unlock_memsegs(sc->vm);
	return (error);
}
/*
 * Tear down a VM softc and everything hanging off it.
 *
 * The caller must already have detached the VM's own cdev (sc->cdev is
 * asserted to be NULL).  vmmdev_mtx must NOT be held on entry because it is
 * acquired below to unlink the softc from the global list.
 */
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
struct devmem_softc *dsc;
int error __diagused;
KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
/*
 * Destroy all cdevs:
 *
 * - any new operations on the 'cdev' will return an error (ENXIO).
 *
 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
 */
SLIST_FOREACH(dsc, &sc->devmem, link) {
KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
devmem_destroy(dsc);
}
/*
 * Stop new vcpus from appearing, then freeze every existing one.  Only the
 * vcpu lock taken by vcpu_lock_all() is dropped here; the vcpus themselves
 * are left frozen.
 */
vm_disable_vcpu_creation(sc->vm);
error = vcpu_lock_all(sc);
KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
vm_unlock_vcpus(sc->vm);
/* Free the devmem bookkeeping now that every devmem cdev is gone. */
while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
SLIST_REMOVE_HEAD(&sc->devmem, link);
free(dsc->name, M_VMMDEV);
free(dsc, M_VMMDEV);
}
/* NOTE(review): sc->vm was already dereferenced above, so this NULL check looks redundant. */
if (sc->vm != NULL)
vm_destroy(sc->vm);
if (sc->ucred != NULL)
crfree(sc->ucred);
/* Unlink from the global list last, under the list lock. */
sx_xlock(&vmmdev_mtx);
SLIST_REMOVE(&head, sc, vmmdev_softc, link);
sx_xunlock(&vmmdev_mtx);
free(sc, M_VMMDEV);
}
/*
 * Destroy the VM called 'name' on behalf of 'cred'.
 *
 * Returns EINVAL when the VM cannot be found (or is not visible to 'cred')
 * or when its cdev pointer is already NULL, i.e. creation has not finished
 * or destruction is already under way.  Otherwise the cdev is detached under
 * vmmdev_mtx, then destroyed and the VM torn down with the lock released.
 */
static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;
	struct cdev *vmcdev;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	vmcdev = (sc != NULL) ? sc->cdev : NULL;
	if (vmcdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	destroy_dev(vmcdev);
	vmmdev_destroy(sc);
	return (0);
}
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
char *buf;
int error, buflen;
error = vmm_priv_check(req->td->td_ucred);
if (error)
return (error);
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
free(buf, M_VMMDEV);
return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
NULL);
/*
 * Character device switch for the per-VM device created by vmmdev_create()
 * under the name "vmm/<name>".  Reads and writes share one handler, which
 * tells them apart through uio_rw.
 */
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
.d_version = D_VERSION,
.d_open = vmmdev_open,
.d_ioctl = vmmdev_ioctl,
.d_mmap_single = vmmdev_mmap_single,
.d_read = vmmdev_rw,
.d_write = vmmdev_rw,
};
/*
 * Allocate and initialise a zeroed softc for 'vm'.  A reference on 'cred' is
 * taken with crhold() and kept in the softc; the devmem list starts empty
 * and the cdev pointer starts NULL.  The allocation uses M_WAITOK.
 */
static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *newsc;

	newsc = malloc(sizeof(*newsc), M_VMMDEV, M_WAITOK | M_ZERO);
	newsc->vm = vm;
	newsc->ucred = crhold(cred);
	SLIST_INIT(&newsc->devmem);
	return (newsc);
}
/*
 * Create a VM called 'name' owned by 'cred' together with its "vmm/<name>"
 * character device.
 *
 * Returns EEXIST when vmmdev_lookup() already finds a VM of that name, the
 * vm_create() error when the VM cannot be created, or the make_dev_s() error
 * when the device node cannot be created; in the last case the half-built
 * softc is torn down with vmmdev_destroy() after vmmdev_mtx is released
 * (vmmdev_destroy() takes that lock itself).
 */
static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	if (vmmdev_lookup(name, cred) != NULL) {
		error = EEXIST;
		goto unlock;
	}

	error = vm_create(name, &vm);
	if (error != 0)
		goto unlock;

	/* Publish the softc on the global list before creating the cdev. */
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error == 0)
		sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);

	if (error != 0)
		vmmdev_destroy(sc);
	return (error);

unlock:
	sx_xunlock(&vmmdev_mtx);
	return (error);
}
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
char *buf;
int error, buflen;
error = vmm_priv_check(req->td->td_ucred);
if (error != 0)
return (error);
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
free(buf, M_VMMDEV);
return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
NULL);
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
int error;
error = vmm_priv_check(td->td_ucred);
if (error != 0)
return (error);
if ((flags & FWRITE) == 0)
return (EPERM);
return (0);
}
/*
 * ioctl(2) handler for the vmmctl control device.
 *
 * VMMCTL_VM_CREATE and VMMCTL_VM_DESTROY take a VM name; the name is
 * forcibly NUL-terminated at index VM_MAX_NAMELEN, and any non-zero word in
 * the request's 'reserved' array is rejected with EINVAL.  Any other command
 * returns ENOTTY.
 */
static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmctl_vm_create *vmc;
	struct vmmctl_vm_destroy *vmd;
	size_t i;

	switch (cmd) {
	case VMMCTL_VM_CREATE:
		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0)
				return (EINVAL);
		}
		return (vmmdev_create(vmc->name, td->td_ucred));

	case VMMCTL_VM_DESTROY:
		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0)
				return (EINVAL);
		}
		return (vmmdev_lookup_and_destroy(vmd->name, td->td_ucred));

	default:
		return (ENOTTY);
	}
}
/* The "vmmctl" control device; created in vmmdev_init(), destroyed in vmmdev_cleanup(). */
static struct cdev *vmmctl_cdev;
/* Character device switch for the control device: open and ioctl only. */
static struct cdevsw vmmctlsw = {
.d_name = "vmmctl",
.d_version = D_VERSION,
.d_open = vmmctl_open,
.d_ioctl = vmmctl_ioctl,
};
/*
 * Create the "vmmctl" control device and, if that succeeds, register the
 * "vmm" jail allow flag whose value vmm_priv_check() tests.  Returns the
 * make_dev_p() error, or 0 on success.
 */
int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}

	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);
	return (0);
}
/*
 * Remove the "vmmctl" control device.  Refuses with EBUSY while any VM is
 * still on the global list; otherwise destroys the control device if it
 * exists and returns 0.
 */
int
vmmdev_cleanup(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	if (SLIST_EMPTY(&head)) {
		if (vmmctl_cdev != NULL) {
			destroy_dev(vmmctl_cdev);
			vmmctl_cdev = NULL;
		}
		error = 0;
	} else {
		error = EBUSY;
	}
	sx_xunlock(&vmmdev_mtx);
	return (error);
}
/*
 * mmap(2) handler for a devmem device.
 *
 * Returns ENXIO when the cdev has no softc yet, and EINVAL for executable
 * mappings, a negative or wrapping range, or a range that extends past the
 * end of the segment.  On success a reference is taken on the segment's VM
 * object, which is returned through 'objp'; *offset is left unchanged.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t end, start;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	start = *offset;
	end = start + len;
	if ((nprot & PROT_EXEC) != 0 || start < 0 || start >= end)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	/* The requested range must lie entirely inside the segment. */
	if (seglen < end)
		error = EINVAL;
	else
		vm_object_reference(*objp);

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}
/*
 * Character device switch for the per-segment devmem devices created by
 * devmem_create_cdev(); the only operation provided is mmap.
 */
static struct cdevsw devmemsw = {
.d_name = "devmem",
.d_version = D_VERSION,
.d_mmap_single = devmem_mmap_single,
};
/*
 * Create the "vmm.io/<vm>.<devname>" character device for devmem segment
 * 'segid' of the VM described by 'sc'.
 *
 * Ownership of 'devname':
 *  - on success (return 0) the string is owned by the new devmem_softc and
 *    is freed together with it in vmmdev_destroy();
 *  - on failure the string is NOT freed here and remains owned by the
 *    caller.  alloc_memseg() frees its name buffer on every error path, so
 *    freeing it here as well would be a double free.
 *
 * Returns 0 or the error from make_dev_s().
 */
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	/* Link the softc first so it is on the list when the cdev appears. */
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		/* Undo the list insertion; 'devname' stays with the caller. */
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}
/*
 * Destroy the character device of a devmem segment.  'arg' is the segment's
 * devmem_softc.  The softc itself and its name are not freed here; only its
 * cdev and back-pointer to the VM softc are cleared.
 */
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc;

	dsc = arg;
	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}