namei: Add the ability for the ABI to specify an alternate root path

For now a non-native ABI (i.e., Linux) uses the kern_alternate_path()
facility to dynamically reroot lookups. First, an attempt is made to
look up the file in /compat/linux/original-path. If that fails, the
lookup is done in /original-path. That requires a bit of code in
every ABI syscall implementation where path name translation is needed.
Also, our kern_alternate_path() does not properly look up absolute symlinks
on the second attempt, i.e., it does not append the /compat/linux part to
the resolved link.
The change is intended to avoid this by specifying the ABI root directory
for namei(), using one call to pwd_altroot() during exec-time into the ABI.
In that case namei() will dynamically reroot lookups as mentioned above.

PR:			72920
Reviewed by:		kib
Differential revision:	https://reviews.freebsd.org/D38933
MFC after:		2 month
This commit is contained in:
Dmitry Chagin 2023-05-29 11:15:28 +03:00
parent 723e25f128
commit 3d2fec7db8
5 changed files with 101 additions and 9 deletions

View file

@ -3839,6 +3839,11 @@ pwd_fill(struct pwd *oldpwd, struct pwd *newpwd)
vrefact(oldpwd->pwd_jdir);
newpwd->pwd_jdir = oldpwd->pwd_jdir;
}
if (newpwd->pwd_adir == NULL && oldpwd->pwd_adir != NULL) {
vrefact(oldpwd->pwd_adir);
newpwd->pwd_adir = oldpwd->pwd_adir;
}
}
struct pwd *
@ -3930,6 +3935,8 @@ pwd_drop(struct pwd *pwd)
vrele(pwd->pwd_rdir);
if (pwd->pwd_jdir != NULL)
vrele(pwd->pwd_jdir);
if (pwd->pwd_adir != NULL)
vrele(pwd->pwd_adir);
uma_zfree_smr(pwd_zone, pwd);
}
@ -3967,6 +3974,8 @@ pwd_chroot(struct thread *td, struct vnode *vp)
vrefact(vp);
newpwd->pwd_rdir = vp;
vrefact(vp);
newpwd->pwd_adir = vp;
if (oldpwd->pwd_jdir == NULL) {
vrefact(vp);
newpwd->pwd_jdir = vp;
@ -3997,6 +4006,40 @@ pwd_chdir(struct thread *td, struct vnode *vp)
pwd_drop(oldpwd);
}
/*
 * Switch the ABI root directory of the process when it moves to or
 * away from a non-native ABI.
 *
 * A non-NULL altroot_vp becomes the new ABI root (native process entering
 * a non-native ABI).  A NULL altroot_vp resets the ABI root to the current
 * root directory (non-native process returning to the native ABI).  The
 * selected vnode is referenced with vrefact() before being stored.
 *
 * The update is made on a freshly allocated pwd which is installed while
 * holding the pwddesc exclusive lock; pwd_drop() is called on the previous
 * pwd only after the lock is released.
 */
void
pwd_altroot(struct thread *td, struct vnode *altroot_vp)
{
	struct pwd *cur, *fresh;
	struct pwddesc *pdp;
	struct vnode *abi_root;

	fresh = pwd_alloc();
	pdp = td->td_proc->p_pd;
	PWDDESC_XLOCK(pdp);
	cur = PWDDESC_XLOCKED_LOAD_PWD(pdp);

	/* Pick the ABI root first so a single reference/store path is used. */
	abi_root = (altroot_vp != NULL) ? altroot_vp : cur->pwd_rdir;
	vrefact(abi_root);
	fresh->pwd_adir = abi_root;

	/* pwd_fill() populates fields still unset in the new pwd from the old. */
	pwd_fill(cur, fresh);
	pwd_set(pdp, fresh);
	PWDDESC_XUNLOCK(pdp);
	pwd_drop(cur);
}
/*
* jail_attach(2) changes both root and working directories.
*/
@ -4030,6 +4073,8 @@ pwd_chroot_chdir(struct thread *td, struct vnode *vp)
vrefact(vp);
newpwd->pwd_jdir = vp;
}
vrefact(vp);
newpwd->pwd_adir = vp;
pwd_fill(oldpwd, newpwd);
pwd_set(pdp, newpwd);
PWDDESC_XUNLOCK(pdp);
@ -4046,7 +4091,8 @@ pwd_ensure_dirs(void)
pdp = curproc->p_pd;
PWDDESC_XLOCK(pdp);
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) {
if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL &&
oldpwd->pwd_adir != NULL) {
PWDDESC_XUNLOCK(pdp);
return;
}
@ -4064,6 +4110,10 @@ pwd_ensure_dirs(void)
vrefact(rootvnode);
newpwd->pwd_rdir = rootvnode;
}
if (newpwd->pwd_adir == NULL) {
vrefact(rootvnode);
newpwd->pwd_adir = rootvnode;
}
pwd_set(pdp, newpwd);
PWDDESC_XUNLOCK(pdp);
pwd_drop(oldpwd);
@ -4084,6 +4134,8 @@ pwd_set_rootvnode(void)
newpwd->pwd_cdir = rootvnode;
vrefact(rootvnode);
newpwd->pwd_rdir = rootvnode;
vrefact(rootvnode);
newpwd->pwd_adir = rootvnode;
pwd_fill(oldpwd, newpwd);
pwd_set(pdp, newpwd);
PWDDESC_XUNLOCK(pdp);
@ -4119,7 +4171,8 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
if (oldpwd == NULL ||
(oldpwd->pwd_cdir != olddp &&
oldpwd->pwd_rdir != olddp &&
oldpwd->pwd_jdir != olddp)) {
oldpwd->pwd_jdir != olddp &&
oldpwd->pwd_adir != olddp)) {
PWDDESC_XUNLOCK(pdp);
pddrop(pdp);
continue;
@ -4136,6 +4189,10 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
vrefact(newdp);
newpwd->pwd_jdir = newdp;
}
if (oldpwd->pwd_adir == olddp) {
vrefact(newdp);
newpwd->pwd_adir = newdp;
}
pwd_fill(oldpwd, newpwd);
pwd_set(pdp, newpwd);
PWDDESC_XUNLOCK(pdp);

View file

@ -4349,7 +4349,7 @@ cache_fpl_terminated(struct cache_fpl *fpl)
(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | WILLBEDIR | \
ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | OPENREAD | \
OPENWRITE | WANTIOCTLCAPS)
OPENWRITE | WANTIOCTLCAPS | ISRESTARTED)
#define CACHE_FPL_INTERNAL_CN_FLAGS \
(ISDOTDOT | MAKEENTRY | ISLASTCN)
@ -6238,7 +6238,7 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
fpl.pwd = pwdp;
pwd = pwd_get_smr();
*(fpl.pwd) = pwd;
ndp->ni_rootdir = pwd->pwd_rdir;
namei_setup_rootdir(ndp, cnp, pwd);
ndp->ni_topdir = pwd->pwd_jdir;
if (cnp->cn_pnbuf[0] == '/') {

View file

@ -81,6 +81,13 @@ static void NDVALIDATE_impl(struct nameidata *, int);
#define NDVALIDATE(ndp)
#endif
/*
 * Prepare a nameidata for a second pass through namei(): clear the result
 * flags, strip all namei-internal component flags, and mark the lookup as
 * restarted so it is not restarted again.
 *
 * The argument is parenthesized so that any pointer expression (e.g. `&nd`
 * or `nd + 1`) expands correctly.  It is evaluated more than once, so it
 * must not have side effects.
 */
#define NDRESTART(ndp) do {						\
	NDREINIT_DBG(ndp);						\
	(ndp)->ni_resflags = 0;						\
	(ndp)->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS;		\
	(ndp)->ni_cnd.cn_flags |= ISRESTARTED;				\
} while (0)
SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *",
"unsigned long", "bool");
@ -334,7 +341,7 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
* The reference on ni_rootdir is acquired in the block below to avoid
* back-to-back atomics for absolute lookups.
*/
ndp->ni_rootdir = pwd->pwd_rdir;
namei_setup_rootdir(ndp, cnp, pwd);
ndp->ni_topdir = pwd->pwd_jdir;
if (cnp->cn_pnbuf[0] == '/') {
@ -594,6 +601,7 @@ namei(struct nameidata *ndp)
MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
ndp->ni_startdir->v_type == VBAD);
restart:
ndp->ni_lcf = 0;
ndp->ni_loopcnt = 0;
ndp->ni_vp = NULL;
@ -628,6 +636,12 @@ namei(struct nameidata *ndp)
case CACHE_FPL_STATUS_HANDLED:
if (error == 0)
NDVALIDATE(ndp);
else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
(cnp->cn_flags & ISRESTARTED) == 0)) {
namei_cleanup_cnp(cnp);
NDRESTART(ndp);
goto restart;
}
return (error);
case CACHE_FPL_STATUS_PARTIAL:
TAILQ_INIT(&ndp->ni_cap_tracker);
@ -668,8 +682,18 @@ namei(struct nameidata *ndp)
for (;;) {
ndp->ni_startdir = dp;
error = vfs_lookup(ndp);
if (error != 0)
goto out;
if (error != 0) {
if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
error == ENOENT &&
(cnp->cn_flags & ISRESTARTED) == 0)) {
nameicap_cleanup(ndp);
pwd_drop(pwd);
namei_cleanup_cnp(cnp);
NDRESTART(ndp);
goto restart;
} else
goto out;
}
/*
* If not a symbolic link, we're done.

View file

@ -89,6 +89,8 @@ struct fdescenttbl {
/*
* This struct is copy-on-write and allocated from an SMR zone.
* All fields are constant after initialization apart from the reference count.
* The ABI root directory is initialized as the root directory and changed
* when the process transitions to or from a non-native ABI.
*
* Check pwd_* routines for usage.
*/
@ -97,6 +99,7 @@ struct pwd {
struct vnode *pwd_cdir; /* current directory */
struct vnode *pwd_rdir; /* root directory */
struct vnode *pwd_jdir; /* jail root directory */
struct vnode *pwd_adir; /* abi root directory */
};
typedef SMR_POINTER(struct pwd *) smrpwd_t;
@ -342,6 +345,7 @@ struct pwddesc *pdinit(struct pwddesc *pdp, bool keeplock);
struct pwddesc *pdshare(struct pwddesc *pdp);
void pdunshare(struct thread *td);
void pwd_altroot(struct thread *td, struct vnode *altroot_vp);
void pwd_chdir(struct thread *td, struct vnode *vp);
int pwd_chroot(struct thread *td, struct vnode *vp);
int pwd_chroot_chdir(struct thread *td, struct vnode *vp);

View file

@ -159,7 +159,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
* Namei parameter descriptors.
*/
#define RDONLY 0x00000200 /* lookup with read-only semantics */
/* UNUSED 0x00000400 */
#define ISRESTARTED 0x00000400 /* restarted namei */
/* UNUSED 0x00000800 */
#define ISWHITEOUT 0x00001000 /* found whiteout */
#define DOWHITEOUT 0x00002000 /* do whiteouts */
@ -187,7 +187,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
*/
#define NAMEI_INTERNAL_FLAGS \
(NOEXECCHECK | MAKEENTRY | ISSYMLINK | ISLASTCN | ISDOTDOT | \
TRAILINGSLASH)
TRAILINGSLASH | ISRESTARTED)
/*
* Namei results flags
@ -293,6 +293,13 @@ int namei(struct nameidata *ndp);
int vfs_lookup(struct nameidata *ndp);
int vfs_relookup(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp, bool refstart);
/*
 * Select the root directory used for a lookup.  On the first pass the ABI
 * root (pwd_adir) is used; once the lookup has been restarted (ISRESTARTED
 * set in cn_flags) the regular root directory (pwd_rdir) is used instead.
 *
 * All arguments are parenthesized so that arbitrary pointer expressions
 * expand correctly.  No reference is taken on the vnode here.
 */
#define namei_setup_rootdir(ndp, cnp, pwd) do {				\
	if (__predict_true(((cnp)->cn_flags & ISRESTARTED) == 0))	\
		(ndp)->ni_rootdir = (pwd)->pwd_adir;			\
	else								\
		(ndp)->ni_rootdir = (pwd)->pwd_rdir;			\
} while (0)
#endif
/*