diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 130c2468b20..59d1c367f94 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -803,7 +803,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
             int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
                 V_tcp_autosndbuf_max);
 
-            if (!sbreserve_locked(sb, newsize, so, NULL))
+            if (!sbreserve_locked(so, SO_SND, newsize, NULL))
                 sb->sb_flags &= ~SB_AUTOSIZE;
             else
                 sowwakeup = 1;  /* room available */
@@ -1770,7 +1770,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
         unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
             V_tcp_autorcvbuf_max);
 
-        if (!sbreserve_locked(sb, newsize, so, NULL))
+        if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
             sb->sb_flags &= ~SB_AUTOSIZE;
     }
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 05bb903a28a..11fea91b060 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -555,7 +555,7 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
         unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
             V_tcp_autorcvbuf_max);
 
-        if (!sbreserve_locked(sb, newsize, so, NULL))
+        if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
             sb->sb_flags &= ~SB_AUTOSIZE;
     }
     SOCKBUF_UNLOCK(sb);
diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index f0238c23318..8c327a22e6f 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -763,7 +763,7 @@ hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
          * Wait and block until (more) data comes in.
          * Note: Drops the sockbuf lock during wait.
          */
-        error = sbwait(sb);
+        error = sbwait(so, SO_RCV);
         if (error)
             break;
@@ -859,7 +859,7 @@ hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
          * Sleep wait until space avaiable to send
          * Note: Drops the sockbuf lock during wait.
          */
-        error = sbwait(sb);
+        error = sbwait(so, SO_SND);
         if (error)
             break;
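The sbwait() and sbreserve_locked() conversions above are mechanical: callers name a buffer with SO_RCV or SO_SND instead of passing a sockbuf pointer. A minimal sketch of the resulting wait-loop pattern; example_wait_for_data() is hypothetical and not part of this patch:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical consumer: sleep until the receive buffer has data or
 * the peer can send no more.  sbwait() drops the buffer lock while
 * sleeping and reacquires it before returning, so the condition must
 * be re-checked in a loop.
 */
static int
example_wait_for_data(struct socket *so)
{
    int error = 0;

    SOCK_RECVBUF_LOCK(so);
    while (sbavail(&so->so_rcv) == 0 &&
        (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
        error = sbwait(so, SO_RCV);    /* was: sbwait(&so->so_rcv) */
        if (error != 0)
            break;
    }
    SOCK_RECVBUF_UNLOCK(so);
    return (error);
}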
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 30383490ca4..2de015254ab 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -814,7 +814,7 @@ retry_space:
              * state may have changed and we retest
              * for it.
              */
-            error = sbwait(&so->so_snd);
+            error = sbwait(so, SO_SND);
             /*
              * An error from sbwait usually indicates that we've
              * been interrupted by a signal. If we've sent anything
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 774b317c6ec..b1b47d3d3c2 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -179,12 +179,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
                 so->sol_sbrcv_flags |= SB_ASYNC;
                 so->sol_sbsnd_flags |= SB_ASYNC;
             } else {
-                SOCKBUF_LOCK(&so->so_rcv);
+                SOCK_RECVBUF_LOCK(so);
                 so->so_rcv.sb_flags |= SB_ASYNC;
-                SOCKBUF_UNLOCK(&so->so_rcv);
-                SOCKBUF_LOCK(&so->so_snd);
+                SOCK_RECVBUF_UNLOCK(so);
+                SOCK_SENDBUF_LOCK(so);
                 so->so_snd.sb_flags |= SB_ASYNC;
-                SOCKBUF_UNLOCK(&so->so_snd);
+                SOCK_SENDBUF_UNLOCK(so);
             }
             SOCK_UNLOCK(so);
         } else {
@@ -194,12 +194,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
                 so->sol_sbrcv_flags &= ~SB_ASYNC;
                 so->sol_sbsnd_flags &= ~SB_ASYNC;
             } else {
-                SOCKBUF_LOCK(&so->so_rcv);
+                SOCK_RECVBUF_LOCK(so);
                 so->so_rcv.sb_flags &= ~SB_ASYNC;
-                SOCKBUF_UNLOCK(&so->so_rcv);
-                SOCKBUF_LOCK(&so->so_snd);
+                SOCK_RECVBUF_UNLOCK(so);
+                SOCK_SENDBUF_LOCK(so);
                 so->so_snd.sb_flags &= ~SB_ASYNC;
-                SOCKBUF_UNLOCK(&so->so_snd);
+                SOCK_SENDBUF_UNLOCK(so);
             }
             SOCK_UNLOCK(so);
         }
@@ -751,10 +751,12 @@ soaio_snd(void *context, int pending)
 }
 
 void
-sowakeup_aio(struct socket *so, struct sockbuf *sb)
+sowakeup_aio(struct socket *so, sb_which which)
 {
+    struct sockbuf *sb = sobuf(so, which);
+
+    SOCK_BUF_LOCK_ASSERT(so, which);
 
-    SOCKBUF_LOCK_ASSERT(sb);
     sb->sb_flags &= ~SB_AIO;
     if (sb->sb_flags & SB_AIO_RUNNING)
         return;
@@ -799,6 +801,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
     struct socket *so;
     struct sockbuf *sb;
+    sb_which which;
     int error;
 
     so = fp->f_data;
@@ -809,12 +812,14 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
     /* Lock through the socket, since this may be a listening socket. */
     switch (job->uaiocb.aio_lio_opcode & (LIO_WRITE | LIO_READ)) {
     case LIO_READ:
-        sb = &so->so_rcv;
         SOCK_RECVBUF_LOCK(so);
+        sb = &so->so_rcv;
+        which = SO_RCV;
         break;
     case LIO_WRITE:
-        sb = &so->so_snd;
         SOCK_SENDBUF_LOCK(so);
+        sb = &so->so_snd;
+        which = SO_SND;
         break;
     default:
         return (EINVAL);
@@ -833,7 +838,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
     TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
     if (!(sb->sb_flags & SB_AIO_RUNNING)) {
         if (soaio_ready(so, sb))
-            sowakeup_aio(so, sb);
+            sowakeup_aio(so, which);
         else
             sb->sb_flags |= SB_AIO;
     }
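sowakeup_aio() now resolves the sockbuf from the socket via sobuf() and asserts the matching lock itself. A minimal sketch of a caller kicking the AIO taskqueue under that lock, as soo_aio_queue() does above; example_kick_aio() is hypothetical:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical completion hook: if AIO jobs are parked on the receive
 * buffer, hand them to the soaio taskqueue.  The caller, not
 * sowakeup_aio(), is responsible for dropping the lock afterwards.
 */
static void
example_kick_aio(struct socket *so)
{
    SOCK_RECVBUF_LOCK(so);
    if (so->so_rcv.sb_flags & SB_AIO)
        sowakeup_aio(so, SO_RCV);    /* was: sowakeup_aio(so, &so->so_rcv) */
    SOCK_RECVBUF_UNLOCK(so);
}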
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 5ac6c79a928..421fa5da37d 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -396,27 +396,27 @@ void
 socantsendmore_locked(struct socket *so)
 {
 
-    SOCKBUF_LOCK_ASSERT(&so->so_snd);
+    SOCK_SENDBUF_LOCK_ASSERT(so);
 
     so->so_snd.sb_state |= SBS_CANTSENDMORE;
     sowwakeup_locked(so);
-    mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+    SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
-    SOCKBUF_LOCK(&so->so_snd);
+    SOCK_SENDBUF_LOCK(so);
     socantsendmore_locked(so);
-    mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+    SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
-    SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+    SOCK_RECVBUF_LOCK_ASSERT(so);
 
     so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 #ifdef KERN_TLS
@@ -424,53 +424,55 @@ socantrcvmore_locked(struct socket *so)
         ktls_check_rx(&so->so_rcv);
 #endif
     sorwakeup_locked(so);
-    mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+    SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
-    SOCKBUF_LOCK(&so->so_rcv);
+    SOCK_RECVBUF_LOCK(so);
     socantrcvmore_locked(so);
-    mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+    SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow_locked(struct socket *so)
 {
 
-    SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+    SOCK_RECVBUF_LOCK_ASSERT(so);
 
     if (so->so_options & SO_RERROR) {
         so->so_rerror = ENOBUFS;
         sorwakeup_locked(so);
     } else
-        SOCKBUF_UNLOCK(&so->so_rcv);
+        SOCK_RECVBUF_UNLOCK(so);
 
-    mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+    SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow(struct socket *so)
 {
 
-    SOCKBUF_LOCK(&so->so_rcv);
+    SOCK_RECVBUF_LOCK(so);
     soroverflow_locked(so);
-    mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+    SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
 int
-sbwait(struct sockbuf *sb)
+sbwait(struct socket *so, sb_which which)
 {
+    struct sockbuf *sb;
 
-    SOCKBUF_LOCK_ASSERT(sb);
+    SOCK_BUF_LOCK_ASSERT(so, which);
 
+    sb = sobuf(so, which);
     sb->sb_flags |= SB_WAIT;
-    return (msleep_sbt(&sb->sb_acc, SOCKBUF_MTX(sb),
+    return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
         (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
         sb->sb_timeo, 0, 0));
 }
@@ -487,13 +489,15 @@ sbwait(struct sockbuf *sb)
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
-void
-sowakeup(struct socket *so, struct sockbuf *sb)
+static __always_inline void
+sowakeup(struct socket *so, const sb_which which)
 {
+    struct sockbuf *sb;
     int ret;
 
-    SOCKBUF_LOCK_ASSERT(sb);
+    SOCK_BUF_LOCK_ASSERT(so, which);
 
+    sb = sobuf(so, which);
     selwakeuppri(sb->sb_sel, PSOCK);
     if (!SEL_WAITING(sb->sb_sel))
         sb->sb_flags &= ~SB_SEL;
@@ -512,13 +516,43 @@ sowakeup(struct socket *so, struct sockbuf *sb)
     } else
         ret = SU_OK;
     if (sb->sb_flags & SB_AIO)
-        sowakeup_aio(so, sb);
-    SOCKBUF_UNLOCK(sb);
+        sowakeup_aio(so, which);
+    SOCK_BUF_UNLOCK(so, which);
     if (ret == SU_ISCONNECTED)
         soisconnected(so);
     if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
         pgsigio(&so->so_sigio, SIGIO, 0);
-    mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
+    SOCK_BUF_UNLOCK_ASSERT(so, which);
+}
+
+/*
+ * Do we need to notify the other side when I/O is possible?
+ */
+static __always_inline bool
+sb_notify(const struct sockbuf *sb)
+{
+    return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
+        SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
+}
+
+void
+sorwakeup_locked(struct socket *so)
+{
+    SOCK_RECVBUF_LOCK_ASSERT(so);
+    if (sb_notify(&so->so_rcv))
+        sowakeup(so, SO_RCV);
+    else
+        SOCK_RECVBUF_UNLOCK(so);
+}
+
+void
+sowwakeup_locked(struct socket *so)
+{
+    SOCK_SENDBUF_LOCK_ASSERT(so);
+    if (sb_notify(&so->so_snd))
+        sowakeup(so, SO_SND);
+    else
+        SOCK_SENDBUF_UNLOCK(so);
 }
 
 /*
@@ -557,11 +591,11 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
     struct thread *td = curthread;
 
-    SOCKBUF_LOCK(&so->so_snd);
-    SOCKBUF_LOCK(&so->so_rcv);
-    if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
+    SOCK_SENDBUF_LOCK(so);
+    SOCK_RECVBUF_LOCK(so);
+    if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
         goto bad;
-    if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
+    if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
         goto bad2;
     if (so->so_rcv.sb_lowat == 0)
         so->so_rcv.sb_lowat = 1;
@@ -569,14 +603,14 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
         so->so_snd.sb_lowat = MCLBYTES;
     if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
         so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
-    SOCKBUF_UNLOCK(&so->so_rcv);
-    SOCKBUF_UNLOCK(&so->so_snd);
+    SOCK_RECVBUF_UNLOCK(so);
+    SOCK_SENDBUF_UNLOCK(so);
     return (0);
 bad2:
-    sbrelease_locked(&so->so_snd, so);
+    sbrelease_locked(so, SO_SND);
 bad:
-    SOCKBUF_UNLOCK(&so->so_rcv);
-    SOCKBUF_UNLOCK(&so->so_snd);
+    SOCK_RECVBUF_UNLOCK(so);
+    SOCK_SENDBUF_UNLOCK(so);
     return (ENOBUFS);
 }
 
@@ -600,13 +634,14 @@ sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
 * become limiting if buffering efficiency is near the normal case.
 */
-int
-sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool
+sbreserve_locked(struct socket *so, sb_which which, u_long cc,
     struct thread *td)
 {
+    struct sockbuf *sb = sobuf(so, which);
     rlim_t sbsize_limit;
 
-    SOCKBUF_LOCK_ASSERT(sb);
+    SOCK_BUF_LOCK_ASSERT(so, which);
 
     /*
      * When a thread is passed, we take into account the thread's socket
@@ -616,24 +651,25 @@ sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
      * we don't apply a process limit.
      */
     if (cc > sb_max_adj)
-        return (0);
+        return (false);
     if (td != NULL) {
         sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
     } else
         sbsize_limit = RLIM_INFINITY;
     if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
         sbsize_limit))
-        return (0);
+        return (false);
     sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
     if (sb->sb_lowat > sb->sb_hiwat)
         sb->sb_lowat = sb->sb_hiwat;
-    return (1);
+    return (true);
 }
 
 int
 sbsetopt(struct socket *so, int cmd, u_long cc)
 {
     struct sockbuf *sb;
+    sb_which wh;
     short *flags;
     u_int *hiwat, *lowat;
     int error;
@@ -660,16 +696,18 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
         case SO_SNDLOWAT:
         case SO_SNDBUF:
             sb = &so->so_snd;
+            wh = SO_SND;
             break;
         case SO_RCVLOWAT:
         case SO_RCVBUF:
             sb = &so->so_rcv;
+            wh = SO_RCV;
             break;
         }
         flags = &sb->sb_flags;
         hiwat = &sb->sb_hiwat;
         lowat = &sb->sb_lowat;
-        SOCKBUF_LOCK(sb);
+        SOCK_BUF_LOCK(so, wh);
     }
 
     error = 0;
@@ -685,7 +723,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
             if (*lowat > *hiwat)
                 *lowat = *hiwat;
         } else {
-            if (!sbreserve_locked(sb, cc, so, curthread))
+            if (!sbreserve_locked(so, wh, cc, curthread))
                 error = ENOBUFS;
         }
         if (error == 0)
@@ -702,7 +740,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
     }
 
     if (!SOLISTENING(so))
-        SOCKBUF_UNLOCK(sb);
+        SOCK_BUF_UNLOCK(so, wh);
     SOCK_UNLOCK(so);
     return (error);
 }
@@ -711,8 +749,9 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
  * Free mbufs held by a socket, and reserved mbuf space.
 */
 static void
-sbrelease_internal(struct sockbuf *sb, struct socket *so)
+sbrelease_internal(struct socket *so, sb_which which)
 {
+    struct sockbuf *sb = sobuf(so, which);
 
     sbflush_internal(sb);
     (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
@@ -721,33 +760,34 @@ sbrelease_internal(struct sockbuf *sb, struct socket *so)
 }
 
 void
-sbrelease_locked(struct sockbuf *sb, struct socket *so)
+sbrelease_locked(struct socket *so, sb_which which)
 {
 
-    SOCKBUF_LOCK_ASSERT(sb);
+    SOCK_BUF_LOCK_ASSERT(so, which);
 
-    sbrelease_internal(sb, so);
+    sbrelease_internal(so, which);
 }
 
 void
-sbrelease(struct sockbuf *sb, struct socket *so)
+sbrelease(struct socket *so, sb_which which)
 {
 
-    SOCKBUF_LOCK(sb);
-    sbrelease_locked(sb, so);
-    SOCKBUF_UNLOCK(sb);
+    SOCK_BUF_LOCK(so, which);
+    sbrelease_locked(so, which);
+    SOCK_BUF_UNLOCK(so, which);
 }
 
 void
-sbdestroy(struct sockbuf *sb, struct socket *so)
+sbdestroy(struct socket *so, sb_which which)
 {
-
-    sbrelease_internal(sb, so);
 #ifdef KERN_TLS
+    struct sockbuf *sb = sobuf(so, which);
+
     if (sb->sb_tls_info != NULL)
         ktls_free(sb->sb_tls_info);
     sb->sb_tls_info = NULL;
 #endif
+    sbrelease_internal(so, which);
 }
 
 /*
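Converted from macros to functions, sorwakeup_locked() and sowwakeup_locked() keep their historical contract: they consume the buffer lock, either inside sowakeup() or by unlocking directly when nobody needs notifying. A minimal sketch of a producer relying on that contract; example_deliver() is hypothetical:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical input path: append a chain to the receive buffer and
 * wake any readers.  sorwakeup_locked() returns with the receive
 * buffer lock dropped, so no unlock follows it.
 */
static void
example_deliver(struct socket *so, struct mbuf *m)
{
    SOCK_RECVBUF_LOCK(so);
    sbappendstream_locked(&so->so_rcv, m, 0);
    sorwakeup_locked(so);
    SOCK_RECVBUF_UNLOCK_ASSERT(so);    /* the lock is gone either way */
}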
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 62873017171..49a2b5773cc 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -420,8 +420,8 @@ soalloc(struct vnet *vnet)
     mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
     so->so_snd.sb_mtx = &so->so_snd_mtx;
     so->so_rcv.sb_mtx = &so->so_rcv_mtx;
-    SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
-    SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+    mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
+    mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
     so->so_rcv.sb_sel = &so->so_rdsel;
     so->so_snd.sb_sel = &so->so_wrsel;
     sx_init(&so->so_snd_sx, "so_snd_sx");
@@ -491,8 +491,8 @@ sodealloc(struct socket *so)
             &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
     sx_destroy(&so->so_snd_sx);
     sx_destroy(&so->so_rcv_sx);
-    SOCKBUF_LOCK_DESTROY(&so->so_snd);
-    SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+    mtx_destroy(&so->so_snd_mtx);
+    mtx_destroy(&so->so_rcv_mtx);
     }
     crfree(so->so_cred);
     mtx_destroy(&so->so_lock);
@@ -990,8 +990,8 @@ solisten_proto(struct socket *so, int backlog)
     sbrcv_timeo = so->so_rcv.sb_timeo;
     sbsnd_timeo = so->so_snd.sb_timeo;
 
-    sbdestroy(&so->so_snd, so);
-    sbdestroy(&so->so_rcv, so);
+    sbdestroy(so, SO_SND);
+    sbdestroy(so, SO_RCV);
 
 #ifdef INVARIANTS
     bzero(&so->so_rcv,
@@ -1208,8 +1208,8 @@ sofree(struct socket *so)
      * to be acquired or held.
      */
     if (!SOLISTENING(so)) {
-        sbdestroy(&so->so_snd, so);
-        sbdestroy(&so->so_rcv, so);
+        sbdestroy(so, SO_SND);
+        sbdestroy(so, SO_RCV);
     }
     seldrain(&so->so_rdsel);
     seldrain(&so->so_wrsel);
@@ -1735,7 +1735,7 @@ restart:
                 error = EWOULDBLOCK;
                 goto release;
             }
-            error = sbwait(&so->so_snd);
+            error = sbwait(so, SO_SND);
             SOCKBUF_UNLOCK(&so->so_snd);
             if (error)
                 goto release;
@@ -2067,7 +2067,7 @@ restart:
         }
         SBLASTRECORDCHK(&so->so_rcv);
         SBLASTMBUFCHK(&so->so_rcv);
-        error = sbwait(&so->so_rcv);
+        error = sbwait(so, SO_RCV);
         SOCKBUF_UNLOCK(&so->so_rcv);
         if (error)
             goto release;
@@ -2389,7 +2389,7 @@ dontblock:
              * the protocol. Skip blocking in this case.
              */
             if (so->so_rcv.sb_mb == NULL) {
-                error = sbwait(&so->so_rcv);
+                error = sbwait(so, SO_RCV);
                 if (error) {
                     SOCKBUF_UNLOCK(&so->so_rcv);
                     goto release;
@@ -2570,7 +2570,7 @@ restart:
          * Wait and block until (more) data comes in.
          * NB: Drops the sockbuf lock during wait.
          */
-        error = sbwait(sb);
+        error = sbwait(so, SO_RCV);
         if (error)
             goto out;
         goto restart;
@@ -2742,7 +2742,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
         }
         SBLASTRECORDCHK(&so->so_rcv);
         SBLASTMBUFCHK(&so->so_rcv);
-        error = sbwait(&so->so_rcv);
+        error = sbwait(so, SO_RCV);
         if (error) {
             SOCKBUF_UNLOCK(&so->so_rcv);
             return (error);
@@ -2960,7 +2960,7 @@ sorflush(struct socket *so)
         MPASS(pr->pr_domain->dom_dispose != NULL);
         (*pr->pr_domain->dom_dispose)(so);
     } else {
-        sbrelease(&so->so_rcv, so);
+        sbrelease(so, SO_RCV);
         SOCK_IO_RECV_UNLOCK(so);
     }
 
@@ -3610,8 +3610,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
         }
     } else {
         revents = 0;
-        SOCKBUF_LOCK(&so->so_snd);
-        SOCKBUF_LOCK(&so->so_rcv);
+        SOCK_SENDBUF_LOCK(so);
+        SOCK_RECVBUF_LOCK(so);
         if (events & (POLLIN | POLLRDNORM))
             if (soreadabledata(so))
                 revents |= events & (POLLIN | POLLRDNORM);
@@ -3642,8 +3642,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
                 so->so_snd.sb_flags |= SB_SEL;
             }
         }
-        SOCKBUF_UNLOCK(&so->so_rcv);
-        SOCKBUF_UNLOCK(&so->so_snd);
+        SOCK_RECVBUF_UNLOCK(so);
+        SOCK_SENDBUF_UNLOCK(so);
     }
     SOCK_UNLOCK(so);
     return (revents);
@@ -4297,12 +4297,12 @@ so_rdknl_assert_lock(void *arg, int what)
         if (SOLISTENING(so))
             SOCK_LOCK_ASSERT(so);
         else
-            SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+            SOCK_RECVBUF_LOCK_ASSERT(so);
     } else {
         if (SOLISTENING(so))
             SOCK_UNLOCK_ASSERT(so);
         else
-            SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+            SOCK_RECVBUF_UNLOCK_ASSERT(so);
     }
 }
 
@@ -4314,7 +4314,7 @@ so_wrknl_lock(void *arg)
     if (SOLISTENING(so))
         SOCK_LOCK(so);
     else
-        SOCKBUF_LOCK(&so->so_snd);
+        SOCK_SENDBUF_LOCK(so);
 }
 
 static void
@@ -4325,7 +4325,7 @@ so_wrknl_unlock(void *arg)
     if (SOLISTENING(so))
         SOCK_UNLOCK(so);
     else
-        SOCKBUF_UNLOCK(&so->so_snd);
+        SOCK_SENDBUF_UNLOCK(so);
 }
 
 static void
@@ -4337,12 +4337,12 @@ so_wrknl_assert_lock(void *arg, int what)
         if (SOLISTENING(so))
             SOCK_LOCK_ASSERT(so);
         else
-            SOCKBUF_LOCK_ASSERT(&so->so_snd);
+            SOCK_SENDBUF_LOCK_ASSERT(so);
     } else {
         if (SOLISTENING(so))
             SOCK_UNLOCK_ASSERT(so);
         else
-            SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+            SOCK_SENDBUF_UNLOCK_ASSERT(so);
     }
 }
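soalloc() now initializes the mutexes directly and leaves each sockbuf's sb_mtx pointing at the socket-embedded mutex, so the legacy SOCKBUF_LOCK(&so->so_rcv) path and the new SOCK_RECVBUF_LOCK(so) path take the same lock. A small assertion-only sketch of that equivalence on a non-listening socket; example_assert_same_lock() is hypothetical:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical invariant check: both lock families resolve to the
 * same mtx, because soalloc() aliases sb_mtx to so_rcv_mtx/so_snd_mtx.
 */
static void
example_assert_same_lock(struct socket *so)
{
    MPASS(SOCKBUF_MTX(&so->so_rcv) == SOCK_RECVBUF_MTX(so));
    MPASS(SOCKBUF_MTX(&so->so_snd) == SOCK_SENDBUF_MTX(so));
}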
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index efa586d346c..b326dbd825a 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -2771,7 +2771,7 @@ unp_dispose(struct socket *so)
     KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
         ("%s: ccc %u mb %p mbcnt %u", __func__,
         sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
-    sbrelease_locked(sb, so);
+    sbrelease_locked(so, SO_RCV);
     SOCK_RECVBUF_UNLOCK(so);
     if (SOCK_IO_RECV_OWNED(so))
         SOCK_IO_RECV_UNLOCK(so);
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 9a892716044..9e2e70313be 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -12992,7 +12992,7 @@ sctp_lower_sosend(struct socket *so,
             stcb->block_entry = &be;
             SCTP_TCB_UNLOCK(stcb);
             hold_tcblock = false;
-            error = sbwait(&so->so_snd);
+            error = sbwait(so, SO_SND);
             if (error == 0) {
                 if (so->so_error != 0) {
                     error = so->so_error;
@@ -13352,7 +13352,7 @@ skip_preblock:
                 stcb->block_entry = &be;
                 SCTP_TCB_UNLOCK(stcb);
                 hold_tcblock = false;
-                error = sbwait(&so->so_snd);
+                error = sbwait(so, SO_SND);
                 if (error == 0) {
                     if (so->so_error != 0)
                         error = so->so_error;
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 8451ed5e200..e20a49be1ad 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -5640,7 +5640,7 @@ restart_nosblocks:
         }
     }
     if (block_allowed) {
-        error = sbwait(&so->so_rcv);
+        error = sbwait(so, SO_RCV);
         if (error) {
             goto out;
         }
@@ -6255,7 +6255,7 @@ wait_some_more:
             goto release;
         }
         if (so->so_rcv.sb_cc <= control->held_length) {
-            error = sbwait(&so->so_rcv);
+            error = sbwait(so, SO_RCV);
             if (error) {
                 goto release;
             }
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 695cd5a916d..be86ceca445 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1921,8 +1921,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
                  *  Give up when limit is reached.
                  */
                 if (newsize)
-                    if (!sbreserve_locked(&so->so_rcv,
-                        newsize, so, NULL))
+                    if (!sbreserve_locked(so, SO_RCV,
+                        newsize, NULL))
                         so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
                 m_adj(m, drop_hdrlen);  /* delayed header drop */
                 sbappendstream_locked(&so->so_rcv, m, 0);
@@ -3848,7 +3848,7 @@ tcp_mss(struct tcpcb *tp, int offer)
         if (bufsize > sb_max)
             bufsize = sb_max;
         if (bufsize > so->so_snd.sb_hiwat)
-            (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+            (void)sbreserve_locked(so, SO_SND, bufsize, NULL);
     }
     SOCKBUF_UNLOCK(&so->so_snd);
     /*
@@ -3871,7 +3871,7 @@ tcp_mss(struct tcpcb *tp, int offer)
         if (bufsize > sb_max)
             bufsize = sb_max;
         if (bufsize > so->so_rcv.sb_hiwat)
-            (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+            (void)sbreserve_locked(so, SO_RCV, bufsize, NULL);
     }
     SOCKBUF_UNLOCK(&so->so_rcv);
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 299f1d03471..b0dedb1a24d 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -2166,9 +2166,9 @@ tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
             sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
             sendwin >= (sbused(&so->so_snd) -
             (tp->snd_nxt - tp->snd_una))) {
-            if (!sbreserve_locked(&so->so_snd,
+            if (!sbreserve_locked(so, SO_SND,
                 min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
-                V_tcp_autosndbuf_max), so, curthread))
+                V_tcp_autosndbuf_max), curthread))
                 so->so_snd.sb_flags &= ~SB_AUTOSIZE;
         }
     }
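tcp_do_segment() and tcp_sndbuf_autoscale() above show the common autosizing idiom against the new bool-returning sbreserve_locked(). A condensed sketch of that idiom; the function name and growth constants are made up for illustration:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical autoscaling step: try to grow the receive buffer; if
 * the reservation fails the buffer keeps its old size and autosizing
 * is switched off so we stop retrying.
 */
static void
example_autoscale_rcv(struct socket *so)
{
    u_long newsize;

    SOCK_RECVBUF_LOCK(so);
    if (so->so_rcv.sb_flags & SB_AUTOSIZE) {
        /* 64 KB increment / 2 MB cap are illustrative values only. */
        newsize = min(so->so_rcv.sb_hiwat + 64 * 1024, 2 * 1024 * 1024);
        if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
            so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
    }
    SOCK_RECVBUF_UNLOCK(so);
}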
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index c822276185c..def5edb9898 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -1142,7 +1142,7 @@ restart:
                 error = EWOULDBLOCK;
                 goto release;
             }
-            error = sbwait(&so->so_snd);
+            error = sbwait(so, SO_SND);
             SOCKBUF_UNLOCK(&so->so_snd);
             if (error)
                 goto release;
@@ -1336,7 +1336,7 @@ restart:
          * Wait and block until (more) data comes in.
          * NB: Drops the sockbuf lock during wait.
          */
-        error = sbwait(sb);
+        error = sbwait(so, SO_RCV);
         if (error)
             goto out;
         goto restart;
diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c
index 514905bf1cc..810a957bb97 100644
--- a/sys/rpc/clnt_bck.c
+++ b/sys/rpc/clnt_bck.c
@@ -326,7 +326,7 @@
 if (error != 0) printf("sosend=%d\n", error);
     if (error == EMSGSIZE) {
 printf("emsgsize\n");
         SOCKBUF_LOCK(&xprt->xp_socket->so_snd);
-        sbwait(&xprt->xp_socket->so_snd);
+        sbwait(xprt->xp_socket, SO_SND);
         SOCKBUF_UNLOCK(&xprt->xp_socket->so_snd);
         sx_xunlock(&xprt->xp_lock);
         AUTH_VALIDATE(auth, xid, NULL, NULL);
diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c
index dfada2bea38..f565de06f4b 100644
--- a/sys/rpc/clnt_vc.c
+++ b/sys/rpc/clnt_vc.c
@@ -447,7 +447,7 @@ call_again:
     if (error == EMSGSIZE || (error == ERESTART &&
         (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) {
         SOCKBUF_LOCK(&ct->ct_socket->so_snd);
-        sbwait(&ct->ct_socket->so_snd);
+        sbwait(ct->ct_socket, SO_SND);
         SOCKBUF_UNLOCK(&ct->ct_socket->so_snd);
         AUTH_VALIDATE(auth, xid, NULL, NULL);
         mtx_lock(&ct->ct_lock);
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 372f04eba54..2484407d557 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -116,6 +116,9 @@ struct sockbuf {
 #endif  /* defined(_KERNEL) || defined(_WANT_SOCKET) */
 
 #ifdef _KERNEL
+/* 'which' values for KPIs that operate on one buffer of a socket. */
+typedef enum { SO_RCV, SO_SND } sb_which;
+
 /*
  * Per-socket buffer mutex used to protect most fields in the socket buffer.
  * These make use of the mutex pointer embedded in struct sockbuf, which
@@ -124,9 +127,6 @@ struct sockbuf {
  * these locking macros.
 */
 #define SOCKBUF_MTX(_sb)    ((_sb)->sb_mtx)
-#define SOCKBUF_LOCK_INIT(_sb, _name) \
-    mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
-#define SOCKBUF_LOCK_DESTROY(_sb)   mtx_destroy(SOCKBUF_MTX(_sb))
 #define SOCKBUF_LOCK(_sb)   mtx_lock(SOCKBUF_MTX(_sb))
 #define SOCKBUF_OWNED(_sb)  mtx_owned(SOCKBUF_MTX(_sb))
 #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb))
@@ -162,7 +162,7 @@ struct mbuf *
 struct mbuf *
     sbcreatecontrol_how(void *p, int size, int type, int level,
         int wait);
-void    sbdestroy(struct sockbuf *sb, struct socket *so);
+void    sbdestroy(struct socket *, sb_which);
 void    sbdrop(struct sockbuf *sb, int len);
 void    sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
@@ -171,17 +171,17 @@ void    sbdroprecord(struct sockbuf *sb);
 void    sbdroprecord_locked(struct sockbuf *sb);
 void    sbflush(struct sockbuf *sb);
 void    sbflush_locked(struct sockbuf *sb);
-void    sbrelease(struct sockbuf *sb, struct socket *so);
-void    sbrelease_locked(struct sockbuf *sb, struct socket *so);
+void    sbrelease(struct socket *, sb_which);
+void    sbrelease_locked(struct socket *, sb_which);
 int     sbsetopt(struct socket *so, int cmd, u_long cc);
-int     sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool    sbreserve_locked(struct socket *so, sb_which which, u_long cc,
         struct thread *td);
 void    sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
 struct mbuf *
     sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
 struct mbuf *
     sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
-int     sbwait(struct sockbuf *sb);
+int     sbwait(struct socket *, sb_which);
 void    sballoc(struct sockbuf *, struct mbuf *);
 void    sbfree(struct sockbuf *, struct mbuf *);
 void    sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index fe6faa842bd..05eefd7e4fd 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -77,8 +77,8 @@ enum socket_qstate {
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
- * (cr) locked by SOCK_RECVBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_rcv).
- * (cs) locked by SOCK_SENDBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_snd).
+ * (cr) locked by SOCK_RECVBUF_LOCK(so)
+ * (cs) locked by SOCK_SENDBUF_LOCK(so)
  * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
@@ -256,8 +256,8 @@ struct socket {
 } while (0)
 
 /*
- * Socket buffer locks.  These manipulate the same mutexes as SOCKBUF_LOCK()
- * and related macros.
+ * Socket buffer locks.  These are strongly preferred over SOCKBUF_LOCK(sb)
+ * macros, as we are moving towards protocol specific socket buffers.
 */
 #define SOCK_RECVBUF_MTX(so)                        \
     (&(so)->so_rcv_mtx)
@@ -281,8 +281,26 @@ struct socket {
 #define SOCK_SENDBUF_UNLOCK_ASSERT(so)                  \
     mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED)
 
-/* 'which' values for socket buffer events and upcalls. */
-typedef enum { SO_RCV, SO_SND } sb_which;
+#define SOCK_BUF_LOCK(so, which)                    \
+    mtx_lock(soeventmtx(so, which))
+#define SOCK_BUF_UNLOCK(so, which)                  \
+    mtx_unlock(soeventmtx(so, which))
+#define SOCK_BUF_LOCK_ASSERT(so, which)                 \
+    mtx_assert(soeventmtx(so, which), MA_OWNED)
+#define SOCK_BUF_UNLOCK_ASSERT(so, which)               \
+    mtx_assert(soeventmtx(so, which), MA_NOTOWNED)
+
+static inline struct sockbuf *
+sobuf(struct socket *so, const sb_which which)
+{
+    return (which == SO_RCV ? &so->so_rcv : &so->so_snd);
+}
+
+static inline struct mtx *
+soeventmtx(struct socket *so, const sb_which which)
+{
+    return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so));
+}
 
 /*
  * Macros for sockets and socket buffering.
@@ -308,12 +326,6 @@ typedef enum { SO_RCV, SO_SND } sb_which;
     soiounlock(&(so)->so_rcv_sx)
 #define SOCK_IO_RECV_OWNED(so)  sx_xlocked(&(so)->so_rcv_sx)
 
-/*
- * Do we need to notify the other side when I/O is possible?
- */
-#define sb_notify(sb)   (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
-    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)
-
 /* do we have to send all at once on a socket? */
 #define sosendallatonce(so)                     \
     ((so)->so_proto->pr_flags & PR_ATOMIC)
@@ -357,29 +369,13 @@ typedef enum { SO_RCV, SO_SND } sb_which;
  * directly invokes the underlying sowakeup() primitives, it must
 * maintain the same semantics.
 */
-#define sorwakeup_locked(so) do {                   \
-    SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);             \
-    if (sb_notify(&(so)->so_rcv))                   \
-        sowakeup((so), &(so)->so_rcv);              \
-    else                                \
-        SOCKBUF_UNLOCK(&(so)->so_rcv);              \
-} while (0)
-
 #define sorwakeup(so) do {                      \
-    SOCKBUF_LOCK(&(so)->so_rcv);                    \
+    SOCK_RECVBUF_LOCK(so);                      \
     sorwakeup_locked(so);                       \
 } while (0)
 
-#define sowwakeup_locked(so) do {                   \
-    SOCKBUF_LOCK_ASSERT(&(so)->so_snd);             \
-    if (sb_notify(&(so)->so_snd))                   \
-        sowakeup((so), &(so)->so_snd);              \
-    else                                \
-        SOCKBUF_UNLOCK(&(so)->so_snd);              \
-} while (0)
-
 #define sowwakeup(so) do {                      \
-    SOCKBUF_LOCK(&(so)->so_snd);                    \
+    SOCK_SENDBUF_LOCK(so);                      \
     sowwakeup_locked(so);                       \
 } while (0)
 
@@ -520,8 +516,9 @@ int soshutdown(struct socket *so, int how);
 void    soupcall_clear(struct socket *, sb_which);
 void    soupcall_set(struct socket *, sb_which, so_upcall_t, void *);
 void    solisten_upcall_set(struct socket *, so_upcall_t, void *);
-void    sowakeup(struct socket *so, struct sockbuf *sb);
-void    sowakeup_aio(struct socket *so, struct sockbuf *sb);
+void    sorwakeup_locked(struct socket *);
+void    sowwakeup_locked(struct socket *);
+void    sowakeup_aio(struct socket *, sb_which);
 void    solisten_wakeup(struct socket *);
 int     selsocket(struct socket *so, int events, struct timeval *tv,
         struct thread *td);
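The sobuf() and soeventmtx() accessors plus the SOCK_BUF_*() macros added here let code that must handle either buffer stay generic over sb_which, instead of spelling out &so->so_rcv / &so->so_snd. A minimal sketch; example_buffer_has_waiter() is hypothetical:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

/*
 * Hypothetical helper operating on either buffer: resolve the sockbuf
 * with sobuf() and take the matching mutex through the generic
 * SOCK_BUF_LOCK()/SOCK_BUF_UNLOCK() pair.
 */
static bool
example_buffer_has_waiter(struct socket *so, sb_which which)
{
    struct sockbuf *sb = sobuf(so, which);
    bool waiting;

    SOCK_BUF_LOCK(so, which);
    waiting = (sb->sb_flags & SB_WAIT) != 0;
    SOCK_BUF_UNLOCK(so, which);
    return (waiting);
}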