mirror of
https://github.com/opnsense/src.git
synced 2026-06-07 07:42:26 -04:00
tcp: refactor cwnd during SACK transmissions to allow TSO
Refactoring cwnd and moving the adjustment for SACKed data into tcp_output() - with cwnd tracking the maximum extent starting at snd_una - allows SACK loss recovery, SACK transmissions after an RTO during slow start, and, if allowed, the use of TSO while in loss recovery. Reviewed By: tuexen, cc, #transport Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D43470
This commit is contained in:
parent
72ae04c733
commit
7dc78150c7
3 changed files with 97 additions and 74 deletions
|
|
@ -2653,13 +2653,14 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
|
|||
tcp_do_prr_ack(tp, th, &to,
|
||||
sack_changed, &maxseg);
|
||||
} else if (tcp_is_sack_recovery(tp, &to) &&
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
IN_FASTRECOVERY(tp->t_flags) &&
|
||||
(tp->snd_nxt == tp->snd_max)) {
|
||||
int awnd;
|
||||
|
||||
/*
|
||||
* Compute the amount of data in flight first.
|
||||
* We can inject new data into the pipe iff
|
||||
* we have less than 1/2 the original window's
|
||||
* we have less than ssthresh
|
||||
* worth of data in flight.
|
||||
*/
|
||||
if (V_tcp_do_newsack) {
|
||||
|
|
@ -2669,10 +2670,18 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
|
|||
tp->sackhint.sack_bytes_rexmit;
|
||||
}
|
||||
if (awnd < tp->snd_ssthresh) {
|
||||
tp->snd_cwnd += maxseg;
|
||||
tp->snd_cwnd += imax(maxseg,
|
||||
imin(2 * maxseg,
|
||||
tp->sackhint.delivered_data));
|
||||
if (tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
}
|
||||
} else if (tcp_is_sack_recovery(tp, &to) &&
|
||||
IN_FASTRECOVERY(tp->t_flags) &&
|
||||
SEQ_LT(tp->snd_nxt, tp->snd_max)) {
|
||||
tp->snd_cwnd += imax(maxseg,
|
||||
imin(2 * maxseg,
|
||||
tp->sackhint.delivered_data));
|
||||
} else {
|
||||
tp->snd_cwnd += maxseg;
|
||||
}
|
||||
|
|
@ -2696,14 +2705,13 @@ enter_recovery:
|
|||
tcp_seq onxt = tp->snd_nxt;
|
||||
|
||||
/*
|
||||
* If we're doing sack, or prr, check
|
||||
* to see if we're already in sack
|
||||
* If we're doing sack, check to
|
||||
* see if we're already in sack
|
||||
* recovery. If we're not doing sack,
|
||||
* check to see if we're in newreno
|
||||
* recovery.
|
||||
*/
|
||||
if (V_tcp_do_prr ||
|
||||
(tp->t_flags & TF_SACK_PERMIT)) {
|
||||
if (tcp_is_sack_recovery(tp, &to)) {
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
tp->t_dupacks = 0;
|
||||
break;
|
||||
|
|
@ -2723,29 +2731,40 @@ enter_recovery:
|
|||
tp->t_rtttime = 0;
|
||||
if (V_tcp_do_prr) {
|
||||
/*
|
||||
* snd_ssthresh is already updated by
|
||||
* cc_cong_signal.
|
||||
* snd_ssthresh and snd_recover are
|
||||
* already updated by cc_cong_signal.
|
||||
*/
|
||||
if (tcp_is_sack_recovery(tp, &to)) {
|
||||
/*
|
||||
* Exclude Limited Transmit
|
||||
* Include Limited Transmit
|
||||
* segments here
|
||||
*/
|
||||
tp->sackhint.prr_delivered =
|
||||
maxseg;
|
||||
imin(tp->snd_max - th->th_ack,
|
||||
(tp->snd_limited + 1) * maxseg);
|
||||
} else {
|
||||
tp->sackhint.prr_delivered =
|
||||
imin(tp->snd_max - tp->snd_una,
|
||||
imin(INT_MAX / 65536,
|
||||
tp->t_dupacks) * maxseg);
|
||||
maxseg;
|
||||
}
|
||||
tp->sackhint.recover_fs = max(1,
|
||||
tp->snd_nxt - tp->snd_una);
|
||||
}
|
||||
tp->snd_limited = 0;
|
||||
if (tcp_is_sack_recovery(tp, &to)) {
|
||||
TCPSTAT_INC(tcps_sack_recovery_episode);
|
||||
tp->snd_cwnd = maxseg;
|
||||
/*
|
||||
* When entering LR after RTO due to
|
||||
* Duplicate ACKs, retransmit existing
|
||||
* holes from the scoreboard.
|
||||
*/
|
||||
tcp_resend_sackholes(tp);
|
||||
/* Avoid inflating cwnd in tcp_output */
|
||||
tp->snd_nxt = tp->snd_max;
|
||||
tp->snd_cwnd = tcp_compute_pipe(tp) +
|
||||
maxseg;
|
||||
(void) tcp_output(tp);
|
||||
/* Set cwnd to the expected flightsize */
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
if (SEQ_GT(th->th_ack, tp->snd_una)) {
|
||||
goto resume_partialack;
|
||||
}
|
||||
|
|
@ -2790,7 +2809,8 @@ enter_recovery:
|
|||
(tp->t_rxtshift == 0))
|
||||
tp->snd_cwnd =
|
||||
SEQ_SUB(tp->snd_nxt,
|
||||
tp->snd_una);
|
||||
tp->snd_una) -
|
||||
tcp_sack_adjust(tp);
|
||||
tp->snd_cwnd +=
|
||||
(tp->t_dupacks - tp->snd_limited) *
|
||||
maxseg;
|
||||
|
|
@ -3049,9 +3069,8 @@ process_ACK:
|
|||
SEQ_GEQ(th->th_ack, tp->snd_recover)) {
|
||||
cc_post_recovery(tp, th);
|
||||
}
|
||||
if (tp->t_flags & TF_SACK_PERMIT) {
|
||||
if (SEQ_GT(tp->snd_una, tp->snd_recover))
|
||||
tp->snd_recover = tp->snd_una;
|
||||
if (SEQ_GT(tp->snd_una, tp->snd_recover)) {
|
||||
tp->snd_recover = tp->snd_una;
|
||||
}
|
||||
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
|
||||
tp->snd_nxt = tp->snd_una;
|
||||
|
|
@ -4138,9 +4157,7 @@ tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to,
|
|||
*/
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
if (tcp_is_sack_recovery(tp, to)) {
|
||||
tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
|
||||
tp->sackhint.sack_bytes_rexmit +
|
||||
(snd_cnt * maxseg);
|
||||
tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg);
|
||||
} else {
|
||||
tp->snd_cwnd = (tp->snd_max - tp->snd_una) +
|
||||
(snd_cnt * maxseg);
|
||||
|
|
@ -4168,17 +4185,19 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
|
|||
|
||||
tcp_timer_activate(tp, TT_REXMT, 0);
|
||||
tp->t_rtttime = 0;
|
||||
tp->snd_nxt = th->th_ack;
|
||||
/*
|
||||
* Set snd_cwnd to one segment beyond acknowledged offset.
|
||||
* (tp->snd_una has not yet been updated when this function is called.)
|
||||
*/
|
||||
tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
(void) tcp_output(tp);
|
||||
tp->snd_cwnd = ocwnd;
|
||||
if (SEQ_GT(onxt, tp->snd_nxt))
|
||||
tp->snd_nxt = onxt;
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
tp->snd_nxt = th->th_ack;
|
||||
/*
|
||||
* Set snd_cwnd to one segment beyond acknowledged offset.
|
||||
* (tp->snd_una has not yet been updated when this function is called.)
|
||||
*/
|
||||
tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
(void) tcp_output(tp);
|
||||
tp->snd_cwnd = ocwnd;
|
||||
if (SEQ_GT(onxt, tp->snd_nxt))
|
||||
tp->snd_nxt = onxt;
|
||||
}
|
||||
/*
|
||||
* Partial window deflation. Relies on fact that tp->snd_una
|
||||
* not updated yet.
|
||||
|
|
|
|||
|
|
@ -266,8 +266,10 @@ again:
|
|||
* resending already delivered data. Adjust snd_nxt accordingly.
|
||||
*/
|
||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
SEQ_LT(tp->snd_nxt, tp->snd_max))
|
||||
(tp->sackhint.nexthole != NULL) &&
|
||||
!IN_FASTRECOVERY(tp->t_flags)) {
|
||||
sendwin = tcp_sack_adjust(tp);
|
||||
}
|
||||
sendalot = 0;
|
||||
tso = 0;
|
||||
mtu = 0;
|
||||
|
|
@ -292,10 +294,13 @@ again:
|
|||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
(IN_FASTRECOVERY(tp->t_flags) || SEQ_LT(tp->snd_nxt, tp->snd_max)) &&
|
||||
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
|
||||
uint32_t cwin;
|
||||
int32_t cwin;
|
||||
|
||||
cwin =
|
||||
imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
cwin = imax(sendwin - tcp_compute_pipe(tp), 0);
|
||||
} else {
|
||||
cwin = imax(sendwin - off, 0);
|
||||
}
|
||||
/* Do not retransmit SACK segments beyond snd_recover */
|
||||
if (SEQ_GT(p->end, tp->snd_recover)) {
|
||||
/*
|
||||
|
|
@ -314,19 +319,34 @@ again:
|
|||
goto after_sack_rexmit;
|
||||
} else {
|
||||
/* Can rexmit part of the current hole */
|
||||
len = ((int32_t)ulmin(cwin,
|
||||
SEQ_SUB(tp->snd_recover, p->rxmit)));
|
||||
len = SEQ_SUB(tp->snd_recover, p->rxmit);
|
||||
if (cwin <= len) {
|
||||
len = cwin;
|
||||
} else {
|
||||
sendalot = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
len = ((int32_t)ulmin(cwin,
|
||||
SEQ_SUB(p->end, p->rxmit)));
|
||||
len = SEQ_SUB(p->end, p->rxmit);
|
||||
if (cwin <= len) {
|
||||
len = cwin;
|
||||
} else {
|
||||
sendalot = 1;
|
||||
}
|
||||
}
|
||||
/* we could have transmitted from the scoreboard,
|
||||
* but sendwin (expected flightsize) - pipe didn't
|
||||
* allow any transmission.
|
||||
* Bypass recalculating the possible transmission
|
||||
* length further down by setting sack_rxmit.
|
||||
* Wouldn't be here if there would have been
|
||||
* nothing in the scoreboard to transmit.
|
||||
*/
|
||||
sack_rxmit = 1;
|
||||
if (len > 0) {
|
||||
off = SEQ_SUB(p->rxmit, tp->snd_una);
|
||||
KASSERT(off >= 0,("%s: sack block to the left of una : %d",
|
||||
__func__, off));
|
||||
sack_rxmit = 1;
|
||||
sendalot = 1;
|
||||
}
|
||||
}
|
||||
after_sack_rexmit:
|
||||
|
|
@ -390,34 +410,15 @@ after_sack_rexmit:
|
|||
*/
|
||||
if (sack_rxmit == 0) {
|
||||
if ((sack_bytes_rxmt == 0) || SEQ_LT(tp->snd_nxt, tp->snd_max)) {
|
||||
len = ((int32_t)min(sbavail(&so->so_snd), sendwin) -
|
||||
off);
|
||||
len = imin(sbavail(&so->so_snd), sendwin) - off;
|
||||
} else {
|
||||
int32_t cwin;
|
||||
|
||||
/*
|
||||
* We are inside of a SACK recovery episode and are
|
||||
* sending new data, having retransmitted all the
|
||||
* data possible in the scoreboard.
|
||||
*/
|
||||
len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) -
|
||||
off);
|
||||
/*
|
||||
* Don't remove this (len > 0) check !
|
||||
* We explicitly check for len > 0 here (although it
|
||||
* isn't really necessary), to work around a gcc
|
||||
* optimization issue - to force gcc to compute
|
||||
* len above. Without this check, the computation
|
||||
* of len is bungled by the optimizer.
|
||||
*/
|
||||
if (len > 0) {
|
||||
cwin = tp->snd_cwnd - imax(0, (int32_t)
|
||||
(tp->snd_nxt - tp->snd_recover)) -
|
||||
sack_bytes_rxmt;
|
||||
if (cwin < 0)
|
||||
cwin = 0;
|
||||
len = imin(len, cwin);
|
||||
}
|
||||
len = imin(sbavail(&so->so_snd) - off,
|
||||
sendwin - tcp_compute_pipe(tp));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1647,6 +1648,10 @@ timer:
|
|||
if ((error == 0) &&
|
||||
sack_rxmit &&
|
||||
SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) {
|
||||
/*
|
||||
* When transmitting from SACK scoreboard
|
||||
* after an RTO, pull snd_nxt along.
|
||||
*/
|
||||
tp->snd_nxt = SEQ_MIN(p->rxmit, p->end);
|
||||
}
|
||||
if (error) {
|
||||
|
|
|
|||
|
|
@ -961,16 +961,15 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th, u_int *maxsegp)
|
|||
/* Send one or 2 segments based on how much new data was acked. */
|
||||
if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2)
|
||||
num_segs = 2;
|
||||
if (V_tcp_do_newsack) {
|
||||
tp->snd_cwnd = imax(tp->snd_nxt - th->th_ack +
|
||||
tp->sackhint.sack_bytes_rexmit -
|
||||
tp->sackhint.sacked_bytes -
|
||||
tp->sackhint.lost_bytes, maxseg) +
|
||||
num_segs * maxseg;
|
||||
} else {
|
||||
if (tp->snd_nxt == tp->snd_max) {
|
||||
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
|
||||
imax(0, tp->snd_nxt - tp->snd_recover) +
|
||||
num_segs * maxseg);
|
||||
(tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
|
||||
} else {
|
||||
/*
|
||||
* Since cwnd is not the expected flightsize during
|
||||
* SACK LR, not deflating cwnd allows the partial
|
||||
* ACKed amount to be sent.
|
||||
*/
|
||||
}
|
||||
if (tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
|
|
|
|||
Loading…
Reference in a new issue