mirror of
https://github.com/opnsense/src.git
synced 2026-06-13 10:40:19 -04:00
Implement TCP NewReno, as documented in RFC 2582. This allows
better recovery for multiple packet losses in a single window. The algorithm can be toggled via the sysctl net.inet.tcp.newreno, which defaults to "on". Submitted by: Jayanth Vijayaraghavan <jayanth@yahoo-inc.com>
This commit is contained in:
parent
e003b0836e
commit
46f5848237
6 changed files with 163 additions and 15 deletions
|
|
@ -118,6 +118,8 @@ struct tcphdr {
|
|||
|
||||
#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
|
||||
|
||||
#define TCP_MAXBURST 4 /* maximum segments in a burst */
|
||||
|
||||
#define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */
|
||||
#define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr))
|
||||
/* max space left for options */
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ static void tcp_pulloutofband __P((struct socket *,
|
|||
static int tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
|
||||
struct mbuf *));
|
||||
static void tcp_xmit_timer __P((struct tcpcb *, int));
|
||||
static int tcp_newreno __P((struct tcpcb *, struct tcphdr *));
|
||||
|
||||
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
|
||||
#ifdef INET6
|
||||
|
|
@ -1104,6 +1105,7 @@ findpcb:
|
|||
tp->irs = th->th_seq;
|
||||
tcp_sendseqinit(tp);
|
||||
tcp_rcvseqinit(tp);
|
||||
tp->snd_recover = tp->snd_una;
|
||||
/*
|
||||
* Initialization of the tcpcb for transaction;
|
||||
* set SND.WND = SEG.WND,
|
||||
|
|
@ -1780,10 +1782,20 @@ trimthenstep6:
|
|||
u_int win =
|
||||
min(tp->snd_wnd, tp->snd_cwnd) / 2 /
|
||||
tp->t_maxseg;
|
||||
|
||||
if (tcp_do_newreno && SEQ_LT(th->th_ack,
|
||||
tp->snd_recover)) {
|
||||
/* False retransmit, should not
|
||||
* cut window
|
||||
*/
|
||||
tp->snd_cwnd += tp->t_maxseg;
|
||||
tp->t_dupacks = 0;
|
||||
(void) tcp_output(tp);
|
||||
goto drop;
|
||||
}
|
||||
if (win < 2)
|
||||
win = 2;
|
||||
tp->snd_ssthresh = win * tp->t_maxseg;
|
||||
tp->snd_recover = tp->snd_max;
|
||||
callout_stop(tp->tt_rexmt);
|
||||
tp->t_rtttime = 0;
|
||||
tp->snd_nxt = th->th_ack;
|
||||
|
|
@ -1807,10 +1819,26 @@ trimthenstep6:
|
|||
* If the congestion window was inflated to account
|
||||
* for the other side's cached packets, retract it.
|
||||
*/
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
if (tcp_do_newreno == 0) {
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
} else if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
!tcp_newreno(tp, th)) {
|
||||
/*
|
||||
* Window inflation should have left us with approx.
|
||||
* snd_ssthresh outstanding data. But in case we
|
||||
* would be inclined to send a burst, better to do
|
||||
* it via the slow start mechanism.
|
||||
*/
|
||||
if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
|
||||
tp->snd_cwnd =
|
||||
tp->snd_max - th->th_ack + tp->t_maxseg;
|
||||
else
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
}
|
||||
if (SEQ_GT(th->th_ack, tp->snd_max)) {
|
||||
tcpstat.tcps_rcvacktoomuch++;
|
||||
goto dropafterack;
|
||||
|
|
@ -1903,7 +1931,8 @@ process_ACK:
|
|||
|
||||
if (cw > tp->snd_ssthresh)
|
||||
incr = incr * incr / cw;
|
||||
tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
|
||||
if (tcp_do_newreno == 0 || SEQ_GEQ(th->th_ack, tp->snd_recover))
|
||||
tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
|
||||
}
|
||||
if (acked > so->so_snd.sb_cc) {
|
||||
tp->snd_wnd -= so->so_snd.sb_cc;
|
||||
|
|
@ -2791,3 +2820,41 @@ tcp_mssopt(tp)
|
|||
|
||||
return rt->rt_ifp->if_mtu - min_protoh;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Checks for partial ack. If partial ack arrives, force the retransmission
|
||||
* of the next unacknowledged segment, do not clear tp->t_dupacks, and return
|
||||
* 1. By setting snd_nxt to ti_ack, this forces retransmission timer to
|
||||
* be started again. If the ack advances at least to tp->snd_recover, return 0.
|
||||
*/
|
||||
static int
|
||||
tcp_newreno(tp, th)
|
||||
struct tcpcb *tp;
|
||||
struct tcphdr *th;
|
||||
{
|
||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
|
||||
tcp_seq onxt = tp->snd_nxt;
|
||||
tcp_seq ouna = tp->snd_una; /* Haven't updated snd_una yet*/
|
||||
u_long ocwnd = tp->snd_cwnd;
|
||||
|
||||
callout_stop(tp->tt_rexmt);
|
||||
tp->t_rtttime = 0;
|
||||
tp->snd_nxt = th->th_ack;
|
||||
tp->snd_cwnd = tp->t_maxseg;
|
||||
tp->snd_una = th->th_ack;
|
||||
(void) tcp_output(tp);
|
||||
|
||||
tp->snd_cwnd = ocwnd;
|
||||
tp->snd_una = ouna;
|
||||
if (SEQ_GT(onxt, tp->snd_nxt))
|
||||
tp->snd_nxt = onxt;
|
||||
/*
|
||||
* Partial window deflation. Relies on fact that tp->snd_una
|
||||
* not updated yet.
|
||||
*/
|
||||
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,6 +98,9 @@ int ss_fltsz_local = TCP_MAXWIN; /* something large */
|
|||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW,
|
||||
&ss_fltsz_local, 1, "Slow start flight size for local networks");
|
||||
|
||||
int tcp_do_newreno = 1;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
|
||||
0, "Enable NewReno Algorithms");
|
||||
/*
|
||||
* Tcp output routine: figure out what should be sent and send it.
|
||||
*/
|
||||
|
|
@ -118,6 +121,7 @@ tcp_output(tp)
|
|||
u_char opt[TCP_MAXOLEN];
|
||||
unsigned ipoptlen, optlen, hdrlen;
|
||||
int idle, sendalot;
|
||||
int maxburst = TCP_MAXBURST;
|
||||
struct rmxp_tao *taop;
|
||||
struct rmxp_tao tao_noncached;
|
||||
#ifdef INET6
|
||||
|
|
@ -778,12 +782,12 @@ send:
|
|||
*/
|
||||
if (!callout_active(tp->tt_rexmt) &&
|
||||
tp->snd_nxt != tp->snd_una) {
|
||||
callout_reset(tp->tt_rexmt, tp->t_rxtcur,
|
||||
tcp_timer_rexmt, tp);
|
||||
if (callout_active(tp->tt_persist)) {
|
||||
callout_stop(tp->tt_persist);
|
||||
tp->t_rxtshift = 0;
|
||||
}
|
||||
callout_reset(tp->tt_rexmt, tp->t_rxtcur,
|
||||
tcp_timer_rexmt, tp);
|
||||
}
|
||||
} else
|
||||
if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
|
||||
|
|
@ -889,7 +893,7 @@ out:
|
|||
tp->t_flags &= ~TF_ACKNOW;
|
||||
if (tcp_delack_enabled)
|
||||
callout_stop(tp->tt_delack);
|
||||
if (sendalot)
|
||||
if (sendalot && (!tcp_do_newreno || --maxburst))
|
||||
goto again;
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ static void tcp_pulloutofband __P((struct socket *,
|
|||
static int tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
|
||||
struct mbuf *));
|
||||
static void tcp_xmit_timer __P((struct tcpcb *, int));
|
||||
static int tcp_newreno __P((struct tcpcb *, struct tcphdr *));
|
||||
|
||||
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
|
||||
#ifdef INET6
|
||||
|
|
@ -1104,6 +1105,7 @@ findpcb:
|
|||
tp->irs = th->th_seq;
|
||||
tcp_sendseqinit(tp);
|
||||
tcp_rcvseqinit(tp);
|
||||
tp->snd_recover = tp->snd_una;
|
||||
/*
|
||||
* Initialization of the tcpcb for transaction;
|
||||
* set SND.WND = SEG.WND,
|
||||
|
|
@ -1780,10 +1782,20 @@ trimthenstep6:
|
|||
u_int win =
|
||||
min(tp->snd_wnd, tp->snd_cwnd) / 2 /
|
||||
tp->t_maxseg;
|
||||
|
||||
if (tcp_do_newreno && SEQ_LT(th->th_ack,
|
||||
tp->snd_recover)) {
|
||||
/* False retransmit, should not
|
||||
* cut window
|
||||
*/
|
||||
tp->snd_cwnd += tp->t_maxseg;
|
||||
tp->t_dupacks = 0;
|
||||
(void) tcp_output(tp);
|
||||
goto drop;
|
||||
}
|
||||
if (win < 2)
|
||||
win = 2;
|
||||
tp->snd_ssthresh = win * tp->t_maxseg;
|
||||
tp->snd_recover = tp->snd_max;
|
||||
callout_stop(tp->tt_rexmt);
|
||||
tp->t_rtttime = 0;
|
||||
tp->snd_nxt = th->th_ack;
|
||||
|
|
@ -1807,10 +1819,26 @@ trimthenstep6:
|
|||
* If the congestion window was inflated to account
|
||||
* for the other side's cached packets, retract it.
|
||||
*/
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
if (tcp_do_newreno == 0) {
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
} else if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
!tcp_newreno(tp, th)) {
|
||||
/*
|
||||
* Window inflation should have left us with approx.
|
||||
* snd_ssthresh outstanding data. But in case we
|
||||
* would be inclined to send a burst, better to do
|
||||
* it via the slow start mechanism.
|
||||
*/
|
||||
if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
|
||||
tp->snd_cwnd =
|
||||
tp->snd_max - th->th_ack + tp->t_maxseg;
|
||||
else
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
}
|
||||
if (SEQ_GT(th->th_ack, tp->snd_max)) {
|
||||
tcpstat.tcps_rcvacktoomuch++;
|
||||
goto dropafterack;
|
||||
|
|
@ -1903,7 +1931,8 @@ process_ACK:
|
|||
|
||||
if (cw > tp->snd_ssthresh)
|
||||
incr = incr * incr / cw;
|
||||
tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
|
||||
if (tcp_do_newreno == 0 || SEQ_GEQ(th->th_ack, tp->snd_recover))
|
||||
tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
|
||||
}
|
||||
if (acked > so->so_snd.sb_cc) {
|
||||
tp->snd_wnd -= so->so_snd.sb_cc;
|
||||
|
|
@ -2791,3 +2820,41 @@ tcp_mssopt(tp)
|
|||
|
||||
return rt->rt_ifp->if_mtu - min_protoh;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Checks for partial ack. If partial ack arrives, force the retransmission
|
||||
* of the next unacknowledged segment, do not clear tp->t_dupacks, and return
|
||||
* 1. By setting snd_nxt to ti_ack, this forces retransmission timer to
|
||||
* be started again. If the ack advances at least to tp->snd_recover, return 0.
|
||||
*/
|
||||
static int
|
||||
tcp_newreno(tp, th)
|
||||
struct tcpcb *tp;
|
||||
struct tcphdr *th;
|
||||
{
|
||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
|
||||
tcp_seq onxt = tp->snd_nxt;
|
||||
tcp_seq ouna = tp->snd_una; /* Haven't updated snd_una yet*/
|
||||
u_long ocwnd = tp->snd_cwnd;
|
||||
|
||||
callout_stop(tp->tt_rexmt);
|
||||
tp->t_rtttime = 0;
|
||||
tp->snd_nxt = th->th_ack;
|
||||
tp->snd_cwnd = tp->t_maxseg;
|
||||
tp->snd_una = th->th_ack;
|
||||
(void) tcp_output(tp);
|
||||
|
||||
tp->snd_cwnd = ocwnd;
|
||||
tp->snd_una = ouna;
|
||||
if (SEQ_GT(onxt, tp->snd_nxt))
|
||||
tp->snd_nxt = onxt;
|
||||
/*
|
||||
* Partial window deflation. Relies on fact that tp->snd_una
|
||||
* not updated yet.
|
||||
*/
|
||||
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -415,6 +415,11 @@ tcp_timer_rexmt(xtp)
|
|||
tp->t_srtt = 0;
|
||||
}
|
||||
tp->snd_nxt = tp->snd_una;
|
||||
/*
|
||||
* Note: We overload snd_recover to function also as the
|
||||
* snd_last variable described in RFC 2582
|
||||
*/
|
||||
tp->snd_recover = tp->snd_max;
|
||||
/*
|
||||
* Force a segment to be sent.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -119,6 +119,8 @@ struct tcpcb {
|
|||
* for slow start exponential to
|
||||
* linear switch
|
||||
*/
|
||||
tcp_seq snd_recover; /* for use in fast recovery */
|
||||
|
||||
u_int t_maxopd; /* mss plus options */
|
||||
|
||||
u_long t_rcvtime; /* inactivity time */
|
||||
|
|
@ -365,6 +367,7 @@ extern struct inpcbinfo tcbinfo;
|
|||
extern struct tcpstat tcpstat; /* tcp statistics */
|
||||
extern int tcp_mssdflt; /* XXX */
|
||||
extern int tcp_delack_enabled;
|
||||
extern int tcp_do_newreno;
|
||||
extern int ss_fltsz;
|
||||
extern int ss_fltsz_local;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue