1 /* |
1 /* |
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 |
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 |
3 * Home page: |
3 * Home page: |
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC |
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC |
5 * This is from the implementation of CUBIC TCP in |
5 * This is from the implementation of CUBIC TCP in |
6 * Injong Rhee, Lisong Xu. |
6 * Sangtae Ha, Injong Rhee and Lisong Xu, |
7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant |
7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" |
8 * in PFLDnet 2005 |
8 * in ACM SIGOPS Operating System Review, July 2008. |
9 * Available from: |
9 * Available from: |
10 * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf |
10 * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf |
|
11 * |
|
12 * CUBIC integrates a new slow start algorithm, called HyStart. |
|
13 * The details of HyStart are presented in |
|
14 * Sangtae Ha and Injong Rhee, |
|
15 * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. |
|
16 * Available from: |
|
17 * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf |
|
18 * |
|
19 * All testing results are available from: |
|
20 * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing |
11 * |
21 * |
12 * Unless CUBIC is enabled and congestion window is large |
22 * Unless CUBIC is enabled and congestion window is large |
13 * this behaves the same as the original Reno. |
23 * this behaves the same as the original Reno. |
14 */ |
24 */ |
15 |
25 |
20 |
30 |
21 #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation |
31 #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation |
22 * max_cwnd = snd_cwnd * beta |
32 * max_cwnd = snd_cwnd * beta |
23 */ |
33 */ |
24 #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
34 #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
|
35 |
|
36 /* Two methods of hybrid slow start */ |
|
37 #define HYSTART_ACK_TRAIN 0x1 |
|
38 #define HYSTART_DELAY 0x2 |
|
39 |
|
40 /* Number of delay samples for detecting the increase of delay */ |
|
41 #define HYSTART_MIN_SAMPLES 8 |
|
42 #define HYSTART_DELAY_MIN (2U<<3) |
|
43 #define HYSTART_DELAY_MAX (16U<<3) |
|
44 #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) |
25 |
45 |
26 static int fast_convergence __read_mostly = 1; |
46 static int fast_convergence __read_mostly = 1; |
27 static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
47 static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
28 static int initial_ssthresh __read_mostly; |
48 static int initial_ssthresh __read_mostly; |
29 static int bic_scale __read_mostly = 41; |
49 static int bic_scale __read_mostly = 41; |
30 static int tcp_friendliness __read_mostly = 1; |
50 static int tcp_friendliness __read_mostly = 1; |
|
51 |
|
52 static int hystart __read_mostly = 1; |
|
53 static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; |
|
54 static int hystart_low_window __read_mostly = 16; |
31 |
55 |
32 static u32 cube_rtt_scale __read_mostly; |
56 static u32 cube_rtt_scale __read_mostly; |
33 static u32 beta_scale __read_mostly; |
57 static u32 beta_scale __read_mostly; |
34 static u64 cube_factor __read_mostly; |
58 static u64 cube_factor __read_mostly; |
35 |
59 |
42 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
66 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
43 module_param(bic_scale, int, 0444); |
67 module_param(bic_scale, int, 0444); |
44 MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); |
68 MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); |
45 module_param(tcp_friendliness, int, 0644); |
69 module_param(tcp_friendliness, int, 0644); |
46 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
70 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
|
71 module_param(hystart, int, 0644); |
|
72 MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); |
|
73 module_param(hystart_detect, int, 0644); |
|
74 MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" |
|
75 " 1: packet-train 2: delay 3: both packet-train and delay"); |
|
76 module_param(hystart_low_window, int, 0644); |
|
77 MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); |
47 |
78 |
48 /* BIC TCP Parameters */ |
79 /* BIC TCP Parameters */ |
49 struct bictcp { |
80 struct bictcp { |
50 u32 cnt; /* increase cwnd by 1 after ACKs */ |
81 u32 cnt; /* increase cwnd by 1 after ACKs */ |
51 u32 last_max_cwnd; /* last maximum snd_cwnd */ |
82 u32 last_max_cwnd; /* last maximum snd_cwnd */ |
57 u32 delay_min; /* min delay */ |
88 u32 delay_min; /* min delay */ |
58 u32 epoch_start; /* beginning of an epoch */ |
89 u32 epoch_start; /* beginning of an epoch */ |
59 u32 ack_cnt; /* number of acks */ |
90 u32 ack_cnt; /* number of acks */ |
60 u32 tcp_cwnd; /* estimated tcp cwnd */ |
91 u32 tcp_cwnd; /* estimated tcp cwnd */ |
61 #define ACK_RATIO_SHIFT 4 |
92 #define ACK_RATIO_SHIFT 4 |
62 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
93 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
|
94 u8 sample_cnt; /* number of samples to decide curr_rtt */ |
|
95 u8 found; /* the exit point is found? */ |
|
96 u32 round_start; /* beginning of each round */ |
|
97 u32 end_seq; /* end_seq of the round */ |
|
98 u32 last_jiffies; /* last time when the ACK spacing is close */ |
|
99 u32 curr_rtt; /* the minimum rtt of current round */ |
63 }; |
100 }; |
64 |
101 |
65 static inline void bictcp_reset(struct bictcp *ca) |
102 static inline void bictcp_reset(struct bictcp *ca) |
66 { |
103 { |
67 ca->cnt = 0; |
104 ca->cnt = 0; |
74 ca->delay_min = 0; |
111 ca->delay_min = 0; |
75 ca->epoch_start = 0; |
112 ca->epoch_start = 0; |
76 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
113 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
77 ca->ack_cnt = 0; |
114 ca->ack_cnt = 0; |
78 ca->tcp_cwnd = 0; |
115 ca->tcp_cwnd = 0; |
|
116 ca->found = 0; |
|
117 } |
|
118 |
|
119 static inline void bictcp_hystart_reset(struct sock *sk) |
|
120 { |
|
121 struct tcp_sock *tp = tcp_sk(sk); |
|
122 struct bictcp *ca = inet_csk_ca(sk); |
|
123 |
|
124 ca->round_start = ca->last_jiffies = jiffies; |
|
125 ca->end_seq = tp->snd_nxt; |
|
126 ca->curr_rtt = 0; |
|
127 ca->sample_cnt = 0; |
79 } |
128 } |
80 |
129 |
81 static void bictcp_init(struct sock *sk) |
130 static void bictcp_init(struct sock *sk) |
82 { |
131 { |
83 bictcp_reset(inet_csk_ca(sk)); |
132 bictcp_reset(inet_csk_ca(sk)); |
84 if (initial_ssthresh) |
133 |
|
134 if (hystart) |
|
135 bictcp_hystart_reset(sk); |
|
136 |
|
137 if (!hystart && initial_ssthresh) |
85 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
138 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
86 } |
139 } |
87 |
140 |
88 /* calculate the cubic root of x using a table lookup followed by one |
141 /* calculate the cubic root of x using a table lookup followed by one |
89 * Newton-Raphson iteration. |
142 * Newton-Raphson iteration. |
233 struct bictcp *ca = inet_csk_ca(sk); |
286 struct bictcp *ca = inet_csk_ca(sk); |
234 |
287 |
235 if (!tcp_is_cwnd_limited(sk, in_flight)) |
288 if (!tcp_is_cwnd_limited(sk, in_flight)) |
236 return; |
289 return; |
237 |
290 |
238 if (tp->snd_cwnd <= tp->snd_ssthresh) |
291 if (tp->snd_cwnd <= tp->snd_ssthresh) { |
|
292 if (hystart && after(ack, ca->end_seq)) |
|
293 bictcp_hystart_reset(sk); |
239 tcp_slow_start(tp); |
294 tcp_slow_start(tp); |
240 else { |
295 } else { |
241 bictcp_update(ca, tp->snd_cwnd); |
296 bictcp_update(ca, tp->snd_cwnd); |
242 |
297 |
243 /* In dangerous area, increase slowly. |
298 /* In dangerous area, increase slowly. |
244 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd |
299 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd |
245 */ |
300 */ |
279 return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); |
334 return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); |
280 } |
335 } |
281 |
336 |
282 static void bictcp_state(struct sock *sk, u8 new_state) |
337 static void bictcp_state(struct sock *sk, u8 new_state) |
283 { |
338 { |
284 if (new_state == TCP_CA_Loss) |
339 if (new_state == TCP_CA_Loss) { |
285 bictcp_reset(inet_csk_ca(sk)); |
340 bictcp_reset(inet_csk_ca(sk)); |
|
341 bictcp_hystart_reset(sk); |
|
342 } |
|
343 } |
|
344 |
|
345 static void hystart_update(struct sock *sk, u32 delay) |
|
346 { |
|
347 struct tcp_sock *tp = tcp_sk(sk); |
|
348 struct bictcp *ca = inet_csk_ca(sk); |
|
349 |
|
350 if (!(ca->found & hystart_detect)) { |
|
351 u32 curr_jiffies = jiffies; |
|
352 |
|
353 /* first detection parameter - ack-train detection */ |
|
354 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { |
|
355 ca->last_jiffies = curr_jiffies; |
|
356 if (curr_jiffies - ca->round_start >= ca->delay_min>>4) |
|
357 ca->found |= HYSTART_ACK_TRAIN; |
|
358 } |
|
359 |
|
360 /* obtain the minimum delay of more than sampling packets */ |
|
361 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { |
|
362 if (ca->curr_rtt == 0 || ca->curr_rtt > delay) |
|
363 ca->curr_rtt = delay; |
|
364 |
|
365 ca->sample_cnt++; |
|
366 } else { |
|
367 if (ca->curr_rtt > ca->delay_min + |
|
368 HYSTART_DELAY_THRESH(ca->delay_min>>4)) |
|
369 ca->found |= HYSTART_DELAY; |
|
370 } |
|
371 /* |
|
372 * Either one of two conditions are met, |
|
373 * we exit from slow start immediately. |
|
374 */ |
|
375 if (ca->found & hystart_detect) |
|
376 tp->snd_ssthresh = tp->snd_cwnd; |
|
377 } |
286 } |
378 } |
287 |
379 |
288 /* Track delayed acknowledgment ratio using sliding window |
380 /* Track delayed acknowledgment ratio using sliding window |
289 * ratio = (15*ratio + sample) / 16 |
381 * ratio = (15*ratio + sample) / 16 |
290 */ |
382 */ |
291 static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) |
383 static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) |
292 { |
384 { |
293 const struct inet_connection_sock *icsk = inet_csk(sk); |
385 const struct inet_connection_sock *icsk = inet_csk(sk); |
|
386 const struct tcp_sock *tp = tcp_sk(sk); |
294 struct bictcp *ca = inet_csk_ca(sk); |
387 struct bictcp *ca = inet_csk_ca(sk); |
295 u32 delay; |
388 u32 delay; |
296 |
389 |
297 if (icsk->icsk_ca_state == TCP_CA_Open) { |
390 if (icsk->icsk_ca_state == TCP_CA_Open) { |
298 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; |
391 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; |
312 delay = 1; |
405 delay = 1; |
313 |
406 |
314 /* first time call or link delay decreases */ |
407 /* first time call or link delay decreases */ |
315 if (ca->delay_min == 0 || ca->delay_min > delay) |
408 if (ca->delay_min == 0 || ca->delay_min > delay) |
316 ca->delay_min = delay; |
409 ca->delay_min = delay; |
|
410 |
|
411 /* hystart triggers when cwnd is larger than some threshold */ |
|
412 if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && |
|
413 tp->snd_cwnd >= hystart_low_window) |
|
414 hystart_update(sk, delay); |
317 } |
415 } |
318 |
416 |
319 static struct tcp_congestion_ops cubictcp = { |
417 static struct tcp_congestion_ops cubictcp = { |
320 .init = bictcp_init, |
418 .init = bictcp_init, |
321 .ssthresh = bictcp_recalc_ssthresh, |
419 .ssthresh = bictcp_recalc_ssthresh, |
370 module_exit(cubictcp_unregister); |
468 module_exit(cubictcp_unregister); |
371 |
469 |
372 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
470 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
373 MODULE_LICENSE("GPL"); |
471 MODULE_LICENSE("GPL"); |
374 MODULE_DESCRIPTION("CUBIC TCP"); |
472 MODULE_DESCRIPTION("CUBIC TCP"); |
375 MODULE_VERSION("2.2"); |
473 MODULE_VERSION("2.3"); |