From 432649916b0435b608fb3e1fcb97347ac294d38d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 23 Aug 2008 13:28:27 +0200 Subject: dccp: Toggle debug output without module unloading This sets the sysfs permissions so that root can toggle the `debug' parameter available for nearly every DCCP module. This is useful since there are various module inter-dependencies. The debug flag can now be toggled at runtime using echo 1 > /sys/module/dccp/parameters/dccp_debug echo 1 > /sys/module/dccp_ccid2/parameters/ccid2_debug echo 1 > /sys/module/dccp_ccid3/parameters/ccid3_debug echo 1 > /sys/module/dccp_tfrc_lib/parameters/tfrc_debug The last is not very useful yet, since no code at the moment calls the tfrc_debug() macro. Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8e9580874216..9a430734530c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, bool, 0444); +module_param(ccid2_debug, bool, 0644); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif -- cgit v1.2.3 From 86349c8d9c6892b57aff4549256ab1aa65aed0f0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Registration routines for changing feature values Two registration routines, for SP and NN features, are provided by this patch, replacing a previous routine which was used for both feature types. These are internal-only routines and therefore start with `__feat_register'. It further exports the known limits of Sequence Window and Ack Ratio as symbolic constants. 
Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/ccids/ccid2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9a430734530c..c9ea19a4d85e 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -25,7 +25,7 @@ /* * This implementation should follow RFC 4341 */ - +#include "../feat.h" #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" @@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > 0xFFFF) /* RFC 4340, 11.3 */ - val = 0xFFFF; + if (val > DCCPF_ACK_RATIO_MAX) + val = DCCPF_ACK_RATIO_MAX; if (val == dp->dccps_l_ack_ratio) return; -- cgit v1.2.3 From 1fb87509606cb19f5f603e54c28af7da149049f3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Remove ccid2hc{tx,rx}_ prefixes This patch fixes two problems caused by the ubiquitous long "hctx->ccid2hctx_" and "hcrx->ccid2hcrx_" prefixes: * code becomes hard to read; * multiple-line statements are almost inevitable even for simple expressions; The prefixes are not really necessary (compare with "struct tcp_sock"). There had been previous discussion of this on dccp@vger, but so far this was not followed up (most people agreed that the prefixes are too long). 
Signed-off-by: Gerrit Renker Signed-off-by: Leandro Melo de Sales --- net/dccp/ccids/ccid2.c | 274 ++++++++++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 139 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c9ea19a4d85e..9728bbf0acea 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -39,16 +39,16 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; int pipe = 0; - struct ccid2_seq *seqp = hctx->ccid2hctx_seqh; + struct ccid2_seq *seqp = hctx->seqh; /* there is data in the chain */ - if (seqp != hctx->ccid2hctx_seqt) { + if (seqp != hctx->seqt) { seqp = seqp->ccid2s_prev; len++; if (!seqp->ccid2s_acked) pipe++; - while (seqp != hctx->ccid2hctx_seqt) { + while (seqp != hctx->seqt) { struct ccid2_seq *prev = seqp->ccid2s_prev; len++; @@ -65,16 +65,16 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) } } - BUG_ON(pipe != hctx->ccid2hctx_pipe); + BUG_ON(pipe != hctx->pipe); ccid2_pr_debug("len of chain=%d\n", len); do { seqp = seqp->ccid2s_prev; len++; - } while (seqp != hctx->ccid2hctx_seqh); + } while (seqp != hctx->seqh); ccid2_pr_debug("total len=%d\n", len); - BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); + BUG_ON(len != hctx->seqbufc * CCID2_SEQBUF_LEN); } #else #define ccid2_pr_debug(format, a...) @@ -87,8 +87,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) int i; /* check if we have space to preserve the pointer to the buffer */ - if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / - sizeof(struct ccid2_seq*))) + if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *)) return -ENOMEM; /* allocate buffer and initialize linked list */ @@ -104,20 +103,20 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. 
Initiate the head and tail. */ - if (hctx->ccid2hctx_seqbufc == 0) - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; + if (hctx->seqbufc == 0) + hctx->seqh = hctx->seqt = seqp; else { /* link the existing list with the one we just created */ - hctx->ccid2hctx_seqh->ccid2s_next = seqp; - seqp->ccid2s_prev = hctx->ccid2hctx_seqh; + hctx->seqh->ccid2s_next = seqp; + seqp->ccid2s_prev = hctx->seqh; - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; - seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; + hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt; } /* store the original pointer to the buffer so we can free it */ - hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; - hctx->ccid2hctx_seqbufc++; + hctx->seqbuf[hctx->seqbufc] = seqp; + hctx->seqbufc++; return 0; } @@ -126,7 +125,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) + if (hctx->pipe < hctx->cwnd) return 0; return 1; /* XXX CCID should dequeue when ready instead of polling */ @@ -135,7 +134,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { struct dccp_sock *dp = dccp_sk(sk); - u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2); + u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2); /* * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from @@ -160,7 +159,7 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) { ccid2_pr_debug("change SRTT to %ld\n", val); - hctx->ccid2hctx_srtt = val; + hctx->srtt = val; } static void ccid2_start_rto_timer(struct sock *sk); @@ -173,8 +172,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) bh_lock_sock(sk); if 
(sock_owned_by_user(sk)) { - sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, - jiffies + HZ / 5); + sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5); goto out; } @@ -183,28 +181,28 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_hc_tx_check_sanity(hctx); /* back-off timer */ - hctx->ccid2hctx_rto <<= 1; + hctx->rto <<= 1; - s = hctx->ccid2hctx_rto / HZ; + s = hctx->rto / HZ; if (s > 60) - hctx->ccid2hctx_rto = 60 * HZ; + hctx->rto = 60 * HZ; ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2; - if (hctx->ccid2hctx_ssthresh < 2) - hctx->ccid2hctx_ssthresh = 2; - hctx->ccid2hctx_cwnd = 1; - hctx->ccid2hctx_pipe = 0; + hctx->ssthresh = hctx->cwnd / 2; + if (hctx->ssthresh < 2) + hctx->ssthresh = 2; + hctx->cwnd = 1; + hctx->pipe = 0; /* clear state about stuff we sent */ - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; - hctx->ccid2hctx_packets_acked = 0; + hctx->seqt = hctx->seqh; + hctx->packets_acked = 0; /* clear ack ratio state. 
*/ - hctx->ccid2hctx_rpseq = 0; - hctx->ccid2hctx_rpdupack = -1; + hctx->rpseq = 0; + hctx->rpdupack = -1; ccid2_change_l_ack_ratio(sk, 1); ccid2_hc_tx_check_sanity(hctx); out: @@ -216,11 +214,11 @@ static void ccid2_start_rto_timer(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto); + ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->rto); - BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer)); - sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, - jiffies + hctx->ccid2hctx_rto); + BUG_ON(timer_pending(&hctx->rtotimer)); + sk_reset_timer(sk, &hctx->rtotimer, + jiffies + hctx->rto); } static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) @@ -229,27 +227,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); struct ccid2_seq *next; - hctx->ccid2hctx_pipe++; + hctx->pipe++; - hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss; - hctx->ccid2hctx_seqh->ccid2s_acked = 0; - hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; + hctx->seqh->ccid2s_seq = dp->dccps_gss; + hctx->seqh->ccid2s_acked = 0; + hctx->seqh->ccid2s_sent = jiffies; - next = hctx->ccid2hctx_seqh->ccid2s_next; + next = hctx->seqh->ccid2s_next; /* check if we need to alloc more space */ - if (next == hctx->ccid2hctx_seqt) { + if (next == hctx->seqt) { if (ccid2_hc_tx_alloc_seq(hctx)) { DCCP_CRIT("packet history - out of memory!"); /* FIXME: find a more graceful way to bail out */ return; } - next = hctx->ccid2hctx_seqh->ccid2s_next; - BUG_ON(next == hctx->ccid2hctx_seqt); + next = hctx->seqh->ccid2s_next; + BUG_ON(next == hctx->seqt); } - hctx->ccid2hctx_seqh = next; + hctx->seqh = next; - ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, - hctx->ccid2hctx_pipe); + ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe); /* * FIXME: The code below is broken and the variables have been removed @@ -272,12 +269,12 @@ 
static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) */ #if 0 /* Ack Ratio. Need to maintain a concept of how many windows we sent */ - hctx->ccid2hctx_arsent++; + hctx->arsent++; /* We had an ack loss in this window... */ - if (hctx->ccid2hctx_ackloss) { - if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) { - hctx->ccid2hctx_arsent = 0; - hctx->ccid2hctx_ackloss = 0; + if (hctx->ackloss) { + if (hctx->arsent >= hctx->cwnd) { + hctx->arsent = 0; + hctx->ackloss = 0; } } else { /* No acks lost up to now... */ @@ -287,28 +284,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - dp->dccps_l_ack_ratio; - denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom; + denom = hctx->cwnd * hctx->cwnd / denom; - if (hctx->ccid2hctx_arsent >= denom) { + if (hctx->arsent >= denom) { ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); - hctx->ccid2hctx_arsent = 0; + hctx->arsent = 0; } } else { /* we can't increase ack ratio further [1] */ - hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ + hctx->arsent = 0; /* or maybe set it to cwnd*/ } } #endif /* setup RTO timer */ - if (!timer_pending(&hctx->ccid2hctx_rtotimer)) + if (!timer_pending(&hctx->rtotimer)) ccid2_start_rto_timer(sk); #ifdef CONFIG_IP_DCCP_CCID2_DEBUG do { - struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; + struct ccid2_seq *seqp = hctx->seqt; - while (seqp != hctx->ccid2hctx_seqh) { + while (seqp != hctx->seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", (unsigned long long)seqp->ccid2s_seq, seqp->ccid2s_acked, seqp->ccid2s_sent); @@ -386,7 +383,7 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer); + sk_stop_timer(sk, &hctx->rtotimer); ccid2_pr_debug("deleted RTO timer\n"); } @@ -396,73 +393,73 @@ static inline void ccid2_new_ack(struct sock *sk, { struct 
ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { - if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) { - hctx->ccid2hctx_cwnd += 1; - *maxincr -= 1; - hctx->ccid2hctx_packets_acked = 0; + if (hctx->cwnd < hctx->ssthresh) { + if (*maxincr > 0 && ++hctx->packets_acked == 2) { + hctx->cwnd += 1; + *maxincr -= 1; + hctx->packets_acked = 0; } - } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) { - hctx->ccid2hctx_cwnd += 1; - hctx->ccid2hctx_packets_acked = 0; + } else if (++hctx->packets_acked >= hctx->cwnd) { + hctx->cwnd += 1; + hctx->packets_acked = 0; } /* update RTO */ - if (hctx->ccid2hctx_srtt == -1 || - time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { + if (hctx->srtt == -1 || + time_after(jiffies, hctx->lastrtt + hctx->srtt)) { unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; int s; /* first measurement */ - if (hctx->ccid2hctx_srtt == -1) { + if (hctx->srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", r, jiffies, (unsigned long long)seqp->ccid2s_seq); ccid2_change_srtt(hctx, r); - hctx->ccid2hctx_rttvar = r >> 1; + hctx->rttvar = r >> 1; } else { /* RTTVAR */ - long tmp = hctx->ccid2hctx_srtt - r; + long tmp = hctx->srtt - r; long srtt; if (tmp < 0) tmp *= -1; tmp >>= 2; - hctx->ccid2hctx_rttvar *= 3; - hctx->ccid2hctx_rttvar >>= 2; - hctx->ccid2hctx_rttvar += tmp; + hctx->rttvar *= 3; + hctx->rttvar >>= 2; + hctx->rttvar += tmp; /* SRTT */ - srtt = hctx->ccid2hctx_srtt; + srtt = hctx->srtt; srtt *= 7; srtt >>= 3; tmp = r >> 3; srtt += tmp; ccid2_change_srtt(hctx, srtt); } - s = hctx->ccid2hctx_rttvar << 2; + s = hctx->rttvar << 2; /* clock granularity is 1 when based on jiffies */ if (!s) s = 1; - hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s; + hctx->rto = hctx->srtt + s; /* must be at least a second */ - s = hctx->ccid2hctx_rto / HZ; + s = hctx->rto / HZ; /* DCCP doesn't require this [but I like it cuz my code sux] */ #if 1 if (s < 1) - 
hctx->ccid2hctx_rto = HZ; + hctx->rto = HZ; #endif /* max 60 seconds */ if (s > 60) - hctx->ccid2hctx_rto = HZ * 60; + hctx->rto = HZ * 60; - hctx->ccid2hctx_lastrtt = jiffies; + hctx->lastrtt = jiffies; ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, - hctx->ccid2hctx_rto, HZ, r); + hctx->srtt, hctx->rttvar, + hctx->rto, HZ, r); } /* we got a new ack, so re-start RTO timer */ @@ -474,12 +471,12 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_pipe == 0) + if (hctx->pipe == 0) DCCP_BUG("pipe == 0"); else - hctx->ccid2hctx_pipe--; + hctx->pipe--; - if (hctx->ccid2hctx_pipe == 0) + if (hctx->pipe == 0) ccid2_hc_tx_kill_rto_timer(sk); } @@ -487,19 +484,19 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { + if (time_before(seqp->ccid2s_sent, hctx->last_cong)) { ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); return; } - hctx->ccid2hctx_last_cong = jiffies; + hctx->last_cong = jiffies; - hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U; - hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U); + hctx->cwnd = hctx->cwnd / 2 ? : 1U; + hctx->ssthresh = max(hctx->cwnd, 2U); /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd) - ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd); + if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd) + ccid2_change_l_ack_ratio(sk, hctx->cwnd); } static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -523,21 +520,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * -sorbo. 
*/ /* need to bootstrap */ - if (hctx->ccid2hctx_rpdupack == -1) { - hctx->ccid2hctx_rpdupack = 0; - hctx->ccid2hctx_rpseq = seqno; + if (hctx->rpdupack == -1) { + hctx->rpdupack = 0; + hctx->rpseq = seqno; } else { /* check if packet is consecutive */ - if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) - hctx->ccid2hctx_rpseq = seqno; + if (dccp_delta_seqno(hctx->rpseq, seqno) == 1) + hctx->rpseq = seqno; /* it's a later packet */ - else if (after48(seqno, hctx->ccid2hctx_rpseq)) { - hctx->ccid2hctx_rpdupack++; + else if (after48(seqno, hctx->rpseq)) { + hctx->rpdupack++; /* check if we got enough dupacks */ - if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) { - hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ - hctx->ccid2hctx_rpseq = 0; + if (hctx->rpdupack >= NUMDUPACK) { + hctx->rpdupack = -1; /* XXX lame */ + hctx->rpseq = 0; ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); } @@ -546,7 +543,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* check forward path congestion */ /* still didn't send out new data packets */ - if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) + if (hctx->seqh == hctx->seqt) return; switch (DCCP_SKB_CB(skb)->dccpd_type) { @@ -558,14 +555,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - if (after48(ackno, hctx->ccid2hctx_high_ack)) - hctx->ccid2hctx_high_ack = ackno; + if (after48(ackno, hctx->high_ack)) + hctx->high_ack = ackno; - seqp = hctx->ccid2hctx_seqt; + seqp = hctx->seqt; while (before48(seqp->ccid2s_seq, ackno)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->ccid2hctx_seqh) { - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (seqp == hctx->seqh) { + seqp = hctx->seqh->ccid2s_prev; break; } } @@ -575,7 +572,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * packets per acknowledgement. Rounding up avoids that cwnd is not * advanced when Ack Ratio is 1 and gives a slight edge otherwise. 
*/ - if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) + if (hctx->cwnd < hctx->ssthresh) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ @@ -594,7 +591,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * seqnos. */ while (after48(seqp->ccid2s_seq, ackno)) { - if (seqp == hctx->ccid2hctx_seqt) { + if (seqp == hctx->seqt) { done = 1; break; } @@ -626,7 +623,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) (unsigned long long)seqp->ccid2s_seq); ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->ccid2hctx_seqt) { + if (seqp == hctx->seqt) { done = 1; break; } @@ -645,11 +642,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* The state about what is acked should be correct now * Check for NUMDUPACK */ - seqp = hctx->ccid2hctx_seqt; - while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { + seqp = hctx->seqt; + while (before48(seqp->ccid2s_seq, hctx->high_ack)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->ccid2hctx_seqh) { - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (seqp == hctx->seqh) { + seqp = hctx->seqh->ccid2s_prev; break; } } @@ -660,7 +657,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (done == NUMDUPACK) break; } - if (seqp == hctx->ccid2hctx_seqt) + if (seqp == hctx->seqt) break; seqp = seqp->ccid2s_prev; } @@ -683,20 +680,20 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid2_congestion_event(sk, seqp); ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->ccid2hctx_seqt) + if (seqp == hctx->seqt) break; seqp = seqp->ccid2s_prev; } - hctx->ccid2hctx_seqt = last_acked; + hctx->seqt = last_acked; } /* trim acked packets in tail */ - while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) { - if (!hctx->ccid2hctx_seqt->ccid2s_acked) + while (hctx->seqt != hctx->seqh) { + if (!hctx->seqt->ccid2s_acked) break; - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; 
+ hctx->seqt = hctx->seqt->ccid2s_next; } ccid2_hc_tx_check_sanity(hctx); @@ -709,17 +706,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) u32 max_ratio; /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ - hctx->ccid2hctx_ssthresh = ~0U; + hctx->ssthresh = ~0U; /* * RFC 4341, 5: "The cwnd parameter is initialized to at most four * packets for new connections, following the rules from [RFC3390]". * We need to convert the bytes of RFC3390 into the packets of RFC 4341. */ - hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + hctx->cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); /* Make sure that Ack Ratio is enabled and within bounds. */ - max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); + max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) dp->dccps_l_ack_ratio = max_ratio; @@ -727,13 +724,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; - hctx->ccid2hctx_rto = 3 * HZ; + hctx->rto = 3 * HZ; ccid2_change_srtt(hctx, -1); - hctx->ccid2hctx_rttvar = -1; - hctx->ccid2hctx_rpdupack = -1; - hctx->ccid2hctx_last_cong = jiffies; - setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire, - (unsigned long)sk); + hctx->rttvar = -1; + hctx->rpdupack = -1; + hctx->last_cong = jiffies; + setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); ccid2_hc_tx_check_sanity(hctx); return 0; @@ -746,9 +742,9 @@ static void ccid2_hc_tx_exit(struct sock *sk) ccid2_hc_tx_kill_rto_timer(sk); - for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) - kfree(hctx->ccid2hctx_seqbuf[i]); - hctx->ccid2hctx_seqbufc = 0; + for (i = 0; i < hctx->seqbufc; i++) + kfree(hctx->seqbuf[i]); + hctx->seqbufc = 0; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -759,10 +755,10 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch 
(DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_DATA: case DCCP_PKT_DATAACK: - hcrx->ccid2hcrx_data++; - if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) { + hcrx->data++; + if (hcrx->data >= dp->dccps_r_ack_ratio) { dccp_send_ack(sk); - hcrx->ccid2hcrx_data = 0; + hcrx->data = 0; } break; } -- cgit v1.2.3 From c506d91d9ab7681e058afcd750e9118c6cdaabc1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Unused argument in CCID tx function This removes the argument `more' from ccid_hc_tx_packet_sent, since it was nowhere used in the entire code. (Anecdotally, this argument was not even used in the original KAME code where the function originally came from; compare the variable moreToSend in the freebsd61-dccp-kame-28.08.2006.patch now maintained by Emmanuel Lochin.) Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9728bbf0acea..f56ab68a4b78 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -221,7 +221,7 @@ static void ccid2_start_rto_timer(struct sock *sk) jiffies + hctx->rto); } -static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) +static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); -- cgit v1.2.3 From ff49e27089ec363b7fc3849504e0435d447ab18a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Ack Vector interface clean-up This patch brings the Ack Vector interface up to date. Its main purpose is to lay the basis for the subsequent patches of this set, which will use the new data structure fields and routines. 
There are no real algorithmic changes, rather an adaptation: (1) Replaced the static Ack Vector size (2) with a #define so that it can be adapted (with low loss / Ack Ratio, a value of 1 works, so 2 seems to be sufficient for the moment) and added a solution so that computing the ECN nonce will continue to work - even with larger Ack Vectors. (2) Replaced the #defines for Ack Vector states with a complete enum. (3) Replaced #defines to compute Ack Vector length and state with general purpose routines (inlines), and updated code to use these. (4) Added a `tail' field (conversion to circular buffer in subsequent patch). (5) Updated the (outdated) documentation for Ack Vector struct. (6) All sequence number containers now trimmed to 48 bits. (7) Removal of unused bits: * removed dccpav_ack_nonce from struct dccp_ackvec, since this is already redundantly stored in the `dccpavr_ack_nonce' (of Ack Vector record); * removed Elapsed Time for Ack Vectors (it was nowhere used); * replaced semantics of dccpavr_sent_len with dccpavr_ack_runlen, since the code needs to be able to remember the old run length; * reduced the de-/allocation routines (redundant / duplicate tests). Justification for removing Elapsed Time information [can be removed]: --------------------------------------------------------------------- 1. The Elapsed Time information for Ack Vectors was nowhere used in the code. 2. DCCP does not implement rate-based pacing of acknowledgments. The only recommendation for always including Elapsed Time is in section 11.3 of RFC 4340: "Receivers that rate-pace acknowledgements SHOULD [...] include Elapsed Time options". But such is not the case here. 3. It does not really improve estimation accuracy. The Elapsed Time field only records the time between the arrival of the last acknowledgeable packet and the time the Ack Vector is sent out. 
Since Linux does not (yet) implement delayed Acks, the time difference will typically be small, since often the arrival of a data packet triggers sending feedback at the HC-receiver. Justification for changes in de-/allocation routines [can be removed]: ---------------------------------------------------------------------- * INIT_LIST_HEAD in dccp_ackvec_record_new was redundant, since the list pointers were later overwritten when the node was added via list_add(); * dccp_ackvec_record_new() was called in a single place only; * calls to list_del_init() before calling dccp_ackvec_record_delete() were redundant, since subsequently the entire element was k-freed; * since all calls to dccp_ackvec_record_delete() were preceded by a call to list_del_init(), the WARN_ON test would never evaluate to true; * since all calls to dccp_ackvec_record_delete() were made from within list_for_each_entry_safe(), the test for avr == NULL was redundant; * list_empty() in ackvec_free was redundant, since the same condition is embedded in the loop condition of the subsequent list_for_each_entry_safe(). 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f56ab68a4b78..813d5cd40e8b 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -580,8 +580,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) &vector, &veclen)) != -1) { /* go through this ack vector */ while (veclen--) { - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - u64 ackno_end_rl = SUB48(ackno, rl); + u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector)); ccid2_pr_debug("ackvec start:%llu end:%llu\n", (unsigned long long)ackno, @@ -604,17 +603,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = *vector & - DCCP_ACKVEC_STATE_MASK; + const u8 state = dccp_ackvec_state(vector); /* new packet received or marked */ - if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (state != DCCPAV_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == - DCCP_ACKVEC_STATE_ECN_MARKED) { + if (state == DCCPAV_ECN_MARKED) ccid2_congestion_event(sk, seqp); - } else + else ccid2_new_ack(sk, seqp, &maxincr); -- cgit v1.2.3 From c8bf462bc567c3dcb083ff95cc13060dd06f138c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Separate option parsing from CCID processing This patch replaces an almost identical replication of code: large parts of dccp_parse_options() re-appeared as ccid2_ackvector() in ccid2.c. Apart from the duplication, this caused two more problems: 1. CCIDs should not need to be concerned with parsing header options; 2. one can not assume that Ack Vectors appear as a contiguous area within an skb, it is legal to insert other options and/or padding in between. The current code would throw an error and stop reading in such a case. 
The patch provides a new data structure and associated list housekeeping. Only small changes were necessary to integrate with CCID-2: data structure initialisation, adapt list traversal routine, and add call to the provided cleanup routine. The latter also led to fixing the following BUG: CCID-2 so far ignored Ack Vectors on all packets other than Ack/DataAck, which is incorrect, since Ack Vectors can be present on any packet that has an Ack field. Details: -------- * received Ack Vectors are parsed by dccp_parse_options() alone, which passes the result on to the CCID-specific routine ccid_hc_tx_parse_options(); * CCIDs interested in using/decoding Ack Vector information will add code to fetch parsed Ack Vectors via this interface; * a data structure, `struct dccp_ackvec_parsed' is provided as interface; * this structure arranges Ack Vectors of the same skb into a FIFO order; * a doubly-linked list is used to keep the required FIFO code small. Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 135 +++++++++++++++---------------------------------- 1 file changed, 42 insertions(+), 93 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 813d5cd40e8b..bbf16b35734d 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -317,68 +317,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) #endif } -/* XXX Lame code duplication! - * returns -1 if none was found. - * else returns the next offset to use in the function call. 
- */ -static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, - unsigned char **vec, unsigned char *veclen) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); - unsigned char *opt_ptr; - const unsigned char *opt_end = (unsigned char *)dh + - (dh->dccph_doff * 4); - unsigned char opt, len; - unsigned char *value; - - BUG_ON(offset < 0); - options += offset; - opt_ptr = options; - if (opt_ptr >= opt_end) - return -1; - - while (opt_ptr != opt_end) { - opt = *opt_ptr++; - len = 0; - value = NULL; - - /* Check if this isn't a single byte option */ - if (opt > DCCPO_MAX_RESERVED) { - if (opt_ptr == opt_end) - goto out_invalid_option; - - len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; - /* - * Remove the type and len fields, leaving - * just the value size - */ - len -= 2; - value = opt_ptr; - opt_ptr += len; - - if (opt_ptr > opt_end) - goto out_invalid_option; - } - - switch (opt) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - *vec = value; - *veclen = len; - return offset + (opt_ptr - options); - } - } - - return -1; - -out_invalid_option: - DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); - return -1; -} - static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); @@ -499,15 +437,27 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) ccid2_change_l_ack_ratio(sk, hctx->cwnd); } +static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, + u8 option, u8 *optval, u8 optlen) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + switch (option) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen, + option - DCCPO_ACK_VECTOR_0); + } + return 0; +} + static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct 
ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; - unsigned char *vector; - unsigned char veclen; - int offset = 0; int done = 0; unsigned int maxincr = 0; @@ -542,17 +492,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* check forward path congestion */ - /* still didn't send out new data packets */ - if (hctx->seqh == hctx->seqt) + if (dccp_packet_without_ack(skb)) return; - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_ACK: - case DCCP_PKT_DATAACK: - break; - default: - return; - } + /* still didn't send out new data packets */ + if (hctx->seqh == hctx->seqt) + goto done; ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; if (after48(ackno, hctx->high_ack)) @@ -576,15 +521,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ - while ((offset = ccid2_ackvector(sk, skb, offset, - &vector, &veclen)) != -1) { + list_for_each_entry(avp, &hctx->av_chunks, node) { /* go through this ack vector */ - while (veclen--) { - u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector)); + for (; avp->len--; avp->vec++) { + u64 ackno_end_rl = SUB48(ackno, + dccp_ackvec_runlen(avp->vec)); - ccid2_pr_debug("ackvec start:%llu end:%llu\n", + ccid2_pr_debug("ackvec %llu |%u,%u|\n", (unsigned long long)ackno, - (unsigned long long)ackno_end_rl); + dccp_ackvec_state(avp->vec) >> 6, + dccp_ackvec_runlen(avp->vec)); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. 
@@ -603,7 +549,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = dccp_ackvec_state(vector); + const u8 state = dccp_ackvec_state(avp->vec); /* new packet received or marked */ if (state != DCCPAV_NOT_RECEIVED && @@ -630,7 +576,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) break; ackno = SUB48(ackno_end_rl, 1); - vector++; } if (done) break; @@ -694,6 +639,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } ccid2_hc_tx_check_sanity(hctx); +done: + dccp_ackvec_parsed_cleanup(&hctx->av_chunks); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) @@ -727,6 +674,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->rpdupack = -1; hctx->last_cong = jiffies; setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); + INIT_LIST_HEAD(&hctx->av_chunks); ccid2_hc_tx_check_sanity(hctx); return 0; @@ -762,17 +710,18 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } static struct ccid_operations ccid2 = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_owner = THIS_MODULE, - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, + .ccid_id = DCCPC_CCID2, + .ccid_name = "TCP-like", + .ccid_owner = THIS_MODULE, + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + 
.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -- cgit v1.2.3 From 83337dae6ca94d801b6700600244865cd694205b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Stop polling This updates CCID2 to use the CCID dequeuing mechanism, converting from previous constant-polling to a now event-driven mechanism. Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index bbf16b35734d..c7d83e3c1648 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -123,12 +123,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - - if (hctx->pipe < hctx->cwnd) - return 0; - - return 1; /* XXX CCID should dequeue when ready instead of polling */ + if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) + return CCID_PACKET_WILL_DEQUEUE_LATER; + return CCID_PACKET_SEND_AT_ONCE; } static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) @@ -168,6 +165,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); long s; bh_lock_sock(sk); @@ -187,8 +185,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) if (s > 60) hctx->rto = 60 * HZ; - ccid2_start_rto_timer(sk); - /* adjust pipe, cwnd etc */ hctx->ssthresh = hctx->cwnd / 2; if (hctx->ssthresh < 2) @@ -205,6 +201,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) hctx->rpdupack 
= -1; ccid2_change_l_ack_ratio(sk, 1); ccid2_hc_tx_check_sanity(hctx); + + /* if we were blocked before, we may now send cwnd=1 packet */ + if (sender_was_blocked) + tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + ccid2_start_rto_timer(sk); out: bh_unlock_sock(sk); sock_put(sk); @@ -455,6 +456,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; @@ -640,6 +642,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid2_hc_tx_check_sanity(hctx); done: + /* check if incoming Acks allow pending packets to be sent */ + if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) + tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); dccp_ackvec_parsed_cleanup(&hctx->av_chunks); } -- cgit v1.2.3 From c6f0f2e71f3088a0f05502d6adb0f667b84028c3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Remove redundant sanity tests This removes the ccid2_hc_tx_check_sanity function: it is redundant. Details: ======== The tx_check_sanity function performs three tests: 1) it checks that the circular TX list is sorted - in ascending order of sequence number (ccid2s_seq) - and time (ccid2s_sent), - in the direction from `tail' (hctx_seqt) to `head' (hctx_seqh); 2) it ensures that the entire list has the length seqbufc * CCID2_SEQBUF_LEN; 3) it ensures that pipe equals the number of packets that were not marked `acked' (ccid2s_acked) between `tail' and `head'. The following argues that each of these tests is redundant, this can be verified by going through the code. 
(1) is not necessary, since both time and GSS increase from one packet to the next, so that subsequent insertions in tx_packet_sent (which advance the `head' pointer) will be in ascending order of time and sequence number. In (2), the length of the list is always equal to seqbufc times CCID2_SEQBUF_LEN (set to 1024) unless allocation caused an earlier failure, because: * at initialisation (tx_init), there is one chunk of size 1024 and seqbufc=1; * subsequent calls to tx_alloc_seq take place whenever head->next == tail in tx_packet_sent; then a new chunk of size 1024 is inserted between head and tail, and seqbufc is incremented by one. To show that (3) is redundant requires looking at two cases. The `pipe' variable of the TX socket is incremented only in tx_packet_sent, and decremented in tx_packet_recv. When head == tail (TX history empty) then pipe should be 0, which is the case directly after initialisation and after a retransmission timeout has occurred (ccid2_hc_tx_rto_expire). The first case involves parsing Ack Vectors for packets recorded in the live portion of the buffer, between tail and head. For each packet marked by the receiver as received (state 0) or ECN-marked (state 1), pipe is decremented by one, so for all such packets the BUG_ON in tx_check_sanity will not trigger. The second case is the loss detection in the second half of tx_packet_recv, below the comment "Check for NUMDUPACK". The first while-loop here ensures that the sequence number of `seqp' is either above or equal to `high_ack', or otherwise equal to the highest sequence number sent so far (of the entry head->prev, as head points to the next unsent entry). The next while-loop ("while (1)") counts the number of acked packets starting from that position of seqp, going backwards in the direction from head->prev to tail. If NUMDUPACK=3 such packets were counted within this loop, `seqp' points to the last acknowledged packet of these, and the "if (done == NUMDUPACK)" block is entered next. 
The while-loop contained within that block in turn traverses the list backwards, from head to tail; the position of `seqp' is saved in the variable `last_acked'. For each packet not marked as `acked', a congestion event is triggered within the loop, and pipe is decremented. The loop terminates when `seqp' has reached `tail', whereupon tail is set to the position previously stored in `last_acked'. Thus, between `last_acked' and the previous position of `tail', - pipe has been decremented earlier if the packet was marked as state 0 or 1; - pipe was decremented if the packet was not marked as acked. That is, pipe has been decremented by the number of packets between `last_acked' and the previous position of `tail'. As a consequence, pipe now again reflects the number of packets which have not (yet) been acked between the new position of tail (at `last_acked') and head->prev, or 0 if head==tail. The result is that the BUG_ON condition in check_sanity will also not be triggered, hence the test (3) is also redundant. Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 51 -------------------------------------------------- 1 file changed, 51 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c7d83e3c1648..3b2548bd73f3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -34,51 +34,8 @@ #ifdef CONFIG_IP_DCCP_CCID2_DEBUG static int ccid2_debug; #define ccid2_pr_debug(format, a...) 
DCCP_PR_DEBUG(ccid2_debug, format, ##a) - -static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) -{ - int len = 0; - int pipe = 0; - struct ccid2_seq *seqp = hctx->seqh; - - /* there is data in the chain */ - if (seqp != hctx->seqt) { - seqp = seqp->ccid2s_prev; - len++; - if (!seqp->ccid2s_acked) - pipe++; - - while (seqp != hctx->seqt) { - struct ccid2_seq *prev = seqp->ccid2s_prev; - - len++; - if (!prev->ccid2s_acked) - pipe++; - - /* packets are sent sequentially */ - BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, - prev->ccid2s_seq ) >= 0); - BUG_ON(time_before(seqp->ccid2s_sent, - prev->ccid2s_sent)); - - seqp = prev; - } - } - - BUG_ON(pipe != hctx->pipe); - ccid2_pr_debug("len of chain=%d\n", len); - - do { - seqp = seqp->ccid2s_prev; - len++; - } while (seqp != hctx->seqh); - - ccid2_pr_debug("total len=%d\n", len); - BUG_ON(len != hctx->seqbufc * CCID2_SEQBUF_LEN); -} #else #define ccid2_pr_debug(format, a...) -#define ccid2_hc_tx_check_sanity(hctx) #endif static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) @@ -176,8 +133,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_pr_debug("RTO_EXPIRE\n"); - ccid2_hc_tx_check_sanity(hctx); - /* back-off timer */ hctx->rto <<= 1; @@ -200,7 +155,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) hctx->rpseq = 0; hctx->rpdupack = -1; ccid2_change_l_ack_ratio(sk, 1); - ccid2_hc_tx_check_sanity(hctx); /* if we were blocked before, we may now send cwnd=1 packet */ if (sender_was_blocked) @@ -314,7 +268,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) } } while (0); ccid2_pr_debug("=========\n"); - ccid2_hc_tx_check_sanity(hctx); #endif } @@ -463,7 +416,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) int done = 0; unsigned int maxincr = 0; - ccid2_hc_tx_check_sanity(hctx); /* check reverse path congestion */ seqno = DCCP_SKB_CB(skb)->dccpd_seq; @@ -640,7 +592,6 @@ static void ccid2_hc_tx_packet_recv(struct sock 
*sk, struct sk_buff *skb) hctx->seqt = hctx->seqt->ccid2s_next; } - ccid2_hc_tx_check_sanity(hctx); done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) @@ -680,8 +631,6 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->last_cong = jiffies; setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); INIT_LIST_HEAD(&hctx->av_chunks); - - ccid2_hc_tx_check_sanity(hctx); return 0; } -- cgit v1.2.3 From e9803c0104564698d3b8e84ccdb0b8b0e65427e2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Simplify dec_pipe and rearming of RTO timer This removes the dec_pipe function and improves the way the RTO timer is rearmed when a new acknowledgment comes in. Details and justification for removal: -------------------------------------- 1) The BUG_ON in dec_pipe is never triggered: pipe is only decremented for TX history entries between tail and head, for which it had previously been incremented in tx_packet_sent; and it is not decremented twice for the same entry, since it is - either decremented when a corresponding Ack Vector cell in state 0 or 1 was received (and then ccid2s_acked==1), - or it is decremented when ccid2s_acked==0, as part of the loss detection in tx_packet_recv (and hence it can not have been decremented earlier). 2) Restarting the RTO timer happens for every single entry in each Ack Vector parsed by tx_packet_recv (according to RFC 4340, 11.4 this can happen up to 16192 times per Ack Vector). 3) The RTO timer should not be restarted when all outstanding data has been acknowledged. This is currently done similar to (2), in dec_pipe, when pipe has reached 0. The patch consolidates the code which rearms the RTO timer, combining the segments from new_ack and dec_pipe. As a result, the code becomes clearer (compare with tcp_rearm_rto()).
Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 3b2548bd73f3..fa074d442065 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -353,23 +353,6 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->srtt, hctx->rttvar, hctx->rto, HZ, r); } - - /* we got a new ack, so re-start RTO timer */ - ccid2_hc_tx_kill_rto_timer(sk); - ccid2_start_rto_timer(sk); -} - -static void ccid2_hc_tx_dec_pipe(struct sock *sk) -{ - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - - if (hctx->pipe == 0) - DCCP_BUG("pipe == 0"); - else - hctx->pipe--; - - if (hctx->pipe == 0) - ccid2_hc_tx_kill_rto_timer(sk); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) @@ -518,7 +501,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) seqp->ccid2s_acked = 1; ccid2_pr_debug("Got ack for %llu\n", (unsigned long long)seqp->ccid2s_seq); - ccid2_hc_tx_dec_pipe(sk); + hctx->pipe--; } if (seqp == hctx->seqt) { done = 1; @@ -574,7 +557,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * one ack vector. 
*/ ccid2_congestion_event(sk, seqp); - ccid2_hc_tx_dec_pipe(sk); + hctx->pipe--; } if (seqp == hctx->seqt) break; @@ -592,6 +575,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->seqt = hctx->seqt->ccid2s_next; } + /* restart RTO timer if not all outstanding data has been acked */ + if (hctx->pipe == 0) + sk_stop_timer(sk, &hctx->rtotimer); + else + sk_reset_timer(sk, &hctx->rtotimer, + jiffies + hctx->rto); done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) -- cgit v1.2.3 From 1435562d7e0412e4885b661843f69859013f9d25 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Replace broken RTT estimator with better algorithm The current CCID-2 RTT estimator code is in parts broken and lags behind the suggestions in RFC2988 of using scaled variants for SRTT/RTTVAR. That code is replaced by the present patch, which reuses the Linux TCP RTT estimator code - reasons for this code duplication are given below. Further details: ---------------- 1. The minimum RTO of previously one second has been replaced with TCP's, since RFC4341, sec. 5 says that the minimum of 1 sec. (suggested in RFC2988, 2.4) is not necessary. Instead, the TCP_RTO_MIN is used, which agrees with DCCP's concept of a default RTT (RFC 4340, 3.4). 2. The maximum RTO has been set to DCCP_RTO_MAX (64 sec), which agrees with RFC2988, (2.5). 3. De-inlined the function ccid2_new_ack(). 4. Added a FIXME: the RTT is sampled several times per Ack Vector, which will give the wrong estimate. It should be replaced with one sample per Ack. 
However, at the moment this can not be resolved easily, since - it depends on TX history code (which also needs some work), - the cleanest solution is not to use the `sent' time at all (saves 4 bytes per entry) and use DCCP timestamps / elapsed time to estimate the RTT, which however is non-trivial to get right (but needs to be done). Reasons for reusing the Linux TCP estimator algorithm: ------------------------------------------------------ Some time was spent to find a better alternative, using basic RFC2988 as a first step. Further analysis and experimentation showed that the Linux TCP RTO estimator is superior to a basic RFC2988 implementation. A summary is on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid2/rto_estimator/ In addition, this estimator fared well in a recent empirical evaluation: Rewaskar, Sushant, Jasleen Kaur and F. Donelson Smith. A Performance Study of Loss Detection/Recovery in Real-world TCP Implementations. Proceedings of 15th IEEE International Conference on Network Protocols (ICNP-07). 2007. Thus there is significant benefit in reusing the existing TCP code.
Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 171 +++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 77 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fa074d442065..22753fd98698 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -110,12 +110,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) -{ - ccid2_pr_debug("change SRTT to %ld\n", val); - hctx->srtt = val; -} - static void ccid2_start_rto_timer(struct sock *sk); static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -123,7 +117,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) struct sock *sk = (struct sock *)data; struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); - long s; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -135,10 +128,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) /* back-off timer */ hctx->rto <<= 1; - - s = hctx->rto / HZ; - if (s > 60) - hctx->rto = 60 * HZ; + if (hctx->rto > DCCP_RTO_MAX) + hctx->rto = DCCP_RTO_MAX; /* adjust pipe, cwnd etc */ hctx->ssthresh = hctx->cwnd / 2; @@ -279,9 +270,87 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) ccid2_pr_debug("deleted RTO timer\n"); } -static inline void ccid2_new_ack(struct sock *sk, - struct ccid2_seq *seqp, - unsigned int *maxincr) +/** + * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm + * This code is almost identical with TCP's tcp_rtt_estimator(), since + * - it has a higher sampling frequency (recommended by RFC 1323), + * - the RTO does not collapse into RTT due to RTTVAR going towards zero, + * - it is simple (cf. 
more complex proposals such as Eifel timer or research + * which suggests that the gain should be set according to window size), + * - in tests it was found to work well with CCID2 [gerrit]. + */ +static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + long m = mrtt ? : 1; + + if (hctx->srtt == 0) { + /* First measurement m */ + hctx->srtt = m << 3; + hctx->mdev = m << 1; + + hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev); + hctx->rttvar = hctx->mdev_max; + hctx->rtt_seq = dccp_sk(sk)->dccps_gss; + } else { + /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ + m -= (hctx->srtt >> 3); + hctx->srtt += m; + + /* Similarly, update scaled mdev with regard to |m| */ + if (m < 0) { + m = -m; + m -= (hctx->mdev >> 2); + /* + * This neutralises RTO increase when RTT < SRTT - mdev + * (see P. Sarolahti, A. Kuznetsov,"Congestion Control + * in Linux TCP", USENIX 2002, pp. 49-62). + */ + if (m > 0) + m >>= 3; + } else { + m -= (hctx->mdev >> 2); + } + hctx->mdev += m; + + if (hctx->mdev > hctx->mdev_max) { + hctx->mdev_max = hctx->mdev; + if (hctx->mdev_max > hctx->rttvar) + hctx->rttvar = hctx->mdev_max; + } + + /* + * Decay RTTVAR at most once per flight, exploiting that + * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) + * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) + * GAR is a useful bound for FlightSize = pipe, AWL is probably + * too low as it over-estimates pipe. + */ + if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) { + if (hctx->mdev_max < hctx->rttvar) + hctx->rttvar -= (hctx->rttvar - + hctx->mdev_max) >> 2; + hctx->rtt_seq = dccp_sk(sk)->dccps_gss; + hctx->mdev_max = TCP_RTO_MIN; + } + } + + /* + * Set RTO from SRTT and RTTVAR + * Clock granularity is ignored since the minimum error for RTTVAR is + * clamped to 50msec (corresponding to HZ=20). This leads to a minimum + * RTO of 200msec. 
This agrees with TCP and RFC 4341, 5.: "Because DCCP + * does not retransmit data, DCCP does not require TCP's recommended + * minimum timeout of one second". + */ + hctx->rto = (hctx->srtt >> 3) + hctx->rttvar; + + if (hctx->rto > DCCP_RTO_MAX) + hctx->rto = DCCP_RTO_MAX; +} + +static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, + unsigned int *maxincr) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); @@ -295,64 +364,15 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->cwnd += 1; hctx->packets_acked = 0; } - - /* update RTO */ - if (hctx->srtt == -1 || - time_after(jiffies, hctx->lastrtt + hctx->srtt)) { - unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; - int s; - - /* first measurement */ - if (hctx->srtt == -1) { - ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", - r, jiffies, - (unsigned long long)seqp->ccid2s_seq); - ccid2_change_srtt(hctx, r); - hctx->rttvar = r >> 1; - } else { - /* RTTVAR */ - long tmp = hctx->srtt - r; - long srtt; - - if (tmp < 0) - tmp *= -1; - - tmp >>= 2; - hctx->rttvar *= 3; - hctx->rttvar >>= 2; - hctx->rttvar += tmp; - - /* SRTT */ - srtt = hctx->srtt; - srtt *= 7; - srtt >>= 3; - tmp = r >> 3; - srtt += tmp; - ccid2_change_srtt(hctx, srtt); - } - s = hctx->rttvar << 2; - /* clock granularity is 1 when based on jiffies */ - if (!s) - s = 1; - hctx->rto = hctx->srtt + s; - - /* must be at least a second */ - s = hctx->rto / HZ; - /* DCCP doesn't require this [but I like it cuz my code sux] */ -#if 1 - if (s < 1) - hctx->rto = HZ; -#endif - /* max 60 seconds */ - if (s > 60) - hctx->rto = HZ * 60; - - hctx->lastrtt = jiffies; - - ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->srtt, hctx->rttvar, - hctx->rto, HZ, r); - } + /* + * FIXME: RTT is sampled several times per acknowledgment (for each + * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). 
+ * This causes the RTT to be over-estimated, since the older entries + * in the Ack Vector have earlier sending times. + * The cleanest solution is to not use the ccid2s_sent field at all + * and instead use DCCP timestamps - need to be resolved at some time. + */ + ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) @@ -579,8 +599,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->pipe == 0) sk_stop_timer(sk, &hctx->rtotimer); else - sk_reset_timer(sk, &hctx->rtotimer, - jiffies + hctx->rto); + sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) @@ -613,9 +632,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; - hctx->rto = 3 * HZ; - ccid2_change_srtt(hctx, -1); - hctx->rttvar = -1; + hctx->rto = DCCP_TIMEOUT_INIT; hctx->rpdupack = -1; hctx->last_cong = jiffies; setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); -- cgit v1.2.3 From 20bbd0f75ee4b72c1dafc8e5fb6ad39ba506a75c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Remove wrappers around sk_{reset,stop}_timer() This removes the wrappers around the sk timer functions as it makes the code clearer and not much is gained from using wrappers: the BUG_ON in start_rto_timer will never trigger since that function was called only when * the RTO timer expired (rto_expire, and then timer_pending() is false); * in tx_packet_sent only if !timer_pending() (BUG_ON is redundant here); * previously in new_ack, after stopping the timer (timer_pending() false). One further motive behind this patch is to replace the RTO timer with the icsk retransmission timer, as it is already part of the DCCP socket. 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 22753fd98698..c539f79ab8e8 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -110,8 +110,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) dp->dccps_l_ack_ratio = val; } -static void ccid2_start_rto_timer(struct sock *sk); - static void ccid2_hc_tx_rto_expire(unsigned long data) { struct sock *sk = (struct sock *)data; @@ -150,23 +148,13 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) /* if we were blocked before, we may now send cwnd=1 packet */ if (sender_was_blocked) tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); - ccid2_start_rto_timer(sk); + /* restart backed-off timer */ + sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); out: bh_unlock_sock(sk); sock_put(sk); } -static void ccid2_start_rto_timer(struct sock *sk) -{ - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - - ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->rto); - - BUG_ON(timer_pending(&hctx->rtotimer)); - sk_reset_timer(sk, &hctx->rtotimer, - jiffies + hctx->rto); -} - static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); @@ -245,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) /* setup RTO timer */ if (!timer_pending(&hctx->rtotimer)) - ccid2_start_rto_timer(sk); + sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); #ifdef CONFIG_IP_DCCP_CCID2_DEBUG do { @@ -262,14 +250,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) #endif } -static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) -{ - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - - sk_stop_timer(sk, &hctx->rtotimer); - ccid2_pr_debug("deleted RTO timer\n"); -} - /** * ccid2_rtt_estimator - Sample RTT and compute RTO 
using RFC2988 algorithm * This code is almost identical with TCP's tcp_rtt_estimator(), since @@ -645,7 +625,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); int i; - ccid2_hc_tx_kill_rto_timer(sk); + sk_stop_timer(sk, &hctx->rtotimer); for (i = 0; i < hctx->seqbufc; i++) kfree(hctx->seqbuf[i]); -- cgit v1.2.3 From 6224877b2ca4be5de96270a8ae490fe2ba11b0e0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates the code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid2.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net/dccp/ccids/ccid2.c') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c539f79ab8e8..fa713227c66f 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -596,12 +596,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hctx->ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hctx->cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 3390, rfc2581bis) */ + hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); -- cgit v1.2.3