/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP module.
 *
 * Version:	@(#)tcp.h	1.0.5	05/23/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H
#define _TCP_H

#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1

#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <linux/dmaengine.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
#include <linux/kref.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>

#include <linux/seq_file.h>

extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);

#define MAX_TCP_HEADER	(128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16-bit maths!
 */
#define MAX_TCP_WINDOW		32767U

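/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): why MAX_TCP_WINDOW stops at 32767.  A peer that
 * stores the advertised window in a signed 16-bit variable would see
 * anything larger as negative.
 */
static inline int tcp_window_sign_demo(void)
{
	s16 peers_view = (s16)40000;	/* wraps to -25536 */

	return peers_view < 0;		/* 1: the window "went negative" */
}
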
/* Minimal accepted MSS. It is (60+60+8) - (20+20): even with maximal
 * 60-byte IP and TCP headers, 8 bytes of data still fit. */
#define TCP_MIN_MSS		88U

/* The least MTU to use for probing */
#define TCP_BASE_MSS		512

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING	127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS	16U

/* urg_data states */
#define TCP_URG_VALID	0x0100
#define TCP_URG_NOTYET	0x0200
#define TCP_URG_READ	0x0400

#define TCP_RETR1	3	/*
				 * This is how many retries it does before it
				 * tries to figure out if the gateway is
				 * down. Minimal RFC value is 3; it corresponds
				 * to ~3sec-8min depending on RTO.
				 */

#define TCP_RETR2	15	/*
				 * This should take at least
				 * 90 minutes to time out.
				 * RFC1122 says that the limit is 100 sec.
				 * 15 is ~13-30min depending on RTO.
				 */

#define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
				 * connection: ~180sec is RFC minimum	*/


#define TCP_ORPHAN_RETRIES 7	/* number of times to retry on an orphaned
				 * socket. 7 is ~50sec-16min.
				 */


#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
				  * state, about 60 seconds	*/
#define TCP_FIN_TIMEOUT	TCP_TIMEWAIT_LEN
				 /* BSD style FIN_WAIT2 deadlock breaker.
				  * It used to be 3min, new value is 60sec,
				  * to combine FIN-WAIT-2 timeout with
				  * TIME-WAIT timer.
				  */

#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
#if HZ >= 100
#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN	((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN	4U
#define TCP_ATO_MIN	4U
#endif
#define TCP_RTO_MAX	((unsigned)(120*HZ))
#define TCP_RTO_MIN	((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value */

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
							 * for local resources.
							 */

#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
#define TCP_KEEPALIVE_INTVL	(75*HZ)

#define MAX_TCP_KEEPIDLE	32767
#define MAX_TCP_KEEPINTVL	32767
#define MAX_TCP_KEEPCNT		127
#define MAX_TCP_SYNCNT		127

#define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */

#define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)
#define TCP_PAWS_MSL	60		/* Per-host timestamps are invalidated
					 * after this time. It should be equal to
					 * (or greater than) TCP_TIMEWAIT_LEN
					 * to provide reliability equal to that
					 * provided by the timewait state.
					 */
#define TCP_PAWS_WINDOW	1		/* Replay window for per-host
					 * timestamps. It must be less than the
					 * minimal timewait lifetime.
					 */
/*
 *	TCP option
 */

#define TCPOPT_NOP		1	/* Padding */
#define TCPOPT_EOL		0	/* End of options */
#define TCPOPT_MSS		2	/* Segment size negotiating */
#define TCPOPT_WINDOW		3	/* Window scaling */
#define TCPOPT_SACK_PERM	4	/* SACK Permitted */
#define TCPOPT_SACK		5	/* SACK Block */
#define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */

/*
 *	TCP option lengths
 */

#define TCPOLEN_MSS		4
#define TCPOLEN_WINDOW		3
#define TCPOLEN_SACK_PERM	2
#define TCPOLEN_TIMESTAMP	10
#define TCPOLEN_MD5SIG		18
#define TCPOLEN_COOKIE_BASE	2	/* Cookie-less header extension */
#define TCPOLEN_COOKIE_PAIR	3	/* Cookie pair header extension */
#define TCPOLEN_COOKIE_MIN	(TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
#define TCPOLEN_COOKIE_MAX	(TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED		12
#define TCPOLEN_WSCALE_ALIGNED		4
#define TCPOLEN_SACKPERM_ALIGNED	4
#define TCPOLEN_SACK_BASE		2
#define TCPOLEN_SACK_BASE_ALIGNED	4
#define TCPOLEN_SACK_PERBLOCK		8
#define TCPOLEN_MD5SIG_ALIGNED		20
#define TCPOLEN_MSS_ALIGNED		4

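/*
 * Editorial illustration (not part of the original header; the array is
 * made up): the *_ALIGNED lengths describe the on-wire encoding.  A
 * timestamp option, for instance, is padded with two NOPs to fill
 * TCPOLEN_TSTAMP_ALIGNED (12) bytes.
 */
static const u8 tcp_tstamp_wire_demo[TCPOLEN_TSTAMP_ALIGNED] = {
	TCPOPT_NOP, TCPOPT_NOP, TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP,
	0, 0, 0, 0,			/* TSval placeholder */
	0, 0, 0, 0,			/* TSecr placeholder */
};
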
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
#define TCP_NAGLE_CORK		2	/* Socket is corked	    */
#define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */

extern struct inet_timewait_death_row tcp_death_row;

/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_frto_response;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_dma_copybreak;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;

extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;

/*
 * The next routines deal with comparing 32-bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

/* is s2<=s1<=s3 ? */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

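/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): the signed cast makes these comparisons wrap
 * correctly.  0xfffffff0 sorts "before" 0x10 although it is numerically
 * larger, because the difference reinterpreted as signed is negative.
 */
static inline int tcp_seq_wrap_demo(void)
{
	return before(0xfffffff0, 0x10) &&		/* wraps, still before */
	       after(0x10, 0xfffffff0) &&
	       between(0x4, 0xfffffff0, 0x10);		/* inside wrapped range */
}
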
static inline int tcp_too_many_orphans(struct sock *sk, int num)
{
	return (num > sysctl_tcp_max_orphans) ||
		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
}

/* syncookies: remember time of last synqueue overflow */
static inline void tcp_synq_overflow(struct sock *sk)
{
	tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
}

/* syncookies: no recent synqueue overflow on this listening socket? */
static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
{
	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
	return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
}

extern struct proto tcp_prot;

#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define TCP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)

extern void tcp_v4_err(struct sk_buff *skb, u32);

extern void tcp_shutdown (struct sock *sk, int how);

extern int tcp_v4_rcv(struct sk_buff *skb);

extern int tcp_v4_remember_stamp(struct sock *sk);

extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);

extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
		       struct msghdr *msg, size_t size);
extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);

extern int tcp_ioctl(struct sock *sk,
		     int cmd,
		     unsigned long arg);

extern int tcp_rcv_state_process(struct sock *sk,
				 struct sk_buff *skb,
				 struct tcphdr *th,
				 unsigned len);

extern int tcp_rcv_established(struct sock *sk,
			       struct sk_buff *skb,
			       struct tcphdr *th,
			       unsigned len);

extern void tcp_rcv_space_adjust(struct sock *sk);

extern void tcp_cleanup_rbuf(struct sock *sk, int copied);

extern int tcp_twsk_unique(struct sock *sk,
			   struct sock *sktw, void *twp);

extern void tcp_twsk_destructor(struct sock *sk);

extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len, unsigned int flags);

static inline void tcp_dec_quickack_mode(struct sock *sk,
					 const unsigned int pkts)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ack.quick) {
		if (pkts >= icsk->icsk_ack.quick) {
			icsk->icsk_ack.quick = 0;
			/* Leaving quickack mode we deflate ATO. */
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		} else
			icsk->icsk_ack.quick -= pkts;
	}
}

extern void tcp_enter_quickack_mode(struct sock *sk);

#define	TCP_ECN_OK		1
#define	TCP_ECN_QUEUE_CWR	2
#define	TCP_ECN_DEMAND_CWR	4

static __inline__ void
TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th)
{
	if (sysctl_tcp_ecn && th->ece && th->cwr)
		inet_rsk(req)->ecn_ok = 1;
}

enum tcp_tw_status {
	TCP_TW_SUCCESS = 0,
	TCP_TW_RST = 1,
	TCP_TW_ACK = 2,
	TCP_TW_SYN = 3
};


extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
						     struct sk_buff *skb,
						     const struct tcphdr *th);

extern struct sock * tcp_check_req(struct sock *sk, struct sk_buff *skb,
				   struct request_sock *req,
				   struct request_sock **prev);
extern int tcp_child_process(struct sock *parent,
			     struct sock *child,
			     struct sk_buff *skb);
extern int tcp_use_frto(struct sock *sk);
extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk);

extern void tcp_close(struct sock *sk,
		      long timeout);
extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);

extern int tcp_getsockopt(struct sock *sk, int level,
			  int optname,
			  char __user *optval,
			  int __user *optlen);
extern int tcp_setsockopt(struct sock *sk, int level,
			  int optname, char __user *optval,
			  unsigned int optlen);
extern int compat_tcp_getsockopt(struct sock *sk,
				 int level, int optname,
				 char __user *optval, int __user *optlen);
extern int compat_tcp_setsockopt(struct sock *sk,
				 int level, int optname,
				 char __user *optval, unsigned int optlen);
extern void tcp_set_keepalive(struct sock *sk, int val);
extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
		       struct msghdr *msg,
		       size_t len, int nonblock,
		       int flags, int *addr_len);

extern void tcp_parse_options(struct sk_buff *skb,
			      struct tcp_options_received *opt_rx,
			      u8 **hvpp,
			      int estab);

extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);

/*
 *	TCP v4 functions exported for the inet6 API
 */

extern void tcp_v4_send_check(struct sock *sk, int len,
			      struct sk_buff *skb);

extern int tcp_v4_conn_request(struct sock *sk,
			       struct sk_buff *skb);

extern struct sock * tcp_create_openreq_child(struct sock *sk,
					      struct request_sock *req,
					      struct sk_buff *skb);

extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst);

extern int tcp_v4_do_rcv(struct sock *sk,
			 struct sk_buff *skb);

extern int tcp_v4_connect(struct sock *sk,
			  struct sockaddr *uaddr,
			  int addr_len);

extern int tcp_connect(struct sock *sk);

extern struct sk_buff * tcp_make_synack(struct sock *sk,
					struct dst_entry *dst,
					struct request_sock *req,
					struct request_values *rvp);

extern int tcp_disconnect(struct sock *sk, int flags);


/* From syncookies.c */
extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
				    struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
				     __u16 *mss);

extern __u32 cookie_init_timestamp(struct request_sock *req);
extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);

/* From net/ipv6/syncookies.c */
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
				     __u16 *mss);

/* tcp_output.c */

extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
				      int nonagle);
extern int tcp_may_send_now(struct sock *sk);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_retransmit_timer(struct sock *sk);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);

extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
extern int tcp_send_synack(struct sock *);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);

/* tcp_input.c */
extern void tcp_cwnd_application_limited(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}

extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk);

/* Bound MSS / TSO packet size to half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
	if (tp->max_window && pktsize > (tp->max_window >> 1))
		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
	else
		return pktsize;
}

/* tcp.c */
extern void tcp_get_info(struct sock *, struct tcp_info *);

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
			       unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
			 sk_read_actor_t recv_actor);

extern void tcp_initialize_rcv_mss(struct sock *sk);

extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);

static inline void tcp_bound_rto(const struct sock *sk)
{
	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}

static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
{
	return (tp->srtt >> 3) + tp->rttvar;
}

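/*
 * Editorial illustration (not part of the original header; values are
 * made up): srtt is kept left-shifted by 3, so __tcp_set_rto() above
 * computes "smoothed RTT plus variance term".  With an srtt of 200
 * stored as 200 << 3 and a rttvar of 50, the RTO comes out as 250 (in
 * jiffies, before tcp_bound_rto() clamps it to TCP_RTO_MAX).
 */
static inline u32 tcp_rto_calc_demo(void)
{
	u32 srtt = 200 << 3;		/* stored scaled by 8 */
	u32 rttvar = 50;

	return (srtt >> 3) + rttvar;	/* == 250 */
}
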
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}

static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (skb_queue_empty(&tp->out_of_order_queue) &&
	    tp->rcv_wnd &&
	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
	    !tp->urg_data)
		tcp_fast_path_on(tp);
}

/* Compute the actual rto_min value */
static inline u32 tcp_rto_min(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	u32 rto_min = TCP_RTO_MIN;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
	return rto_min;
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

	if (win < 0)
		win = 0;
	return (u32) win;
}

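/*
 * Editorial illustration (not part of the original header; numbers are
 * made up): if the advertised edge is rcv_wup + rcv_wnd = 1000 + 500
 * but the peer has already pushed up to rcv_nxt = 1600, the remaining
 * window clamps to 0 rather than going negative.
 */
static inline u32 tcp_receive_window_demo(void)
{
	s32 win = (1000 + 500) - 1600;	/* peer overran the offer by 100 */

	return win < 0 ? 0 : (u32)win;	/* == 0 */
}
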
/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);

/* TCP timestamps are only 32-bits, this causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We decided
 * to use only the low 32-bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp		((__u32)(jiffies))

/* This is what the send packet queuing engine uses to pass
 * TCP per-packet control information to the transmission
 * code.  We also store the host-order sequence numbers in
 * here too.  This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines, if this grows please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
	union {
		struct inet_skb_parm	h4;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		struct inet6_skb_parm	h6;
#endif
	} header;	/* For incoming frames		*/
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u32		when;		/* used to compute rtt's	*/
	__u8		flags;		/* TCP header flags.		*/

	/* NOTE: These must match up to the flags byte in a
	 *       real TCP header.
	 */
#define TCPCB_FLAG_FIN		0x01
#define TCPCB_FLAG_SYN		0x02
#define TCPCB_FLAG_RST		0x04
#define TCPCB_FLAG_PSH		0x08
#define TCPCB_FLAG_ACK		0x10
#define TCPCB_FLAG_URG		0x20
#define TCPCB_FLAG_ECE		0x40
#define TCPCB_FLAG_CWR		0x80

	__u8		sacked;		/* State flags for SACK/FACK.	*/
#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
#define TCPCB_LOST		0x04	/* SKB is lost			*/
#define TCPCB_TAGBITS		0x07	/* All tag bits			*/

#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

	__u32		ack_seq;	/* Sequence number ACK'd	*/
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

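/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): typical use of the control block, here computing
 * the payload length of a queued skb.  end_seq counts SYN and FIN, so
 * they must be subtracted out.
 */
static inline u32 tcp_skb_datalen_demo(struct sk_buff *skb)
{
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
	u32 len = tcb->end_seq - tcb->seq;

	if (tcb->flags & TCPCB_FLAG_SYN)
		len--;
	if (tcb->flags & TCPCB_FLAG_FIN)
		len--;
	return len;
}
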
/* Due to TSO, an SKB can be composed of multiple actual
 * packets.  To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_size;
}

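/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): a TSO skb carries tcp_skb_pcount() segments of up
 * to tcp_skb_mss() bytes each, so their product bounds the payload.
 */
static inline int tcp_skb_payload_bound_demo(const struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) * tcp_skb_mss(skb);
}
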
/* Events passed to congestion control interface */
enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
	CA_EVENT_CWND_RESTART,	/* congestion window restart */
	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
	CA_EVENT_FRTO,		/* fast recovery timeout */
	CA_EVENT_LOSS,		/* loss timeout */
	CA_EVENT_FAST_ACK,	/* in sequence ack */
	CA_EVENT_SLOW_ACK,	/* other ack */
};

/*
 * Interface for adding new TCP congestion control handlers
 */
#define TCP_CA_NAME_MAX	16
#define TCP_CA_MAX	128
#define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CONG_NON_RESTRICTED 0x1
#define TCP_CONG_RTT_STAMP	0x2

struct tcp_congestion_ops {
	struct list_head	list;
	unsigned long		flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	u32 (*ssthresh)(struct sock *sk);
	/* lower bound for congestion window (optional) */
	u32 (*min_cwnd)(const struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* new value of cwnd after loss (optional) */
	u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
	/* get info for inet_diag (optional) */
	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);

	char		name[TCP_CA_NAME_MAX];
	struct module	*owner;
};

extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern void tcp_get_available_congestion_control(char *buf, size_t len);
extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
extern int tcp_set_allowed_congestion_control(char *allowed);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern void tcp_slow_start(struct tcp_sock *tp);
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
extern u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;

static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->set_state)
		icsk->icsk_ca_ops->set_state(sk, ca_state);
	icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->cwnd_event)
		icsk->icsk_ca_ops->cwnd_event(sk, event);
}

/* These functions determine how the current flow behaves with respect to SACK
 * handling. SACK is negotiated with the peer, and therefore it can vary
 * between different flows.
 *
 * tcp_is_sack - SACK enabled
 * tcp_is_reno - No SACK
 * tcp_is_fack - FACK enabled, implies SACK enabled
 */
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
	return tp->rx_opt.sack_ok;
}

static inline int tcp_is_reno(const struct tcp_sock *tp)
{
	return !tcp_is_sack(tp);
}

static inline int tcp_is_fack(const struct tcp_sock *tp)
{
	return tp->rx_opt.sack_ok & 2;
}

static inline void tcp_enable_fack(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= 2;
}

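/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): the predicates above partition flows.  FACK implies
 * SACK, and Reno is simply the absence of SACK.
 */
static inline int tcp_sack_mode_demo(const struct tcp_sock *tp)
{
	if (tcp_is_fack(tp))	/* implies tcp_is_sack(tp) */
		return 2;
	if (tcp_is_sack(tp))
		return 1;
	return 0;		/* tcp_is_reno(tp) is true here */
}
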
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
	return tp->sacked_out + tp->lost_out;
}

/* This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control; use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *	"Packets sent once on transmission queue" MINUS
 *	"Packets left network, but not honestly ACKed yet" PLUS
 *	"Packets fast retransmitted"
 */
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}

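/*
 * Editorial illustration (not part of the original header; counters are
 * made up): a worked instance of the equation above.  With 10 packets
 * sent once, 3 SACKed, 2 marked lost and 1 fast-retransmitted,
 * 10 - (3 + 2) + 1 == 6 packets are estimated to be in the network.
 */
static inline unsigned int tcp_in_flight_demo(void)
{
	unsigned int packets_out = 10, sacked_out = 3,
		     lost_out = 2, retrans_out = 1;

	return packets_out - (sacked_out + lost_out) + retrans_out;
}
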
#define TCP_INFINITE_SSTHRESH	0x7fffffff

static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is the rate-halving phase, when cwnd is decreasing
 * towards ssthresh.
 */
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
			   ((tp->snd_cwnd >> 1) +
			    (tp->snd_cwnd >> 2)));
}

/* Use define here intentionally to get WARN_ON location shown at the caller */
#define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)

extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);

/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto".  This will be the default - same as
 * the default reordering threshold - but if reordering increases,
 * we must be able to allow cwnd to burst at least this much in order
 * to not pull it back when holes are filled.
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
	return tp->reordering;
}

/* Returns end sequence number of the receiver's advertised window */
static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
	return tp->snd_una + tp->snd_wnd;
}
extern int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);

static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
				       const struct sk_buff *skb)
{
	if (skb->len < mss)
		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

static inline void tcp_check_probe_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (!tp->packets_out && !icsk->icsk_pending)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  icsk->icsk_rto, TCP_RTO_MAX);
}

static inline void tcp_push_pending_frames(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
}

static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

/*
 * Calculate (or check) the TCP checksum.
 */
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete(skb);
}

static inline int tcp_checksum_complete(struct sk_buff *skb)
{
	return !skb_csum_unnecessary(skb) &&
		__tcp_checksum_complete(skb);
}

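/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): on transmit, the pseudo-header sum from
 * tcp_v4_check() is folded together with a checksum already computed
 * over the TCP header and payload.
 */
static inline __sum16 tcp_full_csum_demo(int len, __be32 saddr, __be32 daddr,
					 __wsum payload_csum)
{
	return tcp_v4_check(len, saddr, daddr, payload_csum);
}
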
/* Prequeue for VJ style copy to user, combined with checksumming. */

static inline void tcp_prequeue_init(struct tcp_sock *tp)
{
	tp->ucopy.task = NULL;
	tp->ucopy.len = 0;
	tp->ucopy.memory = 0;
	skb_queue_head_init(&tp->ucopy.prequeue);
#ifdef CONFIG_NET_DMA
	tp->ucopy.dma_chan = NULL;
	tp->ucopy.wakeup = 0;
	tp->ucopy.pinned_list = NULL;
	tp->ucopy.dma_cookie = 0;
#endif
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8)			--ANK
 *
 * NOTE: is this not too big to inline?
 */
static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return 0;

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_poll(sk->sk_sleep,
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return 1;
}


#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[]={
	"Unused","Established","Syn Sent","Syn Recv",
	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
	"Close Wait","Last ACK","Listen","Closing"
};
#endif
extern void tcp_set_state(struct sock *sk, int state);

extern void tcp_done(struct sock *sk);

static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
	rx_opt->dsack = 0;
	rx_opt->num_sacks = 0;
}

/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
				      __u32 *rcv_wnd, __u32 *window_clamp,
				      int wscale_ok, __u8 *rcv_wscale);

static inline int tcp_win_from_space(int space)
{
	return sysctl_tcp_adv_win_scale<=0 ?
		(space>>(-sysctl_tcp_adv_win_scale)) :
		space - (space>>sysctl_tcp_adv_win_scale);
}

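/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): with the default sysctl_tcp_adv_win_scale of 2,
 * space - (space >> 2) is advertised, i.e. three quarters of the buffer
 * become window and one quarter is reserved for metadata overhead.
 */
static inline int tcp_win_from_space_demo(void)
{
	int space = 65536;

	return space - (space >> 2);	/* == 49152 */
}
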
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf);
}

static inline void tcp_openreq_init(struct request_sock *req,
				    struct tcp_options_received *rx_opt,
				    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
	req->cookie_ts = 0;
	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
	req->mss = rx_opt->mss_clamp;
	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
	ireq->tstamp_ok = rx_opt->tstamp_ok;
	ireq->sack_ok = rx_opt->sack_ok;
	ireq->snd_wscale = rx_opt->snd_wscale;
	ireq->wscale_ok = rx_opt->wscale_ok;
	ireq->acked = 0;
	ireq->ecn_ok = 0;
	ireq->rmt_port = tcp_hdr(skb)->source;
	ireq->loc_port = tcp_hdr(skb)->dest;
}

extern void tcp_enter_memory_pressure(struct sock *sk);

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int keepalive_probes(const struct tcp_sock *tp)
{
	return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
}

static inline int tcp_fin_time(const struct sock *sk)
{
	int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
		fin_timeout = (rto << 2) - (rto >> 1);

	return fin_timeout;
}

static inline int tcp_paws_check(const struct tcp_options_received *rx_opt,
				 int paws_win)
{
	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
		return 1;
	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
		return 1;

	return 0;
}

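/*
 * Editorial illustration (not part of the original header; values are
 * made up): PAWS uses the same wrapping signed arithmetic as sequence
 * numbers.  A tsval only slightly below ts_recent still passes when the
 * difference is within the replay window.
 */
static inline int tcp_paws_demo(void)
{
	u32 ts_recent = 1000, rcv_tsval = 999;

	return (s32)(ts_recent - rcv_tsval) <= TCP_PAWS_WINDOW;	/* 1 */
}
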
static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
				  int rst)
{
	if (tcp_paws_check(rx_opt, 0))
		return 0;

	/* RST segments are not recommended to carry a timestamp,
	   and, if they do, it is recommended to ignore PAWS because
	   "their cleanup function should take precedence over timestamps."
	   Certainly, this is a mistake. It is necessary to understand the
	   reasons for this constraint before relaxing it: if the peer
	   reboots, its clock may go out of sync and half-open connections
	   will not be reset.
	   Actually, the problem would not exist if all the implementations
	   followed the draft about maintaining clocks across reboots.
	   Linux-2.2 DOES NOT!

	   However, we can relax the time bounds for RST segments to MSL.
	 */
	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
		return 0;
	return 1;
}

#define TCP_CHECK_TIMER(sk) do { } while (0)

static inline void tcp_mib_init(struct net *net)
{
	/* See RFC 2012 */
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
	TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1);
}

/* from STCP */
static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
{
	tp->lost_skb_hint = NULL;
	tp->scoreboard_skb_hint = NULL;
}

static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
{
	tcp_clear_retrans_hints_partial(tp);
	tp->retransmit_skb_hint = NULL;
}

/* MD5 Signature */
struct crypto_hash;

/* - key database */
struct tcp_md5sig_key {
	u8			*key;
	u8			keylen;
};

struct tcp4_md5sig_key {
	struct tcp_md5sig_key	base;
	__be32			addr;
};

struct tcp6_md5sig_key {
	struct tcp_md5sig_key	base;
#if 0
	u32			scope_id;	/* XXX */
#endif
	struct in6_addr		addr;
};

/* - sock block */
struct tcp_md5sig_info {
	struct tcp4_md5sig_key	*keys4;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	struct tcp6_md5sig_key	*keys6;
	u32			entries6;
	u32			alloced6;
#endif
	u32			entries4;
	u32			alloced4;
};

/* - pseudo header */
struct tcp4_pseudohdr {
	__be32		saddr;
	__be32		daddr;
	__u8		pad;
	__u8		protocol;
	__be16		len;
};

struct tcp6_pseudohdr {
	struct in6_addr	saddr;
	struct in6_addr	daddr;
	__be32		len;
	__be32		protocol;	/* including padding */
};

union tcp_md5sum_block {
	struct tcp4_pseudohdr ip4;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	struct tcp6_pseudohdr ip6;
#endif
};

/* - pool: digest algorithm, hash description and scratch buffer */
struct tcp_md5sig_pool {
	struct hash_desc	md5_desc;
	union tcp_md5sum_block	md5_blk;
};

#define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */

/* - functions */
extern int tcp_v4_md5_hash_skb(char *md5_hash,
			       struct tcp_md5sig_key *key,
			       struct sock *sk,
			       struct request_sock *req,
			       struct sk_buff *skb);

extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
						struct sock *addr_sk);

extern int tcp_v4_md5_do_add(struct sock *sk,
			     __be32 addr,
			     u8 *newkey,
			     u8 newkeylen);

extern int tcp_v4_md5_do_del(struct sock *sk,
			     __be32 addr);

#ifdef CONFIG_TCP_MD5SIG
#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_keylen ?		 \
				 &(struct tcp_md5sig_key) {		 \
					.key = (twsk)->tw_md5_key,	 \
					.keylen = (twsk)->tw_md5_keylen, \
				} : NULL)
#else
#define tcp_twsk_md5_key(twsk)	NULL
#endif

extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *);
extern void tcp_free_md5sig_pool(void);

extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu);
extern void __tcp_put_md5sig_pool(void);
extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
				 unsigned header_len);
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
			    struct tcp_md5sig_key *key);

static inline
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
	int cpu = get_cpu();
	struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
	if (!ret)
		put_cpu();
	return ret;
}

static inline void tcp_put_md5sig_pool(void)
{
	__tcp_put_md5sig_pool();
	put_cpu();
}

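/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): the intended calling pattern.  tcp_get_md5sig_pool()
 * pins the current CPU via get_cpu(), so the caller must not sleep and
 * must release the pool with tcp_put_md5sig_pool().  On failure the
 * getter has already dropped the CPU reference itself.
 */
static inline int tcp_md5_pool_usage_demo(void)
{
	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();

	if (!hp)
		return -1;
	/* ... hash with tcp_md5_hash_header()/_skb_data()/_key() ... */
	tcp_put_md5sig_pool();
	return 0;
}
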
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
		sk_wmem_free_skb(sk, skb);
	sk_mem_reclaim(sk);
	tcp_clear_all_retrans_hints(tcp_sk(sk));
}

static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
{
	return skb_peek_tail(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
{
	return skb_queue_next(&sk->sk_write_queue, skb);
}

static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb)
{
	return skb_queue_prev(&sk->sk_write_queue, skb);
}

#define tcp_for_write_queue(skb, sk)					\
	skb_queue_walk(&(sk)->sk_write_queue, skb)

#define tcp_for_write_queue_from(skb, sk)				\
	skb_queue_walk_from(&(sk)->sk_write_queue, skb)

#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)

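/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): counting queued skbs with the walk macro above.
 * Callers are expected to hold the socket lock.
 */
static inline int tcp_write_queue_len_demo(struct sock *sk)
{
	struct sk_buff *skb;
	int len = 0;

	tcp_for_write_queue(skb, sk)
		len++;
	return len;
}
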
static inline struct sk_buff *tcp_send_head(struct sock *sk)
{
	return sk->sk_send_head;
}

static inline bool tcp_skb_is_last(const struct sock *sk,
				   const struct sk_buff *skb)
{
	return skb_queue_is_last(&sk->sk_write_queue, skb);
}

static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
{
	if (tcp_skb_is_last(sk, skb))
		sk->sk_send_head = NULL;
	else
		sk->sk_send_head = tcp_write_queue_next(sk, skb);
}

static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
{
	if (sk->sk_send_head == skb_unlinked)
		sk->sk_send_head = NULL;
}

static inline void tcp_init_send_head(struct sock *sk)
{
	sk->sk_send_head = NULL;
}

static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_tail(&sk->sk_write_queue, skb);
}

static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__tcp_add_write_queue_tail(sk, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_send_head == NULL) {
		sk->sk_send_head = skb;

		if (tcp_sk(sk)->highest_sack == NULL)
			tcp_sk(sk)->highest_sack = skb;
	}
}

static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_head(&sk->sk_write_queue, skb);
}

/* Insert buff after skb on the write queue of sk. */
static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
						struct sk_buff *buff,
						struct sock *sk)
{
	__skb_queue_after(&sk->sk_write_queue, skb, buff);
}

/* Insert new before skb on the write queue of sk. */
static inline void tcp_insert_write_queue_before(struct sk_buff *new,
						 struct sk_buff *skb,
						 struct sock *sk)
{
	__skb_queue_before(&sk->sk_write_queue, skb, new);

	if (sk->sk_send_head == skb)
		sk->sk_send_head = new;
}

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	__skb_unlink(skb, &sk->sk_write_queue);
}

static inline int tcp_write_queue_empty(struct sock *sk)
{
	return skb_queue_empty(&sk->sk_write_queue);
}

/* Start sequence of the highest skb with SACKed bit, valid only if
 * sacked_out > 0 or when the caller has ensured validity by itself.
 */
static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
{
	if (!tp->sacked_out)
		return tp->snd_una;

	if (tp->highest_sack == NULL)
		return tp->snd_nxt;

	return TCP_SKB_CB(tp->highest_sack)->seq;
}

static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
						tcp_write_queue_next(sk, skb);
}

static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
{
	return tcp_sk(sk)->highest_sack;
}

static inline void tcp_highest_sack_reset(struct sock *sk)
{
	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
}

/* Called when old skb is about to be deleted (to be combined with new skb) */
static inline void tcp_highest_sack_combine(struct sock *sk,
					    struct sk_buff *old,
					    struct sk_buff *new)
{
	if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
		tcp_sk(sk)->highest_sack = new;
}

/* /proc */
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_OPENREQ,
	TCP_SEQ_STATE_ESTABLISHED,
	TCP_SEQ_STATE_TIME_WAIT,
};

struct tcp_seq_afinfo {
	char			*name;
	sa_family_t		family;
	struct file_operations	seq_fops;
	struct seq_operations	seq_ops;
};

struct tcp_iter_state {
	struct seq_net_private	p;
	sa_family_t		family;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	int			bucket, sbucket, num, uid;
};

extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;

extern void tcp_v4_destroy_sock(struct sock *sk);

extern int tcp_v4_gso_send_check(struct sk_buff *skb);
extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
					struct sk_buff *skb);
extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb);
extern int tcp_gro_complete(struct sk_buff *skb);
extern int tcp4_gro_complete(struct sk_buff *skb);

#ifdef CONFIG_PROC_FS
extern int  tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif

/* TCP af-specific functions */
struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
						struct sock *addr_sk);
	int			(*calc_md5_hash) (char *location,
						  struct tcp_md5sig_key *md5,
						  struct sock *sk,
						  struct request_sock *req,
						  struct sk_buff *skb);
	int			(*md5_add) (struct sock *sk,
					    struct sock *addr_sk,
					    u8 *newkey,
					    u8 len);
	int			(*md5_parse) (struct sock *sk,
					      char __user *optval,
					      int optlen);
#endif
};

struct tcp_request_sock_ops {
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
						struct request_sock *req);
	int			(*calc_md5_hash) (char *location,
						  struct tcp_md5sig_key *md5,
						  struct sock *sk,
						  struct request_sock *req,
						  struct sk_buff *skb);
#endif
};

/* Using SHA1 for now, define some constants.
 */
#define COOKIE_DIGEST_WORDS	(SHA_DIGEST_WORDS)
#define COOKIE_MESSAGE_WORDS	(SHA_MESSAGE_BYTES / 4)
#define COOKIE_WORKSPACE_WORDS	(COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)

extern int tcp_cookie_generator(u32 *bakery);

/**
 *	struct tcp_cookie_values - each socket needs extra space for the
 *	cookies, together with (optional) space for any SYN data.
 *
 *	A tcp_sock contains a pointer to the current value, and this is
 *	cloned to the tcp_timewait_sock.
 *
 * @cookie_pair:	variable data from the option exchange.
 *
 * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
 *			indicates default (sysctl_tcp_cookie_size).
 *			After cookie sent, remembers size of cookie.
 *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
 *
 * @s_data_desired:	user specified tcpct_s_data_desired.  When the
 *			constant payload is specified (@s_data_constant),
 *			holds its length instead.
 *			Range 0 to TCP_MSS_DESIRED.
 *
 * @s_data_payload:	constant data that is to be included in the
 *			payload of SYN or SYNACK segments when the
 *			cookie option is present.
 */
struct tcp_cookie_values {
	struct kref	kref;
	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
	u8		cookie_pair_size;
	u8		cookie_desired;
	u16		s_data_desired:11,
			s_data_constant:1,
			s_data_in:1,
			s_data_out:1,
			s_data_unused:2;
	u8		s_data_payload[0];
};

static inline void tcp_cookie_values_release(struct kref *kref)
{
	kfree(container_of(kref, struct tcp_cookie_values, kref));
}

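/*
 * Editorial illustration (not part of the original header; the helper
 * name is made up): the reference-counting pattern for the cookie
 * values.  tcp_cookie_values_release() above runs only when the last
 * reference is dropped.
 */
static inline void tcp_cookie_values_put_demo(struct tcp_cookie_values *tcv)
{
	if (tcv)
		kref_put(&tcv->kref, tcp_cookie_values_release);
}
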
/* The length of constant payload data.  Note that s_data_desired is
 * overloaded, depending on s_data_constant: either the length of constant
 * data (returned here) or the limit on variable data.
 */
static inline int tcp_s_data_size(const struct tcp_sock *tp)
{
	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
		? tp->cookie_values->s_data_desired
		: 0;
}

/**
 *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
 *
 *	As tcp_request_sock has already been extended in other places, the
 *	only remaining method is to pass stack values along as function
 *	parameters.  These parameters are not needed after sending SYNACK.
 *
 * @cookie_bakery:	cryptographic secret and message workspace.
 *
 * @cookie_plus:	bytes in authenticator/cookie option, copied from
 *			struct tcp_options_received (above).
 */
struct tcp_extend_values {
	struct request_values		rv;
	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
	u8				cookie_plus:6,
					cookie_out_never:1,
					cookie_in_always:1;
};

static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
{
	return (struct tcp_extend_values *)rvp;
}

extern void tcp_v4_init(void);
extern void tcp_init(void);

#endif	/* _TCP_H */