/* include/net/inet_hashtables.h */
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 * Authors:     Lotsa people, from code originally in tcp
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#ifndef _INET_HASHTABLES_H
#define _INET_HASHTABLES_H


#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/netns/hash.h>

#include <asm/atomic.h>
#include <asm/byteorder.h>

/* This is for all connections with a full identity, no wildcards.
 * One chain is dedicated to TIME_WAIT sockets.
 * I'll experiment with dynamic table growth later.
 */
struct inet_ehash_bucket {
        struct hlist_nulls_head chain;
        struct hlist_nulls_head twchain;
};

/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 *      1) Sockets bound to different interfaces may share a local port.
 *         Failing that, goto test 2.
 *      2) If all sockets have sk->sk_reuse set, and none of them are in
 *         TCP_LISTEN state, the port may be shared.
 *         Failing that, goto test 3.
 *      3) If all sockets are bound to a specific inet_sk(sk)->inet_rcv_saddr
 *         local address, and none of them are the same, the port may be
 *         shared.
 *         Failing this, the port cannot be shared.
 *
 * The interesting point is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
 * The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit: if it is set and the socket trying to bind has
 * sk->sk_reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (i.e. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)   -DaveM
 */
struct inet_bind_bucket {
#ifdef CONFIG_NET_NS
        struct net              *ib_net;
#endif
        unsigned short          port;
        signed short            fastreuse;
        int                     num_owners;
        struct hlist_node       node;
        struct hlist_head       owners;
};
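
/*
 * Illustrative sketch (not part of this header): roughly how the bind path
 * (inet_csk_get_port() in the kernel proper) maintains the fastreuse flag
 * described above when a socket joins a bucket.  The variable names (tb, sk)
 * mirror that function but are assumptions here.
 */
#if 0
        if (hlist_empty(&tb->owners)) {
                /* First owner decides the initial value of the flag. */
                if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) {
                /* One non-conforming socket clears the flag for good. */
                tb->fastreuse = 0;
        }
#endif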

static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
        return read_pnet(&ib->ib_net);
}

#define inet_bind_bucket_for_each(tb, pos, head) \
        hlist_for_each_entry(tb, pos, head, node)

struct inet_bind_hashbucket {
        spinlock_t              lock;
        struct hlist_head       chain;
};

/*
 * Sockets can be hashed into either the established or the listening table.
 * We must use a different 'nulls' end-of-chain value for the listening
 * hash table, or we might find a socket that was closed and
 * reallocated/inserted into the established hash table.
 */
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
        spinlock_t              lock;
        struct hlist_nulls_head head;
};
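
/*
 * A hedged sketch of why distinct nulls bases matter: the RCU lookup in
 * __inet_lookup_established() (net/ipv4/inet_hashtables.c) walks a chain
 * locklessly and rechecks the terminating nulls value; if it does not match
 * the slot it started from, the socket it followed was freed and rehashed
 * elsewhere, so the walk restarts.  Names below are illustrative.
 */
#if 0
begin:
        sk_nulls_for_each_rcu(sk, node, &head->chain) {
                if (INET_MATCH(sk, net, hash, acookie,
                               saddr, daddr, ports, dif))
                        goto found;
        }
        /* Chain ended on an unexpected nulls value: restart the walk. */
        if (get_nulls_value(node) != slot)
                goto begin;
#endif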

/* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE       32      /* Yes, really, this is all you need. */

struct inet_hashinfo {
        /* This is for sockets with full identity only.  Sockets here will
         * always be without wildcards and will have the following invariant:
         *
         *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
         *
         * TIME_WAIT sockets use a separate chain (twchain).
         */
        struct inet_ehash_bucket        *ehash;
        spinlock_t                      *ehash_locks;
        unsigned int                    ehash_mask;
        unsigned int                    ehash_locks_mask;

        /* Ok, let's try this, I give up, we do need a local binding
         * TCP hash as well as the others for fast bind/connect.
         */
        struct inet_bind_hashbucket     *bhash;

        unsigned int                    bhash_size;
        /* 4 bytes hole on 64 bit */

        struct kmem_cache               *bind_bucket_cachep;

        /* All the above members are written once at bootup and
         * never written again _or_ are predominantly read-access.
         *
         * Now align to a new cache line as all the following members
         * might be often dirty.
         */
        /* All sockets in TCP_LISTEN state will be in here.  This is the only
         * table where wildcard'd TCP sockets can exist.  Hash function here
         * is just local port number.
         */
        struct inet_listen_hashbucket   listening_hash[INET_LHTABLE_SIZE]
                                        ____cacheline_aligned_in_smp;

        atomic_t                        bsockets;
};
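
/*
 * For orientation (declared elsewhere, not in this header): each transport
 * protocol owns one instance of this structure, e.g. TCP's table defined in
 * net/ipv4/tcp_ipv4.c and exported through net/tcp.h.
 */
#if 0
        extern struct inet_hashinfo tcp_hashinfo;      /* net/tcp.h */
        struct inet_hashinfo tcp_hashinfo;             /* tcp_ipv4.c */
        EXPORT_SYMBOL(tcp_hashinfo);
#endif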

static inline struct inet_ehash_bucket *inet_ehash_bucket(
        struct inet_hashinfo *hashinfo,
        unsigned int hash)
{
        return &hashinfo->ehash[hash & hashinfo->ehash_mask];
}

static inline spinlock_t *inet_ehash_lockp(
        struct inet_hashinfo *hashinfo,
        unsigned int hash)
{
        return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
}
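
/*
 * Hedged example of how the two helpers above are paired (the shape of the
 * insert path, e.g. __inet_hash_nolisten()): the same hash value selects
 * both the chain and the spinlock guarding it.
 */
#if 0
        struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, sk->sk_hash);
        spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

        spin_lock(lock);
        __sk_nulls_add_node_rcu(sk, &head->chain);
        spin_unlock(lock);
#endif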

static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
        unsigned int i, size = 256;
#if defined(CONFIG_PROVE_LOCKING)
        unsigned int nr_pcpus = 2;
#else
        unsigned int nr_pcpus = num_possible_cpus();
#endif
        if (nr_pcpus >= 4)
                size = 512;
        if (nr_pcpus >= 8)
                size = 1024;
        if (nr_pcpus >= 16)
                size = 2048;
        if (nr_pcpus >= 32)
                size = 4096;
        if (sizeof(spinlock_t) != 0) {
#ifdef CONFIG_NUMA
                if (size * sizeof(spinlock_t) > PAGE_SIZE)
                        hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
                else
#endif
                hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
                                                GFP_KERNEL);
                if (!hashinfo->ehash_locks)
                        return ENOMEM;
                for (i = 0; i < size; i++)
                        spin_lock_init(&hashinfo->ehash_locks[i]);
        }
        hashinfo->ehash_locks_mask = size - 1;
        return 0;
}

static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
        if (hashinfo->ehash_locks) {
#ifdef CONFIG_NUMA
                unsigned int size = (hashinfo->ehash_locks_mask + 1) *
                                                        sizeof(spinlock_t);
                if (size > PAGE_SIZE)
                        vfree(hashinfo->ehash_locks);
                else
#endif
                kfree(hashinfo->ehash_locks);
                hashinfo->ehash_locks = NULL;
        }
}
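
/*
 * Minimal usage sketch, assuming a protocol init path that sets up the
 * ehash and then this lock array (the error label is hypothetical):
 */
#if 0
        if (inet_ehash_locks_alloc(hashinfo))
                goto out_free_ehash;    /* any non-zero return means allocation failed */
        /* ... and on teardown the helper picks vfree() vs kfree() by itself: */
        inet_ehash_locks_free(hashinfo);
#endif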

extern struct inet_bind_bucket *
                    inet_bind_bucket_create(struct kmem_cache *cachep,
                                            struct net *net,
                                            struct inet_bind_hashbucket *head,
                                            const unsigned short snum);
extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
                                     struct inet_bind_bucket *tb);

static inline int inet_bhashfn(struct net *net,
                const __u16 lport, const int bhash_size)
{
        return (lport + net_hash_mix(net)) & (bhash_size - 1);
}

extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
                           const unsigned short snum);

/* These can have wildcards, don't try too hard. */
static inline int inet_lhashfn(struct net *net, const unsigned short num)
{
        return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
}

static inline int inet_sk_listen_hashfn(const struct sock *sk)
{
        return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num);
}
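
/*
 * Hedged illustration of how the bind hash is consulted: the port plus the
 * per-namespace mix selects one inet_bind_hashbucket, whose lock then covers
 * the walk over its chain (roughly the shape of inet_csk_get_port()).
 */
#if 0
        struct inet_bind_hashbucket *head =
                &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)];
        struct inet_bind_bucket *tb;
        struct hlist_node *node;

        spin_lock(&head->lock);
        inet_bind_bucket_for_each(tb, node, &head->chain)
                if (net_eq(ib_net(tb), net) && tb->port == snum)
                        goto tb_found;
        spin_unlock(&head->lock);
#endif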

/* Caller must disable local BH processing. */
extern void __inet_inherit_port(struct sock *sk, struct sock *child);

extern void inet_put_port(struct sock *sk);

void inet_hashinfo_init(struct inet_hashinfo *h);

extern int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
extern void inet_hash(struct sock *sk);
extern void inet_unhash(struct sock *sk);

extern struct sock *__inet_lookup_listener(struct net *net,
                                           struct inet_hashinfo *hashinfo,
                                           const __be32 daddr,
                                           const unsigned short hnum,
                                           const int dif);

static inline struct sock *inet_lookup_listener(struct net *net,
                struct inet_hashinfo *hashinfo,
                __be32 daddr, __be16 dport, int dif)
{
        return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif);
}

/* Socket demux engine toys. */
/* What happens here is ugly; there's a pair of adjacent fields in
   struct inet_sock; __be16 dport followed by __u16 num.  We want to
   search by pair, so we combine the keys into a single 32bit value
   and compare with 32bit value read from &...->dport.  Let's at least
   make sure that it's not mixed with anything else...
   On 64bit targets we combine comparisons with pair of adjacent __be32
   fields in the same way.
*/
typedef __u32 __bitwise __portpair;
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
        ((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
        ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif

#if (BITS_PER_LONG == 64)
typedef __u64 __bitwise __addrpair;
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
        const __addrpair __name = (__force __addrpair) ( \
                                   (((__force __u64)(__be32)(__saddr)) << 32) | \
                                   ((__force __u64)(__be32)(__daddr)));
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
        const __addrpair __name = (__force __addrpair) ( \
                                   (((__force __u64)(__be32)(__daddr)) << 32) | \
                                   ((__force __u64)(__be32)(__saddr)));
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
        (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
         ((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie))  && \
         ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))   && \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
        (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
         ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
         ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
        (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
         (inet_sk(__sk)->inet_daddr == (__saddr)) && \
         (inet_sk(__sk)->inet_rcv_saddr == (__daddr)) && \
         ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
        (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
         (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
         (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
         ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
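
/*
 * Sketch of how the cookie and match macros combine in the established
 * lookup (per __inet_lookup_established() in net/ipv4/inet_hashtables.c);
 * on 64-bit hosts the address check is a single 64-bit compare and the
 * port check a single 32-bit compare.  The TIME_WAIT chain is walked the
 * same way with INET_TW_MATCH.  Local variable names are illustrative.
 */
#if 0
        INET_ADDR_COOKIE(acookie, saddr, daddr)
        const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
        unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);

        sk_nulls_for_each_rcu(sk, node, &head->chain) {
                if (INET_MATCH(sk, net, hash, acookie,
                               saddr, daddr, ports, dif))
                        goto hit;
        }
#endif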

/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
extern struct sock * __inet_lookup_established(struct net *net,
                struct inet_hashinfo *hashinfo,
                const __be32 saddr, const __be16 sport,
                const __be32 daddr, const u16 hnum, const int dif);

static inline struct sock *
        inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
                                const __be32 saddr, const __be16 sport,
                                const __be32 daddr, const __be16 dport,
                                const int dif)
{
        return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
                                         ntohs(dport), dif);
}

static inline struct sock *__inet_lookup(struct net *net,
                                         struct inet_hashinfo *hashinfo,
                                         const __be32 saddr, const __be16 sport,
                                         const __be32 daddr, const __be16 dport,
                                         const int dif)
{
        u16 hnum = ntohs(dport);
        struct sock *sk = __inet_lookup_established(net, hashinfo,
                                saddr, sport, daddr, hnum, dif);

        return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif);
}

static inline struct sock *inet_lookup(struct net *net,
                                       struct inet_hashinfo *hashinfo,
                                       const __be32 saddr, const __be16 sport,
                                       const __be32 daddr, const __be16 dport,
                                       const int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif);
        local_bh_enable();

        return sk;
}

static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
                                             struct sk_buff *skb,
                                             const __be16 sport,
                                             const __be16 dport)
{
        struct sock *sk;
        const struct iphdr *iph = ip_hdr(skb);

        if (unlikely(sk = skb_steal_sock(skb)))
                return sk;
        else
                return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
                                     iph->saddr, sport,
                                     iph->daddr, dport, inet_iif(skb));
}
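
/*
 * For context, a hedged sketch of the usual receive-path caller
 * (tcp_v4_rcv() takes this shape): the lookup is keyed by the TCP
 * header's ports and the packet's addresses.
 */
#if 0
        const struct tcphdr *th = tcp_hdr(skb);

        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
        if (!sk)
                goto no_tcp_socket;
#endif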

extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
                struct sock *sk,
                u32 port_offset,
                int (*check_established)(struct inet_timewait_death_row *,
                        struct sock *, __u16, struct inet_timewait_sock **),
                int (*hash)(struct sock *sk, struct inet_timewait_sock *twp));

extern int inet_hash_connect(struct inet_timewait_death_row *death_row,
                             struct sock *sk);
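
/*
 * Hedged example of the connect-side entry point: tcp_v4_connect() finishes
 * source-port selection with a call of this form, while __inet_hash_connect()
 * is the shared engine taking protocol-specific check_established/hash
 * callbacks (also used by the IPv6 variant).  The error label is hypothetical.
 */
#if 0
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;
#endif
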
#endif /* _INET_HASHTABLES_H */
