Line data Source code
1 : /*
2 : * INET An implementation of the TCP/IP protocol suite for the LINUX
3 : * operating system. INET is implemented using the BSD Socket
4 : * interface as the means of communication with the user level.
5 : *
6 : * PACKET - implements raw packet sockets.
7 : *
8 : * Authors: Ross Biro
9 : * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 : * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 : *
12 : * Fixes:
13 : * Alan Cox : verify_area() now used correctly
14 : * Alan Cox : new skbuff lists, look ma no backlogs!
15 : * Alan Cox : tidied skbuff lists.
16 : * Alan Cox : Now uses generic datagram routines I
17 : * added. Also fixed the peek/read crash
18 : * from all old Linux datagram code.
19 : * Alan Cox : Uses the improved datagram code.
20 : * Alan Cox : Added NULL's for socket options.
21 : * Alan Cox : Re-commented the code.
22 : * Alan Cox : Use new kernel side addressing
23 : * Rob Janssen : Correct MTU usage.
24 : * Dave Platt : Counter leaks caused by incorrect
25 : * interrupt locking and some slightly
26 : * dubious gcc output. Can you read
27 : * compiler: it said _VOLATILE_
28 : * Richard Kooijman : Timestamp fixes.
29 : * Alan Cox : New buffers. Use sk->mac.raw.
30 : * Alan Cox : sendmsg/recvmsg support.
31 : * Alan Cox : Protocol setting support
32 : * Alexey Kuznetsov : Untied from IPv4 stack.
33 : * Cyrus Durgin : Fixed kerneld for kmod.
34 : * Michal Ostrowski : Module initialization cleanup.
35 : * Ulises Alonso : Frame number limit removal and
36 : * packet_set_ring memory leak.
37 : * Eric Biederman : Allow for > 8 byte hardware addresses.
38 : * The convention is that longer addresses
39 : * will simply extend the hardware address
40 : * byte arrays at the end of sockaddr_ll
41 : * and packet_mreq.
42 : * Johann Baudy : Added TX RING.
43 : *
44 : * This program is free software; you can redistribute it and/or
45 : * modify it under the terms of the GNU General Public License
46 : * as published by the Free Software Foundation; either version
47 : * 2 of the License, or (at your option) any later version.
48 : *
49 : */
50 :
51 : #include <linux/types.h>
52 : #include <linux/mm.h>
53 : #include <linux/capability.h>
54 : #include <linux/fcntl.h>
55 : #include <linux/socket.h>
56 : #include <linux/in.h>
57 : #include <linux/inet.h>
58 : #include <linux/netdevice.h>
59 : #include <linux/if_packet.h>
60 : #include <linux/wireless.h>
61 : #include <linux/kernel.h>
62 : #include <linux/kmod.h>
63 : #include <net/net_namespace.h>
64 : #include <net/ip.h>
65 : #include <net/protocol.h>
66 : #include <linux/skbuff.h>
67 : #include <net/sock.h>
68 : #include <linux/errno.h>
69 : #include <linux/timer.h>
70 : #include <asm/system.h>
71 : #include <asm/uaccess.h>
72 : #include <asm/ioctls.h>
73 : #include <asm/page.h>
74 : #include <asm/cacheflush.h>
75 : #include <asm/io.h>
76 : #include <linux/proc_fs.h>
77 : #include <linux/seq_file.h>
78 : #include <linux/poll.h>
79 : #include <linux/module.h>
80 : #include <linux/init.h>
81 : #include <linux/mutex.h>
82 : #include <linux/if_vlan.h>
83 :
84 : #ifdef CONFIG_INET
85 : #include <net/inet_common.h>
86 : #endif
87 :
88 : /*
89 : Assumptions:
90 : - if device has no dev->hard_header routine, it adds and removes ll header
91 : inside itself. In this case ll header is invisible outside of device,
92 : but higher levels still should reserve dev->hard_header_len.
93 : Some devices are enough clever to reallocate skb, when header
94 : will not fit to reserved space (tunnel), another ones are silly
95 : (PPP).
96 : - packet socket receives packets with pulled ll header,
97 : so that SOCK_RAW should push it back.
98 :
99 : On receive:
100 : -----------
101 :
102 : Incoming, dev->hard_header!=NULL
103 : mac_header -> ll header
104 : data -> data
105 :
106 : Outgoing, dev->hard_header!=NULL
107 : mac_header -> ll header
108 : data -> ll header
109 :
110 : Incoming, dev->hard_header==NULL
111 : mac_header -> UNKNOWN position. It is very likely, that it points to ll
112 : header. PPP makes it, that is wrong, because introduce
113 : assymetry between rx and tx paths.
114 : data -> data
115 :
116 : Outgoing, dev->hard_header==NULL
117 : mac_header -> data. ll header is still not built!
118 : data -> data
119 :
120 : Resume
121 : If dev->hard_header==NULL we are unlikely to restore sensible ll header.
122 :
123 :
124 : On transmit:
125 : ------------
126 :
127 : dev->hard_header != NULL
128 : mac_header -> ll header
129 : data -> ll header
130 :
131 : dev->hard_header == NULL (ll header is added by device, we cannot control it)
132 : mac_header -> data
133 : data -> data
134 :
135 : We should set nh.raw on output to correct posistion,
136 : packet classifier depends on it.
137 : */
138 :
139 : /* Private packet socket structures. */
140 :
141 : struct packet_mclist {
142 : struct packet_mclist *next;
143 : int ifindex;
144 : int count;
145 : unsigned short type;
146 : unsigned short alen;
147 : unsigned char addr[MAX_ADDR_LEN];
148 : };
149 1 : /* identical to struct packet_mreq except it has
150 : * a longer address field.
151 : */
152 : struct packet_mreq_max {
153 : int mr_ifindex;
154 : unsigned short mr_type;
155 : unsigned short mr_alen;
156 : unsigned char mr_address[MAX_ADDR_LEN];
157 : };
158 :
159 : #ifdef CONFIG_PACKET_MMAP
160 : static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
161 : int closing, int tx_ring);
162 :
163 : struct packet_ring_buffer {
164 : char **pg_vec;
165 : unsigned int head;
166 : unsigned int frames_per_block;
167 : unsigned int frame_size;
168 : unsigned int frame_max;
169 :
170 : unsigned int pg_vec_order;
171 : unsigned int pg_vec_pages;
172 : unsigned int pg_vec_len;
173 :
174 : atomic_t pending;
175 : };
176 :
177 : struct packet_sock;
178 : static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
179 : #endif
180 :
181 : static void packet_flush_mclist(struct sock *sk);
182 1 :
183 : struct packet_sock {
184 : /* struct sock has to be the first member of packet_sock */
185 : struct sock sk;
186 : struct tpacket_stats stats;
187 : #ifdef CONFIG_PACKET_MMAP
188 : struct packet_ring_buffer rx_ring;
189 : struct packet_ring_buffer tx_ring;
190 : int copy_thresh;
191 : #endif
192 : spinlock_t bind_lock;
193 : struct mutex pg_vec_lock;
194 : unsigned int running:1, /* prot_hook is attached*/
195 : auxdata:1,
196 : origdev:1;
197 : int ifindex; /* bound device */
198 : __be16 num;
199 : struct packet_mclist *mclist;
200 : #ifdef CONFIG_PACKET_MMAP
201 : atomic_t mapped;
202 : enum tpacket_versions tp_version;
203 : unsigned int tp_hdrlen;
204 : unsigned int tp_reserve;
205 : unsigned int tp_loss:1;
206 : #endif
207 : struct packet_type prot_hook ____cacheline_aligned_in_smp;
208 : };
209 2 :
210 : struct packet_skb_cb {
211 : unsigned int origlen;
212 : union {
213 : struct sockaddr_pkt pkt;
214 : struct sockaddr_ll ll;
215 : } sa;
216 : };
217 :
218 : #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
219 :
220 : #ifdef CONFIG_PACKET_MMAP
221 :
222 : static void __packet_set_status(struct packet_sock *po, void *frame, int status)
223 : {
224 : union {
225 : struct tpacket_hdr *h1;
226 : struct tpacket2_hdr *h2;
227 : void *raw;
228 : } h;
229 :
230 : h.raw = frame;
231 : switch (po->tp_version) {
232 : case TPACKET_V1:
233 : h.h1->tp_status = status;
234 : flush_dcache_page(virt_to_page(&h.h1->tp_status));
235 : break;
236 : case TPACKET_V2:
237 : h.h2->tp_status = status;
238 : flush_dcache_page(virt_to_page(&h.h2->tp_status));
239 : break;
240 : default:
241 : pr_err("TPACKET version not supported\n");
242 : BUG();
243 : }
244 :
245 : smp_wmb();
246 : }
247 :
248 : static int __packet_get_status(struct packet_sock *po, void *frame)
249 : {
250 : union {
251 : struct tpacket_hdr *h1;
252 : struct tpacket2_hdr *h2;
253 : void *raw;
254 : } h;
255 :
256 : smp_rmb();
257 :
258 : h.raw = frame;
259 : switch (po->tp_version) {
260 : case TPACKET_V1:
261 : flush_dcache_page(virt_to_page(&h.h1->tp_status));
262 : return h.h1->tp_status;
263 : case TPACKET_V2:
264 : flush_dcache_page(virt_to_page(&h.h2->tp_status));
265 : return h.h2->tp_status;
266 : default:
267 : pr_err("TPACKET version not supported\n");
268 : BUG();
269 : return 0;
270 : }
271 : }
272 :
273 : static void *packet_lookup_frame(struct packet_sock *po,
274 : struct packet_ring_buffer *rb,
275 : unsigned int position,
276 : int status)
277 : {
278 : unsigned int pg_vec_pos, frame_offset;
279 : union {
280 : struct tpacket_hdr *h1;
281 : struct tpacket2_hdr *h2;
282 : void *raw;
283 : } h;
284 :
285 : pg_vec_pos = position / rb->frames_per_block;
286 : frame_offset = position % rb->frames_per_block;
287 :
288 : h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
289 :
290 : if (status != __packet_get_status(po, h.raw))
291 : return NULL;
292 :
293 : return h.raw;
294 : }
295 :
296 : static inline void *packet_current_frame(struct packet_sock *po,
297 : struct packet_ring_buffer *rb,
298 : int status)
299 : {
300 : return packet_lookup_frame(po, rb, rb->head, status);
301 : }
302 :
303 : static inline void *packet_previous_frame(struct packet_sock *po,
304 : struct packet_ring_buffer *rb,
305 : int status)
306 : {
307 : unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
308 : return packet_lookup_frame(po, rb, previous, status);
309 : }
310 :
311 : static inline void packet_increment_head(struct packet_ring_buffer *buff)
312 : {
313 : buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
314 : }
315 :
316 : #endif
317 :
318 : static inline struct packet_sock *pkt_sk(struct sock *sk)
319 : {
320 10 : return (struct packet_sock *)sk;
321 : }
322 :
323 : static void packet_sock_destruct(struct sock *sk)
324 : {
325 0 : WARN_ON(atomic_read(&sk->sk_rmem_alloc));
326 0 : WARN_ON(atomic_read(&sk->sk_wmem_alloc));
327 0 :
328 0 : if (!sock_flag(sk, SOCK_DEAD)) {
329 0 : pr_err("Attempt to release alive packet socket: %p\n", sk);
330 0 : return;
331 0 : }
332 0 :
333 : sk_refcnt_debug_dec(sk);
334 : }
335 :
336 :
337 1 : static const struct proto_ops packet_ops;
338 :
339 1 : static const struct proto_ops packet_ops_spkt;
340 :
341 : static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
342 : struct packet_type *pt, struct net_device *orig_dev)
343 : {
344 0 : struct sock *sk;
345 0 : struct sockaddr_pkt *spkt;
346 0 :
347 0 : /*
348 0 : * When we registered the protocol we saved the socket in the data
349 0 : * field for just this event.
350 0 : */
351 :
352 0 : sk = pt->af_packet_priv;
353 :
354 : /*
355 : * Yank back the headers [hope the device set this
356 : * right or kerboom...]
357 : *
358 : * Incoming packets have ll header pulled,
359 : * push it back.
360 : *
361 : * For outgoing ones skb->data == skb_mac_header(skb)
362 : * so that this procedure is noop.
363 : */
364 :
365 0 : if (skb->pkt_type == PACKET_LOOPBACK)
366 0 : goto out;
367 :
368 0 : if (!net_eq(dev_net(dev), sock_net(sk)))
369 0 : goto out;
370 :
371 0 : skb = skb_share_check(skb, GFP_ATOMIC);
372 0 : if (skb == NULL)
373 0 : goto oom;
374 :
375 : /* drop any routing info */
376 0 : skb_dst_drop(skb);
377 :
378 : /* drop conntrack reference */
379 0 : nf_reset(skb);
380 :
381 0 : spkt = &PACKET_SKB_CB(skb)->sa.pkt;
382 :
383 0 : skb_push(skb, skb->data - skb_mac_header(skb));
384 :
385 : /*
386 : * The SOCK_PACKET socket receives _all_ frames.
387 : */
388 :
389 0 : spkt->spkt_family = dev->type;
390 0 : strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
391 0 : spkt->spkt_protocol = skb->protocol;
392 :
393 : /*
394 : * Charge the memory to the socket. This is done specifically
395 : * to prevent sockets using all the memory up.
396 : */
397 :
398 0 : if (sock_queue_rcv_skb(sk, skb) == 0)
399 0 : return 0;
400 :
401 : out:
402 0 : kfree_skb(skb);
403 0 : oom:
404 0 : return 0;
405 : }
406 :
407 :
408 : /*
409 : * Output a raw packet to a device layer. This bypasses all the other
410 : * protocol layers and you must therefore supply it with a complete frame
411 : */
412 :
413 : static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
414 : struct msghdr *msg, size_t len)
415 : {
416 2 : struct sock *sk = sock->sk;
417 3 : struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
418 2 : struct sk_buff *skb = NULL;
419 1 : struct net_device *dev;
420 2 : __be16 proto = 0;
421 1 : int err;
422 1 :
423 1 : /*
424 1 : * Get and verify the address.
425 1 : */
426 :
427 2 : if (saddr) {
428 3 : if (msg->msg_namelen < sizeof(struct sockaddr))
429 1 : return -EINVAL;
430 2 : if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
431 1 : proto = saddr->spkt_protocol;
432 : } else
433 1 : return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
434 :
435 : /*
436 : * Find the device first to size check it
437 : */
438 :
439 1 : saddr->spkt_device[13] = 0;
440 1 : retry:
441 2 : rcu_read_lock();
442 3 : dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
443 1 : err = -ENODEV;
444 2 : if (dev == NULL)
445 1 : goto out_unlock;
446 :
447 1 : err = -ENETDOWN;
448 2 : if (!(dev->flags & IFF_UP))
449 1 : goto out_unlock;
450 :
451 : /*
452 : * You may not queue a frame bigger than the mtu. This is the lowest level
453 : * raw protocol and you must do your own fragmentation at this level.
454 : */
455 :
456 1 : err = -EMSGSIZE;
457 3 : if (len > dev->mtu + dev->hard_header_len)
458 1 : goto out_unlock;
459 :
460 2 : if (!skb) {
461 3 : size_t reserved = LL_RESERVED_SPACE(dev);
462 8 : unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
463 :
464 2 : rcu_read_unlock();
465 1 : skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
466 2 : if (skb == NULL)
467 1 : return -ENOBUFS;
468 : /* FIXME: Save some space for broken drivers that write a hard
469 : * header at transmission time by themselves. PPP is the notable
470 : * one here. This should really be fixed at the driver level.
471 : */
472 2 : skb_reserve(skb, reserved);
473 2 : skb_reset_network_header(skb);
474 :
475 : /* Try to align data part correctly */
476 2 : if (hhlen) {
477 1 : skb->data -= hhlen;
478 1 : skb->tail -= hhlen;
479 2 : if (len < hhlen)
480 2 : skb_reset_network_header(skb);
481 : }
482 4 : err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
483 4 : if (err)
484 2 : goto out_free;
485 2 : goto retry;
486 : }
487 :
488 :
489 1 : skb->protocol = proto;
490 1 : skb->dev = dev;
491 1 : skb->priority = sk->sk_priority;
492 1 : skb->mark = sk->sk_mark;
493 :
494 1 : dev_queue_xmit(skb);
495 2 : rcu_read_unlock();
496 1 : return len;
497 1 :
498 : out_unlock:
499 2 : rcu_read_unlock();
500 : out_free:
501 4 : kfree_skb(skb);
502 3 : return err;
503 : }
504 :
505 : static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
506 : unsigned int res)
507 0 : {
508 0 : struct sk_filter *filter;
509 :
510 0 : rcu_read_lock_bh();
511 0 : filter = rcu_dereference(sk->sk_filter);
512 0 : if (filter != NULL)
513 0 : res = sk_run_filter(skb, filter->insns, filter->len);
514 0 : rcu_read_unlock_bh();
515 :
516 0 : return res;
517 : }
518 :
519 : /*
520 : This function makes lazy skb cloning in hope that most of packets
521 : are discarded by BPF.
522 :
523 : Note tricky part: we DO mangle shared skb! skb->data, skb->len
524 : and skb->cb are mangled. It works because (and until) packets
525 : falling here are owned by current CPU. Output packets are cloned
526 : by dev_queue_xmit_nit(), input packets are processed by net_bh
527 : sequencially, so that if we return skb to original state on exit,
528 : we will not harm anyone.
529 : */
530 :
531 : static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
532 : struct packet_type *pt, struct net_device *orig_dev)
533 : {
534 0 : struct sock *sk;
535 0 : struct sockaddr_ll *sll;
536 0 : struct packet_sock *po;
537 0 : u8 *skb_head = skb->data;
538 0 : int skb_len = skb->len;
539 0 : unsigned int snaplen, res;
540 0 :
541 0 : if (skb->pkt_type == PACKET_LOOPBACK)
542 0 : goto drop;
543 0 :
544 0 : sk = pt->af_packet_priv;
545 0 : po = pkt_sk(sk);
546 0 :
547 0 : if (!net_eq(dev_net(dev), sock_net(sk)))
548 0 : goto drop;
549 0 :
550 0 : skb->dev = dev;
551 0 :
552 0 : if (dev->header_ops) {
553 0 : /* The device has an explicit notion of ll header,
554 0 : exported to higher levels.
555 0 :
556 : Otherwise, the device hides datails of it frame
557 : structure, so that corresponding packet head
558 : never delivered to user.
559 : */
560 0 : if (sk->sk_type != SOCK_DGRAM)
561 0 : skb_push(skb, skb->data - skb_mac_header(skb));
562 0 : else if (skb->pkt_type == PACKET_OUTGOING) {
563 : /* Special case: outgoing packets have ll header at head */
564 0 : skb_pull(skb, skb_network_offset(skb));
565 : }
566 : }
567 :
568 0 : snaplen = skb->len;
569 :
570 0 : res = run_filter(skb, sk, snaplen);
571 0 : if (!res)
572 0 : goto drop_n_restore;
573 0 : if (snaplen > res)
574 0 : snaplen = res;
575 :
576 0 : if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
577 : (unsigned)sk->sk_rcvbuf)
578 0 : goto drop_n_acct;
579 :
580 0 : if (skb_shared(skb)) {
581 0 : struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
582 0 : if (nskb == NULL)
583 0 : goto drop_n_acct;
584 :
585 0 : if (skb_head != skb->data) {
586 0 : skb->data = skb_head;
587 0 : skb->len = skb_len;
588 : }
589 0 : kfree_skb(skb);
590 0 : skb = nskb;
591 : }
592 :
593 : BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
594 : sizeof(skb->cb));
595 :
596 0 : sll = &PACKET_SKB_CB(skb)->sa.ll;
597 0 : sll->sll_family = AF_PACKET;
598 0 : sll->sll_hatype = dev->type;
599 0 : sll->sll_protocol = skb->protocol;
600 0 : sll->sll_pkttype = skb->pkt_type;
601 0 : if (unlikely(po->origdev))
602 0 : sll->sll_ifindex = orig_dev->ifindex;
603 : else
604 0 : sll->sll_ifindex = dev->ifindex;
605 :
606 0 : sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
607 :
608 0 : PACKET_SKB_CB(skb)->origlen = skb->len;
609 :
610 0 : if (pskb_trim(skb, snaplen))
611 0 : goto drop_n_acct;
612 :
613 0 : skb_set_owner_r(skb, sk);
614 0 : skb->dev = NULL;
615 0 : skb_dst_drop(skb);
616 :
617 : /* drop conntrack reference */
618 0 : nf_reset(skb);
619 :
620 0 : spin_lock(&sk->sk_receive_queue.lock);
621 0 : po->stats.tp_packets++;
622 0 : skb->dropcount = atomic_read(&sk->sk_drops);
623 0 : __skb_queue_tail(&sk->sk_receive_queue, skb);
624 0 : spin_unlock(&sk->sk_receive_queue.lock);
625 0 : sk->sk_data_ready(sk, skb->len);
626 0 : return 0;
627 0 :
628 : drop_n_acct:
629 0 : po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
630 :
631 0 : drop_n_restore:
632 0 : if (skb_head != skb->data && skb_shared(skb)) {
633 0 : skb->data = skb_head;
634 0 : skb->len = skb_len;
635 : }
636 : drop:
637 0 : consume_skb(skb);
638 0 : return 0;
639 : }
640 :
641 0 : #ifdef CONFIG_PACKET_MMAP
642 : static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
643 : struct packet_type *pt, struct net_device *orig_dev)
644 : {
645 : struct sock *sk;
646 : struct packet_sock *po;
647 : struct sockaddr_ll *sll;
648 : union {
649 : struct tpacket_hdr *h1;
650 : struct tpacket2_hdr *h2;
651 : void *raw;
652 : } h;
653 : u8 *skb_head = skb->data;
654 : int skb_len = skb->len;
655 : unsigned int snaplen, res;
656 : unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
657 : unsigned short macoff, netoff, hdrlen;
658 : struct sk_buff *copy_skb = NULL;
659 : struct timeval tv;
660 : struct timespec ts;
661 :
662 : if (skb->pkt_type == PACKET_LOOPBACK)
663 : goto drop;
664 :
665 : sk = pt->af_packet_priv;
666 : po = pkt_sk(sk);
667 :
668 : if (!net_eq(dev_net(dev), sock_net(sk)))
669 : goto drop;
670 :
671 : if (dev->header_ops) {
672 : if (sk->sk_type != SOCK_DGRAM)
673 : skb_push(skb, skb->data - skb_mac_header(skb));
674 : else if (skb->pkt_type == PACKET_OUTGOING) {
675 : /* Special case: outgoing packets have ll header at head */
676 : skb_pull(skb, skb_network_offset(skb));
677 : }
678 : }
679 :
680 : if (skb->ip_summed == CHECKSUM_PARTIAL)
681 : status |= TP_STATUS_CSUMNOTREADY;
682 :
683 : snaplen = skb->len;
684 :
685 : res = run_filter(skb, sk, snaplen);
686 : if (!res)
687 : goto drop_n_restore;
688 : if (snaplen > res)
689 : snaplen = res;
690 :
691 : if (sk->sk_type == SOCK_DGRAM) {
692 : macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
693 : po->tp_reserve;
694 : } else {
695 : unsigned maclen = skb_network_offset(skb);
696 : netoff = TPACKET_ALIGN(po->tp_hdrlen +
697 : (maclen < 16 ? 16 : maclen)) +
698 : po->tp_reserve;
699 : macoff = netoff - maclen;
700 : }
701 :
702 : if (macoff + snaplen > po->rx_ring.frame_size) {
703 : if (po->copy_thresh &&
704 : atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
705 : (unsigned)sk->sk_rcvbuf) {
706 : if (skb_shared(skb)) {
707 : copy_skb = skb_clone(skb, GFP_ATOMIC);
708 : } else {
709 : copy_skb = skb_get(skb);
710 : skb_head = skb->data;
711 : }
712 : if (copy_skb)
713 : skb_set_owner_r(copy_skb, sk);
714 : }
715 : snaplen = po->rx_ring.frame_size - macoff;
716 : if ((int)snaplen < 0)
717 : snaplen = 0;
718 : }
719 :
720 : spin_lock(&sk->sk_receive_queue.lock);
721 : h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
722 : if (!h.raw)
723 : goto ring_is_full;
724 : packet_increment_head(&po->rx_ring);
725 : po->stats.tp_packets++;
726 : if (copy_skb) {
727 : status |= TP_STATUS_COPY;
728 : __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
729 : }
730 : if (!po->stats.tp_drops)
731 : status &= ~TP_STATUS_LOSING;
732 : spin_unlock(&sk->sk_receive_queue.lock);
733 :
734 : skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
735 :
736 : switch (po->tp_version) {
737 : case TPACKET_V1:
738 : h.h1->tp_len = skb->len;
739 : h.h1->tp_snaplen = snaplen;
740 : h.h1->tp_mac = macoff;
741 : h.h1->tp_net = netoff;
742 : if (skb->tstamp.tv64)
743 : tv = ktime_to_timeval(skb->tstamp);
744 : else
745 : do_gettimeofday(&tv);
746 : h.h1->tp_sec = tv.tv_sec;
747 : h.h1->tp_usec = tv.tv_usec;
748 : hdrlen = sizeof(*h.h1);
749 : break;
750 : case TPACKET_V2:
751 : h.h2->tp_len = skb->len;
752 : h.h2->tp_snaplen = snaplen;
753 : h.h2->tp_mac = macoff;
754 : h.h2->tp_net = netoff;
755 : if (skb->tstamp.tv64)
756 : ts = ktime_to_timespec(skb->tstamp);
757 : else
758 : getnstimeofday(&ts);
759 : h.h2->tp_sec = ts.tv_sec;
760 : h.h2->tp_nsec = ts.tv_nsec;
761 : h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
762 : h.h2->tp_padding = 0;
763 : hdrlen = sizeof(*h.h2);
764 : break;
765 : default:
766 : BUG();
767 : }
768 :
769 : sll = h.raw + TPACKET_ALIGN(hdrlen);
770 : sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
771 : sll->sll_family = AF_PACKET;
772 : sll->sll_hatype = dev->type;
773 : sll->sll_protocol = skb->protocol;
774 : sll->sll_pkttype = skb->pkt_type;
775 : if (unlikely(po->origdev))
776 : sll->sll_ifindex = orig_dev->ifindex;
777 : else
778 : sll->sll_ifindex = dev->ifindex;
779 :
780 : __packet_set_status(po, h.raw, status);
781 : smp_mb();
782 : {
783 : struct page *p_start, *p_end;
784 : u8 *h_end = h.raw + macoff + snaplen - 1;
785 :
786 : p_start = virt_to_page(h.raw);
787 : p_end = virt_to_page(h_end);
788 : while (p_start <= p_end) {
789 : flush_dcache_page(p_start);
790 : p_start++;
791 : }
792 : }
793 :
794 : sk->sk_data_ready(sk, 0);
795 :
796 : drop_n_restore:
797 : if (skb_head != skb->data && skb_shared(skb)) {
798 : skb->data = skb_head;
799 : skb->len = skb_len;
800 : }
801 : drop:
802 : kfree_skb(skb);
803 : return 0;
804 :
805 : ring_is_full:
806 : po->stats.tp_drops++;
807 : spin_unlock(&sk->sk_receive_queue.lock);
808 :
809 : sk->sk_data_ready(sk, 0);
810 : kfree_skb(copy_skb);
811 : goto drop_n_restore;
812 : }
813 :
814 : static void tpacket_destruct_skb(struct sk_buff *skb)
815 : {
816 : struct packet_sock *po = pkt_sk(skb->sk);
817 : void *ph;
818 :
819 : BUG_ON(skb == NULL);
820 :
821 : if (likely(po->tx_ring.pg_vec)) {
822 : ph = skb_shinfo(skb)->destructor_arg;
823 : BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
824 : BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
825 : atomic_dec(&po->tx_ring.pending);
826 : __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
827 : }
828 :
829 : sock_wfree(skb);
830 : }
831 :
832 : static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
833 : void *frame, struct net_device *dev, int size_max,
834 : __be16 proto, unsigned char *addr)
835 : {
836 : union {
837 : struct tpacket_hdr *h1;
838 : struct tpacket2_hdr *h2;
839 : void *raw;
840 : } ph;
841 : int to_write, offset, len, tp_len, nr_frags, len_max;
842 : struct socket *sock = po->sk.sk_socket;
843 : struct page *page;
844 : void *data;
845 : int err;
846 :
847 : ph.raw = frame;
848 :
849 : skb->protocol = proto;
850 : skb->dev = dev;
851 : skb->priority = po->sk.sk_priority;
852 : skb->mark = po->sk.sk_mark;
853 : skb_shinfo(skb)->destructor_arg = ph.raw;
854 :
855 : switch (po->tp_version) {
856 : case TPACKET_V2:
857 : tp_len = ph.h2->tp_len;
858 : break;
859 : default:
860 : tp_len = ph.h1->tp_len;
861 : break;
862 : }
863 : if (unlikely(tp_len > size_max)) {
864 : pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
865 : return -EMSGSIZE;
866 : }
867 :
868 : skb_reserve(skb, LL_RESERVED_SPACE(dev));
869 : skb_reset_network_header(skb);
870 :
871 : data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
872 : to_write = tp_len;
873 :
874 : if (sock->type == SOCK_DGRAM) {
875 : err = dev_hard_header(skb, dev, ntohs(proto), addr,
876 : NULL, tp_len);
877 : if (unlikely(err < 0))
878 : return -EINVAL;
879 : } else if (dev->hard_header_len) {
880 : /* net device doesn't like empty head */
881 : if (unlikely(tp_len <= dev->hard_header_len)) {
882 : pr_err("packet size is too short (%d < %d)\n",
883 : tp_len, dev->hard_header_len);
884 : return -EINVAL;
885 : }
886 :
887 : skb_push(skb, dev->hard_header_len);
888 : err = skb_store_bits(skb, 0, data,
889 : dev->hard_header_len);
890 : if (unlikely(err))
891 : return err;
892 :
893 : data += dev->hard_header_len;
894 : to_write -= dev->hard_header_len;
895 : }
896 :
897 : err = -EFAULT;
898 : page = virt_to_page(data);
899 : offset = offset_in_page(data);
900 : len_max = PAGE_SIZE - offset;
901 : len = ((to_write > len_max) ? len_max : to_write);
902 :
903 : skb->data_len = to_write;
904 : skb->len += to_write;
905 : skb->truesize += to_write;
906 : atomic_add(to_write, &po->sk.sk_wmem_alloc);
907 :
908 : while (likely(to_write)) {
909 : nr_frags = skb_shinfo(skb)->nr_frags;
910 :
911 : if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
912 : pr_err("Packet exceed the number of skb frags(%lu)\n",
913 : MAX_SKB_FRAGS);
914 : return -EFAULT;
915 : }
916 :
917 : flush_dcache_page(page);
918 : get_page(page);
919 : skb_fill_page_desc(skb,
920 : nr_frags,
921 : page++, offset, len);
922 : to_write -= len;
923 : offset = 0;
924 : len_max = PAGE_SIZE;
925 : len = ((to_write > len_max) ? len_max : to_write);
926 : }
927 :
928 : return tp_len;
929 : }
930 :
931 : static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
932 : {
933 : struct socket *sock;
934 : struct sk_buff *skb;
935 : struct net_device *dev;
936 : __be16 proto;
937 : int ifindex, err, reserve = 0;
938 : void *ph;
939 : struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
940 : int tp_len, size_max;
941 : unsigned char *addr;
942 : int len_sum = 0;
943 : int status = 0;
944 :
945 : sock = po->sk.sk_socket;
946 :
947 : mutex_lock(&po->pg_vec_lock);
948 :
949 : err = -EBUSY;
950 : if (saddr == NULL) {
951 : ifindex = po->ifindex;
952 : proto = po->num;
953 : addr = NULL;
954 : } else {
955 : err = -EINVAL;
956 : if (msg->msg_namelen < sizeof(struct sockaddr_ll))
957 : goto out;
958 : if (msg->msg_namelen < (saddr->sll_halen
959 : + offsetof(struct sockaddr_ll,
960 : sll_addr)))
961 : goto out;
962 : ifindex = saddr->sll_ifindex;
963 : proto = saddr->sll_protocol;
964 : addr = saddr->sll_addr;
965 : }
966 :
967 : dev = dev_get_by_index(sock_net(&po->sk), ifindex);
968 : err = -ENXIO;
969 : if (unlikely(dev == NULL))
970 : goto out;
971 :
972 : reserve = dev->hard_header_len;
973 :
974 : err = -ENETDOWN;
975 : if (unlikely(!(dev->flags & IFF_UP)))
976 : goto out_put;
977 :
978 : size_max = po->tx_ring.frame_size
979 : - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
980 :
981 : if (size_max > dev->mtu + reserve)
982 : size_max = dev->mtu + reserve;
983 :
984 : do {
985 : ph = packet_current_frame(po, &po->tx_ring,
986 : TP_STATUS_SEND_REQUEST);
987 :
988 : if (unlikely(ph == NULL)) {
989 : schedule();
990 : continue;
991 : }
992 :
993 : status = TP_STATUS_SEND_REQUEST;
994 : skb = sock_alloc_send_skb(&po->sk,
995 : LL_ALLOCATED_SPACE(dev)
996 : + sizeof(struct sockaddr_ll),
997 : 0, &err);
998 :
999 : if (unlikely(skb == NULL))
1000 : goto out_status;
1001 :
1002 : tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1003 : addr);
1004 :
1005 : if (unlikely(tp_len < 0)) {
1006 : if (po->tp_loss) {
1007 : __packet_set_status(po, ph,
1008 : TP_STATUS_AVAILABLE);
1009 : packet_increment_head(&po->tx_ring);
1010 : kfree_skb(skb);
1011 : continue;
1012 : } else {
1013 : status = TP_STATUS_WRONG_FORMAT;
1014 : err = tp_len;
1015 : goto out_status;
1016 : }
1017 : }
1018 :
1019 : skb->destructor = tpacket_destruct_skb;
1020 : __packet_set_status(po, ph, TP_STATUS_SENDING);
1021 : atomic_inc(&po->tx_ring.pending);
1022 :
1023 : status = TP_STATUS_SEND_REQUEST;
1024 : err = dev_queue_xmit(skb);
1025 : if (unlikely(err > 0)) {
1026 : err = net_xmit_errno(err);
1027 : if (err && __packet_get_status(po, ph) ==
1028 : TP_STATUS_AVAILABLE) {
1029 : /* skb was destructed already */
1030 : skb = NULL;
1031 : goto out_status;
1032 : }
1033 : /*
1034 : * skb was dropped but not destructed yet;
1035 : * let's treat it like congestion or err < 0
1036 : */
1037 : err = 0;
1038 : }
1039 : packet_increment_head(&po->tx_ring);
1040 : len_sum += tp_len;
1041 : } while (likely((ph != NULL) ||
1042 : ((!(msg->msg_flags & MSG_DONTWAIT)) &&
1043 : (atomic_read(&po->tx_ring.pending))))
1044 : );
1045 :
1046 : err = len_sum;
1047 : goto out_put;
1048 :
1049 : out_status:
1050 : __packet_set_status(po, ph, status);
1051 : kfree_skb(skb);
1052 : out_put:
1053 : dev_put(dev);
1054 : out:
1055 : mutex_unlock(&po->pg_vec_lock);
1056 : return err;
1057 : }
1058 : #endif
1059 :
1060 : static int packet_snd(struct socket *sock,
1061 : struct msghdr *msg, size_t len)
1062 0 : {
1063 0 : struct sock *sk = sock->sk;
1064 0 : struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1065 0 : struct sk_buff *skb;
1066 0 : struct net_device *dev;
1067 0 : __be16 proto;
1068 0 : unsigned char *addr;
1069 0 : int ifindex, err, reserve = 0;
1070 0 :
1071 0 : /*
1072 0 : * Get and verify the address.
1073 0 : */
1074 0 :
1075 0 : if (saddr == NULL) {
1076 0 : struct packet_sock *po = pkt_sk(sk);
1077 :
1078 0 : ifindex = po->ifindex;
1079 0 : proto = po->num;
1080 0 : addr = NULL;
1081 : } else {
1082 0 : err = -EINVAL;
1083 0 : if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1084 0 : goto out;
1085 0 : if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1086 0 : goto out;
1087 0 : ifindex = saddr->sll_ifindex;
1088 0 : proto = saddr->sll_protocol;
1089 0 : addr = saddr->sll_addr;
1090 : }
1091 :
1092 :
1093 0 : dev = dev_get_by_index(sock_net(sk), ifindex);
1094 0 : err = -ENXIO;
1095 0 : if (dev == NULL)
1096 0 : goto out_unlock;
1097 0 : if (sock->type == SOCK_RAW)
1098 0 : reserve = dev->hard_header_len;
1099 :
1100 0 : err = -ENETDOWN;
1101 0 : if (!(dev->flags & IFF_UP))
1102 0 : goto out_unlock;
1103 :
1104 0 : err = -EMSGSIZE;
1105 0 : if (len > dev->mtu+reserve)
1106 0 : goto out_unlock;
1107 :
1108 0 : skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
1109 : msg->msg_flags & MSG_DONTWAIT, &err);
1110 0 : if (skb == NULL)
1111 0 : goto out_unlock;
1112 :
1113 0 : skb_reserve(skb, LL_RESERVED_SPACE(dev));
1114 0 : skb_reset_network_header(skb);
1115 :
1116 0 : err = -EINVAL;
1117 0 : if (sock->type == SOCK_DGRAM &&
1118 : dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
1119 0 : goto out_free;
1120 :
1121 : /* Returns -EFAULT on error */
1122 0 : err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1123 0 : if (err)
1124 0 : goto out_free;
1125 :
1126 0 : skb->protocol = proto;
1127 0 : skb->dev = dev;
1128 0 : skb->priority = sk->sk_priority;
1129 0 : skb->mark = sk->sk_mark;
1130 :
1131 : /*
1132 : * Now send it
1133 : */
1134 :
1135 0 : err = dev_queue_xmit(skb);
1136 0 : if (err > 0 && (err = net_xmit_errno(err)) != 0)
1137 0 : goto out_unlock;
1138 :
1139 0 : dev_put(dev);
1140 :
1141 0 : return len;
1142 0 :
1143 : out_free:
1144 0 : kfree_skb(skb);
1145 : out_unlock:
1146 0 : if (dev)
1147 0 : dev_put(dev);
1148 : out:
1149 0 : return err;
1150 : }
1151 :
1152 0 : static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1153 : struct msghdr *msg, size_t len)
1154 : {
1155 0 : #ifdef CONFIG_PACKET_MMAP
1156 : struct sock *sk = sock->sk;
1157 : struct packet_sock *po = pkt_sk(sk);
1158 : if (po->tx_ring.pg_vec)
1159 : return tpacket_snd(po, msg);
1160 : else
1161 : #endif
1162 0 : return packet_snd(sock, msg, len);
1163 : }
1164 :
1165 : /*
1166 : * Close a PACKET socket. This is fairly simple. We immediately go
1167 : * to 'closed' state and remove our protocol entry in the device list.
1168 : */
1169 :
1170 : static int packet_release(struct socket *sock)
1171 : {
1172 2 : struct sock *sk = sock->sk;
1173 1 : struct packet_sock *po;
1174 1 : struct net *net;
1175 : #ifdef CONFIG_PACKET_MMAP
1176 : struct tpacket_req req;
1177 : #endif
1178 :
1179 2 : if (!sk)
1180 1 : return 0;
1181 :
1182 2 : net = sock_net(sk);
1183 2 : po = pkt_sk(sk);
1184 :
1185 1 : write_lock_bh(&net->packet.sklist_lock);
1186 3 : sk_del_node_init(sk);
1187 1 : sock_prot_inuse_add(net, sk->sk_prot, -1);
1188 1 : write_unlock_bh(&net->packet.sklist_lock);
1189 :
1190 : /*
1191 : * Unhook packet receive handler.
1192 : */
1193 :
1194 2 : if (po->running) {
1195 : /*
1196 : * Remove the protocol hook
1197 : */
1198 1 : dev_remove_pack(&po->prot_hook);
1199 1 : po->running = 0;
1200 1 : po->num = 0;
1201 2 : __sock_put(sk);
1202 : }
1203 :
1204 6 : packet_flush_mclist(sk);
1205 :
1206 : #ifdef CONFIG_PACKET_MMAP
1207 : memset(&req, 0, sizeof(req));
1208 :
1209 : if (po->rx_ring.pg_vec)
1210 : packet_set_ring(sk, &req, 1, 0);
1211 :
1212 : if (po->tx_ring.pg_vec)
1213 : packet_set_ring(sk, &req, 1, 1);
1214 : #endif
1215 :
1216 : /*
1217 : * Now the socket is dead. No more input will appear.
1218 : */
1219 :
1220 2 : sock_orphan(sk);
1221 1 : sock->sk = NULL;
1222 :
1223 : /* Purge queues */
1224 :
1225 1 : skb_queue_purge(&sk->sk_receive_queue);
1226 : sk_refcnt_debug_release(sk);
1227 :
1228 2 : sock_put(sk);
1229 1 : return 0;
1230 : }
1231 :
1232 : /*
1233 : * Attach a packet hook.
1234 : */
1235 :
1236 : static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1237 : {
1238 4 : struct packet_sock *po = pkt_sk(sk);
1239 1 : /*
1240 1 : * Detach an existing hook if present.
1241 : */
1242 :
1243 2 : lock_sock(sk);
1244 :
1245 2 : spin_lock(&po->bind_lock);
1246 2 : if (po->running) {
1247 2 : __sock_put(sk);
1248 1 : po->running = 0;
1249 1 : po->num = 0;
1250 2 : spin_unlock(&po->bind_lock);
1251 1 : dev_remove_pack(&po->prot_hook);
1252 2 : spin_lock(&po->bind_lock);
1253 : }
1254 :
1255 2 : po->num = protocol;
1256 2 : po->prot_hook.type = protocol;
1257 2 : po->prot_hook.dev = dev;
1258 :
1259 12 : po->ifindex = dev ? dev->ifindex : 0;
1260 :
1261 4 : if (protocol == 0)
1262 2 : goto out_unlock;
1263 :
1264 10 : if (!dev || (dev->flags & IFF_UP)) {
1265 2 : dev_add_pack(&po->prot_hook);
1266 4 : sock_hold(sk);
1267 1 : po->running = 1;
1268 : } else {
1269 2 : sk->sk_err = ENETDOWN;
1270 6 : if (!sock_flag(sk, SOCK_DEAD))
1271 2 : sk->sk_error_report(sk);
1272 : }
1273 :
1274 : out_unlock:
1275 8 : spin_unlock(&po->bind_lock);
1276 1 : release_sock(sk);
1277 3 : return 0;
1278 : }
1279 :
1280 : /*
1281 : * Bind a packet socket to a device
1282 : */
1283 :
1284 : static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1285 : int addr_len)
1286 1 : {
1287 2 : struct sock *sk = sock->sk;
1288 1 : char name[15];
1289 1 : struct net_device *dev;
1290 2 : int err = -ENODEV;
1291 1 :
1292 : /*
1293 : * Check legality
1294 : */
1295 :
1296 2 : if (addr_len != sizeof(struct sockaddr))
1297 1 : return -EINVAL;
1298 1 : strlcpy(name, uaddr->sa_data, sizeof(name));
1299 :
1300 3 : dev = dev_get_by_name(sock_net(sk), name);
1301 2 : if (dev) {
1302 5 : err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1303 2 : dev_put(dev);
1304 : }
1305 2 : return err;
1306 : }
1307 :
1308 : static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1309 : {
1310 0 : struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1311 0 : struct sock *sk = sock->sk;
1312 0 : struct net_device *dev = NULL;
1313 0 : int err;
1314 0 :
1315 0 :
1316 0 : /*
1317 : * Check legality
1318 : */
1319 :
1320 0 : if (addr_len < sizeof(struct sockaddr_ll))
1321 0 : return -EINVAL;
1322 0 : if (sll->sll_family != AF_PACKET)
1323 0 : return -EINVAL;
1324 :
1325 0 : if (sll->sll_ifindex) {
1326 0 : err = -ENODEV;
1327 0 : dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1328 0 : if (dev == NULL)
1329 0 : goto out;
1330 : }
1331 0 : err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1332 0 : if (dev)
1333 0 : dev_put(dev);
1334 :
1335 : out:
1336 0 : return err;
1337 : }
1338 0 :
1339 1 : static struct proto packet_proto = {
1340 : .name = "PACKET",
1341 : .owner = THIS_MODULE,
1342 : .obj_size = sizeof(struct packet_sock),
1343 : };
1344 :
1345 : /*
1346 : * Create a packet of type SOCK_PACKET.
1347 : */
1348 :
1349 : static int packet_create(struct net *net, struct socket *sock, int protocol,
1350 : int kern)
1351 1 : {
1352 1 : struct sock *sk;
1353 1 : struct packet_sock *po;
1354 2 : __be16 proto = (__force __be16)protocol; /* weird, but documented */
1355 1 : int err;
1356 1 :
1357 4 : if (!capable(CAP_NET_RAW))
1358 1 : return -EPERM;
1359 6 : if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1360 : sock->type != SOCK_PACKET)
1361 1 : return -ESOCKTNOSUPPORT;
1362 :
1363 1 : sock->state = SS_UNCONNECTED;
1364 :
1365 1 : err = -ENOBUFS;
1366 1 : sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1367 2 : if (sk == NULL)
1368 1 : goto out;
1369 :
1370 1 : sock->ops = &packet_ops;
1371 3 : if (sock->type == SOCK_PACKET)
1372 1 : sock->ops = &packet_ops_spkt;
1373 :
1374 1 : sock_init_data(sock, sk);
1375 :
1376 2 : po = pkt_sk(sk);
1377 1 : sk->sk_family = PF_PACKET;
1378 1 : po->num = proto;
1379 :
1380 1 : sk->sk_destruct = packet_sock_destruct;
1381 : sk_refcnt_debug_inc(sk);
1382 :
1383 : /*
1384 : * Attach a protocol block
1385 : */
1386 :
1387 4 : spin_lock_init(&po->bind_lock);
1388 1 : mutex_init(&po->pg_vec_lock);
1389 1 : po->prot_hook.func = packet_rcv;
1390 :
1391 3 : if (sock->type == SOCK_PACKET)
1392 1 : po->prot_hook.func = packet_rcv_spkt;
1393 :
1394 1 : po->prot_hook.af_packet_priv = sk;
1395 :
1396 2 : if (proto) {
1397 1 : po->prot_hook.type = proto;
1398 1 : dev_add_pack(&po->prot_hook);
1399 2 : sock_hold(sk);
1400 1 : po->running = 1;
1401 : }
1402 :
1403 2 : write_lock_bh(&net->packet.sklist_lock);
1404 4 : sk_add_node(sk, &net->packet.sklist);
1405 1 : sock_prot_inuse_add(net, &packet_proto, 1);
1406 1 : write_unlock_bh(&net->packet.sklist_lock);
1407 1 : return 0;
1408 1 : out:
1409 1 : return err;
1410 : }
1411 :
1412 : /*
1413 : * Pull a packet from our receive queue and hand it to the user.
1414 : * If necessary we block.
1415 : */
1416 :
1417 : static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1418 : struct msghdr *msg, size_t len, int flags)
1419 : {
1420 2 : struct sock *sk = sock->sk;
1421 1 : struct sk_buff *skb;
1422 1 : int copied, err;
1423 1 : struct sockaddr_ll *sll;
1424 1 :
1425 2 : err = -EINVAL;
1426 3 : if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1427 2 : goto out;
1428 :
1429 : #if 0
1430 : /* What error should we return now? EUNATTACH? */
1431 : if (pkt_sk(sk)->ifindex < 0)
1432 : return -ENODEV;
1433 : #endif
1434 :
1435 : /*
1436 : * Call the generic datagram receiver. This handles all sorts
1437 : * of horrible races and re-entrancy so we can forget about it
1438 : * in the protocol layers.
1439 : *
1440 : * Now it will return ENETDOWN, if device have just gone down,
1441 : * but then it will block.
1442 : */
1443 :
1444 1 : skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1445 :
1446 : /*
1447 : * An error occurred so return it. Because skb_recv_datagram()
1448 : * handles the blocking we don't see and worry about blocking
1449 : * retries.
1450 : */
1451 :
1452 2 : if (skb == NULL)
1453 1 : goto out;
1454 :
1455 : /*
1456 : * If the address length field is there to be filled in, we fill
1457 : * it in now.
1458 : */
1459 :
1460 1 : sll = &PACKET_SKB_CB(skb)->sa.ll;
1461 3 : if (sock->type == SOCK_PACKET)
1462 1 : msg->msg_namelen = sizeof(struct sockaddr_pkt);
1463 : else
1464 2 : msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1465 :
1466 : /*
1467 : * You lose any data beyond the buffer you gave. If it worries a
1468 : * user program they can ask the device for its MTU anyway.
1469 : */
1470 :
1471 2 : copied = skb->len;
1472 2 : if (copied > len) {
1473 1 : copied = len;
1474 1 : msg->msg_flags |= MSG_TRUNC;
1475 : }
1476 :
1477 1 : err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1478 2 : if (err)
1479 1 : goto out_free;
1480 :
1481 1 : sock_recv_ts_and_drops(msg, sk, skb);
1482 :
1483 3 : if (msg->msg_name)
1484 2 : memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1485 : msg->msg_namelen);
1486 :
1487 4 : if (pkt_sk(sk)->auxdata) {
1488 : struct tpacket_auxdata aux;
1489 :
1490 1 : aux.tp_status = TP_STATUS_USER;
1491 2 : if (skb->ip_summed == CHECKSUM_PARTIAL)
1492 1 : aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1493 1 : aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1494 1 : aux.tp_snaplen = skb->len;
1495 1 : aux.tp_mac = 0;
1496 3 : aux.tp_net = skb_network_offset(skb);
1497 2 : aux.tp_vlan_tci = vlan_tx_tag_get(skb);
1498 :
1499 1 : aux.tp_padding = 0;
1500 1 : put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1501 : }
1502 :
1503 : /*
1504 : * Free or return the buffer as appropriate. Again this
1505 : * hides all the races and re-entrancy issues from us.
1506 : */
1507 14 : err = (flags&MSG_TRUNC) ? skb->len : copied;
1508 2 :
1509 : out_free:
1510 3 : skb_free_datagram(sk, skb);
1511 : out:
1512 6 : return err;
1513 : }
1514 :
1515 : static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1516 : int *uaddr_len, int peer)
1517 : {
1518 1 : struct net_device *dev;
1519 2 : struct sock *sk = sock->sk;
1520 1 :
1521 3 : if (peer)
1522 1 : return -EOPNOTSUPP;
1523 :
1524 1 : uaddr->sa_family = AF_PACKET;
1525 2 : rcu_read_lock();
1526 5 : dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1527 2 : if (dev)
1528 1 : strncpy(uaddr->sa_data, dev->name, 14);
1529 : else
1530 1 : memset(uaddr->sa_data, 0, 14);
1531 2 : rcu_read_unlock();
1532 1 : *uaddr_len = sizeof(*uaddr);
1533 :
1534 1 : return 0;
1535 : }
1536 :
1537 : static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1538 : int *uaddr_len, int peer)
1539 : {
1540 0 : struct net_device *dev;
1541 0 : struct sock *sk = sock->sk;
1542 0 : struct packet_sock *po = pkt_sk(sk);
1543 0 : DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
1544 0 :
1545 0 : if (peer)
1546 0 : return -EOPNOTSUPP;
1547 :
1548 0 : sll->sll_family = AF_PACKET;
1549 0 : sll->sll_ifindex = po->ifindex;
1550 0 : sll->sll_protocol = po->num;
1551 0 : sll->sll_pkttype = 0;
1552 0 : rcu_read_lock();
1553 0 : dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1554 0 : if (dev) {
1555 0 : sll->sll_hatype = dev->type;
1556 0 : sll->sll_halen = dev->addr_len;
1557 0 : memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1558 : } else {
1559 0 : sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1560 0 : sll->sll_halen = 0;
1561 : }
1562 0 : rcu_read_unlock();
1563 0 : *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1564 :
1565 0 : return 0;
1566 : }
1567 :
1568 : static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1569 : int what)
1570 3 : {
1571 3 : switch (i->type) {
1572 15 : case PACKET_MR_MULTICAST:
1573 9 : if (what > 0)
1574 12 : return dev_mc_add(dev, i->addr, i->alen, 0);
1575 3 : else
1576 9 : return dev_mc_delete(dev, i->addr, i->alen, 0);
1577 : break;
1578 15 : case PACKET_MR_PROMISC:
1579 6 : return dev_set_promiscuity(dev, what);
1580 3 : break;
1581 12 : case PACKET_MR_ALLMULTI:
1582 6 : return dev_set_allmulti(dev, what);
1583 3 : break;
1584 12 : case PACKET_MR_UNICAST:
1585 6 : if (what > 0)
1586 6 : return dev_unicast_add(dev, i->addr);
1587 : else
1588 6 : return dev_unicast_delete(dev, i->addr);
1589 : break;
1590 6 : default:
1591 6 : break;
1592 : }
1593 3 : return 0;
1594 : }
1595 :
1596 : static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1597 : {
1598 5 : for ( ; i; i = i->next) {
1599 5 : if (i->ifindex == dev->ifindex)
1600 3 : packet_dev_mc(dev, i, what);
1601 : }
1602 : }
1603 :
1604 : static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1605 1 : {
1606 0 : struct packet_sock *po = pkt_sk(sk);
1607 0 : struct packet_mclist *ml, *i;
1608 0 : struct net_device *dev;
1609 0 : int err;
1610 0 :
1611 0 : rtnl_lock();
1612 0 :
1613 0 : err = -ENODEV;
1614 0 : dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1615 0 : if (!dev)
1616 0 : goto done;
1617 :
1618 0 : err = -EINVAL;
1619 0 : if (mreq->mr_alen > dev->addr_len)
1620 0 : goto done;
1621 :
1622 0 : err = -ENOBUFS;
1623 0 : i = kmalloc(sizeof(*i), GFP_KERNEL);
1624 0 : if (i == NULL)
1625 0 : goto done;
1626 :
1627 0 : err = 0;
1628 0 : for (ml = po->mclist; ml; ml = ml->next) {
1629 0 : if (ml->ifindex == mreq->mr_ifindex &&
1630 0 : ml->type == mreq->mr_type &&
1631 : ml->alen == mreq->mr_alen &&
1632 : memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1633 0 : ml->count++;
1634 : /* Free the new element ... */
1635 0 : kfree(i);
1636 0 : goto done;
1637 : }
1638 : }
1639 :
1640 0 : i->type = mreq->mr_type;
1641 0 : i->ifindex = mreq->mr_ifindex;
1642 0 : i->alen = mreq->mr_alen;
1643 0 : memcpy(i->addr, mreq->mr_address, i->alen);
1644 0 : i->count = 1;
1645 0 : i->next = po->mclist;
1646 0 : po->mclist = i;
1647 0 : err = packet_dev_mc(dev, i, 1);
1648 0 : if (err) {
1649 0 : po->mclist = i->next;
1650 0 : kfree(i);
1651 : }
1652 :
1653 : done:
1654 0 : rtnl_unlock();
1655 0 : return err;
1656 : }
1657 :
1658 : static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1659 : {
1660 0 : struct packet_mclist *ml, **mlp;
1661 0 :
1662 0 : rtnl_lock();
1663 0 :
1664 0 : for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1665 0 : if (ml->ifindex == mreq->mr_ifindex &&
1666 0 : ml->type == mreq->mr_type &&
1667 : ml->alen == mreq->mr_alen &&
1668 : memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1669 0 : if (--ml->count == 0) {
1670 : struct net_device *dev;
1671 0 : *mlp = ml->next;
1672 0 : dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1673 0 : if (dev)
1674 0 : packet_dev_mc(dev, ml, -1);
1675 0 : kfree(ml);
1676 : }
1677 0 : rtnl_unlock();
1678 0 : return 0;
1679 : }
1680 : }
1681 0 : rtnl_unlock();
1682 0 : return -EADDRNOTAVAIL;
1683 : }
1684 :
1685 : static void packet_flush_mclist(struct sock *sk)
1686 : {
1687 8 : struct packet_sock *po = pkt_sk(sk);
1688 2 : struct packet_mclist *ml;
1689 2 :
1690 8 : if (!po->mclist)
1691 4 : return;
1692 :
1693 2 : rtnl_lock();
1694 8 : while ((ml = po->mclist) != NULL) {
1695 2 : struct net_device *dev;
1696 2 :
1697 2 : po->mclist = ml->next;
1698 6 : dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1699 4 : if (dev != NULL)
1700 4 : packet_dev_mc(dev, ml, -1);
1701 4 : kfree(ml);
1702 : }
1703 6 : rtnl_unlock();
1704 2 : }
1705 :
1706 : static int
1707 : packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1708 : {
1709 0 : struct sock *sk = sock->sk;
1710 0 : struct packet_sock *po = pkt_sk(sk);
1711 0 : int ret;
1712 0 :
1713 0 : if (level != SOL_PACKET)
1714 0 : return -ENOPROTOOPT;
1715 0 :
1716 0 : switch (optname) {
1717 0 : case PACKET_ADD_MEMBERSHIP:
1718 0 : case PACKET_DROP_MEMBERSHIP:
1719 0 : {
1720 0 : struct packet_mreq_max mreq;
1721 0 : int len = optlen;
1722 0 : memset(&mreq, 0, sizeof(mreq));
1723 0 : if (len < sizeof(struct packet_mreq))
1724 0 : return -EINVAL;
1725 0 : if (len > sizeof(mreq))
1726 0 : len = sizeof(mreq);
1727 0 : if (copy_from_user(&mreq, optval, len))
1728 0 : return -EFAULT;
1729 0 : if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1730 0 : return -EINVAL;
1731 0 : if (optname == PACKET_ADD_MEMBERSHIP)
1732 0 : ret = packet_mc_add(sk, &mreq);
1733 : else
1734 0 : ret = packet_mc_drop(sk, &mreq);
1735 0 : return ret;
1736 0 : }
1737 :
1738 : #ifdef CONFIG_PACKET_MMAP
1739 : case PACKET_RX_RING:
1740 : case PACKET_TX_RING:
1741 : {
1742 : struct tpacket_req req;
1743 :
1744 : if (optlen < sizeof(req))
1745 : return -EINVAL;
1746 : if (copy_from_user(&req, optval, sizeof(req)))
1747 : return -EFAULT;
1748 : return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1749 : }
1750 : case PACKET_COPY_THRESH:
1751 : {
1752 : int val;
1753 :
1754 : if (optlen != sizeof(val))
1755 : return -EINVAL;
1756 : if (copy_from_user(&val, optval, sizeof(val)))
1757 : return -EFAULT;
1758 :
1759 : pkt_sk(sk)->copy_thresh = val;
1760 : return 0;
1761 : }
1762 : case PACKET_VERSION:
1763 : {
1764 : int val;
1765 :
1766 : if (optlen != sizeof(val))
1767 : return -EINVAL;
1768 : if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1769 : return -EBUSY;
1770 : if (copy_from_user(&val, optval, sizeof(val)))
1771 : return -EFAULT;
1772 : switch (val) {
1773 : case TPACKET_V1:
1774 : case TPACKET_V2:
1775 : po->tp_version = val;
1776 : return 0;
1777 : default:
1778 : return -EINVAL;
1779 : }
1780 : }
1781 : case PACKET_RESERVE:
1782 : {
1783 : unsigned int val;
1784 :
1785 : if (optlen != sizeof(val))
1786 : return -EINVAL;
1787 : if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1788 : return -EBUSY;
1789 : if (copy_from_user(&val, optval, sizeof(val)))
1790 : return -EFAULT;
1791 : po->tp_reserve = val;
1792 : return 0;
1793 : }
1794 : case PACKET_LOSS:
1795 : {
1796 : unsigned int val;
1797 :
1798 : if (optlen != sizeof(val))
1799 : return -EINVAL;
1800 : if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1801 : return -EBUSY;
1802 : if (copy_from_user(&val, optval, sizeof(val)))
1803 : return -EFAULT;
1804 : po->tp_loss = !!val;
1805 : return 0;
1806 : }
1807 : #endif
1808 0 : case PACKET_AUXDATA:
1809 : {
1810 : int val;
1811 :
1812 0 : if (optlen < sizeof(val))
1813 0 : return -EINVAL;
1814 0 : if (copy_from_user(&val, optval, sizeof(val)))
1815 0 : return -EFAULT;
1816 :
1817 0 : po->auxdata = !!val;
1818 0 : return 0;
1819 0 : }
1820 0 : case PACKET_ORIGDEV:
1821 : {
1822 : int val;
1823 :
1824 0 : if (optlen < sizeof(val))
1825 0 : return -EINVAL;
1826 0 : if (copy_from_user(&val, optval, sizeof(val)))
1827 0 : return -EFAULT;
1828 :
1829 0 : po->origdev = !!val;
1830 0 : return 0;
1831 0 : }
1832 0 : default:
1833 0 : return -ENOPROTOOPT;
1834 0 : }
1835 : }
1836 :
1837 : static int packet_getsockopt(struct socket *sock, int level, int optname,
1838 : char __user *optval, int __user *optlen)
1839 : {
1840 0 : int len;
1841 0 : int val;
1842 0 : struct sock *sk = sock->sk;
1843 0 : struct packet_sock *po = pkt_sk(sk);
1844 0 : void *data;
1845 0 : struct tpacket_stats st;
1846 0 :
1847 0 : if (level != SOL_PACKET)
1848 0 : return -ENOPROTOOPT;
1849 0 :
1850 0 : if (get_user(len, optlen))
1851 0 : return -EFAULT;
1852 :
1853 0 : if (len < 0)
1854 0 : return -EINVAL;
1855 :
1856 : switch (optname) {
1857 0 : case PACKET_STATISTICS:
1858 0 : if (len > sizeof(struct tpacket_stats))
1859 0 : len = sizeof(struct tpacket_stats);
1860 0 : spin_lock_bh(&sk->sk_receive_queue.lock);
1861 0 : st = po->stats;
1862 0 : memset(&po->stats, 0, sizeof(st));
1863 0 : spin_unlock_bh(&sk->sk_receive_queue.lock);
1864 0 : st.tp_packets += st.tp_drops;
1865 :
1866 0 : data = &st;
1867 0 : break;
1868 0 : case PACKET_AUXDATA:
1869 0 : if (len > sizeof(int))
1870 0 : len = sizeof(int);
1871 0 : val = po->auxdata;
1872 :
1873 0 : data = &val;
1874 0 : break;
1875 0 : case PACKET_ORIGDEV:
1876 0 : if (len > sizeof(int))
1877 0 : len = sizeof(int);
1878 0 : val = po->origdev;
1879 :
1880 0 : data = &val;
1881 0 : break;
1882 0 : #ifdef CONFIG_PACKET_MMAP
1883 : case PACKET_VERSION:
1884 : if (len > sizeof(int))
1885 : len = sizeof(int);
1886 : val = po->tp_version;
1887 : data = &val;
1888 : break;
1889 : case PACKET_HDRLEN:
1890 : if (len > sizeof(int))
1891 : len = sizeof(int);
1892 : if (copy_from_user(&val, optval, len))
1893 : return -EFAULT;
1894 : switch (val) {
1895 : case TPACKET_V1:
1896 : val = sizeof(struct tpacket_hdr);
1897 : break;
1898 : case TPACKET_V2:
1899 : val = sizeof(struct tpacket2_hdr);
1900 : break;
1901 : default:
1902 : return -EINVAL;
1903 : }
1904 : data = &val;
1905 : break;
1906 : case PACKET_RESERVE:
1907 : if (len > sizeof(unsigned int))
1908 : len = sizeof(unsigned int);
1909 : val = po->tp_reserve;
1910 : data = &val;
1911 : break;
1912 : case PACKET_LOSS:
1913 : if (len > sizeof(unsigned int))
1914 : len = sizeof(unsigned int);
1915 : val = po->tp_loss;
1916 : data = &val;
1917 : break;
1918 : #endif
1919 0 : default:
1920 0 : return -ENOPROTOOPT;
1921 : }
1922 :
1923 0 : if (put_user(len, optlen))
1924 0 : return -EFAULT;
1925 0 : if (copy_to_user(optval, data, len))
1926 0 : return -EFAULT;
1927 0 : return 0;
1928 : }
1929 :
1930 :
1931 : static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1932 : {
1933 1 : struct sock *sk;
1934 1 : struct hlist_node *node;
1935 2 : struct net_device *dev = data;
1936 4 : struct net *net = dev_net(dev);
1937 1 :
1938 2 : read_lock(&net->packet.sklist_lock);
1939 13 : sk_for_each(sk, node, &net->packet.sklist) {
1940 9 : struct packet_sock *po = pkt_sk(sk);
1941 2 :
1942 1 : switch (msg) {
1943 4 : case NETDEV_UNREGISTER:
1944 3 : if (po->mclist)
1945 2 : packet_dev_mclist(dev, po->mclist, -1);
1946 : /* fallthrough */
1947 :
1948 3 : case NETDEV_DOWN:
1949 4 : if (dev->ifindex == po->ifindex) {
1950 6 : spin_lock(&po->bind_lock);
1951 2 : if (po->running) {
1952 1 : __dev_remove_pack(&po->prot_hook);
1953 2 : __sock_put(sk);
1954 1 : po->running = 0;
1955 1 : sk->sk_err = ENETDOWN;
1956 4 : if (!sock_flag(sk, SOCK_DEAD))
1957 2 : sk->sk_error_report(sk);
1958 : }
1959 4 : if (msg == NETDEV_UNREGISTER) {
1960 2 : po->ifindex = -1;
1961 2 : po->prot_hook.dev = NULL;
1962 : }
1963 4 : spin_unlock(&po->bind_lock);
1964 : }
1965 3 : break;
1966 4 : case NETDEV_UP:
1967 2 : spin_lock(&po->bind_lock);
1968 7 : if (dev->ifindex == po->ifindex && po->num &&
1969 : !po->running) {
1970 1 : dev_add_pack(&po->prot_hook);
1971 2 : sock_hold(sk);
1972 1 : po->running = 1;
1973 : }
1974 4 : spin_unlock(&po->bind_lock);
1975 1 : break;
1976 1 : }
1977 : }
1978 3 : read_unlock(&net->packet.sklist_lock);
1979 1 : return NOTIFY_DONE;
1980 : }
1981 :
1982 :
1983 : static int packet_ioctl(struct socket *sock, unsigned int cmd,
1984 : unsigned long arg)
1985 1 : {
1986 2 : struct sock *sk = sock->sk;
1987 1 :
1988 1 : switch (cmd) {
1989 4 : case SIOCOUTQ:
1990 1 : {
1991 4 : int amount = sk_wmem_alloc_get(sk);
1992 1 :
1993 11 : return put_user(amount, (int __user *)arg);
1994 3 : }
1995 4 : case SIOCINQ:
1996 1 : {
1997 1 : struct sk_buff *skb;
1998 2 : int amount = 0;
1999 :
2000 2 : spin_lock_bh(&sk->sk_receive_queue.lock);
2001 2 : skb = skb_peek(&sk->sk_receive_queue);
2002 2 : if (skb)
2003 2 : amount = skb->len;
2004 2 : spin_unlock_bh(&sk->sk_receive_queue.lock);
2005 10 : return put_user(amount, (int __user *)arg);
2006 2 : }
2007 3 : case SIOCGSTAMP:
2008 2 : return sock_get_timestamp(sk, (struct timeval __user *)arg);
2009 4 : case SIOCGSTAMPNS:
2010 2 : return sock_get_timestampns(sk, (struct timespec __user *)arg);
2011 1 :
2012 1 : #ifdef CONFIG_INET
2013 4 : case SIOCADDRT:
2014 4 : case SIOCDELRT:
2015 4 : case SIOCDARP:
2016 4 : case SIOCGARP:
2017 4 : case SIOCSARP:
2018 4 : case SIOCGIFADDR:
2019 4 : case SIOCSIFADDR:
2020 4 : case SIOCGIFBRDADDR:
2021 4 : case SIOCSIFBRDADDR:
2022 4 : case SIOCGIFNETMASK:
2023 4 : case SIOCSIFNETMASK:
2024 4 : case SIOCGIFDSTADDR:
2025 3 : case SIOCSIFDSTADDR:
2026 3 : case SIOCSIFFLAGS:
2027 6 : if (!net_eq(sock_net(sk), &init_net))
2028 1 : return -ENOIOCTLCMD;
2029 3 : return inet_dgram_ops.ioctl(sock, cmd, arg);
2030 1 : #endif
2031 :
2032 1 : default:
2033 2 : return -ENOIOCTLCMD;
2034 : }
2035 : return 0;
2036 : }
2037 :
2038 : #ifndef CONFIG_PACKET_MMAP
2039 : #define packet_mmap sock_no_mmap
2040 : #define packet_poll datagram_poll
2041 : #else
2042 :
2043 : static unsigned int packet_poll(struct file *file, struct socket *sock,
2044 : poll_table *wait)
2045 : {
2046 : struct sock *sk = sock->sk;
2047 : struct packet_sock *po = pkt_sk(sk);
2048 : unsigned int mask = datagram_poll(file, sock, wait);
2049 :
2050 : spin_lock_bh(&sk->sk_receive_queue.lock);
2051 : if (po->rx_ring.pg_vec) {
2052 : if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2053 : mask |= POLLIN | POLLRDNORM;
2054 : }
2055 : spin_unlock_bh(&sk->sk_receive_queue.lock);
2056 : spin_lock_bh(&sk->sk_write_queue.lock);
2057 : if (po->tx_ring.pg_vec) {
2058 : if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2059 : mask |= POLLOUT | POLLWRNORM;
2060 : }
2061 : spin_unlock_bh(&sk->sk_write_queue.lock);
2062 : return mask;
2063 : }
2064 :
2065 :
2066 : /* Dirty? Well, I still did not learn better way to account
2067 : * for user mmaps.
2068 : */
2069 :
2070 : static void packet_mm_open(struct vm_area_struct *vma)
2071 : {
2072 : struct file *file = vma->vm_file;
2073 : struct socket *sock = file->private_data;
2074 : struct sock *sk = sock->sk;
2075 :
2076 : if (sk)
2077 : atomic_inc(&pkt_sk(sk)->mapped);
2078 : }
2079 :
2080 : static void packet_mm_close(struct vm_area_struct *vma)
2081 : {
2082 : struct file *file = vma->vm_file;
2083 : struct socket *sock = file->private_data;
2084 : struct sock *sk = sock->sk;
2085 :
2086 : if (sk)
2087 : atomic_dec(&pkt_sk(sk)->mapped);
2088 : }
2089 :
2090 : static const struct vm_operations_struct packet_mmap_ops = {
2091 : .open = packet_mm_open,
2092 : .close = packet_mm_close,
2093 : };
2094 :
2095 : static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
2096 : {
2097 : int i;
2098 :
2099 : for (i = 0; i < len; i++) {
2100 : if (likely(pg_vec[i]))
2101 : free_pages((unsigned long) pg_vec[i], order);
2102 : }
2103 : kfree(pg_vec);
2104 : }
2105 :
2106 : static inline char *alloc_one_pg_vec_page(unsigned long order)
2107 : {
2108 : gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2109 :
2110 : return (char *) __get_free_pages(gfp_flags, order);
2111 : }
2112 :
2113 : static char **alloc_pg_vec(struct tpacket_req *req, int order)
2114 : {
2115 : unsigned int block_nr = req->tp_block_nr;
2116 : char **pg_vec;
2117 : int i;
2118 :
2119 : pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2120 : if (unlikely(!pg_vec))
2121 : goto out;
2122 :
2123 : for (i = 0; i < block_nr; i++) {
2124 : pg_vec[i] = alloc_one_pg_vec_page(order);
2125 : if (unlikely(!pg_vec[i]))
2126 : goto out_free_pgvec;
2127 : }
2128 :
2129 : out:
2130 : return pg_vec;
2131 :
2132 : out_free_pgvec:
2133 : free_pg_vec(pg_vec, order, block_nr);
2134 : pg_vec = NULL;
2135 : goto out;
2136 : }
2137 :
2138 : static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2139 : int closing, int tx_ring)
2140 : {
2141 : char **pg_vec = NULL;
2142 : struct packet_sock *po = pkt_sk(sk);
2143 : int was_running, order = 0;
2144 : struct packet_ring_buffer *rb;
2145 : struct sk_buff_head *rb_queue;
2146 : __be16 num;
2147 : int err;
2148 :
2149 : rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2150 : rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2151 :
2152 : err = -EBUSY;
2153 : if (!closing) {
2154 : if (atomic_read(&po->mapped))
2155 : goto out;
2156 : if (atomic_read(&rb->pending))
2157 : goto out;
2158 : }
2159 :
2160 : if (req->tp_block_nr) {
2161 : /* Sanity tests and some calculations */
2162 : err = -EBUSY;
2163 : if (unlikely(rb->pg_vec))
2164 : goto out;
2165 :
2166 : switch (po->tp_version) {
2167 : case TPACKET_V1:
2168 : po->tp_hdrlen = TPACKET_HDRLEN;
2169 : break;
2170 : case TPACKET_V2:
2171 : po->tp_hdrlen = TPACKET2_HDRLEN;
2172 : break;
2173 : }
2174 :
2175 : err = -EINVAL;
2176 : if (unlikely((int)req->tp_block_size <= 0))
2177 : goto out;
2178 : if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2179 : goto out;
2180 : if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2181 : po->tp_reserve))
2182 : goto out;
2183 : if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2184 : goto out;
2185 :
2186 : rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2187 : if (unlikely(rb->frames_per_block <= 0))
2188 : goto out;
2189 : if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2190 : req->tp_frame_nr))
2191 : goto out;
2192 :
2193 : err = -ENOMEM;
2194 : order = get_order(req->tp_block_size);
2195 : pg_vec = alloc_pg_vec(req, order);
2196 : if (unlikely(!pg_vec))
2197 : goto out;
2198 : }
2199 : /* Done */
2200 : else {
2201 : err = -EINVAL;
2202 : if (unlikely(req->tp_frame_nr))
2203 : goto out;
2204 : }
2205 :
2206 : lock_sock(sk);
2207 :
2208 : /* Detach socket from network */
2209 : spin_lock(&po->bind_lock);
2210 : was_running = po->running;
2211 : num = po->num;
2212 : if (was_running) {
2213 : __dev_remove_pack(&po->prot_hook);
2214 : po->num = 0;
2215 : po->running = 0;
2216 : __sock_put(sk);
2217 : }
2218 : spin_unlock(&po->bind_lock);
2219 :
2220 : synchronize_net();
2221 :
2222 : err = -EBUSY;
2223 : mutex_lock(&po->pg_vec_lock);
2224 : if (closing || atomic_read(&po->mapped) == 0) {
2225 : err = 0;
2226 : #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
2227 : spin_lock_bh(&rb_queue->lock);
2228 : pg_vec = XC(rb->pg_vec, pg_vec);
2229 : rb->frame_max = (req->tp_frame_nr - 1);
2230 : rb->head = 0;
2231 : rb->frame_size = req->tp_frame_size;
2232 : spin_unlock_bh(&rb_queue->lock);
2233 :
2234 : order = XC(rb->pg_vec_order, order);
2235 : req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2236 :
2237 : rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2238 : po->prot_hook.func = (po->rx_ring.pg_vec) ?
2239 : tpacket_rcv : packet_rcv;
2240 : skb_queue_purge(rb_queue);
2241 : #undef XC
2242 : if (atomic_read(&po->mapped))
2243 : pr_err("packet_mmap: vma is busy: %d\n",
2244 : atomic_read(&po->mapped));
2245 : }
2246 : mutex_unlock(&po->pg_vec_lock);
2247 :
2248 : spin_lock(&po->bind_lock);
2249 : if (was_running && !po->running) {
2250 : sock_hold(sk);
2251 : po->running = 1;
2252 : po->num = num;
2253 : dev_add_pack(&po->prot_hook);
2254 : }
2255 : spin_unlock(&po->bind_lock);
2256 :
2257 : release_sock(sk);
2258 :
2259 : if (pg_vec)
2260 : free_pg_vec(pg_vec, order, req->tp_block_nr);
2261 : out:
2262 : return err;
2263 : }
2264 :
2265 : static int packet_mmap(struct file *file, struct socket *sock,
2266 : struct vm_area_struct *vma)
2267 : {
2268 : struct sock *sk = sock->sk;
2269 : struct packet_sock *po = pkt_sk(sk);
2270 : unsigned long size, expected_size;
2271 : struct packet_ring_buffer *rb;
2272 : unsigned long start;
2273 : int err = -EINVAL;
2274 : int i;
2275 :
2276 : if (vma->vm_pgoff)
2277 : return -EINVAL;
2278 :
2279 : mutex_lock(&po->pg_vec_lock);
2280 :
2281 : expected_size = 0;
2282 : for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2283 : if (rb->pg_vec) {
2284 : expected_size += rb->pg_vec_len
2285 : * rb->pg_vec_pages
2286 : * PAGE_SIZE;
2287 : }
2288 : }
2289 :
2290 : if (expected_size == 0)
2291 : goto out;
2292 :
2293 : size = vma->vm_end - vma->vm_start;
2294 : if (size != expected_size)
2295 : goto out;
2296 :
2297 : start = vma->vm_start;
2298 : for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2299 : if (rb->pg_vec == NULL)
2300 : continue;
2301 :
2302 : for (i = 0; i < rb->pg_vec_len; i++) {
2303 : struct page *page = virt_to_page(rb->pg_vec[i]);
2304 : int pg_num;
2305 :
2306 : for (pg_num = 0; pg_num < rb->pg_vec_pages;
2307 : pg_num++, page++) {
2308 : err = vm_insert_page(vma, start, page);
2309 : if (unlikely(err))
2310 : goto out;
2311 : start += PAGE_SIZE;
2312 : }
2313 : }
2314 : }
2315 :
2316 : atomic_inc(&po->mapped);
2317 : vma->vm_ops = &packet_mmap_ops;
2318 : err = 0;
2319 :
2320 : out:
2321 : mutex_unlock(&po->pg_vec_lock);
2322 : return err;
2323 : }
2324 : #endif
2325 :
2326 :
2327 1 : static const struct proto_ops packet_ops_spkt = {
2328 : .family = PF_PACKET,
2329 : .owner = THIS_MODULE,
2330 : .release = packet_release,
2331 : .bind = packet_bind_spkt,
2332 : .connect = sock_no_connect,
2333 : .socketpair = sock_no_socketpair,
2334 : .accept = sock_no_accept,
2335 : .getname = packet_getname_spkt,
2336 : .poll = datagram_poll,
2337 : .ioctl = packet_ioctl,
2338 : .listen = sock_no_listen,
2339 : .shutdown = sock_no_shutdown,
2340 : .setsockopt = sock_no_setsockopt,
2341 : .getsockopt = sock_no_getsockopt,
2342 : .sendmsg = packet_sendmsg_spkt,
2343 : .recvmsg = packet_recvmsg,
2344 : .mmap = sock_no_mmap,
2345 : .sendpage = sock_no_sendpage,
2346 : };
2347 :
2348 1 : static const struct proto_ops packet_ops = {
2349 : .family = PF_PACKET,
2350 : .owner = THIS_MODULE,
2351 : .release = packet_release,
2352 : .bind = packet_bind,
2353 : .connect = sock_no_connect,
2354 : .socketpair = sock_no_socketpair,
2355 : .accept = sock_no_accept,
2356 : .getname = packet_getname,
2357 : .poll = packet_poll,
2358 : .ioctl = packet_ioctl,
2359 : .listen = sock_no_listen,
2360 : .shutdown = sock_no_shutdown,
2361 : .setsockopt = packet_setsockopt,
2362 : .getsockopt = packet_getsockopt,
2363 : .sendmsg = packet_sendmsg,
2364 : .recvmsg = packet_recvmsg,
2365 : .mmap = packet_mmap,
2366 : .sendpage = sock_no_sendpage,
2367 : };
2368 :
2369 1 : static const struct net_proto_family packet_family_ops = {
2370 : .family = PF_PACKET,
2371 : .create = packet_create,
2372 : .owner = THIS_MODULE,
2373 : };
2374 :
2375 1 : static struct notifier_block packet_netdev_notifier = {
2376 : .notifier_call = packet_notifier,
2377 : };
2378 :
2379 : #ifdef CONFIG_PROC_FS
2380 : static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2381 : {
2382 1 : struct sock *s;
2383 1 : struct hlist_node *node;
2384 1 :
2385 10 : sk_for_each(s, node, &net->packet.sklist) {
2386 6 : if (!off--)
2387 2 : return s;
2388 : }
2389 1 : return NULL;
2390 : }
2391 :
2392 : static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2393 : __acquires(seq_file_net(seq)->packet.sklist_lock)
2394 1 : {
2395 4 : struct net *net = seq_file_net(seq);
2396 2 : read_lock(&net->packet.sklist_lock);
2397 9 : return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2398 : }
2399 :
2400 : static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2401 : {
2402 4 : struct net *net = seq_file_net(seq);
2403 2 : ++*pos;
2404 12 : return (v == SEQ_START_TOKEN)
2405 1 : ? sk_head(&net->packet.sklist)
2406 1 : : sk_next((struct sock *)v) ;
2407 : }
2408 :
2409 : static void packet_seq_stop(struct seq_file *seq, void *v)
2410 : __releases(seq_file_net(seq)->packet.sklist_lock)
2411 1 : {
2412 4 : struct net *net = seq_file_net(seq);
2413 2 : read_unlock(&net->packet.sklist_lock);
2414 1 : }
2415 :
2416 : static int packet_seq_show(struct seq_file *seq, void *v)
2417 : {
2418 3 : if (v == SEQ_START_TOKEN)
2419 2 : seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2420 1 : else {
2421 2 : struct sock *s = v;
2422 4 : const struct packet_sock *po = pkt_sk(s);
2423 1 :
2424 13 : seq_printf(seq,
2425 1 : "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2426 : s,
2427 : atomic_read(&s->sk_refcnt),
2428 : s->sk_type,
2429 : ntohs(po->num),
2430 : po->ifindex,
2431 : po->running,
2432 : atomic_read(&s->sk_rmem_alloc),
2433 : sock_i_uid(s),
2434 : sock_i_ino(s));
2435 : }
2436 :
2437 2 : return 0;
2438 : }
2439 :
2440 1 : static const struct seq_operations packet_seq_ops = {
2441 : .start = packet_seq_start,
2442 : .next = packet_seq_next,
2443 : .stop = packet_seq_stop,
2444 : .show = packet_seq_show,
2445 : };
2446 :
2447 : static int packet_seq_open(struct inode *inode, struct file *file)
2448 : {
2449 6 : return seq_open_net(inode, file, &packet_seq_ops,
2450 : sizeof(struct seq_net_private));
2451 : }
2452 :
2453 1 : static const struct file_operations packet_seq_fops = {
2454 : .owner = THIS_MODULE,
2455 : .open = packet_seq_open,
2456 : .read = seq_read,
2457 : .llseek = seq_lseek,
2458 : .release = seq_release_net,
2459 : };
2460 :
2461 : #endif
2462 :
2463 : static int packet_net_init(struct net *net)
2464 : {
2465 3 : rwlock_init(&net->packet.sklist_lock);
2466 2 : INIT_HLIST_HEAD(&net->packet.sklist);
2467 :
2468 3 : if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2469 1 : return -ENOMEM;
2470 :
2471 1 : return 0;
2472 : }
2473 :
2474 : static void packet_net_exit(struct net *net)
2475 : {
2476 1 : proc_net_remove(net, "packet");
2477 1 : }
2478 :
2479 1 : static struct pernet_operations packet_net_ops = {
2480 : .init = packet_net_init,
2481 : .exit = packet_net_exit,
2482 : };
2483 :
2484 :
2485 : static void __exit packet_exit(void)
2486 : {
2487 4 : unregister_netdevice_notifier(&packet_netdev_notifier);
2488 2 : unregister_pernet_subsys(&packet_net_ops);
2489 2 : sock_unregister(PF_PACKET);
2490 2 : proto_unregister(&packet_proto);
2491 2 : }
2492 :
2493 : static int __init packet_init(void)
2494 : {
2495 3 : int rc = proto_register(&packet_proto, 0);
2496 1 :
2497 2 : if (rc != 0)
2498 1 : goto out;
2499 :
2500 1 : sock_register(&packet_family_ops);
2501 1 : register_pernet_subsys(&packet_net_ops);
2502 2 : register_netdevice_notifier(&packet_netdev_notifier);
2503 : out:
2504 3 : return rc;
2505 : }
2506 :
2507 : module_init(packet_init);
2508 : module_exit(packet_exit);
2509 1 : MODULE_LICENSE("GPL");
2510 : MODULE_ALIAS_NETPROTO(PF_PACKET);
|