279 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, |
265 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, |
280 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, |
266 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, |
281 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, |
267 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, |
282 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, |
268 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, |
283 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, |
269 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, |
284 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, |
270 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, |
285 ARPHRD_NONE}; |
271 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; |
286 |
272 |
287 static const char *netdev_lock_name[] = |
273 static const char *netdev_lock_name[] = |
288 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", |
274 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", |
289 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", |
275 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", |
290 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", |
276 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", |
296 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", |
282 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", |
297 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", |
283 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", |
298 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", |
284 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", |
299 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", |
285 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", |
300 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", |
286 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", |
301 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", |
287 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", |
302 "_xmit_NONE"}; |
288 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; |
303 |
289 |
304 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
290 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
305 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
291 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
306 |
292 |
307 static inline unsigned short netdev_lock_pos(unsigned short dev_type) |
293 static inline unsigned short netdev_lock_pos(unsigned short dev_type) |
1525 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
1528 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
1526 struct packet_type *ptype; |
1529 struct packet_type *ptype; |
1527 __be16 type = skb->protocol; |
1530 __be16 type = skb->protocol; |
1528 int err; |
1531 int err; |
1529 |
1532 |
1530 BUG_ON(skb_shinfo(skb)->frag_list); |
|
1531 |
|
1532 skb_reset_mac_header(skb); |
1533 skb_reset_mac_header(skb); |
1533 skb->mac_len = skb->network_header - skb->mac_header; |
1534 skb->mac_len = skb->network_header - skb->mac_header; |
1534 __skb_pull(skb, skb->mac_len); |
1535 __skb_pull(skb, skb->mac_len); |
1535 |
1536 |
1536 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { |
1537 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
|
1538 struct net_device *dev = skb->dev; |
|
1539 struct ethtool_drvinfo info = {}; |
|
1540 |
|
1541 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) |
|
1542 dev->ethtool_ops->get_drvinfo(dev, &info); |
|
1543 |
|
1544 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " |
|
1545 "ip_summed=%d", |
|
1546 info.driver, dev ? dev->features : 0L, |
|
1547 skb->sk ? skb->sk->sk_route_caps : 0L, |
|
1548 skb->len, skb->data_len, skb->ip_summed); |
|
1549 |
1537 if (skb_header_cloned(skb) && |
1550 if (skb_header_cloned(skb) && |
1538 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
1551 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
1539 return ERR_PTR(err); |
1552 return ERR_PTR(err); |
1540 } |
1553 } |
1541 |
1554 |
1747 } |
1763 } |
1748 |
1764 |
1749 static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
1765 static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
1750 struct sk_buff *skb) |
1766 struct sk_buff *skb) |
1751 { |
1767 { |
|
1768 const struct net_device_ops *ops = dev->netdev_ops; |
1752 u16 queue_index = 0; |
1769 u16 queue_index = 0; |
1753 |
1770 |
1754 if (dev->select_queue) |
1771 if (ops->ndo_select_queue) |
1755 queue_index = dev->select_queue(dev, skb); |
1772 queue_index = ops->ndo_select_queue(dev, skb); |
1756 else if (dev->real_num_tx_queues > 1) |
1773 else if (dev->real_num_tx_queues > 1) |
1757 queue_index = simple_tx_hash(dev, skb); |
1774 queue_index = simple_tx_hash(dev, skb); |
1758 |
1775 |
1759 skb_set_queue_mapping(skb, queue_index); |
1776 skb_set_queue_mapping(skb, queue_index); |
1760 return netdev_get_tx_queue(dev, queue_index); |
1777 return netdev_get_tx_queue(dev, queue_index); |
2323 __skb_unlink(skb, &queue->input_pkt_queue); |
2336 __skb_unlink(skb, &queue->input_pkt_queue); |
2324 kfree_skb(skb); |
2337 kfree_skb(skb); |
2325 } |
2338 } |
2326 } |
2339 } |
2327 |
2340 |
|
/* Hand a held-back GRO skb up the stack: give the protocol's
 * gro_complete() callback a chance to fix up headers on a merged
 * packet (e.g. rewrite the combined IP/TCP lengths), then deliver
 * it via netif_receive_skb().
 */
static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

	/* A "merge" of a single segment needs no header fixup. */
	if (NAPI_GRO_CB(skb)->count == 1)
		goto out;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		/* Only wildcard handlers (ptype->dev == NULL) with a
		 * gro_complete hook for this exact protocol qualify. */
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		/* The packet was merged on receive, so a completion
		 * handler should exist; reaching the list head without
		 * finding one indicates a protocol bug. */
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	skb_shinfo(skb)->gso_size = 0;
	/* Restore skb->data to the MAC header before normal delivery
	 * (the network offset is negative after the earlier pull). */
	__skb_push(skb, -skb_network_offset(skb));
	return netif_receive_skb(skb);
}
|
2372 |
|
2373 void napi_gro_flush(struct napi_struct *napi) |
|
2374 { |
|
2375 struct sk_buff *skb, *next; |
|
2376 |
|
2377 for (skb = napi->gro_list; skb; skb = next) { |
|
2378 next = skb->next; |
|
2379 skb->next = NULL; |
|
2380 napi_gro_complete(skb); |
|
2381 } |
|
2382 |
|
2383 napi->gro_list = NULL; |
|
2384 } |
|
2385 EXPORT_SYMBOL(napi_gro_flush); |
|
2386 |
|
/**
 * dev_gro_receive - attempt Generic Receive Offload on a packet
 * @napi: NAPI context whose gro_list may hold matching flows
 * @skb: newly received packet
 *
 * Return value tells the caller what happened to @skb:
 *   -1 - GRO declined it; caller must deliver it up the stack itself
 *    0 - held on gro_list or merged; caller must not touch it again
 *    1 - payload merged into a held skb and this one may be recycled
 *        (the 0/1 split comes from NAPI_GRO_CB(skb)->free, which is
 *        set by the protocol's gro_receive callback)
 */
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int count = 0;
	int same_flow;
	int mac_len;
	int free;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	/* Already-segmented or frag-listed packets cannot be merged. */
	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		struct sk_buff *p;

		/* Only wildcard handlers with a gro_receive hook. */
		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_reset_network_header(skb);
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;
		NAPI_GRO_CB(skb)->free = 0;

		/* Pre-screen held flows by comparing link-layer headers;
		 * the protocol callback refines same_flow further.  Also
		 * counts the held flows for the MAX_GRO_SKBS cap below. */
		for (p = napi->gro_list; p; p = p->next) {
			count++;

			if (!NAPI_GRO_CB(p)->same_flow)
				continue;

			if (p->mac_len != mac_len ||
			    memcmp(skb_mac_header(p), skb_mac_header(skb),
				   mac_len))
				NAPI_GRO_CB(p)->same_flow = 0;
		}

		pp = ptype->gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* Loop ran to completion: no GRO handler for this protocol. */
	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	free = NAPI_GRO_CB(skb)->free;

	/* The protocol asked us to flush one held flow immediately. */
	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		count--;
	}

	if (same_flow)
		goto ok;

	/* Cannot hold this packet: protocol demanded a flush, or the
	 * list is full.  Restore the MAC header and deliver normally. */
	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
		__skb_push(skb, -skb_network_offset(skb));
		goto normal;
	}

	/* Start a new held flow with this skb at the list head. */
	NAPI_GRO_CB(skb)->count = 1;
	skb_shinfo(skb)->gso_size = skb->len;
	skb->next = napi->gro_list;
	napi->gro_list = skb;

ok:
	return free;

normal:
	return -1;
}
EXPORT_SYMBOL(dev_gro_receive);
|
2470 |
|
2471 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
|
2472 { |
|
2473 struct sk_buff *p; |
|
2474 |
|
2475 for (p = napi->gro_list; p; p = p->next) { |
|
2476 NAPI_GRO_CB(p)->same_flow = 1; |
|
2477 NAPI_GRO_CB(p)->flush = 0; |
|
2478 } |
|
2479 |
|
2480 return dev_gro_receive(napi, skb); |
|
2481 } |
|
2482 |
|
2483 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
|
2484 { |
|
2485 if (netpoll_receive_skb(skb)) |
|
2486 return NET_RX_DROP; |
|
2487 |
|
2488 switch (__napi_gro_receive(napi, skb)) { |
|
2489 case -1: |
|
2490 return netif_receive_skb(skb); |
|
2491 |
|
2492 case 1: |
|
2493 kfree_skb(skb); |
|
2494 break; |
|
2495 } |
|
2496 |
|
2497 return NET_RX_SUCCESS; |
|
2498 } |
|
2499 EXPORT_SYMBOL(napi_gro_receive); |
|
2500 |
|
/* Reset @skb so napi_fraginfo_skb() can refill it on the next poll:
 * drop any linear data and restore the headroom to exactly
 * NET_IP_ALIGN, then park it on @napi. */
void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
	__skb_pull(skb, skb_headlen(skb));
	/* The pull above grew the headroom; the (negative or zero)
	 * reserve argument shrinks it back to NET_IP_ALIGN. */
	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));

	napi->skb = skb;
}
EXPORT_SYMBOL(napi_reuse_skb);
|
2509 |
|
/* Build an skb around the page fragments described by @info, reusing
 * the skb cached on @napi when one is available.  Returns NULL on
 * allocation failure or when the frame is too short for an Ethernet
 * header (in that case the skb is parked for reuse, not leaked). */
struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
				  struct napi_gro_fraginfo *info)
{
	struct net_device *dev = napi->dev;
	struct sk_buff *skb = napi->skb;

	napi->skb = NULL;

	if (!skb) {
		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
		if (!skb)
			goto out;

		skb_reserve(skb, NET_IP_ALIGN);
	}

	/* Attach the caller's page fragments wholesale. */
	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
	skb_shinfo(skb)->nr_frags = info->nr_frags;
	memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));

	skb->data_len = info->len;
	skb->len += info->len;
	skb->truesize += info->len;

	/* Need at least the Ethernet header in the linear area. */
	if (!pskb_may_pull(skb, ETH_HLEN)) {
		napi_reuse_skb(napi, skb);
		skb = NULL;
		goto out;
	}

	skb->protocol = eth_type_trans(skb, dev);

	/* Propagate the driver's checksum state. */
	skb->ip_summed = info->ip_summed;
	skb->csum = info->csum;

out:
	return skb;
}
EXPORT_SYMBOL(napi_fraginfo_skb);
|
2549 |
|
/* Page-fragment entry point to GRO for drivers that receive into
 * pages.  Mirrors napi_gro_receive(), but recycles the skb via
 * napi_reuse_skb() when its payload was merged into a held flow. */
int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
{
	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
	int err = NET_RX_DROP;

	if (!skb)
		goto out;

	if (netpoll_receive_skb(skb))
		goto out;

	err = NET_RX_SUCCESS;

	switch (__napi_gro_receive(napi, skb)) {
	case -1:
		/* GRO declined: deliver the packet normally. */
		return netif_receive_skb(skb);

	case 0:
		/* Held on the gro_list; nothing more to do here. */
		goto out;
	}

	/* Case 1: payload merged elsewhere — keep this skb for reuse. */
	napi_reuse_skb(napi, skb);

out:
	return err;
}
EXPORT_SYMBOL(napi_gro_frags);
|
2577 |
2328 static int process_backlog(struct napi_struct *napi, int quota) |
2578 static int process_backlog(struct napi_struct *napi, int quota) |
2329 { |
2579 { |
2330 int work = 0; |
2580 int work = 0; |
2331 struct softnet_data *queue = &__get_cpu_var(softnet_data); |
2581 struct softnet_data *queue = &__get_cpu_var(softnet_data); |
2332 unsigned long start_time = jiffies; |
2582 unsigned long start_time = jiffies; |
2365 __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2617 __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2366 local_irq_restore(flags); |
2618 local_irq_restore(flags); |
2367 } |
2619 } |
2368 EXPORT_SYMBOL(__napi_schedule); |
2620 EXPORT_SYMBOL(__napi_schedule); |
2369 |
2621 |
|
/* Core of napi_complete(); the caller must have local IRQs disabled
 * and must have flushed the GRO list.  Removes @n from the per-cpu
 * poll list and releases the SCHED bit so it can be scheduled again. */
void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);	/* gro_list must be empty by now */

	list_del(&n->poll_list);
	/* The list removal must be visible before SCHED is cleared, or a
	 * concurrent napi_schedule() could re-link an entry still on the
	 * list. */
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
|
2632 |
|
/* Mark NAPI processing of @n as finished: flush any pending GRO skbs
 * and take it off the poll list, re-enabling future scheduling. */
void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case its running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	/* __napi_complete() BUGs on a non-empty gro_list, so flush
	 * before disabling interrupts. */
	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);
|
2650 |
|
/* Initialise @napi (poll callback, weight, empty GRO state) and attach
 * it to @dev's NAPI list.  The instance starts with SCHED set, so it
 * cannot be scheduled until the driver enables it. */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_list = NULL;	/* no held GRO flows yet */
	napi->skb = NULL;	/* no cached frag skb yet */
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
	napi->dev = dev;
#ifdef CONFIG_NETPOLL
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
|
2668 |
|
2669 void netif_napi_del(struct napi_struct *napi) |
|
2670 { |
|
2671 struct sk_buff *skb, *next; |
|
2672 |
|
2673 list_del_init(&napi->dev_list); |
|
2674 kfree(napi->skb); |
|
2675 |
|
2676 for (skb = napi->gro_list; skb; skb = next) { |
|
2677 next = skb->next; |
|
2678 skb->next = NULL; |
|
2679 kfree_skb(skb); |
|
2680 } |
|
2681 |
|
2682 napi->gro_list = NULL; |
|
2683 } |
|
2684 EXPORT_SYMBOL(netif_napi_del); |
|
2685 |
2370 |
2686 |
2371 static void net_rx_action(struct softirq_action *h) |
2687 static void net_rx_action(struct softirq_action *h) |
2372 { |
2688 { |
2373 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; |
2689 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; |
2374 unsigned long start_time = jiffies; |
2690 unsigned long time_limit = jiffies + 2; |
2375 int budget = netdev_budget; |
2691 int budget = netdev_budget; |
2376 void *have; |
2692 void *have; |
2377 |
2693 |
2378 local_irq_disable(); |
2694 local_irq_disable(); |
2379 |
2695 |
2380 while (!list_empty(list)) { |
2696 while (!list_empty(list)) { |
2381 struct napi_struct *n; |
2697 struct napi_struct *n; |
2382 int work, weight; |
2698 int work, weight; |
2383 |
2699 |
2384 /* If softirq window is exhuasted then punt. |
2700 /* If softirq window is exhuasted then punt. |
2385 * |
2701 * Allow this to run for 2 jiffies since which will allow |
2386 * Note that this is a slight policy change from the |
2702 * an average latency of 1.5/HZ. |
2387 * previous NAPI code, which would allow up to 2 |
|
2388 * jiffies to pass before breaking out. The test |
|
2389 * used to be "jiffies - start_time > 1". |
|
2390 */ |
2703 */ |
2391 if (unlikely(budget <= 0 || jiffies != start_time)) |
2704 if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) |
2392 goto softnet_break; |
2705 goto softnet_break; |
2393 |
2706 |
2394 local_irq_enable(); |
2707 local_irq_enable(); |
2395 |
2708 |
2396 /* Even though interrupts have been re-enabled, this |
2709 /* Even though interrupts have been re-enabled, this |
2983 } |
3266 } |
2984 if (dev->flags != old_flags) { |
3267 if (dev->flags != old_flags) { |
2985 printk(KERN_INFO "device %s %s promiscuous mode\n", |
3268 printk(KERN_INFO "device %s %s promiscuous mode\n", |
2986 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : |
3269 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : |
2987 "left"); |
3270 "left"); |
2988 if (audit_enabled) |
3271 if (audit_enabled) { |
|
3272 current_uid_gid(&uid, &gid); |
2989 audit_log(current->audit_context, GFP_ATOMIC, |
3273 audit_log(current->audit_context, GFP_ATOMIC, |
2990 AUDIT_ANOM_PROMISCUOUS, |
3274 AUDIT_ANOM_PROMISCUOUS, |
2991 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", |
3275 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", |
2992 dev->name, (dev->flags & IFF_PROMISC), |
3276 dev->name, (dev->flags & IFF_PROMISC), |
2993 (old_flags & IFF_PROMISC), |
3277 (old_flags & IFF_PROMISC), |
2994 audit_get_loginuid(current), |
3278 audit_get_loginuid(current), |
2995 current->uid, current->gid, |
3279 uid, gid, |
2996 audit_get_sessionid(current)); |
3280 audit_get_sessionid(current)); |
|
3281 } |
2997 |
3282 |
2998 dev_change_rx_flags(dev, IFF_PROMISC); |
3283 dev_change_rx_flags(dev, IFF_PROMISC); |
2999 } |
3284 } |
3000 return 0; |
3285 return 0; |
3001 } |
3286 } |
3073 * filtering it is put in promiscuous mode while unicast addresses |
3358 * filtering it is put in promiscuous mode while unicast addresses |
3074 * are present. |
3359 * are present. |
3075 */ |
3360 */ |
3076 void __dev_set_rx_mode(struct net_device *dev) |
3361 void __dev_set_rx_mode(struct net_device *dev) |
3077 { |
3362 { |
|
3363 const struct net_device_ops *ops = dev->netdev_ops; |
|
3364 |
3078 /* dev_open will call this function so the list will stay sane. */ |
3365 /* dev_open will call this function so the list will stay sane. */ |
3079 if (!(dev->flags&IFF_UP)) |
3366 if (!(dev->flags&IFF_UP)) |
3080 return; |
3367 return; |
3081 |
3368 |
3082 if (!netif_device_present(dev)) |
3369 if (!netif_device_present(dev)) |
3083 return; |
3370 return; |
3084 |
3371 |
3085 if (dev->set_rx_mode) |
3372 if (ops->ndo_set_rx_mode) |
3086 dev->set_rx_mode(dev); |
3373 ops->ndo_set_rx_mode(dev); |
3087 else { |
3374 else { |
3088 /* Unicast addresses changes may only happen under the rtnl, |
3375 /* Unicast addresses changes may only happen under the rtnl, |
3089 * therefore calling __dev_set_promiscuity here is safe. |
3376 * therefore calling __dev_set_promiscuity here is safe. |
3090 */ |
3377 */ |
3091 if (dev->uc_count > 0 && !dev->uc_promisc) { |
3378 if (dev->uc_count > 0 && !dev->uc_promisc) { |
3600 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
3893 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
3601 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
3894 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
3602 return 0; |
3895 return 0; |
3603 |
3896 |
3604 case SIOCSIFMAP: |
3897 case SIOCSIFMAP: |
3605 if (dev->set_config) { |
3898 if (ops->ndo_set_config) { |
3606 if (!netif_device_present(dev)) |
3899 if (!netif_device_present(dev)) |
3607 return -ENODEV; |
3900 return -ENODEV; |
3608 return dev->set_config(dev, &ifr->ifr_map); |
3901 return ops->ndo_set_config(dev, &ifr->ifr_map); |
3609 } |
3902 } |
3610 return -EOPNOTSUPP; |
3903 return -EOPNOTSUPP; |
3611 |
3904 |
3612 case SIOCADDMULTI: |
3905 case SIOCADDMULTI: |
3613 if ((!dev->set_multicast_list && !dev->set_rx_mode) || |
3906 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
3614 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
3907 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
3615 return -EINVAL; |
3908 return -EINVAL; |
3616 if (!netif_device_present(dev)) |
3909 if (!netif_device_present(dev)) |
3617 return -ENODEV; |
3910 return -ENODEV; |
3618 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, |
3911 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, |
3619 dev->addr_len, 1); |
3912 dev->addr_len, 1); |
3620 |
3913 |
3621 case SIOCDELMULTI: |
3914 case SIOCDELMULTI: |
3622 if ((!dev->set_multicast_list && !dev->set_rx_mode) || |
3915 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
3623 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
3916 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
3624 return -EINVAL; |
3917 return -EINVAL; |
3625 if (!netif_device_present(dev)) |
3918 if (!netif_device_present(dev)) |
3626 return -ENODEV; |
3919 return -ENODEV; |
3627 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, |
3920 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, |
3988 |
4280 |
3989 return features; |
4281 return features; |
3990 } |
4282 } |
3991 EXPORT_SYMBOL(netdev_fix_features); |
4283 EXPORT_SYMBOL(netdev_fix_features); |
3992 |
4284 |
|
/* Some devices need to (re-)set their netdev_ops inside
 * ->init() or similar. If that happens, we have to setup
 * the compat pointers again.
 *
 * Copies every ndo_* callback from dev->netdev_ops back into the
 * legacy per-field function pointers on struct net_device.  Compiles
 * to a no-op unless CONFIG_COMPAT_NET_DEV_OPS is enabled.
 */
void netdev_resync_ops(struct net_device *dev)
{
#ifdef CONFIG_COMPAT_NET_DEV_OPS
	const struct net_device_ops *ops = dev->netdev_ops;

	dev->init = ops->ndo_init;
	dev->uninit = ops->ndo_uninit;
	dev->open = ops->ndo_open;
	dev->change_rx_flags = ops->ndo_change_rx_flags;
	dev->set_rx_mode = ops->ndo_set_rx_mode;
	dev->set_multicast_list = ops->ndo_set_multicast_list;
	dev->set_mac_address = ops->ndo_set_mac_address;
	dev->validate_addr = ops->ndo_validate_addr;
	dev->do_ioctl = ops->ndo_do_ioctl;
	dev->set_config = ops->ndo_set_config;
	dev->change_mtu = ops->ndo_change_mtu;
	dev->neigh_setup = ops->ndo_neigh_setup;
	dev->tx_timeout = ops->ndo_tx_timeout;
	dev->get_stats = ops->ndo_get_stats;
	dev->vlan_rx_register = ops->ndo_vlan_rx_register;
	dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
	dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
	dev->poll_controller = ops->ndo_poll_controller;
#endif
#endif
}
EXPORT_SYMBOL(netdev_resync_ops);
|
4317 |
3993 /** |
4318 /** |
3994 * register_netdevice - register a network device |
4319 * register_netdevice - register a network device |
3995 * @dev: device to register |
4320 * @dev: device to register |
3996 * |
4321 * |
3997 * Take a completed network device structure and add it to the kernel |
4322 * Take a completed network device structure and add it to the kernel |
4010 int register_netdevice(struct net_device *dev) |
4335 int register_netdevice(struct net_device *dev) |
4011 { |
4336 { |
4012 struct hlist_head *head; |
4337 struct hlist_head *head; |
4013 struct hlist_node *p; |
4338 struct hlist_node *p; |
4014 int ret; |
4339 int ret; |
4015 struct net *net; |
4340 struct net *net = dev_net(dev); |
4016 |
4341 |
4017 BUG_ON(dev_boot_phase); |
4342 BUG_ON(dev_boot_phase); |
4018 ASSERT_RTNL(); |
4343 ASSERT_RTNL(); |
4019 |
4344 |
4020 might_sleep(); |
4345 might_sleep(); |
4021 |
4346 |
4022 /* When net_device's are persistent, this will be fatal. */ |
4347 /* When net_device's are persistent, this will be fatal. */ |
4023 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
4348 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
4024 BUG_ON(!dev_net(dev)); |
4349 BUG_ON(!net); |
4025 net = dev_net(dev); |
|
4026 |
4350 |
4027 spin_lock_init(&dev->addr_list_lock); |
4351 spin_lock_init(&dev->addr_list_lock); |
4028 netdev_set_addr_lockdep_class(dev); |
4352 netdev_set_addr_lockdep_class(dev); |
4029 netdev_init_queue_locks(dev); |
4353 netdev_init_queue_locks(dev); |
4030 |
4354 |
4031 dev->iflink = -1; |
4355 dev->iflink = -1; |
4032 |
4356 |
|
4357 #ifdef CONFIG_COMPAT_NET_DEV_OPS |
|
4358 /* Netdevice_ops API compatiability support. |
|
4359 * This is temporary until all network devices are converted. |
|
4360 */ |
|
4361 if (dev->netdev_ops) { |
|
4362 netdev_resync_ops(dev); |
|
4363 } else { |
|
4364 char drivername[64]; |
|
4365 pr_info("%s (%s): not using net_device_ops yet\n", |
|
4366 dev->name, netdev_drivername(dev, drivername, 64)); |
|
4367 |
|
4368 /* This works only because net_device_ops and the |
|
4369 compatiablity structure are the same. */ |
|
4370 dev->netdev_ops = (void *) &(dev->init); |
|
4371 } |
|
4372 #endif |
|
4373 |
4033 /* Init, if this function is available */ |
4374 /* Init, if this function is available */ |
4034 if (dev->init) { |
4375 if (dev->netdev_ops->ndo_init) { |
4035 ret = dev->init(dev); |
4376 ret = dev->netdev_ops->ndo_init(dev); |
4036 if (ret) { |
4377 if (ret) { |
4037 if (ret > 0) |
4378 if (ret > 0) |
4038 ret = -EIO; |
4379 ret = -EIO; |
4039 goto out; |
4380 goto out; |
4040 } |
4381 } |
4108 |
4449 |
4109 out: |
4450 out: |
4110 return ret; |
4451 return ret; |
4111 |
4452 |
4112 err_uninit: |
4453 err_uninit: |
4113 if (dev->uninit) |
4454 if (dev->netdev_ops->ndo_uninit) |
4114 dev->uninit(dev); |
4455 dev->netdev_ops->ndo_uninit(dev); |
4115 goto out; |
4456 goto out; |
4116 } |
4457 } |
|
4458 |
|
/**
 * init_dummy_netdev - init a dummy network device for NAPI
 * @dev: device to init
 *
 * This takes a network device structure and initialize the minimum
 * amount of fields so it can be used to schedule NAPI polls without
 * registering a full blown interface. This is to be used by drivers
 * that need to tie several hardware interfaces to a single NAPI
 * poll scheduler due to HW limitations.
 *
 * Returns 0 (cannot fail).
 */
int init_dummy_netdev(struct net_device *dev)
{
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls
	 */
	memset(dev, 0, sizeof(struct net_device));

	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
	dev->reg_state = NETREG_DUMMY;

	/* initialize the ref count */
	atomic_set(&dev->refcnt, 1);

	/* NAPI wants this */
	INIT_LIST_HEAD(&dev->napi_list);

	/* a dummy interface is started by default */
	set_bit(__LINK_STATE_PRESENT, &dev->state);
	set_bit(__LINK_STATE_START, &dev->state);

	return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
|
4496 |
4117 |
4497 |
4118 /** |
4498 /** |
4119 * register_netdev - register a network device |
4499 * register_netdev - register a network device |
4120 * @dev: device to register |
4500 * @dev: device to register |
4121 * |
4501 * |
4594 netif_rx(skb); |
4996 netif_rx(skb); |
4595 |
4997 |
4596 return NOTIFY_OK; |
4998 return NOTIFY_OK; |
4597 } |
4999 } |
4598 |
5000 |
4599 #ifdef CONFIG_NET_DMA |
|
/**
 * net_dma_rebalance - try to maintain one DMA channel per CPU
 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
 *
 * This is called when the number of channels allocated to the net_dma client
 * changes. The net_dma client tries to have one DMA channel per CPU.
 */

static void net_dma_rebalance(struct net_dma *net_dma)
{
	unsigned int cpu, i, n, chan_idx;
	struct dma_chan *chan;

	/* No channels left: clear every CPU's assignment. */
	if (cpus_empty(net_dma->channel_mask)) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	/* Spread online CPUs across the available channels as evenly as
	 * possible: each channel serves num_cpus/num_chans CPUs, with
	 * the first (num_cpus % num_chans) channels taking one extra. */
	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
		chan = net_dma->channels[chan_idx];

		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
		   + (i < (num_online_cpus() %
			cpus_weight(net_dma->channel_mask)) ? 1 : 0));

		while(n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
}
|
4637 |
|
/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @state: DMA state to be handled
 *
 * Claims newly available channels into a free slot and drops removed
 * ones, rebalancing the per-CPU channel assignment either way.
 * Returns DMA_ACK when the event was acted on, DMA_DUP otherwise.
 */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state)
{
	int i, found = 0, pos = -1;
	struct net_dma *net_dma =
		container_of(client, struct net_dma, client);
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	spin_lock(&net_dma->lock);
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		/* Scan for a duplicate while remembering the first
		 * empty slot (pos) in case the channel is new. */
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				break;
			} else if (net_dma->channels[i] == NULL && pos < 0)
				pos = i;

		if (!found && pos >= 0) {
			ack = DMA_ACK;
			net_dma->channels[pos] = chan;
			cpu_set(pos, net_dma->channel_mask);
			net_dma_rebalance(net_dma);
		}
		break;
	case DMA_RESOURCE_REMOVED:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				pos = i;
				break;
			}

		if (found) {
			ack = DMA_ACK;
			cpu_clear(pos, net_dma->channel_mask);
			/* i == pos here (loop broke at the match). */
			net_dma->channels[i] = NULL;
			net_dma_rebalance(net_dma);
		}
		break;
	default:
		break;
	}
	spin_unlock(&net_dma->lock);

	return ack;
}
|
4692 |
|
4693 /** |
|
4694 * netdev_dma_register - register the networking subsystem as a DMA client |
|
4695 */ |
|
4696 static int __init netdev_dma_register(void) |
|
4697 { |
|
4698 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), |
|
4699 GFP_KERNEL); |
|
4700 if (unlikely(!net_dma.channels)) { |
|
4701 printk(KERN_NOTICE |
|
4702 "netdev_dma: no memory for net_dma.channels\n"); |
|
4703 return -ENOMEM; |
|
4704 } |
|
4705 spin_lock_init(&net_dma.lock); |
|
4706 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); |
|
4707 dma_async_client_register(&net_dma.client); |
|
4708 dma_async_client_chan_request(&net_dma.client); |
|
4709 return 0; |
|
4710 } |
|
4711 |
|
4712 #else |
|
4713 static int __init netdev_dma_register(void) { return -ENODEV; } |
|
4714 #endif /* CONFIG_NET_DMA */ |
|
4715 |
5001 |
4716 /** |
5002 /** |
4717 * netdev_increment_features - increment feature set by one |
5003 * netdev_increment_features - increment feature set by one |
4718 * @all: current feature set |
5004 * @all: current feature set |
4719 * @one: new feature set |
5005 * @one: new feature set |
4827 .exit = netdev_exit, |
5113 .exit = netdev_exit, |
4828 }; |
5114 }; |
4829 |
5115 |
/* Namespace-exit hook: evacuate @net before it is destroyed by
 * deleting virtual devices and pushing everything else back to
 * init_net.  Each dellink/move invalidates the list iterator, hence
 * the restart-from-scratch loop. */
static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable of the network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			goto restart;	/* list changed under us */
		}

		/* Push remaing network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
		goto restart;	/* list changed under us */
	}
	rtnl_unlock();
}
4857 |
5151 |
4858 static struct pernet_operations __net_initdata default_device_ops = { |
5152 static struct pernet_operations __net_initdata default_device_ops = { |
4904 queue->completion_queue = NULL; |
5195 queue->completion_queue = NULL; |
4905 INIT_LIST_HEAD(&queue->poll_list); |
5196 INIT_LIST_HEAD(&queue->poll_list); |
4906 |
5197 |
4907 queue->backlog.poll = process_backlog; |
5198 queue->backlog.poll = process_backlog; |
4908 queue->backlog.weight = weight_p; |
5199 queue->backlog.weight = weight_p; |
4909 } |
5200 queue->backlog.gro_list = NULL; |
4910 |
5201 } |
4911 netdev_dma_register(); |
|
4912 |
5202 |
4913 dev_boot_phase = 0; |
5203 dev_boot_phase = 0; |
|
5204 |
|
5205 /* The loopback device is special if any other network devices |
|
5206 * is present in a network namespace the loopback device must |
|
5207 * be present. Since we now dynamically allocate and free the |
|
5208 * loopback device ensure this invariant is maintained by |
|
5209 * keeping the loopback device as the first device on the |
|
5210 * list of network devices. Ensuring the loopback devices |
|
5211 * is the first device that appears and the last network device |
|
5212 * that disappears. |
|
5213 */ |
|
5214 if (register_pernet_device(&loopback_net_ops)) |
|
5215 goto out; |
|
5216 |
|
5217 if (register_pernet_device(&default_device_ops)) |
|
5218 goto out; |
4914 |
5219 |
4915 open_softirq(NET_TX_SOFTIRQ, net_tx_action); |
5220 open_softirq(NET_TX_SOFTIRQ, net_tx_action); |
4916 open_softirq(NET_RX_SOFTIRQ, net_rx_action); |
5221 open_softirq(NET_RX_SOFTIRQ, net_rx_action); |
4917 |
5222 |
4918 hotcpu_notifier(dev_cpu_callback, 0); |
5223 hotcpu_notifier(dev_cpu_callback, 0); |