--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -108,40 +108,36 @@
 	spin_unlock(&head->lock);
 }
 
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
-/*
- * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-void inet_listen_wlock(struct inet_hashinfo *hashinfo)
-	__acquires(hashinfo->lhash_lock)
-{
-	write_lock(&hashinfo->lhash_lock);
-
-	if (atomic_read(&hashinfo->lhash_users)) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
-						  &wait, TASK_UNINTERRUPTIBLE);
-			if (!atomic_read(&hashinfo->lhash_users))
-				break;
-			write_unlock_bh(&hashinfo->lhash_lock);
-			schedule();
-			write_lock_bh(&hashinfo->lhash_lock);
-		}
-
-		finish_wait(&hashinfo->lhash_wait, &wait);
-	}
-}
+static inline int compute_score(struct sock *sk, struct net *net,
+				const unsigned short hnum, const __be32 daddr,
+				const int dif)
+{
+	int score = -1;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (net_eq(sock_net(sk), net) && inet->num == hnum &&
+			!ipv6_only_sock(sk)) {
+		__be32 rcv_saddr = inet->rcv_saddr;
+		score = sk->sk_family == PF_INET ? 1 : 0;
+		if (rcv_saddr) {
+			if (rcv_saddr != daddr)
+				return -1;
+			score += 2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score += 2;
+		}
+	}
+	return score;
+}
 
 /*
  * Don't inline this cruft. Here are some nice properties to exploit here. The
  * BSD API does not allow a listening sock to specify the remote port nor the
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
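The new compute_score() helper centralizes the match scoring that previously lived inline in inet_lookup_listener_slow() (removed in the next hunk), so the RCU lookup can score each candidate and later re-score the winner for validation. The scoring itself is unchanged: a socket bound to the right port starts at 1 for PF_INET (0 for an AF_INET6 socket that also accepts IPv4), earns +2 for an exact local-address bind and +2 for an exact bound device, and any mismatch disqualifies it. The stand-alone sketch below mirrors that logic on mock data; it is plain C with simplified stand-in fields, not kernel code:

	#include <stdio.h>

	/* Simplified stand-ins for the inet_sk(sk) state compute_score() reads. */
	struct mock_sk {
		unsigned short num;	/* bound local port */
		unsigned int rcv_saddr;	/* bound local address, 0 = wildcard */
		int bound_dev;		/* bound device, 0 = any */
	};

	static int mock_score(const struct mock_sk *sk,
			      unsigned short hnum, unsigned int daddr, int dif)
	{
		int score = -1;

		if (sk->num == hnum) {
			score = 1;			/* assume PF_INET here */
			if (sk->rcv_saddr) {
				if (sk->rcv_saddr != daddr)
					return -1;	/* bound elsewhere: disqualified */
				score += 2;		/* exact local address */
			}
			if (sk->bound_dev) {
				if (sk->bound_dev != dif)
					return -1;	/* bound to another device */
				score += 2;		/* exact device */
			}
		}
		return score;
	}

	int main(void)
	{
		struct mock_sk any   = { 80, 0, 0 };		/* 0.0.0.0:80 */
		struct mock_sk bound = { 80, 0x7f000001, 0 };	/* 127.0.0.1:80 */

		/* A lookup for 127.0.0.1:80 prefers the specifically bound socket. */
		printf("any: %d, bound: %d\n",
		       mock_score(&any, 80, 0x7f000001, 0),	/* 1 */
		       mock_score(&bound, 80, 0x7f000001, 0));	/* 3 */
		return 0;
	}

Note that in the rewritten __inet_lookup_listener() below, a perfect score of 5 no longer short-circuits the walk as it did in the slow path: the chain must be traversed to its terminator so the nulls check at the end stays meaningful.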
@@ -148,71 +144,47 @@
-static struct sock *inet_lookup_listener_slow(struct net *net,
-					      const struct hlist_head *head,
-					      const __be32 daddr,
-					      const unsigned short hnum,
-					      const int dif)
-{
-	struct sock *result = NULL, *sk;
-	const struct hlist_node *node;
-	int hiscore = -1;
-
-	sk_for_each(sk, node, head) {
-		const struct inet_sock *inet = inet_sk(sk);
-
-		if (net_eq(sock_net(sk), net) && inet->num == hnum &&
-				!ipv6_only_sock(sk)) {
-			const __be32 rcv_saddr = inet->rcv_saddr;
-			int score = sk->sk_family == PF_INET ? 1 : 0;
-
-			if (rcv_saddr) {
-				if (rcv_saddr != daddr)
-					continue;
-				score += 2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score += 2;
-			}
-			if (score == 5)
-				return sk;
-			if (score > hiscore) {
-				hiscore = score;
-				result = sk;
-			}
-		}
-	}
-	return result;
-}
-
-/* Optimize the common listener case. */
+
+
 struct sock *__inet_lookup_listener(struct net *net,
 				    struct inet_hashinfo *hashinfo,
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif)
 {
-	struct sock *sk = NULL;
-	const struct hlist_head *head;
-
-	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
-	if (!hlist_empty(head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-		if (inet->num == hnum && !sk->sk_node.next &&
-		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-		    !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
-			goto sherry_cache;
-		sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
-	}
-	if (sk) {
-sherry_cache:
-		sock_hold(sk);
-	}
-	read_unlock(&hashinfo->lhash_lock);
-	return sk;
+	struct sock *sk, *result;
+	struct hlist_nulls_node *node;
+	unsigned int hash = inet_lhashfn(net, hnum);
+	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+	int score, hiscore;
+
+	rcu_read_lock();
+begin:
+	result = NULL;
+	hiscore = -1;
+	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
+		score = compute_score(sk, net, hnum, daddr, dif);
+		if (score > hiscore) {
+			result = sk;
+			hiscore = score;
+		}
+	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+		goto begin;
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, daddr,
+				  dif) < hiscore)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
+	return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
 struct sock * __inet_lookup_established(struct net *net,
 				  struct inet_hashinfo *hashinfo,
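Two validation steps guard the lockless walk above. TCP sockets come from a SLAB_DESTROY_BY_RCU cache, so an object seen during the traversal can be freed and recycled for a different socket without an intervening grace period: the winner therefore only counts once atomic_inc_not_zero() succeeds on sk_refcnt and compute_score() still reproduces at least the old score; on either failure the search restarts at begin:. A minimal user-space sketch of that pin-and-revalidate step (C11 atomics; obj, try_pin and friends are illustrative stand-ins, not kernel APIs):

	#include <stdatomic.h>
	#include <stdio.h>

	/* Illustrative object, standing in for struct sock. */
	struct obj {
		atomic_int refcnt;	/* 0 => object is being freed */
		int key;
	};

	/* Equivalent of the kernel's atomic_inc_not_zero(): take a reference
	 * only if the object still has one, i.e. is not being torn down. */
	static int inc_not_zero(atomic_int *r)
	{
		int v = atomic_load(r);

		while (v != 0)
			if (atomic_compare_exchange_weak(r, &v, v + 1))
				return 1;
		return 0;
	}

	/* Pin-and-revalidate: the key matched during the lockless walk, but
	 * the slot may since have been recycled for a different key, so it
	 * must still match *after* the reference has been taken. */
	static struct obj *try_pin(struct obj *candidate, int key)
	{
		if (!inc_not_zero(&candidate->refcnt))
			return NULL;	/* lost the race: caller restarts the walk */
		if (candidate->key != key) {
			atomic_fetch_sub(&candidate->refcnt, 1);	/* i.e. sock_put() */
			return NULL;	/* recycled: caller restarts the walk */
		}
		return candidate;
	}

	int main(void)
	{
		struct obj o = { 1, 42 };	/* one live reference, key 42 */

		printf("%s\n", try_pin(&o, 42) ? "pinned" : "restart");
		return 0;
	}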
@@ -221,39 +193,69 @@
 				  const int dif)
 {
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
 	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
+	unsigned int slot = hash & (hashinfo->ehash_size - 1);
+	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
 
-	prefetch(head->chain.first);
-	read_lock(lock);
-	sk_for_each(sk, node, &head->chain) {
+	rcu_read_lock();
+begin:
+	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (INET_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif))
-			goto hit; /* You sunk my battleship! */
-	}
-
-	/* Must check for a TIME_WAIT'er before going to listener hash. */
-	sk_for_each(sk, node, &head->twchain) {
-		if (INET_TW_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif))
-			goto hit;
-	}
+					saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+				goto begintw;
+			if (unlikely(!INET_MATCH(sk, net, hash, acookie,
+				saddr, daddr, ports, dif))) {
+				sock_put(sk);
+				goto begin;
+			}
+			goto out;
+		}
+	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot)
+		goto begin;
+
+begintw:
+	/* Must check for a TIME_WAIT'er before going to listener hash. */
+	sk_nulls_for_each_rcu(sk, node, &head->twchain) {
+		if (INET_TW_MATCH(sk, net, hash, acookie,
+					saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
+				sk = NULL;
+				goto out;
+			}
+			if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie,
+				 saddr, daddr, ports, dif))) {
+				sock_put(sk);
+				goto begintw;
+			}
+			goto out;
+		}
+	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot)
+		goto begintw;
 	sk = NULL;
 out:
-	read_unlock(lock);
+	rcu_read_unlock();
 	return sk;
-hit:
-	sock_hold(sk);
-	goto out;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_established);
 
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
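The restart tests in both lookups depend on the hlist_nulls layout: a chain does not end in NULL but in an odd "nulls" value that encodes which chain the terminator closes (the slot index in the established table, hash + LISTENING_NULLS_BASE in the listening table). If the socket a reader is standing on is freed and reinserted into another bucket mid-walk, the reader finishes on that other bucket's terminator; comparing get_nulls_value(node) against the expected value catches this and triggers the goto begin / goto begintw retry. The encoding, paraphrased here from include/linux/list_nulls.h:

	struct hlist_nulls_node {
		struct hlist_nulls_node *next, **pprev;
	};

	/* An end-of-list marker is an odd pointer value ... */
	static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
	{
		return ((unsigned long)ptr & 1);
	}

	/* ... whose remaining bits carry the value the writer stored, here
	 * the identity of the chain this terminator belongs to. */
	static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
	{
		return ((unsigned long)ptr) >> 1;
	}

Without this check, a lookup dragged onto a foreign chain could miss a socket that was in its bucket the whole time and wrongly fall through to the listener hash.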
@@ -268,20 +270,19 @@
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
 	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
-	prefetch(head->chain.first);
-	write_lock(lock);
+	spin_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &head->twchain) {
+	sk_nulls_for_each(sk2, node, &head->twchain) {
 		tw = inet_twsk(sk2);
 
 		if (INET_TW_MATCH(sk2, net, hash, acookie,
 					saddr, daddr, ports, dif)) {
 			if (twsk_unique(sk, sk2, twp))
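__inet_check_established() remains a writer: on success the new socket is inserted into the chain (in the portion elided here), so it keeps taking the bucket lock, now a plain spinlock_t since lockless lookups no longer contend on it as readers. The INET_MATCH()/INET_TW_MATCH() keys it shares with the lookups compare both port numbers in a single operation through the packed __portpair; roughly the following idea, with the kernel's exact half-ordering and endianness annotations omitted:

	#include <stdint.h>
	#include <stdio.h>

	/* Pack both 16-bit port numbers into one 32-bit word so a candidate
	 * socket is matched with a single integer compare (the idea behind
	 * INET_COMBINED_PORTS; this sketch is not the kernel macro). */
	static uint32_t combined_ports(uint16_t sport, uint16_t dport)
	{
		return ((uint32_t)sport << 16) | dport;
	}

	int main(void)
	{
		/* source port 33000 -> destination port 80 */
		printf("match: %d\n",
		       combined_ports(33000, 80) == combined_ports(33000, 80)); /* 1 */
		return 0;
	}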
@@ -336,48 +337,45 @@
 }
 
 void __inet_hash_nolisten(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_head *list;
-	rwlock_t *lock;
+	struct hlist_nulls_head *list;
+	spinlock_t *lock;
 	struct inet_ehash_bucket *head;
 
 	WARN_ON(!sk_unhashed(sk));
 
 	sk->sk_hash = inet_sk_ehashfn(sk);
 	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 	list = &head->chain;
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-	write_lock(lock);
-	__sk_add_node(sk, list);
+	spin_lock(lock);
+	__sk_nulls_add_node_rcu(sk, list);
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
 static void __inet_hash(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_head *list;
-	rwlock_t *lock;
+	struct inet_listen_hashbucket *ilb;
 
 	if (sk->sk_state != TCP_LISTEN) {
 		__inet_hash_nolisten(sk);
 		return;
 	}
 
 	WARN_ON(!sk_unhashed(sk));
-	list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-	lock = &hashinfo->lhash_lock;
+	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 
-	inet_listen_wlock(hashinfo);
-	__sk_add_node(sk, list);
+	spin_lock(&ilb->lock);
+	__sk_nulls_add_node_rcu(sk, &ilb->head);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
-	wake_up(&hashinfo->lhash_wait);
+	spin_unlock(&ilb->lock);
 }
 
 void inet_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
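With inet_listen_wlock() and the lhash_users/lhash_wait machinery gone (first hunk), each listening-hash slot now carries its own lock and its own nulls-terminated chain, so adding or removing a listener touches a single bucket instead of a global lock plus a wakeup. The bucket type this change introduces looks roughly like this (paraphrased from the inet_hashinfo changes in include/net/inet_hashtables.h):

	struct inet_listen_hashbucket {
		spinlock_t		lock;
		struct hlist_nulls_head	head;
	};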
@@ -388,31 +386,27 @@
 }
 EXPORT_SYMBOL_GPL(inet_hash);
 
 void inet_unhash(struct sock *sk)
 {
-	rwlock_t *lock;
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	spinlock_t *lock;
+	int done;
 
 	if (sk_unhashed(sk))
-		goto out;
+		return;
 
-	if (sk->sk_state == TCP_LISTEN) {
-		local_bh_disable();
-		inet_listen_wlock(hashinfo);
-		lock = &hashinfo->lhash_lock;
-	} else {
+	if (sk->sk_state == TCP_LISTEN)
+		lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
+	else
 		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-		write_lock_bh(lock);
-	}
 
-	if (__sk_del_node_init(sk))
+	spin_lock_bh(lock);
+	done = __sk_nulls_del_node_init_rcu(sk);
+	if (done)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-	write_unlock_bh(lock);
-out:
-	if (sk->sk_state == TCP_LISTEN)
-		wake_up(&hashinfo->lhash_wait);
+	spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u32 port_offset,