|
1 #ifndef _LINUX_CGROUP_H |
|
2 #define _LINUX_CGROUP_H |
|
3 /* |
|
4 * cgroup interface |
|
5 * |
|
6 * Copyright (C) 2003 BULL SA |
|
7 * Copyright (C) 2004-2006 Silicon Graphics, Inc. |
|
8 * |
|
9 */ |
|
10 |
|
11 #include <linux/sched.h> |
|
12 #include <linux/cpumask.h> |
|
13 #include <linux/nodemask.h> |
|
14 #include <linux/rcupdate.h> |
|
15 #include <linux/cgroupstats.h> |
|
16 #include <linux/prio_heap.h> |
|
17 #include <linux/rwsem.h> |
|
18 |
|
19 #ifdef CONFIG_CGROUPS |
|
20 |
|
/* Forward declarations so the prototypes below can take pointers. */
struct cgroupfs_root;
struct cgroup_subsys;
struct inode;
struct cgroup;

/* Initialisation entry points of the cgroup core. */
extern int cgroup_init_early(void);
extern int cgroup_init(void);

/* Cgroup locking helpers. */
extern void cgroup_lock(void);
extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);

/* Hooks named after the task fork/exit paths they belong to. */
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_fork_callbacks(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p, int run_callbacks);

/*
 * Fill in @stats; @dentry presumably identifies the cgroup directory
 * to report on (confirm against the definition).
 */
extern int cgroupstats_build(struct cgroupstats *stats,
			     struct dentry *dentry);

extern struct file_operations proc_cgroup_operations;
|
39 |
|
40 /* Define the enumeration of all cgroup subsystems */ |
|
41 #define SUBSYS(_x) _x ## _subsys_id, |
|
42 enum cgroup_subsys_id { |
|
43 #include <linux/cgroup_subsys.h> |
|
44 CGROUP_SUBSYS_COUNT |
|
45 }; |
|
46 #undef SUBSYS |
|
47 |
|
48 /* Per-subsystem/per-cgroup state maintained by the system. */ |
|
49 struct cgroup_subsys_state { |
|
50 /* The cgroup that this subsystem is attached to. Useful |
|
51 * for subsystems that want to know about the cgroup |
|
52 * hierarchy structure */ |
|
53 struct cgroup *cgroup; |
|
54 |
|
55 /* State maintained by the cgroup system to allow |
|
56 * subsystems to be "busy". Should be accessed via css_get() |
|
57 * and css_put() */ |
|
58 |
|
59 atomic_t refcnt; |
|
60 |
|
61 unsigned long flags; |
|
62 }; |
|
63 |
|
/* Bit numbers used in the flags field of struct cgroup_subsys_state. */
enum {
	/* Set when this CSS is the root of its subsystem. */
	CSS_ROOT,
};
|
68 |
|
69 /* |
|
70 * Call css_get() to hold a reference on the cgroup; |
|
71 * |
|
72 */ |
|
73 |
|
74 static inline void css_get(struct cgroup_subsys_state *css) |
|
75 { |
|
76 /* We don't need to reference count the root state */ |
|
77 if (!test_bit(CSS_ROOT, &css->flags)) |
|
78 atomic_inc(&css->refcnt); |
|
79 } |
|
80 /* |
|
81 * css_put() should be called to release a reference taken by |
|
82 * css_get() |
|
83 */ |
|
84 |
|
85 extern void __css_put(struct cgroup_subsys_state *css); |
|
86 static inline void css_put(struct cgroup_subsys_state *css) |
|
87 { |
|
88 if (!test_bit(CSS_ROOT, &css->flags)) |
|
89 __css_put(css); |
|
90 } |
|
91 |
|
/* Bit numbers used in the flags field of struct cgroup. */
enum {
	/* The control group is dead. */
	CGRP_REMOVED,
	/*
	 * The control group used to have a child cgroup or a task but no
	 * longer does (only meaningful if CGRP_NOTIFY_ON_RELEASE is set).
	 */
	CGRP_RELEASABLE,
	/* Userspace must be sent release notifications for this group. */
	CGRP_NOTIFY_ON_RELEASE,
};
|
102 |
|
103 struct cgroup { |
|
104 unsigned long flags; /* "unsigned long" so bitops work */ |
|
105 |
|
106 /* count users of this cgroup. >0 means busy, but doesn't |
|
107 * necessarily indicate the number of tasks in the |
|
108 * cgroup */ |
|
109 atomic_t count; |
|
110 |
|
111 /* |
|
112 * We link our 'sibling' struct into our parent's 'children'. |
|
113 * Our children link their 'sibling' into our 'children'. |
|
114 */ |
|
115 struct list_head sibling; /* my parent's children */ |
|
116 struct list_head children; /* my children */ |
|
117 |
|
118 struct cgroup *parent; /* my parent */ |
|
119 struct dentry *dentry; /* cgroup fs entry */ |
|
120 |
|
121 /* Private pointers for each registered subsystem */ |
|
122 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
|
123 |
|
124 struct cgroupfs_root *root; |
|
125 struct cgroup *top_cgroup; |
|
126 |
|
127 /* |
|
128 * List of cg_cgroup_links pointing at css_sets with |
|
129 * tasks in this cgroup. Protected by css_set_lock |
|
130 */ |
|
131 struct list_head css_sets; |
|
132 |
|
133 /* |
|
134 * Linked list running through all cgroups that can |
|
135 * potentially be reaped by the release agent. Protected by |
|
136 * release_list_lock |
|
137 */ |
|
138 struct list_head release_list; |
|
139 |
|
140 /* pids_mutex protects the fields below */ |
|
141 struct rw_semaphore pids_mutex; |
|
142 /* Array of process ids in the cgroup */ |
|
143 pid_t *tasks_pids; |
|
144 /* How many files are using the current tasks_pids array */ |
|
145 int pids_use_count; |
|
146 /* Length of the current tasks_pids array */ |
|
147 int pids_length; |
|
148 }; |
|
149 |
|
150 /* A css_set is a structure holding pointers to a set of |
|
151 * cgroup_subsys_state objects. This saves space in the task struct |
|
152 * object and speeds up fork()/exit(), since a single inc/dec and a |
|
153 * list_add()/del() can bump the reference count on the entire |
|
154 * cgroup set for a task. |
|
155 */ |
|
156 |
|
157 struct css_set { |
|
158 |
|
159 /* Reference count */ |
|
160 atomic_t refcount; |
|
161 |
|
162 /* |
|
163 * List running through all cgroup groups in the same hash |
|
164 * slot. Protected by css_set_lock |
|
165 */ |
|
166 struct hlist_node hlist; |
|
167 |
|
168 /* |
|
169 * List running through all tasks using this cgroup |
|
170 * group. Protected by css_set_lock |
|
171 */ |
|
172 struct list_head tasks; |
|
173 |
|
174 /* |
|
175 * List of cg_cgroup_link objects on link chains from |
|
176 * cgroups referenced from this css_set. Protected by |
|
177 * css_set_lock |
|
178 */ |
|
179 struct list_head cg_links; |
|
180 |
|
181 /* |
|
182 * Set of subsystem states, one for each subsystem. This array |
|
183 * is immutable after creation apart from the init_css_set |
|
184 * during subsystem registration (at boot time). |
|
185 */ |
|
186 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
|
187 }; |
|
188 |
|
189 /* |
|
190 * cgroup_map_cb is an abstract callback API for reporting map-valued |
|
191 * control files |
|
192 */ |
|
193 |
|
194 struct cgroup_map_cb { |
|
195 int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); |
|
196 void *state; |
|
197 }; |
|
198 |
|
199 /* struct cftype: |
|
200 * |
|
201 * The files in the cgroup filesystem mostly have a very simple read/write |
|
202 * handling, some common function will take care of it. Nevertheless some cases |
|
203 * (read tasks) are special and therefore I define this structure for every |
|
204 * kind of file. |
|
205 * |
|
206 * |
|
207 * When reading/writing to a file: |
|
208 * - the cgroup to use is file->f_dentry->d_parent->d_fsdata |
|
209 * - the 'cftype' of the file is file->f_dentry->d_fsdata |
|
210 */ |
|
211 |
|
212 #define MAX_CFTYPE_NAME 64 |
|
213 struct cftype { |
|
214 /* By convention, the name should begin with the name of the |
|
215 * subsystem, followed by a period */ |
|
216 char name[MAX_CFTYPE_NAME]; |
|
217 int private; |
|
218 |
|
219 /* |
|
220 * If non-zero, defines the maximum length of string that can |
|
221 * be passed to write_string; defaults to 64 |
|
222 */ |
|
223 size_t max_write_len; |
|
224 |
|
225 int (*open)(struct inode *inode, struct file *file); |
|
226 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, |
|
227 struct file *file, |
|
228 char __user *buf, size_t nbytes, loff_t *ppos); |
|
229 /* |
|
230 * read_u64() is a shortcut for the common case of returning a |
|
231 * single integer. Use it in place of read() |
|
232 */ |
|
233 u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); |
|
234 /* |
|
235 * read_s64() is a signed version of read_u64() |
|
236 */ |
|
237 s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); |
|
238 /* |
|
239 * read_map() is used for defining a map of key/value |
|
240 * pairs. It should call cb->fill(cb, key, value) for each |
|
241 * entry. The key/value pairs (and their ordering) should not |
|
242 * change between reboots. |
|
243 */ |
|
244 int (*read_map)(struct cgroup *cont, struct cftype *cft, |
|
245 struct cgroup_map_cb *cb); |
|
246 /* |
|
247 * read_seq_string() is used for outputting a simple sequence |
|
248 * using seqfile. |
|
249 */ |
|
250 int (*read_seq_string)(struct cgroup *cont, struct cftype *cft, |
|
251 struct seq_file *m); |
|
252 |
|
253 ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, |
|
254 struct file *file, |
|
255 const char __user *buf, size_t nbytes, loff_t *ppos); |
|
256 |
|
257 /* |
|
258 * write_u64() is a shortcut for the common case of accepting |
|
259 * a single integer (as parsed by simple_strtoull) from |
|
260 * userspace. Use in place of write(); return 0 or error. |
|
261 */ |
|
262 int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); |
|
263 /* |
|
264 * write_s64() is a signed version of write_u64() |
|
265 */ |
|
266 int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); |
|
267 |
|
268 /* |
|
269 * write_string() is passed a nul-terminated kernelspace |
|
270 * buffer of maximum length determined by max_write_len. |
|
271 * Returns 0 or -ve error code. |
|
272 */ |
|
273 int (*write_string)(struct cgroup *cgrp, struct cftype *cft, |
|
274 const char *buffer); |
|
275 /* |
|
276 * trigger() callback can be used to get some kick from the |
|
277 * userspace, when the actual string written is not important |
|
278 * at all. The private field can be used to determine the |
|
279 * kick type for multiplexing. |
|
280 */ |
|
281 int (*trigger)(struct cgroup *cgrp, unsigned int event); |
|
282 |
|
283 int (*release)(struct inode *inode, struct file *file); |
|
284 }; |
|
285 |
|
/* Argument bundle passed to cgroup_scan_tasks(). */
struct cgroup_scanner {
	/* Cgroup the scan applies to. */
	struct cgroup *cg;
	/* Per-task test callback. */
	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
	/* Per-task processing callback. */
	void (*process_task)(struct task_struct *p,
			     struct cgroup_scanner *scan);
	struct ptr_heap *heap;
};
|
293 |
|
294 /* Add a new file to the given cgroup directory. Should only be |
|
295 * called by subsystems from within a populate() method */ |
|
296 int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
|
297 const struct cftype *cft); |
|
298 |
|
299 /* Add a set of new files to the given cgroup directory. Should |
|
300 * only be called by subsystems from within a populate() method */ |
|
301 int cgroup_add_files(struct cgroup *cgrp, |
|
302 struct cgroup_subsys *subsys, |
|
303 const struct cftype cft[], |
|
304 int count); |
|
305 |
|
306 int cgroup_is_removed(const struct cgroup *cgrp); |
|
307 |
|
308 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); |
|
309 |
|
310 int cgroup_task_count(const struct cgroup *cgrp); |
|
311 |
|
312 /* Return true if the cgroup is a descendant of the current cgroup */ |
|
313 int cgroup_is_descendant(const struct cgroup *cgrp); |
|
314 |
|
315 /* Control Group subsystem type. See Documentation/cgroups.txt for details */ |
|
316 |
|
317 struct cgroup_subsys { |
|
318 struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, |
|
319 struct cgroup *cgrp); |
|
320 void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
|
321 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
|
322 int (*can_attach)(struct cgroup_subsys *ss, |
|
323 struct cgroup *cgrp, struct task_struct *tsk); |
|
324 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
|
325 struct cgroup *old_cgrp, struct task_struct *tsk); |
|
326 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); |
|
327 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); |
|
328 int (*populate)(struct cgroup_subsys *ss, |
|
329 struct cgroup *cgrp); |
|
330 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
|
331 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); |
|
332 /* |
|
333 * This routine is called with the task_lock of mm->owner held |
|
334 */ |
|
335 void (*mm_owner_changed)(struct cgroup_subsys *ss, |
|
336 struct cgroup *old, |
|
337 struct cgroup *new, |
|
338 struct task_struct *p); |
|
339 int subsys_id; |
|
340 int active; |
|
341 int disabled; |
|
342 int early_init; |
|
343 #define MAX_CGROUP_TYPE_NAMELEN 32 |
|
344 const char *name; |
|
345 |
|
346 /* Protected by RCU */ |
|
347 struct cgroupfs_root *root; |
|
348 |
|
349 struct list_head sibling; |
|
350 }; |
|
351 |
|
352 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; |
|
353 #include <linux/cgroup_subsys.h> |
|
354 #undef SUBSYS |
|
355 |
|
356 static inline struct cgroup_subsys_state *cgroup_subsys_state( |
|
357 struct cgroup *cgrp, int subsys_id) |
|
358 { |
|
359 return cgrp->subsys[subsys_id]; |
|
360 } |
|
361 |
|
362 static inline struct cgroup_subsys_state *task_subsys_state( |
|
363 struct task_struct *task, int subsys_id) |
|
364 { |
|
365 return rcu_dereference(task->cgroups->subsys[subsys_id]); |
|
366 } |
|
367 |
|
368 static inline struct cgroup* task_cgroup(struct task_struct *task, |
|
369 int subsys_id) |
|
370 { |
|
371 return task_subsys_state(task, subsys_id)->cgroup; |
|
372 } |
|
373 |
|
/* Defined elsewhere; takes a task, a subsystem and a node name. */
int cgroup_clone(struct task_struct *tsk,
		 struct cgroup_subsys *ss,
		 char *nodename);
|
376 |
|
/*
 * Iterator state for walking the tasks of a cgroup.  Callers should treat
 * it as opaque and use only the cgroup_iter_*() functions on it.
 */
struct cgroup_iter {
	struct list_head *cg_link;
	struct list_head *task;
};
|
382 |
|
/*
 * To iterate across the tasks in a cgroup:
 *
 * 1) call cgroup_iter_start() to initialize an iterator
 *
 * 2) call cgroup_iter_next() to retrieve member tasks until it
 *    returns NULL or until you want to end the iteration
 *
 * 3) call cgroup_iter_end() to destroy the iterator.
 *
 * Or, call cgroup_scan_tasks() to iterate through every task in a cgroup.
 *    - cgroup_scan_tasks() holds the css_set_lock when calling the
 *      test_task() callback, but not while calling the process_task()
 *      callback.
 */
|
/* Task iteration over a cgroup: start, fetch the next task, finish. */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it);
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);

/* Callback-driven scan described by a struct cgroup_scanner. */
int cgroup_scan_tasks(struct cgroup_scanner *scan);

int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk);

void cgroup_mm_owner_callbacks(struct task_struct *old,
			       struct task_struct *new);
|
405 |
|
406 #else /* !CONFIG_CGROUPS */ |
|
407 |
|
/*
 * Stubs used when CONFIG_CGROUPS is disabled: the init functions report
 * success, the hooks and lock helpers do nothing, and the statistics
 * query fails with -EINVAL.
 */
static inline int cgroup_init_early(void)
{
	return 0;
}

static inline int cgroup_init(void)
{
	return 0;
}

static inline void cgroup_fork(struct task_struct *p)
{
}

static inline void cgroup_fork_callbacks(struct task_struct *p)
{
}

static inline void cgroup_post_fork(struct task_struct *p)
{
}

static inline void cgroup_exit(struct task_struct *p, int callbacks)
{
}

static inline void cgroup_lock(void)
{
}

static inline void cgroup_unlock(void)
{
}

/* No cgroups means no statistics to build. */
static inline int cgroupstats_build(struct cgroupstats *stats,
				    struct dentry *dentry)
{
	return -EINVAL;
}

static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
					     struct task_struct *new)
{
}
|
425 |
|
426 #endif /* !CONFIG_CGROUPS */ |
|
427 |
|
428 #endif /* _LINUX_CGROUP_H */ |