include/linux/cgroup.h
changeset 0 aa628870c1d3
child 2 d1f6d8b6f81c
equal deleted inserted replaced
-1:000000000000 0:aa628870c1d3
       
     1 #ifndef _LINUX_CGROUP_H
       
     2 #define _LINUX_CGROUP_H
       
     3 /*
       
     4  *  cgroup interface
       
     5  *
       
     6  *  Copyright (C) 2003 BULL SA
       
     7  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
       
     8  *
       
     9  */
       
    10 
       
    11 #include <linux/sched.h>
       
    12 #include <linux/cpumask.h>
       
    13 #include <linux/nodemask.h>
       
    14 #include <linux/rcupdate.h>
       
    15 #include <linux/cgroupstats.h>
       
    16 #include <linux/prio_heap.h>
       
    17 #include <linux/rwsem.h>
       
    18 
       
    19 #ifdef CONFIG_CGROUPS
       
    20 
       
    21 struct cgroupfs_root;
       
    22 struct cgroup_subsys;
       
    23 struct inode;
       
    24 struct cgroup;
       
    25 
       
    26 extern int cgroup_init_early(void);
       
    27 extern int cgroup_init(void);
       
    28 extern void cgroup_lock(void);
       
    29 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
       
    30 extern void cgroup_unlock(void);
       
    31 extern void cgroup_fork(struct task_struct *p);
       
    32 extern void cgroup_fork_callbacks(struct task_struct *p);
       
    33 extern void cgroup_post_fork(struct task_struct *p);
       
    34 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
       
    35 extern int cgroupstats_build(struct cgroupstats *stats,
       
    36 				struct dentry *dentry);
       
    37 
       
    38 extern struct file_operations proc_cgroup_operations;
       
    39 
       
    40 /* Define the enumeration of all cgroup subsystems */
       
    41 #define SUBSYS(_x) _x ## _subsys_id,
       
    42 enum cgroup_subsys_id {
       
    43 #include <linux/cgroup_subsys.h>
       
    44 	CGROUP_SUBSYS_COUNT
       
    45 };
       
    46 #undef SUBSYS
       
    47 
       
    48 /* Per-subsystem/per-cgroup state maintained by the system. */
       
    49 struct cgroup_subsys_state {
       
    50 	/* The cgroup that this subsystem is attached to. Useful
       
    51 	 * for subsystems that want to know about the cgroup
       
    52 	 * hierarchy structure */
       
    53 	struct cgroup *cgroup;
       
    54 
       
    55 	/* State maintained by the cgroup system to allow
       
    56 	 * subsystems to be "busy". Should be accessed via css_get()
       
    57 	 * and css_put() */
       
    58 
       
    59 	atomic_t refcnt;
       
    60 
       
    61 	unsigned long flags;
       
    62 };
       
    63 
       
    64 /* bits in struct cgroup_subsys_state flags field */
       
    65 enum {
       
    66 	CSS_ROOT, /* This CSS is the root of the subsystem */
       
    67 };
       
    68 
       
    69 /*
       
    70  * Call css_get() to hold a reference on the cgroup;
       
    71  *
       
    72  */
       
    73 
       
    74 static inline void css_get(struct cgroup_subsys_state *css)
       
    75 {
       
    76 	/* We don't need to reference count the root state */
       
    77 	if (!test_bit(CSS_ROOT, &css->flags))
       
    78 		atomic_inc(&css->refcnt);
       
    79 }
       
    80 /*
       
    81  * css_put() should be called to release a reference taken by
       
    82  * css_get()
       
    83  */
       
    84 
       
    85 extern void __css_put(struct cgroup_subsys_state *css);
       
    86 static inline void css_put(struct cgroup_subsys_state *css)
       
    87 {
       
    88 	if (!test_bit(CSS_ROOT, &css->flags))
       
    89 		__css_put(css);
       
    90 }
       
    91 
       
    92 /* bits in struct cgroup flags field */
       
    93 enum {
       
    94 	/* Control Group is dead */
       
    95 	CGRP_REMOVED,
       
    96 	/* Control Group has previously had a child cgroup or a task,
       
    97 	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
       
    98 	CGRP_RELEASABLE,
       
    99 	/* Control Group requires release notifications to userspace */
       
   100 	CGRP_NOTIFY_ON_RELEASE,
       
   101 };
       
   102 
       
   103 struct cgroup {
       
   104 	unsigned long flags;		/* "unsigned long" so bitops work */
       
   105 
       
   106 	/* count users of this cgroup. >0 means busy, but doesn't
       
   107 	 * necessarily indicate the number of tasks in the
       
   108 	 * cgroup */
       
   109 	atomic_t count;
       
   110 
       
   111 	/*
       
   112 	 * We link our 'sibling' struct into our parent's 'children'.
       
   113 	 * Our children link their 'sibling' into our 'children'.
       
   114 	 */
       
   115 	struct list_head sibling;	/* my parent's children */
       
   116 	struct list_head children;	/* my children */
       
   117 
       
   118 	struct cgroup *parent;	/* my parent */
       
   119 	struct dentry *dentry;	  	/* cgroup fs entry */
       
   120 
       
   121 	/* Private pointers for each registered subsystem */
       
   122 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
       
   123 
       
   124 	struct cgroupfs_root *root;
       
   125 	struct cgroup *top_cgroup;
       
   126 
       
   127 	/*
       
   128 	 * List of cg_cgroup_links pointing at css_sets with
       
   129 	 * tasks in this cgroup. Protected by css_set_lock
       
   130 	 */
       
   131 	struct list_head css_sets;
       
   132 
       
   133 	/*
       
   134 	 * Linked list running through all cgroups that can
       
   135 	 * potentially be reaped by the release agent. Protected by
       
   136 	 * release_list_lock
       
   137 	 */
       
   138 	struct list_head release_list;
       
   139 
       
   140 	/* pids_mutex protects the fields below */
       
   141 	struct rw_semaphore pids_mutex;
       
   142 	/* Array of process ids in the cgroup */
       
   143 	pid_t *tasks_pids;
       
   144 	/* How many files are using the current tasks_pids array */
       
   145 	int pids_use_count;
       
   146 	/* Length of the current tasks_pids array */
       
   147 	int pids_length;
       
   148 };
       
   149 
       
   150 /* A css_set is a structure holding pointers to a set of
       
   151  * cgroup_subsys_state objects. This saves space in the task struct
       
   152  * object and speeds up fork()/exit(), since a single inc/dec and a
       
   153  * list_add()/del() can bump the reference count on the entire
       
   154  * cgroup set for a task.
       
   155  */
       
   156 
       
   157 struct css_set {
       
   158 
       
   159 	/* Reference count */
       
   160 	atomic_t refcount;
       
   161 
       
   162 	/*
       
   163 	 * List running through all cgroup groups in the same hash
       
   164 	 * slot. Protected by css_set_lock
       
   165 	 */
       
   166 	struct hlist_node hlist;
       
   167 
       
   168 	/*
       
   169 	 * List running through all tasks using this cgroup
       
   170 	 * group. Protected by css_set_lock
       
   171 	 */
       
   172 	struct list_head tasks;
       
   173 
       
   174 	/*
       
   175 	 * List of cg_cgroup_link objects on link chains from
       
   176 	 * cgroups referenced from this css_set. Protected by
       
   177 	 * css_set_lock
       
   178 	 */
       
   179 	struct list_head cg_links;
       
   180 
       
   181 	/*
       
   182 	 * Set of subsystem states, one for each subsystem. This array
       
   183 	 * is immutable after creation apart from the init_css_set
       
   184 	 * during subsystem registration (at boot time).
       
   185 	 */
       
   186 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
       
   187 };
       
   188 
       
   189 /*
       
   190  * cgroup_map_cb is an abstract callback API for reporting map-valued
       
   191  * control files
       
   192  */
       
   193 
       
   194 struct cgroup_map_cb {
       
   195 	int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
       
   196 	void *state;
       
   197 };
       
   198 
       
   199 /* struct cftype:
       
   200  *
       
   201  * The files in the cgroup filesystem mostly have a very simple read/write
       
   202  * handling, some common function will take care of it. Nevertheless some cases
       
   203  * (read tasks) are special and therefore I define this structure for every
       
   204  * kind of file.
       
   205  *
       
   206  *
       
   207  * When reading/writing to a file:
       
   208  *	- the cgroup to use is file->f_dentry->d_parent->d_fsdata
       
   209  *	- the 'cftype' of the file is file->f_dentry->d_fsdata
       
   210  */
       
   211 
       
   212 #define MAX_CFTYPE_NAME 64
       
   213 struct cftype {
       
   214 	/* By convention, the name should begin with the name of the
       
   215 	 * subsystem, followed by a period */
       
   216 	char name[MAX_CFTYPE_NAME];
       
   217 	int private;
       
   218 
       
   219 	/*
       
   220 	 * If non-zero, defines the maximum length of string that can
       
   221 	 * be passed to write_string; defaults to 64
       
   222 	 */
       
   223 	size_t max_write_len;
       
   224 
       
   225 	int (*open)(struct inode *inode, struct file *file);
       
   226 	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
       
   227 			struct file *file,
       
   228 			char __user *buf, size_t nbytes, loff_t *ppos);
       
   229 	/*
       
   230 	 * read_u64() is a shortcut for the common case of returning a
       
   231 	 * single integer. Use it in place of read()
       
   232 	 */
       
   233 	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
       
   234 	/*
       
   235 	 * read_s64() is a signed version of read_u64()
       
   236 	 */
       
   237 	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
       
   238 	/*
       
   239 	 * read_map() is used for defining a map of key/value
       
   240 	 * pairs. It should call cb->fill(cb, key, value) for each
       
   241 	 * entry. The key/value pairs (and their ordering) should not
       
   242 	 * change between reboots.
       
   243 	 */
       
   244 	int (*read_map)(struct cgroup *cont, struct cftype *cft,
       
   245 			struct cgroup_map_cb *cb);
       
   246 	/*
       
   247 	 * read_seq_string() is used for outputting a simple sequence
       
   248 	 * using seqfile.
       
   249 	 */
       
   250 	int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
       
   251 			       struct seq_file *m);
       
   252 
       
   253 	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
       
   254 			 struct file *file,
       
   255 			 const char __user *buf, size_t nbytes, loff_t *ppos);
       
   256 
       
   257 	/*
       
   258 	 * write_u64() is a shortcut for the common case of accepting
       
   259 	 * a single integer (as parsed by simple_strtoull) from
       
   260 	 * userspace. Use in place of write(); return 0 or error.
       
   261 	 */
       
   262 	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
       
   263 	/*
       
   264 	 * write_s64() is a signed version of write_u64()
       
   265 	 */
       
   266 	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
       
   267 
       
   268 	/*
       
   269 	 * write_string() is passed a nul-terminated kernelspace
       
   270 	 * buffer of maximum length determined by max_write_len.
       
   271 	 * Returns 0 or -ve error code.
       
   272 	 */
       
   273 	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
       
   274 			    const char *buffer);
       
   275 	/*
       
   276 	 * trigger() callback can be used to get some kick from the
       
   277 	 * userspace, when the actual string written is not important
       
   278 	 * at all. The private field can be used to determine the
       
   279 	 * kick type for multiplexing.
       
   280 	 */
       
   281 	int (*trigger)(struct cgroup *cgrp, unsigned int event);
       
   282 
       
   283 	int (*release)(struct inode *inode, struct file *file);
       
   284 };
       
   285 
       
   286 struct cgroup_scanner {
       
   287 	struct cgroup *cg;
       
   288 	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
       
   289 	void (*process_task)(struct task_struct *p,
       
   290 			struct cgroup_scanner *scan);
       
   291 	struct ptr_heap *heap;
       
   292 };
       
   293 
       
   294 /* Add a new file to the given cgroup directory. Should only be
       
   295  * called by subsystems from within a populate() method */
       
   296 int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
       
   297 		       const struct cftype *cft);
       
   298 
       
   299 /* Add a set of new files to the given cgroup directory. Should
       
   300  * only be called by subsystems from within a populate() method */
       
   301 int cgroup_add_files(struct cgroup *cgrp,
       
   302 			struct cgroup_subsys *subsys,
       
   303 			const struct cftype cft[],
       
   304 			int count);
       
   305 
       
   306 int cgroup_is_removed(const struct cgroup *cgrp);
       
   307 
       
   308 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
       
   309 
       
   310 int cgroup_task_count(const struct cgroup *cgrp);
       
   311 
       
   312 /* Return true if the cgroup is a descendant of the current cgroup */
       
   313 int cgroup_is_descendant(const struct cgroup *cgrp);
       
   314 
       
   315 /* Control Group subsystem type. See Documentation/cgroups.txt for details */
       
   316 
       
   317 struct cgroup_subsys {
       
   318 	struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
       
   319 						  struct cgroup *cgrp);
       
   320 	void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
       
   321 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
       
   322 	int (*can_attach)(struct cgroup_subsys *ss,
       
   323 			  struct cgroup *cgrp, struct task_struct *tsk);
       
   324 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
       
   325 			struct cgroup *old_cgrp, struct task_struct *tsk);
       
   326 	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
       
   327 	void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
       
   328 	int (*populate)(struct cgroup_subsys *ss,
       
   329 			struct cgroup *cgrp);
       
   330 	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
       
   331 	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
       
   332 	/*
       
   333 	 * This routine is called with the task_lock of mm->owner held
       
   334 	 */
       
   335 	void (*mm_owner_changed)(struct cgroup_subsys *ss,
       
   336 					struct cgroup *old,
       
   337 					struct cgroup *new,
       
   338 					struct task_struct *p);
       
   339 	int subsys_id;
       
   340 	int active;
       
   341 	int disabled;
       
   342 	int early_init;
       
   343 #define MAX_CGROUP_TYPE_NAMELEN 32
       
   344 	const char *name;
       
   345 
       
   346 	/* Protected by RCU */
       
   347 	struct cgroupfs_root *root;
       
   348 
       
   349 	struct list_head sibling;
       
   350 };
       
   351 
       
   352 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
       
   353 #include <linux/cgroup_subsys.h>
       
   354 #undef SUBSYS
       
   355 
       
   356 static inline struct cgroup_subsys_state *cgroup_subsys_state(
       
   357 	struct cgroup *cgrp, int subsys_id)
       
   358 {
       
   359 	return cgrp->subsys[subsys_id];
       
   360 }
       
   361 
       
   362 static inline struct cgroup_subsys_state *task_subsys_state(
       
   363 	struct task_struct *task, int subsys_id)
       
   364 {
       
   365 	return rcu_dereference(task->cgroups->subsys[subsys_id]);
       
   366 }
       
   367 
       
   368 static inline struct cgroup* task_cgroup(struct task_struct *task,
       
   369 					       int subsys_id)
       
   370 {
       
   371 	return task_subsys_state(task, subsys_id)->cgroup;
       
   372 }
       
   373 
       
   374 int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
       
   375 							char *nodename);
       
   376 
       
   377 /* A cgroup_iter should be treated as an opaque object */
       
   378 struct cgroup_iter {
       
   379 	struct list_head *cg_link;
       
   380 	struct list_head *task;
       
   381 };
       
   382 
       
   383 /* To iterate across the tasks in a cgroup:
       
   384  *
       
    385  * 1) call cgroup_iter_start() to initialize an iterator
       
   386  *
       
   387  * 2) call cgroup_iter_next() to retrieve member tasks until it
       
   388  *    returns NULL or until you want to end the iteration
       
   389  *
       
   390  * 3) call cgroup_iter_end() to destroy the iterator.
       
   391  *
       
    392  * Or, call cgroup_scan_tasks() to iterate through every task in a cgroup.
       
   393  *    - cgroup_scan_tasks() holds the css_set_lock when calling the test_task()
       
   394  *      callback, but not while calling the process_task() callback.
       
   395  */
       
   396 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
       
   397 struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
       
   398 					struct cgroup_iter *it);
       
   399 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
       
   400 int cgroup_scan_tasks(struct cgroup_scanner *scan);
       
   401 int cgroup_attach_task(struct cgroup *, struct task_struct *);
       
   402 
       
   403 void cgroup_mm_owner_callbacks(struct task_struct *old,
       
   404 			       struct task_struct *new);
       
   405 
       
   406 #else /* !CONFIG_CGROUPS */
       
   407 
       
   408 static inline int cgroup_init_early(void) { return 0; }
       
   409 static inline int cgroup_init(void) { return 0; }
       
   410 static inline void cgroup_fork(struct task_struct *p) {}
       
   411 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
       
   412 static inline void cgroup_post_fork(struct task_struct *p) {}
       
   413 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
       
   414 
       
   415 static inline void cgroup_lock(void) {}
       
   416 static inline void cgroup_unlock(void) {}
       
   417 static inline int cgroupstats_build(struct cgroupstats *stats,
       
   418 					struct dentry *dentry)
       
   419 {
       
   420 	return -EINVAL;
       
   421 }
       
   422 
       
   423 static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
       
   424 					     struct task_struct *new) {}
       
   425 
       
   426 #endif /* !CONFIG_CGROUPS */
       
   427 
       
   428 #endif /* _LINUX_CGROUP_H */