include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, thus leading to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM has
	 * test-and-cleared the young/accessed bitflag in the
	 * pte. This way the VM will provide proper aging to the
	 * accesses to the page through the secondary MMUs and not
	 * only to the ones through the Linux pte.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long address);

	/*
	 * Before this is invoked any secondary MMU is still ok to
	 * read/write to the page previously pointed to by the Linux
	 * pte because the page hasn't been freed yet and it won't be
	 * freed until this returns. If required, set_page_dirty has to
	 * be called internally by this method.
	 */
	void (*invalidate_page)(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. The subsystem
	 * must guarantee that no additional references are taken to
	 * the pages in the range established between the call to
	 * invalidate_range_start() and the matching call to
	 * invalidate_range_end().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_start/end for the whole duration of the
	 * invalidate_range_start/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only
	 * dropped on invalidate_range_end() then the driver itself
	 * will drop the last refcount but it must take care to flush
	 * any secondary tlb before doing the final free on the
	 * page. Pages will no longer be referenced by the Linux
	 * address space but may still be referenced by sptes until
	 * the last refcount is dropped.
	 *
	 * (A skeletal example of implementing these hooks follows
	 * this structure definition.)
	 */
	void (*invalidate_range_start)(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);
};
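
/*
 * Illustrative sketch (not part of the original header): one way a driver
 * with a secondary MMU could fill in mmu_notifier_ops. Everything named
 * my_gru_* (the struct, my_gru_drop_ptes, my_gru_flush_tlb,
 * my_gru_test_and_clear_young, my_gru_unblock_range) is a hypothetical
 * driver helper standing in for the real teardown/shootdown primitives,
 * not a kernel API.
 */
#if 0	/* example only */
struct my_gru {
	struct mmu_notifier notifier;	/* embedded notifier */
	/* ... secondary page tables, hardware handles, locks ... */
};

static void my_gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct my_gru *gru = container_of(mn, struct my_gru, notifier);

	/* Tear down every spte and freeze the secondary MMU. */
	my_gru_drop_ptes(gru, 0, ULONG_MAX);
	my_gru_flush_tlb(gru);
}

static int my_gru_clear_flush_young(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    unsigned long address)
{
	struct my_gru *gru = container_of(mn, struct my_gru, notifier);

	/* Non-zero if the secondary MMU recently accessed the page. */
	return my_gru_test_and_clear_young(gru, address);
}

static void my_gru_invalidate_page(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long address)
{
	struct my_gru *gru = container_of(mn, struct my_gru, notifier);

	my_gru_drop_ptes(gru, address, address + PAGE_SIZE);
	my_gru_flush_tlb(gru);
}

static void my_gru_invalidate_range_start(struct mmu_notifier *mn,
					  struct mm_struct *mm,
					  unsigned long start,
					  unsigned long end)
{
	struct my_gru *gru = container_of(mn, struct my_gru, notifier);

	/*
	 * Drop sptes and flush the secondary TLB now; no new sptes may
	 * be established in [start, end) until invalidate_range_end.
	 */
	my_gru_drop_ptes(gru, start, end);
	my_gru_flush_tlb(gru);
}

static void my_gru_invalidate_range_end(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long end)
{
	struct my_gru *gru = container_of(mn, struct my_gru, notifier);

	/* Re-allow establishment of sptes in [start, end). */
	my_gru_unblock_range(gru, start, end);
}

static const struct mmu_notifier_ops my_gru_mmu_notifier_ops = {
	.release		= my_gru_release,
	.clear_flush_young	= my_gru_clear_flush_young,
	.invalidate_page	= my_gru_invalidate_page,
	.invalidate_range_start	= my_gru_invalidate_range_start,
	.invalidate_range_end	= my_gru_invalidate_range_end,
};
#endif	/* example only */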
       
/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either:
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
 * 3. No other concurrent thread can access the list (release).
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};
       
static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					  unsigned long address);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
					  unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end);
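
/*
 * Illustrative sketch (not part of the original header): attaching and
 * detaching the hypothetical notifier from the example above. struct my_gru
 * and my_gru_mmu_notifier_ops come from that sketch; error handling is
 * reduced to the bare minimum.
 */
#if 0	/* example only */
static int my_gru_attach(struct my_gru *gru)
{
	gru->notifier.ops = &my_gru_mmu_notifier_ops;
	/* After this returns 0, the ops above fire for current->mm. */
	return mmu_notifier_register(&gru->notifier, current->mm);
}

static void my_gru_detach(struct my_gru *gru, struct mm_struct *mm)
{
	/*
	 * Per the ->release comment above, unregistering is one of the
	 * two paths that invoke ->release.
	 */
	mmu_notifier_unregister(&gru->notifier, mm);
}
#endif	/* example only */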
       
static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, address);
	return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end);
}
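
/*
 * Illustrative sketch (not part of the original header): the bracketing
 * the wrappers above imply for a core VM path tearing down a range.
 * do_unmap_range() is a placeholder for the real page-table teardown,
 * not an actual kernel function.
 */
#if 0	/* example only */
static void example_unmap(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	mmu_notifier_invalidate_range_start(mm, start, end);
	do_unmap_range(mm, start, end);	/* ptes removed, pages may be freed */
	mmu_notifier_invalidate_range_end(mm, start, end);
}
#endif	/* example only */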
       
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}
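
/*
 * Illustrative sketch (not part of the original header): where the two
 * hooks above are expected to sit in the mm lifecycle, per the
 * mmu_notifier_mm comment at the top of this file.
 * allocate_mm_somehow()/free_mm_somehow() are placeholders, not kernel
 * functions.
 */
#if 0	/* example only */
static struct mm_struct *example_mm_alloc(void)
{
	struct mm_struct *mm = allocate_mm_somehow();

	mmu_notifier_mm_init(mm);	/* start out with no notifiers */
	return mm;
}

static void example_mm_drop(struct mm_struct *mm)
{
	/* Run once mm_count reaches zero, i.e. from mmdrop(). */
	mmu_notifier_mm_destroy(mm);
	free_mm_somehow(mm);
}
#endif	/* example only */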
       
/*
 * These two macros will eventually replace ptep_clear_flush.
 * ptep_clear_flush is implemented as a macro itself, so this also is
 * implemented as a macro until ptep_clear_flush is converted to an
 * inline function, to diminish the risk of compilation failure. The
 * invalidate_page method can over time be moved outside the PT lock
 * and these two macros can then be removed.
 */
#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	pte_t __pte;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__pte = ptep_clear_flush(___vma, ___address, __ptep);		\
	mmu_notifier_invalidate_page(___vma->vm_mm, ___address);	\
	__pte;								\
})

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address);		\
	__young;							\
})
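
/*
 * Illustrative sketch (not part of the original header): how an aging or
 * unmapping path would use the *_notify variants instead of the plain pte
 * helpers so that secondary MMUs see the same events. The surrounding
 * locking (PT lock, rmap locks) is assumed and not shown.
 */
#if 0	/* example only */
static int example_page_referenced_one(struct vm_area_struct *vma,
				       unsigned long address, pte_t *ptep)
{
	/* Young in either the Linux pte or any secondary MMU counts. */
	return ptep_clear_flush_young_notify(vma, address, ptep);
}

static pte_t example_unmap_one(struct vm_area_struct *vma,
			       unsigned long address, pte_t *ptep)
{
	/* Clears the pte, flushes the TLB and notifies secondary MMUs. */
	return ptep_clear_flush_notify(vma, address, ptep);
}
#endif	/* example only */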
       
#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
					  unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */