|
1 /* |
|
2 * linux/include/linux/jbd.h |
|
3 * |
|
4 * Written by Stephen C. Tweedie <sct@redhat.com> |
|
5 * |
|
6 * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved |
|
7 * |
|
8 * This file is part of the Linux kernel and is made available under |
|
9 * the terms of the GNU General Public License, version 2, or at your |
|
10 * option, any later version, incorporated herein by reference. |
|
11 * |
|
12 * Definitions for transaction data structures for the buffer cache |
|
13 * filesystem journaling support. |
|
14 */ |
|
15 |
|
16 #ifndef _LINUX_JBD_H |
|
17 #define _LINUX_JBD_H |
|
18 |
|
19 /* Allow this file to be included directly into e2fsprogs */ |
|
20 #ifndef __KERNEL__ |
|
21 #include "jfs_compat.h" |
|
22 #define JFS_DEBUG |
|
23 #define jfs_debug jbd_debug |
|
24 #else |
|
25 |
|
26 #include <linux/types.h> |
|
27 #include <linux/buffer_head.h> |
|
28 #include <linux/journal-head.h> |
|
29 #include <linux/stddef.h> |
|
30 #include <linux/bit_spinlock.h> |
|
31 #include <linux/mutex.h> |
|
32 #include <linux/timer.h> |
|
33 #include <linux/lockdep.h> |
|
34 |
|
35 #define journal_oom_retry 1 |
|
36 |
|
37 /* |
|
38 * Define JBD_PARANOID_IOFAIL to cause a kernel BUG() if ext3 finds |
|
39 * certain classes of error which can occur due to failed IOs. Under |
|
40 * normal use we want ext3 to continue after such errors, because |
|
41 * hardware _can_ fail, but for debugging purposes when running tests on |
|
42 * known-good hardware we may want to trap these errors. |
|
43 */ |
|
44 #undef JBD_PARANOID_IOFAIL |
|
45 |
|
46 /* |
|
47 * The default maximum commit age, in seconds. |
|
48 */ |
|
49 #define JBD_DEFAULT_MAX_COMMIT_AGE 5 |
|
50 |
|
#ifdef CONFIG_JBD_DEBUG
/*
 * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
 * consistency checks.  By default we don't do this unless
 * CONFIG_JBD_DEBUG is on.
 */
#define JBD_EXPENSIVE_CHECKING
extern u8 journal_enable_debug;

/*
 * jbd_debug(n, f, a...) - emit a debug message at verbosity level @n.
 *
 * The message is printed only when @n does not exceed the run-time
 * journal_enable_debug level.  It is prefixed with the source file, line
 * number and function name, followed by the printk-style format @f and
 * its arguments.
 */
#define jbd_debug(n, f, a...)						\
	do {								\
		if ((n) <= journal_enable_debug) {			\
			printk (KERN_DEBUG "(%s, %d): %s: ",		\
				__FILE__, __LINE__, __func__);		\
			printk (f, ## a);				\
		}							\
	} while (0)
#else
/*
 * Debugging disabled: jbd_debug() expands to nothing, so none of its
 * arguments are evaluated.  The parameter list mirrors the debug variant
 * (level first, then format) so that both configurations accept exactly
 * the same call sites.
 */
#define jbd_debug(n, f, a...)	/**/
#endif
|
71 |
|
72 static inline void *jbd_alloc(size_t size, gfp_t flags) |
|
73 { |
|
74 return (void *)__get_free_pages(flags, get_order(size)); |
|
75 } |
|
76 |
|
/*
 * jbd_free - release memory obtained from jbd_alloc().
 * @ptr:  address to release; passed to free_pages() as an unsigned long
 * @size: size in bytes, converted to a page order with get_order()
 */
static inline void jbd_free(void *ptr, size_t size)
{
	free_pages((unsigned long)ptr, get_order(size));
}
|
81 |
|
82 #define JFS_MIN_JOURNAL_BLOCKS 1024 |
|
83 |
|
84 |
|
/**
 * typedef handle_t - a single atomic update being performed by some process.
 *
 * Every filesystem modification the process makes goes through its handle.
 * Recursive operations (quota operations, for example) are gathered into
 * one update.
 *
 * The buffer credits field accounts for the journaled buffers the running
 * process modifies.  Limiting the number of buffers that may be outstanding
 * at any time ensures there is enough log space for all outstanding
 * operations.  When an operation completes, its unused credits are handed
 * back to the transaction, so at all times we know how many buffers the
 * outstanding updates on a transaction might possibly touch.
 *
 * This is an opaque datatype.
 **/
typedef struct handle_s		handle_t;	/* Atomic operation type */
|
103 |
|
104 |
|
/**
 * typedef journal_t - all journaling state for a single filesystem.
 *
 * The fs superblock structure links to its journal_t.
 *
 * The journal_t tracks every outstanding transaction on the filesystem
 * and manages the state of the log writing process.
 *
 * This is an opaque datatype.
 **/
typedef struct journal_s	journal_t;	/* Journal control structure */
|
117 #endif |
|
118 |
|
119 /* |
|
120 * Internal structures used by the logging mechanism: |
|
121 */ |
|
122 |
|
123 #define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ |
|
124 |
|
125 /* |
|
126 * On-disk structures |
|
127 */ |
|
128 |
|
/*
 * Descriptor block types.
 *
 * NOTE(review): presumably these are the values carried in the
 * h_blocktype field of journal_header_t -- confirm against the journal
 * code.
 */
#define JFS_DESCRIPTOR_BLOCK	1
#define JFS_COMMIT_BLOCK	2
#define JFS_SUPERBLOCK_V1	3
#define JFS_SUPERBLOCK_V2	4
#define JFS_REVOKE_BLOCK	5
|
138 |
|
/*
 * Standard header for all descriptor blocks.  Every field is a 32-bit
 * big-endian value.
 */
typedef struct journal_header_s
{
	__be32		h_magic;	/* presumably JFS_MAGIC_NUMBER -- confirm */
	__be32		h_blocktype;	/* block type */
	__be32		h_sequence;	/* sequence number */
} journal_header_t;
|
148 |
|
149 |
|
/*
 * The block tag: describes a single buffer in the journal.
 */
typedef struct journal_block_tag_s
{
	/* The on-disk block number */
	__be32		t_blocknr;
	/* Tag flags word; see the JFS_FLAG_* definitions */
	__be32		t_flags;
} journal_block_tag_t;
|
158 |
|
159 /* |
|
160 * The revoke descriptor: used on disk to describe a series of blocks to |
|
161 * be revoked from the log |
|
162 */ |
|
163 typedef struct journal_revoke_header_s |
|
164 { |
|
165 journal_header_t r_header; |
|
166 __be32 r_count; /* Count of bytes used in the block */ |
|
167 } journal_revoke_header_t; |
|
168 |
|
169 |
|
/*
 * Bits of the journal tag flags word (journal_block_tag_t.t_flags):
 *
 *   JFS_FLAG_ESCAPE     on-disk block is escaped
 *   JFS_FLAG_SAME_UUID  block has same uuid as previous
 *   JFS_FLAG_DELETED    block deleted by this transaction
 *   JFS_FLAG_LAST_TAG   last tag in this descriptor block
 */
#define JFS_FLAG_ESCAPE		0x1
#define JFS_FLAG_SAME_UUID	0x2
#define JFS_FLAG_DELETED	0x4
#define JFS_FLAG_LAST_TAG	0x8
|
175 |
|
176 |
|
177 /* |
|
178 * The journal superblock. All fields are in big-endian byte order. |
|
179 */ |
|
180 typedef struct journal_superblock_s |
|
181 { |
|
182 /* 0x0000 */ |
|
183 journal_header_t s_header; |
|
184 |
|
185 /* 0x000C */ |
|
186 /* Static information describing the journal */ |
|
187 __be32 s_blocksize; /* journal device blocksize */ |
|
188 __be32 s_maxlen; /* total blocks in journal file */ |
|
189 __be32 s_first; /* first block of log information */ |
|
190 |
|
191 /* 0x0018 */ |
|
192 /* Dynamic information describing the current state of the log */ |
|
193 __be32 s_sequence; /* first commit ID expected in log */ |
|
194 __be32 s_start; /* blocknr of start of log */ |
|
195 |
|
196 /* 0x0020 */ |
|
197 /* Error value, as set by journal_abort(). */ |
|
198 __be32 s_errno; |
|
199 |
|
200 /* 0x0024 */ |
|
201 /* Remaining fields are only valid in a version-2 superblock */ |
|
202 __be32 s_feature_compat; /* compatible feature set */ |
|
203 __be32 s_feature_incompat; /* incompatible feature set */ |
|
204 __be32 s_feature_ro_compat; /* readonly-compatible feature set */ |
|
205 /* 0x0030 */ |
|
206 __u8 s_uuid[16]; /* 128-bit uuid for journal */ |
|
207 |
|
208 /* 0x0040 */ |
|
209 __be32 s_nr_users; /* Nr of filesystems sharing log */ |
|
210 |
|
211 __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ |
|
212 |
|
213 /* 0x0048 */ |
|
214 __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ |
|
215 __be32 s_max_trans_data; /* Limit of data blocks per trans. */ |
|
216 |
|
217 /* 0x0050 */ |
|
218 __u32 s_padding[44]; |
|
219 |
|
220 /* 0x0100 */ |
|
221 __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ |
|
222 /* 0x0400 */ |
|
223 } journal_superblock_t; |
|
224 |
|
/*
 * Feature tests on a journal @j.
 *
 * Each test is true only when the journal's format version is at least 2
 * and the selected superblock feature word has at least one bit of @mask
 * set.  @mask is given in CPU byte order and converted with cpu_to_be32()
 * before being compared with the superblock field.
 *
 * JFS_SB_FEATURE_SET() is the shared implementation; @field names the
 * superblock member to test.
 */
#define JFS_SB_FEATURE_SET(j, field, mask)				\
	((j)->j_format_version >= 2 &&					\
	 ((j)->j_superblock->field & cpu_to_be32((mask))))

#define JFS_HAS_COMPAT_FEATURE(j,mask)					\
	JFS_SB_FEATURE_SET(j, s_feature_compat, mask)
#define JFS_HAS_RO_COMPAT_FEATURE(j,mask)				\
	JFS_SB_FEATURE_SET(j, s_feature_ro_compat, mask)
#define JFS_HAS_INCOMPAT_FEATURE(j,mask)				\
	JFS_SB_FEATURE_SET(j, s_feature_incompat, mask)
|
234 |
|
/* Incompatible feature bit: revoke */
#define JFS_FEATURE_INCOMPAT_REVOKE	0x00000001

/*
 * Features known to this kernel version, one mask per feature class.
 * No compatible or readonly-compatible features are known; the only
 * known incompatible feature is JFS_FEATURE_INCOMPAT_REVOKE.
 */
#define JFS_KNOWN_COMPAT_FEATURES	0
#define JFS_KNOWN_ROCOMPAT_FEATURES	0
#define JFS_KNOWN_INCOMPAT_FEATURES	JFS_FEATURE_INCOMPAT_REVOKE
|
241 |
|
242 #ifdef __KERNEL__ |
|
243 |
|
244 #include <linux/fs.h> |
|
245 #include <linux/sched.h> |
|
246 |
|
/* Assert that @cond holds: the negated condition is handed to BUG_ON(). */
#define J_ASSERT(cond)	BUG_ON(!(cond))
|
248 |
|
/*
 * Assertions tied to a buffer_head (J_ASSERT_BH) or a journal_head
 * (J_ASSERT_JH).
 *
 * With CONFIG_BUFFER_DEBUG, a failing assertion first calls
 * buffer_assertion_failure() on the buffer and then falls through to
 * J_ASSERT(); note that @expr is evaluated twice in that configuration.
 * Without it, both macros are plain J_ASSERT() and the buffer argument
 * is ignored.
 */
#ifdef CONFIG_BUFFER_DEBUG
void buffer_assertion_failure(struct buffer_head *bh);

#define J_ASSERT_BH(bh, expr)						\
	do {								\
		if (!(expr))						\
			buffer_assertion_failure(bh);			\
		J_ASSERT(expr);						\
	} while (0)
#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)

#else	/* !CONFIG_BUFFER_DEBUG */

#define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
#define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)

#endif	/* CONFIG_BUFFER_DEBUG */
|
262 |
|
263 #if defined(JBD_PARANOID_IOFAIL) |
|
264 #define J_EXPECT(expr, why...) J_ASSERT(expr) |
|
265 #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) |
|
266 #define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) |
|
267 #else |
|
268 #define __journal_expect(expr, why...) \ |
|
269 ({ \ |
|
270 int val = (expr); \ |
|
271 if (!val) { \ |
|
272 printk(KERN_ERR \ |
|
273 "EXT3-fs unexpected failure: %s;\n",# expr); \ |
|
274 printk(KERN_ERR why "\n"); \ |
|
275 } \ |
|
276 val; \ |
|
277 }) |
|
278 #define J_EXPECT(expr, why...) __journal_expect(expr, ## why) |
|
279 #define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) |
|
280 #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) |
|
281 #endif |
|
282 |
|
283 enum jbd_state_bits { |
|
284 BH_JBD /* Has an attached ext3 journal_head */ |
|
285 = BH_PrivateStart, |
|
286 BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ |
|
287 BH_Freed, /* Has been freed (truncated) */ |
|
288 BH_Revoked, /* Has been revoked from the log */ |
|
289 BH_RevokeValid, /* Revoked flag is valid */ |
|
290 BH_JBDDirty, /* Is dirty but journaled */ |
|
291 BH_State, /* Pins most journal_head state */ |
|
292 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ |
|
293 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ |
|
294 }; |
|
295 |
|
296 BUFFER_FNS(JBD, jbd) |
|
297 BUFFER_FNS(JWrite, jwrite) |
|
298 BUFFER_FNS(JBDDirty, jbddirty) |
|
299 TAS_BUFFER_FNS(JBDDirty, jbddirty) |
|
300 BUFFER_FNS(Revoked, revoked) |
|
301 TAS_BUFFER_FNS(Revoked, revoked) |
|
302 BUFFER_FNS(RevokeValid, revokevalid) |
|
303 TAS_BUFFER_FNS(RevokeValid, revokevalid) |
|
304 BUFFER_FNS(Freed, freed) |
|
305 |
|
306 static inline struct buffer_head *jh2bh(struct journal_head *jh) |
|
307 { |
|
308 return jh->b_bh; |
|
309 } |
|
310 |
|
311 static inline struct journal_head *bh2jh(struct buffer_head *bh) |
|
312 { |
|
313 return bh->b_private; |
|
314 } |
|
315 |
|
316 static inline void jbd_lock_bh_state(struct buffer_head *bh) |
|
317 { |
|
318 bit_spin_lock(BH_State, &bh->b_state); |
|
319 } |
|
320 |
|
321 static inline int jbd_trylock_bh_state(struct buffer_head *bh) |
|
322 { |
|
323 return bit_spin_trylock(BH_State, &bh->b_state); |
|
324 } |
|
325 |
|
326 static inline int jbd_is_locked_bh_state(struct buffer_head *bh) |
|
327 { |
|
328 return bit_spin_is_locked(BH_State, &bh->b_state); |
|
329 } |
|
330 |
|
331 static inline void jbd_unlock_bh_state(struct buffer_head *bh) |
|
332 { |
|
333 bit_spin_unlock(BH_State, &bh->b_state); |
|
334 } |
|
335 |
|
336 static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) |
|
337 { |
|
338 bit_spin_lock(BH_JournalHead, &bh->b_state); |
|
339 } |
|
340 |
|
341 static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) |
|
342 { |
|
343 bit_spin_unlock(BH_JournalHead, &bh->b_state); |
|
344 } |
|
345 |
|
346 struct jbd_revoke_table_s; |
|
347 |
|
348 /** |
|
349 * struct handle_s - this is the concrete type associated with handle_t. |
|
350 * @h_transaction: Which compound transaction is this update a part of? |
|
351 * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. |
|
352 * @h_ref: Reference count on this handle |
|
353 * @h_err: Field for caller's use to track errors through large fs operations |
|
354 * @h_sync: flag for sync-on-close |
|
355 * @h_jdata: flag to force data journaling |
|
356 * @h_aborted: flag indicating fatal error on handle |
|
357 * @h_lockdep_map: lockdep info for debugging lock problems |
|
358 */ |
|
359 struct handle_s |
|
360 { |
|
361 /* Which compound transaction is this update a part of? */ |
|
362 transaction_t *h_transaction; |
|
363 |
|
364 /* Number of remaining buffers we are allowed to dirty: */ |
|
365 int h_buffer_credits; |
|
366 |
|
367 /* Reference count on this handle */ |
|
368 int h_ref; |
|
369 |
|
370 /* Field for caller's use to track errors through large fs */ |
|
371 /* operations */ |
|
372 int h_err; |
|
373 |
|
374 /* Flags [no locking] */ |
|
375 unsigned int h_sync: 1; /* sync-on-close */ |
|
376 unsigned int h_jdata: 1; /* force data journaling */ |
|
377 unsigned int h_aborted: 1; /* fatal error on handle */ |
|
378 |
|
379 #ifdef CONFIG_DEBUG_LOCK_ALLOC |
|
380 struct lockdep_map h_lockdep_map; |
|
381 #endif |
|
382 }; |
|
383 |
|
384 |
|
385 /* The transaction_t type is the guts of the journaling mechanism. It |
|
386 * tracks a compound transaction through its various states: |
|
387 * |
|
388 * RUNNING: accepting new updates |
|
389 * LOCKED: Updates still running but we don't accept new ones |
|
390 * RUNDOWN: Updates are tidying up but have finished requesting |
|
391 * new buffers to modify (state not used for now) |
|
392 * FLUSH: All updates complete, but we are still writing to disk |
|
393 * COMMIT: All data on disk, writing commit record |
|
394 * FINISHED: We still have to keep the transaction for checkpointing. |
|
395 * |
|
396 * The transaction keeps track of all of the buffers modified by a |
|
397 * running transaction, and all of the buffers committed but not yet |
|
398 * flushed to home for finished transactions. |
|
399 */ |
|
400 |
|
401 /* |
|
402 * Lock ranking: |
|
403 * |
|
404 * j_list_lock |
|
405 * ->jbd_lock_bh_journal_head() (This is "innermost") |
|
406 * |
|
407 * j_state_lock |
|
408 * ->jbd_lock_bh_state() |
|
409 * |
|
410 * jbd_lock_bh_state() |
|
411 * ->j_list_lock |
|
412 * |
|
413 * j_state_lock |
|
414 * ->t_handle_lock |
|
415 * |
|
416 * j_state_lock |
|
417 * ->j_list_lock (journal_unmap_buffer) |
|
418 * |
|
419 */ |
|
420 |
|
421 struct transaction_s |
|
422 { |
|
423 /* Pointer to the journal for this transaction. [no locking] */ |
|
424 journal_t *t_journal; |
|
425 |
|
426 /* Sequence number for this transaction [no locking] */ |
|
427 tid_t t_tid; |
|
428 |
|
429 /* |
|
430 * Transaction's current state |
|
431 * [no locking - only kjournald alters this] |
|
432 * [j_list_lock] guards transition of a transaction into T_FINISHED |
|
433 * state and subsequent call of __journal_drop_transaction() |
|
434 * FIXME: needs barriers |
|
435 * KLUDGE: [use j_state_lock] |
|
436 */ |
|
437 enum { |
|
438 T_RUNNING, |
|
439 T_LOCKED, |
|
440 T_RUNDOWN, |
|
441 T_FLUSH, |
|
442 T_COMMIT, |
|
443 T_FINISHED |
|
444 } t_state; |
|
445 |
|
446 /* |
|
447 * Where in the log does this transaction's commit start? [no locking] |
|
448 */ |
|
449 unsigned long t_log_start; |
|
450 |
|
451 /* Number of buffers on the t_buffers list [j_list_lock] */ |
|
452 int t_nr_buffers; |
|
453 |
|
454 /* |
|
455 * Doubly-linked circular list of all buffers reserved but not yet |
|
456 * modified by this transaction [j_list_lock] |
|
457 */ |
|
458 struct journal_head *t_reserved_list; |
|
459 |
|
460 /* |
|
461 * Doubly-linked circular list of all buffers under writeout during |
|
462 * commit [j_list_lock] |
|
463 */ |
|
464 struct journal_head *t_locked_list; |
|
465 |
|
466 /* |
|
467 * Doubly-linked circular list of all metadata buffers owned by this |
|
468 * transaction [j_list_lock] |
|
469 */ |
|
470 struct journal_head *t_buffers; |
|
471 |
|
472 /* |
|
473 * Doubly-linked circular list of all data buffers still to be |
|
474 * flushed before this transaction can be committed [j_list_lock] |
|
475 */ |
|
476 struct journal_head *t_sync_datalist; |
|
477 |
|
478 /* |
|
479 * Doubly-linked circular list of all forget buffers (superseded |
|
480 * buffers which we can un-checkpoint once this transaction commits) |
|
481 * [j_list_lock] |
|
482 */ |
|
483 struct journal_head *t_forget; |
|
484 |
|
485 /* |
|
486 * Doubly-linked circular list of all buffers still to be flushed before |
|
487 * this transaction can be checkpointed. [j_list_lock] |
|
488 */ |
|
489 struct journal_head *t_checkpoint_list; |
|
490 |
|
491 /* |
|
492 * Doubly-linked circular list of all buffers submitted for IO while |
|
493 * checkpointing. [j_list_lock] |
|
494 */ |
|
495 struct journal_head *t_checkpoint_io_list; |
|
496 |
|
497 /* |
|
498 * Doubly-linked circular list of temporary buffers currently undergoing |
|
499 * IO in the log [j_list_lock] |
|
500 */ |
|
501 struct journal_head *t_iobuf_list; |
|
502 |
|
503 /* |
|
504 * Doubly-linked circular list of metadata buffers being shadowed by log |
|
505 * IO. The IO buffers on the iobuf list and the shadow buffers on this |
|
506 * list match each other one for one at all times. [j_list_lock] |
|
507 */ |
|
508 struct journal_head *t_shadow_list; |
|
509 |
|
510 /* |
|
511 * Doubly-linked circular list of control buffers being written to the |
|
512 * log. [j_list_lock] |
|
513 */ |
|
514 struct journal_head *t_log_list; |
|
515 |
|
516 /* |
|
517 * Protects info related to handles |
|
518 */ |
|
519 spinlock_t t_handle_lock; |
|
520 |
|
521 /* |
|
522 * Number of outstanding updates running on this transaction |
|
523 * [t_handle_lock] |
|
524 */ |
|
525 int t_updates; |
|
526 |
|
527 /* |
|
528 * Number of buffers reserved for use by all handles in this transaction |
|
529 * handle but not yet modified. [t_handle_lock] |
|
530 */ |
|
531 int t_outstanding_credits; |
|
532 |
|
533 /* |
|
534 * Forward and backward links for the circular list of all transactions |
|
535 * awaiting checkpoint. [j_list_lock] |
|
536 */ |
|
537 transaction_t *t_cpnext, *t_cpprev; |
|
538 |
|
539 /* |
|
540 * When will the transaction expire (become due for commit), in jiffies? |
|
541 * [no locking] |
|
542 */ |
|
543 unsigned long t_expires; |
|
544 |
|
545 /* |
|
546 * How many handles used this transaction? [t_handle_lock] |
|
547 */ |
|
548 int t_handle_count; |
|
549 |
|
550 }; |
|
551 |
|
552 /** |
|
553 * struct journal_s - this is the concrete type associated with journal_t. |
|
554 * @j_flags: General journaling state flags |
|
555 * @j_errno: Is there an outstanding uncleared error on the journal (from a |
|
556 * prior abort)? |
|
557 * @j_sb_buffer: First part of superblock buffer |
|
558 * @j_superblock: Second part of superblock buffer |
|
559 * @j_format_version: Version of the superblock format |
|
560 * @j_state_lock: Protect the various scalars in the journal |
|
561 * @j_barrier_count: Number of processes waiting to create a barrier lock |
|
562 * @j_barrier: The barrier lock itself |
|
563 * @j_running_transaction: The current running transaction.. |
|
564 * @j_committing_transaction: the transaction we are pushing to disk |
|
565 * @j_checkpoint_transactions: a linked circular list of all transactions |
|
566 * waiting for checkpointing |
|
567 * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction |
|
568 * to start committing, or for a barrier lock to be released |
|
569 * @j_wait_logspace: Wait queue for waiting for checkpointing to complete |
|
570 * @j_wait_done_commit: Wait queue for waiting for commit to complete |
|
571 * @j_wait_checkpoint: Wait queue to trigger checkpointing |
|
572 * @j_wait_commit: Wait queue to trigger commit |
|
573 * @j_wait_updates: Wait queue to wait for updates to complete |
|
574 * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints |
|
575 * @j_head: Journal head - identifies the first unused block in the journal |
|
576 * @j_tail: Journal tail - identifies the oldest still-used block in the |
|
577 * journal. |
|
578 * @j_free: Journal free - how many free blocks are there in the journal? |
|
579 * @j_first: The block number of the first usable block |
|
580 * @j_last: The block number one beyond the last usable block |
|
581 * @j_dev: Device where we store the journal |
|
582 * @j_blocksize: blocksize for the location where we store the journal. |
|
583 * @j_blk_offset: starting block offset for into the device where we store the |
|
584 * journal |
|
585 * @j_fs_dev: Device which holds the client fs. For internal journal this will |
|
586 * be equal to j_dev |
|
587 * @j_maxlen: Total maximum capacity of the journal region on disk. |
|
588 * @j_list_lock: Protects the buffer lists and internal buffer state. |
|
589 * @j_inode: Optional inode where we store the journal. If present, all journal |
|
590 * block numbers are mapped into this inode via bmap(). |
|
591 * @j_tail_sequence: Sequence number of the oldest transaction in the log |
|
592 * @j_transaction_sequence: Sequence number of the next transaction to grant |
|
593 * @j_commit_sequence: Sequence number of the most recently committed |
|
594 * transaction |
|
595 * @j_commit_request: Sequence number of the most recent transaction wanting |
|
596 * commit |
|
597 * @j_uuid: Uuid of client object. |
|
598 * @j_task: Pointer to the current commit thread for this journal |
|
599 * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a |
|
600 * single compound commit transaction |
|
601 * @j_commit_interval: What is the maximum transaction lifetime before we begin |
|
602 * a commit? |
|
603 * @j_commit_timer: The timer used to wakeup the commit thread |
|
604 * @j_revoke_lock: Protect the revoke table |
|
605 * @j_revoke: The revoke table - maintains the list of revoked blocks in the |
|
606 * current transaction. |
|
607 * @j_revoke_table: alternate revoke tables for j_revoke |
|
608 * @j_wbuf: array of buffer_heads for journal_commit_transaction |
|
609 * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the |
|
610 * number that will fit in j_blocksize |
|
611 * @j_last_sync_writer: most recent pid which did a synchronous write |
|
612 * @j_private: An opaque pointer to fs-private information. |
|
613 */ |
|
614 |
|
615 struct journal_s |
|
616 { |
|
617 /* General journaling state flags [j_state_lock] */ |
|
618 unsigned long j_flags; |
|
619 |
|
620 /* |
|
621 * Is there an outstanding uncleared error on the journal (from a prior |
|
622 * abort)? [j_state_lock] |
|
623 */ |
|
624 int j_errno; |
|
625 |
|
626 /* The superblock buffer */ |
|
627 struct buffer_head *j_sb_buffer; |
|
628 journal_superblock_t *j_superblock; |
|
629 |
|
630 /* Version of the superblock format */ |
|
631 int j_format_version; |
|
632 |
|
633 /* |
|
634 * Protect the various scalars in the journal |
|
635 */ |
|
636 spinlock_t j_state_lock; |
|
637 |
|
638 /* |
|
639 * Number of processes waiting to create a barrier lock [j_state_lock] |
|
640 */ |
|
641 int j_barrier_count; |
|
642 |
|
643 /* The barrier lock itself */ |
|
644 struct mutex j_barrier; |
|
645 |
|
646 /* |
|
647 * Transactions: The current running transaction... |
|
648 * [j_state_lock] [caller holding open handle] |
|
649 */ |
|
650 transaction_t *j_running_transaction; |
|
651 |
|
652 /* |
|
653 * the transaction we are pushing to disk |
|
654 * [j_state_lock] [caller holding open handle] |
|
655 */ |
|
656 transaction_t *j_committing_transaction; |
|
657 |
|
658 /* |
|
659 * ... and a linked circular list of all transactions waiting for |
|
660 * checkpointing. [j_list_lock] |
|
661 */ |
|
662 transaction_t *j_checkpoint_transactions; |
|
663 |
|
664 /* |
|
665 * Wait queue for waiting for a locked transaction to start committing, |
|
666 * or for a barrier lock to be released |
|
667 */ |
|
668 wait_queue_head_t j_wait_transaction_locked; |
|
669 |
|
670 /* Wait queue for waiting for checkpointing to complete */ |
|
671 wait_queue_head_t j_wait_logspace; |
|
672 |
|
673 /* Wait queue for waiting for commit to complete */ |
|
674 wait_queue_head_t j_wait_done_commit; |
|
675 |
|
676 /* Wait queue to trigger checkpointing */ |
|
677 wait_queue_head_t j_wait_checkpoint; |
|
678 |
|
679 /* Wait queue to trigger commit */ |
|
680 wait_queue_head_t j_wait_commit; |
|
681 |
|
682 /* Wait queue to wait for updates to complete */ |
|
683 wait_queue_head_t j_wait_updates; |
|
684 |
|
685 /* Semaphore for locking against concurrent checkpoints */ |
|
686 struct mutex j_checkpoint_mutex; |
|
687 |
|
688 /* |
|
689 * Journal head: identifies the first unused block in the journal. |
|
690 * [j_state_lock] |
|
691 */ |
|
692 unsigned long j_head; |
|
693 |
|
694 /* |
|
695 * Journal tail: identifies the oldest still-used block in the journal. |
|
696 * [j_state_lock] |
|
697 */ |
|
698 unsigned long j_tail; |
|
699 |
|
700 /* |
|
701 * Journal free: how many free blocks are there in the journal? |
|
702 * [j_state_lock] |
|
703 */ |
|
704 unsigned long j_free; |
|
705 |
|
706 /* |
|
707 * Journal start and end: the block numbers of the first usable block |
|
708 * and one beyond the last usable block in the journal. [j_state_lock] |
|
709 */ |
|
710 unsigned long j_first; |
|
711 unsigned long j_last; |
|
712 |
|
713 /* |
|
714 * Device, blocksize and starting block offset for the location where we |
|
715 * store the journal. |
|
716 */ |
|
717 struct block_device *j_dev; |
|
718 int j_blocksize; |
|
719 unsigned long j_blk_offset; |
|
720 |
|
721 /* |
|
722 * Device which holds the client fs. For internal journal this will be |
|
723 * equal to j_dev. |
|
724 */ |
|
725 struct block_device *j_fs_dev; |
|
726 |
|
727 /* Total maximum capacity of the journal region on disk. */ |
|
728 unsigned int j_maxlen; |
|
729 |
|
730 /* |
|
731 * Protects the buffer lists and internal buffer state. |
|
732 */ |
|
733 spinlock_t j_list_lock; |
|
734 |
|
735 /* Optional inode where we store the journal. If present, all */ |
|
736 /* journal block numbers are mapped into this inode via */ |
|
737 /* bmap(). */ |
|
738 struct inode *j_inode; |
|
739 |
|
740 /* |
|
741 * Sequence number of the oldest transaction in the log [j_state_lock] |
|
742 */ |
|
743 tid_t j_tail_sequence; |
|
744 |
|
745 /* |
|
746 * Sequence number of the next transaction to grant [j_state_lock] |
|
747 */ |
|
748 tid_t j_transaction_sequence; |
|
749 |
|
750 /* |
|
751 * Sequence number of the most recently committed transaction |
|
752 * [j_state_lock]. |
|
753 */ |
|
754 tid_t j_commit_sequence; |
|
755 |
|
756 /* |
|
757 * Sequence number of the most recent transaction wanting commit |
|
758 * [j_state_lock] |
|
759 */ |
|
760 tid_t j_commit_request; |
|
761 |
|
762 /* |
|
763 * Journal uuid: identifies the object (filesystem, LVM volume etc) |
|
764 * backed by this journal. This will eventually be replaced by an array |
|
765 * of uuids, allowing us to index multiple devices within a single |
|
766 * journal and to perform atomic updates across them. |
|
767 */ |
|
768 __u8 j_uuid[16]; |
|
769 |
|
770 /* Pointer to the current commit thread for this journal */ |
|
771 struct task_struct *j_task; |
|
772 |
|
773 /* |
|
774 * Maximum number of metadata buffers to allow in a single compound |
|
775 * commit transaction |
|
776 */ |
|
777 int j_max_transaction_buffers; |
|
778 |
|
779 /* |
|
780 * What is the maximum transaction lifetime before we begin a commit? |
|
781 */ |
|
782 unsigned long j_commit_interval; |
|
783 |
|
784 /* The timer used to wakeup the commit thread: */ |
|
785 struct timer_list j_commit_timer; |
|
786 |
|
787 /* |
|
788 * The revoke table: maintains the list of revoked blocks in the |
|
789 * current transaction. [j_revoke_lock] |
|
790 */ |
|
791 spinlock_t j_revoke_lock; |
|
792 struct jbd_revoke_table_s *j_revoke; |
|
793 struct jbd_revoke_table_s *j_revoke_table[2]; |
|
794 |
|
795 /* |
|
796 * array of bhs for journal_commit_transaction |
|
797 */ |
|
798 struct buffer_head **j_wbuf; |
|
799 int j_wbufsize; |
|
800 |
|
801 pid_t j_last_sync_writer; |
|
802 |
|
803 /* |
|
804 * An opaque pointer to fs-private information. ext3 puts its |
|
805 * superblock pointer here |
|
806 */ |
|
807 void *j_private; |
|
808 }; |
|
809 |
|
/*
 * Journal flag definitions
 *
 *   JFS_UNMOUNT                Journal thread is being destroyed
 *   JFS_ABORT                  Journaling has been aborted for errors.
 *   JFS_ACK_ERR                The errno in the sb has been acked
 *   JFS_FLUSHED                The journal superblock has been flushed
 *   JFS_LOADED                 The journal superblock has been loaded
 *   JFS_BARRIER                Use IDE barriers
 *   JFS_ABORT_ON_SYNCDATA_ERR  Abort the journal on file data write
 *                              error in ordered mode
 */
#define JFS_UNMOUNT			0x001
#define JFS_ABORT			0x002
#define JFS_ACK_ERR			0x004
#define JFS_FLUSHED			0x008
#define JFS_LOADED			0x010
#define JFS_BARRIER			0x020
#define JFS_ABORT_ON_SYNCDATA_ERR	0x040
|
822 |
|
823 /* |
|
824 * Function declarations for the journaling transaction and buffer |
|
825 * management |
|
826 */ |
|
827 |
|
828 /* Filing buffers */ |
|
829 extern void journal_unfile_buffer(journal_t *, struct journal_head *); |
|
830 extern void __journal_unfile_buffer(struct journal_head *); |
|
831 extern void __journal_refile_buffer(struct journal_head *); |
|
832 extern void journal_refile_buffer(journal_t *, struct journal_head *); |
|
833 extern void __journal_file_buffer(struct journal_head *, transaction_t *, int); |
|
834 extern void __journal_free_buffer(struct journal_head *bh); |
|
835 extern void journal_file_buffer(struct journal_head *, transaction_t *, int); |
|
836 extern void __journal_clean_data_list(transaction_t *transaction); |
|
837 |
|
838 /* Log buffer allocation */ |
|
839 extern struct journal_head * journal_get_descriptor_buffer(journal_t *); |
|
840 int journal_next_log_block(journal_t *, unsigned long *); |
|
841 |
|
842 /* Commit management */ |
|
843 extern void journal_commit_transaction(journal_t *); |
|
844 |
|
845 /* Checkpoint list management */ |
|
846 int __journal_clean_checkpoint_list(journal_t *journal); |
|
847 int __journal_remove_checkpoint(struct journal_head *); |
|
848 void __journal_insert_checkpoint(struct journal_head *, transaction_t *); |
|
849 |
|
850 /* Buffer IO */ |
|
851 extern int |
|
852 journal_write_metadata_buffer(transaction_t *transaction, |
|
853 struct journal_head *jh_in, |
|
854 struct journal_head **jh_out, |
|
855 unsigned long blocknr); |
|
856 |
|
857 /* Transaction locking */ |
|
858 extern void __wait_on_journal (journal_t *); |
|
859 |
|
860 /* |
|
861 * Journal locking. |
|
862 * |
|
863 * We need to lock the journal during transaction state changes so that nobody |
|
864 * ever tries to take a handle on the running transaction while we are in the |
|
865 * middle of moving it to the commit phase. j_state_lock does this. |
|
866 * |
|
867 * Note that the locking is completely interrupt unsafe. We never touch |
|
868 * journal structures from interrupts. |
|
869 */ |
|
870 |
|
871 static inline handle_t *journal_current_handle(void) |
|
872 { |
|
873 return current->journal_info; |
|
874 } |
|
875 |
|
876 /* The journaling code user interface: |
|
877 * |
|
878 * Create and destroy handles |
|
879 * Register buffer modifications against the current transaction. |
|
880 */ |
|
881 |
|
882 extern handle_t *journal_start(journal_t *, int nblocks); |
|
883 extern int journal_restart (handle_t *, int nblocks); |
|
884 extern int journal_extend (handle_t *, int nblocks); |
|
885 extern int journal_get_write_access(handle_t *, struct buffer_head *); |
|
886 extern int journal_get_create_access (handle_t *, struct buffer_head *); |
|
887 extern int journal_get_undo_access(handle_t *, struct buffer_head *); |
|
888 extern int journal_dirty_data (handle_t *, struct buffer_head *); |
|
889 extern int journal_dirty_metadata (handle_t *, struct buffer_head *); |
|
890 extern void journal_release_buffer (handle_t *, struct buffer_head *); |
|
891 extern int journal_forget (handle_t *, struct buffer_head *); |
|
892 extern void journal_sync_buffer (struct buffer_head *); |
|
893 extern void journal_invalidatepage(journal_t *, |
|
894 struct page *, unsigned long); |
|
895 extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); |
|
896 extern int journal_stop(handle_t *); |
|
897 extern int journal_flush (journal_t *); |
|
898 extern void journal_lock_updates (journal_t *); |
|
899 extern void journal_unlock_updates (journal_t *); |
|
900 |
|
901 extern journal_t * journal_init_dev(struct block_device *bdev, |
|
902 struct block_device *fs_dev, |
|
903 int start, int len, int bsize); |
|
904 extern journal_t * journal_init_inode (struct inode *); |
|
905 extern int journal_update_format (journal_t *); |
|
906 extern int journal_check_used_features |
|
907 (journal_t *, unsigned long, unsigned long, unsigned long); |
|
908 extern int journal_check_available_features |
|
909 (journal_t *, unsigned long, unsigned long, unsigned long); |
|
910 extern int journal_set_features |
|
911 (journal_t *, unsigned long, unsigned long, unsigned long); |
|
912 extern int journal_create (journal_t *); |
|
913 extern int journal_load (journal_t *journal); |
|
914 extern int journal_destroy (journal_t *); |
|
915 extern int journal_recover (journal_t *journal); |
|
916 extern int journal_wipe (journal_t *, int); |
|
917 extern int journal_skip_recovery (journal_t *); |
|
918 extern void journal_update_superblock (journal_t *, int); |
|
919 extern void journal_abort (journal_t *, int); |
|
920 extern int journal_errno (journal_t *); |
|
921 extern void journal_ack_err (journal_t *); |
|
922 extern int journal_clear_err (journal_t *); |
|
923 extern int journal_bmap(journal_t *, unsigned long, unsigned long *); |
|
924 extern int journal_force_commit(journal_t *); |
|
925 |
|
/* journal_head management */
struct journal_head *journal_add_journal_head(struct buffer_head *bh);
struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
void journal_remove_journal_head(struct buffer_head *bh);
void journal_put_journal_head(struct journal_head *jh);

/* handle management: slab cache used by the handle helpers below */
extern struct kmem_cache *jbd_handle_cache;
|
938 |
|
939 static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) |
|
940 { |
|
941 return kmem_cache_alloc(jbd_handle_cache, gfp_flags); |
|
942 } |
|
943 |
|
944 static inline void jbd_free_handle(handle_t *handle) |
|
945 { |
|
946 kmem_cache_free(jbd_handle_cache, handle); |
|
947 } |
|
948 |
|
949 /* Primary revoke support */ |
|
950 #define JOURNAL_REVOKE_DEFAULT_HASH 256 |
|
951 extern int journal_init_revoke(journal_t *, int); |
|
952 extern void journal_destroy_revoke_caches(void); |
|
953 extern int journal_init_revoke_caches(void); |
|
954 |
|
955 extern void journal_destroy_revoke(journal_t *); |
|
956 extern int journal_revoke (handle_t *, |
|
957 unsigned long, struct buffer_head *); |
|
958 extern int journal_cancel_revoke(handle_t *, struct journal_head *); |
|
959 extern void journal_write_revoke_records(journal_t *, transaction_t *); |
|
960 |
|
961 /* Recovery revoke support */ |
|
962 extern int journal_set_revoke(journal_t *, unsigned long, tid_t); |
|
963 extern int journal_test_revoke(journal_t *, unsigned long, tid_t); |
|
964 extern void journal_clear_revoke(journal_t *); |
|
965 extern void journal_switch_revoke_table(journal_t *journal); |
|
966 |
|
967 /* |
|
968 * The log thread user interface: |
|
969 * |
|
970 * Request space in the current transaction, and force transaction commit |
|
971 * transitions on demand. |
|
972 */ |
|
973 |
|
974 int __log_space_left(journal_t *); /* Called with journal locked */ |
|
975 int log_start_commit(journal_t *journal, tid_t tid); |
|
976 int __log_start_commit(journal_t *journal, tid_t tid); |
|
977 int journal_start_commit(journal_t *journal, tid_t *tid); |
|
978 int journal_force_commit_nested(journal_t *journal); |
|
979 int log_wait_commit(journal_t *journal, tid_t tid); |
|
980 int log_do_checkpoint(journal_t *journal); |
|
981 |
|
982 void __log_wait_for_space(journal_t *journal); |
|
983 extern void __journal_drop_transaction(journal_t *, transaction_t *); |
|
984 extern int cleanup_journal_tail(journal_t *); |
|
985 |
|
/* Debugging code only: */

/*
 * Trap for unimplemented functions.  Each pass of the loop logs the name
 * of the enclosing function at KERN_ERR, marks the current task
 * TASK_UNINTERRUPTIBLE and calls schedule().  The loop condition is
 * constant, so control never leaves the macro.
 */
#define jbd_ENOSYS()							\
do {									\
	printk(KERN_ERR "JBD unimplemented function %s\n", __func__);	\
	current->state = TASK_UNINTERRUPTIBLE;				\
	schedule();							\
} while (1)
|
994 |
|
995 /* |
|
996 * is_journal_abort |
|
997 * |
|
998 * Simple test wrapper function to test the JFS_ABORT state flag. This |
|
999 * bit, when set, indicates that we have had a fatal error somewhere, |
|
1000 * either inside the journaling layer or indicated to us by the client |
|
1001 * (eg. ext3), and that we and should not commit any further |
|
1002 * transactions. |
|
1003 */ |
|
1004 |
|
1005 static inline int is_journal_aborted(journal_t *journal) |
|
1006 { |
|
1007 return journal->j_flags & JFS_ABORT; |
|
1008 } |
|
1009 |
|
1010 static inline int is_handle_aborted(handle_t *handle) |
|
1011 { |
|
1012 if (handle->h_aborted) |
|
1013 return 1; |
|
1014 return is_journal_aborted(handle->h_transaction->t_journal); |
|
1015 } |
|
1016 |
|
1017 static inline void journal_abort_handle(handle_t *handle) |
|
1018 { |
|
1019 handle->h_aborted = 1; |
|
1020 } |
|
1021 |
|
1022 #endif /* __KERNEL__ */ |
|
1023 |
|
1024 /* Comparison functions for transaction IDs: perform comparisons using |
|
1025 * modulo arithmetic so that they work over sequence number wraps. */ |
|
1026 |
|
1027 static inline int tid_gt(tid_t x, tid_t y) |
|
1028 { |
|
1029 int difference = (x - y); |
|
1030 return (difference > 0); |
|
1031 } |
|
1032 |
|
1033 static inline int tid_geq(tid_t x, tid_t y) |
|
1034 { |
|
1035 int difference = (x - y); |
|
1036 return (difference >= 0); |
|
1037 } |
|
1038 |
|
int journal_blocks_per_page(struct inode *inode);
|
1040 |
|
1041 /* |
|
1042 * Return the minimum number of blocks which must be free in the journal |
|
1043 * before a new transaction may be started. Must be called under j_state_lock. |
|
1044 */ |
|
1045 static inline int jbd_space_needed(journal_t *journal) |
|
1046 { |
|
1047 int nblocks = journal->j_max_transaction_buffers; |
|
1048 if (journal->j_committing_transaction) |
|
1049 nblocks += journal->j_committing_transaction-> |
|
1050 t_outstanding_credits; |
|
1051 return nblocks; |
|
1052 } |
|
1053 |
|
1054 /* |
|
1055 * Definitions which augment the buffer_head layer |
|
1056 */ |
|
1057 |
|
/* journaling buffer types */
#define BJ_None		0	/* Not journaled */
#define BJ_SyncData	1	/* Normal data: flush before commit */
#define BJ_Metadata	2	/* Normal journaled metadata */
#define BJ_Forget	3	/* Buffer superseded by this transaction */
#define BJ_IO		4	/* Buffer is for temporary IO use */
#define BJ_Shadow	5	/* Buffer contents being shadowed to the log */
#define BJ_LogCtl	6	/* Buffer contains log descriptors */
#define BJ_Reserved	7	/* Buffer is reserved for access by journal */
#define BJ_Locked	8	/* Locked for I/O during commit */
#define BJ_Types	9	/* One past the last buffer type above */

int jbd_blocks_per_page(struct inode *inode);
|
1071 |
|
1072 #ifdef __KERNEL__ |
|
1073 |
|
/*
 * Buffer tracing hooks.  Every macro here expands to an empty statement,
 * so the arguments are never evaluated.
 */
#define buffer_trace_init(bh)		do {} while (0)
#define print_buffer_fields(bh)		do {} while (0)
#define print_buffer_trace(bh)		do {} while (0)
#define BUFFER_TRACE(bh, info)		do {} while (0)
#define BUFFER_TRACE2(bh, bh2, info)	do {} while (0)
#define JBUFFER_TRACE(jh, info)		do {} while (0)
|
1080 |
|
1081 #endif /* __KERNEL__ */ |
|
1082 |
|
1083 #endif /* _LINUX_JBD_H */ |