LCOV - code coverage report
Current view: top level - lkbce/fs/jbd - journal.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 43 840 5.1 %
Date: 2017-01-25 Functions: 8 55 14.5 %

          Line data    Source code
       1             : /*
       2             :  * linux/fs/jbd/journal.c
       3             :  *
       4             :  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
       5             :  *
       6             :  * Copyright 1998 Red Hat corp --- All Rights Reserved
       7             :  *
       8             :  * This file is part of the Linux kernel and is made available under
       9             :  * the terms of the GNU General Public License, version 2, or at your
      10             :  * option, any later version, incorporated herein by reference.
      11             :  *
      12             :  * Generic filesystem journal-writing code; part of the ext2fs
      13             :  * journaling system.
      14             :  *
      15             :  * This file manages journals: areas of disk reserved for logging
      16             :  * transactional updates.  This includes the kernel journaling thread
      17             :  * which is responsible for scheduling updates to the log.
      18             :  *
      19             :  * We do not actually manage the physical storage of the journal in this
      20             :  * file: that is left to a per-journal policy function, which allows us
      21             :  * to store the journal within a filesystem-specified area for ext2
      22             :  * journaling (ext2 can use a reserved inode for storing the log).
      23             :  */
      24             : 
      25             : #include <linux/module.h>
      26             : #include <linux/time.h>
      27             : #include <linux/fs.h>
      28             : #include <linux/jbd.h>
      29             : #include <linux/errno.h>
      30             : #include <linux/slab.h>
      31             : #include <linux/init.h>
      32             : #include <linux/mm.h>
      33             : #include <linux/freezer.h>
      34             : #include <linux/pagemap.h>
      35             : #include <linux/kthread.h>
      36             : #include <linux/poison.h>
      37             : #include <linux/proc_fs.h>
      38             : #include <linux/debugfs.h>
      39             : 
      40             : #include <asm/uaccess.h>
      41             : #include <asm/page.h>
      42             : 
      43             : EXPORT_SYMBOL(journal_start);
      44             : EXPORT_SYMBOL(journal_restart);
      45             : EXPORT_SYMBOL(journal_extend);
      46             : EXPORT_SYMBOL(journal_stop);
      47             : EXPORT_SYMBOL(journal_lock_updates);
      48             : EXPORT_SYMBOL(journal_unlock_updates);
      49             : EXPORT_SYMBOL(journal_get_write_access);
      50             : EXPORT_SYMBOL(journal_get_create_access);
      51             : EXPORT_SYMBOL(journal_get_undo_access);
      52             : EXPORT_SYMBOL(journal_dirty_data);
      53             : EXPORT_SYMBOL(journal_dirty_metadata);
      54             : EXPORT_SYMBOL(journal_release_buffer);
      55             : EXPORT_SYMBOL(journal_forget);
      56             : #if 0
      57             : EXPORT_SYMBOL(journal_sync_buffer);
      58             : #endif
      59             : EXPORT_SYMBOL(journal_flush);
      60             : EXPORT_SYMBOL(journal_revoke);
      61             : 
      62             : EXPORT_SYMBOL(journal_init_dev);
      63             : EXPORT_SYMBOL(journal_init_inode);
      64             : EXPORT_SYMBOL(journal_update_format);
      65             : EXPORT_SYMBOL(journal_check_used_features);
      66             : EXPORT_SYMBOL(journal_check_available_features);
      67             : EXPORT_SYMBOL(journal_set_features);
      68             : EXPORT_SYMBOL(journal_create);
      69             : EXPORT_SYMBOL(journal_load);
      70             : EXPORT_SYMBOL(journal_destroy);
      71             : EXPORT_SYMBOL(journal_abort);
      72             : EXPORT_SYMBOL(journal_errno);
      73             : EXPORT_SYMBOL(journal_ack_err);
      74             : EXPORT_SYMBOL(journal_clear_err);
      75             : EXPORT_SYMBOL(log_wait_commit);
      76             : EXPORT_SYMBOL(log_start_commit);
      77             : EXPORT_SYMBOL(journal_start_commit);
      78             : EXPORT_SYMBOL(journal_force_commit_nested);
      79             : EXPORT_SYMBOL(journal_wipe);
      80             : EXPORT_SYMBOL(journal_blocks_per_page);
      81             : EXPORT_SYMBOL(journal_invalidatepage);
      82             : EXPORT_SYMBOL(journal_try_to_free_buffers);
      83             : EXPORT_SYMBOL(journal_force_commit);
      84             : 
      85             : static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
      86             : static void __journal_abort_soft (journal_t *journal, int errno);
      87             : 
      88             : /*
      89             :  * Helper function used to manage commit timeouts
      90             :  */
      91             : 
      92             : static void commit_timeout(unsigned long __data)
      93             : {
      94           0 :         struct task_struct * p = (struct task_struct *) __data;
      95             : 
      96           0 :         wake_up_process(p);
      97           0 : }
      98             : 
      99             : /*
     100             :  * kjournald: The main thread function used to manage a logging device
     101             :  * journal.
     102             :  *
     103             :  * This kernel thread is responsible for two things:
     104             :  *
     105             :  * 1) COMMIT:  Every so often we need to commit the current state of the
     106             :  *    filesystem to disk.  The journal thread is responsible for writing
     107             :  *    all of the metadata buffers to disk.
     108             :  *
     109             :  * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
     110             :  *    of the data in that part of the log has been rewritten elsewhere on
     111             :  *    the disk.  Flushing these old buffers to reclaim space in the log is
     112             :  *    known as checkpointing, and this thread is responsible for that job.
     113             :  */
     114             : 
     115             : static int kjournald(void *arg)
     116             : {
     117           0 :         journal_t *journal = arg;
     118           0 :         transaction_t *transaction;
     119           0 : 
     120           0 :         /*
     121           0 :          * Set up an interval timer which can be used to trigger a commit wakeup
     122           0 :          * after the commit interval expires
     123           0 :          */
     124           0 :         setup_timer(&journal->j_commit_timer, commit_timeout,
     125             :                         (unsigned long)current);
     126             : 
     127             :         /* Record that the journal thread is running */
     128           0 :         journal->j_task = current;
     129           0 :         wake_up(&journal->j_wait_done_commit);
     130             : 
     131           0 :         printk(KERN_INFO "kjournald starting.  Commit interval %ld seconds\n",
     132             :                         journal->j_commit_interval / HZ);
     133             : 
     134             :         /*
     135             :          * And now, wait forever for commit wakeup events.
     136             :          */
     137           0 :         spin_lock(&journal->j_state_lock);
     138             : 
     139           0 : loop:
     140           0 :         if (journal->j_flags & JFS_UNMOUNT)
     141           0 :                 goto end_loop;
     142             : 
     143             :         jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
     144             :                 journal->j_commit_sequence, journal->j_commit_request);
     145             : 
     146           0 :         if (journal->j_commit_sequence != journal->j_commit_request) {
     147             :                 jbd_debug(1, "OK, requests differ\n");
     148           0 :                 spin_unlock(&journal->j_state_lock);
     149           0 :                 del_timer_sync(&journal->j_commit_timer);
     150           0 :                 journal_commit_transaction(journal);
     151           0 :                 spin_lock(&journal->j_state_lock);
     152           0 :                 goto loop;
     153             :         }
     154             : 
     155           0 :         wake_up(&journal->j_wait_done_commit);
     156           0 :         if (freezing(current)) {
     157             :                 /*
     158             :                  * The simpler the better. Flushing journal isn't a
     159             :                  * good idea, because that depends on threads that may
     160             :                  * be already stopped.
     161             :                  */
     162             :                 jbd_debug(1, "Now suspending kjournald\n");
     163           0 :                 spin_unlock(&journal->j_state_lock);
     164           0 :                 refrigerator();
     165           0 :                 spin_lock(&journal->j_state_lock);
     166             :         } else {
     167             :                 /*
     168             :                  * We assume on resume that commits are already there,
     169             :                  * so we don't sleep
     170             :                  */
     171           0 :                 DEFINE_WAIT(wait);
     172           0 :                 int should_sleep = 1;
     173             : 
     174           0 :                 prepare_to_wait(&journal->j_wait_commit, &wait,
     175             :                                 TASK_INTERRUPTIBLE);
     176           0 :                 if (journal->j_commit_sequence != journal->j_commit_request)
     177           0 :                         should_sleep = 0;
     178           0 :                 transaction = journal->j_running_transaction;
     179           0 :                 if (transaction && time_after_eq(jiffies,
     180             :                                                 transaction->t_expires))
     181           0 :                         should_sleep = 0;
     182           0 :                 if (journal->j_flags & JFS_UNMOUNT)
     183           0 :                         should_sleep = 0;
     184           0 :                 if (should_sleep) {
     185           0 :                         spin_unlock(&journal->j_state_lock);
     186           0 :                         schedule();
     187           0 :                         spin_lock(&journal->j_state_lock);
     188             :                 }
     189           0 :                 finish_wait(&journal->j_wait_commit, &wait);
     190             :         }
     191             : 
     192             :         jbd_debug(1, "kjournald wakes\n");
     193             : 
     194             :         /*
     195             :          * Were we woken up by a commit wakeup event?
     196             :          */
     197           0 :         transaction = journal->j_running_transaction;
     198           0 :         if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
     199           0 :                 journal->j_commit_request = transaction->t_tid;
     200             :                 jbd_debug(1, "woke because of timeout\n");
     201             :         }
     202           0 :         goto loop;
     203           0 : 
     204             : end_loop:
     205           0 :         spin_unlock(&journal->j_state_lock);
     206           0 :         del_timer_sync(&journal->j_commit_timer);
     207           0 :         journal->j_task = NULL;
     208           0 :         wake_up(&journal->j_wait_done_commit);
     209             :         jbd_debug(1, "Journal thread exiting.\n");
     210           0 :         return 0;
     211             : }
     212             : 
     213             : static int journal_start_thread(journal_t *journal)
     214             : {
     215           0 :         struct task_struct *t;
     216           0 : 
     217           0 :         t = kthread_run(kjournald, journal, "kjournald");
     218           0 :         if (IS_ERR(t))
     219           0 :                 return PTR_ERR(t);
     220           0 : 
     221           0 :         wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
     222           0 :         return 0;
     223           0 : }
     224             : 
     225             : static void journal_kill_thread(journal_t *journal)
     226             : {
     227           0 :         spin_lock(&journal->j_state_lock);
     228           0 :         journal->j_flags |= JFS_UNMOUNT;
     229             : 
     230           0 :         while (journal->j_task) {
     231           0 :                 wake_up(&journal->j_wait_commit);
     232           0 :                 spin_unlock(&journal->j_state_lock);
     233           0 :                 wait_event(journal->j_wait_done_commit,
     234           0 :                                 journal->j_task == NULL);
     235           0 :                 spin_lock(&journal->j_state_lock);
     236             :         }
     237           0 :         spin_unlock(&journal->j_state_lock);
     238           0 : }
     239             : 
     240             : /*
     241             :  * journal_write_metadata_buffer: write a metadata buffer to the journal.
     242             :  *
     243             :  * Writes a metadata buffer to a given disk block.  The actual IO is not
     244             :  * performed but a new buffer_head is constructed which labels the data
     245             :  * to be written with the correct destination disk block.
     246             :  *
     247             :  * Any magic-number escaping which needs to be done will cause a
     248             :  * copy-out here.  If the buffer happens to start with the
     249             :  * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
     250             :  * magic number is only written to the log for descriptor blocks.  In
     251             :  * this case, we copy the data and replace the first word with 0, and we
     252             :  * return a result code which indicates that this buffer needs to be
     253             :  * marked as an escaped buffer in the corresponding log descriptor
     254             :  * block.  The missing word can then be restored when the block is read
     255             :  * during recovery.
     256             :  *
     257             :  * If the source buffer has already been modified by a new transaction
     258             :  * since we took the last commit snapshot, we use the frozen copy of
     259             :  * that data for IO.  If we end up using the existing buffer_head's data
     260             :  * for the write, then we *have* to lock the buffer to prevent anyone
     261             :  * else from using and possibly modifying it while the IO is in
     262             :  * progress.
     263             :  *
     264             :  * The journal_head of the buffer to be used for IO is returned in *jh_out.
     265             :  *
     266             :  * We assume that the journal has already been locked in this function.
     267             :  *
     268             :  * Return value:
     269             :  *  <0: Error
     270             :  * >=0: Finished OK
     271             :  *
     272             :  * On success:
     273             :  * Bit 0 set == escape performed on the data
     274             :  * Bit 1 set == buffer copy-out performed (jbd_free the data after IO)
     275             :  */
     276             : 
     277             : int journal_write_metadata_buffer(transaction_t *transaction,
     278             :                                   struct journal_head  *jh_in,
     279             :                                   struct journal_head **jh_out,
     280           0 :                                   unsigned int blocknr)
     281           0 : {
     282           0 :         int need_copy_out = 0;
     283           0 :         int done_copy_out = 0;
     284           0 :         int do_escape = 0;
     285           0 :         char *mapped_data;
     286           0 :         struct buffer_head *new_bh;
     287           0 :         struct journal_head *new_jh;
     288           0 :         struct page *new_page;
     289           0 :         unsigned int new_offset;
     290           0 :         struct buffer_head *bh_in = jh2bh(jh_in);
     291           0 :         journal_t *journal = transaction->t_journal;
     292           0 : 
     293           0 :         /*
     294           0 :          * The buffer really shouldn't be locked: only the current committing
     295           0 :          * transaction is allowed to write it, so nobody else is allowed
     296           0 :          * to do any IO.
     297           0 :          *
     298           0 :          * akpm: except if we're journalling data, and write() output is
     299           0 :          * also part of a shared mapping, and another thread has
     300           0 :          * decided to launch a writepage() against this buffer.
     301           0 :          */
     302           0 :         J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
     303           0 : 
     304           0 :         new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
     305           0 :         /* keep subsequent assertions sane */
     306           0 :         new_bh->b_state = 0;
     307           0 :         init_buffer(new_bh, NULL, NULL);
     308           0 :         atomic_set(&new_bh->b_count, 1);
     309           0 :         new_jh = journal_add_journal_head(new_bh);      /* This sleeps */
     310           0 : 
     311           0 :         /*
     312             :          * If a new transaction has already done a buffer copy-out, then
     313             :          * we use that version of the data for the commit.
     314             :          */
     315           0 :         jbd_lock_bh_state(bh_in);
     316             : repeat:
     317           0 :         if (jh_in->b_frozen_data) {
     318           0 :                 done_copy_out = 1;
     319           0 :                 new_page = virt_to_page(jh_in->b_frozen_data);
     320           0 :                 new_offset = offset_in_page(jh_in->b_frozen_data);
     321             :         } else {
     322           0 :                 new_page = jh2bh(jh_in)->b_page;
     323           0 :                 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
     324             :         }
     325             : 
     326           0 :         mapped_data = kmap_atomic(new_page, KM_USER0);
     327             :         /*
     328             :          * Check for escaping
     329             :          */
     330           0 :         if (*((__be32 *)(mapped_data + new_offset)) ==
     331             :                                 cpu_to_be32(JFS_MAGIC_NUMBER)) {
     332           0 :                 need_copy_out = 1;
     333           0 :                 do_escape = 1;
     334             :         }
     335           0 :         kunmap_atomic(mapped_data, KM_USER0);
     336             : 
     337             :         /*
     338             :          * Do we need to do a data copy?
     339             :          */
     340           0 :         if (need_copy_out && !done_copy_out) {
     341             :                 char *tmp;
     342             : 
     343           0 :                 jbd_unlock_bh_state(bh_in);
     344           0 :                 tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
     345           0 :                 jbd_lock_bh_state(bh_in);
     346           0 :                 if (jh_in->b_frozen_data) {
     347           0 :                         jbd_free(tmp, bh_in->b_size);
     348           0 :                         goto repeat;
     349             :                 }
     350             : 
     351           0 :                 jh_in->b_frozen_data = tmp;
     352           0 :                 mapped_data = kmap_atomic(new_page, KM_USER0);
     353           0 :                 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
     354           0 :                 kunmap_atomic(mapped_data, KM_USER0);
     355             : 
     356           0 :                 new_page = virt_to_page(tmp);
     357           0 :                 new_offset = offset_in_page(tmp);
     358           0 :                 done_copy_out = 1;
     359             :         }
     360             : 
     361             :         /*
     362             :          * Did we need to do an escaping?  Now we've done all the
     363             :          * copying, we can finally do so.
     364             :          */
     365           0 :         if (do_escape) {
     366           0 :                 mapped_data = kmap_atomic(new_page, KM_USER0);
     367           0 :                 *((unsigned int *)(mapped_data + new_offset)) = 0;
     368           0 :                 kunmap_atomic(mapped_data, KM_USER0);
     369             :         }
     370             : 
     371           0 :         set_bh_page(new_bh, new_page, new_offset);
     372           0 :         new_jh->b_transaction = NULL;
     373           0 :         new_bh->b_size = jh2bh(jh_in)->b_size;
     374           0 :         new_bh->b_bdev = transaction->t_journal->j_dev;
     375           0 :         new_bh->b_blocknr = blocknr;
     376           0 :         set_buffer_mapped(new_bh);
     377           0 :         set_buffer_dirty(new_bh);
     378             : 
     379           0 :         *jh_out = new_jh;
     380             : 
     381             :         /*
     382             :          * The to-be-written buffer needs to get moved to the io queue,
     383             :          * and the original buffer whose contents we are shadowing or
     384             :          * copying is moved to the transaction's shadow queue.
     385             :          */
     386             :         JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
     387           0 :         spin_lock(&journal->j_list_lock);
     388           0 :         __journal_file_buffer(jh_in, transaction, BJ_Shadow);
     389           0 :         spin_unlock(&journal->j_list_lock);
     390           0 :         jbd_unlock_bh_state(bh_in);
     391             : 
     392             :         JBUFFER_TRACE(new_jh, "file as BJ_IO");
     393           0 :         journal_file_buffer(new_jh, transaction, BJ_IO);
     394             : 
     395           0 :         return do_escape | (done_copy_out << 1);
     396             : }
     397             : 
     398             : /*
     399             :  * Allocation code for the journal file.  Manage the space left in the
     400             :  * journal, so that we can begin checkpointing when appropriate.
     401             :  */
     402             : 
     403             : /*
     404             :  * __log_space_left: Return the number of free blocks left in the journal.
     405             :  *
     406             :  * Called with the journal already locked.
     407             :  *
     408             :  * Called under j_state_lock
     409             :  */
     410             : 
     411             : int __log_space_left(journal_t *journal)
     412             : {
     413           0 :         int left = journal->j_free;
     414             : 
     415           0 :         assert_spin_locked(&journal->j_state_lock);
     416             : 
     417             :         /*
     418             :          * Be pessimistic here about the number of those free blocks which
     419             :          * might be required for log descriptor control blocks.
     420             :          */
     421             : 
     422             : #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
     423             : 
     424           0 :         left -= MIN_LOG_RESERVED_BLOCKS;
     425             : 
     426           0 :         if (left <= 0)
     427           0 :                 return 0;
     428           0 :         left -= (left >> 3);
     429           0 :         return left;
     430             : }
     431             : 
     432             : /*
     433             :  * Called under j_state_lock.  Returns true if a transaction commit was started.
     434             :  */
     435             : int __log_start_commit(journal_t *journal, tid_t target)
     436             : {
     437           0 :         /*
     438           0 :          * The only transaction we can possibly wait upon is the
     439           0 :          * currently running transaction (if it exists).  Otherwise,
     440           0 :          * the target tid must be an old one.
     441           0 :          */
     442           0 :         if (journal->j_running_transaction &&
     443           0 :             journal->j_running_transaction->t_tid == target) {
     444             :                 /*
     445             :                  * We want a new commit: OK, mark the request and wake up the
     446             :                  * commit thread.  We do _not_ do the commit ourselves.
     447             :                  */
     448             : 
     449           0 :                 journal->j_commit_request = target;
     450             :                 jbd_debug(1, "JBD: requesting commit %d/%d\n",
     451             :                           journal->j_commit_request,
     452             :                           journal->j_commit_sequence);
     453           0 :                 wake_up(&journal->j_wait_commit);
     454           0 :                 return 1;
     455           0 :         } else if (!tid_geq(journal->j_commit_request, target))
     456             :                 /* This should never happen, but if it does, preserve
     457             :                    the evidence before kjournald goes into a loop and
     458             :                    increments j_commit_sequence beyond all recognition. */
     459           0 :                 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
     460             :                     journal->j_commit_request, journal->j_commit_sequence,
     461           0 :                     target, journal->j_running_transaction ?
     462             :                     journal->j_running_transaction->t_tid : 0);
     463           0 :         return 0;
     464             : }
     465             : 
     466             : int log_start_commit(journal_t *journal, tid_t tid)
     467             : {
     468           0 :         int ret;
     469             : 
     470           0 :         spin_lock(&journal->j_state_lock);
     471           0 :         ret = __log_start_commit(journal, tid);
     472           0 :         spin_unlock(&journal->j_state_lock);
     473           0 :         return ret;
     474             : }
     475             : 
     476             : /*
     477             :  * Force and wait upon a commit if the calling process is not within
     478             :  * transaction.  This is used for forcing out undo-protected data which contains
     479             :  * bitmaps, when the fs is running out of space.
     480             :  *
     481             :  * We can only force the running transaction if we don't have an active handle;
     482             :  * otherwise, we will deadlock.
     483             :  *
     484             :  * Returns true if a transaction was started.
     485             :  */
     486             : int journal_force_commit_nested(journal_t *journal)
     487             : {
     488           0 :         transaction_t *transaction = NULL;
     489           0 :         tid_t tid;
     490           0 : 
     491           0 :         spin_lock(&journal->j_state_lock);
     492           0 :         if (journal->j_running_transaction && !current->journal_info) {
     493           0 :                 transaction = journal->j_running_transaction;
     494           0 :                 __log_start_commit(journal, transaction->t_tid);
     495           0 :         } else if (journal->j_committing_transaction)
     496           0 :                 transaction = journal->j_committing_transaction;
     497             : 
     498           0 :         if (!transaction) {
     499           0 :                 spin_unlock(&journal->j_state_lock);
     500           0 :                 return 0;       /* Nothing to retry */
     501             :         }
     502             : 
     503           0 :         tid = transaction->t_tid;
     504           0 :         spin_unlock(&journal->j_state_lock);
     505           0 :         log_wait_commit(journal, tid);
     506           0 :         return 1;
     507             : }
     508             : 
     509             : /*
     510             :  * Start a commit of the current running transaction (if any).  Returns true
     511             :  * if a transaction is going to be committed (or is currently already
     512             :  * committing), and fills its tid in at *ptid
     513             :  */
     514             : int journal_start_commit(journal_t *journal, tid_t *ptid)
     515             : {
     516           0 :         int ret = 0;
     517           0 : 
     518           0 :         spin_lock(&journal->j_state_lock);
     519           0 :         if (journal->j_running_transaction) {
     520           0 :                 tid_t tid = journal->j_running_transaction->t_tid;
     521             : 
     522           0 :                 __log_start_commit(journal, tid);
     523             :                 /* There's a running transaction and we've just made sure
     524             :                  * its commit has been scheduled. */
     525           0 :                 if (ptid)
     526           0 :                         *ptid = tid;
     527           0 :                 ret = 1;
     528           0 :         } else if (journal->j_committing_transaction) {
     529             :                 /*
     530             :                  * If ext3_write_super() recently started a commit, then we
     531             :                  * have to wait for completion of that transaction
     532             :                  */
     533           0 :                 if (ptid)
     534           0 :                         *ptid = journal->j_committing_transaction->t_tid;
     535           0 :                 ret = 1;
     536             :         }
     537           0 :         spin_unlock(&journal->j_state_lock);
     538           0 :         return ret;
     539             : }
     540             : 
     541             : /*
     542             :  * Wait for a specified commit to complete.
     543             :  * The caller must not hold the journal lock (j_state_lock is taken here).
     544             :  */
     545             : int log_wait_commit(journal_t *journal, tid_t tid)
     546             : {
     547           0 :         int err = 0;
     548           0 : 
     549           0 : #ifdef CONFIG_JBD_DEBUG
     550           0 :         spin_lock(&journal->j_state_lock);
     551           0 :         if (!tid_geq(journal->j_commit_request, tid)) {
     552           0 :                 printk(KERN_EMERG
     553           0 :                        "%s: error: j_commit_request=%d, tid=%d\n",
     554           0 :                        __func__, journal->j_commit_request, tid);
     555             :         }
     556             :         spin_unlock(&journal->j_state_lock);
     557             : #endif
     558           0 :         spin_lock(&journal->j_state_lock);
     559           0 :         while (tid_gt(tid, journal->j_commit_sequence)) {
     560           0 :                 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
     561           0 :                                   tid, journal->j_commit_sequence);
     562           0 :                 wake_up(&journal->j_wait_commit);
     563           0 :                 spin_unlock(&journal->j_state_lock);
     564           0 :                 wait_event(journal->j_wait_done_commit,
     565           0 :                                 !tid_gt(tid, journal->j_commit_sequence));
     566           0 :                 spin_lock(&journal->j_state_lock);
     567             :         }
     568           0 :         spin_unlock(&journal->j_state_lock);
     569             : 
     570           0 :         if (unlikely(is_journal_aborted(journal))) {
     571           0 :                 printk(KERN_EMERG "journal commit I/O error\n");
     572           0 :                 err = -EIO;
     573             :         }
     574           0 :         return err;
     575             : }
     576             : 
     577             : /*
     578             :  * Log buffer allocation routines:
     579             :  */
     580             : 
     581             : int journal_next_log_block(journal_t *journal, unsigned int *retp)
     582             : {
     583           0 :         unsigned int blocknr;
     584           0 : 
     585           0 :         spin_lock(&journal->j_state_lock);
     586           0 :         J_ASSERT(journal->j_free > 1);
     587             : 
     588           0 :         blocknr = journal->j_head;
     589           0 :         journal->j_head++;
     590           0 :         journal->j_free--;
     591           0 :         if (journal->j_head == journal->j_last)
     592           0 :                 journal->j_head = journal->j_first;
     593           0 :         spin_unlock(&journal->j_state_lock);
     594           0 :         return journal_bmap(journal, blocknr, retp);
     595             : }
     596             : 
     597             : /*
     598             :  * Conversion of logical to physical block numbers for the journal
     599             :  *
     600             :  * On external journals the journal blocks are identity-mapped, so
     601             :  * this is a no-op.  If needed, we can use j_blk_offset - everything is
     602             :  * ready.
     603             :  */
     604             : int journal_bmap(journal_t *journal, unsigned int blocknr,
     605             :                  unsigned int *retp)
     606           0 : {
     607           0 :         int err = 0;
     608           0 :         unsigned int ret;
     609           0 : 
     610           0 :         if (journal->j_inode) {
     611           0 :                 ret = bmap(journal->j_inode, blocknr);
     612           0 :                 if (ret)
     613           0 :                         *retp = ret;
     614             :                 else {
     615             :                         char b[BDEVNAME_SIZE];
     616             : 
     617           0 :                         printk(KERN_ALERT "%s: journal block not found "
     618             :                                         "at offset %u on %s\n",
     619             :                                 __func__,
     620             :                                 blocknr,
     621             :                                 bdevname(journal->j_dev, b));
     622           0 :                         err = -EIO;
     623           0 :                         __journal_abort_soft(journal, err);
     624             :                 }
     625             :         } else {
     626           0 :                 *retp = blocknr; /* +journal->j_blk_offset */
     627             :         }
     628           0 :         return err;
     629             : }
     630             : 
     631             : /*
     632             :  * We play buffer_head aliasing tricks to write data/metadata blocks to
     633             :  * the journal without copying their contents, but for journal
     634             :  * descriptor blocks we do need to generate bona fide buffers.
     635             :  *
     636             :  * After the caller of journal_get_descriptor_buffer() has finished modifying
     637             :  * the buffer's contents they really should run flush_dcache_page(bh->b_page).
     638             :  * But we don't bother doing that, so there will be coherency problems with
     639             :  * mmaps of blockdevs which hold live JBD-controlled filesystems.
     640             :  */
     641             : struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
     642             : {
     643           0 :         struct buffer_head *bh;
     644           0 :         unsigned int blocknr;
     645           0 :         int err;
     646           0 : 
     647           0 :         err = journal_next_log_block(journal, &blocknr);
     648             : 
     649           0 :         if (err)
     650           0 :                 return NULL;
     651             : 
     652           0 :         bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     653           0 :         if (!bh)
     654           0 :                 return NULL;
     655           0 :         lock_buffer(bh);
     656           0 :         memset(bh->b_data, 0, journal->j_blocksize);
     657           0 :         set_buffer_uptodate(bh);
     658           0 :         unlock_buffer(bh);
     659             :         BUFFER_TRACE(bh, "return this buffer");
     660           0 :         return journal_add_journal_head(bh);
     661             : }
     662             : 
     663             : /*
     664             :  * Management for journal control blocks: functions to create and
     665             :  * destroy journal_t structures, and to initialise and read existing
     666             :  * journal blocks from disk.  */
     667             : 
     668             : /* First: create and setup a journal_t object in memory.  We initialise
     669             :  * very few fields yet: that has to wait until we have created the
     670             :  * journal structures from from scratch, or loaded them from disk. */
     671             : 
     672             : static journal_t * journal_init_common (void)
     673             : {
     674           0 :         journal_t *journal;
     675           0 :         int err;
     676           0 : 
     677           0 :         journal = kzalloc(sizeof(*journal), GFP_KERNEL);
     678           0 :         if (!journal)
     679           0 :                 goto fail;
     680           0 : 
     681           0 :         init_waitqueue_head(&journal->j_wait_transaction_locked);
     682           0 :         init_waitqueue_head(&journal->j_wait_logspace);
     683           0 :         init_waitqueue_head(&journal->j_wait_done_commit);
     684           0 :         init_waitqueue_head(&journal->j_wait_checkpoint);
     685           0 :         init_waitqueue_head(&journal->j_wait_commit);
     686           0 :         init_waitqueue_head(&journal->j_wait_updates);
     687           0 :         mutex_init(&journal->j_barrier);
     688           0 :         mutex_init(&journal->j_checkpoint_mutex);
     689           0 :         spin_lock_init(&journal->j_revoke_lock);
     690           0 :         spin_lock_init(&journal->j_list_lock);
     691           0 :         spin_lock_init(&journal->j_state_lock);
     692             : 
     693           0 :         journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
     694             : 
     695             :         /* The journal is marked for error until we succeed with recovery! */
     696           0 :         journal->j_flags = JFS_ABORT;
     697             : 
     698             :         /* Set up a default-sized revoke table for the new mount. */
     699           0 :         err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
     700           0 :         if (err) {
     701           0 :                 kfree(journal);
     702           0 :                 goto fail;
     703             :         }
     704           0 :         return journal;
     705           0 : fail:
     706           0 :         return NULL;
     707             : }
     708             : 
     709             : /* journal_init_dev and journal_init_inode:
     710             :  *
     711             :  * Create a journal structure assigned some fixed set of disk blocks to
     712             :  * the journal.  We don't actually touch those disk blocks yet, but we
     713             :  * need to set up all of the mapping information to tell the journaling
     714             :  * system where the journal blocks are.
     715             :  *
     716             :  */
     717             : 
     718             : /**
     719             :  *  journal_t * journal_init_dev() - creates and initialises a journal structure
     720             :  *  @bdev: Block device on which to create the journal
     721             :  *  @fs_dev: Device which hold journalled filesystem for this journal.
     722             :  *  @start: Block nr Start of journal.
     723             :  *  @len:  Length of the journal in blocks.
     724             :  *  @blocksize: blocksize of journalling device
     725             :  *
     726             :  *  Returns: a newly created journal_t *
     727             :  *
     728             :  *  journal_init_dev creates a journal which maps a fixed contiguous
     729             :  *  range of blocks on an arbitrary block device.
     730             :  *
     731             :  */
     732             : journal_t * journal_init_dev(struct block_device *bdev,
     733             :                         struct block_device *fs_dev,
     734             :                         int start, int len, int blocksize)
     735           0 : {
     736           0 :         journal_t *journal = journal_init_common();
     737           0 :         struct buffer_head *bh;
     738           0 :         int n;
     739           0 : 
     740           0 :         if (!journal)
     741           0 :                 return NULL;
     742             : 
     743             :         /* journal descriptor can store up to n blocks -bzzz */
     744           0 :         journal->j_blocksize = blocksize;
     745           0 :         n = journal->j_blocksize / sizeof(journal_block_tag_t);
     746           0 :         journal->j_wbufsize = n;
     747           0 :         journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
     748           0 :         if (!journal->j_wbuf) {
     749           0 :                 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
     750             :                         __func__);
     751           0 :                 goto out_err;
     752             :         }
     753           0 :         journal->j_dev = bdev;
     754           0 :         journal->j_fs_dev = fs_dev;
     755           0 :         journal->j_blk_offset = start;
     756           0 :         journal->j_maxlen = len;
     757             : 
     758           0 :         bh = __getblk(journal->j_dev, start, journal->j_blocksize);
     759           0 :         if (!bh) {
     760           0 :                 printk(KERN_ERR
     761             :                        "%s: Cannot get buffer for journal superblock\n",
     762             :                        __func__);
     763           0 :                 goto out_err;
     764             :         }
     765           0 :         journal->j_sb_buffer = bh;
     766           0 :         journal->j_superblock = (journal_superblock_t *)bh->b_data;
     767             : 
     768           0 :         return journal;
     769           0 : out_err:
     770           0 :         kfree(journal->j_wbuf);
     771           0 :         kfree(journal);
     772           0 :         return NULL;
     773             : }
     774             : 
     775             : /**
     776             :  *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
     777             :  *  @inode: An inode to create the journal in
     778             :  *
     779             :  * journal_init_inode creates a journal which maps an on-disk inode as
     780             :  * the journal.  The inode must exist already, must support bmap() and
     781             :  * must have all data blocks preallocated.
     782             :  */
     783             : journal_t * journal_init_inode (struct inode *inode)
     784             : {
     785           0 :         struct buffer_head *bh;
     786           0 :         journal_t *journal = journal_init_common();
     787           0 :         int err;
     788           0 :         int n;
     789           0 :         unsigned int blocknr;
     790           0 : 
     791           0 :         if (!journal)
     792           0 :                 return NULL;
     793             : 
     794           0 :         journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
     795           0 :         journal->j_inode = inode;
     796             :         jbd_debug(1,
     797             :                   "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
     798             :                   journal, inode->i_sb->s_id, inode->i_ino,
     799             :                   (long long) inode->i_size,
     800             :                   inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
     801             : 
     802           0 :         journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
     803           0 :         journal->j_blocksize = inode->i_sb->s_blocksize;
     804             : 
     805             :         /* journal descriptor can store up to n blocks -bzzz */
     806           0 :         n = journal->j_blocksize / sizeof(journal_block_tag_t);
     807           0 :         journal->j_wbufsize = n;
     808           0 :         journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
     809           0 :         if (!journal->j_wbuf) {
     810           0 :                 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
     811             :                         __func__);
     812           0 :                 goto out_err;
     813             :         }
     814             : 
     815           0 :         err = journal_bmap(journal, 0, &blocknr);
     816             :         /* If that failed, give up */
     817           0 :         if (err) {
     818           0 :                 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
     819             :                        __func__);
     820           0 :                 goto out_err;
     821             :         }
     822             : 
     823           0 :         bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     824           0 :         if (!bh) {
     825           0 :                 printk(KERN_ERR
     826             :                        "%s: Cannot get buffer for journal superblock\n",
     827             :                        __func__);
     828           0 :                 goto out_err;
     829             :         }
     830           0 :         journal->j_sb_buffer = bh;
     831           0 :         journal->j_superblock = (journal_superblock_t *)bh->b_data;
     832             : 
     833           0 :         return journal;
     834           0 : out_err:
     835           0 :         kfree(journal->j_wbuf);
     836           0 :         kfree(journal);
     837           0 :         return NULL;
     838             : }
     839             : 
     840             : /*
     841             :  * If the journal init or create aborts, we need to mark the journal
     842             :  * superblock as being NULL to prevent the journal destroy from writing
     843             :  * back a bogus superblock.
     844             :  */
     845             : static void journal_fail_superblock (journal_t *journal)
     846             : {
     847           0 :         struct buffer_head *bh = journal->j_sb_buffer;
     848           0 :         brelse(bh);
     849           0 :         journal->j_sb_buffer = NULL;
     850           0 : }
     851             : 
     852             : /*
     853             :  * Given a journal_t structure, initialise the various fields for
     854             :  * startup of a new journaling session.  We use this both when creating
     855             :  * a journal, and after recovering an old journal to reset it for
     856             :  * subsequent use.
     857             :  */
     858             : 
     859             : static int journal_reset(journal_t *journal)
     860             : {
     861           0 :         journal_superblock_t *sb = journal->j_superblock;
     862           0 :         unsigned int first, last;
     863           0 : 
     864           0 :         first = be32_to_cpu(sb->s_first);
     865           0 :         last = be32_to_cpu(sb->s_maxlen);
     866           0 :         if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
     867           0 :                 printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n",
     868             :                        first, last);
     869           0 :                 journal_fail_superblock(journal);
     870           0 :                 return -EINVAL;
     871             :         }
     872             : 
     873           0 :         journal->j_first = first;
     874           0 :         journal->j_last = last;
     875             : 
     876           0 :         journal->j_head = first;
     877           0 :         journal->j_tail = first;
     878           0 :         journal->j_free = last - first;
     879             : 
     880           0 :         journal->j_tail_sequence = journal->j_transaction_sequence;
     881           0 :         journal->j_commit_sequence = journal->j_transaction_sequence - 1;
     882           0 :         journal->j_commit_request = journal->j_commit_sequence;
     883             : 
     884           0 :         journal->j_max_transaction_buffers = journal->j_maxlen / 4;
     885             : 
     886             :         /* Add the dynamic fields and write it to disk. */
     887           0 :         journal_update_superblock(journal, 1);
     888           0 :         return journal_start_thread(journal);
     889             : }
     890             : 
     891             : /**
     892             :  * int journal_create() - Initialise the new journal file
     893             :  * @journal: Journal to create. This structure must have been initialised
     894             :  *
     895             :  * Given a journal_t structure which tells us which disk blocks we can
     896             :  * use, create a new journal superblock and initialise all of the
     897             :  * journal fields from scratch.
     898             :  **/
     899             : int journal_create(journal_t *journal)
     900             : {
     901           0 :         unsigned int blocknr;
     902           0 :         struct buffer_head *bh;
     903           0 :         journal_superblock_t *sb;
     904           0 :         int i, err;
     905           0 : 
     906           0 :         if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
     907           0 :                 printk (KERN_ERR "Journal length (%d blocks) too short.\n",
     908           0 :                         journal->j_maxlen);
     909           0 :                 journal_fail_superblock(journal);
     910           0 :                 return -EINVAL;
     911             :         }
     912             : 
     913           0 :         if (journal->j_inode == NULL) {
     914             :                 /*
     915             :                  * We don't know what block to start at!
     916             :                  */
     917           0 :                 printk(KERN_EMERG
     918             :                        "%s: creation of journal on external device!\n",
     919             :                        __func__);
     920           0 :                 BUG();
     921             :         }
     922             : 
     923             :         /* Zero out the entire journal on disk.  We cannot afford to
     924             :            have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
     925             :         jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
     926           0 :         for (i = 0; i < journal->j_maxlen; i++) {
     927           0 :                 err = journal_bmap(journal, i, &blocknr);
     928           0 :                 if (err)
     929           0 :                         return err;
     930           0 :                 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     931           0 :                 lock_buffer(bh);
     932           0 :                 memset (bh->b_data, 0, journal->j_blocksize);
     933             :                 BUFFER_TRACE(bh, "marking dirty");
     934           0 :                 mark_buffer_dirty(bh);
     935             :                 BUFFER_TRACE(bh, "marking uptodate");
     936           0 :                 set_buffer_uptodate(bh);
     937           0 :                 unlock_buffer(bh);
     938           0 :                 __brelse(bh);
     939             :         }
     940             : 
     941           0 :         sync_blockdev(journal->j_dev);
     942             :         jbd_debug(1, "JBD: journal cleared.\n");
     943             : 
     944             :         /* OK, fill in the initial static fields in the new superblock */
     945           0 :         sb = journal->j_superblock;
     946             : 
     947           0 :         sb->s_header.h_magic  = cpu_to_be32(JFS_MAGIC_NUMBER);
     948           0 :         sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
     949             : 
     950           0 :         sb->s_blocksize      = cpu_to_be32(journal->j_blocksize);
     951           0 :         sb->s_maxlen = cpu_to_be32(journal->j_maxlen);
     952           0 :         sb->s_first  = cpu_to_be32(1);
     953             : 
     954           0 :         journal->j_transaction_sequence = 1;
     955             : 
     956           0 :         journal->j_flags &= ~JFS_ABORT;
     957           0 :         journal->j_format_version = 2;
     958             : 
     959           0 :         return journal_reset(journal);
     960             : }
     961             : 
     962             : /**
     963             :  * void journal_update_superblock() - Update journal sb on disk.
     964             :  * @journal: The journal to update.
     965             :  * @wait: Set to '0' if you don't want to wait for IO completion.
     966             :  *
     967             :  * Update a journal's dynamic superblock fields and write it to disk,
     968             :  * optionally waiting for the IO to complete.
     969             :  */
     970             : void journal_update_superblock(journal_t *journal, int wait)
     971             : {
     972           0 :         journal_superblock_t *sb = journal->j_superblock;
     973           0 :         struct buffer_head *bh = journal->j_sb_buffer;
     974           0 : 
     975           0 :         /*
     976           0 :          * As a special case, if the on-disk copy is already marked as needing
     977             :          * no recovery (s_start == 0) and there are no outstanding transactions
     978             :          * in the filesystem, then we can safely defer the superblock update
     979             :          * until the next commit by setting JFS_FLUSHED.  This avoids
     980             :          * attempting a write to a potential-readonly device.
     981             :          */
     982           0 :         if (sb->s_start == 0 && journal->j_tail_sequence ==
     983             :                                 journal->j_transaction_sequence) {
     984             :                 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
     985             :                         "(start %u, seq %d, errno %d)\n",
     986             :                         journal->j_tail, journal->j_tail_sequence,
     987             :                         journal->j_errno);
     988           0 :                 goto out;
     989             :         }
     990             : 
     991           0 :         spin_lock(&journal->j_state_lock);
     992             :         jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
     993             :                   journal->j_tail, journal->j_tail_sequence, journal->j_errno);
     994             : 
     995           0 :         sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
     996           0 :         sb->s_start    = cpu_to_be32(journal->j_tail);
     997           0 :         sb->s_errno    = cpu_to_be32(journal->j_errno);
     998           0 :         spin_unlock(&journal->j_state_lock);
     999             : 
    1000             :         BUFFER_TRACE(bh, "marking dirty");
    1001           0 :         mark_buffer_dirty(bh);
    1002           0 :         if (wait)
    1003           0 :                 sync_dirty_buffer(bh);
    1004             :         else
    1005           0 :                 ll_rw_block(SWRITE, 1, &bh);
    1006             : 
    1007             : out:
    1008           0 :         /* If we have just flushed the log (by marking s_start==0), then
    1009             :          * any future commit will have to be careful to update the
    1010             :          * superblock again to re-record the true start of the log. */
    1011             : 
    1012           0 :         spin_lock(&journal->j_state_lock);
    1013           0 :         if (sb->s_start)
    1014           0 :                 journal->j_flags &= ~JFS_FLUSHED;
    1015             :         else
    1016           0 :                 journal->j_flags |= JFS_FLUSHED;
    1017           0 :         spin_unlock(&journal->j_state_lock);
    1018           0 : }
    1019             : 
    1020             : /*
    1021             :  * Read the superblock for a given journal, performing initial
    1022             :  * validation of the format.
    1023             :  */
    1024             : 
    1025             : static int journal_get_superblock(journal_t *journal)
    1026             : {
    1027           0 :         struct buffer_head *bh;
    1028           0 :         journal_superblock_t *sb;
    1029           0 :         int err = -EIO;
    1030           0 : 
    1031           0 :         bh = journal->j_sb_buffer;
    1032           0 : 
    1033           0 :         J_ASSERT(bh != NULL);
    1034           0 :         if (!buffer_uptodate(bh)) {
    1035           0 :                 ll_rw_block(READ, 1, &bh);
    1036           0 :                 wait_on_buffer(bh);
    1037           0 :                 if (!buffer_uptodate(bh)) {
    1038           0 :                         printk (KERN_ERR
    1039             :                                 "JBD: IO error reading journal superblock\n");
    1040           0 :                         goto out;
    1041             :                 }
    1042             :         }
    1043             : 
    1044           0 :         sb = journal->j_superblock;
    1045             : 
    1046           0 :         err = -EINVAL;
    1047             : 
    1048           0 :         if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
    1049             :             sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
    1050           0 :                 printk(KERN_WARNING "JBD: no valid journal superblock found\n");
    1051           0 :                 goto out;
    1052             :         }
    1053             : 
    1054           0 :         switch(be32_to_cpu(sb->s_header.h_blocktype)) {
    1055           0 :         case JFS_SUPERBLOCK_V1:
    1056           0 :                 journal->j_format_version = 1;
    1057           0 :                 break;
    1058           0 :         case JFS_SUPERBLOCK_V2:
    1059           0 :                 journal->j_format_version = 2;
    1060           0 :                 break;
    1061           0 :         default:
    1062           0 :                 printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
    1063           0 :                 goto out;
    1064             :         }
    1065             : 
    1066           0 :         if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
    1067           0 :                 journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
    1068           0 :         else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
    1069           0 :                 printk (KERN_WARNING "JBD: journal file too short\n");
    1070           0 :                 goto out;
    1071             :         }
    1072             : 
    1073           0 :         return 0;
    1074           0 : 
    1075             : out:
    1076           0 :         journal_fail_superblock(journal);
    1077           0 :         return err;
    1078             : }
    1079             : 
    1080             : /*
    1081             :  * Load the on-disk journal superblock and read the key fields into the
    1082             :  * journal_t.
    1083             :  */
    1084             : 
    1085             : static int load_superblock(journal_t *journal)
    1086             : {
    1087           0 :         int err;
    1088           0 :         journal_superblock_t *sb;
    1089           0 : 
    1090           0 :         err = journal_get_superblock(journal);
    1091           0 :         if (err)
    1092           0 :                 return err;
    1093           0 : 
    1094           0 :         sb = journal->j_superblock;
    1095             : 
    1096           0 :         journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
    1097           0 :         journal->j_tail = be32_to_cpu(sb->s_start);
    1098           0 :         journal->j_first = be32_to_cpu(sb->s_first);
    1099           0 :         journal->j_last = be32_to_cpu(sb->s_maxlen);
    1100           0 :         journal->j_errno = be32_to_cpu(sb->s_errno);
    1101             : 
    1102           0 :         return 0;
    1103             : }
    1104             : 
    1105             : 
    1106             : /**
    1107             :  * int journal_load() - Read journal from disk.
    1108             :  * @journal: Journal to act on.
    1109             :  *
    1110             :  * Given a journal_t structure which tells us which disk blocks contain
    1111             :  * a journal, read the journal from disk to initialise the in-memory
    1112             :  * structures.
    1113             :  */
    1114             : int journal_load(journal_t *journal)
    1115             : {
    1116           0 :         int err;
    1117           0 :         journal_superblock_t *sb;
    1118           0 : 
    1119           0 :         err = load_superblock(journal);
    1120           0 :         if (err)
    1121           0 :                 return err;
    1122             : 
    1123           0 :         sb = journal->j_superblock;
    1124             :         /* If this is a V2 superblock, then we have to check the
    1125             :          * features flags on it. */
    1126             : 
    1127           0 :         if (journal->j_format_version >= 2) {
    1128           0 :                 if ((sb->s_feature_ro_compat &
    1129             :                      ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
    1130             :                     (sb->s_feature_incompat &
    1131             :                      ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
    1132           0 :                         printk (KERN_WARNING
    1133             :                                 "JBD: Unrecognised features on journal\n");
    1134           0 :                         return -EINVAL;
    1135             :                 }
    1136             :         }
    1137             : 
    1138             :         /* Let the recovery code check whether it needs to recover any
    1139             :          * data from the journal. */
    1140           0 :         if (journal_recover(journal))
    1141           0 :                 goto recovery_error;
    1142             : 
    1143             :         /* OK, we've finished with the dynamic journal bits:
    1144             :          * reinitialise the dynamic contents of the superblock in memory
    1145             :          * and reset them on disk. */
    1146           0 :         if (journal_reset(journal))
    1147           0 :                 goto recovery_error;
    1148             : 
    1149           0 :         journal->j_flags &= ~JFS_ABORT;
    1150           0 :         journal->j_flags |= JFS_LOADED;
    1151           0 :         return 0;
    1152           0 : 
    1153             : recovery_error:
    1154           0 :         printk (KERN_WARNING "JBD: recovery failed\n");
    1155           0 :         return -EIO;
    1156             : }
    1157             : 
    1158             : /**
    1159             :  * int journal_destroy() - Release a journal_t structure.
    1160             :  * @journal: Journal to act on.
    1161             :  *
    1162             :  * Release a journal_t structure once it is no longer in use by the
    1163             :  * journaled object.
    1164             :  * Return <0 if we couldn't clean up the journal.
    1165             :  */
    1166             : int journal_destroy(journal_t *journal)
    1167             : {
    1168           0 :         int err = 0;
    1169           0 : 
    1170           0 :         /* Wait for the commit thread to wake up and die. */
    1171           0 :         journal_kill_thread(journal);
    1172           0 : 
    1173             :         /* Force a final log commit */
    1174           0 :         if (journal->j_running_transaction)
    1175           0 :                 journal_commit_transaction(journal);
    1176             : 
    1177             :         /* Force any old transactions to disk */
    1178             : 
    1179             :         /* Totally anal locking here... */
    1180           0 :         spin_lock(&journal->j_list_lock);
    1181           0 :         while (journal->j_checkpoint_transactions != NULL) {
    1182           0 :                 spin_unlock(&journal->j_list_lock);
    1183           0 :                 log_do_checkpoint(journal);
    1184           0 :                 spin_lock(&journal->j_list_lock);
    1185             :         }
    1186           0 : 
    1187           0 :         J_ASSERT(journal->j_running_transaction == NULL);
    1188           0 :         J_ASSERT(journal->j_committing_transaction == NULL);
    1189           0 :         J_ASSERT(journal->j_checkpoint_transactions == NULL);
    1190           0 :         spin_unlock(&journal->j_list_lock);
    1191             : 
    1192           0 :         if (journal->j_sb_buffer) {
    1193           0 :                 if (!is_journal_aborted(journal)) {
    1194             :                         /* We can now mark the journal as empty. */
    1195           0 :                         journal->j_tail = 0;
    1196           0 :                         journal->j_tail_sequence =
    1197             :                                 ++journal->j_transaction_sequence;
    1198           0 :                         journal_update_superblock(journal, 1);
    1199             :                 } else {
    1200           0 :                         err = -EIO;
    1201             :                 }
    1202           0 :                 brelse(journal->j_sb_buffer);
    1203             :         }
    1204             : 
    1205           0 :         if (journal->j_inode)
    1206           0 :                 iput(journal->j_inode);
    1207           0 :         if (journal->j_revoke)
    1208           0 :                 journal_destroy_revoke(journal);
    1209           0 :         kfree(journal->j_wbuf);
    1210           0 :         kfree(journal);
    1211             : 
    1212           0 :         return err;
    1213             : }
    1214             : 
    1215             : 
    1216             : /**
    1217             :  * int journal_check_used_features () - Check if features specified are used.
    1218             :  * @journal: Journal to check.
    1219             :  * @compat: bitmask of compatible features
    1220             :  * @ro: bitmask of features that force read-only mount
    1221             :  * @incompat: bitmask of incompatible features
    1222             :  *
    1223             :  * Check whether the journal uses all of a given set of
    1224             :  * features.  Return true (non-zero) if it does.
    1225             :  **/
    1226             : 
    1227             : int journal_check_used_features (journal_t *journal, unsigned long compat,
    1228             :                                  unsigned long ro, unsigned long incompat)
    1229             : {
    1230           0 :         journal_superblock_t *sb;
    1231           0 : 
    1232           0 :         if (!compat && !ro && !incompat)
    1233           0 :                 return 1;
    1234           0 :         if (journal->j_format_version == 1)
    1235           0 :                 return 0;
    1236             : 
    1237           0 :         sb = journal->j_superblock;
    1238             : 
    1239           0 :         if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
    1240             :             ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
    1241             :             ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
    1242           0 :                 return 1;
    1243             : 
    1244           0 :         return 0;
    1245             : }
    1246             : 
    1247             : /**
    1248             :  * int journal_check_available_features() - Check feature set in journalling layer
    1249             :  * @journal: Journal to check.
    1250             :  * @compat: bitmask of compatible features
    1251             :  * @ro: bitmask of features that force read-only mount
    1252             :  * @incompat: bitmask of incompatible features
    1253             :  *
    1254             :  * Check whether the journaling code supports the use of
    1255             :  * all of a given set of features on this journal.  Return true
    1256             :  * (non-zero) if it can. */
    1257             : 
    1258             : int journal_check_available_features (journal_t *journal, unsigned long compat,
    1259             :                                       unsigned long ro, unsigned long incompat)
    1260             : {
    1261           0 :         journal_superblock_t *sb;
    1262             : 
    1263           0 :         if (!compat && !ro && !incompat)
    1264           0 :                 return 1;
    1265             : 
    1266           0 :         sb = journal->j_superblock;
    1267             : 
    1268             :         /* We can support any known requested features iff the
    1269             :          * superblock is in version 2.  Otherwise we fail to support any
    1270             :          * extended sb features. */
    1271             : 
    1272           0 :         if (journal->j_format_version != 2)
    1273           0 :                 return 0;
    1274             : 
    1275           0 :         if ((compat   & JFS_KNOWN_COMPAT_FEATURES) == compat &&
    1276             :             (ro       & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
    1277             :             (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
    1278           0 :                 return 1;
    1279             : 
    1280           0 :         return 0;
    1281             : }
    1282             : 
    1283             : /**
    1284             :  * int journal_set_features () - Mark a given journal feature in the superblock
    1285             :  * @journal: Journal to act on.
    1286             :  * @compat: bitmask of compatible features
    1287             :  * @ro: bitmask of features that force read-only mount
    1288             :  * @incompat: bitmask of incompatible features
    1289             :  *
    1290             :  * Mark a given journal feature as present on the
    1291             :  * superblock.  Returns true if the requested features could be set.
    1292             :  *
    1293             :  */
    1294             : 
    1295             : int journal_set_features (journal_t *journal, unsigned long compat,
    1296             :                           unsigned long ro, unsigned long incompat)
    1297             : {
    1298           0 :         journal_superblock_t *sb;
    1299           0 : 
    1300           0 :         if (journal_check_used_features(journal, compat, ro, incompat))
    1301           0 :                 return 1;
    1302           0 : 
    1303           0 :         if (!journal_check_available_features(journal, compat, ro, incompat))
    1304           0 :                 return 0;
    1305             : 
    1306             :         jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
    1307             :                   compat, ro, incompat);
    1308             : 
    1309           0 :         sb = journal->j_superblock;
    1310             : 
    1311           0 :         sb->s_feature_compat    |= cpu_to_be32(compat);
    1312           0 :         sb->s_feature_ro_compat |= cpu_to_be32(ro);
    1313           0 :         sb->s_feature_incompat  |= cpu_to_be32(incompat);
    1314             : 
    1315           0 :         return 1;
    1316             : }
    1317             : 
    1318             : 
    1319             : /**
    1320             :  * int journal_update_format () - Update on-disk journal structure.
    1321             :  * @journal: Journal to act on.
    1322             :  *
    1323             :  * Given an initialised but unloaded journal struct, poke about in the
    1324             :  * on-disk structure to update it to the most recent supported version.
    1325             :  */
    1326             : int journal_update_format (journal_t *journal)
    1327             : {
    1328           0 :         journal_superblock_t *sb;
    1329           0 :         int err;
    1330           0 : 
    1331           0 :         err = journal_get_superblock(journal);
    1332           0 :         if (err)
    1333           0 :                 return err;
    1334             : 
    1335           0 :         sb = journal->j_superblock;
    1336             : 
    1337           0 :         switch (be32_to_cpu(sb->s_header.h_blocktype)) {
    1338           0 :         case JFS_SUPERBLOCK_V2:
    1339           0 :                 return 0;
    1340           0 :         case JFS_SUPERBLOCK_V1:
    1341           0 :                 return journal_convert_superblock_v1(journal, sb);
    1342           0 :         default:
    1343           0 :                 break;
    1344             :         }
    1345           0 :         return -EINVAL;
    1346             : }
    1347             : 
    1348             : static int journal_convert_superblock_v1(journal_t *journal,
    1349             :                                          journal_superblock_t *sb)
    1350           0 : {
    1351           0 :         int offset, blocksize;
    1352           0 :         struct buffer_head *bh;
    1353           0 : 
    1354           0 :         printk(KERN_WARNING
    1355             :                 "JBD: Converting superblock from version 1 to 2.\n");
    1356             : 
    1357             :         /* Pre-initialise new fields to zero */
    1358           0 :         offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
    1359           0 :         blocksize = be32_to_cpu(sb->s_blocksize);
    1360           0 :         memset(&sb->s_feature_compat, 0, blocksize-offset);
    1361             : 
    1362           0 :         sb->s_nr_users = cpu_to_be32(1);
    1363           0 :         sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
    1364           0 :         journal->j_format_version = 2;
    1365             : 
    1366           0 :         bh = journal->j_sb_buffer;
    1367             :         BUFFER_TRACE(bh, "marking dirty");
    1368           0 :         mark_buffer_dirty(bh);
    1369           0 :         sync_dirty_buffer(bh);
    1370           0 :         return 0;
    1371             : }
    1372             : 
    1373             : 
    1374             : /**
    1375             :  * int journal_flush () - Flush journal
    1376             :  * @journal: Journal to act on.
    1377             :  *
    1378             :  * Flush all data for a given journal to disk and empty the journal.
    1379             :  * Filesystems can use this when remounting readonly to ensure that
    1380             :  * recovery does not need to happen on remount.
    1381             :  */
    1382             : 
    1383             : int journal_flush(journal_t *journal)
    1384             : {
    1385           0 :         int err = 0;
    1386           0 :         transaction_t *transaction = NULL;
    1387           0 :         unsigned int old_tail;
    1388           0 : 
    1389           0 :         spin_lock(&journal->j_state_lock);
    1390           0 : 
    1391           0 :         /* Force everything buffered to the log... */
    1392           0 :         if (journal->j_running_transaction) {
    1393           0 :                 transaction = journal->j_running_transaction;
    1394           0 :                 __log_start_commit(journal, transaction->t_tid);
    1395           0 :         } else if (journal->j_committing_transaction)
    1396           0 :                 transaction = journal->j_committing_transaction;
    1397             : 
    1398             :         /* Wait for the log commit to complete... */
    1399           0 :         if (transaction) {
    1400           0 :                 tid_t tid = transaction->t_tid;
    1401             : 
    1402           0 :                 spin_unlock(&journal->j_state_lock);
    1403           0 :                 log_wait_commit(journal, tid);
    1404             :         } else {
    1405           0 :                 spin_unlock(&journal->j_state_lock);
    1406             :         }
    1407             : 
    1408             :         /* ...and flush everything in the log out to disk. */
    1409           0 :         spin_lock(&journal->j_list_lock);
    1410           0 :         while (!err && journal->j_checkpoint_transactions != NULL) {
    1411           0 :                 spin_unlock(&journal->j_list_lock);
    1412           0 :                 mutex_lock(&journal->j_checkpoint_mutex);
    1413           0 :                 err = log_do_checkpoint(journal);
    1414           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    1415           0 :                 spin_lock(&journal->j_list_lock);
    1416             :         }
    1417           0 :         spin_unlock(&journal->j_list_lock);
    1418             : 
    1419           0 :         if (is_journal_aborted(journal))
    1420           0 :                 return -EIO;
    1421             : 
    1422           0 :         cleanup_journal_tail(journal);
    1423             : 
    1424             :         /* Finally, mark the journal as really needing no recovery.
    1425             :          * This sets s_start==0 in the underlying superblock, which is
    1426             :          * the magic code for a fully-recovered superblock.  Any future
    1427             :          * commits of data to the journal will restore the current
    1428             :          * s_start value. */
    1429           0 :         spin_lock(&journal->j_state_lock);
    1430           0 :         old_tail = journal->j_tail;
    1431           0 :         journal->j_tail = 0;
    1432           0 :         spin_unlock(&journal->j_state_lock);
    1433           0 :         journal_update_superblock(journal, 1);
    1434           0 :         spin_lock(&journal->j_state_lock);
    1435           0 :         journal->j_tail = old_tail;
    1436             : 
    1437           0 :         J_ASSERT(!journal->j_running_transaction);
    1438           0 :         J_ASSERT(!journal->j_committing_transaction);
    1439           0 :         J_ASSERT(!journal->j_checkpoint_transactions);
    1440           0 :         J_ASSERT(journal->j_head == journal->j_tail);
    1441           0 :         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
    1442           0 :         spin_unlock(&journal->j_state_lock);
    1443           0 :         return 0;
    1444             : }
    1445             : 
    1446             : /**
    1447             :  * int journal_wipe() - Wipe journal contents
    1448             :  * @journal: Journal to act on.
    1449             :  * @write: flag (see below)
    1450             :  *
    1451             :  * Wipe out all of the contents of a journal, safely.  This will produce
    1452             :  * a warning if the journal contains any valid recovery information.
    1453             :  * Must be called between journal_init_*() and journal_load().
    1454             :  *
    1455             :  * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
    1456             :  * we merely suppress recovery.
    1457             :  */
    1458             : 
    1459             : int journal_wipe(journal_t *journal, int write)
    1460             : {
    1461           0 :         journal_superblock_t *sb;
    1462           0 :         int err = 0;
    1463           0 : 
    1464           0 :         J_ASSERT (!(journal->j_flags & JFS_LOADED));
    1465             : 
    1466           0 :         err = load_superblock(journal);
    1467           0 :         if (err)
    1468           0 :                 return err;
    1469             : 
    1470           0 :         sb = journal->j_superblock;
    1471             : 
    1472           0 :         if (!journal->j_tail)
    1473           0 :                 goto no_recovery;
    1474             : 
    1475           0 :         printk (KERN_WARNING "JBD: %s recovery information on journal\n",
    1476             :                 write ? "Clearing" : "Ignoring");
    1477             : 
    1478           0 :         err = journal_skip_recovery(journal);
    1479           0 :         if (write)
    1480           0 :                 journal_update_superblock(journal, 1);
    1481             : 
    1482             :  no_recovery:
    1483           0 :         return err;
    1484             : }
    1485           0 : 
    1486             : /*
    1487             :  * journal_dev_name: format a character string to describe on what
    1488             :  * device this journal is present.
    1489             :  */
    1490             : 
    1491             : static const char *journal_dev_name(journal_t *journal, char *buffer)
    1492             : {
    1493           0 :         struct block_device *bdev;
    1494           0 : 
    1495           0 :         if (journal->j_inode)
    1496           0 :                 bdev = journal->j_inode->i_sb->s_bdev;
    1497             :         else
    1498           0 :                 bdev = journal->j_dev;
    1499             : 
    1500           0 :         return bdevname(bdev, buffer);
    1501             : }
    1502             : 
    1503             : /*
    1504             :  * Journal abort has very specific semantics, which are described
    1505             :  * in the comment on journal_abort() below.
    1506             :  *
    1507             :  * Two internal functions, which provide abort to the jbd layer
    1508             :  * itself, are here.
    1509             :  */
    1510             : 
    1511             : /*
    1512             :  * Quick version for internal journal use (doesn't lock the journal).
    1513             :  * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
    1514             :  * and don't attempt to make any other journal updates.
    1515             :  */
    1516             : static void __journal_abort_hard(journal_t *journal)
    1517             : {
    1518           0 :         transaction_t *transaction;
    1519           0 :         char b[BDEVNAME_SIZE];
    1520           0 : 
    1521           0 :         if (journal->j_flags & JFS_ABORT)
    1522           0 :                 return;
    1523             : 
    1524           0 :         printk(KERN_ERR "Aborting journal on device %s.\n",
    1525             :                 journal_dev_name(journal, b));
    1526             : 
    1527           0 :         spin_lock(&journal->j_state_lock);
    1528           0 :         journal->j_flags |= JFS_ABORT;
    1529           0 :         transaction = journal->j_running_transaction;
    1530           0 :         if (transaction)
    1531           0 :                 __log_start_commit(journal, transaction->t_tid);
    1532           0 :         spin_unlock(&journal->j_state_lock);
    1533           0 : }
    1534             : 
    1535             : /* Soft abort: record the abort error status in the journal superblock,
    1536             :  * but don't do any other IO. */
    1537             : static void __journal_abort_soft (journal_t *journal, int errno)
    1538             : {
    1539           0 :         if (journal->j_flags & JFS_ABORT)
    1540           0 :                 return;
    1541             : 
    1542           0 :         if (!journal->j_errno)
    1543           0 :                 journal->j_errno = errno;
    1544             : 
    1545           0 :         __journal_abort_hard(journal);
    1546             : 
    1547           0 :         if (errno)
    1548           0 :                 journal_update_superblock(journal, 1);
    1549           0 : }
    1550             : 
    1551             : /**
    1552             :  * void journal_abort () - Shutdown the journal immediately.
    1553             :  * @journal: the journal to shutdown.
    1554             :  * @errno:   an error number to record in the journal indicating
    1555             :  *           the reason for the shutdown.
    1556             :  *
    1557             :  * Perform a complete, immediate shutdown of the ENTIRE
    1558             :  * journal (not of a single transaction).  This operation cannot be
    1559             :  * undone without closing and reopening the journal.
    1560             :  *
    1561             :  * The journal_abort function is intended to support higher level error
    1562             :  * recovery mechanisms such as the ext2/ext3 remount-readonly error
    1563             :  * mode.
    1564             :  *
    1565             :  * Journal abort has very specific semantics.  Any existing dirty,
    1566             :  * unjournaled buffers in the main filesystem will still be written to
    1567             :  * disk by bdflush, but the journaling mechanism will be suspended
    1568             :  * immediately and no further transaction commits will be honoured.
    1569             :  *
    1570             :  * Any dirty, journaled buffers will be written back to disk without
    1571             :  * hitting the journal.  Atomicity cannot be guaranteed on an aborted
    1572             :  * filesystem, but we _do_ attempt to leave as much data as possible
    1573             :  * behind for fsck to use for cleanup.
    1574             :  *
    1575             :  * Any attempt to get a new transaction handle on a journal which is in
    1576             :  * ABORT state will just result in an -EROFS error return.  A
    1577             :  * journal_stop on an existing handle will return -EIO if we have
    1578             :  * entered abort state during the update.
    1579             :  *
    1580             :  * Recursive transactions are not disturbed by journal abort until the
    1581             :  * final journal_stop, which will receive the -EIO error.
    1582             :  *
    1583             :  * Finally, the journal_abort call allows the caller to supply an errno
    1584             :  * which will be recorded (if possible) in the journal superblock.  This
    1585             :  * allows a client to record failure conditions in the middle of a
    1586             :  * transaction without having to complete the transaction to record the
    1587             :  * failure to disk.  ext3_error, for example, now uses this
    1588             :  * functionality.
    1589             :  *
    1590             :  * Errors which originate from within the journaling layer will NOT
    1591             :  * supply an errno; a null errno implies that absolutely no further
    1592             :  * writes are done to the journal (unless there are any already in
    1593             :  * progress).
    1594             :  *
    1595             :  */
    1596             : 
    1597             : void journal_abort(journal_t *journal, int errno)
    1598             : {
    1599           0 :         __journal_abort_soft(journal, errno);
    1600           0 : }
    1601             : 
    1602             : /**
    1603             :  * int journal_errno () - returns the journal's error state.
    1604             :  * @journal: journal to examine.
    1605             :  *
    1606             :  * This is the errno number set with journal_abort() the last
    1607             :  * time the journal was mounted; if the journal was stopped
    1608             :  * without calling abort, this will be 0.
    1609             :  *
    1610             :  * If the journal has been aborted during this mount, -EROFS will
    1611             :  * be returned.
    1612             :  */
    1613             : int journal_errno(journal_t *journal)
    1614             : {
    1615           0 :         int err;
    1616             : 
    1617           0 :         spin_lock(&journal->j_state_lock);
    1618           0 :         if (journal->j_flags & JFS_ABORT)
    1619           0 :                 err = -EROFS;
    1620             :         else
    1621           0 :                 err = journal->j_errno;
    1622           0 :         spin_unlock(&journal->j_state_lock);
    1623           0 :         return err;
    1624             : }
    1625             : 
    1626             : /**
    1627             :  * int journal_clear_err () - clears the journal's error state
    1628             :  * @journal: journal to act on.
    1629             :  *
    1630             :  * An error must be cleared or Acked to take a FS out of readonly
    1631             :  * mode.
    1632             :  */
    1633             : int journal_clear_err(journal_t *journal)
    1634             : {
    1635           0 :         int err = 0;
    1636             : 
    1637           0 :         spin_lock(&journal->j_state_lock);
    1638           0 :         if (journal->j_flags & JFS_ABORT)
    1639           0 :                 err = -EROFS;
    1640             :         else
    1641           0 :                 journal->j_errno = 0;
    1642           0 :         spin_unlock(&journal->j_state_lock);
    1643           0 :         return err;
    1644             : }
    1645             : 
    1646             : /**
    1647             :  * void journal_ack_err() - Ack journal err.
    1648             :  * @journal: journal to act on.
    1649             :  *
    1650             :  * An error must be cleared or Acked to take a FS out of readonly
    1651             :  * mode.
    1652             :  */
    1653             : void journal_ack_err(journal_t *journal)
    1654             : {
    1655           0 :         spin_lock(&journal->j_state_lock);
    1656           0 :         if (journal->j_errno)
    1657           0 :                 journal->j_flags |= JFS_ACK_ERR;
    1658           0 :         spin_unlock(&journal->j_state_lock);
    1659           0 : }
    1660             : 
    1661             : int journal_blocks_per_page(struct inode *inode)
    1662             : {
    1663           0 :         return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
    1664             : }
    1665             : 
    1666             : /*
    1667             :  * Journal_head storage management
    1668             :  */
    1669           1 : static struct kmem_cache *journal_head_cache;
    1670             : #ifdef CONFIG_JBD_DEBUG
    1671             : static atomic_t nr_journal_heads = ATOMIC_INIT(0);
    1672             : #endif
    1673             : 
    1674             : static int journal_init_journal_head_cache(void)
    1675             : {
    1676           1 :         int retval;
    1677           1 : 
    1678           6 :         J_ASSERT(journal_head_cache == NULL);
    1679           1 :         journal_head_cache = kmem_cache_create("journal_head",
    1680             :                                 sizeof(struct journal_head),
    1681             :                                 0,              /* offset */
    1682             :                                 SLAB_TEMPORARY, /* flags */
    1683             :                                 NULL);          /* ctor */
    1684           1 :         retval = 0;
    1685           2 :         if (!journal_head_cache) {
    1686           1 :                 retval = -ENOMEM;
    1687           1 :                 printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
    1688             :         }
    1689           1 :         return retval;
    1690             : }
    1691             : 
    1692             : static void journal_destroy_journal_head_cache(void)
    1693             : {
    1694           6 :         if (journal_head_cache) {
    1695           3 :                 kmem_cache_destroy(journal_head_cache);
    1696           3 :                 journal_head_cache = NULL;
    1697             :         }
    1698           3 : }
    1699             : 
    1700             : /*
    1701             :  * journal_head splicing and dicing
    1702             :  */
    1703             : static struct journal_head *journal_alloc_journal_head(void)
    1704             : {
    1705           0 :         struct journal_head *ret;
    1706           0 :         static unsigned long last_warning;
    1707           0 : 
    1708           0 : #ifdef CONFIG_JBD_DEBUG
    1709             :         atomic_inc(&nr_journal_heads);
    1710             : #endif
    1711           0 :         ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
    1712           0 :         if (ret == NULL) {
    1713             :                 jbd_debug(1, "out of memory for journal_head\n");
    1714           0 :                 if (time_after(jiffies, last_warning + 5*HZ)) {
    1715           0 :                         printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
    1716             :                                __func__);
    1717           0 :                         last_warning = jiffies;
    1718             :                 }
    1719           0 :                 while (ret == NULL) {
    1720           0 :                         yield();
    1721           0 :                         ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
    1722             :                 }
    1723           0 :         }
    1724           0 :         return ret;
    1725             : }
    1726             : 
    1727             : static void journal_free_journal_head(struct journal_head *jh)
    1728             : {
    1729             : #ifdef CONFIG_JBD_DEBUG
    1730             :         atomic_dec(&nr_journal_heads);
    1731             :         memset(jh, JBD_POISON_FREE, sizeof(*jh));
    1732             : #endif
    1733           0 :         kmem_cache_free(journal_head_cache, jh);
    1734           0 : }
    1735             : 
    1736             : /*
    1737             :  * A journal_head is attached to a buffer_head whenever JBD has an
    1738             :  * interest in the buffer.
    1739             :  *
    1740             :  * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
    1741             :  * is set.  This bit is tested in core kernel code where we need to take
    1742             :  * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
    1743             :  * there.
    1744             :  *
    1745             :  * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
    1746             :  *
    1747             :  * When a buffer has its BH_JBD bit set it is immune from being released by
    1748             :  * core kernel code, mainly via ->b_count.
    1749             :  *
    1750             :  * A journal_head may be detached from its buffer_head when the journal_head's
    1751             :  * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
    1752             :  * Various places in JBD call journal_remove_journal_head() to indicate that the
    1753             :  * journal_head can be dropped if needed.
    1754             :  *
    1755             :  * Various places in the kernel want to attach a journal_head to a buffer_head
    1756             :  * _before_ attaching the journal_head to a transaction.  To protect the
    1757             :  * journal_head in this situation, journal_add_journal_head elevates the
    1758             :  * journal_head's b_jcount refcount by one.  The caller must call
    1759             :  * journal_put_journal_head() to undo this.
    1760             :  *
    1761             :  * So the typical usage would be:
    1762             :  *
    1763             :  *      (Attach a journal_head if needed.  Increments b_jcount)
    1764             :  *      struct journal_head *jh = journal_add_journal_head(bh);
    1765             :  *      ...
    1766             :  *      jh->b_transaction = xxx;
    1767             :  *      journal_put_journal_head(jh);
    1768             :  *
    1769             :  * Now, the journal_head's b_jcount is zero, but it is safe from being released
    1770             :  * because it has a non-zero b_transaction.
    1771             :  */
    1772             : 
    1773             : /*
    1774             :  * Give a buffer_head a journal_head.
    1775             :  *
    1776             :  * Doesn't need the journal lock.
    1777             :  * May sleep.
    1778             :  */
    1779             : struct journal_head *journal_add_journal_head(struct buffer_head *bh)
    1780             : {
    1781           0 :         struct journal_head *jh;
    1782           0 :         struct journal_head *new_jh = NULL;
    1783           0 : 
    1784           0 : repeat:
    1785           0 :         if (!buffer_jbd(bh)) {
    1786           0 :                 new_jh = journal_alloc_journal_head();
    1787           0 :                 memset(new_jh, 0, sizeof(*new_jh));
    1788           0 :         }
    1789           0 : 
    1790           0 :         jbd_lock_bh_journal_head(bh);
    1791           0 :         if (buffer_jbd(bh)) {
    1792           0 :                 jh = bh2jh(bh);
    1793             :         } else {
    1794           0 :                 J_ASSERT_BH(bh,
    1795             :                         (atomic_read(&bh->b_count) > 0) ||
    1796             :                         (bh->b_page && bh->b_page->mapping));
    1797             : 
    1798           0 :                 if (!new_jh) {
    1799           0 :                         jbd_unlock_bh_journal_head(bh);
    1800           0 :                         goto repeat;
    1801             :                 }
    1802             : 
    1803           0 :                 jh = new_jh;
    1804           0 :                 new_jh = NULL;          /* We consumed it */
    1805           0 :                 set_buffer_jbd(bh);
    1806           0 :                 bh->b_private = jh;
    1807           0 :                 jh->b_bh = bh;
    1808           0 :                 get_bh(bh);
    1809             :                 BUFFER_TRACE(bh, "added journal_head");
    1810             :         }
    1811           0 :         jh->b_jcount++;
    1812           0 :         jbd_unlock_bh_journal_head(bh);
    1813           0 :         if (new_jh)
    1814           0 :                 journal_free_journal_head(new_jh);
    1815           0 :         return bh->b_private;
    1816             : }
    1817             : 
    1818             : /*
    1819             :  * Grab a ref against this buffer_head's journal_head.  If it ended up not
    1820             :  * having a journal_head, return NULL
    1821             :  */
    1822             : struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
    1823             : {
    1824           0 :         struct journal_head *jh = NULL;
    1825           0 : 
    1826           0 :         jbd_lock_bh_journal_head(bh);
    1827           0 :         if (buffer_jbd(bh)) {
    1828           0 :                 jh = bh2jh(bh);
    1829           0 :                 jh->b_jcount++;
    1830             :         }
    1831           0 :         jbd_unlock_bh_journal_head(bh);
    1832           0 :         return jh;
    1833             : }
    1834             : 
    1835             : static void __journal_remove_journal_head(struct buffer_head *bh)
    1836             : {
    1837           0 :         struct journal_head *jh = bh2jh(bh);
    1838           0 : 
    1839           0 :         J_ASSERT_JH(jh, jh->b_jcount >= 0);
    1840           0 : 
    1841           0 :         get_bh(bh);
    1842           0 :         if (jh->b_jcount == 0) {
    1843           0 :                 if (jh->b_transaction == NULL &&
    1844           0 :                                 jh->b_next_transaction == NULL &&
    1845             :                                 jh->b_cp_transaction == NULL) {
    1846           0 :                         J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
    1847           0 :                         J_ASSERT_BH(bh, buffer_jbd(bh));
    1848           0 :                         J_ASSERT_BH(bh, jh2bh(jh) == bh);
    1849             :                         BUFFER_TRACE(bh, "remove journal_head");
    1850           0 :                         if (jh->b_frozen_data) {
    1851           0 :                                 printk(KERN_WARNING "%s: freeing "
    1852             :                                                 "b_frozen_data\n",
    1853             :                                                 __func__);
    1854           0 :                                 jbd_free(jh->b_frozen_data, bh->b_size);
    1855             :                         }
    1856           0 :                         if (jh->b_committed_data) {
    1857           0 :                                 printk(KERN_WARNING "%s: freeing "
    1858             :                                                 "b_committed_data\n",
    1859             :                                                 __func__);
    1860           0 :                                 jbd_free(jh->b_committed_data, bh->b_size);
    1861             :                         }
    1862           0 :                         bh->b_private = NULL;
    1863           0 :                         jh->b_bh = NULL;     /* debug, really */
    1864           0 :                         clear_buffer_jbd(bh);
    1865           0 :                         __brelse(bh);
    1866           0 :                         journal_free_journal_head(jh);
    1867             :                 } else {
    1868             :                         BUFFER_TRACE(bh, "journal_head was locked");
    1869             :                 }
    1870             :         }
    1871           0 : }
    1872             : 
    1873             : /*
    1874             :  * journal_remove_journal_head(): if the buffer isn't attached to a transaction
    1875             :  * and has a zero b_jcount then remove and release its journal_head.   If we did
    1876             :  * see that the buffer is not used by any transaction we also "logically"
    1877             :  * decrement ->b_count.
    1878             :  *
    1879             :  * We in fact take an additional increment on ->b_count as a convenience,
    1880             :  * because the caller usually wants to do additional things with the bh
    1881             :  * after calling here.
    1882             :  * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
    1883             :  * time.  Once the caller has run __brelse(), the buffer is eligible for
    1884             :  * reaping by try_to_free_buffers().
    1885             :  */
    1886             : void journal_remove_journal_head(struct buffer_head *bh)
    1887             : {
    1888           0 :         jbd_lock_bh_journal_head(bh);
    1889           0 :         __journal_remove_journal_head(bh);
    1890           0 :         jbd_unlock_bh_journal_head(bh);
    1891           0 : }
    1892             : 
    1893             : /*
    1894             :  * Drop a reference on the passed journal_head.  If it fell to zero then try to
    1895             :  * release the journal_head from the buffer_head.
    1896             :  */
    1897             : void journal_put_journal_head(struct journal_head *jh)
    1898             : {
    1899           0 :         struct buffer_head *bh = jh2bh(jh);
    1900           0 : 
    1901           0 :         jbd_lock_bh_journal_head(bh);
    1902           0 :         J_ASSERT_JH(jh, jh->b_jcount > 0);
    1903           0 :         --jh->b_jcount;
    1904           0 :         if (!jh->b_jcount && !jh->b_transaction) {
    1905           0 :                 __journal_remove_journal_head(bh);
    1906           0 :                 __brelse(bh);
    1907             :         }
    1908           0 :         jbd_unlock_bh_journal_head(bh);
    1909           0 : }
    1910             : 
    1911             : /*
    1912             :  * debugfs tunables
    1913             :  */
    1914             : #ifdef CONFIG_JBD_DEBUG
    1915             : 
    1916             : u8 journal_enable_debug __read_mostly;
    1917             : EXPORT_SYMBOL(journal_enable_debug);
    1918             : 
    1919             : static struct dentry *jbd_debugfs_dir;
    1920             : static struct dentry *jbd_debug;
    1921             : 
    1922             : static void __init jbd_create_debugfs_entry(void)
    1923             : {
    1924             :         jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
    1925             :         if (jbd_debugfs_dir)
    1926             :                 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
    1927             :                                                jbd_debugfs_dir,
    1928             :                                                &journal_enable_debug);
    1929             : }
    1930             : 
    1931             : static void __exit jbd_remove_debugfs_entry(void)
    1932             : {
    1933             :         debugfs_remove(jbd_debug);
    1934             :         debugfs_remove(jbd_debugfs_dir);
    1935             : }
    1936             : 
    1937             : #else
    1938             : 
    1939             : static inline void jbd_create_debugfs_entry(void)
    1940             : {
    1941             : }
    1942             : 
    1943             : static inline void jbd_remove_debugfs_entry(void)
    1944             : {
    1945             : }
    1946             : 
    1947             : #endif
    1948             : 
    1949             : struct kmem_cache *jbd_handle_cache;
    1950             : 
    1951             : static int __init journal_init_handle_cache(void)
    1952             : {
    1953           2 :         jbd_handle_cache = kmem_cache_create("journal_handle",
    1954             :                                 sizeof(handle_t),
    1955             :                                 0,              /* offset */
    1956             :                                 SLAB_TEMPORARY, /* flags */
    1957             :                                 NULL);          /* ctor */
    1958           4 :         if (jbd_handle_cache == NULL) {
    1959           2 :                 printk(KERN_EMERG "JBD: failed to create handle cache\n");
    1960           2 :                 return -ENOMEM;
    1961             :         }
    1962           2 :         return 0;
    1963             : }
    1964             : 
    1965             : static void journal_destroy_handle_cache(void)
    1966             : {
    1967           6 :         if (jbd_handle_cache)
    1968           3 :                 kmem_cache_destroy(jbd_handle_cache);
    1969           3 : }
    1970             : 
    1971             : /*
    1972             :  * Module startup and shutdown
    1973             :  */
    1974             : 
    1975             : static int __init journal_init_caches(void)
    1976             : {
    1977           1 :         int ret;
    1978             : 
    1979           3 :         ret = journal_init_revoke_caches();
    1980           2 :         if (ret == 0)
    1981           2 :                 ret = journal_init_journal_head_cache();
    1982           4 :         if (ret == 0)
    1983           4 :                 ret = journal_init_handle_cache();
    1984           3 :         return ret;
    1985             : }
    1986             : 
    1987             : static void journal_destroy_caches(void)
    1988             : {
    1989           6 :         journal_destroy_revoke_caches();
    1990           6 :         journal_destroy_journal_head_cache();
    1991           6 :         journal_destroy_handle_cache();
    1992           3 : }
    1993             : 
    1994             : static int __init journal_init(void)
    1995             : {
    1996           1 :         int ret;
    1997             : 
    1998             :         BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
    1999             : 
    2000           4 :         ret = journal_init_caches();
    2001           2 :         if (ret != 0)
    2002           2 :                 journal_destroy_caches();
    2003           4 :         jbd_create_debugfs_entry();
    2004           1 :         return ret;
    2005             : }
    2006             : 
    2007             : static void __exit journal_exit(void)
    2008             : {
    2009             : #ifdef CONFIG_JBD_DEBUG
    2010             :         int n = atomic_read(&nr_journal_heads);
    2011             :         if (n)
    2012             :                 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
    2013             : #endif
    2014           4 :         jbd_remove_debugfs_entry();
    2015           4 :         journal_destroy_caches();
    2016           2 : }
    2017             : 
    2018             : MODULE_LICENSE("GPL");
    2019             : module_init(journal_init);
    2020             : module_exit(journal_exit);
    2021           1 : 

Generated by: LCOV version 1.10