LCOV - code coverage report
Current view: top level - drivers/md - md.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 16 16 100.0 %
Date: 2017-01-25 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*
       2             :    md.h : kernel internal structure of the Linux MD driver
       3             :           Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
       4             :           
       5             :    This program is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 2, or (at your option)
       8             :    any later version.
       9             :    
      10             :    You should have received a copy of the GNU General Public License
      11             :    (for example /usr/src/linux/COPYING); if not, write to the Free
      12             :    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
      13             : */
      14             : 
      15             : #ifndef _MD_MD_H
      16             : #define _MD_MD_H
      17             : 
      18             : #include <linux/blkdev.h>
      19             : #include <linux/kobject.h>
      20             : #include <linux/list.h>
      21             : #include <linux/mm.h>
      22             : #include <linux/mutex.h>
      23             : #include <linux/timer.h>
      24             : #include <linux/wait.h>
      25             : #include <linux/workqueue.h>
      26             : 
      27             : #define MaxSector (~(sector_t)0)
      28             : 
      29           2 : typedef struct mddev_s mddev_t;
      30           2 : typedef struct mdk_rdev_s mdk_rdev_t;
      31           1 : 
      32             : /*
      33             :  * MD's 'extended' device
      34             :  */
      35             : struct mdk_rdev_s
      36             : {
      37             :         struct list_head same_set;      /* RAID devices within the same set */
      38             : 
      39             :         sector_t sectors;               /* Device size (in 512-byte sectors) */
      40             :         mddev_t *mddev;                 /* RAID array if running */
      41             :         int last_events;                /* IO event timestamp */
      42             : 
      43             :         struct block_device *bdev;      /* block device handle */
      44             : 
      45             :         struct page     *sb_page;
      46             :         int             sb_loaded;
      47             :         __u64           sb_events;
      48             :         sector_t        data_offset;    /* start of data in array */
      49             :         sector_t        sb_start;       /* offset of the super block (in 512-byte sectors) */
      50             :         int             sb_size;        /* bytes in the superblock */
      51             :         int             preferred_minor;        /* autorun support */
      52             : 
      53             :         struct kobject  kobj;
      54             : 
      55             :         /* A device can be in one of three states based on two flags:
      56             :          * Not working:   faulty==1 in_sync==0
      57             :          * Fully working: faulty==0 in_sync==1
      58             :          * Working, but not
      59             :          * in sync with array
      60             :          *                faulty==0 in_sync==0
      61             :          *
      62             :          * It can never have faulty==1, in_sync==1
      63             :          * This reduces the burden of testing multiple flags in many cases
      64             :          */
      65             : 
      66             :         unsigned long   flags;
      67             : #define Faulty          1               /* device is known to have a fault */
      68             : #define In_sync         2               /* device is in_sync with rest of array */
      69             : #define WriteMostly     4               /* Avoid reading if at all possible */
      70             : #define BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */
      71             : #define AllReserved     6               /* If whole device is reserved for
      72             :                                          * one array */
      73             : #define AutoDetected    7               /* added by auto-detect */
      74             : #define Blocked         8               /* An error occurred on an externally
      75             :                                          * managed array, don't allow writes
      76             :                                          * until it is cleared */
      77             : #define StateChanged    9               /* Faulty or Blocked has changed during
      78             :                                          * interrupt, so it needs to be
      79             :                                          * notified by the thread */
      80             :         wait_queue_head_t blocked_wait;
      81             : 
      82             :         int desc_nr;                    /* descriptor index in the superblock */
      83             :         int raid_disk;                  /* role of device in array */
      84             :         int saved_raid_disk;            /* role that device used to have in the
      85             :                                          * array and could again if we did a partial
      86             :                                          * resync from the bitmap
      87             :                                          */
      88             :         sector_t        recovery_offset;/* If this device has been partially
      89             :                                          * recovered, this is where we were
      90             :                                          * up to.
      91             :                                          */
      92             : 
      93             :         atomic_t        nr_pending;     /* number of pending requests.
      94             :                                          * only maintained for arrays that
      95             :                                          * support hot removal
      96             :                                          */
      97             :         atomic_t        read_errors;    /* number of consecutive read errors that
      98             :                                          * we have tried to ignore.
      99             :                                          */
     100             :         struct timespec last_read_error;        /* monotonic time since our
     101             :                                                  * last read error
     102             :                                                  */
     103             :         atomic_t        corrected_errors; /* number of corrected read errors,
     104             :                                            * for reporting to userspace and storing
     105             :                                            * in superblock.
     106             :                                            */
     107             :         struct work_struct del_work;    /* used for delayed sysfs removal */
     108             : 
     109             :         struct sysfs_dirent *sysfs_state; /* handle for 'state'
     110             :                                            * sysfs entry */
     111           5 : };
     112             : 
     113             : struct mddev_s
     114             : {
     115             :         void                            *private;
     116             :         struct mdk_personality          *pers;
     117             :         dev_t                           unit;
     118             :         int                             md_minor;
     119             :         struct list_head                disks;
     120             :         unsigned long                   flags;
     121             : #define MD_CHANGE_DEVS  0       /* Some device status has changed */
     122             : #define MD_CHANGE_CLEAN 1       /* transition to or from 'clean' */
     123             : #define MD_CHANGE_PENDING 2     /* superblock update in progress */
     124             : 
     125             :         int                             suspended;
     126             :         atomic_t                        active_io;
     127             :         int                             ro;
     128             : 
     129             :         struct gendisk                  *gendisk;
     130             : 
     131             :         struct kobject                  kobj;
     132             :         int                             hold_active;
     133             : #define UNTIL_IOCTL     1
     134             : #define UNTIL_STOP      2
     135             : 
     136             :         /* Superblock information */
     137             :         int                             major_version,
     138             :                                         minor_version,
     139             :                                         patch_version;
     140             :         int                             persistent;
     141             :         int                             external;       /* metadata is
     142             :                                                          * managed externally */
     143             :         char                            metadata_type[17]; /* externally set*/
     144             :         int                             chunk_sectors;
     145             :         time_t                          ctime, utime;
     146             :         int                             level, layout;
     147             :         char                            clevel[16];
     148             :         int                             raid_disks;
     149             :         int                             max_disks;
     150             :         sector_t                        dev_sectors;    /* used size of
     151             :                                                          * component devices */
     152             :         sector_t                        array_sectors; /* exported array size */
     153             :         int                             external_size; /* size managed
     154             :                                                         * externally */
     155             :         __u64                           events;
     156             : 
     157             :         char                            uuid[16];
     158             : 
     159             :         /* If the array is being reshaped, we need to record the
     160             :          * new shape and an indication of where we are up to.
     161             :          * This is written to the superblock.
     162             :          * If reshape_position is MaxSector, then no reshape is happening (yet).
     163             :          */
     164             :         sector_t                        reshape_position;
     165             :         int                             delta_disks, new_level, new_layout;
     166             :         int                             new_chunk_sectors;
     167             : 
     168             :         struct mdk_thread_s             *thread;        /* management thread */
     169             :         struct mdk_thread_s             *sync_thread;   /* doing resync or reconstruct */
     170             :         sector_t                        curr_resync;    /* last block scheduled */
     171             :         /* As resync requests can complete out of order, we cannot easily track
     172             :          * how much resync has been completed.  So we occasionally pause until
     173             :          * everything completes, then set curr_resync_completed to curr_resync.
     174             :          * As such it may be well behind the real resync mark, but it is a value
     175             :          * we are certain of.
     176             :          */
     177             :         sector_t                        curr_resync_completed;
     178             :         unsigned long                   resync_mark;    /* a recent timestamp */
     179             :         sector_t                        resync_mark_cnt;/* blocks written at resync_mark */
     180             :         sector_t                        curr_mark_cnt; /* blocks scheduled now */
     181             : 
     182             :         sector_t                        resync_max_sectors; /* may be set by personality */
     183             : 
     184             :         sector_t                        resync_mismatches; /* count of sectors where
     185             :                                                             * parity/replica mismatch found
     186             :                                                             */
     187             : 
     188             :         /* allow user-space to request suspension of IO to regions of the array */
     189             :         sector_t                        suspend_lo;
     190             :         sector_t                        suspend_hi;
     191             :         /* if zero, use the system-wide default */
     192             :         int                             sync_speed_min;
     193             :         int                             sync_speed_max;
     194             : 
     195             :         /* resync even though the same disks are shared among md-devices */
     196             :         int                             parallel_resync;
     197             : 
     198             :         int                             ok_start_degraded;
     199             :         /* recovery/resync flags 
     200             :          * NEEDED:   we might need to start a resync/recover
     201             :          * RUNNING:  a thread is running, or about to be started
     202             :          * SYNC:     actually doing a resync, not a recovery
     203             :          * RECOVER:  doing recovery, or need to try it.
     204             :          * INTR:     resync needs to be aborted for some reason
     205             :          * DONE:     thread is done and is waiting to be reaped
     206             :          * REQUEST:  user-space has requested a sync (used with SYNC)
     207             :          * CHECK:    user-space request for check-only, no repair
     208             :          * RESHAPE:  A reshape is happening
     209             :          *
     210             :          * If neither SYNC nor RESHAPE is set, then it is a recovery.
     211             :          */
     212             : #define MD_RECOVERY_RUNNING     0
     213             : #define MD_RECOVERY_SYNC        1
     214             : #define MD_RECOVERY_RECOVER     2
     215             : #define MD_RECOVERY_INTR        3
     216             : #define MD_RECOVERY_DONE        4
     217             : #define MD_RECOVERY_NEEDED      5
     218             : #define MD_RECOVERY_REQUESTED   6
     219             : #define MD_RECOVERY_CHECK       7
     220             : #define MD_RECOVERY_RESHAPE     8
     221             : #define MD_RECOVERY_FROZEN      9
     222             : 
     223             :         unsigned long                   recovery;
     224             :         int                             recovery_disabled; /* if we detect that recovery
     225             :                                                             * will always fail, set this
     226             :                                                             * so we don't loop trying */
     227             : 
     228             :         int                             in_sync;        /* known not to need resync */
     229             :         /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
     230             :          * that we are never stopping an array while it is open.
     231             :          * 'reconfig_mutex' protects all other reconfiguration.
     232             :          * These locks are separate due to conflicting interactions
     233             :          * with bdev->bd_mutex.
     234             :          * Lock ordering is:
     235             :          *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
     236             :          *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
     237             :          */
     238             :         struct mutex                    open_mutex;
     239             :         struct mutex                    reconfig_mutex;
     240             :         atomic_t                        active;         /* general refcount */
     241             :         atomic_t                        openers;        /* number of active opens */
     242             : 
     243             :         int                             changed;        /* true if we might need to reread partition info */
     244             :         int                             degraded;       /* whether md should consider
     245             :                                                          * adding a spare
     246             :                                                          */
     247             :         int                             barriers_work;  /* initialised to true, cleared as soon
     248             :                                                          * as a barrier request to slave
     249             :                                                          * fails.  Only supported
     250             :                                                          */
     251             :         struct bio                      *biolist;       /* bios that need to be retried
     252             :                                                          * because BIO_RW_BARRIER is not supported
     253             :                                                          */
     254             : 
     255             :         atomic_t                        recovery_active; /* blocks scheduled, but not written */
     256             :         wait_queue_head_t               recovery_wait;
     257             :         sector_t                        recovery_cp;
     258             :         sector_t                        resync_min;     /* user requested sync
     259             :                                                          * starts here */
     260             :         sector_t                        resync_max;     /* resync should pause
     261             :                                                          * when it gets here */
     262             : 
     263             :         struct sysfs_dirent             *sysfs_state;   /* handle for 'array_state'
     264             :                                                          * file in sysfs.
     265             :                                                          */
     266             :         struct sysfs_dirent             *sysfs_action;  /* handle for 'sync_action' */
     267             : 
     268             :         struct work_struct del_work;    /* used for delayed sysfs removal */
     269             : 
     270             :         spinlock_t                      write_lock;
     271             :         wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
     272             :         atomic_t                        pending_writes; /* number of active superblock writes */
     273             : 
     274             :         unsigned int                    safemode;       /* if set, update "clean" superblock
     275             :                                                          * when no writes pending.
     276             :                                                          */ 
     277             :         unsigned int                    safemode_delay;
     278             :         struct timer_list               safemode_timer;
     279             :         atomic_t                        writes_pending; 
     280             :         struct request_queue            *queue; /* for plugging ... */
     281             : 
     282             :         atomic_t                        write_behind; /* outstanding async IO */
     283             :         unsigned int                    max_write_behind; /* 0 = sync */
     284             : 
     285             :         struct bitmap                   *bitmap; /* the bitmap for the device */
     286             :         struct {
     287             :                 struct file             *file; /* the bitmap file */
     288             :                 loff_t                  offset; /* offset from superblock of
     289             :                                                  * start of bitmap. May be
     290             :                                                  * negative, but not '0'
     291             :                                                  * For external metadata, offset
     292             :                                                  * from start of device. 
     293             :                                                  */
     294             :                 loff_t                  default_offset; /* this is the offset to use when
     295             :                                                          * hot-adding a bitmap.  It should
     296             :                                                          * eventually be settable by sysfs.
     297             :                                                          */
     298             :                 struct mutex            mutex;
     299             :                 unsigned long           chunksize;
     300             :                 unsigned long           daemon_sleep; /* how many seconds between updates? */
     301             :                 unsigned long           max_write_behind; /* write-behind mode */
     302             :                 int                     external;
     303             :         } bitmap_info;
     304             : 
     305             :         atomic_t                        max_corr_read_errors; /* max read retries */
     306             :         struct list_head                all_mddevs;
     307             : 
     308             :         struct attribute_group          *to_remove;
     309             :         /* Generic barrier handling.
     310             :          * If there is a pending barrier request, all other
     311             :          * writes are blocked while the devices are flushed.
     312             :          * The last to finish a flush schedules a worker to
     313             :          * submit the barrier request (without the barrier flag),
     314             :          * then submit more flush requests.
     315             :          */
     316             :         struct bio *barrier;
     317             :         atomic_t flush_pending;
     318             :         struct work_struct barrier_work;
     319             : };
     320             : 
     321             : 
     322             : static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
     323             : {
     324         100 :         int faulty = test_bit(Faulty, &rdev->flags);
     325         175 :         if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
     326          75 :                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
     327          50 : }
     328             : 
     329             : static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
     330             : {
     331           2 :         atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
     332           1 : }
     333           1 : 
     334             : struct mdk_personality
     335             : {
     336             :         char *name;
     337             :         int level;
     338             :         struct list_head list;
     339             :         struct module *owner;
     340             :         int (*make_request)(struct request_queue *q, struct bio *bio);
     341             :         int (*run)(mddev_t *mddev);
     342             :         int (*stop)(mddev_t *mddev);
     343             :         void (*status)(struct seq_file *seq, mddev_t *mddev);
     344             :         /* error_handler must set the Faulty flag and clear the In_sync flag
     345             :          * if appropriate, and should abort recovery if needed
     346             :          */
     347             :         void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
     348             :         int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
     349             :         int (*hot_remove_disk) (mddev_t *mddev, int number);
     350             :         int (*spare_active) (mddev_t *mddev);
     351             :         sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
     352             :         int (*resize) (mddev_t *mddev, sector_t sectors);
     353             :         sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
     354             :         int (*check_reshape) (mddev_t *mddev);
     355             :         int (*start_reshape) (mddev_t *mddev);
     356             :         void (*finish_reshape) (mddev_t *mddev);
     357             :         /* quiesce moves between quiescence states
     358             :          * 0 - fully active
     359             :          * 1 - no new requests allowed
     360             :          * others - reserved
     361             :          */
     362             :         void (*quiesce) (mddev_t *mddev, int state);
     363             :         /* takeover is used to transition an array from one
     364             :          * personality to another.  The new personality must be able
     365             :          * to handle the data in the current layout.
     366             :          * e.g. 2drive raid1 -> 2drive raid5
     367             :          *      ndrive raid5 -> degraded n+1drive raid6 with special layout
     368             :          * If the takeover succeeds, a new 'private' structure is returned.
     369             :          * This needs to be installed and then ->run used to activate the
     370             :          * array.
     371             :          */
     372             :         void *(*takeover) (mddev_t *mddev);
     373             : };
     374             : 
     375             : 
     376             : struct md_sysfs_entry {
     377             :         struct attribute attr;
     378             :         ssize_t (*show)(mddev_t *, char *);
     379             :         ssize_t (*store)(mddev_t *, const char *, size_t);
     380             : };
     381             : extern struct attribute_group md_bitmap_group;
     382             : 
     383             : static inline char * mdname (mddev_t * mddev)
     384             : {
     385          77 :         return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
     386             : }
     387           1 : 
     388             : /*
     389             :  * iterates through some rdev ringlist. It's safe to remove the
     390             :  * current 'rdev'. Don't touch 'tmp' though.
     391             :  */
     392             : #define rdev_for_each_list(rdev, tmp, head)                             \
     393             :         list_for_each_entry_safe(rdev, tmp, head, same_set)
     394             : 
     395             : /*
     396             :  * iterates through the 'same array disks' ringlist
     397             :  */
     398             : #define rdev_for_each(rdev, tmp, mddev)                         \
     399             :         list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
     400             : 
     401             : #define rdev_for_each_rcu(rdev, mddev)                          \
     402             :         list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
     403             : 
     404             : typedef struct mdk_thread_s {
     405             :         void                    (*run) (mddev_t *mddev);
     406             :         mddev_t                 *mddev;
     407             :         wait_queue_head_t       wqueue;
     408             :         unsigned long           flags;
     409             :         struct task_struct      *tsk;
     410             :         unsigned long           timeout;
     411           1 : } mdk_thread_t;
     412             : 
     413             : #define THREAD_WAKEUP  0
     414             : 
     415             : #define __wait_event_lock_irq(wq, condition, lock, cmd)                 \
     416             : do {                                                                    \
     417             :         wait_queue_t __wait;                                            \
     418             :         init_waitqueue_entry(&__wait, current);                             \
     419             :                                                                         \
     420             :         add_wait_queue(&wq, &__wait);                                   \
     421             :         for (;;) {                                                      \
     422             :                 set_current_state(TASK_UNINTERRUPTIBLE);                \
     423             :                 if (condition)                                          \
     424             :                         break;                                          \
     425             :                 spin_unlock_irq(&lock);                                     \
     426             :                 cmd;                                                    \
     427             :                 schedule();                                             \
     428             :                 spin_lock_irq(&lock);                                       \
     429             :         }                                                               \
     430             :         current->state = TASK_RUNNING;                                       \
     431             :         remove_wait_queue(&wq, &__wait);                                \
     432             : } while (0)
     433             : 
     434             : #define wait_event_lock_irq(wq, condition, lock, cmd)                   \
     435             : do {                                                                    \
     436             :         if (condition)                                                  \
     437             :                 break;                                                  \
     438             :         __wait_event_lock_irq(wq, condition, lock, cmd);                \
     439             : } while (0)
     440             : 
     441             : static inline void safe_put_page(struct page *p)
     442             : {
     443          36 :         if (p) put_page(p);
     444          12 : }
     445             : 
     446             : extern int register_md_personality(struct mdk_personality *p);
     447             : extern int unregister_md_personality(struct mdk_personality *p);
     448             : extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
     449             :                                 mddev_t *mddev, const char *name);
     450             : extern void md_unregister_thread(mdk_thread_t *thread);
     451             : extern void md_wakeup_thread(mdk_thread_t *thread);
     452             : extern void md_check_recovery(mddev_t *mddev);
     453             : extern void md_write_start(mddev_t *mddev, struct bio *bi);
     454             : extern void md_write_end(mddev_t *mddev);
     455             : extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
     456             : extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
     457             : 
     458             : extern int mddev_congested(mddev_t *mddev, int bits);
     459             : extern void md_barrier_request(mddev_t *mddev, struct bio *bio);
     460             : extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
     461             :                            sector_t sector, int size, struct page *page);
     462             : extern void md_super_wait(mddev_t *mddev);
     463             : extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
     464             :                         struct page *page, int rw);
     465             : extern void md_do_sync(mddev_t *mddev);
     466             : extern void md_new_event(mddev_t *mddev);
     467             : extern int md_allow_write(mddev_t *mddev);
     468             : extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
     469             : extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
     470             : extern int md_check_no_bitmap(mddev_t *mddev);
     471             : extern int md_integrity_register(mddev_t *mddev);
     472             : extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
     473             : extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
     474             : extern void restore_bitmap_write_access(struct file *file);
     475             : 
     476             : #endif /* _MD_MD_H */

Generated by: LCOV version 1.10