LCOV - code coverage report
Current view: top level - block - cfq-iosched.c

Test: coverage.info                     Hit     Total   Coverage
Date: 2017-01-25        Lines:          118      1720      6.9 %
                        Functions:       32       196     16.3 %

          Line data    Source code
       1             : /*
       2             :  *  CFQ, or complete fairness queueing, disk scheduler.
       3             :  *
       4             :  *  Based on ideas from a previously unfinished io
        5             :  *  scheduler (round robin per-process disk scheduling) and from Andrea Arcangeli.
       6             :  *
       7             :  *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
       8             :  */
       9             : #include <linux/module.h>
      10             : #include <linux/blkdev.h>
      11             : #include <linux/elevator.h>
      12             : #include <linux/jiffies.h>
      13             : #include <linux/rbtree.h>
      14             : #include <linux/ioprio.h>
      15             : #include <linux/blktrace_api.h>
      16             : #include "blk-cgroup.h"
      17             : 
      18             : /*
      19             :  * tunables
      20             :  */
      21             : /* max queue in one round of service */
      22           1 : static const int cfq_quantum = 4;
      23           1 : static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
      24             : /* maximum backwards seek, in KiB */
      25           1 : static const int cfq_back_max = 16 * 1024;
      26             : /* penalty of a backwards seek */
      27           1 : static const int cfq_back_penalty = 2;
      28           1 : static const int cfq_slice_sync = HZ / 10;
      29           1 : static int cfq_slice_async = HZ / 25;
      30           1 : static const int cfq_slice_async_rq = 2;
      31           1 : static int cfq_slice_idle = HZ / 125;
      32             : static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
      33             : static const int cfq_hist_divisor = 4;
      34             : 
      35             : /*
      36             :  * offset from end of service tree
      37             :  */
      38             : #define CFQ_IDLE_DELAY          (HZ / 5)
      39             : 
      40             : /*
      41             :  * below this threshold, we consider thinktime immediate
      42             :  */
      43             : #define CFQ_MIN_TT              (2)
      44             : 
      45             : #define CFQ_SLICE_SCALE         (5)
      46             : #define CFQ_HW_QUEUE_MIN        (5)
      47             : #define CFQ_SERVICE_SHIFT       12
      48             : 
      49             : #define CFQQ_SEEK_THR           8 * 1024
      50             : #define CFQQ_SEEKY(cfqq)        ((cfqq)->seek_mean > CFQQ_SEEK_THR)
      51             : 
      52             : #define RQ_CIC(rq)              \
      53             :         ((struct cfq_io_context *) (rq)->elevator_private)
      54             : #define RQ_CFQQ(rq)             (struct cfq_queue *) ((rq)->elevator_private2)
      55             : 
      56           1 : static struct kmem_cache *cfq_pool;
      57           1 : static struct kmem_cache *cfq_ioc_pool;
      58             : 
      59           1 : static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
      60           1 : static struct completion *ioc_gone;
      61           1 : static DEFINE_SPINLOCK(ioc_gone_lock);
      62           1 : 
      63             : #define CFQ_PRIO_LISTS          IOPRIO_BE_NR
      64             : #define cfq_class_idle(cfqq)    ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
      65             : #define cfq_class_rt(cfqq)      ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
      66             : 
      67             : #define sample_valid(samples)   ((samples) > 80)
      68             : #define rb_entry_cfqg(node)     rb_entry((node), struct cfq_group, rb_node)
      69             : 
      70             : /*
      71             :  * Most of our rbtree usage is for sorting with min extraction, so
      72             :  * if we cache the leftmost node we don't have to walk down the tree
       73             :  * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should
      74             :  * move this into the elevator for the rq sorting as well.
      75             :  */
      76             : struct cfq_rb_root {
      77             :         struct rb_root rb;
      78             :         struct rb_node *left;
      79             :         unsigned count;
      80             :         u64 min_vdisktime;
      81             :         struct rb_node *active;
      82             :         unsigned total_weight;
      83             : };
      84           3 : #define CFQ_RB_ROOT     (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
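                        /*
                         * A usage note on the leftmost cache: it is maintained lazily.
                         * cfq_rb_first() below returns root->left when it is set and only
                         * walks the tree with rb_first() when it is not, while
                         * cfq_rb_erase() drops the cache whenever the cached node itself
                         * is removed.  The trailing CFQ_RB_ROOT members (active,
                         * total_weight) rely on the compound literal zero-initializing
                         * any unnamed fields.
                         */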
      85             : 
      86             : /*
      87             :  * Per process-grouping structure
      88             :  */
      89             : struct cfq_queue {
      90             :         /* reference count */
      91             :         atomic_t ref;
      92             :         /* various state flags, see below */
      93             :         unsigned int flags;
      94             :         /* parent cfq_data */
      95             :         struct cfq_data *cfqd;
      96             :         /* service_tree member */
      97             :         struct rb_node rb_node;
      98             :         /* service_tree key */
      99             :         unsigned long rb_key;
     100             :         /* prio tree member */
     101             :         struct rb_node p_node;
     102             :         /* prio tree root we belong to, if any */
     103             :         struct rb_root *p_root;
     104             :         /* sorted list of pending requests */
     105             :         struct rb_root sort_list;
     106             :         /* if fifo isn't expired, next request to serve */
     107             :         struct request *next_rq;
     108             :         /* requests queued in sort_list */
     109             :         int queued[2];
     110             :         /* currently allocated requests */
     111             :         int allocated[2];
     112             :         /* fifo list of requests in sort_list */
     113             :         struct list_head fifo;
     114             : 
     115             :         /* time when queue got scheduled in to dispatch first request. */
     116             :         unsigned long dispatch_start;
     117             :         unsigned int allocated_slice;
     118             :         /* time when first request from queue completed and slice started. */
     119             :         unsigned long slice_start;
     120             :         unsigned long slice_end;
     121             :         long slice_resid;
     122             :         unsigned int slice_dispatch;
     123             : 
     124             :         /* pending metadata requests */
     125             :         int meta_pending;
     126             :         /* number of requests that are on the dispatch list or inside driver */
     127             :         int dispatched;
     128             : 
     129             :         /* io prio of this group */
     130             :         unsigned short ioprio, org_ioprio;
     131             :         unsigned short ioprio_class, org_ioprio_class;
     132             : 
     133             :         unsigned int seek_samples;
     134             :         u64 seek_total;
     135             :         sector_t seek_mean;
     136             :         sector_t last_request_pos;
     137             : 
     138             :         pid_t pid;
     139             : 
     140             :         struct cfq_rb_root *service_tree;
     141             :         struct cfq_queue *new_cfqq;
     142             :         struct cfq_group *cfqg;
     143             :         struct cfq_group *orig_cfqg;
     144             :         /* Sectors dispatched in current dispatch round */
     145             :         unsigned long nr_sectors;
     146             : };
     147           1 : 
     148             : /*
     149             :  * First index in the service_trees.
      150             :  * IDLE is handled separately, so it has a negative index
     151             :  */
     152             : enum wl_prio_t {
     153           1 :         BE_WORKLOAD = 0,
     154             :         RT_WORKLOAD = 1,
     155             :         IDLE_WORKLOAD = 2,
     156             : };
     157             : 
     158             : /*
     159           1 :  * Second index in the service_trees.
     160             :  */
     161             : enum wl_type_t {
     162             :         ASYNC_WORKLOAD = 0,
     163             :         SYNC_NOIDLE_WORKLOAD = 1,
     164             :         SYNC_WORKLOAD = 2
     165             : };
     166             : 
     167             : /* This is per cgroup per device grouping structure */
     168             : struct cfq_group {
     169             :         /* group service_tree member */
     170             :         struct rb_node rb_node;
     171             : 
     172             :         /* group service_tree key */
     173             :         u64 vdisktime;
     174             :         unsigned int weight;
     175             :         bool on_st;
     176             : 
     177             :         /* number of cfqq currently on this group */
     178             :         int nr_cfqq;
     179             : 
      180             :         /* Per group busy queues average. Useful for workload slice calc. */
     181             :         unsigned int busy_queues_avg[2];
     182             :         /*
      183             :          * rr lists of queues with requests, one rr for each priority class.
     184             :          * Counts are embedded in the cfq_rb_root
     185             :          */
     186             :         struct cfq_rb_root service_trees[2][3];
     187             :         struct cfq_rb_root service_tree_idle;
     188             : 
     189             :         unsigned long saved_workload_slice;
     190             :         enum wl_type_t saved_workload;
     191             :         enum wl_prio_t saved_serving_prio;
     192             :         struct blkio_group blkg;
     193             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
     194           1 :         struct hlist_node cfqd_node;
     195             :         atomic_t ref;
     196             : #endif
     197             : };
     198             : 
     199             : /*
     200             :  * Per block device queue structure
     201             :  */
     202             : struct cfq_data {
     203             :         struct request_queue *queue;
     204             :         /* Root service tree for cfq_groups */
     205             :         struct cfq_rb_root grp_service_tree;
     206             :         struct cfq_group root_group;
     207             : 
     208             :         /*
     209             :          * The priority currently being served
     210             :          */
     211             :         enum wl_prio_t serving_prio;
     212             :         enum wl_type_t serving_type;
     213             :         unsigned long workload_expires;
     214             :         struct cfq_group *serving_group;
     215             :         bool noidle_tree_requires_idle;
     216             : 
     217             :         /*
     218             :          * Each priority tree is sorted by next_request position.  These
     219             :          * trees are used when determining if two or more queues are
     220             :          * interleaving requests (see cfq_close_cooperator).
     221             :          */
     222             :         struct rb_root prio_trees[CFQ_PRIO_LISTS];
     223             : 
     224             :         unsigned int busy_queues;
     225             : 
     226             :         int rq_in_driver[2];
     227             :         int sync_flight;
     228             : 
     229             :         /*
     230             :          * queue-depth detection
     231             :          */
     232             :         int rq_queued;
     233             :         int hw_tag;
     234             :         /*
     235             :          * hw_tag can be
      236             :          * -1 => indeterminate (cfq will behave as if NCQ is present, to allow better detection)
     237             :          *  1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
     238             :          *  0 => no NCQ
     239             :          */
     240             :         int hw_tag_est_depth;
     241             :         unsigned int hw_tag_samples;
     242             : 
     243             :         /*
     244             :          * idle window management
     245             :          */
     246             :         struct timer_list idle_slice_timer;
     247             :         struct work_struct unplug_work;
     248             : 
     249             :         struct cfq_queue *active_queue;
     250             :         struct cfq_io_context *active_cic;
     251             : 
     252             :         /*
     253             :          * async queue for each priority case
     254             :          */
     255             :         struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
     256             :         struct cfq_queue *async_idle_cfqq;
     257             : 
     258             :         sector_t last_position;
     259             : 
     260             :         /*
     261             :          * tunables, see top of file
     262             :          */
     263             :         unsigned int cfq_quantum;
     264             :         unsigned int cfq_fifo_expire[2];
     265             :         unsigned int cfq_back_penalty;
     266             :         unsigned int cfq_back_max;
     267             :         unsigned int cfq_slice[2];
     268             :         unsigned int cfq_slice_async_rq;
     269             :         unsigned int cfq_slice_idle;
     270             :         unsigned int cfq_latency;
     271             :         unsigned int cfq_group_isolation;
     272             : 
     273             :         struct list_head cic_list;
     274             : 
     275             :         /*
     276             :          * Fallback dummy cfqq for extreme OOM conditions
     277             :          */
     278             :         struct cfq_queue oom_cfqq;
     279             : 
     280             :         unsigned long last_delayed_sync;
     281             : 
     282             :         /* List of cfq groups being managed on this device*/
     283             :         struct hlist_head cfqg_list;
     284             :         struct rcu_head rcu;
     285             : };
     286             : 
     287             : static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
     288             : 
     289             : static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
     290             :                                             enum wl_prio_t prio,
     291             :                                             enum wl_type_t type)
     292             : {
     293           0 :         if (!cfqg)
     294           0 :                 return NULL;
     295             : 
     296           0 :         if (prio == IDLE_WORKLOAD)
     297           0 :                 return &cfqg->service_tree_idle;
     298             : 
     299           0 :         return &cfqg->service_trees[prio][type];
     300             : }
     301             : 
     302             : enum cfqq_state_flags {
     303             :         CFQ_CFQQ_FLAG_on_rr = 0,        /* on round-robin busy list */
     304             :         CFQ_CFQQ_FLAG_wait_request,     /* waiting for a request */
     305             :         CFQ_CFQQ_FLAG_must_dispatch,    /* must be allowed a dispatch */
     306             :         CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
     307             :         CFQ_CFQQ_FLAG_fifo_expire,      /* FIFO checked in this slice */
     308             :         CFQ_CFQQ_FLAG_idle_window,      /* slice idling enabled */
     309             :         CFQ_CFQQ_FLAG_prio_changed,     /* task priority has changed */
     310             :         CFQ_CFQQ_FLAG_slice_new,        /* no requests dispatched in slice */
     311             :         CFQ_CFQQ_FLAG_sync,             /* synchronous queue */
     312             :         CFQ_CFQQ_FLAG_coop,             /* cfqq is shared */
     313             :         CFQ_CFQQ_FLAG_split_coop,       /* shared cfqq will be splitted */
     314             :         CFQ_CFQQ_FLAG_deep,             /* sync cfqq experienced large depth */
     315             :         CFQ_CFQQ_FLAG_wait_busy,        /* Waiting for next request */
     316             : };
     317             : 
     318             : #define CFQ_CFQQ_FNS(name)                                              \
     319             : static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)         \
     320             : {                                                                       \
     321             :         (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name);                  \
     322             : }                                                                       \
     323             : static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)        \
     324             : {                                                                       \
     325             :         (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);                     \
     326             : }                                                                       \
     327             : static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)         \
     328             : {                                                                       \
     329             :         return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
     330             : }
     331             : 
     332           0 : CFQ_CFQQ_FNS(on_rr);
     333           0 : CFQ_CFQQ_FNS(wait_request);
     334           0 : CFQ_CFQQ_FNS(must_dispatch);
     335           0 : CFQ_CFQQ_FNS(must_alloc_slice);
     336           0 : CFQ_CFQQ_FNS(fifo_expire);
     337           0 : CFQ_CFQQ_FNS(idle_window);
     338           0 : CFQ_CFQQ_FNS(prio_changed);
     339           0 : CFQ_CFQQ_FNS(slice_new);
     340           0 : CFQ_CFQQ_FNS(sync);
     341           0 : CFQ_CFQQ_FNS(coop);
     342           0 : CFQ_CFQQ_FNS(split_coop);
     343           0 : CFQ_CFQQ_FNS(deep);
     344           0 : CFQ_CFQQ_FNS(wait_busy);
     345           0 : #undef CFQ_CFQQ_FNS
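                        /*
                         * For reference, each CFQ_CFQQ_FNS(name) line above expands to a
                         * mark/clear/test triple; CFQ_CFQQ_FNS(on_rr), for example, becomes:
                         *
                         *      static inline void cfq_mark_cfqq_on_rr(struct cfq_queue *cfqq)
                         *      {
                         *              (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_on_rr);
                         *      }
                         *      static inline void cfq_clear_cfqq_on_rr(struct cfq_queue *cfqq)
                         *      {
                         *              (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_on_rr);
                         *      }
                         *      static inline int cfq_cfqq_on_rr(const struct cfq_queue *cfqq)
                         *      {
                         *              return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_on_rr)) != 0;
                         *      }
                         */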
     346             : 
     347             : #ifdef CONFIG_DEBUG_CFQ_IOSCHED
     348             : #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)  \
     349             :         blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
     350             :                         cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
     351             :                         blkg_path(&(cfqq)->cfqg->blkg), ##args);
     352             : 
     353             : #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)                          \
     354             :         blk_add_trace_msg((cfqd)->queue, "%s " fmt,                        \
     355             :                                 blkg_path(&(cfqg)->blkg), ##args);      \
     356             : 
     357             : #else
     358             : #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)  \
     359             :         blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
     360             : #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)          do {} while (0);
     361             : #endif
     362             : #define cfq_log(cfqd, fmt, args...)     \
     363             :         blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
     364             : 
     365             : /* Traverses through cfq group service trees */
     366             : #define for_each_cfqg_st(cfqg, i, j, st) \
     367             :         for (i = 0; i <= IDLE_WORKLOAD; i++) \
     368             :                 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
     369             :                         : &cfqg->service_tree_idle; \
     370             :                         (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
     371             :                         (i == IDLE_WORKLOAD && j == 0); \
     372             :                         j++, st = i < IDLE_WORKLOAD ? \
     373             :                         &cfqg->service_trees[i][j]: NULL) \
     374             : 
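                        /*
                         * Expanded, the loop above visits seven trees per group, in this
                         * order (indices per enum wl_prio_t / enum wl_type_t):
                         *
                         *      i = 0: service_trees[BE_WORKLOAD][ASYNC..SYNC]   (j = 0..2)
                         *      i = 1: service_trees[RT_WORKLOAD][ASYNC..SYNC]   (j = 0..2)
                         *      i = 2: service_tree_idle                         (j = 0 only)
                         */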
     375             : 
     376             : static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
     377             : {
     378           0 :         if (cfq_class_idle(cfqq))
     379           0 :                 return IDLE_WORKLOAD;
     380           0 :         if (cfq_class_rt(cfqq))
     381           0 :                 return RT_WORKLOAD;
     382           0 :         return BE_WORKLOAD;
     383             : }
     384             : 
     385             : 
     386             : static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
     387             : {
     388           0 :         if (!cfq_cfqq_sync(cfqq))
     389           0 :                 return ASYNC_WORKLOAD;
     390           0 :         if (!cfq_cfqq_idle_window(cfqq))
     391           0 :                 return SYNC_NOIDLE_WORKLOAD;
     392           0 :         return SYNC_WORKLOAD;
     393             : }
     394             : 
     395             : static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
     396             :                                         struct cfq_data *cfqd,
     397             :                                         struct cfq_group *cfqg)
     398             : {
     399           0 :         if (wl == IDLE_WORKLOAD)
     400           0 :                 return cfqg->service_tree_idle.count;
     401             : 
     402           0 :         return cfqg->service_trees[wl][ASYNC_WORKLOAD].count
     403             :                 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
     404             :                 + cfqg->service_trees[wl][SYNC_WORKLOAD].count;
     405             : }
     406             : 
     407             : static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
     408             :                                         struct cfq_group *cfqg)
     409             : {
     410           0 :         return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count
     411             :                 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
     412             : }
     413             : 
     414             : static void cfq_dispatch_insert(struct request_queue *, struct request *);
     415             : static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
     416             :                                        struct io_context *, gfp_t);
     417             : static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
     418             :                                                 struct io_context *);
     419             : 
     420             : static inline int rq_in_driver(struct cfq_data *cfqd)
     421             : {
     422           0 :         return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
     423             : }
     424             : 
     425             : static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
     426             :                                             bool is_sync)
     427             : {
     428           0 :         return cic->cfqq[is_sync];
     429             : }
     430             : 
     431             : static inline void cic_set_cfqq(struct cfq_io_context *cic,
     432             :                                 struct cfq_queue *cfqq, bool is_sync)
     433             : {
     434           0 :         cic->cfqq[is_sync] = cfqq;
     435           0 : }
     436             : 
     437             : /*
      438             :  * We regard a request as SYNC if it's either a read or has the SYNC bit
      439             :  * set (in which case it could also be a direct WRITE).
     440             :  */
     441             : static inline bool cfq_bio_sync(struct bio *bio)
     442             : {
     443           0 :         return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
     444           0 : }
     445             : 
     446             : /*
      447             :  * Schedule a run of the queue if there are requests pending and nothing in
      448             :  * the driver that will restart queueing.
     449             :  */
     450             : static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
     451             : {
     452           0 :         if (cfqd->busy_queues) {
     453             :                 cfq_log(cfqd, "schedule dispatch");
     454           0 :                 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
     455           0 :         }
     456             : }
     457             : 
     458             : static int cfq_queue_empty(struct request_queue *q)
     459             : {
     460           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
     461             : 
     462           0 :         return !cfqd->rq_queued;
     463             : }
     464             : 
     465             : /*
     466             :  * Scale schedule slice based on io priority. Use the sync time slice only
     467             :  * if a queue is marked sync and has sync io queued. A sync queue with async
      468             :  * io only should not get the full sync slice length.
     469             :  */
     470             : static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
     471             :                                  unsigned short prio)
     472           0 : {
     473           0 :         const int base_slice = cfqd->cfq_slice[sync];
     474           0 : 
     475           0 :         WARN_ON(prio >= IOPRIO_BE_NR);
     476             : 
     477           0 :         return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
     478             : }
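                        /*
                         * Worked example, assuming HZ=1000 so that cfq_slice_sync is 100
                         * jiffies (100ms): with CFQ_SLICE_SCALE == 5 the per-priority step
                         * is 100/5 = 20, so a sync queue at best-effort prio 0 gets
                         * 100 + 20 * 4 = 180ms, the default prio 4 gets the base 100ms,
                         * and the lowest prio 7 gets 100 - 20 * 3 = 40ms.
                         */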
     479             : 
     480             : static inline int
     481             : cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
     482             : {
     483           0 :         return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
     484           0 : }
     485             : 
     486             : static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
     487             : {
     488           0 :         u64 d = delta << CFQ_SERVICE_SHIFT;
     489           0 : 
     490           0 :         d = d * BLKIO_WEIGHT_DEFAULT;
     491           0 :         do_div(d, cfqg->weight);
     492           0 :         return d;
     493             : }
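                        /*
                         * In effect the real service time is converted to vdisktime by
                         * scaling with BLKIO_WEIGHT_DEFAULT / cfqg->weight: a group with
                         * twice the default blkio weight accrues vdisktime at half the
                         * rate for the same service, so it stays toward the left of the
                         * group service tree longer and is scheduled more often.
                         */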
     494             : 
     495             : static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
     496             : {
     497           0 :         s64 delta = (s64)(vdisktime - min_vdisktime);
     498           0 :         if (delta > 0)
     499           0 :                 min_vdisktime = vdisktime;
     500             : 
     501           0 :         return min_vdisktime;
     502             : }
     503             : 
     504             : static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
     505             : {
     506           0 :         s64 delta = (s64)(vdisktime - min_vdisktime);
     507           0 :         if (delta < 0)
     508           0 :                 min_vdisktime = vdisktime;
     509             : 
     510           0 :         return min_vdisktime;
     511             : }
     512             : 
     513             : static void update_min_vdisktime(struct cfq_rb_root *st)
     514             : {
     515           0 :         u64 vdisktime = st->min_vdisktime;
     516           0 :         struct cfq_group *cfqg;
     517           0 : 
     518           0 :         if (st->active) {
     519           0 :                 cfqg = rb_entry_cfqg(st->active);
     520           0 :                 vdisktime = cfqg->vdisktime;
     521             :         }
     522             : 
     523           0 :         if (st->left) {
     524           0 :                 cfqg = rb_entry_cfqg(st->left);
     525           0 :                 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
     526             :         }
     527             : 
     528           0 :         st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
     529           0 : }
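                        /*
                         * Note that min_vdisktime()/max_vdisktime() compare via a signed
                         * 64-bit delta instead of a plain '<' so the result stays correct
                         * even if the u64 vdisktime values wrap, much like CFS handles
                         * vruntime.
                         */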
     530             : 
     531             : /*
      532             :  * Get the averaged number of queues of RT/BE priority.
      533             :  * The average is updated with a formula that gives more weight to higher
      534             :  * numbers, so it follows sudden increases quickly and decreases slowly.
     535             :  */
     536             : 
     537             : static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
     538             :                                         struct cfq_group *cfqg, bool rt)
     539             : {
     540           0 :         unsigned min_q, max_q;
     541           0 :         unsigned mult  = cfq_hist_divisor - 1;
     542           0 :         unsigned round = cfq_hist_divisor / 2;
     543           0 :         unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
     544           0 : 
     545           0 :         min_q = min(cfqg->busy_queues_avg[rt], busy);
     546           0 :         max_q = max(cfqg->busy_queues_avg[rt], busy);
     547           0 :         cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
     548           0 :                 cfq_hist_divisor;
     549           0 :         return cfqg->busy_queues_avg[rt];
     550             : }
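                        /*
                         * With cfq_hist_divisor == 4 the update above is
                         *      avg = (3 * max(avg, busy) + min(avg, busy) + 2) / 4
                         * so an increase in the busy count pulls the average up by about
                         * three quarters of the difference in one step, while a decrease
                         * decays it by only about one quarter per step.
                         */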
     551             : 
     552             : static inline unsigned
     553             : cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
     554             : {
     555           0 :         struct cfq_rb_root *st = &cfqd->grp_service_tree;
     556             : 
     557           0 :         return cfq_target_latency * cfqg->weight / st->total_weight;
     558             : }
     559             : 
     560             : static inline void
     561             : cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
     562             : {
     563           0 :         unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
     564           0 :         if (cfqd->cfq_latency) {
     565           0 :                 /*
     566           0 :                  * interested queues (we consider only the ones with the same
     567           0 :                  * priority class in the cfq group)
     568           0 :                  */
     569           0 :                 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
     570           0 :                                                 cfq_class_rt(cfqq));
     571           0 :                 unsigned sync_slice = cfqd->cfq_slice[1];
     572           0 :                 unsigned expect_latency = sync_slice * iq;
     573           0 :                 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
     574           0 : 
     575           0 :                 if (expect_latency > group_slice) {
     576           0 :                         unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
     577             :                         /* scale low_slice according to IO priority
     578             :                          * and sync vs async */
     579             :                         unsigned low_slice =
     580           0 :                                 min(slice, base_low_slice * slice / sync_slice);
     581             :                         /* the adapted slice value is scaled to fit all iqs
     582             :                          * into the target latency */
     583           0 :                         slice = max(slice * group_slice / expect_latency,
     584             :                                     low_slice);
     585             :                 }
     586             :         }
     587           0 :         cfqq->slice_start = jiffies;
     588           0 :         cfqq->slice_end = jiffies + slice;
     589           0 :         cfqq->allocated_slice = slice;
     590           0 :         cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
     591             : }
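                        /*
                         * Rough example of the scaling above, assuming HZ=1000: with
                         * sync_slice = 100 jiffies and a group slice of 300 (a single group
                         * owning the whole 300ms target latency), five interested queues
                         * give expect_latency = 500 > 300, so each slice is multiplied by
                         * 300/500, clamped from below by the low_slice value derived from
                         * 2 * cfq_slice_idle, keeping the group near its latency target.
                         */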
     592             : 
     593             : /*
     594             :  * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
     595             :  * isn't valid until the first request from the dispatch is activated
     596             :  * and the slice time set.
     597             :  */
     598             : static inline bool cfq_slice_used(struct cfq_queue *cfqq)
     599             : {
     600           0 :         if (cfq_cfqq_slice_new(cfqq))
     601           0 :                 return 0;
     602           0 :         if (time_before(jiffies, cfqq->slice_end))
     603           0 :                 return 0;
     604             : 
     605           0 :         return 1;
     606             : }
     607             : 
     608             : /*
     609             :  * Lifted from AS - choose which of rq1 and rq2 that is best served now.
     610             :  * We choose the request that is closest to the head right now. Distance
     611             :  * behind the head is penalized and only allowed to a certain extent.
     612             :  */
     613             : static struct request *
     614             : cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
     615             : {
     616           0 :         sector_t s1, s2, d1 = 0, d2 = 0;
     617           0 :         unsigned long back_max;
     618           0 : #define CFQ_RQ1_WRAP    0x01 /* request 1 wraps */
     619           0 : #define CFQ_RQ2_WRAP    0x02 /* request 2 wraps */
     620           0 :         unsigned wrap = 0; /* bit mask: requests behind the disk head? */
     621           0 : 
     622           0 :         if (rq1 == NULL || rq1 == rq2)
     623           0 :                 return rq2;
     624           0 :         if (rq2 == NULL)
     625           0 :                 return rq1;
     626           0 : 
     627           0 :         if (rq_is_sync(rq1) && !rq_is_sync(rq2))
     628           0 :                 return rq1;
     629           0 :         else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
     630           0 :                 return rq2;
     631           0 :         if (rq_is_meta(rq1) && !rq_is_meta(rq2))
     632           0 :                 return rq1;
     633           0 :         else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
     634           0 :                 return rq2;
     635             : 
     636           0 :         s1 = blk_rq_pos(rq1);
     637           0 :         s2 = blk_rq_pos(rq2);
     638             : 
     639             :         /*
     640             :          * by definition, 1KiB is 2 sectors
     641             :          */
     642           0 :         back_max = cfqd->cfq_back_max * 2;
     643             : 
     644             :         /*
     645             :          * Strict one way elevator _except_ in the case where we allow
     646             :          * short backward seeks which are biased as twice the cost of a
     647             :          * similar forward seek.
     648             :          */
     649           0 :         if (s1 >= last)
     650           0 :                 d1 = s1 - last;
     651           0 :         else if (s1 + back_max >= last)
     652           0 :                 d1 = (last - s1) * cfqd->cfq_back_penalty;
     653             :         else
     654           0 :                 wrap |= CFQ_RQ1_WRAP;
     655             : 
     656           0 :         if (s2 >= last)
     657           0 :                 d2 = s2 - last;
     658           0 :         else if (s2 + back_max >= last)
     659           0 :                 d2 = (last - s2) * cfqd->cfq_back_penalty;
     660             :         else
     661           0 :                 wrap |= CFQ_RQ2_WRAP;
     662             : 
     663             :         /* Found required data */
     664             : 
     665             :         /*
     666             :          * By doing switch() on the bit mask "wrap" we avoid having to
     667             :          * check two variables for all permutations: --> faster!
     668             :          */
     669             :         switch (wrap) {
     670           0 :         case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
     671           0 :                 if (d1 < d2)
     672           0 :                         return rq1;
     673           0 :                 else if (d2 < d1)
     674           0 :                         return rq2;
     675             :                 else {
     676           0 :                         if (s1 >= s2)
     677           0 :                                 return rq1;
     678             :                         else
     679           0 :                                 return rq2;
     680             :                 }
     681           0 : 
     682           0 :         case CFQ_RQ2_WRAP:
     683           0 :                 return rq1;
     684           0 :         case CFQ_RQ1_WRAP:
     685           0 :                 return rq2;
     686           0 :         case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
     687             :         default:
     688           0 :                 /*
     689             :                  * Since both rqs are wrapped,
     690             :                  * start with the one that's further behind head
     691             :                  * (--> only *one* back seek required),
     692             :                  * since back seek takes more time than forward.
     693             :                  */
     694           0 :                 if (s1 <= s2)
     695           0 :                         return rq1;
     696             :                 else
     697           0 :                         return rq2;
     698             :         }
     699             : }
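                        /*
                         * Worked example with the default tunables (cfq_back_max = 16*1024
                         * KiB, cfq_back_penalty = 2) and the head at sector 1000: a request
                         * at sector 1200 costs d = 200 and one at sector 900 costs
                         * d = (1000 - 900) * 2 = 200, so they tie and the higher sector
                         * (the forward request) wins.  Requests more than back_max sectors
                         * behind the head count as wrapped and lose to any non-wrapped one.
                         */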
     700             : 
     701             : /*
     702             :  * The below is leftmost cache rbtree addon
     703             :  */
     704             : static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
     705             : {
     706           0 :         /* Service tree is empty */
     707           0 :         if (!root->count)
     708           0 :                 return NULL;
     709             : 
     710           0 :         if (!root->left)
     711           0 :                 root->left = rb_first(&root->rb);
     712             : 
     713           0 :         if (root->left)
     714           0 :                 return rb_entry(root->left, struct cfq_queue, rb_node);
     715             : 
     716           0 :         return NULL;
     717             : }
     718             : 
     719             : static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
     720             : {
     721           0 :         if (!root->left)
     722           0 :                 root->left = rb_first(&root->rb);
     723             : 
     724           0 :         if (root->left)
     725           0 :                 return rb_entry_cfqg(root->left);
     726             : 
     727           0 :         return NULL;
     728             : }
     729             : 
     730             : static void rb_erase_init(struct rb_node *n, struct rb_root *root)
     731             : {
     732           0 :         rb_erase(n, root);
     733           0 :         RB_CLEAR_NODE(n);
     734           0 : }
     735             : 
     736             : static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
     737             : {
     738           0 :         if (root->left == n)
     739           0 :                 root->left = NULL;
     740           0 :         rb_erase_init(n, &root->rb);
     741           0 :         --root->count;
     742           0 : }
     743             : 
     744             : /*
     745             :  * would be nice to take fifo expire time into account as well
     746             :  */
     747             : static struct request *
     748             : cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
     749             :                   struct request *last)
     750             : {
     751           0 :         struct rb_node *rbnext = rb_next(&last->rb_node);
     752           0 :         struct rb_node *rbprev = rb_prev(&last->rb_node);
     753           0 :         struct request *next = NULL, *prev = NULL;
     754           0 : 
     755           0 :         BUG_ON(RB_EMPTY_NODE(&last->rb_node));
     756           0 : 
     757           0 :         if (rbprev)
     758           0 :                 prev = rb_entry_rq(rbprev);
     759           0 : 
     760           0 :         if (rbnext)
     761           0 :                 next = rb_entry_rq(rbnext);
     762           0 :         else {
     763           0 :                 rbnext = rb_first(&cfqq->sort_list);
     764           0 :                 if (rbnext && rbnext != &last->rb_node)
     765           0 :                         next = rb_entry_rq(rbnext);
     766             :         }
     767             : 
     768           0 :         return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
     769             : }
     770             : 
     771             : static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
     772             :                                       struct cfq_queue *cfqq)
     773           0 : {
     774           0 :         /*
     775           0 :          * just an approximation, should be ok.
     776             :          */
     777           0 :         return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
     778             :                        cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
     779             : }
     780             : 
     781             : static inline s64
     782             : cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
     783             : {
     784           0 :         return cfqg->vdisktime - st->min_vdisktime;
     785             : }
     786             : 
     787             : static void
     788             : __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
     789             : {
     790           0 :         struct rb_node **node = &st->rb.rb_node;
     791           0 :         struct rb_node *parent = NULL;
     792           0 :         struct cfq_group *__cfqg;
     793           0 :         s64 key = cfqg_key(st, cfqg);
     794           0 :         int left = 1;
     795           0 : 
     796           0 :         while (*node != NULL) {
     797           0 :                 parent = *node;
     798           0 :                 __cfqg = rb_entry_cfqg(parent);
     799             : 
     800           0 :                 if (key < cfqg_key(st, __cfqg))
     801           0 :                         node = &parent->rb_left;
     802             :                 else {
     803           0 :                         node = &parent->rb_right;
     804           0 :                         left = 0;
     805             :                 }
     806           0 :         }
     807             : 
     808           0 :         if (left)
     809           0 :                 st->left = &cfqg->rb_node;
     810             : 
     811           0 :         rb_link_node(&cfqg->rb_node, parent, node);
     812           0 :         rb_insert_color(&cfqg->rb_node, &st->rb);
     813           0 : }
     814             : 
     815             : static void
     816             : cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
     817             : {
     818           0 :         struct cfq_rb_root *st = &cfqd->grp_service_tree;
     819           0 :         struct cfq_group *__cfqg;
     820           0 :         struct rb_node *n;
     821           0 : 
     822           0 :         cfqg->nr_cfqq++;
     823           0 :         if (cfqg->on_st)
     824           0 :                 return;
     825             : 
     826             :         /*
      827             :          * Currently put the group at the end. Later implement something
      828             :          * so that groups get a lower vtime based on their weights, so that
      829             :          * a group does not lose everything if it was not continuously backlogged.
     830             :          */
     831           0 :         n = rb_last(&st->rb);
     832           0 :         if (n) {
     833           0 :                 __cfqg = rb_entry_cfqg(n);
     834           0 :                 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
     835             :         } else
     836           0 :                 cfqg->vdisktime = st->min_vdisktime;
     837             : 
     838           0 :         __cfq_group_service_tree_add(st, cfqg);
     839           0 :         cfqg->on_st = true;
     840           0 :         st->total_weight += cfqg->weight;
     841           0 : }
     842             : 
     843             : static void
     844             : cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
     845             : {
     846           0 :         struct cfq_rb_root *st = &cfqd->grp_service_tree;
     847           0 : 
     848           0 :         if (st->active == &cfqg->rb_node)
     849           0 :                 st->active = NULL;
     850             : 
     851           0 :         BUG_ON(cfqg->nr_cfqq < 1);
     852           0 :         cfqg->nr_cfqq--;
     853             : 
     854             :         /* If there are other cfq queues under this group, don't delete it */
     855           0 :         if (cfqg->nr_cfqq)
     856           0 :                 return;
     857             : 
     858             :         cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
     859           0 :         cfqg->on_st = false;
     860           0 :         st->total_weight -= cfqg->weight;
     861           0 :         if (!RB_EMPTY_NODE(&cfqg->rb_node))
     862           0 :                 cfq_rb_erase(&cfqg->rb_node, st);
     863           0 :         cfqg->saved_workload_slice = 0;
     864           0 :         blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
     865           0 : }
     866             : 
     867             : static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
     868             : {
     869           0 :         unsigned int slice_used;
     870           0 : 
     871           0 :         /*
     872             :          * Queue got expired before even a single request completed or
     873             :          * got expired immediately after first request completion.
     874             :          */
     875           0 :         if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
     876             :                 /*
     877             :                  * Also charge the seek time incurred to the group, otherwise
      878             :                  * if there are multiple queues in the group, each can dispatch
      879             :                  * a single request on seeky media and cause lots of seek time
      880             :                  * and the group will never know it.
     881             :                  */
     882           0 :                 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
     883             :                                         1);
     884             :         } else {
     885           0 :                 slice_used = jiffies - cfqq->slice_start;
     886           0 :                 if (slice_used > cfqq->allocated_slice)
     887           0 :                         slice_used = cfqq->allocated_slice;
     888             :         }
     889             : 
     890             :         cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
     891             :                                 cfqq->nr_sectors);
     892           0 :         return slice_used;
     893             : }
     894             : 
     895             : static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
     896             :                                 struct cfq_queue *cfqq)
     897           0 : {
     898           0 :         struct cfq_rb_root *st = &cfqd->grp_service_tree;
     899           0 :         unsigned int used_sl, charge_sl;
     900           0 :         int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
     901           0 :                         - cfqg->service_tree_idle.count;
     902           0 : 
     903           0 :         BUG_ON(nr_sync < 0);
     904           0 :         used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq);
     905             : 
     906           0 :         if (!cfq_cfqq_sync(cfqq) && !nr_sync)
     907           0 :                 charge_sl = cfqq->allocated_slice;
     908             : 
     909             :         /* Can't update vdisktime while group is on service tree */
     910           0 :         cfq_rb_erase(&cfqg->rb_node, st);
     911           0 :         cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg);
     912           0 :         __cfq_group_service_tree_add(st, cfqg);
     913             : 
     914             :         /* This group is being expired. Save the context */
     915           0 :         if (time_after(cfqd->workload_expires, jiffies)) {
     916           0 :                 cfqg->saved_workload_slice = cfqd->workload_expires
     917             :                                                 - jiffies;
     918           0 :                 cfqg->saved_workload = cfqd->serving_type;
     919           0 :                 cfqg->saved_serving_prio = cfqd->serving_prio;
     920             :         } else
     921           0 :                 cfqg->saved_workload_slice = 0;
     922             : 
     923             :         cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
     924             :                                         st->min_vdisktime);
     925           0 :         blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
     926             :                                                 cfqq->nr_sectors);
     927           0 : }
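                        /*
                         * Net effect of the accounting above: the group is taken off the
                         * service tree, charged charge_sl of service scaled by its weight
                         * via cfq_scale_slice(), and re-inserted at the new, larger
                         * vdisktime, so heavier groups drift right more slowly and win the
                         * leftmost slot again sooner for the same amount of service.
                         */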
     928             : 
     929             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
     930             : static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
     931             : {
     932             :         if (blkg)
     933             :                 return container_of(blkg, struct cfq_group, blkg);
     934             :         return NULL;
     935             : }
     936             : 
     937             : void
     938             : cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
     939             : {
     940             :         cfqg_of_blkg(blkg)->weight = weight;
     941             : }
     942             : 
     943             : static struct cfq_group *
     944             : cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
     945             : {
     946             :         struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
     947             :         struct cfq_group *cfqg = NULL;
     948             :         void *key = cfqd;
     949             :         int i, j;
     950             :         struct cfq_rb_root *st;
     951             :         struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
     952             :         unsigned int major, minor;
     953             : 
     954             :         /* Do we need to take this reference */
     955             :         if (!blkiocg_css_tryget(blkcg))
      956             :                 return NULL;
     957             : 
     958             :         cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
     959             :         if (cfqg || !create)
     960             :                 goto done;
     961             : 
     962             :         cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
     963             :         if (!cfqg)
     964             :                 goto done;
     965             : 
     966             :         cfqg->weight = blkcg->weight;
     967             :         for_each_cfqg_st(cfqg, i, j, st)
     968             :                 *st = CFQ_RB_ROOT;
     969             :         RB_CLEAR_NODE(&cfqg->rb_node);
     970             : 
     971             :         /*
     972             :          * Take the initial reference that will be released on destroy
     973             :          * This can be thought of a joint reference by cgroup and
     974             :          * elevator which will be dropped by either elevator exit
     975             :          * or cgroup deletion path depending on who is exiting first.
     976             :          */
     977             :         atomic_set(&cfqg->ref, 1);
     978             : 
     979             :         /* Add group onto cgroup list */
     980             :         sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
     981             :         blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
     982             :                                         MKDEV(major, minor));
     983             : 
     984             :         /* Add group on cfqd list */
     985             :         hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
     986             : 
     987             : done:
     988             :         blkiocg_css_put(blkcg);
     989             :         return cfqg;
     990             : }
     991             : 
     992             : /*
      993             :  * Search for the cfq group the current task belongs to. If create = 1, then also
     994             :  * create the cfq group if it does not exist. request_queue lock must be held.
     995             :  */
     996             : static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
     997             : {
     998             :         struct cgroup *cgroup;
     999             :         struct cfq_group *cfqg = NULL;
    1000             : 
    1001             :         rcu_read_lock();
    1002             :         cgroup = task_cgroup(current, blkio_subsys_id);
    1003             :         cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
    1004             :         if (!cfqg && create)
    1005             :                 cfqg = &cfqd->root_group;
    1006             :         rcu_read_unlock();
    1007             :         return cfqg;
    1008             : }
    1009             : 
    1010             : static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
    1011             : {
    1012             :         /* Currently, all async queues are mapped to root group */
    1013             :         if (!cfq_cfqq_sync(cfqq))
    1014             :                 cfqg = &cfqq->cfqd->root_group;
    1015             : 
    1016             :         cfqq->cfqg = cfqg;
    1017             :         /* cfqq reference on cfqg */
    1018             :         atomic_inc(&cfqq->cfqg->ref);
    1019             : }
    1020             : 
    1021             : static void cfq_put_cfqg(struct cfq_group *cfqg)
    1022             : {
    1023             :         struct cfq_rb_root *st;
    1024             :         int i, j;
    1025             : 
    1026             :         BUG_ON(atomic_read(&cfqg->ref) <= 0);
    1027             :         if (!atomic_dec_and_test(&cfqg->ref))
    1028             :                 return;
    1029             :         for_each_cfqg_st(cfqg, i, j, st)
    1030             :                 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
    1031             :         kfree(cfqg);
    1032             : }
    1033             : 
    1034             : static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
    1035             : {
    1036             :         /* Something is wrong if we are trying to remove the same group twice */
    1037             :         BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
    1038             : 
    1039             :         hlist_del_init(&cfqg->cfqd_node);
    1040             : 
    1041             :         /*
    1042             :          * Put the reference taken at the time of creation so that when all
    1043             :          * queues are gone, group can be destroyed.
    1044             :          */
    1045             :         cfq_put_cfqg(cfqg);
    1046             : }
    1047             : 
    1048             : static void cfq_release_cfq_groups(struct cfq_data *cfqd)
    1049             : {
    1050             :         struct hlist_node *pos, *n;
    1051             :         struct cfq_group *cfqg;
    1052             : 
    1053             :         hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
    1054             :                 /*
    1055             :                  * If the cgroup removal path got to the blk_group first and
    1056             :                  * removed it from the cgroup list, then it will also take care
    1057             :                  * of destroying the cfqg.
    1058             :                  */
    1059             :                 if (!blkiocg_del_blkio_group(&cfqg->blkg))
    1060             :                         cfq_destroy_cfqg(cfqd, cfqg);
    1061             :         }
    1062             : }
    1063             : 
    1064             : /*
    1065             :  * Blk cgroup controller notification saying that the blkio_group object is
    1066             :  * being unlinked because the associated cgroup object is going away. That
    1067             :  * also means that no new IO will come into this group. So get rid of this
    1068             :  * group as soon as any pending IO in the group is finished.
    1069             :  *
    1070             :  * This function is called under rcu_read_lock(). key is the rcu protected
    1071             :  * pointer. That means "key" is a valid cfq_data pointer as long as we hold
    1072             :  * the rcu read lock.
    1073             :  *
    1074             :  * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
    1075             :  * it should not be NULL: even if the elevator was exiting, the cgroup
    1076             :  * deletion path got to it first.
    1077             :  */
    1078             : void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
    1079             : {
    1080             :         unsigned long  flags;
    1081             :         struct cfq_data *cfqd = key;
    1082             : 
    1083             :         spin_lock_irqsave(cfqd->queue->queue_lock, flags);
    1084             :         cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
    1085             :         spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
    1086             : }
    1087             : 
    1088             : #else /* GROUP_IOSCHED */
    1089             : static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
    1090             : {
    1091           0 :         return &cfqd->root_group;
    1092             : }
    1093             : static inline void
    1094             : cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
    1095           0 :         cfqq->cfqg = cfqg;
    1096           0 : }
    1097             : 
    1098             : static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
    1099             : static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
    1100           0 : 
    1101           0 : #endif /* GROUP_IOSCHED */
    1102             : 
    1103             : /*
    1104             :  * The cfqd->service_trees holds all pending cfq_queue's that have
    1105             :  * requests waiting to be processed. It is sorted in the order that
    1106             :  * we will service the queues.
    1107             :  */
    1108             : static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    1109             :                                  bool add_front)
    1110             : {
    1111           0 :         struct rb_node **p, *parent;
    1112           0 :         struct cfq_queue *__cfqq;
    1113           0 :         unsigned long rb_key;
    1114           0 :         struct cfq_rb_root *service_tree;
    1115           0 :         int left;
    1116           0 :         int new_cfqq = 1;
    1117           0 :         int group_changed = 0;
    1118           0 : 
    1119           0 : #ifdef CONFIG_CFQ_GROUP_IOSCHED
    1120           0 :         if (!cfqd->cfq_group_isolation
    1121           0 :             && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
    1122           0 :             && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
    1123           0 :                 /* Move this cfq to root group */
    1124           0 :                 cfq_log_cfqq(cfqd, cfqq, "moving to root group");
    1125             :                 if (!RB_EMPTY_NODE(&cfqq->rb_node))
    1126             :                         cfq_group_service_tree_del(cfqd, cfqq->cfqg);
    1127             :                 cfqq->orig_cfqg = cfqq->cfqg;
    1128             :                 cfqq->cfqg = &cfqd->root_group;
    1129             :                 atomic_inc(&cfqd->root_group.ref);
    1130             :                 group_changed = 1;
    1131             :         } else if (!cfqd->cfq_group_isolation
    1132             :                    && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
    1133             :                 /* cfqq is sequential now, needs to go to its original group */
    1134             :                 BUG_ON(cfqq->cfqg != &cfqd->root_group);
    1135             :                 if (!RB_EMPTY_NODE(&cfqq->rb_node))
    1136             :                         cfq_group_service_tree_del(cfqd, cfqq->cfqg);
    1137             :                 cfq_put_cfqg(cfqq->cfqg);
    1138             :                 cfqq->cfqg = cfqq->orig_cfqg;
    1139             :                 cfqq->orig_cfqg = NULL;
    1140             :                 group_changed = 1;
    1141             :                 cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
    1142             :         }
    1143             : #endif
    1144             : 
    1145           0 :         service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
    1146             :                                                 cfqq_type(cfqq));
    1147           0 :         if (cfq_class_idle(cfqq)) {
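                     :                 /*
                     :                  * Idle class queues always go to the far end of the tree:
                     :                  * CFQ_IDLE_DELAY past the current last entry (or past "now"
                     :                  * if the tree is empty).
                     :                  */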
    1148           0 :                 rb_key = CFQ_IDLE_DELAY;
    1149           0 :                 parent = rb_last(&service_tree->rb);
    1150           0 :                 if (parent && parent != &cfqq->rb_node) {
    1151           0 :                         __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
    1152           0 :                         rb_key += __cfqq->rb_key;
    1153             :                 } else
    1154           0 :                         rb_key += jiffies;
    1155           0 :         } else if (!add_front) {
    1156             :                 /*
    1157             :                  * Get our rb key offset. Subtract any residual slice
    1158             :                  * value carried from last service. A negative resid
    1159             :                  * count indicates slice overrun, and this should position
    1160             :                  * the next service time further away in the tree.
    1161             :                  */
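                     :                 /*
                     :                  * For example, a queue that overran its slice by 4 jiffies
                     :                  * carries slice_resid == -4, so its key ends up 4 jiffies
                     :                  * further out than that of a queue which did not overrun.
                     :                  */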
    1162           0 :                 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
    1163           0 :                 rb_key -= cfqq->slice_resid;
    1164           0 :                 cfqq->slice_resid = 0;
    1165             :         } else {
    1166           0 :                 rb_key = -HZ;
    1167           0 :                 __cfqq = cfq_rb_first(service_tree);
    1168           0 :                 rb_key += __cfqq ? __cfqq->rb_key : jiffies;
    1169             :         }
    1170             : 
    1171           0 :         if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
    1172           0 :                 new_cfqq = 0;
    1173             :                 /*
    1174             :                  * same position, nothing more to do
    1175             :                  */
    1176           0 :                 if (rb_key == cfqq->rb_key &&
    1177             :                     cfqq->service_tree == service_tree)
    1178           0 :                         return;
    1179             : 
    1180           0 :                 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
    1181           0 :                 cfqq->service_tree = NULL;
    1182             :         }
    1183             : 
    1184           0 :         left = 1;
    1185           0 :         parent = NULL;
    1186           0 :         cfqq->service_tree = service_tree;
    1187           0 :         p = &service_tree->rb.rb_node;
    1188           0 :         while (*p) {
    1189           0 :                 struct rb_node **n;
    1190           0 : 
    1191           0 :                 parent = *p;
    1192           0 :                 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
    1193             : 
    1194             :                 /*
    1195             :                  * sort by key, which represents service time.
    1196             :                  */
    1197           0 :                 if (time_before(rb_key, __cfqq->rb_key))
    1198           0 :                         n = &(*p)->rb_left;
    1199             :                 else {
    1200           0 :                         n = &(*p)->rb_right;
    1201           0 :                         left = 0;
    1202             :                 }
    1203             : 
    1204           0 :                 p = n;
    1205           0 :         }
    1206             : 
    1207           0 :         if (left)
    1208           0 :                 service_tree->left = &cfqq->rb_node;
    1209             : 
    1210           0 :         cfqq->rb_key = rb_key;
    1211           0 :         rb_link_node(&cfqq->rb_node, parent, p);
    1212           0 :         rb_insert_color(&cfqq->rb_node, &service_tree->rb);
    1213           0 :         service_tree->count++;
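                     :         /*
                     :          * Update the group service tree only for a brand new queue that was
                     :          * not added at the front, or when the queue switched groups above;
                     :          * a plain requeue leaves the group's position unchanged.
                     :          */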
    1214           0 :         if ((add_front || !new_cfqq) && !group_changed)
    1215           0 :                 return;
    1216           0 :         cfq_group_service_tree_add(cfqd, cfqq->cfqg);
    1217           0 : }
    1218             : 
    1219             : static struct cfq_queue *
    1220             : cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
    1221             :                      sector_t sector, struct rb_node **ret_parent,
    1222             :                      struct rb_node ***rb_link)
    1223             : {
    1224           0 :         struct rb_node **p, *parent;
    1225           0 :         struct cfq_queue *cfqq = NULL;
    1226           0 : 
    1227           0 :         parent = NULL;
    1228           0 :         p = &root->rb_node;
    1229           0 :         while (*p) {
    1230           0 :                 struct rb_node **n;
    1231           0 : 
    1232           0 :                 parent = *p;
    1233           0 :                 cfqq = rb_entry(parent, struct cfq_queue, p_node);
    1234             : 
    1235           0 :                 /*
    1236             :                  * Sort strictly based on sector.  Smallest to the left,
    1237             :                  * largest to the right.
    1238             :                  */
    1239           0 :                 if (sector > blk_rq_pos(cfqq->next_rq))
    1240           0 :                         n = &(*p)->rb_right;
    1241           0 :                 else if (sector < blk_rq_pos(cfqq->next_rq))
    1242           0 :                         n = &(*p)->rb_left;
    1243             :                 else
    1244           0 :                         break;
    1245           0 :                 p = n;
    1246           0 :                 cfqq = NULL;
    1247           0 :         }
    1248             : 
    1249           0 :         *ret_parent = parent;
    1250           0 :         if (rb_link)
    1251           0 :                 *rb_link = p;
    1252           0 :         return cfqq;
    1253             : }
    1254             : 
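                     : /*
                     :  * Add cfqq to the per-priority prio tree, keyed by the sector of its next
                     :  * pending request; cfqq_close() walks this tree to find a queue doing I/O
                     :  * near the last dispatched position. If another queue already occupies that
                     :  * sector, cfqq is simply left out of the tree (p_root stays NULL).
                     :  */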
    1255             : static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1256             : {
    1257           0 :         struct rb_node **p, *parent;
    1258           0 :         struct cfq_queue *__cfqq;
    1259           0 : 
    1260           0 :         if (cfqq->p_root) {
    1261           0 :                 rb_erase(&cfqq->p_node, cfqq->p_root);
    1262           0 :                 cfqq->p_root = NULL;
    1263             :         }
    1264             : 
    1265           0 :         if (cfq_class_idle(cfqq))
    1266           0 :                 return;
    1267           0 :         if (!cfqq->next_rq)
    1268           0 :                 return;
    1269             : 
    1270           0 :         cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
    1271           0 :         __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
    1272             :                                       blk_rq_pos(cfqq->next_rq), &parent, &p);
    1273           0 :         if (!__cfqq) {
    1274           0 :                 rb_link_node(&cfqq->p_node, parent, p);
    1275           0 :                 rb_insert_color(&cfqq->p_node, cfqq->p_root);
    1276             :         } else
    1277           0 :                 cfqq->p_root = NULL;
    1278           0 : }
    1279             : 
    1280             : /*
    1281             :  * Update cfqq's position in the service tree.
    1282             :  */
    1283             : static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1284             : {
    1285           0 :         /*
    1286             :          * Resorting requires the cfqq to be on the RR list already.
    1287             :          */
    1288           0 :         if (cfq_cfqq_on_rr(cfqq)) {
    1289           0 :                 cfq_service_tree_add(cfqd, cfqq, 0);
    1290           0 :                 cfq_prio_tree_add(cfqd, cfqq);
    1291             :         }
    1292           0 : }
    1293             : 
    1294             : /*
    1295             :  * add to busy list of queues for service, trying to be fair in ordering
    1296             :  * the pending list according to last request service
    1297             :  */
    1298             : static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1299             : {
    1300           0 :         cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
    1301           0 :         BUG_ON(cfq_cfqq_on_rr(cfqq));
    1302           0 :         cfq_mark_cfqq_on_rr(cfqq);
    1303           0 :         cfqd->busy_queues++;
    1304             : 
    1305           0 :         cfq_resort_rr_list(cfqd, cfqq);
    1306           0 : }
    1307             : 
    1308             : /*
    1309             :  * Called when the cfqq no longer has requests pending, remove it from
    1310             :  * the service tree.
    1311             :  */
    1312             : static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1313             : {
    1314           0 :         cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
    1315           0 :         BUG_ON(!cfq_cfqq_on_rr(cfqq));
    1316           0 :         cfq_clear_cfqq_on_rr(cfqq);
    1317             : 
    1318           0 :         if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
    1319           0 :                 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
    1320           0 :                 cfqq->service_tree = NULL;
    1321             :         }
    1322           0 :         if (cfqq->p_root) {
    1323           0 :                 rb_erase(&cfqq->p_node, cfqq->p_root);
    1324           0 :                 cfqq->p_root = NULL;
    1325             :         }
    1326             : 
    1327           0 :         cfq_group_service_tree_del(cfqd, cfqq->cfqg);
    1328           0 :         BUG_ON(!cfqd->busy_queues);
    1329           0 :         cfqd->busy_queues--;
    1330           0 : }
    1331             : 
    1332             : /*
    1333             :  * rb tree support functions
    1334             :  */
    1335             : static void cfq_del_rq_rb(struct request *rq)
    1336             : {
    1337           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    1338           0 :         const int sync = rq_is_sync(rq);
    1339           0 : 
    1340           0 :         BUG_ON(!cfqq->queued[sync]);
    1341           0 :         cfqq->queued[sync]--;
    1342             : 
    1343           0 :         elv_rb_del(&cfqq->sort_list, rq);
    1344             : 
    1345           0 :         if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
    1346             :                 /*
    1347             :                  * Queue will be deleted from service tree when we actually
    1348             :                  * expire it later. Right now just remove it from prio tree
    1349             :                  * as it is empty.
    1350             :                  */
    1351           0 :                 if (cfqq->p_root) {
    1352           0 :                         rb_erase(&cfqq->p_node, cfqq->p_root);
    1353           0 :                         cfqq->p_root = NULL;
    1354             :                 }
    1355           0 :         }
    1356             : }
    1357             : 
    1358             : static void cfq_add_rq_rb(struct request *rq)
    1359             : {
    1360           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    1361           0 :         struct cfq_data *cfqd = cfqq->cfqd;
    1362           0 :         struct request *__alias, *prev;
    1363           0 : 
    1364           0 :         cfqq->queued[rq_is_sync(rq)]++;
    1365           0 : 
    1366           0 :         /*
    1367             :          * This looks a little odd, but the first insert might return an alias;
    1368             :          * if that happens, put the alias on the dispatch list.
    1369             :          */
    1370           0 :         while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
    1371           0 :                 cfq_dispatch_insert(cfqd->queue, __alias);
    1372           0 : 
    1373           0 :         if (!cfq_cfqq_on_rr(cfqq))
    1374           0 :                 cfq_add_cfqq_rr(cfqd, cfqq);
    1375             : 
    1376             :         /*
    1377             :          * check if this request is a better next-serve candidate
    1378             :          */
    1379           0 :         prev = cfqq->next_rq;
    1380           0 :         cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
    1381             : 
    1382             :         /*
    1383             :          * adjust priority tree position, if ->next_rq changes
    1384             :          */
    1385           0 :         if (prev != cfqq->next_rq)
    1386           0 :                 cfq_prio_tree_add(cfqd, cfqq);
    1387             : 
    1388           0 :         BUG_ON(!cfqq->next_rq);
    1389             : }
    1390           0 : 
    1391             : static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
    1392             : {
    1393           0 :         elv_rb_del(&cfqq->sort_list, rq);
    1394           0 :         cfqq->queued[rq_is_sync(rq)]--;
    1395           0 :         cfq_add_rq_rb(rq);
    1396           0 : }
    1397             : 
    1398             : static struct request *
    1399             : cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
    1400             : {
    1401           0 :         struct task_struct *tsk = current;
    1402           0 :         struct cfq_io_context *cic;
    1403           0 :         struct cfq_queue *cfqq;
    1404           0 : 
    1405           0 :         cic = cfq_cic_lookup(cfqd, tsk->io_context);
    1406           0 :         if (!cic)
    1407           0 :                 return NULL;
    1408             : 
    1409           0 :         cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
    1410           0 :         if (cfqq) {
    1411           0 :                 sector_t sector = bio->bi_sector + bio_sectors(bio);
    1412             : 
    1413           0 :                 return elv_rb_find(&cfqq->sort_list, sector);
    1414             :         }
    1415             : 
    1416           0 :         return NULL;
    1417             : }
    1418             : 
    1419             : static void cfq_activate_request(struct request_queue *q, struct request *rq)
    1420             : {
    1421           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    1422           0 : 
    1423           0 :         cfqd->rq_in_driver[rq_is_sync(rq)]++;
    1424           0 :         cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
    1425             :                                                 rq_in_driver(cfqd));
    1426             : 
    1427           0 :         cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
    1428           0 : }
    1429             : 
    1430             : static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
    1431             : {
    1432           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    1433           0 :         const int sync = rq_is_sync(rq);
    1434           0 : 
    1435           0 :         WARN_ON(!cfqd->rq_in_driver[sync]);
    1436           0 :         cfqd->rq_in_driver[sync]--;
    1437           0 :         cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
    1438             :                                                 rq_in_driver(cfqd));
    1439             : }
    1440             : 
    1441             : static void cfq_remove_request(struct request *rq)
    1442             : {
    1443           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    1444           0 : 
    1445           0 :         if (cfqq->next_rq == rq)
    1446           0 :                 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
    1447             : 
    1448           0 :         list_del_init(&rq->queuelist);
    1449           0 :         cfq_del_rq_rb(rq);
    1450             : 
    1451           0 :         cfqq->cfqd->rq_queued--;
    1452           0 :         if (rq_is_meta(rq)) {
    1453           0 :                 WARN_ON(!cfqq->meta_pending);
    1454           0 :                 cfqq->meta_pending--;
    1455             :         }
    1456           0 : }
    1457             : 
    1458             : static int cfq_merge(struct request_queue *q, struct request **req,
    1459             :                      struct bio *bio)
    1460           0 : {
    1461           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    1462           0 :         struct request *__rq;
    1463             : 
    1464           0 :         __rq = cfq_find_rq_fmerge(cfqd, bio);
    1465           0 :         if (__rq && elv_rq_merge_ok(__rq, bio)) {
    1466           0 :                 *req = __rq;
    1467           0 :                 return ELEVATOR_FRONT_MERGE;
    1468             :         }
    1469             : 
    1470           0 :         return ELEVATOR_NO_MERGE;
    1471             : }
    1472             : 
    1473             : static void cfq_merged_request(struct request_queue *q, struct request *req,
    1474             :                                int type)
    1475           0 : {
    1476           0 :         if (type == ELEVATOR_FRONT_MERGE) {
    1477           0 :                 struct cfq_queue *cfqq = RQ_CFQQ(req);
    1478             : 
    1479           0 :                 cfq_reposition_rq_rb(cfqq, req);
    1480             :         }
    1481           0 : }
    1482             : 
    1483             : static void
    1484             : cfq_merged_requests(struct request_queue *q, struct request *rq,
    1485             :                     struct request *next)
    1486           0 : {
    1487           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    1488           0 :         /*
    1489             :          * reposition in fifo if next is older than rq
    1490             :          */
    1491           0 :         if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
    1492           0 :             time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
    1493           0 :                 list_move(&rq->queuelist, &next->queuelist);
    1494           0 :                 rq_set_fifo_time(rq, rq_fifo_time(next));
    1495             :         }
    1496             : 
    1497           0 :         if (cfqq->next_rq == next)
    1498           0 :                 cfqq->next_rq = rq;
    1499           0 :         cfq_remove_request(next);
    1500           0 : }
    1501             : 
    1502             : static int cfq_allow_merge(struct request_queue *q, struct request *rq,
    1503             :                            struct bio *bio)
    1504           0 : {
    1505           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    1506           0 :         struct cfq_io_context *cic;
    1507           0 :         struct cfq_queue *cfqq;
    1508           0 : 
    1509           0 :         /*
    1510           0 :          * Disallow merge of a sync bio into an async request.
    1511           0 :          */
    1512           0 :         if (cfq_bio_sync(bio) && !rq_is_sync(rq))
    1513           0 :                 return false;
    1514             : 
    1515             :         /*
    1516             :          * Lookup the cfqq that this bio will be queued with. Allow
    1517             :          * merge only if rq is queued there.
    1518             :          */
    1519           0 :         cic = cfq_cic_lookup(cfqd, current->io_context);
    1520           0 :         if (!cic)
    1521           0 :                 return false;
    1522             : 
    1523           0 :         cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
    1524           0 :         return cfqq == RQ_CFQQ(rq);
    1525             : }
    1526             : 
    1527             : static void __cfq_set_active_queue(struct cfq_data *cfqd,
    1528             :                                    struct cfq_queue *cfqq)
    1529             : {
    1530           0 :         if (cfqq) {
    1531             :                 cfq_log_cfqq(cfqd, cfqq, "set_active");
    1532           0 :                 cfqq->slice_start = 0;
    1533           0 :                 cfqq->dispatch_start = jiffies;
    1534           0 :                 cfqq->allocated_slice = 0;
    1535           0 :                 cfqq->slice_end = 0;
    1536           0 :                 cfqq->slice_dispatch = 0;
    1537           0 :                 cfqq->nr_sectors = 0;
    1538             : 
    1539           0 :                 cfq_clear_cfqq_wait_request(cfqq);
    1540           0 :                 cfq_clear_cfqq_must_dispatch(cfqq);
    1541           0 :                 cfq_clear_cfqq_must_alloc_slice(cfqq);
    1542           0 :                 cfq_clear_cfqq_fifo_expire(cfqq);
    1543           0 :                 cfq_mark_cfqq_slice_new(cfqq);
    1544             : 
    1545           0 :                 del_timer(&cfqd->idle_slice_timer);
    1546             :         }
    1547             : 
    1548           0 :         cfqd->active_queue = cfqq;
    1549           0 : }
    1550             : 
    1551             : /*
    1552             :  * current cfqq expired its slice (or was too idle), select new one
    1553             :  */
    1554             : static void
    1555             : __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    1556             :                     bool timed_out)
    1557           0 : {
    1558           0 :         cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
    1559           0 : 
    1560           0 :         if (cfq_cfqq_wait_request(cfqq))
    1561           0 :                 del_timer(&cfqd->idle_slice_timer);
    1562             : 
    1563           0 :         cfq_clear_cfqq_wait_request(cfqq);
    1564           0 :         cfq_clear_cfqq_wait_busy(cfqq);
    1565             : 
    1566             :         /*
    1567             :          * If this cfqq is shared between multiple processes, check to
    1568             :          * make sure that those processes are still issuing I/Os within
    1569             :          * the mean seek distance.  If not, it may be time to break the
    1570             :          * queues apart again.
    1571             :          */
    1572           0 :         if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
    1573           0 :                 cfq_mark_cfqq_split_coop(cfqq);
    1574             : 
    1575             :         /*
    1576             :          * store what was left of this slice, if the queue idled/timed out
    1577             :          */
    1578           0 :         if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
    1579           0 :                 cfqq->slice_resid = cfqq->slice_end - jiffies;
    1580             :                 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
    1581             :         }
    1582             : 
    1583           0 :         cfq_group_served(cfqd, cfqq->cfqg, cfqq);
    1584             : 
    1585           0 :         if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
    1586           0 :                 cfq_del_cfqq_rr(cfqd, cfqq);
    1587             : 
    1588           0 :         cfq_resort_rr_list(cfqd, cfqq);
    1589             : 
    1590           0 :         if (cfqq == cfqd->active_queue)
    1591           0 :                 cfqd->active_queue = NULL;
    1592             : 
    1593           0 :         if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
    1594           0 :                 cfqd->grp_service_tree.active = NULL;
    1595             : 
    1596           0 :         if (cfqd->active_cic) {
    1597           0 :                 put_io_context(cfqd->active_cic->ioc);
    1598           0 :                 cfqd->active_cic = NULL;
    1599             :         }
    1600           0 : }
    1601             : 
    1602             : static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
    1603             : {
    1604           0 :         struct cfq_queue *cfqq = cfqd->active_queue;
    1605             : 
    1606           0 :         if (cfqq)
    1607           0 :                 __cfq_slice_expired(cfqd, cfqq, timed_out);
    1608           0 : }
    1609             : 
    1610             : /*
    1611             :  * Get next queue for service. Unless we have a queue preemption,
    1612             :  * we'll simply select the first cfqq in the service tree.
    1613             :  */
    1614             : static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
    1615             : {
    1616           0 :         struct cfq_rb_root *service_tree =
    1617           0 :                 service_tree_for(cfqd->serving_group, cfqd->serving_prio,
    1618           0 :                                         cfqd->serving_type);
    1619             : 
    1620           0 :         if (!cfqd->rq_queued)
    1621           0 :                 return NULL;
    1622             : 
    1623             :         /* There is nothing to dispatch */
    1624           0 :         if (!service_tree)
    1625           0 :                 return NULL;
    1626           0 :         if (RB_EMPTY_ROOT(&service_tree->rb))
    1627           0 :                 return NULL;
    1628           0 :         return cfq_rb_first(service_tree);
    1629             : }
    1630             : 
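                     : /*
                     :  * Unlike cfq_get_next_queue(), ignore the currently serving prio/type and
                     :  * return the first queue found on any service tree of the next group.
                     :  */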
    1631             : static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
    1632             : {
    1633           0 :         struct cfq_group *cfqg;
    1634           0 :         struct cfq_queue *cfqq;
    1635           0 :         int i, j;
    1636           0 :         struct cfq_rb_root *st;
    1637           0 : 
    1638           0 :         if (!cfqd->rq_queued)
    1639           0 :                 return NULL;
    1640             : 
    1641           0 :         cfqg = cfq_get_next_cfqg(cfqd);
    1642           0 :         if (!cfqg)
    1643           0 :                 return NULL;
    1644             : 
    1645           0 :         for_each_cfqg_st(cfqg, i, j, st)
    1646           0 :                 if ((cfqq = cfq_rb_first(st)) != NULL)
    1647           0 :                         return cfqq;
    1648           0 :         return NULL;
    1649             : }
    1650             : 
    1651             : /*
    1652             :  * Get and set a new active queue for service.
    1653             :  */
    1654             : static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
    1655             :                                               struct cfq_queue *cfqq)
    1656             : {
    1657           0 :         if (!cfqq)
    1658           0 :                 cfqq = cfq_get_next_queue(cfqd);
    1659             : 
    1660           0 :         __cfq_set_active_queue(cfqd, cfqq);
    1661           0 :         return cfqq;
    1662             : }
    1663             : 
    1664             : static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
    1665             :                                           struct request *rq)
    1666           0 : {
    1667           0 :         if (blk_rq_pos(rq) >= cfqd->last_position)
    1668           0 :                 return blk_rq_pos(rq) - cfqd->last_position;
    1669             :         else
    1670           0 :                 return cfqd->last_position - blk_rq_pos(rq);
    1671             : }
    1672             : 
    1673             : static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    1674             :                                struct request *rq, bool for_preempt)
    1675             : {
    1676           0 :         sector_t sdist = cfqq->seek_mean;
    1677           0 : 
    1678           0 :         if (!sample_valid(cfqq->seek_samples))
    1679           0 :                 sdist = CFQQ_SEEK_THR;
    1680             : 
    1681             :         /* if seek_mean is big, using it as the closeness criterion is meaningless */
    1682           0 :         if (sdist > CFQQ_SEEK_THR && !for_preempt)
    1683           0 :                 sdist = CFQQ_SEEK_THR;
    1684             : 
    1685           0 :         return cfq_dist_from_last(cfqd, rq) <= sdist;
    1686             : }
    1687             : 
    1688             : static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
    1689             :                                     struct cfq_queue *cur_cfqq)
    1690           0 : {
    1691           0 :         struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
    1692           0 :         struct rb_node *parent, *node;
    1693           0 :         struct cfq_queue *__cfqq;
    1694           0 :         sector_t sector = cfqd->last_position;
    1695           0 : 
    1696           0 :         if (RB_EMPTY_ROOT(root))
    1697           0 :                 return NULL;
    1698           0 : 
    1699           0 :         /*
    1700             :          * First, if we find a request starting at the end of the last
    1701             :          * request, choose it.
    1702             :          */
    1703           0 :         __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
    1704           0 :         if (__cfqq)
    1705           0 :                 return __cfqq;
    1706             : 
    1707             :         /*
    1708             :          * If the exact sector wasn't found, the parent of the NULL leaf
    1709             :          * will contain the closest sector.
    1710             :          */
    1711           0 :         __cfqq = rb_entry(parent, struct cfq_queue, p_node);
    1712           0 :         if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
    1713           0 :                 return __cfqq;
    1714             : 
    1715           0 :         if (blk_rq_pos(__cfqq->next_rq) < sector)
    1716           0 :                 node = rb_next(&__cfqq->p_node);
    1717             :         else
    1718           0 :                 node = rb_prev(&__cfqq->p_node);
    1719           0 :         if (!node)
    1720           0 :                 return NULL;
    1721             : 
    1722           0 :         __cfqq = rb_entry(node, struct cfq_queue, p_node);
    1723           0 :         if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
    1724           0 :                 return __cfqq;
    1725             : 
    1726           0 :         return NULL;
    1727             : }
    1728             : 
    1729             : /*
    1730             :  * cfqd - obvious
    1731             :  * cur_cfqq - passed in so that we don't decide that the current queue is
    1732             :  *            closely cooperating with itself.
    1733             :  *
    1734             :  * So, basically we're assuming that cur_cfqq has dispatched at least
    1735             :  * one request, and that cfqd->last_position reflects a position on the disk
    1736             :  * associated with the I/O issued by cur_cfqq.  I'm not sure this is a valid
    1737             :  * assumption.
    1738             :  */
    1739             : static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
    1740             :                                               struct cfq_queue *cur_cfqq)
    1741           0 : {
    1742           0 :         struct cfq_queue *cfqq;
    1743           0 : 
    1744           0 :         if (!cfq_cfqq_sync(cur_cfqq))
    1745           0 :                 return NULL;
    1746           0 :         if (CFQQ_SEEKY(cur_cfqq))
    1747           0 :                 return NULL;
    1748             : 
    1749             :         /*
    1750             :          * Don't search priority tree if it's the only queue in the group.
    1751             :          */
    1752           0 :         if (cur_cfqq->cfqg->nr_cfqq == 1)
    1753           0 :                 return NULL;
    1754             : 
    1755             :         /*
    1756             :          * We should notice if some of the queues are cooperating, e.g.
    1757             :          * working closely on the same area of the disk. In that case,
    1758             :          * we can group them together and not waste time idling.
    1759             :          */
    1760           0 :         cfqq = cfqq_close(cfqd, cur_cfqq);
    1761           0 :         if (!cfqq)
    1762           0 :                 return NULL;
    1763             : 
    1764             :         /* If new queue belongs to different cfq_group, don't choose it */
    1765           0 :         if (cur_cfqq->cfqg != cfqq->cfqg)
    1766           0 :                 return NULL;
    1767             : 
    1768             :         /*
    1769             :          * It only makes sense to merge sync queues.
    1770             :          */
    1771           0 :         if (!cfq_cfqq_sync(cfqq))
    1772           0 :                 return NULL;
    1773           0 :         if (CFQQ_SEEKY(cfqq))
    1774           0 :                 return NULL;
    1775             : 
    1776             :         /*
    1777             :          * Do not merge queues of different priority classes
    1778             :          */
    1779           0 :         if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
    1780           0 :                 return NULL;
    1781             : 
    1782           0 :         return cfqq;
    1783             : }
    1784             : 
    1785             : /*
    1786             :  * Determine whether we should enforce idle window for this queue.
    1787             :  */
    1788             : 
    1789             : static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1790             : {
    1791           0 :         enum wl_prio_t prio = cfqq_prio(cfqq);
    1792           0 :         struct cfq_rb_root *service_tree = cfqq->service_tree;
    1793           0 : 
    1794           0 :         BUG_ON(!service_tree);
    1795           0 :         BUG_ON(!service_tree->count);
    1796           0 : 
    1797           0 :         /* We never do for idle class queues. */
    1798           0 :         if (prio == IDLE_WORKLOAD)
    1799           0 :                 return false;
    1800             : 
    1801             :         /* We do for queues that were marked with idle window flag. */
    1802           0 :         if (cfq_cfqq_idle_window(cfqq) &&
    1803             :            !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
    1804           0 :                 return true;
    1805             : 
    1806             :         /*
    1807             :          * Otherwise, we do only if they are the last ones
    1808             :          * in their service tree.
    1809             :          */
    1810           0 :         return service_tree->count == 1 && cfq_cfqq_sync(cfqq);
    1811             : }
    1812             : 
    1813             : static void cfq_arm_slice_timer(struct cfq_data *cfqd)
    1814             : {
    1815           0 :         struct cfq_queue *cfqq = cfqd->active_queue;
    1816           0 :         struct cfq_io_context *cic;
    1817           0 :         unsigned long sl;
    1818           0 : 
    1819           0 :         /*
    1820           0 :          * SSD device without seek penalty, disable idling. But only do so
    1821           0 :          * for devices that support queuing, otherwise we still have a problem
    1822           0 :          * with sync vs async workloads.
    1823           0 :          */
    1824           0 :         if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
    1825           0 :                 return;
    1826           0 : 
    1827           0 :         WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
    1828           0 :         WARN_ON(cfq_cfqq_slice_new(cfqq));
    1829             : 
    1830             :         /*
    1831             :          * idle is disabled, either manually or by past process history
    1832             :          */
    1833           0 :         if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq))
    1834           0 :                 return;
    1835             : 
    1836             :         /*
    1837             :          * still active requests from this queue, don't idle
    1838             :          */
    1839           0 :         if (cfqq->dispatched)
    1840           0 :                 return;
    1841             : 
    1842             :         /*
    1843             :          * task has exited, don't wait
    1844             :          */
    1845           0 :         cic = cfqd->active_cic;
    1846           0 :         if (!cic || !atomic_read(&cic->ioc->nr_tasks))
    1847           0 :                 return;
    1848             : 
    1849             :         /*
    1850             :          * If our average think time is larger than the remaining time
    1851             :          * slice, then don't idle. This avoids overrunning the allotted
    1852             :          * time slice.
    1853             :          */
    1854           0 :         if (sample_valid(cic->ttime_samples) &&
    1855             :             (cfqq->slice_end - jiffies < cic->ttime_mean))
    1856           0 :                 return;
    1857             : 
    1858           0 :         cfq_mark_cfqq_wait_request(cfqq);
    1859             : 
    1860           0 :         sl = cfqd->cfq_slice_idle;
    1861             : 
    1862           0 :         mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
    1863           0 :         cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
    1864             : }
    1865             : 
    1866             : /*
    1867             :  * Move request from internal lists to the request queue dispatch list.
    1868             :  */
    1869             : static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
    1870             : {
    1871           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    1872           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    1873           0 : 
    1874           0 :         cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
    1875             : 
    1876           0 :         cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
    1877           0 :         cfq_remove_request(rq);
    1878           0 :         cfqq->dispatched++;
    1879           0 :         elv_dispatch_sort(q, rq);
    1880             : 
    1881           0 :         if (cfq_cfqq_sync(cfqq))
    1882           0 :                 cfqd->sync_flight++;
    1883           0 :         cfqq->nr_sectors += blk_rq_sectors(rq);
    1884           0 : }
    1885             : 
    1886             : /*
    1887             :  * return expired entry, or NULL to just start from scratch in rbtree
    1888             :  */
    1889             : static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
    1890             : {
    1891           0 :         struct request *rq = NULL;
    1892           0 : 
    1893           0 :         if (cfq_cfqq_fifo_expire(cfqq))
    1894           0 :                 return NULL;
    1895             : 
    1896           0 :         cfq_mark_cfqq_fifo_expire(cfqq);
    1897             : 
    1898           0 :         if (list_empty(&cfqq->fifo))
    1899           0 :                 return NULL;
    1900             : 
    1901           0 :         rq = rq_entry_fifo(cfqq->fifo.next);
    1902           0 :         if (time_before(jiffies, rq_fifo_time(rq)))
    1903           0 :                 rq = NULL;
    1904             : 
    1905             :         cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
    1906           0 :         return rq;
    1907             : }
    1908             : 
    1909             : static inline int
    1910             : cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    1911             : {
    1912           0 :         const int base_rq = cfqd->cfq_slice_async_rq;
    1913           0 : 
    1914           0 :         WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
    1915             : 
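                     :         /*
                     :          * e.g. with the default cfq_slice_async_rq of 2 and IOPRIO_BE_NR (8)
                     :          * levels, ioprio 0 may queue 2 * (2 + 2 * 7) = 32 requests while
                     :          * ioprio 7 gets only 4.
                     :          */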
    1916           0 :         return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
    1917             : }
    1918             : 
    1919             : /*
    1920             :  * Must be called with the queue_lock held.
    1921             :  */
    1922             : static int cfqq_process_refs(struct cfq_queue *cfqq)
    1923             : {
    1924           0 :         int process_refs, io_refs;
    1925           0 : 
    1926           0 :         io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
    1927           0 :         process_refs = atomic_read(&cfqq->ref) - io_refs;
    1928           0 :         BUG_ON(process_refs < 0);
    1929           0 :         return process_refs;
    1930             : }
    1931             : 
    1932             : static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
    1933             : {
    1934           0 :         int process_refs, new_process_refs;
    1935           0 :         struct cfq_queue *__cfqq;
    1936           0 : 
    1937           0 :         /*
    1938             :          * If there are no process references on the new_cfqq, then it is
    1939             :          * unsafe to follow the ->new_cfqq chain as other cfqq's in the
    1940             :          * chain may have dropped their last reference (not just their
    1941             :          * last process reference).
    1942             :          */
    1943           0 :         if (!cfqq_process_refs(new_cfqq))
    1944           0 :                 return;
    1945             : 
    1946             :         /* Avoid a circular list and skip interim queue merges */
    1947           0 :         while ((__cfqq = new_cfqq->new_cfqq)) {
    1948           0 :                 if (__cfqq == cfqq)
    1949           0 :                         return;
    1950           0 :                 new_cfqq = __cfqq;
    1951           0 :         }
    1952             : 
    1953           0 :         process_refs = cfqq_process_refs(cfqq);
    1954           0 :         new_process_refs = cfqq_process_refs(new_cfqq);
    1955             :         /*
    1956             :          * If the process for the cfqq has gone away, there is no
    1957             :          * sense in merging the queues.
    1958             :          */
    1959           0 :         if (process_refs == 0 || new_process_refs == 0)
    1960           0 :                 return;
    1961             : 
    1962             :         /*
    1963             :          * Merge in the direction of the lesser amount of work.
    1964             :          */
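                     :         /*
                     :          * The queue with fewer process references points its ->new_cfqq at
                     :          * the busier one, which in turn gains that many extra references so
                     :          * it cannot go away while tasks are still being migrated to it.
                     :          */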
    1965           0 :         if (new_process_refs >= process_refs) {
    1966           0 :                 cfqq->new_cfqq = new_cfqq;
    1967           0 :                 atomic_add(process_refs, &new_cfqq->ref);
    1968             :         } else {
    1969           0 :                 new_cfqq->new_cfqq = cfqq;
    1970           0 :                 atomic_add(new_process_refs, &cfqq->ref);
    1971             :         }
    1972           0 : }
    1973             : 
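                     : /*
                     :  * Among the workload types of the given priority class, pick the one whose
                     :  * first queued cfqq has the smallest rb_key, i.e. the one that would be
                     :  * serviced earliest.
                     :  */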
    1974             : static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
    1975             :                                 struct cfq_group *cfqg, enum wl_prio_t prio)
    1976             : {
    1977           0 :         struct cfq_queue *queue;
    1978           0 :         int i;
    1979           0 :         bool key_valid = false;
    1980           0 :         unsigned long lowest_key = 0;
    1981           0 :         enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
    1982           0 : 
    1983           0 :         for (i = 0; i <= SYNC_WORKLOAD; ++i) {
    1984           0 :                 /* select the one with lowest rb_key */
    1985           0 :                 queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
    1986             :                 if (queue &&
    1987           0 :                     (!key_valid || time_before(queue->rb_key, lowest_key))) {
    1988           0 :                         lowest_key = queue->rb_key;
    1989           0 :                         cur_best = i;
    1990           0 :                         key_valid = true;
    1991             :                 }
    1992             :         }
    1993             : 
    1994           0 :         return cur_best;
    1995             : }
    1996             : 
    1997             : static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
    1998             : {
    1999           0 :         unsigned slice;
    2000           0 :         unsigned count;
    2001           0 :         struct cfq_rb_root *st;
    2002           0 :         unsigned group_slice;
    2003           0 : 
    2004           0 :         if (!cfqg) {
    2005           0 :                 cfqd->serving_prio = IDLE_WORKLOAD;
    2006           0 :                 cfqd->workload_expires = jiffies + 1;
    2007           0 :                 return;
    2008           0 :         }
    2009           0 : 
    2010           0 :         /* Choose next priority. RT > BE > IDLE */
    2011           0 :         if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
    2012           0 :                 cfqd->serving_prio = RT_WORKLOAD;
    2013           0 :         else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
    2014           0 :                 cfqd->serving_prio = BE_WORKLOAD;
    2015           0 :         else {
    2016           0 :                 cfqd->serving_prio = IDLE_WORKLOAD;
    2017           0 :                 cfqd->workload_expires = jiffies + 1;
    2018           0 :                 return;
    2019             :         }
    2020             : 
    2021             :         /*
    2022             :          * For RT and BE, we have to choose also the type
    2023             :          * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
    2024             :          * expiration time
    2025             :          */
    2026           0 :         st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
    2027           0 :         count = st->count;
    2028             : 
    2029             :         /*
    2030             :          * check workload expiration, and that we still have other queues ready
    2031             :          */
    2032           0 :         if (count && !time_after(jiffies, cfqd->workload_expires))
    2033           0 :                 return;
    2034             : 
    2035             :         /* otherwise select new workload type */
    2036           0 :         cfqd->serving_type =
    2037             :                 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
    2038           0 :         st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
    2039           0 :         count = st->count;
    2040             : 
    2041             :         /*
    2042             :          * the workload slice is computed as a fraction of target latency
    2043             :          * proportional to the number of queues in that workload, over
    2044             :          * all the queues in the same priority class
    2045             :          */
    2046           0 :         group_slice = cfq_group_slice(cfqd, cfqg);
    2047             : 
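                     :         /*
                     :          * For example, a 300ms group slice with 2 busy queues on this
                     :          * workload tree out of 6 in the priority class yields roughly a
                     :          * 100ms workload slice.
                     :          */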
    2048             :         slice = group_slice * count /
    2049           0 :                 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
    2050             :                       cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
    2051             : 
    2052           0 :         if (cfqd->serving_type == ASYNC_WORKLOAD) {
    2053             :                 unsigned int tmp;
    2054             : 
    2055             :                 /*
    2056             :                  * Async queues are currently system wide. Just taking the
    2057             :                  * proportion of queues within the same group will lead to a
    2058             :                  * higher async ratio system wide, as the root group generally
    2059             :                  * has a higher weight. A more accurate approach would be to
    2060             :                  * calculate the system wide async/sync ratio.
    2061             :                  */
    2062           0 :                 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
    2063           0 :                 tmp = tmp/cfqd->busy_queues;
    2064           0 :                 slice = min_t(unsigned, slice, tmp);
    2065             : 
    2066             :                 /* async workload slice is scaled down according to
    2067             :                  * the sync/async slice ratio. */
    2068           0 :                 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
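                      :                 /*
                      :                  * cfq_slice[0] is the async slice and cfq_slice[1] the
                      :                  * sync slice; with e.g. a 40 ms async and a 100 ms sync
                      :                  * slice this keeps only 40% of the slice computed above.
                      :                  */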
    2069             :         } else
    2070             :                 /* sync workload slice is at least 2 * cfq_slice_idle */
    2071           0 :                 slice = max(slice, 2 * cfqd->cfq_slice_idle);
    2072             : 
    2073           0 :         slice = max_t(unsigned, slice, CFQ_MIN_TT);
    2074           0 :         cfqd->workload_expires = jiffies + slice;
    2075           0 :         cfqd->noidle_tree_requires_idle = false;
    2076           0 : }
    2077             : 
    2078             : static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
    2079             : {
    2080           0 :         struct cfq_rb_root *st = &cfqd->grp_service_tree;
    2081           0 :         struct cfq_group *cfqg;
    2082             : 
    2083           0 :         if (RB_EMPTY_ROOT(&st->rb))
    2084           0 :                 return NULL;
    2085           0 :         cfqg = cfq_rb_first_group(st);
    2086           0 :         st->active = &cfqg->rb_node;
    2087           0 :         update_min_vdisktime(st);
    2088           0 :         return cfqg;
    2089             : }
    2090             : 
    2091             : static void cfq_choose_cfqg(struct cfq_data *cfqd)
    2092             : {
    2093           0 :         struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
    2094           0 : 
    2095           0 :         cfqd->serving_group = cfqg;
    2096             : 
    2097             :         /* Restore the workload type data */
    2098           0 :         if (cfqg->saved_workload_slice) {
    2099           0 :                 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
    2100           0 :                 cfqd->serving_type = cfqg->saved_workload;
    2101           0 :                 cfqd->serving_prio = cfqg->saved_serving_prio;
    2102             :         } else
    2103           0 :                 cfqd->workload_expires = jiffies - 1;
    2104             : 
    2105           0 :         choose_service_tree(cfqd, cfqg);
    2106           0 : }
    2107             : 
    2108             : /*
    2109             :  * Select a queue for service. If we have a current active queue,
    2110             :  * check whether to continue servicing it, or retrieve and set a new one.
    2111             :  */
    2112             : static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
    2113             : {
    2114           0 :         struct cfq_queue *cfqq, *new_cfqq = NULL;
    2115           0 : 
    2116           0 :         cfqq = cfqd->active_queue;
    2117           0 :         if (!cfqq)
    2118           0 :                 goto new_queue;
    2119           0 : 
    2120           0 :         if (!cfqd->rq_queued)
    2121           0 :                 return NULL;
    2122             : 
    2123             :         /*
     2124             :          * We were waiting for the group to get backlogged. Expire the queue.
    2125             :          */
    2126           0 :         if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
    2127           0 :                 goto expire;
    2128             : 
    2129             :         /*
    2130             :          * The active queue has run out of time, expire it and select new.
    2131             :          */
    2132           0 :         if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
    2133             :                 /*
     2134             :                  * If the slice had not expired at the completion of the last
     2135             :                  * request, we might not have turned on the wait_busy flag.
     2136             :                  * Don't expire the queue yet; allow the group to get backlogged.
     2137             :                  *
     2138             :                  * The very fact that we have used up the slice means we have
     2139             :                  * been idling all along on this queue, so it should be OK to
     2140             :                  * wait for this request to complete.
    2141             :                  */
    2142           0 :                 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
    2143             :                     && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
    2144           0 :                         cfqq = NULL;
    2145           0 :                         goto keep_queue;
    2146             :                 } else
    2147           0 :                         goto expire;
    2148             :         }
    2149             : 
    2150             :         /*
    2151             :          * The active queue has requests and isn't expired, allow it to
    2152             :          * dispatch.
    2153             :          */
    2154           0 :         if (!RB_EMPTY_ROOT(&cfqq->sort_list))
    2155           0 :                 goto keep_queue;
    2156             : 
    2157             :         /*
    2158             :          * If another queue has a request waiting within our mean seek
    2159             :          * distance, let it run.  The expire code will check for close
    2160             :          * cooperators and put the close queue at the front of the service
    2161             :          * tree.  If possible, merge the expiring queue with the new cfqq.
    2162             :          */
    2163           0 :         new_cfqq = cfq_close_cooperator(cfqd, cfqq);
    2164           0 :         if (new_cfqq) {
    2165           0 :                 if (!cfqq->new_cfqq)
    2166           0 :                         cfq_setup_merge(cfqq, new_cfqq);
    2167           0 :                 goto expire;
    2168             :         }
    2169             : 
    2170             :         /*
    2171             :          * No requests pending. If the active queue still has requests in
    2172             :          * flight or is idling for a new request, allow either of these
    2173             :          * conditions to happen (or time out) before selecting a new queue.
    2174             :          */
    2175           0 :         if (timer_pending(&cfqd->idle_slice_timer) ||
    2176             :             (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) {
    2177           0 :                 cfqq = NULL;
    2178           0 :                 goto keep_queue;
    2179             :         }
    2180             : 
    2181             : expire:
    2182           0 :         cfq_slice_expired(cfqd, 0);
    2183             : new_queue:
    2184           0 :         /*
    2185           0 :          * Current queue expired. Check if we have to switch to a new
    2186             :          * service tree
    2187             :          */
    2188           0 :         if (!new_cfqq)
    2189           0 :                 cfq_choose_cfqg(cfqd);
    2190             : 
    2191           0 :         cfqq = cfq_set_active_queue(cfqd, new_cfqq);
    2192             : keep_queue:
    2193           0 :         return cfqq;
    2194             : }
    2195             : 
    2196             : static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
    2197             : {
    2198           0 :         int dispatched = 0;
    2199           0 : 
    2200           0 :         while (cfqq->next_rq) {
    2201           0 :                 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
    2202           0 :                 dispatched++;
    2203             :         }
    2204           0 : 
    2205           0 :         BUG_ON(!list_empty(&cfqq->fifo));
    2206             : 
    2207             :         /* By default cfqq is not expired if it is empty. Do it explicitly */
    2208           0 :         __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
    2209           0 :         return dispatched;
    2210             : }
    2211             : 
    2212             : /*
    2213             :  * Drain our current requests. Used for barriers and when switching
    2214             :  * io schedulers on-the-fly.
    2215             :  */
    2216             : static int cfq_forced_dispatch(struct cfq_data *cfqd)
    2217             : {
    2218           0 :         struct cfq_queue *cfqq;
    2219           0 :         int dispatched = 0;
    2220           0 : 
    2221           0 :         while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL)
    2222           0 :                 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
    2223           0 : 
    2224           0 :         cfq_slice_expired(cfqd, 0);
    2225           0 :         BUG_ON(cfqd->busy_queues);
    2226             : 
    2227             :         cfq_log(cfqd, "forced_dispatch=%d", dispatched);
    2228           0 :         return dispatched;
    2229             : }
    2230             : 
    2231             : static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    2232             : {
    2233           0 :         unsigned int max_dispatch;
    2234           0 : 
    2235           0 :         /*
    2236           0 :          * Drain async requests before we start sync IO
    2237           0 :          */
    2238           0 :         if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
    2239           0 :                 return false;
    2240             : 
    2241             :         /*
    2242             :          * If this is an async queue and we have sync IO in flight, let it wait
    2243             :          */
    2244           0 :         if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
    2245           0 :                 return false;
    2246             : 
    2247           0 :         max_dispatch = cfqd->cfq_quantum;
    2248           0 :         if (cfq_class_idle(cfqq))
    2249           0 :                 max_dispatch = 1;
    2250             : 
    2251             :         /*
    2252             :          * Does this cfqq already have too much IO in flight?
    2253             :          */
    2254           0 :         if (cfqq->dispatched >= max_dispatch) {
    2255             :                 /*
    2256             :                  * idle queue must always only have a single IO in flight
    2257             :                  */
    2258           0 :                 if (cfq_class_idle(cfqq))
    2259           0 :                         return false;
    2260             : 
    2261             :                 /*
    2262             :                  * We have other queues, don't allow more IO from this one
    2263             :                  */
    2264           0 :                 if (cfqd->busy_queues > 1)
    2265           0 :                         return false;
    2266             : 
    2267             :                 /*
    2268             :                  * Sole queue user, no limit
    2269             :                  */
    2270           0 :                 max_dispatch = -1;
    2271             :         }
    2272             : 
    2273             :         /*
     2274             :          * Async queues must wait a bit before being allowed to dispatch.
    2275             :          * We also ramp up the dispatch depth gradually for async IO,
    2276             :          * based on the last sync IO we serviced
    2277             :          */
    2278           0 :         if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
    2279           0 :                 unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
    2280             :                 unsigned int depth;
    2281             : 
    2282           0 :                 depth = last_sync / cfqd->cfq_slice[1];
    2283           0 :                 if (!depth && !cfqq->dispatched)
    2284           0 :                         depth = 1;
    2285           0 :                 if (depth < max_dispatch)
    2286           0 :                         max_dispatch = depth;
    2287             :         }
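                      :         /*
                      :          * The ramp above allows one more async request in flight for
                      :          * every sync slice (cfq_slice[1]) worth of time since the last
                      :          * delayed sync completion: e.g. with a 100 ms sync slice, 250 ms
                      :          * later the async depth is capped at 250 / 100 = 2.
                      :          */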
    2288             : 
    2289             :         /*
    2290             :          * If we're below the current max, allow a dispatch
    2291             :          */
    2292           0 :         return cfqq->dispatched < max_dispatch;
    2293             : }
    2294             : 
    2295             : /*
     2296             :  * Dispatch a request from cfqq, moving it to the request queue
     2297             :  * dispatch list.
    2298             :  */
    2299             : static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    2300             : {
    2301           0 :         struct request *rq;
    2302           0 : 
    2303           0 :         BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
    2304           0 : 
    2305           0 :         if (!cfq_may_dispatch(cfqd, cfqq))
    2306           0 :                 return false;
    2307             : 
    2308             :         /*
    2309             :          * follow expired path, else get first next available
    2310             :          */
    2311           0 :         rq = cfq_check_fifo(cfqq);
    2312           0 :         if (!rq)
    2313           0 :                 rq = cfqq->next_rq;
    2314             : 
    2315             :         /*
    2316             :          * insert request into driver dispatch list
    2317             :          */
    2318           0 :         cfq_dispatch_insert(cfqd->queue, rq);
    2319             : 
    2320           0 :         if (!cfqd->active_cic) {
    2321           0 :                 struct cfq_io_context *cic = RQ_CIC(rq);
    2322             : 
    2323           0 :                 atomic_long_inc(&cic->ioc->refcount);
    2324           0 :                 cfqd->active_cic = cic;
    2325             :         }
    2326             : 
    2327           0 :         return true;
    2328             : }
    2329             : 
    2330             : /*
    2331             :  * Find the cfqq that we need to service and move a request from that to the
    2332             :  * dispatch list
    2333             :  */
    2334             : static int cfq_dispatch_requests(struct request_queue *q, int force)
    2335             : {
    2336           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    2337           0 :         struct cfq_queue *cfqq;
    2338           0 : 
    2339           0 :         if (!cfqd->busy_queues)
    2340           0 :                 return 0;
    2341           0 : 
    2342           0 :         if (unlikely(force))
    2343           0 :                 return cfq_forced_dispatch(cfqd);
    2344             : 
    2345           0 :         cfqq = cfq_select_queue(cfqd);
    2346           0 :         if (!cfqq)
    2347           0 :                 return 0;
    2348             : 
    2349             :         /*
    2350             :          * Dispatch a request from this cfqq, if it is allowed
    2351             :          */
    2352           0 :         if (!cfq_dispatch_request(cfqd, cfqq))
    2353           0 :                 return 0;
    2354             : 
    2355           0 :         cfqq->slice_dispatch++;
    2356           0 :         cfq_clear_cfqq_must_dispatch(cfqq);
    2357             : 
    2358             :         /*
     2359             :          * Expire an async queue immediately if it has used up its slice. An
     2360             :          * idle queue always expires after one dispatch round.
    2361             :          */
    2362           0 :         if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
    2363             :             cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
    2364             :             cfq_class_idle(cfqq))) {
    2365           0 :                 cfqq->slice_end = jiffies + 1;
    2366           0 :                 cfq_slice_expired(cfqd, 0);
    2367             :         }
    2368             : 
    2369             :         cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
    2370           0 :         return 1;
    2371             : }
    2372             : 
    2373             : /*
    2374             :  * task holds one reference to the queue, dropped when task exits. each rq
    2375             :  * in-flight on this queue also holds a reference, dropped when rq is freed.
    2376             :  *
    2377             :  * Each cfq queue took a reference on the parent group. Drop it now.
    2378             :  * queue lock must be held here.
    2379             :  */
    2380             : static void cfq_put_queue(struct cfq_queue *cfqq)
    2381             : {
    2382           0 :         struct cfq_data *cfqd = cfqq->cfqd;
    2383           0 :         struct cfq_group *cfqg, *orig_cfqg;
    2384           0 : 
    2385           0 :         BUG_ON(atomic_read(&cfqq->ref) <= 0);
    2386           0 : 
    2387           0 :         if (!atomic_dec_and_test(&cfqq->ref))
    2388           0 :                 return;
    2389           0 : 
    2390           0 :         cfq_log_cfqq(cfqd, cfqq, "put_queue");
    2391           0 :         BUG_ON(rb_first(&cfqq->sort_list));
    2392           0 :         BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
    2393           0 :         cfqg = cfqq->cfqg;
    2394           0 :         orig_cfqg = cfqq->orig_cfqg;
    2395             : 
    2396           0 :         if (unlikely(cfqd->active_queue == cfqq)) {
    2397           0 :                 __cfq_slice_expired(cfqd, cfqq, 0);
    2398           0 :                 cfq_schedule_dispatch(cfqd);
    2399             :         }
    2400             : 
    2401           0 :         BUG_ON(cfq_cfqq_on_rr(cfqq));
    2402           0 :         kmem_cache_free(cfq_pool, cfqq);
    2403           0 :         cfq_put_cfqg(cfqg);
    2404           0 :         if (orig_cfqg)
    2405           0 :                 cfq_put_cfqg(orig_cfqg);
    2406           0 : }
    2407             : 
    2408             : /*
    2409             :  * Must always be called with the rcu_read_lock() held
    2410             :  */
    2411             : static void
    2412             : __call_for_each_cic(struct io_context *ioc,
    2413             :                     void (*func)(struct io_context *, struct cfq_io_context *))
    2414             : {
    2415          11 :         struct cfq_io_context *cic;
    2416          11 :         struct hlist_node *n;
    2417          11 : 
    2418         132 :         hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
    2419          66 :                 func(ioc, cic);
    2420          22 : }
    2421             : 
    2422             : /*
    2423             :  * Call func for each cic attached to this ioc.
    2424             :  */
    2425          11 : static void
    2426             : call_for_each_cic(struct io_context *ioc,
    2427             :                   void (*func)(struct io_context *, struct cfq_io_context *))
    2428             : {
    2429           0 :         rcu_read_lock();
    2430           0 :         __call_for_each_cic(ioc, func);
    2431           0 :         rcu_read_unlock();
    2432           0 : }
    2433             : 
    2434             : static void cfq_cic_free_rcu(struct rcu_head *head)
    2435             : {
    2436           0 :         struct cfq_io_context *cic;
    2437           0 : 
    2438           0 :         cic = container_of(head, struct cfq_io_context, rcu_head);
    2439           0 : 
    2440           0 :         kmem_cache_free(cfq_ioc_pool, cic);
    2441           0 :         elv_ioc_count_dec(cfq_ioc_count);
    2442           0 : 
    2443           0 :         if (ioc_gone) {
    2444             :                 /*
    2445             :                  * CFQ scheduler is exiting, grab exit lock and check
    2446             :                  * the pending io context count. If it hits zero,
    2447             :                  * complete ioc_gone and set it back to NULL
    2448             :                  */
    2449           0 :                 spin_lock(&ioc_gone_lock);
    2450           0 :                 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
    2451           0 :                         complete(ioc_gone);
    2452           0 :                         ioc_gone = NULL;
    2453             :                 }
    2454           0 :                 spin_unlock(&ioc_gone_lock);
    2455             :         }
    2456           0 : }
    2457             : 
    2458             : static void cfq_cic_free(struct cfq_io_context *cic)
    2459             : {
    2460          11 :         call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
    2461          11 : }
    2462             : 
    2463             : static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
    2464             : {
    2465          11 :         unsigned long flags;
    2466          11 : 
    2467          77 :         BUG_ON(!cic->dead_key);
    2468             : 
    2469          33 :         spin_lock_irqsave(&ioc->lock, flags);
    2470          11 :         radix_tree_delete(&ioc->radix_root, cic->dead_key);
    2471          22 :         hlist_del_rcu(&cic->cic_list);
    2472          22 :         spin_unlock_irqrestore(&ioc->lock, flags);
    2473             : 
    2474          22 :         cfq_cic_free(cic);
    2475          11 : }
    2476             : 
    2477             : /*
     2478             :  * Must be called with rcu_read_lock() held, or with preemption otherwise
     2479             :  * disabled. The only two callers are ->dtor(), which runs under the
     2480             :  * rcu_read_lock(), and ->trim(), which is called with the task lock held.
    2481             :  */
    2482             : static void cfq_free_io_context(struct io_context *ioc)
    2483             : {
    2484             :         /*
    2485             :          * ioc->refcount is zero here, or we are called from elv_unregister(),
     2486             :          * so no more cic's are allowed to be linked into this ioc. It should
     2487             :          * therefore be safe to iterate over the known list; we will see all
     2488             :          * cic's since no new ones are added.
    2489             :          */
    2490          22 :         __call_for_each_cic(ioc, cic_free_func);
    2491          11 : }
    2492             : 
    2493             : static void cfq_put_cooperator(struct cfq_queue *cfqq)
    2494             : {
    2495           0 :         struct cfq_queue *__cfqq, *next;
    2496           0 : 
    2497           0 :         /*
    2498           0 :          * If this queue was scheduled to merge with another queue, be
    2499             :          * sure to drop the reference taken on that queue (and others in
    2500             :          * the merge chain).  See cfq_setup_merge and cfq_merge_cfqqs.
    2501             :          */
    2502           0 :         __cfqq = cfqq->new_cfqq;
    2503           0 :         while (__cfqq) {
    2504           0 :                 if (__cfqq == cfqq) {
    2505           0 :                         WARN(1, "cfqq->new_cfqq loop detected\n");
    2506           0 :                         break;
    2507             :                 }
    2508           0 :                 next = __cfqq->new_cfqq;
    2509           0 :                 cfq_put_queue(__cfqq);
    2510           0 :                 __cfqq = next;
    2511             :         }
    2512           0 : }
    2513             : 
    2514             : static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    2515             : {
    2516           0 :         if (unlikely(cfqq == cfqd->active_queue)) {
    2517           0 :                 __cfq_slice_expired(cfqd, cfqq, 0);
    2518           0 :                 cfq_schedule_dispatch(cfqd);
    2519             :         }
    2520             : 
    2521           0 :         cfq_put_cooperator(cfqq);
    2522             : 
    2523           0 :         cfq_put_queue(cfqq);
    2524           0 : }
    2525             : 
    2526             : static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
    2527             :                                          struct cfq_io_context *cic)
    2528           0 : {
    2529           0 :         struct io_context *ioc = cic->ioc;
    2530             : 
    2531           0 :         list_del_init(&cic->queue_list);
    2532             : 
    2533             :         /*
    2534             :          * Make sure key == NULL is seen for dead queues
    2535             :          */
    2536           0 :         smp_wmb();
    2537           0 :         cic->dead_key = (unsigned long) cic->key;
    2538           0 :         cic->key = NULL;
    2539             : 
    2540           0 :         rcu_read_lock();
    2541           0 :         if (rcu_dereference(ioc->ioc_data) == cic) {
    2542           0 :                 rcu_read_unlock();
    2543           0 :                 spin_lock(&ioc->lock);
    2544           0 :                 rcu_assign_pointer(ioc->ioc_data, NULL);
    2545           0 :                 spin_unlock(&ioc->lock);
    2546             :         } else
    2547           0 :                 rcu_read_unlock();
    2548             : 
    2549           0 :         if (cic->cfqq[BLK_RW_ASYNC]) {
    2550           0 :                 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
    2551           0 :                 cic->cfqq[BLK_RW_ASYNC] = NULL;
    2552             :         }
    2553             : 
    2554           0 :         if (cic->cfqq[BLK_RW_SYNC]) {
    2555           0 :                 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
    2556           0 :                 cic->cfqq[BLK_RW_SYNC] = NULL;
    2557             :         }
    2558           0 : }
    2559             : 
    2560             : static void cfq_exit_single_io_context(struct io_context *ioc,
    2561             :                                        struct cfq_io_context *cic)
    2562           0 : {
    2563           0 :         struct cfq_data *cfqd = cic->key;
    2564           0 : 
    2565           0 :         if (cfqd) {
    2566           0 :                 struct request_queue *q = cfqd->queue;
    2567             :                 unsigned long flags;
    2568             : 
    2569           0 :                 spin_lock_irqsave(q->queue_lock, flags);
    2570             : 
    2571             :                 /*
    2572             :                  * Ensure we get a fresh copy of the ->key to prevent
    2573             :                  * race between exiting task and queue
    2574             :                  */
    2575             :                 smp_read_barrier_depends();
    2576           0 :                 if (cic->key)
    2577           0 :                         __cfq_exit_single_io_context(cfqd, cic);
    2578             : 
    2579           0 :                 spin_unlock_irqrestore(q->queue_lock, flags);
    2580             :         }
    2581           0 : }
    2582             : 
    2583             : /*
     2584             :  * The process that the ioc belongs to has exited; we need to clean up
     2585             :  * and put the internal structures we hold that belong to that process.
    2586             :  */
    2587             : static void cfq_exit_io_context(struct io_context *ioc)
    2588             : {
    2589           0 :         call_for_each_cic(ioc, cfq_exit_single_io_context);
    2590           0 : }
    2591             : 
    2592             : static struct cfq_io_context *
    2593             : cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
    2594             : {
    2595           0 :         struct cfq_io_context *cic;
    2596           0 : 
    2597           0 :         cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
    2598           0 :                                                         cfqd->queue->node);
    2599           0 :         if (cic) {
    2600           0 :                 cic->last_end_request = jiffies;
    2601           0 :                 INIT_LIST_HEAD(&cic->queue_list);
    2602           0 :                 INIT_HLIST_NODE(&cic->cic_list);
    2603           0 :                 cic->dtor = cfq_free_io_context;
    2604           0 :                 cic->exit = cfq_exit_io_context;
    2605           0 :                 elv_ioc_count_inc(cfq_ioc_count);
    2606           0 :         }
    2607             : 
    2608           0 :         return cic;
    2609             : }
    2610             : 
    2611             : static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
    2612             : {
    2613           0 :         struct task_struct *tsk = current;
    2614           0 :         int ioprio_class;
    2615           0 : 
    2616           0 :         if (!cfq_cfqq_prio_changed(cfqq))
    2617           0 :                 return;
    2618           0 : 
    2619           0 :         ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
    2620           0 :         switch (ioprio_class) {
    2621           0 :         default:
    2622           0 :                 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
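                      :                 /* fall through: treat an unknown class like no prio set */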
    2623           0 :         case IOPRIO_CLASS_NONE:
    2624           0 :                 /*
    2625             :                  * no prio set, inherit CPU scheduling settings
    2626             :                  */
    2627           0 :                 cfqq->ioprio = task_nice_ioprio(tsk);
    2628           0 :                 cfqq->ioprio_class = task_nice_ioclass(tsk);
    2629           0 :                 break;
    2630           0 :         case IOPRIO_CLASS_RT:
    2631           0 :                 cfqq->ioprio = task_ioprio(ioc);
    2632           0 :                 cfqq->ioprio_class = IOPRIO_CLASS_RT;
    2633           0 :                 break;
    2634           0 :         case IOPRIO_CLASS_BE:
    2635           0 :                 cfqq->ioprio = task_ioprio(ioc);
    2636           0 :                 cfqq->ioprio_class = IOPRIO_CLASS_BE;
    2637           0 :                 break;
    2638           0 :         case IOPRIO_CLASS_IDLE:
    2639           0 :                 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
    2640           0 :                 cfqq->ioprio = 7;
    2641           0 :                 cfq_clear_cfqq_idle_window(cfqq);
    2642           0 :                 break;
    2643             :         }
    2644             : 
    2645             :         /*
    2646             :          * keep track of original prio settings in case we have to temporarily
    2647             :          * elevate the priority of this queue
    2648             :          */
    2649           0 :         cfqq->org_ioprio = cfqq->ioprio;
    2650           0 :         cfqq->org_ioprio_class = cfqq->ioprio_class;
    2651           0 :         cfq_clear_cfqq_prio_changed(cfqq);
    2652           0 : }
    2653             : 
    2654             : static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
    2655             : {
    2656           0 :         struct cfq_data *cfqd = cic->key;
    2657           0 :         struct cfq_queue *cfqq;
    2658           0 :         unsigned long flags;
    2659           0 : 
    2660           0 :         if (unlikely(!cfqd))
    2661           0 :                 return;
    2662             : 
    2663           0 :         spin_lock_irqsave(cfqd->queue->queue_lock, flags);
    2664             : 
    2665           0 :         cfqq = cic->cfqq[BLK_RW_ASYNC];
    2666           0 :         if (cfqq) {
    2667             :                 struct cfq_queue *new_cfqq;
    2668           0 :                 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
    2669             :                                                 GFP_ATOMIC);
    2670           0 :                 if (new_cfqq) {
    2671           0 :                         cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
    2672           0 :                         cfq_put_queue(cfqq);
    2673             :                 }
    2674             :         }
    2675             : 
    2676           0 :         cfqq = cic->cfqq[BLK_RW_SYNC];
    2677           0 :         if (cfqq)
    2678           0 :                 cfq_mark_cfqq_prio_changed(cfqq);
    2679             : 
    2680           0 :         spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
    2681           0 : }
    2682             : 
    2683             : static void cfq_ioc_set_ioprio(struct io_context *ioc)
    2684             : {
    2685           0 :         call_for_each_cic(ioc, changed_ioprio);
    2686           0 :         ioc->ioprio_changed = 0;
    2687           0 : }
    2688             : 
    2689             : static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    2690             :                           pid_t pid, bool is_sync)
    2691             : {
    2692           0 :         RB_CLEAR_NODE(&cfqq->rb_node);
    2693           0 :         RB_CLEAR_NODE(&cfqq->p_node);
    2694           0 :         INIT_LIST_HEAD(&cfqq->fifo);
    2695             : 
    2696           0 :         atomic_set(&cfqq->ref, 0);
    2697           0 :         cfqq->cfqd = cfqd;
    2698             : 
    2699           0 :         cfq_mark_cfqq_prio_changed(cfqq);
    2700             : 
    2701           0 :         if (is_sync) {
    2702           0 :                 if (!cfq_class_idle(cfqq))
    2703           0 :                         cfq_mark_cfqq_idle_window(cfqq);
    2704           0 :                 cfq_mark_cfqq_sync(cfqq);
    2705             :         }
    2706           0 :         cfqq->pid = pid;
    2707           0 : }
    2708             : 
    2709             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
    2710             : static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
    2711             : {
    2712             :         struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
    2713             :         struct cfq_data *cfqd = cic->key;
    2714             :         unsigned long flags;
    2715             :         struct request_queue *q;
    2716             : 
    2717             :         if (unlikely(!cfqd))
    2718             :                 return;
    2719             : 
    2720             :         q = cfqd->queue;
    2721             : 
    2722             :         spin_lock_irqsave(q->queue_lock, flags);
    2723             : 
    2724             :         if (sync_cfqq) {
    2725             :                 /*
    2726             :                  * Drop reference to sync queue. A new sync queue will be
     2727             :                  * assigned in the new group upon arrival of a fresh request.
    2728             :                  */
    2729             :                 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup");
    2730             :                 cic_set_cfqq(cic, NULL, 1);
    2731             :                 cfq_put_queue(sync_cfqq);
    2732             :         }
    2733             : 
    2734             :         spin_unlock_irqrestore(q->queue_lock, flags);
    2735             : }
    2736             : 
    2737             : static void cfq_ioc_set_cgroup(struct io_context *ioc)
    2738             : {
    2739             :         call_for_each_cic(ioc, changed_cgroup);
    2740             :         ioc->cgroup_changed = 0;
    2741             : }
    2742             : #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
    2743             : 
    2744             : static struct cfq_queue *
    2745             : cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
    2746             :                      struct io_context *ioc, gfp_t gfp_mask)
    2747             : {
    2748           0 :         struct cfq_queue *cfqq, *new_cfqq = NULL;
    2749           0 :         struct cfq_io_context *cic;
    2750           0 :         struct cfq_group *cfqg;
    2751           0 : 
    2752           0 : retry:
    2753           0 :         cfqg = cfq_get_cfqg(cfqd, 1);
    2754           0 :         cic = cfq_cic_lookup(cfqd, ioc);
    2755             :         /* cic always exists here */
    2756           0 :         cfqq = cic_to_cfqq(cic, is_sync);
    2757             : 
    2758             :         /*
    2759             :          * Always try a new alloc if we fell back to the OOM cfqq
    2760             :          * originally, since it should just be a temporary situation.
    2761             :          */
    2762           0 :         if (!cfqq || cfqq == &cfqd->oom_cfqq) {
    2763           0 :                 cfqq = NULL;
    2764           0 :                 if (new_cfqq) {
    2765           0 :                         cfqq = new_cfqq;
    2766           0 :                         new_cfqq = NULL;
    2767           0 :                 } else if (gfp_mask & __GFP_WAIT) {
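                      :                         /*
                      :                          * The allocation below may sleep, so drop the
                      :                          * queue lock around it and redo the lookup
                      :                          * (goto retry), since the cic/cfqq state may
                      :                          * have changed while the lock was not held.
                      :                          */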
    2768           0 :                         spin_unlock_irq(cfqd->queue->queue_lock);
    2769           0 :                         new_cfqq = kmem_cache_alloc_node(cfq_pool,
    2770             :                                         gfp_mask | __GFP_ZERO,
    2771             :                                         cfqd->queue->node);
    2772           0 :                         spin_lock_irq(cfqd->queue->queue_lock);
    2773           0 :                         if (new_cfqq)
    2774           0 :                                 goto retry;
    2775             :                 } else {
    2776           0 :                         cfqq = kmem_cache_alloc_node(cfq_pool,
    2777             :                                         gfp_mask | __GFP_ZERO,
    2778             :                                         cfqd->queue->node);
    2779             :                 }
    2780             : 
    2781           0 :                 if (cfqq) {
    2782           0 :                         cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
    2783           0 :                         cfq_init_prio_data(cfqq, ioc);
    2784           0 :                         cfq_link_cfqq_cfqg(cfqq, cfqg);
    2785             :                         cfq_log_cfqq(cfqd, cfqq, "alloced");
    2786             :                 } else
    2787           0 :                         cfqq = &cfqd->oom_cfqq;
    2788             :         }
    2789             : 
    2790           0 :         if (new_cfqq)
    2791           0 :                 kmem_cache_free(cfq_pool, new_cfqq);
    2792             : 
    2793           0 :         return cfqq;
    2794             : }
    2795             : 
    2796             : static struct cfq_queue **
    2797             : cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
    2798             : {
    2799             :         switch (ioprio_class) {
    2800           0 :         case IOPRIO_CLASS_RT:
    2801           0 :                 return &cfqd->async_cfqq[0][ioprio];
    2802           0 :         case IOPRIO_CLASS_BE:
    2803           0 :                 return &cfqd->async_cfqq[1][ioprio];
    2804           0 :         case IOPRIO_CLASS_IDLE:
    2805           0 :                 return &cfqd->async_idle_cfqq;
    2806           0 :         default:
    2807           0 :                 BUG();
    2808             :         }
    2809           0 : }
    2810             : 
    2811             : static struct cfq_queue *
    2812             : cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
    2813             :               gfp_t gfp_mask)
    2814             : {
    2815           0 :         const int ioprio = task_ioprio(ioc);
    2816           0 :         const int ioprio_class = task_ioprio_class(ioc);
    2817           0 :         struct cfq_queue **async_cfqq = NULL;
    2818           0 :         struct cfq_queue *cfqq = NULL;
    2819           0 : 
    2820           0 :         if (!is_sync) {
    2821           0 :                 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
    2822           0 :                 cfqq = *async_cfqq;
    2823             :         }
    2824             : 
    2825           0 :         if (!cfqq)
    2826           0 :                 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
    2827             : 
    2828             :         /*
    2829             :          * pin the queue now that it's allocated, scheduler exit will prune it
    2830             :          */
    2831           0 :         if (!is_sync && !(*async_cfqq)) {
    2832           0 :                 atomic_inc(&cfqq->ref);
    2833           0 :                 *async_cfqq = cfqq;
    2834             :         }
    2835             : 
    2836           0 :         atomic_inc(&cfqq->ref);
    2837           0 :         return cfqq;
    2838             : }
    2839             : 
    2840             : /*
    2841             :  * We drop cfq io contexts lazily, so we may find a dead one.
    2842             :  */
    2843             : static void
    2844             : cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
    2845             :                   struct cfq_io_context *cic)
    2846           0 : {
    2847           0 :         unsigned long flags;
    2848           0 : 
    2849           0 :         WARN_ON(!list_empty(&cic->queue_list));
    2850           0 : 
    2851           0 :         spin_lock_irqsave(&ioc->lock, flags);
    2852             : 
    2853           0 :         BUG_ON(ioc->ioc_data == cic);
    2854             : 
    2855           0 :         radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
    2856           0 :         hlist_del_rcu(&cic->cic_list);
    2857           0 :         spin_unlock_irqrestore(&ioc->lock, flags);
    2858             : 
    2859           0 :         cfq_cic_free(cic);
    2860           0 : }
    2861             : 
    2862             : static struct cfq_io_context *
    2863             : cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
    2864             : {
    2865           0 :         struct cfq_io_context *cic;
    2866           0 :         unsigned long flags;
    2867           0 :         void *k;
    2868           0 : 
    2869           0 :         if (unlikely(!ioc))
    2870           0 :                 return NULL;
    2871           0 : 
    2872           0 :         rcu_read_lock();
    2873             : 
    2874             :         /*
    2875             :          * we maintain a last-hit cache, to avoid browsing over the tree
    2876             :          */
    2877           0 :         cic = rcu_dereference(ioc->ioc_data);
    2878           0 :         if (cic && cic->key == cfqd) {
    2879           0 :                 rcu_read_unlock();
    2880           0 :                 return cic;
    2881             :         }
    2882             : 
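                      :         /*
                      :          * Slow path: look the cic up in the radix tree. A cic whose ->key
                      :          * has been cleared belongs to a queue that already exited; drop it
                      :          * and retry the lookup. On a hit, cache the cic in ioc->ioc_data.
                      :          */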
    2883             :         do {
    2884           0 :                 cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
    2885           0 :                 rcu_read_unlock();
    2886           0 :                 if (!cic)
    2887           0 :                         break;
    2888             :                 /* ->key must be copied to avoid race with cfq_exit_queue() */
    2889           0 :                 k = cic->key;
    2890           0 :                 if (unlikely(!k)) {
    2891           0 :                         cfq_drop_dead_cic(cfqd, ioc, cic);
    2892           0 :                         rcu_read_lock();
    2893           0 :                         continue;
    2894             :                 }
    2895             : 
    2896           0 :                 spin_lock_irqsave(&ioc->lock, flags);
    2897           0 :                 rcu_assign_pointer(ioc->ioc_data, cic);
    2898           0 :                 spin_unlock_irqrestore(&ioc->lock, flags);
    2899           0 :                 break;
    2900           0 :         } while (1);
    2901           0 : 
    2902           0 :         return cic;
    2903             : }
    2904             : 
    2905             : /*
     2906             :  * Add cic into ioc, using cfqd as the search key. This enables us to look up
     2907             :  * the process-specific cfq io context when entered from the block layer.
     2908             :  * Also adds the cic to a per-cfqd list, used when this queue is removed.
    2909             :  */
    2910             : static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
    2911             :                         struct cfq_io_context *cic, gfp_t gfp_mask)
    2912             : {
    2913           0 :         unsigned long flags;
    2914           0 :         int ret;
    2915           0 : 
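                      :         /*
                      :          * radix_tree_preload() preallocates tree nodes so that the insert
                      :          * below, done under the ioc lock, does not have to allocate
                      :          * memory; it must be paired with radix_tree_preload_end().
                      :          */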
    2916           0 :         ret = radix_tree_preload(gfp_mask);
    2917           0 :         if (!ret) {
    2918           0 :                 cic->ioc = ioc;
    2919           0 :                 cic->key = cfqd;
    2920             : 
    2921           0 :                 spin_lock_irqsave(&ioc->lock, flags);
    2922           0 :                 ret = radix_tree_insert(&ioc->radix_root,
    2923             :                                                 (unsigned long) cfqd, cic);
    2924           0 :                 if (!ret)
    2925           0 :                         hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
    2926           0 :                 spin_unlock_irqrestore(&ioc->lock, flags);
    2927             : 
    2928           0 :                 radix_tree_preload_end();
    2929             : 
    2930           0 :                 if (!ret) {
    2931           0 :                         spin_lock_irqsave(cfqd->queue->queue_lock, flags);
    2932           0 :                         list_add(&cic->queue_list, &cfqd->cic_list);
    2933           0 :                         spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
    2934             :                 }
    2935             :         }
    2936             : 
    2937           0 :         if (ret)
    2938           0 :                 printk(KERN_ERR "cfq: cic link failed!\n");
    2939             : 
    2940           0 :         return ret;
    2941             : }
    2942             : 
    2943             : /*
     2944             :  * Set up the general io context and the cfq io context. There can be
     2945             :  * several cfq io contexts per general io context, if this process is
     2946             :  * doing io to more than one device managed by cfq.
    2947             :  */
    2948             : static struct cfq_io_context *
    2949             : cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
    2950             : {
    2951           0 :         struct io_context *ioc = NULL;
    2952           0 :         struct cfq_io_context *cic;
    2953           0 : 
    2954           0 :         might_sleep_if(gfp_mask & __GFP_WAIT);
    2955             : 
    2956           0 :         ioc = get_io_context(gfp_mask, cfqd->queue->node);
    2957           0 :         if (!ioc)
    2958           0 :                 return NULL;
    2959             : 
    2960           0 :         cic = cfq_cic_lookup(cfqd, ioc);
    2961           0 :         if (cic)
    2962           0 :                 goto out;
    2963             : 
    2964           0 :         cic = cfq_alloc_io_context(cfqd, gfp_mask);
    2965           0 :         if (cic == NULL)
    2966           0 :                 goto err;
    2967             : 
    2968           0 :         if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
    2969           0 :                 goto err_free;
    2970             : 
    2971             : out:
    2972             :         smp_read_barrier_depends();
    2973           0 :         if (unlikely(ioc->ioprio_changed))
    2974           0 :                 cfq_ioc_set_ioprio(ioc);
    2975             : 
    2976             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
    2977             :         if (unlikely(ioc->cgroup_changed))
    2978             :                 cfq_ioc_set_cgroup(ioc);
    2979             : #endif
    2980           0 :         return cic;
    2981           0 : err_free:
    2982           0 :         cfq_cic_free(cic);
    2983             : err:
    2984           0 :         put_io_context(ioc);
    2985           0 :         return NULL;
    2986             : }
    2987             : 
    2988             : static void
    2989             : cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
    2990             : {
    2991           0 :         unsigned long elapsed = jiffies - cic->last_end_request;
    2992           0 :         unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
    2993           0 : 
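                      :         /*
                      :          * Decaying averages in fixed point: each update keeps 7/8 of the
                      :          * old value and folds in the new sample with weight 1/8, with
                      :          * samples scaled by 256 to preserve precision in integer math.
                      :          */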
    2994           0 :         cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
    2995           0 :         cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
    2996           0 :         cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
    2997           0 : }
    2998             : 
    2999             : static void
    3000             : cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    3001             :                        struct request *rq)
    3002             : {
    3003           0 :         sector_t sdist;
    3004           0 :         u64 total;
    3005           0 : 
    3006           0 :         if (!cfqq->last_request_pos)
    3007           0 :                 sdist = 0;
    3008           0 :         else if (cfqq->last_request_pos < blk_rq_pos(rq))
    3009           0 :                 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
    3010           0 :         else
    3011           0 :                 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
    3012           0 : 
    3013           0 :         /*
    3014             :          * Don't allow the seek distance to get too large from the
    3015             :          * odd fragment, pagein, etc
    3016             :          */
    3017           0 :         if (cfqq->seek_samples <= 60) /* second&third seek */
    3018           0 :                 sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
    3019             :         else
    3020           0 :                 sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
    3021             : 
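                      :         /*
                      :          * Same 7/8 decaying average for the seek distance; do_div() is
                      :          * used because seek_total is 64-bit and must be divided with
                      :          * 64-bit arithmetic on 32-bit hosts.
                      :          */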
    3022           0 :         cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
    3023           0 :         cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
    3024           0 :         total = cfqq->seek_total + (cfqq->seek_samples/2);
    3025           0 :         do_div(total, cfqq->seek_samples);
    3026           0 :         cfqq->seek_mean = (sector_t)total;
    3027           0 : }
    3028             : 
    3029             : /*
    3030             :  * Disable idle window if the process thinks too long or seeks so much that
    3031             :  * it doesn't matter
    3032             :  */
    3033             : static void
    3034             : cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    3035             :                        struct cfq_io_context *cic)
    3036             : {
    3037           0 :         int old_idle, enable_idle;
    3038           0 : 
    3039           0 :         /*
    3040           0 :          * Don't idle for async or idle io prio class
    3041           0 :          */
    3042           0 :         if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
    3043           0 :                 return;
    3044             : 
    3045           0 :         enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
    3046             : 
    3047           0 :         if (cfqq->queued[0] + cfqq->queued[1] >= 4)
    3048           0 :                 cfq_mark_cfqq_deep(cfqq);
    3049             : 
    3050           0 :         if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
    3051             :             (!cfq_cfqq_deep(cfqq) && sample_valid(cfqq->seek_samples)
    3052             :              && CFQQ_SEEKY(cfqq)))
    3053           0 :                 enable_idle = 0;
    3054           0 :         else if (sample_valid(cic->ttime_samples)) {
    3055           0 :                 if (cic->ttime_mean > cfqd->cfq_slice_idle)
    3056           0 :                         enable_idle = 0;
    3057             :                 else
    3058           0 :                         enable_idle = 1;
    3059             :         }
    3060             : 
    3061           0 :         if (old_idle != enable_idle) {
    3062             :                 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
    3063           0 :                 if (enable_idle)
    3064           0 :                         cfq_mark_cfqq_idle_window(cfqq);
    3065             :                 else
    3066           0 :                         cfq_clear_cfqq_idle_window(cfqq);
    3067           0 :         }
    3068             : }
    3069             : 
    3070             : /*
     3071             :  * Check if new_cfqq should preempt the currently active queue. Return false
     3072             :  * if not (or if we aren't sure); returning true will cause a preempt.
    3073             :  */
    3074             : static bool
    3075             : cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
    3076             :                    struct request *rq)
    3077             : {
    3078           0 :         struct cfq_queue *cfqq;
    3079           0 : 
    3080           0 :         cfqq = cfqd->active_queue;
    3081           0 :         if (!cfqq)
    3082           0 :                 return false;
    3083           0 : 
    3084           0 :         if (cfq_class_idle(new_cfqq))
    3085           0 :                 return false;
    3086             : 
    3087           0 :         if (cfq_class_idle(cfqq))
    3088           0 :                 return true;
    3089             : 
    3090             :         /*
    3091             :          * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
    3092             :          */
    3093           0 :         if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
    3094           0 :                 return false;
    3095             : 
    3096             :         /*
    3097             :          * if the new request is sync, but the currently running queue is
    3098             :          * not, let the sync request have priority.
    3099             :          */
    3100           0 :         if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
    3101           0 :                 return true;
    3102             : 
    3103           0 :         if (new_cfqq->cfqg != cfqq->cfqg)
    3104           0 :                 return false;
    3105             : 
    3106           0 :         if (cfq_slice_used(cfqq))
    3107           0 :                 return true;
    3108             : 
    3109             :         /* Allow preemption only if we are idling on sync-noidle tree */
    3110           0 :         if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
    3111             :             cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
    3112             :             new_cfqq->service_tree->count == 2 &&
    3113             :             RB_EMPTY_ROOT(&cfqq->sort_list))
    3114           0 :                 return true;
    3115             : 
    3116             :         /*
    3117             :          * So both queues are sync. Let the new request get disk time if
    3118             :          * it's a metadata request and the current queue is doing regular IO.
    3119             :          */
    3120           0 :         if (rq_is_meta(rq) && !cfqq->meta_pending)
    3121           0 :                 return true;
    3122             : 
    3123             :         /*
    3124             :          * Allow an RT request to preempt an ongoing non-RT cfqq timeslice.
    3125             :          */
    3126           0 :         if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
    3127           0 :                 return true;
    3128             : 
    3129           0 :         if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
    3130           0 :                 return false;
    3131             : 
    3132             :         /*
    3133             :          * if this request is as good as one we would expect from the
    3134             :          * current cfqq, let it preempt
    3135             :          */
    3136           0 :         if (cfq_rq_close(cfqd, cfqq, rq, true))
    3137           0 :                 return true;
    3138             : 
    3139           0 :         return false;
    3140             : }
    3141             : 
    3142             : /*
    3143             :  * cfqq preempts the active queue. If we allowed preempt with no slice left,
    3144             :  * let it have half of its nominal slice.
    3145             :  */
    3146             : static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    3147             : {
    3148           0 :         cfq_log_cfqq(cfqd, cfqq, "preempt");
    3149           0 :         cfq_slice_expired(cfqd, 1);
    3150             : 
    3151             :         /*
    3152             :          * Put the new queue at the front of the current list,
    3153             :          * so we know that it will be selected next.
    3154             :          */
    3155           0 :         BUG_ON(!cfq_cfqq_on_rr(cfqq));
    3156             : 
    3157           0 :         cfq_service_tree_add(cfqd, cfqq, 1);
    3158             : 
    3159           0 :         cfqq->slice_end = 0;
    3160           0 :         cfq_mark_cfqq_slice_new(cfqq);
    3161           0 : }
    3162             : 
    3163             : /*
    3164             :  * Called when a new fs request (rq) is added (to cfqq). Check if there's
    3165             :  * something we should do about it.
    3166             :  */
    3167             : static void
    3168             : cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
    3169             :                 struct request *rq)
    3170           0 : {
    3171           0 :         struct cfq_io_context *cic = RQ_CIC(rq);
    3172           0 : 
    3173           0 :         cfqd->rq_queued++;
    3174           0 :         if (rq_is_meta(rq))
    3175           0 :                 cfqq->meta_pending++;
    3176             : 
    3177           0 :         cfq_update_io_thinktime(cfqd, cic);
    3178           0 :         cfq_update_io_seektime(cfqd, cfqq, rq);
    3179           0 :         cfq_update_idle_window(cfqd, cfqq, cic);
    3180             : 
    3181           0 :         cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
    3182             : 
    3183           0 :         if (cfqq == cfqd->active_queue) {
    3184             :                 /*
    3185             :                  * Remember that we saw a request from this process, but
    3186             :                  * don't start queuing just yet. Otherwise we risk seeing lots
    3187             :                  * of tiny requests, because we disrupt the normal plugging
    3188             :                  * and merging. If the request is already larger than a single
    3189             :                  * page, let it rip immediately. For that case we assume that
    3190             :                  * merging is already done. Ditto for a busy system that
    3191             :                  * has other work pending: don't risk delaying until the
    3192             :                  * idle timer unplug to continue working.
    3193             :                  */
    3194           0 :                 if (cfq_cfqq_wait_request(cfqq)) {
    3195           0 :                         if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
    3196             :                             cfqd->busy_queues > 1) {
    3197           0 :                                 del_timer(&cfqd->idle_slice_timer);
    3198           0 :                                 cfq_clear_cfqq_wait_request(cfqq);
    3199           0 :                                 __blk_run_queue(cfqd->queue);
    3200             :                         } else
    3201           0 :                                 cfq_mark_cfqq_must_dispatch(cfqq);
    3202             :                 }
    3203           0 :         } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
    3204             :                 /*
    3205             :                  * not the active queue - expire the current slice if it is
    3206             :                  * idle and has expired its mean thinktime, or this new queue
    3207             :                  * has some old slice time left and is of higher priority, or
    3208             :                  * this new queue is RT and the current one is BE
    3209             :                  */
    3210           0 :                 cfq_preempt_queue(cfqd, cfqq);
    3211           0 :                 __blk_run_queue(cfqd->queue);
    3212             :         }
    3213           0 : }
    3214             : 
    3215             : static void cfq_insert_request(struct request_queue *q, struct request *rq)
    3216             : {
    3217           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    3218           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    3219           0 : 
    3220             :         cfq_log_cfqq(cfqd, cfqq, "insert_request");
    3221           0 :         cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
    3222             : 
    3223           0 :         rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
    3224           0 :         list_add_tail(&rq->queuelist, &cfqq->fifo);
    3225           0 :         cfq_add_rq_rb(rq);
    3226             : 
    3227           0 :         cfq_rq_enqueued(cfqd, cfqq, rq);
    3228           0 : }
    3229             : 
    3230             : /*
    3231             :  * Update hw_tag based on peak queue depth over 50 samples under
    3232             :  * sufficient load.
    3233             :  */
    3234             : static void cfq_update_hw_tag(struct cfq_data *cfqd)
    3235             : {
    3236           0 :         struct cfq_queue *cfqq = cfqd->active_queue;
    3237           0 : 
    3238           0 :         if (rq_in_driver(cfqd) > cfqd->hw_tag_est_depth)
    3239           0 :                 cfqd->hw_tag_est_depth = rq_in_driver(cfqd);
    3240           0 : 
    3241           0 :         if (cfqd->hw_tag == 1)
    3242           0 :                 return;
    3243             : 
    3244           0 :         if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
    3245             :             rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
    3246           0 :                 return;
    3247             : 
    3248             :         /*
    3249             :          * If the active queue doesn't have enough requests and can idle, cfq
    3250             :          * might not dispatch sufficient requests to hardware. Don't zero
    3251             :          * hw_tag in this case.
    3252             :          */
    3253           0 :         if (cfqq && cfq_cfqq_idle_window(cfqq) &&
    3254             :             cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
    3255             :             CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN)
    3256           0 :                 return;
    3257             : 
    3258           0 :         if (cfqd->hw_tag_samples++ < 50)
    3259           0 :                 return;
    3260             : 
    3261           0 :         if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
    3262           0 :                 cfqd->hw_tag = 1;
    3263             :         else
    3264           0 :                 cfqd->hw_tag = 0;
    3265           0 : }
    3266             : 
    3267             : static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
    3268             : {
    3269           0 :         struct cfq_io_context *cic = cfqd->active_cic;
    3270           0 : 
    3271             :         /* If the queue already has requests, don't wait */
    3272           0 :         if (!RB_EMPTY_ROOT(&cfqq->sort_list))
    3273           0 :                 return false;
    3274             : 
    3275             :         /* If there are other queues in the group, don't wait */
    3276           0 :         if (cfqq->cfqg->nr_cfqq > 1)
    3277           0 :                 return false;
    3278             : 
    3279           0 :         if (cfq_slice_used(cfqq))
    3280           0 :                 return true;
    3281             : 
    3282             :         /* if slice left is less than think time, wait busy */
    3283           0 :         if (cic && sample_valid(cic->ttime_samples)
    3284             :             && (cfqq->slice_end - jiffies < cic->ttime_mean))
    3285           0 :                 return true;
    3286             : 
    3287             :         /*
    3288             :          * If the think time is less than a jiffy, then ttime_mean = 0 and the
    3289             :          * check above will not be true. It might happen that the slice has
    3290             :          * not expired yet but will expire soon (4-5 ns) during select_queue().
    3291             :          * To cover the case where the think time is less than a jiffy, mark
    3292             :          * the queue wait busy if only 1 jiffy is left in the slice.
    3293             :          */
    3294           0 :         if (cfqq->slice_end - jiffies == 1)
    3295           0 :                 return true;
    3296             : 
    3297           0 :         return false;
    3298             : }
    3299             : 
    3300             : static void cfq_completed_request(struct request_queue *q, struct request *rq)
    3301             : {
    3302           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    3303           0 :         struct cfq_data *cfqd = cfqq->cfqd;
    3304           0 :         const int sync = rq_is_sync(rq);
    3305           0 :         unsigned long now;
    3306           0 : 
    3307           0 :         now = jiffies;
    3308           0 :         cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq));
    3309           0 : 
    3310           0 :         cfq_update_hw_tag(cfqd);
    3311           0 : 
    3312           0 :         WARN_ON(!cfqd->rq_in_driver[sync]);
    3313           0 :         WARN_ON(!cfqq->dispatched);
    3314           0 :         cfqd->rq_in_driver[sync]--;
    3315           0 :         cfqq->dispatched--;
    3316           0 : 
    3317           0 :         if (cfq_cfqq_sync(cfqq))
    3318           0 :                 cfqd->sync_flight--;
    3319             : 
    3320           0 :         if (sync) {
    3321           0 :                 RQ_CIC(rq)->last_end_request = now;
    3322           0 :                 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
    3323           0 :                         cfqd->last_delayed_sync = now;
    3324             :         }
    3325             : 
    3326             :         /*
    3327             :          * If this is the active queue, check if it needs to be expired,
    3328             :          * or if we want to idle in case it has no pending requests.
    3329             :          */
    3330           0 :         if (cfqd->active_queue == cfqq) {
    3331           0 :                 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
    3332             : 
    3333           0 :                 if (cfq_cfqq_slice_new(cfqq)) {
    3334           0 :                         cfq_set_prio_slice(cfqd, cfqq);
    3335           0 :                         cfq_clear_cfqq_slice_new(cfqq);
    3336             :                 }
    3337             : 
    3338             :                 /*
    3339             :                  * Should we wait for the next request to come in before we
    3340             :                  * expire the queue?
    3341             :                  */
    3342           0 :                 if (cfq_should_wait_busy(cfqd, cfqq)) {
    3343           0 :                         cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
    3344           0 :                         cfq_mark_cfqq_wait_busy(cfqq);
    3345             :                 }
    3346             : 
    3347             :                 /*
    3348             :                  * Idling is not enabled on:
    3349             :                  * - expired queues
    3350             :                  * - idle-priority queues
    3351             :                  * - async queues
    3352             :                  * - queues with still some requests queued
    3353             :                  * - when there is a close cooperator
    3354             :                  */
    3355           0 :                 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
    3356           0 :                         cfq_slice_expired(cfqd, 1);
    3357           0 :                 else if (sync && cfqq_empty &&
    3358             :                          !cfq_close_cooperator(cfqd, cfqq)) {
    3359           0 :                         cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
    3360             :                         /*
    3361             :                          * Idling is enabled for SYNC_WORKLOAD.
    3362             :                          * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
    3363             :                          * only if we processed at least one !rq_noidle request
    3364             :                          */
    3365           0 :                         if (cfqd->serving_type == SYNC_WORKLOAD
    3366             :                             || cfqd->noidle_tree_requires_idle
    3367             :                             || cfqq->cfqg->nr_cfqq == 1)
    3368           0 :                                 cfq_arm_slice_timer(cfqd);
    3369             :                 }
    3370             :         }
    3371             : 
    3372           0 :         if (!rq_in_driver(cfqd))
    3373           0 :                 cfq_schedule_dispatch(cfqd);
    3374           0 : }
    3375             : 
    3376             : /*
    3377             :  * We temporarily boost lower priority queues if they are holding fs exclusive
    3378             :  * resources. They are boosted to normal prio (CLASS_BE/4).
    3379             :  */
    3380             : static void cfq_prio_boost(struct cfq_queue *cfqq)
    3381             : {
    3382           0 :         if (has_fs_excl()) {
    3383           0 :                 /*
    3384             :                  * boost idle prio on transactions that would lock out other
    3385             :                  * users of the filesystem
    3386             :                  */
    3387           0 :                 if (cfq_class_idle(cfqq))
    3388           0 :                         cfqq->ioprio_class = IOPRIO_CLASS_BE;
    3389           0 :                 if (cfqq->ioprio > IOPRIO_NORM)
    3390           0 :                         cfqq->ioprio = IOPRIO_NORM;
    3391             :         } else {
    3392             :                 /*
    3393             :                  * unboost the queue (if needed)
    3394             :                  */
    3395           0 :                 cfqq->ioprio_class = cfqq->org_ioprio_class;
    3396           0 :                 cfqq->ioprio = cfqq->org_ioprio;
    3397             :         }
    3398           0 : }
    3399             : 
    3400             : static inline int __cfq_may_queue(struct cfq_queue *cfqq)
    3401             : {
    3402           0 :         if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
    3403           0 :                 cfq_mark_cfqq_must_alloc_slice(cfqq);
    3404           0 :                 return ELV_MQUEUE_MUST;
    3405             :         }
    3406             : 
    3407           0 :         return ELV_MQUEUE_MAY;
    3408             : }
    3409             : 
    3410             : static int cfq_may_queue(struct request_queue *q, int rw)
    3411             : {
    3412           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    3413           0 :         struct task_struct *tsk = current;
    3414           0 :         struct cfq_io_context *cic;
    3415           0 :         struct cfq_queue *cfqq;
    3416           0 : 
    3417           0 :         /*
    3418           0 :          * don't force setup of a queue from here, as a call to may_queue
    3419             :          * does not necessarily imply that a request actually will be queued.
    3420             :          * So just look up a possibly existing queue, or return 'may queue'
    3421             :          * if that fails.
    3422             :          */
    3423           0 :         cic = cfq_cic_lookup(cfqd, tsk->io_context);
    3424           0 :         if (!cic)
    3425           0 :                 return ELV_MQUEUE_MAY;
    3426             : 
    3427           0 :         cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
    3428           0 :         if (cfqq) {
    3429           0 :                 cfq_init_prio_data(cfqq, cic->ioc);
    3430           0 :                 cfq_prio_boost(cfqq);
    3431             : 
    3432           0 :                 return __cfq_may_queue(cfqq);
    3433             :         }
    3434             : 
    3435           0 :         return ELV_MQUEUE_MAY;
    3436             : }
    3437             : 
    3438             : /*
    3439             :  * queue lock held here
    3440             :  */
    3441             : static void cfq_put_request(struct request *rq)
    3442             : {
    3443           0 :         struct cfq_queue *cfqq = RQ_CFQQ(rq);
    3444           0 : 
    3445           0 :         if (cfqq) {
    3446           0 :                 const int rw = rq_data_dir(rq);
    3447             : 
    3448           0 :                 BUG_ON(!cfqq->allocated[rw]);
    3449           0 :                 cfqq->allocated[rw]--;
    3450             : 
    3451           0 :                 put_io_context(RQ_CIC(rq)->ioc);
    3452             : 
    3453           0 :                 rq->elevator_private = NULL;
    3454           0 :                 rq->elevator_private2 = NULL;
    3455             : 
    3456           0 :                 cfq_put_queue(cfqq);
    3457             :         }
    3458           0 : }
    3459             : 
    3460             : static struct cfq_queue *
    3461             : cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
    3462             :                 struct cfq_queue *cfqq)
    3463             : {
    3464           0 :         cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
    3465           0 :         cic_set_cfqq(cic, cfqq->new_cfqq, 1);
    3466           0 :         cfq_mark_cfqq_coop(cfqq->new_cfqq);
    3467           0 :         cfq_put_queue(cfqq);
    3468           0 :         return cic_to_cfqq(cic, 1);
    3469             : }
    3470             : 
    3471             : /*
    3472             :  * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
    3473             :  * was the last process referring to said cfqq.
    3474             :  */
    3475             : static struct cfq_queue *
    3476             : split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
    3477             : {
    3478           0 :         if (cfqq_process_refs(cfqq) == 1) {
    3479           0 :                 cfqq->pid = current->pid;
    3480           0 :                 cfq_clear_cfqq_coop(cfqq);
    3481           0 :                 cfq_clear_cfqq_split_coop(cfqq);
    3482           0 :                 return cfqq;
    3483             :         }
    3484             : 
    3485           0 :         cic_set_cfqq(cic, NULL, 1);
    3486             : 
    3487           0 :         cfq_put_cooperator(cfqq);
    3488             : 
    3489           0 :         cfq_put_queue(cfqq);
    3490           0 :         return NULL;
    3491             : }
    3492             : /*
    3493             :  * Allocate cfq data structures associated with this request.
    3494             :  */
    3495             : static int
    3496             : cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
    3497             : {
    3498           0 :         struct cfq_data *cfqd = q->elevator->elevator_data;
    3499           0 :         struct cfq_io_context *cic;
    3500           0 :         const int rw = rq_data_dir(rq);
    3501           0 :         const bool is_sync = rq_is_sync(rq);
    3502           0 :         struct cfq_queue *cfqq;
    3503           0 :         unsigned long flags;
    3504           0 : 
    3505           0 :         might_sleep_if(gfp_mask & __GFP_WAIT);
    3506           0 : 
    3507           0 :         cic = cfq_get_io_context(cfqd, gfp_mask);
    3508             : 
    3509           0 :         spin_lock_irqsave(q->queue_lock, flags);
    3510             : 
    3511           0 :         if (!cic)
    3512           0 :                 goto queue_fail;
    3513             : 
    3514             : new_queue:
    3515           0 :         cfqq = cic_to_cfqq(cic, is_sync);
    3516           0 :         if (!cfqq || cfqq == &cfqd->oom_cfqq) {
    3517           0 :                 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
    3518           0 :                 cic_set_cfqq(cic, cfqq, is_sync);
    3519             :         } else {
    3520             :                 /*
    3521             :                  * If the queue was seeky for too long, break it apart.
    3522             :                  */
    3523           0 :                 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
    3524             :                         cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
    3525           0 :                         cfqq = split_cfqq(cic, cfqq);
    3526           0 :                         if (!cfqq)
    3527           0 :                                 goto new_queue;
    3528             :                 }
    3529             : 
    3530             :                 /*
    3531             :                  * Check to see if this queue is scheduled to merge with
    3532             :                  * another, closely cooperating queue.  The merging of
    3533             :                  * queues happens here as it must be done in process context.
    3534             :                  * The reference on new_cfqq was taken in merge_cfqqs.
    3535             :                  */
    3536           0 :                 if (cfqq->new_cfqq)
    3537           0 :                         cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
    3538             :         }
    3539             : 
    3540           0 :         cfqq->allocated[rw]++;
    3541           0 :         atomic_inc(&cfqq->ref);
    3542             : 
    3543           0 :         spin_unlock_irqrestore(q->queue_lock, flags);
    3544             : 
    3545           0 :         rq->elevator_private = cic;
    3546           0 :         rq->elevator_private2 = cfqq;
    3547           0 :         return 0;
    3548           0 : 
    3549             : queue_fail:
    3550           0 :         if (cic)
    3551           0 :                 put_io_context(cic->ioc);
    3552             : 
    3553           0 :         cfq_schedule_dispatch(cfqd);
    3554           0 :         spin_unlock_irqrestore(q->queue_lock, flags);
    3555             :         cfq_log(cfqd, "set_request fail");
    3556           0 :         return 1;
    3557             : }
    3558             : 
    3559             : static void cfq_kick_queue(struct work_struct *work)
    3560             : {
    3561           0 :         struct cfq_data *cfqd =
    3562           0 :                 container_of(work, struct cfq_data, unplug_work);
    3563           0 :         struct request_queue *q = cfqd->queue;
    3564             : 
    3565           0 :         spin_lock_irq(q->queue_lock);
    3566           0 :         __blk_run_queue(cfqd->queue);
    3567           0 :         spin_unlock_irq(q->queue_lock);
    3568           0 : }
    3569             : 
    3570             : /*
    3571             :  * Timer running if the active_queue is currently idling inside its time slice
    3572             :  */
    3573             : static void cfq_idle_slice_timer(unsigned long data)
    3574             : {
    3575           0 :         struct cfq_data *cfqd = (struct cfq_data *) data;
    3576           0 :         struct cfq_queue *cfqq;
    3577           0 :         unsigned long flags;
    3578           0 :         int timed_out = 1;
    3579           0 : 
    3580           0 :         cfq_log(cfqd, "idle timer fired");
    3581           0 : 
    3582           0 :         spin_lock_irqsave(cfqd->queue->queue_lock, flags);
    3583             : 
    3584           0 :         cfqq = cfqd->active_queue;
    3585           0 :         if (cfqq) {
    3586           0 :                 timed_out = 0;
    3587             : 
    3588             :                 /*
    3589             :                  * We saw a request before the queue expired, let it through
    3590             :                  */
    3591           0 :                 if (cfq_cfqq_must_dispatch(cfqq))
    3592           0 :                         goto out_kick;
    3593             : 
    3594             :                 /*
    3595             :                  * expired
    3596             :                  */
    3597           0 :                 if (cfq_slice_used(cfqq))
    3598           0 :                         goto expire;
    3599             : 
    3600             :                 /*
    3601             :                  * only expire and reinvoke the request handler if there are
    3602             :                  * other queues with pending requests
    3603             :                  */
    3604           0 :                 if (!cfqd->busy_queues)
    3605           0 :                         goto out_cont;
    3606             : 
    3607             :                 /*
    3608             :                  * not expired and it has a request pending, let it dispatch
    3609             :                  */
    3610           0 :                 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
    3611           0 :                         goto out_kick;
    3612             : 
    3613             :                 /*
    3614             :                  * Queue depth flag is reset only when the idle didn't succeed
    3615             :                  */
    3616           0 :                 cfq_clear_cfqq_deep(cfqq);
    3617             :         }
    3618             : expire:
    3619           0 :         cfq_slice_expired(cfqd, timed_out);
    3620             : out_kick:
    3621           0 :         cfq_schedule_dispatch(cfqd);
    3622             : out_cont:
    3623           0 :         spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
    3624           0 : }
    3625             : 
    3626             : static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
    3627             : {
    3628           0 :         del_timer_sync(&cfqd->idle_slice_timer);
    3629           0 :         cancel_work_sync(&cfqd->unplug_work);
    3630           0 : }
    3631             : 
    3632             : static void cfq_put_async_queues(struct cfq_data *cfqd)
    3633             : {
    3634           0 :         int i;
    3635             : 
    3636           0 :         for (i = 0; i < IOPRIO_BE_NR; i++) {
    3637           0 :                 if (cfqd->async_cfqq[0][i])
    3638           0 :                         cfq_put_queue(cfqd->async_cfqq[0][i]);
    3639           0 :                 if (cfqd->async_cfqq[1][i])
    3640           0 :                         cfq_put_queue(cfqd->async_cfqq[1][i]);
    3641             :         }
    3642             : 
    3643           0 :         if (cfqd->async_idle_cfqq)
    3644           0 :                 cfq_put_queue(cfqd->async_idle_cfqq);
    3645           0 : }
    3646             : 
    3647             : static void cfq_cfqd_free(struct rcu_head *head)
    3648             : {
    3649           0 :         kfree(container_of(head, struct cfq_data, rcu));
    3650             : }
    3651           0 : 
    3652             : static void cfq_exit_queue(struct elevator_queue *e)
    3653             : {
    3654           0 :         struct cfq_data *cfqd = e->elevator_data;
    3655           0 :         struct request_queue *q = cfqd->queue;
    3656           0 : 
    3657           0 :         cfq_shutdown_timer_wq(cfqd);
    3658           0 : 
    3659           0 :         spin_lock_irq(q->queue_lock);
    3660             : 
    3661           0 :         if (cfqd->active_queue)
    3662           0 :                 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
    3663             : 
    3664           0 :         while (!list_empty(&cfqd->cic_list)) {
    3665           0 :                 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
    3666           0 :                                                         struct cfq_io_context,
    3667             :                                                         queue_list);
    3668             : 
    3669           0 :                 __cfq_exit_single_io_context(cfqd, cic);
    3670             :         }
    3671           0 : 
    3672           0 :         cfq_put_async_queues(cfqd);
    3673           0 :         cfq_release_cfq_groups(cfqd);
    3674           0 :         blkiocg_del_blkio_group(&cfqd->root_group.blkg);
    3675             : 
    3676           0 :         spin_unlock_irq(q->queue_lock);
    3677             : 
    3678           0 :         cfq_shutdown_timer_wq(cfqd);
    3679             : 
    3680             :         /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
    3681           0 :         call_rcu(&cfqd->rcu, cfq_cfqd_free);
    3682           0 : }
    3683             : 
    3684             : static void *cfq_init_queue(struct request_queue *q)
    3685             : {
    3686           0 :         struct cfq_data *cfqd;
    3687           0 :         int i, j;
    3688           0 :         struct cfq_group *cfqg;
    3689           0 :         struct cfq_rb_root *st;
    3690           0 : 
    3691           0 :         cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
    3692           0 :         if (!cfqd)
    3693           0 :                 return NULL;
    3694           0 : 
    3695           0 :         /* Init root service tree */
    3696           0 :         cfqd->grp_service_tree = CFQ_RB_ROOT;
    3697             : 
    3698             :         /* Init root group */
    3699           0 :         cfqg = &cfqd->root_group;
    3700           0 :         for_each_cfqg_st(cfqg, i, j, st)
    3701           0 :                 *st = CFQ_RB_ROOT;
    3702           0 :         RB_CLEAR_NODE(&cfqg->rb_node);
    3703             : 
    3704             :         /* Give preference to root group over other groups */
    3705           0 :         cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
    3706             : 
    3707             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
    3708             :         /*
    3709             :          * Take a reference to the root group which we never drop. This is just
    3710             :          * to make sure that cfq_put_cfqg() does not try to kfree the root group.
    3711             :          */
    3712             :         atomic_set(&cfqg->ref, 1);
    3713             :         blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
    3714             :                                         0);
    3715             : #endif
    3716             :         /*
    3717             :          * Not strictly needed (since RB_ROOT just clears the node and we
    3718             :          * zeroed cfqd on alloc), but better be safe in case someone decides
    3719             :          * to add magic to the rb code
    3720             :          */
    3721           0 :         for (i = 0; i < CFQ_PRIO_LISTS; i++)
    3722           0 :                 cfqd->prio_trees[i] = RB_ROOT;
    3723           0 : 
    3724             :         /*
    3725             :          * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
    3726             :          * Grab a permanent reference to it, so that the normal code flow
    3727             :          * will not attempt to free it.
    3728             :          */
    3729           0 :         cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
    3730           0 :         atomic_inc(&cfqd->oom_cfqq.ref);
    3731           0 :         cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
    3732             : 
    3733           0 :         INIT_LIST_HEAD(&cfqd->cic_list);
    3734             : 
    3735           0 :         cfqd->queue = q;
    3736             : 
    3737           0 :         init_timer(&cfqd->idle_slice_timer);
    3738           0 :         cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
    3739           0 :         cfqd->idle_slice_timer.data = (unsigned long) cfqd;
    3740             : 
    3741           0 :         INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
    3742             : 
    3743           0 :         cfqd->cfq_quantum = cfq_quantum;
    3744           0 :         cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
    3745           0 :         cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
    3746           0 :         cfqd->cfq_back_max = cfq_back_max;
    3747           0 :         cfqd->cfq_back_penalty = cfq_back_penalty;
    3748           0 :         cfqd->cfq_slice[0] = cfq_slice_async;
    3749           0 :         cfqd->cfq_slice[1] = cfq_slice_sync;
    3750           0 :         cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
    3751           0 :         cfqd->cfq_slice_idle = cfq_slice_idle;
    3752           0 :         cfqd->cfq_latency = 1;
    3753           0 :         cfqd->cfq_group_isolation = 0;
    3754           0 :         cfqd->hw_tag = -1;
    3755             :         /*
    3756             :          * We optimistically start by assuming sync ops weren't delayed in the
    3757             :          * last second, in order to have a larger depth for async operations.
    3758             :          */
    3759           0 :         cfqd->last_delayed_sync = jiffies - HZ;
    3760           0 :         INIT_RCU_HEAD(&cfqd->rcu);
    3761           0 :         return cfqd;
    3762             : }
    3763             : 
    3764             : static void cfq_slab_kill(void)
    3765             : {
    3766             :         /*
    3767             :          * Caller already ensured that pending RCU callbacks are completed,
    3768             :          * so we should have no busy allocations at this point.
    3769             :          */
    3770           6 :         if (cfq_pool)
    3771           3 :                 kmem_cache_destroy(cfq_pool);
    3772           6 :         if (cfq_ioc_pool)
    3773           3 :                 kmem_cache_destroy(cfq_ioc_pool);
    3774           3 : }
    3775             : 
    3776             : static int __init cfq_slab_setup(void)
    3777             : {
    3778           1 :         cfq_pool = KMEM_CACHE(cfq_queue, 0);
    3779           2 :         if (!cfq_pool)
    3780           1 :                 goto fail;
    3781             : 
    3782           1 :         cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
    3783           2 :         if (!cfq_ioc_pool)
    3784           1 :                 goto fail;
    3785             : 
    3786           1 :         return 0;
    3787           1 : fail:
    3788           2 :         cfq_slab_kill();
    3789           1 :         return -ENOMEM;
    3790             : }
    3791             : 
    3792             : /*
    3793             :  * sysfs parts below -->
    3794             :  */
    3795             : static ssize_t
    3796             : cfq_var_show(unsigned int var, char *page)
    3797             : {
    3798          33 :         return sprintf(page, "%d\n", var);
    3799             : }
    3800             : 
    3801             : static ssize_t
    3802             : cfq_var_store(unsigned int *var, const char *page, size_t count)
    3803             : {
    3804          22 :         char *p = (char *) page;
    3805          11 : 
    3806          22 :         *var = simple_strtoul(p, &p, 10);
    3807          11 :         return count;
    3808             : }
    3809             : 
    3810             : #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                            \
    3811             : static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
    3812             : {                                                                       \
    3813             :         struct cfq_data *cfqd = e->elevator_data;                    \
    3814             :         unsigned int __data = __VAR;                                    \
    3815             :         if (__CONV)                                                     \
    3816             :                 __data = jiffies_to_msecs(__data);                      \
    3817             :         return cfq_var_show(__data, (page));                            \
    3818             : }
    3819           6 : SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
    3820           7 : SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
    3821           8 : SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
    3822           8 : SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
    3823           9 : SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
    3824          10 : SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
    3825          10 : SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
    3826          10 : SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
    3827           9 : SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
    3828           9 : SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
    3829           9 : SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
    3830           3 : #undef SHOW_FUNCTION
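/*
 * Editorial sketch, not part of the original file: roughly what the
 * preprocessor produces for SHOW_FUNCTION(cfq_slice_idle_show,
 * cfqd->cfq_slice_idle, 1) above. The __CONV argument selects the
 * jiffies-to-milliseconds conversion before the value is printed; the
 * _expanded suffix is only to mark this as an illustration.
 */
static ssize_t cfq_slice_idle_show_expanded(struct elevator_queue *e, char *page)
{
        struct cfq_data *cfqd = e->elevator_data;
        unsigned int __data = cfqd->cfq_slice_idle;     /* __VAR  */

        if (1)                                          /* __CONV */
                __data = jiffies_to_msecs(__data);
        return cfq_var_show(__data, page);
}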
    3831           3 : 
    3832           2 : #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
    3833           1 : static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
    3834             : {                                                                       \
    3835             :         struct cfq_data *cfqd = e->elevator_data;                    \
    3836             :         unsigned int __data;                                            \
    3837             :         int ret = cfq_var_store(&__data, (page), count);            \
    3838             :         if (__data < (MIN))                                          \
    3839             :                 __data = (MIN);                                         \
    3840             :         else if (__data > (MAX))                                     \
    3841             :                 __data = (MAX);                                         \
    3842             :         if (__CONV)                                                     \
    3843             :                 *(__PTR) = msecs_to_jiffies(__data);                    \
    3844             :         else                                                            \
    3845             :                 *(__PTR) = __data;                                      \
    3846             :         return ret;                                                     \
    3847             : }
    3848          10 : STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
    3849          11 : STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
    3850           1 :                 UINT_MAX, 1);
    3851          12 : STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
    3852           2 :                 UINT_MAX, 1);
    3853           9 : STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
    3854          12 : STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
    3855           2 :                 UINT_MAX, 0);
    3856          11 : STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
    3857          14 : STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
    3858          14 : STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
    3859          13 : STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
    3860           3 :                 UINT_MAX, 0);
    3861          13 : STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
    3862          14 : STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
    3863           4 : #undef STORE_FUNCTION
    3864           4 : 
    3865           4 : #define CFQ_ATTR(name) \
    3866           2 :         __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
    3867           2 : 
    3868           2 : static struct elv_fs_entry cfq_attrs[] = {
    3869             :         CFQ_ATTR(quantum),
    3870             :         CFQ_ATTR(fifo_expire_sync),
    3871             :         CFQ_ATTR(fifo_expire_async),
    3872             :         CFQ_ATTR(back_seek_max),
    3873             :         CFQ_ATTR(back_seek_penalty),
    3874             :         CFQ_ATTR(slice_sync),
    3875             :         CFQ_ATTR(slice_async),
    3876             :         CFQ_ATTR(slice_async_rq),
    3877             :         CFQ_ATTR(slice_idle),
    3878             :         CFQ_ATTR(low_latency),
    3879             :         CFQ_ATTR(group_isolation),
    3880             :         __ATTR_NULL
    3881             : };
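/*
 * Editorial sketch, not part of the original file: a minimal userspace C
 * program that exercises one of the sysfs attributes declared in cfq_attrs
 * above. It assumes a block device named "sda" whose active I/O scheduler
 * is cfq, so the attribute lives under /sys/block/sda/queue/iosched/.
 * Values are plain decimal strings, produced by cfq_var_show() and parsed
 * by cfq_var_store(); slice_idle is converted to/from milliseconds.
 */
#include <stdio.h>

int main(void)
{
        const char *path = "/sys/block/sda/queue/iosched/slice_idle";
        char buf[32];
        FILE *f;

        /* Read the current slice_idle value (in milliseconds). */
        f = fopen(path, "r");
        if (!f) {
                perror("open slice_idle for reading");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("slice_idle = %s", buf);
        fclose(f);

        /* Write a new value; cfq_slice_idle_store clamps it to [0, UINT_MAX]. */
        f = fopen(path, "w");
        if (!f) {
                perror("open slice_idle for writing");
                return 1;
        }
        fprintf(f, "8\n");
        fclose(f);
        return 0;
}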
    3882             : 
    3883           1 : static struct elevator_type iosched_cfq = {
    3884             :         .ops = {
    3885             :                 .elevator_merge_fn =            cfq_merge,
    3886             :                 .elevator_merged_fn =           cfq_merged_request,
    3887             :                 .elevator_merge_req_fn =        cfq_merged_requests,
    3888             :                 .elevator_allow_merge_fn =      cfq_allow_merge,
    3889             :                 .elevator_dispatch_fn =         cfq_dispatch_requests,
    3890             :                 .elevator_add_req_fn =          cfq_insert_request,
    3891             :                 .elevator_activate_req_fn =     cfq_activate_request,
    3892             :                 .elevator_deactivate_req_fn =   cfq_deactivate_request,
    3893             :                 .elevator_queue_empty_fn =      cfq_queue_empty,
    3894             :                 .elevator_completed_req_fn =    cfq_completed_request,
    3895             :                 .elevator_former_req_fn =       elv_rb_former_request,
    3896             :                 .elevator_latter_req_fn =       elv_rb_latter_request,
    3897             :                 .elevator_set_req_fn =          cfq_set_request,
    3898             :                 .elevator_put_req_fn =          cfq_put_request,
    3899             :                 .elevator_may_queue_fn =        cfq_may_queue,
    3900             :                 .elevator_init_fn =             cfq_init_queue,
    3901             :                 .elevator_exit_fn =             cfq_exit_queue,
    3902             :                 .trim =                         cfq_free_io_context,
    3903             :         },
    3904             :         .elevator_attrs =       cfq_attrs,
    3905             :         .elevator_name =        "cfq",
    3906             :         .elevator_owner =       THIS_MODULE,
    3907             : };
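/*
 * Editorial sketch, not part of the original file: once cfq_init() below has
 * called elv_register(&iosched_cfq), the "cfq" elevator_name can be selected
 * for a request queue from userspace by writing it to the queue's scheduler
 * attribute. The device name "sda" is only an example.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/block/sda/queue/scheduler", "w");

        if (!f) {
                perror("open scheduler attribute");
                return 1;
        }
        /* The elevator core matches this string against elevator_name. */
        fprintf(f, "cfq\n");
        fclose(f);
        return 0;
}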
    3908             : 
    3909             : #ifdef CONFIG_CFQ_GROUP_IOSCHED
    3910             : static struct blkio_policy_type blkio_policy_cfq = {
    3911             :         .ops = {
    3912             :                 .blkio_unlink_group_fn =        cfq_unlink_blkio_group,
    3913             :                 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
    3914             :         },
    3915             : };
    3916             : #else
    3917           1 : static struct blkio_policy_type blkio_policy_cfq;
    3918             : #endif
    3919             : 
    3920             : static int __init cfq_init(void)
    3921             : {
    3922           1 :         /*
    3923             :          * could be 0 on HZ < 1000 setups
    3924             :          */
    3925           2 :         if (!cfq_slice_async)
    3926           1 :                 cfq_slice_async = 1;
    3927           2 :         if (!cfq_slice_idle)
    3928           1 :                 cfq_slice_idle = 1;
    3929             : 
    3930           5 :         if (cfq_slab_setup())
    3931           1 :                 return -ENOMEM;
    3932             : 
    3933           1 :         elv_register(&iosched_cfq);
    3934           2 :         blkio_policy_register(&blkio_policy_cfq);
    3935             : 
    3936           1 :         return 0;
    3937             : }
    3938             : 
    3939             : static void __exit cfq_exit(void)
    3940             : {
    3941          10 :         DECLARE_COMPLETION_ONSTACK(all_gone);
    3942           6 :         blkio_policy_unregister(&blkio_policy_cfq);
    3943           4 :         elv_unregister(&iosched_cfq);
    3944           4 :         ioc_gone = &all_gone;
    3945           2 :         /* ioc_gone's update must be visible before reading ioc_count */
    3946           2 :         smp_wmb();
    3947             : 
    3948             :         /*
    3949             :          * this also protects us from entering cfq_slab_kill() with
    3950             :          * pending RCU callbacks
    3951             :          */
    3952          28 :         if (elv_ioc_count_read(cfq_ioc_count))
    3953           6 :                 wait_for_completion(&all_gone);
    3954           6 :         cfq_slab_kill();
    3955           2 : }
    3956             : 
    3957             : module_init(cfq_init);
    3958             : module_exit(cfq_exit);
    3959           1 : 
    3960             : MODULE_AUTHOR("Jens Axboe");
    3961             : MODULE_LICENSE("GPL");
    3962             : MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");

Generated by: LCOV version 1.10