15. (Supplement) dm_kcopyd_copy: dispatch_job (queueing onto the lists) + do_work (processing the lists)
static void do_work(struct work_struct *work)
{
	struct dm_kcopyd_client *kc = container_of(work,
					struct dm_kcopyd_client, kcopyd_work);
	struct blk_plug plug;

	/*
	 * The order that these are called is *very* important.
	 * complete jobs can free some pages for pages jobs.
	 * Pages jobs when successful will jump onto the io jobs
	 * list. io jobs call wake when they complete and it all
	 * starts again.
	 */
	blk_start_plug(&plug);
	process_jobs(&kc->complete_jobs, kc, run_complete_job);
	process_jobs(&kc->pages_jobs, kc, run_pages_job);
	process_jobs(&kc->io_jobs, kc, run_io_job);
	blk_finish_plug(&plug);
}
static void dispatch_job(struct kcopyd_job *job)
{
	struct dm_kcopyd_client *kc = job->kc;

	atomic_inc(&kc->nr_jobs);
	if (unlikely(!job->source.count))
		push(&kc->complete_jobs, job);
	else if (job->pages == &zero_page_list)
		push(&kc->io_jobs, job);
	else
		push(&kc->pages_jobs, job);
	wake(kc);
}
dispatch_job pushes the job onto one of the lists (complete_jobs, io_jobs, pages_jobs), kicks the worker awake, and returns.
do_work then processes the jobs sitting on each of those lists.
(The comment stresses that the order in which the lists are processed is very important.)
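process_jobs(), which do_work() calls once per list, is not shown on the slide. Below is a minimal userspace sketch of that pop-and-dispatch pattern; the types and helpers here (struct job, struct job_list, run_job) are invented for illustration and are not the kernel's, only the overall shape follows dm-kcopyd.

/*
 * Minimal userspace sketch of the pop-and-dispatch loop that
 * process_jobs() runs for each of kc's job lists.  Everything here
 * is made up for illustration; only the overall shape mirrors dm-kcopyd.
 */
#include <stdio.h>
#include <stdlib.h>

struct job {
	struct job *next;
	int id;
};

struct job_list {
	struct job *head, *tail;
};

static void push(struct job_list *l, struct job *j)
{
	j->next = NULL;
	if (l->tail)
		l->tail->next = j;
	else
		l->head = j;
	l->tail = j;
}

static struct job *pop(struct job_list *l)
{
	struct job *j = l->head;

	if (j) {
		l->head = j->next;
		if (!l->head)
			l->tail = NULL;
	}
	return j;
}

/*
 * Pop jobs one by one and hand each to run_fn.  In this sketch any
 * non-zero return means "cannot make progress now": the job is put
 * back and the loop stops.  (The real process_jobs() distinguishes
 * hard errors from "retry later".)
 */
static int process_jobs(struct job_list *jobs, int (*run_fn)(struct job *))
{
	struct job *j;
	int count = 0;

	while ((j = pop(jobs))) {
		if (run_fn(j)) {
			push(jobs, j);
			break;
		}
		count++;
		free(j);
	}
	return count;
}

static int run_job(struct job *j)
{
	printf("ran job %d\n", j->id);
	return 0;
}

int main(void)
{
	struct job_list jobs = { NULL, NULL };

	for (int i = 0; i < 3; i++) {
		struct job *j = malloc(sizeof(*j));

		j->id = i;
		push(&jobs, j);
	}
	printf("processed %d job(s)\n", process_jobs(&jobs, run_job));
	return 0;
}

In dm-kcopyd a single worker calls this kind of loop three times, in the order shown in do_work above, so completed jobs can free pages before the page-allocating jobs run.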
16. process_bio -> process_shared_bio -> break_sharing
static void process_deferred_bios(struct pool *pool)
{
	... /* local declarations elided */

	bio_list_init(&bios);

	spin_lock_irqsave(&pool->lock, flags);
	bio_list_merge(&bios, &pool->deferred_bios);
	bio_list_init(&pool->deferred_bios);
	spin_unlock_irqrestore(&pool->lock, flags);

	while ((bio = bio_list_pop(&bios))) {
		... /* the thin_c (tc) owning this bio is looked up here */
		if (bio->bi_rw & REQ_DISCARD)
			pool->process_discard(tc, bio);
		else
			pool->process_bio(tc, bio);
	}
	...
static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
			  struct dm_cell_key *key,
			  struct dm_thin_lookup_result *lookup_result,
			  struct dm_bio_prison_cell *cell)
{
	... /* declarations (r, data_block) elided */

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		schedule_internal_copy(tc, block, lookup_result->block,
				       data_block, cell, bio);
		...
static void process_bio(struct thin_c *tc, struct bio *bio)
{
	... /* declarations elided */

	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
	switch (r) {
	case 0:
		if (lookup_result.shared) {
			process_shared_bio(tc, bio, block, &lookup_result);
			...
static void process_shared_bio(struct thin_c *tc, struct bio *bio,
			       dm_block_t block,
			       struct dm_thin_lookup_result *lookup_result)
{
	struct dm_cell_key key;
	... /* pool and cell declarations elided */

	/*
	 * If cell is already occupied, then sharing is already in the process
	 * of being broken so we have nothing further to do here.
	 */
	build_data_key(tc->td, lookup_result->block, &key);
	if (bio_detain(pool, &key, bio, &cell))
		return;

	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
		break_sharing(tc, bio, block, &key, lookup_result, cell);
	...
Everything is pulled out of deferred_bios into the local bios list, which is then walked with a while loop (a standard implementation pattern).
bio_detain: this bio has to be handled again after the remap, and no ack is returned until then, so it is detained here.
A new physical block (data_block) is obtained and a copy is made into that block.
At first glance the lookup looks redundant, but it is performed once more here, in the background (worker) path as well; why that matters becomes clear later.
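The drain-then-process pattern noted above, reduced to a self-contained userspace sketch. The names (struct item, g_deferred, drain_and_process) are invented; unlike bio_list_merge, this LIFO push does not preserve submission order, which does not matter for the point being illustrated.

/*
 * Sketch of the pattern used by process_deferred_bios(): take the whole
 * shared deferred list while holding the lock, then walk the local copy
 * with the lock dropped.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	int value;
};

static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *g_deferred;	/* shared list, protected by g_lock */

static void defer_item(int value)
{
	struct item *it = malloc(sizeof(*it));

	it->value = value;
	pthread_mutex_lock(&g_lock);
	it->next = g_deferred;		/* push onto the shared list */
	g_deferred = it;
	pthread_mutex_unlock(&g_lock);
}

static void drain_and_process(void)
{
	struct item *local, *it;

	/* Splice the whole list off in O(1) while holding the lock ... */
	pthread_mutex_lock(&g_lock);
	local = g_deferred;
	g_deferred = NULL;
	pthread_mutex_unlock(&g_lock);

	/* ... and do the (possibly slow) per-item work without it. */
	while ((it = local)) {
		local = it->next;
		printf("processing deferred item %d\n", it->value);
		free(it);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		defer_item(i);
	drain_and_process();
	return 0;
}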
17. schedule_copy -> (callback) queueing onto prepared_mappings
schedule_copy
	/*
	 * IO to pool_dev remaps to the pool target's data_dev.
	 *
	 * If the whole block of data is being overwritten, we can issue the
	 * bio immediately. Otherwise we use kcopyd to clone the data first.
	 */
	if (io_overwrites_block(pool, bio)) {
		...
	} else {
		from.bdev = origin->bdev;
		from.sector = data_origin * pool->sectors_per_block;
		from.count = pool->sectors_per_block;

		to.bdev = tc->pool_dev->bdev;
		to.sector = data_dest * pool->sectors_per_block;
		to.count = pool->sectors_per_block;

		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
				   0, copy_complete, m);
		...

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	... /* m = context; pool and flags declarations elided */

	spin_lock_irqsave(&pool->lock, flags);
	m->prepared = true;
	__maybe_add_mapping(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}
static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
{
	... /* pool declaration elided */

	if (m->quiesced && m->prepared) {
		list_add_tail(&m->list, &pool->prepared_mappings);
		wake_worker(pool);
	}
}
If the write covers a full block (e.g. 64KB), it can simply be written straight to the new block (that path is omitted here).
If it is a partial write, the old block is copied first and only then are the detained bios let through; the copy is done with dm_kcopyd_copy.
The callback (copy_complete) adds the mapping to prepared_mappings (__maybe_add_mapping).
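To make the "full overwrite vs. copy first" decision concrete, here is a self-contained sketch of the check and of the sector arithmetic used in the from/to setup above. The struct and function names (fake_pool, fake_io, overwrites_whole_block) are invented; only the block-to-sector arithmetic and the 512-byte sector size (SECTOR_SHIFT = 9) follow the code on the slide.

/*
 * Illustrative sketch of the decision in schedule_copy: a write that
 * covers the whole block can go straight to the new block, otherwise
 * the old block is cloned with kcopyd first.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9			/* 512-byte sectors */

struct fake_pool {
	uint32_t sectors_per_block;	/* e.g. 128 sectors == 64KB blocks */
};

struct fake_io {
	bool is_write;
	uint32_t size_bytes;		/* payload size of the request */
};

/* A write whose payload is exactly one block long overwrites the block. */
static bool overwrites_whole_block(const struct fake_pool *pool,
				   const struct fake_io *io)
{
	return io->is_write &&
	       io->size_bytes ==
		       ((uint32_t)pool->sectors_per_block << SECTOR_SHIFT);
}

int main(void)
{
	struct fake_pool pool = { .sectors_per_block = 128 };	/* 64KB */
	struct fake_io partial = { .is_write = true, .size_bytes = 4096 };
	uint64_t data_origin = 10, data_dest = 42;

	if (overwrites_whole_block(&pool, &partial)) {
		printf("full overwrite: write directly to the new block\n");
	} else {
		/* Same arithmetic as the from/to setup on the slide. */
		uint64_t from_sector = data_origin * pool.sectors_per_block;
		uint64_t to_sector   = data_dest   * pool.sectors_per_block;

		printf("partial write: copy sectors %llu.. -> %llu.. first (%u sectors)\n",
		       (unsigned long long)from_sector,
		       (unsigned long long)to_sector,
		       pool.sectors_per_block);
	}
	return 0;
}

With a 4KB write into a 64KB block, the sketch takes the copy branch, which corresponds to the partial-write path described above.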
18. do_worker again -> process_prepared_mapping
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
	... /* tc, bio, r declarations elided */

	bio = m->bio;
	if (bio) {
		bio->bi_end_io = m->saved_bi_end_io;
		atomic_inc(&bio->bi_remaining);
	}

	/*
	 * Commit the prepared block into the mapping btree.
	 * Any I/O for this block arriving after this point will get
	 * remapped to it directly.
	 */
	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
	...

	/*
	 * Release any bios held while the block was being provisioned.
	 * If we are processing a write bio that completely covers the block,
	 * we already processed it so can ignore it now when processing
	 * the bios in the cell.
	 */
	if (bio) {
		cell_defer_no_holder(tc, m->cell);
		bio_endio(bio, 0);
	} else
		cell_defer(tc, m->cell);
}
/*
* This sends the bios in the cell back to the deferred_bios list.
*/
static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
Inside process_prepared_mapping the detained bios are released (linked back onto deferred_bios; see the small cell_defer sketch after the recap below).
process_deferred_bios and then process_bio perform the lookup once more; this time the newly inserted physical block is not shared, so the write goes through, is acked, and we are done.
// do_worker, shown again
static void do_worker(struct work_struct *ws)
{
	... /* pool derived from ws elided */

	process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
	process_deferred_bios(pool);
}
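The cell_defer step referenced above (the slide shows only its header comment) boils down to: move every bio detained in the cell back onto deferred_bios under the pool lock, then wake the worker. Here is a self-contained userspace sketch of that idea; all types and names (fake_pool, fake_cell, integer "bio ids") are invented, and only the overall behaviour follows the comment quoted on the slide.

/*
 * Conceptual sketch of cell_defer(): put the bios detained in the cell
 * back onto pool->deferred_bios and wake the worker, so that
 * process_deferred_bios() picks them up and looks them up again.
 */
#include <pthread.h>
#include <stdio.h>

#define MAX_HELD	8
#define MAX_DEFERRED	64

struct fake_cell {
	int held[MAX_HELD];		/* ids of detained bios */
	int nr_held;
};

struct fake_pool {
	pthread_mutex_t lock;
	pthread_cond_t worker_wake;
	int deferred[MAX_DEFERRED];	/* ids queued for the worker */
	int nr_deferred;
};

static struct fake_pool pool = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.worker_wake = PTHREAD_COND_INITIALIZER,
};

static void wake_worker(struct fake_pool *p)
{
	pthread_cond_signal(&p->worker_wake);
}

/* Requeue everything held in the cell, then wake the worker. */
static void cell_defer(struct fake_pool *p, struct fake_cell *cell)
{
	pthread_mutex_lock(&p->lock);
	for (int i = 0; i < cell->nr_held; i++)
		p->deferred[p->nr_deferred++] = cell->held[i];
	cell->nr_held = 0;
	pthread_mutex_unlock(&p->lock);

	wake_worker(p);
}

int main(void)
{
	struct fake_cell cell = { .held = { 7, 8 }, .nr_held = 2 };

	cell_defer(&pool, &cell);
	printf("%d bio(s) back on deferred_bios, worker woken\n",
	       pool.nr_deferred);
	return 0;
}

After this step, the next do_worker run drains deferred_bios, process_bio finds the freshly inserted block, sees it is no longer shared, and the write completes, which is exactly the round trip described above.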