Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.5:Update
device-mapper.13753
bug-1150021_02-suse-special-bcache-bug-fix.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File bug-1150021_02-suse-special-bcache-bug-fix.patch of Package device-mapper.13753
The bcache issues (see: 2938b4dcca0 & 6b0d969b2a) had been fixed in the end of Oct (2019). But the stable-2.02 still unfix. I made a patch from master branch to stable-2.02, and the patch had been passed in unit test. For the patch code, there is one ugly function bcache_abort_fd(), the stable-2.02 doesn't have easy way to search block. So I wrote 4 iterators loop. It will cost O(n) on every execution. How the bug happen: When bcache writes data error, the errored fd and its data is saved in cache->errored, then this fd is closed. Later lvm will reuse this closed fd to new opened devs, but the fd related data still in cache->errored and flags with BF_DIRTY. It makes the data may mistakenly write to another disk. Related master branch commits: 2938b4dcca0a1df661758abfab7f402ea7aab018 6b0d969b2a85ba69046afa26af4d7bcddddbccd5 5fdebf9bbf68a53b9d2153dbd8bf27a36e0ef3cd 25e7bf021a4e7c5ad5f925b86605bf025ff1a949 In stable-2.02 branch, bcache uses hash-table to manage blocks, but master branch had been switched to radix tree. This commit changed related (radix tree) functions to hash-table style. Signed-off-by: Zhao Heming <heming.zhao@suse.com> --- lib/device/bcache.c | 61 +++++++++++++++++++++++++++-- lib/device/bcache.h | 7 ++++ lib/label/label.c | 32 +++++++++------ test/unit/bcache_t.c | 92 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 174 insertions(+), 18 deletions(-) diff -Nupr a/lib/device/bcache.c b/lib/device/bcache.c --- a/lib/device/bcache.c 2019-12-23 14:37:52.179166465 +0800 +++ b/lib/device/bcache.c 2019-12-23 14:40:11.803782520 +0800 @@ -1165,7 +1165,7 @@ static bool _invalidate_block(struct bca _wait_specific(b); if (b->error) - return false; + return false; } _recycle_block(cache, b); @@ -1186,18 +1186,71 @@ bool bcache_invalidate_fd(struct bcache bool r = true; // Start writing back any dirty blocks on this fd. - dm_list_iterate_items_safe (b, tmp, &cache->dirty) + dm_list_iterate_items_safe(b, tmp, &cache->dirty) if (b->fd == fd) _issue_write(b); _wait_all(cache); // Everything should be in the clean list now. - dm_list_iterate_items_safe (b, tmp, &cache->clean) + dm_list_iterate_items_safe(b, tmp, &cache->clean) if (b->fd == fd) r = _invalidate_block(cache, b) && r; - return r; + if (r) + _hash_remove(b); + + return r; +} + +//---------------------------------------------------------------- + +static bool _abort_v(struct block *b) +{ + if (b->ref_count) { + log_fatal("bcache_abort: block (%d, %llu) still held", + b->fd, (unsigned long long) b->index); + return true; + } + + _unlink_block(b); + dm_list_add(&b->cache->free, &b->list); + + // We can't remove the block from the radix tree yet because + // we're in the middle of an iteration. + return true; +} + +/* This function searches all impossible list to find out fd. */ +void bcache_abort_fd(struct bcache *cache, int fd) +{ + struct block *b, *tmp; + + dm_list_iterate_items_safe(b, tmp, &cache->errored) + if (b->fd == fd) { + _abort_v(b); + _hash_remove(b); + } + + dm_list_iterate_items_safe(b, tmp, &cache->dirty) + if (b->fd == fd) { + _abort_v(b); + _hash_remove(b); + } + + dm_list_iterate_items_safe(b, tmp, &cache->io_pending) + if (b->fd == fd) { + _abort_v(b); + _hash_remove(b); + } + + dm_list_iterate_items_safe(b, tmp, &cache->clean) + if (b->fd == fd) { + _abort_v(b); + _hash_remove(b); + } + + return; } //---------------------------------------------------------------- diff -Nupr a/lib/device/bcache.h b/lib/device/bcache.h --- a/lib/device/bcache.h 2019-12-23 14:37:52.179166465 +0800 +++ b/lib/device/bcache.h 2019-12-23 14:40:15.123797169 +0800 @@ -145,6 +145,13 @@ bool bcache_invalidate(struct bcache *ca */ bool bcache_invalidate_fd(struct bcache *cache, int fd); +/* + * Call this function if flush, or invalidate fail and you do not + * wish to retry the writes. This will throw away any dirty data + * not written. If any blocks for fd are held, then it will call + * abort(). + */ +void bcache_abort_fd(struct bcache *cache, int fd); //---------------------------------------------------------------- // The next four functions are utilities written in terms of the above api. diff -Nupr a/lib/label/label.c b/lib/label/label.c --- a/lib/label/label.c 2019-12-23 14:38:05.131223614 +0800 +++ b/lib/label/label.c 2019-12-23 14:40:27.739852834 +0800 @@ -592,6 +592,14 @@ static void _drop_bad_aliases(struct dev } } +// Like bcache_invalidate, only it throws any dirty data away if the +// write fails. +static void _invalidate_fd(struct bcache *cache, int fd) +{ + if (!bcache_invalidate_fd(cache, fd)) + bcache_abort_fd(cache, fd); +} + /* * Read or reread label/metadata from selected devs. * @@ -702,7 +710,7 @@ static int _scan_list(struct cmd_context * drop it from bcache. */ if (scan_failed || !is_lvm_device) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _invalidate_fd(scan_bcache, devl->dev->bcache_fd); _scan_dev_close(devl->dev); } @@ -859,7 +867,7 @@ int label_scan(struct cmd_context *cmd) * so this will usually not be true. */ if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); } @@ -943,7 +951,7 @@ int label_scan_pvscan_all(struct cmd_con * so this will usually not be true. */ if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); } @@ -1006,7 +1014,7 @@ int label_scan_devs(struct cmd_context * dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _invalidate_fd(scan_bcache, devl->dev->bcache_fd); _scan_dev_close(devl->dev); } } @@ -1025,7 +1033,7 @@ int label_scan_devs_rw(struct cmd_contex dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _invalidate_fd(scan_bcache, devl->dev->bcache_fd); _scan_dev_close(devl->dev); } @@ -1047,7 +1055,7 @@ int label_scan_devs_excl(struct dm_list dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _invalidate_fd(scan_bcache, devl->dev->bcache_fd); _scan_dev_close(devl->dev); } /* @@ -1067,7 +1075,7 @@ int label_scan_devs_excl(struct dm_list void label_scan_invalidate(struct device *dev) { if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); } } @@ -1147,7 +1155,7 @@ int label_read(struct device *dev) dm_list_add(&one_dev, &devl->list); if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); } @@ -1253,7 +1261,7 @@ int label_scan_open_excl(struct device * if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_EXCL)) { /* FIXME: avoid tossing out bcache blocks just to replace fd. */ log_debug("Close and reopen excl %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); } dev->flags |= DEV_BCACHE_EXCL; @@ -1302,7 +1310,7 @@ bool dev_write_bytes(struct device *dev, if (!(dev->flags & DEV_BCACHE_WRITE)) { /* FIXME: avoid tossing out bcache blocks just to replace fd. */ log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); dev->flags |= DEV_BCACHE_WRITE; @@ -1350,7 +1358,7 @@ bool dev_write_zeros(struct device *dev, if (!(dev->flags & DEV_BCACHE_WRITE)) { /* FIXME: avoid tossing out bcache blocks just to replace fd. */ log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); dev->flags |= DEV_BCACHE_WRITE; @@ -1401,7 +1409,7 @@ bool dev_set_bytes(struct device *dev, u if (!(dev->flags & DEV_BCACHE_WRITE)) { /* FIXME: avoid tossing out bcache blocks just to replace fd. */ log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _invalidate_fd(scan_bcache, dev->bcache_fd); _scan_dev_close(dev); dev->flags |= DEV_BCACHE_WRITE; diff -Nupr a/test/unit/bcache_t.c b/test/unit/bcache_t.c --- a/test/unit/bcache_t.c 2019-12-23 14:38:19.691287854 +0800 +++ b/test/unit/bcache_t.c 2019-12-23 14:40:45.459931020 +0800 @@ -794,7 +794,6 @@ static void test_invalidate_after_write_ static void test_invalidate_held_block(void *context) { - struct fixture *f = context; struct mock_engine *me = f->me; struct bcache *cache = f->cache; @@ -811,6 +810,89 @@ static void test_invalidate_held_block(v } //---------------------------------------------------------------- +// abort tests +static void test_abort_no_blocks(void *context) +{ + struct fixture *f = context; + struct bcache *cache = f->cache; + int fd = 17; + + // We have no expectations + bcache_abort_fd(cache, fd); +} + +static void test_abort_single_block(void *context) +{ + struct fixture *f = context; + struct bcache *cache = f->cache; + struct block *b; + int fd = 17; + + T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b)); + bcache_put(b); + + bcache_abort_fd(cache, fd); + + // no write should be issued + T_ASSERT(bcache_flush(cache)); +} + +static void test_abort_forces_reread(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + struct block *b; + int fd = 17; + + _expect_read(me, fd, 0); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b)); + bcache_put(b); + + bcache_abort_fd(cache, fd); + T_ASSERT(bcache_flush(cache)); + + // Check the block is re-read + _expect_read(me, fd, 0); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, 0, 0, &b)); + bcache_put(b); +} + +static void test_abort_only_specific_fd(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + struct block *b; + int fd1 = 17, fd2 = 18; + + T_ASSERT(bcache_get(cache, fd1, 0, GF_ZERO, &b)); + bcache_put(b); + + T_ASSERT(bcache_get(cache, fd1, 1, GF_ZERO, &b)); + bcache_put(b); + + T_ASSERT(bcache_get(cache, fd2, 0, GF_ZERO, &b)); + bcache_put(b); + + T_ASSERT(bcache_get(cache, fd2, 1, GF_ZERO, &b)); + bcache_put(b); + + bcache_abort_fd(cache, fd2); + + // writes for fd1 should still be issued + _expect_write(me, fd1, 0); + _expect_write(me, fd1, 1); + + _expect(me, E_WAIT); + _expect(me, E_WAIT); + + T_ASSERT(bcache_flush(cache)); +} + +//---------------------------------------------------------------- // Chasing a bug reported by dct static void _cycle(struct fixture *f, unsigned nr_cache_blocks) @@ -898,6 +980,12 @@ static struct test_suite *_small_tests(v T("invalidate-read-error", "invalidate a block that errored", test_invalidate_after_read_error); T("invalidate-write-error", "invalidate a block that errored", test_invalidate_after_write_error); T("invalidate-fails-in-held", "invalidating a held block fails", test_invalidate_held_block); + + T("abort-with-no-blocks", "you can call abort, even if there are no blocks in the cache", test_abort_no_blocks); + T("abort-single-block", "single block get silently discarded", test_abort_single_block); + T("abort-forces-read", "if a block has been discarded then another read is necc.", test_abort_forces_reread); + T("abort-specific-fd", "abort doesn't effect other fds", test_abort_only_specific_fd); + T("concurrent-reads-after-invalidate", "prefetch should still issue concurrent reads after invalidate", test_concurrent_reads_after_invalidate); @@ -919,7 +1007,7 @@ static struct test_suite *_large_tests(v void bcache_tests(struct dm_list *all_tests) { - dm_list_add(all_tests, &_tiny_tests()->list); + dm_list_add(all_tests, &_tiny_tests()->list); dm_list_add(all_tests, &_small_tests()->list); dm_list_add(all_tests, &_large_tests()->list); }
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor