Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.5:Update
drbd.24683
fix-stuck-resync-when-cancelled.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File fix-stuck-resync-when-cancelled.patch of Package drbd.24683
{ "version": "drbd-9.0.17-0rc1~13", "commit": "f571cabed0e2955cc3b3bf5ba89fb371780bf0c6", "comment": "drbd: Improve the resync controller for fast back end devices and network", "author": "Philipp Reisner <philipp.reisner@linbit.com>", "date": "Tue Jan 29 12:45:31 2019 +0100" } { "version": "drbd-9.0.19-1~30", "commit": "c2d3d9150402c71ae94d3146a7de0c10a3b6e25e", "comment": "drbd: Fix stuck resync when many resync requests are cancelled", "author": "Joel Colledge <joel.colledge@linbit.com>", "date": "Mon May 27 11:49:29 2019 +0200" } { "version": "drbd-9.0.25-1~2", "commit": "eceb2bc40a31f06acdd8d3d12dd36156934ede04", "comment": "drbd: Fix handing of P_NEG_RS_DREPLY packet", "author": "Philipp Reisner <philipp.reisner@linbit.com>", "date": "Tue Sep 22 11:42:08 2020 +0200" } diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_int.h drbd-9.0.14+git.62f906cf.test/drbd/drbd_int.h --- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_int.h 2021-09-10 13:54:03.216030195 +0800 +++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_int.h 2021-09-10 13:57:52.775120933 +0800 @@ -1155,6 +1155,7 @@ int rs_last_events; /* counter of read or write "events" (unit sectors) * on the lower level device when we last looked. 
*/ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ + unsigned long rs_last_mk_req_jif; unsigned long ov_left; /* in bits */ unsigned long ov_skipped; /* in bits */ diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_receiver.c drbd-9.0.14+git.62f906cf.test/drbd/drbd_receiver.c --- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_receiver.c 2021-09-10 13:54:03.216030195 +0800 +++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_receiver.c 2021-09-10 14:13:28.475414699 +0800 @@ -217,6 +217,17 @@ return NULL; } +static void rs_sectors_came_in(struct drbd_peer_device *peer_device, int size) +{ + int rs_sect_in = atomic_add_return(size >> 9, &peer_device->rs_sect_in); + + /* In case resync runs faster than anticipated, run the resync_work early */ + if (rs_sect_in >= peer_device->rs_in_flight) + drbd_queue_work_if_unqueued( + &peer_device->connection->sender_work, + &peer_device->resync_work); +} + /* kick lower level device, if we have more than (arbitrary number) * reference counts on it, which typically are locally submitted io * requests. don't use unacked_cnt, so we speed up proto A and B, too. 
*/ @@ -2174,7 +2185,7 @@ drbd_send_ack_dp(peer_device, P_NEG_ACK, &d); } - atomic_add(d.bi_size >> 9, &peer_device->rs_sect_in); + rs_sectors_came_in(peer_device, d.bi_size); return err; } @@ -3082,7 +3093,7 @@ peer_device->use_csums = true; } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ - atomic_add(size >> 9, &peer_device->rs_sect_in); + rs_sectors_came_in(peer_device, size); peer_req->w.cb = w_e_end_ov_reply; dec_rs_pending(peer_device); /* drbd_rs_begin_io done when we sent this request, @@ -7314,7 +7325,7 @@ drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); } - atomic_add(size >> 9, &peer_device->rs_sect_in); + rs_sectors_came_in(peer_device, size); return err; } @@ -8182,7 +8193,7 @@ put_ldev(device); } dec_rs_pending(peer_device); - atomic_add(blksize >> 9, &peer_device->rs_sect_in); + rs_sectors_came_in(peer_device, blksize); return 0; } @@ -8355,12 +8366,12 @@ mutex_unlock(&device->bm_resync_fo_mutex); } - atomic_add(size >> 9, &peer_device->rs_sect_in); - mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME); break; default: BUG(); } + rs_sectors_came_in(peer_device, size); + mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME); put_ldev(device); } diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_sender.c drbd-9.0.14+git.62f906cf.test/drbd/drbd_sender.c --- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_sender.c 2021-09-10 13:54:03.216030195 +0800 +++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_sender.c 2021-09-10 16:11:54.594827839 +0800 @@ -551,7 +551,7 @@ return fb; } -static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in) +static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in, unsigned long duration) { struct peer_device_conf *pdc; unsigned int want; /* The number of sectors we want in-flight */ @@ -563,6 +563,13 @@ int max_sect; struct fifo_buffer *plan; + if (duration == 0) + duration = 1; + else if (duration > SLEEP_TIME * 
10) + duration = SLEEP_TIME * 10; + + sect_in = (u64)sect_in * SLEEP_TIME / duration; + pdc = rcu_dereference(peer_device->conf); plan = rcu_dereference(peer_device->rs_plan_s); @@ -572,7 +579,7 @@ want = ((pdc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ want = pdc->c_fill_target ? pdc->c_fill_target : - sect_in * pdc->c_delay_target * HZ / (SLEEP_TIME * 10); + sect_in * pdc->c_delay_target * HZ / (duration * 10); } correction = want - peer_device->rs_in_flight - plan->total; @@ -590,12 +597,12 @@ if (req_sect < 0) req_sect = 0; - max_sect = (pdc->c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (pdc->c_max_rate * 2 * duration) / HZ; if (req_sect > max_sect) req_sect = max_sect; /* - drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", + drbd_warn(device, "si=%llu if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", sect_in, peer_device->rs_in_flight, want, correction, steps, cps, peer_device->rs_planed, curr_corr, req_sect); */ @@ -606,17 +613,21 @@ static int drbd_rs_number_requests(struct drbd_peer_device *peer_device) { struct net_conf *nc; + unsigned long duration, now; unsigned int sect_in; /* Number of sectors that came in since the last turn */ int number, mxb; sect_in = atomic_xchg(&peer_device->rs_sect_in, 0); peer_device->rs_in_flight -= sect_in; + now = jiffies; + duration = now - peer_device->rs_last_mk_req_jif; + rcu_read_lock(); nc = rcu_dereference(peer_device->connection->transport.net_conf); mxb = nc ? 
nc->max_buffers : 0; if (rcu_dereference(peer_device->rs_plan_s)->size) { - number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9); + number = drbd_rs_controller(peer_device, sect_in, duration) >> (BM_BLOCK_SHIFT - 9); peer_device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { peer_device->c_sync_rate = rcu_dereference(peer_device->conf)->resync_rate; @@ -648,8 +659,8 @@ const sector_t capacity = drbd_get_capacity(device->this_bdev); int max_bio_size; int number, rollback_i, size; - int align, requeue = 0; - int i = 0; + int align; + int i; int discard_granularity = 0; if (unlikely(cancel)) @@ -678,10 +689,9 @@ max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; number = drbd_rs_number_requests(peer_device); - if (number <= 0) - goto requeue; for (i = 0; i < number; i++) { + bool send_buffer_ok = true; /* Stop generating RS requests, when half of the send buffer is filled */ mutex_lock(&peer_device->connection->mutex[DATA_STREAM]); if (transport->ops->stream_ok(transport, DATA_STREAM)) { @@ -692,14 +702,14 @@ queued = transport_stats.send_buffer_used; sndbuf = transport_stats.send_buffer_size; if (queued > sndbuf / 2) { - requeue = 1; + send_buffer_ok = false; transport->ops->hint(transport, DATA_STREAM, NOSPACE); } } else - requeue = 1; + send_buffer_ok = false; mutex_unlock(&peer_device->connection->mutex[DATA_STREAM]); - if (requeue) - goto requeue; + if (!send_buffer_ok) + goto request_done; next_sector: size = BM_BLOCK_SIZE; @@ -707,24 +717,22 @@ if (bit == DRBD_END_OF_BITMAP) { device->bm_resync_fo = drbd_bm_bits(device); - put_ldev(device); - return 0; + goto request_done; } sector = BM_BIT_TO_SECT(bit); if (drbd_try_rs_begin_io(peer_device, sector, true)) { device->bm_resync_fo = bit; - goto requeue; + goto request_done; } - device->bm_resync_fo = bit + 1; if (unlikely(drbd_bm_test_bit(peer_device, bit) == 0)) { + device->bm_resync_fo = bit + 1; drbd_rs_complete_io(peer_device, sector); goto 
next_sector; } -#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE /* try to find some adjacent bits. * we stop if we have already the maximum req size. * @@ -733,7 +741,7 @@ */ align = 1; rollback_i = i; - while (i < number) { + while (i + 1 < number) { if (size + BM_BLOCK_SIZE > max_bio_size) break; @@ -760,11 +768,8 @@ align++; i++; } - /* if we merged some, - * reset the offset to start the next drbd_bm_find_next from */ - if (size > BM_BLOCK_SIZE) - device->bm_resync_fo = bit + 1; -#endif + /* set the offset to start the next drbd_bm_find_next from */ + device->bm_resync_fo = bit + 1; /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) @@ -779,7 +784,7 @@ drbd_rs_complete_io(peer_device, sector); device->bm_resync_fo = BM_SECT_TO_BIT(sector); i = rollback_i; - goto requeue; + goto request_done; case 0: /* everything ok */ break; @@ -802,6 +807,10 @@ } } +request_done: + /* ... but do a correction, in case we had to break/goto request_done; */ + peer_device->rs_in_flight -= (number - i) * BM_SECT_PER_BIT; + if (device->bm_resync_fo >= drbd_bm_bits(device)) { /* last syncer _request_ was sent, * but the P_RS_DATA_REPLY not yet received. sync will end (and @@ -813,7 +822,6 @@ return 0; } - requeue: peer_device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME); put_ldev(device); @@ -1806,6 +1814,7 @@ atomic_set(&peer_device->rs_sect_in, 0); atomic_set(&peer_device->device->rs_sect_ev, 0); /* FIXME: ??? */ + peer_device->rs_last_mk_req_jif = jiffies; peer_device->rs_in_flight = 0; peer_device->rs_last_events = drbd_backing_bdev_events(peer_device->device->ldev->backing_bdev->bd_contains->bd_disk);
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor