Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
network:ha-clustering:Factory
drbd
0001-drbd-properly-rate-limit-resync-progress-r...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0001-drbd-properly-rate-limit-resync-progress-reports.patch of Package drbd
From aab03bfc73a62f95011316545a5c0fbb4817741b Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 14 Aug 2024 11:49:42 +0200
Subject: [PATCH 01/32] drbd: properly rate-limit resync progress reports

A peer_device in "paused" sync would have flooded the "drbd events2"
generic netlink broadcast with "resync progress reports", if it cleared
significant out-of-sync bits, as is the case with application writes,
or several peers syncing from the same sync source and having a
"paused sync" replication state between themselves.

If you have "many" such resources, this storm may even overflow
receive buffers.

At most one progress report every three seconds should be enough,
and is what was intended.

Use a new "last progress report time stamp" to throttle advancing
resync progress marks and progress report broadcasts.
---
 drbd/drbd_actlog.c   | 35 +++++++++++++++++++++++------------
 drbd/drbd_int.h      |  1 +
 drbd/drbd_receiver.c |  1 +
 drbd/drbd_state.c    |  2 ++
 4 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/drbd/drbd_actlog.c b/drbd/drbd_actlog.c
index b96560843878..646dcb29e1d9 100644
--- a/drbd/drbd_actlog.c
+++ b/drbd/drbd_actlog.c
@@ -1020,19 +1020,30 @@ static bool update_rs_extent(struct drbd_peer_device *peer_device,
 
 void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go)
 {
-	unsigned long now = jiffies;
-	unsigned long last = peer_device->rs_mark_time[peer_device->rs_last_mark];
-	int next = (peer_device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
-	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
-		if (peer_device->rs_mark_left[peer_device->rs_last_mark] != still_to_go &&
-		    peer_device->repl_state[NOW] != L_PAUSED_SYNC_T &&
-		    peer_device->repl_state[NOW] != L_PAUSED_SYNC_S) {
-			peer_device->rs_mark_time[next] = now;
-			peer_device->rs_mark_left[next] = still_to_go;
-			peer_device->rs_last_mark = next;
-		}
-		drbd_peer_device_post_work(peer_device, RS_PROGRESS);
+	unsigned long now;
+	int next;
+
+	/* report progress and advance marks only if we made progress */
+	if (peer_device->rs_mark_left[peer_device->rs_last_mark] == still_to_go)
+		return;
+
+	/* report progress and advance marks at most once every DRBD_SYNC_MARK_STEP (3 seconds) */
+	now = jiffies;
+	if (!time_after_eq(now, peer_device->rs_last_progress_report_ts + DRBD_SYNC_MARK_STEP))
+		return;
+
+	/* Do not advance marks if we are "paused" */
+	if (peer_device->repl_state[NOW] != L_PAUSED_SYNC_T &&
+	    peer_device->repl_state[NOW] != L_PAUSED_SYNC_S) {
+		next = (peer_device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+		peer_device->rs_mark_time[next] = now;
+		peer_device->rs_mark_left[next] = still_to_go;
+		peer_device->rs_last_mark = next;
 	}
+
+	/* But still report progress even if paused. */
+	peer_device->rs_last_progress_report_ts = now;
+	drbd_peer_device_post_work(peer_device, RS_PROGRESS);
 }
 
 /* It is called lazy update, so don't do write-out too often. */
diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h
index 49bd7b0c407c..c18407899f59 100644
--- a/drbd/drbd_int.h
+++ b/drbd/drbd_int.h
@@ -1285,6 +1285,7 @@ struct drbd_peer_device {
 	unsigned long rs_paused;
 	/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
 	unsigned long rs_same_csum;
+	unsigned long rs_last_progress_report_ts;
 #define DRBD_SYNC_MARKS 8
 #define DRBD_SYNC_MARK_STEP (3*HZ)
 	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index 19634f6423bd..ee54cf3ac116 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -3409,6 +3409,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 		peer_device->ov_skipped = 0;
 		peer_device->rs_total = ov_left;
 		peer_device->rs_last_writeout = now;
+		peer_device->rs_last_progress_report_ts = now;
 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
 			peer_device->rs_mark_left[i] = ov_left;
 			peer_device->rs_mark_time[i] = now;
diff --git a/drbd/drbd_state.c b/drbd/drbd_state.c
index be1de8f0653b..44f55ee5c939 100644
--- a/drbd/drbd_state.c
+++ b/drbd/drbd_state.c
@@ -2483,6 +2483,7 @@ static void initialize_resync_progress_marks(struct drbd_peer_device *peer_devic
 	unsigned long now = jiffies;
 	int i;
 
+	peer_device->rs_last_progress_report_ts = now;
 	for (i = 0; i < DRBD_SYNC_MARKS; i++) {
 		peer_device->rs_mark_left[i] = tw;
 		peer_device->rs_mark_time[i] = now;
@@ -2730,6 +2731,7 @@ static void finish_state_change(struct drbd_resource *resource, const char *tag)
 				peer_device->ov_last_skipped_size = 0;
 				peer_device->ov_last_skipped_start = 0;
 				peer_device->rs_last_writeout = now;
+				peer_device->rs_last_progress_report_ts = now;
 				for (i = 0; i < DRBD_SYNC_MARKS; i++) {
 					peer_device->rs_mark_left[i] = peer_device->rs_total;
 					peer_device->rs_mark_time[i] = now;
-- 
2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor