From c6315ba66d795b73caac9731e11803894e454f0b Mon Sep 17 00:00:00 2001 From: raychu86 <14917648+raychu86@users.noreply.github.com> Date: Wed, 13 Mar 2024 18:02:19 -0700 Subject: [PATCH 1/3] Add tracker for num_blocks_behind --- node/bft/src/sync/mod.rs | 5 +++++ node/sync/src/block_sync.rs | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/node/bft/src/sync/mod.rs b/node/bft/src/sync/mod.rs index cf01843ed2..dbc27dbb9b 100644 --- a/node/bft/src/sync/mod.rs +++ b/node/bft/src/sync/mod.rs @@ -379,6 +379,11 @@ impl Sync { self.block_sync.is_block_synced() } + /// Returns the number of blocks the node is behind the greatest peer height. + pub fn num_blocks_behind(&self) -> u32 { + self.block_sync.num_blocks_behind() + } + /// Returns `true` if the node is in gateway mode. pub const fn is_gateway_mode(&self) -> bool { self.block_sync.mode().is_gateway() diff --git a/node/sync/src/block_sync.rs b/node/sync/src/block_sync.rs index 97e05561bd..8ccbd686c9 100644 --- a/node/sync/src/block_sync.rs +++ b/node/sync/src/block_sync.rs @@ -30,7 +30,7 @@ use std::{ collections::BTreeMap, net::{IpAddr, Ipv4Addr, SocketAddr}, sync::{ - atomic::{AtomicBool, Ordering}, + atomic::{AtomicBool, AtomicU32, Ordering}, Arc, }, time::Instant, @@ -109,6 +109,8 @@ pub struct BlockSync { request_timeouts: Arc>>>, /// The boolean indicator of whether the node is synced up to the latest block (within the given tolerance). is_block_synced: Arc<AtomicBool>, + /// The number of blocks the node is behind the greatest peer height. + num_blocks_behind: Arc<AtomicU32>, /// The lock to guarantee advance_with_sync_blocks() is called only once at a time.
advance_with_sync_blocks_lock: Arc>, } @@ -126,6 +128,7 @@ impl BlockSync { request_timestamps: Default::default(), request_timeouts: Default::default(), is_block_synced: Default::default(), + num_blocks_behind: Default::default(), advance_with_sync_blocks_lock: Default::default(), } } @@ -141,6 +144,12 @@ impl BlockSync { pub fn is_block_synced(&self) -> bool { self.is_block_synced.load(Ordering::SeqCst) } + + /// Returns the number of blocks the node is behind the greatest peer height. + #[inline] + pub fn num_blocks_behind(&self) -> u32 { + self.num_blocks_behind.load(Ordering::SeqCst) + } } #[allow(dead_code)] @@ -439,6 +448,8 @@ impl BlockSync { let num_blocks_behind = greatest_peer_height.saturating_sub(canon_height); // Determine if the primary is synced. let is_synced = num_blocks_behind <= max_blocks_behind; + // Update the num blocks behind. + self.num_blocks_behind.store(num_blocks_behind, Ordering::SeqCst); // Update the sync status. self.is_block_synced.store(is_synced, Ordering::SeqCst); } From 191f00b3e846fe3c504615a1e637652028ea456c Mon Sep 17 00:00:00 2001 From: raychu86 <14917648+raychu86@users.noreply.github.com> Date: Wed, 13 Mar 2024 18:03:30 -0700 Subject: [PATCH 2/3] Start processing certificates earlier --- node/bft/src/primary.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index f91cd52bf4..1fefc3db98 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -40,6 +40,7 @@ use crate::{ use snarkos_account::Account; use snarkos_node_bft_events::PrimaryPing; use snarkos_node_bft_ledger_service::LedgerService; +use snarkos_node_sync::MAX_BLOCKS_BEHIND; use snarkvm::{ console::{ account::Signature, @@ -1005,8 +1006,9 @@ impl Primary { let self_ = self.clone(); self.spawn(async move { while let Some((peer_ip, batch_certificate)) = rx_batch_certified.recv().await { - // If the primary is not synced, then do not store the certificate. 
- if !self_.sync.is_synced() { + // If the primary is not synced and lagging by more than `MAX_BLOCKS_BEHIND`, then do not store the certificate. + // This allows us to start processing the certificate as soon as we are within `MAX_BLOCKS_BEHIND` blocks. + if !self_.sync.is_synced() && self_.sync.num_blocks_behind() > MAX_BLOCKS_BEHIND { trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); continue; } From acc1d753abd84008fde51b9e6de20cc29d1ee330 Mon Sep 17 00:00:00 2001 From: raychu86 <14917648+raychu86@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:39:32 -0700 Subject: [PATCH 3/3] Fix leniency --- node/bft/src/primary.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/bft/src/primary.rs b/node/bft/src/primary.rs index 1fefc3db98..6d3c7fef60 100644 --- a/node/bft/src/primary.rs +++ b/node/bft/src/primary.rs @@ -1006,9 +1006,9 @@ impl Primary { let self_ = self.clone(); self.spawn(async move { while let Some((peer_ip, batch_certificate)) = rx_batch_certified.recv().await { - // If the primary is not synced and lagging by more than `MAX_BLOCKS_BEHIND`, then do not store the certificate. - // This allows us to start processing the certificate as soon as we are within `MAX_BLOCKS_BEHIND` blocks. - if !self_.sync.is_synced() && self_.sync.num_blocks_behind() > MAX_BLOCKS_BEHIND { + // If the primary is not synced and lagging by more than `MAX_BLOCKS_BEHIND + 1`, then do not store the certificate. + // This allows us to start processing the certificate as soon as we are within `MAX_BLOCKS_BEHIND + 1` blocks. + if !self_.sync.is_synced() && self_.sync.num_blocks_behind() > MAX_BLOCKS_BEHIND.saturating_add(1) { trace!("Skipping a certified batch from '{peer_ip}' {}", "(node is syncing)".dimmed()); continue; }