From 5a5b01137917b9e4c40c849b39c929345adf33fe Mon Sep 17 00:00:00 2001 From: David Date: Fri, 24 May 2024 16:14:12 -0400 Subject: [PATCH 1/2] Add option to specify chunk size --- replibyte/src/cli.rs | 3 +++ replibyte/src/commands/dump.rs | 1 + replibyte/src/source/mod.rs | 1 + replibyte/src/tasks/full_dump.rs | 5 +++-- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/replibyte/src/cli.rs b/replibyte/src/cli.rs index 50910299..c0447d31 100644 --- a/replibyte/src/cli.rs +++ b/replibyte/src/cli.rs @@ -110,6 +110,9 @@ pub struct DumpCreateArgs { /// dump name #[clap(short, long)] pub name: Option, + /// chunk size, default to 100 + #[clap(short, long)] + pub chunk_size: Option, } #[derive(Args, Debug)] diff --git a/replibyte/src/commands/dump.rs b/replibyte/src/commands/dump.rs index 22eb765a..a9237ef4 100644 --- a/replibyte/src/commands/dump.rs +++ b/replibyte/src/commands/dump.rs @@ -132,6 +132,7 @@ where skip_config: &skip_config, database_subset: &source.database_subset, only_tables: &only_tables_config, + chunk_size: args.chunk_size, }; match args.source_type.as_ref().map(|x| x.as_str()) { diff --git a/replibyte/src/source/mod.rs b/replibyte/src/source/mod.rs index 694e3447..e19f0645 100644 --- a/replibyte/src/source/mod.rs +++ b/replibyte/src/source/mod.rs @@ -29,4 +29,5 @@ pub struct SourceOptions<'a> { pub skip_config: &'a Vec, pub database_subset: &'a Option, pub only_tables: &'a Vec, + pub chunk_size: Option, } diff --git a/replibyte/src/tasks/full_dump.rs b/replibyte/src/tasks/full_dump.rs index 907d271a..bf6961d1 100644 --- a/replibyte/src/tasks/full_dump.rs +++ b/replibyte/src/tasks/full_dump.rs @@ -69,8 +69,9 @@ where Ok(()) }); - // buffer of 100MB in memory to use and re-use to upload data into datastore - let buffer_size = 100 * 1024 * 1024; + // buffer default of 100MB (unless specified) in memory to use and re-use to upload data into datastore + let chunk_size = self.options.chunk_size.unwrap_or(100); + let buffer_size = chunk_size * 1024 * 1024; let mut queries = vec![]; let mut consumed_buffer_size = 0usize; let mut total_transferred_bytes = 0usize; From 1aa0e09a6351d9ac401777aec44e704e3cbdc3ea Mon Sep 17 00:00:00 2001 From: David Date: Sun, 26 May 2024 22:36:57 -0400 Subject: [PATCH 2/2] Add default option and fix tests --- replibyte/src/commands/dump.rs | 2 +- replibyte/src/source/mod.rs | 2 +- replibyte/src/source/mongodb.rs | 3 +++ replibyte/src/source/mysql.rs | 3 +++ replibyte/src/source/postgres.rs | 7 +++++++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/replibyte/src/commands/dump.rs b/replibyte/src/commands/dump.rs index a9237ef4..34230d85 100644 --- a/replibyte/src/commands/dump.rs +++ b/replibyte/src/commands/dump.rs @@ -132,7 +132,7 @@ where skip_config: &skip_config, database_subset: &source.database_subset, only_tables: &only_tables_config, - chunk_size: args.chunk_size, + chunk_size: &args.chunk_size, }; match args.source_type.as_ref().map(|x| x.as_str()) { diff --git a/replibyte/src/source/mod.rs b/replibyte/src/source/mod.rs index e19f0645..504ea48c 100644 --- a/replibyte/src/source/mod.rs +++ b/replibyte/src/source/mod.rs @@ -29,5 +29,5 @@ pub struct SourceOptions<'a> { pub skip_config: &'a Vec, pub database_subset: &'a Option, pub only_tables: &'a Vec, - pub chunk_size: Option, + pub chunk_size: &'a Option, } diff --git a/replibyte/src/source/mongodb.rs b/replibyte/src/source/mongodb.rs index 5857febd..0ff6bd49 100644 --- a/replibyte/src/source/mongodb.rs +++ b/replibyte/src/source/mongodb.rs @@ -368,6 +368,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p.read(source_options, |_, _| {}).is_ok()); @@ -380,6 +381,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p.read(source_options, |_, _| {}).is_err()); @@ -395,6 +397,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; p.read(source_options, |original_query, query| { diff --git a/replibyte/src/source/mysql.rs b/replibyte/src/source/mysql.rs index 04dd820f..e06cf146 100644 --- a/replibyte/src/source/mysql.rs +++ b/replibyte/src/source/mysql.rs @@ -456,6 +456,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p.read(source_options, |_original_query, _query| {}).is_ok()); @@ -468,6 +469,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p .read(source_options, |_original_query, _query| {}) @@ -484,6 +486,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; let _ = p.read(source_options, |original_query, query| { assert!(original_query.data().len() > 0); diff --git a/replibyte/src/source/postgres.rs b/replibyte/src/source/postgres.rs index 60ebae72..ab9853a3 100644 --- a/replibyte/src/source/postgres.rs +++ b/replibyte/src/source/postgres.rs @@ -581,6 +581,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p.read(source_options, |original_query, query| {}).is_ok()); @@ -593,6 +594,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; assert!(p.read(source_options, |original_query, query| {}).is_err()); @@ -608,6 +610,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; let _ = p.read(source_options, |original_query, query| { @@ -734,6 +737,7 @@ mod tests { skip_config: &vec![], database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; let _ = p.read(source_options, |original_query, query| { @@ -775,6 +779,7 @@ mod tests { skip_config: &skip_config, database_subset: &None, only_tables: &vec![], + chunk_size: &None, }; let _ = p.read(source_options, |_original_query, query| { @@ -826,6 +831,7 @@ mod tests { passthrough_tables: None, }), only_tables: &vec![], + chunk_size: &None, }; let mut rows_percent_50 = vec![]; @@ -863,6 +869,7 @@ mod tests { passthrough_tables: None, }), only_tables: &vec![], + chunk_size: &None, }; let mut rows_percent_30 = vec![];