From b29c7b47748caedb1ddeb12bfa4c4cc5e6fa8a93 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 25 Jul 2024 06:00:28 +0000 Subject: [PATCH 1/3] NPI-3421 additional comments and docstring detail on SP3 POS nodata handling, to clarify that all components must be 0 to indicate a nodata value. --- gnssanalysis/gn_io/sp3.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index a52abf8..5417891 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -50,16 +50,23 @@ def sp3_pos_nodata_to_nan( """ Converts the SP3 Positional column's nodata values (0.000000) to NaNs. See https://files.igs.org/pub/data/format/sp3_docu.txt + Note: these values represent a vector giving the satellite's position relative to the centre of Earth. + It is theoretically possible for up to two of these values to be 0, and still represent a valid + position. + Therefore, we only consider a value to be nodata if ALL components of the vector (X,Y,Z) are 0. :param _pd.DataFrame sp3_df: SP3 data frame to filter nodata values for :return None """ + # Create a mask for the index values (rows if you will) where the *complete* POS vector (X, Y, Z) is nodata + # Note the use of & here to logically AND together the three binary masks. nan_mask = ( (sp3_df[("EST", "X")] == SP3_POS_NODATA_NUMERIC) & (sp3_df[("EST", "Y")] == SP3_POS_NODATA_NUMERIC) & (sp3_df[("EST", "Z")] == SP3_POS_NODATA_NUMERIC) ) - sp3_df.loc[nan_mask, [("EST", "X"), ("EST", "Y"), ("EST", "Z")]] = _np.NAN + # For all index values where the entire POS vector (X, Y and Z components) are 0, set all components to NaN. 
+ sp3_df.loc[nan_mask, [("EST", "X"), ("EST", "Y"), ("EST", "Z")]] = _np.nan def sp3_clock_nodata_to_nan( From 96efa466bba28907136a43be71945d710dfe8cd5 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Fri, 26 Jul 2024 04:29:01 +0000 Subject: [PATCH 2/3] NPI-3421 fix format handling of STD nodata values (formatter was not getting invoked because the column names specified for it were missing the underscore). --- gnssanalysis/gn_io/sp3.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 5417891..40f2eeb 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -73,7 +73,7 @@ def sp3_clock_nodata_to_nan( sp3_df: _pd.DataFrame ) -> None: """ - Converts the SP3 Clock column's nodata values (999999 or 999999.999999 - the fractional component optional) to NaNs. + Converts the SP3 Clock column's nodata values (999999 or 999999.999999 - fractional component optional) to NaNs. See https://files.igs.org/pub/data/format/sp3_docu.txt :param _pd.DataFrame sp3_df: SP3 data frame to filter nodata values for @@ -395,6 +395,8 @@ def clk_formatter(x): # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*, # so DataFrame.to_string() will invoke them for those values. + # TODO A future improvement would be to use NaN rather than specific integer values, as this is an internal + # only representation. def pos_std_formatter(x): # We use -100 as our integer NaN/"missing" marker if x <= SP3_POS_STD_NODATA: @@ -413,10 +415,10 @@ def clk_std_formatter(x): "Y": pos_formatter, "Z": pos_formatter, "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string() - "STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here.
- "STDY": pos_std_formatter, - "STDZ": pos_std_formatter, - "STDCLK": clk_std_formatter, # ditto above + "STD_X": pos_std_formatter, # Nodata is represented as an integer, so can be handled here. + "STD_Y": pos_std_formatter, + "STD_Z": pos_std_formatter, + "STD_CLK": clk_std_formatter, # ditto above } for epoch, epoch_vals in out_df.reset_index("PRN").groupby(axis=0, level="J2000"): # Format and write out the epoch in the SP3 format From c6687e89e44d19d76e1ed6d70b5bc177990b8826 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Fri, 26 Jul 2024 04:29:01 +0000 Subject: [PATCH 3/3] NPI-3421 fix format handling of STD nodata values (formatter was not getting invoked because the column names specified for it were missing the underscore). --- gnssanalysis/gn_io/sp3.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 5417891..13cc166 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -73,14 +73,14 @@ def sp3_clock_nodata_to_nan( sp3_df: _pd.DataFrame ) -> None: """ - Converts the SP3 Clock column's nodata values (999999 or 999999.999999 - the fractional component optional) to NaNs. + Converts the SP3 Clock column's nodata values (999999 or 999999.999999 - fractional component optional) to NaNs. See https://files.igs.org/pub/data/format/sp3_docu.txt :param _pd.DataFrame sp3_df: SP3 data frame to filter nodata values for :return None """ nan_mask = sp3_df[("EST", "CLK")] >= SP3_CLOCK_NODATA_NUMERIC - sp3_df.loc[nan_mask, ("EST", "CLK")] = _np.NAN + sp3_df.loc[nan_mask, ("EST", "CLK")] = _np.nan def mapparm(old, new): @@ -395,6 +395,8 @@ def clk_formatter(x): # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*, # so DataFrame.to_string() will invoke them for those values.
+ # TODO A future improvement would be to use NaN rather than specific integer values, as this is an internal + # only representation. def pos_std_formatter(x): # We use -100 as our integer NaN/"missing" marker if x <= SP3_POS_STD_NODATA: @@ -413,10 +415,10 @@ def clk_std_formatter(x): "Y": pos_formatter, "Z": pos_formatter, "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string() - "STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here. - "STDY": pos_std_formatter, - "STDZ": pos_std_formatter, - "STDCLK": clk_std_formatter, # ditto above + "STD_X": pos_std_formatter, # Nodata is represented as an integer, so can be handled here. + "STD_Y": pos_std_formatter, + "STD_Z": pos_std_formatter, + "STD_CLK": clk_std_formatter, # ditto above } for epoch, epoch_vals in out_df.reset_index("PRN").groupby(axis=0, level="J2000"): # Format and write out the epoch in the SP3 format