diff --git a/gnssanalysis/gn_io/common.py b/gnssanalysis/gn_io/common.py index abca0d2..f043fa2 100644 --- a/gnssanalysis/gn_io/common.py +++ b/gnssanalysis/gn_io/common.py @@ -15,32 +15,43 @@ MB = 1024 * 1024 -def path2bytes(path: _Union[_Path, str, bytes]) -> bytes: +def path2bytes(path_or_bytes: _Union[_Path, str, bytes]) -> bytes: """Main file reading function. Checks file extension and calls appropriate reading function. Passes through bytes if given, thus one may not routinely leave it in the top of the specific file reading function and be able to call it with bytes or str path without additional modifications. :param str path: input file path :return bytes: bytes object, decompressed if necessary + :raise FileNotFoundError: path didn't resolve to a file + :raise Exception: wrapped exception for all other exceptions raised + :raise EOFError: if input bytes is empty, input file is empty, or decompressed result of input file is empty. """ - if isinstance(path, bytes): # no reading is necessary - pass through. - return path + if isinstance(path_or_bytes, bytes): # no reading is necessary - pass through. + if len(path_or_bytes) == 0: + raise EOFError("Input bytes object was empty!") + return path_or_bytes + + if isinstance(path_or_bytes, _Path): + path_string = path_or_bytes.as_posix() + elif isinstance(path_or_bytes, str): + path_string = path_or_bytes + else: + raise TypeError("Must be Path, str, or bytes") - if isinstance(path, _Path): - path = path.as_posix() try: - if path.endswith(".Z"): - databytes = _lzw2bytes(path) - elif path.endswith(".gz"): - databytes = _gz2bytes(path) + if path_string.endswith(".Z"): + databytes = _lzw2bytes(path_string) + elif path_string.endswith(".gz"): + databytes = _gz2bytes(path_string) else: - databytes = _txt2bytes(path) - except FileNotFoundError: - _logging.error(f"File {path} not found. Returning empty bytes.") - return None + databytes = _txt2bytes(path_string) + except FileNotFoundError as fe: + raise fe except Exception as e: - _logging.error(f"Error reading file {path} with error {e}. Returning empty bytes.") - return None + raise Exception(f"Error reading file '{path_string}'. Exception: {e}") + + if len(databytes) == 0: + raise EOFError(f"Input file (or decompressed result of it) was empty. Path: '{path_string}'") return databytes diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 82e4195..5ceaa37 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -2,7 +2,8 @@ import io as _io import os as _os import re as _re -from typing import Literal, Union, List, Tuple +from typing import Literal, Optional, Union, List, Tuple +from pathlib import Path import numpy as _np import pandas as _pd @@ -238,7 +239,16 @@ def _process_sp3_block( return temp_sp3 -def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _pd.DataFrame: +def description_for_path_or_bytes(path_or_bytes: Union[str, Path, bytes]) -> Optional[str]: + if isinstance(path_or_bytes, str) or isinstance(path_or_bytes, Path): + return str(path_or_bytes) + else: + return "Data passed as bytes: no path available" + + +def read_sp3( + sp3_path_or_bytes: Union[str, Path, bytes], pOnly: bool = True, nodata_to_nan: bool = True +) -> _pd.DataFrame: """Reads an SP3 file and returns the data as a pandas DataFrame. @@ -247,7 +257,8 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ :param bool nodata_to_nan: If True, converts 0.000000 (indicating nodata) to NaN in the SP3 POS column and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True. :return pandas.DataFrame: The SP3 data as a DataFrame. - :raise FileNotFoundError: If the SP3 file specified by sp3_path does not exist. + :raise FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist. + :raise Exception: For other errors reading SP3 file/bytes :note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data. This function reads the SP3 file, parses the header information, and extracts the data into a DataFrame. @@ -256,7 +267,7 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ (mm/ps) and remove unnecessary columns. If pOnly is True, only P* values are included in the DataFrame. If nodata_to_nan is True, nodata values in the SP3 POS and CLK columns are converted to NaN. """ - content = _gn_io.common.path2bytes(str(sp3_path)) + content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) comments: list = _RE_SP3_COMMENT_STRIP.findall(content) @@ -306,13 +317,13 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ logging.warning( f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). " f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} " - f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first." + f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first." ) # Now dedupe them, keeping the first of any clashes: sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Write header data to dataframe attributes: sp3_df.attrs["HEADER"] = parsed_header - sp3_df.attrs["path"] = sp3_path + sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else "" return sp3_df diff --git a/tests/test_common.py b/tests/test_common.py index 51d379c..01ccd82 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,9 +1,7 @@ import unittest -from unittest.mock import patch, mock_open, MagicMock -from pathlib import Path -import logging +from unittest.mock import patch +from pyfakefs.fake_filesystem_unittest import TestCase -# Assuming the function path2bytes is in a module named common from gnssanalysis.gn_io.common import path2bytes @@ -34,19 +32,32 @@ def test_bytes_input(self): result = path2bytes(b"test data") self.assertEqual(result, b"test data") - @patch("gnssanalysis.gn_io.common._logging.error") - def test_file_not_found(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=FileNotFoundError): - print("testing path") - result = path2bytes("nonexistent.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with("File nonexistent.txt not found. Returning empty bytes.") - - @patch("gnssanalysis.gn_io.common._logging.error") - def test_generic_exception(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=Exception("Generic error")): - result = path2bytes("test.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with( - "Error reading file test.txt with error Generic error. Returning empty bytes." - ) + +class TestPath2BytesWithFakeFs(TestCase): + def setUp(self): + self.setUpPyfakefs() + + def test_file_not_found_and_file_read(self): + # Create a mock file, but not the one we're looking for + self.fs.create_file("testfile.txt", contents=b"hello") + with self.assertRaises(FileNotFoundError): + path2bytes("nonexistent.txt") + + # Now open the file that does exist and check the contents + self.assertEqual(path2bytes("testfile.txt"), b"hello") + + def test_empty_file_exception(self): + # Create a mock empty file + self.fs.create_file("emptyfile.txt", contents=b"") + # We raise EOFError for empty files, and (valid) compressed files that expand to a zero-length output + with self.assertRaises(EOFError): + path2bytes("emptyfile.txt") + + def test_invalid_archive_expand_exception(self): + # Test that trying to unpack an archive file which isn't valid archive data, raises an exception + self.fs.create_file("invalidarchive.gz", contents=b"hello") + self.fs.create_file("invalidarchive.Z", contents=b"hello") + with self.assertRaises(Exception): + path2bytes("invalidarchive.gz") + with self.assertRaises(Exception): + path2bytes("invalidarchive.Z")