From 1dc32bb7a4beb5e22202a92b9ee6a30b9a170c9a Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Tue, 18 Jan 2022 17:02:15 +1100 Subject: [PATCH 1/7] Removed the basemap layer as it was causing issues. Added script argvs for path and variable_to_map in main(). --- examples/plot_point_dataset.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/plot_point_dataset.py b/examples/plot_point_dataset.py index 975c972..8fd710b 100644 --- a/examples/plot_point_dataset.py +++ b/examples/plot_point_dataset.py @@ -28,6 +28,7 @@ import cartopy.crs as ccrs import cartopy.io.img_tiles as cimgt from math import log10, floor, pow +import sys def plot_point_dataset(netcdf_point_utils, variable_to_map, @@ -69,7 +70,6 @@ def rescale_array(input_np_array, new_range_min=0, new_range_max=1): utm_zone = abs(utm_zone) projection = ccrs.UTM(zone=utm_zone, southern_hemisphere=southern_hemisphere) print('utm_zone = {}'.format(utm_zone)) - #print(utm_coords) variable = netcdf_point_utils.netcdf_dataset.variables[variable_to_map] @@ -91,8 +91,7 @@ def rescale_array(input_np_array, new_range_min=0, new_range_max=1): utm_coords = utm_coords[spatial_mask] print('{} points in UTM bounding box: {}'.format(np.count_nonzero(spatial_mask), utm_bbox)) - #print(utm_coords) - + colour_array = rescale_array(variable[spatial_mask], 0, 1) fig = plt.figure(figsize=(30,30)) @@ -103,9 +102,9 @@ def rescale_array(input_np_array, new_range_min=0, new_range_max=1): #map_image = cimgt.OSM() # https://www.openstreetmap.org/about #map_image = cimgt.StamenTerrain() # http://maps.stamen.com/ - map_image = cimgt.QuadtreeTiles() - #print(map_image.__dict__) - ax.add_image(map_image, 10) + #map_image = cimgt.GoogleTiles(style='satellite') + + #ax.add_image(map_image, 10) # Compute and set regular tick spacing range_x = utm_bbox[2] - utm_bbox[0] @@ -139,11 +138,10 @@ def rescale_array(input_np_array, new_range_min=0, new_range_max=1): try: # not all variables have units. These will fail on the try and produce the map without tick labels. cb = plt.colorbar(sc, ticks=[0, 1]) cb.ax.set_yticklabels([str(np.min(variable[spatial_mask])), str(np.max(variable[spatial_mask]))]) # vertically oriented colorbar - cb.set_label("{} {}".format(variable.long_name, variable.units)) except: pass - + print("show") plt.show() @@ -151,19 +149,21 @@ def main(): ''' main function for quick and dirty testing ''' - # Create NetCDFPointUtils object for specified netCDF dataset - netcdf_path = 'http://dapds00.nci.org.au/thredds/dodsC/uc0/rr2_dev/axi547/ground_gravity/point_datasets/201780.nc' - #netcdf_path = 'E:\\Temp\\gravity_point_test\\201780.nc' - - netcdf_dataset = netCDF4.Dataset(netcdf_path) + nc_path = sys.argv[1] + variable_to_plot = sys.argv[2] + + netcdf_dataset = netCDF4.Dataset(nc_path) npu = NetCDFPointUtils(netcdf_dataset) + print('1D Point variables:\n\t{}'.format('\n\t'.join([key for key, value in netcdf_dataset.variables.items() + if value.dimensions == ('point',)]))) # Plot spatial subset plot_point_dataset(npu, - 'Bouguer', - utm_bbox=[630000,7980000,680000,8030000], + variable_to_plot, + #utm_bbox=[660000,7080000,680000,7330000], colour_scheme='gist_heat', - point_size=50 + point_size=30, + point_step=100 ) if __name__ == '__main__': From 600e2c3d658f13e93a0f92a8df1ac6619aa0349a Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Wed, 6 Apr 2022 17:55:06 +1000 Subject: [PATCH 2/7] attempt to make netCDF access for list of points faster --- geophys_utils/_netcdf_grid_utils.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/geophys_utils/_netcdf_grid_utils.py b/geophys_utils/_netcdf_grid_utils.py index ad3ddd1..9779d5e 100644 --- a/geophys_utils/_netcdf_grid_utils.py +++ b/geophys_utils/_netcdf_grid_utils.py @@ -20,6 +20,7 @@ @author: Alex Ip ''' +from operator import index import numpy as np import math from scipy.ndimage import map_coordinates @@ -234,6 +235,11 @@ def get_fractional_indices_from_coords(self, coordinates, wkt=None): return fractional_indices + def _get_nbytes(self, index_array, start_index, end_index, data_variable): #number of bytes used for an indexing request + dx = abs(index_array[end_index,0]-index_array[start_index,0]) + dy = abs(index_array[end_index,0]-index_array[start_index,0]) + return dx*dy*data_variable.dtype.itemsize + def get_value_at_coords(self, coordinates, wkt=None, max_bytes=None, variable_name=None): ''' @@ -281,21 +287,28 @@ def get_value_at_coords(self, coordinates, wkt=None, result_array = np.ones( shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value start_index = 0 - end_index = min(max_points, len(index_array)) - while True: + end_index = 1 + while self._get_nbytes(index_array, start_index, end_index, data_variable) < max_bytes: + end_index += 1 + end_index = min(end_index, len(index_array)) + while start_index < len(index_array): # N.B: ".diagonal()" is required because NetCDF doesn't do advanced indexing exactly like numpy # Hack is required to take values from leading diagonal. Requires n^2 elements retrieved instead of n. Not good, but better than whole array # TODO: Think of a better way of doing this - value_array[start_index:end_index] = data_variable[ - (index_array[start_index:end_index, 0], index_array[start_index:end_index, 1])].diagonal() - if end_index == len(index_array): # Finished - break + query_result = data_variable[ + (index_array[start_index:end_index, 0], index_array[start_index:end_index, 1])] + residx0 = index_array[start_index:end_index,0] - index_array[start_index,0] + residx1 = index_array[start_index:end_index,1] - index_array[start_index,1] + value_array[start_index:end_index] = query_result[residx0, residx1] start_index = end_index - end_index = min(start_index + max_points, len(index_array)) + end_index = start_index + 1 + while self._get_nbytes(index_array, start_index, end_index, data_variable) < max_bytes: + end_index += 1 + end_index = min(end_index, len(index_array)) result_array[mask_array] = value_array return list(result_array) - except: + except IndexError: return data_variable[indices[0], indices[1]] def get_interpolated_value_at_coords( From 2f787d509a924f121376881e86f19e35f6fab94d Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Thu, 7 Apr 2022 11:15:19 +1000 Subject: [PATCH 3/7] faster point access for sorted points --- geophys_utils/_netcdf_grid_utils.py | 98 ++++++++++++++--------------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/geophys_utils/_netcdf_grid_utils.py b/geophys_utils/_netcdf_grid_utils.py index 9779d5e..50c4d95 100644 --- a/geophys_utils/_netcdf_grid_utils.py +++ b/geophys_utils/_netcdf_grid_utils.py @@ -20,7 +20,6 @@ @author: Alex Ip ''' -from operator import index import numpy as np import math from scipy.ndimage import map_coordinates @@ -235,11 +234,6 @@ def get_fractional_indices_from_coords(self, coordinates, wkt=None): return fractional_indices - def _get_nbytes(self, index_array, start_index, end_index, data_variable): #number of bytes used for an indexing request - dx = abs(index_array[end_index,0]-index_array[start_index,0]) - dy = abs(index_array[end_index,0]-index_array[start_index,0]) - return dx*dy*data_variable.dtype.itemsize - def get_value_at_coords(self, coordinates, wkt=None, max_bytes=None, variable_name=None): ''' @@ -264,52 +258,41 @@ def get_value_at_coords(self, coordinates, wkt=None, indices = np.array(self.get_indices_from_coords(coordinates, wkt)) # return data_variable[indices[:,0], indices[:,1]].diagonal() # This could get too big - - # Allow for the fact that the NetCDF advanced indexing will pull back - # n^2 cells rather than n - max_points = max( - int(math.sqrt(max_bytes / data_variable.dtype.itemsize)), 1) - try: - # Make this a vectorised operation for speed (one query for as many - # points as possible) - # Array of valid index pairs only - index_array = np.array( - [index_pair for index_pair in indices if index_pair is not None]) - assert len(index_array.shape) == 2 and index_array.shape[ - 1] == 2, 'Not an iterable containing index pairs' - # Boolean mask indicating which index pairs are valid - mask_array = np.array([(index_pair is not None) - for index_pair in indices]) - # Array of values read from variable - value_array = np.ones(shape=(len(index_array)), - dtype=data_variable.dtype) * no_data_value - # Final result array including no-data for invalid index pairs - result_array = np.ones( - shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value - start_index = 0 - end_index = 1 - while self._get_nbytes(index_array, start_index, end_index, data_variable) < max_bytes: - end_index += 1 - end_index = min(end_index, len(index_array)) - while start_index < len(index_array): - # N.B: ".diagonal()" is required because NetCDF doesn't do advanced indexing exactly like numpy - # Hack is required to take values from leading diagonal. Requires n^2 elements retrieved instead of n. Not good, but better than whole array - # TODO: Think of a better way of doing this - query_result = data_variable[ - (index_array[start_index:end_index, 0], index_array[start_index:end_index, 1])] - residx0 = index_array[start_index:end_index,0] - index_array[start_index,0] - residx1 = index_array[start_index:end_index,1] - index_array[start_index,1] - value_array[start_index:end_index] = query_result[residx0, residx1] - start_index = end_index - end_index = start_index + 1 - while self._get_nbytes(index_array, start_index, end_index, data_variable) < max_bytes: - end_index += 1 - end_index = min(end_index, len(index_array)) - - result_array[mask_array] = value_array - return list(result_array) - except IndexError: - return data_variable[indices[0], indices[1]] + # try: + # Make this a vectorised operation for speed (one query for as many + # points as possible) + # Array of valid index pairs only + index_array = np.array( + [index_pair for index_pair in indices if index_pair is not None]) + assert len(index_array.shape) == 2 and index_array.shape[ + 1] == 2, 'Not an iterable containing index pairs' + # Boolean mask indicating which index pairs are valid + mask_array = np.array([(index_pair is not None) + for index_pair in indices]) + # Array of values read from variable + value_array = np.ones(shape=(len(index_array)), + dtype=data_variable.dtype) * no_data_value + # Final result array including no-data for invalid index pairs + result_array = np.ones( + shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value + start_index = 0 + end_index = _max_index(index_array, start_index, data_variable, max_bytes) + while start_index < len(index_array): + i00 = min(index_array[start_index,0],index_array[end_index,0]) + i01 = max(index_array[start_index,0],index_array[end_index,0])+1 + i10 = min(index_array[start_index,1],index_array[end_index,1]) + i11 = max(index_array[start_index,1],index_array[end_index,1])+1 + query_result = data_variable[i00:i01, i10:i11] + residx0 = index_array[start_index:end_index+1,0] - index_array[start_index,0] + residx1 = index_array[start_index:end_index+1,1] - index_array[start_index,1] + value_array[start_index:end_index+1] = query_result[residx0, residx1] + start_index = end_index + 1 + end_index = _max_index(index_array, start_index, data_variable, max_bytes) + + result_array[mask_array] = value_array + return list(result_array) + # except ValueError: + # return data_variable[indices[0], indices[1]] def get_interpolated_value_at_coords( self, coordinates, wkt=None, max_bytes=None, variable_name=None): @@ -916,6 +899,17 @@ def iterate_through_data_chunks_and_find_mins_and_maxs(self, variable, num_of_ch logger.debug("current_min: {}".format(current_min)) return current_min, current_max +def _max_index(index_array, start_index, data_variable, max_bytes): + #find the maximum ending index to use for a request under max_bytes + end_index = start_index + dbytes = data_variable.dtype.itemsize + nbytes = 0 + while end_index < len(index_array) and nbytes <= max_bytes: + dx = abs(index_array[end_index,0]-index_array[start_index,0]) + 1 + dy = abs(index_array[end_index,0]-index_array[start_index,0]) + 1 + nbytes = dx*dy*dbytes + end_index += 1 + return max(start_index, end_index - 1) def main(): ''' From e3d79d08c2c1bbeca13d4d605c854e49897e3a49 Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Thu, 7 Apr 2022 11:44:11 +1000 Subject: [PATCH 4/7] restore original try-except block --- geophys_utils/_netcdf_grid_utils.py | 66 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/geophys_utils/_netcdf_grid_utils.py b/geophys_utils/_netcdf_grid_utils.py index 50c4d95..07ba3e5 100644 --- a/geophys_utils/_netcdf_grid_utils.py +++ b/geophys_utils/_netcdf_grid_utils.py @@ -258,41 +258,41 @@ def get_value_at_coords(self, coordinates, wkt=None, indices = np.array(self.get_indices_from_coords(coordinates, wkt)) # return data_variable[indices[:,0], indices[:,1]].diagonal() # This could get too big - # try: - # Make this a vectorised operation for speed (one query for as many - # points as possible) - # Array of valid index pairs only - index_array = np.array( - [index_pair for index_pair in indices if index_pair is not None]) - assert len(index_array.shape) == 2 and index_array.shape[ - 1] == 2, 'Not an iterable containing index pairs' - # Boolean mask indicating which index pairs are valid - mask_array = np.array([(index_pair is not None) - for index_pair in indices]) - # Array of values read from variable - value_array = np.ones(shape=(len(index_array)), - dtype=data_variable.dtype) * no_data_value - # Final result array including no-data for invalid index pairs - result_array = np.ones( - shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value - start_index = 0 - end_index = _max_index(index_array, start_index, data_variable, max_bytes) - while start_index < len(index_array): - i00 = min(index_array[start_index,0],index_array[end_index,0]) - i01 = max(index_array[start_index,0],index_array[end_index,0])+1 - i10 = min(index_array[start_index,1],index_array[end_index,1]) - i11 = max(index_array[start_index,1],index_array[end_index,1])+1 - query_result = data_variable[i00:i01, i10:i11] - residx0 = index_array[start_index:end_index+1,0] - index_array[start_index,0] - residx1 = index_array[start_index:end_index+1,1] - index_array[start_index,1] - value_array[start_index:end_index+1] = query_result[residx0, residx1] - start_index = end_index + 1 + try: + # Make this a vectorised operation for speed (one query for as many + # points as possible) + # Array of valid index pairs only + index_array = np.array( + [index_pair for index_pair in indices if index_pair is not None]) + assert len(index_array.shape) == 2 and index_array.shape[ + 1] == 2, 'Not an iterable containing index pairs' + # Boolean mask indicating which index pairs are valid + mask_array = np.array([(index_pair is not None) + for index_pair in indices]) + # Array of values read from variable + value_array = np.ones(shape=(len(index_array)), + dtype=data_variable.dtype) * no_data_value + # Final result array including no-data for invalid index pairs + result_array = np.ones( + shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value + start_index = 0 end_index = _max_index(index_array, start_index, data_variable, max_bytes) + while start_index < len(index_array): + i00 = min(index_array[start_index,0],index_array[end_index,0]) + i01 = max(index_array[start_index,0],index_array[end_index,0])+1 + i10 = min(index_array[start_index,1],index_array[end_index,1]) + i11 = max(index_array[start_index,1],index_array[end_index,1])+1 + query_result = data_variable[i00:i01, i10:i11] + residx0 = index_array[start_index:end_index+1,0] - index_array[start_index,0] + residx1 = index_array[start_index:end_index+1,1] - index_array[start_index,1] + value_array[start_index:end_index+1] = query_result[residx0, residx1] + start_index = end_index + 1 + end_index = _max_index(index_array, start_index, data_variable, max_bytes) - result_array[mask_array] = value_array - return list(result_array) - # except ValueError: - # return data_variable[indices[0], indices[1]] + result_array[mask_array] = value_array + return list(result_array) + except: + return data_variable[indices[0], indices[1]] def get_interpolated_value_at_coords( self, coordinates, wkt=None, max_bytes=None, variable_name=None): From 722f2e45c0a4170c71b8ce9cf1da3fe097146d8e Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Thu, 7 Apr 2022 15:02:54 +1000 Subject: [PATCH 5/7] generalise to any list of points --- geophys_utils/_netcdf_grid_utils.py | 44 +++++++++++++++++++---------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/geophys_utils/_netcdf_grid_utils.py b/geophys_utils/_netcdf_grid_utils.py index 07ba3e5..6f3cb85 100644 --- a/geophys_utils/_netcdf_grid_utils.py +++ b/geophys_utils/_netcdf_grid_utils.py @@ -276,18 +276,13 @@ def get_value_at_coords(self, coordinates, wkt=None, result_array = np.ones( shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value start_index = 0 - end_index = _max_index(index_array, start_index, data_variable, max_bytes) while start_index < len(index_array): - i00 = min(index_array[start_index,0],index_array[end_index,0]) - i01 = max(index_array[start_index,0],index_array[end_index,0])+1 - i10 = min(index_array[start_index,1],index_array[end_index,1]) - i11 = max(index_array[start_index,1],index_array[end_index,1])+1 - query_result = data_variable[i00:i01, i10:i11] - residx0 = index_array[start_index:end_index+1,0] - index_array[start_index,0] - residx1 = index_array[start_index:end_index+1,1] - index_array[start_index,1] + end_index, bbox = _get_query_params(index_array, start_index, data_variable, max_bytes) + query_result = data_variable[bbox[0]:bbox[1]+1, bbox[2]:bbox[3]+1] + residx0 = index_array[start_index:end_index+1,0] - bbox[0] + residx1 = index_array[start_index:end_index+1,1] - bbox[2] value_array[start_index:end_index+1] = query_result[residx0, residx1] start_index = end_index + 1 - end_index = _max_index(index_array, start_index, data_variable, max_bytes) result_array[mask_array] = value_array return list(result_array) @@ -899,17 +894,36 @@ def iterate_through_data_chunks_and_find_mins_and_maxs(self, variable, num_of_ch logger.debug("current_min: {}".format(current_min)) return current_min, current_max -def _max_index(index_array, start_index, data_variable, max_bytes): +def _get_query_params(index_array, start_index, data_variable, max_bytes): #find the maximum ending index to use for a request under max_bytes + # and the associated bounding box + if start_index >= len(index_array): + return start_index end_index = start_index dbytes = data_variable.dtype.itemsize - nbytes = 0 + nbytes = dbytes + bbox = [index_array[start_index,0], index_array[start_index,0], + index_array[start_index,1], index_array[start_index,1]] while end_index < len(index_array) and nbytes <= max_bytes: - dx = abs(index_array[end_index,0]-index_array[start_index,0]) + 1 - dy = abs(index_array[end_index,0]-index_array[start_index,0]) + 1 - nbytes = dx*dy*dbytes end_index += 1 - return max(start_index, end_index - 1) + if end_index >= len(index_array): + break + x = index_array[end_index,0] + y = index_array[end_index,1] + new_bbox = bbox.copy() + if x < bbox[0]: + new_bbox[0] = x + elif x > bbox[1]: + new_bbox[1] = x + if y < bbox[2]: + new_bbox[2] = y + elif y > bbox[3]: + new_bbox[3] = y + nbytes = dbytes*(new_bbox[1]-new_bbox[0]+1)*(new_bbox[3]-new_bbox[2]+1) + if nbytes > max_bytes: + break + bbox=new_bbox + return max(start_index, end_index - 1), bbox def main(): ''' From e1deecbc4485d960b6f3c7fbdd5d0b01e53d191d Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Thu, 7 Apr 2022 16:53:48 +1000 Subject: [PATCH 6/7] add tests for get_value_at_coords with different request sizes --- geophys_utils/test/test_netcdf_grid_utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/geophys_utils/test/test_netcdf_grid_utils.py b/geophys_utils/test/test_netcdf_grid_utils.py index edebeaf..b64e0c2 100644 --- a/geophys_utils/test/test_netcdf_grid_utils.py +++ b/geophys_utils/test/test_netcdf_grid_utils.py @@ -32,15 +32,19 @@ netcdf_grid_utils = None NC_PATH = 'test_grid.nc' -MAX_BYTES = 1600 +MAX_BYTES_SM = 100 +MAX_BYTES_LG = 5000 MAX_ERROR = 0.000001 TEST_COORDS = (148.213, -36.015) TEST_MULTI_COORDS = np.array([[148.213, -36.015], [148.516, -35.316]]) +TEST_MANY_COORDS = np.array([[148.484, -35.352], + [148.328, -35.428], [148.436, -35.744], [148.300, -35.436]]) TEST_INDICES = [1, 1] TEST_MULTI_INDICES = [[1, 1], [176, 77]] TEST_FRACTIONAL_INDICES = [1.25, 1.25] TEST_VALUE = -99999. TEST_MULTI_VALUES = [-99999.0, -134.711334229] +TEST_MANY_VALS = [-136.13321, -31.7626, -58.755764, -90.484276] TEST_INTERPOLATED_VALUE = -99997.6171875 class TestNetCDFGridUtilsConstructor(unittest.TestCase): @@ -82,6 +86,18 @@ def test_get_value_at_coords(self): multi_values = netcdf_grid_utils.get_value_at_coords(TEST_MULTI_COORDS) assert (np.abs(np.array(multi_values) - np.array(TEST_MULTI_VALUES)) < MAX_ERROR).all(), 'Incorrect retrieved value: {} instead of {}'.format(multi_values, TEST_MULTI_VALUES) + print('Testing get_value_at_coords with long coordinate list {}'.format(TEST_MANY_COORDS)) + many_values = netcdf_grid_utils.get_value_at_coords(TEST_MANY_COORDS) + assert (np.abs(np.array(many_values) - np.array(TEST_MANY_VALS)) < MAX_ERROR).all(), 'Incorrect retrieved value: {} instead of {}'.format(many_values, TEST_MANY_VALS) + + print('Testing get_value_at_coords with long coordinate list {} and request size {} bytes'.format(TEST_MANY_COORDS, MAX_BYTES_SM)) + many_values = netcdf_grid_utils.get_value_at_coords(TEST_MANY_COORDS, max_bytes=MAX_BYTES_SM) + assert (np.abs(np.array(many_values) - np.array(TEST_MANY_VALS)) < MAX_ERROR).all(), 'Incorrect retrieved value: {} instead of {}'.format(many_values, TEST_MANY_VALS) + + print('Testing get_value_at_coords with long coordinate list {} and request size {} bytes'.format(TEST_MANY_COORDS, MAX_BYTES_LG)) + many_values = netcdf_grid_utils.get_value_at_coords(TEST_MANY_COORDS, max_bytes=MAX_BYTES_LG) + assert (np.abs(np.array(many_values) - np.array(TEST_MANY_VALS)) < MAX_ERROR).all(), 'Incorrect retrieved value: {} instead of {}'.format(many_values, TEST_MANY_VALS) + def test_get_interpolated_value_at_coords(self): print('Testing get_interpolated_value_at_coords function') interpolated_value = netcdf_grid_utils.get_interpolated_value_at_coords(TEST_COORDS) From c1a4e26dd501199a5ec81a2322d2dbd10a955a52 Mon Sep 17 00:00:00 2001 From: richardt94 <29562583+richardt94@users.noreply.github.com> Date: Thu, 7 Apr 2022 17:02:59 +1000 Subject: [PATCH 7/7] get rid of unsafe try-except and clean up comments --- geophys_utils/_netcdf_grid_utils.py | 67 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/geophys_utils/_netcdf_grid_utils.py b/geophys_utils/_netcdf_grid_utils.py index 6f3cb85..44643a1 100644 --- a/geophys_utils/_netcdf_grid_utils.py +++ b/geophys_utils/_netcdf_grid_utils.py @@ -243,10 +243,8 @@ def get_value_at_coords(self, coordinates, wkt=None, @parameter max_bytes: Maximum number of bytes to read in a single query. Defaults to NetCDFGridUtils.DEFAULT_MAX_BYTES @parameter variable_name: NetCDF variable_name if not default data variable ''' - # Use arbitrary maximum request size of NetCDFGridUtils.DEFAULT_MAX_BYTES - # (500,000,000 bytes => 11180 points per query) - #TODO: Find a better way of overcoming the netCDF problem where whole rows & columns are retrieved - max_bytes = max_bytes or 100 # NetCDFGridUtils.DEFAULT_MAX_BYTES + # Use small max request size by default + max_bytes = max_bytes or 100 if variable_name: data_variable = self.netcdf_dataset.variables[variable_name] @@ -256,38 +254,39 @@ def get_value_at_coords(self, coordinates, wkt=None, no_data_value = data_variable._FillValue indices = np.array(self.get_indices_from_coords(coordinates, wkt)) - -# return data_variable[indices[:,0], indices[:,1]].diagonal() # This could get too big - try: - # Make this a vectorised operation for speed (one query for as many - # points as possible) - # Array of valid index pairs only - index_array = np.array( - [index_pair for index_pair in indices if index_pair is not None]) - assert len(index_array.shape) == 2 and index_array.shape[ - 1] == 2, 'Not an iterable containing index pairs' - # Boolean mask indicating which index pairs are valid - mask_array = np.array([(index_pair is not None) - for index_pair in indices]) - # Array of values read from variable - value_array = np.ones(shape=(len(index_array)), - dtype=data_variable.dtype) * no_data_value - # Final result array including no-data for invalid index pairs - result_array = np.ones( - shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value - start_index = 0 - while start_index < len(index_array): - end_index, bbox = _get_query_params(index_array, start_index, data_variable, max_bytes) - query_result = data_variable[bbox[0]:bbox[1]+1, bbox[2]:bbox[3]+1] - residx0 = index_array[start_index:end_index+1,0] - bbox[0] - residx1 = index_array[start_index:end_index+1,1] - bbox[2] - value_array[start_index:end_index+1] = query_result[residx0, residx1] - start_index = end_index + 1 - result_array[mask_array] = value_array - return list(result_array) - except: + if indices.ndim == 1: #single coordinate pair return data_variable[indices[0], indices[1]] + + # Make this a vectorised operation for speed (one query for as many + # points as possible) + # Array of valid index pairs only + index_array = np.array( + [index_pair for index_pair in indices if index_pair is not None]) + assert len(index_array.shape) == 2 and index_array.shape[ + 1] == 2, 'Not an iterable containing index pairs' + # Boolean mask indicating which index pairs are valid + mask_array = np.array([(index_pair is not None) + for index_pair in indices]) + # Array of values read from variable + value_array = np.ones(shape=(len(index_array)), + dtype=data_variable.dtype) * no_data_value + # Final result array including no-data for invalid index pairs + result_array = np.ones( + shape=(len(mask_array)), dtype=data_variable.dtype) * no_data_value + start_index = 0 + while start_index < len(index_array): + #read up to max_bytes of data_variable containing as many of the next points in + #index_array as possible + end_index, bbox = _get_query_params(index_array, start_index, data_variable, max_bytes) + query_result = data_variable[bbox[0]:bbox[1]+1, bbox[2]:bbox[3]+1] + residx0 = index_array[start_index:end_index+1,0] - bbox[0] + residx1 = index_array[start_index:end_index+1,1] - bbox[2] + value_array[start_index:end_index+1] = query_result[residx0, residx1] + start_index = end_index + 1 + + result_array[mask_array] = value_array + return list(result_array) def get_interpolated_value_at_coords( self, coordinates, wkt=None, max_bytes=None, variable_name=None):