diff --git a/h5pyd/_hl/attrs.py b/h5pyd/_hl/attrs.py
index 24dfdb0..80780e1 100644
--- a/h5pyd/_hl/attrs.py
+++ b/h5pyd/_hl/attrs.py
@@ -166,6 +166,65 @@ def __getitem__(self, name):
 
         return arr
 
+    def get_attributes(self, names=None, pattern=None, limit=None, marker=None):
+        """
+        Get all attributes or a subset of attributes from the target object.
+        The objdb cache is only used when no names, pattern, limit, or marker is given.
+        - if 'names' is provided, retrieve only the attributes with the given name(s).
+        - if 'pattern' is provided, retrieve all attributes with names that match the pattern
+        according to Unix pathname pattern expansion rules.
+        - if 'limit' is provided, retrieve at most 'limit' attributes.
+        - if 'marker' is provided, retrieve attributes whose names occur after the name 'marker' in the target object.
+        """
+        if names and (pattern or limit or marker):
+            raise ValueError("names cannot be used with pattern, limit or marker")
+
+        use_cache = not (names or pattern or limit or marker)
+        if self._objdb_attributes is not None and use_cache:
+            # use the objdb cache - map each attribute name to its value
+            out = {}
+            for name in self._objdb_attributes:
+                out[name] = self._objdb_attributes[name]['value']
+            return out
+
+        # Omit trailing slash
+        req = self._req_prefix[:-1]
+
+        body = {}
+        params = {"IncludeData": 1}
+
+        if pattern:
+            params["pattern"] = pattern
+        if limit:
+            params["Limit"] = limit
+        if marker:
+            params["Marker"] = marker
+
+        if names:
+            if isinstance(names, list):
+                names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in names]
+            else:
+                if isinstance(names, bytes):
+                    names = names.decode("utf-8")
+                names = [names]
+
+            body['attr_names'] = names
+
+        if body:
+            rsp = self._parent.POST(req, body=body, params=params)
+        else:
+            rsp = self._parent.GET(req, params=params)
+
+        attrs_json = rsp['attributes']
+        names = [attr['name'] for attr in attrs_json]
+        values = [attr['value'] for attr in attrs_json]
+        out = {}
+
+        for i in range(len(names)):
+            out[names[i]] = values[i]
+
+        return out
+
     def __setitem__(self, name, value):
         """ Set a new attribute, overwriting any existing attribute.
 
@@ -173,17 +232,25 @@ def __setitem__(self, name, value):
         use a specific type or shape, or to preserve the type of an
         attribute, use the methods create() and modify().
         """
-        self.create(name, data=value, dtype=base.guess_dtype(value))
+        self.create(name, values=value, dtype=base.guess_dtype(value))
 
     def __delitem__(self, name):
         """ Delete an attribute (which must already exist). """
-        if isinstance(name, bytes):
-            name = name.decode("utf-8")
-        req = self._req_prefix + name
-        self._parent.DELETE(req)
+        params = {}
 
-    def create(self, name, data, shape=None, dtype=None):
-        """ Create a new attribute, overwriting any existing attribute.
+        if isinstance(name, list):
+            names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in name]
+            # Omit trailing slash
+            req = self._req_prefix[:-1]
+            params["attr_names"] = "/".join(names)
+        else:
+            if isinstance(name, bytes):
+                name = name.decode("utf-8")
+            req = self._req_prefix + name
+        self._parent.DELETE(req, params=params)
+
+    def create(self, names, values, shape=None, dtype=None):
+        """ Create new attribute(s), overwriting any existing attributes.
 
             name
                 Name of the new attribute (required)
@@ -196,104 +263,149 @@ def create(self, name, data, shape=None, dtype=None):
             Data type of the attribute.  Overrides data.dtype if both
             are given.
         """
-        self._parent.log.info("attrs.create({})".format(name))
-
-        # First, make sure we have a NumPy array.  We leave the data
-        # type conversion for HDF5 to perform.
- if isinstance(data, Reference): - dtype = special_dtype(ref=Reference) - if not isinstance(data, Empty): - data = numpy.asarray(data, dtype=dtype, order='C') - - if shape is None and not isinstance(data, Empty): - shape = data.shape + self._parent.log.info(f"attrs.create({names})") - use_htype = None # If a committed type is given, we must use it in h5a.create. + # Standardize single attribute arguments to lists + if not isinstance(names, list): + names = [names] + values = [values] - if isinstance(dtype, Datatype): - use_htype = dtype.id - dtype = dtype.dtype + # Do not permit duplicate names + if len(names) != len(set(names)): + raise ValueError("Duplicate attribute names are not allowed") - # Special case if data are complex numbers - is_complex = (data.dtype.kind == 'c') and (dtype.names is None) or ( - dtype.names != ('r', 'i')) or ( - any(dt.kind != 'f' for dt, off in dtype.fields.values())) or ( - dtype.fields['r'][0] == dtype.fields['i'][0]) + if shape is not None and not isinstance(shape, list): + shapes = [shape] + elif shape is None: + shapes = [None] * len(names) + else: + # Given shape is already a list of shapes + shapes = shape - if is_complex: - raise TypeError( - f'Wrong committed datatype for complex numbers: {dtype.name}') + if dtype is not None and not isinstance(dtype, list): + dtypes = [dtype] elif dtype is None: - if data.dtype.kind == 'U': - # use vlen for unicode strings - dtype = special_dtype(vlen=str) - else: - dtype = data.dtype + dtypes = [None] * len(names) else: - dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed - - # Where a top-level array type is requested, we have to do some - # fiddling around to present the data as a smaller array of - # subarrays. - if not isinstance(data, Empty): - if dtype.subdtype is not None: + # Given dtype is already a list of dtypes + dtypes = dtype + + type_jsons = [None] * len(names) + + if (len(names) != len(values)) or (shapes is not None and len(shapes) != len(values)) or\ + (dtypes is not None and len(dtypes) != len(values)): + raise ValueError("provided names, values, shapes and dtypes must have the same length") + + for i in range(len(names)): + # First, make sure we have a NumPy array. We leave the data + # type conversion for HDF5 to perform. + if isinstance(values[i], Reference): + dtypes[i] = special_dtype(ref=Reference) + if not isinstance(values[i], Empty): + values[i] = numpy.asarray(values[i], dtype=dtypes[i], order='C') + + if shapes[i] is None and not isinstance(values[i], Empty): + shapes[i] = values[i].shape + + use_htype = None # If a committed type is given, we must use it in h5a.create. + + if isinstance(dtypes[i], Datatype): + use_htype = dtypes[i].id + dtypes[i] = dtypes[i].dtype + + # Special case if data are complex numbers + is_complex = (values[i].dtype.kind == 'c') and (dtypes[i].names is None) or ( + dtypes[i].names != ('r', 'i')) or ( + any(dt.kind != 'f' for dt, off in dtypes[i].fields.values())) or ( + dtypes[i].fields['r'][0] == dtypes[i].fields['i'][0]) + + if is_complex: + raise TypeError( + f'Wrong committed datatype for complex numbers: {dtypes[i].name}') + elif dtypes[i] is None: + if values[i].dtype.kind == 'U': + # use vlen for unicode strings + dtypes[i] = special_dtype(vlen=str) + else: + dtypes[i] = values[i].dtype + else: + dtypes[i] = numpy.dtype(dtypes[i]) # In case a string, e.g. 
'i8' is passed - subdtype, subshape = dtype.subdtype + # Where a top-level array type is requested, we have to do some + # fiddling around to present the data as a smaller array of + # subarrays. + if not isinstance(values[i], Empty): + if dtypes[i].subdtype is not None: - # Make sure the subshape matches the last N axes' sizes. - if shape[-len(subshape):] != subshape: - raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shape}") + subdtype, subshape = dtypes[i].subdtype - # New "advertised" shape and dtype - shape = shape[0:len(shape) - len(subshape)] - dtype = subdtype + # Make sure the subshape matches the last N axes' sizes. + if shapes[i][-len(subshape):] != subshape: + raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shapes[i]}") - # Not an array type; make sure to check the number of elements - # is compatible, and reshape if needed. - else: - if numpy.prod(shape) != numpy.prod(data.shape): - raise ValueError("Shape of new attribute conflicts with shape of data") + # New "advertised" shape and dtype + shapes[i] = shapes[i][0:len(shapes[i]) - len(subshape)] + dtypes[i] = subdtype - if shape != data.shape: - data = data.reshape(shape) + # Not an array type; make sure to check the number of elements + # is compatible, and reshape if needed. + else: + if numpy.prod(shapes[i]) != numpy.prod(values[i].shape): + raise ValueError("Shape of new attribute conflicts with shape of data") - # We need this to handle special string types. + if shapes[i] != values[i].shape: + values[i] = values[i].reshape(shapes[i]) - data = numpy.asarray(data, dtype=dtype) + # We need this to handle special string types. - # Make HDF5 datatype and dataspace for the H5A calls - if use_htype is None: - type_json = getTypeItem(dtype) - self._parent.log.debug("attrs.create type_json: {}".format(type_json)) + values[i] = numpy.asarray(values[i], dtype=dtypes[i]) - # This mess exists because you can't overwrite attributes in HDF5. - # So we write to a temporary attribute first, and then rename. 
+                # Make HDF5 datatype and dataspace for the H5A calls
+                if use_htype is None:
+                    type_jsons[i] = getTypeItem(dtypes[i])
+                    self._parent.log.debug(f"attrs.create type_json: {type_jsons[i]}")
 
-        req = self._req_prefix + name
+        params = {}
         body = {}
-        body['type'] = type_json
-        if isinstance(data, Empty):
-            body['shape'] = 'H5S_NULL'
-        else:
-            body['shape'] = shape
-            if data.dtype.kind != 'c':
-                body['value'] = self._bytesArrayToList(data)
+        params['replace'] = 1
+
+        attributes = {}
+
+        for i in range(len(names)):
+            attr = {}
+            attr['type'] = type_jsons[i]
+            if isinstance(values[i], Empty):
+                attr['shape'] = 'H5S_NULL'
             else:
-                # Special case: complex numbers
-                special_dt = createDataType(type_json)
-                tmp = numpy.empty(shape=data.shape, dtype=special_dt)
-                tmp['r'] = data.real
-                tmp['i'] = data.imag
-                body['value'] = json.loads(json.dumps(tmp.tolist()))
+                attr['shape'] = shapes[i]
+                if values[i].dtype.kind != 'c':
+                    attr['value'] = self._bytesArrayToList(values[i])
+                else:
+                    # Special case: complex numbers
+                    special_dt = createDataType(type_jsons[i])
+                    tmp = numpy.empty(shape=values[i].shape, dtype=special_dt)
+                    tmp['r'] = values[i].real
+                    tmp['i'] = values[i].imag
+                    attr['value'] = json.loads(json.dumps(tmp.tolist()))
+            attributes[names[i]] = attr
+
+        if len(names) > 1:
+            # Create multiple attributes
+            # Omit trailing slash
+            req = self._req_prefix[:-1]
+            body['attributes'] = attributes
+
+        else:
+            # Create single attribute
+            req = self._req_prefix + names[0]
+            for key in attributes[names[0]]:
+                body[key] = attributes[names[0]][key]
 
         try:
-            self._parent.PUT(req, body=body)
+            self._parent.PUT(req, body=body, params=params)
         except RuntimeError:
-            # Resource already exist, try deleting it
-            self._parent.log.info("Update to existing attribute ({}), deleting it".format(name))
-            self._parent.DELETE(req)
-            # now add again
-            self._parent.PUT(req, body=body)
+            # the 'replace' param is set, so the failure is not due to the attribute already existing
+            raise RuntimeError("Failed to create attribute(s)")
 
     def modify(self, name, value):
         """ Change the value of an attribute while preserving its type.
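
Usage sketch (illustration only, not part of the patch): how the multi-attribute
calls added above are intended to be used, mirroring the new tests. The domain path
and attribute names are hypothetical, and an HSDS server new enough to support the
multi-attribute requests is assumed.

    import h5pyd
    import numpy as np

    with h5pyd.File("/home/test_user1/attr_demo.h5", "w") as f:
        g1 = f.create_group("g1")

        # create several attributes with one request
        g1.attrs.create(["a0", "a1", "a2"],
                        [np.arange(10), np.arange(20), np.arange(30)])

        # read them back: all of them, by glob pattern, or by explicit names
        all_attrs = g1.attrs.get_attributes()
        a_attrs = g1.attrs.get_attributes(pattern="a*")
        some_attrs = g1.attrs.get_attributes(names=["a0", "a2"])

        # delete two of them with a single DELETE request
        del g1.attrs[["a0", "a1"]]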
diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index 52e79c0..52b2f34 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -1020,13 +1020,13 @@ def PUT(self, req, body=None, params=None, format="json", replace=False): else: raise RuntimeError(rsp.reason) else: - raise IOError(rsp.reason) + raise IOError(f"{rsp.reason}:{rsp.status_code}") if rsp.text: rsp_json = json.loads(rsp.text) return rsp_json - def POST(self, req, body=None, format="json"): + def POST(self, req, body=None, params=None, format="json"): if self.id.http_conn is None: raise IOError("object not initialized") @@ -1034,7 +1034,7 @@ def POST(self, req, body=None, format="json"): self.log.info("POST: {} [{}]".format(req, self.id.domain)) - rsp = self.id._http_conn.POST(req, body=body, format=format) + rsp = self.id._http_conn.POST(req, body=body, params=params, format=format) if rsp.status_code == 409: raise ValueError("name already exists") if rsp.status_code not in (200, 201): @@ -1053,14 +1053,14 @@ def POST(self, req, body=None, format="json"): rsp_json = json.loads(rsp.text) return rsp_json - def DELETE(self, req): + def DELETE(self, req, params=None): if self.id.http_conn is None: raise IOError("object not initialized") # try to do a DELETE of the resource self.log.info("DEL: {} [{}]".format(req, self.id.domain)) - rsp = self.id._http_conn.DELETE(req) + rsp = self.id._http_conn.DELETE(req, params=params) # self.log.info("RSP: " + str(rsp.status_code) + ':' + rsp.text) if rsp.status_code != 200: raise IOError(rsp.reason) diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index 393ea8b..9fce52d 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -434,9 +434,14 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True): if format == "binary": headers["accept"] = "application/octet-stream" + # list of parameters which should disable cache usage + no_cache_params = ["select", "query", "Limit", "Marker", "pattern", "attr"] + check_cache = self._cache is not None and use_cache and format == "json" check_cache = check_cache and params["domain"] == self._domain - check_cache = check_cache and "select" not in params and "query" not in params + + if any(param in params for param in no_cache_params): + check_cache = False if check_cache: self.log.debug("httpcon - checking cache") diff --git a/test/hl/common.py b/test/hl/common.py index cbf4d9b..2d8e7c1 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -21,7 +21,7 @@ import numpy as np import unittest as ut from platform import system - +from h5pyd import getServerInfo # Check if non-ascii filenames are supported # Evidently this is the most reliable way to check @@ -275,3 +275,10 @@ def is_hsds(self, id=None): return True else: return False + + def hsds_version(self): + """ Return the version of the HSDS server, or None if not HSDS. 
+        """
+
+        rsp = getServerInfo()
+        return rsp["hsds_version"]
diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py
index 3e3ec89..4c524c7 100644
--- a/test/hl/test_attribute.py
+++ b/test/hl/test_attribute.py
@@ -118,6 +118,177 @@ def test_create(self):
 
         # close file
         f.close()
 
+    def test_create_multiple(self):
+        if config.get('use_h5py') or self.hsds_version() < "0.9.0":
+            return
+
+        filename = self.getFileName("create_attribute_multiple")
+        print("filename:", filename)
+        f = h5py.File(filename, 'w')
+
+        g1 = f.create_group('g1')
+
+        num_attrs = 10
+        # No shape or dtype specified
+        names = ['attr' + str(i) for i in range(num_attrs)]
+        values = [np.arange(50)] * num_attrs
+        g1.attrs.create(names, values)
+
+        for i in range(num_attrs):
+            self.assertTrue(names[i] in g1.attrs)
+            self.assertTrue(np.array_equal(g1.attrs[names[i]], values[i]))
+
+        # Test replacing existing attributes
+        new_values = [np.arange(100)] * num_attrs
+        g1.attrs.create(names, new_values)
+
+        for i in range(num_attrs):
+            self.assertTrue(names[i] in g1.attrs)
+            self.assertTrue(np.array_equal(g1.attrs[names[i]], new_values[i]))
+
+        # Test creating attributes with shape and dtype specified
+        names = ['attr' + str(i) for i in range(num_attrs, 2 * num_attrs)]
+        values = [np.arange(i + 1) for i in range(num_attrs)]
+        dtypes = [np.int32] * num_attrs
+        shapes = [(i + 1,) for i in range(num_attrs)]
+        g1.attrs.create(names, values, shapes, dtypes)
+
+        for i in range(num_attrs):
+            self.assertTrue(names[i] in g1.attrs)
+            self.assertTrue(np.array_equal(g1.attrs[names[i]], values[i]))
+            self.assertEqual(g1.attrs[names[i]].dtype, dtypes[i])
+            self.assertEqual(g1.attrs[names[i]].shape, shapes[i])
+
+    def test_get_multiple(self):
+        if config.get('use_h5py') or self.hsds_version() < "0.9.0":
+            return
+
+        filename = self.getFileName("get_attribute_multiple")
+        print("filename:", filename)
+        f = h5py.File(filename, 'w')
+
+        # create attributes
+        num_attrs = 10
+        g1 = f.create_group('g1')
+        names = ['attr' + str(i) for i in range(num_attrs)]
+        values = [np.arange(50) for i in range(num_attrs)]
+
+        for i in range(10):
+            g1.attrs[names[i]] = values[i]
+
+        # get all attributes
+        values_out = g1.attrs.get_attributes()
+
+        self.assertEqual(len(values_out), 10)
+        for i in range(10):
+            self.assertTrue(names[i] in values_out)
+            self.assertTrue(np.array_equal(values_out[names[i]], values[i]))
+
+        # get attributes from cache
+        values_out = g1.attrs.get_attributes()
+        self.assertEqual(len(values_out), 10)
+        for i in range(10):
+            self.assertTrue(names[i] in values_out)
+            self.assertTrue(np.array_equal(values_out[names[i]], values[i]))
+
+        # get attributes that match the pattern 'attr5'
+        pattern = "attr5"
+        values_out = g1.attrs.get_attributes(pattern=pattern)
+
+        self.assertTrue("attr5" in values_out)
+        self.assertTrue(np.array_equal(values_out["attr5"], values[5]))
+
+        # get only attributes that match the pattern 'att*'
+        g1.attrs['new_attr'] = np.arange(100)
+        pattern = "att*"
+        values_out = g1.attrs.get_attributes(pattern=pattern)
+
+        self.assertEqual(len(values_out), 10)
+
+        for i in range(10):
+            self.assertTrue(names[i] in values_out)
+            self.assertTrue(np.array_equal(values_out[names[i]], values[i]))
+
+        # get the first five attributes
+        limit = 5
+        values_out = g1.attrs.get_attributes(limit=limit)
+
+        self.assertEqual(len(values_out), 5)
+
+        for i in range(5):
+            self.assertTrue(names[i] in values_out)
+            self.assertTrue(np.array_equal(values_out[names[i]], values[i]))
+
+        # get all attributes after 'attr4'
+        marker = "attr4"
+        values_out = g1.attrs.get_attributes(marker=marker, limit=limit)
+
+        self.assertEqual(len(values_out), 5)
+
+        for i in range(5, 10):
+            self.assertTrue(names[i] in values_out)
+            self.assertTrue(np.array_equal(values_out[names[i]], values[i]))
+
+        # get set of attributes by name
+        names = ['attr5', 'attr7', 'attr9']
+
+        values_out = g1.attrs.get_attributes(names=names)
+
+        self.assertEqual(len(values_out), 3)
+
+        for name in names:
+            self.assertTrue(name in values_out)
+            i = int(name[4])
+            self.assertTrue(np.array_equal(values_out[name], values[i]))
+
+    def test_delete_multiple(self):
+        if config.get('use_h5py') or self.hsds_version() < "0.9.0":
+            return
+
+        filename = self.getFileName("delete_attribute_multiple")
+        print("filename:", filename)
+        f = h5py.File(filename, 'w')
+
+        # create attributes
+        num_attrs = 10
+        g1 = f.create_group('g1')
+        names = ['attr' + str(i) for i in range(num_attrs)]
+        values = [np.arange(50) for i in range(num_attrs)]
+
+        for i in range(10):
+            g1.attrs[names[i]] = values[i]
+
+        # delete the first five attributes
+        del g1.attrs[names[0:5]]
+
+        # check that the first five attributes are gone
+        for i in range(5):
+            self.assertFalse(names[i] in g1.attrs)
+
+        # check that the last five attributes are still there
+        for i in range(5, 10):
+            self.assertTrue(names[i] in g1.attrs)
+            self.assertTrue(np.array_equal(g1.attrs[names[i]], values[i]))
+
+        # delete single attribute
+        del g1.attrs[names[5]]
+
+        self.assertFalse(names[5] in g1.attrs)
+
+        for i in range(6, 10):
+            self.assertTrue(names[i] in g1.attrs)
+            self.assertTrue(np.array_equal(g1.attrs[names[i]], values[i]))
+
+        # delete attributes with names that must be URL-encoded
+        names = ['attr with spaces', 'attr%', 'unicode八attr']
+        for name in names:
+            g1.attrs[name] = np.arange(100)
+
+        del g1.attrs[names]
+
+        for name in names:
+            self.assertTrue(name not in g1.attrs)
+
 
 class TestTrackOrder(TestCase):
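
Paging sketch (illustration only, not part of the patch): the 'limit' and 'marker'
parameters of get_attributes() can be combined to walk a large attribute collection
in batches, following the parameter semantics documented in attrs.py above. The group
object `g1` and the batch size are hypothetical, and the sketch assumes the returned
dict preserves the server's attribute ordering (Python dicts keep insertion order).

    batch_size = 100
    marker = None
    all_attrs = {}
    while True:
        page = g1.attrs.get_attributes(limit=batch_size, marker=marker)
        if not page:
            break
        all_attrs.update(page)
        if len(page) < batch_size:
            break  # last (partial) page
        # the last name returned becomes the marker for the next request
        marker = list(page)[-1]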