Skip to content

Commit

Permalink
Multi-attribute API (#202)
Browse files Browse the repository at this point in the history
* Multiple attribute creation

* Skip multi attr test for old HSDS

* Add AttributeManager.get_attributes

* Support multiple attribute deletion

* Support retrieving multiple attributes by name

* Send get_attributes params as dict

* Remove duplicate logic in attr create

* Remove redundant h5py check
  • Loading branch information
mattjala committed Jun 7, 2024
1 parent 9352dd4 commit c56e40b
Show file tree
Hide file tree
Showing 5 changed files with 387 additions and 92 deletions.
280 changes: 196 additions & 84 deletions h5pyd/_hl/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,24 +166,91 @@ def __getitem__(self, name):

return arr

def get_attributes(self, names=None, pattern=None, limit=None, marker=None):
    """
    Get all attributes or a subset of attributes from the target object.

    names
        A single attribute name, or an iterable of names, to retrieve.
        Names given as bytes are decoded as UTF-8. Cannot be combined
        with pattern, limit or marker.
    pattern
        Retrieve only attributes whose names match the pattern, according
        to Unix pathname pattern expansion rules.
    limit
        Retrieve at most 'limit' attributes.
    marker
        Retrieve attributes whose names occur after the name 'marker' in
        the target object.

    Returns a dict keyed by attribute name.

    The objdb cache is used when it is available and none of pattern,
    limit or marker is given (the cache cannot evaluate those). When
    'names' is given, the cache is used only if it holds every requested
    name; otherwise the request is sent to the server.

    Raises ValueError if names is combined with pattern, limit or marker.
    """
    if names and (pattern or limit or marker):
        raise ValueError("names cannot be used with pattern, limit or marker")

    # Normalise names to a list of str up front so that the cache path and
    # the request path both see the same thing.
    if names:
        if isinstance(names, (str, bytes)):
            names = [names]
        names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in names]

    cache = self._objdb_attributes
    if cache is not None and not (pattern or limit or marker):
        # Build a name -> entry view of the cache. A dict is taken to be
        # keyed by attribute name already; any other iterable is taken to
        # hold entries that carry their own 'name' key.
        # NOTE(review): the layout of _objdb_attributes is not visible
        # here - confirm which of the two forms is actually used.
        if isinstance(cache, dict):
            cached = dict(cache)
        else:
            cached = {entry['name']: entry for entry in cache}

        if not names:
            return cached
        if all(name in cached for name in names):
            return {name: cached[name] for name in names}
        # At least one requested name is not cached: ask the server.
        # NOTE(review): the cache path returns the cached entries as stored,
        # while the server path below returns only each attribute's 'value'
        # - confirm that callers expect this difference.

    # Omit trailing slash
    req = self._req_prefix[:-1]

    params = {"IncludeData": 1}
    if pattern:
        params["pattern"] = pattern
    if limit:
        params["Limit"] = limit
    if marker:
        params["Marker"] = marker

    if names:
        # The list of names travels in the request body, hence POST.
        rsp = self._parent.POST(req, body={'attr_names': names}, params=params)
    else:
        rsp = self._parent.GET(req, params=params)

    return {attr['name']: attr['value'] for attr in rsp['attributes']}

def __setitem__(self, name, value):
""" Set a new attribute, overwriting any existing attribute.
The type and shape of the attribute are determined from the data. To
use a specific type or shape, or to preserve the type of an attribute,
use the methods create() and modify().
"""
self.create(name, data=value, dtype=base.guess_dtype(value))
self.create(name, values=value, dtype=base.guess_dtype(value))

def __delitem__(self, name):
""" Delete an attribute (which must already exist). """
if isinstance(name, bytes):
name = name.decode("utf-8")
req = self._req_prefix + name
self._parent.DELETE(req)
params = {}

def create(self, name, data, shape=None, dtype=None):
""" Create a new attribute, overwriting any existing attribute.
if isinstance(name, list):
names = [name.decode('utf-8') if isinstance(name, bytes) else name for name in name]
# Omit trailing slash
req = self._req_prefix[:-1]
params["attr_names"] = "/".join(names)
else:
if isinstance(name, bytes):
name = name.decode("utf-8")
req = self._req_prefix + name
self._parent.DELETE(req, params=params)

def create(self, names, values, shape=None, dtype=None):
""" Create new attribute(s), overwriting any existing attributes.
name
Name of the new attribute (required)
Expand All @@ -196,104 +263,149 @@ def create(self, name, data, shape=None, dtype=None):
Data type of the attribute. Overrides data.dtype if both
are given.
"""
self._parent.log.info("attrs.create({})".format(name))

# First, make sure we have a NumPy array. We leave the data
# type conversion for HDF5 to perform.
if isinstance(data, Reference):
dtype = special_dtype(ref=Reference)
if not isinstance(data, Empty):
data = numpy.asarray(data, dtype=dtype, order='C')

if shape is None and not isinstance(data, Empty):
shape = data.shape
self._parent.log.info(f"attrs.create({names})")

use_htype = None # If a committed type is given, we must use it in h5a.create.
# Standardize single attribute arguments to lists
if not isinstance(names, list):
names = [names]
values = [values]

if isinstance(dtype, Datatype):
use_htype = dtype.id
dtype = dtype.dtype
# Do not permit duplicate names
if len(names) != len(set(names)):
raise ValueError("Duplicate attribute names are not allowed")

# Special case if data are complex numbers
is_complex = (data.dtype.kind == 'c') and (dtype.names is None) or (
dtype.names != ('r', 'i')) or (
any(dt.kind != 'f' for dt, off in dtype.fields.values())) or (
dtype.fields['r'][0] == dtype.fields['i'][0])
if shape is not None and not isinstance(shape, list):
shapes = [shape]
elif shape is None:
shapes = [None] * len(names)
else:
# Given shape is already a list of shapes
shapes = shape

if is_complex:
raise TypeError(
f'Wrong committed datatype for complex numbers: {dtype.name}')
if dtype is not None and not isinstance(dtype, list):
dtypes = [dtype]
elif dtype is None:
if data.dtype.kind == 'U':
# use vlen for unicode strings
dtype = special_dtype(vlen=str)
else:
dtype = data.dtype
dtypes = [None] * len(names)
else:
dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed

# Where a top-level array type is requested, we have to do some
# fiddling around to present the data as a smaller array of
# subarrays.
if not isinstance(data, Empty):
if dtype.subdtype is not None:
# Given dtype is already a list of dtypes
dtypes = dtype

type_jsons = [None] * len(names)

if (len(names) != len(values)) or (shapes is not None and len(shapes) != len(values)) or\
(dtypes is not None and len(dtypes) != len(values)):
raise ValueError("provided names, values, shapes and dtypes must have the same length")

for i in range(len(names)):
# First, make sure we have a NumPy array. We leave the data
# type conversion for HDF5 to perform.
if isinstance(values[i], Reference):
dtypes[i] = special_dtype(ref=Reference)
if not isinstance(values[i], Empty):
values[i] = numpy.asarray(values[i], dtype=dtypes[i], order='C')

if shapes[i] is None and not isinstance(values[i], Empty):
shapes[i] = values[i].shape

use_htype = None # If a committed type is given, we must use it in h5a.create.

if isinstance(dtypes[i], Datatype):
use_htype = dtypes[i].id
dtypes[i] = dtypes[i].dtype

# Special case if data are complex numbers
is_complex = (values[i].dtype.kind == 'c') and (dtypes[i].names is None) or (
dtypes[i].names != ('r', 'i')) or (
any(dt.kind != 'f' for dt, off in dtypes[i].fields.values())) or (
dtypes[i].fields['r'][0] == dtypes[i].fields['i'][0])

if is_complex:
raise TypeError(
f'Wrong committed datatype for complex numbers: {dtypes[i].name}')
elif dtypes[i] is None:
if values[i].dtype.kind == 'U':
# use vlen for unicode strings
dtypes[i] = special_dtype(vlen=str)
else:
dtypes[i] = values[i].dtype
else:
dtypes[i] = numpy.dtype(dtypes[i]) # In case a string, e.g. 'i8' is passed

subdtype, subshape = dtype.subdtype
# Where a top-level array type is requested, we have to do some
# fiddling around to present the data as a smaller array of
# subarrays.
if not isinstance(values[i], Empty):
if dtypes[i].subdtype is not None:

# Make sure the subshape matches the last N axes' sizes.
if shape[-len(subshape):] != subshape:
raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shape}")
subdtype, subshape = dtypes[i].subdtype

# New "advertised" shape and dtype
shape = shape[0:len(shape) - len(subshape)]
dtype = subdtype
# Make sure the subshape matches the last N axes' sizes.
if shapes[i][-len(subshape):] != subshape:
raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shapes[i]}")

# Not an array type; make sure to check the number of elements
# is compatible, and reshape if needed.
else:
if numpy.prod(shape) != numpy.prod(data.shape):
raise ValueError("Shape of new attribute conflicts with shape of data")
# New "advertised" shape and dtype
shapes[i] = shapes[i][0:len(shapes[i]) - len(subshape)]
dtypes[i] = subdtype

if shape != data.shape:
data = data.reshape(shape)
# Not an array type; make sure to check the number of elements
# is compatible, and reshape if needed.
else:
if numpy.prod(shapes[i]) != numpy.prod(values[i].shape):
raise ValueError("Shape of new attribute conflicts with shape of data")

# We need this to handle special string types.
if shapes[i] != values[i].shape:
values[i] = values[i].reshape(shapes[i])

data = numpy.asarray(data, dtype=dtype)
# We need this to handle special string types.

# Make HDF5 datatype and dataspace for the H5A calls
if use_htype is None:
type_json = getTypeItem(dtype)
self._parent.log.debug("attrs.create type_json: {}".format(type_json))
values[i] = numpy.asarray(values[i], dtype=dtypes[i])

# This mess exists because you can't overwrite attributes in HDF5.
# So we write to a temporary attribute first, and then rename.
# Make HDF5 datatype and dataspace for the H5A calls
if use_htype is None:
type_jsons[i] = getTypeItem(dtypes[i])
self._parent.log.debug(f"attrs.create type_json: {format(type_jsons[i])}")

req = self._req_prefix + name
params = {}
body = {}
body['type'] = type_json
if isinstance(data, Empty):
body['shape'] = 'H5S_NULL'
else:
body['shape'] = shape
if data.dtype.kind != 'c':
body['value'] = self._bytesArrayToList(data)
params['replace'] = 1

attributes = {}

for i in range(len(names)):
attr = {}
attr['type'] = type_jsons[i]
if isinstance(values[i], Empty):
attr['shape'] = 'H5S_NULL'
else:
# Special case: complex numbers
special_dt = createDataType(type_json)
tmp = numpy.empty(shape=data.shape, dtype=special_dt)
tmp['r'] = data.real
tmp['i'] = data.imag
body['value'] = json.loads(json.dumps(tmp.tolist()))
attr['shape'] = shapes[i]
if values[i].dtype.kind != 'c':
attr['value'] = self._bytesArrayToList(values[i])
else:
# Special case: complex numbers
special_dt = createDataType(type_jsons[i])
tmp = numpy.empty(shape=values[i].shape, dtype=special_dt)
tmp['r'] = values[i].real
tmp['i'] = values[i].imag
attr['value'] = json.loads(json.dumps(tmp.tolist()))
attributes[names[i]] = attr

if len(names) > 1:
# Create multiple attributes
# Omit trailing slash
req = self._req_prefix[:-1]
body['attributes'] = attributes

else:
# Create single attribute
req = self._req_prefix + names[0]
for key in attributes[names[0]]:
body[key] = attributes[names[0]][key]

try:
self._parent.PUT(req, body=body)
self._parent.PUT(req, body=body, params=params)
except RuntimeError:
# Resource already exist, try deleting it
self._parent.log.info("Update to existing attribute ({}), deleting it".format(name))
self._parent.DELETE(req)
# now add again
self._parent.PUT(req, body=body)
# 'replace' parameter is used, so failure is not due to attribute already existing
raise RuntimeError("Failued to create attribute(s)")

def modify(self, name, value):
""" Change the value of an attribute while preserving its type.
Expand Down
10 changes: 5 additions & 5 deletions h5pyd/_hl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,21 +1020,21 @@ def PUT(self, req, body=None, params=None, format="json", replace=False):
else:
raise RuntimeError(rsp.reason)
else:
raise IOError(rsp.reason)
raise IOError(f"{rsp.reason}:{rsp.status_code}")

if rsp.text:
rsp_json = json.loads(rsp.text)
return rsp_json

def POST(self, req, body=None, format="json"):
def POST(self, req, body=None, params=None, format="json"):
if self.id.http_conn is None:
raise IOError("object not initialized")

# try to do a POST to the domain

self.log.info("POST: {} [{}]".format(req, self.id.domain))

rsp = self.id._http_conn.POST(req, body=body, format=format)
rsp = self.id._http_conn.POST(req, body=body, params=params, format=format)
if rsp.status_code == 409:
raise ValueError("name already exists")
if rsp.status_code not in (200, 201):
Expand All @@ -1053,14 +1053,14 @@ def POST(self, req, body=None, format="json"):
rsp_json = json.loads(rsp.text)
return rsp_json

def DELETE(self, req):
def DELETE(self, req, params=None):
if self.id.http_conn is None:
raise IOError("object not initialized")

# try to do a DELETE of the resource

self.log.info("DEL: {} [{}]".format(req, self.id.domain))
rsp = self.id._http_conn.DELETE(req)
rsp = self.id._http_conn.DELETE(req, params=params)
# self.log.info("RSP: " + str(rsp.status_code) + ':' + rsp.text)
if rsp.status_code != 200:
raise IOError(rsp.reason)
Expand Down
7 changes: 6 additions & 1 deletion h5pyd/_hl/httpconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,14 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
if format == "binary":
headers["accept"] = "application/octet-stream"

# list of parameters which should disable cache usage
no_cache_params = ["select", "query", "Limit", "Marker", "pattern", "attr"]

check_cache = self._cache is not None and use_cache and format == "json"
check_cache = check_cache and params["domain"] == self._domain
check_cache = check_cache and "select" not in params and "query" not in params

if any(param in params for param in no_cache_params):
check_cache = False

if check_cache:
self.log.debug("httpcon - checking cache")
Expand Down
Loading

0 comments on commit c56e40b

Please sign in to comment.