# Source code for quantify_core.data.dataset_adapters
# Repository: https://gitlab.com/quantify-os/quantify-core
# Licensed according to the LICENCE file on the main branch
"""Utilities for dataset (python object) handling."""
# pylint: disable=too-many-instance-attributes
from __future__ import annotations
import json
from abc import abstractmethod
from copy import deepcopy
from typing import Any, Callable
import xarray as xr
[docs]
class DatasetAdapterBase:
    """
    Generic interface shared by all dataset adapters.

    .. note::

        The generic purpose of this class can be hard to grasp in the abstract.
        See :class:`~.AdapterH5NetCDF` for a specialized use case.

    A dataset adapter "adapts"/"converts" a dataset into a format that is
    compatible with some other piece of software (a function, an interface, a
    read/write back end, etc.). The main use case is defining the interface of
    :class:`~.AdapterH5NetCDF`, which converts the Quantify dataset for loading
    and writing to/from disk.

    Subclasses act as a two-way bridge to another object/interface/backend, which
    is referred to as the "Target" of the adapter:

    - ``.adapt()`` returns a dataset to be consumed by the Target.
    - ``.recover()`` receives a dataset generated by the Target.
    """

    # NOTE: this class does not derive from ``abc.ABC``; the ``@abstractmethod``
    # markers below flag the methods as abstract but the class itself remains
    # instantiable.

    @classmethod
    @abstractmethod
    def adapt(cls, dataset: xr.Dataset) -> xr.Dataset:
        """Converts the ``dataset`` to a format consumed by the Target."""

    @classmethod
    @abstractmethod
    def recover(cls, dataset: xr.Dataset) -> xr.Dataset:
        """Inverts the action of the ``.adapt()`` method."""
[docs]
class DatasetAdapterIdentity:
    """
    No-op dataset adapter: datasets pass through without any modification.

    It exists only to provide an object that respects the adapter interface
    defined by :class:`~.DatasetAdapterBase`.

    A particular use case is backwards compatibility when loading and writing
    older versions of the Quantify dataset.
    """

    @classmethod
    def adapt(cls, dataset: xr.Dataset) -> xr.Dataset:
        """
        Hand the dataset over to the Target untouched.

        Parameters
        ----------
        dataset
            Dataset to pass through.

        Returns
        -------
        :
            The same dataset object that was passed in, with no modifications.
        """
        unchanged = dataset
        return unchanged

    @classmethod
    def recover(cls, dataset: xr.Dataset) -> xr.Dataset:
        """
        Hand the dataset back from the Target untouched.

        Parameters
        ----------
        dataset
            Dataset to pass through.

        Returns
        -------
        :
            The same dataset object that was passed in, with no modifications.
        """
        unchanged = dataset
        return unchanged
[docs]
class AdapterH5NetCDF(DatasetAdapterBase):
    """
    Quantify dataset adapter for the ``h5netcdf`` engine.

    It has the functionality of adapting the Quantify dataset to a format
    compatible with the ``h5netcdf`` xarray backend engine that is used to write
    and load the dataset to/from disk.

    .. warning::

        The ``h5netcdf`` engine has minor issues when performing a two-way trip of
        the dataset. The ``type`` of some attributes are not preserved. E.g., list-
        and tuple-like objects are loaded as numpy arrays of ``dtype=object``.
    """

    @classmethod
    def adapt(cls, dataset: xr.Dataset) -> xr.Dataset:
        """
        Serializes to JSON the dataset and variables attributes.

        To prevent the JSON serialization for specific items, their names should be
        listed under the attribute named ``json_serialize_exclude`` (for each
        ``attrs`` dictionary).

        Parameters
        ----------
        dataset
            Dataset that needs to be adapted.

        Returns
        -------
        :
            Dataset in which the attributes have been replaced with their JSON
            strings version.
        """
        return cls._transform(dataset, vals_converter=json.dumps)

    @classmethod
    def recover(cls, dataset: xr.Dataset) -> xr.Dataset:
        """
        Reverts the action of ``.adapt()``.

        To prevent the JSON de-serialization for specific items, their names should
        be listed under the attribute named ``json_serialize_exclude``
        (for each ``attrs`` dictionary).

        Parameters
        ----------
        dataset
            Dataset from which to recover the original format.

        Returns
        -------
        :
            Dataset in which the attributes have been replaced with their python
            objects version.
        """
        return cls._transform(dataset, vals_converter=json.loads)

    @staticmethod
    def attrs_convert(
        attrs: dict,
        inplace: bool = False,
        vals_converter: Callable[[Any], Any] = json.dumps,
    ) -> dict:
        """
        Converts to/from JSON string the values of the keys which are not listed in
        the ``json_serialize_exclude`` list.

        Parameters
        ----------
        attrs
            The input dictionary.
        inplace
            If ``True`` the values are replaced in place, otherwise a deepcopy of
            ``attrs`` is performed first.
        vals_converter
            Callable applied to every value whose key is not excluded, e.g.
            :func:`json.dumps` (serialize) or :func:`json.loads` (de-serialize).

        Returns
        -------
        :
            The dictionary with converted values: ``attrs`` itself when ``inplace``
            is ``True``, otherwise a converted deepcopy.
        """
        json_serialize_exclude = attrs.get("json_serialize_exclude", [])
        if isinstance(json_serialize_exclude, str):
            # Unless it lists itself, the exclusion list gets JSON-serialized along
            # with every other attribute, so on the way back it is a string.
            # Testing ``name in <str>`` would be a substring match (e.g. "a" would
            # match '["ab"]'), hence decode it to compare against exact names.
            try:
                decoded = json.loads(json_serialize_exclude)
            except ValueError:  # not JSON; keep the value exactly as provided
                decoded = None
            if isinstance(decoded, list):
                json_serialize_exclude = decoded

        attrs = attrs if inplace else deepcopy(attrs)
        # Only values of existing keys are reassigned, so the dictionary size does
        # not change while iterating over it.
        for attr_name, attr_val in attrs.items():
            if attr_name not in json_serialize_exclude:
                attrs[attr_name] = vals_converter(attr_val)
        return attrs

    @classmethod
    def _transform(
        cls, dataset: xr.Dataset, vals_converter: Callable[[Any], Any] = json.dumps
    ) -> xr.Dataset:
        """
        Builds a new dataset whose dataset-level and per-variable attributes have
        been passed through ``vals_converter``.

        Parameters
        ----------
        dataset
            Dataset whose attributes are to be converted.
        vals_converter
            Callable forwarded to :meth:`attrs_convert`.

        Returns
        -------
        :
            New dataset object carrying the converted attributes.
        """
        # NOTE(review): some xarray releases may not accept a ``Dataset`` as the
        # first constructor argument - confirm against the supported versions.
        dataset = xr.Dataset(
            dataset,
            attrs=cls.attrs_convert(
                dataset.attrs, inplace=False, vals_converter=vals_converter
            ),
        )
        for var_name in dataset.variables:
            # Converting in place: per the original author's note, the new dataset
            # generated above already holds its own copy of the attributes.
            cls.attrs_convert(
                dataset[var_name].attrs, inplace=True, vals_converter=vals_converter
            )
        return dataset