Source code for msl.io.dataset

"""
A :class:`Dataset` is essentially a :class:`numpy.ndarray` with :class:`~msl.io.metadata.Metadata`.
"""
import numpy as np

from .vertex import Vertex


class Dataset(Vertex):

    def __init__(self, name, parent, read_only, shape=(0,), dtype=float, buffer=None,
                 offset=0, strides=None, order=None, data=None, **metadata):
        """A :class:`Dataset` is essentially a :class:`numpy.ndarray` with :class:`~msl.io.metadata.Metadata`.

        Do not instantiate directly. Create a new :class:`Dataset` using
        :meth:`~msl.io.group.Group.create_dataset`.

        Parameters
        ----------
        name : :class:`str`
            A name to associate with this :class:`Dataset`.
        parent : :class:`~msl.io.group.Group`
            The parent :class:`~msl.io.group.Group` to the :class:`Dataset`.
        read_only : :class:`bool`
            Whether the :class:`Dataset` is to be accessed in read-only mode.
        shape
            See :class:`numpy.ndarray`
        dtype
            See :class:`numpy.ndarray`
        buffer
            See :class:`numpy.ndarray`
        offset
            See :class:`numpy.ndarray`
        strides
            See :class:`numpy.ndarray`
        order
            See :class:`numpy.ndarray`
        data
            If not :data:`None` then it must be either a :class:`numpy.ndarray` or
            an array-like object which will be passed to :func:`numpy.asarray`,
            as well as `dtype` and `order`, to be used as the underlying data.
            A :class:`numpy.ndarray` is used as is (it is not copied and `dtype`
            and `order` are not applied to it).
        **metadata
            All other key-value pairs will be used as
            :class:`~msl.io.metadata.Metadata` for this :class:`Dataset`.
        """
        super(Dataset, self).__init__(name, parent, read_only, **metadata)

        if data is None:
            # no data was provided -> allocate a new array from the ndarray arguments
            self._data = np.ndarray(
                shape, dtype=dtype, buffer=buffer, offset=offset, strides=strides, order=order
            )
        else:
            if isinstance(data, np.ndarray):
                # the array is referenced, not copied, so the writeable flag
                # that gets set below is changed on the caller's array as well
                self._data = data
            else:
                self._data = np.asarray(data, dtype=dtype, order=order)

        # go through the property setter so that the writeable flag of the
        # array is synchronised with the requested mode
        self.read_only = read_only

    def __repr__(self):
        return '<{} {!r} shape={} dtype={!r} ({} metadata)>'\
            .format(self.__class__.__name__, self._name, self._data.shape,
                    self._data.dtype.str, len(self.metadata))

    def __str__(self):
        return repr(self._data)

    @property
    def read_only(self):
        """:class:`bool`: Whether this :class:`Dataset` is in read-only mode.

        This is equivalent to setting the ``WRITEABLE`` property in
        :meth:`numpy.ndarray.setflags`.
        """
        return not self._data.flags.writeable

    @read_only.setter
    def read_only(self, value):
        val = bool(value)
        # keep the metadata and the array in the same mode
        self._metadata.read_only = val
        self._data.setflags(write=not val)

    def copy(self, read_only=None):
        """Create a copy of this :class:`Dataset`.

        Parameters
        ----------
        read_only : :class:`bool`, optional
            Whether the copy should be created in read-only mode. If :data:`None` then
            creates a copy using the mode for the :class:`Dataset` that is being copied.

        Returns
        -------
        :class:`Dataset`
            A copy of this :class:`Dataset`.
        """
        # Use the read_only property (derived from the writeable flag of the
        # array) rather than a cached attribute: the setter of the property
        # in this class only updates the array and the metadata, so a cached
        # value could be stale after the mode was changed.
        if read_only is None:
            read_only = self.read_only
        return Dataset(
            self._name,
            self._parent,
            read_only,
            data=self._data.copy(),
            **self._metadata.copy()
        )

    @property
    def data(self):
        """:class:`numpy.ndarray`: The data of the :class:`Dataset`.

        .. note::
           You do not have to call this attribute to get access to the
           :class:`numpy.ndarray`. You can directly call the
           :class:`numpy.ndarray` attribute from the :class:`Dataset`
           object.

           For example,

           .. invisible-code-block: pycon

              >>> SKIP_IF_PYTHON_LESS_THAN_36()
              >>> from msl.io import JSONWriter
              >>> root = JSONWriter()
              >>> dset = root.create_dataset('my_data', data=[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])

           .. code-block:: pycon

              >>> dset
              <Dataset '/my_data' shape=(4, 3) dtype='<f8' (0 metadata)>
              >>> dset.data
              array([[ 0.,  1.,  2.],
                     [ 3.,  4.,  5.],
                     [ 6.,  7.,  8.],
                     [ 9., 10., 11.]])
              >>> dset.size
              12
              >>> dset.tolist()
              [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0], [9.0, 10.0, 11.0]]
              >>> dset.mean(axis=0)
              array([4.5, 5.5, 6.5])
              >>> dset[::2]
              array([[0., 1., 2.],
                     [6., 7., 8.]])

        """
        return self._data

    def __getitem__(self, item):
        return self._data[item]

    def __setitem__(self, item, value):
        self._data[item] = value

    def __getattr__(self, item):
        # __getattr__ is only invoked when the normal attribute lookup fails.
        # If the array itself has not been assigned yet (e.g., the instance
        # was created without running __init__) then evaluating self._data
        # below would call __getattr__ again and recurse without end, so
        # fail in the regular way instead.
        if item == '_data':
            raise AttributeError(item)

        try:
            # delegate to the ndarray, e.g., dset.shape, dset.mean()
            return getattr(self._data, item)
        except AttributeError as err:
            try:
                # fall back to item access on the array using the attribute name
                return self._data[item]
            except (IndexError, ValueError):
                pass
            raise err

    def __len__(self):
        # if the ndarray is a scalar then the following exception is raised
        #   TypeError: len() of unsized object
        try:
            return len(self._data)
        except TypeError:
            return self._data.size

    def __iter__(self):
        return iter(self._data)

    # The operators below are forwarded to the underlying ndarray, therefore
    # the result is whatever numpy returns and not a Dataset.

    def __add__(self, rhs):
        return self._data + rhs

    def __radd__(self, lhs):
        return lhs + self._data

    def __sub__(self, rhs):
        return self._data - rhs

    def __rsub__(self, lhs):
        return lhs - self._data

    def __mul__(self, rhs):
        return self._data * rhs

    def __rmul__(self, lhs):
        return lhs * self._data

    def __matmul__(self, rhs):
        # TODO could use the @ operator when dropping support for Python 2
        return np.matmul(self._data, rhs)

    def __rmatmul__(self, lhs):
        # TODO could use the @ operator when dropping support for Python 2
        return np.matmul(lhs, self._data)

    def __truediv__(self, rhs):
        return self._data / rhs

    def __rtruediv__(self, lhs):
        return lhs / self._data

    def __div__(self, rhs):
        # TODO could remove this method when dropping support for Python 2
        return self._data / rhs

    def __rdiv__(self, lhs):
        # TODO could remove this method when dropping support for Python 2
        return lhs / self._data

    def __floordiv__(self, rhs):
        return self._data // rhs

    def __rfloordiv__(self, lhs):
        return lhs // self._data

    def __mod__(self, rhs):
        return self._data % rhs

    def __rmod__(self, lhs):
        return lhs % self._data

    def __divmod__(self, rhs):
        return np.divmod(self._data, rhs)

    def __rdivmod__(self, lhs):
        return np.divmod(lhs, self._data)

    def __pow__(self, rhs):
        return self._data ** rhs

    def __rpow__(self, lhs):
        return lhs ** self._data

    def __lshift__(self, rhs):
        return self._data << rhs

    def __rlshift__(self, lhs):
        return lhs << self._data

    def __rshift__(self, rhs):
        return self._data >> rhs

    def __rrshift__(self, lhs):
        return lhs >> self._data

    def __and__(self, rhs):
        return self._data & rhs

    def __rand__(self, lhs):
        return lhs & self._data

    def __xor__(self, rhs):
        return self._data ^ rhs

    def __rxor__(self, lhs):
        return lhs ^ self._data

    def __or__(self, rhs):
        return self._data | rhs

    def __ror__(self, lhs):
        return lhs | self._data

    def __neg__(self):
        return -self._data

    def __pos__(self):
        # returns the underlying array itself (not a new array)
        return self._data

    def __abs__(self):
        return abs(self._data)

    def __invert__(self):
        return ~self._data