"""
A :class:`Group` can contain sub-:class:`Group`\\s and/or :class:`~msl.io.dataset.Dataset`\\s.
"""
import re
from .dataset import Dataset
from .dataset_logging import DatasetLogging
from .vertex import Vertex
[docs]class Group(Vertex):
def __init__(self, name, parent, read_only, **metadata):
    """Initialise a :class:`Group`.

    A :class:`Group` can contain sub-:class:`Group`\\s and/or
    :class:`~msl.io.dataset.Dataset`\\s.

    Do not instantiate directly. Create a new :class:`Group` using
    :meth:`~msl.io.group.Group.create_group`.

    Parameters
    ----------
    name : :class:`str`
        The name of this :class:`Group`. The naming convention is analogous
        to a UNIX file system: each :class:`~msl.io.group.Group` can be
        thought of as a directory and every subdirectory is separated from
        its parent directory by the ``'/'`` character.
    parent : :class:`Group`
        The parent :class:`Group` of this :class:`Group`.
    read_only : :class:`bool`
        Whether the :class:`Group` is to be accessed in read-only mode.
    **metadata
        Key-value pairs that are used to create the
        :class:`~msl.io.metadata.Metadata` for this :class:`Group`.
    """
    # every argument is forwarded, unchanged, to the base-class initialiser
    super(Group, self).__init__(name, parent, read_only, **metadata)
def __repr__(self):
    """Return a one-line summary of this Group.

    The summary contains the name of the Group followed by the number of
    sub-Groups, Datasets and Metadata items.
    """
    # count the items that each generator yields without building a list
    counts = (
        sum(1 for _ in self.groups()),
        sum(1 for _ in self.datasets()),
        len(self.metadata),
    )
    return '<Group {!r} ({} groups, {} datasets, {} metadata)>'.format(self._name, *counts)
def __getitem__(self, item):
    """Look up a sub-Group or a Dataset by its name.

    A leading ``'/'`` is prepended to a non-empty `item` that does not
    already start with one before the internal mapping is searched. If the
    lookup fails, ``self._raise_key_error`` is called with the (possibly
    modified) name so that a more detailed error message can be produced.
    """
    key = item
    if key and key[0] != '/':
        key = '/' + key

    try:
        return self._mapping[key]
    except KeyError:
        # the error is reported below, outside of this handler
        pass

    self._raise_key_error(key)
def __getattr__(self, item):
    """Access a sub-Group or a Dataset as an attribute.

    This is only invoked when the normal attribute lookup fails. The
    lookup is forwarded to :meth:`__getitem__` using ``'/' + item`` as the
    key and a :exc:`KeyError` is converted to an :exc:`AttributeError`.

    Raises
    ------
    AttributeError
        If there is no item with the requested name, or if the internal
        ``_mapping`` attribute itself is missing from the instance.
    """
    if item == '_mapping':
        # __getitem__ reads self._mapping. If that attribute does not exist
        # on the instance (e.g. the object was created via __new__ and has
        # not been initialised) then forwarding the lookup would re-enter
        # this method forever and end in a RecursionError.
        raise AttributeError(item)

    try:
        return self.__getitem__('/' + item)
    except KeyError as e:
        raise AttributeError(str(e))
def __delattr__(self, item):
    """Delete a sub-Group or a Dataset using attribute syntax.

    The deletion is forwarded to ``__delitem__`` using ``'/' + item`` as
    the key. A :exc:`KeyError` is converted to an :exc:`AttributeError`
    that carries the same message.
    """
    key = '/' + item
    try:
        result = self.__delitem__(key)
    except KeyError as err:
        raise AttributeError(str(err))
    return result
@staticmethod
def is_dataset(obj):
    """Check if an object is a :class:`~msl.io.dataset.Dataset`.

    Parameters
    ----------
    obj : :class:`object`
        The object to check.

    Returns
    -------
    :class:`bool`
        :data:`True` if `obj` is an instance of
        :class:`~msl.io.dataset.Dataset`, otherwise :data:`False`.
    """
    return isinstance(obj, Dataset)
@staticmethod
def is_dataset_logging(obj):
    """Check if an object is a :class:`~msl.io.dataset_logging.DatasetLogging`.

    Parameters
    ----------
    obj : :class:`object`
        The object to check.

    Returns
    -------
    :class:`bool`
        :data:`True` if `obj` is an instance of
        :class:`~msl.io.dataset_logging.DatasetLogging`, otherwise
        :data:`False`.
    """
    return isinstance(obj, DatasetLogging)
@staticmethod
def is_group(obj):
    """Check if an object is a :class:`~msl.io.group.Group`.

    Parameters
    ----------
    obj : :class:`object`
        The object to check.

    Returns
    -------
    :class:`bool`
        :data:`True` if `obj` is an instance of
        :class:`~msl.io.group.Group`, otherwise :data:`False`.
    """
    return isinstance(obj, Group)
def datasets(self, exclude=None, include=None, flags=0):
    """Iterate over the :class:`~msl.io.dataset.Dataset`\\s in this :class:`Group`.

    Parameters
    ----------
    exclude : :class:`str`, optional
        A regex pattern. A :class:`~msl.io.dataset.Dataset` is skipped if
        :func:`re.search` finds this pattern in its `name`.
    include : :class:`str`, optional
        A regex pattern. If specified, a :class:`~msl.io.dataset.Dataset`
        is only yielded if :func:`re.search` finds this pattern in its
        `name`.
    flags : :class:`int`, optional
        Regex flags that are passed to :func:`re.compile`.

    Yields
    ------
    :class:`~msl.io.dataset.Dataset`
        The :class:`~msl.io.dataset.Dataset`\\s that pass both filters.
        The `exclude` pattern is tested first, so it takes precedence
        over the `include` pattern if both match.
    """
    skip = None if exclude is None else re.compile(exclude, flags=flags)
    keep = None if include is None else re.compile(include, flags=flags)

    for vertex in self._mapping.values():
        if not self.is_dataset(vertex):
            continue
        if skip is not None and skip.search(vertex.name):
            continue
        if keep is not None and not keep.search(vertex.name):
            continue
        yield vertex
def groups(self, exclude=None, include=None, flags=0):
    """Iterate over the sub-:class:`.Group`\\s of this :class:`.Group`.

    Parameters
    ----------
    exclude : :class:`str`, optional
        A regex pattern. A :class:`.Group` is skipped if :func:`re.search`
        finds this pattern in its `name`.
    include : :class:`str`, optional
        A regex pattern. If specified, a :class:`.Group` is only yielded
        if :func:`re.search` finds this pattern in its `name`.
    flags : :class:`int`, optional
        Regex flags that are passed to :func:`re.compile`.

    Yields
    ------
    :class:`Group`
        The :class:`.Group`\\s that pass both filters. The `exclude`
        pattern is tested first, so it takes precedence over the
        `include` pattern if both match.
    """
    skip = None if exclude is None else re.compile(exclude, flags=flags)
    keep = None if include is None else re.compile(include, flags=flags)

    for vertex in self._mapping.values():
        if not self.is_group(vertex):
            continue
        if skip is not None and skip.search(vertex.name):
            continue
        if keep is not None and not keep.search(vertex.name):
            continue
        yield vertex
def descendants(self):
    """Iterate over the descendant (children) :class:`.Group`\\s of this :class:`.Group`.

    Every value in the internal mapping that is a :class:`.Group` is
    yielded.

    Yields
    ------
    :class:`.Group`
        The descendants of this :class:`.Group`.
    """
    for vertex in self._mapping.values():
        if not self.is_group(vertex):
            continue
        yield vertex
def ancestors(self):
    """Iterate over the ancestor (parent) :class:`.Group`\\s of this :class:`.Group`.

    The chain of ``parent`` references is followed, starting with the
    parent of this :class:`.Group`, until a parent of :data:`None` is
    reached.

    Yields
    ------
    :class:`.Group`
        The ancestors of this :class:`.Group`, nearest first.
    """
    ancestor = self.parent
    while ancestor is not None:
        yield ancestor
        ancestor = ancestor.parent
def add_group(self, name, group):
    """Add a copy of a :class:`Group`.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the new :class:`Group` to add.
    group : :class:`Group`
        The :class:`Group` to add. The :class:`~msl.io.dataset.Dataset`\\s and
        :class:`~msl.io.metadata.Metadata` that are contained within the
        `group` will be copied.

    Raises
    ------
    TypeError
        If `group` is not a :class:`Group`.
    """
    if not isinstance(group, Group):
        raise TypeError('Must pass in a Group object, got {!r}'.format(group))

    prefix = '/' + name.strip('/')

    if not group:
        # `group` has no sub-Groups or Datasets, so only its Metadata is added
        self.create_group(prefix + group.name, **group.metadata.copy())
        return

    # NOTE(review): in this branch only the items contained in `group` are
    # re-created; the Metadata of `group` itself is not copied here.
    for key, vertex in group.items():
        target = prefix + key
        meta = vertex.metadata.copy()
        if self.is_group(vertex):
            self.create_group(target, read_only=vertex.read_only, **meta)
        else:
            # anything that is not a Group is treated as a Dataset
            self.create_dataset(target, read_only=vertex.read_only, data=vertex.data.copy(), **meta)
def create_group(self, name, read_only=None, **metadata):
    """Create a new :class:`Group`.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the new :class:`Group`.
    read_only : :class:`bool`, optional
        Whether to create this :class:`Group` in read-only mode.
        If :data:`None` then uses the mode for this :class:`Group`.
    **metadata
        Key-value pairs that are used to create the
        :class:`~msl.io.metadata.Metadata` for this :class:`Group`.

    Returns
    -------
    :class:`Group`
        The new :class:`Group` that was created.
    """
    # resolve the read-only mode and drop a 'parent' keyword if one was passed
    mode, meta = self._check(read_only, **metadata)
    # the last component of `name` is created as a child of the deepest ancestor
    leaf, parent = self._create_ancestors(name, mode)
    return Group(leaf, parent, mode, **meta)
def require_group(self, name, read_only=None, **metadata):
    """Require that a :class:`Group` exists.

    If the :class:`Group` exists then it is returned, otherwise it is
    created. Automatically creates the ancestor :class:`Group`\\s if they
    do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the :class:`Group`.
    read_only : :class:`bool`, optional
        Whether to return the :class:`Group` in read-only mode.
        If :data:`None` then the mode of an existing :class:`Group` is not
        changed, and a new :class:`Group` uses the mode for this
        :class:`Group`.
    **metadata
        Key-value pairs that are used as :class:`~msl.io.metadata.Metadata`
        for this :class:`Group`. They are added to an existing
        :class:`Group`.

    Returns
    -------
    :class:`Group`
        The :class:`Group` that was created or that already existed.
    """
    name = '/' + name.strip('/')
    if self.parent is None:
        full_name = name
    else:
        full_name = self.name + name

    # stop at the first sub-Group whose name matches
    existing = next((g for g in self.groups() if g.name == full_name), None)
    if existing is None:
        return self.create_group(name, read_only=read_only, **metadata)

    if read_only is not None:
        existing.read_only = read_only
    existing.add_metadata(**metadata)
    return existing
def add_dataset(self, name, dataset):
    """Add a copy of a :class:`~msl.io.dataset.Dataset`.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the new :class:`~msl.io.dataset.Dataset` to add.
    dataset : :class:`~msl.io.dataset.Dataset`
        The :class:`~msl.io.dataset.Dataset` to add. The data and the
        :class:`~msl.io.metadata.Metadata` are copied.

    Raises
    ------
    TypeError
        If `dataset` is not a :class:`~msl.io.dataset.Dataset`.
    """
    if not isinstance(dataset, Dataset):
        raise TypeError('Must pass in a Dataset object, got {!r}'.format(dataset))

    self.create_dataset(
        '/' + name.strip('/'),
        read_only=dataset.read_only,
        data=dataset.data.copy(),
        **dataset.metadata.copy()
    )
def create_dataset(self, name, read_only=None, **kwargs):
    """Create a new :class:`~msl.io.dataset.Dataset`.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the new :class:`~msl.io.dataset.Dataset`.
    read_only : :class:`bool`, optional
        Whether to create this :class:`~msl.io.dataset.Dataset` in read-only mode.
        If :data:`None` then uses the mode for this :class:`Group`.
    **kwargs
        Key-value pairs that are passed to :class:`~msl.io.dataset.Dataset`.

    Returns
    -------
    :class:`~msl.io.dataset.Dataset`
        The new :class:`~msl.io.dataset.Dataset` that was created.
    """
    # resolve the read-only mode and drop a 'parent' keyword if one was passed
    mode, options = self._check(read_only, **kwargs)
    # the last component of `name` is created as a child of the deepest ancestor
    leaf, parent = self._create_ancestors(name, mode)
    return Dataset(leaf, parent, mode, **options)
def require_dataset(self, name, read_only=None, **kwargs):
    """Require that a :class:`~msl.io.dataset.Dataset` exists.

    If the :class:`~msl.io.dataset.Dataset` exists then it is returned,
    otherwise it is created. Automatically creates the ancestor
    :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the :class:`~msl.io.dataset.Dataset`.
    read_only : :class:`bool`, optional
        Whether to create this :class:`~msl.io.dataset.Dataset` in read-only mode.
        If :data:`None` then the mode of an existing
        :class:`~msl.io.dataset.Dataset` is not changed, and a new
        :class:`~msl.io.dataset.Dataset` uses the mode for this :class:`Group`.
    **kwargs
        Key-value pairs that are passed to :class:`~msl.io.dataset.Dataset`.
        If the :class:`~msl.io.dataset.Dataset` already exists, the array
        keywords (``shape``, ``dtype``, ``buffer``, ``offset``, ``strides``,
        ``order`` and ``data``) are discarded and the remaining pairs are
        added as :class:`~msl.io.metadata.Metadata`.

    Returns
    -------
    :class:`~msl.io.dataset.Dataset`
        The :class:`~msl.io.dataset.Dataset` that was created or that already existed.
    """
    name = '/' + name.strip('/')
    full_name = name if self.parent is None else self.name + name

    for dataset in self.datasets():
        if dataset.name != full_name:
            continue

        if read_only is not None:
            dataset.read_only = read_only

        if kwargs:
            # only the keyword arguments that should be Metadata are added
            for array_kw in ('shape', 'dtype', 'buffer', 'offset', 'strides', 'order', 'data'):
                kwargs.pop(array_kw, None)
            dataset.add_metadata(**kwargs)

        return dataset

    return self.create_dataset(name, read_only=read_only, **kwargs)
def add_dataset_logging(self, name, dataset_logging):
    """Add a copy of a :class:`~msl.io.dataset_logging.DatasetLogging`.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        The name of the new :class:`~msl.io.dataset_logging.DatasetLogging` to add.
    dataset_logging : :class:`~msl.io.dataset_logging.DatasetLogging`
        The :class:`~msl.io.dataset_logging.DatasetLogging` to add. Its data
        and :class:`~msl.io.metadata.Metadata` are copied; its level,
        attributes, logger and date format are reused.

    Raises
    ------
    TypeError
        If `dataset_logging` is not a
        :class:`~msl.io.dataset_logging.DatasetLogging`.
    """
    if not isinstance(dataset_logging, DatasetLogging):
        raise TypeError('Must pass in a DatasetLogging object, got {!r}'.format(dataset_logging))

    source = dataset_logging
    self.create_dataset_logging(
        '/' + name.strip('/'),
        level=source.level,
        attributes=source.attributes,
        logger=source.logger,
        date_fmt=source.date_fmt,
        data=source.data.copy(),
        **source.metadata.copy()
    )
def create_dataset_logging(self, name, level='INFO', attributes=None, logger=None, date_fmt=None, **kwargs):
    """Create a :class:`~msl.io.dataset.Dataset` that handles :mod:`logging` records.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        A name to associate with the :class:`~msl.io.dataset.Dataset`.
    level : :class:`int` or :class:`str`, optional
        The :ref:`logging level <levels>` to use.
    attributes : :class:`list` or :class:`tuple` of :class:`str`, optional
        The :ref:`attribute names <logrecord-attributes>` to include in the
        :class:`~msl.io.dataset.Dataset` for each :ref:`logging record <log-record>`.
        If :data:`None` then uses ``asctime``, ``levelname``, ``name``, and ``message``.
    logger : :class:`~logging.Logger`, optional
        The :class:`~logging.Logger` that the :class:`~msl.io.dataset_logging.DatasetLogging`
        object will be added to. If :data:`None` then it is added to the ``root``
        :class:`~logging.Logger`.
    date_fmt : :class:`str`, optional
        The :class:`~datetime.datetime` :ref:`format code <strftime-strptime-behavior>`
        to use to represent the ``asctime`` :ref:`attribute <logrecord-attributes>` in.
        If :data:`None` then uses the ISO 8601 format ``'%Y-%m-%dT%H:%M:%S.%f'``.
    **kwargs
        Additional keyword arguments are passed to :class:`~msl.io.dataset.Dataset`.
        The default behaviour is to append every :ref:`logging record <log-record>`
        to the :class:`~msl.io.dataset.Dataset`. This guarantees that the size of the
        :class:`~msl.io.dataset.Dataset` is equal to the number of
        :ref:`logging records <log-record>` that were added to it. However, appending
        can decrease the performance if many :ref:`logging records <log-record>` are
        added often, because a copy of the data in the :class:`~msl.io.dataset.Dataset`
        is created for each record that is added. You can improve the performance by
        specifying an initial size of the :class:`~msl.io.dataset.Dataset` with a
        `shape` or a `size` keyword argument. Doing so also means that additional
        empty rows, proportional to the size of the :class:`~msl.io.dataset.Dataset`,
        are created automatically whenever the size needs to be increased. In that
        case you will want to call
        :meth:`~msl.io.dataset_logging.DatasetLogging.remove_empty_rows` before
        writing the :class:`~msl.io.dataset_logging.DatasetLogging` to a file, or
        before interacting with its data, to remove the extra rows that were created.

    Returns
    -------
    :class:`~msl.io.dataset_logging.DatasetLogging`
        The :class:`~msl.io.dataset_logging.DatasetLogging` that was created.

    Examples
    --------
    >>> import logging
    >>> from msl.io import JSONWriter
    >>> logger = logging.getLogger('my_logger')
    >>> root = JSONWriter()
    >>> log_dset = root.create_dataset_logging('log')
    >>> logger.info('hi')
    >>> logger.error('cannot do that!')
    >>> log_dset.data
    array([(..., 'INFO', 'my_logger', 'hi'), (..., 'ERROR', 'my_logger', 'cannot do that!')],
          dtype=[('asctime', 'O'), ('levelname', 'O'), ('name', 'O'), ('message', 'O')])

    Get all ``ERROR`` :ref:`logging records <log-record>`

    >>> errors = log_dset[log_dset['levelname'] == 'ERROR']
    >>> print(errors)
    [(..., 'ERROR', 'my_logger', 'cannot do that!')]

    Stop the :class:`~msl.io.dataset_logging.DatasetLogging` object
    from receiving :ref:`logging records <log-record>`

    >>> log_dset.remove_handler()
    """
    # the read-only mode is always requested as False for a logging Dataset
    mode, meta = self._check(False, **kwargs)
    leaf, parent = self._create_ancestors(name, mode)

    # if the default attribute names or the default date_fmt are changed then
    # update the `attributes` and `date_fmt` descriptions in the docstrings of
    # create_dataset_logging() and require_dataset_logging()
    attributes = ['asctime', 'levelname', 'name', 'message'] if attributes is None else attributes
    date_fmt = '%Y-%m-%dT%H:%M:%S.%f' if date_fmt is None else date_fmt

    return DatasetLogging(
        leaf, parent,
        level=level,
        attributes=attributes,
        logger=logger,
        date_fmt=date_fmt,
        **meta
    )
def require_dataset_logging(self, name, level='INFO', attributes=None, logger=None, date_fmt=None, **kwargs):
    """Require that a :class:`~msl.io.dataset.Dataset` exists for handling :mod:`logging` records.

    If the :class:`~msl.io.dataset_logging.DatasetLogging` exists then it is
    returned, otherwise it is created. If a plain
    :class:`~msl.io.dataset.Dataset` with the logging metadata exists, it is
    replaced by a :class:`~msl.io.dataset_logging.DatasetLogging` that is
    created from a copy of its data and metadata.

    Automatically creates the ancestor :class:`Group`\\s if they do not exist.

    Parameters
    ----------
    name : :class:`str`
        A name to associate with the :class:`~msl.io.dataset.Dataset`.
    level : :class:`int` or :class:`str`, optional
        The :ref:`logging level <levels>` to use.
    attributes : :class:`list` or :class:`tuple` of :class:`str`, optional
        The :ref:`attribute names <logrecord-attributes>` to include in the
        :class:`~msl.io.dataset.Dataset` for each :ref:`logging record <log-record>`.
        If the :class:`~msl.io.dataset.Dataset` exists and if `attributes`
        are specified, and they do not match those of the existing
        :class:`~msl.io.dataset.Dataset`, then a :exc:`ValueError` is raised.
        If :data:`None` and the :class:`~msl.io.dataset.Dataset` does not exist
        then uses ``asctime``, ``levelname``, ``name``, and ``message``.
    logger : :class:`~logging.Logger`, optional
        The :class:`~logging.Logger` that the :class:`~msl.io.dataset_logging.DatasetLogging`
        object will be added to. If :data:`None` then it is added to the ``root``
        :class:`~logging.Logger`.
    date_fmt : :class:`str`, optional
        The :class:`~datetime.datetime` :ref:`format code <strftime-strptime-behavior>`
        to use to represent the ``asctime`` :ref:`attribute <logrecord-attributes>` in.
        If :data:`None` then uses the ISO 8601 format ``'%Y-%m-%dT%H:%M:%S.%f'``.
    **kwargs
        Additional keyword arguments are passed to :class:`~msl.io.dataset.Dataset`.
        The default behaviour is to append every :ref:`logging record <log-record>`
        to the :class:`~msl.io.dataset.Dataset`. This guarantees that the size of the
        :class:`~msl.io.dataset.Dataset` is equal to the number of
        :ref:`logging records <log-record>` that were added to it. However, appending
        can decrease the performance if many :ref:`logging records <log-record>` are
        added often, because a copy of the data in the :class:`~msl.io.dataset.Dataset`
        is created for each record that is added. You can improve the performance by
        specifying an initial size of the :class:`~msl.io.dataset.Dataset` with a
        `shape` or a `size` keyword argument. Doing so also means that additional
        empty rows, proportional to the size of the :class:`~msl.io.dataset.Dataset`,
        are created automatically whenever the size needs to be increased. In that
        case you will want to call
        :meth:`~msl.io.dataset_logging.DatasetLogging.remove_empty_rows` before
        writing the :class:`~msl.io.dataset_logging.DatasetLogging` to a file, or
        before interacting with its data, to remove the extra rows that were created.

    Returns
    -------
    :class:`~msl.io.dataset_logging.DatasetLogging`
        The :class:`~msl.io.dataset_logging.DatasetLogging` that was created or
        that already existed.

    Raises
    ------
    ValueError
        If a :class:`~msl.io.dataset.Dataset` with the requested name exists
        but it does not contain the logging metadata, or if `attributes` do
        not match the field names of the existing
        :class:`~msl.io.dataset.Dataset`.
    """
    name = '/' + name.strip('/')
    dataset_name = name if self.parent is None else self.name + name

    for dataset in self.datasets():
        if dataset.name != dataset_name:
            continue

        if ('logging_level' not in dataset.metadata) or \
                ('logging_level_name' not in dataset.metadata) or \
                ('logging_date_format' not in dataset.metadata):
            raise ValueError('The required Dataset was found but it is not used for logging')

        if attributes and (dataset.dtype.names != tuple(attributes)):
            raise ValueError('The attribute names of the existing '
                             'logging Dataset are {} which does not equal {}'
                             .format(dataset.dtype.names, tuple(attributes)))

        if isinstance(dataset, DatasetLogging):
            return dataset

        # replace the existing Dataset with a new DatasetLogging object
        meta = dataset.metadata.copy()
        data = dataset.data.copy()

        # Remove the existing Dataset from its descendants, itself and its
        # ancestors. The key is found by object identity on a snapshot of
        # each mapping: deriving it with str.lstrip(group.name) is wrong
        # because lstrip removes a *set of characters* (not a prefix), and
        # deleting from a mapping while iterating over it is an error.
        groups = tuple(self.descendants()) + (self,) + tuple(self.ancestors())
        for group in groups:
            for key, vertex in list(group._mapping.items()):
                if vertex is dataset:
                    del group._mapping[key]

        # temporarily make this Group not in read-only mode; the original
        # mode is restored even if the creation fails
        original_read_only_mode = bool(self._read_only)
        self._read_only = False
        try:
            kwargs.update(meta)
            return self.create_dataset_logging(name, level=level, attributes=data.dtype.names,
                                               logger=logger, date_fmt=meta.logging_date_format,
                                               data=data, **kwargs)
        finally:
            self._read_only = original_read_only_mode

    return self.create_dataset_logging(name, level=level, attributes=attributes,
                                       logger=logger, date_fmt=date_fmt, **kwargs)
def remove(self, name):
    """Remove a :class:`Group` or a :class:`~msl.io.dataset.Dataset`.

    Leading and trailing ``'/'`` characters are stripped from `name` and a
    single leading ``'/'`` is prepended before the item is popped.

    Parameters
    ----------
    name : :class:`str`
        The name of the :class:`Group` or :class:`~msl.io.dataset.Dataset` to remove.

    Returns
    -------
    :class:`Group`, :class:`~msl.io.dataset.Dataset` or :data:`None`
        The :class:`Group` or :class:`~msl.io.dataset.Dataset` that was
        removed or :data:`None` if there was no :class:`Group` or
        :class:`~msl.io.dataset.Dataset` with the specified `name`.
    """
    key = '/' + name.strip('/')
    removed = self.pop(key, None)
    return removed
def _check(self, read_only, **kwargs):
    """Validate a create request and resolve the read-only mode to use.

    Calls ``self._raise_if_read_only()`` first, then discards a ``'parent'``
    keyword argument if one was passed in.

    Returns
    -------
    :class:`tuple`
        The read-only mode (the value of ``self._read_only`` if `read_only`
        is :data:`None`, otherwise `read_only`) and the remaining keyword
        arguments.
    """
    self._raise_if_read_only()
    kwargs.pop('parent', None)
    mode = self._read_only if read_only is None else read_only
    return mode, kwargs
def _create_ancestors(self, name, read_only):
    """Create the ancestor Groups of `name` that do not already exist.

    `name` is split on ``'/'`` (after stripping leading and trailing
    ``'/'`` characters). Every component except the last one is either
    looked up in, or created as a new Group of, the previous component,
    starting from this Group.

    Returns
    -------
    :class:`tuple`
        The last component of `name` and the object that is to be its parent.
    """
    parts = name.strip('/').split('/')
    leaf = parts.pop()

    node = self
    for part in parts:
        # reuse an existing item, otherwise create the missing Group
        node = node[part] if part in node else Group(part, node, read_only)
    return leaf, node