Source code for msl.io.dataset_logging

"""
A :class:`~msl.io.dataset.Dataset` that handles :mod:`logging` records.
"""
import logging
from datetime import datetime

import numpy as np

from .dataset import Dataset


class DatasetLogging(Dataset, logging.Handler):

    def __init__(self, name, parent, level=logging.NOTSET, attributes=None,
                 logger=None, date_fmt=None, **kwargs):
        """A :class:`~msl.io.dataset.Dataset` that handles :mod:`logging` records.

        Do not instantiate directly. Create a new :class:`DatasetLogging` using
        :meth:`~msl.io.group.Group.create_dataset_logging`.

        Parameters
        ----------
        name : :class:`str`
            A name to associate with the :class:`~msl.io.dataset.Dataset`.
        parent : :class:`~msl.io.group.Group`
            The parent :class:`~msl.io.group.Group` to the :class:`~msl.io.dataset.Dataset`.
        level : :class:`int` or :class:`str`, optional
            The :ref:`logging level <levels>` to use.
        attributes : :class:`list` or :class:`tuple` of :class:`str`, optional
            The :ref:`attribute names <logrecord-attributes>` to include in the
            :class:`~msl.io.dataset.Dataset` for each :ref:`logging record <log-record>`.
        logger : :class:`~logging.Logger`, optional
            The :class:`~logging.Logger` that this :class:`DatasetLogging` object
            will be added to. If :data:`None` then it is added to the ``root``
            :class:`~logging.Logger`.
        date_fmt : :class:`str`, optional
            The :class:`~datetime.datetime` :ref:`format code <strftime-strptime-behavior>`
            to use to represent the ``asctime`` :ref:`attribute <logrecord-attributes>`.
        **kwargs
            Additional keyword arguments are passed to :class:`~msl.io.dataset.Dataset`.

        The default behaviour is to append every :ref:`logging record <log-record>` to
        the :class:`~msl.io.dataset.Dataset`. This guarantees that the size of the
        :class:`~msl.io.dataset.Dataset` is equal to the number of
        :ref:`logging records <log-record>` that were added to it. However, this
        behaviour can decrease performance if many :ref:`logging records <log-record>`
        are added frequently, because a copy of the data in the
        :class:`~msl.io.dataset.Dataset` is created for each
        :ref:`logging record <log-record>` that is added.

        You can improve performance by specifying an initial size of the
        :class:`~msl.io.dataset.Dataset` with a `shape` or a `size` keyword argument.
        Additional empty rows (proportional to the current size of the
        :class:`~msl.io.dataset.Dataset`) are then created automatically whenever the
        size of the :class:`~msl.io.dataset.Dataset` needs to be increased. If you do
        this then you will want to call :meth:`.remove_empty_rows` before writing the
        :class:`DatasetLogging` object to a file, or before interacting with its data,
        to remove the extra rows that were created.
        """
        if not attributes or not all(isinstance(a, str) for a in attributes):
            raise ValueError('Must specify attribute names as strings, got: {}'.format(attributes))

        self._logger = None
        self._attributes = tuple(attributes)
        self._uses_asctime = 'asctime' in attributes
        self._date_fmt = date_fmt

        if isinstance(level, str):
            level = getattr(logging, level)

        # these 3 keys in the metadata are used to distinguish a DatasetLogging
        # object from a regular Dataset object
        kwargs['logging_level'] = level
        kwargs['logging_level_name'] = logging.getLevelName(level)
        kwargs['logging_date_format'] = date_fmt

        self._dtype = np.dtype([(a, object) for a in attributes])

        self._auto_resize = 'size' in kwargs or 'shape' in kwargs
        if self._auto_resize:
            if 'size' in kwargs:
                kwargs['shape'] = (kwargs.pop('size'),)
            elif isinstance(kwargs['shape'], int):
                kwargs['shape'] = (kwargs['shape'],)
            shape = kwargs['shape']
            if len(shape) != 1:
                raise ValueError('Invalid shape {}, the number of dimensions must be 1'.format(shape))
            if shape[0] < 0:
                raise ValueError('Invalid shape {}'.format(shape))

        # call Dataset.__init__ before Handler.__init__ in case the Dataset cannot be created
        Dataset.__init__(self, name, parent, False, dtype=self._dtype, **kwargs)

        self._index = np.count_nonzero(self._data)
        if self._auto_resize and self._data.shape < kwargs['shape']:
            self._resize(new_allocated=kwargs['shape'][0])

        # the Handler will overwrite the self._name attribute, so we create a reference to the
        # `name` of the Dataset and then set the `name` of the Handler after it is initialized
        name = self._name
        logging.Handler.__init__(self, level=level)
        self.set_name(name)

        self.set_logger(logger or logging.getLogger())

    def __eq__(self, other):
        # Must override __eq__ because calling root.addHandler(self) in
        # DatasetLogging.__init__ checks for the following condition:
        #   "if not (hdlr in self.handlers):"
        # and this could fail if adding multiple DatasetLogging instances
        # to the RootLogger and the Datasets are empty. Every DatasetLogging
        # instance must be unique because every Vertex is unique, so this
        # method can simply return False.
        return False

    def __hash__(self):
        # need to override for linux and macOS running Python 3.7+
        return logging.Handler.__hash__(self)

    @property
    def attributes(self):
        """:class:`tuple` of :class:`str`: The :ref:`attribute names <logrecord-attributes>`
        used by the :class:`DatasetLogging` object."""
        return self._attributes

    @property
    def date_fmt(self):
        """:class:`str`: The :class:`~datetime.datetime` :ref:`format code <strftime-strptime-behavior>`
        that is used to represent the ``asctime`` :ref:`attribute <logrecord-attributes>`."""
        return self._date_fmt

    @property
    def logger(self):
        """:class:`~logging.Logger`: The :class:`~logging.Logger` that this
        :class:`DatasetLogging` object is added to."""
        return self._logger
    def remove_empty_rows(self):
        """Remove empty rows from the :class:`~msl.io.dataset.Dataset`.

        If the :class:`DatasetLogging` object was initialized with a `shape` or a
        `size` keyword argument then the size of the :class:`~msl.io.dataset.Dataset`
        is always :math:`\\geq` the number of :ref:`logging records <log-record>`
        that were added to it. Calling this method removes the rows in the
        :class:`~msl.io.dataset.Dataset` that did not come from a
        :ref:`logging record <log-record>`.
        """
        # don't use "is not None" since this does not work as expected
        self._data = self._data[self._data[self._dtype.names[0]] != None]
    def remove_handler(self):
        """Remove this class's :class:`~logging.Handler` from the associated :class:`~logging.Logger`.

        After calling this method :ref:`logging records <log-record>` are no longer
        added to the :class:`~msl.io.dataset.Dataset`.
        """
        if self._logger is not None:
            self._logger.removeHandler(self)
    def set_logger(self, logger):
        """Add this class's :class:`~logging.Handler` to a :class:`~logging.Logger`.

        Parameters
        ----------
        logger : :class:`~logging.Logger`
            The :class:`~logging.Logger` to add this class's :class:`~logging.Handler` to.
        """
        if not isinstance(logger, logging.Logger):
            raise TypeError('Must be a logging.Logger object')

        level = self.metadata.logging_level
        if logger.level == 0 or logger.level > level:
            logger.setLevel(level)

        self.remove_handler()
        logger.addHandler(self)
        self._logger = logger
    def emit(self, record):
        """Overrides the :meth:`~logging.Handler.emit` method."""
        record.message = record.getMessage()
        if self._uses_asctime:
            record.asctime = datetime.fromtimestamp(record.created).strftime(self._date_fmt)
        latest = tuple(record.__dict__[a] for a in self._attributes)
        row = np.asarray(latest, dtype=self._dtype)
        if self._auto_resize:
            if self._index >= self._data.size:
                self._resize()
            self._data[self._index] = row
            self._index += 1
        else:
            self._data = np.append(self._data, row)

    def _resize(self, new_allocated=None):
        # Over-allocates proportional to the size of the ndarray, making room
        # for additional growth. This follows the over-allocating procedure that
        # Python uses when appending to a list object, see `list_resize` in
        # https://github.com/python/cpython/blob/master/Objects/listobject.c
        if new_allocated is None:
            new_size = self._data.size + 1
            new_allocated = new_size + (new_size >> 3) + (3 if new_size < 9 else 6)
        # don't use self._data.resize() because that fills the newly-created rows
        # with 0, and we want to fill the new rows with None to be explicit that
        # the new rows are not associated with logging records
        array = np.empty((new_allocated,), dtype=self._dtype)
        array[:self._data.size] = self._data
        self._data = array
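
The sketch below (not part of the module source) illustrates how the pre-allocation and
:meth:`.remove_empty_rows` behaviour described in the ``__init__`` docstring fit together.
It assumes that the root object is created with ``msl.io.JSONWriter`` and that
:meth:`~msl.io.group.Group.create_dataset_logging` accepts the parameters documented
above; the file name, dataset name, and chosen attributes are illustrative only.

    # Minimal usage sketch, assuming msl.io.JSONWriter and
    # Group.create_dataset_logging() behave as documented above.
    import logging

    from msl.io import JSONWriter

    root = JSONWriter('example.json')  # hypothetical output file

    # Pre-allocate 100 rows so that emit() fills rows in place instead of
    # copying the underlying array for every logging record.
    log_dset = root.create_dataset_logging(
        'log',
        level=logging.INFO,
        attributes=['asctime', 'levelname', 'name', 'message'],
        size=100,
    )

    logging.getLogger().info('this record is appended to the dataset')

    # Drop the unused, pre-allocated rows before writing the file.
    log_dset.remove_empty_rows()
    root.write()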