Source code for msl.io.readers.excel

"""
Read an Excel spreadsheet (.xls and .xlsx).
"""
from datetime import datetime

from . import _xlrd
from .spreadsheet import Spreadsheet


[docs]class ExcelReader(Spreadsheet):

    def __init__(self, file, **kwargs):
        """Read an Excel spreadsheet (.xls and .xlsx).

        This class simply provides a convenience for reading information
        from Excel spreadsheets. It is not registered as a :class:`~msl.io.base.Reader`
        because the information in an Excel spreadsheet is unstructured and therefore
        one cannot generalize how to parse an Excel spreadsheet to create a
        :class:`~msl.io.base.Root`.

        Parameters
        ----------
        file : :class:`str`
            The location of an Excel spreadsheet on a local hard drive or on a network.
        **kwargs
            All keyword arguments are passed to :func:`~xlrd.open_workbook`. Can use
            an `encoding` keyword argument as an alias for `encoding_override`. The
            default `on_demand` value is :data:`True`.

        Examples
        --------
        >>> from msl.io import ExcelReader  # doctest: +SKIP
        >>> excel = ExcelReader('lab_environment.xlsx')  # doctest: +SKIP
        """
        super(ExcelReader, self).__init__(file)

        # change the default on_demand value
        if 'on_demand' not in kwargs:
            kwargs['on_demand'] = True

        # 'encoding' is an alias for 'encoding_override'
        encoding = kwargs.pop('encoding', None)
        if encoding is not None:
            kwargs['encoding_override'] = encoding

        self._workbook = _xlrd.open_workbook(file, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @property
    def workbook(self):
        """:class:`~xlrd.book.Book`: The workbook instance."""
        return self._workbook

[docs]    def close(self):
        """Calls :meth:`~xlrd.book.Book.release_resources`."""
        self._workbook.release_resources()

[docs]    def read(self, cell=None, sheet=None, as_datetime=True):
        """Read values from the Excel spreadsheet.

        Parameters
        ----------
        cell : :class:`str`, optional
            The cell(s) to read. For example, ``C9`` will return a single value
            and ``C9:G20`` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet : :class:`str`, optional
            The name of the sheet to read the value(s) from. If there is only
            one sheet in the spreadsheet then you do not need to specify the name
            of the sheet.
        as_datetime : :class:`bool`, optional
            Whether dates should be returned as :class:`~datetime.datetime` or
            :class:`~datetime.date` objects. If :data:`False` then dates are
            returned as an ISO 8601 string.

        Returns
        -------
        The value(s) of the requested cell(s).

        Examples
        --------
        .. invisible-code-block: pycon

           >>> from msl.io import ExcelReader
           >>> excel = ExcelReader('./tests/samples/lab_environment.xlsx')

        >>> excel.read()
        [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
        >>> excel.read('B2')
        49.82
        >>> excel.read('A:A')
        [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
        >>> excel.read('A1:B1')
        [('temperature', 'humidity')]
        >>> excel.read('A2:B4')
        [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]
        """
        if not sheet:
            names = self.sheet_names()
            if len(names) > 1:
                raise ValueError('{!r} contains the following sheets:\n  {}\n'
                                 'You must specify the name of the sheet to read'
                                 .format(self._file, ', '.join(repr(n) for n in names)))
            sheet_name = names[0]
        else:
            sheet_name = sheet

        try:
            sheet = self._workbook.sheet_by_name(sheet_name)
        except _xlrd.XLRDError:
            # TODO want to raise ValueError without nested exceptions
            #  Can "raise from None" when dropping Python 2 support
            sheet = None

        if sheet is None:
            raise ValueError('There is no sheet named {!r} in {!r}'.format(sheet_name, self._file))

        if not cell:
            return [tuple(self._value(sheet, r, c, as_datetime) for c in range(sheet.ncols))
                    for r in range(sheet.nrows)]

        split = cell.split(':')
        r1, c1 = self.to_indices(split[0])
        if r1 is None:
            r1 = 0

        if len(split) == 1:
            try:
                return self._value(sheet, r1, c1, as_datetime)
            except IndexError:
                return

        if r1 >= sheet.nrows or c1 >= sheet.ncols:
            return []

        r2, c2 = self.to_indices(split[1])
        r2 = sheet.nrows if r2 is None else min(r2+1, sheet.nrows)
        c2 = min(c2+1, sheet.ncols)
        return [tuple(self._value(sheet, r, c, as_datetime) for c in range(c1, c2))
                for r in range(r1, r2)]

[docs]    def sheet_names(self):
        """Get the names of all sheets in the Excel spreadsheet.

        Returns
        -------
        :class:`tuple` of :class:`str`
            The names of all sheets.
        """
        return tuple(self._workbook.sheet_names())

    def _value(self, sheet, row, col, as_datetime):
        """Get the value of a cell."""
        cell = sheet.cell(row, col)
        t = cell.ctype
        if t == _xlrd.XL_CELL_NUMBER:
            if cell.value.is_integer():
                return int(cell.value)
            return cell.value
        elif t == _xlrd.XL_CELL_DATE:
            dt = datetime(*_xlrd.xldate_as_tuple(cell.value, self._workbook.datemode))
            if dt.hour + dt.minute + dt.second + dt.microsecond == 0:
                dt = dt.date()
            if as_datetime:
                return dt
            return str(dt)
        elif t == _xlrd.XL_CELL_BOOLEAN:
            return bool(cell.value)
        elif t == _xlrd.XL_CELL_EMPTY:
            return None
        elif t == _xlrd.XL_CELL_ERROR:
            return _xlrd.error_text_from_code[cell.value]
        else:
            return cell.value.strip()