Source code for msl.io.readers.excel

"""
Read an Excel spreadsheet (.xls and .xlsx).
"""
from datetime import datetime

from . import _xlrd
from .spreadsheet import Spreadsheet


[docs]class ExcelReader(Spreadsheet): def __init__(self, file, **kwargs): """Read an Excel spreadsheet (.xls and .xlsx). This class simply provides a convenience for reading information from Excel spreadsheets. It is not registered as a :class:`~msl.io.base.Reader` because the information in an Excel spreadsheet is unstructured and therefore one cannot generalize how to parse an Excel spreadsheet to create a :class:`~msl.io.base.Root`. Parameters ---------- file : :class:`str` The location of an Excel spreadsheet on a local hard drive or on a network. **kwargs All keyword arguments are passed to :func:`~xlrd.open_workbook`. Can use an `encoding` keyword argument as an alias for `encoding_override`. The default `on_demand` value is :data:`True`. Examples -------- >>> from msl.io import ExcelReader # doctest: +SKIP >>> excel = ExcelReader('lab_environment.xlsx') # doctest: +SKIP """ super(ExcelReader, self).__init__(file) # change the default on_demand value if 'on_demand' not in kwargs: kwargs['on_demand'] = True # 'encoding' is an alias for 'encoding_override' encoding = kwargs.pop('encoding', None) if encoding is not None: kwargs['encoding_override'] = encoding self._workbook = _xlrd.open_workbook(file, **kwargs) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() @property def workbook(self): """:class:`~xlrd.book.Book`: The workbook instance.""" return self._workbook
[docs] def close(self): """Calls :meth:`~xlrd.book.Book.release_resources`.""" self._workbook.release_resources()
[docs] def read(self, cell=None, sheet=None, as_datetime=True): """Read values from the Excel spreadsheet. Parameters ---------- cell : :class:`str`, optional The cell(s) to read. For example, ``C9`` will return a single value and ``C9:G20`` will return all values in the specified range. If not specified then returns all values in the specified `sheet`. sheet : :class:`str`, optional The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet. as_datetime : :class:`bool`, optional Whether dates should be returned as :class:`~datetime.datetime` or :class:`~datetime.date` objects. If :data:`False` then dates are returned as an ISO 8601 string. Returns ------- The value(s) of the requested cell(s). Examples -------- .. invisible-code-block: pycon >>> from msl.io import ExcelReader >>> excel = ExcelReader('./tests/samples/lab_environment.xlsx') >>> excel.read() [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)] >>> excel.read('B2') 49.82 >>> excel.read('A:A') [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)] >>> excel.read('A1:B1') [('temperature', 'humidity')] >>> excel.read('A2:B4') [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)] """ if not sheet: names = self.sheet_names() if len(names) > 1: raise ValueError('{!r} contains the following sheets:\n {}\n' 'You must specify the name of the sheet to read' .format(self._file, ', '.join(repr(n) for n in names))) sheet_name = names[0] else: sheet_name = sheet try: sheet = self._workbook.sheet_by_name(sheet_name) except _xlrd.XLRDError: # TODO want to raise ValueError without nested exceptions # Can "raise from None" when dropping Python 2 support sheet = None if sheet is None: raise ValueError('There is no sheet named {!r} in {!r}'.format(sheet_name, self._file)) if not cell: return [tuple(self._value(sheet, r, c, as_datetime) for c in range(sheet.ncols)) for r in range(sheet.nrows)] split = cell.split(':') r1, c1 = self.to_indices(split[0]) if r1 is None: r1 = 0 if len(split) == 1: try: return self._value(sheet, r1, c1, as_datetime) except IndexError: return if r1 >= sheet.nrows or c1 >= sheet.ncols: return [] r2, c2 = self.to_indices(split[1]) r2 = sheet.nrows if r2 is None else min(r2+1, sheet.nrows) c2 = min(c2+1, sheet.ncols) return [tuple(self._value(sheet, r, c, as_datetime) for c in range(c1, c2)) for r in range(r1, r2)]
[docs] def sheet_names(self): """Get the names of all sheets in the Excel spreadsheet. Returns ------- :class:`tuple` of :class:`str` The names of all sheets. """ return tuple(self._workbook.sheet_names())
def _value(self, sheet, row, col, as_datetime): """Get the value of a cell.""" cell = sheet.cell(row, col) t = cell.ctype if t == _xlrd.XL_CELL_NUMBER: if cell.value.is_integer(): return int(cell.value) return cell.value elif t == _xlrd.XL_CELL_DATE: dt = datetime(*_xlrd.xldate_as_tuple(cell.value, self._workbook.datemode)) if dt.hour + dt.minute + dt.second + dt.microsecond == 0: dt = dt.date() if as_datetime: return dt return str(dt) elif t == _xlrd.XL_CELL_BOOLEAN: return bool(cell.value) elif t == _xlrd.XL_CELL_EMPTY: return None elif t == _xlrd.XL_CELL_ERROR: return _xlrd.error_text_from_code[cell.value] else: return cell.value.strip()