Source code for msl.io.readers.gsheets

"""
Read a Google Sheets spreadsheet.
"""
import os
import re

from .spreadsheet import Spreadsheet
from ..google_api import GCellType
from ..google_api import GDrive
from ..google_api import GSheets

_google_file_id_regex = re.compile(r'^1[a-zA-Z0-9_-]{43}$')


[docs]class GSheetsReader(Spreadsheet): def __init__(self, file, **kwargs): """Read a Google Sheets spreadsheet. This class simply provides a convenience for reading information from Google spreadsheets. It is not registered as a :class:`~msl.io.base.Reader` because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a :class:`~msl.io.base.Root`. Parameters ---------- file : :class:`str` The ID or path of a Google Sheets spreadsheet. **kwargs All keyword arguments are passed to :class:`~msl.io.google_api.GSheets`. Examples -------- >>> from msl.io import GSheetsReader # doctest: +SKIP >>> sheets = GSheetsReader('Google Drive/registers/equipment.gsheet') # doctest: +SKIP >>> sheets = GSheetsReader('1TI3pM-534SZ5DQTEZ-7HCI04648f8ZpLGbfHWJu9FSo') # doctest: +SKIP """ super(GSheetsReader, self).__init__(file) if not kwargs.get('read_only', True): raise ValueError('Must instantiate {} in read-only mode'.format(self.__class__.__name__)) path, ext = os.path.splitext(file) folders, _ = os.path.split(path) if ext or folders or not _google_file_id_regex.match(path): self._spreadsheet_id = GDrive(**kwargs).file_id(path, mime_type=GSheets.MIME_TYPE) else: self._spreadsheet_id = path self._gsheets = GSheets(**kwargs) self._cached_sheet_name = None def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close()
[docs] def close(self): """Close the connection to the GSheet API service. .. versionadded:: 0.2 """ self._gsheets.close()
[docs] def read(self, cell=None, sheet=None, as_datetime=True): """Read values from the Google Sheets spreadsheet. Parameters ---------- cell : :class:`str`, optional The cell(s) to read. For example, ``C9`` will return a single value and ``C9:G20`` will return all values in the specified range. If not specified then returns all values in the specified `sheet`. sheet : :class:`str`, optional The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet. as_datetime : :class:`bool`, optional Whether dates should be returned as :class:`~datetime.datetime` or :class:`~datetime.date` objects. If :data:`False` then dates are returned as a string in the format of the spreadsheet cell. Returns ------- The value(s) of the requested cell(s). Examples -------- .. invisible-code-block: pycon >>> SKIP_IF_NO_GOOGLE_SHEETS_READ_TOKEN() >>> from msl.io import GSheetsReader >>> sheets = GSheetsReader('1TI3pM-534SZ5DQTEZ-7vCI04l48f8ZpLGbfEWJuCFSo', account='testing') >>> sheets.read() [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)] >>> sheets.read('B2') 49.82 >>> sheets.read('A:A') [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)] >>> sheets.read('A1:B1') [('temperature', 'humidity')] >>> sheets.read('A2:B4') [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)] """ if not sheet: if self._cached_sheet_name: sheet = self._cached_sheet_name else: names = self.sheet_names() if len(names) != 1: raise ValueError('{!r} contains the following sheets:\n {}\n' 'You must specify the name of the sheet to read' .format(self._file, ', '.join(repr(n) for n in names))) sheet = names[0] self._cached_sheet_name = sheet if cell: ranges = '{}!{}'.format(sheet, cell) else: ranges = sheet cells = self._gsheets.cells(self._spreadsheet_id, ranges=ranges) if sheet not in cells: raise ValueError('There is no sheet named {!r} in {!r}'.format(sheet, self._file)) values = [] for row in cells[sheet]: row_values = [] for item in row: if item.type == GCellType.DATE: value = GSheets.to_datetime(item.value).date() if as_datetime else item.formatted elif item.type == GCellType.DATE_TIME: value = GSheets.to_datetime(item.value) if as_datetime else item.formatted else: value = item.value row_values.append(value) values.append(tuple(row_values)) if not cell: return values if ':' not in cell: if values: return values[0][0] return return values
[docs] def sheet_names(self): """Get the names of all sheets in the Google Sheets spreadsheet. Returns ------- :class:`tuple` of :class:`str` The names of all sheets. """ return self._gsheets.sheet_names(self._spreadsheet_id)