def _columns(cls, o, n_cols=0):
    """Generate one RowProxy for each column described in ``o.meta['schema']``.

    The first schema row is the header (template) row; every following row
    describes one column of the data table. If more columns are requested
    than the schema currently describes, placeholder rows are appended to
    the schema list in place as the generator advances.

    Args:
        o (any having .meta dict attr and .n_cols attr): object whose
            ``meta['schema']`` list is read and, when needed, extended.
        n_cols (int, optional): minimum number of columns to generate.

    Generates:
        RowProxy: a fresh proxy, keyed by the header row, wrapping one
            schema row.

    """
    schema = o.meta['schema']

    # The header row must always be present and match the template.
    assert len(schema) >= 1
    assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE, (
        o.meta['schema'][0], MPRowsFile.SCHEMA_TEMPLATE)

    # Columns of the data table are rows of the schema table, so the
    # number of schema rows past the header is a lower bound as well.
    last_pos = max(n_cols, o.n_cols, len(schema) - 1)

    for pos in range(1, last_pos + 1):
        # A new proxy is built for every column on purpose: callers may
        # collect the output into a list, and a shared proxy would make
        # every element show the last row.
        proxy = RowProxy(schema[0])

        if pos >= len(schema):
            # No schema row yet for this column: add a placeholder with
            # the position and a generated name, padded with None out to
            # the width of the header row.
            padding = [None] * (len(schema[0]) - 2)
            schema.append([pos, 'col{}'.format(pos)] + padding)

        yield proxy.set_row(schema[pos])

    assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE
def _columns(cls, o, n_cols=0):
    """Generate one RowProxy for each column described in ``o.meta['schema']``.

    The first schema row is the header (template) row; every following row
    describes one column of the data table. If more columns are requested
    than the schema currently describes, placeholder rows are appended to
    the schema list in place as the generator advances.

    Args:
        o (any having .meta dict attr and .n_cols attr): object whose
            ``meta['schema']`` list is read and, when needed, extended.
        n_cols (int, optional): minimum number of columns to generate.

    Generates:
        RowProxy: a fresh proxy, keyed by the header row, wrapping one
            schema row.

    """
    schema = o.meta['schema']

    # The header row must always be present and match the template.
    assert len(schema) >= 1
    assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE, (
        o.meta['schema'][0], MPRowsFile.SCHEMA_TEMPLATE)

    # Columns of the data table are rows of the schema table, so the
    # number of schema rows past the header is a lower bound as well.
    last_pos = max(n_cols, o.n_cols, len(schema) - 1)

    for pos in range(1, last_pos + 1):
        # A new proxy is built for every column on purpose: callers may
        # collect the output into a list, and a shared proxy would make
        # every element show the last row.
        proxy = RowProxy(schema[0])

        if pos >= len(schema):
            # No schema row yet for this column: add a placeholder with
            # the position and a generated name, padded with None out to
            # the width of the header row.
            padding = [None] * (len(schema[0]) - 2)
            schema.append([pos, 'col{}'.format(pos)] + padding)

        yield proxy.set_row(schema[pos])

    assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE
def select(self, predicate=None, headers=None):
    """Lazily select rows from the reader, optionally filtered and projected.

    Args:
        predicate (callable, optional): if given, it is called with each
            row and only rows for which it returns a true value are
            included in the output.
        headers (list, optional): if given and non-empty, a list or tuple
            of header names; each output row is a RowProxy restricted to
            those names, in that order.

    Returns:
        iterable: a lazy iterator over the selected rows.

    WARNING: The rows produced here are RowProxy objects that are reused
    from one row to the next (when ``headers`` is given, a single proxy is
    shared by all output rows). If you construct a list directly from the
    output of this method, the list will hold multiple references to one
    proxy showing the last result row. Copy the data out of each row as
    you go instead, for example:

        [r.dict for r in reader.select(lambda r: r.stusab == 'CA')]

    """
    if headers:
        proxy = RowProxy(headers)

        def getter(row):
            values = row.dict
            # Always hand set_row a tuple. operator.itemgetter would
            # return a bare scalar when exactly one header is requested.
            return proxy.set_row(tuple(values[h] for h in headers))
    else:
        getter = None

    rows = iter(self)

    # Generator expressions are lazy on both Python 2 and 3. The builtin
    # filter() builds a full list on Python 2, which would collapse every
    # element to the last row because the row proxy is reused.
    if predicate is not None:
        rows = (row for row in rows if predicate(row))

    if getter is not None:
        rows = (getter(row) for row in rows)

    return rows
def __iter__(self):
    """Iterate over the rows of the partition as RowProxy objects.

    Reads ``self._h5_file.root.partition.rows``, deserializes every column
    value of each table row, and yields the result wrapped in a RowProxy
    keyed by ``self.headers``. ``self.pos`` is incremented after each row
    is consumed, and ``self._in_iteration`` is True while rows are being
    generated and reset to False when iteration ends for any reason.
    If the partition has no ``rows`` table, nothing is yielded.

    WARNING: This routine generates RowProxy objects. A single RowProxy
    is reused for every row, so if you construct a list directly from the
    output of this method, the list will hold multiple references to one
    proxy showing the last result row. If you need a list, extract the
    inner row or convert each RowProxy to a dict as you iterate.

    Yields:
        RowProxy: the shared proxy, set to the current row.

    """
    rp = RowProxy(self.headers)

    try:
        if 'rows' not in self._h5_file.root.partition:
            # The rows table was never created. End the generator with a
            # plain return: raising StopIteration inside a generator is
            # turned into RuntimeError on Python 3.7+ (PEP 479).
            return

        self._in_iteration = True
        table = self._h5_file.root.partition.rows
        colnames = table.colnames  # loop-invariant, looked up once

        for row in table.iterrows():
            yield rp.set_row([_deserialize(row[c]) for c in colnames])
            self.pos += 1
    finally:
        # Runs on normal exhaustion, on error, and when the generator is
        # closed early by the consumer.
        self._in_iteration = False