def recordlookup(table, key, dictionary=None):
    """
    Load a dictionary with data from the given table, mapping to record
    objects.

    Every key maps to a list holding one `Record` per data row that has
    that key. If `dictionary` is given it is filled in and returned,
    otherwise a new ``dict`` is created.

    """

    if dictionary is None:
        dictionary = dict()

    rows = iter(table)
    header = next(rows)
    fieldnames = [text_type(name) for name in header]
    keyindices = asindices(header, key)
    assert len(keyindices) > 0, 'no key selected'
    keyof = operator.itemgetter(*keyindices)

    for row in rows:
        k = keyof(row)
        record = Record(row, fieldnames)
        # fetch, modify, then store back (rather than mutating in place)
        # so that this works properly with shelve
        bucket = dictionary[k] if k in dictionary else []
        bucket.append(record)
        dictionary[k] = bucket

    return dictionary
def iteraddfield(source, field, value, index):
    """
    Generate the rows of `source` with one extra field inserted.

    :param source: iterable of rows whose first row is the header
    :param field: name of the new field
    :param value: either a constant placed in every row, or a callable that
        receives each row wrapped as a `Record` and returns the value
    :param index: position at which the field is inserted; ``None`` means
        after the last header field
    :returns: generator of tuples, header first

    A completely empty `source` (no header row) is treated as having an
    empty header, so only the new field's header is generated.

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # a bare next() would leak StopIteration out of this generator,
        # which Python 3.7+ turns into a RuntimeError (PEP 479)
        hdr = []

    # determine index of new field
    if index is None:
        index = len(hdr)

    # construct output fields
    outhdr = list(hdr)
    outhdr.insert(index, field)
    yield tuple(outhdr)

    if callable(value):
        # wrap rows as records if using calculated value; the field names
        # are only needed (and therefore only built) on this path
        flds = [text_type(f) for f in hdr]
        for row in it:
            rec = Record(row, flds)
            outrow = list(rec)
            outrow.insert(index, value(rec))
            yield tuple(outrow)
    else:
        for row in it:
            outrow = list(row)
            outrow.insert(index, value)
            yield tuple(outrow)
def iteraddfieldusingcontext(table, field, query):
    """
    Generate the rows of `table` with a new trailing field whose value is
    computed from the previous, current and next rows.

    :param table: iterable of rows whose first row is the header
    :param field: name of the field appended to the header
    :param query: callable invoked as ``query(prv, cur, nxt)``; `prv` is the
        previous output row (including its computed value) or ``None`` for
        the first row, `nxt` is ``None`` for the last row
    :returns: generator of tuples, header first

    A table with no header is treated as having an empty header, and a
    table with no data rows generates the header only.

    """

    it = iter(table)
    try:
        hdr = tuple(next(it))
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        hdr = ()
    flds = [text_type(f) for f in hdr]
    yield hdr + (field, )
    flds.append(field)

    it = (Record(row, flds) for row in it)
    prv = None
    try:
        cur = next(it)
    except StopIteration:
        return  # no data rows, nothing more to generate

    for nxt in it:
        v = query(prv, cur, nxt)
        yield tuple(cur) + (v, )
        # the previous row handed to the next query includes the new value
        prv = Record(tuple(cur) + (v, ), flds)
        cur = nxt

    # handle last row
    v = query(prv, cur, None)
    yield tuple(cur) + (v, )
def iterrowselect(source, where, missing, complement):
    """
    Generate the header of `source` followed by the rows selected by
    `where`.

    :param source: iterable of rows whose first row is the header
    :param where: callable receiving each row wrapped as a `Record`; its
        result is interpreted by truthiness
    :param missing: passed through to `Record` as its ``missing`` argument
    :param complement: if true, generate the rows for which `where` is
        false instead
    :returns: generator of tuples, header first; nothing at all if `source`
        is completely empty

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # empty in, empty out; a bare next() here would surface as a
        # RuntimeError under PEP 479
        return
    flds = [text_type(f) for f in hdr]
    yield tuple(hdr)

    # normalise both sides to bool so that the XOR below is a comparison of
    # truth values; comparing a raw result such as None or '' against
    # False would wrongly count it as a match
    complement = bool(complement)
    for row in it:
        rec = Record(row, flds, missing=missing)
        if bool(where(rec)) != complement:  # XOR
            yield tuple(rec)
def iterrowmap(source, rowmapper, header, failonerror):
    """
    Generate `header` followed by the result of applying `rowmapper` to
    each data row of `source`.

    :param source: iterable of rows whose first row is the header
    :param rowmapper: callable receiving each row wrapped as a `Record` and
        returning an iterable of output values
    :param header: header of the output table
    :param failonerror: if true, an exception raised while mapping a row is
        re-raised; otherwise the offending row is silently skipped
    :returns: generator of tuples, header first

    A completely empty `source` is treated as having an empty header.

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        hdr = []
    flds = [text_type(f) for f in hdr]
    yield tuple(header)

    for row in it:
        rec = Record(row, flds)
        try:
            outrow = tuple(rowmapper(rec))
        except Exception:
            if failonerror:
                raise  # bare raise keeps the original traceback
            # otherwise drop this row and carry on
        else:
            # yield outside the try so that only mapping errors are
            # subject to the failonerror policy
            yield outrow
def __iter__(self):
    """
    Write `self.table` as HTML to `self.source` while passing every row
    through to the caller.

    The header is written and then yielded, followed by each data row in
    turn; the closing markup is written once the table is exhausted. A
    table with no header row at all still produces the opening and closing
    markup (written for an empty header) but yields nothing.

    """

    table = self.table
    source = self.source
    encoding = self.encoding
    errors = self.errors
    lineterminator = self.lineterminator
    caption = self.caption
    index_header = self.index_header
    tr_style = self.tr_style
    td_styles = self.td_styles
    vrepr = self.vrepr
    truncate = self.truncate

    with source.open('wb') as buf:

        # deal with text encoding
        if PY2:
            codec = getcodec(encoding)
            f = codec.streamwriter(buf, errors=errors)
        else:
            f = io.TextIOWrapper(buf,
                                 encoding=encoding,
                                 errors=errors,
                                 newline='')

        # write the table
        try:

            it = iter(table)

            # write header
            try:
                hdr = next(it)
                has_header = True
            except StopIteration:
                # a bare next() would leak StopIteration out of this
                # generator, i.e. a RuntimeError under PEP 479
                hdr = []
                has_header = False
            _write_begin(f, hdr, lineterminator, caption, index_header,
                         truncate)
            if has_header:
                yield hdr

            # write body
            if tr_style and callable(tr_style):
                # wrap as records
                it = (Record(row, hdr) for row in it)
            for row in it:
                _write_row(f, hdr, row, lineterminator, vrepr,
                           tr_style, td_styles, truncate)
                yield row

            # finish up
            _write_end(f, lineterminator)
            f.flush()

        finally:
            if not PY2:
                # release the binary buffer so that closing the text
                # wrapper does not close it; the with block owns it
                f.detach()
def iterrowmapmany(source, rowgenerator, header, failonerror):
    """
    Generate `header` followed by every row produced by `rowgenerator` for
    each data row of `source`.

    :param source: iterable of rows whose first row is the header
    :param rowgenerator: callable receiving each row wrapped as a `Record`
        and returning an iterable of output rows
    :param header: header of the output table
    :param failonerror: if true, an exception raised while generating rows
        is re-raised; otherwise the rest of the output for that input row
        is abandoned and processing continues with the next input row
    :returns: generator of tuples, header first

    A completely empty `source` is treated as having an empty header.

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        hdr = []
    flds = [text_type(f) for f in hdr]
    yield tuple(header)

    for row in it:
        rec = Record(row, flds)
        try:
            for outrow in rowgenerator(rec):
                yield tuple(outrow)
        except Exception:
            if failonerror:
                raise  # bare raise keeps the original traceback
def iterselectusingcontext(table, query):
    """
    Generate the header of `table` followed by the rows for which `query`
    is true, where `query` can see the neighbouring rows.

    :param table: iterable of rows whose first row is the header
    :param query: callable invoked as ``query(prv, cur, nxt)`` with rows
        wrapped as `Record` objects; `prv` is ``None`` for the first row
        and `nxt` is ``None`` for the last row
    :returns: generator yielding the header tuple and then the selected
        records; nothing at all if `table` is completely empty

    """

    it = iter(table)
    try:
        hdr = tuple(next(it))
    except StopIteration:
        # empty in, empty out; a bare next() here would surface as a
        # RuntimeError under PEP 479
        return
    flds = [text_type(f) for f in hdr]
    yield hdr

    it = (Record(row, flds) for row in it)
    prv = None
    try:
        cur = next(it)
    except StopIteration:
        return  # header only, no rows to select from

    for nxt in it:
        if query(prv, cur, nxt):
            yield cur
        prv = cur
        cur = nxt

    # handle last row
    if query(prv, cur, None):
        yield cur
def iterfieldmap(source, mappings, failonerror, errorvalue):
    """
    Generate a table whose fields are computed from `source` according to
    `mappings`.

    :param source: iterable of rows whose first row is the header
    :param mappings: mapping of output field name to a mapping
        specification, tried in this order: a value found in the source
        header; an ``int`` smaller than the header length; a string (handed
        to `expr`); a callable applied to the row; or a 2-item tuple/list
        ``(source_field, fm)`` where `fm` is a callable or a ``dict``
    :param failonerror: ``'inline'`` stores the exception itself as the
        value; any other true value re-raises; a false value substitutes
        `errorvalue`
    :param errorvalue: value used when a mapping fails and `failonerror` is
        false
    :returns: generator of tuples, header first
    :raises ArgumentError: if a mapping specification is not recognised

    A completely empty `source` is treated as having an empty header.

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        hdr = []
    flds = [text_type(f) for f in hdr]

    # snapshot the output fields so that the header and every row agree
    # with the mapping functions built below, even if `mappings` is changed
    # while this generator is being consumed
    outhdr = tuple(mappings.keys())
    yield outhdr

    mapfuns = dict()
    for outfld, m in mappings.items():
        if m in hdr:
            mapfuns[outfld] = operator.itemgetter(m)
        elif isinstance(m, int) and m < len(hdr):
            mapfuns[outfld] = operator.itemgetter(m)
        elif isinstance(m, string_types):
            mapfuns[outfld] = expr(m)
        elif callable(m):
            mapfuns[outfld] = m
        elif isinstance(m, (tuple, list)) and len(m) == 2:
            srcfld = m[0]
            fm = m[1]
            if callable(fm):
                mapfuns[outfld] = composefun(fm, srcfld)
            elif isinstance(fm, dict):
                mapfuns[outfld] = composedict(fm, srcfld)
            else:
                raise ArgumentError('expected callable or dict')
        else:
            raise ArgumentError('invalid mapping %r: %r' % (outfld, m))

    # wrap rows as records
    for row in it:
        rec = Record(row, flds)
        outrow = list()
        for outfld in outhdr:
            try:
                val = mapfuns[outfld](rec)
            except Exception as e:
                if failonerror == 'inline':
                    val = e
                elif failonerror:
                    raise  # bare raise keeps the original traceback
                else:
                    val = errorvalue
            outrow.append(val)
        yield tuple(outrow)
def iteraddfields(source, field_defs):
    """
    Generate the rows of `source` with several extra fields inserted.

    :param source: iterable of rows whose first row is the header
    :param field_defs: iterable of ``(name, value)`` or
        ``(name, value, index)``; `value` is a constant or a callable
        receiving the row wrapped as a `Record`. Without an index the field
        goes after the header fields accumulated so far. Fields are
        inserted in the order given, so a later index refers to the header
        as already extended by the earlier definitions.
    :returns: generator of tuples, header first

    A completely empty `source` is treated as having an empty header, so
    only the header of the new fields is generated.

    """

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        hdr = []

    # initialize output fields and indices
    outhdr = list(hdr)
    value_indexes = []

    for fdef in field_defs:
        # determine the defined field index
        if len(fdef) == 2:
            name, value = fdef
            index = len(outhdr)
        else:
            name, value, index = fdef

        # insert the name into the header at the appropriate index
        outhdr.insert(index, name)

        # remember the value/index pairs for later
        value_indexes.append((value, index))
    yield tuple(outhdr)

    # field names are only needed to build records for calculated values
    if any(callable(value) for value, _ in value_indexes):
        flds = [text_type(f) for f in hdr]
    else:
        flds = None

    for row in it:
        outrow = list(row)
        rec = None  # built lazily, at most once per row

        # add each defined field into the row at the appropriate index
        for value, index in value_indexes:
            if callable(value):
                # wrap row as record if using calculated value
                if rec is None:
                    rec = Record(row, flds)
                outrow.insert(index, value(rec))
            else:
                outrow.insert(index, value)

        yield tuple(outrow)
def recordlookupone(table, key, dictionary=None, strict=False):
    """
    Load a dictionary with data from the given table, mapping to record
    objects, assuming there is at most one row for each key.

    The first row seen for a key wins. Later rows with the same key are
    ignored, unless `strict` is true, in which case `DuplicateKeyError` is
    raised for them.

    """

    if dictionary is None:
        dictionary = dict()

    rows = iter(table)
    header = next(rows)
    fieldnames = [text_type(name) for name in header]
    keyindices = asindices(header, key)
    assert len(keyindices) > 0, 'no key selected'
    keyof = operator.itemgetter(*keyindices)

    for row in rows:
        k = keyof(row)
        if k in dictionary:
            if strict:
                raise DuplicateKeyError(k)
            # not strict: keep the record already stored for this key
            continue
        dictionary[k] = Record(row, fieldnames)

    return dictionary
def iterproblems(table, constraints, expected_header):
    """
    Generate a table of validation problems found in `table`.

    Output rows have the fields ``('name', 'row', 'field', 'value',
    'error')``, where `error` is the name of the exception class describing
    the problem. The checks made are:

    - ``__header__`` (row 0): the actual header differs from
      `expected_header`, when one is given;
    - ``__len__``: a data row does not have as many values as the header;
    - one entry per failing user constraint. A constraint is a dictionary
      that may have the keys ``name``, ``field``, ``getter``, ``test`` and
      ``assertion``. The target value is obtained via ``getter`` (derived
      from ``field`` if absent, else the whole record); ``test`` fails if
      it raises, ``assertion`` fails if it raises or returns a false value.

    :param table: iterable of rows whose first row is the header
    :param constraints: constraint specifications, normalised via
        `normalize_constraints`
    :param expected_header: the header the table should have, or ``None``
        to accept the actual header

    Failed checks are reported as ``AssertionError`` by explicit comparison
    rather than by `assert` statements, so that they are still detected
    when Python runs with assertions disabled (``-O``). A completely empty
    `table` is treated as having an empty header.

    """

    outhdr = ('name', 'row', 'field', 'value', 'error')
    yield outhdr

    it = iter(table)
    try:
        actual_header = next(it)
    except StopIteration:
        # avoid leaking StopIteration from a generator (PEP 479)
        actual_header = []

    if expected_header is None:
        flds = [text_type(f) for f in actual_header]
    else:
        expected_flds = [text_type(f) for f in expected_header]
        actual_flds = [text_type(f) for f in actual_header]
        if not (expected_flds == actual_flds):
            yield ('__header__', 0, None, None, AssertionError.__name__)
        flds = expected_flds

    local_constraints = normalize_constraints(constraints, flds)

    # setup getters
    for constraint in local_constraints:
        if 'getter' not in constraint and 'field' in constraint:
            # should ensure FieldSelectionError if bad field in constraint
            indices = asindices(flds, constraint['field'])
            constraint['getter'] = operator.itemgetter(*indices)

    # generate problems
    expected_len = len(flds)
    for rownum, row in enumerate(it, 1):
        row = tuple(row)

        # row length constraint
        l = len(row)
        if l != expected_len:
            yield ('__len__', rownum, None, l, AssertionError.__name__)

        # user defined constraints
        row = Record(row, flds)
        for constraint in local_constraints:
            name = constraint.get('name', None)
            field = constraint.get('field', None)
            assertion = constraint.get('assertion', None)
            test = constraint.get('test', None)
            getter = constraint.get('getter', lambda x: x)
            try:
                target = getter(row)
            except Exception as e:
                # getting target value failed, report problem
                yield (name, rownum, field, None, type(e).__name__)
            else:
                # only report a value when the constraint names a field
                value = target if field else None
                if test is not None:
                    try:
                        test(target)
                    except Exception as e:
                        # test raised exception, report problem
                        yield (name, rownum, field, value,
                               type(e).__name__)
                if assertion is not None:
                    try:
                        if not assertion(target):
                            raise AssertionError()
                    except Exception as e:
                        # assertion failed or raised, report problem
                        yield (name, rownum, field, value,
                               type(e).__name__)
def iterfieldconvert(source, converters, failonerror, errorvalue, where,
                     pass_row):
    """
    Generate the rows of `source` with values transformed by per-field
    converters.

    :param source: iterable of rows whose first row is the header
    :param converters: mapping of field (name or integer index) to a
        converter specification: a callable; a method name; a tuple/list of
        method name followed by arguments; a ``dict`` (handed to
        `dictconverter`); or ``None`` to leave the field alone
    :param failonerror: if true, an exception raised by a converter is
        re-raised; otherwise `errorvalue` is substituted
    :param errorvalue: value used when a conversion fails and `failonerror`
        is false
    :param where: optional row filter, either a callable or a string handed
        to `expr`; rows for which it is false are generated unchanged
    :param pass_row: if true, each converter is also given the whole row as
        a second argument
    :returns: generator of rows, header first; nothing at all if `source`
        is completely empty
    :raises FieldSelectionError: if a named field is not in the header
    :raises ArgumentError: if a converter specification is not recognised

    """

    # grab the fields in the source table
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # empty in, empty out; a bare next() here would surface as a
        # RuntimeError under PEP 479
        return
    flds = [text_type(f) for f in hdr]
    yield tuple(hdr)  # these are not modified

    # build converter functions
    converter_functions = dict()
    for k, c in converters.items():

        # turn field names into row indices
        if not isinstance(k, integer_types):
            try:
                k = flds.index(k)
            except ValueError:  # not in list
                raise FieldSelectionError(k)
        assert isinstance(k, int), 'expected integer, found %r' % k

        # is converter a function?
        if callable(c):
            converter_functions[k] = c

        # is converter a method name?
        elif isinstance(c, string_types):
            converter_functions[k] = methodcaller(c)

        # is converter a method name with arguments? (the length check
        # sends an empty tuple/list on to the ArgumentError below rather
        # than failing with IndexError)
        elif (isinstance(c, (tuple, list)) and len(c) > 0
              and isinstance(c[0], string_types)):
            methnm = c[0]
            methargs = c[1:]
            converter_functions[k] = methodcaller(methnm, *methargs)

        # is converter a dictionary?
        elif isinstance(c, dict):
            converter_functions[k] = dictconverter(c)

        # is it something else?
        elif c is None:
            pass  # ignore

        else:
            raise ArgumentError(
                'unexpected converter specification on field %r: %r' % (k, c))

    # define a function to transform a value
    def transform_value(i, v, *args):
        if i not in converter_functions:
            # no converter defined on this field, return value as-is
            return v
        else:
            try:
                return converter_functions[i](v, *args)
            except Exception:
                if failonerror:
                    raise  # bare raise keeps the original traceback
                else:
                    return errorvalue

    # define a function to transform a row
    if pass_row:
        def transform_row(_row):
            return tuple(
                transform_value(i, v, _row) for i, v in enumerate(_row))
    else:
        def transform_row(_row):
            return tuple(transform_value(i, v) for i, v in enumerate(_row))

    # prepare where function
    if isinstance(where, string_types):
        where = expr(where)
    elif where is not None:
        assert callable(where), 'expected callable for "where" argument, ' \
                                'found %r' % where

    # prepare iterator
    if pass_row or where:
        # wrap rows as records
        it = (Record(row, flds) for row in it)

    # construct the data rows
    if where is None:
        # simple case, transform all rows
        for row in it:
            yield transform_row(row)
    else:
        # conditionally transform rows
        for row in it:
            if where(row):
                yield transform_row(row)
            else:
                yield row
def tohtml(table, source=None, encoding=None, errors=None, caption=None,
           vrepr=text_type, lineterminator='\n', index_header=False,
           tr_style=None, td_styles=None, truncate=None):
    """
    Write the table as HTML to a file. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tohtml(table1, 'example.html', caption='example table')
        >>> print(open('example.html').read())
        <table class='petl'>
        <caption>example table</caption>
        <thead>
        <tr>
        <th>foo</th>
        <th>bar</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <td>a</td>
        <td style='text-align: right'>1</td>
        </tr>
        <tr>
        <td>b</td>
        <td style='text-align: right'>2</td>
        </tr>
        <tr>
        <td>c</td>
        <td style='text-align: right'>2</td>
        </tr>
        </tbody>
        </table>

    The `caption` keyword argument is used to provide a table caption
    in the output HTML.

    The `encoding` and `errors` arguments configure the text layer wrapped
    around the binary output. If `tr_style` is callable, data rows are
    wrapped as `Record` objects before being written.

    A table with no header row at all is written as if it had an empty
    header, rather than letting ``StopIteration`` escape to the caller
    after the output has already been opened.

    """

    source = write_source_from_arg(source)
    with source.open('wb') as buf:

        # deal with text encoding
        if PY2:
            codec = getcodec(encoding)
            f = codec.streamwriter(buf, errors=errors)
        else:
            f = io.TextIOWrapper(buf,
                                 encoding=encoding,
                                 errors=errors,
                                 newline='')

        # write the table
        try:

            it = iter(table)

            # write header
            try:
                hdr = next(it)
            except StopIteration:
                hdr = []  # completely empty table
            _write_begin(f, hdr, lineterminator, caption, index_header,
                         truncate)

            # write body
            if tr_style and callable(tr_style):
                # wrap as records
                it = (Record(row, hdr) for row in it)
            for row in it:
                _write_row(f, hdr, row, lineterminator, vrepr,
                           tr_style, td_styles, truncate)

            # finish up
            _write_end(f, lineterminator)
            f.flush()

        finally:
            if not PY2:
                # release the binary buffer so that closing the text
                # wrapper does not close it; the with block owns it
                f.detach()
def accept(self, row):
    """
    Wrap `row` as a `Record` over `self.fields`, compute its key with
    `self.discriminator`, and broadcast the key together with the record.
    """
    record = Record(row, self.fields)
    routing_key = self.discriminator(record)
    self.broadcast(routing_key, record)