def counts(request, uuid):
    """Render value counts for the requested columns of a downloaded CSV.

    The ``columns`` query parameter is a comma-separated list of column
    names. Entries that are empty or whitespace-only are dropped; the
    remaining names are used as given (not stripped) and sorted.

    Responses:
        * 404 ``"Not found."`` when no ``CSVDownload`` matches ``uuid``.
        * a redirect to the ``people_list`` view when no columns remain.
        * 400 ``"Bad request."`` when a requested column is not in the
          CSV header.
        * otherwise the rendered ``counts.html`` template.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    columns = sorted([c for c in columns_str.split(',') if c.strip()])

    # Nothing to count: send the user back to the plain listing before
    # touching the table at all.
    if not columns:
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Evaluate the header once and reuse it for validation and rendering,
    # instead of re-evaluating it for every requested column.
    people_header = header(people)
    if any(column not in people_header for column in columns):
        return HttpResponseBadRequest('Bad request.')

    # Named ``value_counts`` so the local does not shadow this view function.
    value_counts = valuecounts(people, *columns)
    value_counts = cutout(value_counts, 'frequency')

    return render(
        request,
        'counts.html',
        {
            'csvdownload': csvdownload,
            'columns': people_header,
            'headers': header(value_counts),
            'counts': data(value_counts),
            'queryparams': {
                'columns': columns
            }
        })
def test_header():
    """Check header() yields ('foo', 'bar') for tuple- and list-based tables."""
    expected = ('foo', 'bar')

    # table whose rows are tuples
    tuple_rows = (('foo', 'bar'), ('a', 1), ('b', 2))
    eq_(expected, header(tuple_rows))

    # same table, but with rows given as lists
    list_rows = (['foo', 'bar'], ['a', 1], ['b', 2])
    eq_(expected, header(list_rows))
def natural_key(left, right):
    """Return the field(s) the two tables have in common, for use as a key.

    Header fields of both tables are compared by their ``str()`` form and
    reported in the order they appear in `left`. A single shared field is
    returned as a bare string; several are returned as a list of strings.
    Fails an assertion when the tables share no fields.
    """
    left_names = [str(field) for field in header(left)]
    right_names = [str(field) for field in header(right)]

    shared = [name for name in left_names if name in right_names]
    assert shared, 'no fields in common'

    # a lone common field is handed back unwrapped rather than as a list
    if len(shared) == 1:
        return shared[0]
    return shared
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True,
                     strict=False):
    """
    Return the records of `a` that do not occur in `b`. E.g.::

        >>> import petl as etl
        >>> a = [['foo', 'bar', 'baz'],
        ...      ['A', 1, True],
        ...      ['C', 7, False],
        ...      ['B', 2, False],
        ...      ['C', 9, True]]
        >>> b = [['bar', 'foo', 'baz'],
        ...      [2, 'B', False],
        ...      [9, 'A', False],
        ...      [3, 'B', True],
        ...      [9, 'C', True]]
        >>> aminusb = etl.recordcomplement(a, b)
        >>> aminusb
        +-----+-----+-------+
        | foo | bar | baz   |
        +=====+=====+=======+
        | 'A' |   1 | True  |
        +-----+-----+-------+
        | 'C' |   7 | False |
        +-----+-----+-------+

        >>> bminusa = etl.recordcomplement(b, a)
        >>> bminusa
        +-----+-----+-------+
        | bar | foo | baz   |
        +=====+=====+=======+
        |   3 | 'B' | True  |
        +-----+-----+-------+
        |   9 | 'A' | False |
        +-----+-----+-------+

    The two tables must share exactly the same set of fields, although the
    fields may appear in a different order in each table.

    See also the :func:`petl.transform.setops.complement` function.

    The `buffersize`, `tempdir` and `cache` arguments are discussed under
    the :func:`petl.transform.sorts.sort` function.

    """

    # TODO possible with only one pass?
    fields_a = header(a)
    fields_b = header(b)
    assert set(fields_a) == set(fields_b), \
        'both tables must have the same set of fields'

    # rearrange b so its columns follow the field order of a
    b_aligned = cut(b, *fields_a)
    return complement(
        a,
        b_aligned,
        buffersize=buffersize,
        tempdir=tempdir,
        cache=cache,
        strict=strict,
    )
def itercrossjoin(sources, prefix):
    """Yield the rows of the cross join (cartesian product) of `sources`.

    The first item yielded is the combined header: the header fields of each
    source, concatenated in source order. When `prefix` is truthy each field
    is rendered as ``'<n>_<field>'`` where ``n`` is the one-based position of
    its source. Every following item is one combination of data rows, with
    the rows' values concatenated into a single tuple.
    """
    # assemble the output header
    fields = []
    for position, source in enumerate(sources, start=1):
        source_fields = header(source)
        if prefix:
            # one-based numbering of the sources
            source_fields = [str(position) + '_' + str(name)
                             for name in source_fields]
        fields.extend(source_fields)
    yield tuple(fields)

    # emit every combination of data rows, flattened into one row each
    data_views = [data(source) for source in sources]
    for combination in itertools.product(*data_views):
        yield tuple(itertools.chain.from_iterable(combination))
def diffheaders(t1, t2):
    """
    Compare the headers of two tables and return a pair of sets: the fields
    found only in `t2`, followed by the fields found only in `t1`. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 1, .3]]
        >>> table2 = [['baz', 'bar', 'quux'],
        ...           ['a', 1, .3]]
        >>> add, sub = etl.diffheaders(table1, table2)
        >>> add
        {'quux'}
        >>> sub
        {'foo'}

    """

    fields1 = set(header(t1))
    fields2 = set(header(t2))
    added = fields2.difference(fields1)
    removed = fields1.difference(fields2)
    return added, removed
def people_list(request, uuid):
    """Render a sorted, truncated listing of the rows in a downloaded CSV.

    Query parameters:
        * ``sortby``: column to sort on (default ``'name'``); must be one
          of the CSV header fields.
        * ``ordering``: ``'asc'`` (default) or ``'desc'``.
        * ``count``: number of rows to show (default ``'10'``); must parse
          as an integer of at least 1.

    Responses:
        * 404 ``"Not found."`` when no ``CSVDownload`` matches ``uuid``.
        * 400 ``"Bad request."`` when any query parameter is invalid.
        * otherwise the rendered ``people_list.html`` template. Its
          ``queryparams['count']`` is the current count plus 10, as a
          string.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    sortby = request.GET.get('sortby', 'name')
    ordering = request.GET.get('ordering', 'asc')
    count_str = request.GET.get('count', '10')

    if sortby not in header(people):
        return HttpResponseBadRequest('Bad request.')

    if ordering not in ('asc', 'desc'):
        return HttpResponseBadRequest('Bad request.')

    try:
        count = int(count_str)
    except ValueError:
        return HttpResponseBadRequest('Bad request.')

    if count < 1:
        return HttpResponseBadRequest('Bad request.')

    people = sort(people, sortby, reverse=ordering == 'desc')

    # Ask for one row beyond the requested page: a table already truncated
    # to ``count`` rows cannot tell us whether further rows exist, so
    # ``has_more`` is derived from the presence of that extra row instead.
    window = head(people, count + 1)
    rows = list(data(window))
    has_more = len(rows) > count

    return render(
        request,
        'people_list.html',
        {
            'csvdownload': csvdownload,
            'headers': header(window),
            # only the requested page is rendered; the look-ahead row is cut
            'people': rows[:count],
            'has_more': has_more,
            'queryparams': {
                'sortby': sortby,
                'ordering': ordering,
                'count': str(count + 10)
            }
        })
def convertall(table, *args, **kwargs):
    """
    Apply one common function or mapping to every field of the table. This
    is a shorthand for calling :func:`convert` with the table's full header
    as the set of fields to convert.

    A ``where`` keyword argument may be supplied as a callable or expression.
    It is evaluated on each row and should return True if the conversion
    should be applied on that row, else False.

    """

    # TODO don't read the data twice!
    all_fields = header(table)
    return convert(table, all_fields, *args, **kwargs)
def itertranspose(source):
    """Yield the columns of `source` as rows.

    One tuple is yielded per header field; the tuple for position ``i``
    holds element ``i`` of every row of `source`, header row included.
    """
    fields = header(source)
    # one independent pass over the source per output row, all opened upfront
    passes = [iter(source) for _ in fields]
    for index, rows in enumerate(passes):
        yield tuple(row[index] for row in rows)