Example No. 1
def save_tsv(p, vs):
    'Write sheet `vs` to file `p` as TSV.'
    delim = options.delimiter
    trdict = tsv_trdict()

    save_tsv_header(p, vs)

    with p.open_text(mode='a') as fp:
        for r in Progress(vs.rows):
            dispvals = []
            for col in vs.visibleCols:
                v = col.getDisplayValue(r)
                if isinstance(v, TypedWrapper):
                    if not options.save_errors:
                        continue
                    v = str(v)

                if trdict:
                    v = str(v).translate(trdict)

                dispvals.append(v)
            fp.write(delim.join(dispvals))
            fp.write('\n')

    status('%s save finished' % p)
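The per-cell `translate` call relies on `tsv_trdict()`, which builds an escape table from the configured delimiter options. A minimal standalone sketch of that escaping technique, with illustrative names rather than VisiData's actual helpers:

escape_table = str.maketrans({'\t': '\\t', '\n': '\\n'})  # assumed escape mapping

def tsv_line(values, delim='\t'):
    'Join one row of display values into a single escaped TSV line.'
    return delim.join(str(v).translate(escape_table) for v in values)

# tsv_line(['a\tb', 'c']) -> 'a\\tb\tc'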
Example No. 2
def save_json(p, vs):
    with p.open_text(mode='w') as fp:
        vcols = vs.visibleCols
        jsonenc = _vjsonEncoder(indent=options.json_indent)
        for chunk in jsonenc.iterencode(
            [_rowdict(vcols, r) for r in Progress(vs.rows, 'saving')]):
            fp.write(chunk)
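The point of `iterencode` here is streaming: the encoder yields the JSON text in chunks instead of materializing one huge string. A small sketch of the same pattern using only the standard `json` module (plain dict rows, not VisiData's `_vjsonEncoder`/`_rowdict` helpers):

import json

def write_json_rows(path, rows, indent=2):
    'Stream a list of dict rows to `path` as one JSON array.'
    enc = json.JSONEncoder(indent=indent)
    with open(path, 'w') as fp:
        for chunk in enc.iterencode(list(rows)):
            fp.write(chunk)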
Example No. 3
def setValuesFromExpr(self, rows, expr):
    compiledExpr = compile(expr, '<expr>', 'eval')
    vd.addUndoSetValues([self], rows)
    for row in Progress(rows, 'setting'):
        self.setValueSafe(row, self.sheet.evalexpr(compiledExpr, row))
    self.recalc()
    status('set %d values = %s' % (len(rows), expr))
Example No. 4
def genAllValues(rows, cols, trdict={}, format=True):
    transformers = collections.OrderedDict()  # list of transformers for each column in order
    for col in cols:
        transformers[col] = [ col.type ]
        if format:
            transformers[col].append(
                lambda v,fmtfunc=getType(col.type).formatter,fmtstr=col.fmtstr: fmtfunc(fmtstr, '' if v is None else v)
            )
        if trdict:
            transformers[col].append(lambda v,trdict=trdict: v.translate(trdict))

    options_safe_error = options.safe_error
    for r in Progress(rows):
        dispvals = []
        for col, transforms in transformers.items():
            try:
                dispval = col.getValue(r)
            except Exception as e:
                exceptionCaught(e)
                dispval = options_safe_error or str(e)

            try:
                for t in transforms:
                    if dispval is None:
                        dispval = ''
                        break
                    dispval = t(dispval)
            except Exception as e:
                dispval = str(dispval)

            dispvals.append(dispval)

        yield dispvals
Example No. 5
def gen_identify_duplicates(sheet):
    """
    Takes a sheet, and returns a generator yielding a tuple for each row
    encountered. The tuple's structure is `(row_object, is_dupe)`, where
    is_dupe is True/False.

    See note in Usage section above regarding how duplicates are determined.
    """

    keyCols = sheet.keyCols

    if len(keyCols) == 0:
        warning("No key cols specified. Using all columns.")
        cols_to_check = sheet.visibleCols
    else:
        cols_to_check = keyCols

    seen = set()
    for r in Progress(sheet.rows):
        vals = tuple(col.getValue(r) for col in cols_to_check)
        is_dupe = vals in seen
        if not is_dupe:
            seen.add(vals)
        yield (r, is_dupe)
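Outside of VisiData, the same duplicate test reduces to hashing the tuple of checked values and remembering which tuples have already appeared; a minimal sketch with illustrative names:

def identify_duplicates(rows, keys):
    'Yield (row, is_dupe) pairs; a row is a dupe if its key values were seen before.'
    seen = set()
    for row in rows:
        vals = tuple(row[k] for k in keys)
        yield row, vals in seen
        seen.add(vals)

# list(identify_duplicates([{'a': 1}, {'a': 1}], ['a']))
# -> [({'a': 1}, False), ({'a': 1}, True)]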
Example No. 6
def fillNullValues(col, rows):
    'Fill null cells in col with the previous non-null value'
    lastval = None
    oldvals = []  # for undo
    nullfunc = isNullFunc()
    n = 0
    rowsToFill = list(rows)
    for r in Progress(col.sheet.rows, 'filling'):  # loop over all rows
        try:
            val = col.getValue(r)
        except Exception as e:
            val = e

        if nullfunc(val) and r in rowsToFill:
            if lastval:
                oldvals.append((col, r, val))
                col.setValue(r, lastval)
                n += 1
        else:
            lastval = val

    def _undo():
        for c, r, v in oldvals:
            c.setValue(r, v)

    vd.addUndo(_undo)

    col.recalc()
    status("filled %d values" % n)
Example No. 7
def _reload(self=vs):
    self.rows = []
    gen = gen_identify_duplicates(sheet)
    prog = Progress(gen, gerund="deduplicating", total=sheet.nRows)
    for row, is_dupe in prog:
        if not is_dupe:
            self.addRow(row)
Example No. 8
def deleteBy(self, func):
    'Delete rows for which func(row) is true.  Returns number of deleted rows.'
    oldrows = copy(self.rows)
    oldidx = self.cursorRowIndex
    ndeleted = 0

    row = None  # row to re-place cursor after
    while oldidx < len(oldrows):
        if not func(oldrows[oldidx]):
            row = self.rows[oldidx]
            break
        oldidx += 1

    self.rows.clear()
    for r in Progress(oldrows, 'deleting'):
        if not func(r):
            self.rows.append(r)
            if r is row:
                self.cursorRowIndex = len(self.rows) - 1
        else:
            ndeleted += 1

    vd.addUndo(setattr, self, 'rows', oldrows)

    status('deleted %s %s' % (ndeleted, self.rowtype))
    return ndeleted
Example No. 9
def save_jsonl(p, vs):
    with p.open_text(mode='w') as fp:
        vcols = vs.visibleCols
        jsonenc = _vjsonEncoder()
        for r in Progress(vs.rows):
            rowdict = _rowdict(vcols, r)
            fp.write(jsonenc.encode(rowdict) + '\n')
Example No. 10
def addRegexColumns(regexMaker, vs, colIndex, origcol, regexstr):
    regexstr or vd.fail('regex required')

    regex = re.compile(regexstr, vs.regex_flags())

    func = regexMaker(regex, origcol)

    n = options.default_sample_size
    if n and n < len(vs.rows):
        exampleRows = random.sample(vs.rows, max(
            0, n - 1))  # -1 to account for included cursorRow
    else:
        exampleRows = vs.rows

    ncols = 0  # number of new columns added already
    for r in Progress(exampleRows + [vs.cursorRow]):
        try:
            m = func(r)
            if not m:
                continue
        except Exception as e:
            vd.exceptionCaught(e)
            continue  # m is not valid for this row; skip it

        for _ in range(len(m) - ncols):
            c = Column(
                origcol.name + '_re' + str(ncols),
                getter=lambda col, row, i=ncols, func=func: func(row)[i],
                origCol=origcol)
            vs.addColumn(c, index=colIndex + ncols + 1)
            ncols += 1
Example No. 11
def setValuesFromRegex(cols, rows, rex):
    transforms = [regexTransform(col, rex) for col in cols]
    vd.addUndoSetValues(cols, rows)
    for r in Progress(rows, 'replacing'):
        for col, transform in zip(cols, transforms):
            col.setValueSafe(r, transform(col, r))
    for col in cols:
        col.recalc()
Example No. 12
def unselect(self, rows, status=True, progress=True):
    "Unselect given rows. Don't show progress if progress=False; don't show status if status=False."
    self.addUndoSelection()
    before = self.nSelected
    for r in (Progress(rows, 'unselecting') if progress else rows):
        self.unselectRow(r)
    if status:
        vd.status('unselected %s/%s %s' %
                  (before - self.nSelected, before, self.rowtype))
Example No. 13
    def reload(self):
        self.rows = []
        self.columns = []

        if len(self.source) == 0:
            return

        for i, row in enumerate(Progress(self.source, total=len(self.source))):
            if i == 0:
                self.set_columns_from_row(row)
            self.addRow(row)
Example No. 14
def gatherBy(self, func, gerund='gathering'):
    'Generate only rows for which the given func returns True.'
    for i in Progress(rotateRange(self.nRows, self.cursorRowIndex - 1),
                      total=self.nRows,
                      gerund=gerund):
        try:
            r = self.rows[i]
            if func(r):
                yield r
        except Exception:
            pass
Example No. 15
def getValueRows(self, rows):
    'Generate (val, row) for the given `rows` at this Column, excluding errors and nulls.'
    f = isNullFunc()

    for r in Progress(rows, 'calculating'):
        try:
            v = self.getTypedValue(r)
            if not f(v):
                yield v, r
        except Exception:
            pass
Example No. 16
def to_tabulate_table(sheet, fmt):
    if fmt not in SUPPORTED_FORMATS:
        fail(f"'{fmt}' is not a supported 'tabulate' format")

    headers = [col.name for col in sheet.visibleCols]

    def get_row_values(row):
        return [col.getDisplayValue(row) for col in sheet.visibleCols]

    return tabulate(map(get_row_values, Progress(sheet.rows)),
                    headers,
                    tablefmt=fmt)
Example No. 17
def addNewRows(sheet, n, idx):
    addedRows = {}
    for i in Progress(range(n), 'adding'):
        row = sheet.newRow()
        addedRows[sheet.rowid(row)] = row
        sheet.addRow(row, idx + 1)

    @asyncthread
    def _removeRows():
        sheet.deleteBy(lambda r, sheet=sheet, addedRows=addedRows: sheet.rowid(
            r) in addedRows)

    vd.addUndo(_removeRows)
Example No. 18
    def reload_json(self):
        self.rows = []
        with self.source.open_text() as fp:
            ret = json.load(fp)

        if isinstance(ret, dict):
            self.rows = [ret]
            self.columns = []
            for k in self.rows[0]:
                self.addColumn(ColumnItem(k, type=deduceType(self.rows[0][k])))
        else:
            self.rows = []
            for row in Progress(ret):
                self.addRow(row)
Example No. 19
    def reload(self):
        sheets = self.sources

        # first item in joined row is the key tuple from the first sheet.
        # first columns are the key columns from the first sheet, using its row (0)
        self.columns = []
        for i, c in enumerate(sheets[0].keyCols):
            self.addColumn(
                SubrowColumn(c.name,
                             ColumnItem(c.name, i, type=c.type, width=c.width),
                             0))
        self.setKeys(self.columns)

        for sheetnum, vs in enumerate(sheets):
            # subsequent elements are the rows from each source, in order of the source sheets
            ctr = collections.Counter(c.name for c in vs.nonKeyVisibleCols)
            for c in vs.nonKeyVisibleCols:
                newname = c.name if ctr[c.name] == 1 else '%s_%s' % (vs.name,
                                                                     c.name)
                self.addColumn(SubrowColumn(newname, c, sheetnum + 1))

        rowsBySheetKey = {}
        rowsByKey = {}

        groupRowsByKey(sheets, rowsBySheetKey, rowsByKey)

        self.rows = []

        with Progress(gerund='joining', total=len(rowsByKey)) as prog:
            for k, combinedRows in rowsByKey.items():
                prog.addProgress(1)

                if self.jointype == 'full':  # keep all rows from all sheets
                    for combinedRow in combinedRows:
                        self.addRow(combinedRow)

                elif self.jointype == 'inner':  # only rows with matching key on all sheets
                    for combinedRow in combinedRows:
                        if all(combinedRow):
                            self.addRow(combinedRow)

                elif self.jointype == 'outer':  # all rows from first sheet
                    for combinedRow in combinedRows:
                        if combinedRow[1]:
                            self.addRow(combinedRow)

                elif self.jointype == 'diff':  # only rows without matching key on all sheets
                    for combinedRow in combinedRows:
                        if not all(combinedRow):
                            self.addRow(combinedRow)
Example No. 20
def save_dot(vd, p, vs):
    unusedColors = 'orange green purple cyan red blue black'.split()
    assignedColors = {}

    srccol = vs.keyCols[0]
    dstcol = vs.keyCols[1]
    with p.open_text(mode='w') as fp:
        print('graph { concentrate=true;', file=fp)
        for row in Progress(vs.rows, 'saving'):
            src = srccol.getTypedValue(row)
            dst = dstcol.getTypedValue(row)
            if not is_valid(src) or not is_valid(dst):
                continue

            downsrc = clean_to_id(str(src)) or src
            downdst = clean_to_id(str(dst)) or dst
            edgenotes = [
                c.getTypedValue(row) for c in vs.nonKeyVisibleCols
                if not isNumeric(c)
            ]
            edgetype = '-'.join(str(x) for x in edgenotes if is_valid(x))
            color = assignedColors.get(edgetype, None)
            if not color:
                color = unusedColors.pop() if unusedColors else 'black'
                assignedColors[edgetype] = color

            if options.graphviz_edge_labels:
                nodelabels = [
                    wrapply(SI, c.getTypedValue(row))
                    for c in vs.nonKeyVisibleCols if isNumeric(c)
                ]
                label = '/'.join(str(x) for x in nodelabels if is_valid(x))
            else:
                label = ''
            print('\t%s[label="%s"];' % (downsrc, src), file=fp)
            print('\t%s[label="%s"];' % (downdst, dst), file=fp)
            print('\t%s -- %s[label="%s", color=%s];' %
                  (downsrc, downdst, label, color),
                  file=fp)

        print('label="%s"' % vs.name, file=fp)
        print('node[shape=plaintext];', file=fp)
        print('subgraph cluster_legend {', file=fp)
        print('label="Legend";', file=fp)
        for i, (k, color) in enumerate(assignedColors.items()):
            print('key%d[label="%s", fontcolor=%s];' % (i, k, color), file=fp)

        print('}', file=fp)  # legend subgraph
        print('}', file=fp)
Example No. 21
def select(self, rows, status=True, progress=True):
    "Bulk select given rows. Don't show progress if progress=False; don't show status if status=False."
    self.addUndoSelection()
    before = self.nSelected
    if options.bulk_select_clear:
        self.clearSelected()
    for r in (Progress(rows, 'selecting') if progress else rows):
        self.selectRow(r)
    if status:
        if options.bulk_select_clear:
            msg = 'selected %s %s%s' % (self.nSelected, self.rowtype,
                                        ' instead' if before > 0 else '')
        else:
            msg = 'selected %s%s %s' % (self.nSelected - before, ' more'
                                        if before > 0 else '', self.rowtype)
        vd.status(msg)
Example No. 22
def rotateRange(n, idx, reverse=False):
    'Wrap an index iterator around idx: yields idx+1 through n-1, then 0 through idx (reversed order if reverse=True).'
    if reverse:
        rng = range(idx - 1, -1, -1)
        rng2 = range(n - 1, idx - 1, -1)
    else:
        rng = range(idx + 1, n)
        rng2 = range(0, idx + 1)

    wrapped = False
    with Progress(total=n) as prog:
        for r in itertools.chain(rng, rng2):
            prog.addProgress(1)
            if not wrapped and r in rng2:
                status('search wrapped')
                wrapped = True
            yield r
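Stripped of the `Progress` and `status` machinery, the wrap-around traversal is just two chained ranges; a sketch of the forward direction only:

import itertools

def rotate_range(n, idx):
    'Visit idx+1 .. n-1, then wrap to 0 .. idx.'
    yield from itertools.chain(range(idx + 1, n), range(0, idx + 1))

# list(rotate_range(5, 2)) -> [3, 4, 0, 1, 2]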
Example No. 23
    def iterload(self):
        delim = self.options.delimiter
        rowdelim = self.options.row_delimiter

        with self.source.open_text() as fp:
            with Progress(total=filesize(self.source)) as prog:
                for line in splitter(fp, rowdelim):
                    if not line:
                        continue

                    prog.addProgress(len(line))
                    row = list(line.split(delim))

                    if len(row) < self.nVisibleCols:
                        # extend rows that are missing entries
                        row.extend([None] * (self.nVisibleCols - len(row)))

                    yield row
Example No. 24
def normalize_column_names(sheet):
    """
    Normalize the names of all non-hidden columns on the active sheet.
    """

    init_names = []
    gen = gen_normalize_names(c.name for c in sheet.visibleCols)
    prog = Progress(gen, gerund="normalizing", total=sheet.nVisibleCols)

    for i, norm_name in enumerate(prog):
        col = sheet.visibleCols[i]
        init_names.append(col.name)  # Store for undo
        col.name = norm_name

    @asyncthread
    def undo():
        for i, c in enumerate(init_names):
            sheet.visibleCols[i].name = c

    vd.addUndo(undo)
Example No. 25
def sort(self):
    'Sort rows according to the current self._ordering.'
    try:
        with Progress(gerund='sorting', total=self.nRows) as prog:
            def sortkey(r):
                ret = []
                for col, reverse in self._ordering:
                    if isinstance(col, str):
                        col = self.column(col)
                    val = col.getTypedValue(r)
                    ret.append(Reversor(val) if reverse else val)

                prog.addProgress(1)
                return ret

            # must not reassign self.rows: use .sort() instead of sorted()
            self.rows.sort(key=sortkey)
    except TypeError as e:
        vd.warning('sort incomplete due to TypeError; change column type')
        vd.exceptionCaught(e, status=False)
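`Reversor` is what lets a single `sort()` call mix ascending and descending columns: wrapping a value inverts its comparisons inside the composite key. A sketch of the presumed shape of such a wrapper (not necessarily VisiData's exact class):

import functools

@functools.total_ordering
class Reversor:
    'Wrap a value so that comparisons (and hence sort order) are inverted.'
    def __init__(self, obj):
        self.obj = obj
    def __eq__(self, other):
        return self.obj == other.obj
    def __lt__(self, other):
        return other.obj < self.obj  # reversed comparison

# sorted(rows, key=lambda r: (r['city'], Reversor(r['age'])))  # city asc, age desc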
Example No. 26
    def reload_sync(self):
        'Perform synchronous loading of TSV file, discarding header lines.'
        header_lines = options.get('header', self)

        with self.source.open_text() as fp:
            # get one line anyway to determine number of columns
            lines = list(getlines(fp, int(header_lines) or 1))
            headers = [L.split(options.delimiter) for L in lines]

            if header_lines <= 0:
                self.columns = [
                    ColumnItem('', i) for i in range(len(headers[0]))
                ]
            else:
                self.columns = [
                    ColumnItem('\\n'.join(x), i)
                    for i, x in enumerate(zip(*headers[:header_lines]))
                ]

            lines = lines[header_lines:]  # in case of header_lines == 0
            self._rowtype = namedlist('tsvobj', [c.name for c in self.columns])

            self.recalc()
            delim = options.delimiter
            self.rows = []

            with Progress(total=self.source.filesize) as prog:
                for L in itertools.chain(lines, getlines(fp)):
                    row = L.split(delim)
                    ncols = self._rowtype.length()  # current number of cols
                    if len(row) > ncols:
                        newcols = [
                            ColumnItem('', len(row) + i, width=8)
                            for i in range(len(row) - ncols)
                        ]
                        self._rowtype = namedlist(
                            self._rowtype.__name__,
                            list(self._rowtype._fields) +
                            ['_' for c in newcols])
                    self.addRow(self._rowtype(row))
                    prog.addProgress(len(L))
Example No. 27
def select_duplicate_rows(sheet, duplicates=True):
    """
    Given a sheet, sets the selection status in VisiData to `selected` for each
    row that is a duplicate of a prior row.

    If `duplicates = False`, then the behavior is reversed; sets the selection
    status to `selected` for each row that is *not* a duplicate.
    """
    before = len(sheet.selectedRows)

    gen = gen_identify_duplicates(sheet)
    prog = Progress(gen, gerund="selecting", total=sheet.nRows)

    for row, is_dupe in prog:
        if is_dupe == duplicates:
            sheet.selectRow(row)

    sel_count = len(sheet.selectedRows) - before

    more_str = " more" if before > 0 else ""

    vd.status(f"selected {sel_count}{more_str} {sheet.rowtype}")
Example No. 28
def groupRowsByKey(sheets, rowsBySheetKey, rowsByKey):
    with Progress(total=sum(len(vs.rows) for vs in sheets) * 2) as prog:
        for vs in sheets:
            # tally rows by keys for each sheet
            rowsBySheetKey[vs] = collections.defaultdict(list)
            for r in vs.rows:
                prog.addProgress(1)
                key = joinkey(vs, r)
                rowsBySheetKey[vs][key].append(r)

        for vs in sheets:
            for r in vs.rows:
                prog.addProgress(1)
                key = joinkey(vs, r)
                if key not in rowsByKey:  # gather for this key has not been done yet
                    # multiplicative for non-unique keys
                    rowsByKey[key] = []
                    for crow in itertools.product(*[
                            rowsBySheetKey[vs2].get(key, [None])
                            for vs2 in sheets
                    ]):
                        rowsByKey[key].append([key] + list(crow))
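The same grouping can be written without sheets or `Progress`: bucket each source's rows by key, then take the cartesian product of the per-key buckets (padding missing sources with `None`) so non-unique keys multiply out. An illustrative standalone sketch:

import collections
import itertools

def group_rows_by_key(sources, keyfunc):
    'Return {key: [[key, row_from_src0, row_from_src1, ...], ...]} over all sources.'
    by_key = [collections.defaultdict(list) for _ in sources]
    for buckets, rows in zip(by_key, sources):
        for r in rows:
            buckets[keyfunc(r)].append(r)
    allkeys = {k for buckets in by_key for k in buckets}
    return {k: [[k] + list(crow)
                for crow in itertools.product(*(b.get(k, [None]) for b in by_key))]
            for k in allkeys}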
Example No. 29
def ExtendedSheet_reload(self, sheets):
    self.joinSources = sheets

    # first item in joined row is the key tuple from the first sheet.
    # first columns are the key columns from the first sheet, using its row (0)
    self.columns = []
    for i, c in enumerate(sheets[0].keyCols):
        self.addColumn(copy(c))
    self.setKeys(self.columns)

    for i, c in enumerate(sheets[0].nonKeyVisibleCols):
        self.addColumn(copy(c))

    for sheetnum, vs in enumerate(sheets[1:]):
        # subsequent elements are the rows from each source, in order of the source sheets
        ctr = collections.Counter(c.name for c in vs.nonKeyVisibleCols)
        for c in vs.nonKeyVisibleCols:
            newname = '%s_%s' % (vs.name, c.name)
            newcol = ExtendedColumn(newname,
                                    sheetnum=sheetnum + 1,
                                    sourceCol=c)
            self.addColumn(newcol)

    self.rowsBySheetKey = {}  # [srcSheet][key] -> list(rowobjs from sheets[0])
    rowsByKey = {}  # [key] -> [key, rows0, rows1, ...]

    groupRowsByKey(sheets, self.rowsBySheetKey, rowsByKey)

    self.rows = []

    with Progress(gerund='joining', total=len(rowsByKey)) as prog:
        for k, combinedRows in rowsByKey.items():
            prog.addProgress(1)
            for combinedRow in combinedRows:
                if combinedRow[1]:
                    self.addRow(combinedRow[1])
Example No. 30
def toggle(self, rows):
    'Toggle selection of given `rows`.'
    self.addUndoSelection()
    for r in Progress(rows, 'toggling', total=len(self.rows)):
        if not self.unselectRow(r):
            self.selectRow(r)