예제 #1
0
 def rowdex(self, index):
     """Resolve a row reference to its integer position.

     An int that falls inside ``range(len(self.rowheads))`` is returned
     unchanged; any other value is looked up in ``self.rowmap``. A value
     that matches neither is reported with ``die=True``.
     """
     is_position = (
         isinstance(index, int) and 0 <= index < len(self.rowheads))
     if is_position:
         return index
     if index in self.rowmap:
         return self.rowmap[index]
     # unresolvable reference: fatal report, followed by an explicit die
     self.report("Bad row index <{}>.".format(index), die=True)
     zu.die()
     return None
예제 #2
0
파일: frame.py 프로젝트: sagun98/zopy
def verify_dict(d):
    """Check that a dict can be converted to a Frame, dying if it cannot.

    Every key must be a string, every value must be a list, and all of the
    lists must share one length. An empty dict is rejected because no
    common length exists.
    """
    bad = False
    lengths = set()
    # iterate over items(): iterating the dict itself yields only the keys
    for key, values in d.items():
        if not isinstance(key, str) or not isinstance(values, list):
            # skip len() here: a non-list value may not be sized at all
            bad = True
            continue
        lengths.add(len(values))
    if len(lengths) != 1:
        bad = True
    if bad:
        die("Can't convert dict to Frame")
예제 #3
0
    def __init__(
        self,
        path=None,
        fh=None,
        matrix=None,
        data=None,
        rowheads=None,
        colheads=None,
        headers=True,
        name=None,
        verbose=True,
        numeric=False,
    ):
        """Build the object from raw data, a path, a handle or a matrix.

        When ``data`` is given it is coerced and paired with the supplied
        ``rowheads``/``colheads``, falling back to positional ranges for
        whichever is omitted. Otherwise a matrix is obtained from ``path``,
        ``fh`` or ``matrix`` (tried in that order). With ``headers`` set,
        the first row and first column of that matrix become the column and
        row headers and its top-left cell becomes the origin; without it
        the whole matrix is data and the headers are positional ranges.
        """
        # defaults; the loading route below overwrites what it provides
        self.data = None
        self.rowheads = None
        self.colheads = None
        self.axheads = []
        self.origin = c_default_origin
        self.name = name
        self.shape = []
        self.is_verbose = verbose
        self.is_numeric = numeric

        if data is None:
            # no ready-made data: get a full matrix from the first
            # available source
            if path is not None:
                matrix = load_from_path(path)
            elif fh is not None:
                matrix = load_from_handle(fh)
            elif matrix is not None:
                matrix = coerce(matrix)
            else:
                die("No loading option.")
            if headers:
                # peel the header row/column off the matrix
                self.data = matrix[1:, 1:]
                self.rowheads = list(matrix[1:, 0])
                self.colheads = list(matrix[0, 1:])
                self.origin = matrix[0][0]
            else:
                self.data = matrix
                self.rowheads = range(self.data.shape[0])
                self.colheads = range(self.data.shape[1])
        else:
            self.data = coerce(data)
            if rowheads is None:
                self.rowheads = range(self.data.shape[0])
            else:
                self.rowheads = rowheads
            if colheads is None:
                self.colheads = range(self.data.shape[1])
            else:
                self.colheads = colheads
        self.axheads = [self.rowheads, self.colheads]
        self.index()
예제 #4
0
파일: frame.py 프로젝트: sagun98/zopy
def verify_list_of_lists(aa):
    """Check that a list of lists can be converted to a Frame, or die.

    Every row must be a list, all rows must share one length, and every
    entry of the first row must be a string. Empty input is rejected.
    """
    bad = False
    lengths = set()
    for row in aa:
        if isinstance(row, list):
            lengths.add(len(row))
        else:
            # do not call len() on a non-list; it may not be sized at all
            bad = True
    if len(lengths) != 1:
        # covers ragged rows and the empty input (no lengths recorded)
        bad = True
    # look at the first row only once it is known to exist and be a list
    if not bad and not all(isinstance(cell, str) for cell in aa[0]):
        bad = True
    if bad:
        die("Can't convert lists to Frame")
예제 #5
0
    def apfilter(self, minabund=0, minprev=1, **kwargs):
        """Filter by abundance and prevalence via ``self.filter``.

        The predicate handed to ``self.filter`` is true for a vector when
        at least ``minprev`` of its entries are ``>= minabund``. A
        ``minprev`` of exact type ``float`` is interpreted as a fraction of
        the vector length. Extra keyword arguments are forwarded to
        ``self.filter`` together with ``vectors=True``.
        """
        if not self.is_numeric:
            die("Can't apfilter non-numeric table:", self.name)
        fractional = type(minprev) is float
        if fractional:
            self.report("will interpret minimum prevalence", minprev,
                        "as a fraction of samples")
        transposed = kwargs.get("t", False) or kwargs.get("transposed", False)
        if not transposed:
            self.report(
                "using apfilter on non-transposed table is not standard")

        def inner(vector, minabund=minabund, minprev=minprev):
            required = minprev
            # exact type test on purpose, mirroring the check above
            if type(required) is float:
                required = int(len(vector) * required)
            passing = list(vector >= minabund)
            return passing.count(True) >= required

        return self.filter(inner, vectors=True, **kwargs)
예제 #6
0
    def __init__(self,
                 source=None,
                 data=None,
                 colheads=None,
                 rowheads=None,
                 origin="#",
                 missing="#N/A",
                 headless=False,
                 transposed=False,
                 verbose=True):
        """Create a table from in-memory pieces or from a loadable source.

        With no ``source``, all of ``data``, ``rowheads`` and ``colheads``
        must be supplied. Otherwise ``source`` may be a list (loaded as
        nested lists), a dict (loaded as nested dicts), an open file-like
        object, or a path to an existing file. Any other source is fatal.
        After loading, the header maps are rebuilt and the new size is
        reported.
        """

        # set up object attributes
        self.source = source
        self.data = data
        self.rowheads = rowheads
        self.colheads = colheads
        self.origin = origin
        self.missing = missing
        self.transposed = transposed
        self.headless = headless
        self.verbose = verbose
        self.sourcename = None
        self.rowmap = None
        self.colmap = None

        # decide how to load table
        if source is None:
            self.sourcename = "<runtime>"
            # identity test: "None in [...]" compares with ==, which is
            # ambiguous (or raises) for array-like data
            if any(part is None for part in (data, rowheads, colheads)):
                zu.die("If no <source> then <data/rowheads/colheads> required")
        elif isinstance(source, list):
            self.sourcename = "<list of lists>"
            self.load_from_nested_lists(source)
        elif isinstance(source, dict):
            self.sourcename = "<dict of dicts>"
            self.load_from_nested_dicts(source)
        elif hasattr(source, "read"):
            # duck-typed handle check: the <file> builtin exists only in
            # Python 2, while every readable handle exposes read()
            self.sourcename = "<file handle>"
            self.load_from_file_handle(source)
        elif os.path.exists(source):
            self.sourcename = "<{}>".format(source)
            self.load_from_file(source)
        else:
            # previously fell through with nothing loaded and failed later
            zu.die("Can't load table from source:", source)

        # set up other table elements
        self.remap()
        self.report("New table with size {}.".format(self.size()))
예제 #7
0
 def limit(self, header, criterion, **kwargs):
     """Keep only rows whose field value satisfies a numerical criterion.

     ``criterion`` is a comparison operator followed by a number, for
     example ``"< 0.05"`` or ``">=10"``. Supported operators are ``<``,
     ``<=``, ``>``, ``>=`` and ``=``/``==`` (equality). Field values are
     read with ``self.get(row, header)`` and converted with ``float``
     before comparing. Extra keyword arguments are forwarded to
     ``self.filter``, whose result is returned. A criterion with no
     operator, an unsupported operator, or a non-numeric threshold is fatal.
     """
     found = re.search(r"([<>=]+) *(.*)", criterion)
     if found is None:
         zu.die(criterion, "is not a valid limit criterion")
     op, threshold = found.groups()
     try:
         threshold = float(threshold)
     except ValueError:
         # e.g. "> abc", or an operator with nothing after it
         zu.die(criterion, "is not a valid limit criterion")
     choices = {
         "<": lambda x: float(x) < threshold,
         "<=": lambda x: float(x) <= threshold,
         ">": lambda x: float(x) > threshold,
         ">=": lambda x: float(x) >= threshold,
         "=": lambda x: float(x) == threshold,
         "==": lambda x: float(x) == threshold,
     }
     if op not in choices:
         # the pattern also matches operator runs such as "<>" or "=>"
         zu.die(criterion, "is not a valid limit criterion")
     selector = choices[op]

     def function(r):
         return selector(self.get(r, header))

     self.report("applying <limit>, requiring", header, "to be", op,
                 threshold, kwargs)
     return self.filter(function, **kwargs)
예제 #8
0
파일: vitals.py 프로젝트: sagun98/zopy
def row_stats(row, as_strings=True, engin=False):
    """Summarize a numeric row as a dict of named statistics.

    The dict holds the count, the number and fraction of near-zero entries
    (absolute value below ``c_eps``), sum, min, the three values returned
    by ``mquantiles``, max, mean, standard deviation and the coefficient
    of variation (0 when the mean is 0). The key set must match
    ``c_props``. With ``as_strings`` set, every value is passed through
    ``pretty(value, engin)``.
    """
    q1, q2, q3 = mquantiles(row)
    n_zeros = len([k for k in row if abs(k) < c_eps])
    stats = {
        "N": len(row),
        "#0s": n_zeros,
        "%0s": n_zeros / float(len(row)),
        "Sum": sum(row),
        "Min": min(row),
        "Q1": q1,
        "Q2_Med": q2,
        "Q3": q3,
        "Max": max(row),
        "Mean": mean(row),
        "StDev": std(row),
    }
    if stats["Mean"] != 0:
        stats["CfVar"] = stats["StDev"] / stats["Mean"]
    else:
        stats["CfVar"] = 0
    # guard against this list drifting away from the shared property list
    if set(stats) != set(c_props):
        die("Inconsistent stat lists. Check code.")
    if not as_strings:
        return stats
    return {k: pretty(v, engin) for k, v in stats.items()}
예제 #9
0
 def __init__(self, gff_row, counter):
     """Build a locus from one split GFF row.

     Each value in ``gff_row`` is stored as an attribute named and
     converted according to the matching ``c_gff_fields`` entry; a ``"."``
     value is stored as-is. The ``attributes`` field is then parsed from
     ``key=value`` items separated by ``;`` into a dict. ``name`` is the
     ``ID`` attribute (``None`` if absent) and ``code`` is
     ``start:end:strand``.
     """
     # unique tag for locus based on position in GFF
     self.index = counter
     # gff fields
     if len(gff_row) != len(c_gff_fields):
         zu.die("Bad GFF row:", gff_row)
     for [fname, ftype], value in zip(c_gff_fields, gff_row):
         setattr(self, fname, ftype(value) if value != "." else value)
     # attributes
     temp = {}
     for item in self.attributes.split(";"):
         if "=" not in item:
             continue
         # split on the first "=" only, so a value that itself contains
         # "=" is kept whole instead of breaking the unpacking
         system, _, value = item.strip().partition("=")
         if system in temp:
             zu.say("Warning: Multiple definitions for system", system)
         temp[system] = value
     self.attributes = temp
     # no name by default
     self.name = self.attributes.get("ID", None)
     self.code = ":".join([str(self.start), str(self.end), self.strand])
예제 #10
0
 def __init__(
     self,
     row,
     config="6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore",
 ):
     """Parse one row of tabular hit output into typed fields.

     ``config`` names the columns of ``row`` (a leading ``"6 "`` is
     dropped). Each value is converted with the matching entry of the
     module-level ``format`` table and stored in ``self.data``. Derived
     entries ``qcov``, ``scov``, ``mcov`` and ``strength`` are added
     (``None`` when their inputs are unavailable), and every entry is
     finally mirrored as an instance attribute.
     """
     if config.startswith("6 "):
         config = config[2:]
     config = config.split(" ")
     if len(row) != len(config):
         die("config doesn't match row")
     self.data = {}
     fields = self.data
     for value, field in zip(row, config):
         fields[field] = format[field](value)
     # qcov: aligned query span over query length
     fields["qcov"] = None
     if contains("qstart qend qlen".split(), fields):
         query_span = abs(fields["qend"] - fields["qstart"]) + 1
         fields["qcov"] = query_span / float(fields["qlen"])
     # scov: aligned subject span over subject length
     fields["scov"] = None
     if contains("sstart send slen".split(), fields):
         subject_span = abs(fields["send"] - fields["sstart"]) + 1
         fields["scov"] = subject_span / float(fields["slen"])
     # mcov: the smaller of the two coverages, when both exist
     qcov, scov = fields["qcov"], fields["scov"]
     if qcov is None or scov is None:
         fields["mcov"] = None
     else:
         fields["mcov"] = min(qcov, scov)
     # score
     fields["strength"] = None
     if contains("pident mcov".split(), fields):
         fields["strength"] = fields["mcov"] * fields["pident"] / 100.0
     # set as attr
     for attr_name, attr_value in fields.items():
         setattr(self, attr_name, attr_value)
예제 #11
0
    def remap(self):
        """Validate the table shape and rebuild the header lookup maps.

        For non-empty data, dies when rows differ in length or when the
        header lists do not match the data dimensions. Both header lists
        are then passed to ``deduplicate`` and the header-to-position maps
        ``rowmap`` and ``colmap`` are rebuilt.
        """

        # integrity checks
        if len(self.data) > 0:
            row_lengths = {len(row) for row in self.data}
            if len(row_lengths) != 1:
                zu.die("Table has inconsistent row lengths")
            if len(self.data[0]) != len(self.colheads):
                zu.die("Colheads do not align to data")
            if len(self.data) != len(self.rowheads):
                zu.die("Rowheads do not align to data")

        # auto-replace duplicate field names
        for heads in (self.rowheads, self.colheads):
            deduplicate(heads)

        # build fast maps
        self.rowmap = dict(
            (head, pos) for pos, head in enumerate(self.rowheads))
        self.colmap = dict(
            (head, pos) for pos, head in enumerate(self.colheads))
예제 #12
0
파일: xlsx2tsv.py 프로젝트: sagun98/zopy
#!/usr/bin/env python

import os
import sys
import re
import argparse
import csv

from zopy.utils import path2name, die

try:
    import openpyxl as xl
except ImportError:
    # catch only a missing module: a bare except would also swallow
    # KeyboardInterrupt/SystemExit and mask unrelated failures
    die("This script requires the OPENPYXL module")

# argument parsing (python argparse): one positional argument, the workbook
parser = argparse.ArgumentParser()
parser.add_argument("xlsx", help="")
args = parser.parse_args()

# open the workbook named on the command line
wb = xl.load_workbook(filename=args.xlsx)

# one tab-separated output file is opened per worksheet
for ws in wb:

    basename = path2name(args.xlsx)
    sheet = ws.title
    # collapse every run of non-alphanumeric characters in the title to "_"
    sheet = re.sub("[^A-Za-z0-9]+", "_", sheet)
    # output name: <basename>.<sanitized sheet title>.tsv
    newname = "{}.{}.tsv".format(basename, sheet)
    fh = open(newname, "w")
    ww = csv.writer(fh, csv.excel_tab)
    # NOTE(review): the visible code ends here; no rows are written and fh
    # is not closed in this excerpt. Confirm against the full script.
예제 #13
0
파일: frame.py 프로젝트: sagun98/zopy
 def __getitem__(self, field):
     """Return the stored value for ``field``; die if it is not stored."""
     if field in self.data:
         return self.data[field]
     die("Non-existing field:", field)
     return self.data[field]
예제 #14
0
파일: stats.py 프로젝트: franzosa/zopy
def curve_check(labels, scores):
    """Validate curve inputs; always returns None.

    Dies when ``labels`` holds anything other than 0/1, or when ``labels``
    and ``scores`` differ in length.
    """
    binary = {0, 1}
    if not set(labels).issubset(binary):
        die("non-binary [0, 1] labels vector")
    if len(labels) != len(scores):
        die("labels and scores have non-equal lengths")
    return None
예제 #15
0
 def report(self, *args, **kwargs):
     """Emit a message prefixed with the table's source name.

     The positional arguments are stringified and joined with spaces. With
     ``die=True`` the message goes to ``zu.die``; otherwise it goes to
     ``zu.say``, and only when ``self.verbose`` is set.
     """
     prefix = self.sourcename
     message = " ".join(str(part) for part in args)
     items = [prefix, "::", message]
     fatal = kwargs.get("die", False)
     if fatal:
         zu.die(*items)
     elif self.verbose:
         zu.say(*items)
예제 #16
0
파일: zojoin.py 프로젝트: sagun98/zopy
    for items in csv.reader(fh, dialect="excel-tab"):
        lengths2.append(len(items))
        if headers2 is None and args.head2:
            headers2 = c_sep.join(items)
            continue
        key = items[args.key2]
        d.setdefault(key, {})["\t".join(items)] = 1
print("finished loading file2", file=sys.stderr)

# make dummy line to add when join fails
if len(set(lengths2)) != 1:
    # file2 rows differ in field count (also true when no rows were recorded)
    warn("file2 lines have unequal lengths")
    if args.het:
        # heterogeneous rows accepted: the dummy line is a single c_na
        dummyline2 = c_na
    else:
        die()
else:
    # uniform width: one c_na per field, tab-joined
    dummyline2 = "\t".join(c_na for k in range(lengths2[0]))
# with no header row in file2, the dummy line stands in for its headers
if not args.head2:
    headers2 = dummyline2

# load first file, print join
# state for the file1 pass; populated by the loop that follows
counts = Counter()
lengths1 = []
hits = {}
headers1 = None
with (try_open(args.file1) if args.file1 != "-" else sys.stdin) as fh:
    for items in csv.reader(fh, dialect="excel-tab"):
        line = "\t".join(items)
        lengths1.append(len(items))
        if headers1 is None and args.head1: