def __init__(self, path):
    """Build the list of loci from the rows of the file at `path`.

    Rows whose first field begins with "#" are skipped. Every remaining
    row is wrapped in a `Locus` together with its 1-based position among
    the retained rows.
    """
    self.loci = []
    # comment rows are dropped before numbering, so they consume no index
    data_rows = (row for row in zu.iter_rows(path) if row[0][0] != "#")
    self.loci.extend(
        Locus(row, index) for index, row in enumerate(data_rows, 1)
    )
def shapeize(meta, p_smap, ax):
    """Translate metadata values into marker shapes.

    Reads two-field rows (value, marker) from `p_smap`. For every mapped
    value that also occurs in `meta`, an empty scatter series is added to
    `ax` so that the value gets a legend entry drawn in black with its
    marker.

    Args:
        meta: sequence of metadata values, one per plotted point.
        p_smap: path to the value-to-marker mapping file.
        ax: axes-like object exposing `scatter`.

    Returns:
        A list parallel to `meta` holding the marker for each value;
        values missing from the mapping fall back to ".".

    Raises:
        ValueError: if a mapping row does not have exactly two fields.
    """
    smap = {}
    for row in iter_rows(p_smap):
        # Echo each mapping row (pre-existing behavior). The call form
        # prints the same text on Python 2 and is valid on Python 3.
        print(row)
        m, s = row
        smap[m] = s
        if m in meta:
            # empty series: contributes only a legend handle, no points
            ax.scatter([], [], color="black", marker=s, label=m)
    return [smap.get(m, ".") for m in meta]
def colorize(meta, p_cmap, ax):
    """Translate metadata values into colors.

    Reads two-field rows (value, color) from `p_cmap`. For every mapped
    value that also occurs in `meta`, an empty scatter series is added to
    `ax` so that the value gets a legend entry drawn as a square in its
    color.

    Args:
        meta: sequence of metadata values, one per plotted point.
        p_cmap: path to the value-to-color mapping file.
        ax: axes-like object exposing `scatter`.

    Returns:
        A list parallel to `meta` holding the color for each value;
        values missing from the mapping fall back to "black".

    Raises:
        ValueError: if a mapping row does not have exactly two fields.
    """
    cmap = {}
    for row in iter_rows(p_cmap):
        # Echo each mapping row (pre-existing behavior). The call form
        # prints the same text on Python 2 and is valid on Python 3.
        print(row)
        m, c = row
        cmap[m] = c
        if m in meta:
            # empty series: contributes only a legend handle, no points
            ax.scatter([], [], color=c, marker="s", label=m)
    return [cmap.get(m, "black") for m in meta]
def loader(path, field):
    """Collect the values found in one column of the file at `path`.

    `field` is a 1-based column number. When the global `args.unique` is
    set, the raw values are collected. Otherwise each value is paired with
    its running occurrence count, so repeated values stay distinct in the
    returned set.

    Returns:
        A set of values, or of (value, occurrence) tuples.
    """
    occurrences = {}
    keys = set()
    for row in zu.iter_rows(path):
        value = row[field - 1]
        if args.unique:
            keys.add(value)
            continue
        # n-th sighting of a value is stored as (value, n)
        occurrences[value] = occurrences.get(value, 0) + 1
        keys.add((value, occurrences[value]))
    return keys
def __init__(self, source=None, allowed=None):
    """Load a headed table column-wise.

    The first row yielded by `iter_rows` supplies the field names; every
    later row contributes one value per field to `self.data`, which maps
    each field name to the list of its values.

    Args:
        source: where to read rows from; defaults to `sys.stdin`.
        allowed: optional container of field names to keep. When given,
            other fields are dropped from `self.fields` and `self.data`.

    An empty source yields no fields and no data instead of letting
    `StopIteration` escape from the constructor.
    """
    self.source = source if source is not None else sys.stdin
    self.data = {}
    rows = iter_rows(self.source)
    # next() works on Python 2.6+ and 3; .next() exists only on Python 2
    self.fields = next(rows, [])
    for row in rows:
        # zip stops at the shorter of header and row
        for h, v in zip(self.fields, row):
            if allowed is None or h in allowed:
                self.data.setdefault(h, []).append(v)
    if allowed is not None:
        self.fields = [k for k in self.fields if k in allowed]
    # add robustness to fields-only file
    for f in self.fields:
        self.data.setdefault(f, [])
parser = argparse.ArgumentParser( ) parser.add_argument( "mapping", help="" ) parser.add_argument( "--cmap", default="Paired", help="" ) parser.add_argument( "--prioritize", default=15, type=int, help="" ) parser.add_argument( "--max-colors", default=None, type=int, help="" ) parser.add_argument( "--seed", default=1701, type=int, help="" ) args = parser.parse_args( ) random.seed( args.seed ) c_cmap = args.cmap c_ntop = args.prioritize c_nmax = args.max_colors pairs = [] for row in iter_rows( args.mapping ): if len( row ) == 1: pairs.append( [0, row[0]] ) else: pairs.append( [-float( row[1] ), row[0]] ) pairs.sort( ) cmap = plt.get_cmap( c_cmap ) cmap_max = cmap.N # get evenly spaced colors for the top n features even = [cmap( int( k * cmap_max / (c_ntop - 1) ) ) for k in range( c_ntop )] random.shuffle( even ) colors = {} for i, pair in enumerate( pairs ):
parser.add_argument("--require-cafa-code", action="store_true")
parser.add_argument("--gene-prefix", default="")
args = parser.parse_args()

#-------------------------------------------------------------------------------
# constants
#-------------------------------------------------------------------------------

cafa_codes = {"EXP", "TAS", "IC"}

#-------------------------------------------------------------------------------
# load gene set
#-------------------------------------------------------------------------------

# the first field of every row in the gene list is a gene identifier
genes = {row[0] for row in iter_rows(args.gene_list)}
say("Loaded", len(genes), "genes")

#-------------------------------------------------------------------------------
# process goa file
#-------------------------------------------------------------------------------

"""
The GOA (.gaf) file is tab-delimited.
Comment lines start with "!"
Col2 is the uniprot id (a superset of uniref50).
Col5 is the Gene Ontology annotation.
Col4 is a logical modifier of the uniprot->go mapping.
Must exclude the cases where this is "NOT".
Col7 is a short evidence code
"""
# Convert one or more TSV files into a single workbook, one sheet per file.
parser = argparse.ArgumentParser()
parser.add_argument("tsv_files", nargs="+", help="")
parser.add_argument("--outfile", default=None)
args = parser.parse_args()

wb = xl.Workbook()
sheets = []
for i, p in enumerate(args.tsv_files):
    # the first file reuses the workbook's active sheet; later files get new ones
    if i == 0:
        sheets.append(wb.active)
    else:
        sheets.append(wb.create_sheet())
    sheets[-1].title = path2name(p)

for p, ws in zip(args.tsv_files, sheets):
    for i, row in enumerate(iter_rows(p)):
        for j, val in enumerate(row):
            # Store numeric-looking cells as numbers and keep the rest as-is.
            # Only conversion failures are ignored, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            try:
                val = float(val)
            except (TypeError, ValueError):
                pass
            # cell coordinates are 1-based
            ws.cell(row=i + 1, column=j + 1, value=val)

# output name: explicit --outfile, else derived from a lone input, else generic
if args.outfile is not None:
    outfile = args.outfile
elif len(args.tsv_files) == 1:
    outfile = "{}.xlsx".format(path2name(args.tsv_files[0]))
else:
    outfile = "multisheet.xlsx"
wb.save(filename=outfile)