Пример #1
0
 def __init__(self, path):
     """Build `self.loci` from the rows of the file at `path`.

     Rows whose first field starts with "#" are skipped. Every other row
     is wrapped in a `Locus`, together with its 1-based position among
     the rows that were kept.
     """
     self.loci = []
     counter = 0
     for row in zu.iter_rows(path):
         # startswith() tolerates an empty first field, where indexing
         # row[0][0] would raise IndexError.
         # NOTE(review): assumes row fields are strings -- confirm against
         # zu.iter_rows.
         if row[0].startswith("#"):
             continue
         counter += 1
         self.loci.append(Locus(row, counter))
Пример #2
0
def shapeize( meta, p_smap, ax ):
    """Translate each entry of `meta` into a marker read from `p_smap`.

    Every row of `p_smap` must hold exactly two fields: a metadata value
    and the marker to use for it. Each row is echoed to stdout. For every
    mapped value that occurs in `meta`, an empty black scatter series
    labelled with that value is added to `ax`.

    Returns a list parallel to `meta` holding the marker for each entry,
    or "." for entries that have no mapping.
    """
    smap = {}
    for row in iter_rows( p_smap ):
        # parenthesized form: prints the same under Python 2 and is also
        # valid syntax under Python 3 (the bare print statement is not)
        print( row )
        m, s = row
        smap[m] = s
        if m in meta:
            # no data points are passed; only the marker and label are set
            # (presumably so the value shows up in the legend)
            ax.scatter( [], [], color="black", marker=s, label=m )
    return [smap.get( m, "." ) for m in meta]
Пример #3
0
def colorize( meta, p_cmap, ax ):
    """Translate each entry of `meta` into a color read from `p_cmap`.

    Every row of `p_cmap` must hold exactly two fields: a metadata value
    and the color to use for it. Each row is echoed to stdout. For every
    mapped value that occurs in `meta`, an empty square-marker scatter
    series in that color, labelled with the value, is added to `ax`.

    Returns a list parallel to `meta` holding the color for each entry,
    or "black" for entries that have no mapping.
    """
    cmap = {}
    for row in iter_rows( p_cmap ):
        # parenthesized form: prints the same under Python 2 and is also
        # valid syntax under Python 3 (the bare print statement is not)
        print( row )
        m, c = row
        cmap[m] = c
        if m in meta:
            # no data points are passed; only the color and label are set
            # (presumably so the value shows up in the legend)
            ax.scatter( [], [], color=c, marker="s", label=m )
    return [cmap.get( m, "black" ) for m in meta]
Пример #4
0
def loader( path, field ):
    """Collect one column of the file at `path` into a set.

    `field` is the 1-based index of the column to read. When the
    module-level `args.unique` is set, the raw values are stored. Otherwise
    each value is stored as a (value, n) tuple, where n counts how many
    times that value has been seen so far, so repeated values remain
    distinct members of the set.
    """
    occurrences = {}
    members = set( )
    for row in zu.iter_rows( path ):
        value = row[field - 1]
        if args.unique:
            members.add( value )
        else:
            # running count turns the 2nd, 3rd, ... repeat into a new member
            occurrences[value] = occurrences.get( value, 0 ) + 1
            members.add( (value, occurrences[value]) )
    return members
Пример #5
0
 def __init__(self, source=None, allowed=None):
     """Load a table with a header row into per-column lists.

     source  -- passed to `iter_rows`; defaults to `sys.stdin` when None.
     allowed -- optional container of header names; when given, only
                those columns are kept in `self.fields` and `self.data`.

     The first row supplies the headers (`self.fields`). `self.data` maps
     each kept header to the list of its values in row order. Raises
     StopIteration if the source yields no rows at all.
     """
     self.source = source if source is not None else sys.stdin
     self.data = {}
     rows = iter_rows(self.source)
     # next() builtin works on Python 2.6+ and Python 3; the .next()
     # method exists on Python 2 iterators only
     self.fields = next(rows)
     for row in rows:
         # zip stops at the shorter of header and row, so extra cells in a
         # long row are dropped and a short row leaves later columns untouched
         for h, v in zip(self.fields, row):
             if allowed is None or h in allowed:
                 self.data.setdefault(h, []).append(v)
     if allowed is not None:
         self.fields = [k for k in self.fields if k in allowed]
     # add robustness to fields-only file
     for f in self.fields:
         self.data.setdefault(f, [])
Пример #6
0
parser = argparse.ArgumentParser( )
parser.add_argument( "mapping", help="" )
parser.add_argument( "--cmap", default="Paired", help="" )
parser.add_argument( "--prioritize", default=15, type=int, help="" )
parser.add_argument( "--max-colors", default=None, type=int, help="" )
parser.add_argument( "--seed", default=1701, type=int, help="" )
args = parser.parse_args( )

# seeding makes the shuffle of the color list below repeatable between runs
random.seed( args.seed )

c_cmap = args.cmap
c_ntop = args.prioritize
c_nmax = args.max_colors

# each mapping row is either [name] or [name, weight, ...];
# rows without a weight are given 0
pairs = []
for row in iter_rows( args.mapping ):
    if len( row ) == 1:
        pairs.append( [0, row[0]] )
    else:
        # weight is negated so the ascending sort puts the largest first
        pairs.append( [-float( row[1] ), row[0]] )
pairs.sort( )

cmap = plt.get_cmap( c_cmap )
cmap_max = cmap.N

# get evenly spaced colors for the top n features
# (the divisor is clamped to 1: with --prioritize 1 the plain c_ntop - 1
#  is zero and the division would raise ZeroDivisionError)
even = [cmap( int( k * cmap_max / max( c_ntop - 1, 1 ) ) ) for k in range( c_ntop )]
random.shuffle( even )

colors = {}
for i, pair in enumerate( pairs ):
Пример #7
0
parser.add_argument("--require-cafa-code", action="store_true")
parser.add_argument("--gene-prefix", default="")
args = parser.parse_args()

#-------------------------------------------------------------------------------
# constants
#-------------------------------------------------------------------------------

# NOTE(review): presumably the evidence codes accepted under
# --require-cafa-code; the code that uses this set is outside this view
cafa_codes = {"EXP", "TAS", "IC"}

#-------------------------------------------------------------------------------
# load gene set
#-------------------------------------------------------------------------------

# the gene set is the first column of every row in the gene list file
genes = {fields[0] for fields in iter_rows(args.gene_list)}
say("Loaded", len(genes), "genes")

#-------------------------------------------------------------------------------
# process goa file
#-------------------------------------------------------------------------------
"""
The GOA (.gaf) file is tab-delimited. 
Comment lines start with "!"
Col2 is the uniprot id (a superset of uniref50).
Col5 is the Gene Ontology annotation.
Col4 is a logical modifier of the uniprot->go mapping.
  Must exclude the cases where this is "NOT".
Col7 is a short evidence code
"""
Пример #8
0
# Convert one or more tab-separated files into a single workbook,
# one sheet per input file.
parser = argparse.ArgumentParser()
parser.add_argument("tsv_files", nargs="+", help="")
parser.add_argument("--outfile", default=None)
args = parser.parse_args()

wb = xl.Workbook()
sheets = []
for i, p in enumerate(args.tsv_files):
    # the first file goes on the workbook's active sheet;
    # every later file gets a newly created sheet
    ws = wb.active if i == 0 else wb.create_sheet()
    ws.title = path2name(p)
    sheets.append(ws)

for p, ws in zip(args.tsv_files, sheets):
    for i, row in enumerate(iter_rows(p)):
        for j, val in enumerate(row):
            # store numeric-looking cells as numbers, everything else as-is;
            # only float()'s own conversion errors are ignored, so that
            # KeyboardInterrupt / SystemExit are no longer swallowed
            try:
                val = float(val)
            except (TypeError, ValueError):
                pass
            # cell coordinates are passed 1-based
            ws.cell(row=i + 1, column=j + 1, value=val)

# output name: explicit --outfile, else derived from a lone input file,
# else a fixed default for multi-file runs
if args.outfile is not None:
    outfile = args.outfile
elif len(args.tsv_files) == 1:
    outfile = "{}.xlsx".format(path2name(args.tsv_files[0]))
else:
    outfile = "multisheet.xlsx"
wb.save(filename=outfile)