def main(args):
    """Write a per-species domain count table and a species correlation matrix.

    For each species in ``args['species']`` exactly one file matching
    ``<hmmoutdir>/<species>*.hmmout`` must exist; it is converted into a
    domain-id -> count mapping with ``did2count``.  Two files are then
    written to the current working directory:

    * ``pfam-table`` -- tab-separated; header ``DID`` plus one column per
      species, one row per domain id, 0 where a species lacks the domain.
    * ``pfam-matrix.csv`` -- comma-separated species x species matrix.  Each
      off-diagonal cell holds the first value returned by
      ``stats.correlate`` for the two species' count vectors; the diagonal
      is written as the literal ``1``.

    Args:
        args: dict-like with the keys
            'species'    -- list of species names (also the column order),
            'hmmoutdir'  -- directory holding the ``*.hmmout`` files,
            'pidsfile' / 'pidsdir' -- optional, enable per-species
                            ``*.gids`` lookup (see review note below),
            'ignorefile' -- optional, file of domain ids to leave out.

    Raises:
        SystemExit: if a species does not have exactly one hmmout file, or
            (when pids lookup is enabled) exactly one ``*.gids`` file.
    """
    # NOTE(review): within the visible source ``main`` is defined twice with
    # the same body; the later definition is the one in effect at runtime.
    species_names = args['species']

    species2counts = {}
    for species in species_names:
        hmmoutfiles = glob.glob(args['hmmoutdir'] + "/" + species + "*.hmmout")
        if len(hmmoutfiles) != 1:
            sys.exit("ERROR: no single hmmout file found for species %s" % species)

        # NOTE(review): the presence check uses 'pidsfile' but the directory
        # is read from 'pidsdir'.  A caller that sets only one of the two
        # either skips the lookup or hits a KeyError -- confirm which key is
        # intended before changing it.
        if 'pidsfile' in args:
            pidsfiles = glob.glob(args['pidsdir'] + "/" + species + "*.gids")
            if len(pidsfiles) != 1:
                sys.exit("ERROR: no single pids file found for species %s" % species)
            pids = get_pids(pidsfiles[0]).keys()
        else:
            # did2count receives False when no pids lookup was requested.
            pids = False
        species2counts[species] = did2count(hmmoutfiles[0], pids)

    ignore = {}
    if 'ignorefile' in args:
        ignore = get_pids(args['ignorefile'])

    # Union of all domain ids seen in any species, minus the ignored ones.
    # A dict (not a set) is kept so the row order matches the original code.
    domain_ids = {}
    for counts in species2counts.values():
        for did in counts:
            if did not in ignore:
                domain_ids[did] = 1
    dids = list(domain_ids)

    # ``with`` guarantees the files are closed even if a lookup or the
    # correlation raises half-way through.
    with open("pfam-table", "w") as fwt:
        fwt.write("\t".join(["DID"] + species_names) + "\n")
        for did in dids:
            row = [did]
            for species in species_names:
                row.append(str(species2counts[species].get(did, 0)))
            fwt.write("\t".join(row) + "\n")

    with open("pfam-matrix.csv", "w") as fwm:
        fwm.write("," + ",".join(species_names) + "\n")
        for i, s1 in enumerate(species_names):
            fwm.write(s1)
            for j, s2 in enumerate(species_names):
                if i == j:
                    # Self-comparison is written as 1 without calling
                    # stats.correlate.
                    fwm.write(",1")
                    continue
                v1 = [species2counts[s1].get(did, 0) for did in dids]
                v2 = [species2counts[s2].get(did, 0) for did in dids]
                cor, _ = stats.correlate(v1, v2)
                fwm.write("," + str(cor))
            fwm.write("\n")
def score_pair(v1, v2, method=1):
    """Score how similar two count vectors are.

    Positions are paired by index over ``range(len(v1))``, so ``v2`` must be
    at least as long as ``v1``.

    Args:
        v1: first sequence of counts.
        v2: second sequence of counts.
        method: scoring scheme
            1 -- first value returned by ``stats.correlate(v1, v2)``;
            2 -- like 1, but positions where both vectors are 0 are dropped
                 before correlating;
            3 -- fraction of positions (relative to ``len(v1)``) where both
                 vectors are non-zero; 0.0 for empty vectors;
            4 -- number of positions where both vectors are non-zero.

    Returns:
        The score for the chosen method, or None if ``method`` is not one of
        1-4.
    """
    # NOTE(review): within the visible source ``score_pair`` is defined twice
    # with the same body; the later definition is the one in effect.
    if method == 1:
        cor, _ = stats.correlate(v1, v2)
        return cor

    if method == 2:
        x1, x2 = [], []
        for i, a in enumerate(v1):
            b = v2[i]
            if a != 0 or b != 0:
                x1.append(a)
                x2.append(b)
        cor, _ = stats.correlate(x1, x2)
        return cor

    if method in (3, 4):
        shared = sum(1 for i, a in enumerate(v1) if a != 0 and v2[i] != 0)
        if method == 4:
            return shared
        if not v1:
            # No positions to compare: avoid dividing by zero.
            return 0.0
        return float(shared) / len(v1)

    # Unknown method: keep the historical behaviour of returning None.
    return None
def score_pair(v1, v2, method=1):
    """Score how similar two count vectors are.

    Positions are paired by index over ``range(len(v1))``, so ``v2`` must be
    at least as long as ``v1``.

    Args:
        v1: first sequence of counts.
        v2: second sequence of counts.
        method: scoring scheme
            1 -- first value returned by ``stats.correlate(v1, v2)``;
            2 -- like 1, but positions where both vectors are 0 are dropped
                 before correlating;
            3 -- fraction of positions (relative to ``len(v1)``) where both
                 vectors are non-zero; 0.0 for empty vectors;
            4 -- number of positions where both vectors are non-zero.

    Returns:
        The score for the chosen method, or None if ``method`` is not one of
        1-4.
    """
    # NOTE(review): within the visible source an earlier definition of
    # ``score_pair`` with the same body exists; this later one shadows it.
    if method == 1:
        cor, _ = stats.correlate(v1, v2)
        return cor

    if method == 2:
        x1, x2 = [], []
        for i, a in enumerate(v1):
            b = v2[i]
            if a != 0 or b != 0:
                x1.append(a)
                x2.append(b)
        cor, _ = stats.correlate(x1, x2)
        return cor

    if method in (3, 4):
        shared = sum(1 for i, a in enumerate(v1) if a != 0 and v2[i] != 0)
        if method == 4:
            return shared
        if not v1:
            # No positions to compare: avoid dividing by zero.
            return 0.0
        return float(shared) / len(v1)

    # Unknown method: keep the historical behaviour of returning None.
    return None
def main(args):
    """Write a per-species domain count table and a species correlation matrix.

    For each species in ``args['species']`` exactly one file matching
    ``<hmmoutdir>/<species>*.hmmout`` must exist; it is converted into a
    domain-id -> count mapping with ``did2count``.  Two files are then
    written to the current working directory:

    * ``pfam-table`` -- tab-separated; header ``DID`` plus one column per
      species, one row per domain id, 0 where a species lacks the domain.
    * ``pfam-matrix.csv`` -- comma-separated species x species matrix.  Each
      off-diagonal cell holds the first value returned by
      ``stats.correlate`` for the two species' count vectors; the diagonal
      is written as the literal ``1``.

    Args:
        args: dict-like with the keys
            'species'    -- list of species names (also the column order),
            'hmmoutdir'  -- directory holding the ``*.hmmout`` files,
            'pidsfile' / 'pidsdir' -- optional, enable per-species
                            ``*.gids`` lookup (see review note below),
            'ignorefile' -- optional, file of domain ids to leave out.

    Raises:
        SystemExit: if a species does not have exactly one hmmout file, or
            (when pids lookup is enabled) exactly one ``*.gids`` file.
    """
    # NOTE(review): within the visible source an earlier definition of
    # ``main`` with the same body exists; this later one shadows it.
    species_names = args['species']

    species2counts = {}
    for species in species_names:
        hmmoutfiles = glob.glob(args['hmmoutdir'] + "/" + species + "*.hmmout")
        if len(hmmoutfiles) != 1:
            sys.exit("ERROR: no single hmmout file found for species %s" % species)

        # NOTE(review): the presence check uses 'pidsfile' but the directory
        # is read from 'pidsdir'.  A caller that sets only one of the two
        # either skips the lookup or hits a KeyError -- confirm which key is
        # intended before changing it.
        if 'pidsfile' in args:
            pidsfiles = glob.glob(args['pidsdir'] + "/" + species + "*.gids")
            if len(pidsfiles) != 1:
                sys.exit("ERROR: no single pids file found for species %s" % species)
            pids = get_pids(pidsfiles[0]).keys()
        else:
            # did2count receives False when no pids lookup was requested.
            pids = False
        species2counts[species] = did2count(hmmoutfiles[0], pids)

    ignore = {}
    if 'ignorefile' in args:
        ignore = get_pids(args['ignorefile'])

    # Union of all domain ids seen in any species, minus the ignored ones.
    # A dict (not a set) is kept so the row order matches the original code.
    domain_ids = {}
    for counts in species2counts.values():
        for did in counts:
            if did not in ignore:
                domain_ids[did] = 1
    dids = list(domain_ids)

    # ``with`` guarantees the files are closed even if a lookup or the
    # correlation raises half-way through.
    with open("pfam-table", "w") as fwt:
        fwt.write("\t".join(["DID"] + species_names) + "\n")
        for did in dids:
            row = [did]
            for species in species_names:
                row.append(str(species2counts[species].get(did, 0)))
            fwt.write("\t".join(row) + "\n")

    with open("pfam-matrix.csv", "w") as fwm:
        fwm.write("," + ",".join(species_names) + "\n")
        for i, s1 in enumerate(species_names):
            fwm.write(s1)
            for j, s2 in enumerate(species_names):
                if i == j:
                    # Self-comparison is written as 1 without calling
                    # stats.correlate.
                    fwm.write(",1")
                    continue
                v1 = [species2counts[s1].get(did, 0) for did in dids]
                v2 = [species2counts[s2].get(did, 0) for did in dids]
                cor, _ = stats.correlate(v1, v2)
                fwm.write("," + str(cor))
            fwm.write("\n")