# Fragment: the `if` that pairs with the leading `else:` lies before this line.
# Visible steps: make sure `ins` ends with "/", list its entries with os.listdir,
# reset tot_pos / numbars / donepercent / i, then walk the listing. Per entry the
# code either calls core.loadingBar (disp_file == 0) or prints a header block
# (disp_file == 1), skips names that do not contain ".fa" anywhere, and loads the
# remaining ones with core.fastaGetDict(ins + each).
# NOTE(review): core.loadingBar is presumably a progress display -- confirm in core.
# NOTE(review): whitespace was collapsed on this line, so block nesting (e.g. where
# the `else:` body ends) cannot be read from it; verify against the original script.
else: if not ins.endswith("/"): ins = ins + "/" filelist = os.listdir(ins) tot_pos = 0 numlines = len(filelist) numbars = 0 donepercent = [] i = 0 for each in filelist: if disp_file == 0: numbars, donepercent = core.loadingBar(i, numlines, donepercent, numbars) elif disp_file == 1: print "----------" print each print "Sequence\tLength" i = i + 1 if each.find(".fa") == -1: continue specpos = 0 infilename = ins + each inseqs = core.fastaGetDict(infilename)
# Fragment: the first statement bumps num_aligns when `a` contains ".fa"; the
# construct that supplies `a` is not visible here.
# Visible steps afterwards: gb_logfile is set to outdir + "gblocks.log" when
# v == 0, a message containing num_aligns is passed to core.logCheck, counters are
# reset, and filelist is walked. Per entry: core.loadingBar is called only when
# v == 0 and fileflag == 0; names without ".fa" are skipped; with fileflag == 1
# the entry itself is the input path and the output name is the text between the
# last "/" (if any) and the first ".fa", plus "-gb.fa"; otherwise the input path
# is indir + each and the output name is the text before the first ".fa" plus
# "-gb.fa".
# NOTE(review): the log message spells "Runnning" with three n's; it is runtime
# text and is left untouched here.
# NOTE(review): whitespace was collapsed on this line, so it cannot be determined
# from it whether core.logCheck is guarded by `if v == 0`; verify against the
# original script.
if a.find(".fa") != -1: num_aligns = num_aligns + 1; if v == 0: gb_logfile = outdir + "gblocks.log"; core.logCheck(l, logfilename, core.getTime() + " | Runnning GBlocks on " + str(num_aligns) + " alignments..."); acc_mask = 0; i = 0; numbars = 0; donepercent = []; for each in filelist: if v == 0 and fileflag == 0: numbars, donepercent = core.loadingBar(i, len(filelist), donepercent, numbars); i = i + 1; if each.find(".fa") == -1: continue; if fileflag == 1: infilename = each; if each.find("/") != -1: gb_outfile = each[each.rfind("/")+1:each.index(".fa")] + "-gb.fa"; else: gb_outfile = each[:each.index(".fa")] + "-gb.fa"; else: infilename = indir + each; gb_outfile = each[:each.index(".fa")] + "-gb.fa";
# Fragment: gb_logfile is set to outdir + "gblocks.log" when v == 0, a message
# containing num_aligns is passed to core.logCheck, counters are reset, and
# filelist is walked. Per entry: core.loadingBar is called only when v == 0 and
# fileflag == 0; names without ".fa" are skipped; with fileflag == 1 the entry
# itself is the input path and the output name is the text between the last "/"
# (if any) and the first ".fa", plus "-gb.fa"; otherwise the input path is
# indir + each and the output name is the text before the first ".fa" plus
# "-gb.fa".
# NOTE(review): the log message spells "Runnning" with three n's; it is runtime
# text and is left untouched here.
# NOTE(review): whitespace was collapsed on this line, so it cannot be determined
# from it whether core.logCheck is guarded by `if v == 0`; verify against the
# original script.
if v == 0: gb_logfile = outdir + "gblocks.log" core.logCheck( l, logfilename, core.getTime() + " | Runnning GBlocks on " + str(num_aligns) + " alignments...") acc_mask = 0 i = 0 numbars = 0 donepercent = [] for each in filelist: if v == 0 and fileflag == 0: numbars, donepercent = core.loadingBar(i, len(filelist), donepercent, numbars) i = i + 1 if each.find(".fa") == -1: continue if fileflag == 1: infilename = each if each.find("/") != -1: gb_outfile = each[each.rfind("/") + 1:each.index(".fa")] + "-gb.fa" else: gb_outfile = each[:each.index(".fa")] + "-gb.fa" else: infilename = indir + each gb_outfile = each[:each.index(".fa")] + "-gb.fa"
# Fragment: the `if` that pairs with the leading `else:` lies before this line.
# Visible steps: make sure `ins` ends with "/", list its entries with os.listdir,
# reset tot_pos / numbars / donepercent / i, then walk the listing. Per entry the
# code either calls core.loadingBar (disp_file == 0) or prints a header block
# (disp_file == 1), skips names that do not contain ".fa" anywhere, and loads the
# remaining ones with core.fastaGetDict(ins + each).
# NOTE(review): core.loadingBar is presumably a progress display -- confirm in core.
# NOTE(review): whitespace was collapsed on this line, so block nesting (e.g. where
# the `else:` body ends) cannot be read from it; verify against the original script.
else: if not ins.endswith("/"): ins = ins + "/"; filelist = os.listdir(ins); tot_pos = 0; numlines = len(filelist); numbars = 0; donepercent = []; i = 0; for each in filelist: if disp_file == 0: numbars, donepercent = core.loadingBar(i, numlines, donepercent, numbars); elif disp_file == 1: print "----------"; print each; print "Sequence\tLength"; i = i + 1; if each.find(".fa") == -1: continue; specpos = 0; infilename = ins + each; inseqs = core.fastaGetDict(infilename);
print "# " + core.getTime() + " Retrieving key IDs in both files..." comb_ids = list(set.intersection(*map(set, key_ids.values()))) num_ids = len(comb_ids) print "# -------------------------------------" print "# " + core.getTime( ) + " Retrieving lines with shared key IDs (all combinations)..." i = 0 numbars = 0 donepercent = [] outfile = open(outfilename, "w") for cid in comb_ids: numbars, donepercent = core.loadingBar(i, num_ids, donepercent, numbars) i += 1 cur_id = {} for each in infiles: cur_id[each] = [] for line in file_lines[each]: if line[0] == cid: cur_id[each].append(line) id_vals = [] for each in cur_id: id_vals.append(cur_id[each]) for b in itertools.product(*id_vals): outline = b[0] + b[1][1:] outline = "\t".join(outline) + "\n"
main_seq_dict[spec][new_title] = tmp_seq_dict[spec][title] del tmp_seq_dict print "# -------------------------------------" count = core.getFileLen(infilename) print "# " + core.getTime() + " Combining", count, "orthologs..." i = 0 numbars = 0 donepercent = [] nonorth = 0 for line in open(infilename): numbars, donepercent = core.loadingBar(i, count, donepercent, numbars) tmpline = line.replace("\n", "").split("\t") if i == 0: numspec = len(tmpline) i = i + 1 if len(tmpline) != numspec: print line nonorth = nonroth + 1 continue sn = 0 finalseqs = {} for gid in tmpline: if sn == 0:
fcritcount = 0; ocritcount = 0; zcount = 0; noln_genes = []; i = 0; numfiles = len(filelist); numbars = 0; donepercent = []; print "Performing LRT on PAML output to test for positive selection..."; for each in filelist: if each.find(".fa") == -1: continue; numbars, donepercent = core.loadingBar(i, numfiles, donepercent, numbars); i = i + 1; # gid = each[:each.index(".fa")]; altfilename = os.path.join(altdir + "codeml_out", each, each + ".out"); nullfilename = os.path.join(nulldir + "codeml_out", each, each + ".out"); #Reading the alt file... altfile = open(altfilename, "r"); altlines = altfile.readlines(); altfile.close(); altlnflag = 0; for alt in altlines: if alt[:3] == "lnL": if alt.find("nan") != -1:
fcritcount = 0 ocritcount = 0 zcount = 0 noln_genes = [] i = 0 numfiles = len(filelist) numbars = 0 donepercent = [] print "Performing LRT on PAML output to test for positive selection..." for each in filelist: if each.find(".fa") == -1: continue numbars, donepercent = core.loadingBar(i, numfiles, donepercent, numbars) i = i + 1 gid = each[: each.index(".fa")] altfilename = os.path.join(altdir + "codeml_out", gid, gid + ".out") nullfilename = os.path.join(nulldir + "codeml_out", gid, gid + ".out") # Reading the alt file... altfile = open(altfilename, "r") altlines = altfile.readlines() altfile.close() altlnflag = 0 for alt in altlines: if alt[:3] == "lnL": if alt.find("nan") != -1: