def main_validate(args, stdout, stderr):
    """Filter FASTA alignments by sequence count and conservation.

    For each path in ``args.input`` the alignment is read, sequences whose
    score from ``pyalign.sequenceConservation`` is below ``args.seqcons``
    are dropped, and the mean of ``pyalign.conservationProfile`` is computed
    on what remains.  The filtered alignment is written to
    ``os.path.join(args.outDir, inputFile)`` when it still has at least
    ``args.n_seqs`` sequences and its mean conservation is at least
    ``args.conservation``.

    An alignment that fails those thresholds, or that has more than
    ``args.N_seqs`` sequences (when ``args.N_seqs`` is not None), is not
    written; if the output directory is ``"."`` the input file itself is
    deleted instead.

    Args:
        args: Parsed options.  Attributes read: ``input``, ``outDir``,
            ``N_seqs``, ``n_seqs``, ``seqcons``, ``conservation``.
            ``outDir`` is set to ``"."`` in place when it is None.
        stdout: Unused; kept for a uniform command signature.
        stderr: Writable stream receiving progress and error messages.

    Returns:
        None.
    """
    def mean(x):
        return sum(x) * 1. / len(x)

    if args.outDir is None:
        args.outDir = "."
    # With outDir == "." the run is treated as in-place filtering: rejected
    # inputs are removed from disk rather than merely skipped.
    inPlace = args.outDir == "."

    for inputFile in args.input:
        try:
            stderr.write("Processing file " + inputFile + "\n")
            aln = AlignIO.read(inputFile, "fasta")
            cleanAln = None  # stays None when the alignment is rejected
            if args.N_seqs is None or len(aln) <= args.N_seqs:
                seqScores = pyalign.sequenceConservation(aln)
                seqsKept = [seq for (seq, score) in zip(aln, seqScores)
                            if score >= args.seqcons]
                candidate = MultipleSeqAlignment(seqsKept)
                alnCons = mean(pyalign.conservationProfile(candidate))
                if (len(candidate) >= args.n_seqs
                        and alnCons >= args.conservation):
                    cleanAln = candidate
            if cleanAln is not None:
                # NOTE(review): inputFile is joined as given, so any
                # directory part it carries is kept in the output path.
                outFile = os.path.join(args.outDir, inputFile)
                with open(outFile, "w") as fo:
                    for seq in cleanAln:
                        fo.write(">" + seq.description + "\n")
                        fo.write(str(seq.seq) + "\n")
            elif inPlace:
                os.remove(inputFile)
        except Exception:
            # Best effort per file: report and move on to the next input.
            # Deliberately not a bare ``except`` so that KeyboardInterrupt
            # and SystemExit still stop the whole run.
            stderr.write("Problem with " + inputFile + "\n")
def main_consensus(args, stdout, stderr):
    """Write a consensus sequence for each FASTA alignment to ``stdout``.

    Each file in ``args.input`` is read, passed through
    ``pyalign.ungapAln`` (its ``"ungappedAln"`` entry is used), and the
    result of ``pyalign.makeConsensus`` is written to ``stdout`` as a
    two-line record headed by ``">"`` plus the file's base name.  The
    output of ``pyalign.conservationProfile`` is kept per file and can be
    dumped afterwards:

    * ``args.profiles``: one tab-separated line per file, the base name
      followed by every profile value.
    * ``args.conservation``: one line per file, the base name, a tab and
      the mean of the profile.

    A file that raises ``ValueError`` while being processed is reported on
    ``stderr`` and skipped.

    Args:
        args: Parsed options.  Attributes read: ``input``, ``profiles``,
            ``conservation`` (the last two are output paths or None).
        stdout: Writable stream receiving the consensus records.
        stderr: Writable stream receiving progress and error messages.

    Returns:
        None.

    Raises:
        AssertionError: If two input files share the same base name.
    """
    def mean(x):
        return sum(x) * 1. / len(x)

    stderr.write("Note that positions with only gaps are removed before "
                 "calculations\n")
    consensus = dict()
    profiles = dict()
    total = str(len(args.input))
    for i, fastaFile in enumerate(args.input, start=1):
        stderr.write("Processing file " + str(i) + "/" + total + " ")
        try:
            aln = pyalign.AlignIO.read(fastaFile, "fasta")
            stderr.write("- " + str(len(aln)) + " sequences ")
            stderr.write(".")
            aln = pyalign.ungapAln(aln)["ungappedAln"]
            stderr.write(".")
            k = os.path.basename(fastaFile)
            # Base names are used as record identifiers, so they must be
            # unique.  Raised explicitly (rather than via ``assert``) so the
            # check is not stripped when Python runs with -O.
            if k in consensus:
                raise AssertionError("Duplicate input file name: " + k)
            consensus[k] = pyalign.makeConsensus(aln)
            stderr.write(".")
            stdout.write(">" + k + "\n")
            stdout.write(consensus[k] + "\n")
            profiles[k] = pyalign.conservationProfile(aln)
            stderr.write(".\n")
        except ValueError:
            stderr.write("Problem with " + fastaFile + "\n")

    # A ValueError raised after the consensus was stored leaves a key with
    # no profile; restrict the output to files that have both.
    keys = [k for k in consensus if k in profiles]

    if args.profiles is not None:
        stderr.write("Writing profiles\n")
        with open(args.profiles, "w") as fo:
            for k in keys:
                fo.write("\t".join([k] + [str(x) for x in profiles[k]])
                         + "\n")

    if args.conservation is not None:
        stderr.write("Writing conservation values\n")
        with open(args.conservation, "w") as fo:
            for k in keys:
                fo.write(k + "\t" + str(mean(profiles[k])) + "\n")