def test_run_local_alignment():
    """Manually exercise the alignment pipeline on the first candidate.

    Reads the candidate list, writes the first candidate sequence to
    ``./tmp/<name>.fasta``, runs the local alignment against
    ``./genome_list.table``, prepares the records for the multiple
    alignment, and finally runs the multiple sequence alignment ("Emma").
    Intermediate results are printed to stdout; nothing is asserted or
    returned.

    NOTE(review): the helpers are called here with fewer / different
    arguments than in ``run()`` (e.g. ``run_local_alignment`` gets two
    arguments here and three there) -- confirm this test still matches
    the helpers' current signatures.
    """
    def banner(title):
        # Frame a section title between two rows of 100 asterisks.
        stars = "*" * 100
        print(stars)
        print(title)
        print(stars)

    # Each entry is a (name, seq_obj) tuple; only the first one is used.
    candidates = get_seq_list("./NC_000913.fna.gz", "./candidate_list.table")
    first_name = candidates[0][0]
    first_seq = candidates[0][1]

    # The FASTA path doubles as the record's id and name.
    fasta_path = "./tmp/%s.fasta" % first_name
    base_record = SeqRecord(first_seq, id=fasta_path, name=fasta_path)
    SeqIO.write(base_record, fasta_path, "fasta")

    alignments = run_local_alignment(fasta_path, "./genome_list.table")
    banner("local alignments results")
    for row in alignments:
        print(row)

    records_for_emma = prepare_for_multiple_aignment(base_record, alignments)
    banner("records for emma")
    for record in records_for_emma:
        print(record)

    banner("Running Emma")
    run_multiple_sequence_alignment(records_for_emma)
def _ensure_dir(path):
    """Create directory *path* (and any missing parents) if it is absent."""
    try:
        os.makedirs(path)
    except OSError:
        # Python 2's os.makedirs has no exist_ok flag: tolerate only the
        # "already exists as a directory" case and re-raise real failures.
        if not os.path.isdir(path):
            raise


def _print_banner(title):
    """Print *title* framed by two rows of 100 asterisks."""
    stars = "*" * 100
    print(stars)
    print(title)
    print(stars)


def run(base_genome_file, candidate_list_file, workdir, genome_list_file,
        project_to_taxamonies_file, tree_file):
    """Run the full per-candidate pipeline and write results under *workdir*.

    For every candidate returned by ``get_seq_list`` the function:

    1. writes the candidate sequence to
       ``<workdir>/candiates/<name>/<name>.fasta``;
    2. runs the local alignment against *genome_list_file*;
    3. writes a score histogram to ``<workdir>/stats/<name>.png``;
    4. prepares the records for the multiple alignment and runs it,
       producing ``emma.aln`` / ``emma.dnd`` in the candidate directory;
    5. runs Rate4Site on ``emma.aln`` with *tree_file*, writing
       ``<workdir>/results/<name>.rate``.

    Progress and intermediate results are printed to stdout.

    Args:
        base_genome_file: passed to ``get_seq_list`` as the genome source.
        candidate_list_file: passed to ``get_seq_list`` as the candidate list.
        workdir: root output directory; created if missing, together with
            its ``logs``, ``aln``, ``stats``, ``results`` and ``candiates``
            sub-directories.
        genome_list_file: passed to ``run_local_alignment``.
        project_to_taxamonies_file: source for both project-id -> tax-id
            conversion tables.
        tree_file: passed to ``Rate4Site``.

    Raises:
        OSError: if one of the output directories cannot be created.
    """
    # Create the output tree with os.makedirs rather than a shell
    # "mkdir -p" string, so paths containing spaces or shell
    # metacharacters work and creation failures are not silently ignored.
    _ensure_dir(workdir)
    for subdir in ("logs", "aln", "stats", "results"):
        _ensure_dir(os.path.join(workdir, subdir))
    # The directory name (including its spelling) is kept as-is so that
    # existing output trees remain valid.
    candidates_path = os.path.join(workdir, "candiates")
    _ensure_dir(candidates_path)

    seq_list = get_seq_list(base_genome_file, candidate_list_file)
    conv_table = build_project_id_to_tax_id_dictionary(
        project_to_taxamonies_file)
    secondary_conv_table = build_secondary_project_id_to_tax_id_dictionary(
        project_to_taxamonies_file)

    # seq_list holds tuples of (name, seq_obj)
    for seq in seq_list:
        name = seq[0]
        candid_workdir = os.path.join(candidates_path, name)
        _ensure_dir(candid_workdir)
        print(seq)

        # Generate sequence file
        seq_file_name = "%s/%s.fasta" % (candid_workdir, name)
        base_record = SeqRecord(seq[1], id=name, name=name)
        SeqIO.write(base_record, seq_file_name, "fasta")

        # Create local alignments for the sequence.  The caller-supplied
        # workdir is passed here; the module-level WORK_DIR used previously
        # ignored this function's own argument.
        local_alignment_list = run_local_alignment(seq_file_name,
                                                   genome_list_file,
                                                   workdir)
        _print_banner("local alignments results")
        for row in local_alignment_list:
            print(row)

        # Generate histogram
        generate_score_histogram("%s/stats/%s.png" % (workdir, name),
                                 local_alignment_list)

        # Extract only the significant alignments
        records_for_emma = prepare_for_multiple_aignment(
            local_alignment_list, conv_table, secondary_conv_table)
        _print_banner("records for emma")
        for record in records_for_emma:
            print(record)

        _print_banner("Running Emma")
        # Run emma
        alignment_file = "%s/emma.aln" % candid_workdir
        run_multiple_sequence_alignment(records_for_emma,
                                        alignment_file,
                                        "%s/emma.dnd" % candid_workdir,
                                        candid_workdir)

        # Run Rate4Site
        rate_runner = Rate4Site(alignment_file, tree_file, "./rate4site64")
        rate_runner.runRate(outname="%s/results/%s.rate" % (workdir, name))