names_number = 0 for line in f: if line[0] == ">": #write new name to name file and phylip document h.write(new_name + "\n") g.write("\n" + new_name + " ") #write this line to name file (minus >) h.write(line.strip('>')) #set up next name names_number = names_number + 1 new_name = new_name[:-len(str(int(names_number)))] + str(names_number) else: g.write(''.join(line.split())) dante.log ("Program ran: FASTAtoPHYLIPandNAMES.py") dante.log ("Input file: " + file_name) dante.log ("Output file 1: " + file_name_phylip) dante.log ("Output file 2: " + file_name_names) except: traceback.print_exc(file=sys.stdout) exit(0) #TEST COMMAND # python link/dante/FASTAtoPHYLIPandNAMES.py link/dante/testfiles/TestAlign.fasta
#REQUIRES BIOPYTHON TO BE INSTALLED
#REQUIRES DANTE MODULE
#
# BLAST_Summary.py
# For every input file returned by dante.makeFileList(sys.argv), calls
# dante.BLASTSummary(input, output) and prints a short report naming the
# input and output files.
#
# Exit status: 0 when every file was processed, 1 when an error aborted
# the run (the traceback is printed to stdout and sent to dante.log).

import sys  #for helping with command line interface
import dante  #main dante functions
import traceback  #for helping with errors

try:
    dante.log("Running BLAST_Summary.py")

    #allows multiple inputs at command line or will ask for an input file
    #gets a list of fasta files
    for filename in dante.makeFileList(sys.argv):
        # NOTE(review): presumably builds the output path under
        # Desktop/Output from the input name plus the ".summary.tsv"
        # suffix -- confirm against dante.makeNewFileName.
        file_name_new = dante.makeNewFileName(
            "Desktop/Output", filename, ".summary.tsv")

        dante.log("Original File Name: " + filename)
        dante.log("New File Name: " + file_name_new)

        dante.BLASTSummary(filename, file_name_new)

        # Single-argument print(...) emits the same text as the old
        # `print "label: ", value` statements (label, its trailing space,
        # the separator space, then the value) and parses on Python 2 and 3.
        print("_____Summary_____")
        print("For filename:  %s" % (filename,))
        print("Output file:  %s" % (file_name_new,))

except Exception:
    # Catch Exception rather than everything so Ctrl-C (KeyboardInterrupt)
    # and SystemExit are not reported as ordinary failures.
    traceback.print_exc(file=sys.stdout)
    dante.log(traceback.format_exc())
    # Non-zero status so a calling shell or pipeline can tell the run
    # failed; sys.exit is used because the bare exit() helper is only
    # installed by the site module for interactive sessions.
    sys.exit(1)
#REQUIRES BIOPYTHON TO BE INSTALLED #REQUIRES NCBI+ TO BE INSTALLED #REQUIRES DANTE MODULE from Bio.Blast import NCBIXML from Bio.Blast.Applications import NcbiblastnCommandline import sys #for helping with command line interface import dante #main dante functions import traceback #for helping with errors try: dante.log("Running BLAST.GetCloseReps.py") #Get search information from user rep_number = dante.getNumber("How many representatives do you want?") dante.log("How many representatives do you want?") dante.log(rep_number) database = str(raw_input("Which database do you want to search?")) dante.log("Which database do you want to search?") blast_db_format = 5 #exports hits as XML dante.log("Database format") dante.log(blast_db_format) #allows multiple inputs at command line or will ask for an input file #gets a list of fasta files for filename in dante.makeFileList(sys.argv): dante.log("Input Filename") dante.log(filename) file_name_xml = dante.makeNewFileName("Desktop/Output", filename, str(".BLAST." + database + ".top" + str(rep_number) + "hits.xml"))
result_handle = open(filename,'r') #open the xml file for reading blast_records = NCBIXML.parse(result_handle) #parses the file to a blast_records object total = 0 for record in blast_records: #go through every record generated for alignment in record.alignments: name_set.add(alignment.title.split('|')[1]) #adds id number to set (removes duplicates) new_file = dante.makeNewFileName ('Desktop/Output', filename, "seqs.fasta") with open(new_file,'w') as f: for value in name_set: #walks through every id number #Biopython for retreving fasta files handle = Entrez.efetch(db="nucleotide", id=value, rettype="fasta", retmode="text") f.write(handle.read()) dante.log("Program Ran: BlastXMLtoFasta.py") dante.log("Input file: " + filename) dante.log("Output file:" + new_file) except: traceback.print_exc(file=sys.stdout) exit(0) #Test File # python link/dante/BlastXMLtoFasta.py link/dante/testfiles/TestBLAST.xml