def query_seqs_in_file(filename, new_dir_path):
    """BLAST every FASTQ record in *filename* and store the results on disk.

    For each record, the response returned by ``blastquery.query`` is written
    to ``file_<i>.xml`` inside *new_dir_path* (``i`` counts records from 0).
    After each record, ``classifier.getSpeciesFromXMLs`` is re-run over all
    XML files written so far and its result is dumped to ``data.json`` in the
    same directory, so the JSON file reflects the records processed up to
    that point. If the input contains no records, no files are written.

    Args:
        filename: Path of the FASTQ file to parse.
        new_dir_path: Directory receiving the output files. It is created
            with ``os.mkdir`` when missing, so its parent must already
            exist. A trailing path separator is accepted but not required.

    Returns:
        None.
    """
    print("printing out sequences!")

    # Explicit existence check instead of a bare ``except:`` around
    # os.stat(), which would also swallow KeyboardInterrupt and real bugs.
    if not os.path.exists(new_dir_path):
        os.mkdir(new_dir_path)

    # os.path.join keeps the outputs inside the directory whether or not the
    # caller supplied a trailing separator.
    json_dest = os.path.join(new_dir_path, "data.json")
    file_names = []

    for i, record in enumerate(SeqIO.parse(filename, "fastq")):
        print("printing out a sequence!")
        print("%s: %d" % (record.id, len(record.seq)))

        xml_obj = blastquery.query(record.seq)
        xml_dest = os.path.join(new_dir_path, "file_%d.xml" % i)
        # The with-statement closes the file; no explicit close() is needed.
        with open(xml_dest, 'w') as xml_file:
            xml_file.write(xml_obj.read())
        file_names.append(xml_dest)

        # Re-classify everything gathered so far and overwrite the summary.
        # NOTE(review): the meaning of the second argument (0) is defined by
        # classifier.getSpeciesFromXMLs - confirm against that module.
        species_counts = classifier.getSpeciesFromXMLs(file_names, 0)
        with open(json_dest, 'w') as json_file:
            json.dump(species_counts, json_file)
import os

from classifier import getSpeciesFromXMLs
from confusionMat import getConfusionMatrix

# os.listdir() yields bare file names. Python's open() resolves relative
# paths against the current working directory, so these names are usable
# as-is for the calls below; any other directory would need absolute paths.
xmls = [entry for entry in os.listdir(os.getcwd()) if entry.endswith(".xml")]

speciesDict = getSpeciesFromXMLs(xmls, 1)
confusionMat = getConfusionMatrix("Salmo salar", xmls, speciesDict)

# Print the answer to question 3: one "<key> : <value>" line per entry.
for label in confusionMat:
    print(label + " : " + str(confusionMat[label]))