def run():
    """BLAST the step-6 peptides against the known sequences and record the hits.

    Steps, in order:
      1. Retrieve the known-sequence file via ``known()``.
      2. Obtain a BLAST database for it via ``blast.check``.
      3. Run ``blast.blastp`` of ``./02-pipeline/step6.fna`` against that
         database, writing ``./02-pipeline/step7_blastp.csv``.
      4. Parse the BLASTp output and hand every hit to ``mysqlpop.annotated``.
      5. Write the parsed hits to ``./02-pipeline/step7.csv``.

    Takes no arguments and returns nothing; progress is reported on stdout.
    """
    file_step1 = './02-pipeline/step6.fna'

    # Retrieve known sequences.
    knownseqfile = known()

    # Check if a BLAST DB of step7_knownseq already exists; make it if it doesn't.
    blastdb = blast.check(knownseqfile)

    # BLAST unique mature peptides against known sequences.
    blastoutfile = './02-pipeline/step7_blastp.csv'
    blast.blastp(blastdb, file_step1, blastoutfile)

    # Parse the BLASTp output file.
    # Each row is [cds_id, known sequence, %ID, length, evalue]; the cds_id is
    # forced to str, the other fields are kept as parsed.
    print('Parsing BLASTp output...\n')
    blastout = [
        [str(hit[0]), hit[1], hit[2], hit[3], hit[4]]
        for hit in blast.parse(blastoutfile)
    ]

    # Populate the Annotated table with the BLASTp results.
    # NOTE(review): assumes mysqlpop.annotated returns the number of rows it
    # entered for the hit (the original accumulated its return value).
    print('Populating the Annotated table in the SQLite DB.')
    count = sum(mysqlpop.annotated(hit) for hit in blastout)
    print('{} hits have been entered in the SQLite annotated table.'.format(
        count))
    print('DATA ENTRY INTO MYSQL ANNOTATED TABLE IS COMPLETE')

    # Export the same rows as CSV.
    filename = './02-pipeline/step7.csv'
    header = ['cds_id', 'knownNP id', 'PID', 'length', 'evalue']
    output.csv(filename, header, blastout)
    print('The BLASTp results were written to {}.\n'.format(filename))
def main():
    """Run every stage of the program in order.

    Stages: argument validation, Blastp, Muscle, domain search, plotting of
    the Blastp results, and clean-up of leftover files.

    A stage that raises an ordinary exception stops the program: its failure
    message is printed, the error is written to stderr, and the process exits
    with status 1. ``SystemExit`` and ``KeyboardInterrupt`` are no longer
    intercepted, so an exit requested by a helper keeps its own status and
    Ctrl-C interrupts the run.
    """
    # Argument validation
    try:
        query, subject, prosite, cov, ide = ctrl_arguments()
        es_fasta(query)
        results()
    except Exception as err:
        sys.stderr.write("Error: {}\n".format(err))
        sys.exit(1)

    # Blastp
    try:
        print("EJECUTANDO MODULO BLASTP...")
        multifasta = bl.convert(subject)
        db = bl.data_base(multifasta)
        dict_query = bl.dictionary(query)
        output = bl.blastp(query, db)
        bl.blastp_final(output, cov, ide, dict_query)
        print("\n~ Se ha realizado Blastp\n")
    except Exception as err:
        print("\n~ No se ha realizado Blastp\n")
        sys.stderr.write("Error: {}\n".format(err))
        sys.exit(1)

    # Muscle
    try:
        print("EJECUTANDO MODULO MUSCLE...")
        mu.muscle()
        print("\n~ Se ha realizado Muscle\n")
    except Exception as err:
        print("\n~ No se ha realizado Muscle\n")
        sys.stderr.write("Error: {}\n".format(err))
        sys.exit(1)

    # Domains
    try:
        print("EJECUTANDO MODULO DOMINIOS...")
        db = do.parseo_dat(prosite)
        dict_pattern = do.dictionary_patterns(db)
        do.search_pattern(dict_pattern)
        print("\n~ Se ha realizado la busqueda de dominios\n")
    except Exception as err:
        print("\n~ No se ha realizado la busqueda de dominios\n")
        sys.stderr.write("Error: {}\n".format(err))
        sys.exit(1)

    # Blastp plots (reuses the Blastp output and query dictionary from above)
    try:
        print("EJECUTANDO MODULO GRAFICACIÓN...")
        gr.result_blast(output, cov, ide, dict_query)
        gr.graph_blast()
        gr.archivos_heatmap()
        gr.graph_heatmap()
        print("\n~ Se ha realizado la graficacion de los resultados"
              " del Blastp")
    except Exception as err:
        print("\n~ No se ha realizado la graficacion de los resultados"
              " del Blastp")
        sys.stderr.write("Error: {}\n".format(err))
        sys.exit(1)

    # Remove leftover files
    clear()

    # Finish
    message = (
        " -------------------------------------------------------------\n"
        " SE HA EJECUTADO EL PROGRAMA CON EXITO\n"
        " ------------------------------------------------------------- "
    )
    print(message)
# Validate the argument count BEFORE indexing sys.argv, so that a missing
# argument produces the usage error below instead of an IndexError.
# Accepted forms: <query> <subject>  or  <query> <subject> <cov> <identity>.
if len(sys.argv) not in (3, 5):
    print("Error: number of arguments introduced is not valid")
    # NOTE(review): if this file does not define its own help(), this calls
    # the interactive built-in help -- confirm.
    help()
    sys.exit(1)  # was the typo `sys.exist()`, which raised AttributeError

# Mandatory arguments
query = sys.argv[1]
subject = sys.argv[2]

# Optional arguments
if len(sys.argv) == 3:
    cov = 50
    identity = 25
else:
    # NOTE(review): these stay strings while the defaults above are ints;
    # confirm that bs.finalblast converts them itself.
    cov = sys.argv[3]
    identity = sys.argv[4]

# Run blast.py
bs.multifasta(subject)
bs.database()
dicti = bs.dictionary(query)
output = bs.blastp(query)
bs.finalblast(cov, identity, dicti)

# Run muscle.py
mc.muscle()

# Run dominios.py
a = dm.parsear()
dictionary = dm.dicti()
dm.search(dictionary)
# Load the cached BLAST-against-all results, or recompute (and optionally
# re-cache) them.
# NOTE(review): blast_all_pickle, developing, script_path, organism_name,
# blast_folder, interest_proteins_path, proteins_of_interest and args are
# expected to be defined before this point.
# NOTE(review): pickle must only be loaded from a trusted, locally written
# cache file.
try:
    # `with` closes the handle even when unpickling fails, so the file is
    # no longer open when it is removed below.
    with open(blast_all_pickle, 'rb') as pickle_file:
        subject_info_allDB = pickle.load(pickle_file)
except (EOFError, KeyError) as e:
    # The original evaluated repr(e) and discarded it; show the error.
    print(repr(e))
    print('Removing pickle file, redoing blast')
    os.remove(blast_all_pickle)

# Use `if not` instead of `else` in case the error above was raised and the
# cache file got removed.
if not developing or not os.path.isfile(blast_all_pickle):
    print('blasting against all')
    all_db_path = script_path + 'databases' + os.sep + 'allDB_' + organism_name
    # Make the all-proteins database.
    blast.makeBLASTdb(
        script_path + 'fasta_files' + os.sep + organism_name
        + '_all_proteins_smaller_than_500aa.fasta',
        all_db_path,
        blast_folder)
    blast_records = blast.blastp(
        interest_proteins_path,
        all_db_path,
        args['eval_all'],
        blast_folder,
        script_path + 'blast_results/' + organism_name + '_blastpAllOutput.xml')
    subject_info_allDB = blast.getSubjectInfo(
        blast_records, proteins_of_interest, args['eval_all'])
    if developing:
        # Cache the result so the next development run can skip BLAST.
        print('saving in ' + blast_all_pickle)
        if not os.path.exists(script_path + 'blast_results'):
            os.makedirs(script_path + 'blast_results')
        with open(blast_all_pickle, 'wb') as f:
            pickle.dump(subject_info_allDB, f)

print('len subject_info_allDB after < ' + str(args['all_size'])
      + 'a.a. Protein blast: ' + str(len(subject_info_allDB)) + '\n')
subject_info_total = dict(subject_info_allDB.items())
# Make sure the results directory exists. os.makedirs raises when the
# directory is already there, so guard it the same way as further below.
blast_results_dir = script_path + 'blast_results'
if not os.path.exists(blast_results_dir):
    os.makedirs(blast_results_dir)

# Cache file for the BLAST-against-all results, keyed by organism and e-value.
blast_all_pickle = blast_results_dir + os.sep + fix_file_names(
    organism_name + '_blast_all_' + str(args['eval_all']) + '.p')

if developing:
    if os.path.isfile(blast_all_pickle):
        print('BLAST records with this e-value against ALL database already '
              'exists, loading: ' + blast_all_pickle)
        # NOTE(review): pickle must only be loaded from a trusted, locally
        # written cache file.
        try:
            # `with` closes the handle even when unpickling fails, so the
            # file is no longer open when it is removed below.
            with open(blast_all_pickle, 'rb') as pickle_file:
                subject_info_allDB = pickle.load(pickle_file)
        except (EOFError, KeyError) as e:
            # The original evaluated repr(e) and discarded it; show the error.
            print(repr(e))
            print('Removing pickle file, redoing blast')
            os.remove(blast_all_pickle)

# Use `if not` instead of `else` in case the error above was raised and the
# cache file got removed.
if not developing or not os.path.isfile(blast_all_pickle):
    print('blasting against all')
    all_db_path = script_path + 'databases' + os.sep + 'allDB_' + organism_name
    # Make the all-proteins database.
    blast.makeBLASTdb(
        script_path + 'fasta_files' + os.sep + organism_name
        + '_all_proteins_smaller_than_500aa.fasta',
        all_db_path,
        blast_folder)
    blast_records = blast.blastp(
        interest_proteins_path,
        all_db_path,
        args['eval_all'],
        blast_folder,
        script_path + 'blast_results/' + organism_name + '_blastpAllOutput.xml')
    subject_info_allDB = blast.getSubjectInfo(
        blast_records, proteins_of_interest, args['eval_all'])
    if developing:
        # Cache the result so the next development run can skip BLAST.
        print('saving in ' + blast_all_pickle)
        if not os.path.exists(blast_results_dir):
            os.makedirs(blast_results_dir)
        with open(blast_all_pickle, 'wb') as f:
            pickle.dump(subject_info_allDB, f)

print('len subject_info_allDB after < ' + str(args['all_size'])
      + 'a.a. Protein blast: ' + str(len(subject_info_allDB)) + '\n')

# Disabled debugging aid, kept from the original:
#if developing:
#    for subject in subject_info_allDB:  # Loop over all
#        print subject_info_allDB[subject]['query_title']
#        for prot in not_found:
#            if prot.lower() in subject.lower():
#                print('found in allDb')