def _get_b2g_blast(self, input_fpath, goblast_settings):
    """Run the blast needed by blast2go and return it as an open file.

    The database kind is read from ``goblast_settings['kind']`` when that
    key exists; otherwise it is guessed from ``goblast_settings['path']``.
    The blast program is chosen from the sequence type scraped from
    ``input_fpath`` and that database kind.

    Returns an open ``NamedTemporaryFile`` (suffix ``.xml``) holding the
    pieces produced by ``xml_itemize`` over the blast result. The caller
    must keep a reference to it.
    NOTE(review): presumably this is ``tempfile.NamedTemporaryFile``, in
    which case the file is deleted as soon as it is closed -- confirm.
    """
    settings = self._project_settings
    db_kind = (goblast_settings['kind'] if 'kind' in goblast_settings
               else guess_blastdb_kind(goblast_settings['path']))
    sequence_kind = scrape_info_from_fname(input_fpath)['st']
    program = guess_blast_program(sequence_kind, db_kind,
                                  prefer_tblastx=True)
    blast_fpath = backbone_blast_runner(
        query_fpath=input_fpath,
        project_dir=settings['General_settings']['project_path'],
        blast_program=program,
        blast_db=goblast_settings['path'],
        dbtype=db_kind,
        threads=self.threads)

    # Hard-coded switch: the result is always re-chunked, two 'Iteration'
    # items at a time. The plain-file fallback is kept for when the switch
    # is turned off by hand.
    chop_big_xml = True
    items_per_chunk = 2
    if not chop_big_xml:
        return open(blast_fpath)

    chopped = NamedTemporaryFile(suffix='.xml')
    for chunk in xml_itemize(blast_fpath, 'Iteration', items_per_chunk):
        chopped.write(chunk)
    # Flush so that readers opening the file by name see the whole content.
    chopped.flush()
    return chopped
def run(self):
    """Annotate the inputs with descriptions taken from blast results.

    Every input is blasted against each database listed in the
    ``description_databases`` project setting. One copy of the
    ``annotate_with_descriptions`` step is then queued per database and
    configured, for every input, with all the blasts run for that input.

    Returns whatever ``self._run_annotation`` returns.
    """
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    db_dir = output_dirs['db_dir']
    blast_settings = self._project_settings['blast']
    settings = self._project_settings['Annotation']
    annot_settings = settings['description_annotation']
    description_databases = annot_settings['description_databases']
    general_settings = self._project_settings['General_settings']
    project_dir = general_settings['project_path']

    # First we need the blasts, grouped by the path of the input file.
    blasts = {}
    for input_ in inputs['input']:
        input_fpath = input_.last_version
        for database in description_databases:
            db_settings = blast_settings[database]
            if 'kind' in db_settings:
                db_kind = db_settings['kind']
            else:
                db_kind = guess_blastdb_kind(db_settings['path'])
            seq_type = scrape_info_from_fname(input_fpath)['st']
            blast_program = guess_blast_program(seq_type, db_kind,
                                                prefer_tblastx=True)
            blast = backbone_blast_runner(query_fpath=input_fpath,
                                          project_dir=project_dir,
                                          blast_program=blast_program,
                                          blast_db=db_settings['path'],
                                          dbtype=db_kind,
                                          threads=self.threads)
            # The dict is keyed by path, so the membership test must use
            # the path too. Testing the input object was always true and
            # threw away the blasts of every database but the last one.
            blasts.setdefault(input_fpath, []).append(
                {'blast': blast, 'modifier': None})

    pipeline = []
    configuration = {}
    for database in description_databases:
        # Work on a copy of the step template: each database needs its own
        # 'name_in_config' and the shared template must stay untouched.
        step = copy.deepcopy(annotate_with_descriptions)
        step['name_in_config'] = database
        pipeline.append(step)
        for input_ in inputs['input']:
            step_config = {'blasts': blasts[input_.last_version]}
            # Create the per-input dict only once, so the entries written
            # for previous databases are not wiped out.
            input_config = configuration.setdefault(input_.basename, {})
            input_config[database] = step_config

    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=db_dir)
def run(self):
    """Annotate the inputs with orthologs using reciprocal blasts.

    For every input and every database listed in the
    ``ortholog_databases`` project setting two blasts are run: the input
    against the database, and the database path (used as the query)
    against the input sequences. One copy of the ``annotate_orthologs``
    step is queued per database, configured per input with both results.

    Returns whatever ``self._run_annotation`` returns.
    """
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    output_dir = output_dirs['result']
    project_settings = self._project_settings
    blast_settings = project_settings['blast']
    annotation_settings = project_settings['Annotation']
    ortholog_settings = annotation_settings['ortholog_annotation']
    ortholog_databases = ortholog_settings['ortholog_databases']
    project_dir = project_settings['General_settings']['project_path']

    # Step 1: run the forward and the reverse blast for every pair.
    blasts = {}
    for input_ in inputs['input']:
        for database in ortholog_databases:
            db_settings = blast_settings[database]
            db_kind = (db_settings['kind'] if 'kind' in db_settings
                       else guess_blastdb_kind(db_settings['path']))
            query_fpath = input_.last_version
            seq_type = scrape_info_from_fname(query_fpath)['st']
            forward_program = guess_blast_program(seq_type, db_kind,
                                                  prefer_tblastx=True)
            blastdb = db_settings['path']
            subj_def_as_acc = (db_settings['subj_def_as_acc']
                               if 'subj_def_as_acc' in db_settings
                               else None)

            forward_fpath = backbone_blast_runner(
                query_fpath=query_fpath,
                project_dir=project_dir,
                blast_program=forward_program,
                blast_db=blastdb,
                dbtype=db_kind,
                threads=self.threads)

            # Reverse search: the roles are swapped, so the program is
            # guessed with the kinds in the opposite order.
            reverse_program = guess_blast_program(db_kind, seq_type,
                                                  prefer_tblastx=True)
            # The database path doubles as the query sequence file; this
            # could change by adding something to the settings.
            reverse_fpath = backbone_blast_runner(
                query_fpath=blastdb,
                project_dir=project_dir,
                blast_program=reverse_program,
                blast_db_seq=query_fpath,
                dbtype='nucl',
                threads=self.threads)

            blasts.setdefault(input_, {})[database] = {
                'blast': {'fpath': forward_fpath,
                          'subj_def_as_acc': subj_def_as_acc},
                'reverse_blast': {'fpath': reverse_fpath,
                                  'subj_def_as_acc': None},
            }

    # Step 2: one annotation step for every ortholog database.
    pipeline = []
    configuration = {}
    for database in ortholog_databases:
        step = copy.deepcopy(annotate_orthologs)
        step['name_in_config'] = database
        pipeline.append(step)
        for input_ in inputs['input']:
            pair = blasts[input_][database]
            forward, backward = pair['blast'], pair['reverse_blast']
            step_config = {
                'blast': {
                    'blast': forward['fpath'],
                    'subj_def_as_acc': forward['subj_def_as_acc'],
                },
                'reverse_blast': {
                    'blast': backward['fpath'],
                    'subj_def_as_acc': backward['subj_def_as_acc'],
                },
                'species': database,
            }
            input_config = configuration.setdefault(input_.basename, {})
            input_config[database] = step_config

    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=output_dir)