def _get_b2g_blast(self, input_fpath, goblast_settings):
    'It gets a chopped blast ready for use with blast2go'
    if 'kind' in goblast_settings:
        db_kind = goblast_settings['kind']
    else:
        db_kind = guess_blastdb_kind(goblast_settings['path'])

    seq_type = scrape_info_from_fname(input_fpath)['st']
    blast_program = guess_blast_program(seq_type, db_kind, prefer_tblastx=True)
    blastdb = goblast_settings['path']
    project_dir = self._project_settings['General_settings']['project_path']
    blast = backbone_blast_runner(query_fpath=input_fpath,
                                  project_dir=project_dir,
                                  blast_program=blast_program,
                                  blast_db=blastdb,
                                  dbtype=db_kind,
                                  threads=self.threads)
    chop_big_xml, num_items = True, 2
    if chop_big_xml:
        #chopped_blast = open('/tmp/blast_itemized.xml', 'w')
        chopped_blast = NamedTemporaryFile(suffix='.xml')
        for blast_parts in xml_itemize(blast, 'Iteration', num_items):
            chopped_blast.write(blast_parts)
        chopped_blast.flush()
        return chopped_blast
    else:
        return open(blast)
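# The chopping above relies on xml_itemize to split a big BLAST XML into groups of
# 'Iteration' records. The sketch below is a hypothetical, simplified version (it
# reads the whole file into memory and is not franklin's xml_itemize); it only
# illustrates the idea of repeating the XML header and footer around each group so
# every chunk stays a parseable document.
import re

def _sketch_xml_itemize(fpath, tag, num_items):
    'It yields chunks of an XML file with num_items <tag> records per chunk'
    text = open(fpath).read()
    pattern = re.compile(r'<%s>.*?</%s>' % (tag, tag), re.DOTALL)
    items = pattern.findall(text)
    if not items:
        return
    header = text[:text.index(items[0])]
    footer = text[text.rindex(items[-1]) + len(items[-1]):]
    for start in range(0, len(items), num_items):
        yield header + ''.join(items[start:start + num_items]) + footer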
def test_scrape_info_from_fname():
    'scrape info from fpath'
    fhand = NamedTemporaryFile(prefix='st_prot.pl_454.A.', suffix='.fasta')
    fhand.write('>seq\nTGATGC')
    fhand.flush()
    info = scrape_info_from_fname(fhand.name)
    assert info['st'] == 'prot'
def test_scrape_info_from_fname(self):
    'it tests scrape_info_from_fname'
    fhand = NamedTemporaryFile(prefix='pl_illumina.sm_test.lb_lib1.',
                               suffix='sfastq')
    fhand.write(READS_ILL)
    fhand.flush()
    file_info = scrape_info_from_fname(fhand.name)
    assert file_info['lb'] == 'lib1'
    assert file_info['format'] == 'fastq'

    # this should fail
    fhand = NamedTemporaryFile(prefix='pl__illumina.sm_test.lb_lib1.',
                               suffix='sfastq')
    fhand.write(READS_ILL)
    fhand.flush()
    try:
        file_info = scrape_info_from_fname(fhand.name)
        self.fail()
    except RuntimeError:
        pass
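# The two tests above depend on the dotted key_value naming convention used by the
# backbone (e.g. 'pl_illumina.sm_test.lb_lib1.reads.sfastq' carries platform,
# sample and library). The parser below is only a hypothetical sketch of that
# convention, not the real scrape_info_from_fname: format detection is simplified
# and the error handling may differ from franklin's implementation.
import os

def _sketch_scrape_info_from_fname(fpath):
    'It collects key_value tokens from the dot-separated file name'
    basename, ext = os.path.splitext(os.path.basename(fpath))
    info = {'format': ext.lstrip('.')}
    for token in basename.split('.'):
        if '_' not in token:
            continue    # tokens without a key are ignored in this sketch
        parts = token.split('_')
        if len(parts) != 2 or not all(parts):
            raise RuntimeError('Malformed token in file name: %s' % token)
        key, value = parts
        info[key] = value
    return info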
def run(self):
    'It runs the analysis'
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    db_dir = output_dirs['db_dir']
    blast_settings = self._project_settings['blast']

    settings = self._project_settings['Annotation']
    annot_settings = settings['description_annotation']
    description_databases = annot_settings['description_databases']
    general_settings = self._project_settings['General_settings']

    #first we need some blasts
    project_dir = general_settings['project_path']
    blasts = {}
    for input_ in inputs['input']:
        input_fpath = input_.last_version
        for database in description_databases:
            if 'kind' in blast_settings[database]:
                db_kind = blast_settings[database]['kind']
            else:
                db_kind = guess_blastdb_kind(blast_settings[database]['path'])
            seq_type = scrape_info_from_fname(input_.last_version)['st']
            blast_program = guess_blast_program(seq_type, db_kind,
                                                prefer_tblastx=True)
            blastdb = blast_settings[database]['path']
            blast = backbone_blast_runner(query_fpath=input_fpath,
                                          project_dir=project_dir,
                                          blast_program=blast_program,
                                          blast_db=blastdb,
                                          dbtype=db_kind,
                                          threads=self.threads)
            #the blasts are keyed by the input file path
            if input_fpath not in blasts:
                blasts[input_fpath] = []
            blasts[input_fpath].append({'blast':blast, 'modifier':None})
    #print blasts

    pipeline = []
    configuration = {}
    for database in description_databases:
        #a fresh copy, otherwise every step would share the same dict
        step = copy.deepcopy(annotate_with_descriptions)
        step['name_in_config'] = database
        pipeline.append(step)
        for input_ in inputs['input']:
            step_config = {'blasts': blasts[input_.last_version]}
            if input_.basename not in configuration:
                configuration[input_.basename] = {}
            configuration[input_.basename][database] = step_config
    #print configuration
    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=db_dir)
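# Illustrative shape of the configuration built by the description annotation run
# above, for one input file and one database (the basename 'reads1' and the blast
# path are made-up values; the real dict gets one entry per input basename and
# per database listed in description_databases):
_EXAMPLE_DESCRIPTION_CONFIGURATION = {
    'reads1': {
        'nr': {'blasts': [{'blast': '/path/to/blast_vs_nr.xml',
                           'modifier': None}]},
    },
}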
def run(self):
    '''It runs the analysis.

    It checks if the analysis is already done per input file.
    '''
    logger = logging.getLogger("franklin")
    self._log({'analysis_started': True})
    input_paths = self._get_input_fpaths()['reads']
    output_dir = self._create_output_dirs()['reads']
    for input_path in input_paths:
        input_fpath = str(input_path)
        fname = os.path.split(input_fpath)[-1]
        output_fpath = os.path.join(output_dir, fname)
        if os.path.exists(output_fpath):
            msg = '%s already cleaned. Not cleaned again' % output_fpath
            logger.info(msg)
            continue
        file_info = scrape_info_from_fname(input_path)
        input_fhand = open(input_fpath)
        output_fhand = open(output_fpath, 'w')
        pipeline = self._guess_cleaning_pipepile(file_info)
        infhands = {'in_seq': input_fhand}
        writer = SequenceWriter(output_fhand,
                                file_format=file_info['format'])
        configuration = self.create_cleaning_configuration(
                                                platform=file_info['pl'],
                                                library=file_info['lb'])
        try:
            seq_pipeline_runner(pipeline, configuration, infhands,
                                file_info['format'],
                                processes=self.threads,
                                writers={'seq': writer})
        except Exception as error:
            output_fhand.close()
            os.remove(output_fpath)
            raise(error)
        output_fhand.close()
        input_fhand.close()
    self._log({'analysis_finished': True})
    return
def run(self):
    '''It runs the analysis.'''
    self._log({'analysis_started':True})
    project_settings = self._project_settings
    settings = project_settings['Mappers']
    tmp_dir = project_settings['General_settings']['tmpdir']
    project_path = project_settings['General_settings']['project_path']

    unmapped_fhand = None
    if 'keep_unmapped_reads_in_bam' in settings:
        if settings['keep_unmapped_reads_in_bam'] == False:
            unmapped_fpath = os.path.join(project_path,
                                          BACKBONE_DIRECTORIES['mappings'][0],
                                          BACKBONE_BASENAMES['unmapped_list'])
            unmapped_fhand = GzipFile(unmapped_fpath, 'w')

    inputs = self._get_input_fpaths()
    reads_fpaths = inputs['reads']
    output_dir = self._create_output_dirs(timestamped=True)['result']

    # define color and sequence references
    reference_path = inputs['reference']
    mapping_index_dir = inputs['mapping_index']
    #print reference_path, mapping_index_dir

    #memory for the java programs
    java_mem = self._project_settings['Other_settings']['java_memory']
    picard_path = self._project_settings['Other_settings']['picard_path']

    for read_fpath in reads_fpaths:
        mapping_parameters = {}
        read_info = scrape_info_from_fname(read_fpath)
        platform = read_info['pl']
        #which mapper are we using for this platform
        mapper = settings['mapper_for_%s' % platform]
        (reference_fpath,
         color_space) = self._prepare_mapper_index(mapping_index_dir,
                                                   reference_path,
                                                   platform, mapper)

        mapping_parameters['unmapped_fhand'] = unmapped_fhand
        mapping_parameters['colorspace'] = color_space
        out_bam_fpath = os.path.join(output_dir,
                                     read_fpath.basename + '.bam')

        if platform in ('454', 'sanger'):
            mapping_parameters['reads_length'] = 'long'
        else:
            mapping_parameters['reads_length'] = 'short'

        if not os.path.exists(out_bam_fpath):
            mapping_parameters['threads'] = self.threads
            mapping_parameters['java_conf'] = {'java_memory':java_mem,
                                               'picard_path':picard_path}
            mapping_parameters['tmp_dir'] = tmp_dir
            map_reads(mapper,
                      reads_fpath=read_fpath.last_version,
                      reference_fpath=reference_fpath,
                      out_bam_fpath=out_bam_fpath,
                      parameters=mapping_parameters)

    # Now we run the select _last mapping
    self._spawn_analysis(DEFINITIONS['_select_last_mapping'],
                         silent=self._silent)

    self._log({'analysis_finished':True})
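# Illustrative mapping_parameters for one 454 read file as assembled in the loop
# above (all values are made up; java_memory, picard_path and tmp_dir come from
# the project settings):
_EXAMPLE_MAPPING_PARAMETERS = {
    'unmapped_fhand': None,      # a GzipFile when unmapped reads are written out
    'colorspace': False,
    'reads_length': 'long',      # 'long' for 454 and sanger reads, else 'short'
    'threads': 2,
    'java_conf': {'java_memory': '2048', 'picard_path': '/opt/picard'},
    'tmp_dir': '/tmp',
}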
def run(self):
    'It runs the analysis'
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    output_dir = output_dirs['result']
    blast_settings = self._project_settings['blast']
    settings = self._project_settings['Annotation']['ortholog_annotation']
    ortholog_databases = settings['ortholog_databases']
    general_settings = self._project_settings['General_settings']
    project_dir = general_settings['project_path']

    #first we need some blasts
    blasts = {}
    for input_ in inputs['input']:
        for database in ortholog_databases:
            if 'kind' in blast_settings[database]:
                db_kind = blast_settings[database]['kind']
            else:
                db_kind = guess_blastdb_kind(blast_settings[database]['path'])

            seq_type = scrape_info_from_fname(input_.last_version)['st']
            blast_program = guess_blast_program(seq_type, db_kind,
                                                prefer_tblastx=True)
            blastdb = blast_settings[database]['path']
            if 'subj_def_as_acc' in blast_settings[database]:
                subj_def_as_acc = blast_settings[database]['subj_def_as_acc']
            else:
                subj_def_as_acc = None
            #this could be different adding something to the settings
            blastdb_seq_fpath = blastdb

            blast = backbone_blast_runner(query_fpath=input_.last_version,
                                          project_dir=project_dir,
                                          blast_program=blast_program,
                                          blast_db=blastdb,
                                          dbtype=db_kind,
                                          threads=self.threads)
            blast = {'fpath':blast, 'subj_def_as_acc': subj_def_as_acc}

            blast_program = guess_blast_program(db_kind, seq_type,
                                                prefer_tblastx=True)
            reverse_blast = backbone_blast_runner(
                                        query_fpath=blastdb_seq_fpath,
                                        project_dir=project_dir,
                                        blast_program=blast_program,
                                        blast_db_seq=input_.last_version,
                                        dbtype='nucl',
                                        threads=self.threads)
            reverse_blast = {'fpath':reverse_blast, 'subj_def_as_acc':None}

            if input_ not in blasts:
                blasts[input_] = {}
            blasts[input_][database] = {'blast':blast,
                                        'reverse_blast':reverse_blast}

    pipeline = []
    configuration = {}
    for database in ortholog_databases:
        step = copy.deepcopy(annotate_orthologs)
        step['name_in_config'] = database
        #an annotation step for every ortholog database
        pipeline.append(step)
        for input_ in inputs['input']:
            step_config = {
                'blast': {
                    'blast': blasts[input_][database]['blast']['fpath'],
                    'subj_def_as_acc':
                        blasts[input_][database]['blast']['subj_def_as_acc']},
                'reverse_blast': {
                    'blast': blasts[input_][database]['reverse_blast']['fpath'],
                    'subj_def_as_acc':
                        blasts[input_][database]['reverse_blast']['subj_def_as_acc']},
                'species': database}
            if input_.basename not in configuration:
                configuration[input_.basename] = {}
            configuration[input_.basename][database] = step_config
    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=output_dir)
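# Illustrative step_config for one input and one ortholog database as built above
# ('melon' and the xml paths are made-up values; 'species' is simply the database
# name taken from ortholog_databases):
_EXAMPLE_ORTHOLOG_STEP_CONFIG = {
    'blast': {'blast': '/path/to/input_vs_melon.xml',
              'subj_def_as_acc': True},
    'reverse_blast': {'blast': '/path/to/melon_vs_input.xml',
                      'subj_def_as_acc': None},
    'species': 'melon',
}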