def _get_b2g_blast(self, input_fpath, goblast_settings):
    'It gets a chopped blast ready for use with blast2go'
    if 'kind' in goblast_settings:
        db_kind = goblast_settings['kind']
    else:
        db_kind = guess_blastdb_kind(goblast_settings['path'])

    seq_type = scrape_info_from_fname(input_fpath)['st']
    blast_program = guess_blast_program(seq_type, db_kind, prefer_tblastx=True)
    blastdb = goblast_settings['path']
    project_dir = self._project_settings['General_settings']['project_path']
    blast = backbone_blast_runner(query_fpath=input_fpath,
                                  project_dir=project_dir,
                                  blast_program=blast_program,
                                  blast_db=blastdb,
                                  dbtype=db_kind,
                                  threads=self.threads)
    chop_big_xml, num_items = True, 2
    if chop_big_xml:
        #chopped_blast = open('/tmp/blast_itemized.xml', 'w')
        chopped_blast = NamedTemporaryFile(suffix='.xml')
        for blast_parts in xml_itemize(blast, 'Iteration', num_items):
            chopped_blast.write(blast_parts)
        chopped_blast.flush()
        return chopped_blast
    else:
        return open(blast)
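# The chopping above relies on xml_itemize to split a big BLAST XML into groups of
# 'Iteration' records. The sketch below is a hypothetical, simplified version (it
# reads the whole file into memory and is not franklin's xml_itemize); it only
# illustrates the idea of repeating the XML header and footer around each group so
# every chunk stays a parseable document.
import re

def _sketch_xml_itemize(fpath, tag, num_items):
    'It yields chunks of an XML file with num_items <tag> records per chunk'
    text = open(fpath).read()
    pattern = re.compile(r'<%s>.*?</%s>' % (tag, tag), re.DOTALL)
    items = pattern.findall(text)
    if not items:
        return
    header = text[:text.index(items[0])]
    footer = text[text.rindex(items[-1]) + len(items[-1]):]
    for start in range(0, len(items), num_items):
        yield header + ''.join(items[start:start + num_items]) + footer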
def test_scrape_info_from_fname():
    'scrape info from fpath'
    fhand = NamedTemporaryFile(prefix='st_prot.pl_454.A.', suffix='.fasta')
    fhand.write('>seq\nTGATGC')
    fhand.flush()
    info = scrape_info_from_fname(fhand.name)
    assert info['st'] == 'prot'
def test_scrape_info_from_fname(self):
    'it tests scrape_info_from_fname'
    fhand = NamedTemporaryFile(prefix='pl_illumina.sm_test.lb_lib1.',
                               suffix='sfastq')
    fhand.write(READS_ILL)
    fhand.flush()
    file_info = scrape_info_from_fname(fhand.name)
    assert file_info['lb'] == 'lib1'
    assert file_info['format'] == 'fastq'

    # this should fail
    fhand = NamedTemporaryFile(prefix='pl__illumina.sm_test.lb_lib1.',
                               suffix='sfastq')
    fhand.write(READS_ILL)
    fhand.flush()
    try:
        file_info = scrape_info_from_fname(fhand.name)
        self.fail()
    except RuntimeError:
        pass
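# The two tests above depend on the dotted key_value naming convention used by the
# backbone (e.g. 'pl_illumina.sm_test.lb_lib1.reads.sfastq' carries platform,
# sample and library). The parser below is only a hypothetical sketch of that
# convention, not the real scrape_info_from_fname: format detection is simplified
# and the error handling may differ from franklin's implementation.
import os

def _sketch_scrape_info_from_fname(fpath):
    'It collects key_value tokens from the dot-separated file name'
    basename, ext = os.path.splitext(os.path.basename(fpath))
    info = {'format': ext.lstrip('.')}
    for token in basename.split('.'):
        if '_' not in token:
            continue    # tokens without a key are ignored in this sketch
        parts = token.split('_')
        if len(parts) != 2 or not all(parts):
            raise RuntimeError('Malformed token in file name: %s' % token)
        key, value = parts
        info[key] = value
    return info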
def run(self):
    'It runs the analysis'
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    db_dir = output_dirs['db_dir']
    blast_settings = self._project_settings['blast']

    settings = self._project_settings['Annotation']
    annot_settings = settings['description_annotation']
    description_databases = annot_settings['description_databases']
    general_settings = self._project_settings['General_settings']

    #first we need some blasts
    project_dir = general_settings['project_path']
    blasts = {}
    for input_ in inputs['input']:
        input_fpath = input_.last_version
        for database in description_databases:
            if 'kind' in blast_settings[database]:
                db_kind = blast_settings[database]['kind']
            else:
                db_kind = guess_blastdb_kind(blast_settings[database]['path'])
            seq_type = scrape_info_from_fname(input_.last_version)['st']
            blast_program = guess_blast_program(seq_type, db_kind,
                                                prefer_tblastx=True)
            blastdb = blast_settings[database]['path']
            blast = backbone_blast_runner(query_fpath=input_fpath,
                                          project_dir=project_dir,
                                          blast_program=blast_program,
                                          blast_db=blastdb,
                                          dbtype=db_kind,
                                          threads=self.threads)
            #the blasts are keyed by the input file path
            if input_fpath not in blasts:
                blasts[input_fpath] = []
            blasts[input_fpath].append({'blast':blast, 'modifier':None})
    #print blasts

    pipeline = []
    configuration = {}
    for database in description_databases:
        #a fresh copy, otherwise every step would share the same dict
        step = copy.deepcopy(annotate_with_descriptions)
        step['name_in_config'] = database
        pipeline.append(step)
        for input_ in inputs['input']:
            step_config = {'blasts': blasts[input_.last_version]}
            if input_.basename not in configuration:
                configuration[input_.basename] = {}
            configuration[input_.basename][database] = step_config
    #print configuration
    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=db_dir)
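# Illustrative shape of the configuration built by the description annotation run
# above, for one input file and one database (the basename 'reads1' and the blast
# path are made-up values; the real dict gets one entry per input basename and
# per database listed in description_databases):
_EXAMPLE_DESCRIPTION_CONFIGURATION = {
    'reads1': {
        'nr': {'blasts': [{'blast': '/path/to/blast_vs_nr.xml',
                           'modifier': None}]},
    },
}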
def run(self):
    '''It runs the analysis.

    It checks if the analysis is already done per input file.
    '''
    logger = logging.getLogger("franklin")
    self._log({'analysis_started': True})
    input_paths = self._get_input_fpaths()['reads']
    output_dir = self._create_output_dirs()['reads']
    for input_path in input_paths:
        input_fpath = str(input_path)
        fname = os.path.split(input_fpath)[-1]
        output_fpath = os.path.join(output_dir, fname)
        if os.path.exists(output_fpath):
            msg = '%s already cleaned. Not cleaned again' % output_fpath
            logger.info(msg)
            continue
        file_info = scrape_info_from_fname(input_path)
        input_fhand = open(input_fpath)
        output_fhand = open(output_fpath, 'w')
        pipeline = self._guess_cleaning_pipepile(file_info)
        infhands = {'in_seq': input_fhand}
        writer = SequenceWriter(output_fhand,
                                file_format=file_info['format'])
        configuration = self.create_cleaning_configuration(
                                                platform=file_info['pl'],
                                                library=file_info['lb'])
        try:
            seq_pipeline_runner(pipeline, configuration, infhands,
                                file_info['format'],
                                processes=self.threads,
                                writers={'seq': writer})
        except Exception as error:
            output_fhand.close()
            os.remove(output_fpath)
            raise(error)
        output_fhand.close()
        input_fhand.close()
    self._log({'analysis_finished': True})
    return
def run(self):
    '''It runs the analysis.'''
    self._log({'analysis_started':True})
    project_settings = self._project_settings
    settings = project_settings['Mappers']
    tmp_dir = project_settings['General_settings']['tmpdir']
    project_path = project_settings['General_settings']['project_path']

    unmapped_fhand = None
    if 'keep_unmapped_reads_in_bam' in settings:
        if settings['keep_unmapped_reads_in_bam'] == False:
            unmapped_fpath = os.path.join(project_path,
                                          BACKBONE_DIRECTORIES['mappings'][0],
                                          BACKBONE_BASENAMES['unmapped_list'])
            unmapped_fhand = GzipFile(unmapped_fpath, 'w')

    inputs = self._get_input_fpaths()
    reads_fpaths = inputs['reads']
    output_dir = self._create_output_dirs(timestamped=True)['result']

    # define color and sequence references
    reference_path = inputs['reference']
    mapping_index_dir = inputs['mapping_index']
    #print reference_path, mapping_index_dir

    #memory for the java programs
    java_mem = self._project_settings['Other_settings']['java_memory']
    picard_path = self._project_settings['Other_settings']['picard_path']

    for read_fpath in reads_fpaths:
        mapping_parameters = {}
        read_info = scrape_info_from_fname(read_fpath)
        platform = read_info['pl']
        #which mapper are we using for this platform
        mapper = settings['mapper_for_%s' % platform]
        (reference_fpath,
         color_space) = self._prepare_mapper_index(mapping_index_dir,
                                                   reference_path,
                                                   platform, mapper)

        mapping_parameters['unmapped_fhand'] = unmapped_fhand
        mapping_parameters['colorspace'] = color_space
        out_bam_fpath = os.path.join(output_dir,
                                     read_fpath.basename + '.bam')

        if platform in ('454', 'sanger'):
            mapping_parameters['reads_length'] = 'long'
        else:
            mapping_parameters['reads_length'] = 'short'

        if not os.path.exists(out_bam_fpath):
            mapping_parameters['threads'] = self.threads
            mapping_parameters['java_conf'] = {'java_memory':java_mem,
                                               'picard_path':picard_path}
            mapping_parameters['tmp_dir'] = tmp_dir
            map_reads(mapper,
                      reads_fpath=read_fpath.last_version,
                      reference_fpath=reference_fpath,
                      out_bam_fpath=out_bam_fpath,
                      parameters=mapping_parameters)

    # Now we run the select _last mapping
    self._spawn_analysis(DEFINITIONS['_select_last_mapping'],
                         silent=self._silent)

    self._log({'analysis_finished':True})
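# Illustrative mapping_parameters for one 454 read file as assembled in the loop
# above (all values are made up; java_memory, picard_path and tmp_dir come from
# the project settings):
_EXAMPLE_MAPPING_PARAMETERS = {
    'unmapped_fhand': None,      # a GzipFile when unmapped reads are written out
    'colorspace': False,
    'reads_length': 'long',      # 'long' for 454 and sanger reads, else 'short'
    'threads': 2,
    'java_conf': {'java_memory': '2048', 'picard_path': '/opt/picard'},
    'tmp_dir': '/tmp',
}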
def run(self):
    'It runs the analysis'
    inputs, output_dirs = self._get_inputs_and_prepare_outputs()
    output_dir = output_dirs['result']
    blast_settings = self._project_settings['blast']
    settings = self._project_settings['Annotation']['ortholog_annotation']
    ortholog_databases = settings['ortholog_databases']
    general_settings = self._project_settings['General_settings']
    project_dir = general_settings['project_path']

    #first we need some blasts
    blasts = {}
    for input_ in inputs['input']:
        for database in ortholog_databases:
            if 'kind' in blast_settings[database]:
                db_kind = blast_settings[database]['kind']
            else:
                db_kind = guess_blastdb_kind(blast_settings[database]['path'])

            seq_type = scrape_info_from_fname(input_.last_version)['st']
            blast_program = guess_blast_program(seq_type, db_kind,
                                                prefer_tblastx=True)
            blastdb = blast_settings[database]['path']
            if 'subj_def_as_acc' in blast_settings[database]:
                subj_def_as_acc = blast_settings[database]['subj_def_as_acc']
            else:
                subj_def_as_acc = None
            #this could be different adding something to the settings
            blastdb_seq_fpath = blastdb

            blast = backbone_blast_runner(query_fpath=input_.last_version,
                                          project_dir=project_dir,
                                          blast_program=blast_program,
                                          blast_db=blastdb,
                                          dbtype=db_kind,
                                          threads=self.threads)
            blast = {'fpath':blast, 'subj_def_as_acc': subj_def_as_acc}

            blast_program = guess_blast_program(db_kind, seq_type,
                                                prefer_tblastx=True)
            reverse_blast = backbone_blast_runner(
                                        query_fpath=blastdb_seq_fpath,
                                        project_dir=project_dir,
                                        blast_program=blast_program,
                                        blast_db_seq=input_.last_version,
                                        dbtype='nucl',
                                        threads=self.threads)
            reverse_blast = {'fpath':reverse_blast, 'subj_def_as_acc':None}

            if input_ not in blasts:
                blasts[input_] = {}
            blasts[input_][database] = {'blast':blast,
                                        'reverse_blast':reverse_blast}

    pipeline = []
    configuration = {}
    for database in ortholog_databases:
        step = copy.deepcopy(annotate_orthologs)
        step['name_in_config'] = database
        #an annotation step for every ortholog database
        pipeline.append(step)
        for input_ in inputs['input']:
            step_config = {
                'blast': {
                    'blast': blasts[input_][database]['blast']['fpath'],
                    'subj_def_as_acc':
                        blasts[input_][database]['blast']['subj_def_as_acc']},
                'reverse_blast': {
                    'blast': blasts[input_][database]['reverse_blast']['fpath'],
                    'subj_def_as_acc':
                        blasts[input_][database]['reverse_blast']['subj_def_as_acc']},
                'species': database}
            if input_.basename not in configuration:
                configuration[input_.basename] = {}
            configuration[input_.basename][database] = step_config
    return self._run_annotation(pipeline=pipeline,
                                configuration=configuration,
                                inputs=inputs,
                                output_dir=output_dir)
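# Illustrative step_config for one input and one ortholog database as built above
# ('melon' and the xml paths are made-up values; 'species' is simply the database
# name taken from ortholog_databases):
_EXAMPLE_ORTHOLOG_STEP_CONFIG = {
    'blast': {'blast': '/path/to/input_vs_melon.xml',
              'subj_def_as_acc': True},
    'reverse_blast': {'blast': '/path/to/melon_vs_input.xml',
                      'subj_def_as_acc': None},
    'species': 'melon',
}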