def test_blast_seq_against_bad_db(self):
        'We can blast a seq file against a database'
        test_dir = NamedTemporaryDir()
        project_name = 'backbone'

        create_project(directory=test_dir.name,
                                       name=project_name)
        project_dir = join(test_dir.name, project_name)

        #some query fasta file
        query = '>seq1\nGATCGGCCTTCTTGCGCATCTCACGCGCTCCTGCGGCGGCCTGTAGGGCAGGCT'
        query += 'CATACCCCTGCCGAACCGCTTTTGTCA|n'
        query_fhand = NamedTemporaryFile(mode='w')
        query_fhand.write(query)
        query_fhand.flush()

        #the blast db
        blast_db_fname = 'uni'
        blast_db = join(TEST_DATA_DIR, 'blast', blast_db_fname)

        blast_program = 'blastn'
        try:
            backbone_blast_runner(query_fpath=query_fhand.name,
                                  project_dir=project_dir,
                                  blast_program=blast_program,
                                  blast_db=blast_db)
            self.fail('RuntimeError expected')
        except RuntimeError:
            pass
        test_dir.close()
    def test_blast_seq_against_db():
        'We can blast a seq file against a database'
        test_dir = NamedTemporaryDir()
        project_name = 'backbone'

        create_project(directory=test_dir.name,
                                       name=project_name)
        project_dir = join(test_dir.name, project_name)

        #some query fasta file
        query = '>seq1\nGATCGGCCTTCTTGCGCATCTCACGCGCTCCTGCGGCGGCCTGTAGGGCAGGCT'
        query += 'CATACCCCTGCCGAACCGCTTTTGTCA|n'
        query_fhand = NamedTemporaryFile(mode='w')
        query_fhand.write(query)
        query_fhand.flush()

        #the blast db
        blast_db_fname = 'univec+'
        blast_db = join(TEST_DATA_DIR, 'blast', blast_db_fname)

        blast_program = 'blastn'
        backbone_blast_runner(query_fpath=query_fhand.name,
                              project_dir=project_dir,
                              blast_program=blast_program,
                              blast_db=blast_db)

        #is the blast ok?
        blast_fpath = join(project_dir,
                           BACKBONE_DIRECTORIES['blast_dir'],
                           _get_basename(query_fhand.name),
                           blast_db_fname,
                           '%s.%s.xml' % (BACKBONE_BASENAMES['blast_basename'],
                                          blast_program))
        assert '<Hit_def>vec1</Hit_def>' in open(blast_fpath).read()
        test_dir.close()
    def _get_b2g_blast(self, input_fpath, goblast_settings):
        'It gets a chopped blast ready for use with blast2go'
        if 'kind' in goblast_settings:
            db_kind = goblast_settings['kind']
        else:
            db_kind = guess_blastdb_kind(goblast_settings['path'])

        seq_type = scrape_info_from_fname(input_fpath)['st']
        blast_program = guess_blast_program(seq_type, db_kind,
                                            prefer_tblastx=True)
        blastdb = goblast_settings['path']

        project_dir = self._project_settings['General_settings']['project_path']
        blast = backbone_blast_runner(query_fpath=input_fpath,
                                            project_dir=project_dir,
                                            blast_program=blast_program,
                                            blast_db=blastdb,
                                            dbtype=db_kind,
                                            threads=self.threads)

        chop_big_xml, num_items = True, 2
        if chop_big_xml:
            #chopped_blast = open('/tmp/blast_itemized.xml', 'w')
            chopped_blast = NamedTemporaryFile(suffix='.xml')
            for blast_parts in xml_itemize(blast, 'Iteration', num_items):
                chopped_blast.write(blast_parts)
            chopped_blast.flush()
            return chopped_blast
        else:
            return open(blast)
    def run(self):
        'It runs the analysis'
        inputs, output_dirs = self._get_inputs_and_prepare_outputs()
        db_dir = output_dirs['db_dir']
        blast_settings = self._project_settings['blast']

        settings = self._project_settings['Annotation']
        annot_settings = settings['description_annotation']
        description_databases = annot_settings['description_databases']

        general_settings = self._project_settings['General_settings']

        #first we need some blasts
        project_dir = general_settings['project_path']
        blasts = {}
        for input_ in inputs['input']:
            input_fpath = input_.last_version
            for database in description_databases:
                if 'kind' in blast_settings[database]:
                    db_kind = blast_settings[database]['kind']
                else:
                    db_kind = guess_blastdb_kind(blast_settings[database]['path'])

                seq_type = scrape_info_from_fname(input_.last_version)['st']
                blast_program = guess_blast_program(seq_type, db_kind,
                                                    prefer_tblastx=True)

                blastdb = blast_settings[database]['path']
                blast = backbone_blast_runner(query_fpath=input_fpath,
                                                project_dir=project_dir,
                                                blast_program=blast_program,
                                                blast_db=blastdb,
                                                dbtype=db_kind,
                                                threads=self.threads)
                if input_ not in blasts:
                    blasts[input_fpath] = []
                blasts[input_fpath].append({'blast':blast, 'modifier':None})
        #print blasts
        pipeline = []
        configuration = {}
        for database in description_databases:
            step = annotate_with_descriptions
            step['name_in_config'] = database
            pipeline.append(step)
            for input_ in inputs['input']:
                step_config = {'blasts': blasts[input_.last_version]}
                configuration[input_.basename] = {}
                configuration[input_.basename][database] = step_config
        #print configuration
        return self._run_annotation(pipeline=pipeline,
                                    configuration=configuration,
                                    inputs=inputs,
                                    output_dir=db_dir)
    def test_blast_seq_against_seq_db():
        'We can blast a seq file against a sequence file database'
        test_dir = NamedTemporaryDir()
        project_name = 'backbone'

        create_project(directory=test_dir.name,
                                       name=project_name)
        project_dir = join(test_dir.name, project_name)

        #some query fasta file
        query = '>seq1\nGATCGGCCTTCTTGCGCATCTCACGCGCTCCTGCGGCGGCCTGTAGGGCAGGCT'
        query += 'CATACCCCTGCCGAACCGCTTTTGTCA\n'
        query_fhand = NamedTemporaryFile(mode='w')
        query_fhand.write(query)
        query_fhand.flush()

        #the blast db
        blast = '@seq\nGATCGGCCTTCTTGCGCATCTCACGCGCTCCTGCGGCGGCCTGTAGGGCAGGC\n'
        blast += '+\n'
        blast += '11111111111111111111111111111111111111111111111111111\n'
        blast_db_fhand = NamedTemporaryFile(mode='w', suffix='.sfastq')
        blast_db_fhand.write(blast)
        blast_db_fhand.flush()

        blast_program = 'blastn'
        backbone_blast_runner(query_fpath=query_fhand.name,
                              project_dir=project_dir,
                              blast_program=blast_program,
                              blast_db_seq=blast_db_fhand.name)

        #is the blast ok?
        blast_fpath = join(project_dir,
                           BACKBONE_DIRECTORIES['blast_dir'],
                           _get_basename(query_fhand.name),
                           _get_basename(blast_db_fhand.name),
                           '%s.%s.xml' % (BACKBONE_BASENAMES['blast_basename'],
                                          blast_program))
        result = open(blast_fpath).read()
        assert '<Hit_num>1</Hit_num>' in result
    def run(self):
        'It runs the analysis'
        inputs, output_dirs = self._get_inputs_and_prepare_outputs()
        output_dir = output_dirs['result']
        blast_settings = self._project_settings['blast']
        settings = self._project_settings['Annotation']['ortholog_annotation']
        ortholog_databases = settings['ortholog_databases']


        general_settings = self._project_settings['General_settings']
        project_dir = general_settings['project_path']

        #first we need some blasts
        blasts = {}
        for input_ in inputs['input']:
            for database in ortholog_databases:
                if 'kind' in blast_settings[database]:
                    db_kind = blast_settings[database]['kind']
                else:
                    db_kind = guess_blastdb_kind(blast_settings[database]['path'])

                seq_type = scrape_info_from_fname(input_.last_version)['st']
                blast_program = guess_blast_program(seq_type, db_kind,
                                                    prefer_tblastx=True)

                blastdb = blast_settings[database]['path']
                if 'subj_def_as_acc' in blast_settings[database]:
                    subj_def_as_acc = blast_settings[database]['subj_def_as_acc']
                else:
                    subj_def_as_acc = None


                #this could be different adding something to the settings
                blastdb_seq_fpath = blastdb
                blast = backbone_blast_runner(query_fpath=input_.last_version,
                                              project_dir=project_dir,
                                              blast_program=blast_program,
                                              blast_db=blastdb,
                                              dbtype=db_kind,
                                              threads=self.threads)

                blast = {'fpath':blast,
                         'subj_def_as_acc': subj_def_as_acc}
                blast_program = guess_blast_program(db_kind, seq_type,
                                                    prefer_tblastx=True)
                reverse_blast = backbone_blast_runner(
                                              query_fpath=blastdb_seq_fpath,
                                              project_dir=project_dir,
                                              blast_program=blast_program,
                                              blast_db_seq=input_.last_version,
                                              dbtype='nucl',
                                              threads=self.threads)
                reverse_blast = {'fpath':reverse_blast,
                                  'subj_def_as_acc':None}

                if input_ not in blasts:
                    blasts[input_] = {}
                blasts[input_][database] = {'blast':blast,
                                            'reverse_blast':reverse_blast}

        pipeline = []
        configuration = {}
        for database in ortholog_databases:
            step = copy.deepcopy(annotate_orthologs)
            step['name_in_config'] = database
            #an annotation step for every ortholog database
            pipeline.append(step)
            for input_ in inputs['input']:
                reverse_blast = ''
                step_config = {
                    'blast':{'blast': blasts[input_][database]['blast']['fpath'],
                             'subj_def_as_acc':blasts[input_][database]['blast']['subj_def_as_acc']},
                    'reverse_blast':{'blast':
                                     blasts[input_][database]['reverse_blast']['fpath'],
                                     'subj_def_as_acc':blasts[input_][database]['reverse_blast']['subj_def_as_acc']},
                    'species': database}
                if input_.basename not in configuration:
                    configuration[input_.basename] = {}
                configuration[input_.basename][database] = step_config

        return self._run_annotation(pipeline=pipeline,
                                    configuration=configuration,
                                    inputs=inputs,
                                    output_dir=output_dir)