示例#1
0
    def __init__(self, input_file_paths, genes_table_structure):
        """Describe the three expected table files and initialize the base Parser.

        Args:
            input_file_paths: passed unchanged to ``Parser.__init__``.
            genes_table_structure: stored on the instance as-is.
        """
        self.genes_table_structure = genes_table_structure

        # (column name, converter) pairs, in file order.
        functions_columns = [
            ('prot', str),
            ('figfam', str),
            ('field3', int),
            ('field4', int),
            ('field5', int),
            ('function', str),
        ]
        peg_columns = [('prot', str), ('contig', str), ('start', int), ('stop', int)]

        files_structure = {}
        files_structure['functions'] = {
            'col_names': [name for name, _ in functions_columns],
            'col_mapping': [convert for _, convert in functions_columns],
        }
        # No converters are declared for this table.
        files_structure['gene_otus'] = {
            'col_names': ['prot', 't_species'],
            'col_mapping': None,
        }
        files_structure['peg'] = {
            'col_names': [name for name, _ in peg_columns],
            'col_mapping': [convert for _, convert in peg_columns],
        }

        files_expected = dict(functions='functions.tbl',
                              gene_otus='gene_otus.tbl',
                              peg='peg.tbl')

        Parser.__init__(self, 'MyRastGUI', input_file_paths, files_expected,
                        files_structure)
示例#2
0
    def __init__(self, input_files, taxonomy_table_structure, run=terminal.Run(), progress=terminal.Progress(), skip_fix_input=False):
        """Initialize the parser for a tab-separated 'kaiju_output' file.

        Args:
            input_files: a single path or a list of paths. Only the first entry
                is registered as 'kaiju_output'. The caller's list is never
                modified.
            taxonomy_table_structure: accepted for interface compatibility; it is
                not used by this initializer.
            run: object stored on ``self.run``.
            progress: object stored on ``self.progress``.
            skip_fix_input: when False (the default), the first input file is
                passed through ``self.fix_input_file`` and the file it returns is
                deleted once the base parser initialization has finished,
                whether it succeeded or raised.
        """
        self.run = run
        self.progress = progress
        self.just_do_it = False

        # isinstance() also recognizes list subclasses, which the former
        # type() comparison wrapped in a second list. Copying keeps the
        # caller's list from being overwritten with a path that gets deleted
        # below.
        if isinstance(input_files, list):
            input_files = list(input_files)
        else:
            input_files = [input_files]

        if not skip_fix_input:
            input_files[0] = self.fix_input_file(input_files[0])

        files_expected = {'kaiju_output': input_files[0]}

        files_structure = {'kaiju_output':
                                {'col_names': ['_', 'gene_callers_id', '_', '_', '_', '_', '_', 'taxonomy'],
                                 'col_mapping': [str, int, str, int, str, str, str, str],
                                 'separator': '\t',
                                 'indexing_field': -1
                                 },
                           }

        try:
            Parser.__init__(self, 'Kaiju', input_files, files_expected, files_structure)
        finally:
            # Remove the file produced by fix_input_file even when the base
            # initializer fails, so it is not left behind on disk.
            if not skip_fix_input:
                os.remove(input_files[0])
示例#3
0
    def __init__(self,
                 input_file_paths,
                 run=terminal.Run(),
                 progress=terminal.Progress()):
        """Prepare the parser for a tab-separated 'agnostos_output' table.

        Only the first entry of `input_file_paths` is used: it is handed to
        `self.fix_input_file`, and the path that comes back is what gets
        registered with the base `Parser`.
        """
        self.run, self.progress = run, progress
        self.just_do_it = False

        fixed_path = self.fix_input_file(input_file_paths[0])

        table_spec = {
            'col_names': ['gene_callers_id', 'cl_name', 'contig', 'gene_x_contig', 'cl_size', 'category', 'pfam', 'is.HQ', 'is.LS', 'lowest_rank', 'lowest_level', 'niche_breadth_sign'],
            'col_mapping': [int, str, str, str, str, str, str, str, str, str, str, str],
            'indexing_field': -1,
            'separator': '\t',
        }

        # Progress reporting brackets the base-class initialization.
        self.progress.new('Initializing the parser')
        self.progress.update('...')
        Parser.__init__(self, 'agnostos', [fixed_path],
                        {'agnostos_output': fixed_path},
                        {'agnostos_output': table_spec})
        self.progress.end()
示例#4
0
    def __init__(self,
                 hmmer_table_txt,
                 alphabet='AA',
                 context='GENE',
                 program='hmmscan',
                 run=terminal.Run()):
        """Pick the column layout for a HMMER hits table and set up the parser.

        The layout comes from one of the ``get_col_info_for_*_context`` methods,
        chosen by `context` and `alphabet`. The table at `hmmer_table_txt` is
        first rewritten by ``self.fix_sad_hmmer_table_output`` and the path it
        returns is the one registered with the base `Parser`.

        Raises:
            ConfigError: if the 'DOMAIN' context is requested for a program
                other than 'hmmsearch', or if the context/alphabet pair is not
                one of the supported combinations.
        """
        self.alphabet = alphabet
        self.context = context
        self.program = program
        self.run = run

        nucleotide_alphabet = self.alphabet in ("DNA", "RNA")

        if self.context == "GENE":
            col_names, col_mapping = self.get_col_info_for_GENE_context()
        elif self.context == "CONTIG" and nucleotide_alphabet:
            col_names, col_mapping = self.get_col_info_for_CONTIG_context()
        elif self.context == "DOMAIN" and self.alphabet == "AA":
            if program != 'hmmsearch':
                raise ConfigError(
                    "HMMScan :: the 'DOMAIN' context is only available for hmmsearch."
                )
            col_names, col_mapping = self.get_col_info_for_DOMAIN_context()
        else:
            unsupported_pair_msg = (
                "HMMScan driver is confused. Yor context and alphabet pair ('%s' and '%s') "
                "does not seem to be implemented in the parser module. If you think this is "
                "not a mistake on your part, please get in touch with the anvi'o developers "
                "and watch them fix it like actual pros.")
            raise ConfigError(unsupported_pair_msg % (self.context, self.alphabet))

        hits_spec = {
            'col_names': col_names,
            'col_mapping': col_mapping,
            'indexing_field': -1,
            'no_header': True,
        }

        # HMMER output is space-delimited and column-aligned, and its description
        # field may itself contain spaces. After anvio.driver.HMMER turns spaces
        # into tabs, lines can therefore end up with a variable number of columns,
        # so the split description pieces have to be merged back into one column
        # before the base parser sees the file. Tab-delimited output was requested
        # upstream (https://github.com/EddyRivasLab/hmmer/issues/235) and is
        # expected with HMMER4; once it exists this detour can be dropped and the
        # base parser used directly again.
        fixed_table_path = self.fix_sad_hmmer_table_output(hmmer_table_txt,
                                                           col_names)

        Parser.__init__(self, self.program, [fixed_table_path],
                        {'hits': fixed_table_path}, {'hits': hits_spec})
示例#5
0
    def __init__(self,
                 input_file_paths,
                 taxonomy_table_structure,
                 run=terminal.Run(),
                 progress=terminal.Progress()):
        """Set up parsing of a matrix with a gene_callers_id column and six taxonomy columns.

        The first entry of `input_file_paths` is registered as the 'matrix'
        file; `taxonomy_table_structure` is stored on the instance unchanged.
        """
        self.run = run
        self.progress = progress

        matrix_path = input_file_paths[0]

        # (column name, converter) pairs, in file order.
        columns = [
            ('gene_callers_id', int),
            ('t_phylum', str),
            ('t_class', str),
            ('t_order', str),
            ('t_family', str),
            ('t_genus', str),
            ('t_species', str),
        ]
        matrix_spec = {
            'col_names': [name for name, _ in columns],
            'col_mapping': [convert for _, convert in columns],
            'only_expected_fields': True,
        }

        self.taxonomy_table_structure = taxonomy_table_structure
        Parser.__init__(self, 'DefaultMatrix', [matrix_path],
                        {'matrix': matrix_path}, {'matrix': matrix_spec})
示例#6
0
    def __init__(self, hmm_scan_hits_txt, alphabet='AA', context='GENE'):
        """Select the column layout for an HMM hits table and initialize the base Parser.

        Args:
            hmm_scan_hits_txt: path registered as the 'hits' file.
            alphabet: stored on ``self.alphabet``; consulted only for the
                'CONTIG' context, where it must be 'DNA' or 'RNA'.
            context: 'GENE' or 'CONTIG'; stored on ``self.context``.

        Raises:
            ConfigError: if the context/alphabet pair is not one of the
                supported combinations.
        """
        self.alphabet = alphabet
        self.context = context

        files_expected = {'hits': hmm_scan_hits_txt}

        if self.context == "GENE":
            # see the HMMER user guide for details of the fields for AA sequence search, and DNA sequence search.
            col_names = ['gene_name', 'gene_hmm_id', 'gene_callers_id', 'f', 'e_value', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f']
            col_mapping = [str, str, int, str, float, str, str, str, str, str, str, str, str, str, str, str, str, str]
        elif self.context == "CONTIG" and self.alphabet in ("DNA", "RNA"):
            # 'hmm_target', 'hmm_acc', 'query_id', 'query_acc', 'hmm_from', 'hmm_to', 'alignment_from', 'alignment_to', 'envelope_from', 'envelope_to', 'seq_len', 'strand', 'e_value', 'score', 'bias', 'desc']
            col_names = ['gene_name', 'gene_hmm_id', 'contig_name', 'f', 'hmm_from', 'hmm_to', 'alignment_from', 'alignment_to', 'envelope_from', 'envelope_to', 'f', 'f', 'e_value', 'f', 'f', 'f']
            col_mapping = [str, str, str, str, str, str, int, int, int, int, str, str, float, str, str, str]
        else:
            # Adjacent string literals are used instead of backslash
            # continuations inside one literal, which embedded the source
            # indentation of every continued line in the message.
            raise ConfigError("HMMScan driver is confused. Yor context and alphabet pair ('%s' and '%s') "
                              "does not seem to be implemented in the parser module. If you think this is "
                              "not a mistake on your part, please get in touch with the anvi'o developers "
                              "and watch them fix it like actual pros." % (self.context, self.alphabet))

        files_structure = {'hits':
                                {'col_names': col_names,
                                 'col_mapping': col_mapping,
                                 'indexing_field': -1,
                                 'no_header': True
                                 },
                        }

        Parser.__init__(self, 'HMMScan', [hmm_scan_hits_txt], files_expected, files_structure)
    def __init__(self, input_file_paths, splits_taxonomy_table_structure):
        """Describe the single 'svr_output' table and initialize the base Parser.

        `splits_taxonomy_table_structure` is stored on the instance unchanged;
        `input_file_paths` is passed straight through to ``Parser.__init__``.
        """
        self.splits_taxonomy_table_structure = splits_taxonomy_table_structure

        # (column name, converter) pairs, in file order.
        columns = [
            ('contig', str),
            ('field1', int),
            ('prot', str),
            ('function', str),
            ('t_species', str),
        ]
        svr_output_spec = {
            'col_names': [name for name, _ in columns],
            'col_mapping': [convert for _, convert in columns],
            'indexing_field': 2,
        }

        files_expected = {'svr_output': 'svr_assign_to_dna_using_figfams.txt'}
        files_structure = {'svr_output': svr_output_spec}

        Parser.__init__(self, 'MyRastCMDLine', input_file_paths, files_expected, files_structure)
示例#8
0
    def __init__(self, input_file_paths, splits_taxonomy_table_structure):
        """Register the first input path as the 'matrix' file and initialize the base Parser.

        `splits_taxonomy_table_structure` is stored on the instance unchanged.
        """
        matrix_path = input_file_paths[0]

        col_names = ['prot', 'contig', 'start', 'stop', 'direction', 'figfam', 'function', 't_phylum', 't_class', 't_order', 't_family', 't_genus', 't_species']
        col_mapping = [str, str, int, int, str, str, str, str, str, str, str, str, str]

        matrix_spec = {'col_names': col_names, 'col_mapping': col_mapping}

        self.splits_taxonomy_table_structure = splits_taxonomy_table_structure
        Parser.__init__(self, 'DefaultMatrix', [matrix_path],
                        {'matrix': matrix_path}, {'matrix': matrix_spec})
示例#9
0
    def __init__(self, input_file_paths, genes_table_structure):
        """Describe the 'functions' table and the 'genes' fasta file, then initialize the base Parser.

        `genes_table_structure` is stored on the instance unchanged;
        `input_file_paths` is passed straight through to ``Parser.__init__``.
        """
        self.genes_table_structure = genes_table_structure

        # (column name, converter) pairs, in file order.
        function_columns = [
            ('t_species', str),
            ('field2', int),
            ('prot', str),
            ('function', str),
        ]

        files_structure = {}
        files_structure['functions'] = {
            'col_names': [name for name, _ in function_columns],
            'col_mapping': [convert for _, convert in function_columns],
            'indexing_field': 2,
        }
        files_structure['genes'] = {'type': 'fasta'}

        files_expected = {
            'functions': 'svr_assign_using_figfams.txt',
            'genes': 'svr_call_pegs.txt',
        }

        Parser.__init__(self, 'MyRastCMDLine', input_file_paths, files_expected, files_structure)
示例#10
0
    def __init__(self, input_file_paths, genes_table_structure):
        """Register the first input path as the 'matrix' file and initialize the base Parser.

        `genes_table_structure` is stored on the instance unchanged.
        """
        matrix_path = input_file_paths[0]

        matrix_spec = dict(
            col_names=['prot', 'contig', 'start', 'stop', 'direction', 'figfam', 'function', 't_phylum', 't_class', 't_order', 't_family', 't_genus', 't_species'],
            col_mapping=[str, str, int, int, str, str, str, str, str, str, str, str, str],
        )

        files_expected = dict(matrix=matrix_path)
        files_structure = dict(matrix=matrix_spec)

        self.genes_table_structure = genes_table_structure
        Parser.__init__(self, 'DefaultMatrix', [matrix_path], files_expected, files_structure)
示例#11
0
    def __init__(self, hmm_scan_hits_txt):
        """Register `hmm_scan_hits_txt` as the header-less 'hits' table and initialize the base Parser."""
        col_names = ['gene_name', 'gene_hmm_id', 'gene_callers_id', 'f', 'e_value', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f','f', 'f','f']
        col_mapping = [str, str, int, str, float, str, str, str, str, str, str, str, str, str, str, str, str, str]

        hits_spec = {
            'col_names': col_names,
            'col_mapping': col_mapping,
            'indexing_field': -1,
            'no_header': True,
        }

        Parser.__init__(self, 'HMMScan', [hmm_scan_hits_txt],
                        {'hits': hmm_scan_hits_txt}, {'hits': hits_spec})
示例#12
0
    def __init__(self, input_file_paths, taxonomy_table_structure):
        """Describe the 'functions' table and the 'genes' fasta file, then initialize the base Parser.

        `taxonomy_table_structure` is stored on the instance unchanged;
        `input_file_paths` is passed straight through to ``Parser.__init__``.
        """
        self.taxonomy_table_structure = taxonomy_table_structure

        functions_spec = dict(
            col_names=['t_species', 'field2', 'prot', 'function'],
            col_mapping=[str, int, str, str],
            indexing_field=2,
        )
        genes_spec = dict(type='fasta')

        files_structure = {'functions': functions_spec, 'genes': genes_spec}
        files_expected = {'functions': 'svr_assign_using_figfams.txt',
                          'genes': 'svr_call_pegs.txt'}

        Parser.__init__(self, 'MyRastCMDLine', input_file_paths, files_expected, files_structure)
示例#13
0
    def __init__(self, input_file_paths):
        """Register the first input path as the header-less 'matrix' table and initialize the base Parser.

        The complete `input_file_paths` value, not just its first entry, is
        what gets forwarded to ``Parser.__init__``.
        """
        # (column name, converter) pairs, in file order.
        columns = [
            ('gene_callers_id', int),
            ('hash', str),
            ('length', int),
            ('source', str),
            ('accession', str),
            ('function', str),
            ('start', int),
            ('stop', int),
            ('e_value', str),
            ('status', str),
            ('date', str),
        ]

        matrix_spec = {
            'col_names': [name for name, _ in columns],
            'col_mapping': [convert for _, convert in columns],
            'indexing_field': -1,
            'no_header': True,
        }

        files_expected = {'matrix': input_file_paths[0]}

        Parser.__init__(self, 'InterProScan', input_file_paths, files_expected,
                        {'matrix': matrix_spec})
示例#14
0
文件: hmmscan.py 项目: psaxcode/anvio
    def __init__(self, hmm_scan_hits_txt):
        """Initialize the base Parser with `hmm_scan_hits_txt` as its only file, keyed 'hits'."""
        hits_spec = dict(
            col_names=['gene_name', 'gene_hmm_id', 'gene_callers_id', 'f', 'e_value', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f'],
            col_mapping=[str, str, int, str, float, str, str, str, str, str, str, str, str, str, str, str, str, str],
            indexing_field=-1,
            no_header=True,
        )

        files_expected = dict(hits=hmm_scan_hits_txt)
        files_structure = dict(hits=hits_spec)

        Parser.__init__(self, 'HMMScan', [hmm_scan_hits_txt], files_expected, files_structure)
示例#15
0
    def __init__(self, input_file_paths):
        """Initialize the base Parser for a header-less table keyed 'matrix'.

        The first entry of `input_file_paths` is the expected 'matrix' file,
        while the whole `input_file_paths` value is forwarded to the base class.
        """
        first_path = input_file_paths[0]

        col_names = ['gene_callers_id', 'hash', 'length', 'source', 'accession', 'function', 'start', 'stop', 'e_value', 'status', 'date']
        col_mapping = [int, str, int, str, str, str, int, int, str, str, str]

        files_structure = {
            'matrix': dict(col_names=col_names,
                           col_mapping=col_mapping,
                           indexing_field=-1,
                           no_header=True),
        }

        Parser.__init__(self, 'InterProScan', input_file_paths,
                        {'matrix': first_path}, files_structure)
示例#16
0
    def __init__(self, proteins_in_contigs_fasta, hmm_scan_hits_txt):
        """Register a 'proteins' fasta file and a header-less 'hits' table, then initialize the base Parser."""
        hits_spec = {
            'col_names': ['gene_name', 'gene_id', 'query_name', 'f', 'e_value', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f','f', 'f','f'],
            'col_mapping': [str, str, str, str, float, str, str, str, str, str, str, str, str, str, str, str, str, str],
            'indexing_field': -1,
            'no_header': True,
        }
        proteins_spec = {'type': 'fasta'}

        files_structure = {'hits': hits_spec, 'proteins': proteins_spec}
        files_expected = {'proteins': proteins_in_contigs_fasta,
                          'hits': hmm_scan_hits_txt}

        input_paths = [proteins_in_contigs_fasta, hmm_scan_hits_txt]
        Parser.__init__(self, 'HMMScan', input_paths, files_expected, files_structure)
示例#17
0
    def __init__(self, input_files, contigs = 'False'):
        """Initialize the base Parser for a comma-separated, header-less 'clusters' file.

        Args:
            input_files: a single path or a list of paths; only the first entry
                is registered as the 'clusters' file.
            contigs: accepted for interface compatibility; it is not used by
                this initializer.
        """
        # isinstance() also recognizes list subclasses, which the former
        # type() comparison wrapped in a second list.
        if not isinstance(input_files, list):
            input_files = [input_files]

        files_expected = {'clusters': input_files[0]}

        files_structure = {'clusters':
                                {'col_names': ['split', 'cluster_id'],
                                 'col_mapping': [str, str],
                                 'separator': ',',
                                 'indexing_field': -1,
                                 'no_header': True
                                 },
                           }
        Parser.__init__(self, 'CONCOCT', input_files, files_expected, files_structure)
示例#18
0
    def __init__(self, input_file_paths, genes_table_structure):
        """Declare the 'functions', 'gene_otus' and 'peg' tables and initialize the base Parser.

        `genes_table_structure` is stored on the instance unchanged;
        `input_file_paths` is passed straight through to ``Parser.__init__``.
        """
        self.genes_table_structure = genes_table_structure

        functions_spec = dict(
            col_names=['prot', 'figfam', 'field3', 'field4', 'field5', 'function'],
            col_mapping=[str, str, int, int, int, str],
        )
        # No converters are declared for this table.
        gene_otus_spec = dict(col_names=['prot', 't_species'], col_mapping=None)
        peg_spec = dict(
            col_names=['prot', 'contig', 'start', 'stop'],
            col_mapping=[str, str, int, int],
        )

        files_structure = {
            'functions': functions_spec,
            'gene_otus': gene_otus_spec,
            'peg': peg_spec,
        }
        files_expected = {
            'functions': 'functions.tbl',
            'gene_otus': 'gene_otus.tbl',
            'peg': 'peg.tbl',
        }

        Parser.__init__(self, 'MyRastGUI', input_file_paths, files_expected, files_structure)
    def __init__(self, input_file_paths, splits_taxonomy_table_structure):
        """Declare the 'svr_output' table layout and initialize the base Parser.

        `splits_taxonomy_table_structure` is stored on the instance unchanged;
        `input_file_paths` is passed straight through to ``Parser.__init__``.
        """
        svr_output_spec = dict(
            col_names=['contig', 'field1', 'prot', 'function', 't_species'],
            col_mapping=[str, int, str, str, str],
            indexing_field=2,
        )

        files_expected = dict(svr_output='svr_assign_to_dna_using_figfams.txt')
        files_structure = dict(svr_output=svr_output_spec)

        self.splits_taxonomy_table_structure = splits_taxonomy_table_structure
        Parser.__init__(self, 'MyRastCMDLine', input_file_paths,
                        files_expected, files_structure)
示例#20
0
    def __init__(self, input_files, contigs = 'False'):
        """Initialize the base Parser for a comma-separated, header-less 'clusters' file.

        Args:
            input_files: a single path or a list of paths; only the first entry
                is registered as the 'clusters' file.
            contigs: accepted for interface compatibility; it is not used by
                this initializer.
        """
        # isinstance() also recognizes list subclasses, which the former
        # type() comparison wrapped in a second list.
        if not isinstance(input_files, list):
            input_files = [input_files]

        files_expected = {'clusters': input_files[0]}

        files_structure = {'clusters':
                                {'col_names': ['split', 'bin_name'],
                                 'col_mapping': [str, str],
                                 'separator': ',',
                                 'indexing_field': -1,
                                 'no_header': True
                                 },
                           }
        Parser.__init__(self, 'CONCOCT', input_files, files_expected, files_structure)
示例#21
0
    def __init__(self, input_file_paths, taxonomy_table_structure, run=terminal.Run(), progress=terminal.Progress()):
        """Initialize the base Parser for a matrix whose columns follow `levels_of_taxonomy`.

        The first entry of `input_file_paths` is registered as the 'matrix'
        file; `taxonomy_table_structure` is stored on the instance unchanged.
        """
        self.run, self.progress = run, progress

        matrix_path = input_file_paths[0]

        # One int column for the gene caller id, then one str column per level.
        col_names = ['gene_callers_id'] + levels_of_taxonomy
        col_mapping = [int] + [str] * (len(col_names) - 1)

        matrix_spec = {
            'col_names': col_names,
            'col_mapping': col_mapping,
            'only_expected_fields': True,
        }

        self.taxonomy_table_structure = taxonomy_table_structure
        Parser.__init__(self, 'DefaultMatrix', [matrix_path],
                        {'matrix': matrix_path}, {'matrix': matrix_spec})
示例#22
0
    def __init__(self, input_file_paths, taxonomy_table_structure, run=terminal.Run(), progress=terminal.Progress()):
        """Initialize the base Parser for a matrix of gene caller ids and six taxonomy columns.

        The first entry of `input_file_paths` is registered as the 'matrix'
        file; `taxonomy_table_structure` is stored on the instance unchanged.
        """
        self.run, self.progress = run, progress

        matrix_path = input_file_paths[0]

        matrix_spec = dict(
            col_names=['gene_callers_id', 't_phylum', 't_class', 't_order', 't_family', 't_genus', 't_species'],
            col_mapping=[int, str, str, str, str, str, str],
            only_expected_fields=True,
        )

        files_expected = dict(matrix=matrix_path)
        files_structure = dict(matrix=matrix_spec)

        self.taxonomy_table_structure = taxonomy_table_structure
        Parser.__init__(self, 'DefaultMatrix', [matrix_path], files_expected, files_structure)
示例#23
0
文件: hmmer.py 项目: FBBJBB/anvio
    def __init__(self,
                 hmmer_table_txt,
                 alphabet='AA',
                 context='GENE',
                 program='hmmscan',
                 run=terminal.Run()):
        """Pick the column layout for a HMMER hits table and initialize the base Parser.

        The layout comes from one of the ``get_col_info_for_*_context`` methods,
        chosen by `context` and `alphabet`; `hmmer_table_txt` is registered as
        the header-less 'hits' file.

        Raises:
            ConfigError: if the 'DOMAIN' context is requested for a program
                other than 'hmmsearch', or if the context/alphabet pair is not
                one of the supported combinations.
        """
        self.alphabet = alphabet
        self.context = context
        self.program = program
        self.run = run

        # Resolve which layout provider applies first, then call it once below.
        if self.context == "GENE":
            fetch_col_info = self.get_col_info_for_GENE_context
        elif self.context == "CONTIG" and self.alphabet in ("DNA", "RNA"):
            fetch_col_info = self.get_col_info_for_CONTIG_context
        elif self.context == "DOMAIN" and self.alphabet == "AA":
            if program != 'hmmsearch':
                raise ConfigError(
                    "HMMScan :: the 'DOMAIN' context is only available for hmmsearch."
                )
            fetch_col_info = self.get_col_info_for_DOMAIN_context
        else:
            raise ConfigError(
                "HMMScan driver is confused. Yor context and alphabet pair ('%s' and '%s') "
                "does not seem to be implemented in the parser module. If you think this is "
                "not a mistake on your part, please get in touch with the anvi'o developers "
                "and watch them fix it like actual pros." %
                (self.context, self.alphabet))

        col_names, col_mapping = fetch_col_info()

        hits_spec = dict(col_names=col_names,
                         col_mapping=col_mapping,
                         indexing_field=-1,
                         no_header=True)

        Parser.__init__(self, 'HMMScan', [hmmer_table_txt],
                        {'hits': hmmer_table_txt}, {'hits': hits_spec})
示例#24
0
    def __init__(self, input_files, taxonomy_table_structure, run=terminal.Run(), progress=terminal.Progress()):
        """Initialize the base Parser for a tab-separated 'kraken_output' file.

        Args:
            input_files: a single path or a list of paths; only the first entry
                is registered as 'kraken_output'.
            taxonomy_table_structure: accepted for interface compatibility; it is
                not used by this initializer.
            run: object stored on ``self.run``.
            progress: object stored on ``self.progress``.
        """
        self.run = run
        self.progress = progress

        # isinstance() also recognizes list subclasses, which the former
        # type() comparison wrapped in a second list.
        if not isinstance(input_files, list):
            input_files = [input_files]

        files_expected = {'kraken_output': input_files[0]}

        files_structure = {'kraken_output':
                                {'col_names': ['taxonomy', 'count'],
                                 'col_mapping': [str, int],
                                 'separator': '\t',
                                 'indexing_field': -1
                                 },
                           }

        Parser.__init__(self, 'KrakenHLL', input_files, files_expected, files_structure)
示例#25
0
    def __init__(self,
                 input_file_paths,
                 taxonomy_table_structure,
                 run=terminal.Run(),
                 progress=terminal.Progress()):
        """Describe the 'report' and 'hits' tables and initialize the base Parser.

        The first and second entries of `input_file_paths`, when present, are
        the expected 'report' and 'hits' files; otherwise the names
        'centrifuge_report.tsv' and 'centrifuge_hits.tsv' are used.
        `taxonomy_table_structure` is stored on the instance unchanged.
        """
        self.run = run
        self.progress = progress

        self.min_hit_score = 250

        # Fall back to the fixed file names when a path was not supplied.
        if len(input_file_paths) > 0:
            report_path = input_file_paths[0]
        else:
            report_path = 'centrifuge_report.tsv'

        if len(input_file_paths) > 1:
            hits_path = input_file_paths[1]
        else:
            hits_path = 'centrifuge_hits.tsv'

        files_expected = {'report': report_path, 'hits': hits_path}

        report_spec = {
            'col_names': ['t_species', 'taxon_id', 'f1', 'f2', 'f3', 'f4', 'f5'],
            'col_mapping': [str, int, str, str, str, str, str],
            'indexing_field': 1,
        }
        hits_spec = {
            'col_names': ['gene_callers_id', 'f1', 'taxon_id', 'score', 'f2', 'f3', 'f4', 'f5'],
            # The first column is converted from the text before its first '|'.
            'col_mapping': [lambda x: int(x.split('|')[0]), str, int, int, str, str, str, str],
            'indexing_field': -1,
        }
        files_structure = {'report': report_spec, 'hits': hits_spec}

        self.taxonomy_table_structure = taxonomy_table_structure
        Parser.__init__(self, 'centrifuge', input_file_paths, files_expected,
                        files_structure)
示例#26
0
    def __init__(self, hmm_scan_hits_txt, alphabet='AA', context='GENE', program='hmmscan'):
        """Select the column layout for an HMM hits table and initialize the base Parser.

        The layout depends on `context`, and within the 'GENE' context on
        `program` ('hmmscan' or 'hmmsearch'). `hmm_scan_hits_txt` is registered
        as the header-less 'hits' file.

        Raises:
            ConfigError: if `program` is not handled in the 'GENE' context, or
                if the context/alphabet pair is not a supported combination.
        """
        self.alphabet = alphabet
        self.context = context
        self.program = program

        # NOTE(review): `run` is not a parameter of this method; presumably it
        # resolves to a module-level name -- confirm.
        self.run = run

        gene_context = self.context == "GENE"
        contig_context = self.context == "CONTIG" and (self.alphabet == "DNA" or self.alphabet == "RNA")

        if gene_context and self.program == 'hmmscan':
            # see the HMMER user guide for details of the fields for AA sequence search, and DNA sequence search.
            #                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
            # target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description
            col_names = ['gene_name', 'gene_hmm_id', 'gene_callers_id', 'f', 'e_value', 'bit_score', 'f', 'f', 'dom_bit_score', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f']
            col_mapping = [str, str, int, str, float, float, str, str, float, str, str, str, str, str, str, str, str, str]
        elif gene_context and self.program == 'hmmsearch':
            #                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
            # target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
            #------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
            col_names = ['gene_callers_id', 'f', 'gene_name', 'gene_hmm_id', 'e_value', 'bit_score', 'f', 'f', 'dom_bit_score', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f']
            col_mapping = [int, str, str, str, float, float, str, str, float, str, str, str, str, str, str, str, str, str]
        elif gene_context:
            raise ConfigError("The HMMScan Parser class is not sure if you know what you are doing. You told it that you wanted to "
                                "parse HMM hits from the program %s, but this class doesn't know how to handle those." % (self.program))
        elif contig_context:
            # 'hmm_target', 'hmm_acc', 'query_id', 'query_acc', 'hmm_from', 'hmm_to', 'alignment_from', 'alignment_to', 'envelope_from', 'envelope_to', 'seq_len', 'strand', 'e_value', 'score', 'bias', 'desc']
            col_names = ['gene_name', 'gene_hmm_id', 'contig_name', 'f', 'hmm_from', 'hmm_to', 'alignment_from', 'alignment_to', 'envelope_from', 'envelope_to', 'f', 'f', 'e_value', 'f', 'f', 'f']
            col_mapping = [str, str, str, str, str, str, int, int, int, int, str, str, float, str, str, str]
        else:
            raise ConfigError("HMMScan driver is confused. Yor context and alphaet pair ('%s' and '%s') "
                              "does not seem to be implemented in the parser module. If you think this is "
                              "not a mistake on your part, please get in touch with the anvi'o developers "
                              "and watch them fix it like actual pros." % (self.context, self.alphabet))

        hits_spec = {
            'col_names': col_names,
            'col_mapping': col_mapping,
            'indexing_field': -1,
            'no_header': True,
        }

        Parser.__init__(self, 'HMMScan', [hmm_scan_hits_txt],
                        {'hits': hmm_scan_hits_txt}, {'hits': hits_spec})
示例#27
0
    def __init__(self, input_file_paths, taxonomy_table_structure, run=terminal.Run(), progress=terminal.Progress()):
        """Describe the 'report' and 'hits' tables and initialize the base Parser.

        The expected file names are fixed ('centrifuge_report.tsv' and
        'centrifuge_hits.tsv'); `input_file_paths` is passed straight through to
        ``Parser.__init__`` and `taxonomy_table_structure` is stored on the
        instance unchanged.
        """
        self.run, self.progress = run, progress
        self.min_hit_score = 250
        self.taxonomy_table_structure = taxonomy_table_structure

        files_expected = dict(report='centrifuge_report.tsv', hits='centrifuge_hits.tsv')

        report_spec = dict(
            col_names=['t_species', 'taxon_id', 'f1', 'f2', 'f3', 'f4', 'f5'],
            col_mapping=[str, int, str, str, str, str, str],
            indexing_field=1,
        )
        hits_spec = dict(
            col_names=['gene_callers_id', 'f1', 'taxon_id', 'score', 'f2', 'f3', 'f4', 'f5'],
            # The first column is converted from the text before its first '|'.
            col_mapping=[lambda x: int(x.split('|')[0]), str, int, int, str, str, str, str],
            indexing_field=-1,
        )
        files_structure = {'report': report_spec, 'hits': hits_spec}

        Parser.__init__(self, 'centrifuge', input_file_paths, files_expected, files_structure)