Exemplo n.º 1
0
    def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
        """Configure the pangenomics workflow on top of `ContigsDBWorkflow`.

        Passes `args` to `init_workflow_super_class` under the workflow name
        'pangenomics', runs the `ContigsDBWorkflow` initializer, and then
        registers this workflow's rules, general parameters, output
        directories, default config values and per-rule acceptable parameters.

        `run` and `progress` are accepted but not referenced in this body.
        """
        self.init_workflow_super_class(args, workflow_name='pangenomics')

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        pan_rules = ['anvi_gen_genomes_storage', 'anvi_pan_genome']
        self.rules.extend(pan_rules)

        top_level_keys = ["project_name", "fasta_txt", "internal_genomes", "external_genomes"]
        self.general_params.extend(top_level_keys)

        output_dirs = {
            "FASTA_DIR": "01_FASTA",
            "CONTIGS_DIR": "02_CONTIGS",
            "PAN_DIR": "03_PAN",
        }
        self.dirs_dict.update(output_dirs)

        defaults = {
            "fasta_txt": "fasta.txt",
            "anvi_pan_genome": {"threads": 7},
        }
        self.default_config.update(defaults)

        # per-rule acceptable parameters
        self.rule_acceptable_params_dict['anvi_pan_genome'] = [
            "--project-name",
            "--genome-names",
            "--skip-alignments",
            "--align-with",
            "--exclude-partial-gene-calls",
            "--use-ncbi-blast",
            "--minbit",
            "--mcl-inflation",
            "--min-occurrence",
            "--min-percent-identity",
            "--sensitive",
            "--description",
            "--overwrite-output-destinations",
            "--skip-hierarchical-clustering",
            "--enforce-hierarchical-clustering",
            "--distance",
            "--linkage",
        ]
        self.rule_acceptable_params_dict['anvi_gen_genomes_storage'] = ["--gene-caller"]
Exemplo n.º 2
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Configure the pangenomics workflow on top of `ContigsDBWorkflow`.

        Stores `args`, `run` and `progress` on the instance, names the workflow
        'pangenomics', runs the `ContigsDBWorkflow` initializer, and then
        registers this workflow's rules, general parameters, output
        directories, default config values and per-rule acceptable parameters.

        The default `run` / `progress` objects are built once, when this method
        is defined, and are reused by every call that omits them.
        """
        self.args, self.run, self.progress = args, run, progress

        # the workflow's own name
        self.name = 'pangenomics'

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        pan_rules = [
            'gen_external_genome_file',
            'anvi_gen_genomes_storage',
            'anvi_pan_genome',
        ]
        self.rules.extend(pan_rules)

        top_level_keys = ["project_name", "fasta_txt"]
        self.general_params.extend(top_level_keys)

        output_dirs = {
            "FASTA_DIR": "01_FASTA",
            "CONTIGS_DIR": "02_CONTIGS",
            "PAN_DIR": "03_PAN",
        }
        self.dirs_dict.update(output_dirs)

        defaults = {
            "fasta_txt": "fasta.txt",
            "anvi_pan_genome": {"threads": 20},
        }
        self.default_config.update(defaults)

        # per-rule acceptable parameters
        self.rule_acceptable_params_dict['anvi_pan_genome'] = [
            "--project-name",
            "--genome-names",
            "--skip-alignments",
            "--align-with",
            "--exclude-partial-gene-calls",
            "--use-ncbi-blast",
            "--minbit",
            "--mcl-inflation",
            "--min-occurrence",
            "--min-percent-identity",
            "--sensitive",
            "--description",
            "--overwrite-output-destinations",
            "--skip-hierarchical-clustering",
            "--enforce-hierarchical-clustering",
            "--distance",
            "--linkage",
        ]
        self.rule_acceptable_params_dict['anvi_gen_genomes_storage'] = [
            "--internal-genomes",
            "--external-genomes",
            "--gene-caller",
        ]
Exemplo n.º 3
0
    def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
        """Configure the phylogenomics workflow on top of `ContigsDBWorkflow`.

        Passes `args` to `init_workflow_super_class` under the workflow name
        'phylogenomics', runs the `ContigsDBWorkflow` initializer, creates a
        few empty instance attributes, and then registers this workflow's
        rules, general parameters, output directory, default config values and
        per-rule acceptable parameters.

        `run` and `progress` are accepted but not referenced in this body.
        """
        self.init_workflow_super_class(args, workflow_name='phylogenomics')

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        # these start out empty here
        self.input_for_anvi_get_sequences_for_hmm_hits = {}
        self.internal_genomes_file = self.external_genomes_file = ''

        phylo_rules = [
            'anvi_get_sequences_for_hmm_hits',
            'trimal',
            'iqtree',
        ]
        self.rules.extend(phylo_rules)

        top_level_keys = [
            'project_name',
            'internal_genomes',
            'external_genomes',
        ]
        self.general_params.extend(top_level_keys)

        output_dirs = {"PHYLO_DIR": "01_PHYLOGENOMICS"}
        self.dirs_dict.update(output_dirs)

        defaults = {
            'anvi_get_sequences_for_hmm_hits': {
                '--return-best-hit': True,
                '--align-with': 'famsa',
                '--concatenate-genes': True,
                '--get-aa-sequences': True,
                '--hmm-sources': 'Campbell_et_al',
            },
            'trimal': {
                '-gt': 0.5,
            },
            'iqtree': {
                'threads': 8,
                '-m': 'WAG',
                '-bb': 1000,
            },
        }
        self.default_config.update(defaults)

        # per-rule acceptable parameters
        acceptable = self.rule_acceptable_params_dict
        acceptable['anvi_get_sequences_for_hmm_hits'] = [
            '--return-best-hit',
            '--separator',
            '--align-with',
            '--min-num-bins-gene-occurs',
            '--max-num-genes-missing-from-bin',
            '--concatenate-genes',
            '--get-aa-sequences',
            '--gene-names',
            '--hmm-sources',
        ]
        acceptable['trimal'] = ['-gt', 'additional_params']
        acceptable['iqtree'] = ['-m', '-bb', 'additional_params']
Exemplo n.º 4
0
    def __init__(self,
                 args=None,
                 run=terminal.Run(),
                 progress=terminal.Progress()):
        """Configure the phylogenomics workflow on top of `ContigsDBWorkflow`.

        Passes `args` to `init_workflow_super_class` under the workflow name
        'phylogenomics', runs the `ContigsDBWorkflow` initializer, gives a
        handful of instance attributes their starting values, and then
        registers this workflow's rules, general parameters, output directory,
        default config values and per-rule acceptable parameters.

        `run` and `progress` are accepted but not referenced in this body.
        """
        self.init_workflow_super_class(args, workflow_name='phylogenomics')

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        # starting values for this workflow's own state
        self.input_for_anvi_get_sequences_for_hmm_hits = {}
        self.internal_genomes_file = self.external_genomes_file = ''
        self.phylogenomics_sequence_file = self.project_name = None
        self.use_hmms_for_phylogeny = True

        phylo_rules = ['anvi_get_sequences_for_hmm_hits', 'trimal', 'iqtree']
        self.rules.extend(phylo_rules)

        top_level_keys = ['project_name', 'internal_genomes', 'external_genomes']
        self.general_params.extend(top_level_keys)

        output_dirs = {"PHYLO_DIR": "01_PHYLOGENOMICS"}
        self.dirs_dict.update(output_dirs)

        hmm_hits_defaults = {'--return-best-hit': True,
                             '--align-with': 'famsa',
                             '--concatenate-genes': True,
                             '--get-aa-sequences': True,
                             '--hmm-sources': 'Bacteria_71'}
        trimal_defaults = {'-gt': 0.5}
        iqtree_defaults = {'threads': 8, '-m': 'WAG', '-bb': 1000}
        self.default_config.update({'anvi_get_sequences_for_hmm_hits': hmm_hits_defaults,
                                    'trimal': trimal_defaults,
                                    'iqtree': iqtree_defaults})

        # per-rule acceptable parameters
        acceptable = self.rule_acceptable_params_dict
        acceptable['anvi_get_sequences_for_hmm_hits'] = [
            '--return-best-hit', '--separator', '--align-with',
            '--min-num-bins-gene-occurs', '--max-num-genes-missing-from-bin',
            '--concatenate-genes', '--get-aa-sequences', '--gene-names',
            '--hmm-sources']
        acceptable['trimal'] = ['-gt', 'additional_params']
        acceptable['iqtree'] = ['-m', '-bb', 'additional_params']
Exemplo n.º 5
0
    def __init__(self):
        """Set up the rule list and the per-rule acceptable parameters.

        Runs the `ContigsDBWorkflow` initializer, then assigns `self.rules`
        and `self.rule_acceptable_params_dict` from scratch.

        NOTE(review): `config` is not a parameter of this method, so it has to
        resolve from an enclosing or module-level scope when this runs;
        confirm that is intended.
        """
        ContigsDBWorkflow.__init__(self, config)

        # Assigned after the base-class initializer, so this overwrites any
        # `self.rules` it may have set. 'anvi_script_reformat_fasta' is spelled
        # the same way as its key in the acceptable-params dict below.
        self.rules = ['iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report', 'gzip_fastqs',
                      'fq2fa', 'merge_fastas_for_co_assembly', 'megahit', 'anvi_script_reformat_fasta',
                      'anvi_gen_contigs_database', 'export_gene_calls', 'centrifuge',
                      'anvi_import_taxonomy', 'anvi_run_hmms', 'anvi_run_ncbi_cogs',
                      'bowtie_build', 'bowtie', 'samtools_view', 'anvi_init_bam',
                      'anvi_profile', 'annotate_contigs_database', 'anvi_merge']

        # defining the accessible params per rule
        rule_acceptable_params_dict = {
            'iu_gen_configs': ["--r1-prefix", "--r2-prefix"],
            'iu_filter_quality_minoche': [
                'visualize_quality_curves', 'ignore_deflines', 'limit_num_pairs',
                'print_qual_scores', 'store_read_fate'
            ],
            'gzip_fastqs': ["run"],
        }

        # these rules get an empty list (a separate list object per rule)
        rules_without_params = (
            'megahit', 'anvi_script_reformat_fasta', 'anvi_gen_contigs_database',
            'export_gene_calls', 'centrifuge', 'anvi_import_taxonomy',
            'anvi_run_hmms', 'anvi_run_ncbi_cogs', 'bowtie_build', 'bowtie',
            'samtools_view', 'anvi_init_bam', 'anvi_profile',
            'annotate_contigs_database', 'anvi_merge',
        )
        for rule in rules_without_params:
            rule_acceptable_params_dict[rule] = []

        self.rule_acceptable_params_dict = rule_acceptable_params_dict
Exemplo n.º 6
0
    def __init__(self, config):
        """Configure the pangenomics workflow on top of `ContigsDBWorkflow`.

        Forwards `config` to the `ContigsDBWorkflow` initializer, then
        registers this workflow's rules, general parameters and per-rule
        acceptable parameters.
        """
        ContigsDBWorkflow.__init__(self, config)

        pan_rules = ['gen_external_genome_file',
                     'anvi_gen_genomes_storage',
                     'anvi_pan_genome']
        self.rules.extend(pan_rules)

        top_level_keys = ["project_name", "samples_txt"]
        self.general_params.extend(top_level_keys)

        # per-rule acceptable parameters
        acceptable = self.rule_acceptable_params_dict
        acceptable['anvi_pan_genome'] = [
            "--project-name",
            "--output-dir",
            "--genome-names",
            "--skip-alignments",
            "--align-with",
            "--exclude-partial-gene-calls",
            "--use-ncbi-blast",
            "--minbit",
            "--mcl-inflation",
            "--min-occurrence",
            "--min-percent-identity",
            "--sensitive",
            "--description",
            "--overwrite-output-destinations",
            "--skip-hierarchical-clustering",
            "--enforce-hierarchical-clustering",
            "--distance",
            "--linkage",
        ]
        acceptable['anvi_gen_genomes_storage'] = ["--internal-genomes",
                                                  "--external-genomes",
                                                  "--gene-caller"]
Exemplo n.º 7
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Configure the metagenomics workflow on top of `ContigsDBWorkflow`.

        Stores `args`, `run` and `progress` on the instance, names the workflow
        'metagenomics', gives the workflow's own attributes their starting
        values, runs the `ContigsDBWorkflow` initializer, and then registers
        rules, general parameters, per-rule acceptable and forbidden
        parameters, output directories and default config values.

        The default `run` / `progress` objects are built once, when this method
        is defined, and are reused by every call that omits them.
        """
        self.args, self.run, self.progress = args, run, progress

        # the workflow's own name
        self.name = 'metagenomics'

        # starting values for this workflow's own state
        self.samples_information = {}
        self.kraken_annotation_dict = {}
        self.run_metaspades = self.use_scaffold_from_metaspades = None
        self.remove_short_reads_based_on_references = None
        self.references_for_removal_txt = None
        self.references_for_removal = {}
        self.references_mode = None
        self.fasta_txt_file = self.samples_txt_file = None
        self.sample_names = self.group_sizes = None

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        metagenomics_rules = [
            'iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report',
            'gzip_fastqs', 'merge_fastqs_for_co_assembly', 'megahit',
            'merge_fastas_for_co_assembly', 'anvi_gen_contigs_database',
            'anvi_export_gene_calls', 'centrifuge', 'anvi_import_taxonomy',
            'anvi_run_hmms', 'anvi_run_ncbi_cogs', 'bowtie_build', 'bowtie',
            'samtools_view', 'anvi_init_bam', 'idba_ud', 'anvi_profile',
            'annotate_contigs_database', 'anvi_merge',
            'import_percent_of_reads_mapped', 'krakenhll',
            'krakenhll_mpa_report', 'import_kraken_hll_taxonomy', 'metaspades',
            'remove_short_reads_based_on_references',
        ]
        self.rules.extend(metagenomics_rules)

        top_level_keys = ['samples_txt', "references_mode", "all_against_all", "kraken_txt"]
        self.general_params.extend(top_level_keys)

        # acceptable parameters of the two rules with the longest flag lists
        megahit_params = [
            "run", "--min-contig-len", "--min-count", "--k-min", "--k-max",
            "--k-step", "--k-list", "--no-mercy", "--no-bubble",
            "--merge-level", "--prune-level", "--prune-depth",
            "--low-local-ratio", "--max-tip-len", "--no-local", "--kmin-1pass",
            "--presets", "--memory", "--mem-flag", "--use-gpu", "--gpu-mem",
            "--keep-tmp-files", "--tmp-dir", "--continue", "--verbose",
        ]
        idba_ud_params = [
            "run", "--mink", "--maxk", "--step", "--inner_mink",
            "--inner_step", "--prefix", "--min_count", "--min_support",
            "--seed_kmer", "--min_contig", "--similar", "--max_mismatch",
            "--min_pairs", "--no_bubble", "--no_local", "--no_coverage",
            "--no_correct", "--pre_correction",
        ]

        # acceptable parameters for every rule this workflow adds or overrides
        acceptable = {
            'iu_gen_configs': ["--r1-prefix", "--r2-prefix"],
            'iu_filter_quality_minoche': [
                'run', '--visualize-quality-curves', '--ignore-deflines',
                '--limit-num-pairs', '--print-qual-scores', '--store-read-fate',
            ],
            'gzip_fastqs': ["run"],
            'metaspades': ["run", "additional_params", "use_scaffolds"],
            'megahit': megahit_params,
            'idba_ud': idba_ud_params,
            'bowtie': ["additional_params"],
            'samtools_view': ["additional_params"],
            'anvi_profile': [
                "--overwrite-output-destinations", "--sample-name",
                "--report-variability-full", "--skip-SNV-profiling",
                "--profile-SCVs", "--description",
                "--skip-hierarchical-clustering", "--distance", "--linkage",
                "--min-contig-length", "--min-mean-coverage",
                "--min-coverage-for-variability", "--cluster-contigs",
                "--contigs-of-interest", "--queue-size", "--write-buffer-size",
                "--max-contig-length",
            ],
            'annotate_contigs_database': [],
            'merge_fastas_for_co_assembly': [],
            'merge_fastqs_for_co_assembly': [],
            'anvi_merge': [
                "--sample-name", "--description",
                "--skip-hierarchical-clustering",
                "--enforce-hierarchical-clustering", "--distance", "--linkage",
                "--skip-concoct-binning", "--overwrite-output-destinations",
            ],
            'import_percent_of_reads_mapped': ["run"],
            'krakenhll': ["additional_params", "run", "--db", "--gzip-compressed"],
            'krakenhll_mpa_report': ["additional_params"],
            'import_kraken_hll_taxonomy': ["--min-abundance"],
            'remove_short_reads_based_on_references': [
                "dont_remove_just_map",
                "references_for_removal_txt",
                "delimiter-for-iu-remove-ids-from-fastq",
            ],
        }
        self.rule_acceptable_params_dict.update(acceptable)

        forbidden = {'krakenhll': ['--fastq-input', '--paired', '--output']}
        self.forbidden_params.update(forbidden)

        output_dirs = {
            "QC_DIR": "01_QC",
            "FASTA_DIR": "02_FASTA",
            "CONTIGS_DIR": "03_CONTIGS",
            "MAPPING_DIR": "04_MAPPING",
            "PROFILE_DIR": "05_ANVIO_PROFILE",
            "MERGE_DIR": "06_MERGED",
            "TAXONOMY_DIR": "07_TAXONOMY",
        }
        self.dirs_dict.update(output_dirs)

        # both assemblers below default to the same minimum contig length
        min_contig_len = min_contig_length_for_assembly
        defaults = {
            'samples_txt': "samples.txt",
            'metaspades': {"additional_params": "--only-assembler", "threads": 7},
            'megahit': {"--min-contig-len": min_contig_len, "--memory": 0.4, "threads": 7},
            'idba_ud': {"--min_contig": min_contig_len, "threads": 7},
            'iu_filter_quality_minoche': {"run": True, "--ignore-deflines": True},
            "gzip_fastqs": {"run": True},
            "bowtie": {"additional_params": "--no-unal", "threads": 3},
            "samtools_view": {"additional_params": "-F 4"},
            "anvi_profile": {
                "threads": 3,
                "--sample-name": "{sample}",
                "--overwrite-output-destinations": True,
            },
            "anvi_merge": {
                "--sample-name": "{group}",
                "--overwrite-output-destinations": True,
            },
            "import_percent_of_reads_mapped": {"run": True},
            "krakenhll": {
                "threads": 3,
                "--gzip-compressed": True,
                "additional_params": "--preload",
            },
            "remove_short_reads_based_on_references": {
                "delimiter-for-iu-remove-ids-from-fastq": " ",
            },
        }
        self.default_config.update(defaults)
Exemplo n.º 8
0
    def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
        """Configure the metagenomics workflow on top of `ContigsDBWorkflow`.

        Passes `args` to `init_workflow_super_class` under the workflow name
        'metagenomics', gives the workflow's own attributes their starting
        values, runs the `ContigsDBWorkflow` initializer, and then registers
        rules, general parameters, per-rule acceptable and forbidden
        parameters, output directories and default config values.

        `run` and `progress` are accepted but not referenced in this body.
        """
        self.init_workflow_super_class(args, workflow_name='metagenomics')

        # TODO: Once we update all other workflows then this will be initiated in WorkflowSuperClass
        self.target_files = []

        # starting values for this workflow's own state
        self.samples_information = {}
        self.kraken_annotation_dict = {}
        self.run_krakenuniq = self.run_metaspades = None
        self.use_scaffold_from_metaspades = None
        self.remove_short_reads_based_on_references = None
        self.references_for_removal_txt = None
        self.references_for_removal = {}
        self.references_mode = None
        self.fasta_txt_file = self.samples_txt_file = None
        self.sample_names = self.group_sizes = None
        self.collections_txt = self.collections = None

        # base-class setup; the containers extended below are not created here
        ContigsDBWorkflow.__init__(self)

        metagenomics_rules = [
            'iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report',
            'gzip_fastqs', 'merge_fastqs_for_co_assembly', 'megahit',
            'merge_fastas_for_co_assembly', 'anvi_gen_contigs_database',
            'anvi_export_gene_calls', 'centrifuge', 'anvi_import_taxonomy',
            'anvi_run_hmms', 'anvi_run_ncbi_cogs', 'bowtie_build', 'bowtie',
            'samtools_view', 'anvi_init_bam', 'idba_ud', 'anvi_profile',
            'annotate_contigs_database', 'anvi_merge',
            'import_percent_of_reads_mapped', 'krakenuniq',
            'krakenuniq_mpa_report', 'import_krakenuniq_taxonomy', 'metaspades',
            'remove_short_reads_based_on_references', 'anvi_summarize',
            'anvi_split',
        ]
        self.rules.extend(metagenomics_rules)

        top_level_keys = ['samples_txt', "references_mode", "all_against_all",
                          "kraken_txt", "collections_txt"]
        self.general_params.extend(top_level_keys)

        # acceptable parameters of the two rules with the longest flag lists
        megahit_params = [
            "run", "--min-contig-len", "--min-count", "--k-min", "--k-max",
            "--k-step", "--k-list", "--no-mercy", "--no-bubble",
            "--merge-level", "--prune-level", "--prune-depth",
            "--low-local-ratio", "--max-tip-len", "--no-local", "--kmin-1pass",
            "--presets", "--memory", "--mem-flag", "--use-gpu", "--gpu-mem",
            "--keep-tmp-files", "--tmp-dir", "--continue", "--verbose",
        ]
        idba_ud_params = [
            "run", "--mink", "--maxk", "--step", "--inner_mink",
            "--inner_step", "--prefix", "--min_count", "--min_support",
            "--seed_kmer", "--min_contig", "--similar", "--max_mismatch",
            "--min_pairs", "--no_bubble", "--no_local", "--no_coverage",
            "--no_correct", "--pre_correction",
        ]

        # acceptable parameters for every rule this workflow adds or overrides
        acceptable = {
            'iu_gen_configs': ["--r1-prefix", "--r2-prefix"],
            'iu_filter_quality_minoche': [
                'run', '--visualize-quality-curves', '--ignore-deflines',
                '--limit-num-pairs', '--print-qual-scores', '--store-read-fate',
            ],
            'gzip_fastqs': ["run"],
            'anvi_summarize': ["additional_params", "run"],
            'anvi_split': ["additional_params", "run"],
            'metaspades': ["run", "additional_params", "use_scaffolds"],
            'megahit': megahit_params,
            'idba_ud': idba_ud_params,
            'bowtie': ["additional_params"],
            'samtools_view': ["additional_params"],
            'anvi_profile': [
                "--overwrite-output-destinations", "--sample-name",
                "--report-variability-full", "--skip-SNV-profiling",
                "--profile-SCVs", "--description",
                "--skip-hierarchical-clustering", "--distance", "--linkage",
                "--min-contig-length", "--min-mean-coverage",
                "--min-coverage-for-variability", "--cluster-contigs",
                "--contigs-of-interest", "--queue-size", "--write-buffer-size",
                "--max-contig-length", "--max-coverage-depth",
            ],
            'annotate_contigs_database': [],
            'merge_fastas_for_co_assembly': [],
            'merge_fastqs_for_co_assembly': [],
            'anvi_merge': [
                "--sample-name", "--description",
                "--skip-hierarchical-clustering",
                "--enforce-hierarchical-clustering", "--distance", "--linkage",
                "--skip-concoct-binning", "--overwrite-output-destinations",
            ],
            'import_percent_of_reads_mapped': ["run"],
            'krakenuniq': ["additional_params", "run", "--db", "--gzip-compressed"],
            'import_krakenuniq_taxonomy': ["--min-abundance"],
            'remove_short_reads_based_on_references': [
                "dont_remove_just_map",
                "references_for_removal_txt",
                "delimiter-for-iu-remove-ids-from-fastq",
            ],
        }
        self.rule_acceptable_params_dict.update(acceptable)

        forbidden = {'krakenuniq': ['--fastq-input', '--paired', '--output']}
        self.forbidden_params.update(forbidden)

        output_dirs = {
            "QC_DIR": "01_QC",
            "FASTA_DIR": "02_FASTA",
            "CONTIGS_DIR": "03_CONTIGS",
            "MAPPING_DIR": "04_MAPPING",
            "PROFILE_DIR": "05_ANVIO_PROFILE",
            "MERGE_DIR": "06_MERGED",
            "TAXONOMY_DIR": "07_TAXONOMY",
            "SUMMARY_DIR": "08_SUMMARY",
            "SPLIT_PROFILES_DIR": "09_SPLIT_PROFILES",
        }
        self.dirs_dict.update(output_dirs)

        # both assemblers below default to the same minimum contig length
        min_contig_len = min_contig_length_for_assembly
        defaults = {
            'samples_txt': "samples.txt",
            'metaspades': {"additional_params": "--only-assembler", "threads": 7},
            'megahit': {"--min-contig-len": min_contig_len, "--memory": 0.4, "threads": 7},
            'idba_ud': {"--min_contig": min_contig_len, "threads": 7},
            'iu_filter_quality_minoche': {"run": True, "--ignore-deflines": True},
            "gzip_fastqs": {"run": True},
            "bowtie": {"additional_params": "--no-unal", "threads": 3},
            "samtools_view": {"additional_params": "-F 4"},
            "anvi_profile": {
                "threads": 3,
                "--sample-name": "{sample}",
                "--overwrite-output-destinations": True,
            },
            "anvi_merge": {
                "--sample-name": "{group}",
                "--overwrite-output-destinations": True,
            },
            "import_percent_of_reads_mapped": {"run": True},
            "krakenuniq": {
                "threads": 3,
                "--gzip-compressed": True,
                "additional_params": "",
            },
            "remove_short_reads_based_on_references": {
                "delimiter-for-iu-remove-ids-from-fastq": " ",
            },
        }
        self.default_config.update(defaults)