def run_enrichment(self, args):
    '''
    Check the enrichment arguments (via _check_enrichment) and hand them to
    Enrichment.enrichment_pipeline.

    Parameters
    ----------
    args    - parsed argument namespace; the attributes listed below are
              forwarded positionally, in this exact order.

    Output
    ------
    None. The pipeline's return value is not used.
    '''
    self._check_enrichment(args)
    launch_pipeline = Enrichment().enrichment_pipeline

    # Input options
    input_options = (
        args.annotate_output,
        args.annotation_matrix,
        args.gff_files,
        args.metadata,
        args.abundance,
        args.abundance_metadata,
        args.transcriptome,
        args.transcriptome_metadata,
    )

    # Runtime options
    runtime_options = (
        args.pval_cutoff,
        args.proportions_cutoff,
        args.threshold,
        args.multi_test_correction,
        args.batchfile,
        args.processes,
        args.allow_negative_values,
        args.ko,
        args.pfam,
        args.tigrfam,
        args.cluster,
        args.ortholog,
        args.cazy,
        args.ec,
        args.ko_hmm,
        args.range,
        args.subblock_size,
        args.operon_mismatch_cutoff,
        args.operon_match_score_cutoff,
    )

    # Outputs
    output_options = (args.output,)

    launch_pipeline(*(input_options + runtime_options + output_options))
def test_check_annotation_type(self):
    '''
    check_annotation_type should map one sample Pfam, CAZy, TIGRFAM, KO and
    EC identifier list to the matching Enrichment constant.
    '''
    # (identifier list handed to check_annotation_type, constant expected back)
    cases = (
        (['PF10117'], Enrichment.PFAM),
        (['GH42'], Enrichment.CAZY),
        (['TIGR00008'], Enrichment.TIGRFAM),
        (['K00399'], Enrichment.KEGG),
        (['1.2.3.4'], Enrichment.EC),
    )

    for identifiers, expected_type in cases:
        # A fresh Enrichment instance is used for every lookup.
        detected_type = Enrichment().check_annotation_type(identifiers)
        self.assertEqual(detected_type, expected_type)
def run_enrichm(self, args, command):
    '''
    Dispatch a parsed command line to the pipeline named by
    args.subparser_name.

    Parameters
    ----------
    args    - parsed argument namespace; args.subparser_name selects the
              pipeline and the remaining attributes are forwarded to it.
    command - sequence of strings making up the invoked command line; it is
              joined with spaces and written to the log.

    Output
    ------
    None. Pipeline return values are not used.
    '''
    self._check_general(args)
    self._logging_setup(args)

    command_line = ' '.join(command)
    logging.info("Command: %s" % command_line)
    logging.info("Running the %s pipeline" % args.subparser_name)

    # The data subcommand is tested on its own, ahead of the first chain.
    if args.subparser_name == self.DATA:
        Data().do(args.uninstall, args.dry)

    if args.subparser_name == self.ANNOTATE:
        self._check_annotate(args)

        # Define inputs and outputs
        output_options = (args.output,)
        # Define type of annotation to be carried out
        annotation_types = (
            args.ko,
            args.ko_hmm,
            args.pfam,
            args.tigrfam,
            args.clusters,
            args.orthologs,
            args.cazy,
            args.ec,
        )
        # Cutoffs
        cutoffs = (
            args.evalue,
            args.bit,
            args.id,
            args.aln_query,
            args.aln_reference,
            args.c,
            args.cut_ga,
            args.cut_nc,
            args.cut_tc,
            args.cut_ko,
            args.inflation,
            args.chunk_number,
            args.chunk_max,
            args.count_domains,
        )
        # Parameters
        parameters = (
            args.threads,
            args.parallel,
            args.suffix,
            args.light,
        )

        annotator = Annotate(*(output_options + annotation_types + cutoffs + parameters))
        annotator.annotate_pipeline(
            args.genome_directory,
            args.protein_directory,
            args.genome_files,
            args.protein_files,
        )

    elif args.subparser_name == self.CLASSIFY:
        self._check_classify(args)
        Classify().classify_pipeline(
            args.custom_modules,
            args.cutoff,
            args.aggregate,
            args.genome_and_annotation_matrix,
            args.output,
        )

    elif args.subparser_name == self.ENRICHMENT:
        self._check_enrichment(args)
        Enrichment().enrichment_pipeline(
            # Input options
            args.annotate_output,
            args.annotation_matrix,
            args.metadata,
            args.abundance,
            args.abundance_metadata,
            args.transcriptome,
            args.transcriptome_metadata,
            # Runtime options
            args.pval_cutoff,
            args.proportions_cutoff,
            args.threshold,
            args.multi_test_correction,
            args.batchfile,
            args.processes,
            args.allow_negative_values,
            args.ko,
            args.pfam,
            args.tigrfam,
            args.cluster,
            args.ortholog,
            args.cazy,
            args.ec,
            args.ko_hmm,
            # Outputs
            args.output,
        )

    elif args.subparser_name in (NetworkAnalyser.PATHWAY, NetworkAnalyser.EXPLORE):
        self._check_network(args)
        NetworkAnalyser().network_pipeline(
            args.subparser_name,
            args.matrix,
            args.genome_metadata,
            args.tpm_values,
            args.tpm_metadata,
            args.abundance,
            args.abundance_metadata,
            args.metabolome,
            args.enrichment_output,
            args.depth,
            args.filter,
            args.limit,
            args.queries,
            args.output,
        )

    # A second, independent chain handles the remaining subcommands.
    if args.subparser_name == self.PREDICT:
        self._check_predict(args)
        Predict().predict_pipeline(
            args.forester_model_directory,
            args.input_matrix,
            args.output,
        )

    elif args.subparser_name == self.GENERATE:
        self._check_generate(args)
        GenerateModel().generate_pipeline(
            args.input_matrix,
            args.groups,
            args.model_type,
            args.testing_portion,
            args.grid_search,
            args.threads,
            args.output,
        )

    elif args.subparser_name == self.USES:
        self._check_uses(args)
        Uses().uses_pipeline(
            args.compounds_list,
            args.annotation_matrix,
            args.metadata,
            args.output,
            args.count,
        )

    logging.info('Finished running EnrichM')
def main(self, args, command):
    '''
    Dispatch a parsed command line to the worker object named by
    args.subparser_name and call its do() method.

    Parameters
    ----------
    args    - parsed argument namespace; args.subparser_name selects the
              worker and the remaining attributes are forwarded to it.
    command - sequence of strings making up the invoked command line; it is
              joined with spaces and written to the log.

    Output
    ------
    None. Return values of the do() calls are not used.
    '''
    self._check_general(args)
    self._logging_setup(args)

    command_line = ' '.join(command)
    logging.info("Running command: %s" % command_line)

    # The data subcommand is tested on its own, ahead of the first chain.
    if args.subparser_name == self.DATA:
        Data().do(args.uninstall)

    if args.subparser_name == self.ANNOTATE:
        self._check_annotate(args)
        annotator = Annotate(
            # Define inputs and outputs
            args.output,
            # Define type of annotation to be carried out
            args.ko,
            args.pfam,
            args.tigrfam,
            args.hypothetical,
            args.cazy,
            # Cutoffs
            args.evalue,
            args.bit,
            args.id,
            args.aln_query,
            args.aln_reference,
            args.c,
            args.cut_ga,
            args.cut_nc,
            args.cut_tc,
            args.inflation,
            args.chunk_number,
            args.chunk_max,
            # Parameters
            args.threads,
            args.parallel,
            args.suffix,
            args.light,
        )
        annotator.do(
            args.genome_directory,
            args.protein_directory,
            args.genome_files,
            args.protein_files,
        )

    elif args.subparser_name == self.CLASSIFY:
        self._check_classify(args)
        classifier = Classify()
        classifier.do(
            args.custom_modules,
            args.cutoff,
            args.genome_and_annotation_file,
            args.genome_and_annotation_matrix,
            args.output,
        )

    elif args.subparser_name == self.ENRICHMENT:
        self._check_enrichment(args)
        enricher = Enrichment()
        enricher.do(
            # Input options
            args.annotate_output,
            args.metadata,
            args.modules,
            args.abundances,
            # Runtime options
            args.genomes_to_compare_with_group,
            args.pval_cutoff,
            args.proportions_cutoff,
            args.threshold,
            args.multi_test_correction,
            args.batchfile,
            args.processes,
            args.ko,
            args.pfam,
            args.tigrfam,
            args.hypothetical,
            args.cazy,
            # Outputs
            args.output,
        )

    elif args.subparser_name == self.CONNECT:
        self._check_connect(args)
        connector = Connect()
        connector.do(
            args.annotate_output,
            args.metadata,
            args.custom_modules,
            args.cutoff,
            args.output,
        )

    elif (args.subparser_name == NetworkAnalyser.PATHWAY
          or args.subparser_name == NetworkAnalyser.EXPLORE
          or args.subparser_name == NetworkAnalyser.TRAVERSE):
        self._check_network(args)
        analyser = NetworkAnalyser(args.metadata)
        analyser.do(
            args.matrix,
            args.transcriptome,
            args.metabolome,
            args.depth,
            args.filter,
            args.limit,
            args.queries,
            args.subparser_name,
            args.starting_compounds,
            args.steps,
            args.number_of_queries,
            args.output,
        )

    # A second, independent chain handles the remaining subcommands.
    if args.subparser_name == self.PREDICT:
        self._check_predict(args)
        predictor = Predict()
        predictor.do(
            args.forester_model_directory,
            args.input_matrix,
            args.output,
        )

    elif args.subparser_name == self.GENERATE:
        self._check_generate(args)
        model_generator = GenerateModel()
        model_generator.do(
            args.input_matrix,
            args.groups,
            args.model_type,
            args.testing_portion,
            args.grid_search,
            args.threads,
            args.output,
        )

    logging.info('Done!')
class Tests(unittest.TestCase):
    '''
    Tests for Enrichment: annotation-type detection, per-group proportions,
    abundance weighting, and two command-line runs of the enrichment
    subcommand.
    '''

    # Annotation values per genome: genome -> {annotation id: integer}.
    genome_annotation_simple_example = {
        "genome_1": {"K00001": 1, "K00002": 2},
        "genome_2": {"K00003": 1},
        "genome_3": {"K00001": 5, "K00002": 4, "K00003": 5},
    }

    # Group name -> list of member genomes.
    genome_groups_simple_example = {
        "group_1": ["genome_1"],
        "group_2": ["genome_2", "genome_3"],
    }

    # Sample -> {genome: abundance value}.
    sample_abundance = {
        "sample_1": {"genome_1": 1.0, "genome_2": 0.5, "genome_3": 3.0},
        "sample_2": {"genome_1": 0.5, "genome_2": 1.2, "genome_3": 5.0},
        "sample_3": {"genome_1": 0.1, "genome_2": 1.1, "genome_3": 6.0},
        "sample_4": {"genome_1": 5.0, "genome_2": 5.2, "genome_3": 0.2},
        "sample_5": {"genome_1": 6.0, "genome_2": 4.9, "genome_3": 0.1},
        "sample_6": {"genome_1": 7.0, "genome_2": 5.0, "genome_3": 0.0},
    }

    # Sample group name -> list of member samples.
    sample_groups = {
        "sample_group_1": ["sample_1", "sample_2", "sample_3"],
        "sample_group_2": ["sample_4", "sample_5", "sample_6"],
    }

    genomes = ["genome_1", "genome_2", "genome_3"]
    annotations = ["K00001", "K00002", "K00003"]

    # Shared instance used by the tests that call Enrichment methods directly.
    enrichment_test_object = Enrichment()

    # Log file name that the original (disabled) file comparison skipped.
    _LOG_FILE_NAME = 'enrichment.log'

    def _assert_enrichment_run(self, annotation_flag, expected_dirname):
        '''
        Run the enrichment subcommand with one annotation flag and check the
        run.

        Parameters
        ----------
        annotation_flag  - command-line flag selecting the annotation type
                           (e.g. '--ko').
        expected_dirname - name of the reference output directory inside
                           path_to_data.

        The test fails when the command exits with a non-zero status or when
        it writes a file (other than the log) that has no counterpart in the
        reference directory.
        '''
        # Local import: the file-level imports are outside this block.
        import shutil

        tmp = tempfile.mkdtemp()
        # Remove the scratch directory whether or not the assertions pass.
        self.addCleanup(shutil.rmtree, tmp, ignore_errors=True)

        expected_output = os.path.join(path_to_data, expected_dirname)
        metadata = os.path.join(path_to_data, 'metadata.tsv')
        cmd = '%s enrichment --annotate_output %s --metadata %s --output %s --force %s --verbosity 1' \
            % (path_to_script, path_to_annotate, metadata, tmp, annotation_flag)

        exit_status = subprocess.call(cmd, shell=True)
        self.assertEqual(exit_status, 0,
                         "enrichment command exited with status %s" % exit_status)

        # The pvalues are never exact - cannot compare file contents directly,
        # so only file names are compared. A dircmp object is always truthy,
        # so it must be inspected rather than asserted on.
        comparison = filecmp.dircmp(tmp, expected_output)
        unexpected = [name for name in comparison.left_only
                      if name != self._LOG_FILE_NAME]
        self.assertEqual(unexpected, [])

    def test_check_annotation_type(self):
        '''Each sample identifier list maps to the matching Enrichment constant.'''
        pfam = ['PF10117']
        self.assertEqual(Enrichment().check_annotation_type(pfam), Enrichment.PFAM)
        cazy = ['GH42']
        self.assertEqual(Enrichment().check_annotation_type(cazy), Enrichment.CAZY)
        tigrfam = ['TIGR00008']
        self.assertEqual(Enrichment().check_annotation_type(tigrfam), Enrichment.TIGRFAM)
        ko = ['K00399']
        self.assertEqual(Enrichment().check_annotation_type(ko), Enrichment.KEGG)
        ec = ['1.2.3.4']
        self.assertEqual(Enrichment().check_annotation_type(ec), Enrichment.EC)

    def test_calculate_portions(self):
        '''
        calculate_portions returns a header row followed by one row per
        annotation, with values given as strings.
        '''
        # In this fixture each value equals the fraction of the group's
        # genomes that carry the annotation.
        expected = [['Annotation', 'group_1', 'group_2'],
                    ['K00001', '1.0', '0.5'],
                    ['K00002', '1.0', '0.5'],
                    ['K00003', '0.0', '1.0']]
        result = self.enrichment_test_object.calculate_portions(
            self.annotations,
            self.genome_groups_simple_example,
            self.genome_annotation_simple_example,
            self.genomes,
            1)  # NOTE(review): meaning of this last argument is not visible here
        self.assertEqual(result, expected)

    def test_enrichment_from_ko_matrix(self):
        '''Command-line enrichment run using the --ko flag.'''
        self._assert_enrichment_run('--ko', 'enrichm_enrichment_ko')

    def test_enrichment_from_pfam_matrix(self):
        '''Command-line enrichment run using the --pfam flag.'''
        self._assert_enrichment_run('--pfam', 'enrichm_enrichment_pfam')

    def test_weight_annotation_matrix(self):
        '''
        weight_annotation_matrix returns, per sample group and annotation,
        one value per sample in the group.
        '''
        # Each expected value is the sum over genomes of
        # (genome abundance in the sample) * (genome's annotation value),
        # e.g. sample_1 / K00001: 1.0 * 1 + 3.0 * 5 = 16.0.
        expected = {
            'sample_group_1': {
                'K00001': [16.0, 25.5, 30.1],
                'K00002': [14.0, 21.0, 24.2],
                'K00003': [15.5, 26.2, 31.1],
            },
            'sample_group_2': {
                'K00001': [6.0, 6.5, 7.0],
                'K00002': [10.8, 12.4, 14.0],
                'K00003': [6.2, 5.4, 5.0],
            },
        }
        result = self.enrichment_test_object.weight_annotation_matrix(
            self.sample_abundance,
            self.genome_annotation_simple_example,
            self.sample_groups,
            self.annotations)
        self.assertEqual(result, expected)