def test_motif_scoring(self): """tests the motif scoring in integration""" search_distances = {'upstream': (-20, 150)} scan_distances = {'upstream': (-30, 250)} matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter]) infile = util.read_dfile('example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"') ratio_matrix = matrix_factory.create_from(infile) meme_suite = meme.MemeSuite430(remove_tempfiles=True) sequence_filters = [ motif.unique_filter, motif.get_remove_low_complexity_filter(meme_suite), motif.get_remove_atgs_filter(search_distances['upstream'])] organism = make_halo(ratio_matrix, search_distances, scan_distances) membership = FakeMembership() config_params = {'memb.min_cluster_rows_allowed': 3, 'memb.max_cluster_rows_allowed': 70, 'multiprocessing': False, 'num_clusters': 1, 'output_dir': 'out', 'debug': False, 'num_iterations': 2000} func = motif.MemeScoringFunction(organism, membership, ratio_matrix, meme_suite, sequence_filters=sequence_filters, scaling_func=lambda iter: 1.0, num_motif_func=lambda iter: 1, update_in_iteration=lambda x: True, motif_in_iteration=lambda x: True, config_params=config_params) iteration_result = { 'iteration': 100 } matrix = func.compute(iteration_result) """
def test_meme_simple(self):
    """Run MEME on a small FASTA file and check the parsed motif summary.

    Simplest possible check: invoke ``MemeSuite430.meme`` with one
    requested motif and verify that the parsed output reports exactly
    one motif with the expected width, site count, llr and e-value.
    """
    meme_suite = meme.MemeSuite430()
    # The second element of the returned pair is not inspected here.
    motif_infos, _out = meme_suite.meme('testdata/meme_input1.fasta',
                                        'testdata/meme1.bg',
                                        num_motifs=1)
    # assertEqual/assertAlmostEqual replace the deprecated aliases
    # assertEquals/assertAlmostEquals, which newer Python versions dropped.
    self.assertEqual(1, len(motif_infos))
    self.assertEqual(24, motif_infos[0].width)
    self.assertEqual(3, motif_infos[0].num_sites)
    self.assertEqual(79, motif_infos[0].llr)
    self.assertAlmostEqual(1700, motif_infos[0].evalue)
def __setup_meme_suite(self, config_params):
    """Create the MEME suite wrapper and the sequence filters for this object.

    Reads the MEME version and the search distance for ``self.seqtype``
    from ``config_params``.  When ``config_params['MEME']['global_background']``
    is the string ``'True'``, a global background file/model is built via
    ``meme.global_background_file`` and every (subsequence, pvalue) pair of
    the model is inserted into the ``global_background`` table of the
    database at ``config_params['out_database']``.

    Sets ``self.meme_suite`` and the private sequence filter list.

    Raises:
        Exception: if the configured MEME version is neither '4.3.0' nor
            starts with '4.8', '4.9' or '4.10'.
    """
    background_file = None
    bgmodel = None
    meme_version = config_params['MEME']['version']
    search_distance = config_params['search_distances'][self.seqtype]

    if config_params['MEME']['global_background'] == 'True':
        # NOTE(review): the background order is read from self.config_params
        # while everything else uses the config_params argument; kept as is,
        # confirm both always refer to the same configuration.
        background_file, bgmodel = meme.global_background_file(
            self.organism, self.ratios.row_names, self.seqtype,
            bgorder=int(self.config_params['MEME']['background_order']))

        # store background in results database
        conn = sqlite3.connect(config_params['out_database'], 15,
                               isolation_level='DEFERRED')
        try:
            conn.executemany(
                'insert into global_background (subsequence, pvalue) values (?,?)',
                ((subseq, pvalue)
                 for order in bgmodel
                 for subseq, pvalue in order.items()))
            conn.commit()
        finally:
            # Always release the connection, even if an insert or the
            # commit fails; previously it was leaked on error.
            conn.close()

    if meme_version == '4.3.0':
        suite_class = meme.MemeSuite430
    elif meme_version and meme_version.startswith(('4.8', '4.9', '4.10')):
        suite_class = meme.MemeSuite481
    else:
        logging.error("MEME version %s currently not supported !", meme_version)
        raise Exception("unsupported MEME version: '%s'" % meme_version)

    self.meme_suite = suite_class(self.config_params,
                                  background_file=background_file,
                                  bgmodel=bgmodel)
    self.__sequence_filters = [
        unique_filter,
        get_remove_low_complexity_filter(self.meme_suite),
        get_remove_atgs_filter(search_distance)
    ]
def test_meme_simple(self):
    """Run MEME with an explicit configuration and check the parsed output.

    Simplest possible check: build a ``MemeSuite430`` from a minimal
    'MEME' configuration section, request one motif and verify that the
    parsed result reports exactly one motif with the expected width,
    site count, llr and e-value.
    """
    meme_suite = meme.MemeSuite430({
        'MEME': {
            'max_width': 24,
            'background_order': 3,
            'use_revcomp': 'True',
            'arg_mod': 'zoops'
        }
    })
    # The second element of the returned pair is not inspected here.
    motif_infos, _out = meme_suite.meme('testdata/meme_input1.fasta',
                                        'testdata/meme1.bg',
                                        num_motifs=1)
    # assertEqual/assertAlmostEqual replace the deprecated aliases
    # assertEquals/assertAlmostEquals, which newer Python versions dropped.
    self.assertEqual(1, len(motif_infos))
    self.assertEqual(24, motif_infos[0].width)
    self.assertEqual(3, motif_infos[0].num_sites)
    self.assertEqual(79, motif_infos[0].llr)
    self.assertAlmostEqual(1700, motif_infos[0].evalue)
def test_motif_scoring(self):
    """Compare computed motif scores against 'testdata/ref_motscores.tsv'.

    The scoring function is created from the fixture's organism,
    membership, ratio matrix and config_params; its result is sorted by
    row name, has extreme values fixed and is then checked against the
    reference matrix.
    """
    meme_settings = {
        'MEME': {
            'max_width': 24,
            'background_order': 3,
            'use_revcomp': 'True',
            'arg_mod': 'zoops'
        }
    }
    suite = meme.MemeSuite430(meme_settings)
    # NOTE(review): neither the suite nor this filter list is handed to the
    # scoring function below - confirm whether they are still needed.
    filters = [motif.unique_filter]
    filters.append(motif.get_remove_low_complexity_filter(suite))
    filters.append(
        motif.get_remove_atgs_filter(self.search_distances['upstream']))

    scorer = motif.MemeScoringFunction(self.organism,
                                       self.membership,
                                       self.ratio_matrix,
                                       config_params=self.config_params)
    computed = scorer.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()
    motscores.fix_extreme_values()

    expected = read_matrix('testdata/ref_motscores.tsv')
    matches = check_matrix_values(motscores, expected)
    self.assertTrue(matches)
def test_motif_scoring(self):
    """Compare computed motif scores against 'testdata/motscores_fixed.tsv'.

    A MemeSuite430 and the three standard sequence filters are passed
    explicitly to the scoring function, together with the default motif
    scaling and schedules that fire on every iteration.  The result is
    sorted by row name, has extreme values fixed and is checked against
    the reference matrix.
    """
    suite = meme.MemeSuite430()
    filters = [motif.unique_filter]
    filters.append(motif.get_remove_low_complexity_filter(suite))
    filters.append(
        motif.get_remove_atgs_filter(self.search_distances['upstream']))
    scaling = get_default_motif_scaling(2000, offset=0)

    scorer = motif.MemeScoringFunction(
        self.organism,
        self.membership,
        self.ratio_matrix,
        suite,
        sequence_filters=filters,
        scaling_func=scaling,
        num_motif_func=lambda iteration: 1,
        update_in_iteration=lambda x: True,
        motif_in_iteration=lambda x: True,
        config_params=self.config_params)

    computed = scorer.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()
    motscores.fix_extreme_values()

    expected = read_matrix('testdata/motscores_fixed.tsv')
    matches = check_matrix_values(motscores, expected)
    self.assertTrue(matches)
def make_row_scoring(self):
    """Build the combined row scoring function on demand.

    A ``microarray.RowScoringFunction`` is always included.  A
    ``motif.MemeScoringFunction`` is added when ``self['domotifs']`` is
    truthy and an ``nw.ScoringFunction`` when ``self['donetworks']`` is
    truthy.  All of them are wrapped in a
    ``scoring.ScoringFunctionCombiner``, which is returned.

    Raises:
        Exception: if motif scoring is enabled and ``self['meme_version']``
            is neither '4.3.0' nor starts with '4.8' or '4.9'.
    """
    # The microarray-based row scoring is always part of the combination.
    row_scaling = scoring.get_scaling(self, 'row_')
    functions = [
        microarray.RowScoringFunction(
            self.organism(), self.membership(), self.ratio_matrix,
            scaling_func=row_scaling,
            schedule=self["row_schedule"],
            config_params=self.config_params)
    ]

    if self['domotifs']:
        bg_file = None
        if self['global_background']:
            bg_file = meme.global_background_file(
                self.organism(), self.ratio_matrix.row_names,
                self['sequence_types'][0])

        # Select the wrapper class that matches the configured MEME version.
        version = self['meme_version']
        if version == '4.3.0':
            suite_class = meme.MemeSuite430
        elif version and version.startswith(('4.8', '4.9')):
            suite_class = meme.MemeSuite481
        else:
            logging.error("MEME version %s currently not supported !",
                          version)
            raise Exception("unsupported MEME version")
        suite = suite_class(background_file=bg_file)

        filters = [
            motif.unique_filter,
            motif.get_remove_low_complexity_filter(suite),
            motif.get_remove_atgs_filter(
                self['search_distances']['upstream'])
        ]
        motif_scaling = scoring.get_scaling(self, 'motif_')
        num_motifs = motif.num_meme_motif_fun(self)
        functions.append(motif.MemeScoringFunction(
            self.organism(),
            self.membership(),
            self.ratio_matrix,
            suite,
            sequence_filters=filters,
            scaling_func=motif_scaling,
            num_motif_func=num_motifs,
            update_in_iteration=self['motif_schedule'],
            motif_in_iteration=self['meme_schedule'],
            config_params=self.config_params))

    if self['donetworks']:
        network_scaling = scoring.get_scaling(self, 'network_')
        functions.append(nw.ScoringFunction(
            self.organism(),
            self.membership(),
            self.ratio_matrix,
            scaling_func=network_scaling,
            schedule=self['network_schedule'],
            config_params=self.config_params))

    return scoring.ScoringFunctionCombiner(
        self.organism(), self.membership(), functions,
        config_params=self.config_params)