def __make_membership(self):
    """Create a new seeded membership object and hand it back.

    When the 'random_seed' debug option is active, the seed is pinned to a
    fixed value through util.r_set_seed() before the membership is created.
    """
    debug_flags = self['debug']
    if 'random_seed' in debug_flags:
        util.r_set_seed(10)

    return memb.create_membership(self.ratios,
                                  self.row_seeder,
                                  self.column_seeder,
                                  self.config_params)
def setup_default(args, config_parser):
    """Default configuration method.

    Builds the run parameters from the default/user ini settings and then
    applies the command line overrides on top of them.

    Args:
        args: parsed command line arguments. The attributes ``nomotifs``,
            ``nonetworks`` and ``clusters_per_row`` are updated in place.
        config_parser: configuration parser; the file named by
            ``args.config`` is read into it when one was given.

    Returns:
        The tuple ``(args, params, ratios)``. ``args`` is returned as well in
        case some parameters were not processed here.
    """
    # no organism provided -> dummy organism
    if args.organism is None:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("no organism provided - assuming that you want to score "
                        "ratios only or don't use automatic download")
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config is not None:
        config_parser.read(args.config)

    # Initial configuration from default + user config
    params = set_config(config_parser)
    ratios = read_ratios(params, args)
    args.clusters_per_row = params['memb.clusters_per_row']

    # debug options
    debug_options = set(args.debug.split(',')) if args.debug is not None else set()
    if 'dump_results' in debug_options:
        debug_options.add('keep_memeout')
    if debug_options == {'all'}:
        debug_options = ALL_DEBUG_OPTIONS

    # The overrides dictionary holds all the values that will overwrite or add
    # to the settings defined in the default and user-defined ini files.
    # 'use_operons', 'use_string' and 'nomotifs' are placeholders here, their
    # final values are computed further down.
    overrides = {
        'organism_code': args.organism,
        'ratios_file': args.ratios,
        'string_file': args.string,
        'logfile': args.logfile,
        'rsat_organism': args.rsat_organism,
        'num_clusters': __num_clusters(config_parser, args, ratios),
        'memb.clusters_per_row': args.clusters_per_row,
        'remap_network_nodes': args.remap_network_nodes,
        'ncbi_code': args.ncbi_code,
        'operon_file': args.operons,
        'rsat_dir': args.rsat_dir,
        'rsat_base_url': args.rsat_base_url,
        'rsat_features': args.rsat_features,
        'use_operons': True,
        'use_string': True,
        'debug': debug_options,
        'nomotifs': False,
        'minimize_io': args.minimize_io,
        'nonetworks': args.nonetworks,
        'checkratios': args.checkratios,
        'random_seed': args.random_seed,
        'pipeline_file': args.pipeline,
        'synonym_file': args.synonym_file,
        'fasta_file': args.fasta_file,
        'interactive': args.interactive,
        'resume': args.resume,
        'case_sensitive': args.case_sensitive,
        'command_line': args.command_line,
        'use_BSCM': args.use_BSCM,
        'use_chi2': args.use_chi2,
        'db_url': args.dburl,
    }

    # Options that were not given on the command line must not clobber the
    # values coming from the ini files, so drop them from the overrides.
    for optional_key in ('random_seed', 'case_sensitive', 'pipeline_file',
                         'rsat_base_url', 'db_url'):
        if overrides[optional_key] is None:
            del overrides[optional_key]

    # membership update default parameters
    # these come first, since a lot depends on clustering numbers
    num_clusters = overrides['num_clusters']
    if ratios.num_columns >= 60:
        overrides['memb.clusters_per_col'] = int(round(num_clusters / 2.0))
    else:
        overrides['memb.clusters_per_col'] = int(round(num_clusters * 2.0 / 3.0))

    # motif finding is switched off when no MEME version was detected
    params['MEME']['version'] = meme.check_meme_version()
    overrides['nomotifs'] = args.nomotifs or not params['MEME']['version']
    overrides['use_string'] = not args.nostring
    overrides['use_operons'] = not args.nooperons

    if args.num_cores is not None:
        overrides['num_cores'] = args.num_cores
    if args.out:
        overrides['output_dir'] = args.out
    if args.cachedir:
        overrides['cache_dir'] = args.cachedir
    if args.num_iterations is not None:
        overrides['num_iterations'] = args.num_iterations

    for key, value in overrides.items():
        params[key] = value

    if params['random_seed'] is not None:
        random.seed(params['random_seed'])
        util.r_set_seed(params['random_seed'])

    params['out_database'] = os.path.join(params['output_dir'], params['dbfile_name'])
    # It should only be possible to change data files in a resume
    params['new_data_file'] = False

    # we return the args here, too, in case we have some parameters not
    # processed
    return args, params, ratios
set_config(cmonkey_run, config) cmonkey_run['output_dir'] = args.out cmonkey_run['cache_dir'] = args.cachedir cmonkey_run['debug'] = args.debug cmonkey_run['keep_memeout'] = args.keep_memeout or args.debug cmonkey_run['donetworks'] = not args.nonetworks cmonkey_run['domotifs'] = not args.nomotifs and cmonkey_run['meme_version'] cmonkey_run['use_string'] = not args.nostring cmonkey_run['use_operons'] = not args.nooperons if args.random_seed: cmonkey_run['random_seed'] = args.random_seed if cmonkey_run['random_seed']: random.seed(cmonkey_run['random_seed']) util.r_set_seed(cmonkey_run['random_seed']) proceed = True checkratios = args.checkratios if args.checkratios: thesaurus = cmonkey_run.organism().thesaurus() logging.info("Checking the quality of the input matrix names...") found = [name for name in matrix.row_names if name in thesaurus] num_found = len(found) total = len(matrix.row_names) percent = (float(num_found) / float(total)) * 100.0 proceed = percent > 50.0 # Set update frequency to every iteration, so the full results are written if cmonkey_run['debug']:
def arg_ext(argparser):
    """additional parameters added for ensembles

    Registers the optional integer argument ``--ensemble_run_id`` (default
    ``None``) on the given argument parser.
    """
    argparser.add_argument('--ensemble_run_id', type=int, default=None)


# NOTE(review): not referenced in the visible part of this script; presumably
# the candidate background orders for motif finding - confirm against the
# remainder of the file
BGORDER = [None, 0, 1, 2, 3, 4, 5]

if __name__ == '__main__':
    """process configuration"""
    # arg_ext is passed to conf.setup so the ensemble option gets registered
    args, params, ratios = conf.setup(arg_ext)

    if args.ensemble_run_id is not None:
        # setup for ensemble run
        # the run id doubles as the random seed; the sequence of random.*
        # calls below therefore fixes which values a given run id draws,
        # so do not reorder them
        params['random_seed'] = args.ensemble_run_id
        random.seed(params['random_seed'])
        # presumably seeds the R side as well - verify in util
        util.r_set_seed(params['random_seed'])

        # these currently are experimental and very eco-centric
        params['num_clusters'] = random.randint(150, 550)

        # row, net scaling
        # a constant scaling is drawn uniformly from [0, 3] for rows and
        # from [0, 1] for networks
        params['Rows']['scaling'] = ('scaling_const', random.uniform(0.0, 3.0))
        params['Networks']['scaling'] = ('scaling_const', random.uniform(0.0, 1.0))

        # MEME parameters
        maxw = random.randint(12, 30)  # meme parameter
        mmotifs = random.randint(1, 3)
        # (start, end) pairs, each bound drawn independently
        motif_upstream_scan = (random.randint(-50, 0), random.randint(150, 250))
        motif_upstream_search = (random.randint(-20, 0), random.randint(100, 200))
def setup_default(args, config_parser):
    """Default configuration method.

    Builds the run parameters from the default/user ini settings and then
    applies the command line overrides on top of them.

    Args:
        args: parsed command line arguments. The attributes ``nomotifs``,
            ``nonetworks`` and ``clusters_per_row`` are updated in place.
        config_parser: configuration parser; the file named by
            ``args.config`` is read into it when one was given.

    Returns:
        The tuple ``(args, params, ratios)``. ``args`` is returned as well in
        case some parameters were not processed here.
    """
    # no organism provided -> dummy organism
    if args.organism is None:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning(
            "no organism provided - assuming that you want to score "
            "ratios only or don't use automatic download"
        )
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config is not None:
        config_parser.read(args.config)

    # Initial configuration from default + user config
    params = set_config(config_parser)
    ratios = read_ratios(params, args)
    args.clusters_per_row = params["memb.clusters_per_row"]

    # debug options
    debug_options = set(args.debug.split(",")) if args.debug is not None else set()
    if "dump_results" in debug_options:
        debug_options.add("keep_memeout")
    if debug_options == {"all"}:
        debug_options = ALL_DEBUG_OPTIONS

    # The overrides dictionary holds all the values that will overwrite or add
    # to the settings defined in the default and user-defined ini files.
    # "use_operons", "use_string" and "nomotifs" are placeholders here, their
    # final values are computed further down.
    overrides = {
        "organism_code": args.organism,
        "ratios_file": args.ratios,
        "string_file": args.string,
        "logfile": args.logfile,
        "rsat_organism": args.rsat_organism,
        "num_clusters": __num_clusters(config_parser, args, ratios),
        "memb.clusters_per_row": args.clusters_per_row,
        "remap_network_nodes": args.remap_network_nodes,
        "ncbi_code": args.ncbi_code,
        "operon_file": args.operons,
        "rsat_dir": args.rsat_dir,
        "rsat_base_url": args.rsat_base_url,
        "rsat_features": args.rsat_features,
        "use_operons": True,
        "use_string": True,
        "debug": debug_options,
        "nomotifs": False,
        "minimize_io": args.minimize_io,
        "nonetworks": args.nonetworks,
        "checkratios": args.checkratios,
        "random_seed": args.random_seed,
        "pipeline_file": args.pipeline,
        "synonym_file": args.synonym_file,
        "fasta_file": args.fasta_file,
        "interactive": args.interactive,
        "resume": args.resume,
        "case_sensitive": args.case_sensitive,
        "command_line": args.command_line,
        "use_BSCM": args.use_BSCM,
        "use_chi2": args.use_chi2,
    }

    # Options that were not given on the command line must not clobber the
    # values coming from the ini files, so drop them from the overrides.
    for optional_key in (
        "random_seed",
        "case_sensitive",
        "pipeline_file",
        "rsat_base_url",
    ):
        if overrides[optional_key] is None:
            del overrides[optional_key]

    # membership update default parameters
    # these come first, since a lot depends on clustering numbers
    num_clusters = overrides["num_clusters"]
    if ratios.num_columns >= 60:
        overrides["memb.clusters_per_col"] = int(round(num_clusters / 2.0))
    else:
        overrides["memb.clusters_per_col"] = int(round(num_clusters * 2.0 / 3.0))

    # motif finding is switched off when no MEME version was detected
    params["MEME"]["version"] = meme.check_meme_version()
    overrides["nomotifs"] = args.nomotifs or not params["MEME"]["version"]
    overrides["use_string"] = not args.nostring
    overrides["use_operons"] = not args.nooperons

    if args.num_cores is not None:
        overrides["num_cores"] = args.num_cores
    if args.out:
        overrides["output_dir"] = args.out
    if args.cachedir:
        overrides["cache_dir"] = args.cachedir
    if args.num_iterations is not None:
        overrides["num_iterations"] = args.num_iterations

    for key, value in overrides.items():
        params[key] = value

    if params["random_seed"] is not None:
        random.seed(params["random_seed"])
        util.r_set_seed(params["random_seed"])

    params["out_database"] = os.path.join(params["output_dir"], params["dbfile_name"])
    # It should only be possible to change data files in a resume
    params["new_data_file"] = False

    # we return the args here, too, in case we have some parameters not
    # processed
    return args, params, ratios