Пример #1
0
    def __make_membership(self):
        """Create and return a newly seeded membership object.

        When the 'random_seed' debug option is active, the R random
        generator is first reset to the fixed seed 10 via util.r_set_seed().
        The membership itself is produced by memb.create_membership() from
        this object's ratios, row seeder, column seeder and config_params.
        """
        debug_flags = self['debug']
        if 'random_seed' in debug_flags:
            util.r_set_seed(10)

        return memb.create_membership(
            self.ratios,
            self.row_seeder,
            self.column_seeder,
            self.config_params)
Пример #2
0
    def __make_membership(self):
        """Create the seeded membership on demand and return it.

        If 'random_seed' is among the active debug options, util.r_set_seed()
        is called with the constant 10 before seeding. The result comes
        straight from memb.create_membership(), which receives the ratios,
        both seeders and config_params held on this object.
        """
        if 'random_seed' in self['debug']:
            util.r_set_seed(10)

        seeding_args = (self.ratios, self.row_seeder,
                        self.column_seeder, self.config_params)
        return memb.create_membership(*seeding_args)
Пример #3
0
def setup_default(args, config_parser):
    """Default configuration method.

    Combines the settings obtained from set_config(config_parser) with the
    values given on the command line and seeds the random generators when a
    seed is configured.

    Parameters:
    args -- parsed command line options (accessed by attribute). Note that
            this object is modified: nomotifs / nonetworks may be switched on
            when no organism is given, and clusters_per_row is set from the
            configuration.
    config_parser -- configuration parser; args.config is read into it when
                     provided, before it is handed to set_config().

    Returns:
    a triple (args, params, ratios). args is returned as well so that callers
    can process options that were not handled here.
    """
    # no organism provided -> dummy organism
    if args.organism is None:
        # logging.warn() is a deprecated alias of logging.warning()
        logging.warning("no organism provided - assuming that you want to score ratios only or don't use automatic download")
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config is not None:
        config_parser.read(args.config)

    # Initial configuration from default + user config
    params = set_config(config_parser)
    ratios = read_ratios(params, args)
    args.clusters_per_row = params['memb.clusters_per_row']

    # debug options: comma separated list, 'dump_results' implies
    # 'keep_memeout', and a lone 'all' selects every known option
    debug_options = set(args.debug.split(',')) if args.debug is not None else set()
    if 'dump_results' in debug_options:
        debug_options.add('keep_memeout')
    if debug_options == {'all'}:
        # NOTE(review): this binds the module-level constant itself, not a copy
        debug_options = ALL_DEBUG_OPTIONS

    # The overrides dictionary holds all the values that will overwrite or add
    # to the settings defined in the default and user-defined ini files.
    # 'use_operons', 'use_string' and 'nomotifs' are placeholders here, their
    # final values are computed further down.
    overrides = {'organism_code': args.organism,
                 'ratios_file': args.ratios,
                 'string_file': args.string,
                 'logfile': args.logfile,
                 'rsat_organism': args.rsat_organism,
                 'num_clusters': __num_clusters(config_parser, args, ratios),
                 'memb.clusters_per_row': args.clusters_per_row,
                 'remap_network_nodes': args.remap_network_nodes,
                 'ncbi_code': args.ncbi_code,
                 'operon_file': args.operons,
                 'rsat_dir': args.rsat_dir,
                 'rsat_base_url': args.rsat_base_url,
                 'rsat_features': args.rsat_features,
                 'use_operons': True,
                 'use_string': True,
                 'debug': debug_options,
                 'nomotifs': False,
                 'minimize_io': args.minimize_io,
                 'nonetworks': args.nonetworks,
                 'checkratios': args.checkratios,
                 'random_seed': args.random_seed,
                 'pipeline_file': args.pipeline,
                 'synonym_file': args.synonym_file,
                 'fasta_file': args.fasta_file,
                 'interactive': args.interactive,
                 'resume': args.resume,
                 'case_sensitive': args.case_sensitive,
                 'command_line': args.command_line,
                 'use_BSCM': args.use_BSCM,
                 'use_chi2': args.use_chi2,
                 'db_url': args.dburl}

    # these options only override the configured value when they were
    # actually specified
    for optional_key in ('random_seed', 'case_sensitive', 'pipeline_file',
                         'rsat_base_url', 'db_url'):
        if overrides[optional_key] is None:
            del overrides[optional_key]

    # membership update default parameters
    # these come first, since a lot depends on clustering numbers
    num_clusters = overrides['num_clusters']
    if ratios.num_columns >= 60:
        overrides['memb.clusters_per_col'] = int(round(num_clusters / 2.0))
    else:
        overrides['memb.clusters_per_col'] = int(round(num_clusters * 2.0 / 3.0))

    # motif finding is switched off when requested or when
    # meme.check_meme_version() did not return a usable version
    params['MEME']['version'] = meme.check_meme_version()
    overrides['nomotifs'] = args.nomotifs or not params['MEME']['version']
    overrides['use_string'] = not args.nostring
    overrides['use_operons'] = not args.nooperons

    if args.num_cores is not None:
        overrides['num_cores'] = args.num_cores
    if args.out:
        overrides['output_dir'] = args.out
    if args.cachedir:
        overrides['cache_dir'] = args.cachedir

    if args.num_iterations is not None:
        overrides['num_iterations'] = args.num_iterations

    for key, value in overrides.items():
        params[key] = value

    # seed both the Python and the R random generators
    if params['random_seed'] is not None:
        random.seed(params['random_seed'])
        util.r_set_seed(params['random_seed'])

    params['out_database'] = os.path.join(params['output_dir'], params['dbfile_name'])

    # It should only be possible to change data files in a resume
    params['new_data_file'] = False
    # we return the args here, too, in case we have some parameters not
    # processed
    return args, params, ratios
Пример #4
0
    set_config(cmonkey_run, config)

    cmonkey_run['output_dir'] = args.out
    cmonkey_run['cache_dir'] = args.cachedir
    cmonkey_run['debug'] = args.debug
    cmonkey_run['keep_memeout'] = args.keep_memeout or args.debug
    cmonkey_run['donetworks'] = not args.nonetworks
    cmonkey_run['domotifs'] = not args.nomotifs and cmonkey_run['meme_version']
    cmonkey_run['use_string'] = not args.nostring
    cmonkey_run['use_operons'] = not args.nooperons
    if args.random_seed:
        cmonkey_run['random_seed'] = args.random_seed

    if cmonkey_run['random_seed']:
        random.seed(cmonkey_run['random_seed'])
        util.r_set_seed(cmonkey_run['random_seed'])

    proceed = True
    checkratios = args.checkratios

    if args.checkratios:
        thesaurus = cmonkey_run.organism().thesaurus()
        logging.info("Checking the quality of the input matrix names...")
        found = [name for name in matrix.row_names if name in thesaurus]
        num_found = len(found)
        total = len(matrix.row_names)
        percent = (float(num_found) / float(total)) * 100.0
        proceed = percent > 50.0

    # Set update frequency to every iteration, so the full results are written
    if cmonkey_run['debug']:
Пример #5
0
def arg_ext(argparser):
    """Register the extra command line options used for ensemble runs.

    Adds the optional integer option --ensemble_run_id to argparser;
    it defaults to None when not given.
    """
    option_name = '--ensemble_run_id'
    argparser.add_argument(option_name, default=None, type=int)


# NOTE(review): presumably the candidate background orders to pick from for
# MEME in ensemble runs - it is not used in the visible part, TODO confirm
BGORDER = [None, 0, 1, 2, 3, 4, 5]

if __name__ == '__main__':
    """process configuration"""
    # conf.setup() receives arg_ext so that the ensemble specific command
    # line options get registered
    args, params, ratios = conf.setup(arg_ext)

    if args.ensemble_run_id is not None:
        # setup for ensemble run: the run id doubles as the random seed for
        # both Python's random module and util.r_set_seed()
        params['random_seed'] = args.ensemble_run_id
        random.seed(params['random_seed'])
        util.r_set_seed(params['random_seed'])

        # NOTE: everything below draws from the freshly seeded generator, so
        # the order of the random.* calls determines the resulting values

        # these currently are experimental and very eco-centric
        params['num_clusters'] = random.randint(150, 550)
        # row, net scaling: constant scaling with a randomly drawn factor
        params['Rows']['scaling'] = ('scaling_const', random.uniform(0.0, 3.0))
        params['Networks']['scaling'] = ('scaling_const',
                                         random.uniform(0.0, 1.0))

        # MEME parameters
        maxw = random.randint(12, 30)  # meme parameter
        mmotifs = random.randint(1, 3)
        # (start, end) pairs: start drawn from [-50, 0] / [-20, 0],
        # end drawn from [150, 250] / [100, 200]
        motif_upstream_scan = (random.randint(-50,
                                              0), random.randint(150, 250))
        motif_upstream_search = (random.randint(-20,
                                                0), random.randint(100, 200))
Пример #6
0
    set_config(cmonkey_run, config)

    cmonkey_run['output_dir'] = args.out
    cmonkey_run['cache_dir'] = args.cachedir
    cmonkey_run['debug'] = args.debug
    cmonkey_run['keep_memeout'] = args.keep_memeout or args.debug
    cmonkey_run['donetworks'] = not args.nonetworks
    cmonkey_run['domotifs'] = not args.nomotifs and cmonkey_run['meme_version']
    cmonkey_run['use_string'] = not args.nostring
    cmonkey_run['use_operons'] = not args.nooperons
    if args.random_seed:
        cmonkey_run['random_seed'] = args.random_seed

    if cmonkey_run['random_seed']:
        random.seed(cmonkey_run['random_seed'])
        util.r_set_seed(cmonkey_run['random_seed'])

    proceed = True
    checkratios = args.checkratios

    if args.checkratios:
        thesaurus = cmonkey_run.organism().thesaurus()
        logging.info("Checking the quality of the input matrix names...")
        found = [name for name in matrix.row_names if name in thesaurus]
        num_found = len(found)
        total = len(matrix.row_names)
        percent = (float(num_found) / float(total)) * 100.0
        proceed = percent > 50.0

    # Set update frequency to every iteration, so the full results are written
    if cmonkey_run['debug']:
Пример #7
0
def setup_default(args, config_parser):
    """Default configuration method.

    Combines the settings obtained from set_config(config_parser) with the
    values given on the command line and seeds the random generators when a
    seed is configured.

    Parameters:
    args -- parsed command line options (accessed by attribute). Note that
            this object is modified: nomotifs / nonetworks may be switched on
            when no organism is given, and clusters_per_row is set from the
            configuration.
    config_parser -- configuration parser; args.config is read into it when
                     provided, before it is handed to set_config().

    Returns:
    a triple (args, params, ratios). args is returned as well so that callers
    can process options that were not handled here.
    """
    # no organism provided -> dummy organism
    if args.organism is None:
        # logging.warn() is a deprecated alias of logging.warning()
        logging.warning(
            "no organism provided - assuming that you want to score ratios only or don't use automatic download"
        )
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config is not None:
        config_parser.read(args.config)

    # Initial configuration from default + user config
    params = set_config(config_parser)
    ratios = read_ratios(params, args)
    args.clusters_per_row = params["memb.clusters_per_row"]

    # debug options: comma separated list, "dump_results" implies
    # "keep_memeout", and a lone "all" selects every known option
    debug_options = set(args.debug.split(",")) if args.debug is not None else set()
    if "dump_results" in debug_options:
        debug_options.add("keep_memeout")
    if debug_options == {"all"}:
        # NOTE(review): this binds the module-level constant itself, not a copy
        debug_options = ALL_DEBUG_OPTIONS

    # The overrides dictionary holds all the values that will overwrite or add
    # to the settings defined in the default and user-defined ini files.
    # "use_operons", "use_string" and "nomotifs" are placeholders here, their
    # final values are computed further down.
    overrides = {
        "organism_code": args.organism,
        "ratios_file": args.ratios,
        "string_file": args.string,
        "logfile": args.logfile,
        "rsat_organism": args.rsat_organism,
        "num_clusters": __num_clusters(config_parser, args, ratios),
        "memb.clusters_per_row": args.clusters_per_row,
        "remap_network_nodes": args.remap_network_nodes,
        "ncbi_code": args.ncbi_code,
        "operon_file": args.operons,
        "rsat_dir": args.rsat_dir,
        "rsat_base_url": args.rsat_base_url,
        "rsat_features": args.rsat_features,
        "use_operons": True,
        "use_string": True,
        "debug": debug_options,
        "nomotifs": False,
        "minimize_io": args.minimize_io,
        "nonetworks": args.nonetworks,
        "checkratios": args.checkratios,
        "random_seed": args.random_seed,
        "pipeline_file": args.pipeline,
        "synonym_file": args.synonym_file,
        "fasta_file": args.fasta_file,
        "interactive": args.interactive,
        "resume": args.resume,
        "case_sensitive": args.case_sensitive,
        "command_line": args.command_line,
        "use_BSCM": args.use_BSCM,
        "use_chi2": args.use_chi2,
    }

    # these options only override the configured value when they were
    # actually specified
    for optional_key in ("random_seed", "case_sensitive", "pipeline_file", "rsat_base_url"):
        if overrides[optional_key] is None:
            del overrides[optional_key]

    # membership update default parameters
    # these come first, since a lot depends on clustering numbers
    num_clusters = overrides["num_clusters"]
    if ratios.num_columns >= 60:
        overrides["memb.clusters_per_col"] = int(round(num_clusters / 2.0))
    else:
        overrides["memb.clusters_per_col"] = int(round(num_clusters * 2.0 / 3.0))

    # motif finding is switched off when requested or when
    # meme.check_meme_version() did not return a usable version
    params["MEME"]["version"] = meme.check_meme_version()
    overrides["nomotifs"] = args.nomotifs or not params["MEME"]["version"]
    overrides["use_string"] = not args.nostring
    overrides["use_operons"] = not args.nooperons

    if args.num_cores is not None:
        overrides["num_cores"] = args.num_cores
    if args.out:
        overrides["output_dir"] = args.out
    if args.cachedir:
        overrides["cache_dir"] = args.cachedir

    if args.num_iterations is not None:
        overrides["num_iterations"] = args.num_iterations

    for key, value in overrides.items():
        params[key] = value

    # seed both the Python and the R random generators
    if params["random_seed"] is not None:
        random.seed(params["random_seed"])
        util.r_set_seed(params["random_seed"])

    params["out_database"] = os.path.join(params["output_dir"], params["dbfile_name"])

    params["new_data_file"] = False  # It should only be possible to change data files in a resume
    # we return the args here, too, in case we have some parameters not
    # processed
    return args, params, ratios