def foo(uid, a, b, c, d, e=1, epoch=0, experiment_name=NAMESPACE, client=None):
    """Compute a toy objective and push one metric item per epoch.

    Pushes ``epoch + 1`` messages to METRIC_QUEUE under ``experiment_name``
    and returns the objective plus the last epoch index.

    Fix: the ``client`` argument was previously ignored — a new client was
    always created.  A client is now created only when none is supplied,
    matching the other constructors in this file.
    """
    result = a + 2 * b - c**2 + d + e
    if client is None:
        client = new_client(URI, DATABASE)
    for i in range(epoch + 1):
        data = {'obj': i + result, 'valid': i + result, 'uid': uid, 'epoch': i}
        client.push(METRIC_QUEUE, experiment_name, data, mtype=METRIC_ITEM)
    # NOTE: relies on the loop body running at least once (epoch >= 0).
    return result + i
Example #2
0
def run(uri,
        database,
        namespace,
        function,
        num_experiments,
        num_repro,
        objective,
        variables,
        defaults,
        params,
        resumable,
        sleep_time=60,
        save_dir='.'):
    """Run the reproducibility benchmark end to end.

    Registers the experiments, waits for them to complete, fetches and
    saves the results, then validates them with ``test``.
    """
    client = new_client(uri, database)

    # Fold the variables and params into the default arguments.
    merged = dict(variables)
    merged.update(params)
    defaults.update(merged)

    # Variable names are always handled in sorted order.
    sorted_variables = sorted(variables)

    configs = generate(num_experiments, num_repro, objective,
                       sorted_variables, defaults, resumable)
    register(client, function, namespace, configs)

    # Block until all registered experiments are done.
    wait(client, namespace, sleep=sleep_time)

    data = fetch_results(client, namespace, configs, sorted_variables,
                         params, defaults)

    save_results(namespace, data, save_dir)

    test(data, num_experiments, num_repro, objective, variables, resumable)
Example #3
0
def run(uri, database, namespace, function, fidelity, space, count, variables,
        plot_filename, objective, defaults, save_dir='.', sleep_time=60,
        register=True):
    """Run a random-search HPO, wait for completion, then save and plot.

    When ``register`` is False (or the namespace is already registered),
    the function only polls an existing HPO until it completes.
    """
    # Default to a single-epoch fidelity when none is given.
    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    defaults.update(variables)

    config = {
        'name': 'random_search',
        'fidelity': fidelity,
        'space': space,
        'count': count
        }

    client = new_client(uri, database)

    if not is_registered(client, namespace) and register:
        register_hpo(client, namespace, function, config, defaults=defaults)

    # Poll until the HPO reports completion.
    while not is_hpo_completed(client, namespace):
        print_status(client, namespace)
        time.sleep(sleep_time)

    # get the result of the HPO
    print('HPO is done')  # fixed: was an f-string with no placeholders
    data = fetch_hpo_valid_curves(client, namespace, sorted(variables.keys()))
    save_results(namespace, data, save_dir)

    plot(space, objective, data, plot_filename, model_seed=1)
Example #4
0
def my_important_transaction():
    """Record two lazy operations, then execute them as one transaction."""
    # The queue only records the calls; nothing runs yet.
    queue = RecordQueue()
    queue.do_something_1()
    time.sleep(5)
    queue.do_something_2()

    # The whole recorded transaction executes here, in one shot.
    queue.execute(FakeClient(new_client(URI, DATABASE)))
Example #5
0
    def __init__(self, uri=None, database=None, experiment=None, client=None):
        """Store the experiment and attach a queue client, creating one
        from ``uri``/``database`` when none is supplied.

        Raises the module-level ERROR when the backend failed to import.
        """
        if ERROR is not None:
            raise ERROR

        self.experiment = experiment
        self.client = client if client is not None else new_client(uri, database)
        self.uid = None
Example #6
0
    def __enter__(self):
        """Start a queue server and connect a client and monitor to it.

        Returns self so the instance can be used as a context manager.
        """
        self.server = new_server(uri=self.uri, database=DATABASE)
        try:
            self.server.start(wait=True)
        except Exception as e:
            # Tear down the half-started server and its on-disk queue
            # directory before propagating the failure.
            self.server.stop()
            shutil.rmtree('/tmp/queue/', ignore_errors=True)
            raise e

        self.client = new_client(self.uri, DATABASE, 'client-test')
        self.monitor = self.client.monitor()
        return self
Example #7
0
def test_hpo_serializable(model_type):
    """An HPO resumed across remote worker runs must reach the same
    trials as the same HPO executed locally in a single run.

    Fixes: removed a dead ``namespace`` assignment that was overwritten
    before use, and replaced the for/break message scan (which raised
    NameError on an empty result queue) with an explicit search.
    """
    n_init = 2
    count = 10

    # First run using a remote worker where serialization is necessary
    # and for which hpo is resumed between each branin call
    hpo = build_robo(model_type, n_init=n_init, count=count)

    namespace = 'test_hpo_serializable'
    hpo = {
        'hpo': make_remote_call(HPOptimizer, **hpo.kwargs),
        'hpo_state': None,
        'work': make_remote_call(branin),
        'experiment': namespace
    }
    client = new_client(URI, DATABASE)
    client.push(WORK_QUEUE, namespace, message=hpo, mtype=HPO_ITEM)
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 1
    worker.run()

    # Find the final HPO state among the result messages; fail cleanly
    # when the worker produced none.
    messages = client.monitor().unread_messages(RESULT_QUEUE, namespace)
    m = next((msg for msg in messages if msg.mtype == HPO_ITEM), None)
    assert m is not None, 'HPO not completed'

    worker_hpo = build_robo(model_type)
    worker_hpo.load_state_dict(m.message['hpo_state'])
    assert len(worker_hpo.trials) == count

    # Then run locally where BO is not resumed
    local_hpo = build_robo(model_type, n_init=n_init, count=count)
    i = 0
    best = float('inf')
    while local_hpo.remaining() and i < local_hpo.hpo.count:
        samples = local_hpo.suggest()
        for sample in samples:
            z = branin(**sample)
            local_hpo.observe(sample['uid'], z)
            best = min(z, best)
            i += 1

    assert i == local_hpo.hpo.count

    # Although remote worker was resumed many times, it should give the same
    # results as the local one which was executed in a single run.
    assert worker_hpo.trials == local_hpo.trials
Example #8
0
def test_check_sigkill_nothing_happened(signal):
    """Kill the process before its transaction runs: the queued message
    must remain unread and unactioned."""
    client = new_client(URI, DATABASE)
    client.db[QUEUE].drop()
    client.push(QUEUE, NAMESPACE, {'my_work': 0})

    proc = Process(target=my_important_transaction)
    proc.start()

    # Give the child a moment to come up, then kill it early.
    time.sleep(1)
    os.kill(proc.pid, signal)

    # Nothing was done, the process died before the transaction
    assert not client.monitor().messages(QUEUE, NAMESPACE)[0].read
    assert not client.monitor().messages(QUEUE, NAMESPACE)[0].actioned
Example #9
0
File: worker.py  Project: bouthilx/cqueue
 def __init__(self, queue_uri, database, namespace, worker_id, work_queue, result_queue=None):
     """Bind a worker to its queues and message-queue client.

     Args:
         queue_uri: URI of the message-queue backend.
         database: database name passed to ``new_client``.
         namespace: namespace the worker operates under.
         worker_id: numeric id, used to name the client.
         work_queue: queue the worker pulls work items from.
         result_queue: optional queue results are pushed to.
     """
     self.uri = queue_uri
     self.namespace = namespace
     # Client used for all queue traffic.
     self.client: MessageQueue = new_client(queue_uri, database)
     self.running = False
     self.work_id = worker_id
     self.broker = None
     self.work_queue = work_queue
     self.result_queue = result_queue
     # Per-message execution context shared with the handlers.
     self.context = {}
     self.client.name = f'worker-{self.work_id}'
     self.namespaced = True
     # Give up on a message after 5 minutes; retry it at most 3 times.
     self.timeout = 5 * 60
     self.max_retry = 3
     # Message-type dispatch table; SHUTDOWN stops the worker.
     self.dispatcher = {
         SHUTDOWN: self.shutdown_worker
     }
Example #10
0
def test_check_sigterm_everything_finished(signal):
    """A signal delivered mid-transaction must not interrupt it: the
    queued message ends up both read and actioned."""
    client = new_client(URI, DATABASE)
    client.db[QUEUE].drop()
    client.push(QUEUE, NAMESPACE, {'my_work': 0})

    proc = Process(target=my_important_transaction)
    proc.start()

    # Let the transaction get underway before signalling.
    time.sleep(10)
    # Kill during the transaction
    os.kill(proc.pid, signal)

    # Should not be able to kill it until the end of the thread
    proc.join()

    assert client.monitor().messages(QUEUE, NAMESPACE)[0].read
    assert client.monitor().messages(QUEUE, NAMESPACE)[0].actioned
Example #11
0
def run(uri,
        database,
        namespace,
        function,
        objective,
        medians,
        defaults,
        variables,
        params,
        num_experiments,
        add_reference,
        sleep_time=60,
        save_dir='.'):
    """Two-stage experiment: first sweep the ``medians`` dimensions, fold
    their medians back into the defaults, then sweep ``variables``.

    Fix: the second ``wait`` used a hard-coded ``sleep=5`` (a leftover
    debug value) while the first respected ``sleep_time``.
    """
    if num_experiments is None:
        num_experiments = 20

    client = new_client(uri, database)

    # Fold variables and params into the default arguments.
    defaults.update(dict(list(variables.items()) + list(params.items())))

    # Stage 1: never add the reference run here; it only belongs in the
    # final sweep below.
    configs = generate(range(num_experiments),
                       medians,
                       defaults,
                       add_reference=False)
    register(client, function, namespace, configs)

    wait(client, namespace, sleep=sleep_time)

    # Use the medians of the first stage as defaults for the second.
    data = fetch_results(client, namespace, configs, medians, params, defaults)
    defaults.update(get_medians(data, medians, objective))
    new_configs = generate(range(num_experiments),
                           variables,
                           defaults,
                           add_reference=add_reference)
    register(client, function, namespace, new_configs)

    wait(client, namespace, sleep=sleep_time)

    # Fetch and save the combined results of both stages.
    configs.update(new_configs)
    data = fetch_results(client, namespace, configs, variables, params,
                         defaults)

    save_results(namespace, data, save_dir)
Example #12
0
    def __init__(self,
                 uri,
                 database,
                 experiment,
                 clean=False,
                 launch_server=False):
        """Set up the group leader: optionally launch a broker, connect a
        client, and optionally clear the queue."""
        self.database = database
        self.uri = uri

        # Optionally start a message broker locally.
        self.broker = new_server(uri, database) if launch_server else None
        if self.broker is not None:
            self.broker.start()

        self.client = new_client(uri, database)
        self.client.name = 'group-leader'
        self.experiment = experiment
        self.workers = []

        if clean:
            self.clear_queue()
Example #13
0
    def __init__(self,
                 hpo,
                 rank,
                 uri,
                 experiment,
                 database=option('olympus.database', 'olympus')):
        """Attach this worker to an experiment's HPO.

        Args:
            hpo: the hyper-parameter optimizer this worker serves.
            rank: worker rank; rank 0 is responsible for queueing the HPO.
            uri: message-queue URI.
            experiment: experiment name, used as the queue namespace.
            database: database name; NOTE the default is evaluated once,
                at definition time.

        Raises:
            ExperimentFinished: if the experiment already has a final state.
        """
        self.hpo = hpo
        self.experiment = experiment
        self.client = new_client(uri, database)
        self.current_message = None

        # check that HPO is not finished
        state = self._fetch_final_state()
        if state is not None:
            raise ExperimentFinished(
                f'Experiment `{experiment}` is finished, change the experiment name'
            )

        # first worker queue HPO
        if rank == 0:
            self._queue_hpo()

        # broadcast that one worker is joining
        self.client.push(RESULT_QUEUE, self.experiment, {}, mtype=WORKER_JOIN)
Example #14
0
def main(argv=None):
    """Interactively purge queue collections.

    Without ``--namespace``: list every namespace found in the work queue
    and, after confirmation, drop the metric/work/result queues entirely.
    With ``--namespace``: remove only documents whose namespace starts
    with the given prefix (case-insensitive), after confirmation.

    Returns 0 when nothing matches; None otherwise.

    Fix: the no-namespace branch printed "No namespace found for None";
    the message no longer interpolates the (absent) namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--uri', default='mongo://127.0.0.1:27017', type=str)
    parser.add_argument('--database', default='olympus', type=str)
    parser.add_argument('--namespace', type=str)

    options = parser.parse_args(argv)

    client = new_client(options.uri, options.database)

    if options.namespace is None:
        # Show the current document counts before doing anything.
        print('Found')
        print(client.db[METRIC_QUEUE].count())
        print(client.db[WORK_QUEUE].count())
        print(client.db[RESULT_QUEUE].count())

        # Distinct namespaces present in the work queue.
        stats = client.db[WORK_QUEUE].aggregate([
            {
                '$project': {
                    'namespace': 1,
                }
            },
            {
                '$group': {
                    '_id': '$namespace',
                }
            },
        ])
        stats = sorted(doc['_id'] for doc in stats)

        if not stats:
            print('No namespace found')
            return 0

        print('\n'.join(stats))
        output = input(
            'Do you want to delete all matching namespaces above. (y/n):')

        if output != 'y':
            print('Cancel purge')
            return

        # Destructive: drops the whole collections, all namespaces.
        client.db[METRIC_QUEUE].drop()
        client.db[WORK_QUEUE].drop()
        client.db[RESULT_QUEUE].drop()

        # Counts should now be zero.
        print(client.db[METRIC_QUEUE].count())
        print(client.db[WORK_QUEUE].count())
        print(client.db[RESULT_QUEUE].count())

    else:
        # Case-insensitive prefix match on the namespace.
        query = {
            'namespace': {
                '$regex': re.compile(f"^{options.namespace}", re.IGNORECASE)
            }
        }
        stats = client.db[WORK_QUEUE].aggregate([
            {
                '$match': query
            },
            {
                '$project': {
                    'namespace': 1,
                }
            },
            {
                '$group': {
                    '_id': '$namespace',
                }
            },
        ])
        stats = sorted(doc['_id'] for doc in stats)

        if not stats:
            print(f'No namespace found for {options.namespace}')
            return 0

        print('\n'.join(stats))
        output = input(
            'Do you want to delete all matching namespaces above. (y/n):')

        if output != 'y':
            print('Cancel purge')
            return

        print('Found')
        print(client.db[METRIC_QUEUE].count(query))
        print(client.db[WORK_QUEUE].count(query))
        print(client.db[RESULT_QUEUE].count(query))

        # Remove only the matching documents from each queue.
        client.db[METRIC_QUEUE].remove(query)
        client.db[WORK_QUEUE].remove(query)
        client.db[RESULT_QUEUE].remove(query)

        print('Now there is')
        print(client.db[METRIC_QUEUE].count(query))
        print(client.db[WORK_QUEUE].count(query))
        print(client.db[RESULT_QUEUE].count(query))
Example #15
0
def test_user_pass():
    """Push and pop through a client built from a user:password URI.

    Fix: the URI literal carried an ``f`` prefix but contains no
    placeholders (credentials are redacted in this example).
    """
    uri = 'mongo://*****:*****@127.0.0.1:27017'
    client = new_client(uri, 'test')
    client.push(QUEUE, NAMESPACE, 'test')
    _ = client.pop(QUEUE, NAMESPACE)
def client():
    """Create and return a fresh queue client for the test fixtures."""
    queue_client = new_client(URI, DATABASE)
    return queue_client
Example #17
0
def main(argv=None):
    """Repair queue inconsistencies for every namespace whose name starts
    with ``--namespace`` (case-insensitive prefix match).

    Fix: when exactly one namespace matched, the code repaired
    ``args.namespace`` itself instead of the matched name — wrong when
    the single match is a strict prefix-extension.  The matched names
    are now always used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--uri', default='mongo://127.0.0.1:27017', type=str)
    parser.add_argument('--database', default='olympus', type=str)
    parser.add_argument('--namespace', default=None, type=str)
    parser.add_argument('--test-only', action='store_true')
    parser.add_argument('--show-errors', action='store_true')

    args = parser.parse_args(argv)

    set_verbose_level(3)

    client = new_client(args.uri, args.database)

    # Namespaces present in the work queue matching the prefix.
    query = {
        'namespace': {
            '$regex': re.compile(f"^{args.namespace}", re.IGNORECASE)
        }
    }
    stats = client.db[WORK_QUEUE].aggregate([
        {
            '$match': query
        },
        {
            '$project': {
                'namespace': 1,
            }
        },
        {
            '$group': {
                '_id': '$namespace',
            }
        },
    ])
    stats = sorted(doc['_id'] for doc in stats)

    if not stats:
        print(f'No namespace found for {args.namespace}')
        return 0

    if len(stats) > 1:
        print('\n'.join(stats))
        print('All these namespaces were found.')
    # Always operate on the namespaces that actually exist.
    namespaces = stats

    for namespace in namespaces:
        print()
        print(namespace)
        if args.show_errors:
            show_errors(client, namespace, HPO_ITEM)
            show_errors(client, namespace, WORK_ITEM)
        repair_hpo_duplicates(client, namespace, test_only=args.test_only)
        repair_trials_duplicates(client, namespace, test_only=args.test_only)
        repair_hpo_lost_results(client,
                                args.uri,
                                args.database,
                                namespace,
                                test_only=args.test_only)
        failover_broken(client, namespace, test_only=args.test_only)
Example #18
0
def run(uri,
        database,
        namespace,
        function,
        num_experiments,
        num_simuls,
        fidelity,
        space,
        objective,
        variables,
        defaults,
        num_replicates=None,
        sleep_time=60,
        do_full_train=False,
        save_dir='.',
        seed=1,
        register=True,
        rep_types=REP_TYPES):
    """Run the simulation benchmark: random-search HPO runs plus their
    replicates, then fetch, consolidate, and save the results.

    Fix: ``registered_replicates`` was referenced before assignment when
    ``register`` is False, raising UnboundLocalError on the first loop
    iteration; it is now initialized before the loop.
    """

    hpo_budget = 100
    surrogate_budget = 200

    if num_replicates is None:
        num_replicates = num_experiments

    # We use 200 trials to fit the surrogate models (surrogate_budget is 200)
    # but we only need 100 for the ideal (hpo_budget is 100)
    # therefore, since num_simuls is at least half smaller than number of
    # replicates, we can run only (num_replicates / 2) hpo runs and use
    # first half and second 100 half as 2 separe ideal runs.
    # This is possible since we are using random search.

    assert (num_experiments % 2) == 0
    assert num_simuls <= (num_experiments / 2)

    num_ideal = num_experiments // 2

    hpo = 'random_search'

    # TODO
    # for each repetition, vary all sources of variations
    # when one hpo is done, create all biased and simulations

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    client = new_client(uri, database)

    configs = generate_hpos(list(range(num_ideal)), [hpo], surrogate_budget,
                            fidelity, space, namespace, defaults)

    to_replicate = get_configs_to_replicate(configs, num_simuls)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    namespaces = register_hpos(client,
                               namespace,
                               function,
                               configs,
                               defaults,
                               hpo_stats,
                               register=register)
    remainings = namespaces

    data_hpo = defaultdict(dict)
    all_replicates = dict(random_search=dict())
    # Fix: defined even when ``register`` is False (see docstring).
    registered_replicates = None
    while sum(remainings.values(), []):
        print_status(client, namespace, namespaces)
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data_hpo)

        ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

        replicates = generate_replicates(ready_configs,
                                         data_hpo,
                                         variables,
                                         objective,
                                         hpo_budget,
                                         num_replicates,
                                         early_stopping=False,
                                         rep_types=rep_types)
        if register:
            registered_replicates = register_all_replicates(
                client, function, namespace, replicates)

        if replicates.get('random_search'):
            all_replicates['random_search'].update(replicates['random_search'])
        # Only sleep when nothing new was just registered.
        if sum(remainings.values(), []) and not registered_replicates:
            time.sleep(sleep_time)

    wait(client, namespace, sleep=sleep_time)

    data_replicates = fetch_hpos_replicates(client, configs, all_replicates,
                                            variable_names, space, rep_types)

    # Save valid results
    data = consolidate_results(data_hpo, data_replicates, rep_types)
    save_results(namespace, data, save_dir)
Example #19
0
def client():
    """Connect to the local MongoDB instance used by the tests."""
    connection = new_client('mongo://127.0.0.1:27017', 'olympus')
    return connection
Example #20
0
def run(uri, database, namespace, function, num_experiments, budget, fidelity, space, objective,
        variables, defaults, sleep_time=60, do_full_train=False, save_dir='.', partial=False,
        register=True):
    """Run the HPO benchmark over several optimizers and save the results.

    When ``partial`` is True, only fetch whatever results already exist
    and save them, without registering or waiting for new runs.

    NOTE(review): the ``do_full_train`` path references names not
    defined in this function (``registered``, ``completed``) and looks
    unfinished — see the in-line notes below; confirm before enabling.
    """

    # TODO: Add hyperband
    hpos = ['grid_search', 'nudged_grid_search', 'noisy_grid_search', 'random_search',
            'bayesopt']

    # Default to a single-epoch fidelity when none is given.
    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    # TODO: Add back when hyperband is implemented
    # if fidelity['min'] == fidelity['max']:
    #     hpos.remove(hpos.index('hyperband'))

    if num_experiments is None:
        num_experiments = 2

    client = new_client(uri, database)

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    configs = generate_hpos(
        list(range(num_experiments)), hpos, budget,
        fidelity, space, namespace, defaults)

    variable_names = list(sorted(variables.keys()))

    if partial:
        # Collect the per-hpo namespaces without registering anything,
        # fetch whatever curves exist, save, and stop.
        namespaces = defaultdict(list)
        for hpo, hpo_configs in configs.items():
            for hpo_namespace, config in hpo_configs.items():
                namespaces[hpo].append(hpo_namespace)

        data = defaultdict(dict)
        fetch_hpos_valid_curves(client, namespaces, variable_names, data, partial=True)

        data = consolidate_results(data)
        save_results(namespace, data, save_dir)

        return

    namespaces = register_hpos(
        client, namespace, function, configs,
        dict(list(variables.items()) + list(defaults.items())),
        hpo_stats, register)
    remainings = namespaces

    print_status(client, namespace, namespaces)
    data = defaultdict(dict)
    # Poll until every registered hpo namespace has completed.
    while sum(remainings.values(), []):
        hpos_ready, remainings = fetch_hpos_valid_curves(client, remainings, variable_names, data)

        # TODO: Implement full-train part
        if do_full_train:
            # NOTE(review): ``registered`` is undefined here — this branch
            # raises NameError if enabled.
            configs = generate_tests(data, defaults, registered)
            new_registered_tests = register_tests(client, namespace, function, configs)

        # Nothing became ready this round: report status and back off.
        if not sum(hpos_ready.values(), []):
            print_status(client, namespace, namespaces)
            time.sleep(sleep_time)

    # Save valid results
    data = consolidate_results(data)
    save_results(namespace, data, save_dir)

    if not do_full_train:
        return

    # TODO: Implement full-train part
    # NOTE(review): ``completed`` is undefined — placeholder call.
    wait(completed)  # take the sum of all hpo_namespaces

    # NOTE & TODO: This should follow the same format as valid results, but we need to
    #              make sure the mapping in order of trials is the same.
    data = fetch_results(client, namespace, namespaces)

    # Save test results
    save_results(namespace, data, save_dir)