Example #1
def compute_elo(elo_params, params, generations, elos):
    nw = elo_params.n_workers if elo_params.n_workers else mp.cpu_count() - 1
    gpw = elo_params.games_per_workers if elo_params.games_per_workers else max(1, elo_params.n_games//nw)
    games_idxs = np.array_split(np.arange(elo_params.n_games), elo_params.n_games//gpw)
    nw = min(nw, len(games_idxs))

    lock = mp.Lock()
    params = copy.deepcopy(params)
    params[0].self_play.merge(elo_params.self_play_override)
    params[1].self_play.merge(elo_params.self_play_override)
    devices = params[0].self_play.pytorch_devices
    nn_classes = list(p.nn.model_class for p in params)
    with mp.Pool(nw, initializer=_worker_init, initargs=(elo_params.hdf_file, devices, lock, generations, nn_classes, params)) as pool:
        try:
            tasks = [pool.apply_async(_worker_run, (idxs,), error_callback=_err_cb) for idxs in games_idxs]
            [t.wait() for t in tasks]
        except Exception as e:
            logger.exception("An error occurred")
            raise e
        pool.map(_worker_teardown, range(nw))

    # compute new elo score
    with pd.HDFStore(elo_params.hdf_file, mode="a") as store:
        games = store.get("/fresh")
        del store["/fresh"]
        store.put("elo{}vs{}".format(*generations), games, format="table", append=False)

    winners = games[games.z==1].sort_index(level=["move_idx"]).groupby(level=["game_idx"]).head(1)
    n0 = sum(winners.index.get_level_values("generation")==generations[0])
    n1 = len(winners)-n0
    elo0, elo1 = elo_rating2(elos[0], elos[1], n0, n1, K=30)

    print(f"{params[0].nn.model_class.__name__} generation {generations[0]}: wins={n0}, elo={elos[0]} -> {elo0}")
    print(f"{params[1].nn.model_class.__name__} generation {generations[1]}: wins={n1}, elo={elos[1]} -> {elo1}")
    
    return elo0, elo1, n1/len(winners)
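The elo_rating2 helper called above is not included in this snippet; a minimal sketch of a standard pairwise Elo update with a K-factor, assuming wins_a and wins_b are the two players' win counts, might look like this (an illustration, not the original implementation):

def elo_rating2(elo_a, elo_b, wins_a, wins_b, K=30):
    # hypothetical sketch: each decided game moves both ratings by
    # K * (actual score - expected score), keeping the update zero-sum
    expected_a = 1.0 / (1.0 + 10.0 ** ((elo_b - elo_a) / 400.0))
    games = wins_a + wins_b
    if games == 0:
        return elo_a, elo_b
    score_a = wins_a / games
    delta = K * games * (score_a - expected_a)
    return elo_a + delta, elo_b - delta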
Example #2
    def run_n_episodes(self):
        """Runs game to completion n times and then summarises results and saves model (if asked to)"""
        start = time.time()
        results_queue = Queue()
        gradient_updates_queue = Queue()
        episode_number = multiprocessing.Value('i', 0)
        self.optimizer_lock = multiprocessing.Lock()
        episodes_per_process = int(
            self.config.num_episodes_to_run / self.worker_processes) + 1
        processes = []
        self.actor_critic.share_memory()
        self.actor_critic_optimizer.share_memory()

        optimizer_worker = multiprocessing.Process(
            target=self.update_shared_model, args=(gradient_updates_queue, ))
        optimizer_worker.start()

        for process_num in range(self.worker_processes):
            worker = Actor_Critic_Worker(
                process_num, copy.deepcopy(self.environment),
                self.actor_critic, episode_number, self.optimizer_lock,
                self.actor_critic_optimizer, self.config, episodes_per_process,
                self.hyperparameters["epsilon_decay_rate_denominator"],
                self.action_size, self.action_types, results_queue,
                copy.deepcopy(self.actor_critic), gradient_updates_queue)
            worker.start()
            processes.append(worker)
        self.print_results(episode_number, results_queue)
        for worker in processes:
            worker.join()
        # optimizer_worker.kill()
        optimizer_worker.terminate()

        if self.config.save_model: self.locally_save_policy()
        time_taken = time.time() - start
        return self.game_full_episode_scores, self.rolling_results, time_taken
Example #3
    def __init__(self, args):
        super(SharedMemory, self).__init__(args)

        # params for this memory

        # setup
        self.pos = mp.Value('l', 0)
        self.full = mp.Value('b', False)

        if self.tensortype == torch.FloatTensor:
            self.state0s = torch.zeros(
                (self.memory_size, ) + tuple(self.state_shape),
                dtype=torch.float32)
            self.state1s = torch.zeros(
                (self.memory_size, ) + tuple(self.state_shape),
                dtype=torch.float32)
        elif self.tensortype == torch.ByteTensor:
            self.state0s = torch.zeros(
                (self.memory_size, ) + tuple(self.state_shape),
                dtype=torch.uint8)
            self.state1s = torch.zeros(
                (self.memory_size, ) + tuple(self.state_shape),
                dtype=torch.uint8)
        self.actions = torch.zeros(self.memory_size, self.action_shape)
        self.rewards = torch.zeros(self.memory_size, self.reward_shape)
        self.gamma1s = torch.zeros(self.memory_size, self.gamma_shape)
        self.terminal1s = torch.zeros(self.memory_size, self.terminal_shape)

        self.state0s.share_memory_()
        self.actions.share_memory_()
        self.rewards.share_memory_()
        self.gamma1s.share_memory_()
        self.state1s.share_memory_()
        self.terminal1s.share_memory_()

        self.memory_lock = mp.Lock()
Example #4
    def stream(self,
               modules,
               num_sketches,
               num_epochs,
               num_workers=-1,
               max_id=None):
        """starts a stream of sketches

        modules: ModulesDataset object
            the dataset of function to iterate upon
        num_sketches: int
            the number of sketches to compute per epoch: each sketch
            corresponds to one particular functions.
        num_epochs: int
            the number of epochs
        num_workers: int
            the number of workers to have. a negative value will lead to
            picking half of the local cores
        max_id: int or None
            the maximum index for modules.
        """
        # first stop if it was started before
        self.stop()

        # get the number of workers
        if num_workers is None or num_workers < 0:
            # if not defined, take at least 1 and at most half of the cores
            num_workers = max(1, (mp.cpu_count() - 1) // 2)

        print('SketchStream using ', num_workers, 'workers')
        # now create a queue with a maxsize corresponding to a few times
        # the number of workers
        self.queue = mp.Queue(maxsize=2 * num_workers)
        manager = mp.Manager()

        # prepare some data for the synchronization of the workers
        self.shared_data = manager.dict()
        self.shared_data['num_epochs'] = num_epochs
        if max_id is None:
            self.shared_data['max_id'] = (len(modules) if not isinstance(
                modules, ModulesDataset) else torch.iinfo(torch.int16).max)
        else:
            self.shared_data['max_id'] = max_id
        self.shared_data['pause'] = False
        self.shared_data['current_pick_epoch'] = 0
        self.shared_data['current_put_epoch'] = 0
        self.shared_data['current_sketch'] = 0
        self.shared_data['done_in_current_epoch'] = 0
        self.shared_data['num_sketches'] = (num_sketches
                                            if num_sketches > 0 else -1)
        self.shared_data['sketch_list'] = (
            None if num_sketches == -1 else torch.randint(
                low=0,
                high=self.shared_data['max_id'],
                size=(self.shared_data['num_sketches'], )).int())
        self.lock = mp.Lock()

        # prepare the workers
        processes = [
            mp.Process(target=sketch_worker,
                       kwargs={
                           'sketcher': self,
                           'modules': modules
                       }) for n in range(num_workers)
        ]
        #
        # atexit.register(partial(exit_handler, stream=self,
        #                         processes=processes))

        # go
        for p in processes:
            p.start()

        return self.queue
Example #5
 def __init__(self):
     self._value = mp.Value("i", 0)
     self._lock = mp.Lock()
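The snippet above only shows the constructor; a minimal sketch of how such a process-safe counter is typically completed, with the lock guarding each read-modify-write (the class name and the increment and value methods below are assumptions, not part of the original example):

import multiprocessing as mp

class Counter:
    # process-safe integer counter: a shared Value guarded by a Lock
    def __init__(self):
        self._value = mp.Value("i", 0)
        self._lock = mp.Lock()

    def increment(self, n=1):
        # the lock makes the read-modify-write atomic across processes
        with self._lock:
            self._value.value += n

    def value(self):
        with self._lock:
            return self._value.value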
Example #6
def main(method):

    params = {
        'obs_size': (160, 100),  # screen size of cv2 window
        'dt': 0.025,  # time interval between two frames
        'ego_vehicle_filter':
        'vehicle.lincoln*',  # filter for defining ego vehicle
        'port': 2000,  # connection port
        'task_mode':
        'Straight',  # mode of the task, [random, roundabout (only for Town03)]
        'code_mode': 'train',
        'max_time_episode': 100,  # maximum timesteps per episode
        'desired_speed': 15,  # desired speed (m/s)
        'max_ego_spawn_times': 100,  # maximum times to spawn ego vehicle
    }

    args = built_parser(method=method)
    env = gym.make(args.env_name, params=params)
    state_dim = env.state_space.shape
    action_dim = env.action_space.shape[0]

    args.state_dim = state_dim
    args.action_dim = action_dim
    action_high = env.action_space.high
    action_low = env.action_space.low
    args.action_high = action_high.tolist()
    args.action_low = action_low.tolist()
    args.seed = np.random.randint(0, 30)
    args.init_time = time.time()
    num_cpu = mp.cpu_count()
    print(state_dim, action_dim, action_high, num_cpu)

    if args.alpha == 'auto' and args.target_entropy == 'auto':
        delta_a = np.array(args.action_high, dtype=np.float32) - np.array(
            args.action_low, dtype=np.float32)
        args.target_entropy = -1 * args.action_dim  # + sum(np.log(delta_a/2))

    Q_net1 = QNet(args)
    Q_net1.train()
    Q_net1.share_memory()
    Q_net1_target = QNet(args)
    Q_net1_target.train()
    Q_net1_target.share_memory()
    Q_net2 = QNet(args)
    Q_net2.train()
    Q_net2.share_memory()
    Q_net2_target = QNet(args)
    Q_net2_target.train()
    Q_net2_target.share_memory()
    actor1 = PolicyNet(args)

    print("Network inited")

    if args.code_model == "eval":
        actor1.load_state_dict(
            torch.load('./' + args.env_name + '/method_' + str(args.method) +
                       '/model/policy_' + str(args.max_train) + '.pkl'))
    actor1.train()
    actor1.share_memory()
    actor1_target = PolicyNet(args)
    actor1_target.train()
    actor1_target.share_memory()
    actor2 = PolicyNet(args)
    actor2.train()
    actor2.share_memory()
    actor2_target = PolicyNet(args)
    actor2_target.train()
    actor2_target.share_memory()

    print("Network set")

    Q_net1_target.load_state_dict(Q_net1.state_dict())
    Q_net2_target.load_state_dict(Q_net2.state_dict())
    actor1_target.load_state_dict(actor1.state_dict())
    actor2_target.load_state_dict(actor2.state_dict())

    print("Network loaded!")

    Q_net1_optimizer = my_optim.SharedAdam(Q_net1.parameters(),
                                           lr=args.critic_lr)
    Q_net1_optimizer.share_memory()
    Q_net2_optimizer = my_optim.SharedAdam(Q_net2.parameters(),
                                           lr=args.critic_lr)
    Q_net2_optimizer.share_memory()
    actor1_optimizer = my_optim.SharedAdam(actor1.parameters(),
                                           lr=args.actor_lr)
    actor1_optimizer.share_memory()
    actor2_optimizer = my_optim.SharedAdam(actor2.parameters(),
                                           lr=args.actor_lr)
    actor2_optimizer.share_memory()
    log_alpha = torch.zeros(1, dtype=torch.float32, requires_grad=True)
    log_alpha.share_memory_()
    alpha_optimizer = my_optim.SharedAdam([log_alpha], lr=args.alpha_lr)
    alpha_optimizer.share_memory()

    print("Optimizer done")

    share_net = [
        Q_net1, Q_net1_target, Q_net2, Q_net2_target, actor1, actor1_target,
        actor2, actor2_target, log_alpha
    ]
    share_optimizer = [
        Q_net1_optimizer, Q_net2_optimizer, actor1_optimizer, actor2_optimizer,
        alpha_optimizer
    ]

    experience_in_queue = []
    experience_out_queue = []
    for i in range(args.num_buffers):
        experience_in_queue.append(Queue(maxsize=10))
        experience_out_queue.append(Queue(maxsize=10))
    shared_queue = [experience_in_queue, experience_out_queue]
    step_counter = mp.Value('i', 0)
    stop_sign = mp.Value('i', 0)
    iteration_counter = mp.Value('i', 0)
    shared_value = [step_counter, stop_sign, iteration_counter]
    lock = mp.Lock()
    procs = []
    if args.code_model == "train":
        for i in range(args.num_learners):
            if i % 2 == 0:
                device = torch.device("cuda:1")
            else:
                device = torch.device("cuda:0")
            # device = torch.device("cpu")
            procs.append(
                Process(target=leaner_agent,
                        args=(args, shared_queue, shared_value, share_net,
                              share_optimizer, device, lock, i)))
        for i in range(args.num_actors):
            procs.append(
                Process(target=actor_agent,
                        args=(args, shared_queue, shared_value,
                              [actor1, Q_net1], lock, i)))
        for i in range(args.num_buffers):
            procs.append(
                Process(target=buffer,
                        args=(args, shared_queue, shared_value, i)))
        procs.append(
            Process(target=evaluate_agent,
                    args=(args, shared_value, share_net)))
    elif args.code_model == "simu":
        procs.append(Process(target=simu_agent, args=(args, shared_value)))

    for p in procs:
        p.start()
    for p in procs:
        p.join()
Example #7
 def __init__(self):
     self.val = mp.Value('i', 0)
     self.lock = mp.Lock()
Example #8
 def __init__(self, val=True):
     self.val = mp.Value("b", False)
     self.lock = mp.Lock()
Example #9
 def __init__(self, config):
     self.config = config
     self.config.steps_lock = mp.Lock()
     self.config.network_lock = mp.Lock()
     self.config.total_steps = mp.Value('i', 0)
     self.config.stop_signal = mp.Value('i', False)
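A hedged sketch of how a worker built on this config might use these shared fields; the worker_loop function and its step logic are hypothetical, not part of the original example:

def worker_loop(config, max_steps):
    while True:
        with config.steps_lock:
            # stop when another process raised the signal or the step budget is spent
            if config.stop_signal.value or config.total_steps.value >= max_steps:
                break
            config.total_steps.value += 1
        # ... run one environment step / learning update here ...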
Example #10
    def __init__(self, config):
        super(TrainManager, self).__init__()

        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

        self.config = config
        self.training_config = self.config['training']

        if self.training_config['transfer']:
            self.target_model = load_model(self.training_config['model_path'])
        else:
            self.target_model = create_model(self.config['model'])
            alt_model = None
            if self.training_config.get('init_actor', False):
                alt_model = load_model(self.training_config['model_path'])
                self.target_model.init_actor(alt_model)
            if self.training_config.get('init_critic', False):
                if alt_model is None:
                    alt_model = load_model(self.training_config['model_path'])
                self.target_model.init_critic(alt_model)
            self._prime_model(self.target_model, device)

        self.models = []
        self.proxy_models = []

        for _ in range(self.training_config['num_threads_training']):
            model = copy.deepcopy(self.target_model)
            proxy_model = copy.deepcopy(self.target_model)
            self._prime_model(model, device)
            self._prime_model(proxy_model, device)
            self.models.append(model)
            self.proxy_models.append(proxy_model)

        self.processes = []

        self.episode_queues = [
            torch_mp.Queue(maxsize=128)
            for _ in range(self.training_config['num_threads_sampling'])
        ]

        self.sample_queues = [
            torch_mp.Queue(
                maxsize=self.training_config['sampling_queue_max_len'])
            for _ in range(self.training_config['num_threads_training'])
        ]

        self.action_conns = [
            torch_mp.Pipe(duplex=False) for _ in range(
                self.training_config['num_threads_exploring_virtual'])
        ]

        self.observation_conns = [
            torch_mp.Pipe(duplex=False) for _ in range(
                self.training_config['num_threads_exploring_virtual'])
        ]

        self.observation_queue = torch_mp.Queue()
        self.action_queue = torch_mp.Queue()

        self.start_barrier = torch_mp.Barrier(
            self.training_config['num_threads_training'])
        self.finish_barrier = torch_mp.Barrier(
            self.training_config['num_threads_training'])
        self.update_lock = torch_mp.Lock()

        self.best_reward = Value('f', 0.0)
        self.global_episode = Value('i', 0)
        self.global_update_step = Value('i', 0)
Example #11
def batch_training(fileprefix='', tasks=[]):

    if fileprefix:
        filename = '{}-main.out'.format(fileprefix)
        filepath = pathlib.Path(filename).resolve()
        if not filepath.parent.exists():
            filepath.parent.mkdir(parents=True)
        stdout_target = filepath.open('wt')
    else:
        stdout_target = sys.__stdout__

    with contextlib.redirect_stdout(stdout_target):

        print('System-wide logical CPUs:', psutil.cpu_count())
        print('System-wide physical CPUs:', psutil.cpu_count(logical=False))
        oversubscribe = 2
        ngpus = torch.cuda.device_count()
        nworkers = ngpus * oversubscribe
        curproc = psutil.Process()
        createtime = curproc.create_time()
        print('Main process {} on CPU {} with {} threads'.format(
            curproc.pid, curproc.cpu_num(), curproc.num_threads()))
        print('Presently available CPUs:', len(curproc.cpu_affinity()))
        print('Presently available GPUs:', ngpus)
        print('Worker processes:', nworkers)
        # load input tasks into queue
        task_queue = mp.SimpleQueue()
        for i, task in enumerate(tasks):
            print('Task', i + 1, task)
            task_queue.put(task)
        # worker locks
        locks = []
        active_processes = []
        for i in range(nworkers):
            locks.append(mp.Lock())
            active_processes.append(None)
        # results queue
        result_queue = mp.SimpleQueue()
        itask = 0
        while not task_queue.empty():
            for ilock, lock in enumerate(locks):
                if lock.acquire(timeout=1):
                    # acquire lock and expect process == None
                    assert (active_processes[ilock] is None)
                    if task_queue.empty():
                        lock.release()
                        continue
                    train_kwargs = task_queue.get()
                    igpu = ilock % ngpus
                    args = (itask, ilock, igpu, fileprefix, train_kwargs,
                            result_queue)
                    p = mp.Process(target=gpu_worker, args=args)
                    print(
                        '  Launching task {}/{} on worker {} on GPU {}'.format(
                            itask, len(tasks), ilock, igpu))
                    itask += 1
                    p.start()
                    active_processes[ilock] = p
                else:
                    # locked and expect process != None
                    existing_process = active_processes[ilock]
                    assert (existing_process is not None)
                    if existing_process.exitcode is not None:
                        # process is complete; close and release
                        print('  Process {} finished'.format(
                            existing_process.pid))
                        active_processes[ilock] = None
                        lock.release()
        print('Finished task loop')
        still_running = True
        while still_running:
            still_running = False
            for i, process in enumerate(active_processes):
                if process is None: continue
                if process.exitcode is None:
                    still_running = True
                    break
                else:
                    print('  Process {} finished'.format(process.pid))
                    active_processes[i] = None
            time.sleep(1)
        results = []
        while not result_queue.empty():
            results.append(result_queue.get())
        print('Tasks:', len(tasks), 'results:', len(results))

        def sort_func(element):
            return element[0]

        results = sorted(results, key=sort_func)
        for i, result in enumerate(results):
            print(
                'Task {:3d} worker/GPU {:2d}/{:1d}  dt {:5.1f}s  max/med acc {:5.1f}%/{:5.1f}%  kw: {}'
                .format(*result[0:4], result[4].max(), np.median(result[4]),
                        result[6]))
        delta_seconds = time.time() - createtime
        print('Main execution: {:.1f} s'.format(delta_seconds))
Example #12
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

'''
Let's see how just the non-MAML version adapts.
'''



task_queue = mp.JoinableQueue()
train_episodes_queue = mp.Queue()
valid_episodes_queue = mp.Queue()
policy_lock = mp.Lock()
env_name = "2DNavigation-v0"
env_kwargs = {
                    "low": -0.5,
                    "high": 0.5,
                    "task": {"goal": np.array([1, 1])}
                }
env = gym.make(env_name, **env_kwargs)
print(env)

policy = get_policy_for_env(env,
                            hidden_sizes=(64, 64),
                            nonlinearity='tanh')
policy.share_memory()
baseline = LinearFeatureBaseline(get_input_size(env))
seed = None
Example #13
 def __init__(self, x, y):
     self.ctrl = ReadWriteControl(self)
     self.ctrl_flick = mp.Lock()
     self.which_buffer = mp.Value("l", 0)
     self.buffers = [x, y]
Example #14
def main(method):
    args = built_parser(method=method)
    env = gym.make(args.env_name)
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]

    args.state_dim = state_dim
    args.action_dim = action_dim
    action_high = env.action_space.high
    action_low = env.action_space.low
    args.action_high = action_high.tolist()
    args.action_low = action_low.tolist()
    args.seed = np.random.randint(0, 30)
    args.init_time = time.time()

    if args.alpha == 'auto' and args.target_entropy == 'auto':
        delta_a = np.array(args.action_high, dtype=np.float32) - np.array(
            args.action_low, dtype=np.float32)
        args.target_entropy = -1 * args.action_dim  #+ sum(np.log(delta_a/2))

    Q_net1 = QNet(args)
    Q_net1.train()
    Q_net1.share_memory()
    Q_net1_target = QNet(args)
    Q_net1_target.train()
    Q_net1_target.share_memory()
    Q_net2 = QNet(args)
    Q_net2.train()
    Q_net2.share_memory()
    Q_net2_target = QNet(args)
    Q_net2_target.train()
    Q_net2_target.share_memory()
    actor1 = PolicyNet(args)

    actor1.train()
    actor1.share_memory()
    actor1_target = PolicyNet(args)
    actor1_target.train()
    actor1_target.share_memory()
    actor2 = PolicyNet(args)
    actor2.train()
    actor2.share_memory()
    actor2_target = PolicyNet(args)
    actor2_target.train()
    actor2_target.share_memory()

    Q_net1_target.load_state_dict(Q_net1.state_dict())
    Q_net2_target.load_state_dict(Q_net2.state_dict())
    actor1_target.load_state_dict(actor1.state_dict())
    actor2_target.load_state_dict(actor2.state_dict())

    Q_net1_optimizer = my_optim.SharedAdam(Q_net1.parameters(),
                                           lr=args.critic_lr)
    Q_net1_optimizer.share_memory()
    Q_net2_optimizer = my_optim.SharedAdam(Q_net2.parameters(),
                                           lr=args.critic_lr)
    Q_net2_optimizer.share_memory()
    actor1_optimizer = my_optim.SharedAdam(actor1.parameters(),
                                           lr=args.actor_lr)
    actor1_optimizer.share_memory()
    actor2_optimizer = my_optim.SharedAdam(actor2.parameters(),
                                           lr=args.actor_lr)
    actor2_optimizer.share_memory()
    log_alpha = torch.zeros(1, dtype=torch.float32, requires_grad=True)
    log_alpha.share_memory_()
    alpha_optimizer = my_optim.SharedAdam([log_alpha], lr=args.alpha_lr)
    alpha_optimizer.share_memory()

    share_net = [
        Q_net1, Q_net1_target, Q_net2, Q_net2_target, actor1, actor1_target,
        actor2, actor2_target, log_alpha
    ]
    share_optimizer = [
        Q_net1_optimizer, Q_net2_optimizer, actor1_optimizer, actor2_optimizer,
        alpha_optimizer
    ]

    experience_in_queue = []
    experience_out_queue = []
    for i in range(args.num_buffers):
        experience_in_queue.append(Queue(maxsize=10))
        experience_out_queue.append(Queue(maxsize=10))
    shared_queue = [experience_in_queue, experience_out_queue]
    step_counter = mp.Value('i', 0)
    stop_sign = mp.Value('i', 0)
    iteration_counter = mp.Value('i', 0)
    shared_value = [step_counter, stop_sign, iteration_counter]
    lock = mp.Lock()
    procs = []
    if args.code_model == "train":
        for i in range(args.num_actors):
            procs.append(
                Process(target=actor_agent,
                        args=(args, shared_queue, shared_value,
                              [actor1, Q_net1], lock, i)))
        for i in range(args.num_buffers):
            procs.append(
                Process(target=buffer,
                        args=(args, shared_queue, shared_value, i)))
        procs.append(
            Process(target=evaluate_agent,
                    args=(args, shared_value, share_net)))
        for i in range(args.num_learners):
            #device = torch.device("cuda")
            device = torch.device("cpu")
            procs.append(
                Process(target=leaner_agent,
                        args=(args, shared_queue, shared_value, share_net,
                              share_optimizer, device, lock, i)))
    elif args.code_model == "simu":
        procs.append(Process(target=simu_agent, args=(args, shared_value)))

    for p in procs:
        p.start()
    for p in procs:
        p.join()
Example #15
 def __init__(self, num_inputs):
     self.lock = mp.Lock()
     self.n = torch.zeros(num_inputs).share_memory_()
     self.mean = torch.zeros(num_inputs).share_memory_()
     self.s = torch.zeros(num_inputs).share_memory_()
     self.var = torch.zeros(num_inputs).share_memory_()
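Only the shared buffers are initialized in this fragment; one plausible way such a running-statistics class is completed is a Welford-style update guarded by the lock (the update method below is an assumption, written to be attached to the class above):

def update(self, x):
    # Welford's online mean/variance update; the lock serializes updates
    # coming from several worker processes that share these tensors
    with self.lock:
        self.n += 1.0
        delta = x - self.mean
        self.mean += delta / self.n
        self.s += delta * (x - self.mean)
        if float(self.n[0]) > 1.0:
            self.var.copy_(self.s / (self.n - 1.0))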
Example #16
 def __init__(self, models):
     self.lock = mp.Lock()
     self.grads = {}
     for name, p in models.named_parameters():
         self.grads[name + '_grad'] = torch.zeros(p.size()).share_memory_()
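A hedged sketch of how the shared gradient buffers above might be filled by a worker; the add_gradients helper is an assumption, not part of the original example:

def add_gradients(self, model):
    # accumulate a worker's local gradients into the shared buffers so a
    # separate process can later apply them to the global model
    with self.lock:
        for name, p in model.named_parameters():
            if p.grad is not None:
                self.grads[name + '_grad'] += p.grad.data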
Example #17
                                     outputs=act_dim,
                                     n_hidden_layers=3,
                                     n_hidden_units=128)
    shared_model = networks.DiscreteActorCriticSplit(actor=act_net,
                                                     critic=value_net,
                                                     add_softmax=True)
    shared_average_model = copy.deepcopy(shared_model)
    shared_average_model.no_grads(
    )  # Set requires_grad to false for all parameters
    shared_model.share_memory()
    shared_average_model.share_memory()
    shared_opt = optimizers.SharedAdam(shared_model.parameters(), lr=args.lr)

    # Create shared variables
    shared_counter = utils.Counter()
    shared_model_lock = mp.Lock() if not args.no_lock else None
    summary_queue = mp.Queue()

    processes = []
    workers = []
    for i in range(args.num_workers):
        w = worker.Worker(worker_id=i,
                          env_name=args.env_name,
                          n_steps=args.worker_steps,
                          max_steps=args.t_max,
                          shared_model=shared_model,
                          shared_avg_model=shared_average_model,
                          shared_optimizer=shared_opt,
                          shared_counter=shared_counter,
                          df=args.discount,
                          c=args.c,
Example #18
                        help='Value for gamma')
    parser.add_argument('--beta',
                        default=0.0001,
                        type=float,
                        help='Value for beta')
    args = parser.parse_args()

    args.training = True

    dataset = mp.Manager().list(Dataset(file=args.data))

    model = MCTSnet()
    model.load(args.load_model)
    model.share_memory()

    model_lock = mp.Lock()
    dataset_lock = mp.Lock()

    processes = [
        mp.Process(target=collector,
                   args=(idx, model, dataset, args, dataset_lock))
        for idx in range(args.n_collectors)
    ]
    processes.extend([
        mp.Process(target=optimiser,
                   args=(idx, model, dataset, args, model_lock))
        for idx in range(args.n_collectors, args.n_collectors +
                         args.n_optimisers)
    ])
    processes.append(mp.Process(target=checkpoint,
                                args=(model, dataset, args)))
Example #19
def main():
    os.environ['PYTHONWARNINGS'] = 'ignore:semaphore_tracker:UserWarning'
    mp.set_start_method('spawn', True)

    shutil.rmtree('runs', ignore_errors=True)
    if not os.path.exists('logs'):
        os.makedirs('logs')
    if not os.path.exists('trained'):
        os.makedirs('trained')

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--environment',
        default='RLBench',
        help='Environment to use for training [default = RLBench]')
    parser.add_argument(
        '--save_model',
        default='./model.model',
        help='Path to save the model [default = "./model.model"]')
    parser.add_argument('--load_model',
                        default='',
                        help='Path to load the model [default = '
                        ']')
    parser.add_argument('--n_workers',
                        default=1,
                        type=int,
                        help='Number of workers [default = 1]')
    parser.add_argument(
        '--target_update_frequency',
        default=100,
        type=int,
        help='Frequency for syncing target network [default = 100]')
    parser.add_argument(
        '--checkpoint_frequency',
        default=30,
        type=int,
        help='Frequency for creating checkpoints [default = 30]')
    parser.add_argument('--lr',
                        default=1e-6,
                        type=float,
                        help='Learning rate for the training [default = 1e-6]')
    parser.add_argument('--batch_size',
                        default=64,
                        type=int,
                        help='Batch size for the training [default = 64]')
    parser.add_argument(
        '--gamma',
        default=0.99,
        type=float,
        help='Discount factor for the training [default = 0.99]')
    parser.add_argument(
        '--eps',
        default=0.997,
        type=float,
        help='Greedy constant for the training [default = 0.997]')
    parser.add_argument(
        '--min_eps',
        default=0.1,
        type=float,
        help='Minimum value for greedy constant [default = 0.1]')
    parser.add_argument('--buffer_size',
                        default=200000,
                        type=int,
                        help='Buffer size [default = 200000]')
    parser.add_argument('--episode_length',
                        default=900,
                        type=int,
                        help='Episode length [default=900]')
    parser.add_argument('--headless',
                        default=False,
                        type=bool,
                        help='Run simulation headless [default=False]')
    parser.add_argument(
        '--advance_iteration',
        default=0,
        type=int,
        help='By how many iteration extended eps decay [default=0]')
    parser.add_argument(
        '--warmup',
        default=100,
        type=int,
        help='How many full exploration iterations [default=100]')

    args = parser.parse_args()

    SIMULATOR, NETWORK = environments[args.environment]
    model_shared = NETWORK()
    model_shared.load(args.load_model)
    model_shared.share_memory()

    lock = mp.Lock()

    # Queues
    queues = [mp.Queue() for idx in range(args.n_workers)]

    # Workers
    workers_explore = [
        mp.Process(target=explore,
                   args=(idx, SIMULATOR, model_shared, queues[idx], args,
                         lock)) for idx in range(args.n_workers)
    ]
    workers_explore.append(
        mp.Process(target=optimise,
                   args=(args.n_workers, model_shared, queues, args, lock)))
    workers_explore.append(
        mp.Process(target=checkpoint, args=(model_shared, args)))

    [p.start() for p in workers_explore]
    print("Succesfully started workers!")

    try:
        [p.join() for p in workers_explore]

    except Exception as e:
        print(e)
    except KeyboardInterrupt:
        print('<< EXITING >>')
    finally:
        [p.kill() for p in workers_explore]
        [q.close() for q in queues]

        os.system('clear')
        if input('Save model? (y/n): ') in ['y', 'Y', 'yes']:
            print('<< SAVING MODEL >>')
            model_shared.save(args.save_model)
Example #20
    critic_global = Critic(n_inputs, args.hiddensize)
    critic_global.share_memory()

    if (args.input_path is not None):
        policy_path = os.path.join(args.input_path, 'policy.pt')
        critic_path = os.path.join(args.input_path, 'critic.pt')

        policy_global.load_state_dict(torch.load(policy_path))
        critic_global.load_state_dict(torch.load(critic_path))

    episode_count = mp.Value('i', 0)

    steps_global = mp.Value('i', 0)
    atr = mp.Value('d', 0)
    steps_lock = mp.Lock()

    processes = []

    p = mp.Process(target=validate,
                   args=(policy_global, critic_global, steps_global,
                         episode_count, args))
    p.start()
    processes.append(p)

    if (args.render):
        p = mp.Process(target=render, args=(policy_global, args))
        p.start()

    for i in range(args.num_envs):
        p = mp.Process(target=train,
Example #21
 def __init__(self):
     self.lock = mp.Lock()
     self.weights = None
Example #22
 def __init__(self):
     self.episodes = mp.Value('i', 0)
     self.frames = mp.Value('i', 0)
     self.lock = mp.Lock()
Example #23
 def __init__(self, val=True):
     self.val = mp.Value("i", 0)
     self.lock = mp.Lock()
Example #24
def train(args, model):
    print("training...")
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    if args.load is not None and os.path.isfile(args.load + '_optimizer.pth'):
        source_optimizer = torch.load(args.load+'_optimizer.pth')
        optimizer.load_state_dict(source_optimizer.state_dict())
        del source_optimizer

    optimizer.zero_grad()

    state = NormalizedState(screen=None, depth=None, labels=None, variables=None)
    state.screen = torch.Tensor(args.batch_size, *args.screen_size)
    state.variables = torch.Tensor(args.batch_size, args.variable_num)
    action = torch.LongTensor(args.batch_size, 1)
    reward = torch.Tensor(args.batch_size, 1)
    terminal = torch.Tensor(args.batch_size)
    episode_return = torch.zeros(args.batch_size)

    state.screen.share_memory_()
    state.variables.share_memory_()
    action.share_memory_()
    reward.share_memory_()
    terminal.share_memory_()
    episode_return.share_memory_()

    counter = torch.zeros(1)
    counter.share_memory_()

    def instance(args, state, main_lock, main_event, event, id):
        game = DoomInstance(args.vizdoom_config, args.wad_path, args.skiprate, id,
                            actions=args.action_set, join=True, visible=False, color=id)
        first_pass = True
        while True:
            event.clear()
            if first_pass:
                first_pass = False
                normalized_state = game.get_state_normalized()
                state.screen[id, :] = torch.from_numpy(normalized_state.screen)
                state.variables[id, :] = torch.from_numpy(normalized_state.variables)
            else:
                normalized_state, step_reward, finished = game.step_normalized(action[id, 0])
                #normalized_state = game.get_state_normalized()
                state.screen[id, :] = torch.from_numpy(normalized_state.screen)
                state.variables[id, :] = torch.from_numpy(normalized_state.variables)
                reward[id, 0] = step_reward
                if finished:
                    episode_return[id] = float(game.get_episode_return())
                    # cut rewards from future actions
                    terminal[id] = 0
                else:
                    terminal[id] = 1
            # increase counter and wait main process
            with main_lock:
                counter[0] += 1
                if counter[0] >= args.batch_size:
                    main_event.set()
            event.wait()

    main_event = mp.Event()
    main_lock = mp.Lock()

    procs = []
    events = []
    #mp.set_start_method('spawn')
    for i in range(args.batch_size):
        event = mp.Event()
        p = mp.Process(target=instance, args=(args, state, main_lock, main_event, event, i))
        p.start()
        procs.append(p)
        events.append(event)
    main_event.wait()
    main_event.clear()
    counter[0] = 0

    # start training
    for episode in range(args.episode_num):
        batch_time = time.time()
        for step in range(args.episode_size):
            # get action
            action.copy_(model.get_action(state))
            # step
            for event in events:
                event.set()
            main_event.wait()
            main_event.clear()
            counter[0] = 0
            # get step info
            model.set_reward(reward)
            model.set_terminal(terminal)

        # update model
        model.backward()
        optimizer.step()
        optimizer.zero_grad()

        if episode % 1 == 0:
            print("{}: mean_return = {:f}, batch_time = {:.3f}".format(episode, episode_return.mean(), time.time()-batch_time))

        if episode % 500 == 0:
            torch.save(model, args.model + '_model_server_cp.pth')
            torch.save(optimizer, args.model + '_optimizer_server_cp.pth')

    # terminate the game worker processes before saving
    for p in procs:
        p.terminate()

    torch.save(model, args.model+'_model.pth')
    torch.save(optimizer, args.model+'_optimizer.pth')
Example #25
 def __init__(self, grad_norm, optimizer, scheduler):
     self.optimizer: torch.optim.Optimizer = optimizer
     self.scheduler = scheduler
     self.grad_norm = grad_norm
     self.global_step = torch.tensor(0)
     self.lock = mp.Lock()
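A hedged sketch of how this holder might serialize updates coming from several processes; the step method below is an assumption (it expects torch to be imported and takes the model whose gradients are being applied):

def step(self, model):
    # clip gradients and advance the shared optimizer, scheduler and step
    # counter under the lock so concurrent updates do not interleave
    with self.lock:
        torch.nn.utils.clip_grad_norm_(model.parameters(), self.grad_norm)
        self.optimizer.step()
        self.optimizer.zero_grad()
        if self.scheduler is not None:
            self.scheduler.step()
        self.global_step += 1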
Example #26
                     actor=actor,
                     argp=parser_args)
    worker.run()


if __name__ == '__main__':
    device = CUDA if torch.cuda.is_available() else CPU

    model_root_dir = str(pathlib.Path(__file__).resolve().parents[1]) + "/models/"
    if not os.path.isdir(model_root_dir):
        os.mkdir(model_root_dir)
    model_dir = model_root_dir + datetime.datetime.now().strftime("%H_%M__%d_%m")
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    lock = mp.Lock()
    args = parser.parse_args()
    logger = SummaryWriter()
    # logger.add_hparams(get_hparam_dict(args), {'mean reward': 0})

    actor = Actor(num_actions=NUM_ACTIONS, num_obs=NUM_OBSERVATIONS, log_std_init=np.log(args.init_std))

    # logger.add_graph(actor, torch.zeros(3))

    actor.share_memory()

    critic = Critic(num_actions=NUM_ACTIONS, num_obs=NUM_OBSERVATIONS)

    critic.share_memory()

    shared_replay_buffer = SharedReplayBuffer(capacity=args.replay_buffer_size,
Example #27
def main():
    params = Params()

    if not os.path.exists('./log'):
        os.mkdir('./log')

    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    mp.set_start_method('spawn')

    test_files = [
        'model3115920.ckpt', 'model3070538.ckpt', 'model3067604.ckpt',
        'model3043059.ckpt', 'model2994943.ckpt', 'model2983232.ckpt',
        'model2912569.ckpt', 'model2849037.ckpt', 'model2741430.ckpt',
        'model2696001.ckpt', 'model2685407.ckpt', 'model2659828.ckpt',
        'model2626517.ckpt', 'model2621966.ckpt', 'model2583286.ckpt',
        'model2583025.ckpt', 'model2548002.ckpt', 'model2545110.ckpt',
        'model2484209.ckpt', 'model2461454.ckpt', 'model2449942.ckpt',
        'model2444853.ckpt', 'model2424837.ckpt', 'model2414733.ckpt',
        'model2383330.ckpt'
    ]

    for ckpt in test_files:
        init_msg = " ".join([
            "\n\n++++++++++++++++++++ Initial Task info +++++++++++++++++++++\n",
            "weight file name = {:s}\n".format(ckpt)
        ])
        print(init_msg)
        logging.info(init_msg)

        seen_succ = mp.Value('i', 0)
        seen_length = mp.Value('i', 0)
        unseen_succ = mp.Value('i', 0)
        unseen_length = mp.Value('i', 0)
        lock = mp.Lock()

        # with lock:  # initialize, is it right?
        #     seen_succ = 0
        #     seen_length = 0
        #     unseen_succ = 0
        #     unseen_length = 0

        load_model = params.weight_dir + ckpt
        # load_model = torch.load(params.weight_dir + ckpt)

        #test(params, shared_model, count, lock, best_acc)

        processes = []

        test_process = 0

        for rank in range(params.n_process):
            p = mp.Process(target=run_test,
                           args=(
                               test_process,
                               params,
                               load_model,
                               lock,
                               seen_succ,
                               seen_length,
                               unseen_succ,
                               unseen_length,
                           ))
            test_process += 1
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        msg = " ".join([
            "++++++++++++++++++++ Total Task Stats +++++++++++++++++++++\n",
            "Seen Avg Length = {:.3f}\n".format(seen_length.value /
                                                (20 * params.n_test)),
            "Seen Total Success rate {:3.2f}%".format(100 * seen_succ.value /
                                                      (20 * params.n_test)),
            "UnSeen Avg Length = {:.3f}\n".format(unseen_length.value /
                                                  (50 * params.n_test)),
            "UnSeen Total Success rate {:3.2f}%\n\n".format(
                100 * unseen_succ.value / (50 * params.n_test)),
        ])
        print(msg)
        logging.info(msg)

    print("Done")
Example #28
    num_processes = args.nproc
    criterion = nn.CrossEntropyLoss()
    sta_lidx = work_partition[wid]
    end_lidx = work_partition[wid + 1]
    sub_net = VGG('VGG19', sta_lidx=sta_lidx, end_lidx=end_lidx)
    sub_net.to(device)
    train_proc_list = []
    sync_proc_list = []
    fp_send_proc_list = []
    fp_recv_proc_list = []
    bp_send_proc_list = []
    bp_recv_proc_list = []
    # fp_to_send, fp_recved, bp_send, bp_recv, grad_aggregated should be auto-incrementing counters
    sub_net.share_memory()
    grad_dict = gen_shared_grad(sub_net)
    sync_lock = mp.Lock()
    sync_counter = torch.zeros(1, dtype=torch.int32)
    sync_counter = sync_counter.share_memory_()
    global_step = torch.zeros(1, dtype=torch.int32)
    global_step = global_step.share_memory_()

    for rank in range(num_processes):
        #fp_head_tensor, fp_tail_tensor, bp_head_tensor, bp_tail_tensor = gen_fp_bp_tensor_list(bs, wid, wn)
        fp_head_list, fp_tail_list, bp_head_list, bp_tail_list = gen_fp_bp_tensor_list(
            iter_thresh, wid, wn, input_shp, output_shp)
        #print(fp_tail_tensor.size())
        #print("########")
        shared_cnters = gen_shared_counter()

        #rank, bs, wid, wn,fp_tail_list, shared_cnters
        fp_send_p = mp.Process(target=fp_send_proc,
Example #29
def batch_training():
    print('System-wide logical CPUs:', psutil.cpu_count())
    print('System-wide physical CPUs:', psutil.cpu_count(logical=False))
    oversubscribe = 2
    ngpus = torch.cuda.device_count()
    nworkers = ngpus * oversubscribe
    curproc = psutil.Process()
    createtime = curproc.create_time()
    print('Main process {} on CPU {} with {} threads'.format(
        curproc.pid, curproc.cpu_num(), curproc.num_threads()))
    print('Presently available CPUs:', len(curproc.cpu_affinity()))
    print('Presently available GPUs:', ngpus)
    print('Worker processes:', nworkers)
    # tasks and queue
    tasks = []
    #data = 1
    #nrepeat = 10
    #epochs = 12
    #optim_names = ['Adam','Adagrad','Adamax','SGD','ASGD']
    #learning_rates = [1e-2,1e-3,1e-4,1e-5]
    data = 0
    nrepeat = 3
    epochs = 4
    optim_names = ['Adam', 'Adagrad', 'SGD']
    learning_rates = [1e-3, 1e-5]
    for optim_name in optim_names:
        for lr in learning_rates:
            task_kw = {
                'data': data,
                'epochs': epochs,
                'optim_name': optim_name,
                'optim_kwargs': {
                    'lr': lr
                },
            }
            tasks.extend([task_kw] * nrepeat)
    task_queue = mp.SimpleQueue()
    for i, task in enumerate(tasks):
        print('Task', i + 1, task)
        task_queue.put(task)
    # worker locks
    locks = []
    active_processes = []
    for i in range(nworkers):
        locks.append(mp.Lock())
        active_processes.append(None)
    # results queue
    result_queue = mp.SimpleQueue()
    itask = 0
    while not task_queue.empty():
        for ilock, lock in enumerate(locks):
            if lock.acquire(timeout=1):
                # acquire lock and expect process == None
                assert (active_processes[ilock] is None)
                if task_queue.empty():
                    lock.release()
                    continue
                train_kwargs = task_queue.get()
                igpu = ilock % ngpus
                args = (itask, ilock, igpu, train_kwargs, result_queue)
                p = mp.Process(target=gpu_worker, args=args)
                print('*** Launching task {}/{} on worker {} on GPU {}'.format(
                    itask, len(tasks), ilock, igpu))
                itask += 1
                p.start()
                active_processes[ilock] = p
            else:
                # locked and expect process != None
                existing_process = active_processes[ilock]
                assert (existing_process is not None)
                if existing_process.exitcode is not None:
                    # process is complete; close and release
                    print('*** Process {} finished'.format(
                        existing_process.pid))
                    active_processes[ilock] = None
                    lock.release()
    print('Finished task loop')
    still_running = True
    while still_running:
        still_running = False
        for i, process in enumerate(active_processes):
            if process is None: continue
            if process.exitcode is None:
                still_running = True
                break
            else:
                print('*** Process {} finished'.format(process.pid))
                active_processes[i] = None
        time.sleep(1)
    results = []
    while not result_queue.empty():
        results.append(result_queue.get())
    print('Tasks:', len(tasks), 'results:', len(results))

    def sort_func(element):
        return element[0]

    results = sorted(results, key=sort_func)
    for i, result in enumerate(results):
        print(
            'Task {:4d} worker {:2d} GPU {:2d} dt {:5.1f} s  acc {:5.2f}%  kw: {}'
            .format(*result))
    delta_seconds = time.time() - createtime
    print('Main execution: {:.1f} s'.format(delta_seconds))
Example #30
 def __init__(self):
     self._value = mp.Value("b", False)
     self._lock = mp.Lock()