Example #1
    def __init__(self,
                 data,
                 batch_size,
                 num_steps=1,
                 sample_coverage=50,
                 save_dir=None,
                 num_workers=0,
                 log=True):
        assert data.edge_index is not None
        assert 'node_norm' not in data
        assert 'edge_norm' not in data

        self.N = N = data.num_nodes
        self.E = data.num_edges

        self.adj = SparseTensor(row=data.edge_index[0],
                                col=data.edge_index[1],
                                value=data.edge_attr,
                                sparse_sizes=(N, N))

        self.data = copy.copy(data)
        self.data.edge_index = None
        self.data.edge_attr = None

        self.batch_size = batch_size
        self.num_steps = num_steps
        self.sample_coverage = sample_coverage
        self.num_workers = num_workers
        self.log = log
        self.__count__ = 0

        if self.num_workers > 0:
            self.__sample_queue__ = Queue()
            self.__sample_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_sample__,
                                 args=(self.__sample_queue__, ))
                worker.daemon = True
                worker.start()
                self.__sample_workers__.append(worker)

        path = osp.join(save_dir or '', self.__filename__)
        if save_dir is not None and osp.exists(path):  # pragma: no cover
            self.node_norm, self.edge_norm = torch.load(path)
        else:
            self.node_norm, self.edge_norm = self.__compute_norm__()
            if save_dir is not None:  # pragma: no cover
                torch.save((self.node_norm, self.edge_norm), path)

        if self.num_workers > 0:
            self.__data_queue__ = Queue()
            self.__data_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_data__,
                                 args=(self.__data_queue__, ))
                worker.daemon = True
                worker.start()
                self.__data_workers__.append(worker)
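    # A minimal sketch (not from the original source) of the producer loop that
    # __put_sample__ could run in each daemon worker above: it repeatedly draws
    # a sample via a hypothetical self.__sample__ helper and pushes it into the
    # shared queue; the worker dies with the parent because daemon=True.
    def __put_sample__(self, queue):
        while True:
            sample = self.__sample__(self.batch_size)  # hypothetical sampling helper
            queue.put(sample)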
Example #2
def main():
    # reproducible
    # env.seed(RANDOMSEED)
    np.random.seed(RANDOMSEED)
    torch.manual_seed(RANDOMSEED)

    env = gym.make(ENV_NAME)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    ppo = PPO(state_dim, action_dim, hidden_dim=HIDDEN_DIM)

    if args.train:
        ppo.actor.share_memory()
        ppo.critic.share_memory()
        ShareParameters(ppo.actor_optimizer)
        ShareParameters(ppo.critic_optimizer)
        rewards_queue = mp.Queue()  # used to collect rewards from all processes and plot the curve
        processes = []
        rewards = []

        for i in range(NUM_WORKERS):
            process = Process(
                target=worker,
                args=(i, ppo,
                      rewards_queue))  # args contain both shared and non-shared objects
            process.daemon = True  # daemon processes are terminated when the main process stops
            processes.append(process)

        [p.start() for p in processes]
        while True:  # keep getting the episode reward from the queue
            r = rewards_queue.get()
            if r is not None:
                if len(rewards) == 0:
                    rewards.append(r)
                else:
                    rewards.append(rewards[-1] * 0.9 + r * 0.1)
            else:
                break

            if len(rewards) % 20 == 0 and len(rewards) > 0:
                plot(rewards)

        [p.join() for p in processes]  # wait for all processes to finish

        ppo.save_model(MODEL_PATH)

    if args.test:
        ppo.load_model(MODEL_PATH)
        while True:
            s = env.reset()
            eps_r = 0
            for i in range(EP_LEN):
                env.render()
                s, r, done, _ = env.step(ppo.choose_action(s, True))
                eps_r += r
                if done:
                    break
            print('Episode reward: {}  | Episode length: {}'.format(eps_r, i))
Example #3
def init_jobs(queue, batch_size, num_features):
    # Multiple Processes could also be created here, as a list of Processes
    task = Process(target=prefetch_data, args=(queue, batch_size, num_features))

    task.daemon = True
    task.start()
    return task
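# A minimal sketch, assuming prefetch_data is a producer of the usual form (its
# real implementation is not shown in this example): it keeps building batches
# and pushing them into the queue; being a daemon, it dies with the caller.
import numpy as np

def prefetch_data(queue, batch_size, num_features):
    while True:
        batch = np.random.randn(batch_size, num_features).astype('float32')  # placeholder data
        queue.put(batch)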
Example #4
    def run(self):
        # mp.set_start_method('spawn', force=True)
        with Manager() as manager:
            d = manager.dict()
            self.d = d
            d['train_progress'] = []
            d['best_epoch'] = None
            d['best_epoch_summary'] = None
            d['model'] = None
            d['labels'] = None
            pqueue = mp.Queue()
            out_pqueue = mp.Queue()
            model = self.trainer_kwargs['model']
            # model = copy.deepcopy(model)
            model.share_memory()
            self.trainer_kwargs['model'] = model
            self.trainer_kwargs['num_workers'] = 0
            p = Process(target=main_q, args=(pqueue, out_pqueue, d))
            p.daemon = True
            p.start()
            # pool.apply_async(main_q, args=(pqueue, out_pqueue, d, ))
            # pool.apply_async(main_train, args=(d, self.num_epochs, self.trainer_args, self.trainer_kwargs, self.datbaase_items))
            # pool.starmap(main_q, [(pqueue, out_pqueue, d),])
            pqueue.put(None)
            pqueue.put(self.num_epochs)
            pqueue.put(self.trainer_args)
            pqueue.put(self.trainer_kwargs)
            pqueue.put(self.database_items)
            p.join()
            # pool.close()
            # pool.join()
            print('Process results: ', len(d.keys()))
            # best_epoch = d['best_epoch']
            # best_epoch_sumamry = d['best_epoch_summary']
            # model = d['model']
            # labels = d['labels']
            self.d = get_queue_dict(out_pqueue,
                                    item_names=[
                                        'best_epoch',
                                        'best_epoch_summary',
                                        'model',
                                        'labels',
                                    ])
            best_epoch = self.d['best_epoch']
            best_epoch_summary = self.d['best_epoch_summary']
            model.load_state_dict(self.d['model'])
            labels = self.d['labels']

            self.d = {
                "train_progress": d['train_progress'],
            }
        # best_epoch, best_epoch_summary = self.trainer.train(epochs=self.num_epochs)
        self.complete_func(
            self.host, {
                "best_epoch": best_epoch,
                "best_epoch_summary": best_epoch_sumamry,
                "model": model,
                "labels": labels,
            })
Example #5
def init_parallel_jobs(cfg, queue, fn, ped_data=None, emp_data=None):
    tasks = Process(target=prefetch_data, args=(cfg, queue, fn, ped_data, emp_data))
    # for task in tasks:
    #     task.daemon = True
    #     task.start()
    tasks.daemon = True
    tasks.start()
    return tasks
Example #6
def daemon_process_run(read_conn, write_conn, lock, testcase=0):
    '''
    Two test cases:
    1. generate data from a background process
    2. read data from PCIe in a background process
    '''
    # testcase 1: Generate data from background process
    if testcase == 0:
        read_proc = Process(target=gen_data,
                            args=(shared_arr, npts, nCh, lock))
    # testcase 2: Read data from background process
    elif testcase == 1:
        read_proc = Process(target=get_data,
                            args=(shared_arr, npts, read_conn))
        write_proc = Process(target=write_data, args=(shared_arr, write_conn))
    read_proc.daemon = True
    if testcase == 1:
        write_proc.daemon = True
    read_proc.start()
    if testcase == 1:
        write_proc.start()
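# Note on this example: neither process is joined and both are daemonic, so the
# caller must keep the main process alive (e.g. by reading results from read_conn
# in a loop); as soon as the main process returns, the background reader/writer
# processes are terminated.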
Example #7
def specific_policy_learn(epi,
                          environment_params,
                          environment_wrappers,
                          environment_wrapper_arguments,
                          no_reset=True):
    """ 
    multi-process for learning the task-specific policy rather than
    using the single-process in epi class
    """
    epi.load_model('predictor_and_embedding')
    epi.load_model('epi_policy')
    epi.task_specific_policy.share_memory()
    rewards_queue = mp.Queue()  # used to collect rewards from all processes and plot the curve
    eval_rewards_queue = mp.Queue()  # used to collect offline-evaluated rewards from all processes and plot the curve
    success_queue = mp.Queue()  # used to collect success events from all processes
    eval_success_queue = mp.Queue()
    processes = []
    rewards = []
    success = []
    eval_rewards = []
    eval_success = []

    for i in range(NUM_WORKERS):
        if TASK_POLICY_ALG == 'ppo':
            process = Process(target=ppo_worker, args=(i, epi, environment_params, environment_wrappers, \
            environment_wrapper_arguments, eval_rewards_queue, eval_success_queue, batch_size, no_reset))  # args contain both shared and non-shared objects
        elif TASK_POLICY_ALG == 'td3':
            process = Process(target=td3_worker, args=(i, epi, environment_params, environment_wrappers,\
            environment_wrapper_arguments, rewards_queue, eval_rewards_queue, success_queue, eval_success_queue,\
            replay_buffer, batch_size, explore_steps, noise_decay,\
            update_itr, explore_noise_scale, eval_noise_scale, reward_scale, DETERMINISTIC, hidden_dim, no_reset))
        else:
            raise NotImplementedError
        process.daemon = True  # daemon processes are terminated when the main process stops
        processes.append(process)

    [p.start() for p in processes]
    while True:  # keep getting the episode reward from the queue
        eval_r = eval_rewards_queue.get()
        eval_succ = eval_success_queue.get()

        eval_rewards.append(eval_r)
        eval_success.append(eval_succ)

        if len(eval_rewards) % 20 == 0 and len(eval_rewards) > 0:
            np.save(PREFIX + 'eval_rewards', eval_rewards)
            np.save(PREFIX + 'eval_success', eval_success)

    [p.join() for p in processes]  # wait for all processes to finish
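    # Note: the while-loop above has no break condition, so this join is never reached;
    # pushing a stop sentinel (e.g. None) onto the queue and breaking on it would let the loop exit cleanly.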
Example #8
    def __iter__(self):
        print('Starting processes')
        random.seed(0)
        random.shuffle(self.filepaths)
        filepaths = deque()
        for path in self.filepaths:
            filepaths.append(path)
        self.buffr_processes = []
        args = (self.filepaths, self.buffer, self.partial)
        for i in range(10):
            process = Process(target=fill_buffer, args=args)
            process.daemon = True
            process.start()
            self.buffr_processes.append(process)

        args = (self.buffer, self.batch_queue, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self
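    # (Note: the deque built above is never handed to the workers; each worker
    # receives the full self.filepaths list.)

# Hypothetical sketches (not taken from the original module) of the two Process
# targets used above: fill_buffer pushes preprocessed files into a shared buffer
# queue, and fill_batch groups buffered items into fixed-size batches.
def fill_buffer(filepaths, buffer, partial):
    for path in filepaths:
        buffer.put(partial(path))  # 'partial' is assumed to be a preprocessing callable

def fill_batch(buffer, batch_queue, batch_size):
    while True:
        batch_queue.put([buffer.get() for _ in range(batch_size)])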
Example #9
    def __init__(self, loader):
        self.loader = loader
        self.data_source = loader.data_source
        self.args = loader.args
        self.num_workers = 8
        self.batch_size = loader.batch_size
        self.tokenizer = loader.tokenizer
        self.max_len = loader.max_len
        self.mode = loader.mode
        self._batch_count_in_queue = 0
        self._data = self.get_data()

        self.workers = []
        if self.mode in {'train', 'eval'}:
            self.input_queue = Queue(-1)
            self.output_queue = Queue(-1)
            for _ in range(self.num_workers):
                worker = Process(target=self._data_loop)
                self.workers.append(worker)
            self.__prefetch()
            for worker in self.workers:
                worker.daemon = True
                worker.start()
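    # A hypothetical sketch of the worker loop used as the Process target above
    # (the real _data_loop is not shown): each daemon worker pulls a raw batch
    # from input_queue, tokenizes it and pushes the result to output_queue,
    # stopping when it sees a None sentinel.
    def _data_loop(self):
        while True:
            raw_batch = self.input_queue.get()
            if raw_batch is None:  # stop sentinel
                break
            encoded = [self.tokenizer(text)[:self.max_len] for text in raw_batch]  # assumes a callable tokenizer
            self.output_queue.put(encoded)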
Example #10
def crop_face(args):
    for k, v in default_args.items():
        setattr(args, k, v)
    assert osp.exists(args.data_dir), "The input dir does not exist"
    root_folder_name = args.data_dir.split('/')[-1]
    src_folder = args.data_dir
    dst_folder = args.data_dir.replace(root_folder_name, root_folder_name + '_OPPOFaces')
    lz.mkdir_p(dst_folder, delete=False)
    ds = TestData(src_folder)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size,
                                         num_workers=args.num_workers,
                                         shuffle=False,
                                         pin_memory=True,
                                         drop_last=False
                                         )
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    
    model_dict = model.state_dict()
    # because the model was trained on multiple GPUs, the 'module.' prefix must be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    
    # 2. load dlib model for face detection and landmarks used for face cropping
    queue = Queue()
    lock = Lock()
    consumers = []
    for i in range(args.num_consumers):
        p = Process(target=consumer, args=(queue, lock))
        p.daemon = True
        consumers.append(p)
    for c in consumers:
        c.start()
    # 3. forward
    ttl_nimgs = 0
    ttl_imgs = []
    data_meter = lz.AverageMeter()
    model_meter = lz.AverageMeter()
    post_meter = lz.AverageMeter()
    lz.timer.since_last_check('start crop face')
    for ind, data in enumerate(loader):
        
        data_meter.update(lz.timer.since_last_check(verbose=False))
        if (data['finish'] == 1).all().item():
            logging.info('finish')
            break
        if ind % 10 == 0:
            logging.info(
                f'proc batch {ind}, data time: {data_meter.avg:.2f}, model: {model_meter.avg:.2f}, post: {post_meter.avg:.2f}')
        mask = data['finish'] == 0
        input = data['img'][mask]
        input_np = input.numpy()
        roi_box = data['roi_box'][mask].numpy()
        imgfn = np.asarray(data['imgfn'])[mask.numpy().astype(bool)]
        dst_imgfn = [img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces') for img_fp in imgfn]
        ttl_imgs.extend(dst_imgfn)
        ttl_nimgs += mask.sum().item()
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().astype(np.float32)
        model_meter.update(lz.timer.since_last_check(verbose=False))
        queue.put((imgfn, param, roi_box, dst_imgfn))
        # pts68 = [predict_68pts(param[i], roi_box[i]) for i in range(param.shape[0])]
        # pts68_proc = [predict_68pts(param[i], [0, 0, STD_SIZE, STD_SIZE]) for i in range(param.shape[0])]
        # for img_fp, pts68_, pts68_proc_, img_, dst in zip(imgfn, pts68, pts68_proc, input_np, dst_imgfn):
        #     ## this may need opt to async read write
        #     img_ori = cvb.read_img(img_fp)
        #     pts5 = to_landmark5(pts68_[:2, :].transpose())
        #     warped = preprocess(img_ori, landmark=pts5)
        #     # plt_imshow(warped, inp_mode='bgr');  plt.show()
        #     lz.mkdir_p(osp.dirname(dst), delete=False)
        #     cvb.write_img(warped, dst)
        #
        #     ## this may cause black margin
        #     # pts5 = to_landmark5(pts68_proc_[:2, :].transpose())
        #     # warped = preprocess(to_img(img_), landmark=pts5)
        #     # # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     # dst = img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
        #     # cvb.write_img(warped, dst)
        #     if args.dump_res:
        #         img_ori = cvb.read_img(img_fp)
        #         pts_res = [pts68_]
        #         dst = img_fp.replace(root_folder_name, root_folder_name + '_kpts.demo')
        #         lz.mkdir_p(osp.dirname(dst), delete=False)
        #         draw_landmarks(img_ori, pts_res,
        #                        wfp=dst,
        #                        show_flg=args.show_flg)
        post_meter.update(lz.timer.since_last_check(verbose=False))
    lz.msgpack_dump(ttl_imgs, dst_folder + '/' + 'all_imgs.pk')
    del model, input
    torch.cuda.empty_cache()
    while not queue.empty():
        time.sleep(1)
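    # The busy-wait above only waits until the queue has been drained; because the
    # consumer processes are daemons, whatever they are still writing when the main
    # process exits may be cut short. Sending a stop sentinel and joining the
    # consumers would be the stricter way to finish.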
Example #11
def main():
    # reproducible
    # env.seed(RANDOMSEED)
    np.random.seed(RANDOMSEED)
    torch.manual_seed(RANDOMSEED)

    env = NormalizedActions(gym.make(ENV_NAME).unwrapped)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    ppo = PPO(state_dim, action_dim, hidden_dim=256)
    try:
        ppo.load_model(MODEL_PATH)
    except Exception as e:
        print(
            f'Pretrained models not found in {MODEL_PATH}.\nBuckle up it is going to be a long hot night'
        )
        print("error {}".format(e))
    if args.train:
        ppo.actor.share_memory()
        ppo.actor_old.share_memory()
        ppo.critic.share_memory()
        ShareParameters(ppo.actor_optimizer)
        ShareParameters(ppo.critic_optimizer)
        rewards_queue = mp.Queue()  # used to collect rewards from all processes and plot the curve
        processes = []
        rewards = []

        for i in range(NUM_WORKERS):
            process = Process(
                target=worker,
                args=(i, ppo,
                      rewards_queue))  # args contain both shared and non-shared objects
            process.daemon = True  # daemon processes are terminated when the main process stops
            processes.append(process)

        [p.start() for p in processes]
        while True:  # keep getting the episode reward from the queue
            r = rewards_queue.get()
            if r is not None:
                if len(rewards) == 0:
                    rewards.append(r)
                else:
                    rewards.append(rewards[-1] * 0.9 + r * 0.1)
            else:
                break

            if len(rewards) % 20 == 0 and len(rewards) > 0:
                plot(rewards)

        [p.join() for p in processes]  # wait for all processes to finish

        ppo.save_model(MODEL_PATH)

    if args.test:
        ppo.load_model(MODEL_PATH)
        while True:
            s = env.reset()
            for i in range(EP_LEN):
                env.render()
                s, r, done, _ = env.step(ppo.choose_action(s))
                if done:
                    break
Example #12
def register(freq, func, args):
    print(f'Registered {func} with freq: {freq}')
    p = Process(target=periodic_executor, args=(freq, func, args))
    p.daemon = True
    p.start()
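# A minimal sketch of periodic_executor (hypothetical; the original helper is not
# shown here): call func(*args) every freq seconds inside the daemon process.
import time

def periodic_executor(freq, func, args):
    while True:
        func(*args)
        time.sleep(freq)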
Example #13
def async_save(model_states: OrderedDict, filename) -> Process:
    model_states = copy_cpu_state_dict(model_states)
    p = Process(target=_save, args=(model_states, filename), daemon=True)
    p.daemon = True
    p.start()
    return p
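# Hypothetical sketches of the two helpers used above (the originals are not shown):
# copy_cpu_state_dict detaches tensors to CPU so the child process does not hold
# live GPU/shared references, and _save simply serializes the dict to disk. Note
# that daemon=True is already passed to Process, so the extra p.daemon assignment
# above is redundant.
from collections import OrderedDict
import torch

def copy_cpu_state_dict(state_dict: OrderedDict) -> OrderedDict:
    return OrderedDict((k, v.detach().cpu().clone()) for k, v in state_dict.items())

def _save(state_dict: OrderedDict, filename) -> None:
    torch.save(state_dict, filename)

# Example usage (the checkpoint path is illustrative):
# p = async_save(model.state_dict(), 'checkpoint.pt')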
Example #14
def train_eval(name, model, dataset, optimizer, scheduler, lr=1e-1, weight_decay=5e-4, bs=128, n_epochs=300,
               start_epoch=None, print_freq=1000, val_freq=10000, checkpoint_folder=None, version=-1,
               use_jit=True, use_amp=False, opt_level='O1', **kwargs):
    assert dataset in ('cifar10', 'cifar100')
    if use_amp:
        import apex

    net = model(num_classes=10 if dataset == 'cifar10' else 100, default_init=False)
    net = net.to(device[0])

    opt_sig = signature(optimizer)
    opt_kwargs = dict([(k, kwargs[k]) for k in kwargs.keys() if k in opt_sig.parameters.keys()])
    optimizer = optimizer(net.trainable, lr=lr, weight_decay=weight_decay, **opt_kwargs)
    if scheduler is not None:
        sch_sig = signature(scheduler)
        sch_kwargs = dict([(k, kwargs[k]) for k in kwargs.keys() if k in sch_sig.parameters.keys()])
        scheduler = scheduler(optimizer, **sch_kwargs)

    dataset_ = torchvision.datasets.CIFAR10 if dataset == 'cifar10' else torchvision.datasets.CIFAR100
    train_data = dataset_(root='./data', train=True, download=True, transform=transform_train)
    train_loader = T.utils.data.DataLoader(train_data, batch_size=bs, shuffle=True, num_workers=5)

    if checkpoint_folder is None:
        mon = nnt.Monitor(name, print_freq=print_freq, num_iters=int(np.ceil(len(train_data) / bs)),
                          use_tensorboard=True)
        mon.backup(backup_files)

        mon.dump_rep('network', net)
        mon.dump_rep('optimizer', optimizer)

        states = {
            'model_state_dict': net.state_dict(),
            'opt_state_dict': optimizer.state_dict()
        }

        if scheduler is not None:
            mon.dump_rep('scheduler', scheduler)
            states['scheduler_state_dict'] = scheduler.state_dict()

    else:
        mon = nnt.Monitor(current_folder=checkpoint_folder, print_freq=print_freq, num_iters=len(train_data) // bs,
                          use_tensorboard=True)
        states = mon.load('training.pt', method='torch', version=version)
        net.load_state_dict(states['model_state_dict'])
        optimizer.load_state_dict(states['opt_state_dict'])
        if scheduler:
            scheduler.load_state_dict(states['scheduler_state_dict'])

        if use_amp and 'amp' in states.keys():
            apex.amp.load_state_dict(states['amp'])

        if start_epoch:
            start_epoch = start_epoch - 1
            mon.epoch = start_epoch

        print('Resume from epoch %d...' % mon.epoch)

    if not no_wait_eval:
        eval_data = dataset_(root='./data', train=False, download=True, transform=transform_test)
        eval_loader = T.utils.data.DataLoader(eval_data, batch_size=bs, shuffle=False, num_workers=2)

    if nnt.cuda_available:
        train_loader = nnt.DataPrefetcher(train_loader, device=device[0])
        if not no_wait_eval:
            eval_loader = nnt.DataPrefetcher(eval_loader, device=device[0])

    if use_jit:
        img = T.rand(1, 3, 32, 32).to(device[0])
        net.train(True)
        net_train = T.jit.trace(net, img)
        net.eval()
        net_eval = T.jit.trace(net, img)

    if use_amp:
        if use_jit:
            net_train, optimizer = apex.amp.initialize(net_train, optimizer, opt_level=opt_level)
            net_eval = apex.amp.initialize(net_eval, opt_level=opt_level)
        else:
            net, optimizer = apex.amp.initialize(net, optimizer, opt_level=opt_level)

        if 'amp' not in states.keys():
            states['amp'] = apex.amp.state_dict()

    if use_jit:
        net_train = T.nn.DataParallel(net_train, device_ids=device)
        net_eval = T.nn.DataParallel(net_eval, device_ids=device)
    else:
        net = T.nn.DataParallel(net, device_ids=device)

    def learn(images, labels, reduction='mean'):
        net.train(True)
        optimizer.zero_grad()
        loss, accuracy = get_loss(net_train if use_jit else net, images, labels, reduction=reduction)
        if not (T.isnan(loss) or T.isinf(loss)):
            if use_amp:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
        else:
            raise ValueError('NaN encountered!')

        mon.plot('train-loss', nnt.utils.to_numpy(loss), smooth=.99)
        mon.plot('train-accuracy', nnt.utils.to_numpy(accuracy), smooth=.99)
        del loss, accuracy

    if no_wait_eval:
        q = Queue()
        eval_proc = Process(target=eval_queue,
                            args=(q, mon.current_folder, dataset, bs, use_jit, use_amp, opt_level))
        eval_proc.daemon = True
        eval_proc.start()

    start_epoch = mon.epoch if start_epoch is None else start_epoch
    print('Training...')
    with T.jit.optimized_execution(use_jit):
        for _ in mon.iter_epoch(range(start_epoch, n_epochs)):
            for idx, lr_ in enumerate(scheduler.get_last_lr()):
                mon.plot('lr-%d' % idx, lr_, filter_outliers=False)

            for batch in mon.iter_batch(train_loader):
                batch = nnt.utils.batch_to_device(batch, device[0])

                learn(*batch)
                if val_freq and mon.iter % val_freq == 0:
                    if no_wait_eval:
                        lock.acquire_write()
                        mon.dump('tmp.pt', states, method='torch')
                        lock.release_write()
                        q.put((mon.epoch, mon.iter))
                        q.put(None)
                    else:
                        net.eval()
                        with T.set_grad_enabled(False):
                            losses, accuracies = [], []
                            for itt, batch in enumerate(eval_loader):
                                batch = nnt.utils.batch_to_device(batch, device[0])

                                loss, acc = get_loss(net_eval if use_jit else net, *batch)
                                losses.append(nnt.utils.to_numpy(loss))
                                accuracies.append(nnt.utils.to_numpy(acc))

                            mon.plot('test-loss', np.mean(losses))
                            mon.plot('test-accuracy', np.mean(accuracies))
            mon.dump('training.pt', states, method='torch', keep=10)
            if scheduler is not None:
                scheduler.step()

    if no_wait_eval:
        q.put('DONE')
        eval_proc.join()

    print('Training finished!')
Example #15
def launch_actor(id_actor, args, redis_servor):

    print("id actor = ", id_actor)
    env_actor = Env(args)

    start_time_actor = time.time()

    if args.continue_experiment:
        print(
            "We are restarting a stopped experiment with a model trained for "
            + str(args.step_actors_already_done)
            + " steps"
        )
        initial_T_actor = int(
            (args.step_actors_already_done - args.memory_capacity) / args.nb_actor
        )
        print("initial T actor equal ", initial_T_actor)
        step_to_start_sleep = int(args.step_actors_already_done / args.nb_actor)
    else:
        initial_T_actor = 0
        step_to_start_sleep = int(args.learn_start / args.nb_actor)
    T_actor = initial_T_actor

    index_actor_in_memory = 0
    timestep = 0
    actor_buffer = []
    mem_actor = ReplayRedisMemory(args, redis_servor)

    actor = Actor(args, env_actor.action_space(), redis_servor)

    done_actor = True

    tab_state = []
    tab_action = []
    tab_reward = []
    tab_nonterminal = []

    # We want to warn the user when the agent reaches 100 hours of gameplay while continuously
    # improving its score. On those games the agent is superhuman (and learning could maybe be stopped).
    if not args.disable_SABER_mode:  # SABER mode: episode length can be infinite (100 hours)
        step_100_hours = int(args.max_episode_length / args.action_repeat) - 1

    if id_actor == 0:
        reward_buffer = RewardBuffer(args.evaluation_episodes, args.action_repeat)

    while T_actor <= (args.T_max / args.nb_actor):
        if done_actor:
            if not args.disable_SABER_mode and timestep >= step_100_hours:
                print("Agent reachs 100 hours of gameplay while continuously improving score!"
                      "Agent is superhuman (happened only on Atlantis, Defender and Asteroids)."
                      "Learning could be stopped now...")
            if id_actor == 0 and T_actor > initial_T_actor:
                reward_buffer.update_score_episode_buffer(timestep)
            timestep = 0
            state_buffer_actor = env_actor.reset()
            done_actor = False

        if T_actor % args.replay_frequency == 0:
            actor.reset_noise()  # Draw a new set of noisy weights

        if T_actor < args.learn_start / args.nb_actor:
            # Do random actions before learning start
            action = random.randint(0, env_actor.action_space() - 1)
        else:
            # Choose an action greedily (with noisy weights)
            action = actor.act(state_buffer_actor)

        next_state_buffer_actor, reward, done_actor = env_actor.step(action)  # Step
        if args.render and id_actor == 0:
            env_actor.render()

        if id_actor == 0:
            reward_buffer.update_current_reward_buffer(timestep, reward)

        if args.reward_clip > 0:
            reward = max(min(reward, args.reward_clip), -args.reward_clip)  # Clip rewards
        actor_buffer.append([timestep, state_buffer_actor[-1], action, reward, done_actor])

        if len(tab_state) == 0:
            for current_state in state_buffer_actor:
                tab_state.append(current_state)
        else:
            tab_state.append(state_buffer_actor[-1])
        tab_action.append(action)
        tab_reward.append(reward)
        tab_nonterminal.append(not done_actor)

        if T_actor % args.log_interval == 0:
            log(f"T = {T_actor} / {args.T_max}")
            duration_actor = time.time() - start_time_actor
            print(f"Time between 2 log_interval for actor {id_actor} ({duration_actor:.3f} sec)")
            start_time_actor = time.time()

        if T_actor % args.weight_synchro_frequency == 0:
            actor.load_weight_from_redis()

        # We want to send the actor buffer to the redis memory with correctly initialized priorities
        if len(actor_buffer) >= args.length_actor_buffer:
            if (not mem_actor.transitions.actor_full) and (
                (index_actor_in_memory + len(actor_buffer)) >= mem_actor.transitions.actor_capacity
            ):
                redis_servor.set(cst.IS_FULL_ACTOR_STR + str(id_actor), 1)
                mem_actor.transitions.actor_full = True

            priorities_buffer = actor.compute_priorities(
                tab_state, tab_action, tab_reward, tab_nonterminal, mem_actor.priority_exponent
            )

            # We don't have the next_states for the last n_step states in the buffer, so we just
            # set their priorities to the max priority (this concerns only about 3 out of
            # args.length_actor_buffer experiences, so it is fairly negligible...)
            max_priority = np.float64(redis_servor.get(cst.MAX_PRIORITY_STR))
            last_priorities = np.ones(mem_actor.n) * max_priority

            all_priorities = np.concatenate((priorities_buffer, last_priorities))

            p = Process(
                target=mem_actor.transitions.append_actor_buffer,
                args=(actor_buffer, index_actor_in_memory, id_actor, all_priorities, T_actor),
            )
            p.daemon = True
            p.start()
            index_actor_in_memory = (
                index_actor_in_memory + len(actor_buffer)
            ) % args.actor_capacity
            # Make actors sleep to wait for the learner if synchronization is on!
            if args.synchronize_actors_with_learner and (T_actor >= step_to_start_sleep):
                # Actors are always faster than learner
                T_learner = int(redis_servor.get(cst.STEP_LEARNER_STR))
                while (
                    T_learner + 2 * args.weight_synchro_frequency <= T_actor * args.nb_actor
                ):  # We had a bug at the end because the learner didn't record in redis memory
                    # that it had reached 50M, and the actor kept sleeping forever...
                    time.sleep(cst.TIME_TO_SLEEP)
                    T_learner = int(redis_servor.get(cst.STEP_LEARNER_STR))
            actor_buffer = []

            tab_state = []
            tab_action = []
            tab_reward = []
            tab_nonterminal = []

        # Update target network
        if T_actor % args.target_update == 0:
            actor.update_target_net()

        # Plot and dump to csv every evaluation_interval steps (no actual evaluation is done;
        # we just keep track of the score while training)
        if (
            T_actor % (args.evaluation_interval / args.nb_actor) == 0
            and id_actor == 0
            and T_actor >= (initial_T_actor + args.evaluation_interval / 2)
        ):
            dump_in_csv_and_plot_reward(redis_servor, args, T_actor, reward_buffer, actor)

        state_buffer_actor = next_state_buffer_actor
        timestep += 1
        T_actor += 1
Example #16
def bwas_cpp(args, env: Environment, states: List[State], results_file: str):
    assert (args.env.upper() in [
        'CUBE3', 'CUBE4', 'PUZZLE15', 'PUZZLE24', 'PUZZLE35', 'PUZZLE48',
        'LIGHTSOUT7'
    ])

    # Make c++ socket
    socket_name: str = "%s_cpp_socket" % results_file.split(".")[0]

    try:
        os.unlink(socket_name)
    except OSError:
        if os.path.exists(socket_name):
            raise

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind(socket_name)

    # Get state dimension
    if args.env.upper() == 'CUBE3':
        state_dim: int = 54
    elif args.env.upper() == 'PUZZLE15':
        state_dim: int = 16
    elif args.env.upper() == 'PUZZLE24':
        state_dim: int = 25
    elif args.env.upper() == 'PUZZLE35':
        state_dim: int = 36
    elif args.env.upper() == 'PUZZLE48':
        state_dim: int = 49
    elif args.env.upper() == 'LIGHTSOUT7':
        state_dim: int = 49
    else:
        raise ValueError("Unknown c++ environment: %s" % args.env)

    # start heuristic proc
    num_parallel: int = len(os.environ['CUDA_VISIBLE_DEVICES'].split(","))
    device, devices, on_gpu = nnet_utils.get_device()
    heur_fn_i_q, heur_fn_o_qs, heur_procs = nnet_utils.start_heur_fn_runners(
        num_parallel,
        args.model_dir,
        device,
        on_gpu,
        env,
        all_zeros=False,
        clip_zero=True,
        batch_size=args.nnet_batch_size)
    nnet_utils.heuristic_fn_par(states, env, heur_fn_i_q,
                                heur_fn_o_qs)  # initialize

    heur_proc = Process(target=cpp_listener,
                        args=(sock, args, env, state_dim, heur_fn_i_q,
                              heur_fn_o_qs))
    heur_proc.daemon = True
    heur_proc.start()

    time.sleep(2)  # give socket time to initialize

    solns: List[List[int]] = []
    paths: List[List[State]] = []
    times: List = []
    num_nodes_gen: List[int] = []

    for state_idx, state in enumerate(states):
        # Get string rep of state
        if args.env.upper() == "CUBE3":
            state_str: str = " ".join([str(x) for x in state.colors])
        elif args.env.upper() in [
                "PUZZLE15", "PUZZLE24", "PUZZLE35", "PUZZLE48"
        ]:
            state_str: str = " ".join([str(x) for x in state.tiles])
        elif args.env.upper() in ["LIGHTSOUT7"]:
            state_str: str = " ".join([str(x) for x in state.tiles])
        else:
            raise ValueError("Unknown c++ environment: %s" % args.env)

        popen = Popen([
            './cpp/parallel_weighted_astar', state_str,
            str(args.weight),
            str(args.batch_size), socket_name, args.env, "0"
        ],
                      stdout=PIPE,
                      stderr=PIPE,
                      bufsize=1,
                      universal_newlines=True)
        lines = []
        for stdout_line in iter(popen.stdout.readline, ""):
            stdout_line = stdout_line.strip('\n')
            lines.append(stdout_line)
            if args.verbose:
                sys.stdout.write("%s\n" % stdout_line)
                sys.stdout.flush()

        moves = [int(x) for x in lines[-5].split(" ")[:-1]]
        soln = [x for x in moves][::-1]
        num_nodes_gen_idx = int(lines[-3])
        solve_time = float(lines[-1])

        # record solution information
        path: List[State] = [state]
        next_state: State = state
        transition_costs: List[float] = []

        for move in soln:
            next_states, tcs = env.next_state([next_state], move)

            next_state = next_states[0]
            tc = tcs[0]

            path.append(next_state)
            transition_costs.append(tc)

        solns.append(soln)
        paths.append(path)
        times.append(solve_time)
        num_nodes_gen.append(num_nodes_gen_idx)

        path_cost: float = sum(transition_costs)

        # check soln
        assert search_utils.is_valid_soln(state, soln, env)

        # print to screen
        print("State: %i, SolnCost: %.2f, # Moves: %i, "
              "# Nodes Gen: %s, Time: %.2f" %
              (state_idx, path_cost, len(soln), format(num_nodes_gen_idx,
                                                       ","), solve_time))

    os.unlink(socket_name)

    nnet_utils.stop_heuristic_fn_runners(heur_procs, heur_fn_i_q)

    return solns, paths, times, num_nodes_gen
Example #17
    def wrapper(*args, **kwargs):
        func = Process(target=f, args=args, kwargs=kwargs)
        func.daemon = False
        func.start()
        return func
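# The snippet above is only the inner function of a decorator; a hypothetical
# full version (the outer name is assumed) runs the wrapped callable in a
# separate, non-daemonic process and hands the Process object back to the caller:
def run_in_process(f):
    def wrapper(*args, **kwargs):
        proc = Process(target=f, args=args, kwargs=kwargs)
        proc.daemon = False  # non-daemonic: the child may outlive the parent's return
        proc.start()
        return proc
    return wrapper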
Example #18
        assert f1[-1] == f2[-1]
        last = f1[-1]
        f = np.vstack((f1[:-1], f2[:-1]))
        f = normalize(f, axis=0)
        f = np.vstack((f, [last]))
        # if not osp.exists(osp.dirname(dstfn)):
        mkdir_p(osp.dirname(dstfn), delete=False, verbose=False)
        save_mat(dstfn, f)


queue = Queue(60)
lock = Lock()
consumers = []
for i in range(12):
    p = Process(target=consumer, args=(queue, lock))
    p.daemon = True
    consumers.append(p)
for c in consumers:
    c.start()
comb_from_ = comb_from[0]
assert osp.exists(f'{fea_root}/{comb_from_}')
for fn in glob.glob(f'{fea_root}/{comb_from_}/facescrub/**/*.bin',
                    recursive=True):
    fn2 = fn.replace(comb_from[0], comb_from[1])
    assert osp.exists(fn2), fn2
    fn3 = None  # fn3 = fn.replace(comb_from[0], comb_from[2])
    dstfn = fn.replace(comb_from[0], dst_name)
    queue.put((fn, fn2, fn3, dstfn))
for ind, imgfn in enumerate(imgfns):
    if ind % 99 == 0:
        print(ind, len(imgfns))
Example #19
        td3_trainer.target_policy_net.share_memory()
        ShareParameters(td3_trainer.q_optimizer1)
        ShareParameters(td3_trainer.q_optimizer2)
        ShareParameters(td3_trainer.policy_optimizer)

        rewards_queue = mp.Queue()  # used to collect rewards from all processes and plot the curve

        num_workers = 4  # or: mp.cpu_count()
        processes = []
        rewards = []

        for i in range(num_workers):
            process = Process(target=worker, args=(i, td3_trainer, rewards_queue, replay_buffer, max_episodes, max_steps, batch_size, explore_steps, \
            update_itr, explore_noise_scale, eval_noise_scale, reward_scale, DETERMINISTIC, hidden_dim, model_path))  # args contain both shared and non-shared objects
            process.daemon = True  # daemon processes are terminated when the main process stops
            processes.append(process)

        [p.start() for p in processes]
        while True:  # keep getting the episode reward from the queue
            r = rewards_queue.get()
            if r is not None:
                rewards.append(r)
            else:
                break

            if len(rewards) % 50 == 0 and len(rewards) > 0:
                plot(rewards)
                np.savetxt('rewards_thre_random02.txt',
                           np.array(rewards)[:, np.newaxis],
                           fmt='%.4f',
Example #20
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)

    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ',  action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents
    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = [
            'first_0'
        ]  # SlimeVolley: opponent is the first, the second agent is the learnable one

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)

    load_model(model, args)

    for individual_model in model.agents.values():
        individual_model.policy.share_memory()
        individual_model.policy_old.share_memory()
        individual_model.value.share_memory()
        ShareParameters(individual_model.optimizer)

    path = 'model/' + args.env
    os.makedirs(path, exist_ok=True)

    if args.fictitious:
        path = path + '/fictitious_'

    processes = []
    for p in range(args.num_envs):
        process = Process(target=parallel_rollout, args=(p, args.env, model, writer, max_eps, \
            max_timesteps, selfplay_interval,\
            args.render, path, args.against_baseline, \
            args.selfplay, args.fictitious, SEED))  # args contain both shared and non-shared objects
        process.daemon = True  # daemon processes are terminated when the main process stops
        processes.append(process)

    [p.start() for p in processes]

    [p.join() for p in processes]  # wait for all processes to finish

    env.close()