Example No. 1
def HandleWorkers(server: socket.socket, replay_memory: ReplayMemory,
                  mem_lock: Lock, param_queue: Queue, shutdown: Value):
    print("Listening for new workers...")
    server.settimeout(1)  # timeout period of 1 second

    num_workers = 0
    workers: Dict[int, socket.socket] = dict()
    state_dict = None

    while shutdown.value <= 0:
        try:
            worker, _ = server.accept()
            print("Connected to new worker")
            worker_id = num_workers
            worker_proc = Process(target=ReceivePlayouts,
                                  args=(worker, worker_id, replay_memory,
                                        mem_lock),
                                  daemon=True)
            worker_proc.start()

            if state_dict is not None:
                # Send the new worker the most up-to-date params
                buffer = io.BytesIO()
                torch.save(state_dict, buffer)
                param_bytes = buffer.getvalue()
                communication.Send(worker, param_bytes)

            workers[worker_id] = worker
            num_workers += 1
        except socket.timeout:
            pass

        if not param_queue.empty():
            # Send the most up-to-date params to all the workers
            state_dict = None
            while not param_queue.empty():
                state_dict = param_queue.get()
            assert (state_dict is not None)

            buffer = io.BytesIO()
            torch.save(state_dict, buffer)
            param_bytes = buffer.getvalue()
            print("Sending new params to workers")
            for worker_id, worker in list(workers.items()):
                try:
                    communication.Send(worker, param_bytes)
                except Exception:
                    # Something went wrong with this connection, so remove
                    # this worker (iterating over list(...) makes removal
                    # during the loop safe)
                    print(f"Error with worker {worker_id}, ending connection")
                    workers.pop(worker_id)
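Example No. 1 ships the parameters as raw bytes produced by torch.save into an io.BytesIO buffer. A minimal sketch of the worker-side counterpart, assuming a hypothetical communication.Recv helper that returns exactly the bytes sent by communication.Send (only Send appears in the snippet above):

import io

import torch


def receive_params(worker_socket, model):
    # Hypothetical receive helper mirroring communication.Send above;
    # `communication` is the same module used in the example.
    param_bytes = communication.Recv(worker_socket)
    # Rebuild the state_dict from the raw bytes and load it into the local model.
    state_dict = torch.load(io.BytesIO(param_bytes), map_location="cpu")
    model.load_state_dict(state_dict)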
Example No. 2
def self_multiplay(policy):
    q = Queue()
    finq = []
    procs = []
    policy.train(False)
    for i in range(MaxProcessNum):
        fin = Queue()
        t = Process(target=PlayProcess, args=(i, q, fin, policy))
        t.start()
        procs.append(t)
        finq.append(fin)
    for i in range(MaxProcessNum):
        proc_id = finq[i].get()
        print("finish process(%d)" % proc_id)
        sys.stdout.flush()
    try:
        while not q.empty():
            data_buffer.append(q.get(timeout=1))
    except queue.Empty:  # multiprocessing Queue.get(timeout=...) raises queue.Empty, not TimeoutError
        pass
    print('finish Queue get')
    sys.stdout.flush()
    for i in range(len(procs)):
        p = procs[i]
        p.join(timeout=10)
        if p.is_alive():
            print('forcing process(%d) to terminate' % i)
            sys.stdout.flush()
            p.terminate()
    print('finish join')
    sys.stdout.flush()
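
The drain loop in self_multiplay relies on the fact that a multiprocessing Queue raises queue.Empty when get(timeout=...) expires, never TimeoutError. The same idiom as a small stand-alone helper, sketched here for clarity:

import queue


def drain(q, timeout=1.0):
    # Pull items until the queue stays empty for `timeout` seconds.
    items = []
    while True:
        try:
            items.append(q.get(timeout=timeout))
        except queue.Empty:
            return items
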
def _run_game(process_id: int, game_factory: GameExecutorFactory,
              network: nn.Module, device: torch.device, request_queue: Queue,
              experience_queue: Queue, batch_size: int, transfer_blocks: int,
              transfer_to_device: bool) -> None:
    exploration_rate = 1.
    game = game_factory.create()
    print('* worker %d started' % process_id)
    while True:
        try:
            if not request_queue.empty():
                request: _RunGameRequest = request_queue.get(block=False)
                if request.do_terminate:
                    print('* game worker %d terminated' % process_id)
                    experience_queue.close()
                    request_queue.close()
                    return
                if request.set_exploration_rate is not None:
                    exploration_rate = request.set_exploration_rate

            block = []
            for _ in range(transfer_blocks):
                eps, exps = game.multi_step(network, device, exploration_rate,
                                            batch_size)
                if transfer_to_device:
                    exps = [
                        e.to_device(device, non_blocking=False) for e in exps
                    ]
                block.append((eps, exps))
            experience_queue.put(block, block=True)
        except Exception as e:
            print('error in worker %d: ' % process_id, e)
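
_RunGameRequest is referenced by _run_game but not included in this snippet. A plausible minimal sketch, assuming it only carries the two fields the worker reads:

class _RunGameRequest:
    # Control message consumed by _run_game above (hypothetical definition).
    def __init__(self, do_terminate=False, set_exploration_rate=None):
        self.do_terminate = do_terminate
        self.set_exploration_rate = set_exploration_rate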
Example No. 4
def main():
    args = parse_args()
    categories = parse_categories(parse_data(args.data)['names'])

    cap = cv2.VideoCapture(0)
    frame_queue = Queue()
    preds_queue = Queue()
    cur_dets = None
    frame_lock = Lock()

    proc = Process(target=detect,
                   args=(frame_queue, preds_queue, frame_lock, args))
    proc.start()

    try:

        while True:
            ret, frame = cap.read()
            frame_lock.acquire()
            while not frame_queue.empty():
                frame_queue.get()

            frame_queue.put(frame)
            frame_lock.release()

            if not preds_queue.empty():
                cur_dets = preds_queue.get()

            if cur_dets is not None and len(cur_dets) > 0:
                frame = draw_detections_opencv(frame, cur_dets[0], categories)

            cv2.imshow('frame', frame)
            cv2.waitKey(1)

    except KeyboardInterrupt:
        print('Interrupted')
        proc.terminate()  # detect() presumably loops forever; stop it so join() does not hang
        proc.join()
        cap.release()
        cv2.destroyAllWindows()
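
The capture loop above empties frame_queue before putting the newest frame, so the detector process never works on stale input. The same idea as a small helper, shown only as a sketch:

def put_latest(q, item, lock):
    # Replace whatever is still waiting in the queue with the newest item.
    with lock:
        while not q.empty():
            q.get()
        q.put(item)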
Example No. 5
def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)

    model.share_memory()

    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')

    p_thread = context.Process(target=power_thread,
                               args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread,
                               args=(model_queue, input_tensor_queue,
                                     latency_return, q))

    l_thread.start()
    p_thread.start()

    power_l = list()  # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()  # latency list

    l_thread.join()

    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if power_return.empty() and interval_return.empty(
        ) and latency_return.empty():
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()

    del q
    del power_return
    del latency_return
    del interval_return

    return latency_l, power_l, interval_l
Example No. 6
class IterableParquetDataset(IterableDataset):
    def __init__(self, path, process_func):
        super().__init__()
        dataset = ds.dataset(path)
        self.process_func = process_func

        self.batches = Queue()
        for batch in dataset.to_batches():
            self.batches.put(batch)

    def __iter__(self):
        while True:
            if self.batches.empty():
                self.batches.close()
                break

            batch = self.batches.get().to_pydict()
            batch.update(self.process_func(batch))
            yield batch
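
A usage sketch for IterableParquetDataset, assuming a local file my_dataset.parquet and a user-supplied process_batch function (both placeholders). The internal Queue is drained exactly once, so the dataset is single-pass, and num_workers=0 keeps the queue in a single process:

from torch.utils.data import DataLoader


def process_batch(batch):
    # Placeholder transform: return derived columns to merge into the batch dict.
    return {}


dataset = IterableParquetDataset("my_dataset.parquet", process_batch)
loader = DataLoader(dataset, batch_size=None, num_workers=0)
for batch in loader:
    pass  # each `batch` is the processed pydict yielded by __iter__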
Example No. 7
class WorkerManager:
    def __init__(self, n_workers, actor, args):
        self._now_episode = Value('i', 0)

        self.queue = Queue()
        self.collect_event = Event()

        self.worker = []
        for i in range(n_workers):
            self.worker.append(
                Worker(self.queue, self.collect_event, actor, args))
            time.sleep(1)

        self.process = [
            Process(target=self.worker[i].run, args=(self._now_episode, ))
            for i in range(n_workers)
        ]

        for p in self.process:
            p.start()
        print(f'Start {n_workers} workers.')

    def collect(self):
        result = []
        self.collect_event.set()
        while self.collect_event.is_set():
            # busy-wait until the workers clear the event (data collection done)
            time.sleep(0.01)

        for w in self.worker:
            w.event.wait()

        while not self.queue.empty():
            result.append(self.queue.get())

        for w in self.worker:
            w.event.clear()
        return result

    def now_episode(self):
        value = self._now_episode.value
        return value
def crop_face(args):
    for k, v in default_args.items():
        setattr(args, k, v)
    assert osp.exists(args.data_dir), "The input dir does not exist"
    root_folder_name = args.data_dir.split('/')[-1]
    src_folder = args.data_dir
    dst_folder = args.data_dir.replace(root_folder_name, root_folder_name + '_OPPOFaces')
    lz.mkdir_p(dst_folder, delete=False)
    ds = TestData(src_folder)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size,
                                         num_workers=args.num_workers,
                                         shuffle=False,
                                         pin_memory=True,
                                         drop_last=False
                                         )
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    
    # 2. load dlib model for face detection and landmark used for face cropping
    queue = Queue()
    lock = Lock()
    consumers = []
    for i in range(args.num_consumers):
        p = Process(target=consumer, args=(queue, lock))
        p.daemon = True
        consumers.append(p)
    for c in consumers:
        c.start()
    # 3. forward
    ttl_nimgs = 0
    ttl_imgs = []
    data_meter = lz.AverageMeter()
    model_meter = lz.AverageMeter()
    post_meter = lz.AverageMeter()
    lz.timer.since_last_check('start crop face')
    for ind, data in enumerate(loader):
        
        data_meter.update(lz.timer.since_last_check(verbose=False))
        if (data['finish'] == 1).all().item():
            logging.info('finish')
            break
        if ind % 10 == 0:
            logging.info(
                f'proc batch {ind}, data time: {data_meter.avg:.2f}, model: {model_meter.avg:.2f}, post: {post_meter.avg:.2f}')
        mask = data['finish'] == 0
        input = data['img'][mask]
        input_np = input.numpy()
        roi_box = data['roi_box'][mask].numpy()
        imgfn = np.asarray(data['imgfn'])[mask.numpy().astype(bool)]
        dst_imgfn = [img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces') for img_fp in imgfn]
        ttl_imgs.extend(dst_imgfn)
        ttl_nimgs += mask.sum().item()
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().astype(np.float32)
        model_meter.update(lz.timer.since_last_check(verbose=False))
        queue.put((imgfn, param, roi_box, dst_imgfn))
        # pts68 = [predict_68pts(param[i], roi_box[i]) for i in range(param.shape[0])]
        # pts68_proc = [predict_68pts(param[i], [0, 0, STD_SIZE, STD_SIZE]) for i in range(param.shape[0])]
        # for img_fp, pts68_, pts68_proc_, img_, dst in zip(imgfn, pts68, pts68_proc, input_np, dst_imgfn):
        #     ## this may need opt to async read write
        #     img_ori = cvb.read_img(img_fp)
        #     pts5 = to_landmark5(pts68_[:2, :].transpose())
        #     warped = preprocess(img_ori, landmark=pts5)
        #     # plt_imshow(warped, inp_mode='bgr');  plt.show()
        #     lz.mkdir_p(osp.dirname(dst), delete=False)
        #     cvb.write_img(warped, dst)
        #
        #     ## this may cause black margin
        #     # pts5 = to_landmark5(pts68_proc_[:2, :].transpose())
        #     # warped = preprocess(to_img(img_), landmark=pts5)
        #     # # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     # dst = img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
        #     # cvb.write_img(warped, dst)
        #     if args.dump_res:
        #         img_ori = cvb.read_img(img_fp)
        #         pts_res = [pts68_]
        #         dst = img_fp.replace(root_folder_name, root_folder_name + '_kpts.demo')
        #         lz.mkdir_p(osp.dirname(dst), delete=False)
        #         draw_landmarks(img_ori, pts_res,
        #                        wfp=dst,
        #                        show_flg=args.show_flg)
        post_meter.update(lz.timer.since_last_check(verbose=False))
    lz.msgpack_dump(ttl_imgs, dst_folder + '/' + 'all_imgs.pk')
    del model, input
    torch.cuda.empty_cache()
    while not queue.empty():
        time.sleep(1)
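
The consumer target started above is not part of this snippet. A minimal hypothetical sketch of a daemon consumer that drains the (imgfn, param, roi_box, dst_imgfn) tuples queued by crop_face:

def consumer(queue, lock):
    # Runs in a daemon process: pull work items until the parent process exits.
    while True:
        imgfn, param, roi_box, dst_imgfn = queue.get()
        with lock:
            # reconstruct landmarks from `param`, crop and write the faces (omitted)
            pass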
Example No. 9
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()
    ac = AC(latent_num, cnn_chanel_num, stat_dim)
    writer.add_graph(ac, (torch.zeros([1, 1, img_shape[0], img_shape[1]]),
                          torch.zeros([1, stat_dim])))
    optim = GlobalAdam([{
        'params': ac.encode_img.parameters(),
        'lr': 2.5e-5
    }, {
        'params': ac.encode_stat.parameters(),
        'lr': 2.5e-5
    }, {
        'params': ac.pi.parameters(),
        'lr': 2.5e-5
    }, {
        'params': ac.actor.parameters(),
        'lr': 2.5e-5
    }, {
        'params': ac.f.parameters()
    }, {
        'params': ac.V.parameters()
    }],
                       lr=5e-3,
                       weight_decay=weight_decay)

    if os.path.exists('S3_state_dict.pt'):
        ac.load_state_dict(torch.load('S3_state_dict.pt'))
        optim.load_state_dict(torch.load('S3_Optim_state_dict.pt'))
    else:
        ac.load_state_dict(torch.load('../stage2/S2_state_dict.pt'),
                           strict=False)

    result_queue = Queue()
    validate_queue = Queue()
    gradient_queue = Queue()
    loss_queue = Queue()
    ep_cnt = Value('i', 0)
    optimizer_lock = Lock()
    processes = []
    ac.share_memory()

    optimizer_worker = Process(target=update_shared_model,
                               args=(gradient_queue, optimizer_lock, optim,
                                     ac))
    optimizer_worker.start()

    for no in range(mp.cpu_count() - 3):
        worker = Worker(no, ac, ep_cnt, optimizer_lock, result_queue,
                        gradient_queue, loss_queue)
        worker.start()
        processes.append(worker)
    validater = Validate(ac, ep_cnt, optimizer_lock, validate_queue)
    validater.start()

    best_reward = 0
    while True:
        with ep_cnt.get_lock():
            if not result_queue.empty():
                ep_cnt.value += 1
                reward, money, win_rate = result_queue.get()
                objective_actor, loss_critic, loss_f = loss_queue.get()

                writer.add_scalar('Interaction/Reward', reward, ep_cnt.value)
                writer.add_scalar('Interaction/Money', money, ep_cnt.value)
                writer.add_scalar('Interaction/win_rate', win_rate,
                                  ep_cnt.value)

                writer.add_scalar('Update/objective_actor', objective_actor,
                                  ep_cnt.value)
                writer.add_scalar('Update/loss_critic', loss_critic,
                                  ep_cnt.value)
                writer.add_scalar('Update/loss_f', loss_f, ep_cnt.value)

                with optimizer_lock:
                    if reward > best_reward:
                        best_reward = reward
                        torch.save(ac.state_dict(), 'S3_BEST_state_dict.pt')
                    if ep_cnt.value % save_every == 0:
                        torch.save(ac.state_dict(), 'S3_state_dict.pt')
                        torch.save(optim.state_dict(),
                                   'S3_Optim_state_dict.pt')

            if not validate_queue.empty():
                val_reward, val_money, val_win_rate = validate_queue.get()

                writer.add_scalar('Validation/reward', val_reward,
                                  ep_cnt.value)
                writer.add_scalar('Validation/money', val_money, ep_cnt.value)
                writer.add_scalar('Validation/win_rate', val_win_rate,
                                  ep_cnt.value)

    for worker in processes:
        worker.join()
    optimizer_worker.kill()
Example No. 10
    consumers.append(p)
for c in consumers:
    c.start()
comb_from_ = comb_from[0]
assert osp.exists(f'{fea_root}/{comb_from_}')
for fn in glob.glob(f'{fea_root}/{comb_from_}/facescrub/**/*.bin',
                    recursive=True):
    fn2 = fn.replace(comb_from[0], comb_from[1])
    assert osp.exists(fn2), fn2
    fn3 = None  # fn3 = fn.replace(comb_from[0], comb_from[2])
    dstfn = fn.replace(comb_from[0], dst_name)
    queue.put((fn, fn2, fn3, dstfn))
for ind, imgfn in enumerate(imgfns):
    if ind % 99 == 0:
        print(ind, len(imgfns))
    fn = f'{fea_root}/{comb_from[0]}/megaface/{imgfn}'
    fn2 = f'{fea_root}/{comb_from[1]}/megaface/{imgfn}'
    fn3 = f'{fea_root}/{comb_from[2]}/megaface/{imgfn}'
    fn = glob.glob(f'{fn}*.bin')[0]
    fn2 = glob.glob(f'{fn2}*.bin')[0]
    assert osp.exists(fn2), fn2
    fn3 = None  # fn3 = glob.glob(f'{fn3}*.bin')[0]
    dstfn = fn2.replace(comb_from[1], dst_name)
    # if not osp.exists((dstfn)):
    #     mkdir_p(osp.dirname(dstfn), delete=False)
    queue.put((fn, fn2, fn3, dstfn))

while not queue.empty():
    time.sleep(1)
    print('wait ...')
class MultiprocessAsyncGameExecutor(AsyncGameExecutor):
    def __init__(self, game_factory: GameExecutorFactory, network: nn.Module,
                 device: torch.device, processes: int, batches_ahead: int,
                 batch_size: int, states_on_device: bool):
        self._states_on_device = states_on_device
        self._device = device
        self._experience_queue = Queue(maxsize=processes + 1)
        block_size = max(1, batches_ahead - processes)
        self.block_buffer = []
        print('* starting %d workers (batch size: %d, block size: %d)' %
              (processes, batch_size, block_size))
        self._processes = []
        self._request_queues = []
        for i in range(processes):
            request_queue = Queue(maxsize=10)
            # Transferring to the GPU in the other process does not work: it throws no error, but training does not converge
            p = Process(target=_run_game,
                        args=(
                            i,
                            game_factory,
                            network,
                            device,
                            request_queue,
                            self._experience_queue,
                            batch_size,
                            block_size,
                            False,
                        ))
            p.start()
            self._request_queues.append(request_queue)
            self._processes.append(p)

    def _send_to_all(self, request, block=False):
        for request_queue in self._request_queues:
            request_queue.put(request, block=block)

    def get_experiences(self):
        if len(self.block_buffer) == 0:
            block_buffer = self._experience_queue.get(block=True)
            if self._states_on_device:
                for eps, exps in block_buffer:
                    exps = [e.to_device(self._device) for e in exps]
                    self.block_buffer.append((eps, exps))
            else:
                self.block_buffer.extend(block_buffer)
        return self.block_buffer.pop()

    def update_exploration_rate(self, exploration_rate):
        self._send_to_all(
            _RunGameRequest(set_exploration_rate=exploration_rate), block=True)

    def close(self):
        print('* shutting down workers')
        self._send_to_all(_RunGameRequest(do_terminate=True))
        # wake the workers
        try:
            while not self._experience_queue.empty():
                try:
                    self._experience_queue.get(block=False)
                except queue.Empty:
                    pass
        except ConnectionResetError:
            pass
        except FileNotFoundError:
            pass

        self._experience_queue.close()
        for p in self._processes:
            p.join(1000)
        for q in self._request_queues:
            q.close()
        self._experience_queue.close()
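
A usage sketch for MultiprocessAsyncGameExecutor, assuming a concrete GameExecutorFactory, network and device are available (the names below are placeholders):

executor = MultiprocessAsyncGameExecutor(game_factory, network, device,
                                         processes=4, batches_ahead=8,
                                         batch_size=32, states_on_device=True)
executor.update_exploration_rate(0.1)
episodes, experiences = executor.get_experiences()  # blocks until a worker delivers a block
executor.close()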
Example No. 12
        print("Already better than target, breaking...")
        break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
            p_queue.put((s_id, s))

    # retrieve results
    if args.display:
        pbar = tqdm(total=pop_size * n_samples)
    for _ in range(pop_size * n_samples):
        while r_queue.empty():
            sleep(.1)
        r_s_id, r = r_queue.get()
        r_list[r_s_id] += r / n_samples
        if args.display:
            pbar.update(1)
    if args.display:
        pbar.close()

    es.tell(solutions, r_list)
    es.disp()

    # evaluation and saving
    if epoch % log_step == log_step - 1:
        best_params, best, std_best = evaluate(solutions, r_list)
        print("Current evaluation: {}".format(best))
Example No. 13
def train_explorer(logdir,
                   epochs=10,
                   n_samples=4,
                   pop_size=4,
                   display=True,
                   max_workers=10):
    results = {}
    results['best'] = []
    # multiprocessing variables
    num_workers = min(max_workers, n_samples * pop_size)
    time_limit = 1000

    # create tmp dir if it does not exist and clean it if it does
    tmp_dir = join(logdir, 'tmp_exp')
    if not exists(tmp_dir):
        mkdir(tmp_dir)
    else:
        for fname in listdir(tmp_dir):
            unlink(join(tmp_dir, fname))

    # create exp dir if it does not exist
    explore_dir = join(logdir, 'explore')
    if not exists(explore_dir):
        mkdir(explore_dir)

    ################################################################################
    #                           Thread routines                                    #
    ################################################################################
    def slave_routine(p_queue, r_queue, e_queue, p_index):
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue and e_queue the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result).  The same
        parameter can appear multiple times in p_queue, displaying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        gpu = p_index % torch.cuda.device_count()
        device = torch.device(
            'cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
        sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')


        with torch.no_grad():
            r_gen = RolloutGenerator(logdir, device, time_limit)

            while e_queue.empty():
                if p_queue.empty():
                    sleep(.1)
                else:
                    s_id, params = p_queue.get()
                    r_queue.put((s_id, r_gen.rollout(params)))

    ################################################################################
    #                Define queues and start workers                               #
    ################################################################################
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    for p_index in range(num_workers):
        Process(target=slave_routine,
                args=(p_queue, r_queue, e_queue, p_index)).start()

    ################################################################################
    #                           Evaluation                                         #
    ################################################################################
    def evaluate(solutions, results, rollouts=100):
        """ Give current controller evaluation.

        Evaluation is minus the cumulated reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulated reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            p_queue.put((s_id, best_guess))

        print("Evaluating...")
        for _ in tqdm(range(rollouts)):
            while r_queue.empty():
                sleep(.1)
            restimates.append(r_queue.get()[1])

        return best_guess, np.mean(restimates), np.std(restimates)

    ################################################################################
    #                           Launch CMA                                         #
    ################################################################################
    controller = Controller(LSIZE, RSIZE, ASIZE)  # dummy instance

    # define current best and load parameters
    cur_best = None
    ctrl_file = join(explore_dir, 'best.tar')
    print("Attempting to load previous best...")
    if exists(ctrl_file):
        state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': pop_size})

    epoch = 0
    log_step = 3
    while not es.stop():
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(n_samples):
                p_queue.put((s_id, s))

        # retrieve results
        if display:
            pbar = tqdm(total=pop_size * n_samples)
        for _ in range(pop_size * n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / n_samples
            if display:
                pbar.update(1)
        if display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            best_params, best, std_best = evaluate(solutions, r_list)

            # log the best
            results['best'].append(best)

            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(
                    -cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save(
                    {
                        'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()
                    }, join(explore_dir, 'best.tar'))

            if -best > target_return:
                print(
                    "Terminating controller training with value {}...".format(
                        best))
                break

        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')

    return results
class VideoProcessingPipeline(object):
    """
    Manages the acquisition and preprocessing of video frames from the webcam.
    A pipeline with two processes is used: the first process denoises frames and
    queues the result to the second process which calculates the optical flows
    on CPU, and queues back the moving average to the main process. This moving
    average is used as attention prior by the model.
    """
    def __init__(self,
                 img_size,
                 img_cfg,
                 frames_window=13,
                 flows_window=5,
                 skip_frames=2,
                 cam_res=(640, 480),
                 denoising=True):
        """
        :param img_size: the images input size of the neural network.
        :param img_cfg: the config parameters for image processing.
        :param frames_window: the number of webcam frames input at once into
            the neural network to make a prediction step. Best results tend
            to be obtained for roughly a bit less than one second.
        :param flows_window: the number of optical flows used to calculate an
            attention prior. Defaults to 5. Change at your own risks.
        :param skip_frames: down-sampling factor of the webcam frames. Defaults
            to 2 in order to roughly obtain 15 FPS with a 30 FPS webcam. This
            down-sampling is basic and could be improved to support ratios such
            as 2/3 to obtain 20 FPS.
        :param cam_res: webcam resolution (width, height). The application was
            only tested in 640x480. Change at your own risks.
        :param denoising: activate the denoising process. Defaults to True.
            Most useful with low-quality webcams.
        """
        if frames_window not in [9, 13, 17, 21]:
            raise ValueError('Invalid window size for webcam frames: `%s`' %
                             str(frames_window))
        if flows_window not in [3, 5, 7, 9]:
            raise ValueError('Invalid window size for optical flows: `%s`' %
                             str(flows_window))
        if flows_window > frames_window:
            raise ValueError(
                'Optical flow window cannot be wider than camera frames window'
            )

        self.img_size = img_size
        # optical flows can be computed in lower resolution w/o harming results
        self.opt_size = img_size // 2
        self.frames_window = frames_window
        self.flows_window = flows_window
        self.skip_frames = skip_frames
        self.total_frames = 0  # total number of frames acquired
        self.cam_res = cam_res
        self.denoising = denoising
        self.img_frames = [
            np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8)
        ] * (self.frames_window // 2)
        self.gray_frames = [
            np.zeros((self.opt_size, self.opt_size), dtype=np.uint8)
        ] * (self.frames_window // 2)
        self.priors = []

        # init multiprocessing
        self.q_parent, self.q_prior = Queue(), Queue()

        # start denoising process
        if self.denoising:
            self.q_denoise = Queue()
            self.p_denoise = Process(
                target=denoise_frame,
                args=(self.q_denoise, self.q_prior, img_cfg.getint('h'),
                      img_cfg.getint('template_window_size'),
                      img_cfg.getint('search_window_size')))
            self.p_denoise.start()
            print('Denoising enabled')
        else:
            print('Denoising disabled')

        # start prior calculation process
        self.p_prior = Process(target=calc_attention_prior,
                               args=(self.opt_size, self.flows_window,
                                     self.q_prior, self.q_parent))
        self.p_prior.start()

        # initialise camera
        self.cap = cv.VideoCapture(0)
        if self.cap.isOpened():
            self.cap_fps = int(round(self.cap.get(cv.CAP_PROP_FPS)))
            self.cap.set(3, self.cam_res[0])
            self.cap.set(4, self.cam_res[1])
            print('Device @%d FPS' % self.cap_fps)
        else:
            raise IOError('Failed to open webcam capture')

        # raw images
        self.last_frame = collections.deque(maxlen=self.cap_fps)
        # cropped region of the raw images
        self.last_cropped_frame = collections.deque(maxlen=self.cap_fps)

        # acquire and preprocess the exact number of frames needed
        # to make the first prior map
        for i in range((frames_window // 2) + 1):
            self.acquire_next_frame(enable_skip=False)

        # now wait for the first prior to be returned
        while len(self.priors) == 0:
            if not self.q_parent.empty():
                # de-queue a prior
                prior, flow = self.q_parent.get(block=False)
                self.priors.append(prior)

            # sleep while the queue is empty
            time.sleep(0.01)

    def _center_crop(self, img, target_shape):
        """
        Returns a center crop of the provided image.

        :param img: the image to crop.
        :param target_shape: the dimensions of the crop.
        :return: the cropped image.
        """
        h, w = target_shape
        y, x = img.shape[:2]
        start_y = max(0, y // 2 - (h // 2))
        start_x = max(0, x // 2 - (w // 2))
        return img[start_y:start_y + h, start_x:start_x + w]

    def acquire_next_frame(self, enable_skip=True):
        """
        Reads the next frame from the webcam and starts the asynchronous
        preprocessing. The video stream is down-sampled as necessary to
        reach the desired FPS.

        :param enable_skip: enables down-sampling of the webcam stream.
            Must be True except during initialisation.
        :return: the last frame acquired or None if that frame was skipped
            due to down-sampling of the webcam stream.
        """
        ret, frame = self.cap.read()
        if not ret:
            self.terminate()
            raise IOError('Failed to read the next frame from webcam')

        self.total_frames += 1
        if not enable_skip:
            return self._preprocess_frame(frame)
        elif (self.total_frames % self.skip_frames) == 0:
            return self._preprocess_frame(frame)
        return None

    def _preprocess_frame(self, frame):
        """
        Crops, converts to gray scale, resizes and sends the newly acquired
        webcam frame to the preprocessing pipeline.

        :param frame: the last acquired frame.
        :return: the last acquired frame.
        """
        # crop a square at the center of the frame
        rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        rgb = self._center_crop(rgb, (self.cam_res[1], self.cam_res[1]))
        self.last_frame.append(frame)
        self.last_cropped_frame.append(rgb)
        # convert to gray scale and resize
        gray = cv.cvtColor(rgb, cv.COLOR_RGB2GRAY)
        gray = cv.resize(gray, (self.opt_size, self.opt_size))
        rgb = cv.resize(rgb, (self.img_size, self.img_size))
        # queue to relevant child process
        if self.denoising:
            self.q_denoise.put(gray)
        else:
            self.q_prior.put(gray)
        self.img_frames.append(rgb)
        self.gray_frames.append(gray)
        return frame

    def get_model_input(self, dequeue=True):
        """
        Gets the list of images and the prior needed for the inference
        of the current frame. Use `dequeue` to retrieve the next prior
        from the queue. The caller must first verify that the queue is
        non-empty.

        :param dequeue: must be set to True except during initialisation.
        :return: images ndarray and the corresponding prior
        """
        # de-queue a prior
        if dequeue:
            prior, flow = self.q_parent.get(block=False)
            self.priors.append(prior)

        # ensure enough frames have been preprocessed
        n_frames = self.frames_window
        assert len(self.img_frames) >= n_frames
        assert len(self.gray_frames) >= n_frames
        assert len(self.priors) == 1

        imgs = np.stack(self.img_frames[:self.frames_window], axis=0)
        self.img_frames.pop(0)  # slide window to the right
        self.gray_frames.pop(0)

        return imgs, [self.priors.pop(0)]

    def terminate(self):
        """Terminates processes, closes queues and releases video capture."""
        if self.denoising:
            self.q_denoise.put(None)
            time.sleep(0.2)
            self.p_denoise.terminate()
        else:
            self.q_prior.put(None)
            time.sleep(0.2)
        self.p_prior.terminate()
        time.sleep(0.1)

        if self.denoising:
            self.p_denoise.join(timeout=0.5)
        self.p_prior.join(timeout=0.5)

        if self.denoising:
            self.q_denoise.close()
        self.q_parent.close()
        self.cap.release()
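
A sketch of the acquisition loop VideoProcessingPipeline is built for, assuming img_cfg is a configparser section with the keys read in __init__; the model call itself is omitted:

pipeline = VideoProcessingPipeline(img_size=224, img_cfg=img_cfg)
imgs, prior = pipeline.get_model_input(dequeue=False)  # first prior was queued during init
try:
    while True:
        frame = pipeline.acquire_next_frame()
        if frame is None:
            continue  # frame dropped by stream down-sampling
        if not pipeline.q_parent.empty():
            imgs, prior = pipeline.get_model_input()
            # run the network on (imgs, prior) here
finally:
    pipeline.terminate()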
class PPOTrainer:
    def __init__(self, args):
        tmp_env = make_env(args.env)
        self.obs_shape = tmp_env.observation_space.shape
        self.num_actions = tmp_env.action_space.n
        self.c_in = self.obs_shape[0]
        del tmp_env

        self.horizon = args.horizon
        self.eta = args.eta
        self.epoch = args.epoch
        self.batch_size = args.batch * args.actors
        self.gamma = args.gamma
        self.lam = args.lam
        self.num_actors = args.actors
        self.eps = args.eps
        self.num_iter = (
            args.epoch * args.actors * args.horizon
        ) // self.batch_size  # how many times to run SGD on the buffer

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.queues = [Queue() for i in range(self.num_actors)]
        # used as a waiting mechanism: wait for all the agents to env.step()
        self.barrier = Queue()
        self.score_channel = Queue()

        # these are shmem np.arrays
        self.state, self.reward, self.finished = self.init_shared()

        self.workers = [
            Worker(i, args.env, self.queues[i], self.barrier, self.state,
                   self.reward, self.finished, self.score_channel)
            for i in range(self.num_actors)
        ]
        self.start_workers()

        self.model = Policy(self.c_in, self.num_actions).to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=self.eta)

        # used for logging and graphing
        self.stat = {
            'scores': [],
            'steps': [],
            'clip_losses': [],
            'value_losses': [],
            'entropies': []
        }

    def init_shared(self):
        state_shape = (self.num_actors, *self.obs_shape)
        scalar_shape = (self.num_actors, 1)

        state = np.empty(state_shape, dtype=np.float32)
        state = RawArray(c_float, state.reshape(-1))
        state = np.frombuffer(state, c_float).reshape(state_shape)

        reward = np.empty(scalar_shape, dtype=np.float32)
        reward = RawArray(c_float, reward.reshape(-1))
        reward = np.frombuffer(reward, c_float).reshape(scalar_shape)

        finished = np.empty(scalar_shape, dtype=np.float32)
        finished = RawArray(c_float, finished.reshape(-1))
        finished = np.frombuffer(finished, c_float).reshape(scalar_shape)

        return state, reward, finished

    def start_workers(self):
        for worker in self.workers:
            worker.start()

    def initialize_state(self):
        for i in range(self.num_actors):
            self.queues[i].put(-1)
        self.wait_for_agents()

    @timing_wrapper
    def broadcast_actions(self, actions):
        actions = actions.cpu().numpy()
        for i in range(self.num_actors):
            self.queues[i].put(actions[i])
        self.wait_for_agents()

        next_state = torch.tensor(self.state).to(self.device)
        reward = torch.tensor(self.reward).to(self.device)
        done = torch.tensor(self.finished).to(self.device)
        return next_state, reward, done

    def wait_for_agents(self):
        for i in range(self.num_actors):
            self.barrier.get()

    def setup_scheduler(self, T_max):
        num_steps = T_max // (self.horizon * self.num_actors)
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optim, lambda x: max(1 - x / num_steps, 0))

    @timing_wrapper
    def train(self, T_max, graph_name=None):
        self.setup_scheduler(T_max)

        global_step = 0

        self.initialize_state()
        state = torch.tensor(self.state).to(self.device)
        while global_step < T_max:

            states = []
            actions = []
            rewards = []
            finished = []
            sampled_lps = []  # sampled log probabilities
            values = []

            time_start = time.time()
            duration_fwd = 0
            with torch.no_grad():
                for t in range(self.horizon):
                    global_step += self.num_actors

                    logit, value = self.model(state)
                    prob = torch.softmax(logit, dim=1)
                    log_prob = torch.log_softmax(logit, dim=1)

                    action = prob.multinomial(1)
                    sampled_lp = log_prob.gather(1, action)

                    (next_state, reward,
                     done), duration_brdcst = self.broadcast_actions(action)

                    # appending to buffer
                    states.append(state)
                    actions.append(action)
                    rewards.append(reward)
                    finished.append(done)
                    sampled_lps.append(sampled_lp)
                    values.append(value)

                    state = next_state

                    duration_fwd += duration_brdcst

                _, V = self.model(next_state)
                values.append(V)

            time_forward = time.time()

            # GAE estimation
            GAEs, duration_GAE = self.compute_GAE(rewards, finished, values)

            duration_backward = self.run_gradient_descent(
                states, actions, sampled_lps, values, GAEs)

            time_end = time.time()

            total_duration = time_end - time_start
            percent_broadcast = duration_fwd / total_duration * 100
            percent_forward = (time_forward -
                               time_start) / total_duration * 100
            percent_GAE = duration_GAE / total_duration * 100
            percent_backward = duration_backward / total_duration * 100

            # print(f"<Time> Total: {total_duration:.2f} | forward: {percent_forward:.2f}% (broadcast {percent_broadcast:.2f}%) | GAE: {percent_GAE:.2f}% | backward: {percent_backward:.2f}%")
            if global_step % (self.num_actors * self.horizon * 30) == 0:
                while not self.score_channel.empty():
                    score, step = self.score_channel.get()
                    self.stat['scores'].append(score)
                    self.stat['steps'].append(step)
                now = datetime.datetime.now().strftime("%H:%M")
                print(
                    f"Step {global_step} | Mean of last 10 scores: {np.mean(self.stat['scores'][-10:]):.2f} | Time: {now}"
                )
                if graph_name is not None:
                    plot(global_step, self.stat, graph_name)
        # Finish
        plot(global_step, self.stat, graph_name)

    @timing_wrapper
    def compute_GAE(self, rewards, finished, values):
        GAEs = []
        advantage = 0
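        # Backward GAE recursion; (1 - finished[i]) zeroes the bootstrap term
        # across episode boundaries:
        #   delta_i = rewards[i] + gamma * (1 - finished[i]) * values[i + 1] - values[i]
        #   A_i     = delta_i + gamma * lam * (1 - finished[i]) * A_{i+1}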
        for i in reversed(range(self.horizon)):
            td_error = rewards[i] + (
                1 - finished[i]) * self.gamma * values[i + 1] - values[i]
            advantage = td_error + (
                1 - finished[i]) * self.gamma * self.lam * advantage
            GAEs.append(advantage)
        GAEs = torch.cat(GAEs[::-1]).to(self.device)

        # NOTE: Below is currently not in use because I don't know how to take the 'finished' tensor into account
        # NOTE: This version is much, much faster than the python-looped version above
        # NOTE: But in terms of the total time taken, it doesn't make much of a difference. (~2% compared to ~0.05%)
        # rewards = torch.stack(rewards)
        # finished = torch.stack(finished)
        # values = torch.stack(values)

        # td_error = rewards + (1 - finished) * self.gamma * values[1:] - values[:-1]
        # td_error = td_error.cpu()

        # GAEs = scipy.signal.lfilter([1], [1, -self.gamma * self.lam], td_error.flip(dims=(0,)), axis=0)
        # GAEs = np.flip(GAEs, axis=0)  # flip it back again
        # GAEs = GAEs.reshape(-1, GAEs.shape[-1])  # (horizon, num_actors, 1) --> (horizon * num_actors, 1)
        # GAEs = torch.tensor(GAEs).float().to(self.device)

        return GAEs

    @timing_wrapper
    def run_gradient_descent(self, states, actions, sampled_lps, values, GAEs):

        states = torch.cat(states)
        actions = torch.cat(actions)
        sampled_lps = torch.cat(sampled_lps)
        values = torch.cat(values[:-1])
        targets = GAEs + values

        self.scheduler.step()
        # Running SGD for K epochs
        for it in range(self.num_iter):
            # Batch indices
            idx = np.random.randint(0, self.horizon * self.num_actors,
                                    self.batch_size)

            state = states[idx]
            action = actions[idx]
            sampled_lp = sampled_lps[idx]
            GAE = GAEs[idx]
            value = values[idx]
            target = targets[idx]

            # Normalize advantages
            GAE = (GAE - GAE.mean()) / (GAE.std() + 1e-8)

            logit_new, value_new = self.model(state)
            # Clipped values are needed because sometimes values can unexpectedly get really big
            clipped_value_new = value + torch.clamp(value_new - value,
                                                    -self.eps, self.eps)

            # Calculating policy loss
            prob_new = torch.softmax(logit_new, dim=1)
            lp_new = torch.log_softmax(logit_new, dim=1)
            entropy = -(prob_new * lp_new).sum(1).mean()

            sampled_lp_new = lp_new.gather(1, action)

            ratio = torch.exp(sampled_lp_new - sampled_lp)
            surr1 = ratio * GAE
            surr2 = torch.clamp(ratio, 1 - self.eps, 1 + self.eps) * GAE
            clip_loss = torch.min(surr1, surr2).mean()

            # Calculating value loss
            value_loss1 = (value_new - target).pow(2)
            value_loss2 = (clipped_value_new - target).pow(2)
            value_loss = 0.5 * torch.max(value_loss1, value_loss2).mean()

            final_loss = -clip_loss + value_loss - 0.01 * entropy

            self.optim.zero_grad()
            final_loss.backward()

            # total_norm = 0
            # for p in self.model.parameters():
            #     param_norm = p.grad.data.norm(2)
            #     total_norm += param_norm.item() ** 2
            # total_norm = total_norm ** (1. / 2)
            # print(total_norm)

            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
            self.optim.step()

            # graphing
            self.stat['clip_losses'].append(clip_loss.item())
            self.stat['value_losses'].append(value_loss.item())
            self.stat['entropies'].append(entropy.item())
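
A usage sketch for PPOTrainer, assuming an args namespace with the fields read in __init__ (env, horizon, eta, epoch, batch, actors, gamma, lam, eps) and that Policy, Worker and make_env are importable. Judging from how broadcast_actions is unpacked above, timing_wrapper appears to return a (result, elapsed) pair:

trainer = PPOTrainer(args)
_, train_seconds = trainer.train(T_max=1_000_000, graph_name='ppo_scores')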
Example No. 16
def train(experiment: int, batch: int, resume: bool):
    cfg = OthelloConfig(experiment, batch)
    manager = Manager()
    buffer = manager.list()
    replay_buffer = ReplayBuffer(buffer)
    shared_state_dicts = manager.dict()
    message_queue = Queue()
    # each log is a dictionary and must contain the "gs" and "type" keys
    log_queue = Queue()
    writer = SummaryWriter(cfg.dir_log)
    if resume:
        print("Loading replay buffer to resume training...")
        with open(cfg.dir_replay_buffer, "rb") as f:
            buff_list = pickle.load(f)
        replay_buffer.save_training_data(buff_list)
        del buff_list
        print("Replay buffer loaded.")
    training_worker = TrainingWorker("Training Worker", message_queue,
                                     log_queue, shared_state_dicts,
                                     replay_buffer, cfg.device_name_tw, cfg,
                                     resume)
    evaluation_worker = EvaluationWorker("Evaluation Worker", message_queue,
                                         log_queue, shared_state_dicts,
                                         cfg.device_name_ew, cfg, resume)
    self_play_workers = []
    for i in range(cfg.num_self_play_workers):
        self_play_workers.append(
            SelfPlayWorker("Self-Play Worker-" + str(i), message_queue,
                           log_queue, shared_state_dicts, replay_buffer,
                           cfg.device_names_sp[i], cfg))
    print("Starting training...")
    training_worker.start()
    evaluation_worker.start()
    for worker in self_play_workers:
        worker.start()
    print("Training started.")
    try:
        while training_worker.is_alive():
            if log_queue.empty():
                time.sleep(1.0)
                continue
            log = log_queue.get()
            for k, v in log.items():
                if k in ["gs", "type"]:
                    continue
                if log["type"] == "scalar":
                    writer.add_scalar(k, v, log["gs"])
                else:
                    print("Unknown log type found:", log["type"])
            del log
    except KeyboardInterrupt:
        print("KeyboardInterrupt, stopping training...")
    finally:
        for i in range(cfg.num_self_play_workers * 5):
            message_queue.put(cfg.message_interrupt)
        training_worker.join()
        evaluation_worker.join()
        for worker in self_play_workers:
            worker.join()
        print("Saving replay buffer...")
        buff_list = list(buffer)
        with open(cfg.dir_replay_buffer, "wb") as f:
            pickle.dump(buff_list, f)
        del buff_list
        print("Replay buffer saved.")
Example No. 17
class Synthetic(Process):
    def __init__(self, agent, dataloader, settings):
        super().__init__()

        self.agent = agent
        self.dataloader = dataloader
        self.settings = settings

        self.queue = Queue(maxsize=settings.QUEUE_LEN)
        self.put_flag = Queue(maxsize=1)
        self.get_flag = Queue(maxsize=1)
        self.done = False

    def update_settings(self, settings):
        self.settings = settings

    def update_agent(self, target_agent):
        self.agent.load_state_dict(target_agent.state_dict())

    def fetch_data(self):
        num_batch = self.settings.NUM_BATCH_WHILE_SYNTHETIC
        while self.put_flag.empty():
            out = []
            for _ in range(num_batch):
                d = self.queue.get()
                if self.queue.qsize() < num_batch:
                    self.queue.put(d)
                out.append(d)
            yield utils.cat_namedtuple_list(out, dim=0)

        # Put a signal on the flag queue
        self.get_flag.put(True)

    def run(self):
        """ Generate Data Queue
        """
        settings = self.settings
        for d in self.dataloader:
            episode_data, episode_interpolate_ratio, episode_source_pose = [], [], []

            mesh = d["mesh"].to(settings.SYNTHETIC_DEVICE)
            raw_data = utils.variable_namedtuple(d["data"],
                                                 settings.SYNTHETIC_DEVICE)

            source_pose = raw_data.init_pose
            target_pose = raw_data.target_pose
            intrinsic = raw_data.Intrinsic
            settings.set_intrinsic(intrinsic)

            for _ in range(settings.SYNTHETIC_EPISODE_LEN):
                episode_source_pose.append(source_pose)
                center_points, center_depth = utils.translation_to_voxel_and_depth(
                    source_pose.Translation.translation, intrinsic,
                    self.settings)
                try:
                    syn_data, interpolate_ratio = self.agent.synthetic(
                        observed_image=raw_data.image,
                        observed_depth=raw_data.depth,
                        observed_mask=raw_data.mask,
                        init_pose=source_pose,
                        mesh=mesh,
                        center_points=center_points,
                        center_depth=center_depth,
                        settings=settings)
                    if settings.SYNTHETIC_EPISODE_LEN > 1:
                        state_feature, mask, flow = self.agent.state_encoding(
                            syn_data)
                        action = self.agent.action_encoding(
                            state_feature, interpolate_ratio)
                        source_pose = utils.apply_action_to_pose(
                            action, source_pose, settings)
                        source_pose = utils.detach_namedtuple(source_pose)
                    episode_data.append(syn_data)
                    episode_interpolate_ratio.append(interpolate_ratio)
                except Exception as e:
                    print(e)
            if len(episode_data) != settings.SYNTHETIC_EPISODE_LEN or len(
                    episode_interpolate_ratio
            ) != settings.SYNTHETIC_EPISODE_LEN:
                # Something may be wrong while generating data
                continue
            # append data to queue
            for i in range(settings.SYNTHETIC_EPISODE_LEN):
                syn_raw_data = utils.SynRawData(
                    data=episode_data[i],
                    Intrinsic=intrinsic,
                    target_pose=target_pose,
                    init_pose=episode_source_pose[i],
                    model_points=raw_data.model_points,
                    interpolate_ratio=episode_interpolate_ratio[i])
                syn_raw_data = utils.variable_namedtuple(syn_raw_data,
                                                         device="cpu")
                self.queue.put(syn_raw_data)
        # Put a signal on the flag queue
        self.put_flag.put(True)
        # Wait for the main thread to finish the last data fetch
        while self.get_flag.empty():
            time.sleep(2)
Example No. 18
def run(args):
    p_queue = Queue()
    r_queue = Queue()
    e_queue = Queue()

    latent = 32
    mixture = 256
    size = latent + mixture
    controller = Controller(size, 3)

    for i in range(args.max_workers):
        Process(target=slave_routine,
                args=(p_queue, r_queue, e_queue, i, args.logdir)).start()

    cur_best = None
    savefile = args.logdir / 'best.tar'
    if savefile.exists():
        print(f'Loading from {savefile}')
        state = torch.load(savefile.as_posix(), map_location={'cuda:0': 'cpu'})
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])


    parameters = controller.parameters()
    sigma = 0.1
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), sigma,
                                  {'popsize': args.pop_size})

    epoch = 0
    while not es.stop():
        if cur_best is not None and -cur_best > args.target_return:
            print('Already better than target, breaking...')
            break

        r_list = [0] * args.pop_size  # result list
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(args.n_samples):
                p_queue.put((s_id, s))

        # Retrieve results
        if args.display:
            pbar = tqdm(total=args.pop_size * args.n_samples)
        for _ in range(args.pop_size * args.n_samples):
            while r_queue.empty():
                sleep(.1)
            r_s_id, r = r_queue.get()
            r_list[r_s_id] += r / args.n_samples
            if args.display:
                pbar.update(1)
        if args.display:
            pbar.close()

        es.tell(solutions, r_list)
        es.disp()

        # CMA-ES seeks to minimize, so we want to multiply the reward we
        # get in a rollout by -1.

        best_params, best, std_best = evaluate(solutions, r_list, p_queue,
                                               r_queue)
        if (not cur_best) or (cur_best > best):
            cur_best = best
            print(f'Saving new best with value {-cur_best}+-{std_best}')
            load_parameters(best_params, controller)
            torch.save({'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()},
                       savefile)
            # Also keep a plain copy of the best controller weights
            torch.save(controller.state_dict(), args.logdir / 'controller.pt')
        if -best > args.target_return:
            print(f'Terminating controller training with value {best}...')
            break
        epoch += 1

    es.result_pretty()
    e_queue.put('EOP')
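
run() above leaves the worker side implicit: each slave_routine process pulls (s_id, params) tuples from p_queue, evaluates them in a rollout, pushes (s_id, result) onto r_queue, and stops once e_queue receives the 'EOP' sentinel. A rough sketch of such a worker follows, assuming module-level imports of torch, sleep, and a RolloutGenerator-style evaluator whose rollout() returns minus the cumulated reward; the evaluator and its constructor arguments are assumptions, not the project's exact API.

def slave_routine(p_queue, r_queue, e_queue, p_index, logdir):
    # Sketch only: one worker process evaluating controller parameters.
    gpu = p_index % max(torch.cuda.device_count(), 1)
    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        r_gen = RolloutGenerator(logdir, device, time_limit=1000)  # assumed evaluator
        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
                continue
            s_id, params = p_queue.get()
            # rollout() is assumed to return minus the cumulated reward,
            # since CMA-ES minimizes its objective.
            r_queue.put((s_id, r_gen.rollout(params)))
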
Exemplo n.º 19
0
            0
        ] * pop_size  # result list. like np.zeros(pop_size).tolist()
        solutions = es.ask()

        # push parameters to queue
        for s_id, s in enumerate(solutions):
            for _ in range(n_samples):
                p_queue.put((s_id, s))

        # This slave call steals the data that the other slave calls need..
        if epoch % log_step != 0:
            slave_routine()  # fill r_queue from p_queue, which was populated above

        # print("we just put something in p_queue")

        while not r_queue.empty():
            # print("We are in this for loop?")
            result_list_idx, r = r_queue.get()
            try:
                result_list[result_list_idx] += r / n_samples
                # print(f'r_queue is not empty', result_list)
            except Exception as e:
                print(f'result_list_idx is {result_list_idx}')
                print(f'Caught error. {e}')

        es.tell(solutions, result_list)
        es.disp()

        # evaluation and saving
        if epoch % log_step == 0:
            slave_routine(
Exemplo n.º 20
0
                          a_dim,
                          g_net,
                          g_opt,
                          update_iter=10,
                          is_render=is_render,
                          use_cuda=use_cuda)
        # (self, env_id, idx, child_conn, queue, s_dim, a_dim, g_net, g_opt, update_iter=10, is_render=False, use_cuda=False):
        worker.start()
        workers.append(worker)
        parent_conns.append(parent_conn)

    g_episode = 0
    g_step = 0
    while g_episode < max_episode:

        # queue.get() blocks until a worker reports its state
        idx, command, parameter = queue.get()

        if command == "Result":
            episode, step, reward, x_pos = parameter
            g_episode += 1
            g_step += step

            print('[ Worker %2d ] ' % (idx), end='')
            print("Episode : %5d\tStep : %5d\tReward : %5d\t\tX_pos : %5d" %
                  (g_episode, g_step, reward, x_pos))

            writer.add_scalar('perf/x_pos', x_pos, g_step)
Exemplo n.º 21
0
if __name__ == '__main__':
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    result_queue = Queue()

    x = []
    sample_num = 64
    for t_step in range(500, 20001, 500):
        workers = []
        for _ in range(8):
            worker = Worker(result_queue, t_step)
            worker.start()
            workers.append(worker)
        
        seen = 0
        while seen < sample_num:
            # result_queue.get() blocks until a worker reports a result
            profit, max_drawdown = result_queue.get()
            x.append([t_step, profit, max_drawdown])
            print(t_step, profit, max_drawdown)
            seen += 1

        for worker in workers:
            worker.join()
                


    df = pd.DataFrame(np.array(x), columns=['total_step', 'profit', 'max_drawdown'])
    df.to_csv('draw-profit-vs-step-arg-max.csv')
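
The sweep above relies on a Worker process that backtests a policy trained for total_step steps and reports (profit, max_drawdown) tuples on result_queue; with 8 workers and sample_num = 64, each worker is assumed to contribute 8 samples. A rough sketch under those assumptions is below; run_backtest is a placeholder, not the project's real evaluation code.

from multiprocessing import Process


class Worker(Process):
    def __init__(self, result_queue, total_step, samples_per_worker=8):
        super().__init__()
        self.result_queue = result_queue
        self.total_step = total_step
        self.samples_per_worker = samples_per_worker

    def run(self):
        for _ in range(self.samples_per_worker):
            # run_backtest is a placeholder for training/evaluating a policy
            # for self.total_step steps and measuring profit and max drawdown.
            profit, max_drawdown = run_backtest(self.total_step)
            self.result_queue.put((profit, max_drawdown))
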
Exemplo n.º 22
0
class AsyncLogger(Logger):
    @staticmethod
    def log_fn(self, stop_event: Event):
        try:
            self._super_create_loggers()
            self.response_queue.put({
                k: self.__dict__[k]
                for k in ["save_dir", "tb_logdir", "is_sweep"]
            })

            while True:
                try:
                    cmd = self.draw_queue.get(True, 0.1)
                except EmptyQueue:
                    if stop_event.is_set():
                        break
                    else:
                        continue

                self._super_log(*cmd)
                self.response_queue.put(True)
        except:
            print("Logger process crashed.")
            raise
        finally:
            print("Logger: syncing")
            if self.use_wandb:
                wandb.join()

            stop_event.set()
            print("Logger process terminating...")

    def create_loggers(self):
        self._super_create_loggers = super().create_loggers
        self.stop_event = Event()
        self.proc = Process(target=self.log_fn, args=(self, self.stop_event))
        self.proc.start()

        atexit.register(self.finish)

    def __init__(self, *args, **kwargs):
        self.queue = []

        self.draw_queue = Queue()
        self.response_queue = Queue()
        self._super_log = super().log
        self.waiting = 0

        super().__init__(*args, **kwargs)

        self.__dict__.update(self.response_queue.get(True))

    def log(self, plotlist, step=None):
        if self.stop_event.is_set():
            return

        if not isinstance(plotlist, list):
            plotlist = [plotlist]

        plotlist = [p for p in plotlist if p]
        if not plotlist:
            return

        plotlist = U.apply_to_tensors(plotlist, lambda x: x.detach().cpu())
        self.queue.append((plotlist, step))
        self.flush(wait=False)

    def enqueue(self, data, step: Optional[int]):
        self.draw_queue.put((data, step))
        self.waiting += 1

    def wait_logger(self, wait=False):
        cond = (lambda: not self.response_queue.empty()) if not wait else (
            lambda: self.waiting > 0)
        already_printed = False
        while cond() and not self.stop_event.is_set():
            will_wait = self.response_queue.empty()
            if will_wait and not already_printed:
                already_printed = True
                sys.stdout.write("Warning: waiting for logger... ")
                sys.stdout.flush()
            try:
                self.response_queue.get(True, 0.2)
            except EmptyQueue:
                continue
            self.waiting -= 1

        if already_printed:
            print("done.")

    def flush(self, wait: bool = True):
        while self.queue:
            plotlist, step = self.queue[0]

            for i, p in enumerate(plotlist):
                if isinstance(p, PlotAsync):
                    res = p.get(wait)
                    if res is not None:
                        plotlist[i] = res
                    else:
                        if wait:
                            assert p.failed
                            # Exception in the worker thread
                            print(
                                "Exception detected in a PlotAsync object. Syncing logger and ignoring further plots."
                            )
                            self.wait_logger(True)
                            self.stop_event.set()
                            self.proc.join()

                        return

            self.queue.pop(0)
            self.enqueue(plotlist, step)

        self.wait_logger(wait)

    def finish(self):
        if self.stop_event.is_set():
            return

        self.flush(True)
        self.stop_event.set()
        self.proc.join()
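
AsyncLogger is built around a request/acknowledge pattern: every enqueue() on draw_queue is matched by one put on the response queue, and self.waiting counts the outstanding requests so flush() knows when the child process has caught up. Below is a minimal standalone sketch of that same pattern, stripped of the logging specifics; all names in it are illustrative only.

from multiprocessing import Process, Queue, Event
from queue import Empty


def worker(request_q, ack_q, stop_event):
    # Drain requests until the stop event is set and the queue runs dry.
    while True:
        try:
            item = request_q.get(True, 0.1)
        except Empty:
            if stop_event.is_set():
                break
            continue
        # ... do the slow work (drawing, uploading, ...) on `item` here ...
        ack_q.put(True)  # acknowledge, so the parent can track outstanding work


if __name__ == '__main__':
    request_q, ack_q, stop = Queue(), Queue(), Event()
    proc = Process(target=worker, args=(request_q, ack_q, stop))
    proc.start()
    for i in range(5):
        request_q.put(i)
    for _ in range(5):
        ack_q.get()  # wait until every request has been acknowledged
    stop.set()
    proc.join()
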
Exemplo n.º 23
0
    def learn(
        self,
        total_timesteps: int,
        callback: MaybeCallback = None,
        log_interval: int = 4,
        eval_env: Optional[GymEnv] = None,
        eval_freq: int = -1,
        n_eval_episodes: int = 5,
        tb_log_name: str = "run",
        eval_log_path: Optional[str] = None,
        reset_num_timesteps: bool = True,
    ) -> "OffPolicyAlgorithm":

        total_timesteps, callback = self._setup_learn(
            total_timesteps, eval_env, callback, eval_freq, n_eval_episodes,
            eval_log_path, reset_num_timesteps, tb_log_name)

        callback.on_training_start(locals(), globals())

        # train vae
        print("Train VAE...")
        while self.num_timesteps < total_timesteps:
            rollout = self.collect_rollouts(
                self.env,
                train_freq=self.train_freq,
                action_noise=self.action_noise,
                callback=callback,
                learning_starts=self.learning_starts,
                replay_buffer=self.replay_buffer,
                log_interval=log_interval,
            )

            if rollout.continue_training is False:
                break

            if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
                # If no `gradient_steps` is specified,
                # do as many gradient steps as steps performed during the rollout
                print("T VAE")
                gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
                self.train_vae(batch_size=self.batch_size,
                               gradient_steps=gradient_steps)
            """
            gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
            self.train_vae(batch_size=self.batch_size, gradient_steps=gradient_steps)
            """
        # train mdnrnn
        print("Train MDNRNN...")
        self.replay_buffer = ReplayBufferAD(
            self.buffer_size,
            self.observation_space,
            self.action_space,
            self.device,
            optimize_memory_usage=self.optimize_memory_usage,
        )

        total_timesteps = 30
        while self.num_timesteps < total_timesteps:
            rollout = self.collect_rollouts(
                self.env,
                train_freq=self.train_freq,
                action_noise=self.action_noise,
                callback=callback,
                learning_starts=self.learning_starts,
                replay_buffer=self.replay_buffer,
                log_interval=log_interval,
            )

            if rollout.continue_training is False:
                break

            if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
                # If no `gradient_steps` is specified,
                # do as many gradient steps as steps performed during the rollout
                print("T MDNRNN")
                gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
                self.train_mdnrnn(batch_size=self.batch_size,
                                  gradient_steps=gradient_steps)
            """
            gradient_steps = self.gradient_steps if self.gradient_steps > 0 else rollout.episode_timesteps
            self.train_mdnrnn(batch_size=self.batch_size, gradient_steps=gradient_steps)
            """
        # train controller
        print("Train Controller...")
        p_queue = Queue()
        r_queue = Queue()
        e_queue = Queue()
        num_workers = 16

        for p_index in range(num_workers):
            Process(target=self.slave_routine,
                    args=(p_queue, r_queue, e_queue, p_index)).start()

        cur_best = None

        parameters = self.controller.parameters()
        es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                      {'popsize': 4})

        epoch = 0
        log_step = 3
        while not es.stop():
            if cur_best is not None and -cur_best > 950:
                print("Already better than target, breaking...")
                break

            r_list = [0] * 4  # result list
            solutions = es.ask()
            # push parameters to queue
            i = 0
            for s_id, s in enumerate(solutions):
                for _ in range(4):
                    i += 1
                    p_queue.put((s_id, s))

            # retrieve results
            for _ in range(16):
                while r_queue.empty():
                    sleep(.1)
                r_s_id, r = r_queue.get()
                r_list[r_s_id] += r / 4

            es.tell(solutions, r_list)
            es.disp()
            # evaluation and saving
            if epoch % log_step == log_step - 1:
                best_params, best, std_best = self.evaluate(
                    p_queue, r_queue, solutions, r_list)
                print("Current evaluation: {}".format(best))
                if not cur_best or cur_best > best:
                    cur_best = best
                    print("Saving new best with value {}+-{}...".format(
                        -cur_best, std_best))
                    load_parameters(best_params, self.controller)
                if -best > 950:
                    print("Terminating controller training with value {}...".
                          format(best))
                    break

            epoch += 1

        es.result_pretty()
        e_queue.put('EOP')

        callback.on_training_end()

        return self
Exemplo n.º 24
0
class StatProcess(Process):
    def __init__(self, *args):
        """
        Statistics process saves the statistics obtained from workers.
        In particular, the shared models are saved every Config.MODEL_SAVE_FREQUENCY episodes.
        Moreover, some statistics are logged every Config.LOG_STATS_FREQUENCY episodes.
        """
        super(StatProcess, self).__init__()
        self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.episode_count = Value('i', 0)
        self.model_save = Value('i', 0)
        self.exit_flag = Value('i', 0)

        #:obj:`dict`: Dictionary of DPS models for RL.
        self.agents = {}
        for model, env_id in zip(args, Config.ENV_IDS):
            self.agents[env_id] = model
        #float: Time at start for logging.
        self._start_time = time.time()

    def run(self):
        """
        Runs the statistics process.
        (i) Get statistics from shared memory queue.
            If process cannot find data for some time, it may time out.
        (ii) Saves statistics to file.
        (iii) Increments episode count.
        (iv) Communicates to server that model may be saved after n episodes.
        (v) Logs current episode statistics after m episodes.
        """
        print('Start gathering statistics.')
        sys.stdout.flush()
        with open(Config.RESULTS_FILE, 'a') as results_logger, \
             open(Config.RESULTS_LOSS_FILE, 'a') as loss_logger, \
             open(Config.SELECTION_FILE, 'a') as select_logger:
            while True:
                # (i) Get statistics. Ignore errors when exiting.
                try:
                    if Config.TRAIN_MODE == 'policy':
                        # Get episode log.
                        episode_time, env_id, \
                        total_reward, length = self.episode_log_q.get(timeout=Config.WAIT_STATS_INTERRUPT)
                    loss_q_empty = self.ae_loss_log_q.empty()
                    if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                        # Get loss log.
                        training_time, loss_type, env_id_loss, \
                        loss, training_count = self.ae_loss_log_q.get(timeout=Config.WAIT_STATS_INTERRUPT)
                        self.episode_count.value += 1
                except (FileNotFoundError, ConnectionResetError) as error:
                    if self.exit_flag.value:
                        warnings.warn(
                            f'Ignored error in statistics while trying to close: {error}'
                        )
                    else:
                        raise error

                # (ii) Saves statistics.
                if Config.TRAIN_MODE == 'policy':
                    # Save episode log.
                    results_logger.write(
                        '%s, %s, %10.4f, %d\n' %
                        (episode_time.strftime("%Y-%m-%d %H:%M:%S"), env_id,
                         total_reward, length))
                    results_logger.flush()
                if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                    # Save loss log.
                    loss_logger.write(
                        '%s, %s, %s, %d, %10.8f\n' %
                        (training_time.strftime("%Y-%m-%d %H:%M:%S"),
                         loss_type, env_id_loss, training_count, loss))
                    loss_logger.flush()
                if (Config.TRAIN_MODE == 'selection'
                        and self.episode_count.value %
                        Config.SELECTION_SAVE_FREQUENCY == 0
                        and self.episode_count.value != 0
                        and not loss_q_empty):
                    # Save selection log.
                    for env_id in Config.ENV_IDS:
                        selection = self.agents[
                            env_id].selection.selectors.data.tolist()
                        select_logger.write('%s, %s\n' %
                                            (env_id, str(selection)))
                    select_logger.flush()

                # (iii) Increments episode count.
                if Config.TRAIN_MODE == 'policy':
                    self.episode_count.value += 1

                # (iv) Tells server to save model.
                if Config.SAVE_MODELS and self.episode_count.value % Config.MODEL_SAVE_FREQUENCY == 0:
                    self.model_save.value = 1

                # (v) Logs some statistics.
                if Config.TRAIN_MODE == 'policy' and self.episode_count.value % Config.LOG_STATS_FREQUENCY == 0:
                    print(
                        '[ Time: %8d ]         '
                        '[ Environment type: %5s ]    '
                        '[ Episode #%8d with total Score %10.4f and length %8d. ]'
                        % (int(time.time() - self._start_time), env_id,
                           self.episode_count.value, total_reward, length))
                if Config.TRAIN_MODE == 'selection' and not loss_q_empty:
                    print('[ Training #%12d ] '
                          '[ Episode #%8d ] '
                          '[ Loss for type: %6s ]    '
                          '[ Trainer for type: %5s ]    '
                          '[ Loss: %10.8f. ]' %
                          (training_count, self.episode_count.value, loss_type,
                           env_id_loss, loss))
                sys.stdout.flush()
        print('Statistics have been closed.')
        sys.stdout.flush()
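
A rough sketch of how a training script might drive StatProcess in 'policy' mode follows, assuming the per-environment models have been built in the order of Config.ENV_IDS and that workers report (timestamp, env_id, total_reward, length) tuples; models and finished_episodes() are placeholders standing in for the real training code.

from datetime import datetime

# Hypothetical wiring; finished_episodes() stands in for the real workers.
stats = StatProcess(*models)
stats.start()

for env_id, total_reward, length in finished_episodes():
    stats.episode_log_q.put((datetime.now(), env_id, total_reward, length))
    if stats.model_save.value:       # StatProcess asked for a checkpoint
        stats.model_save.value = 0   # reset after saving the shared models

stats.exit_flag.value = 1            # shutdown errors are then only warned about
stats.terminate()                    # run() loops forever, so terminate() rather than join()
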
Exemplo n.º 25
0
def _call_mods_from_fast5s_cpu2(motif_seqs, chrom2len, fast5s_q, len_fast5s,
                                positions, model_path, success_file, args):
    # features_batch_q = mp.Queue()
    # errornum_q = mp.Queue()
    features_batch_q = Queue()
    errornum_q = Queue()

    # pred_str_q = mp.Queue()
    pred_str_q = Queue()

    nproc = args.nproc
    nproc_call_mods = nproc_to_call_mods_in_cpu_mode
    if nproc <= nproc_call_mods + 1:
        nproc = nproc_call_mods + 1 + 1

    fast5s_q.put("kill")
    features_batch_procs = []
    for _ in range(nproc - nproc_call_mods - 1):
        p = mp.Process(target=_read_features_fast5s_q,
                       args=(fast5s_q, features_batch_q, errornum_q,
                             motif_seqs, chrom2len, positions, args))
        p.daemon = True
        p.start()
        features_batch_procs.append(p)

    call_mods_gpu_procs = []
    for _ in range(nproc_call_mods):
        p_call_mods_gpu = mp.Process(target=_call_mods_q,
                                     args=(model_path, features_batch_q,
                                           pred_str_q, success_file, args))
        p_call_mods_gpu.daemon = True
        p_call_mods_gpu.start()
        call_mods_gpu_procs.append(p_call_mods_gpu)

    # print("write_process started..")
    p_w = mp.Process(target=_write_predstr_to_file,
                     args=(args.result_file, pred_str_q))
    p_w.daemon = True
    p_w.start()

    errornum_sum = 0
    while True:
        running = any(p.is_alive() for p in features_batch_procs)
        while not errornum_q.empty():
            errornum_sum += errornum_q.get()
        if not running:
            break

    for p in features_batch_procs:
        p.join()
    features_batch_q.put("kill")

    for p_call_mods_gpu in call_mods_gpu_procs:
        p_call_mods_gpu.join()

    # print("finishing the write_process..")
    pred_str_q.put("kill")

    p_w.join()

    print("%d of %d fast5 files failed.." % (errornum_sum, len_fast5s))
Exemplo n.º 26
0
def train():
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    writer = SummaryWriter()
    s2 = S2(latent_num, cnn_chanel_num, stat_dim).to(device).share_memory()

    writer.add_graph(
        s2, (torch.zeros([1, 1, img_shape[0], img_shape[1]]).to(device),
             torch.zeros([1, stat_dim]).to(device)))

    optim = GlobalAdam([{
        'params': s2.encode_img.parameters()
    }, {
        'params': s2.encode_stat.parameters()
    }, {
        'params': s2.pi.parameters()
    }, {
        'params': s2.actor.parameters()
    }],
                       lr=1e-2,
                       weight_decay=0.01)

    if os.path.exists('S2_state_dict.pt'):
        s2.load_state_dict(torch.load('S2_state_dict.pt'))
        optim.load_state_dict(torch.load('S2_Optim_state_dict.pt'))

    pair_queue = Queue(10000)
    validate_queue = Queue()
    optimizer_lock = Lock()

    process = []
    data_list = [
        'A8888.XDCE', 'AL8888.XSGE', 'AU8888.XSGE', 'C8888.XDCE', 'M8888.XDCE',
        'RU8888.XSGE', 'SR8888.XZCE'
    ]
    for no in range(min(mp.cpu_count() - 1, len(data_list))):  # cap at the number of CSV files
        data = pd.read_csv(f"../data/{data_list[no]}_5m.csv")
        worker = Worker_Generator(no, data, pair_queue)
        worker.start()
        process.append(worker)
    validater = Validate(s2, optimizer_lock, validate_queue)
    validater.start()

    epochs = 0
    # NOTE: this loop has no exit condition; the worker.join() calls below are
    # only reached if a termination criterion is added here.
    while True:
        imgs = []
        stats = []
        cates = []
        seen = 0
        while seen < minibatch:
            img, stat, cate = pair_queue.get()
            imgs.append(img)
            stats.append(stat)
            cates.append(cate)
            seen += 1

        imgs = torch.tensor(imgs).float().to(device)
        stats = torch.tensor(stats).float().to(device)
        g_t = torch.tensor(cates).long().to(device)
        pred = s2(imgs, stats)
        loss = F.cross_entropy(pred, g_t)
        accr = (pred.argmax(1) == g_t).sum().item() / minibatch

        with optimizer_lock:
            optim.zero_grad()
            loss.backward()
            optim.step()

        if not validate_queue.empty():
            val_reward, val_money, val_win = validate_queue.get()
            writer.add_scalar('Validate/reward', val_reward, epochs)
            writer.add_scalar('Validate/money', val_money, epochs)
            writer.add_scalar('Validate/win_rate', val_win, epochs)

        writer.add_scalar('Train/Loss', loss.item(), epochs)
        writer.add_scalar('Train/Accr', accr, epochs)
        epochs += 1

        if epochs % save_every == 0:
            torch.save(s2.state_dict(), 'S2_state_dict.pt')
            torch.save(optim.state_dict(), 'S2_Optim_state_dict.pt')

    for worker in process:
        worker.join()
Exemplo n.º 27
0
            break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()


    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
            p_queue.put((s_id, s))

    # retrieve results
    if args.display:
        pbar = tqdm(total=pop_size * n_samples)
    for _ in range(pop_size * n_samples):
        while r_queue.empty():
            sleep(.1)
        r_s_id, r = r_queue.get()
        r_list[r_s_id] += r / n_samples
        if args.display:
            pbar.update(1)
    if args.display:
        pbar.close()

    es.tell(solutions, r_list)
    es.disp()

    # evaluation and saving
    if epoch % log_step == log_step - 1:
        best_params, best, std_best = evaluate(solutions, r_list)
Exemplo n.º 28
0
def controller_train_proc(ctrl_dir,
                          controller,
                          vae,
                          mdrnn,
                          target_return=950,
                          skip_train=False,
                          display=True):
    step_log('4-2. controller_train_proc START!!')
    # define current best and load parameters
    cur_best = None
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    p_queue = Queue()
    r_queue = Queue()
    #e_queue = Queue()   # pipaek : not necessary if not multiprocessing

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        #state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    if skip_train:
        return  # pipaek : for when we want to skip improving the model through training..

    def evaluate(solutions,
                 results,
                 rollouts=100):  # pipaek : rollout 100 -> 10 , originally 100
        """ Give current controller evaluation.

        Evaluation is minus the cumulated reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulated reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            print('p_queue.put(), s_id=%d' % s_id)
            p_queue.put((s_id, best_guess))
            print('>>>rollout_routine!!')
            rollout_routine()  # pipaek : here too, process right away as soon as p_queue.put is done..

        print(">>>Evaluating...")
        for _ in tqdm(range(rollouts)):
            #while r_queue.empty():
            #    sleep(.1)   # pipaek : not needed since we are not multi-process
            # pipaek : 20180718 check first, to avoid getting stuck in r_queue.get()!!
            if not r_queue.empty():
                #print('r_queue.get()')
                #restimates.append(r_queue.get()[1])
                r_s_id, r = r_queue.get()
                print(
                    'in evaluate r_queue.get() r_s_id=%d, r_queue remain=%d' %
                    (r_s_id, r_queue.qsize()))
                restimates.append(r)
            else:
                print('r_queue.empty() -> break!!')
                break

        return best_guess, np.mean(restimates), np.std(restimates)

    def rollout_routine():
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue and e_queue the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result).  The same
        parameter can appear multiple times in p_queue, displaying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        #gpu = p_index % torch.cuda.device_count()
        #device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        #if not os.path.exists(tmp_dir):
        #    os.mkdir(tmp_dir)

        #sys.stdout = open(os.path.join(tmp_dir, 'rollout.out'), 'a')
        #sys.stderr = open(os.path.join(tmp_dir, 'rollout.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                     rollout_time_limit)

            while not p_queue.empty():
                print('in rollout_routine, p_queue.get()')
                s_id, params = p_queue.get()
                print('r_queue.put() sid=%d' % s_id)
                r_queue.put((s_id, r_gen.rollout(params)))
                print('r_gen.rollout OK, r_queue.put()')
                #r_queue.qsize()

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': C_POP_SIZE})
    print("CMAEvolutionStrategy start OK!!")

    epoch = 0
    log_step = 3
    while not es.stop():
        print("--------------------------------------")
        print("CURRENT EPOCH = %d" % epoch)
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * C_POP_SIZE  # result list
        solutions = es.ask()
        print("CMAEvolutionStrategy-ask")

        # push parameters to queue
        # pipaek : this for loop runs C_POP_SIZE times.
        for s_id, s in enumerate(solutions):
            #for _ in range(C_POP_SIZE * C_N_SAMPLES):
            for _ in range(C_N_SAMPLES):
                print('in controller_train_proc p_queue.put() s_id : %d' %
                      s_id)
                p_queue.put((s_id, s))
                #print("p_queue.put %d" % s_id)
                # pipaek : get right after p_queue.put, run the rollout, then put the result in r_queue.
                rollout_routine()
                print("rollout_routine OK, r_queue size=%d" % r_queue.qsize())

        # retrieve results
        if display:
            pbar = tqdm(total=C_POP_SIZE * C_N_SAMPLES)
        #for idx in range(C_POP_SIZE * C_N_SAMPLES):
        # pipaek : 20180718 changed the for loop into a while loop so we never hang
        # forever waiting on r_queue.get here.
        while not r_queue.empty():
            #while r_queue.empty():
            #    sleep(.1)
            try:
                r_s_id, r = r_queue.get()
                print(
                    'in controller_train_proc r_queue.get() r_s_id=%d, r_queue remain=%d'
                    % (r_s_id, r_queue.qsize()))
                r_list[r_s_id] += r / C_N_SAMPLES
                if display:
                    pbar.update(1)
            except IndexError as err:
                print('IndexError during r_queue.get()')
                print('cur r_list size:%d, index:%d' % (len(r_list), r_s_id))
        if display:
            pbar.close()

        es.tell(solutions, r_list)  # pipaek : update the solution array with the r_list results..
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            print(">>>> TRYING EVALUATION, CURRENT EPOCH = %d" % epoch)
            # pipaek : use only 10 rollouts for evaluation.. originally 100
            best_params, best, std_best = evaluate(solutions, r_list, rollouts=100)
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(
                    -cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save(
                    {
                        'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()
                    }, os.path.join(ctrl_dir, 'best.tar'))
            if -best > target_return:
                print(
                    "Terminating controller training with value {}...".format(
                        best))
                break

        epoch += 1

    print("es.stop!!")
    es.result_pretty()