Example #1
    def evaluate(self, batch_size=100, cpu=-1, filtering=True) -> Dict[str, float]:
        """Evaluates a model by retrieving scores from the (implemented) score_batch function.

            :param batch_size:
                Size of a test batch
            :param cpu:
                Number of processors to use; -1 means all available processors are used.
            :param filtering:
                Whether to apply filtering when computing the ranking metrics.

            :return:
                Dictionary containing the evaluation results (keys: 'hits@1', 'hits@3', 'hits@10', 'mrr')
        """
        self.filtering = filtering

        start = time.time()
        n_batches, batches = self.dl.get_test_batches(batch_size)

        if cpu == 1 or cpu == 0:
            result = []
            for batch in tqdm(batches, total=n_batches):
                result.append(self.evaluate_batch(batch))
        elif cpu == -1:
            with mp.Pool(mp.cpu_count()) as pool:
                result = pool.map(self.evaluate_batch, batches)
        else:
            with mp.Pool(cpu) as pool:
                result = pool.map(self.evaluate_batch, batches)
        print('Evaluation took {:.3f} seconds'.format(time.time() - start))
        return self.get_result(result)
Example #2
def average_several_run(run_func, args, n_times=4, n_paral=2, **run_params):
    """Get average result after several running.

    Args:
        run_func [func]: running function like 'Classify.train_test'
        args [dict]: all model arguments
        n_times [int]: run several times for average
        n_paral [int]: number of parallel processes
        run_params [dict]: parameters for runing function

    Returns:
        max_socres [dict]: dict of the maximum scores after n_times running
    """
    dnnnlp.verbose.config(0)

    assert n_times % n_paral == 0, \
        "'n_times' should be an integral multiple of 'n_paral'."

    pool = mp.Pool(processes=1)
    check = pool.apply_async(_device_count)
    device_count = check.get()

    scores, processes = [], []
    pool = mp.Pool(processes=n_paral)

    if args.n_gpu > 0:
        assert n_paral * args.n_gpu <= device_count, "Not enough GPU devices."

    for t in range(n_times):
        if args.n_gpu > 0:
            run_params['device_id'] = (t % n_paral) * args.n_gpu
        else:
            run_params['device_id'] = -1

        processes.append(pool.apply_async(run_func, kwds=run_params.copy()))

        if (t + 1) % n_paral == 0:
            for i, p in enumerate(processes):
                result = p.get()
                scores.append(result)

                print()
                ptable = utils.display_prfacc(args.eval_metric, verbose=0)
                ptable.row(dict(result, **{"iter":
                                           t + 2 - len(processes) + i}))
            processes.clear()

    avg_scores = utils.average_prfacc(*scores)
    print()
    ptable = utils.display_prfacc(args.eval_metric, verbose=0)
    ptable.row(dict(avg_scores, **{"iter": 'AVG'}))

    dnnnlp.verbose.config(2)
    return avg_scores
Example #3
def evaluate_mp(model, dataloader, Ks, num_processes, device):
    test_batch_size = dataloader.test_batch_size
    test_user_dict = dataloader.test_user_dict

    model.eval()
    model.to("cpu")

    user_ids = list(test_user_dict.keys())
    user_ids_batches = [
        user_ids[i:i + test_batch_size]
        for i in range(0, len(user_ids), test_batch_size)
    ]

    pool = mp.Pool(num_processes)
    res = pool.starmap(evaluate_batch, [(model, dataloader, batch_user, Ks)
                                        for batch_user in user_ids_batches])
    pool.close()

    score_matrix = np.concatenate([r[0] for r in res], axis=0)
    metrics_dict = {k: {} for k in Ks}
    for k in Ks:
        for m in ['precision', 'recall', 'ndcg']:
            metrics_dict[k][m] = np.concatenate([r[1][k][m]
                                                 for r in res]).mean()

    torch.cuda.empty_cache()
    model.to(device)
    return score_matrix, metrics_dict
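
For reference, `starmap` unpacks each tuple into positional arguments, which is how `(model, dataloader, batch_user, Ks)` reaches `evaluate_batch` above. A minimal self-contained illustration of that call shape (the `evaluate_one` function and dict model below are hypothetical stand-ins, not part of the original code):

import multiprocessing as mp

def evaluate_one(model, batch):
    # stand-in scorer: any picklable 'model' and batch work here
    return model['scale'] * sum(batch)

if __name__ == '__main__':
    model = {'scale': 2}
    batches = [[1, 2], [3, 4]]
    with mp.Pool(2) as pool:
        res = pool.starmap(evaluate_one, [(model, b) for b in batches])
    print(res)  # [6, 14]
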
Example #4
  def save_cache(self, cache_folder, epochs):
    """ Generates batch/episode indices and saves them into a torch file

    Args:
      cache_folder: string. folder where to save the cached indices
      epochs: int. number of epochs to generate

    Returns: list. Cached indices.

    """
    logging.info("Saving cache to %s" % cache_folder)
    queue = multiprocessing.Queue()
    queue.put(1)
    nworkers = 32
    last = nworkers + 1
    seed = FLAGS.random_seed if FLAGS.random_seed is not None else get_random_initializer()
    with multiprocessing.Pool(nworkers, initializer=init_fn, initargs=(seed, queue, self, last)) as pool:
      _cache = pool.map_async(build_episode_indices, range(epochs))
      for _ in tqdm(range(epochs)):
        queue.get(block=True)
      cache = _cache.get()
      del queue
    torch.save(cache, os.path.join(cache_folder, "cache.pt"))

    with open(os.path.join(cache_folder, 'ready'), 'w') as outfile:
      outfile.write('\n')

    self.cache = cache
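
The `map_async` plus `Queue` combination above lets the parent process drive the progress bar while workers run. A condensed sketch of the same trick, with hypothetical `_init`/`_job` helpers standing in for `init_fn`/`build_episode_indices`:

import multiprocessing

_queue = None

def _init(q):
    global _queue
    _queue = q

def _job(i):
    _queue.put(1)  # signal one finished item to the parent
    return i * i

if __name__ == '__main__':
    epochs = 16
    queue = multiprocessing.Queue()
    with multiprocessing.Pool(4, initializer=_init, initargs=(queue,)) as pool:
        async_res = pool.map_async(_job, range(epochs))
        for _ in range(epochs):
            queue.get(block=True)  # one token per completed item; update a bar here
        results = async_res.get()
    print(results)
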
Example #5
def cost_fn_nav2d(theta_vals, params):

    global FVAL_CALLS
    FVAL_CALLS += 1
    print("[cost_fn_nav2d] FVAL_CALLS: {0}".format(FVAL_CALLS))

    theta = theta_vals_to_obj(theta_vals, params)
    n_data = params.leo.n_data_train

    # parallelized optimizer run
    pool = mp.Pool(processes=params.leo.pool_processes)
    optimizer_soln_fn = partial(optimizer_soln, theta, params)
    data_idxs = np.arange(0, n_data)
    result_opt = pool.map(optimizer_soln_fn, data_idxs)
    pool.close()
    pool.join()

    loss = 0.0
    for data_idx in range(0, n_data):
        # x_opt, data = optimizer_soln(theta, params, data_idx) # serial run
        x_opt, data = result_opt[data_idx][0], result_opt[data_idx][1]
        x_gt = groundtruth_poses(data, params)

        loss = loss + loss_nav2d(x_opt, x_gt)

    loss = loss / n_data

    return loss
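
The `functools.partial` idiom used for `optimizer_soln_fn` binds the fixed arguments (`theta`, `params`) so the pool maps over only the varying data index. The same idiom in isolation, with a hypothetical `optimize_one` standing in for `optimizer_soln`:

import multiprocessing as mp
from functools import partial

def optimize_one(theta, params, data_idx):
    # stand-in: returns (solution, data) for one datum
    return theta * data_idx, params

if __name__ == '__main__':
    f = partial(optimize_one, 2.0, {'tol': 1e-6})  # bind theta and params
    with mp.Pool(4) as pool:
        results = pool.map(f, range(8))  # workers vary only data_idx
    print(results)
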
Example #6
        def get_pred_large(pan_2ch_all, vid_num, nframes_per_video=6):
            # the vid_num argument is ignored; it is recomputed from the input
            vid_num = len(pan_2ch_all) // nframes_per_video
            cpu_num = multiprocessing.cpu_count() // 2
            nprocs = min(vid_num, cpu_num)
            max_nframes = cpu_num * nframes_per_video
            nsplits = (len(pan_2ch_all) - 1) // max_nframes + 1
            annotations, pan_all = [], []
            for i in range(0, len(pan_2ch_all), max_nframes):
                print('==> Read and convert VPS output - split %d/%d' %
                      ((i // max_nframes) + 1, nsplits))
                # slicing clamps at the end of the list automatically
                pan_2ch_part = pan_2ch_all[i:i + max_nframes]
                pan_2ch_split = np.array_split(pan_2ch_part, nprocs)
                workers = multiprocessing.Pool(processes=nprocs)
                processes = []
                for proc_id, pan_2ch_set in enumerate(pan_2ch_split):
                    p = workers.apply_async(
                        self.converter_2ch_track_core,
                        (proc_id, pan_2ch_set, color_generator))
                    processes.append(p)
                workers.close()
                workers.join()

                for p in processes:
                    p = p.get()
                    annotations.extend(p[0])
                    pan_all.extend(p[1])

            pan_json = {'annotations': annotations}
            return pan_all, pan_json
Example #7
    def load_data(self, data_dir: str, num_workers: int = 1):

        data = list()

        if num_workers == -1:
            num_workers = mp.cpu_count()

        chunk_paths = [
            os.path.join(data_dir, chunk_file)
            for chunk_file in os.listdir(data_dir)
            if chunk_file.startswith('chunk') and chunk_file.endswith('pkl')
        ]

        if num_workers > 1:
            with mp.Pool(num_workers) as pool:
                result = pool.map(self.load_file, chunk_paths)
            for batches in result:
                data.extend(batches)
        else:
            for file_path in chunk_paths:
                data.extend(self.load_file(file_path))

        return data
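
`self.load_file` is not shown; for `pool.map` it must be picklable (e.g. a plain method or module-level function). A plausible sketch, assuming each 'chunk*.pkl' file pickles a list of batches:

import pickle

def load_file(file_path):
    # hypothetical loader: each chunk file is assumed to hold a list of batches
    with open(file_path, 'rb') as f:
        return pickle.load(f)
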
Example #8
    def validate(self, verbose=True):
        """Runs inference over the validation set periodically during training.

        Writes the validation loss and accuracy to their respective log files.
        """
        print('[INFO] Beginning validation.')
        with torch.no_grad():
            n_val = len(self.X_val)
            pool = mp.Pool(self.params['n_processes'])
            inputs = []
            for i in range(n_val):
                x = self.X_val[i, :, :].unsqueeze(0)
                y = self.y_val[i, :, :].unsqueeze(0)
                inputs.append((x, y))
            results = pool.starmap(self.train_step, inputs)
            pool.close()
            val_losses, val_accuracies = zip(*results)
            val_losses = torch.tensor(val_losses)
            val_accuracies = torch.tensor(val_accuracies)
            val_loss = torch.mean(val_losses, dim=0)
            val_acc = torch.mean(val_accuracies)
            val_loss = tuple(val_loss)

        print('[INFO] Validation complete.')
        with open(VAL_LOSS_FILE, 'a') as f:
            f.write('%d %f %f %f %f\n' %
                    ((self.iter_count.item(), ) + val_loss))
        with open(VAL_ACC_FILE, 'a') as f:
            f.write('%d %f\n' % (self.iter_count.item(), val_acc))
Example #9
def run_multiple_times(args, run_fct):
    cpu_count = mp.cpu_count()
    gpu_count = torch.cuda.device_count()

    # Clone arguments into list & Distribute workload across GPUs
    args_across_workers = [copy.deepcopy(args) for r in range(args.RUN_TIMES)]
    if gpu_count > 0:
        gpu_counter = 0
        for r in range(args.RUN_TIMES):
            args_across_workers[r].device_id = gpu_counter
            gpu_counter += 1
            if gpu_counter > gpu_count - 1:
                gpu_counter = 0

    # Execute different runs/random seeds in parallel
    pool = mp.Pool(cpu_count - 1)
    df_across_runs = pool.map(run_fct, args_across_workers)
    pool.close()

    # Post process results
    df_concat = pd.concat(df_across_runs)
    by_row_index = df_concat.groupby(df_concat.index)
    df_means, df_stds = by_row_index.mean(), by_row_index.std()
    if args.SAVE:
        df_means.to_csv("logs/" + args.SAVE_FNAME + ".csv")
    return df_means, df_stds
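
A usage sketch under stated assumptions: `run_fct` must be a module-level function returning a per-step `pd.DataFrame`, and `args` needs at least `RUN_TIMES`, `SAVE`, and `SAVE_FNAME` (the `train_agent` stub below is hypothetical):

from types import SimpleNamespace
import pandas as pd

def train_agent(args):
    # stand-in run function: returns one run's learning curve indexed by step
    return pd.DataFrame({'reward': [0.1, 0.4, 0.9]})

if __name__ == '__main__':
    args = SimpleNamespace(RUN_TIMES=4, SAVE=False, SAVE_FNAME='demo')
    df_means, df_stds = run_multiple_times(args, train_agent)
    print(df_means)
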
Example #10
def main(model, modeltype, gamename, ncpu):
    if modeltype == '2015':
        from model_15 import build_model
    elif modeltype == '2013':
        from model_13 import build_model
    config = pd.read_csv("config.csv")
    CONFIG = dict()
    CONFIG['game'] = gamename + '-v0'
    CONFIG['ep_max_step'] = 1500
    CONFIG['eval_threshold'] = config[config['gamename']==gamename].iloc[0,1]
    CONFIG['l2coeff'] = 0.005
    pool = mp.Pool(processes=ncpu)
    env = gym.make(CONFIG['game'])
    CONFIG['n_action'] = env.action_space.n

    test_times = 100
    
    test_model = build_model(CONFIG)
    test_model.load_state_dict(torch.load(os.path.join(model_storage_path, model)))
    test_model.switch_to_vbn()

    test_rewards, _ = test(test_model, pool, env, test_times, CONFIG)

    print("test results:", np.array(test_rewards).mean())
    print(str(test_rewards))
Example #11
    def __init__(self, args):
        self.args = args
        self.pool = multiprocessing.Pool(args.worker)
        env = gym.make(args.env_name)
        o_dim = env.observation_space.shape[0]
        s_dim = 128
        a_dim = env.action_space.n

        self.population_status = []
        for _ in range(args.population):
            individual_status = {}
            name = ''.join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(8))
            individual_status['name'] = name
            env_name = self.args.env_name
            individual_status['env_name'] = env_name
            policy_net = PolicyNet(o_dim, s_dim, a_dim)
            policy_net.share_memory()
            individual_status['policy_net'] = policy_net
            evaluate_net = EvaluateNet(o_dim, s_dim, a_dim)
            evaluate_net.share_memory()
            individual_status['evolution_net'] = evaluate_net
            steps = self.args.step_per_generation
            individual_status['steps'] = steps
            self.population_status.append(individual_status)
Example #12
def pool_map(fn, data, num_workers=0, dlen=None, title=None):
    ''' Multiprocess map function that displays a progress bar

    Args:
        fn (function): Function to be applied to the elements in `data`
        data (iterable): Iterable on which the function `fn` is applied.
        num_workers (int): Number of worker processes to do the computation
        dlen (int): A way to supply the length of `data` separately (to display in progress bar)
        title (str): Title to be displayed next to the progress bar

    Returns:
        A list of results [fn(data[0]), .... fn(data[-1])]
    '''
    result = []
    if num_workers > 0:
        n = len(data) if hasattr(data, '__len__') else dlen
        desc = title if title is not None else fn.__name__ if hasattr(
            fn, '__name__') else None
        with mp.Pool(num_workers) as p:
            with tqdm.tqdm(total=n, desc=desc) as bar:
                for r in p.imap(fn, data):
                    result.append(r)
                    bar.update()
    else:
        result = [fn(d) for d in tqdm.tqdm(data)]
    return result
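
A minimal usage sketch for `pool_map`, assuming it is importable alongside its `mp`/`tqdm` imports (the `square` function is a stand-in; it must live at module level so worker processes can pickle it):

def square(x):
    # module-level so mp.Pool workers can pickle it
    return x * x

if __name__ == '__main__':
    results = pool_map(square, range(100), num_workers=4, title='squares')
    print(results[:5])  # [0, 1, 4, 9, 16]
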
Example #13
    def _get_dataset(self, filename):
        dicts = self.processor._file_to_dicts(filename)
        # shuffle the dicts here if we later want to split off a random dev set from the train set
        if filename == self.processor.train_filename:
            if not self.processor.dev_filename:
                if self.processor.dev_split > 0.0:
                    random.shuffle(dicts)  # shuffles in place; random.shuffle returns None

        dict_batches_to_process = int(
            len(dicts) / self.multiprocessing_chunk_size)
        num_cpus = min(mp.cpu_count(), self.max_processes,
                       dict_batches_to_process) or 1

        with ExitStack() as stack:
            p = stack.enter_context(mp.Pool(processes=num_cpus))

            logger.info(
                f"Got ya {num_cpus} parallel workers to convert dict chunks to datasets (chunksize = {self.multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus, logger)

            results = p.imap(
                partial(self._multiproc, processor=self.processor),
                grouper(dicts, self.multiprocessing_chunk_size),
                chunksize=1,
            )

            datasets = []
            for dataset, tensor_names in tqdm(results,
                                              total=len(dicts) /
                                              self.multiprocessing_chunk_size):
                datasets.append(dataset)

            concat_datasets = ConcatDataset(datasets)
            return concat_datasets, tensor_names
Example #14
    def __init__(self, env, n_processes=0):
        if n_processes <= 0:
            n_processes = mp.cpu_count()
        init_seeds = mp.Queue()
        for seed in np.random.randint(Config.SEED_RANGE, size=n_processes):
            init_seeds.put(int(seed))  # one initial seed per worker
        self.pool = mp.Pool(n_processes, mp_sampler_init, (env, init_seeds))
        self.rollout_f = stochastic_policy_rollout
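
`mp_sampler_init` is not shown, but the seed queue suggests each worker pops one seed in its initializer so rollouts differ across processes. A self-contained sketch of that pattern (`_worker_init` and `_rollout` are hypothetical stand-ins):

import multiprocessing as mp
import random

def _worker_init(seed_queue):
    # each worker pops exactly one seed during pool start-up
    random.seed(seed_queue.get())

def _rollout(_):
    return [random.random() for _ in range(3)]

if __name__ == '__main__':
    n = 4
    seeds = mp.Queue()
    for s in range(n):
        seeds.put(1000 + s)
    with mp.Pool(n, _worker_init, (seeds,)) as pool:
        print(pool.map(_rollout, range(n)))
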
Example #15
    def sample_parallel(self,
                        env_fn,
                        policy,
                        min_steps,
                        max_traj_len,
                        deterministic=False):
        import torch.multiprocessing as mp
        from functools import partial, reduce

        worker = partial(self._sample, env_fn, policy, min_steps, max_traj_len,
                         deterministic)

        with mp.Pool(processes=self.n_proc) as pool:
            # Call pool of workers, don't apply any arguments
            # TODO: this is a weird use of starmap, maybe Process is more suited?
            result = pool.starmap(worker, [() for _ in range(self.n_proc)])

        def merge(buf1, buf2):
            buf2.states += buf1.states
            buf2.actions += buf1.actions
            buf2.rewards += buf1.rewards
            buf2.values += buf1.values
            buf2.returns += buf1.returns

            buf2.ep_returns += buf1.ep_returns
            buf2.ep_lens += buf1.ep_lens

            return buf2

        memory = reduce(merge, result)
        return memory
Example #16
def main(args):
    if args.load is False and os.path.isfile('./model/walker.pt'):
        while True:
            load = input('Are you sure you want to erase the previous training? (y/n) ')
            if load.lower() in ('y', 'yes', '1'):
                break
            elif load.lower() in ('n', 'no', '0'):
                import sys
                sys.exit()

    # create shared variables between all the processes
    manager = mp.Manager()
    # used to send the results of the net
    common_dict = manager.dict()
    # a queue of batches to be fed to the training net
    mem_queue = manager.Queue(1500 * mp.cpu_count())
    # a queue of operations pending
    process_queue = manager.Queue(mp.cpu_count()-1)
    workers = mp.cpu_count() if args.train else 2
    with mp.Pool(workers) as pool:
        try:
            print(f"Running pool with {workers} workers")
            pool.apply_async(gpu_thread, (args.load, mem_queue, process_queue, common_dict, 0))
            if args.render:
                pool.apply_async(cpu_thread, (2 if not args.train else 1, mem_queue, process_queue, common_dict, 1))
            for i in range(1+args.render, workers):
                pool.apply_async(cpu_thread, (0, mem_queue, process_queue, common_dict, i))

            # Wait for children to finish
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            pool.terminate()  # join() raises unless close()/terminate() was called first
            pool.join()
Example #17
    def update_holdout_chromosomes(self, holdout_set):
        images = []
        time_zero = time.time()
        block_size = HDF_MULTI_BLOCK_SIZE
        loc_chunks = list(range(0, self.__len__(), block_size))
        num_processes = multiprocessing.cpu_count()
        print('Process with %d processors' % num_processes)
        with multiprocessing.Pool(num_processes) as pool:
            f = functools.partial(process_location,
                                  block_size=block_size,
                                  path=self.h5_path,
                                  holdout_set=holdout_set)
            for map_image in tqdm.tqdm(pool.imap_unordered(f, loc_chunks),
                                       total=len(loc_chunks)):
                images.extend(map_image)

        print('Saving num output locations:')
        print(len(images))
        # Not necessary -- just for debug consistency -- remove to save time if big
        images.sort()
        print(images[:10])
        print('Took %.2fs to process %d loc with %d processes' %
              (time.time() - time_zero, self.__len__(), num_processes))
        for idx in tqdm.tqdm(images, total=len(images)):
            self.chromosome_holdout[idx] = True
Example #18
def collect_data(top_player,
                 players,
                 times=100,
                 repeat=1,
                 log_dict=log_dict,
                 value_policy=value_policy,
                 p=None,
                 queue=None,
                 cuda=False,
                 cpus=None):
    assert isinstance(players, list)
    if p is not None:
        assert isinstance(p, list) and len(p) == len(players)
        sum_p = sum(p)
        p = [i / sum_p for i in p]

    log_dict = os.path.join(log_dict, top_player)

    if not (os.path.exists(log_dict) and os.path.isdir(log_dict)):
        os.mkdir(log_dict)

    play_list = np.random.choice(len(players), times, replace=True, p=p)
    play_list = [(players[play_list[i]], i) for i in range(len(play_list))]

    with multiprocessing.Pool(cpus) as pool:
        pool.map(
            functools.partial(__collect_a_game,
                              top_player=top_player,
                              repeat=repeat,
                              log_dict=log_dict,
                              value_policy=value_policy,
                              queue=queue,
                              cuda=cuda), play_list)
Example #19
    def _get_dataset(self, filename):
        dicts = self.processor.file_to_dicts(filename)
        # shuffle the dicts here if we later want to split off a random dev set from the train set
        if self.processor.train_filename in filename:
            if not self.processor.dev_filename:
                if self.processor.dev_split > 0.0:
                    random.shuffle(dicts)
        num_dicts = len(dicts)
        multiprocessing_chunk_size, num_cpus_used = calc_chunksize(num_dicts)

        with ExitStack() as stack:
            p = stack.enter_context(mp.Pool(processes=num_cpus_used))

            logger.info(
                f"Got ya {num_cpus_used} parallel workers to convert {num_dicts} dictionaries "
                f"to pytorch datasets (chunksize = {multiprocessing_chunk_size})..."
            )
            log_ascii_workers(num_cpus_used, logger)

            results = p.imap(
                partial(self._multiproc, processor=self.processor),
                grouper(dicts, multiprocessing_chunk_size),
                chunksize=1,
            )

            datasets = []
            with tqdm(total=len(dicts), unit=' Dicts') as pbar:
                for dataset, tensor_names in results:
                    datasets.append(dataset)
                    pbar.update(multiprocessing_chunk_size)
            
            concat_datasets = ConcatDataset(datasets)
            return concat_datasets, tensor_names
Example #20
    def _setup_multiprocess_pool(self, initargs=()):
        if self.number_of_processes > 1:
            logger.info('Starting multiprocess using ' +
                        str(self.number_of_processes) + ' processes')
            assert len(
                mp.active_children()
            ) == 0, 'This should not happen. Has the cleanup() method been called ?'
            start = time.perf_counter()
            process_id = mp.Value('i', 0,
                                  lock=True)  # shared between processes
            initargs = (process_id, initargs)

            self.pool = mp.Pool(processes=self.number_of_processes,
                                maxtasksperchild=None,
                                initializer=_initializer,
                                initargs=initargs)
            logger.info('Multiprocess pool started using sharing strategy "' +
                        mp.get_sharing_strategy() + '"' + ' in: ' +
                        str(time.perf_counter() - start) + ' seconds')

            if torch.cuda.is_available(
            ) and self.number_of_processes > torch.cuda.device_count():
                logger.warning(
                    "You are trying to run more processes than there are available GPUs, "
                    "it is advised to run `nvidia-cuda-mps-control` to leverage concurrent cuda executions. "
                    "If run in background mode, don't forget to stop the daemon when done."
                )
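
The shared `mp.Value` handed to the initializer is a common way to give each worker a unique id. A minimal sketch of an `_initializer` consistent with the initargs shape above (hypothetical, not the original implementation):

import multiprocessing as mp

_worker_id = None

def _initializer(process_id, _extra_args):
    global _worker_id
    with process_id.get_lock():
        _worker_id = process_id.value
        process_id.value += 1  # the next worker gets the next id

def _show(_):
    return _worker_id

if __name__ == '__main__':
    pid = mp.Value('i', 0, lock=True)
    with mp.Pool(3, initializer=_initializer, initargs=(pid, ())) as pool:
        print(sorted(set(pool.map(_show, range(30)))))  # e.g. [0, 1, 2]
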
Example #21
    def __call__(self, func, *func_args):
        with mp.Pool(self.n_threads) as pool:
            func_pickle = _FilterPickleHelper(func, *func_args)
            for keep, c in pool.imap_unordered(func_pickle, self.data,
                                               self.chunk_size):
                if keep:
                    yield c
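
`_FilterPickleHelper` is presumably a picklable callable that binds `func` and its extra arguments and returns `(keep, item)` pairs for the consumer loop. A sketch of that pattern under those assumptions (`is_multiple` is a toy predicate):

import multiprocessing as mp

class _FilterPickleHelper:
    """Picklable callable: binds func and its extra args for pool workers."""
    def __init__(self, func, *args):
        self.func = func
        self.args = args

    def __call__(self, item):
        # returns (keep_flag, item), matching the consumer loop above
        return self.func(item, *self.args), item

def is_multiple(x, k):
    return x % k == 0

if __name__ == '__main__':
    helper = _FilterPickleHelper(is_multiple, 3)
    with mp.Pool(2) as pool:
        kept = [c for keep, c in pool.imap_unordered(helper, range(10), 2) if keep]
    print(sorted(kept))  # [0, 3, 6, 9]
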
Example #22
        def get_gt(pan_gt_json_file=None, pan_gt_folder=None):
            if pan_gt_json_file is None:
                pan_gt_json_file = self.panoptic_json_file
            if pan_gt_folder is None:
                pan_gt_folder = self.panoptic_gt_folder
            with open(pan_gt_json_file, 'r') as f:
                pan_gt_json = json.load(f)
            files = [item['file_name'] for item in pan_gt_json['images']]
            if 'viper' in pan_gt_folder:
                files = [
                    _.split('/')[-1].replace('.jpg', '.png') for _ in files
                ]
            cpu_num = multiprocessing.cpu_count()
            files_split = np.array_split(files, cpu_num)
            workers = multiprocessing.Pool(processes=cpu_num)
            processes = []
            for proc_id, files_set in enumerate(files_split):
                p = workers.apply_async(BaseDataset._load_image_single_core,
                                        (proc_id, files_set, pan_gt_folder))
                processes.append(p)
            workers.close()
            workers.join()
            pan_gt_all = []
            for p in processes:
                pan_gt_all.extend(p.get())

            categories = pan_gt_json['categories']
            categories = {el['id']: el for el in categories}
            color_generator = IdGenerator(categories)
            return pan_gt_all, pan_gt_json, categories, color_generator
Example #23
    def eval(self, data, output_trace_flag, max_eval_size=None):
        data_size = len(data)
        if max_eval_size is not None:
            data_size = min(data_size, max_eval_size)
        eval_data = data[:data_size]
        if self.processes == 1:
            cum_loss, cum_reward = self.batch_eval(eval_data,
                                                   output_trace_flag, 0)
        else:
            cum_loss = 0
            cum_reward = 0
            try:
                mp.set_start_method('spawn')
            except RuntimeError:
                pass
            pool = mp.Pool(processes=self.processes)
            res = []
            batch_per_process = data_size // self.processes
            if data_size % batch_per_process > 0:
                batch_per_process += 1
            for st in range(0, data_size, batch_per_process):
                res += [
                    pool.apply_async(self.batch_eval,
                                     (eval_data[st:st + batch_per_process],
                                      output_trace_flag, st))
                ]
            for i in range(len(res)):
                cur_cum_loss, cur_cum_reward = res[i].get()
                cum_loss += cur_cum_loss
                cum_reward += cur_cum_reward
            pool.close()

        avg_loss = cum_loss / data_size
        avg_reward = cum_reward / data_size
        print('average pred reward: %.4f' % avg_reward)
        return avg_loss, avg_reward
Example #24
def uniform_valid_perplexity(seqs, mask_function, num_workers=0):
    if num_workers is None or num_workers > 0:
        pool = multiprocessing.Pool(num_workers)
        map_fn = functools.partial(pool.imap, chunksize=8)
    else:
        map_fn = map

    valid_choices_fn = functools.partial(total_valid_choices,
                                         mask_function=mask_function)

    average_choices = np.empty(len(seqs))
    average_entropy = np.empty(len(seqs))
    seq_length = np.empty(len(seqs), dtype=int)  # np.int was removed from NumPy

    for i, (choices, entropy, length) in enumerate(
            tqdm.tqdm(map_fn(valid_choices_fn, seqs), total=len(seqs))):
        average_choices[i] = choices
        average_entropy[i] = entropy
        seq_length[i] = length

    return {
        'choices': average_choices,
        'entropy': average_entropy,
        'sequence_length': seq_length
    }
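
The `map_fn` switch above is worth noting: `functools.partial(pool.imap, chunksize=8)` gives the serial built-in `map` and the parallel `imap` the same calling convention, so one loop body serves both. Stripped to its core (the `work` function is hypothetical):

import functools
import multiprocessing

def work(x):
    return x * x

def run(seq, num_workers=0):
    # the same consumption loop works serially or in parallel
    if num_workers is None or num_workers > 0:
        pool = multiprocessing.Pool(num_workers)
        map_fn = functools.partial(pool.imap, chunksize=8)
    else:
        pool, map_fn = None, map  # serial fallback
    results = list(map_fn(work, seq))
    if pool is not None:
        pool.close()
    return results

if __name__ == '__main__':
    print(run(range(10), num_workers=2))
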
Example #25
    def start_pool(self, n_games, game_fn, device, *args):
        parent_conns = []
        child_conns = []
        parent_conns2 = []
        child_conns2 = []
        for i in range(n_games):
            parent_conn, child_conn = multiprocessing.Pipe()
            parent_conns.append(parent_conn)
            child_conns.append(child_conn)
        if self.net2:
            for i in range(n_games):
                parent_conn2, child_conn2 = multiprocessing.Pipe()
                parent_conns2.append(parent_conn2)
                child_conns2.append(child_conn2)
        pool = multiprocessing.Pool(processes=self.n_processes, initializer=np.random.seed)
        gpu_handler = multiprocessing.Process(target=handle_gpu, args=(copy.deepcopy(self.net), parent_conns, device))
        gpu_handler2 = None
        if self.net2:
            gpu_handler2 = multiprocessing.Process(target=handle_gpu,
                                                  args=(copy.deepcopy(self.net2), parent_conns2, device))
            gpu_handler2.start()

        gpu_handler.start()
        if self.net2:
            examples = pool.map_async(self.single_game_fn,
                                      [(conn, self.game_name, self.kwargs, game_fn, child_conns2[idx], args)
                                       for idx, conn in enumerate(child_conns)])
        else:
            examples = pool.map_async(self.single_game_fn,
                                      [(conn, self.game_name, self.kwargs, game_fn, None, args)
                                       for conn in child_conns])

        return [gpu_handler, pool, examples, child_conns, parent_conns, gpu_handler2, child_conns2, parent_conns2]
Example #26
    def multi_process(self):
        print("Video processing using {} processes...".format(self.num_processes))

        p = mp.Pool(self.num_processes)
        p.map(self.process_video_multiprocessing, range(self.num_processes))
        p.close()

        self.combine_output_files(self.num_processes)
Example #27
def main():
    mp.set_start_method('spawn', force=True)
    with open(val_fpath, 'r') as fin:
        vid_list = [osp.join(vid_dir, el.strip()) for el in fin.readlines()]
    # vid_list = vid_list[:10]  # DEBUG!!!!
    pool = mp.Pool(processes=8)
    all_preds = list(tqdm(pool.imap(compute_preds, vid_list),
                          desc='Evaluating', total=len(vid_list)))
    pool.close()
    pool.join()
    preds, preds_gt, lbls, vid_fpaths = zip(*all_preds)
    nclasses = max(lbls) + 1  # lbls are 0 indexed
    preds_1hot = one_hot(preds, nclasses)
    preds_gt_1hot = one_hot(preds_gt, nclasses)
    lbls = np.array(lbls)
    # print(preds, preds_gt, lbls)
    corr = (np.array(preds) == np.array(lbls))
    all_acc = all_localization_accuracies(lbls, preds_1hot)
    assert np.isclose(all_acc['top_1'], np.mean(corr))
    print('Tracking baseline accuracies: {}'.format(all_acc))
    store_preds(preds)
    store_failure_cases(vid_fpaths, preds, lbls)
    gt_acc = np.mean(np.array(np.array(preds_gt) == np.array(lbls)))
    all_acc_gt = all_localization_accuracies(lbls, preds_gt_1hot)
    assert all_acc_gt['top_1'] > 0.99
    assert np.isclose(all_acc_gt['top_1'], gt_acc)
    print('Tracking baseline accuracy (GT end loc, this should be 1.0): {}'.format(all_acc_gt))
Example #28
def run_multiple_times(args, run_fct):
    cpu_count = mp.cpu_count()
    gpu_count = torch.cuda.device_count()

    # Clone arguments into list & Distribute workload across GPUs
    args_across_workers = [copy.deepcopy(args) for r in range(args.RUN_TIMES)]
    if gpu_count > 0:
        gpu_counter = 0
        for r in range(args.RUN_TIMES):
            args_across_workers[r].seed = r
            args_across_workers[r].device_id = gpu_counter
            gpu_counter += 1
            if gpu_counter > gpu_count-1:
                gpu_counter = 0

    # Execute different runs/random seeds in parallel
    pool = mp.Pool(cpu_count-1)
    df_across_runs = pool.map(run_fct, args_across_workers)
    pool.close()

    # Post process results
    df_concat = pd.concat(df_across_runs)
    by_row_index = df_concat.groupby(df_concat.index)
    df_means, df_stds = by_row_index.mean(), by_row_index.std()

    if args.ENV_ID == "dense-v0":
        sfname = "results/GRIDWORLD/" + str(args.RUN_TIMES) + "_RUNS_" + str(args.AGENT) + "_" + args.SAVE_FNAME
    else:
        sfname = "results/ATARI/" + str(args.RUN_TIMES) + "_RUNS_" + str(args.AGENT) + "_" + args.SAVE_FNAME
    print("Saved agents to {}".format(sfname))
    df_means.to_csv(sfname)
    return df_means, df_stds
Example #29
    def __init__(self,
                 fitness_fn: Callable,
                 init_mean: np.ndarray = None,
                 init_std: 'np.ndarray or float' = 0.5,
                 pop_size: int = None,
                 std_step: float = None,
                 lr: float = 0.3,
                 log_time_interval: float = None,
                 num_processes: int = None):
        if num_processes is None:
            num_processes = os.cpu_count()

        self.fitness_fn = fitness_fn
        self.cur_solution = init_mean
        self.lr = lr
        self.std = init_std
        self.log_time_interval = log_time_interval
        self.num_processes = num_processes
        self.dim = init_mean.size
        self.std_step = (3 + np.log(self.dim)) / (20 * np.sqrt(self.dim)) if std_step is None else std_step
        self.pop_size = int(round(4 + 3 * np.log(self.dim))) if pop_size is None else pop_size
        self.pop_size += self.pop_size % 2
        self.R_rank = self.get_ranks(self.pop_size)
        self.grad_mean = np.zeros_like(self.cur_solution)
        self.max_R = None
        self.best_solution = self.cur_solution
        self._log_as_best = True
        self._last_log_time = time.time()
        self._pool = mp.Pool(initializer=self.process_fitness_init, initargs=(self.fitness_fn,)) \
            if num_processes > 1 else None
Example #30
def build_dataset(paths, num_workers, word_vocab, min_count, window,
                  num_total_words, archive):
    func = partial(file_to_features,
                   word_vocab=word_vocab,
                   window=window,
                   min_count=min_count,
                   total_w=num_total_words)
    p = multiprocessing.Pool(num_workers, init_worker)
    files = []
    file_counter = 0
    filename = archive.format(file_counter)
    files.append(filename)
    archive_f = open(archive.format(file_counter), 'w', encoding='utf-8')
    archive_f.write('Source\tTarget\n')
    counter = 0
    for x in p.imap(func, paths):
        if counter % 100 == 0:
            print(counter)
        counter += 1
        for word, target in x:
            archive_f.write('{}\t{}\n'.format(word, target))
        if archive_f.tell() > 5e+8:
            print('Changing file. Counter at {}'.format(counter))
            archive_f.close()  # close the filled archive before rolling over
            file_counter += 1
            filename = archive.format(file_counter)
            files.append(filename)
            archive_f = open(filename, 'w', encoding='utf-8')
            archive_f.write('Source\tTarget\n')  # keep the header in every file
    archive_f.close()
    p.close()
    return files