Example #1
    def runMultiProcessTrajectories(self, repeat):
        # One worker per starting position; each worker averages `repeat` trajectories.
        pool = Pool(processes=len(self.posIni))
        result = pool.map(partial(self.runNtrajectory, repeat=repeat),
                          [(x, y) for x, y in self.posIni])
        pool.close()
        pool.join()
        # Average the (cost, trajectory time) pairs returned by the workers.
        meanCost, meanTraj = 0., 0.
        for cost, traj in result:
            meanCost += cost
            meanTraj += traj
        size = len(result)
        return meanCost / size, meanTraj / size
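A minimal, self-contained sketch of the Pool + functools.partial pattern used above. runNtrajectory and posIni belong to the original class, so scale_point and points below are stand-ins:

from functools import partial
from multiprocessing import Pool  # the multiprocess package exposes the same API

def scale_point(point, factor):
    x, y = point
    return factor * x, factor * y

if __name__ == "__main__":
    points = [(0, 0), (1, 2), (3, 4)]
    with Pool(processes=len(points)) as pool:
        # partial pins the keyword argument; map supplies one point per call
        print(pool.map(partial(scale_point, factor=2), points))
        # [(0, 0), (2, 4), (6, 8)]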
Example #2
    def __init__(self, processes=None, initializer=None, initargs=()):
        ProcessPool.__init__(self, processes, initializer, initargs)
        return
Example #3
       
    def runNtrajectory(self, xy, repeat):
        # Python 3 no longer allows tuple parameters in signatures, so unpack explicitly.
        x, y = xy
        costAll, trajTimeAll = np.zeros(repeat), np.zeros(repeat)
        for i in range(repeat):
            costAll[i], trajTimeAll[i] = self.runOneTrajectoryOpti(x, y)
        meanCost = np.mean(costAll)
        meanTrajTime = np.mean(trajTimeAll)
        self.costStore.append([x, y, meanCost])
        self.trajTimeStore.append([x, y, meanTrajTime])
        return meanCost, meanTrajTime

    def mapableTrajecrtoryFunction(self, x, y, useless):
        # The extra `useless` argument absorbs the index that pool.map supplies.
        return self.runOneTrajectory(x, y)

    def runNtrajectoryMulti(self, xy, repeat):
        x, y = xy
        pool = Pool(processes=4)
        result = pool.map(partial(self.mapableTrajecrtoryFunction, x, y), range(repeat))
        pool.close()
        pool.join()
        meanCost, meanTraj = 0., 0.
        for cost, traj in result:
            meanCost += cost
            meanTraj += traj
        size = len(result)
        return meanCost / size, meanTraj / size

    
    def runOneTrajectoryOpti(self, x, y):
        #self.tm.saveTraj = True
        cost, trajTime, lastX = self.tm.runTrajectoryOpti(x, y)
        #cost, trajTime, lastX = self.tm.runTrajectoryOpti(x, y)
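runNtrajectoryMulti above passes a throwaway index to mapableTrajecrtoryFunction only because pool.map supplies exactly one argument per item. A sketch of the same idea with starmap instead, where run_once is a stand-in for self.runOneTrajectory:

from multiprocessing import Pool

def run_once(x, y):
    # stand-in for one stochastic trajectory; pretend it returns (cost, trajectory time)
    return x + y, x * y

def run_n_parallel(x, y, repeat, processes=4):
    # starmap unpacks each (x, y) tuple, so no extra "useless" argument is needed
    with Pool(processes=processes) as pool:
        results = pool.starmap(run_once, [(x, y)] * repeat)
    costs, times = zip(*results)
    return sum(costs) / repeat, sum(times) / repeat

if __name__ == "__main__":
    print(run_n_parallel(1.0, 2.0, repeat=8))  # (3.0, 2.0)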
Example #4
def Pool(processes=None, initializer=None, initargs=()):
    '''
    Returns a process pool object
    '''
    from multiprocess.pool import Pool
    return Pool(processes, initializer, initargs)
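The multiprocess package wrapped by this factory mirrors the standard library's multiprocessing API but serializes with dill, so it can ship lambdas and closures to worker processes. A brief usage sketch, assuming multiprocess is installed:

from multiprocess.pool import Pool  # dill-based drop-in for multiprocessing.pool.Pool

if __name__ == "__main__":
    pool = Pool(processes=2)
    try:
        # a lambda like this would fail to pickle under the stdlib multiprocessing.Pool
        print(pool.map(lambda x: x * x, range(5)))  # [0, 1, 4, 9, 16]
    finally:
        pool.close()
        pool.join()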
Example #5
    def create_test_raw_data(self,
                             ticker_list=None,
                             start_date=None,
                             finish_date=None,
                             folder_prefix=None):
        """Downloads FX tick data from DukasCopy and then dumps each ticker in a separate HDF5 file if a folder is specified.
        If no folder is specified returns a list of DataFrames (note: can be a very large list in memory)

        Parameters
        ----------
        ticker_list : str (list)
            List of FX tickers to download

        start_date : datetime/str
            Start date of FX tick data download

        finish_date : datetime/str
            Finish date of FX tick data download

        folder_prefix : str
            Folder to dump everything

        Returns
        -------
        DataFrame (list)
        """

        from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

        if start_date is None and finish_date is None:
            finish_date = datetime.datetime.utcnow().date() - timedelta(
                days=30)
            start_date = finish_date - timedelta(days=30 * 15)

            start_date = self._compute_random_date(start_date, finish_date)
            finish_date = start_date + timedelta(days=90)

        df_list = []
        result = []

        # from multiprocessing.dummy import Pool  # threading-based alternative
        from multiprocess.pool import Pool  # actually spawns new processes
        import time

        # If we don't specify a folder
        if folder_prefix is None:

            mini_ticker_list = self._split_list(ticker_list, 2)

            # Use multiprocess to speed up the download
            for mini in mini_ticker_list:
                pool = Pool(processes=2)

                for ticker in mini:
                    time.sleep(1)
                    self.logger.info("Loading " + ticker)
                    md_request = MarketDataRequest(
                        start_date=start_date,
                        finish_date=finish_date,
                        category='fx',
                        tickers=ticker,
                        fields=['bid', 'ask', 'bidv', 'askv'],
                        data_source='dukascopy',
                        freq='tick')

                    # self._download(md_request)
                    result.append(
                        pool.apply_async(self._download,
                                         args=(
                                             md_request,
                                             folder_prefix,
                                         )))

                pool.close()
                pool.join()

        else:
            market = Market(market_data_generator=MarketDataGenerator())

            for ticker in ticker_list:

                md_request = MarketDataRequest(
                    start_date=start_date,
                    finish_date=finish_date,
                    category='fx',
                    tickers=ticker,
                    fields=['bid', 'ask', 'bidv', 'askv'],
                    data_source='dukascopy',
                    freq='tick')

                df = market.fetch_market(md_request=md_request)

                df.columns = ['bid', 'ask', 'bidv', 'askv']

                df['venue'] = 'dukascopy'
                df['ticker'] = ticker

                # print(df)

                if folder_prefix is not None:
                    self.dump_hdf5_file(df,
                                        folder_prefix + "_" + ticker + ".h5")
                    # df.to_csv(folder_prefix + "_" + ticker + ".csv") # CSV files can be very large, so try to avoid
                else:
                    df_list.append(df)

            return df_list
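A pared-down sketch of the apply_async pattern used in the no-folder branch above. download is a stand-in for self._download; calling get() on each AsyncResult also re-raises any exception that occurred in a worker, which close()/join() alone would hide:

from multiprocess.pool import Pool
import time

def download(ticker, folder):
    time.sleep(0.1)  # placeholder for the real tick-data request
    return "{}/{}.h5".format(folder, ticker)

if __name__ == "__main__":
    tickers = ["EURUSD", "GBPUSD", "USDJPY"]
    pool = Pool(processes=2)
    handles = [pool.apply_async(download, args=(t, "out")) for t in tickers]
    pool.close()
    pool.join()
    print([h.get() for h in handles])  # ['out/EURUSD.h5', 'out/GBPUSD.h5', 'out/USDJPY.h5']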
Example #6
    def __call__(
        self,
        batch_or_dataset: Union[Batch, Dataset],
        columns: List[str],
        mask: List[int] = None,
        store_compressed: bool = None,
        store: bool = None,
        num_proc: int = None,
        *args,
        **kwargs,
    ):

        if mask:
            raise NotImplementedError(
                "Mask not supported for SubpopulationCollection yet.")

        if not num_proc or num_proc == 1:
            slices = []
            slice_membership = []
            # Apply each slicebuilder in sequence
            for i, slicebuilder in tqdm(enumerate(self.subpopulations)):
                # Apply the slicebuilder
                batch_or_dataset, slices_i, slice_membership_i = slicebuilder(
                    batch_or_dataset=batch_or_dataset,
                    columns=columns,
                    mask=mask,
                    store_compressed=store_compressed,
                    store=store,
                    *args,
                    **kwargs,
                )

                # Add in the slices and slice membership
                slices.extend(slices_i)
                slice_membership.append(slice_membership_i)

        else:
            # TODO(karan): cleanup, make mp.Pool support simpler across the library
            with Pool(num_proc) as pool:
                batches_or_datasets, slices, slice_membership = zip(*pool.map(
                    lambda sb: sb(
                        batch_or_dataset=batch_or_dataset,
                        columns=columns,
                        mask=mask,
                        store_compressed=store_compressed,
                        store=store,
                        *args,
                        **kwargs,
                    ),
                    [slicebuilder for slicebuilder in self.subpopulations],
                ))

                # Combine all the slices
                slices = list(tz.concat(slices))

            def _store_updates(batch, indices):

                # Each Subpopulation will generate slices
                for i, subpopulation in enumerate(self.subpopulations):
                    updates = subpopulation.construct_updates(
                        slice_membership=slice_membership[i][indices],
                        columns=columns,
                        mask=mask,
                        # TODO(karan): this option should be set correctly
                        compress=True,
                    )

                    batch = subpopulation.store(
                        batch=batch,
                        updates=updates,
                    )

                return batch

            if isinstance(batch_or_dataset, Dataset):
                batch_or_dataset = batch_or_dataset.map(
                    _store_updates,
                    with_indices=True,
                    batched=True,
                )

                for subpopulation in self.subpopulations:
                    # Update the Dataset's history
                    batch_or_dataset.update_tape(
                        path=[SLICEBUILDERS, subpopulation.category],
                        identifiers=subpopulation.identifiers,
                        columns=columns,
                    )

            else:
                batch_or_dataset = recmerge(*batches_or_datasets,
                                            merge_sequences=True)

        # Combine all the slice membership matrices
        slice_membership = np.concatenate(slice_membership, axis=1)

        return batch_or_dataset, slices, slice_membership
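Two details of the parallel branch above are easy to miss: the lambda handed to pool.map only survives serialization if Pool here is the dill-based multiprocess pool (the stdlib pickler rejects lambdas), and zip(*...) is the usual idiom for turning a list of result triples back into three sequences. A tiny illustration of the latter with plain data:

triples = [("batch-a", ["slice-1"], [1, 0]), ("batch-b", ["slice-2"], [0, 1])]
batches, slices, membership = zip(*triples)
# batches    == ('batch-a', 'batch-b')
# slices     == (['slice-1'], ['slice-2'])
# membership == ([1, 0], [0, 1])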
Example #7
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
    '''
    Returns a process pool object
    '''
    from multiprocess.pool import Pool
    return Pool(processes, initializer, initargs, maxtasksperchild)
Example #8
    def __enter__(self):
        if self.backend is None or isinstance(self.backend, int):
            self._backend_activated = Pool(self.backend)
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            self.backend.__enter__()
        return self
Example #9
def save(
    target: Optional[str],
    name: Optional[str],
    config: Optional[str],
    seed: Optional[str],
    destination: Optional[str],
    multiprocessing: bool,
) -> None:
    """Save the sketch to a SVG file.

    TARGET may either point at a Python file or at a directory and is interpreted in the same
    way as the `vsk run` command (see `vsk run --help`).

    By default, the output is named after the sketch and the provided options. An alternative
    name may be provided with the --name option.

    If the sketch has parameters, their default values are used. Alternatively, a pre-existing
    configuration can be used instead with the --config option.

    By default, a random seed is used for vsketch's random number generator. If --config is
    used, the seed saved in the configuration is used instead. A seed may also be provided with
    the --seed option, in which case it will override the configuration's seed.

    The --seed option also accepts a seed range in the form of FIRST..LAST, e.g. 0..100. In this
    case, one output file per seed is generated.

    If the number of files to generate is 4 or more, all available cores are used for the
    process. This behaviour can be disabled with --no-multiprocessing or the
    VSK_MULTIPROCESSING environment variable.

    By default, all SVGs are saved in the sketch's "output" sub-directory. This can be
    overridden using the --destination option.
    """

    try:
        path = _find_sketch_script(target)
    except ValueError as err:
        print_error("Sketch could not be found: ", str(err))
        raise click.Abort()

    # load configuration
    param_set: Dict[str, vsketch.ParamType] = {}
    config_postfix = ""
    if config is not None:
        config_path = pathlib.Path(config)
        if not config_path.exists():
            config_path = get_config_path(path) / (config + ".json")

        if config_path.exists():
            param_set = load_config(config_path)
            config_postfix = "_" + config_path.stem
        else:
            print_error("Config file not found: ", str(config_path))

    # compute name
    if name is None:
        name = canonical_name(path) + config_postfix
    seed_in_name = seed is not None

    if seed is None:
        if param_set is not None and "__seed__" in param_set:
            seed_start = seed_end = int(param_set["__seed__"])
        else:
            seed_start = seed_end = random.randint(0, 2**31 - 1)
    else:
        try:
            seed_start, seed_end = _parse_seed(seed)
        except ValueError as err:
            print_error(f"Could not parse seed {seed}: ", str(err))
            raise click.Abort()

    # prepare output path
    if destination is not None:
        output_path = pathlib.Path(destination)
        if not output_path.exists():
            print_error("Provided output path does not exist: ",
                        str(output_path.absolute()))
            raise click.Abort()
        if not output_path.is_dir():
            print_error("Provided output path is not a directory: ",
                        str(output_path.absolute()))
            raise click.Abort()
    else:
        output_path = path.parent / "output"
        if not output_path.exists():
            output_path.mkdir()
        elif not output_path.is_dir():
            print_error("Could not create output directory: ",
                        str(output_path))
            raise click.Abort()

    # noinspection PyShadowingNames
    def _write_output(seed: int) -> None:
        # this needs to be there because the sketch class cannot be pickled apparently
        sketch_class = load_sketch_class(path)
        if sketch_class is None:
            print_error("Could not load script: ", str(path))
            raise click.Abort()

        sketch_class.set_param_set(param_set)

        output_name = name
        if seed_in_name:
            output_name += "_s" + str(seed)  # type: ignore
        output_name += ".svg"  # type: ignore

        output_file = output_path / output_name

        sketch = sketch_class.execute(finalize=True, seed=seed)

        if sketch is None:
            print_error("Could not execute script: ", str(path))
            raise click.Abort()

        doc = sketch.vsk.document
        with open(output_file, "w") as fp:
            print_info("Exporting SVG: ", str(output_file))
            vp.write_svg(fp,
                         doc,
                         source_string=f"vsketch save -s {seed} {path}",
                         color_mode="layer")

    seed_range = range(seed_start, seed_end + 1)

    if len(seed_range) < 4 or not multiprocessing:
        for s in seed_range:
            _write_output(s)
    else:
        with Pool() as p:
            list(p.imap(_write_output, seed_range))
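_parse_seed is not shown in this excerpt; the following is only a hypothetical sketch of the FIRST..LAST parsing that the docstring describes, and the real vsketch helper may differ:

from typing import Tuple

def parse_seed_range(seed: str) -> Tuple[int, int]:
    # hypothetical stand-in for _parse_seed: "42" -> (42, 42), "0..100" -> (0, 100)
    if ".." in seed:
        first, last = seed.split("..", 1)
        start, end = int(first), int(last)
    else:
        start = end = int(seed)
    if start > end:
        raise ValueError("empty seed range: {}".format(seed))
    return start, end

print(parse_seed_range("0..100"))  # (0, 100)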
Example #10
class ParallelBackend:
    """
    The unified backend for parallelization.
    
    Currently, we support `multiprocess`, `dask`, `sharedmem` and `loky`.
    `multiprocess` usually has better performance on single-node machines, while
    `dask` can be used for multi-node parallelization. Note the following known
    issues: when used for sampling, (1) `dask` and `loky` do not respect the
    global bayesfast random seed; (2) `sharedmem` may not display the progress
    messages correctly (multiple messages in the same line); (3) `loky` does not
    print any messages at all in Jupyter. So we recommend using the default
    `multiprocess` backend when possible.
    
    Parameters
    ----------
    backend : None, int, Pool, Client or MapReduce, optional
        The backend for parallelization. If `None` or `int`, will be passed as
        the `processes` argument to initialize a Pool in a with context. Set to
        `None` by default.
    """
    def __new__(cls, backend=None):
        if isinstance(backend, ParallelBackend):
            return backend
        else:
            return super(ParallelBackend, cls).__new__(cls)

    def __init__(self, backend=None):
        if isinstance(backend, ParallelBackend):
            return
        self.backend = backend

    def __enter__(self):
        if self.backend is None or isinstance(self.backend, int):
            self._backend_activated = Pool(self.backend)
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            self.backend.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.backend is None or isinstance(self.backend, int):
            self._backend_activated.close()
            self._backend_activated.join()
            self._backend_activated = None
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            self.backend.__exit__(exc_type, exc_val, exc_tb)

    @property
    def backend(self):
        return self._backend

    @backend.setter
    def backend(self, be):
        if be is None or (isinstance(be, int) and be > 0):
            pass
        elif isinstance(be, Pool):
            pass
        elif HAS_RAY and isinstance(be, RayPool):
            pass
        elif HAS_DASK and isinstance(be, Client):
            pass
        elif HAS_SHAREDMEM and isinstance(be, MapReduce):
            pass
        elif HAS_LOKY and isinstance(be,
                                     reusable_executor._ReusablePoolExecutor):
            pass
        # elif be == 'serial':
        #     pass
        else:
            raise ValueError('invalid value for backend.')
        self._backend_activated = be
        self._backend = be

    @property
    def backend_activated(self):
        return self._backend_activated

    @property
    def kind(self):
        if self.backend is None or isinstance(self.backend, int):
            return 'multiprocess'
        elif isinstance(self.backend, Pool):
            return 'multiprocess'
        elif HAS_RAY and isinstance(self.backend, RayPool):
            return 'ray'
        elif HAS_DASK and isinstance(self.backend, Client):
            return 'dask'
        elif HAS_SHAREDMEM and isinstance(self.backend, MapReduce):
            return 'sharedmem'
        elif HAS_LOKY and isinstance(self.backend,
                                     reusable_executor._ReusablePoolExecutor):
            return 'loky'
        # elif self.backend == 'serial':
        #     return 'serial'
        else:
            raise RuntimeError('unexpected value for self.backend.')

    def map(self, fun, *iters):
        if self.backend_activated is None:
            raise RuntimeError(
                'the backend is not activated. Please use it in '
                'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return self.backend_activated.starmap(fun, zip(*iters))
        elif HAS_RAY and isinstance(self.backend_activated, RayPool):
            return self.backend_activated.starmap(fun, list(zip(*iters)))
            # https://github.com/ray-project/ray/issues/11451
            # that's why I need to explicitly convert it to a list for now
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.gather(self.backend_activated.map(fun, *iters))
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            return self.backend_activated.map(fun,
                                              list(zip(*iters)),
                                              star=True)
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return self.gather(self.backend_activated.map(fun, *iters))
        # elif self.backend_activated == 'serial':
        #     return [deepcopy(fun)(*[i[j] for i in iters]) for j in range(l)]
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')

    def map_async(self, fun, *iters):
        if self.backend_activated is None:
            raise RuntimeError(
                'the backend is not activated. Please use it in '
                'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return self.backend_activated.starmap_async(fun, zip(*iters))
        elif HAS_RAY and isinstance(self.backend_activated, RayPool):
            return self.backend_activated.starmap_async(fun, list(zip(*iters)))
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.backend_activated.map(fun, *iters)
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            warnings.warn(
                'sharedmem does not support map_async. Using map '
                'instead.', RuntimeWarning)
            return self.backend_activated.map(fun,
                                              list(zip(*iters)),
                                              star=True)
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return self.backend_activated.map(fun, *iters)
        # elif self.backend_activated == 'serial':
        #     return self.map(fun, *iters)
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')

    def gather(self, async_result):
        if self.backend_activated is None:
            raise RuntimeError(
                'the backend is not activated. Please use it in '
                'a with context.')
        elif isinstance(self.backend_activated, Pool):
            return async_result.get()
        elif isinstance(self.backend_activated, RayPool):
            return async_result.get()
        elif HAS_DASK and isinstance(self.backend_activated, Client):
            return self.backend_activated.gather(async_result)
        elif HAS_SHAREDMEM and isinstance(self.backend_activated, MapReduce):
            return async_result
        elif HAS_LOKY and isinstance(self.backend_activated,
                                     reusable_executor._ReusablePoolExecutor):
            return list(async_result)
        # elif self.backend_activated == 'serial':
        #     return async_result
        else:
            raise RuntimeError('unexpected value for self.backend_activated.')
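A minimal usage sketch for ParallelBackend, assuming it is importable from the module above. math.hypot is just a stand-in workload; map() has to be called inside the with block because that is where the pool is created:

import math

if __name__ == "__main__":
    with ParallelBackend(4) as pb:  # None or an int builds a multiprocess Pool on entry
        xs = [3.0, 5.0, 8.0]
        ys = [4.0, 12.0, 15.0]
        print(pb.map(math.hypot, xs, ys))  # [5.0, 13.0, 17.0]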
Example #11
    def __init__(self, processes=None, initializer=None, initargs=()):
        ProcessPool.__init__(self, processes, initializer, initargs)
        return
Example #12
        return wav

    def local_mel2samp(filepath):
        filepath = filepath.split("|")[0]
        new_filepath = filepath + ".pt"
        if os.path.isfile(new_filepath):
            print("skip", new_filepath)
            return
        audio = preprocess_wav(filepath, sampling_rate=args.sampling_rate)
        audio = torch.FloatTensor(audio.astype(np.float32))

        melspectrogram = mel2samp.get_mel(audio)

        print(new_filepath)
        torch.save(melspectrogram, new_filepath)

    filepaths = files_to_list(args.filelist_path)

    with Pool(args.num_processes) as pool:  # ThreadPool(8) as pool:
        # list(tqdm(pool.imap(preprocess_speaker, speaker_dirs), dataset_name, len(speaker_dirs),
        list(pool.map(local_mel2samp, filepaths))

    # for filepath in filepaths:
    #     filepath = filepath.split("|")[0]
    #     audio = preprocess_wav(filepath, sampling_rate=args.sampling_rate)
    #     filename = os.path.basename(filepath)
    #     new_filepath = args.output_dir + '/' + filename + '.npy'
    #     print(new_filepath)
    #     np.save(new_filepath, audio)
Example #13
        ('SN20_Bleb_fromStart', 14, 0, 1, -235, 30),
        ('SN20_Bleb_fromStart', 14, 0, 2, 120, 230),
        ('SN20_Bleb_fromStart', 14, 0, 3, -230, 105),
        ('SN20_Bleb_fromStart', 14, 0, 4, 205, 35),
        ('SN20_Bleb_fromStart', 14, 1, 2, 110, -180),
        ('SN20_Bleb_fromStart', 14, 1, 3, -220, 25),
        ('SN20_Bleb_fromStart', 14, 1, 4, -150, 0),
        ('SN20_Bleb_fromStart', 14, 2, 3, 160, -130),
        ('SN20_Bleb_fromStart', 14, 2, 4, -75, 210),
        ('SN20_Bleb_fromStart', 14, 3, 4, 220, 105),
        ('SN20_Bleb_fromStart', 15, 0, 1, 0, 235),
        ('SN20_Bleb_fromStart', 16, 0, 1, 0, -225),
        ('SN20_Bleb_fromStart', 16, 0, 2, -80, 130),
        ('SN20_Bleb_fromStart', 16, 1, 2, -60, -120),
        ('SN20_Bleb_fromStart', 17, 0, 2, -180, 0),
        ('SN20_Bleb_fromStart', 17, 0, 3, 155, 0),
        ('SN20_Bleb_fromStart', 17, 1, 2, -225, -115),
        ('SN20_Bleb_fromStart', 17, 1, 3, -135, 20),
        ('SN20_Bleb_fromStart', 18, 0, 1, -110, -175),
        ('SN20_Bleb_fromStart', 19, 0, 1, 70, -150),
        ('SN20_Bleb_fromStart', 19, 1, 2, -100, 115),
        ('SN20_Bleb_fromStart', 19, 1, 3, 60, -170),
        ('SN20_Bleb_fromStart', 19, 2, 3, 135, 185),
        ('SN20_Bleb_fromStart', 20, 0, 1, 175, 20),
        ('SN20_Bleb_fromStart', 20, 0, 2, 205, -60),
        ('SN20_Bleb_fromStart', 20, 1, 2, -135, 80),
    ]
    _p = Pool(CPUS_TO_USE)
    _answers = _p.starmap(process_fake_following, _arguments)
    _p.close()
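Pool.starmap unpacks each argument tuple positionally, so process_fake_following above receives six arguments per tuple. A tiny self-contained illustration of the same mechanics:

from multiprocessing import Pool

def describe(name, frame, i, j, dx, dy):
    return "{} frame {}: pair ({}, {}) shifted by ({}, {})".format(name, frame, i, j, dx, dy)

if __name__ == "__main__":
    arguments = [("clipA", 1, 0, 1, -5, 10), ("clipA", 2, 0, 2, 3, -7)]
    with Pool(processes=2) as pool:
        for line in pool.starmap(describe, arguments):
            print(line)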
Example #14
class DataPipelineWithReward:
    """
    Creates a data pipeline that also outputs discounted reward.
    """
    def __init__(self, observables: List[AgentHandler],
                 actionables: List[AgentHandler],
                 mission_handlers: List[AgentHandler], nsteps, gamma,
                 data_directory, num_workers, worker_batch_size,
                 min_size_to_dequeue):
        """
        Sets up a tensorflow dataset to load videos from a given data directory.
        :param data_directory: the directory of the data to be loaded, eg: 'minerl.herobraine_parse/output/rendered/'
        """

        self.data_dir = data_directory
        self.observables = observables
        self.actionables = actionables
        self.mission_handlers = mission_handlers
        # self.vectorizer = vectorizer

        self.number_of_workers = num_workers
        self.worker_batch_size = worker_batch_size
        self.size_to_dequeue = min_size_to_dequeue
        self.nsteps = nsteps
        self.gamma = gamma

        self.processing_pool = Pool(self.number_of_workers)
        self.m = multiprocessing.Manager()
        self.data_queue = self.m.Queue(maxsize=self.size_to_dequeue //
                                       self.worker_batch_size * 4)

        pool_size = self.size_to_dequeue * 4
        self.random_queue = PriorityQueue(maxsize=pool_size)

    def batch_iter(self, batch_size):
        """
        Returns a generator for iterating through batches of the dataset.
        :param batch_size:
        :param number_of_workers:
        :param worker_batch_size:
        :param size_to_dequeue:
        :return:
        """
        logger.info("Starting batch iterator on {}".format(self.data_dir))
        data_list = self._get_all_valid_recordings(self.data_dir)

        load_data_func = self._get_load_data_func(self.data_queue, self.nsteps,
                                                  self.worker_batch_size,
                                                  self.mission_handlers,
                                                  self.observables,
                                                  self.actionables, self.gamma)
        map_promise = self.processing_pool.map_async(load_data_func, data_list)

        # We map the files -> load_data -> batch_pool -> random shuffle -> yield.
        # batch_pool = []
        start = 0
        incr = 0
        while (not map_promise.ready() or not self.data_queue.empty()
               or not self.random_queue.empty()):
            #print("d: {} r: {}".format(data_queue.qsize(), random_queue.qsize()))

            while not self.data_queue.empty() and not self.random_queue.full():
                for ex in self.data_queue.get():
                    if not self.random_queue.full():
                        r_num = np.random.rand(1)[0] * (1 - start) + start
                        self.random_queue.put((r_num, ex))
                        incr += 1
                        # print("d: {} r: {} rqput".format(data_queue.qsize(), random_queue.qsize()))
                    else:
                        break

            if incr > self.size_to_dequeue:
                if self.random_queue.qsize() < (batch_size):
                    if map_promise.ready():
                        break
                    else:
                        continue
                batch_with_incr = [
                    self.random_queue.get() for _ in range(batch_size)
                ]

                r1, batch = zip(*batch_with_incr)
                start = 0
                traj_obs, traj_acts, traj_handlers, traj_n_obs, discounted_rewards, elapsed = zip(
                    *batch)

                observation_batch = [
                    HandlerCollection({
                        o: np.asarray(traj_ob[i])
                        for i, o in enumerate(self.observables)
                    }) for traj_ob in traj_obs
                ]
                action_batch = [
                    HandlerCollection({
                        a: np.asarray(traj_act[i])
                        for i, a in enumerate(self.actionables)
                    }) for traj_act in traj_acts
                ]
                mission_handler_batch = [
                    HandlerCollection({
                        m: np.asarray(traj_handler[i])
                        for i, m in enumerate(self.mission_handlers)
                    }) for traj_handler in traj_handlers
                ]
                next_observation_batch = [
                    HandlerCollection({
                        o: np.asarray(traj_n_ob[i])
                        for i, o in enumerate(self.observables)
                    }) for traj_n_ob in traj_n_obs
                ]
                yield observation_batch, action_batch, mission_handler_batch, next_observation_batch, discounted_rewards, elapsed
            # Move on to the next batch bool.
            # Todo: Move to a running pool, sampling as we enqueue. This is basically the random queue impl.
            # Todo: This will prevent the data from getting arbitrarily segmented.
            # batch_pool = []
        try:
            map_promise.get()
        except RuntimeError as e:
            logger.error("Failure in data pipeline: {}".format(e))

        logger.info("Epoch complete.")

    def close(self):
        self.processing_pool.close()
        self.processing_pool.join()

    ############################
    ## PRIVATE METHODS
    #############################

    @staticmethod
    def _get_load_data_func(data_queue, nsteps, worker_batch_size,
                            mission_handlers, observables, actionables, gamma):
        def _load_data(inst_dir):
            recording_path = str(os.path.join(inst_dir, 'recording.mp4'))
            univ_path = str(os.path.join(inst_dir, 'univ.json'))

            try:
                cap = cv2.VideoCapture(recording_path)
                # Load the per-frame universal data from univ.json
                with open(univ_path, 'r') as f:
                    univ = {int(k): v for (k, v) in (json.load(f)).items()}
                    univ = OrderedDict(univ)
                    univ = np.array(list(univ.values()))

                # Walk through the video frame by frame
                batches = []
                rewards = []
                frames_queue = Queue(maxsize=nsteps)

                # Loop through the video and construct frames
                # of observations to be sent via the multiprocessing queue
                # in chunks of worker_batch_size to the batch_iter loop.
                frame_num = 0
                while True:
                    ret, frame = cap.read()

                    if not ret or frame_num >= len(univ):
                        break
                    else:
                        #print("Batches {} and worker batch size {}".format(len(batches), self.worker_batch_size))
                        if len(batches) >= worker_batch_size:
                            data_queue.put(batches)
                            batches = []

                        try:
                            # Construct a single observation object.
                            vf = (np.clip(frame[:, :, ::-1], 0, 255))
                            uf = univ[frame_num]

                            frame = {'pov': vf}
                            frame.update(uf)

                            cur_reward = 0
                            for m in mission_handlers:
                                try:
                                    if isinstance(m, RewardHandler):
                                        cur_reward += m.from_universal(frame)
                                except NotImplementedError:
                                    pass
                            rewards.append(cur_reward)

                            #print("Frames queue size {}".format(frames_queue.qsize()))
                            frames_queue.put(frame)
                            if frames_queue.full():
                                next_obs = [
                                    o.from_universal(frame)
                                    for o in observables
                                ]
                                frame = frames_queue.get()
                                obs = [
                                    o.from_universal(frame)
                                    for o in observables
                                ]
                                act = [
                                    a.from_universal(frame)
                                    for a in actionables
                                ]
                                mission = []
                                for m in mission_handlers:
                                    try:
                                        mission.append(m.from_universal(frame))
                                    except NotImplementedError:
                                        mission.append(None)
                                        pass

                                batches.append(
                                    (obs, act, mission, next_obs,
                                     DataPipelineWithReward.
                                     _calculate_discount_rew(
                                         rewards[-nsteps:],
                                         gamma), frame_num + 1 - nsteps))
                        except Exception as e:
                            # If there is some error constructing the batch we just start a new sequence
                            # at the point that the exception was observed
                            logger.warning(
                                "Exception {} caught in the middle of parsing {} in "
                                "a worker of the data pipeline.".format(
                                    e, inst_dir))

                    frame_num += 1

                return batches
            except Exception as e:
                logger.error("Caught Exception")
                raise e
                return None

        return _load_data

    @staticmethod
    def _calculate_discount_rew(rewards, gamma):
        total_reward = 0
        for i, rew in enumerate(rewards):
            total_reward += (gamma**i) * rew
        return total_reward

    @staticmethod
    def _get_all_valid_recordings(path):
        directoryList = []

        # return nothing if path is a file
        if os.path.isfile(path):
            return []

        # add dir to directorylist if it contains .txt files
        if len([f for f in os.listdir(path) if f.endswith('.mp4')]) > 0:
            if len([f for f in os.listdir(path) if f.endswith('.json')]) > 0:
                directoryList.append(path)

        for d in os.listdir(path):
            new_path = os.path.join(path, d)
            if os.path.isdir(new_path):
                directoryList += DataPipelineWithReward._get_all_valid_recordings(
                    new_path)

        directoryList = np.array(directoryList)
        np.random.shuffle(directoryList)
        return directoryList.tolist()
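The _calculate_discount_rew helper above computes sum(gamma**i * reward_i) over the last nsteps rewards; a quick standalone check of that arithmetic:

def discount(rewards, gamma):
    # mirrors DataPipelineWithReward._calculate_discount_rew
    return sum((gamma ** i) * r for i, r in enumerate(rewards))

print(round(discount([1.0, 1.0, 1.0], 0.9), 6))  # 1 + 0.9 + 0.81 = 2.71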