Example #1
    def test_join_flattens_filters(self):

        filter1 = Pipe.join([IdentityFilter(), IdentityFilter()])
        filter2 = Pipe.join([filter1, IdentityFilter()])
        filter3 = Pipe.join([filter2, filter2])

        self.assertEqual(6, len(filter3._filters))
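
This test pins down a useful invariant: Pipe.join flattens nested pipes, so joining pipes that were themselves produced by join yields a single pipe whose _filters holds all six leaf filters rather than a nested structure. A minimal sketch of the flattening idea, as a hypothetical stand-in rather than coba's actual implementation:

    # Hypothetical sketch of filter flattening; not coba's actual code.
    def flatten(filters):
        leaves = []
        for f in filters:
            if hasattr(f, '_filters'):              # an already-joined pipe
                leaves.extend(flatten(f._filters))  # splice in its leaf filters
            else:
                leaves.append(f)
        return leaves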
Example #2
    def test_run(self):
        source = MemorySource(list(range(10)))
        sink = MemorySink()

        Pipe.join(source, [Pipe_Tests.ProcessNameFilter()], sink).run()

        self.assertEqual(sink.items, ['MainProcess'] * 10)
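
This is the canonical source/filters/sink pipeline: run() reads items from the source, threads them through each filter in order, and writes the result to the sink. A self-contained sketch of that contract, using hypothetical stand-ins for the three interfaces the test exercises (the real coba classes may differ in detail):

    # Hypothetical stand-ins for the Source/Filter/Sink contract; not coba's code.
    from multiprocessing import current_process

    class ListSource:
        def __init__(self, items): self._items = items
        def read(self): return iter(self._items)

    class ListSink:
        def __init__(self): self.items = []
        def write(self, items): self.items.extend(items)

    class ProcessNameFilter:
        def filter(self, items):
            return [current_process().name for _ in items]

    def run(source, filters, sink):
        items = source.read()
        for f in filters:
            items = f.filter(items)
        sink.write(items)

    sink = ListSink()
    run(ListSource(range(10)), [ProcessNameFilter()], sink)
    assert sink.items == ['MainProcess'] * 10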
Example #3
    def from_file(filename: str) -> 'Result':
        """Create a Result from a transaction file."""
        
        #Why is this here??? This is really confusing in practice
        #if filename is None or not Path(filename).exists(): return Result()

        json_encode = Cartesian(JsonEncode())
        json_decode = Cartesian(JsonDecode())

        Pipe.join(DiskSource(filename), [json_decode, ResultPromote(), json_encode], DiskSink(filename, 'w')).run()
        
        return Result.from_transactions(Pipe.join(DiskSource(filename), [json_decode]).read())
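
from_file performs an in-place migration before reading: it streams the file's JSON-encoded transactions through ResultPromote (presumably upgrading older transaction formats to the current version) and writes them back to the same file, then re-reads the migrated file to build the Result. The commented-out guard the author flags as confusing would have short-circuited with an empty Result when the file was missing; without it a nonexistent filename reaches DiskSource directly.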
Example #4
    def test_repr3(self):

        filters = [Pipe_Tests.ReprFilter(), Pipe_Tests.ReprFilter()]
        sink = Pipe_Tests.ReprSink()

        expected_repr = "ReprFilter,ReprFilter,ReprSink"
        self.assertEqual(expected_repr, str(Pipe.join(filters, sink)))
Example #5
    def test_repr2(self):

        source = Pipe_Tests.ReprSource([0, 1, 2])
        filters = [Pipe_Tests.ReprFilter(), Pipe_Tests.ReprFilter()]

        expected_repr = "ReprSource,ReprFilter,ReprFilter"
        self.assertEqual(expected_repr, str(Pipe.join(source, filters)))
Example #6
    def test_repr4(self):

        filter = Pipe.FiltersFilter(
            [Pipe_Tests.ReprFilter(),
             Pipe_Tests.ReprFilter()])

        expected_repr = "ReprFilter,ReprFilter"
        self.assertEqual(expected_repr, str(filter))
Example #7
    def test_repr1(self):

        source = Pipe_Tests.ReprSource([0, 1, 2])
        filters = [Pipe_Tests.ReprFilter(), Pipe_Tests.ReprFilter()]
        sink = Pipe_Tests.ReprSink()

        expected_repr = "ReprSource,ReprFilter,ReprFilter,ReprSink"
        self.assertEqual(expected_repr, str(Pipe.join(source, filters, sink)))
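
Examples #4 through #7 together pin down the repr contract: str() of a joined pipe is the comma-separated repr of each of its parts, in source, filters, sink order, and the same holds for any subset of the three. A minimal sketch of how such a repr could be produced (hypothetical, not coba's actual code):

    # Hypothetical sketch of the comma-joined repr; not coba's actual code.
    class Joined:
        def __init__(self, parts): self._parts = parts
        def __repr__(self): return ",".join(map(str, self._parts))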
Example #8
    def __init__(self,
                 reader: Filter[Iterable[str], Any],
                 source: Union[str, Source[Iterable[str]]],
                 label_column: Union[str, int],
                 with_header: bool = True) -> None:

        self._reader = reader

        if isinstance(source, str) and source.startswith('http'):
            self._source = Pipe.join(HttpSource(source), [ResponseToLines()])
        elif isinstance(source, str):
            self._source = DiskSource(source)
        else:
            self._source = source

        self._label_column = label_column
        self._with_header = with_header
        self._interactions = cast(Optional[Sequence[Interaction]], None)
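
The constructor normalizes its source argument: a string beginning with 'http' becomes an HttpSource piped through ResponseToLines so downstream code always sees an iterable of lines, any other string is treated as a local path and wrapped in DiskSource, and anything else is assumed to already satisfy the Source interface. The reader filter is stored as-is for later use, and _interactions starts as None until interactions are materialized.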
Example #9
        def _construct(item:Any) -> Sequence[Any]:
            result = None

            if isinstance(item, str) and item in variables:
                result = variables[item]

            if isinstance(item, str) and item not in variables:
                result = CobaRegistry.construct(item)

            if isinstance(item, dict):
                result = CobaRegistry.construct(item)

            if isinstance(item, list):
                if any([ isinstance(i,list) for i in item ]):
                    raise Exception("Recursive structures are not supported in benchmark simulation configs.")
                pieces = list(map(_construct, item))
                result = [ Pipe.join(s, f) for s in pieces[0] for f in product(*pieces[1:])]

            if result is None:
                raise Exception(f"We were unable to construct {item} in the given benchmark file.")

            return result if isinstance(result, collections.abc.Sequence) else [result]
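
_construct resolves one config item at a time: strings are looked up among the named variables first and fall back to CobaRegistry.construct, dicts go straight to the registry, and lists are constructed element-wise and then expanded into every source-with-filters combination via Pipe.join and the Cartesian product of the remaining pieces (nesting beyond one level is rejected outright). For illustration, a list whose pieces construct to 2 sources, 2 first-stage filters and 1 second-stage filter expands to 2 × 2 × 1 = 4 joined pipes. The final line normalizes scalar results into one-element sequences so callers can always iterate.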
Example #10
    def __init__(self,
                 simulations: Sequence[Simulation],
                 shuffle: Sequence[Optional[int]] = [None],
                 take: Optional[int] = None) -> None:
        """Instantiate a Benchmark.

        Args:
            simulations: The collection of simulations to benchmark against.
            shuffle: A collection of seeds to use for simulation shuffling. A seed of `None` means no shuffle will be applied.
            take: The number of interactions to take from each simulation for evaluation.
        """

        sources: List[Simulation] = simulations
        filters: List[Sequence[Filter[Iterable[Interaction],
                                      Iterable[Interaction]]]] = []

        if shuffle != [None]:
            filters.append([Shuffle(seed) for seed in shuffle])

        if take is not None:
            filters.append([Take(take)])

        if len(filters) > 0:
            simulation_sources = [
                cast(Source[Simulation], Pipe.join(s, f))
                for s, f in product(sources, product(*filters))
            ]
        else:
            simulation_sources = list(sources)

        self._simulations: Sequence[Source[Simulation]] = simulation_sources
        self._processes: Optional[int] = None
        self._maxtasksperchild: Optional[int] = None
        self._maxtasksperchild_set: bool = False
        self._chunk_by: Optional[str] = None
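
The constructor expands its inputs eagerly: each optional post-processing step (one Shuffle per seed, a single Take) contributes a list of candidate filters, and product(sources, product(*filters)) yields one piped source per simulation-and-filter combination. For example, 2 simulations with shuffle=[1, 2] and take=100 produce 2 × 2 = 4 simulation sources. Note that the shuffle filters are only added when shuffle differs from the default [None], so the default configuration leaves the simulations untouched.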
Example #11
    def filter(self, items: Iterable[Any]) -> Iterable[Any]:

        if len(self._filters) == 0:
            return items

        try:
            with Manager() as manager:

                stdout_queue = manager.Queue()  #type: ignore
                stdlog_queue = manager.Queue()  #type: ignore

                stdout_writer, stdout_reader = QueueSink(stdout_queue), QueueSource(stdout_queue)
                stdlog_writer, stdlog_reader = QueueSink(stdlog_queue), QueueSource(stdlog_queue)

                class MyPool(multiprocessing.pool.Pool):

                    _missing_error_definition_error_is_new = True

                    def _join_exited_workers(self):

                        for worker in self._pool:
                            if worker.exitcode == 1000 and MyPool._missing_error_definition_error_is_new:
                                #this is a hack... This only works so long as we just
                                #process one job at a time... This is true in our case.
                                #this is necessary because multiprocessing can get stuck
                                #waiting for failed workers and that is frustrating for users.

                                MyPool._missing_error_definition_error_is_new = False

                                message = (
                                    "Coba attempted to evaluate your benchmark in multiple processes but the pickle module was unable to "
                                    "find all the definitions needed to pass the tasks to the processes. The two most common causes of "
                                    "this error are: 1) a learner or simulation is defined in a Jupyter Notebook cell or 2) a necessary "
                                    "class definition exists inside the `__name__=='__main__'` code block in the main execution script. In "
                                    "either case there are two simple solutions: 1) evalute your benchmark in a single processed with no "
                                    "limit on child tasks or 2) define all you classes in a separate python file that is imported when "
                                    "evaluating.")

                                CobaConfig.Logger.log(message)

                            if worker.exitcode is not None and worker.exitcode != 0:
                                #A worker exited in an uncontrolled manner and was unable to clean its job
                                #up. We therefore mark one of the jobs as "finished" but failed to prevent an
                                #infinite wait on a failed job to finish that is actually no longer running.
                                list(self._cache.values())[0]._set(None, (False, None))

                        return super()._join_exited_workers()

                with MyPool(self._processes,
                            maxtasksperchild=self._maxtasksperchild) as pool:

                    # handle not picklable (this is handled by done_or_failed)
                    # handle empty list (this is done by checking result.ready())
                    # handle exceptions in process (unhandled exceptions can cause children to hang so we pass them to stderr)
                    # handle ctrl-c without hanging
                    #   > don't call result.get when KeyboardInterrupt has been hit
                    #   > handle EOFError,BrokenPipeError errors with queue since ctrl-c kills manager
                    # handle AttributeErrors. These occur when... (this is handled by shadowing several pool methods)
                    #   > a class that is defined in a Jupyter Notebook cell is pickled
                    #   > a class that is defined inside the __name__=='__main__' block is pickled
                    # handle Benchmark.evaluate not being called inside of __name__=='__main__' (this is handled by a big try/catch)

                    def done_or_failed(results_or_exception=None):
                        #This method is called once at the completion of map_async. If one of
                        #our jobs threw an exception the argument will be that exception;
                        #otherwise it will be the returned results of all the jobs. This
                        #method is executed on a thread in the Main context.

                        if isinstance(results_or_exception, Exception):
                            from coba.config import CobaConfig

                            if "Can't pickle" in str(
                                    results_or_exception) or "Pickling" in str(
                                        results_or_exception):

                                message = (
                                    str(results_or_exception) +
                                    ". Coba attempted to process your Benchmark on multiple processes and "
                                    "the named class was not able to be pickled. This problem can be fixed in one of two ways: 1) "
                                    "evaluate the benchmark in question on a single process with no limit on the tasks per child or 2) "
                                    "modify the named class to be picklable. The easiest way to make the given class picklable is to "
                                    "add `def __reduce__ (self) return (<the class in question>, (<tuple of constructor arguments>))` to "
                                    "the class. For more information see https://docs.python.org/3/library/pickle.html#object.__reduce__."
                                )

                                CobaConfig.Logger.log(message)
                            else:
                                CobaConfig.Logger.log_exception(
                                    results_or_exception)

                        stdout_writer.write([None])
                        stdlog_writer.write([None])

                    log_thread = Thread(target=Pipe.join(
                        stdlog_reader, [], CobaConfig.Logger.sink).run)
                    log_thread.daemon = True
                    log_thread.start()

                    processor = MultiprocessFilter.Processor(
                        self._filters, stdout_writer, stdlog_writer,
                        self._processes)
                    result = pool.map_async(processor.process,
                                            items,
                                            callback=done_or_failed,
                                            error_callback=done_or_failed,
                                            chunksize=1)

                    # When items is empty the done_or_failed callback will not be called and we'll get stuck waiting for the poison pill.
                    # When items is empty ready() will be true immediately and this check will place the poison pill into the queues.
                    if result.ready(): done_or_failed()

                    try:
                        for item in stdout_reader.read():
                            yield item
                        pool.close()
                    except (KeyboardInterrupt, Exception):
                        try:
                            pool.terminate()
                        except:
                            pass
                        raise
                    finally:
                        pool.join()
                        log_thread.join()

        except RuntimeError as e:
            #This happens when importing main causes this code to run again
            raise CobaFatal(str(e))
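
Most of the machinery here exists to keep multiprocessing failures visible and non-blocking: manager-backed queues relay child stdout and log output to reader pipes in the main process; the MyPool subclass watches for workers that died before they could even unpickle their task and marks the orphaned job finished so map_async cannot wait forever; and done_or_failed writes a None poison pill into both queues so the reader loops terminate whether the jobs succeeded, raised, or never existed (the empty-items case caught by the result.ready() check).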
Example #12
    def __init__(self, filters: Sequence[Filter], stdout: Sink, stdlog: Sink, n_proc: int) -> None:
        self._filter = Pipe.join(filters)
        self._stdout = stdout
        self._stdlog = stdlog
        self._n_proc = n_proc
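
The Processor joins its filter list once at construction, so each worker process runs the entire filter chain as a single pipe over whatever item it is handed; the stdout and stdlog sinks are kept separate so child output can be routed back through the queues shown in the previous example.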
Example #13
    def evaluate(self,
                 learners: Sequence[Learner],
                 result_file: str = None,
                 seed: int = 1) -> Result:
        """Collect observations of a Learner playing the benchmark's simulations to calculate Results.

        Args:
            learners: The collection of learners that we'd like to evaluate.
            result_file: The file we'd like to use for writing/restoring results for the requested evaluation.
            seed: The random seed we'd like to use when choosing which action to take from the learner's predictions.

        Returns:
            See the base class for more information.
        """

        restored = Result.from_file(result_file) if result_file and Path(result_file).exists() else Result()

        n_given_learners = len(learners)
        n_given_simulations = len(self._simulations)

        if len(restored.benchmark) != 0:
            assert n_given_learners == restored.benchmark['n_learners'], "The currently evaluating benchmark doesn't match the given transaction log"
            assert n_given_simulations == restored.benchmark['n_simulations'], "The currently evaluating benchmark doesn't match the given transaction log"

        preamble = []
        preamble.append(Transaction.version())
        preamble.append(
            Transaction.benchmark(n_given_learners, n_given_simulations))
        preamble.extend(Transaction.learners(learners))
        preamble.extend(Transaction.simulations(self._simulations))

        cb = self._chunk_by if self._chunk_by else CobaConfig.Benchmark['chunk_by']
        mp = self._processes if self._processes else CobaConfig.Benchmark['processes']
        mt = self._maxtasksperchild if self._maxtasksperchild_set else CobaConfig.Benchmark['maxtasksperchild']

        tasks = Tasks(self._simulations, learners, seed)
        unfinished = Unfinished(restored)
        chunked = ChunkByTask() if cb == 'task' else ChunkByNone() if cb == 'none' else ChunkBySource()
        process = Transactions()
        transaction_sink = TransactionSink(result_file, restored)

        if mp > 1 or mt is not None:
            process = MultiprocessFilter([process], mp, mt)  #type: ignore

        try:
            Pipe.join(MemorySource(preamble), [], transaction_sink).run()
            Pipe.join(tasks, [unfinished, chunked, process], transaction_sink).run()
        except KeyboardInterrupt:
            CobaConfig.Logger.log(
                "Benchmark evaluation was manually aborted via Ctrl-C")
        except CobaFatal:
            raise
        except Exception as ex:
            CobaConfig.Logger.log_exception(ex)

        return transaction_sink.result
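
evaluate is itself two pipes over the same transaction sink: a preamble pipe that records the version, benchmark shape, learners, and simulations, followed by the main pipe in which Tasks generates the work, Unfinished drops anything the restored log already covers, a chunker groups the remainder (by task, by source, or not at all), and Transactions turns completed work into log entries. Multiprocessing is engaged only when more than one process or a maxtasksperchild limit is requested, and Ctrl-C aborts cleanly while still returning whatever results reached the sink.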
Example #14
    def test_exception(self):
        source = MemorySource(list(range(4)))
        sink = MemorySink()

        with self.assertRaises(Exception):
            Pipe.join(source, [Pipe_Tests.ExceptionFilter()], sink).run()
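
This last example fixes the error contract: an exception raised inside any filter propagates out of run() rather than being swallowed by the pipe. The ExceptionFilter used above is presumably no more than the following hypothetical stand-in:

    # Hypothetical stand-in for the test's ExceptionFilter; not coba's code.
    class ExceptionFilter:
        def filter(self, items):
            raise Exception("Exception Filter")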