def __init__(
        self,
        source: Union[str, Source[Iterable[str]], SimulatedEnvironment]) -> None:
    """Store the serialized source and create the JSON decoder.

    Args:
        source: A string (wrapped in a UrlSource), a SimulatedEnvironment
            (wrapped via ``_make_serialized_source``), or any other object,
            which is kept as the source unchanged.
    """
    # The string check comes first, then the environment check; anything
    # else is assumed to already be a usable source.
    self._source = (
        UrlSource(source) if isinstance(source, str)
        else self._make_serialized_source(source) if isinstance(source, SimulatedEnvironment)
        else source
    )
    self._decoder = JsonDecode()
class EnvironmentDefinitionFileV1(Source[Sequence[Environment]]):
    """Build a sequence of environments from a decoded definition file.

    The decoded definition is used as a dict with an "environments" entry
    (one recipe or a list of recipes) and an optional "variables" entry.
    Each variable value is built with ``CobaRegistry.construct`` and may then
    be referenced by name from a recipe.
    """

    @overload
    def __init__(self, filesource: Source[Iterable[str]]) -> None:
        ...

    @overload
    def __init__(self, filename: str) -> None:
        ...

    def __init__(self, arg) -> None:
        """Store the definition source.

        Args:
            arg: A string (wrapped in a UrlSource) or any other object, which
                is kept as the source unchanged.
        """
        self._source = UrlSource(arg) if isinstance(arg, str) else arg

    def read(self) -> Sequence[Environment]:
        """Decode the definition and construct every environment it describes.

        Returns:
            A flat list with the results of constructing each recipe found
            under the "environments" entry, in order.

        Raises:
            CobaException: If a recipe (or a nested part of one) cannot be
                constructed, including an empty list recipe.
        """
        definitions: dict = JsonDecode().filter('\n'.join(self._source.read()))

        variables = {
            name: CobaRegistry.construct(recipe)
            for name, recipe in definitions.get("variables", {}).items()
        }

        def _construct(item: Any) -> Sequence[Any]:
            """Construct ``item`` and always hand back a sequence."""
            result = None

            if isinstance(item, str):
                # A string is either a variable reference or a registry recipe.
                result = variables[item] if item in variables else CobaRegistry.construct(item)
            elif isinstance(item, dict):
                result = CobaRegistry.construct(item)
            elif isinstance(item, list) and item:
                # An empty list has no first piece to inspect, so it falls
                # through to the CobaException below instead of raising an
                # opaque IndexError on pieces[0].
                pieces = list(map(_construct, item))

                if hasattr(pieces[0][0], 'read'):
                    # The first piece holds readable objects: join each of
                    # them with every combination drawn from the other pieces.
                    result = [
                        Pipes.join(s, *f)
                        for s in pieces[0]
                        for f in product(*pieces[1:])
                    ]
                else:
                    # Otherwise simply concatenate the constructed pieces.
                    result = sum(pieces, [])

            if result is None:
                raise CobaException(
                    f"We were unable to construct {item} in the given environment definition file."
                )

            return result if isinstance(result, collections.abc.Sequence) else [result]

        # A single recipe is treated as a one-element list of recipes.
        if not isinstance(definitions['environments'], list):
            definitions['environments'] = [definitions['environments']]

        return [
            environment
            for recipe in definitions['environments']
            for environment in _construct(recipe)
        ]
def __init__(self, source: Union[str, Source[Iterable[str]]]) -> None:
    """Instantiate a LibsvmSource.

    Args:
        source: The data source. Either a string giving the source location
            (wrapped in a UrlSource) or another Source, used as given.
    """
    if isinstance(source, str):
        source = UrlSource(source)

    # Chain the line source with a LibsvmReader.
    self._source = Pipes.join(source, LibsvmReader())
class SerializedSimulation(SimulatedEnvironment):
    """A simulation backed by a line-oriented serialized source.

    The first line of the source holds the encoded params; every following
    line holds one interaction encoded as ``[context,actions,kwargs]``.
    """

    def _make_serialized_source(
            self, sim: SimulatedEnvironment) -> Source[Iterable[str]]:
        """Wrap ``sim`` in a source that yields its serialized lines."""

        def serialized_generator() -> Iterable[str]:
            encode = JsonEncode().filter

            # Params go first, then one line per interaction.
            yield encode(sim.params)

            for interaction in sim.read():
                yield (
                    f"[{encode(interaction.context)},"
                    f"{encode(interaction.actions)},"
                    f"{encode(interaction.kwargs)}]"
                )

        return LambdaSource(serialized_generator)

    def __init__(
            self,
            source: Union[str, Source[Iterable[str]], SimulatedEnvironment]) -> None:
        """Store the serialized source and create the JSON decoder.

        Args:
            source: A string (wrapped in a UrlSource), a SimulatedEnvironment
                (serialized on the fly), or any other object, which is kept
                as the source unchanged.
        """
        if isinstance(source, str):
            source = UrlSource(source)
        elif isinstance(source, SimulatedEnvironment):
            source = self._make_serialized_source(source)

        self._source = source
        self._decoder = JsonDecode()

    @property
    def params(self) -> Dict[str, Any]:
        """Decode and return the first line of the source."""
        first_line = next(iter(self._source.read()))
        return self._decoder.filter(first_line)

    def read(self) -> Iterable[SimulatedInteraction]:
        """Yield one SimulatedInteraction per line after the first."""
        # Skip the first line; ``params`` decodes that one.
        for line in islice(self._source.read(), 1, None):
            decoded = self._decoder.filter(line)
            context, actions, kwargs = decoded[0], decoded[1], decoded[2]
            yield SimulatedInteraction(context, actions, **kwargs)

    def write(self, sink: Sink[str]):
        """Write every line of the source, first line included, to ``sink``."""
        for serialized_line in self._source.read():
            sink.write(serialized_line)
def __init__(self,
             source: Union[str, Source[Iterable[str]]],
             has_header: bool = False,
             **dialect) -> None:
    """Instantiate a CsvSource.

    Args:
        source: The data source. Either a string giving the source location
            (wrapped in a UrlSource) or another Source, used as given.
        has_header: Indicates if the CSV file has a header row.
        **dialect: Extra keyword arguments passed through to CsvReader.
    """
    line_source = UrlSource(source) if isinstance(source, str) else source
    csv_reader = CsvReader(has_header, **dialect)

    self._source = Pipes.join(line_source, csv_reader)
def __init__(self,
             source: Union[str, Source[Iterable[str]]],
             cat_as_str: bool = False,
             skip_encoding: bool = False,
             lazy_encoding: bool = True,
             header_indexing: bool = True) -> None:
    """Instantiate an ArffSource.

    Args:
        source: The data source. Either a string giving the source location
            (wrapped in a UrlSource) or another Source, used as given.
        cat_as_str: Indicates that categorical features should be encoded as
            a string rather than one hot encoded.
        skip_encoding: Indicates that features should not be encoded (this
            means all features will be strings).
        lazy_encoding: Indicates that features should be encoded lazily
            (this can save time if rows will be dropped).
        header_indexing: Indicates that header data should be preserved so
            rows can be indexed by header name.
    """
    if isinstance(source, str):
        source = UrlSource(source)

    # The four flags are passed to ArffReader positionally, in this order.
    arff_reader = ArffReader(cat_as_str, skip_encoding, lazy_encoding, header_indexing)
    self._source = Pipes.join(source, arff_reader)
def test_unknown_scheme(self):
    """Constructing a UrlSource with an "irc" URL must raise CobaException."""
    self.assertRaises(CobaException, UrlSource, "irc://fail")
def test_no_scheme(self):
    """A URL without a scheme yields a DiskSource holding the URL verbatim."""
    url = "c:/users"

    inner_for_type = UrlSource(url)._source
    self.assertIsInstance(inner_for_type, DiskSource)

    inner_for_name = UrlSource(url)._source
    self.assertEqual(url, inner_for_name._filename)
def test_file_scheme(self):
    """A "file://" URL yields a DiskSource whose filename drops the prefix."""
    url = "file://c:/users"
    expected_filename = url[len("file://"):]

    inner_for_type = UrlSource(url)._source
    self.assertIsInstance(inner_for_type, DiskSource)

    inner_for_name = UrlSource(url)._source
    self.assertEqual(expected_filename, inner_for_name._filename)
def test_https_scheme(self):
    """An "https://" URL yields an HttpSource holding the full URL."""
    url = "https://www.google.com"

    inner_for_type = UrlSource(url)._source
    self.assertIsInstance(inner_for_type, HttpSource)

    inner_for_url = UrlSource(url)._source
    self.assertEqual(url, inner_for_url._url)
def __init__(self, arg) -> None:
    """Store the underlying source.

    Args:
        arg: A string (wrapped in a UrlSource) or any other object, which is
            kept as the source unchanged.
    """
    if isinstance(arg, str):
        arg = UrlSource(arg)
    self._source = arg