Exemplo n.º 1
0
    def __init__(self, source: Union[str, Source[Iterable[str]]]) -> None:
        """Build a LibsvmSource on top of a raw line source.

        Args:
            source: Either a string giving the location of the data (it is
                wrapped in a UrlSource) or an existing Source, which is used
                as given.
        """
        if isinstance(source, str):
            # A bare string is treated as a location and wrapped.
            source = UrlSource(source)

        # Chain the raw source with a LibsvmReader.
        self._source = Pipes.join(source, LibsvmReader())
Exemplo n.º 2
0
    def __init__(
        self, source: Union[str, Source[Iterable[str]],
                            SimulatedEnvironment]) -> None:
        """Store the underlying source and a JSON decoder.

        Args:
            source: Where the data comes from. A string is wrapped in a
                UrlSource, a SimulatedEnvironment is handed to
                ``self._make_serialized_source``, and any other value is
                stored unchanged.
        """
        # The string check deliberately comes first, as in the original order.
        if isinstance(source, str):
            resolved = UrlSource(source)
        elif isinstance(source, SimulatedEnvironment):
            resolved = self._make_serialized_source(source)
        else:
            resolved = source

        self._source = resolved
        self._decoder = JsonDecode()
Exemplo n.º 3
0
    def __init__(self,
                 source: Union[str, Source[Iterable[str]]],
                 has_header: bool = False,
                 **dialect) -> None:
        """Build a CsvSource on top of a raw line source.

        Args:
            source: Either a string giving the location of the data (it is
                wrapped in a UrlSource) or an existing Source, which is used
                as given.
            has_header: Whether the CSV file has a header row.
            **dialect: Extra keyword arguments forwarded unchanged to
                CsvReader.
        """
        if isinstance(source, str):
            # A bare string is treated as a location and wrapped.
            source = UrlSource(source)

        # Chain the raw source with a CsvReader configured by the caller.
        self._source = Pipes.join(source, CsvReader(has_header, **dialect))
Exemplo n.º 4
0
    def __init__(self,
                 source: Union[str, Source[Iterable[str]]],
                 cat_as_str: bool = False,
                 skip_encoding: bool = False,
                 lazy_encoding: bool = True,
                 header_indexing: bool = True) -> None:
        """Build an ArffSource on top of a raw line source.

        Args:
            source: Either a string giving the location of the data (it is
                wrapped in a UrlSource) or an existing Source, which is used
                as given.
            cat_as_str: Encode categorical features as strings rather than
                one hot encoding them.
            skip_encoding: Do not encode features at all, which leaves every
                feature as a string.
            lazy_encoding: Encode features lazily, which can save time when
                rows end up being dropped.
            header_indexing: Preserve header data so that rows can be indexed
                by header name.
        """
        if isinstance(source, str):
            # A bare string is treated as a location and wrapped.
            source = UrlSource(source)

        # Options are passed positionally, in the same order as received.
        arff_reader = ArffReader(
            cat_as_str,
            skip_encoding,
            lazy_encoding,
            header_indexing,
        )
        self._source = Pipes.join(source, arff_reader)
Exemplo n.º 5
0
 def test_unknown_scheme(self):
     """Constructing a UrlSource from an ``irc://`` URL raises CobaException."""
     self.assertRaises(CobaException, UrlSource, "irc://fail")
Exemplo n.º 6
0
 def test_no_scheme(self):
     """A location without a scheme is backed by a DiskSource with that filename."""
     location = "c:/users"

     # Each assertion uses a freshly built UrlSource.
     inner_for_type = UrlSource(location)._source
     self.assertIsInstance(inner_for_type, DiskSource)

     inner_for_name = UrlSource(location)._source
     self.assertEqual(location, inner_for_name._filename)
Exemplo n.º 7
0
 def test_file_scheme(self):
     """A ``file://`` URL is backed by a DiskSource whose filename drops the scheme."""
     scheme = "file://"
     location = scheme + "c:/users"

     inner_for_type = UrlSource(location)._source
     self.assertIsInstance(inner_for_type, DiskSource)

     # Everything after the scheme prefix is the expected filename.
     expected_filename = location[len(scheme):]
     inner_for_name = UrlSource(location)._source
     self.assertEqual(expected_filename, inner_for_name._filename)
Exemplo n.º 8
0
 def test_https_scheme(self):
     """An ``https://`` URL is backed by an HttpSource holding the full URL."""
     location = "https://www.google.com"

     # Each assertion uses a freshly built UrlSource.
     inner_for_type = UrlSource(location)._source
     self.assertIsInstance(inner_for_type, HttpSource)

     inner_for_url = UrlSource(location)._source
     self.assertEqual(location, inner_for_url._url)
Exemplo n.º 9
0
    def __init__(self, arg) -> None:
        """Store the underlying source.

        Args:
            arg: A string, which is wrapped in a UrlSource, or any other
                object, which is stored unchanged.
        """
        if isinstance(arg, str):
            self._source = UrlSource(arg)
        else:
            self._source = arg