def _read_data(self, dataset, format=None):
    """Read a data file and return an iterable that can be used as testing
    or training data.

    :param dataset: The data source; it is passed to ``formats.detect`` and
        to the selected format class.
    :param format: Optional key into ``formats.AVAILABLE``. When omitted (or
        falsy), the format is detected from ``dataset``.
    :raises ValueError: If ``format`` is not a key of ``formats.AVAILABLE``,
        or if no format could be detected for ``dataset``.
    :returns: Whatever ``to_iterable()`` returns for the selected format.
    """
    if not format:
        # No explicit format: attempt to detect it from the data source.
        format_class = formats.detect(dataset)
        if not format_class:
            # NOTE(review): presumably ``formats.detect`` returns a falsy
            # value when nothing matches -- confirm. Without this guard the
            # call below would fail with an opaque "not callable" TypeError.
            raise ValueError("Could not automatically detect format for "
                             "the given data source.")
    else:
        if format not in formats.AVAILABLE:
            raise ValueError("'{0}' format not supported.".format(format))
        format_class = formats.AVAILABLE[format]
    return format_class(dataset).to_iterable()
def _read_data(self, dataset, format=None):
    """Load ``dataset`` and return it as an iterable usable as testing or
    training data.

    :param dataset: The data source; it is passed to ``formats.detect`` and
        to the selected format class.
    :param format: Optional key into the registry returned by
        ``formats.get_registry()``. When omitted (or falsy), the format is
        detected from ``dataset``.
    :raises ValueError: If ``format`` is given but is not in the registry.
    :raises FormatError: If no format is given and none could be detected.
    :returns: Whatever ``to_iterable()`` returns for the selected format.
    """
    if format:
        # An explicitly requested format must be a registered one.
        known_formats = formats.get_registry()
        if format not in known_formats:
            raise ValueError("'{0}' format not supported.".format(format))
        reader_class = known_formats[format]
    else:
        # Nothing requested: try to infer the format from the data source.
        reader_class = formats.detect(dataset)
        if not reader_class:
            raise FormatError(
                "Could not automatically detect format for the given "
                "data source."
            )
    reader = reader_class(dataset, **self.format_kwargs)
    return reader.to_iterable()
def _read_data(self, dataset, format=None):
    """Turn a data source into an iterable of testing or training data.

    :param dataset: The data source; it is passed to ``formats.detect`` and
        to the selected format class.
    :param format: Optional key into the registry returned by
        ``formats.get_registry()``. When omitted (or falsy), the format is
        detected from ``dataset``.
    :raises ValueError: If ``format`` is given but is not in the registry.
    :raises FormatError: If no format is given and none could be detected.
    :returns: Whatever ``to_iterable()`` returns for the selected format.
    """
    if not format:
        # Auto-detection path: let the formats module pick a format class.
        detected = formats.detect(dataset)
        if not detected:
            raise FormatError(
                'Could not automatically detect format for the given '
                'data source.'
            )
        return detected(dataset, **self.format_kwargs).to_iterable()

    # Explicit path: the requested name must be registered.
    available = formats.get_registry()
    if format not in available:
        raise ValueError("'{0}' format not supported.".format(format))
    return available[format](dataset, **self.format_kwargs).to_iterable()
def read_data(file, format=None, **kwargs):
    """Load a data file and return an iterable usable as testing or
    training data.

    Adapted from:
    https://github.com/sloria/TextBlob/blob/dev/textblob/classifiers.py#L128

    :param file: The data source; it is passed to ``formats.detect`` and to
        the selected format class.
    :param format: Optional key into the registry returned by
        ``formats.get_registry()``. When omitted (or falsy), the format is
        detected from ``file``.
    :param kwargs: Extra keyword arguments forwarded to the format class.
    :raises ValueError: If ``format`` is given but is not in the registry.
    :raises FormatError: If no format is given and none could be detected.
    :returns: Whatever ``to_iterable()`` returns for the selected format.
    """
    if format:
        # A caller-supplied format name must exist in the registry.
        supported = formats.get_registry()
        if format not in supported:
            raise ValueError("'{0}' format not supported.".format(format))
        loader_class = supported[format]
    else:
        # Fall back to detecting the format from the data source.
        loader_class = formats.detect(file)
        if not loader_class:
            raise FormatError(
                'Could not automatically detect format for the '
                'given data source.'
            )
    loader = loader_class(file, **kwargs)
    return loader.to_iterable()
def test_detect_json(self):
    """``formats.detect`` on ``JSON_FILE`` yields ``formats.JSON``."""
    detected = formats.detect(JSON_FILE)
    assert_equal(detected, formats.JSON)
def test_detect_csv(self):
    """``formats.detect`` on ``CSV_FILE`` yields ``formats.CSV``."""
    detected = formats.detect(CSV_FILE)
    assert_equal(detected, formats.CSV)
def test_detect_json(self):
    """``formats.detect`` on an open handle to ``JSON_FILE`` yields
    ``formats.JSON``.
    """
    with open(JSON_FILE) as json_handle:
        detected = formats.detect(json_handle)
    assert_equal(detected, formats.JSON)
def test_detect_csv(self):
    """``formats.detect`` on an open handle to ``CSV_FILE`` yields
    ``formats.CSV``.
    """
    with open(CSV_FILE) as csv_handle:
        detected = formats.detect(csv_handle)
    assert_equal(detected, formats.CSV)