def test_can_handle_tag_non_supported_asset_with_base_path(ncs_reader):
    tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")
    assert not ncs_reader.can_handle_tag(tag)

    ncs_reader_with_base = NcsReader(
        AzureDLFileSystemMock(), dl_base_path="/this/is/a/base/path"
    )
    assert ncs_reader_with_base.can_handle_tag(tag)
def test_get_file_lookups():
    with pytest.raises(ValueError):
        NcsReader.get_file_lookups([])
    with pytest.raises(ValueError):
        NcsReader.get_file_lookups(["excel"])

    file_lookups = NcsReader.get_file_lookups(["parquet"])
    assert len(file_lookups) == 1
    assert isinstance(file_lookups[0], NcsParquetLookup)
def test_with_conflicted_file_types(dates):
    ncs_reader = NcsReader(AzureDLFileSystemMock(), remove_status_codes=[0])
    valid_tag_list = normalize_sensor_tags(["TRC-324"])

    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    tags_series = [v for v in series_gen]
    assert len(tags_series) == 1
    trc_324_series = tags_series[0]
    # The parquet file takes precedence and should yield 15 rows
    assert len(trc_324_series) == 15
def test_parquet_files_lookup(dates):
    ncs_reader = NcsReader(AzureDLFileSystemMock(), remove_status_codes=[0])
    valid_tag_list = normalize_sensor_tags(["TRC-323"])

    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    tags_series = [v for v in series_gen]
    assert len(tags_series) == 1
    trc_323_series = tags_series[0]
    assert trc_323_series.name == "TRC-323"
    assert trc_323_series.dtype.name == "float64"
    assert len(trc_323_series) == 20
# Parametrized over filtering on status code 0 vs. no filtering at all
@pytest.mark.parametrize("remove_status_codes", [[0], []])
def test_load_series_with_filter_bad_data(dates, remove_status_codes):
    ncs_reader = NcsReader(
        AzureDLFileSystemMock(), remove_status_codes=remove_status_codes
    )
    valid_tag_list = normalize_sensor_tags(["TRC-322"])

    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    # Checks that the bad data from the files under
    # tests/gordo/data_provider/data/datalake/TRC-322 is filtered out.
    # 20 rows exist, 5 of them have the value 0.
    n_expected = 15 if remove_status_codes != [] else 20
    assert all(len(series) == n_expected for series in series_gen)
def test_with_conflicted_file_types_with_preferable_csv(dates):
    ncs_reader = NcsReader(
        AzureDLFileSystemMock(), remove_status_codes=[0], lookup_for=["csv"]
    )
    valid_tag_list = normalize_sensor_tags(["TRC-324"])

    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    tags_series = [v for v in series_gen]
    assert len(tags_series) == 1
    trc_324_series = tags_series[0]
    # The CSV file is preferred here and should yield 1 row
    assert len(trc_324_series) == 1
def test_load_series_need_base_path(ncs_reader, dates):
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    path_to_weird_base_path_asset = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    ncs_reader_with_base = NcsReader(
        AzureDLFileSystemMock(), dl_base_path=path_to_weird_base_path_asset
    )
    for tag_series in ncs_reader_with_base.load_series(dates[0], dates[1], [tag]):
        assert len(tag_series) == 20
@pytest.fixture
def ncs_reader():
    return NcsReader(AzureDLFileSystemMock())
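# The tests above also rely on a `dates` fixture that is defined outside this
# section. A minimal sketch of what such a fixture could look like, assuming a
# time range wide enough to cover the mocked data files (the names and values
# below are illustrative, not the project's actual fixture):
#
# from datetime import datetime, timezone
#
# @pytest.fixture
# def dates():
#     return (
#         datetime(2000, 1, 1, tzinfo=timezone.utc),
#         datetime(2001, 9, 1, tzinfo=timezone.utc),
#     )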
def test_ncs_reader_valid_tag_path():
    with pytest.raises(FileNotFoundError):
        NcsReader._verify_tag_path_exist(AzureDLFileSystemMock(), "not/valid/path")