def test_random_dataset_attrs(self):
    """
    Test expected attributes
    """
    start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")
    dataset = RandomDataset(
        from_ts=start,
        to_ts=end,
        tag_list=[SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    )
    # assertIsInstance gives a clearer failure message than assertTrue(isinstance(...))
    self.assertIsInstance(dataset, GordoBaseDataset)
    self.assertTrue(hasattr(dataset, "get_data"))
    self.assertTrue(hasattr(dataset, "get_metadata"))

    X, y = dataset.get_data()
    self.assertIsInstance(X, pd.DataFrame)

    # y can either be None or a pandas DataFrame
    # (the previous comment said "numpy array", contradicting the check below)
    self.assertTrue(isinstance(y, pd.DataFrame) or y is None)

    metadata = dataset.get_metadata()
    self.assertIsInstance(metadata, dict)
def test_row_filter(self):
    """Tests that row_filter filters away rows"""
    tag_list = [
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
        SensorTag("Tag 3", None),
    ]
    # Shared constructor arguments so each case below differs only in row_filter,
    # matching the kwargs style used by the other row_filter test in the suite.
    kwargs = dict(
        data_provider=MockDataSource(),
        from_ts=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        to_ts=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        tag_list=tag_list,
    )

    # No filter: all rows are kept
    X, _ = TimeSeriesDataset(**kwargs).get_data()
    self.assertEqual(577, len(X))

    # Simple threshold on a single tag
    X, _ = TimeSeriesDataset(row_filter="'Tag 1' < 5000", **kwargs).get_data()
    self.assertEqual(8, len(X))

    # Filter expression combining two tags
    X, _ = TimeSeriesDataset(
        row_filter="'Tag 1' / 'Tag 3' < 0.999", **kwargs
    ).get_data()
    self.assertEqual(3, len(X))
def get_random_data():
    """Build the config dict for a RandomDataset over a fixed five-day period."""
    from_ts = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    to_ts = dateutil.parser.isoparse("2017-12-30 06:00:00Z")
    return {
        "type": "RandomDataset",
        "from_ts": from_ts,
        "to_ts": to_ts,
        "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    }
def test_load_from_multiple_providers(self):
    """
    Two tags, each belonging to different data producers, and both gets loaded
    """
    tags = [SensorTag("abba", None), SensorTag("cba", None)]
    series_gen = load_series_from_multiple_providers(
        [self.ab_producer, self.containing_b_producer], None, None, tags
    )
    loaded = list(series_gen)
    # Each series carries the name of the provider pattern that matched it
    self.assertEqual(loaded[0].name, "ab.*")
    self.assertEqual(loaded[1].name, ".*b.*")
def test_load_multiple_raises_with_no_matches(self):
    """If no provider matches a tag then load_series_from_multiple_providers
    raises a ValueError when the generator is realized"""
    tags = [
        SensorTag("ab", None),
        SensorTag("tag_not_matching_any_of_the_regexps", None),
    ]
    # The generator is lazy, so the error only surfaces once it is consumed
    with self.assertRaises(ValueError):
        list(
            load_series_from_multiple_providers(
                [self.ab_producer, self.containing_b_producer], None, None, tags
            )
        )
def test_time_series_no_resolution():
    """A dataset built with resolution=None keeps more rows than a resampled one."""
    common = dict(
        data_provider=MockDataSource(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        from_ts=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        to_ts=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )
    unresampled, _ = TimeSeriesDataset(resolution=None, **common).get_data()
    resampled, _ = TimeSeriesDataset(resolution="10T", **common).get_data()
    assert len(unresampled) > len(resampled)
def test_load_series_need_asset_hint(dates, ncs_reader):
    # Without an asset hint (asset=None) load_series rejects the tag
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(
            dates[0], dates[1], [SensorTag("XYZ-123", None)]
        ):
            pass

    path_to_xyz = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "gordoplatform",
    )
    # Mapping the asset to a local test-data path lets the same tag load fine
    with patch(
        "gordo_components.data_provider.ncs_reader.NcsReader.ASSET_TO_PATH",
        {"gordoplatform": path_to_xyz},
    ):
        tags_with_asset = [SensorTag("XYZ-123", "gordoplatform")]
        for frame in ncs_reader.load_series(dates[0], dates[1], tags_with_asset):
            assert len(frame) == 20
def test_can_handle_tag_non_supported_asset_with_base_path(ncs_reader):
    tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")
    # The plain reader rejects an asset it does not know about...
    assert not ncs_reader.can_handle_tag(tag)

    # ...but a reader configured with dl_base_path accepts it
    reader_with_base_path = NcsReader(
        AzureDLFileSystemMock(), dl_base_path="/this/is/a/base/path"
    )
    assert reader_with_base_path.can_handle_tag(tag)
def test_aggregation_methods(self):
    """Tests that it works to set aggregation method(s)"""
    tags = [
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
        SensorTag("Tag 3", None),
    ]
    start = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    end = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

    # Default aggregation gives no extra columns
    X, _ = TimeSeriesDataset(
        MockDataSource(), start, end, tag_list=tags
    ).get_data()
    self.assertEqual((577, 3), X.shape)
    # The default single aggregation method gives the tag-names as columns
    self.assertEqual(list(X.columns), ["Tag 1", "Tag 2", "Tag 3"])

    # Using two aggregation methods give a multi-level column with tag-names
    # on top and aggregation_method as second level
    X, _ = TimeSeriesDataset(
        MockDataSource(),
        start,
        end,
        tag_list=tags,
        aggregation_methods=["mean", "max"],
    ).get_data()
    self.assertEqual((577, 6), X.shape)
    expected_columns = [
        (tag, method)
        for tag in ("Tag 1", "Tag 2", "Tag 3")
        for method in ("mean", "max")
    ]
    self.assertEqual(list(X.columns), expected_columns)
def test_load_multiple_matches_loads_from_first(self):
    """When a tag can be read from multiple providers it is the first provider
    in the list of providers which gets the job"""
    providers = [self.ab_producer, self.containing_b_producer]
    loaded = list(
        load_series_from_multiple_providers(
            providers, None, None, [SensorTag("abba", None)]
        )
    )
    # "abba" matches both providers; the first one in the list wins
    self.assertEqual(loaded[0].name, "ab.*")
def test_load_series_checks_date(self):
    """load_series will raise ValueError if to_ts<from_ts"""
    reader = IrocReader(client=None, threads=1)
    with self.assertRaises(ValueError):
        # to_ts precedes from_ts by one day, which must be rejected
        list(
            reader.load_series(
                from_ts=isoparse("2018-05-03T01:56:00+00:00"),
                to_ts=isoparse("2018-05-02T01:56:00+00:00"),
                tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
            )
        )
def test_load_series_no_data(self, _mocked_method):
    """load_series will raise ValueError if it does not find any tags"""
    reader = IrocReader(client=None, threads=1)
    with self.assertRaises(ValueError):
        list(
            reader.load_series(
                from_ts=isoparse("2018-05-02T01:56:00+00:00"),
                to_ts=isoparse("2018-05-03T01:56:00+00:00"),
                tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
            )
        )
def test_load_series_missing_columns_data(self, _mocked_method):
    """load_series will raise ValueError if there is a single tag it can not find"""
    reader = IrocReader(client=None, threads=1)
    # One unknown tag appended to otherwise-valid tags is enough to fail
    bad_tag_list = IROC_HAPPY_TAG_LIST + [SensorTag("jalla", None)]
    with self.assertRaises(ValueError):
        list(
            reader.load_series(
                from_ts=isoparse("2018-05-02T01:56:00+00:00"),
                to_ts=isoparse("2018-05-03T01:56:00+00:00"),
                tag_list=bad_tag_list,
            )
        )
def test_row_filter(self):
    """Tests that row_filter filters away rows"""
    common = dict(
        data_provider=MockDataSource(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        from_ts=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        to_ts=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )

    # No filter keeps every row
    X, _ = TimeSeriesDataset(**common).get_data()
    self.assertEqual(577, len(X))

    # Threshold on a single tag
    X, _ = TimeSeriesDataset(row_filter="'Tag 1' < 5000", **common).get_data()
    self.assertEqual(8, len(X))

    # Expression combining two tags
    X, _ = TimeSeriesDataset(
        row_filter="'Tag 1' / 'Tag 3' < 0.999", **common
    ).get_data()
    self.assertEqual(3, len(X))
def test_load_series_need_base_path(ncs_reader, dates):
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")
    # The default reader has no path mapping for this asset
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    base_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    # A reader constructed with dl_base_path can resolve and load the tag
    reader_with_base_path = NcsReader(
        AzureDLFileSystemMock(), dl_base_path=base_path
    )
    for series in reader_with_base_path.load_series(dates[0], dates[1], [tag]):
        assert len(series) == 20
import pandas as pd
import responses
import requests
from asynctest import mock as async_mock
from influxdb import InfluxDBClient
from flask import Request

from gordo_components.model import models
from gordo_components.watchman import server as watchman_server
from gordo_components.dataset.sensor_tag import SensorTag
from gordo_components.dataset.sensor_tag import to_list_of_strings

logger = logging.getLogger(__name__)

# Four dummy sensor tags (tag-0 .. tag-3, no asset) shared by this module,
# plus their plain-string forms via to_list_of_strings.
SENSORTAG_LIST = [SensorTag(f"tag-{i}", None) for i in range(4)]
SENSORS_STR_LIST = to_list_of_strings(SENSORTAG_LIST)

# Connection settings for the local test InfluxDB instance.
INFLUXDB_NAME = "testdb"
INFLUXDB_USER = "******"
INFLUXDB_PASSWORD = "******"
INFLUXDB_MEASUREMENT = "sensors"
INFLUXDB_URI = f"{INFLUXDB_USER}:{INFLUXDB_PASSWORD}@localhost:8086/{INFLUXDB_NAME}"

# Argument tuple presumably consumed by an influxdb test fixture —
# NOTE(review): verify the expected parameter order against the fixture definition.
INFLUXDB_FIXTURE_ARGS = (
    SENSORS_STR_LIST,
    INFLUXDB_NAME,
    INFLUXDB_USER,
    INFLUXDB_PASSWORD,
    SENSORS_STR_LIST,
)
self.assertGreaterEqual( len(dirs), 1, msg="Expected saving of model to create at " f"least one subdir, but got {len(dirs)}", ) @pytest.mark.parametrize( "should_be_equal,metadata,tag_list", [ (True, None, None), (False, { "metadata": "something" }, None), (False, None, [SensorTag("extra_tag", None)]), ], ) def test_provide_saved_model_caching(should_be_equal: bool, metadata: Optional[Dict], tag_list: Optional[List[SensorTag]]): """ Test provide_saved_model with caching and possible cache busting if metadata or tag_list is set. Parameters ---------- should_be_equal : bool Should the two generated models be at the same location or not? metadata Optional metadata which will be used as metadata instead of the default
TAG_NAME1 = "MyBeautifulTag1" TAG_NAME2 = "MyBeautifulTag2" asset_nonsense = "ImaginaryAsset" @pytest.mark.parametrize( "good_input_tags,asset,expected_output_tags", [ ( [ {"name": TAG_NAME1, "asset": asset_nonsense}, {"name": TAG_NAME2, "asset": asset_nonsense}, ], "ThisAssetCodeWillBeIgnored", [ SensorTag(TAG_NAME1, asset_nonsense), SensorTag(TAG_NAME2, asset_nonsense), ], ), ( ["TRC-123", "GRA-214", "ASGB-212"], "ThisWillBeTheAsset", [ SensorTag("TRC-123", "ThisWillBeTheAsset"), SensorTag("GRA-214", "ThisWillBeTheAsset"), SensorTag("ASGB-212", "ThisWillBeTheAsset"), ], ), ( ["TRC-123", "GRA-214", "ASGB-212"], None, # Will deduce asset
def ncs_reader(): return NcsReader(AzureDLFileSystemMock()) @pytest.fixture def dates(): return ( dateutil.parser.isoparse("2000-01-01T08:56:00+00:00"), dateutil.parser.isoparse("2001-09-01T10:01:00+00:00"), ) @pytest.mark.parametrize( "tag_to_check", [normalize_sensor_tags(["TRC-123"])[0], SensorTag("XYZ-123", "1776-TROC")], ) def test_can_handle_tag_ok(tag_to_check, ncs_reader): assert ncs_reader.can_handle_tag(tag_to_check) @pytest.mark.parametrize( "tag_to_check", [SensorTag("TRC-123", None), SensorTag("XYZ-123", "123-XXX")]) def test_can_handle_tag_notok(tag_to_check, ncs_reader): assert not ncs_reader.can_handle_tag(tag_to_check) def test_can_handle_tag_unknow_prefix_raise(ncs_reader): with pytest.raises(ValueError):
import unittest from io import StringIO from unittest import mock from dateutil.parser import isoparse # type: ignore from gordo_components.data_provider.iroc_reader import IrocReader, read_iroc_file from gordo_components.dataset.sensor_tag import normalize_sensor_tags from gordo_components.dataset.sensor_tag import SensorTag IROC_HAPPY_TAG_LIST = [ SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1410J0", "NINENINE"), SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", "NINENINE"), SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", "NINENINE"), ] HAPPY_FROM_TS = isoparse("2018-05-02T01:56:00+00:00") HAPPY_TO_TS = isoparse("2018-05-03T01:56:00+00:00") IROC_MANY_ASSETS_TAG_LIST = [ "NINENINE.OPCIS::NNFCDPC01.AI1410J0", "NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", "NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", "UON_EF.OPCIS::LO006-B1H.PRCASXIN", "UON_EF.OPCIS::LO006-B1H.PRTUBXIN", "UON_EF.OPCIS::LO006-B1H_M1.PRSTAXIN", "UON_EF.OPCIS::LO006-B1H_M1.RTGASDIN", ] IROC_NO_ASSET_TAG_LIST = [ SensorTag("NOT.OPCIS::NNFCDPC01.AI1410J0", "NOT"),
def test_can_handle_tag_ok(self):
    # A tag whose asset ("UON_EF") matches its name prefix is accepted
    reader = IrocReader(client=None, threads=1)
    tag = SensorTag("UON_EF.xxx", "UON_EF")
    assert reader.can_handle_tag(tag)
import unittest from io import StringIO from unittest import mock from dateutil.parser import isoparse # type: ignore from gordo_components.data_provider.iroc_reader import IrocReader, read_iroc_file from gordo_components.dataset.sensor_tag import SensorTag IROC_HAPPY_TAG_LIST = [ SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1410J0", None), SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", None), SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", None), ] HAPPY_FROM_TS = isoparse("2018-05-02T01:56:00+00:00") HAPPY_TO_TS = isoparse("2018-05-03T01:56:00+00:00") # Functioning CSV for IROC. Has 6 lines, 5 different timestamps # (2018-05-02T06:44:29.7830000Z occurs twice) and 3 different tags. IROC_HAPPY_PATH_CSV = u"""tag,value,timestamp,status NINENINE.OPCIS::NNFCDPC01.AI1410J0,5,2018-05-02T06:00:11.3860000Z,Analog Normal NINENINE.OPCIS::NNFCDPC01.AI1410J0,76.86899,2018-05-02T06:44:29.7830000Z,Analog Normal NINENINE.OPCIS::NNFCDPC01.AI1840C1J0,-23.147645,2018-05-02T06:43:53.8490000Z,Analog Normal NINENINE.OPCIS::NNFCDPC01.AI1840C1J0,-10.518037,2018-05-02T06:44:29.9130000Z,Analog Normal NINENINE.OPCIS::NNFCDPC01.AI1840E1J0,48.92137,2018-05-02T06:43:59.7240000Z,Analog Normal NINENINE.OPCIS::NNFCDPC01.AI1840E1J0,-0.497645,2018-05-02T06:44:29.7830000Z,Analog Normal """ class IrocDataSourceTestCase(unittest.TestCase):
def test_can_handle_tag_unknown_asset(self):
    # An asset name the IROC reader does not recognise is rejected
    reader = IrocReader(client=None, threads=1)
    tag = SensorTag("UON_EF.xxx", "UNKNOWǸ_ASSET")
    assert not reader.can_handle_tag(tag)
def test_can_handle_tag_no_asset(self):
    # Without an asset (None) the IROC reader cannot handle the tag
    reader = IrocReader(client=None, threads=1)
    tag = SensorTag("UON_EF.xxx", None)
    assert not reader.can_handle_tag(tag)
@pytest.fixture
def ncs_reader():
    # NcsReader backed by a mocked Azure Data Lake filesystem
    return NcsReader(AzureDLFileSystemMock())


@pytest.fixture
def dates():
    # (from_ts, to_ts) pair used by the load_series tests
    return (
        dateutil.parser.isoparse("2000-01-01T08:56:00+00:00"),
        dateutil.parser.isoparse("2001-09-01T10:01:00+00:00"),
    )


@pytest.mark.parametrize(
    "tag_to_check",
    [normalize_sensor_tags(["TRC-123"])[0], SensorTag("XYZ-123", "1776-TROC")],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    # A normalized tag, or a tag with an explicit asset, is expected to be handled
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check", [SensorTag("TRC-123", None), SensorTag("XYZ-123", "123-XXX")]
)
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    # A missing asset (None) or an unknown asset is rejected
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    # normalize_sensor_tags on an unrecognised prefix — the call raises ValueError
    with pytest.raises(ValueError):
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
[ ("Tag 1", "mean"), ("Tag 1", "max"), ("Tag 2", "mean"), ("Tag 2", "max"), ("Tag 3", "mean"), ("Tag 3", "max"), ], ) @pytest.mark.parametrize( "tag_list", [ [ SensorTag("Tag 1", None), SensorTag("Tag 2", None), SensorTag("Tag 3", None) ], [SensorTag("Tag 1", None)], ], ) @pytest.mark.parametrize( "target_tag_list", [ [ SensorTag("Tag 2", None), SensorTag("Tag 1", None), SensorTag("Tag 3", None) ], [SensorTag("Tag 1", None)],