Пример #1
0
    def test_random_dataset_attrs(self):
        """Verify RandomDataset exposes the expected GordoBaseDataset interface."""

        from_ts = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
        to_ts = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

        dataset = RandomDataset(
            from_ts=from_ts,
            to_ts=to_ts,
            tag_list=[SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
        )

        # Interface: correct base class plus the two required accessors.
        self.assertIsInstance(dataset, GordoBaseDataset)
        for required in ("get_data", "get_metadata"):
            self.assertTrue(hasattr(dataset, required))

        X, y = dataset.get_data()
        self.assertIsInstance(X, pd.DataFrame)
        # y is either a target DataFrame or None.
        self.assertTrue(y is None or isinstance(y, pd.DataFrame))

        self.assertIsInstance(dataset.get_metadata(), dict)
Пример #2
0
    def test_row_filter(self):
        """Each row_filter expression should cut down the returned row count."""

        tags = [SensorTag(f"Tag {i}", None) for i in (1, 2, 3)]
        from_ts = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
        to_ts = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

        def fetch(**extra):
            # Build a dataset over the fixed window and return only X.
            data, _ = TimeSeriesDataset(
                MockDataSource(), from_ts, to_ts, tag_list=tags, **extra
            ).get_data()
            return data

        # No filter: every resampled row survives.
        self.assertEqual(577, len(fetch()))

        # Absolute threshold on a single tag.
        self.assertEqual(8, len(fetch(row_filter="'Tag 1' < 5000")))

        # Ratio between two tags.
        self.assertEqual(3, len(fetch(row_filter="'Tag 1' / 'Tag 3' < 0.999")))
Пример #3
0
def get_random_data():
    """Return kwargs describing a RandomDataset over a fixed five-day window."""
    return {
        "type": "RandomDataset",
        "from_ts": dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        "to_ts": dateutil.parser.isoparse("2017-12-30 06:00:00Z"),
        "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    }
 def test_load_from_multiple_providers(self):
     """Two tags served by different producers should both be loaded."""
     providers = [self.ab_producer, self.containing_b_producer]
     tags = [SensorTag("abba", None), SensorTag("cba", None)]
     series = list(
         load_series_from_multiple_providers(providers, None, None, tags)
     )
     # Each returned series carries the pattern of the producer that made it.
     self.assertEqual("ab.*", series[0].name)
     self.assertEqual(".*b.*", series[1].name)
 def test_load_multiple_raises_with_no_matches(self):
     """A tag no provider matches makes the realized generator raise ValueError."""
     unmatched = [
         SensorTag("ab", None),
         SensorTag("tag_not_matching_any_of_the_regexps", None),
     ]
     with self.assertRaises(ValueError):
         # list() forces the lazy generator so the error actually fires.
         list(
             load_series_from_multiple_providers(
                 [self.ab_producer, self.containing_b_producer],
                 None,
                 None,
                 unmatched,
             )
         )
Пример #6
0
def test_time_series_no_resolution():
    """Without resampling the dataset should yield more rows than with it."""
    common = dict(
        data_provider=MockDataSource(),
        tag_list=[SensorTag(f"Tag {n}", None) for n in (1, 2, 3)],
        from_ts=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        to_ts=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )
    raw, _ = TimeSeriesDataset(resolution=None, **common).get_data()
    resampled, _ = TimeSeriesDataset(resolution="10T", **common).get_data()
    # 10-minute resampling must collapse rows relative to the raw series.
    assert len(raw) > len(resampled)
Пример #7
0
def test_load_series_need_asset_hint(dates, ncs_reader):
    """A tag whose asset cannot be resolved needs an explicit asset hint."""
    # Consuming the generator without an asset must fail.
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(
            dates[0], dates[1], [SensorTag("XYZ-123", None)]
        ):
            pass

    gordoplatform_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "gordoplatform",
    )
    # With the asset mapped to local test data, the same tag loads fine.
    with patch(
        "gordo_components.data_provider.ncs_reader.NcsReader.ASSET_TO_PATH",
        {"gordoplatform": gordoplatform_path},
    ):
        tagged = [SensorTag("XYZ-123", "gordoplatform")]
        for frame in ncs_reader.load_series(dates[0], dates[1], tagged):
            assert len(frame) == 20
def test_can_handle_tag_non_supported_asset_with_base_path(ncs_reader):
    """An unknown asset is rejected unless a dl_base_path is configured."""
    weird_tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")
    assert not ncs_reader.can_handle_tag(weird_tag)

    # The same tag becomes handleable once the reader has a base path.
    reader_with_base_path = NcsReader(
        AzureDLFileSystemMock(), dl_base_path="/this/is/a/base/path"
    )
    assert reader_with_base_path.can_handle_tag(weird_tag)
Пример #9
0
    def test_aggregation_methods(self):
        """Single vs. multiple aggregation methods and the resulting columns."""

        tags = [SensorTag(f"Tag {n}", None) for n in (1, 2, 3)]
        from_ts = dateutil.parser.isoparse("2017-12-25 06:00:00Z")
        to_ts = dateutil.parser.isoparse("2017-12-29 06:00:00Z")

        # Default: a single aggregation gives one flat column per tag,
        # named after the tag itself.
        X, _ = TimeSeriesDataset(
            MockDataSource(), from_ts, to_ts, tag_list=tags
        ).get_data()
        self.assertEqual((577, 3), X.shape)
        self.assertEqual(["Tag 1", "Tag 2", "Tag 3"], list(X.columns))

        # Two aggregation methods give a two-level column index:
        # tag name on top, aggregation method below.
        X, _ = TimeSeriesDataset(
            MockDataSource(),
            from_ts,
            to_ts,
            tag_list=tags,
            aggregation_methods=["mean", "max"],
        ).get_data()
        self.assertEqual((577, 6), X.shape)
        expected_columns = [
            (tag, method)
            for tag in ("Tag 1", "Tag 2", "Tag 3")
            for method in ("mean", "max")
        ]
        self.assertEqual(expected_columns, list(X.columns))
Пример #10
0
 def test_load_multiple_matches_loads_from_first(self):
     """When several providers match a tag, the first one in the list wins."""
     series = list(
         load_series_from_multiple_providers(
             [self.ab_producer, self.containing_b_producer],
             None,
             None,
             [SensorTag("abba", None)],
         )
     )
     # "abba" matches both producers; ab_producer ("ab.*") is listed first.
     self.assertEqual("ab.*", series[0].name)
 def test_load_series_checks_date(self):
     """load_series must raise ValueError when to_ts precedes from_ts."""
     reader = IrocReader(client=None, threads=1)
     # from_ts is one day AFTER to_ts here, which is invalid.
     with self.assertRaises(ValueError):
         list(
             reader.load_series(
                 from_ts=isoparse("2018-05-03T01:56:00+00:00"),
                 to_ts=isoparse("2018-05-02T01:56:00+00:00"),
                 tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
             )
         )
 def test_load_series_no_data(self, _mocked_method):
     """load_series raises ValueError when none of the requested tags exist."""
     reader = IrocReader(client=None, threads=1)
     with self.assertRaises(ValueError):
         # Realize the generator so the missing-tag check runs.
         list(
             reader.load_series(
                 from_ts=isoparse("2018-05-02T01:56:00+00:00"),
                 to_ts=isoparse("2018-05-03T01:56:00+00:00"),
                 tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
             )
         )
Пример #13
0
 def test_load_series_missing_columns_data(self, _mocked_method):
     """Even a single unfindable tag makes load_series raise ValueError."""
     reader = IrocReader(client=None, threads=1)
     # Valid tags plus one ("jalla") that is not present in the input data.
     requested = IROC_HAPPY_TAG_LIST + [SensorTag("jalla", None)]
     with self.assertRaises(ValueError):
         list(
             reader.load_series(
                 from_ts=isoparse("2018-05-02T01:56:00+00:00"),
                 to_ts=isoparse("2018-05-03T01:56:00+00:00"),
                 tag_list=requested,
             )
         )
Пример #14
0
    def test_row_filter(self):
        """Row filters should progressively cut down the number of rows."""
        base_args = dict(
            data_provider=MockDataSource(),
            tag_list=[SensorTag(f"Tag {n}", None) for n in (1, 2, 3)],
            from_ts=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
            to_ts=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        )

        # (row_filter, expected surviving rows); None means no filter at all.
        cases = [
            (None, 577),
            ("'Tag 1' < 5000", 8),
            ("'Tag 1' / 'Tag 3' < 0.999", 3),
        ]
        for row_filter, expected_rows in cases:
            extra = {} if row_filter is None else {"row_filter": row_filter}
            X, _ = TimeSeriesDataset(**extra, **base_args).get_data()
            self.assertEqual(expected_rows, len(X))
def test_load_series_need_base_path(ncs_reader, dates):
    """An asset served only via dl_base_path fails without one, loads with one."""
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")

    # The default reader has no base path, so consuming the generator raises.
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    base_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    reader = NcsReader(AzureDLFileSystemMock(), dl_base_path=base_path)
    for series in reader.load_series(dates[0], dates[1], [tag]):
        assert len(series) == 20
Пример #16
0
import pandas as pd

import responses
import requests
from asynctest import mock as async_mock
from influxdb import InfluxDBClient
from flask import Request

from gordo_components.model import models
from gordo_components.watchman import server as watchman_server
from gordo_components.dataset.sensor_tag import SensorTag
from gordo_components.dataset.sensor_tag import to_list_of_strings

logger = logging.getLogger(__name__)

# Four synthetic sensor tags ("tag-0" … "tag-3") with no asset, plus the
# same names rendered as plain strings for APIs that take strings.
SENSORTAG_LIST = [SensorTag(f"tag-{i}", None) for i in range(4)]
SENSORS_STR_LIST = to_list_of_strings(SENSORTAG_LIST)
# Settings for the test InfluxDB instance (credential values are placeholders).
INFLUXDB_NAME = "testdb"
INFLUXDB_USER = "******"
INFLUXDB_PASSWORD = "******"
INFLUXDB_MEASUREMENT = "sensors"

# user:password@host:port/database — connection URI for the local test server.
INFLUXDB_URI = f"{INFLUXDB_USER}:{INFLUXDB_PASSWORD}@localhost:8086/{INFLUXDB_NAME}"

# Positional args for the influxdb fixture; note SENSORS_STR_LIST appears
# both first and last — presumably sensors-then-measurement args; confirm
# against the fixture's signature.
INFLUXDB_FIXTURE_ARGS = (
    SENSORS_STR_LIST,
    INFLUXDB_NAME,
    INFLUXDB_USER,
    INFLUXDB_PASSWORD,
    SENSORS_STR_LIST,
)
Пример #17
0
            self.assertGreaterEqual(
                len(dirs),
                1,
                msg="Expected saving of model to create at "
                f"least one subdir, but got {len(dirs)}",
            )


@pytest.mark.parametrize(
    "should_be_equal,metadata,tag_list",
    [
        (True, None, None),
        (False, {
            "metadata": "something"
        }, None),
        (False, None, [SensorTag("extra_tag", None)]),
    ],
)
def test_provide_saved_model_caching(should_be_equal: bool,
                                     metadata: Optional[Dict],
                                     tag_list: Optional[List[SensorTag]]):
    """
    Test provide_saved_model with caching and possible cache busting if metadata or
    tag_list is set.

    Parameters
    ----------
    should_be_equal : bool
        Should the two generated models be at the same location or not?
    metadata
        Optional metadata which will be used as metadata instead of the default
TAG_NAME1 = "MyBeautifulTag1"
TAG_NAME2 = "MyBeautifulTag2"
asset_nonsense = "ImaginaryAsset"


@pytest.mark.parametrize(
    "good_input_tags,asset,expected_output_tags",
    [
        (
            [
                {"name": TAG_NAME1, "asset": asset_nonsense},
                {"name": TAG_NAME2, "asset": asset_nonsense},
            ],
            "ThisAssetCodeWillBeIgnored",
            [
                SensorTag(TAG_NAME1, asset_nonsense),
                SensorTag(TAG_NAME2, asset_nonsense),
            ],
        ),
        (
            ["TRC-123", "GRA-214", "ASGB-212"],
            "ThisWillBeTheAsset",
            [
                SensorTag("TRC-123", "ThisWillBeTheAsset"),
                SensorTag("GRA-214", "ThisWillBeTheAsset"),
                SensorTag("ASGB-212", "ThisWillBeTheAsset"),
            ],
        ),
        (
            ["TRC-123", "GRA-214", "ASGB-212"],
            None,  # Will deduce asset
Пример #19
0
def ncs_reader():
    """Build an NcsReader over the mocked Azure Data Lake filesystem.

    NOTE(review): presumably decorated with @pytest.fixture just above this
    snippet — the identical definition elsewhere in the file carries that
    decorator; confirm.
    """
    return NcsReader(AzureDLFileSystemMock())


@pytest.fixture
def dates():
    """Fixture: (start, end) aware timestamps spanning Jan 2000 – Sep 2001."""
    start = dateutil.parser.isoparse("2000-01-01T08:56:00+00:00")
    end = dateutil.parser.isoparse("2001-09-01T10:01:00+00:00")
    return start, end


@pytest.mark.parametrize(
    "tag_to_check",
    [
        normalize_sensor_tags(["TRC-123"])[0],
        SensorTag("XYZ-123", "1776-TROC"),
    ],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    """Both a normalized tag and one with an explicit asset are handled."""
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check",
    [
        SensorTag("TRC-123", None),
        SensorTag("XYZ-123", "123-XXX"),
    ],
)
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    """Tags with a missing or unrecognized asset must be rejected."""
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    with pytest.raises(ValueError):
Пример #20
0
import unittest
from io import StringIO
from unittest import mock

from dateutil.parser import isoparse  # type: ignore

from gordo_components.data_provider.iroc_reader import IrocReader, read_iroc_file
from gordo_components.dataset.sensor_tag import normalize_sensor_tags
from gordo_components.dataset.sensor_tag import SensorTag

# Tags present in the happy-path IROC test data; all belong to the
# "NINENINE" asset.
IROC_HAPPY_TAG_LIST = [
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1410J0", "NINENINE"),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", "NINENINE"),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", "NINENINE"),
]

# One-day window used by the happy-path tests.
HAPPY_FROM_TS = isoparse("2018-05-02T01:56:00+00:00")
HAPPY_TO_TS = isoparse("2018-05-03T01:56:00+00:00")

# Raw tag names spanning two assets (NINENINE and UON_EF). Note these are
# plain strings, not SensorTag instances.
IROC_MANY_ASSETS_TAG_LIST = [
    "NINENINE.OPCIS::NNFCDPC01.AI1410J0",
    "NINENINE.OPCIS::NNFCDPC01.AI1840C1J0",
    "NINENINE.OPCIS::NNFCDPC01.AI1840E1J0",
    "UON_EF.OPCIS::LO006-B1H.PRCASXIN",
    "UON_EF.OPCIS::LO006-B1H.PRTUBXIN",
    "UON_EF.OPCIS::LO006-B1H_M1.PRSTAXIN",
    "UON_EF.OPCIS::LO006-B1H_M1.RTGASDIN",
]

IROC_NO_ASSET_TAG_LIST = [
    SensorTag("NOT.OPCIS::NNFCDPC01.AI1410J0", "NOT"),
Пример #21
0
 def test_can_handle_tag_ok(self):
     """A tag whose asset is a known IROC asset is accepted."""
     reader = IrocReader(client=None, threads=1)
     assert reader.can_handle_tag(SensorTag("UON_EF.xxx", "UON_EF"))
import unittest
from io import StringIO
from unittest import mock

from dateutil.parser import isoparse  # type: ignore

from gordo_components.data_provider.iroc_reader import IrocReader, read_iroc_file
from gordo_components.dataset.sensor_tag import SensorTag

# Happy-path tags for the asset-less variant of the IROC tests; same tag
# names as in IROC_HAPPY_PATH_CSV, asset left as None.
IROC_HAPPY_TAG_LIST = [
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1410J0", None),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", None),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", None),
]

# One-day window covering every timestamp in IROC_HAPPY_PATH_CSV below.
HAPPY_FROM_TS = isoparse("2018-05-02T01:56:00+00:00")
HAPPY_TO_TS = isoparse("2018-05-03T01:56:00+00:00")

# Functioning CSV for IROC. Has 6 lines, 5 different timestamps
# (2018-05-02T06:44:29.7830000Z occurs twice) and 3 different tags.
IROC_HAPPY_PATH_CSV = u"""tag,value,timestamp,status
NINENINE.OPCIS::NNFCDPC01.AI1410J0,5,2018-05-02T06:00:11.3860000Z,Analog Normal
NINENINE.OPCIS::NNFCDPC01.AI1410J0,76.86899,2018-05-02T06:44:29.7830000Z,Analog Normal
NINENINE.OPCIS::NNFCDPC01.AI1840C1J0,-23.147645,2018-05-02T06:43:53.8490000Z,Analog Normal
NINENINE.OPCIS::NNFCDPC01.AI1840C1J0,-10.518037,2018-05-02T06:44:29.9130000Z,Analog Normal
NINENINE.OPCIS::NNFCDPC01.AI1840E1J0,48.92137,2018-05-02T06:43:59.7240000Z,Analog Normal
NINENINE.OPCIS::NNFCDPC01.AI1840E1J0,-0.497645,2018-05-02T06:44:29.7830000Z,Analog Normal
                """


class IrocDataSourceTestCase(unittest.TestCase):
Пример #23
0
 def test_can_handle_tag_unknown_asset(self):
     """A tag whose asset IrocReader does not recognize must be rejected."""
     iroc_reader = IrocReader(client=None, threads=1)
     # Fixed garbled "UNKNOWǸ_ASSET" (stray U+01F8 'Ǹ') to plain ASCII;
     # any asset outside the reader's known set exercises the same path,
     # so the test's semantics are unchanged.
     assert not iroc_reader.can_handle_tag(
         SensorTag("UON_EF.xxx", "UNKNOWN_ASSET")
     )
Пример #24
0
 def test_can_handle_tag_no_asset(self):
     """Without an asset hint IrocReader cannot handle any tag."""
     reader = IrocReader(client=None, threads=1)
     assert not reader.can_handle_tag(SensorTag("UON_EF.xxx", None))
@pytest.fixture
def ncs_reader():
    """Fixture: an NcsReader over the mocked Azure Data Lake filesystem."""
    mock_fs = AzureDLFileSystemMock()
    return NcsReader(mock_fs)


@pytest.fixture
def dates():
    """Fixture: a (from_ts, to_ts) pair of timezone-aware timestamps."""
    return tuple(
        dateutil.parser.isoparse(stamp)
        for stamp in ("2000-01-01T08:56:00+00:00", "2001-09-01T10:01:00+00:00")
    )


@pytest.mark.parametrize(
    "tag_to_check",
    [
        normalize_sensor_tags(["TRC-123"])[0],
        SensorTag("XYZ-123", "1776-TROC"),
    ],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    """Normalized tags and tags with an explicit known asset are handled."""
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check",
    [
        SensorTag("TRC-123", None),
        SensorTag("XYZ-123", "123-XXX"),
    ],
)
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    """A missing or unknown asset makes the tag unhandleable."""
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    """A tag name with an unknown prefix should end in ValueError.

    NOTE(review): the ValueError may come from normalize_sensor_tags itself
    (asset deduction on an unknown prefix) rather than from can_handle_tag;
    both calls are deliberately kept inside the pytest.raises block.
    """
    with pytest.raises(ValueError):
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
Пример #26
0
            [
                ("Tag 1", "mean"),
                ("Tag 1", "max"),
                ("Tag 2", "mean"),
                ("Tag 2", "max"),
                ("Tag 3", "mean"),
                ("Tag 3", "max"),
            ],
        )


@pytest.mark.parametrize(
    "tag_list",
    [
        [
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None)
        ],
        [SensorTag("Tag 1", None)],
    ],
)
@pytest.mark.parametrize(
    "target_tag_list",
    [
        [
            SensorTag("Tag 2", None),
            SensorTag("Tag 1", None),
            SensorTag("Tag 3", None)
        ],
        [SensorTag("Tag 1", None)],