Пример #1
0
def _load_signal(signal, test_split):
    """Load the train and test partitions of a signal.

    Args:
        signal (str):
            Name or path handed to ``load_signal``.
        test_split (float or bool):
            Selects how the two partitions are obtained. A float is
            forwarded to ``load_signal`` as ``test_size``. Any other truthy
            value loads the ``<signal>-train`` and ``<signal>-test``
            signals. A falsy non-float value loads ``signal`` once and uses
            that same object for both partitions.

    Returns:
        tuple:
            The ``(train, test)`` pair.
    """
    # Floats are checked first, so ``0.0`` is treated as a split size and
    # not as "no split".
    if isinstance(test_split, float):
        train, test = load_signal(signal, test_size=test_split)
        return train, test

    if not test_split:
        whole = load_signal(signal)
        return whole, whole

    train = load_signal(signal + '-train')
    test = load_signal(signal + '-test')
    return train, test
Пример #2
0
def _add_dataset(explorer, args):
    """Add a dataset to ``explorer`` using the attributes found in ``args``.

    ``args.signal`` defaults to ``args.name`` when it is ``None``. When
    either ``args.start`` or ``args.stop`` is falsy, the signal data is
    loaded and the missing bound is filled in with the minimum / maximum
    value of its ``timestamp`` column.

    If loading raises ``HTTPError``, an "Unknown signal" message is printed
    and the process exits with status 1.

    Args:
        explorer:
            Object exposing an ``add_dataset`` method.
        args:
            Object carrying the ``name``, ``signal``, ``satellite``,
            ``start``, ``stop``, ``location``, ``timestamp_column``,
            ``value_column`` and ``user`` attributes. It is modified in
            place when defaults are filled in.
    """
    if args.signal is None:
        args.signal = args.name

    # The data only has to be read when at least one bound is missing.
    if not (args.start and args.stop):
        source = args.location or args.name

        try:
            signal_data = load_signal(
                source, None, args.timestamp_column, args.value_column)
        except HTTPError:
            print('Unknown signal: {}'.format(source))
            sys.exit(1)
        else:
            stamps = signal_data['timestamp']
            if not args.start:
                args.start = stamps.min()

            if not args.stop:
                args.stop = stamps.max()

    explorer.add_dataset(
        args.name, args.signal, args.satellite,
        args.start, args.stop, args.location,
        args.timestamp_column, args.value_column,
        args.user,
    )
Пример #3
0
def test_load_signal_test_size(isfile_mock, load_csv_mock):
    """``load_signal`` with ``test_size=0.33`` returns a (train, test) tuple.

    With ten input rows the expected split is the first seven rows as train
    and the last three rows (keeping index 7..9) as test, with the values
    as floats.
    """
    # setup
    isfile_mock.return_value = True
    load_csv_mock.return_value = pd.DataFrame({
        'timestamp': list(range(10)),
        'value': list(range(10, 20)),
    })

    # run
    returned = load_signal('a/path/to/a.csv', test_size=0.33)

    # assert
    assert isinstance(returned, tuple)
    assert len(returned) == 2

    train, test = returned

    expected_train = pd.DataFrame({
        'timestamp': list(range(7)),
        'value': list(np.arange(10, 17).astype(float)),
    })
    pd.testing.assert_frame_equal(train, expected_train)

    # The test partition is expected to keep its original row positions.
    expected_test = pd.DataFrame(
        {
            'timestamp': list(range(7, 10)),
            'value': list(np.arange(17, 20).astype(float)),
        },
        index=range(7, 10),
    )
    pd.testing.assert_frame_equal(test, expected_test)
0
    def add_signal(self,
                   name,
                   dataset,
                   data_location=None,
                   start_time=None,
                   stop_time=None,
                   timestamp_column=None,
                   value_column=None):
        """Create a Signal attached to a Dataset and store it in the database.

        The signal data is loaded up front so that missing time bounds can
        be derived from its ``timestamp`` column.

        Args:
            name (str):
                Name of the Signal.
            dataset (Dataset or ObjectID or str):
                Dataset which the new Signal belongs to, or a reference that
                ``self.get_dataset`` can resolve to it.
            data_location (str):
                Optional. Path to the CSV holding the Signal data. When
                omitted (or falsy) the signal name is used as the location,
                which is how signals provided by Orion are loaded.
            start_time (int):
                Optional. Minimum timestamp to use for this signal. Any
                falsy value is replaced by the minimum timestamp found in
                the signal data.
            stop_time (int):
                Optional. Maximum timestamp to use for this signal. Any
                falsy value is replaced by the maximum timestamp found in
                the signal data.
            timestamp_column (int):
                Optional. Index of the timestamp column.
            value_column (int):
                Optional. Index of the value column.

        Raises:
            NotUniqueError:
                If a Signal with the same name already exists for this Dataset.

        Returns:
            Signal
        """
        data_location = data_location or name
        signal_data = load_signal(data_location, None, timestamp_column, value_column)
        stamps = signal_data['timestamp']

        # Fall back to the extent of the data for any bound not given.
        start_time = start_time or stamps.min()
        stop_time = stop_time or stamps.max()

        fields = {
            'name': name,
            'dataset': self.get_dataset(dataset),
            'start_time': start_time,
            'stop_time': stop_time,
            'data_location': data_location,
            'timestamp_column': timestamp_column,
            'value_column': value_column,
            'created_by': self.user,
        }
        return schema.Signal.insert(**fields)
Пример #5
0
def _evaluate_on_signal(pipeline, signal, metrics):
    """Score the anomalies a pipeline finds on a single signal.

    Args:
        pipeline:
            Pipeline passed to ``analyze`` together with the signal data.
        signal (str):
            Name used to load both the signal data and its known anomalies.
        metrics (dict):
            Mapping of metric name to a scorer callable invoked as
            ``scorer(truth, anomalies, data)``.

    Returns:
        dict:
            One entry per metric name, holding the scorer result.
    """
    data = load_signal(signal)
    detected = analyze(pipeline, data)
    expected = load_anomalies(signal)

    scores = {}
    for metric_name, score_fn in metrics.items():
        scores[metric_name] = score_fn(expected, detected, data)

    return scores
Пример #6
0
    def load(self):
        """Load this object's data, cut down to its configured time range.

        The data is read from ``self.data_location`` using the configured
        timestamp and value columns. Rows with a ``timestamp`` before
        ``self.start_time`` or after ``self.stop_time`` are dropped; a falsy
        bound disables the corresponding filter.

        Returns:
            The loaded (and possibly filtered) data, presumably a
            ``pandas.DataFrame``. Each applied filter returns a copy.
        """
        loaded = load_signal(
            self.data_location,
            None,
            self.timestamp_column,
            self.value_column,
        )

        if self.start_time:
            not_too_early = loaded['timestamp'] >= self.start_time
            loaded = loaded[not_too_early].copy()

        if self.stop_time:
            not_too_late = loaded['timestamp'] <= self.stop_time
            loaded = loaded[not_too_late].copy()

        return loaded
Пример #7
0
    def load_dataset(self, dataset):
        """Load the data of ``dataset``, cut down to its time range.

        Args:
            dataset:
                Object exposing ``data_location``, ``name``,
                ``timestamp_column``, ``value_column``, ``start_time`` and
                ``stop_time`` attributes.

        Returns:
            The loaded data, presumably a ``pandas.DataFrame``, keeping only
            rows whose ``timestamp`` lies within the truthy bounds.
        """
        # The name doubles as the location when no explicit one is stored.
        source = dataset.data_location or dataset.name
        LOGGER.info("Loading dataset %s", source)

        loaded = load_signal(
            source, None, dataset.timestamp_column, dataset.value_column)

        if dataset.start_time:
            not_too_early = loaded['timestamp'] >= dataset.start_time
            loaded = loaded[not_too_early].copy()

        if dataset.stop_time:
            not_too_late = loaded['timestamp'] <= dataset.stop_time
            loaded = loaded[not_too_late].copy()

        return loaded
Пример #8
0
    def load_signal(self, signal):
        """Load the data of ``signal``, cut down to its time range.

        The data is read through the module-level ``load_signal`` function
        (not this method), using ``signal.data_location`` or, when that is
        falsy, ``signal.name``.

        Args:
            signal:
                Object exposing ``data_location``, ``name``,
                ``timestamp_column``, ``value_column``, ``start_time`` and
                ``stop_time`` attributes.

        Returns:
            The loaded data, presumably a ``pandas.DataFrame``, keeping only
            rows whose ``timestamp`` lies within the truthy bounds. A falsy
            bound disables the corresponding filter.
        """
        path_or_name = signal.data_location or signal.name
        # This method loads a signal, so the log message says so.
        LOGGER.info("Loading signal %s", path_or_name)
        data = load_signal(path_or_name, None, signal.timestamp_column, signal.value_column)
        if signal.start_time:
            data = data[data['timestamp'] >= signal.start_time].copy()

        if signal.stop_time:
            data = data[data['timestamp'] <= signal.stop_time].copy()

        return data
Пример #9
0
def test_load_signal_filename(isfile_mock, load_csv_mock):
    """A path reported as an existing file is delegated to the CSV loader."""
    csv_path = 'a/path/to/a.csv'

    # setup: report the path as an existing file
    isfile_mock.return_value = True

    # run
    returned = load_signal(csv_path)

    # assert: the loader result is handed back and no columns were given
    assert returned == load_csv_mock.return_value
    load_csv_mock.assert_called_once_with(csv_path, None, None)
Пример #10
0
def _evaluate_on_signal(pipeline, signal, metrics, holdout=True):
    """Run a pipeline on one signal and score the detected anomalies.

    Args:
        pipeline:
            Pipeline passed to ``analyze`` together with the train and test
            data.
        signal (str):
            Base name of the signal. The test data is always loaded from
            ``<signal>-test`` and the known anomalies from ``signal``.
        metrics (dict):
            Mapping of metric name to a scorer callable invoked as
            ``scorer(truth, anomalies, test)``.
        holdout (bool):
            If truthy, the train data is loaded from ``<signal>-train``;
            otherwise the ``signal`` itself is used as train data.

    Returns:
        dict:
            One entry per metric name plus an ``elapsed`` entry holding the
            duration of the ``analyze`` call in seconds (float).
    """
    # Imported locally so the block does not depend on a module-level import.
    import time

    train = load_signal((signal + '-train') if holdout else signal)
    test = load_signal(signal + '-test')

    # Measure the duration with a monotonic, high resolution clock: wall
    # clock timestamps can jump (NTP, manual changes) and
    # ``datetime.utcnow`` is deprecated since Python 3.12.
    start = time.perf_counter()
    anomalies = analyze(pipeline, train, test)
    elapsed = time.perf_counter() - start

    truth = load_anomalies(signal)

    scores = {
        name: scorer(truth, anomalies, test)
        for name, scorer in metrics.items()
    }
    scores['elapsed'] = elapsed

    return scores
Пример #11
0
def test_load_signal_nasa_signal_name(isfile_mock, load_csv_mock, lns_mock):
    """A name that is not an existing file is resolved through ``lns_mock``."""
    signal_name = 'S-1'

    # setup: report that no such file exists
    isfile_mock.return_value = False

    # run
    returned = load_signal(signal_name)

    # assert: the CSV loader is bypassed in favour of the named-signal loader
    assert returned == lns_mock.return_value
    load_csv_mock.assert_not_called()
    lns_mock.assert_called_once_with(signal_name)
Пример #12
0
def test_load_signal_filename(isfile_mock, load_csv_mock):
    """A path reported as an existing file returns the CSV loader's frame."""
    csv_path = 'a/path/to/a.csv'

    # setup: the CSV loader returns ten rows with float values
    load_csv_mock.return_value = pd.DataFrame({
        'timestamp': list(range(10)),
        'value': list(np.arange(10, 20, dtype=float)),
    })
    isfile_mock.return_value = True

    # run
    returned = load_signal(csv_path)

    # assert
    pd.testing.assert_frame_equal(returned, load_csv_mock.return_value)
    load_csv_mock.assert_called_once_with(csv_path, None, None)
Пример #13
0
def test_load_signal_nasa_signal_name(isfile_mock, load_csv_mock, lns_mock):
    """A name that is not an existing file returns the frame from ``lns_mock``."""
    signal_name = 'S-1'

    # setup: the named-signal loader returns ten rows with float values
    expected = pd.DataFrame({
        'timestamp': list(range(10)),
        'value': list(np.arange(10, 20, dtype=float)),
    })
    lns_mock.return_value = expected
    isfile_mock.return_value = False

    # run
    returned = load_signal(signal_name)

    # assert
    pd.testing.assert_frame_equal(returned, expected)
    load_csv_mock.assert_not_called()
    lns_mock.assert_called_once_with(signal_name)
Пример #14
0
    def add_dataset(self, name, signal_set, satellite_id=None, start_time=None, stop_time=None,
                    location=None, timestamp_column=None, value_column=None, user_id=None):
        """Find or insert a Dataset entry, deriving missing time bounds from the data.

        The data is loaded from ``location`` (or from ``name`` when
        ``location`` is falsy) and the extent of its ``timestamp`` column is
        used for any bound that was not provided.

        Args:
            name (str):
                Name of the dataset. Also used as the data location when
                ``location`` is falsy.
            signal_set:
                Stored as the ``signal_set`` of the dataset.
            satellite_id:
                Optional. Stored as the ``satellite_id`` of the dataset.
            start_time:
                Optional. Any falsy value is replaced by the minimum
                timestamp found in the data.
            stop_time:
                Optional. Any falsy value is replaced by the maximum
                timestamp found in the data.
            location (str):
                Optional. Location the data is loaded from.
            timestamp_column:
                Optional. Passed to ``load_signal`` to select the timestamp column.
            value_column:
                Optional. Passed to ``load_signal`` to select the value column.
            user_id:
                Optional. Stored as ``created_by``.

        Returns:
            Whatever ``model.Dataset.find_or_insert`` returns for the given fields.
        """
        location = location or name
        loaded = load_signal(location, None, timestamp_column, value_column)
        stamps = loaded['timestamp']

        # Fall back to the extent of the data for any bound not given.
        start_time = start_time or stamps.min()
        stop_time = stop_time or stamps.max()

        fields = {
            'name': name,
            'signal_set': signal_set,
            'satellite_id': satellite_id,
            'start_time': start_time,
            'stop_time': stop_time,
            'data_location': location,
            'timestamp_column': timestamp_column,
            'value_column': value_column,
            'created_by': user_id,
        }
        return model.Dataset.find_or_insert(**fields)