Example #1
def test_featurize_time_series_default_errors():
    """Test featurize wrapper function for time series w/ missing errors"""
    n_channels = 3
    t, m, _ = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    meta_features = {}
    fset = featurize.featurize_time_series(t,
                                           m,
                                           None,
                                           features_to_use,
                                           meta_features,
                                           scheduler=dask.get)

    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(t,
                                           m,
                                           None,
                                           features_to_use,
                                           meta_features,
                                           scheduler=dask.get)

    t = t[0][0]
    m = m[0][0]
    fset = featurize.featurize_time_series(t,
                                           m,
                                           None,
                                           features_to_use,
                                           meta_features,
                                           scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
Example #2
def test_featurize_time_series_default_times():
    """Test featurize wrapper function for time series w/ missing times"""
    n_channels = 3
    _, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    meta_features = {}
    fset = featurize.featurize_time_series(None,
                                           m,
                                           e,
                                           features_to_use,
                                           meta_features,
                                           scheduler=get_sync)

    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    fset = featurize.featurize_time_series(None,
                                           m,
                                           e,
                                           features_to_use,
                                           meta_features,
                                           scheduler=get_sync)

    m = m[0][0]
    e = e[0][0]
    fset = featurize.featurize_time_series(None,
                                           m,
                                           e,
                                           features_to_use,
                                           meta_features,
                                           scheduler=get_sync)
Example #3
def test_ignore_exceptions():
    import cesium.features.graphs

    def raise_exc(x):
        raise ValueError()

    old_value = cesium.features.graphs.dask_feature_graph['mean']
    try:
        cesium.features.graphs.dask_feature_graph['mean'] = (raise_exc, 't')
        t, m, e = sample_values()
        features_to_use = ['mean']
        with pytest.raises(ValueError):
            fset = featurize.featurize_time_series(t,
                                                   m,
                                                   e,
                                                   features_to_use,
                                                   scheduler=dask.get,
                                                   raise_exceptions=True)
        fset = featurize.featurize_time_series(t,
                                               m,
                                               e,
                                               features_to_use,
                                               scheduler=dask.get,
                                               raise_exceptions=False)
        assert np.isnan(fset.values).all()
    finally:
        cesium.features.graphs.dask_feature_graph['mean'] = old_value
Example #4
def test_featurize_time_series_pandas_metafeatures():
    """Test featurize function for metafeatures passed as Series/DataFrames."""
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    meta_features = pd.Series({'meta1': 0.5})
    fset = featurize.featurize_time_series(t,
                                           m,
                                           e,
                                           features_to_use,
                                           meta_features,
                                           scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)

    n_series = 5
    list_of_series = [sample_values() for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    meta_features = pd.DataFrame({
        'meta1': [0.5] * n_series,
        'meta2': [0.8] * n_series
    })
    fset = featurize.featurize_time_series(times,
                                           values,
                                           errors,
                                           features_to_use,
                                           meta_features,
                                           scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)
    npt.assert_allclose(fset['meta2'], 0.8)
Example #5
    def post(self):
        ts_data = json_decode(self.get_argument('ts_data'))
        model_id = json_decode(self.get_argument('modelID'))
        meta_feats = json_decode(self.get_argument('meta_features', 'null'))
        impute_kwargs = json_decode(self.get_argument('impute_kwargs', '{}'))

        model = Model.query.get(model_id)
        model_data = joblib.load(model.file_uri)
        if hasattr(model_data, 'best_estimator_'):
            model_data = model_data.best_estimator_
        features_to_use = model.featureset.features_list

        fset = featurize.featurize_time_series(*ts_data,
                                               features_to_use=features_to_use,
                                               meta_features=meta_feats,
                                               raise_exceptions=False)
        fset = featurize.impute_featureset(fset, **impute_kwargs)
        fset.index = fset.index.astype(str)  # ensure JSON-encodable
        data = {'preds': model_data.predict(fset)}
        if hasattr(model_data, 'predict_proba'):
            data['pred_probs'] = pd.DataFrame(model_data.predict_proba(fset),
                                              index=fset.index,
                                              columns=model_data.classes_)
        else:
            data['pred_probs'] = []
        pred_info = Prediction.format_pred_data(fset, data)
        return self.success(pred_info)
Example #6
def get_freq_features(N,
                      train_series,
                      times_list,
                      flux_list,
                      train_metadata,
                      subsetting_pos=None):
    if subsetting_pos is None:
        subset_times_list = times_list
        subset_flux_list = flux_list
    else:
        subset_times_list = [
            v for i, v in enumerate(times_list) if i in set(subsetting_pos)
        ]
        subset_flux_list = [
            v for i, v in enumerate(flux_list) if i in set(subsetting_pos)
        ]
    feats = featurize.featurize_time_series(
        times=subset_times_list[:N],
        values=subset_flux_list[:N],
        features_to_use=[
            'skew', 'percent_beyond_1_std',
            'percent_difference_flux_percentile'
        ],
        scheduler=None,
    )
    subset = train_series[train_series['object_id'].isin(
        train_metadata['object_id'].iloc[subsetting_pos].iloc[:N])]
    models = list(map(fit_multiband_freq, subset.groupby('object_id')))
    feats['object_pos'] = subsetting_pos[:N]
    feats['freq1_freq'] = [model.best_period for model in models]
    return feats, models
Example #8
def test_featurize_time_series_single():
    """Test featurize wrapper function for single time series"""
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features, scheduler=dask.get)
    assert fset['amplitude'].values.dtype == np.float64
Example #10
def get_features(time_series):
    times = [df['mjd'].tolist() for df in time_series]
    values = [df['flux'].tolist() for df in time_series]
    errors = [df['flux_err'].tolist() for df in time_series]

    features = featurize_time_series(times,
                                     values,
                                     errors,
                                     features_to_use=FEATURES)
    return features.values
Example #11
def test_featurize_time_series_single_multichannel():
    """Test featurize wrapper function for single multichannel time series"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features, scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
    assert 'meta1' in fset.columns
Example #13
def test_featurize_time_series_default_errors():
    """Test featurize wrapper function for time series w/ missing errors"""
    n_channels = 3
    t, m, _ = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    meta_features = {}
    fset = featurize.featurize_time_series(t, m, None, features_to_use,
                                           meta_features, scheduler=dask.get)

    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(t, m, None, features_to_use,
                                           meta_features, scheduler=dask.get)

    t = t[0][0]
    m = m[0][0]
    fset = featurize.featurize_time_series(t, m, None, features_to_use,
                                           meta_features, scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
Example #14
def test_ignore_exceptions():
    import cesium.features.graphs
    def raise_exc(x):
        raise ValueError()
    old_value = cesium.features.graphs.dask_feature_graph['mean']
    try:
        cesium.features.graphs.dask_feature_graph['mean'] = (raise_exc, 't')
        t, m, e = sample_values()
        features_to_use = ['mean']
        with pytest.raises(ValueError):
            fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                                   scheduler=dask.get,
                                                   raise_exceptions=True)
        fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                               scheduler=dask.get,
                                               raise_exceptions=False)
        assert np.isnan(fset.values).all()
    finally:
        cesium.features.graphs.dask_feature_graph['mean'] = old_value
Example #15
def test_featurize_time_series_single():
    """Test featurize wrapper function for single time series"""
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #16
def test_featurize_time_series_single():
    """Test featurize wrapper function for single time series"""
    t, m, e = sample_time_series()
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #17
def test_featurize_time_series_pandas_metafeatures():
    """Test featurize function for metafeatures passed as Series/DataFrames."""
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    meta_features = pd.Series({'meta1': 0.5})
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features, scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)

    n_series = 5
    list_of_series = [sample_values() for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    meta_features = pd.DataFrame({'meta1': [0.5] * n_series,
                                  'meta2': [0.8] * n_series})
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use,
                                           meta_features, scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)
    npt.assert_allclose(fset['meta2'], 0.8)
Example #18
def test_featurize_time_series_default_errors():
    """Test featurize wrapper function for time series w/ missing errors"""
    n_channels = 3
    t, m, _ = sample_time_series(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {}
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    t = t[0][0]
    m = m[0][0]
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(fset.channel, [0])
Example #19
def test_featurize_time_series_multiple():
    """Test featurize wrapper function for multiple time series"""
    n_series = 5
    list_of_series = [sample_values() for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    meta_features = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use,
                                           meta_features, scheduler=dask.get)
    npt.assert_array_equal(sorted(fset.columns.get_level_values('feature')),
                           ['amplitude', 'meta1', 'std_err'])
Example #21
def test_featurize_time_series_no_targets():
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           targets=None,
                                           meta_features=meta_features,
                                           scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    assert('target' not in fset)
Example #22
def test_featurize_time_series_uneven_multichannel():
    """Test featurize wrapper function for uneven-length multichannel data"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features, scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
    assert 'meta1' in fset.columns
Example #23
def test_featurize_time_series_default_times():
    """Test featurize wrapper function for time series w/ missing times"""
    n_channels = 3
    _, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {}
    fset = featurize.featurize_time_series(None, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))

    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    fset = featurize.featurize_time_series(None, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)

    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    m = m[0][0]
    e = e[0][0]
    fset = featurize.featurize_time_series(None, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, [0])
Example #25
def test_featurize_time_series_default_errors():
    """Test featurize wrapper function for time series w/ missing errors"""
    n_channels = 3
    t, m, _ = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {}
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))

    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))

    t = t[0][0]
    m = m[0][0]
    fset = featurize.featurize_time_series(t, m, None, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, [0])
Example #26
def test_featurize_time_series_multiple():
    """Test featurize wrapper function for multiple time series"""
    n_series = 5
    list_of_series = [sample_time_series() for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    targets = np.array(['class1'] * n_series)
    meta_features = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use, targets,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'] * n_series)
Example #27
def test_featurize_time_series_multiple_multichannel():
    """Test featurize wrapper function for multiple multichannel time series"""
    n_series = 5
    n_channels = 3
    list_of_series = [sample_values(channels=n_channels)
                      for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use,
                                           meta_features, scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
    assert 'meta1' in fset.columns
Example #28
def test_featurize_time_series_custom_functions():
    """Test featurize wrapper function for time series w/ custom functions"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'test_f']
    meta_features = {'meta1': 0.5}
    custom_functions = {'test_f': lambda t, m, e: np.pi}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           scheduler=dask.get)
    npt.assert_array_equal(fset['test_f', 0], np.pi)
    assert ('amplitude', 0) in fset.columns
    assert 'meta1' in fset.columns
Example #31
def test_featurize_time_series_multiple():
    """Test featurize wrapper function for multiple time series"""
    n_series = 5
    list_of_series = [sample_values() for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    targets = np.array(['class1'] * n_series)
    meta_features = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use, targets,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'] * n_series)
Example #32
def test_featurize_time_series_custom_dask_graph():
    """Test featurize wrapper function for time series w/ custom dask graph"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'test_f', 'test_meta']
    meta_features = {'meta1': 0.5}
    custom_functions = {'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
                        'test_meta': (lambda x: 2. * x, 'meta1')}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
    assert ('test_f', 0) in fset.columns
    assert ('test_meta', 0) in fset.columns
Example #33
def test_featurize_time_series_single_multichannel():
    """Test featurize wrapper function for single multichannel time series"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #35
def test_featurize_time_series_single_multichannel():
    """Test featurize wrapper function for single multichannel time series"""
    n_channels = 3
    t, m, e = sample_time_series(channels=n_channels)
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #36
    def encode(self, values_data, times=None):
        """
        Encode a column data into time series

        :param values_data: a list of timeseries data eg: ['91.0 92.0 93.0 94.0', '92.0 93.0 94.0 95.0' ...]
        :param times: (optional) a list of lists such that, len(times[i])=len(values_data[i]) for
                      all i in range(len(times))
        :return: a torch.floatTensor
        """
        features_to_use = self._features
        ret = []
        for i, values in enumerate(values_data):
            if isinstance(values, list):
                values = list(map(float, values))
            else:
                values = list(map(lambda x: float(x), values.split()))
            if times is None:
                times_row = np.array(
                    [float(i) for i in range(1,
                                             len(values) + 1)])
            else:
                times_row = np.array(
                    list(map(lambda x: float(x),
                             times[i].split())))  # np.array(times[i])
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                row = featurize.featurize_time_series(
                    times=times_row,
                    values=np.array(values),
                    errors=None,
                    features_to_use=features_to_use)

            vector_row = []
            for col in features_to_use:
                val = list(row[col][0])[0]
                val1 = 0
                if (val in ['nan', None, 'NaN', False]) \
                        or math.isnan(val) or math.isinf(val):
                    val = 0
                    val1 = 1

                if col in FEATURES_WITH_DEFAULT_NONE:
                    vector_row += [val, val1]  # val1 is 1 if the value was null/NaN
                else:
                    vector_row += [val]
            ret += [vector_row]
        ret_tensor = self._pytorch_wrapper(ret)
        return ret_tensor
Example #37
def lcFreq(df_main):
    df_main = df_main.sort_values('mjd').reset_index(drop=True)
    groups = df_main.groupby('passband')
    t_list = groups.apply(lambda gr: gr['mjd'].values).tolist()
    flx_list = groups.apply(lambda gr: gr['flux'].values).tolist()
    flxer_list = groups.apply(lambda gr: gr['flux_err'].values).tolist()
    feats = featurize.featurize_time_series(times=t_list,
                                            values=flx_list,
                                            errors=flxer_list,
                                            features_to_use=['freq1_freq'],
                                            scheduler=None)
    feats.columns = feats.columns.droplevel(1)
    feats['freq1_freq'].mean()
    feats['freq1_freq'].std()
    return pd.Series([feats['freq1_freq'].mean(), feats['freq1_freq'].std()],
                     index=['freq', 'freq_std'])
Example #38
def test_featurize_time_series_custom_script():
    """Test featurize wrapper function for time series w/ custom script path"""
    n_channels = 3
    t, m, e = sample_time_series(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'f']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features,
                                           custom_script_path=CUSTOM_SCRIPT,
                                           use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'f', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #39
def test_featurize_time_series_custom_dask_graph():
    """Test featurize wrapper function for time series w/ custom dask graph"""
    n_channels = 3
    t, m, e = sample_time_series(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'test_f']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    custom_functions = {'test_f': (lambda x: x.min() - x.max(), 'amplitude')}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err', 'test_f'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #40
def test_featurize_time_series_uneven_multichannel():
    """Test featurize wrapper function for uneven-length multichannel data"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #41
def test_featurize_time_series_uneven_multichannel():
    """Test featurize wrapper function for uneven-length multichannel data"""
    n_channels = 3
    t, m, e = sample_time_series(channels=n_channels)
    t = [[t, t[0:-5], t[0:-10]]]
    m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    features_to_use = ['amplitude', 'std_err']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #42
def test_featurize_time_series_custom_functions():
    """Test featurize wrapper function for time series w/ custom functions"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'test_f']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    custom_functions = {'test_f': lambda t, m, e: np.pi}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err', 'test_f'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.test_f.values, np.pi)
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #43
def featurize_time_series(ts):
    import numpy as np  # needed for np.arange / np.shape below
    from cesium import featurize
    features_to_use = ["amplitude",
                       "percent_beyond_1_std",
                       "maximum",
                       "max_slope",
                       "median",
                       "median_absolute_deviation",
                       "percent_close_to_median",
                       "minimum",
                       "skew",
                       "std",
                       "weighted_average"]

    new = featurize.featurize_time_series(times=np.arange(0, np.shape(ts)[0]),
                                          values=ts,
                                          errors=None,
                                          features_to_use=features_to_use)
    return new.values
Example #44
def test_featurize_time_series_multiple_multichannel():
    """Test featurize wrapper function for multiple multichannel time series"""
    n_series = 5
    n_channels = 3
    list_of_series = [sample_values(channels=n_channels)
                      for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    targets = np.array(['class1', 'class1', 'class1', 'class2', 'class2'])
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use, targets,
                                           meta_features, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, targets)
Example #45
def printFrequency(dataTable):
    """ This function obtain the frequency using cesium, which implies the period.

        Arguments:
            dataTable (array-like) : this is the .tbl data file downloaded from Caltech
            IRSA website
    """

    # freq1_amplitude1: Get the amplitude of the jth harmonic of the ith frequency from a fitted Lomb-Scargle model.

    features_to_use = ["freq1_freq", "amplitude", "freq1_amplitude1"]

    fset_cesium = featurize.featurize_time_series(
        times=dataTable["obsmjd"],
        values=dataTable["mag_autocorr"],
        errors=dataTable["magerr_auto"],
        features_to_use=features_to_use)

    print(fset_cesium)
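
    # A possible follow-up: since period = 1 / frequency, the fitted Lomb-Scargle
    # frequency implies the period; 'freq1_freq' is read from channel 0 of the
    # (feature, channel) column MultiIndex, as in the test examples above.
    period = 1.0 / fset_cesium['freq1_freq', 0]
    print("Implied period (days, if obsmjd is in MJD):", period.values)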
Example #46
def test_featurize_time_series_multiple_multichannel():
    """Test featurize wrapper function for multiple multichannel time series"""
    n_series = 5
    n_channels = 3
    list_of_series = [sample_time_series(channels=n_channels)
                      for i in range(n_series)]
    times, values, errors = [list(x) for x in zip(*list_of_series)]
    features_to_use = ['amplitude', 'std_err']
    targets = np.array(['class1', 'class1', 'class1', 'class2', 'class2'])
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(times, values, errors,
                                           features_to_use, targets,
                                           meta_features, use_celery=False)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, targets)
Example #47
def test_featurize_time_series_custom_dask_graph():
    """Test featurize wrapper function for time series w/ custom dask graph"""
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    features_to_use = ['amplitude', 'std_err', 'test_f']
    target = 'class1'
    meta_features = {'meta1': 0.5}
    custom_functions = {'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
                        'test_meta': (lambda x: 2. * x, 'meta1')}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err', 'test_f'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords),
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #48
def get_freq_features(N, subsetting_pos=None):
    if subsetting_pos is None:
        subset_times_list = times_list
        subset_flux_list = flux_list
    else:
        subset_times_list = [
            v for i, v in enumerate(times_list) if i in set(subsetting_pos)
        ]
        subset_flux_list = [
            v for i, v in enumerate(flux_list) if i in set(subsetting_pos)
        ]
    feats = featurize.featurize_time_series(
        times=subset_times_list[:N],
        values=subset_flux_list[:N],
        features_to_use=[
            'freq1_freq', 'freq1_signif', 'freq1_amplitude1', 'skew',
            'percent_beyond_1_std', 'percent_difference_flux_percentile'
        ],
        scheduler=None)
    feats['object_pos'] = subsetting_pos[:N]
    return feats
Example #49
def test_featurize_time_series_celery():
    """Test `featurize_time_series` with Celery.

    The actual featurization work is being done by
    `featurize_tools.featurize_single_time_series`, which is called by both the
    Celery and non-Celery versions; thus, besides the above tests, we only need
    to check that the Celery task is configured properly."""
    t, m, e = sample_time_series()
    features_to_use = ['amplitude', 'std_err', 'test_f']
    # This ideally would be a dummy lambda function but celery can't do that
    from cesium.science_features import lomb_scargle_fast as lsf
    custom_functions = {'test_f': lsf.lomb_scargle_fast_period}
    target = 'class1'
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use, target,
                                           meta_features,
                                           custom_functions=custom_functions,
                                           use_celery=True)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err', 'test_f'])
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #50
def try_cesium(df):

    from cesium import featurize

    features_to_use = [
        "amplitude",
        "percent_beyond_1_std",
        "maximum",
        "max_slope",
        "median",
        "median_absolute_deviation",
        "percent_close_to_median",
        "minimum",
        "skew",
        "std",
        "weighted_average",
    ]
    fset_cesium = featurize.featurize_time_series(
        times=eeg["times"],
        values=eeg["measurements"],
        errors=None,
        features_to_use=features_to_use,
    )
    print(fset_cesium.head())
Example #51
# By default, the time series will be featurized in parallel using the
# ``dask.threaded`` scheduler; other approaches, including serial and
# distributed approaches, can be implemented by passing in other ``dask``
# schedulers as the ``scheduler`` argument to ``featurize_time_series``.
#
# .. |cesium.featurize| replace:: ``cesium.featurize``
# .. _cesium.featurize: http://cesium-ml.org/docs/api/cesium.featurize.html

from cesium import featurize
features_to_use = [
    "amplitude", "percent_beyond_1_std", "maximum", "max_slope", "median",
    "median_absolute_deviation", "percent_close_to_median", "minimum", "skew",
    "std", "weighted_average"
]
fset_cesium = featurize.featurize_time_series(times=eeg["times"],
                                              values=eeg["measurements"],
                                              errors=None,
                                              features_to_use=features_to_use)
print(fset_cesium.head())
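
###############################################################################
# For example, a serial run can be requested explicitly (a minimal sketch,
# reusing the ``eeg`` data and ``features_to_use`` list from above;
# ``dask.get`` is dask's single-threaded scheduler, as used in the tests
# shown earlier):

import dask

fset_serial = featurize.featurize_time_series(times=eeg["times"],
                                              values=eeg["measurements"],
                                              errors=None,
                                              features_to_use=features_to_use,
                                              scheduler=dask.get)
print(fset_serial.head())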

###############################################################################
# The output of ``featurize_time_series`` is a ``pandas.DataFrame`` which contains all
# the feature information needed to train a machine learning model: feature
# names are stored as column indices (as well as channel numbers, as we'll see
# later for multi-channel data), and the time series index/class label are
# stored as row indices.
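
###############################################################################
# A quick look at that layout (a sketch, reusing ``fset_cesium`` from above):
# columns are ``(feature, channel)`` pairs and each row is one time series.

print(fset_cesium.columns[:3])    # MultiIndex entries such as ('amplitude', 0)
print(fset_cesium['amplitude'])   # select a single feature across all channels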

###############################################################################
# Custom feature functions
# ~~~~~~~~~~~~~~~~~~~~~~~~
# Custom feature functions not built into ``cesium`` may be passed in using the
# ``custom_functions`` keyword, either as a dictionary ``{feature_name: function}``,
# or as a dask graph.
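
###############################################################################
# A minimal sketch of both forms, reusing the ``eeg`` data from above (the
# feature name ``test_f`` is made up for illustration, mirroring the tests
# shown earlier):

import numpy as np

custom_simple = {'test_f': lambda t, m, e: np.ptp(m)}                   # plain function of (t, m, e)
custom_graph = {'test_f': (lambda x: x.min() - x.max(), 'amplitude')}   # dask-graph style (function, input)

fset_custom = featurize.featurize_time_series(times=eeg["times"],
                                              values=eeg["measurements"],
                                              errors=None,
                                              features_to_use=["amplitude", "test_f"],
                                              custom_functions=custom_simple)
print(fset_custom.head())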
Example #52
# .. _cesium.featurize: http://cesium-ml.org/docs/api/cesium.featurize.html

from cesium import featurize
features_to_use = ["amplitude",
                   "percent_beyond_1_std",
                   "maximum",
                   "max_slope",
                   "median",
                   "median_absolute_deviation",
                   "percent_close_to_median",
                   "minimum",
                   "skew",
                   "std",
                   "weighted_average"]
fset_cesium = featurize.featurize_time_series(times=eeg["times"],
                                              values=eeg["measurements"],
                                              errors=None,
                                              features_to_use=features_to_use)
print(fset_cesium.head())

###############################################################################
# The output of ``featurize_time_series`` is a ``pandas.DataFrame`` which contains all
# the feature information needed to train a machine learning model: feature
# names are stored as column indices (as well as channel numbers, as we'll see
# later for multi-channel data), and the time series index/class label are
# stored as row indices.

###############################################################################
# Custom feature functions
# ~~~~~~~~~~~~~~~~~~~~~~~~
# Custom feature functions not built into ``cesium`` may be passed in using the
# ``custom_functions`` keyword, either as a dictionary ``{feature_name: function}``,
# or as a dask graph.
Example #53
    def get_cesium_features(self, recompute=False):
        """
        Compute all relevant cesium features.
        """

        cesium = getattr(self, 'cesium', None)
        if cesium is not None:
            if not recompute:
                return cesium

        cesium = {}
        outlc = self.get_lc(recompute=recompute)

        for i, pb in enumerate(outlc):
            tlc = outlc.get(pb)
            ttime, tFlux, tFluxErr, tFluxUnred, tFluxErrUnred, tFluxRenorm, tFluxErrRenorm, tphotflag, tzeropoint, tobsId = tlc

            photmask = tphotflag >= constants.GOOD_PHOTFLAG
            ttime = ttime[photmask]
            tFluxRenorm = tFluxRenorm[photmask]
            tFluxErrRenorm = tFluxErrRenorm[photmask]

            features_general = ['flux_percentile_ratio_mid20',
                                'flux_percentile_ratio_mid50',
                                'flux_percentile_ratio_mid65',
                                'flux_percentile_ratio_mid80',
                                'max_slope',
                                'maximum',
                                'median',
                                'median_absolute_deviation',
                                'percent_amplitude',
                                'period_fast',
                                'qso_log_chi2_qsonu',
                                'qso_log_chi2nuNULL_chi2nu',
                                'fold2P_slope_90percentile',
                                'freq1_amplitude1',
                                'freq1_amplitude2',
                                'freq1_amplitude3',
                                'freq1_amplitude4',
                                'freq1_freq',
                                'freq1_lambda',
                                'freq1_rel_phase2',
                                'freq1_rel_phase3',
                                'freq1_rel_phase4',
                                'freq1_signif',
                                'freq2_amplitude1',
                                'freq2_amplitude2',
                                'freq2_amplitude3',
                                'freq2_amplitude4',
                                'freq2_rel_phase2',
                                'freq2_rel_phase3',
                                'freq2_rel_phase4',
                                'freq3_amplitude1',
                                'freq3_amplitude2',
                                'freq3_amplitude3',
                                'freq3_amplitude4',
                                'freq3_rel_phase2',
                                'freq3_rel_phase3',
                                'freq3_rel_phase4',
                                'freq_amplitude_ratio_21',
                                'freq_amplitude_ratio_31',
                                'freq_n_alias',
                                'freq_signif_ratio_21',
                                'freq_signif_ratio_31',
                                'freq_varrat',
                                'freq_y_offset',
                                'medperc90_2p_p',
                                'p2p_scatter_pfold_over_mad',
                                'p2p_ssqr_diff_over_var',
                                'scatter_res_raw']

            if len(tFluxRenorm) <= 1:  # too few good points to featurize
                fset_cesium = pd.DataFrame({f: {'channel': {0: 0}} for f in features_general})
            else:
                fset_cesium = featurize.featurize_time_series(times=ttime,
                                                              values=tFluxRenorm,
                                                              errors=tFluxErrRenorm,
                                                              features_to_use=features_general)

            cesium[pb] = fset_cesium

        self.cesium = cesium
        return cesium