Example #1
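An objective function for hyperparameter optimization: the suggested keyword arguments are made JSON-serializable, passed to an XGBoost regressor, and the test-set MSE is returned as the value to minimize.
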
        def f(**kwargs):

            kwargs['objective'] = 'reg:squarederror'

            kwargs = Jsonize(kwargs)()  # convert numpy types to plain Python so the config is JSON-serializable

            model = Model(inputs=inputs,
                          outputs=outputs,
                          lookback=1,
                          batches="2d",
                          val_data="same",
                          test_fraction=0.3,
                          model={"xgboostregressor": kwargs},
                          transformation=None,
                          data=data,
                          prefix='testing',
                          verbosity=0)

            model.fit(indices="random")

            t, p = model.predict(indices=model.test_indices, prefix='test')
            mse = RegressionMetrics(t, p).mse()
            print(f"Validation mse {mse}")

            return mse
Example #2
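A unit test checking that the x, y arrays are assembled correctly when both inputs and outputs contain NaNs and random indices are used; NaNs in the inputs are back-filled via the input_nans argument.
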
    def test_random_idx_with_nan_inputs_outputs(self):
        """
        Test that when nans are present in inputs and outputs and we use random indices, then x,y data is correctly made.
        """

        df = get_df_with_nans(inputs=True, outputs=True, frac=0.1)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      input_nans={'fillna': {
                          'method': 'bfill'
                      }},
                      verbosity=1)

        model.fit(indices='random')

        x, _, y = model.train_data(indices=model.train_indices)

        # for i in range(100):
        #     idx = model.train_indices[i]
        #     df_x = df[['in1', 'in2']].iloc[idx]
        #     if idx > model.lookback and int(df_x.isna().sum()) == 0:
        #         self.assertAlmostEqual(float(df['out1'].iloc[idx]), y[i], 6)
        #         self.assertTrue(np.allclose(df[['in1', 'in2']].iloc[idx], x[0][i, -1]))

        assert np.max(model.test_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        assert np.max(model.train_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        return
Example #3
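A small helper that trains on nasdaq_df with val_data="same" and returns the training x, y arrays for inspection.
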
def run_same_train_val_data(**kwargs):

    model = Model(data=nasdaq_df,
                  val_data="same",
                  test_fraction=0.2,
                  epochs=1,
                  verbosity=0)

    model.fit(**kwargs)

    x, _, y = model.train_data(indices=model.train_indices)
    return x, y
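
A minimal usage sketch: the keyword arguments are forwarded to model.fit(), so random train/test indices can be requested just as in the other examples.

x, y = run_same_train_val_data(indices='random')
print(y.shape)  # inspect the training targets returned by the helper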
Example #4
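Another XGBoost objective function: the suggested parameters arrive as keyword arguments and the test-set MSE is returned.
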
    def fn(**suggestion):
        model = Model(inputs=inputs,
                      outputs=outputs,
                      model={"xgboostregressor": suggestion},
                      data=data,
                      prefix=f'test_{algorithm}_xgboost_{backend}',
                      verbosity=0)

        model.fit(indices="random")

        t, p = model.predict(indices=model.test_indices, prefix='test')
        mse = RegressionMetrics(t, p).mse()

        return mse
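
Before handing such an objective to an optimizer, it can be smoke-tested by calling it directly with a fixed suggestion; the parameter names below are ordinary XGBoost arguments, chosen purely for illustration.

mse = fn(n_estimators=50, max_depth=3, learning_rate=0.1)  # hypothetical suggestion
print(f"MSE for the fixed suggestion: {mse}")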
Example #5
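An objective function for tuning an LSTM: the activation, lookback, learning rate, and batch size come from the optimizer's suggestion, and the minimum validation loss over 500 epochs is returned.
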
    def objective_fn(**suggestion):

        print(suggestion, 'suggestion')

        model = Model(model={
            'layers': {
                'lstm': {
                    'config': {
                        'units': 64,
                        'activation': suggestion['activation'],
                        'dropout': 0.2,
                        'recurrent_dropout': 0.2
                    }
                }
            }
        },
                      inputs=inputs,
                      outputs=outputs,
                      lookback=int(suggestion['lookback']),
                      lr=float(suggestion['lr']),
                      batch_size=int(suggestion['batch_size']),
                      data=data['224206'],
                      verbosity=0,
                      epochs=500,
                      prefix=_suffix)

        h = model.fit()
        return np.min(h.history['val_loss'])
Example #6
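The same XGBoost objective pattern as Example #4, here apparently used with a TPE-based search (judging by the prefix).
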
        def fn(**suggestion):

            model = Model(inputs=inputs,
                          outputs=outputs,
                          model={"xgboostregressor": suggestion},
                          data=data,
                          prefix='test_tpe_xgboost',
                          verbosity=0)

            model.fit(indices="random")

            t, p = model.predict(indices=model.test_indices, prefix='test')
            mse = RegressionMetrics(t, p).mse()
            print(f"Validation mse {mse}")

            return mse
Example #7
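A unit test checking that when the output column contains NaNs and random indices are used, examples are assigned correctly to the training and test sets, given val_data='same'.
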
    def test_random_idx_with_nan_in_outputs(self):
        # testing that if the output contains nans and we use random indices, the correct
        # examples are assigned for training and testing, given val_data is 'same'.
        df = get_df_with_nans(inputs=False, outputs=True, frac=0.8)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      verbosity=0)

        model.fit(indices='random')
        idx5 = [50, 0, 72, 153, 39, 31, 170, 8]  # last 8 train indices
        self.assertTrue(np.allclose(idx5, model.train_indices[-8:]))

        x, _, y = model.train_data(indices=model.train_indices)

        eighth_non_nan_val_4m_st = df['out1'][df['out1'].notnull()].iloc[8]
        # the last training index is 8, so the last y value must be the 8th non-nan value
        self.assertAlmostEqual(float(y[-1]), eighth_non_nan_val_4m_st)

        # checking that x values are also correct
        eighth_non_nan_val_4m_st = df[['in1',
                                       'in2']][df['out1'].notnull()].iloc[8]
        self.assertTrue(
            np.allclose(df[['in1', 'in2']].iloc[86], eighth_non_nan_val_4m_st))
        self.assertTrue(np.allclose(x[0][-1, -1], eighth_non_nan_val_4m_st))

        xx, _, yy = model.test_data(indices=model.test_indices)
        # the third test index is 10, so the third value of yy must be the 10th non-nan value
        self.assertEqual(model.test_indices[2], 10)
        self.assertAlmostEqual(float(yy[2]),
                               df['out1'][df['out1'].notnull()].iloc[10])
        self.assertTrue(
            np.allclose(xx[0][2, -1],
                        df[['in1', 'in2']][df['out1'].notnull()].iloc[10]))

        assert np.max(model.test_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        assert np.max(model.train_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        return
Example #8
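A helper that builds a model with both 1D and 2D input groups from a layers dict, fits it, and predicts on the test indices (or on the default data when no indices are given).
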
def build_and_run(outputs, transformation=None, indices=None):
    model = Model(model={"layers": make_layers(len(outputs['inp_1d']))},
                  lookback=lookback,
                  inputs={
                      "inp_1d": inp_1d,
                      "inp_2d": inp_2d
                  },
                  outputs=outputs,
                  data={
                      'inp_1d': make_1d(outputs['inp_1d']),
                      'inp_2d': data_2d
                  },
                  transformation=transformation,
                  epochs=2,
                  verbosity=0)

    model.fit(indices=indices)
    return model.predict(indices=model.test_indices if indices else None)
Example #9
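A test that fits an XGBoost regressor on random indices and predicts on both the train and test splits.
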
    def test_ml_random_indices(self):

        model = Model(inputs=data_reg['feature_names'],
                      outputs=["target"],
                      lookback=1,
                      batches="2d",
                      val_fraction=0.0,
                      val_data="same",
                      test_fraction=0.3,
                      category="ML",
                      problem="regression",
                      model={"xgboostregressor": {}},
                      transformation=None,
                      data=df_reg,
                      verbosity=0)

        model.fit(indices="random")
        trtt, trp = model.predict(indices=model.train_indices, prefix='train')
        t, p = model.predict(indices=model.test_indices, prefix='test')
        self.assertGreater(len(t), 1)
        self.assertGreater(len(trtt), 1)
        return
Example #10
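A test that prediction results keep the correct row order when use_datetime_index=True.
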
    def test_datetimeindex(self):
        # makes sure that when use_datetime_index=True is passed during prediction,
        # the returned values are in the correct order

        model = Model(data=data1,
                      inputs=in_cols,
                      outputs=out_cols,
                      epochs=2,
                      model={
                          'layers': {
                              "LSTM": {
                                  "config": {
                                      "units": 2
                                  }
                              },
                              "Dense": {
                                  "config": {
                                      "units": 1
                                  }
                              },
                              "Reshape": {
                                  "config": {
                                      "target_shape": (1, 1)
                                  }
                              }
                          }
                      },
                      lookback=lookback,
                      verbosity=0)

        model.fit(indices="random")
        t, p = model.predict(indices=model.train_indices,
                             use_datetime_index=True)
        # the values in t must match the corresponding indices plus 10000,
        # because the y column starts from 10000
        for i in range(100):
            self.assertEqual(int(t[i]), model.train_indices[i] + 10000)
        return
Example #11
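A test that NaNs in multiple target columns are ignored during loss calculation when allow_nan_labels=True; it is skipped with a warning on TensorFlow versions below 2.3.
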
    def test_multi_out_nans(self):
        """
        Test that when multiple outputs are the target and they contain nans, then we ignore these nans during
        loss calculation.
        """
        # nan-aware loss handling needs tf >= 2.3
        if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(
                tf.__version__[0]) < 2:
            warnings.warn(
                f"test ignoring nans in labels cannot be run with tf version {tf.__version__}"
            )
        else:
            df = get_df_with_nans(200,
                                  inputs=False,
                                  outputs=True,
                                  output_cols=['out1', 'out2'],
                                  frac=0.5)

            layers = {
                "Flatten": {
                    "config": {}
                },
                "Dense": {
                    "config": {
                        "units": 2
                    }
                },
                "Reshape": {
                    "config": {
                        "target_shape": (2, 1)
                    }
                }
            }

            model = Model(allow_nan_labels=True,
                          model={'layers': layers},
                          inputs=['in1', 'in2'],
                          outputs=['out1', 'out2'],
                          epochs=10,
                          verbosity=0,
                          data=df)

            history = model.fit()

            self.assertTrue(np.abs(np.sum(history.history['nse'])) > 0.0)
            self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
            return
Example #12
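A variant of the previous test with allow_nan_labels=1 and random indices; it also verifies that the train and test indices do not overlap.
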
    def test_nan_labels1(self):
        # nan-aware loss handling needs tf >= 2.3
        if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(
                tf.__version__[0]) < 2:
            warnings.warn(
                f"test ignoring nans in labels cannot be run with tf version {tf.__version__}"
            )
        else:
            df = get_df_with_nans(500,
                                  inputs=False,
                                  outputs=True,
                                  output_cols=['out1', 'out2'],
                                  frac=0.9)

            layers = {
                "Flatten": {
                    "config": {}
                },
                "Dense": {
                    "config": {
                        "units": 2
                    }
                },
                "Reshape": {
                    "config": {
                        "target_shape": (2, 1)
                    }
                }
            }

            model = Model(allow_nan_labels=1,
                          transformation=None,
                          model={'layers': layers},
                          inputs=['in1', 'in2'],
                          outputs=['out1', 'out2'],
                          epochs=10,
                          verbosity=0,
                          data=df.copy())

            history = model.fit(indices='random')

            self.assertFalse(
                any(np.isin(model.train_indices, model.test_indices)))
            self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
            return
Example #13
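A helper that fits an arbitrary estimator by its upper-cased name, inferring the problem type from the name, skipping ensemble/meta estimators, and special-casing CatBoost and TPOT keyword arguments.
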
def run_class_test(method):

    problem = "classification" if method.lower().startswith(
        "class") else "regression"

    if method not in [
            "STACKINGREGRESSOR",
            "VOTINGREGRESSOR",
            "LOGISTICREGRESSIONCV",  # has convergence issues
            "RIDGE_REGRESSION",
            "MULTIOUTPUTREGRESSOR",
            "REGRESSORCHAIN",
            "REGRESSORMIXIN",
            # classifications methods
            "STACKINGCLASSIFIER",
            "VOTINGCLASSIFIER",
            "CLASSIFIERCHAIN",
            "CLASSIFIERMIXIN",
            "MULTIOUTPUTCLASSIFIER",
            "CHECK_CLASSIFICATION_TARGETS",
            "IS_CLASSIFIER"
    ]:

        kwargs = {}
        if "CATBOOST" in method:
            kwargs = {'iterations': 2}
        elif "TPOT" in method.upper():
            kwargs = {'generations': 2, 'population_size': 2}

        print(f"testing {method}")

        model = Model(inputs=(data_reg['feature_names'] if problem == "regression"
                              else data_class['feature_names']),
                      outputs=['target'],
                      val_fraction=0.2,
                      problem=problem,
                      transformation=None,
                      data=df_reg if problem == "regression" else data_class,
                      model={method: kwargs},
                      verbosity=0)

        return model.fit()
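
A hypothetical invocation; the upper-cased name follows the convention visible in the skip list above.

history = run_class_test("RANDOMFORESTREGRESSOR")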
Example #14
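A test that stacks Dense layers with a range of activation functions given as config strings, then compares the validation losses against per-platform reference values.
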
    def test_as_fns(self):
        layers = {}
        for idx, act_fn in enumerate([
                'tanh', 'relu', 'elu', 'leakyrelu', 'crelu', 'selu', 'relu6',
                'sigmoid', 'hardsigmoid', 'swish'
        ]):

            layers["Dense_" + str(idx)] = {
                'config': {
                    'units': 1,
                    'activation': act_fn
                }
            }

        layers["reshape"] = {'config': {'target_shape': (1, 1)}}

        model = Model(epochs=2,
                      lookback=1,
                      model={'layers': layers},
                      data=df,
                      verbosity=0)

        history = model.fit()
        val = {
            '21_nt': [0.8971164431680119, 0.7911620726129243],
            '23_nt': [0.10781528055667877, 0.09552989155054092],
            '24_nt': [0.10781528055667877, 0.09552989155054092],
            '23_posix': [0.10781528055667877, 0.09552989155054092],
            '24_posix': [0.10781528055667877, 0.09552989155054092],
            '21_posix': [0.10688107734841351, 0.0938945620801094],
            '20_posix': [0.8971164431680119, 0.10688107734841351]
        }

        if int(tf.__version__.split('.')[0]) > 1:
            for t, p in zip(history.history['val_loss'],
                            val[version + '_' + os.name]):
                self.assertAlmostEqual(t, p, 2)
        return
Example #15
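The same activation functions exercised as standalone layers rather than as Dense-layer arguments.
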
    def test_as_layers(self):

        layers = {}

        for lyr in [
                'PRELU', "RELU", "TANH", "ELU", "LEAKYRELU", "THRESHOLDRELU",
                "SELU", 'sigmoid', 'hardsigmoid', 'crelu', 'relu6', 'softmax',
                'softplus', 'softsign', 'swish'
        ]:
            layers[lyr] = {'config': {}}

        layers["Dense"] = {'config': {'units': 1}}
        layers["reshape"] = {'config': {'target_shape': (1, 1)}}

        model = Model(epochs=2,
                      lookback=1,
                      model={'layers': layers},
                      data=df,
                      verbosity=0)

        val = {
            '21_nt': [0.09297575600513237, 0.09400989675627566],
            '23_posix': [0.0870760977268219, 0.1053781732916832],
            '24_posix': [0.0870760977268219, 0.1053781732916832],
            '21_posix': [0.09297575600513237, 0.095427157656984],
            '20_posix': [0.09297575600513237, 0.095427157656984],
            '23_nt': [0.0870760977268219, 0.1053781732916832],
            '24_nt': [0.0870760977268219, 0.1053781732916832]
        }

        history = model.fit()
        if int(tf.__version__.split('.')[0]) > 1:
            for t, p in zip(history.history['val_loss'],
                            val[version + '_' + os.name]):
                self.assertAlmostEqual(t, p, 2)
        return
Example #16
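A complete, self-contained regression example on the scikit-learn diabetes dataset using a DecisionTreeRegressor.
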
# How to use AI4Water for regression problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes

from AI4Water import Model

data_class = load_diabetes()
cols = data_class['feature_names'] + ['target']
df = pd.DataFrame(np.concatenate(
    [data_class['data'], data_class['target'].reshape(-1, 1)], axis=1),
                  columns=cols)

model = Model(
    data=df,
    inputs=data_class['feature_names'],
    outputs=['target'],
    lookback=1,
    batches="2d",
    val_fraction=0.0,
    model={'DecisionTreeRegressor': {
        "max_depth": 3,
        "criterion": "mae"  # renamed to "absolute_error" in newer scikit-learn
    }},
    transformation=None)

h = model.fit()

x, _, y = model.train_data()
Example #17
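Channel and spatial attention blocks, as in the CBAM model, inserted after a Conv1D layer for time-series prediction.
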
# Using the channel and spatial attention blocks of the CBAM model for time-series prediction


from AI4Water import Model
from AI4Water.utils.datasets import arg_beach


layers = {
    "Conv1D": {"config": {"filters": 64, "kernel_size": 7}},
    "MaxPool1D": {"config": {}},
    "ChannelAttention": {"config": {"conv_dim": "1d", "in_planes": 32}},
    "SpatialAttention": {"config": {"conv_dim": "1d"}},

    "Flatten": {"config": {}},
    "Dense": {"config": {"units": 1}},
    "Reshape": {"config": {"target_shape": (1,1)}}
}

model = Model(
    model={'layers':layers},
    lookback=10,
    data=arg_beach())

history = model.fit(indices="random")
Example #18
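A multilayer perceptron on the arg_beach dataset. The original snippet begins mid-way through the model dict, so its opening lines and the imports are reconstructed below; the name mlp_model is taken from the Model(...) call.
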
        "Dense_0": {
            'units': 64,
            'activation': 'relu'
        },
        "Flatten": {},
        "Dense_3": {
            'units': 1
        },
    }
}

from AI4Water import Model
from AI4Water.utils.datasets import arg_beach

df = arg_beach()

input_features = list(df.columns)[0:-1]

# column in the dataframe to be used as output/target
outputs = list(df.columns)[-1]

model = Model(data=df,
              batch_size=16,
              lookback=1,
              model=mlp_model,
              inputs=input_features,
              outputs=[outputs],
              lr=0.0001)

history = model.fit(indices='random')

y, obs = model.predict()
model.view_model(st=0)
Example #19
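Training on raw NumPy arrays passed as data=(x, y), followed by plots of layer outputs, activation gradients, weights, and weight gradients. The snippet begins mid-way through the layers dict, so the opening and the undefined variables are reconstructed below with illustrative values; the first layer is assumed to be an LSTM since its config uses return_sequences.
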
        "config": {
            "units": units,
            "return_sequences": True
        }
    },
    "Flatten": {
        "config": {}
    },
    "Dense": {
        "config": {
            "units": outs
        }
    },
}

model = Model(model={'layers': layers},
              lookback=lookback,
              epochs=epochs,
              batch_size=batch_size,
              inputs=[f'in_{i}' for i in range(ins)],
              outputs=['out'],
              data=None)

x = np.random.random((examples, lookback, ins))
y = np.random.random((examples, outs, 1))
model.fit(data=(x, y))

model.plot_layer_outputs(data=(x, y))
model.plot_act_grads(data=(x, y))
model.plot_weights()
model.plot_weight_grads(data=(x, y))