示例#1
0
 def test_sklearn_compatibility_reg(self):
     """Smoke-test parameter introspection on a configured DAGRegressor.

     Builds a regressor from an explicit configuration and calls
     ``get_params(deep=True)``; the test passes when no exception is raised.
     """
     config = dict(
         alpha=0.0,
         fit_intercept=True,
         dependent_target=True,
         hidden_layer_units=[0],
         standardize=True,
     )
     estimator = DAGRegressor(**config)
     estimator.get_params(deep=True)
示例#2
0
    def test_nonlinear_performance(self, standardize):
        """Check that larger hidden layers score better on RBF-kernel data.

        Data is generated from a 10-node structure with an ``RBF(1)`` kernel,
        then node ``1`` is predicted from the remaining columns. Three
        regressors (hidden units ``[0]``, ``[2]`` and ``[4]``) are compared on
        their mean cross-validated score, which must be strictly increasing
        in that order.
        """
        np.random.seed(42)
        graph = dg.generate_structure(num_nodes=10, degree=3)
        graph.threshold_till_dag()
        frame = dg.generate_continuous_dataframe(
            graph,
            n_samples=1000,
            intercept=True,
            seed=42,
            noise_scale=0.1,
            kernel=RBF(1),
        )
        target_node = 1
        y = frame.iloc[:, target_node]
        X = frame.drop(target_node, axis=1)

        def mean_cv_score(estimator):
            # A fresh, identically seeded splitter per model keeps the folds comparable.
            folds = KFold(shuffle=True, random_state=42)
            return cross_val_score(estimator, X, y, cv=folds).mean()

        # Baseline: hidden_layer_units=[0], no regularisation.
        linear_model = DAGRegressor(
            alpha=0.0,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[0],
            standardize=standardize,
        )
        linear_score = mean_cv_score(linear_model)

        # Two hidden units.
        small_model = DAGRegressor(
            alpha=0.1,
            fit_intercept=True,
            hidden_layer_units=[2],
            standardize=standardize,
        )
        small_nl_score = mean_cv_score(small_model)

        # Four hidden units.
        medium_model = DAGRegressor(
            alpha=0.1,
            fit_intercept=True,
            hidden_layer_units=[4],
            standardize=standardize,
        )
        medium_nl_score = mean_cv_score(medium_model)

        assert small_nl_score > linear_score
        assert medium_nl_score > small_nl_score
示例#3
0
 def test_feature_importances(self, hidden_layer_units):
     """Check the type and sign of ``feature_importances_`` after fitting.

     The frame holds one random-noise column plus ``true_feat``, which is the
     target scaled by ``-3``. The importance reported for ``true_feat`` must
     be positive for every parametrised ``hidden_layer_units`` value.
     """
     model = DAGRegressor(hidden_layer_units=hidden_layer_units)
     # Draw the noise feature before the target, matching the original draw order.
     X = pd.DataFrame(np.random.normal(size=(100, 1)))
     y = pd.Series(np.random.normal(size=(100, )))
     X["true_feat"] = y * -3
     model.fit(X, y)

     assert isinstance(model.feature_importances_, np.ndarray)

     importance_by_column = pd.Series(model.feature_importances_, index=X.columns)
     # The importance must be positive in both the linear and nonlinear cases.
     assert importance_by_column["true_feat"] > 0
示例#4
0
 def test_wrong_target_dist_error(self, target_dist_type):
     """Constructing a DAGRegressor with this ``target_dist_type`` must fail.

     Only the exception type (``NotImplementedError``) is checked; the error
     message itself is not matched.
     """
     expected_error = NotImplementedError
     with pytest.raises(expected_error):
         DAGRegressor(target_dist_type=target_dist_type)
示例#5
0
 def train(self, data, train_sample_fraction, target_col):
     """Fit a DAGRegressor on a random sample of ``data``.

     Sets ``self.target_col``, ``self.features`` and ``self.regressor``.

     Parameters
     ----------
     data
         DataFrame holding the target column and candidate feature columns.
         The target column, the ``'interval'`` column and any column whose
         name contains ``'fleet-dispatch'`` are excluded from the features.
     train_sample_fraction
         Fraction of the rows of ``data`` sampled (with a fixed random state)
         for fitting.
     target_col
         Name of the column to predict.
     """
     self.target_col = target_col
     excluded = (target_col, 'interval')
     self.features = [col for col in data.columns
                      if col not in excluded and 'fleet-dispatch' not in col]
     # Filter the generic tabu *child nodes*. The previous code iterated
     # ``self.generic_tabu_edges`` (tuples), which can never equal a feature
     # name, so no child-node constraint was ever passed to the regressor.
     tabu_child_nodes = [col for col in self.generic_tabu_child_nodes
                         if col in self.features]
     self.regressor = DAGRegressor(threshold=0.0,
                                   alpha=0.0001,
                                   beta=0.5,
                                   fit_intercept=True,
                                   hidden_layer_units=[10],
                                   standardize=True,
                                   tabu_child_nodes=tabu_child_nodes,
                                   tabu_edges=self._expand_tabu_edges(self.features)
                                   )
     n_rows = len(data.index)
     sample_size = int(n_rows * train_sample_fraction)
     # Fixed random_state keeps the training sample reproducible.
     sample = data.sample(sample_size, random_state=1).reset_index(drop=True)
     X, y = sample.loc[:, self.features], np.asarray(sample[target_col])
     self.regressor.fit(X, y)
示例#6
0
def test_X_dtype_prediction(standardize):
    """
    Check that integer-typed and float-typed inputs yield equal predictions.

    A regressor is fitted on ``y = x`` over 500 evenly spaced points in
    ``[0, 500]`` and then queried with the same three values expressed once as
    ints and once as floats.
    """
    training_data = pd.DataFrame(
        {name: np.linspace(0, 500, num=500) for name in ("x", "y")}
    )
    features = training_data[["x"]]
    target = training_data["y"]

    model = DAGRegressor(
        threshold=0.0,
        alpha=0.0001,
        beta=0.5,
        fit_intercept=True,
        hidden_layer_units=[10],
        standardize=standardize,
    )
    model.fit(features, target)

    # Same query points, differing only in dtype.
    int_queries = pd.DataFrame({"x": [0, 250, 500]})
    float_queries = pd.DataFrame({"x": [0.0, 250.0, 500.0]})

    pred_int = model.predict(int_queries)
    pred_float = model.predict(float_queries)

    assert np.all(pred_float == pred_int)
示例#7
0
def test_independent_predictions(hidden_layer_units):
    """Check that a row's prediction does not depend on the rest of the batch.

    After fitting on ``y = x**2``, the prediction for ``x = 10.0`` is taken
    from three batches (alone, with one extra row, with the full training
    grid appended) and all three values must be close to each other.
    """
    grid = np.linspace(0.0, 100, 100)
    features = pd.DataFrame({"x": grid})
    target = pd.Series(grid**2, name="y")

    model = DAGRegressor(
        threshold=0.0,
        alpha=0.0,
        beta=0.5,
        fit_intercept=True,
        hidden_layer_units=hidden_layer_units,
        standardize=False,
    )
    model.fit(features, target)

    def first_prediction(values):
        # Prediction for the first row of a batch built from ``values``.
        return model.predict(pd.DataFrame({"x": values}))[0]

    alone = first_prediction([10.0])
    with_one_extra = first_prediction([10.0, 0.0])
    with_full_grid = first_prediction([10.0] + grid.tolist())

    assert np.isclose(alone, with_one_extra)
    assert np.isclose(alone, with_full_grid)
    assert np.isclose(with_one_extra, with_full_grid)
示例#8
0
 def test_glm(self, target_dist_type, y):
     """Smoke-test fit and predict for the given ``target_dist_type``.

     Fits on 100 rows of two standard-normal features against the supplied
     ``y`` and then predicts on the same features; passing means no exception.
     """
     model = DAGRegressor(target_dist_type=target_dist_type)
     n_rows, n_features = 100, 2
     features = np.random.normal(size=(n_rows, n_features))
     model.fit(features, y)
     model.predict(features)
示例#9
0
class Forecaster:
    """Forecaster that wraps a DAGRegressor with generic tabu constraints.

    Generic tabu edges are stored as pairs of name fragments and are expanded
    against the actual feature columns at training time; generic tabu child
    nodes are matched against feature names exactly.
    """

    def __init__(self, tabu_child_nodes=None, tabu_edges=None):
        """Store the generic tabu constraints.

        Parameters
        ----------
        tabu_child_nodes
            Column names passed to the regressor as ``tabu_child_nodes`` when
            present among the features. Defaults to
            ``['hour', 'dayofweak', 'dayofyear']``.
        tabu_edges
            Pairs of name fragments; see ``_expand_tabu_edges``. Defaults to
            ``[('demand', 'demand')]``.
        """
        # Build the defaults per instance: a list literal in the signature
        # would be shared (and mutable) across every Forecaster.
        if tabu_child_nodes is None:
            tabu_child_nodes = ['hour', 'dayofweak', 'dayofyear']
        if tabu_edges is None:
            tabu_edges = [('demand', 'demand')]
        self.generic_tabu_child_nodes = tabu_child_nodes
        self.generic_tabu_edges = tabu_edges

    def _expand_tabu_edges(self, data_columns):
        """Prepare the tabu_edges input for the DAGregressor

        Each generic edge is a pair of name fragments. Every column whose name
        contains the first fragment is paired with every column whose name
        contains the second fragment; pairs of a column with itself are
        dropped.

        Examples
        --------

        >>> f = Forecaster()

        >>> f._expand_tabu_edges(data_columns=['demand-1', 'demand-2', 'constraint-1',
        ...                                    'availability-1', 'availability-2'])
        [('demand-1', 'demand-2'), ('demand-2', 'demand-1')]

        Parameters
        ----------
        data_columns
            Iterable of column names to match the fragments against.

        Returns
        -------
        list of tuple
            The specific ``(first, second)`` column pairs, in generic-edge
            order.
        """
        expanded_edges = []
        for generic_edge in self.generic_tabu_edges:
            first_generic_node = generic_edge[0]
            second_generic_node = generic_edge[1]
            specific_first_nodes = [col for col in data_columns if first_generic_node in col]
            specific_second_nodes = [col for col in data_columns if second_generic_node in col]
            expanded_edges.extend(
                edge for edge in product(specific_first_nodes, specific_second_nodes)
                if edge[0] != edge[1]
            )

        return expanded_edges

    def train(self, data, train_sample_fraction, target_col):
        """Fit a DAGRegressor on a random sample of ``data``.

        Sets ``self.target_col``, ``self.features`` and ``self.regressor``.

        Parameters
        ----------
        data
            DataFrame holding the target column and candidate feature columns.
            The target column, the ``'interval'`` column and any column whose
            name contains ``'fleet-dispatch'`` are excluded from the features.
        train_sample_fraction
            Fraction of the rows of ``data`` sampled (with a fixed random
            state) for fitting.
        target_col
            Name of the column to predict.
        """
        self.target_col = target_col
        excluded = (target_col, 'interval')
        self.features = [col for col in data.columns
                         if col not in excluded and 'fleet-dispatch' not in col]
        # Filter the generic tabu *child nodes*. Iterating the tabu edges here
        # (as before) compared tuples with column names and always produced
        # an empty list.
        tabu_child_nodes = [col for col in self.generic_tabu_child_nodes
                            if col in self.features]
        self.regressor = DAGRegressor(threshold=0.0,
                                      alpha=0.0001,
                                      beta=0.5,
                                      fit_intercept=True,
                                      hidden_layer_units=[10],
                                      standardize=True,
                                      tabu_child_nodes=tabu_child_nodes,
                                      tabu_edges=self._expand_tabu_edges(self.features)
                                      )
        n_rows = len(data.index)
        sample_size = int(n_rows * train_sample_fraction)
        # Fixed random_state keeps the training sample reproducible.
        sample = data.sample(sample_size, random_state=1).reset_index(drop=True)
        X, y = sample.loc[:, self.features], np.asarray(sample[target_col])
        self.regressor.fit(X, y)

    def price_forecast_with_generation_sensitivities(self, forward_data, region, market, min_delta, max_delta, steps):
        """Predict the target for a range of demand reductions.

        For each ``delta`` in the stepped range from ``min_delta`` to
        ``max_delta``, the ``<region>-demand`` column is set to the baseline
        demand minus ``delta`` and the regressor is evaluated. The baseline is
        the demand column plus ``<market>-fleet-dispatch`` when that column
        exists, otherwise the demand column alone.

        ``forward_data`` is not modified; the scenarios are built on a copy.

        Returns
        -------
        DataFrame
            The ``'interval'`` column plus one prediction column per delta,
            keyed by the integer delta value.
        """
        prediction = forward_data.loc[:, ['interval']].copy()
        # Work on a copy so the caller's demand column is not left shifted by
        # the last delta (and no helper column leaks into their frame).
        scenario = forward_data.copy()
        demand_col = region + '-demand'
        dispatch_col = market + '-fleet-dispatch'

        if dispatch_col in scenario.columns:
            scenario['old_demand'] = scenario[demand_col] + scenario[dispatch_col]
        else:
            scenario['old_demand'] = scenario[demand_col]

        # Step is at least 1 so the range below always advances.
        delta_step_size = max(int((max_delta - min_delta) / steps), 1)
        for delta in range(int(min_delta), int(max_delta) + delta_step_size, delta_step_size):
            scenario[demand_col] = scenario['old_demand'] - delta
            X = scenario.loc[:, self.features]
            prediction[delta] = self.regressor.predict(X)
        return prediction

    def single_trace_forecast(self, forward_data):
        """Predict the target once for ``forward_data`` as given.

        Returns
        -------
        DataFrame
            The ``'interval'`` column plus a column named ``self.target_col``
            holding the predictions.
        """
        prediction = forward_data.loc[:, ['interval']].copy()
        X = forward_data.loc[:, self.features]
        prediction[self.target_col] = self.regressor.predict(X)
        return prediction
示例#10
0
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from causalnex.structure.pytorch import DAGRegressor

# NOTE(review): `load_boston`, `np` and `pd` are expected to be in scope from
# earlier in the session. `load_boston` is reportedly unavailable in recent
# scikit-learn releases - confirm against the installed version.
data = load_boston()
X, y = data.data, data.target
names = data["feature_names"]

# Standardise the features and the target before fitting.
ss = StandardScaler()
X = ss.fit_transform(X)
y = (y - y.mean()) / y.std()

reg = DAGRegressor(
    alpha=0.1,
    beta=0.9,
    fit_intercept=True,
    hidden_layer_units=None,
    dependent_target=True,
    enforce_dag=True,
)

# Cross-validated score on the standardised arrays.
scores = cross_val_score(reg, X, y, cv=KFold(shuffle=True, random_state=42))
print(f'MEAN R2: {np.mean(scores):.3f}')

# Refit on labelled pandas objects so coefficients and the plot carry names.
X = pd.DataFrame(X, columns=names)
y = pd.Series(y, name="MEDV")
reg.fit(X, y)
print(pd.Series(reg.coef_, index=names))
reg.plot_dag(enforce_dag=True)