示例#1
0
    def provide_regression_model(self, features_and_labels):
        """Create the regression model fixture.

        Seeds via ``t.manual_seed(12)`` and wraps ``RegressionModule`` in a
        ``PytorchModel`` using ``nn.MSELoss`` and SGD (``lr=0.01``, no momentum).

        :param features_and_labels: passed through unchanged to ``PytorchModel``
        :return: the newly constructed ``PytorchModel``
        """
        t.manual_seed(12)

        def sgd_provider(params):
            # optimizer factory handed to the model
            return SGD(params, lr=0.01, momentum=0.0)

        return PytorchModel(RegressionModule, features_and_labels, nn.MSELoss,
                            sgd_provider)
示例#2
0
    def test_regularized_loss(self):
        """Fit a small net whose ``L2`` hook returns a very large factor.

        ``TestModel.L2`` maps the pattern ``'**/2/**/weight'`` to a huge value;
        after fitting, the norm of ``net[2].weight`` is asserted to be below 0.1.
        """
        sample_count = 40
        df = pd.DataFrame({
            "f": np.sin(np.linspace(0, 12, sample_count)),
            "l": np.sin(np.linspace(5, 17, sample_count)),
        })

        class TestModel(PytorchNN):
            def __init__(self):
                super().__init__()
                layers = [
                    nn.Linear(1, 3), nn.ReLU(),
                    nn.Linear(3, 2), nn.ReLU(),
                    nn.Linear(2, 1), nn.Sigmoid(),
                ]
                self.net = nn.Sequential(*layers)

            def forward_training(self, x):
                return self.net(x)

            def L2(self) -> Dict[str, float]:
                return {'**/2/**/weight': 99999999999.99}

        model = PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]),
                             nn.MSELoss, Adam)
        fitting_parameter = FittingParameter(epochs=1000,
                                             splitter=naive_splitter(0.5))
        fit = df.model.fit(model, fitting_parameter)

        # the weight of the layer at index 2, i.e. the one matched by the L2 pattern
        penalized_weight = fit.model._current_model.net.net[2].weight
        weight_norm = penalized_weight.norm().detach().item()

        print(penalized_weight.detach().numpy())
        print(weight_norm)
        self.assertLess(weight_norm, 0.1)
示例#3
0
    def provide_classification_model(self, features_and_labels):
        """Create the classification model fixture.

        Seeds via ``t.manual_seed(42)`` and wraps ``ClassificationModule`` in a
        ``PytorchModel`` using ``nn.MSELoss`` and SGD with ``lr=0.03``.

        :param features_and_labels: passed through unchanged to ``PytorchModel``
        :return: the newly constructed ``PytorchModel``
        """
        t.manual_seed(42)

        def sgd_provider(params):
            # optimizer factory handed to the model
            return SGD(params, lr=0.03)

        return PytorchModel(ClassificationModule, features_and_labels,
                            nn.MSELoss, sgd_provider)
示例#4
0
    def test_make_model(self):
        """Fit a small net with L1/L2 hooks on SPY closes inside a ``df.model(path)`` context.

        Features are ten lags of the ``Close`` percentage change, the label is the
        percentage change shifted by -1. The fit runs for two epochs with SGD.
        """
        csv_file = os.path.join(PWD, '..', 'examples', 'SPY.csv')
        df = pd.read_csv(csv_file)

        # NOTE(review): fixed file name under /tmp -- presumably where the model is
        # stored; confirm whether a temporary directory would be more appropriate.
        with df.model("/tmp/pijsfnwuacpa.model") as m:
            from torch import nn
            from torch.optim import SGD
            from pandas_ml_common.utils.column_lagging_utils import lag_columns

            from pandas_ml_utils import FeaturesAndLabels, RegressionSummary, FittingParameter
            from pandas_ml_utils_torch import PytorchModel
            from pandas_ml_utils_torch.merging_cross_folds import take_the_best

            def net_provider():
                from pandas_ml_utils_torch import PytorchNN

                class Net(PytorchNN):

                    def __init__(self):
                        super().__init__()
                        layers = (
                            nn.Linear(10, 4), nn.Tanh(),
                            nn.Linear(4, 4), nn.Tanh(),
                            nn.Linear(4, 1), nn.Tanh(),
                        )
                        self.net = nn.Sequential(*layers)

                    def forward_training(self, x):
                        return self.net(x)

                    def L1(self):
                        # keys are paths to the parameters which should be regularized;
                        # a path is built from self.named_parameters() and may contain wildcards
                        return {'net/0/**/weight': 0.02}

                    def L2(self):
                        return {'net/0/**/weight': 0.02, 'net/2/**/weight': 0.05}

                return Net()

            features_and_labels = FeaturesAndLabels(
                [lambda df: lag_columns(df["Close"].pct_change(), range(10))],
                [lambda df: df["Close"].pct_change().shift(-1)])

            model = PytorchModel(
                net_provider,
                features_and_labels,
                nn.MSELoss,
                lambda params: SGD(params, lr=0.01, momentum=0.0),
                merge_cross_folds=take_the_best,
                summary_provider=RegressionSummary,
            )

            fit = m.fit(model, FittingParameter(epochs=2), verbose=1)
示例#5
0
    def test_probabilistic(self):
        """Fit a two-output net with ``HeteroscedasticityLoss`` on noisy sine data.

        The data comes from the local ``create_sine_data`` helper; the fit uses Adam,
        ``restore_best_weights=True``, batches of 128, 10 epochs and the
        ``duplicate_data()`` splitter.
        """
        def create_sine_data(n=300):
            """Return ``(x, y)`` arrays of length ``n + 120``.

            ``y`` is ``3 * sin(x)`` with normal noise whose scale is 15% of the
            absolute signal, padded with 60 zeros on each side, plus the linear
            trend ``0.1 * x + 1``. ``n`` is the number of points in the sine part.
            """
            np.random.seed(32)
            # ``n`` is honored as passed; it was previously overwritten with 300 here
            x = np.linspace(0, 1 * 2 * np.pi, n)
            y1 = 3 * np.sin(x)
            y1 = np.concatenate(
                (np.zeros(60), y1 + np.random.normal(0, 0.15 * np.abs(y1), n),
                 np.zeros(60)))
            x = np.concatenate(
                (np.linspace(-3, 0, 60), np.linspace(0, 3 * 2 * np.pi, n),
                 np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60)))
            y2 = 0.1 * x + 1
            y = y1 + y2
            return x, y

        df = pd.DataFrame(np.array(create_sine_data(300)).T,
                          columns=["x", "y"])
        with df.model() as m:
            from pandas_ml_utils import FeaturesAndLabels
            from pandas_ml_utils_torch import PytorchNN, PytorchModel
            from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
            from pandas_ml_common.sampling.splitter import duplicate_data
            from torch.optim import Adam
            from torch import nn

            class Net(PytorchNN):
                def __init__(self):
                    super().__init__()
                    # one input feature, two outputs in the final layer
                    self.l = nn.Sequential(
                        nn.Linear(1, 20),
                        nn.ReLU(),
                        nn.Linear(20, 50),
                        nn.ReLU(),
                        nn.Linear(50, 20),
                        nn.ReLU(),
                        nn.Linear(20, 2),
                    )

                def forward_training(self, x):
                    return self.l(x)

            fit = m.fit(
                PytorchModel(Net,
                             FeaturesAndLabels(["x"], ["y"]),
                             HeteroscedasticityLoss,
                             Adam,
                             restore_best_weights=True),
                FittingParameter(batch_size=128,
                                 epochs=10,
                                 splitter=duplicate_data()))
示例#6
0
    def provide_linear_regression_model(self):
        """Return three ``(model, fitting parameter)`` pairs for a one-layer linear net.

        Every pair gets its own ``PytorchModel`` (MSE loss, Adam) mapping ``x`` to
        ``y``; the pairs differ only in their ``FittingParameter``: plain epochs,
        batched epochs, and fold epochs.
        """
        from pandas_ml_utils_torch import PytorchModel, PytorchNN
        from pandas_ml_utils import FeaturesAndLabels
        from torch.optim import Adam
        from torch import nn
        import torch as t

        class Net(PytorchNN):
            def __init__(self):
                super().__init__()
                self.net = nn.Linear(1, 1)

            def forward_training(self, *input) -> t.Tensor:
                return self.net(input[0])

        def new_model():
            # a fresh model per entry
            return PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]),
                                nn.MSELoss, Adam)

        epoch_fit = (
            new_model(),
            FittingParameter(epochs=5000, context="epoch fit"),
        )
        batched_epoch_fit = (
            new_model(),
            FittingParameter(epochs=5000, batch_size=64,
                             context="epoch fit batched"),
        )
        fold_epoch_fit = (
            new_model(),
            FittingParameter(epochs=1, fold_epochs=5000,
                             context="fold epoch fit"),
        )

        return [epoch_fit, batched_epoch_fit, fold_epoch_fit]
示例#7
0
    def test_mult_epoch_cross_validation(self):
        """Fit a tiny net using both ``epochs`` and ``fold_epochs`` and print the fit.

        Uses eight rows of binary data, ``naive_splitter(0.5)``, 2 epochs,
        10 fold epochs and a batch size of 2. There are no assertions.
        """
        df = pd.DataFrame({
            "a": [1, 0] * 4,
            "b": [0, 1, 0, 1, 1, 0, 1, 0],
        })

        with df.model() as m:

            class NN(PytorchNN):
                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    layers = (nn.Linear(1, 2), nn.ReLU(), nn.Linear(2, 1))
                    self.nn = nn.Sequential(*layers)

                def forward_training(self, x):
                    return self.nn(x)

            model = PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]),
                                 nn.MSELoss, Adam)
            fitting_parameter = FittingParameter(
                splitter=naive_splitter(0.5),
                epochs=2,
                fold_epochs=10,
                batch_size=2,
            )
            fit = m.fit(model, fitting_parameter)

        print(fit)
示例#8
0
    def test_multi_objective_loss(self):
        """Fit a two-headed module with a ``MultiObjectiveLoss`` of MSE and L1 terms.

        ``forward_training`` returns both heads while ``forward_predict`` returns only
        the first one. The ``on_epoch`` callback calls
        ``criterion.update_weights((0, 1.1))``. The test only prints the test summary.
        """
        truth_table = [
            [0, 0, 0],
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 1],
        ]
        # first four rows are meant for training, the identical last four for testing
        df = pd.DataFrame(np.array(truth_table + truth_table),
                          columns=["f1", "f2", "l"])

        class XorModule(PytorchNN):
            def __init__(self):
                super().__init__()
                self.x1 = nn.Linear(2, 1)
                self.s1 = nn.Sigmoid()
                self.x2 = nn.Linear(2, 1)
                self.s2 = nn.Sigmoid()
                self.s = nn.Softmax()

            def forward_training(self, x):
                first_head = self.s1(self.x1(x))
                second_head = self.s2(self.x2(x))
                return first_head, second_head

            def forward_predict(self, x):
                return self.s1(self.x1(x))

        def reweight(criterion, epoch):
            return criterion.update_weights((0, 1.1))

        def criterion_provider():
            return MultiObjectiveLoss(
                (1, nn.MSELoss(reduction='none')),
                (1, nn.L1Loss(reduction='none')),
                on_epoch=reweight)

        model = PytorchModel(XorModule, FeaturesAndLabels(["f1", "f2"], ["l"]),
                             criterion_provider, Adam)
        fit = df.model.fit(model,
                           FittingParameter(splitter=naive_splitter(0.5)))

        print(fit.test_summary.df)
示例#9
0
    def provide_non_linear_regression_model(self):
        """Return three ``(model, fitting parameter)`` pairs for a deep ReLU regression net.

        The net maps ``x`` to ``y`` through three hidden layers of width 200 and is
        trained with MSE loss and Adagrad. ``t.manual_seed(0)`` is called right
        before the model is constructed.
        """
        from pandas_ml_utils_torch import PytorchModel, PytorchNN
        from pandas_ml_utils import FeaturesAndLabels
        from torch.optim import Adagrad
        from torch import nn
        import torch as t

        class Net(PytorchNN):
            def __init__(self):
                super().__init__()
                layers = [
                    nn.Linear(1, 200), nn.ReLU(),
                    nn.Linear(200, 200), nn.ReLU(),
                    nn.Linear(200, 200), nn.ReLU(),
                    nn.Linear(200, 1), nn.ReLU(),
                ]
                self.net = nn.Sequential(*layers)

            def forward_training(self, *input) -> t.Tensor:
                return self.net(input[0])

        t.manual_seed(0)
        # NOTE(review): all three entries below share this single model instance --
        # confirm that sharing is intended.
        model = PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss,
                             Adagrad)

        batched_epoch_fit = FittingParameter(epochs=600,
                                             batch_size=64,
                                             context="epoch fit batched")
        epoch_fit = FittingParameter(epochs=600, context="epoch fit")
        fold_epoch_fit = FittingParameter(epochs=1,
                                          fold_epochs=600,
                                          context="fold epoch fit")

        return [
            (model, batched_epoch_fit),
            (model, epoch_fit),
            (model, fold_epoch_fit),
        ]
示例#10
0
    def test_pytorch_mfs(self):
        """Fit a module that consumes two feature frames at once.

        Features are given as the tuple ``(["a"], ["b"])``; the extracted features
        must be a ``MultiFrameDecorator``. After fitting, the test summary frame must
        contain ``FEATURE_COLUMN_NAME`` and its ``"label"`` values must equal
        ``[0, 0, 1]``.
        """
        df = pd.DataFrame({
            "a": [1, 0] * 4,
            "b": [
                [0, 0], [0, 0],
                [1, 1], [1, 1],
                [0, 0], [0, 0],
                [1, 1], [1, 1],
            ],
            "c": [1, 0, 0, 1] * 2,
        })

        def module_provider():
            class ClassificationModule(PytorchNN):
                def __init__(self):
                    super().__init__()
                    # net0 takes 1 input, net1 takes 2 inputs
                    self.net0 = nn.Sequential(nn.Linear(1, 5), nn.ReLU(),
                                              nn.Linear(5, 1), nn.Sigmoid())
                    self.net1 = nn.Sequential(nn.Linear(2, 5), nn.ReLU(),
                                              nn.Linear(5, 1), nn.Sigmoid())

                def forward_training(self, x) -> t.Tensor:
                    x0, x1 = x
                    return self.net0(x0) + self.net1(x1)

            return ClassificationModule()

        features_and_labels = FeaturesAndLabels(features=(["a"], ["b"]),
                                                labels=["c"])
        model = PytorchModel(module_provider, features_and_labels, nn.MSELoss,
                             lambda params: Adam(params, lr=0.03))

        fl: FeaturesWithLabels = df._.extract(model.features_and_labels)
        extracted_features = fl.features_with_required_samples.features
        self.assertIsInstance(extracted_features, MultiFrameDecorator)
        print(fl.features_with_required_samples.features)

        fit = df.model.fit(model, fold_epochs=10)
        summary_frame = fit.test_summary.df
        print(summary_frame)

        self.assertIn(FEATURE_COLUMN_NAME, summary_frame)
        expected_labels = np.array([0, 0, 1])
        np.testing.assert_almost_equal(
            expected_labels, summary_frame["label"].values.squeeze())
    def test_probabilistic_model_with_callback(self):
        try:
            pandas_ml_quant_data_provider = importlib.import_module(
                "pandas_ml_quant")
            from pandas_ml_quant import PricePredictionSummary
            from pandas_ml_quant.model.summary.price_prediction_summary import PriceSampledSummary
        except:
            print("pandas_ml_quant not found, skipping!")
            return

        df = pd.DataFrame({
            "Returns":
            np.random.normal(-0.02, 0.03, 500) +
            np.random.normal(0.03, 0.02, 500)
        })

        fl = PostProcessedFeaturesAndLabels(
            features=["Returns"],
            feature_post_processor=lambda df: df.ta.rnn(20),
            labels=[
                lambda df: df["Returns"].shift(-1).rename("Future_Returns")
            ],
            targets=lambda df: (1 + df["Returns"]).cumprod().rename("Close"))

        model_factory = PytorchNNFactory.create(
            nn.Sequential(
                nn.Linear(20, 10),
                nn.Tanh(),
                nn.Linear(10, 6),
                LambdaSplitter(
                    lambda x: T.softmax(x[..., :2], dim=1),
                    lambda x: T.exp(x[..., 2:4]),
                    # enforce one mean positive and the other negativ
                    lambda x: T.cat([T.exp(x[..., 4:5]), -T.exp(x[..., 5:6])],
                                    dim=1),
                )),
            predictor=lambda n, i: T.cat(n(i), dim=1),
            trainer=lambda n, i: n(i))

        def dist(probs, scales, locs):
            return MixtureSameFamily(Categorical(probs=probs),
                                     Normal(loc=locs, scale=scales))

        def loss(y_pred):
            probs, scales, locs = y_pred
            return dist(probs, scales, locs)

        def cdf_cb(arg):
            probs, scales, locs = arg[..., :2], arg[..., 2:4], arg[..., 4:6]
            return dist(probs, scales, locs)

        summary_provider = PriceSampledSummary.with_reconstructor(
            sampler=wrap_applyable(lambda params, samples: cdf_cb(params).
                                   sample([int(samples.item())]),
                                   nr_args=2),
            samples=100,
            confidence=0.8)

        model = PytorchModel(module_provider=model_factory,
                             features_and_labels=fl,
                             criterion_provider=lambda: DistributionNLL(
                                 loss, penalize_toal_variance_lambda=1.1),
                             optimizer_provider=Adam,
                             summary_provider=summary_provider)

        fit = df.model.fit(
            model,
            FittingParameter(epochs=10,
                             batch_size=6,
                             splitter=naive_splitter(0.25)),
            #verbose=1,
            callbacks=[
                TestConfidenceInterval(
                    TestConfidenceInterval.CdfConfidenceInterval(
                        wrap_applyable(
                            lambda params, val: cdf_cb(params).cdf(val),
                            nr_args=2),
                        interval=0.8),
                    wrap_applyable(lambda params: cdf_cb(params).variance),
                    early_stopping=True)
            ])

        print(fit.test_summary.calc_scores())