Python DataFrame примеры, pandas_ml_utils.pd.DataFrame Python примеры использования

Пример #1

0

Показать файл

Файл: test_pytorch_model.py Проект: GitouYou/pandas-ml-quant

    def test_callbacks(self):
        """Fit with an early-stopping callback, passing ``restore_best_weights=True`` as a keyword argument."""
        frame = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 1, 0, 1, 1, 0, 1, 0],
        })

        model = PytorchModel(
            FeaturesAndLabels(["a", "b"], ["b"]),
            ClassificationModule,
            nn.MSELoss,
            lambda params: SGD(params, lr=0.1, momentum=0.9),
        )

        stop_early = Callbacks.early_stopping(patience=3, tolerance=-100)
        fit = frame.model.fit(model,
                              on_epoch=[stop_early],
                              restore_best_weights=True)

        # exactly four loss entries are expected in the recorded history
        loss_history = fit.model.history["loss"]
        self.assertEqual(4, len(loss_history))

Пример #2

0

Показать файл

    def test_regularized_loss(self):
        """Fit a small net whose ``L2`` hook returns a huge factor for one weight pattern.

        After fitting, the norm of the weight of ``net[2]`` must be below 0.1.
        """
        frame = pd.DataFrame({
            "f": np.sin(np.linspace(0, 12, 40)),
            "l": np.sin(np.linspace(5, 17, 40)),
        })

        class TestModel(PytorchNN):
            def __init__(self):
                super().__init__()
                layers = [
                    nn.Linear(1, 3), nn.ReLU(),
                    nn.Linear(3, 2), nn.ReLU(),
                    nn.Linear(2, 1), nn.Sigmoid(),
                ]
                self.net = nn.Sequential(*layers)

            def forward_training(self, x):
                return self.net(x)

            def L2(self) -> Dict[str, float]:
                # maps a parameter-name pattern to its factor
                return {'**/2/**/weight': 99999999999.99}

        model = PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]),
                             nn.MSELoss, Adam)
        fitting_parameter = FittingParameter(epochs=1000,
                                             splitter=naive_splitter(0.5))
        fit = frame.model.fit(model, fitting_parameter)

        # look the weight of the middle linear layer up once and reuse it
        layer_weight = fit.model._current_model.net.net[2].weight
        weight_norm = layer_weight.norm().detach().item()

        print(layer_weight.detach().numpy())
        print(weight_norm)
        self.assertLess(weight_norm, 0.1)

Пример #3

0

Показать файл

Файл: test__training_test_data_split.py Проект: seanahmad/pandas-ml-quant

    def test_no_training_data(self):
        """``RandomSplits(0)`` keeps every index value for training and yields an empty test index."""
        # given
        values = [1, 2, 3, 4, 5]
        frame = pd.DataFrame({"featureA": values, "labelA": values})

        # when
        splitter = RandomSplits(0)
        train_ix, test_ix = splitter.train_test_split(frame.index)

        # then
        np.testing.assert_array_almost_equal(train_ix.values,
                                             frame.index.values)
        self.assertEqual(0, len(test_ix))

Пример #4

0

Показать файл

Файл: test__training_test_data_split.py Проект: seanahmad/pandas-ml-quant

    def test_youngest_portion(self):
        """With ``test_size=0.6`` and ``youngest_size=0.25`` on ten rows, the test index has six entries ending in 8 and 9."""
        # given
        values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        frame = pd.DataFrame({"featureA": values, "labelA": values})

        # when
        splitter = RandomSplits(test_size=0.6, youngest_size=0.25)
        train_ix, test_ix = splitter.train_test_split(frame.index)

        # then
        self.assertEqual(6, len(test_ix))
        expected_youngest = np.array([8, 9])
        np.testing.assert_array_equal(test_ix[-2:], expected_youngest)

Пример #5

0

Показать файл

Файл: test__training_test_data_split.py Проект: seanahmad/pandas-ml-quant

    def test_make_training_data(self):
        """With ``test_size=0.5`` on five rows, two index values go to training and three to test."""
        # given
        values = [1, 2, 3, 4, 5]
        frame = pd.DataFrame({"featureA": values, "labelA": values})

        # when
        splitter = RandomSplits(test_size=0.5)
        train_ix, test_ix = splitter.train_test_split(frame.index)

        # then
        self.assertEqual(2, len(train_ix))
        self.assertEqual(3, len(test_ix))

Пример #6

0

Показать файл

    def test_callbacks(self):
        """Fit with an early-stopping callback, passing ``restore_best_weights=True`` as a keyword argument."""
        frame = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 1, 0, 1, 1, 0, 1, 0],
        })

        def module_provider():
            # the module class is declared inside the provider, so each call builds it anew
            class ClassificationModule(nn.Module):
                def __init__(self):
                    super().__init__()
                    self.classifier = nn.Sequential(
                        nn.Linear(2, 5),
                        nn.ReLU(),
                        nn.Linear(5, 1),
                        nn.Sigmoid(),
                    )

                def forward(self, x):
                    return self.classifier(x)

            return ClassificationModule()

        model = PytorchModel(
            FeaturesAndLabels(["a", "b"], ["b"]),
            module_provider,
            nn.MSELoss,
            lambda params: SGD(params, lr=0.1, momentum=0.9),
        )

        stop_early = PytorchModel.Callbacks.early_stopping(patience=3,
                                                           tolerance=-100)
        fit = frame.model.fit(model,
                              on_epoch=[stop_early],
                              restore_best_weights=True)

        history = fit.model._history
        print(history)
        # four entries are expected in the first element of the first history record
        self.assertEqual(4, len(fit.model._history[0][0]))

Пример #7

0

Показать файл

    def test_probabilistic(self):
        """Fit a two-output net with ``HeteroscedasticityLoss`` on noisy sine data.

        There is no assertion; the test only requires that fitting completes.
        """
        def create_sine_data(n=300):
            # fixed seed so the generated noise is reproducible
            np.random.seed(32)
            sine = 3 * np.sin(np.linspace(0, 1 * 2 * np.pi, n))
            noisy_sine = sine + np.random.normal(0, 0.15 * np.abs(sine), n)
            # 60 zeros are padded on either side of the noisy sine
            y1 = np.concatenate((np.zeros(60), noisy_sine, np.zeros(60)))
            x = np.concatenate((
                np.linspace(-3, 0, 60),
                np.linspace(0, 3 * 2 * np.pi, n),
                np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60),
            ))
            # a linear trend is added on top of the padded sine
            y2 = 0.1 * x + 1
            return x, y1 + y2

        samples = np.array(create_sine_data(300)).T
        frame = pd.DataFrame(samples, columns=["x", "y"])

        with frame.model() as m:
            from pandas_ml_utils import FeaturesAndLabels
            from pandas_ml_utils_torch import PytorchNN, PytorchModel
            from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
            from pandas_ml_common.sampling.splitter import duplicate_data
            from torch.optim import Adam
            from torch import nn

            class Net(PytorchNN):
                def __init__(self):
                    super().__init__()
                    layers = [
                        nn.Linear(1, 20),
                        nn.ReLU(),
                        nn.Linear(20, 50),
                        nn.ReLU(),
                        nn.Linear(50, 20),
                        nn.ReLU(),
                        nn.Linear(20, 2),
                    ]
                    self.l = nn.Sequential(*layers)

                def forward_training(self, x):
                    return self.l(x)

            model = PytorchModel(Net,
                                 FeaturesAndLabels(["x"], ["y"]),
                                 HeteroscedasticityLoss,
                                 Adam,
                                 restore_best_weights=True)
            fitting_parameter = FittingParameter(batch_size=128,
                                                 epochs=10,
                                                 splitter=duplicate_data())
            fit = m.fit(model, fitting_parameter)

Пример #8

0

Показать файл

    def test_feature_selection(self):
        """Run feature selection over three features with ``lags=[2]`` and check the two reported lists."""
        frame = pd.DataFrame({
            "featureA": [1, 2, 3, 4, 5],
            "featureB": [5, 4, 3, 2, 1],
            "featureC": [1, 2, 1, 2, 1],
            "labelA": [1, 2, 3, 4, 5],
            "labelB": [5, 4, 3, 2, 1],
        })
        feature_names = ["featureA", "featureB", "featureC"]

        analysis = frame.model.feature_selection(
            FeaturesAndLabels(feature_names, ["labelA"]),
            lags=[2],
            show_plots=False)

        print(analysis)
        # top features are A, B, C
        self.assertListEqual(["featureA", "featureB", "featureC"], analysis[0])
        self.assertListEqual([0, 1], analysis[1])

Пример #9

0

Показать файл

Файл: test_pytorch_model.py Проект: KIC/pandas-ml-quant

    def test_mult_epoch_cross_validation(self):
        """Fit with ``epochs=2`` and ``fold_epochs=10`` on a naive 0.5 split; the fit is only printed, not asserted."""
        frame = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 1, 0, 1, 1, 0, 1, 0],
        })

        with frame.model() as m:

            class NN(PytorchNN):
                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    layers = [
                        nn.Linear(1, 2),
                        nn.ReLU(),
                        nn.Linear(2, 1),
                    ]
                    self.nn = nn.Sequential(*layers)

                def forward_training(self, x):
                    return self.nn(x)

            model = PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]),
                                 nn.MSELoss, Adam)
            fitting_parameter = FittingParameter(splitter=naive_splitter(0.5),
                                                 epochs=2,
                                                 fold_epochs=10,
                                                 batch_size=2)
            fit = m.fit(model, fitting_parameter)

        print(fit)

Пример #10

0

Показать файл

Файл: test__feature_selection.py Проект: KIC/pandas-ml-quant

    def test_feature_selection_classification(self):
        """Run feature selection on a generated two-class data set; the report is only printed, not asserted."""
        nr_of_features = 5
        samples, classes = make_classification(n_samples=20,
                                               n_features=nr_of_features,
                                               n_informative=4,
                                               n_redundant=1,
                                               n_classes=2)
        frame = pd.DataFrame(samples)
        frame["label"] = classes

        features_and_labels = FeaturesAndLabels(
            features=list(range(nr_of_features)),
            labels=["label"],
            label_type=int)

        report = frame.model.feature_selection(
            features_and_labels=features_and_labels,
            training_data_splitter=stratified_random_splitter(0.5),
            rfecv_splits=2,
            forest_splits=2)

        print(report)

Пример #11

0

Показать файл

Файл: test_loss.py Проект: seanahmad/pandas-ml-quant

    def test_multi_objective_loss(self):
        """Fit a two-headed module with a ``MultiObjectiveLoss``; the test summary is only printed, not asserted."""
        # the same four rows are used once for training and once for testing
        rows = [
            [0, 0, 0],
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 1],
        ]
        frame = pd.DataFrame(np.array(rows + rows), columns=["f1", "f2", "l"])

        class XorModule(nn.Module):

            def __init__(self):
                super().__init__()
                self.x1 = nn.Linear(2, 1)
                self.s1 = nn.Sigmoid()
                self.x2 = nn.Linear(2, 1)
                self.s2 = nn.Sigmoid()
                self.s = nn.Softmax()

            def forward(self, x):
                # both heads are returned while training, only the first one otherwise
                if not self.training:
                    return self.s1(self.x1(x))

                return self.s1(self.x1(x)), self.s2(self.x2(x))

        model = PytorchModel(
            FeaturesAndLabels(["f1", "f2"], ["l"]),
            XorModule,
            lambda: MultiObjectiveLoss((1, nn.MSELoss(reduction='none')),
                                       (1, nn.L1Loss(reduction='none')),
                                       on_epoch=lambda criterion, epoch: criterion.update_weights((0, 1.1))),
            Adam
        )
        fit = frame.model.fit(model, NaiveSplitter(0.5))

        print(fit.test_summary.df)

Пример #12

0

Показать файл

    def test_pytorch_mfs(self):
        """Fit a module that consumes two feature sets (``["a"]`` and ``["b"]``) and check the test summary."""
        frame = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [
                [0, 0],
                [0, 0],
                [1, 1],
                [1, 1],
                [0, 0],
                [0, 0],
                [1, 1],
                [1, 1],
            ],
            "c": [1, 0, 0, 1, 1, 0, 0, 1],
        })

        def module_provider():
            class ClassificationModule(PytorchNN):
                def __init__(self):
                    super().__init__()
                    self.net0 = nn.Sequential(nn.Linear(1, 5), nn.ReLU(),
                                              nn.Linear(5, 1), nn.Sigmoid())
                    self.net1 = nn.Sequential(nn.Linear(2, 5), nn.ReLU(),
                                              nn.Linear(5, 1), nn.Sigmoid())

                def forward_training(self, x) -> t.Tensor:
                    # x carries one input per feature set; the two sub-net outputs are summed
                    first, second = x
                    return self.net0(first) + self.net1(second)

            return ClassificationModule()

        features_and_labels = FeaturesAndLabels(features=(["a"], ["b"]),
                                                labels=["c"])
        model = PytorchModel(module_provider,
                             features_and_labels,
                             nn.MSELoss,
                             lambda params: Adam(params, lr=0.03))

        # the extracted features must be wrapped in a MultiFrameDecorator
        extracted = frame._.extract(model.features_and_labels)
        self.assertIsInstance(extracted.features_with_required_samples.features,
                              MultiFrameDecorator)
        print(extracted.features_with_required_samples.features)

        fit = frame.model.fit(model, fold_epochs=10)
        print(fit.test_summary.df)

        self.assertIn(FEATURE_COLUMN_NAME, fit.test_summary.df)
        expected_labels = np.array([0, 0, 1])
        np.testing.assert_almost_equal(
            expected_labels, fit.test_summary.df["label"].values.squeeze())

Пример #13

0

Показать файл

def ta_markowitz(df: pd.DataFrame,
                 covariances=None,
                 risk_aversion=5,
                 return_period=60,
                 prices='Close',
                 expected_returns=None,
                 rebalance_trigger=None,
                 solver='cvxopt',
                 tail=None):
    """Compute portfolio weights by solving one quadratic program per index value.

    Every solve is constrained to non-negative weights which sum to one.

    Args:
        df: data frame with multi index columns (asserted).
        covariances: ``None`` to estimate the covariances from the price columns
            via ``ta_ewma_covariance``, a column key into ``df``, or the
            covariances themselves.
        risk_aversion: factor the covariance matrix is multiplied with in the
            objective passed to ``solve_qp``.
        return_period: forwarded to ``_default_returns_estimator``.
        prices: key of the price columns.
        expected_returns: forwarded to ``_default_returns_estimator``.
        rebalance_trigger: optional column key into ``df``; when ``None`` a
            series of ones is used as trigger.
        solver: solver name handed to ``solve_qp``.
        tail: when given, only the last ``abs(tail)`` common index values are
            optimized.

    Returns:
        A data frame of weights, indexed by the optimized index values, with
        one column per covariance column. A row of zeros means "un-invest":
        all expected returns were negative, no solution was found, or the
        solver raised (the traceback is logged).
    """
    assert isinstance(df.columns, pd.MultiIndex), \
        "expect multi index columns 'prices', 'expected returns' and rebalance trigger"

    # risk
    if covariances is None:
        if isinstance(df.columns, pd.MultiIndex) and prices in df.columns.get_level_values(1):
            # we need to flip levels
            cov = ta_ewma_covariance(df.cloc2(prices))
        else:
            cov = ta_ewma_covariance(df[prices])
    elif isinstance(covariances, str):
        cov = df[covariances]
    else:
        cov = covariances
    cov = cov.dropna()

    # return
    exp_ret = _default_returns_estimator(df, prices, expected_returns, return_period, len(cov.columns))

    # re-balance
    trigger = (pd.Series(np.ones(len(df)), index=df.index) if rebalance_trigger is None else df[rebalance_trigger]).dropna()

    # non negative weight constraint and weights sum to 1
    h = np.zeros(len(cov.columns)).reshape((-1, 1))
    G = -np.eye(len(h))
    A = np.ones(len(h)).reshape((1, -1))
    b = np.ones(1)

    # magic solution's
    keep_solution = (np.empty(len(h)) * np.nan)
    uninvest = np.zeros(len(h))

    # keep last solution
    # NOTE(review): nothing in this function ever assigns last_solution, so the
    # early exit inside optimize() can currently never trigger - confirm whether
    # the solution was meant to be remembered after each solve.
    last_solution = None

    def optimize(t, sigma, pi):
        nonlocal last_solution
        nr_of_assets = len(sigma)

        # only optimize if we have a re-balance trigger (early exit)
        if last_solution is not None and last_solution.sum() > 0.99:
            # so we had at least one valid solution in the past
            # we can early exit if we do not have any signal or no signal for any currently held asset
            if len(t.shape) > 1 and t.shape[1] == nr_of_assets:
                if t[:, last_solution >= 0.01].sum().any() < 1:
                    return keep_solution
            else:
                if t.sum().any() < 1:
                    return keep_solution

        # make sure covariance matrix is positive definite
        # (the result has to be bound back to sigma, otherwise the solver still gets the raw matrix)
        sigma = cov_nearest(sigma)

        # we perform optimization except when all expected returns are < 0
        # then we early exit with an un-invest command
        # (test every element: len() of the masked 2D array would only be its row count)
        if (pi < 0).all():
            return uninvest
        else:
            try:
                sol = solve_qp(risk_aversion * sigma, -pi.T, G=G, h=h, A=A, b=b, solver=solver)
                if sol is None:
                    _log.error("no solution found")
                    return uninvest
                else:
                    return sol
            except Exception:
                # best effort: log the failure and fall back to being un-invested for this row
                _log.error(traceback.format_exc())
                return uninvest

    index = sorted(set(df.index.intersection(cov.index.get_level_values(0)).intersection(exp_ret.index).intersection(trigger.index)))
    if tail is not None:
        # slice the last abs(tail) entries; a plain lookup would pick one single label
        # (note that a tail of 0 keeps the whole index)
        index = index[-abs(tail):]

    weights = [optimize(trigger.loc[[i]].values, cov.loc[[i]].values, exp_ret[cov.columns].loc[[i]].values) for i in index]

    # turn weights into a data frame
    return pd.DataFrame(weights, index=index, columns=cov.columns)

Python DataFrame примеры использования