def provide_regression_model(self, features_and_labels):
    """Return the regression ``PytorchModel`` used by the shared model tests.

    The torch RNG is seeded with 12 before the model is constructed. The
    model wraps ``RegressionModule`` with an MSE criterion and plain SGD
    (learning rate 0.01, no momentum).

    :param features_and_labels: features/labels definition handed to the model
    :return: the configured ``PytorchModel``
    """
    t.manual_seed(12)

    def sgd_provider(params):
        # plain SGD without momentum
        return SGD(params, lr=0.01, momentum=0.0)

    return PytorchModel(
        module_provider=RegressionModule,
        features_and_labels=features_and_labels,
        criterion_provider=nn.MSELoss,
        optimizer_provider=sgd_provider,
    )
def test_regularized_loss(self):
    """Fit with a huge L2 penalty and assert the penalized weight norm ends below 0.1."""
    df = pd.DataFrame({
        "f": np.sin(np.linspace(0, 12, 40)),
        "l": np.sin(np.linspace(5, 17, 40)),
    })

    class TestModel(PytorchNN):

        def __init__(self):
            super().__init__()
            layers = [
                nn.Linear(1, 3),
                nn.ReLU(),
                nn.Linear(3, 2),
                nn.ReLU(),
                nn.Linear(2, 1),
                nn.Sigmoid(),
            ]
            self.net = nn.Sequential(*layers)

        def forward_training(self, x):
            return self.net(x)

        def L2(self) -> Dict[str, float]:
            # presumably this wildcard path selects the weight of layer index 2
            # (the second Linear) -- confirm against the path matching rules
            return {'**/2/**/weight': 99999999999.99}

    model = PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]), nn.MSELoss, Adam)
    fitting_parameter = FittingParameter(epochs=1000, splitter=naive_splitter(0.5))
    fit = df.model.fit(model, fitting_parameter)

    # weight of layer index 2 of the fitted network
    penalized_weight = fit.model._current_model.net.net[2].weight
    weight_norm = penalized_weight.norm().detach().item()

    print(penalized_weight.detach().numpy())
    print(weight_norm)
    self.assertLess(weight_norm, 0.1)
def provide_classification_model(self, features_and_labels):
    """Return the classification ``PytorchModel`` used by the shared model tests.

    The torch RNG is seeded with 42 before the model is constructed. The
    model wraps ``ClassificationModule`` with an MSE criterion and SGD
    (learning rate 0.03).

    :param features_and_labels: features/labels definition handed to the model
    :return: the configured ``PytorchModel``
    """
    t.manual_seed(42)

    def sgd_provider(params):
        return SGD(params, lr=0.03)

    return PytorchModel(
        module_provider=ClassificationModule,
        features_and_labels=features_and_labels,
        criterion_provider=nn.MSELoss,
        optimizer_provider=sgd_provider,
    )
def test_make_model(self):
    """Fit an L1/L2 regularized network on the SPY example data inside a ``df.model`` context.

    Features are 10 lagged values of the close-to-close percent change; the
    label is the next period's percent change. The fit runs for 2 epochs.
    """
    spy_csv = os.path.join(PWD, '..', 'examples', 'SPY.csv')
    df = pd.read_csv(spy_csv)

    with df.model("/tmp/pijsfnwuacpa.model") as m:
        # everything the fit needs is imported inside the context block
        from torch import nn
        from torch.optim import SGD
        from pandas_ml_common.utils.column_lagging_utils import lag_columns
        from pandas_ml_utils import FeaturesAndLabels, RegressionSummary, FittingParameter
        from pandas_ml_utils_torch import PytorchModel
        from pandas_ml_utils_torch.merging_cross_folds import take_the_best

        def net_provider():
            from pandas_ml_utils_torch import PytorchNN

            class Net(PytorchNN):

                def __init__(self):
                    super().__init__()
                    layers = [
                        nn.Linear(10, 4),
                        nn.Tanh(),
                        nn.Linear(4, 4),
                        nn.Tanh(),
                        nn.Linear(4, 1),
                        nn.Tanh(),
                    ]
                    self.net = nn.Sequential(*layers)

                def L1(self):
                    # Maps a parameter path to its L1 regularization factor.
                    # The path is constructed from self.named_parameters() and
                    # allows the use of wildcards.
                    return {'net/0/**/weight': 0.02}

                def L2(self):
                    # same path convention as L1, here for two layers
                    return {
                        'net/0/**/weight': 0.02,
                        'net/2/**/weight': 0.05,
                    }

                def forward_training(self, x):
                    return self.net(x)

            return Net()

        features_and_labels = FeaturesAndLabels(
            [lambda df: lag_columns(df["Close"].pct_change(), range(10))],
            [lambda df: df["Close"].pct_change().shift(-1)],
        )
        model = PytorchModel(
            net_provider,
            features_and_labels,
            nn.MSELoss,
            lambda params: SGD(params, lr=0.01, momentum=0.0),
            merge_cross_folds=take_the_best,
            summary_provider=RegressionSummary,
        )

        fit = m.fit(model, FittingParameter(epochs=2), verbose=1)
def test_probabilistic(self):
    """Smoke-test fitting a two-output network with ``HeteroscedasticityLoss`` on noisy sine data.

    The test only checks that the fit runs; nothing is asserted on the result.
    """

    def create_sine_data(n=300):
        """Return ``(x, y)``: a noisy sine wave on a linear trend, zero-padded on both sides.

        ``n`` is the number of sine samples; 60 padding samples are added on
        each side, so both arrays have length ``n + 120``. The NumPy RNG is
        seeded with 32 so the noise is reproducible.
        """
        # NOTE: ``n`` used to be overwritten with a hard-coded 300 here, which
        # silently ignored the argument. The only caller passes 300, so the
        # generated data is unchanged.
        np.random.seed(32)
        x = np.linspace(0, 1 * 2 * np.pi, n)

        # one sine period with amplitude 3; noise scale grows with |signal|
        y1 = 3 * np.sin(x)
        y1 = np.concatenate(
            (np.zeros(60),
             y1 + np.random.normal(0, 0.15 * np.abs(y1), n),
             np.zeros(60)))

        # the returned x axis spans three periods plus 3 units of padding on each side
        x = np.concatenate(
            (np.linspace(-3, 0, 60),
             np.linspace(0, 3 * 2 * np.pi, n),
             np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60)))

        # linear trend added on top of the noisy sine
        y2 = 0.1 * x + 1
        y = y1 + y2
        return x, y

    df = pd.DataFrame(np.array(create_sine_data(300)).T, columns=["x", "y"])

    with df.model() as m:
        from pandas_ml_utils import FeaturesAndLabels
        from pandas_ml_utils_torch import PytorchNN, PytorchModel
        from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
        from pandas_ml_common.sampling.splitter import duplicate_data
        from torch.optim import Adam
        from torch import nn

        class Net(PytorchNN):

            def __init__(self):
                super().__init__()
                # the final layer emits 2 values per sample for the loss
                self.l = nn.Sequential(
                    nn.Linear(1, 20),
                    nn.ReLU(),
                    nn.Linear(20, 50),
                    nn.ReLU(),
                    nn.Linear(50, 20),
                    nn.ReLU(),
                    nn.Linear(20, 2),
                )

            def forward_training(self, x):
                return self.l(x)

        fit = m.fit(
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), HeteroscedasticityLoss, Adam,
                         restore_best_weights=True),
            FittingParameter(batch_size=128, epochs=10, splitter=duplicate_data()))
def provide_linear_regression_model(self):
    """Return ``(model, fitting_parameter)`` pairs for a single-neuron linear regression.

    Three scenarios are provided, identified by their ``context`` string:
    "epoch fit", "epoch fit batched" and "fold epoch fit". Each pair gets its
    own freshly constructed ``PytorchModel``.
    """
    from pandas_ml_utils_torch import PytorchModel, PytorchNN
    from pandas_ml_utils import FeaturesAndLabels
    from torch.optim import Adam
    from torch import nn
    import torch as t

    class Net(PytorchNN):

        def __init__(self):
            super().__init__()
            self.net = nn.Linear(1, 1)

        def forward_training(self, *inputs) -> t.Tensor:
            # only the first positional input is fed to the network
            return self.net(inputs[0])

    def scenario(**fitting_kwargs):
        # construct the model before its fitting parameter, one new model per scenario
        model = PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adam)
        return model, FittingParameter(**fitting_kwargs)

    return [
        scenario(epochs=5000, context="epoch fit"),
        scenario(epochs=5000, batch_size=64, context="epoch fit batched"),
        scenario(epochs=1, fold_epochs=5000, context="fold epoch fit"),
    ]
def test_mult_epoch_cross_validation(self):
    """Smoke-test a fit that combines 2 epochs with 10 fold epochs and a batch size of 2.

    Nothing is asserted; the resulting fit is only printed.
    """
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 1, 0, 1, 1, 0, 1, 0],
    })

    with df.model() as m:

        class NN(PytorchNN):

            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                layers = [nn.Linear(1, 2), nn.ReLU(), nn.Linear(2, 1)]
                self.nn = nn.Sequential(*layers)

            def forward_training(self, x):
                return self.nn(x)

        model = PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]), nn.MSELoss, Adam)
        fitting_parameter = FittingParameter(
            splitter=naive_splitter(0.5),
            epochs=2,
            fold_epochs=10,
            batch_size=2,
        )
        fit = m.fit(model, fitting_parameter)

    print(fit)
def test_multi_objective_loss(self):
    """Smoke-test a fit using ``MultiObjectiveLoss`` over a two-output module.

    The module returns two outputs during training (one per loss term) and
    only the first one for prediction. Nothing is asserted; the test summary
    frame is only printed.
    """
    rows = [
        # train
        [0, 0, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
        # test
        [0, 0, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ]
    df = pd.DataFrame(np.array(rows), columns=["f1", "f2", "l"])

    class XorModule(PytorchNN):

        def __init__(self):
            super().__init__()
            self.x1 = nn.Linear(2, 1)
            self.s1 = nn.Sigmoid()
            self.x2 = nn.Linear(2, 1)
            self.s2 = nn.Sigmoid()
            self.s = nn.Softmax()

        def forward_training(self, x):
            first_head = self.s1(self.x1(x))
            second_head = self.s2(self.x2(x))
            return first_head, second_head

        def forward_predict(self, x):
            return self.s1(self.x1(x))

    def criterion_provider():
        # two equally weighted, unreduced losses; update_weights((0, 1.1)) is
        # invoked through the on_epoch hook
        return MultiObjectiveLoss(
            (1, nn.MSELoss(reduction='none')),
            (1, nn.L1Loss(reduction='none')),
            on_epoch=lambda criterion, epoch: criterion.update_weights((0, 1.1)))

    model = PytorchModel(
        XorModule,
        FeaturesAndLabels(["f1", "f2"], ["l"]),
        criterion_provider,
        Adam)
    fit = df.model.fit(model, FittingParameter(splitter=naive_splitter(0.5)))

    print(fit.test_summary.df)
def provide_non_linear_regression_model(self):
    """Return ``(model, fitting_parameter)`` pairs for a deep ReLU regression network.

    A single ``PytorchModel`` instance is shared by all three scenarios, which
    are identified by their ``context`` string: "epoch fit batched",
    "epoch fit" and "fold epoch fit". The torch RNG is seeded with 0 right
    before the model is constructed.
    """
    from pandas_ml_utils_torch import PytorchModel, PytorchNN
    from pandas_ml_utils import FeaturesAndLabels
    from torch.optim import Adagrad
    from torch import nn
    import torch as t

    class Net(PytorchNN):

        def __init__(self):
            super().__init__()
            # 1 -> 200 -> 200 -> 200 -> 1, every Linear followed by a ReLU
            widths = [1, 200, 200, 200, 1]
            layers = []
            for n_in, n_out in zip(widths, widths[1:]):
                layers.append(nn.Linear(n_in, n_out))
                layers.append(nn.ReLU())
            self.net = nn.Sequential(*layers)

        def forward_training(self, *inputs) -> t.Tensor:
            # only the first positional input is fed to the network
            return self.net(inputs[0])

    t.manual_seed(0)
    model = PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adagrad)

    fitting_parameters = (
        FittingParameter(epochs=600, batch_size=64, context="epoch fit batched"),
        FittingParameter(epochs=600, context="epoch fit"),
        FittingParameter(epochs=1, fold_epochs=600, context="fold epoch fit"),
    )
    return [(model, fitting_parameter) for fitting_parameter in fitting_parameters]
def test_pytorch_mfs(self):
    """Fit a model whose features are a tuple of two feature sets.

    Checks that the extracted features are a ``MultiFrameDecorator``, that the
    test summary frame contains ``FEATURE_COLUMN_NAME`` and that its "label"
    values equal ``[0, 0, 1]``.
    """
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [[0, 0], [0, 0], [1, 1], [1, 1], [0, 0], [0, 0], [1, 1], [1, 1]],
        "c": [1, 0, 0, 1, 1, 0, 0, 1],
    })

    def module_provider():

        class ClassificationModule(PytorchNN):

            def __init__(self):
                super().__init__()
                # one sub-network per feature set: 1 input for "a", 2 inputs for "b"
                self.net0 = nn.Sequential(nn.Linear(1, 5), nn.ReLU(), nn.Linear(5, 1), nn.Sigmoid())
                self.net1 = nn.Sequential(nn.Linear(2, 5), nn.ReLU(), nn.Linear(5, 1), nn.Sigmoid())

            def forward_training(self, x) -> t.Tensor:
                # x is unpacked into the two feature sets; the outputs are summed
                x0, x1 = x
                return self.net0(x0) + self.net1(x1)

        return ClassificationModule()

    features_and_labels = FeaturesAndLabels(features=(["a"], ["b"]), labels=["c"])
    model = PytorchModel(
        module_provider,
        features_and_labels,
        nn.MSELoss,
        lambda params: Adam(params, lr=0.03))

    extracted: FeaturesWithLabels = df._.extract(model.features_and_labels)
    extracted_features = extracted.features_with_required_samples.features
    self.assertIsInstance(extracted_features, MultiFrameDecorator)
    print(extracted_features)

    fit = df.model.fit(model, fold_epochs=10)
    summary_df = fit.test_summary.df
    print(summary_df)

    self.assertIn(FEATURE_COLUMN_NAME, summary_df)
    expected_labels = np.array([0, 0, 1])
    np.testing.assert_almost_equal(expected_labels, summary_df["label"].values.squeeze())
def test_probabilistic_model_with_callback(self):
    """Fit a two-component Gaussian mixture model with a confidence-interval callback.

    Requires the optional ``pandas_ml_quant`` package; when it cannot be
    imported the test prints a notice and returns without running. Nothing is
    asserted; the calculated test scores are only printed.
    """
    try:
        importlib.import_module("pandas_ml_quant")
        from pandas_ml_quant import PricePredictionSummary
        from pandas_ml_quant.model.summary.price_prediction_summary import PriceSampledSummary
    except ImportError:
        # Only a missing package skips the test. A bare ``except`` here used
        # to hide every other failure as well, including KeyboardInterrupt.
        print("pandas_ml_quant not found, skipping!")
        return

    # random returns, the sum of two normal distributions
    df = pd.DataFrame({
        "Returns": np.random.normal(-0.02, 0.03, 500) + np.random.normal(0.03, 0.02, 500)
    })

    fl = PostProcessedFeaturesAndLabels(
        features=["Returns"],
        feature_post_processor=lambda df: df.ta.rnn(20),
        labels=[
            lambda df: df["Returns"].shift(-1).rename("Future_Returns")
        ],
        targets=lambda df: (1 + df["Returns"]).cumprod().rename("Close"))

    # the network emits 6 values which are split into
    # (2 softmax probabilities, 2 positive scales, 2 locations)
    model_factory = PytorchNNFactory.create(
        nn.Sequential(
            nn.Linear(20, 10),
            nn.Tanh(),
            nn.Linear(10, 6),
            LambdaSplitter(
                lambda x: T.softmax(x[..., :2], dim=1),
                lambda x: T.exp(x[..., 2:4]),
                # enforce one mean positive and the other negative
                lambda x: T.cat([T.exp(x[..., 4:5]), -T.exp(x[..., 5:6])], dim=1),
            )),
        predictor=lambda n, i: T.cat(n(i), dim=1),
        trainer=lambda n, i: n(i))

    def dist(probs, scales, locs):
        # mixture of normal distributions weighted by the categorical probabilities
        return MixtureSameFamily(Categorical(probs=probs), Normal(loc=locs, scale=scales))

    def loss(y_pred):
        # training output arrives as the (probs, scales, locs) tuple
        probs, scales, locs = y_pred
        return dist(probs, scales, locs)

    def cdf_cb(arg):
        # prediction output arrives concatenated along the last axis
        probs, scales, locs = arg[..., :2], arg[..., 2:4], arg[..., 4:6]
        return dist(probs, scales, locs)

    summary_provider = PriceSampledSummary.with_reconstructor(
        sampler=wrap_applyable(
            lambda params, samples: cdf_cb(params).sample([int(samples.item())]),
            nr_args=2),
        samples=100,
        confidence=0.8)

    model = PytorchModel(
        module_provider=model_factory,
        features_and_labels=fl,
        criterion_provider=lambda: DistributionNLL(loss, penalize_toal_variance_lambda=1.1),
        optimizer_provider=Adam,
        summary_provider=summary_provider)

    fit = df.model.fit(
        model,
        FittingParameter(epochs=10, batch_size=6, splitter=naive_splitter(0.25)),
        #verbose=1,
        callbacks=[
            TestConfidenceInterval(
                TestConfidenceInterval.CdfConfidenceInterval(
                    wrap_applyable(lambda params, val: cdf_cb(params).cdf(val), nr_args=2),
                    interval=0.8),
                wrap_applyable(lambda params: cdf_cb(params).variance),
                early_stopping=True)
        ])

    print(fit.test_summary.calc_scores())