def run_tests(n_cases, n_features, sparse, exposure_type, distribution,
              time_drift):
    n_intervals = 5
    n_lags = np.repeat(2, n_features).astype('uint64')
    sim = SimuSCCS(n_cases, n_intervals, n_features, n_lags, time_drift,
                   exposure_type, distribution, sparse, verbose=False)
    X, X_c, y, c, coeffs = sim.simulate()
    self.assertEqual(len(X), n_cases)
    self.assertEqual(len(y), n_cases)
    self.assertEqual(X[0].shape, (n_intervals, n_features))
    self.assertEqual(y[0].shape, (n_intervals,))
    self.assertEqual(c.shape, (n_cases,))
    for i, co in enumerate(coeffs):
        self.assertEqual(co.shape, (int(n_lags[i] + 1),))
    self.assertEqual(np.sum([1 for f in X if f.sum() <= 0]), 0)
    self.assertEqual(np.sum([1 for f in X_c if f.sum() <= 0]), 0)
def test_grad_loss_consistency(self):
    """Test longitudinal multinomial model gradient properties."""
    n_intervals = 16
    n_lags = 4
    sim = SimuSCCS(500, n_intervals, 3, n_lags, None, True, "infinite",
                   seed=42, verbose=False)
    X, y, censoring, coeffs = sim.simulate()
    X = LongitudinalFeaturesLagger(n_lags=n_lags) \
        .fit_transform(X, censoring)
    model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags) \
        .fit(X, y, censoring)
    self._test_grad(model, coeffs)
    X_sparse = [csr_matrix(x) for x in X]
    model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags) \
        .fit(X_sparse, y, censoring)
    self._test_grad(model, coeffs)
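# The `_test_grad` helper called above is defined elsewhere in the test class
# and is not part of this excerpt. Below is a minimal sketch of what such a
# gradient/loss consistency check could look like: the body and the tolerance
# are assumptions, while `model.loss`, `model.grad` and
# `scipy.optimize.check_grad` are existing APIs.
def _test_grad(self, model, coeffs, delta_check_grad=1e-5):
    """Check that the analytical gradient matches finite differences."""
    from scipy.optimize import check_grad
    # check_grad returns the 2-norm of the difference between the
    # finite-difference gradient of model.loss and model.grad at `coeffs`.
    self.assertAlmostEqual(
        check_grad(model.loss, model.grad, coeffs), 0.,
        delta=delta_check_grad)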
def test_convergence_with_lags(self):
    """Test longitudinal multinomial model convergence."""
    n_intervals = 10
    n_lags = 3
    n_samples = 1500
    n_features = 3
    sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None, True,
                   "short", seed=42, verbose=False)
    X, y, censoring, coeffs = sim.simulate()
    X = LongitudinalFeaturesLagger(n_lags=n_lags) \
        .fit_transform(X, censoring)
    model = ModelSCCS(n_intervals=n_intervals,
                      n_lags=n_lags).fit(X, y, censoring)
    solver = SVRG(max_iter=15, verbose=False)
    solver.set_model(model).set_prox(ProxZero())
    coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
    np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
def test_LearnerSCCS_fit(self):
    seed = 42
    n_lags = np.repeat(2, 2).astype('uint64')
    sim = SimuSCCS(n_cases=800, n_intervals=10, n_features=2,
                   n_lags=n_lags, verbose=False, seed=seed,
                   exposure_type='multiple_exposures')
    features, _, labels, censoring, coeffs = sim.simulate()
    lrn = ConvSCCS(n_lags=n_lags, penalized_features=[], tol=0,
                   max_iter=10, random_state=seed)
    estimated_coeffs, _ = lrn.fit(features, labels, censoring)
    np.testing.assert_almost_equal(np.hstack(estimated_coeffs),
                                   np.hstack(coeffs), decimal=1)
def setUp(self):
    self.n_lags = np.repeat(1, 2).astype('uint64')
    self.seed = 42
    self.coeffs = [
        np.log(np.array([2.1, 2.5])),
        np.log(np.array([.8, .5]))
    ]
    self.n_features = len(self.n_lags)
    self.n_correlations = 2
    # Create data
    sim = SimuSCCS(n_cases=500, n_intervals=10,
                   n_features=self.n_features, n_lags=self.n_lags,
                   verbose=False, seed=self.seed, coeffs=self.coeffs,
                   n_correlations=self.n_correlations)
    _, self.features, self.labels, self.censoring, self.coeffs = \
        sim.simulate()
def test_grad_loss_consistency(self):
    """Test longitudinal multinomial model gradient properties."""
    n_lags = np.repeat(9, 3).astype(dtype="uint64")
    sim = SimuSCCS(500, 36, 3, n_lags, None, "single_exposure", seed=42,
                   verbose=False)
    _, X, y, censoring, coeffs = sim.simulate()
    coeffs = np.hstack(coeffs)
    X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
        .fit_transform(X, censoring)
    model = ModelSCCS(n_intervals=36, n_lags=n_lags) \
        .fit(X, y, censoring)
    self._test_grad(model, coeffs)
    X_sparse = [csr_matrix(x) for x in X]
    model = ModelSCCS(n_intervals=36, n_lags=n_lags) \
        .fit(X_sparse, y, censoring)
    self._test_grad(model, coeffs)
def run_tests(n_samples, n_features, sparse, exposure_type, distribution,
              first_tick_only, censoring):
    n_intervals = 5
    n_lags = 2
    sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                   sparse, exposure_type, distribution, first_tick_only,
                   censoring, seed=42, verbose=False)
    X, y, c, coeffs = sim.simulate()
    self.assertEqual(len(X), n_samples)
    self.assertEqual(len(y), n_samples)
    self.assertEqual(X[0].shape, (n_intervals, n_features))
    self.assertEqual(y[0].shape, (n_intervals,))
    self.assertEqual(c.shape, (n_samples,))
    self.assertEqual(coeffs.shape, (n_features * (n_lags + 1),))
def test_convergence_with_lags(self):
    """Test longitudinal multinomial model convergence."""
    n_intervals = 10
    n_samples = 800
    n_features = 2
    n_lags = np.repeat(2, n_features).astype(dtype="uint64")
    sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                   "multiple_exposures", seed=42)
    _, X, y, censoring, coeffs = sim.simulate()
    coeffs = np.hstack(coeffs)
    X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
        .fit_transform(X, censoring)
    model = ModelSCCS(n_intervals=n_intervals,
                      n_lags=n_lags).fit(X, y, censoring)
    solver = SVRG(max_iter=15, verbose=False)
    solver.set_model(model).set_prox(ProxZero())
    coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
    np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
normalized_time_drift = np.exp(time_drift(np.arange(750)))
normalized_time_drift /= normalized_time_drift.sum()

sim = SimuSCCS(
    int(n_cases),
    n_intervals,
    n_features,
    n_lags,
    time_drift=time_drift,
    n_correlations=n_features,
    coeffs=coeffs,
    seed=seed,
    verbose=False,
)
features, censored_features, labels, censoring, coeffs = sim.simulate()
adjacency_matrix = sim.hawkes_exp_kernels.adjacency.tobytes()

# Convert to R format
df = to_nonparasccs(censored_features, labels, censoring, lags)
df["indiv"] = df.index
df = df.astype("int64")
exposures_frequencies = df.drugid.value_counts()

exp_log = Experiment(
    experiment_id=experiment_id,
    version=version,
    description=experiment_desc,
    features_set=features_set,