Example No. 1
 def run_tests(n_cases, n_features, sparse, exposure_type, distribution,
               time_drift):
     n_intervals = 5
     n_lags = np.repeat(2, n_features).astype('uint64')
     sim = SimuSCCS(n_cases,
                    n_intervals,
                    n_features,
                    n_lags,
                    time_drift,
                    exposure_type,
                    distribution,
                    sparse,
                    verbose=False)
     X, X_c, y, c, coeffs = sim.simulate()
     self.assertEqual(len(X), n_cases)
     self.assertEqual(len(y), n_cases)
     self.assertEqual(X[0].shape, (n_intervals, n_features))
     self.assertEqual(y[0].shape, (n_intervals, ))
     self.assertEqual(c.shape, (n_cases, ))
     for i, co in enumerate(coeffs):
         self.assertEqual(co.shape, (int(n_lags[i] + 1), ))
     self.assertEqual(np.sum([1 for f in X if f.sum() <= 0]), 0)
     self.assertEqual(np.sum([1 for f in X_c if f.sum() <= 0]), 0)
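A minimal, self-contained sketch of the same simulation call, assuming the tick.survival import path and the keyword signature used in Example No. 7 below. The call here unpacks five return values (features, censored features, labels, censoring, coefficients); Examples No. 4, 5 and 10 unpack only four, which presumably corresponds to an older SimuSCCS version.

import numpy as np
from tick.survival import SimuSCCS

# Two features, each with two lagged effects, as in the test above.
n_lags = np.repeat(2, 2).astype('uint64')
sim = SimuSCCS(n_cases=500, n_intervals=10, n_features=2, n_lags=n_lags,
               exposure_type='multiple_exposures', seed=42, verbose=False)
features, censored_features, labels, censoring, coeffs = sim.simulate()
print(len(features), censoring.shape)  # 500 cases, censoring of shape (500,)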
Example No. 2
    def _bootstrap(self, p_features, p_labels, p_censoring, coeffs, rep,
                   confidence):
        # WARNING: _bootstrap inputs are already preprocessed p_features,
        # p_labels and p_censoring
        # Coeffs here are assumed to be an array (same object as self._coeffs)
        if confidence <= 0 or confidence >= 1:
            raise ValueError("`confidence_level` should be in (0, 1)")
        confidence = 1 - confidence
        if not self._fitted:
            raise RuntimeError('You must fit the model first')

        bootstrap_coeffs = []
        sim = SimuSCCS(self.n_cases,
                       self.n_intervals,
                       self.n_features,
                       self.n_lags,
                       coeffs=self._format_coeffs(coeffs))
        # TODO later: parallelize bootstrap (everything should be picklable...)
        for k in range(rep):
            y = sim._simulate_multinomial_outcomes(p_features, coeffs)
            self._model_obj.fit(p_features, y, p_censoring)
            bootstrap_coeffs.append(self._fit(True))

        bootstrap_coeffs = np.exp(np.array(bootstrap_coeffs))
        bootstrap_coeffs.sort(axis=0)
        lower_bound = np.log(bootstrap_coeffs[int(
            np.floor(rep * confidence / 2))])
        upper_bound = np.log(bootstrap_coeffs[int(
            np.floor(rep * (1 - confidence / 2)))])
        return Confidence_intervals(self._format_coeffs(coeffs),
                                    self._format_coeffs(lower_bound),
                                    self._format_coeffs(upper_bound),
                                    confidence)
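The quantile step above is a percentile bootstrap. A stand-alone sketch with a hypothetical helper (numpy only; bootstrap_coeffs holds the rep replicates on the exponential scale, alpha is 1 - confidence_level, matching the reassignment of confidence above):

import numpy as np

def percentile_bootstrap_ci(bootstrap_coeffs, alpha):
    # bootstrap_coeffs: array of shape (rep, n_coeffs) on the exp scale.
    rep = bootstrap_coeffs.shape[0]
    sorted_reps = np.sort(bootstrap_coeffs, axis=0)
    lower = np.log(sorted_reps[int(np.floor(rep * alpha / 2))])
    upper = np.log(sorted_reps[int(np.floor(rep * (1 - alpha / 2)))])
    return lower, upper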
Example No. 3
 def test_simulated_features(self):
     n_features = 3
     n_lags = np.repeat(2, n_features)
     sim = SimuSCCS(100,
                    10,
                    n_features,
                    n_lags,
                    None,
                    'multiple_exposures',
                    verbose=False)
     feat, n_samples = sim.simulate_features(100)
     self.assertEqual(100, len(feat))
     print(np.sum([1 for f in feat if f.sum() <= 0]))
Example No. 4
 def test_grad_loss_consistency(self):
     """Test longitudinal multinomial model gradient properties."""
     n_intervals = 16
     n_lags = 4
     sim = SimuSCCS(500, n_intervals, 3, n_lags, None,
                    True, "infinite", seed=42, verbose=False)
     X, y, censoring, coeffs = sim.simulate()
     X = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags) \
         .fit(X, y, censoring)
     self._test_grad(model, coeffs)
     X_sparse = [csr_matrix(x) for x in X]
     model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags) \
         .fit(X_sparse, y, censoring)
     self._test_grad(model, coeffs)
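The _test_grad helper is not shown in this excerpt. A hypothetical finite-difference version of such a check, assuming the model exposes loss(coeffs) and grad(coeffs) as tick first-order models do, could look like this:

import numpy as np

def check_grad_consistency(model, coeffs, eps=1e-6, rtol=1e-4):
    # Compare the analytical gradient with a central finite difference
    # of the loss evaluated around coeffs.
    coeffs = np.asarray(coeffs, dtype=float)
    grad = model.grad(coeffs)
    num_grad = np.empty_like(grad)
    for i in range(coeffs.shape[0]):
        step = np.zeros_like(coeffs)
        step[i] = eps
        num_grad[i] = (model.loss(coeffs + step) -
                       model.loss(coeffs - step)) / (2 * eps)
    np.testing.assert_allclose(grad, num_grad, rtol=rtol)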
Example No. 5
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence."""
     n_intervals = 10
     n_lags = 3
     n_samples = 1500
     n_features = 3
     sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                    True, "short", seed=42, verbose=False)
     X, y, censoring, coeffs = sim.simulate()
     X = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=n_intervals,
                       n_lags=n_lags).fit(X, y, censoring)
     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
     np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
Example No. 6
    def test_censoring(self):
        array_list = [np.ones((2, 3)) for i in range(3)]
        expected = [np.zeros((2, 3)) for i in range(3)]
        for i in range(1, 3):
            expected[i][:i] += 1
        censoring = np.arange(3)

        output = SimuSCCS._censor_array_list(array_list, censoring)

        for i in range(3):
            np.testing.assert_equal(output[i], expected[i])
Example No. 7
 def test_LearnerSCCS_fit(self):
     seed = 42
     n_lags = np.repeat(2, 2).astype('uint64')
     sim = SimuSCCS(n_cases=800,
                    n_intervals=10,
                    n_features=2,
                    n_lags=n_lags,
                    verbose=False,
                    seed=seed,
                    exposure_type='multiple_exposures')
     features, _, labels, censoring, coeffs = sim.simulate()
     lrn = ConvSCCS(n_lags=n_lags,
                    penalized_features=[],
                    tol=0,
                    max_iter=10,
                    random_state=seed)
     estimated_coeffs, _ = lrn.fit(features, labels, censoring)
     np.testing.assert_almost_equal(np.hstack(estimated_coeffs),
                                    np.hstack(coeffs),
                                    decimal=1)
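The simulated ground-truth coefficients are log relative incidences (they are built with np.log in Example No. 8 below), so the fitted values can be read back on the incidence scale. A short sketch under that assumption:

import numpy as np

# estimated_coeffs is the per-feature list returned by lrn.fit above;
# exponentiating each block gives a relative-incidence curve over the lags.
relative_incidences = [np.exp(c) for c in estimated_coeffs]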
Example No. 8
 def setUp(self):
     self.n_lags = np.repeat(1, 2).astype('uint64')
     self.seed = 42
     self.coeffs = [
         np.log(np.array([2.1, 2.5])),
         np.log(np.array([.8, .5]))
     ]
     self.n_features = len(self.n_lags)
     self.n_correlations = 2
     # Create data
     sim = SimuSCCS(n_cases=500,
                    n_intervals=10,
                    n_features=self.n_features,
                    n_lags=self.n_lags,
                    verbose=False,
                    seed=self.seed,
                    coeffs=self.coeffs,
                    n_correlations=self.n_correlations)
     _, self.features, self.labels, self.censoring, self.coeffs =\
         sim.simulate()
Example No. 9
 def test_grad_loss_consistency(self):
     """Test longitudinal multinomial model gradient properties."""
     n_lags = np.repeat(9, 3).astype(dtype="uint64")
     sim = SimuSCCS(500,
                    36,
                    3,
                    n_lags,
                    None,
                    "single_exposure",
                    seed=42,
                    verbose=False)
     _, X, y, censoring, coeffs = sim.simulate()
     coeffs = np.hstack(coeffs)
     X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=36, n_lags=n_lags)\
         .fit(X, y, censoring)
     self._test_grad(model, coeffs)
     X_sparse = [csr_matrix(x) for x in X]
     model = ModelSCCS(n_intervals=36, n_lags=n_lags)\
         .fit(X_sparse, y, censoring)
     self._test_grad(model, coeffs)
Example No. 10
 def run_tests(n_samples, n_features, sparse, exposure_type,
               distribution, first_tick_only, censoring):
     n_intervals = 5
     n_lags = 2
     sim = SimuSCCS(n_samples,
                    n_intervals,
                    n_features,
                    n_lags,
                    None,
                    sparse,
                    exposure_type,
                    distribution,
                    first_tick_only,
                    censoring,
                    seed=42,
                    verbose=False)
     X, y, c, coeffs = sim.simulate()
     self.assertEqual(len(X), n_samples)
     self.assertEqual(len(y), n_samples)
     self.assertEqual(X[0].shape, (n_intervals, n_features))
     self.assertEqual(y[0].shape, (n_intervals, ))
     self.assertEqual(c.shape, (n_samples, ))
     self.assertEqual(coeffs.shape, (n_features * (n_lags + 1), ))
Example No. 11
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence."""
     n_intervals = 10
     n_samples = 800
     n_features = 2
     n_lags = np.repeat(2, n_features).astype(dtype="uint64")
     sim = SimuSCCS(n_samples,
                    n_intervals,
                    n_features,
                    n_lags,
                    None,
                    "multiple_exposures",
                    seed=42)
     _, X, y, censoring, coeffs = sim.simulate()
     coeffs = np.hstack(coeffs)
     X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=n_intervals,
                       n_lags=n_lags).fit(X, y, censoring)
     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
     np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
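coeffs is flattened with np.hstack before the comparison above. To map a flat estimate back into per-feature blocks of n_lags[i] + 1 values (the block shapes asserted in Example No. 1), a hypothetical helper might be:

import numpy as np

def split_coeffs(flat_coeffs, n_lags):
    # Undo np.hstack: one (n_lags[i] + 1,)-shaped block per feature.
    sizes = np.asarray(n_lags, dtype=int) + 1
    return np.split(flat_coeffs, np.cumsum(sizes)[:-1])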
Example No. 12
    effects_compiled = compile(effects_str, "<string>", "exec")
    exec(effects_compiled)  # create sim_effects
    td_compiled = compile(time_drift_str, "<string>", "exec")
    exec(td_compiled)  # create time_drift
    n_features = len(sim_effects)
    sim_effects = np.hstack(sim_effects)
    coeffs = np.log(sim_effects)
    normalized_time_drift = np.exp(time_drift(np.arange(750)))
    normalized_time_drift /= normalized_time_drift.sum()

    sim = SimuSCCS(
        int(n_cases),
        n_intervals,
        n_features,
        n_lags,
        time_drift=time_drift,
        n_correlations=n_features,
        coeffs=coeffs,
        seed=seed,
        verbose=False,
    )

    features, censored_features, labels, censoring, coeffs = sim.simulate()

    adjacency_matrix = sim.hawkes_exp_kernels.adjacency.tobytes()

    # Convert to R format
    df = to_nonparasccs(censored_features, labels, censoring, lags)
    df["indiv"] = df.index
    df = df.astype("int64")
Example No. 13
    ce = CustomEffects(lags + 1)
    effects_compiled = compile(effects_str, "<string>", "exec")
    exec(effects_compiled)  # create sim_effects
    td_compiled = compile(time_drift_str, "<string>", "exec")
    exec(td_compiled)  # create time_drift
    n_features = len(sim_effects)
    sim_effects = np.hstack(sim_effects)
    coeffs = np.log(sim_effects)
    normalized_time_drift = np.exp(time_drift(np.arange(750)))
    normalized_time_drift /= normalized_time_drift.sum()

    n_lags = np.repeat(lags, n_features + 1).astype("uint64")
    sim = SimuSCCS(n_cases,
                   n_intervals,
                   n_features + 1,
                   n_lags,
                   time_drift=time_drift,
                   exposure_type="multiple_exposures",
                   n_correlations=n_corr,
                   seed=seed)

    features, censored_features, labels, censoring, coeffs = sim.simulate()

    adjacency_matrix = sim.hawkes_exp_kernels.adjacency.tobytes()

    # Convert to R format
    df = to_nonparasccs(censored_features, labels, censoring, lags)
    df["indiv"] = df.index
    df = df.astype("int64")

    exposures_frequencies = df.drugid.value_counts()