def test_load_parameters(self):
    """
    Check that ``load_parameters`` stores every parameter it is given.

    The parameters of the model loaded from ``example_logisticmodel_path``
    are injected into a bare ``AbstractModel``; each entry is then compared
    (exact tensor equality) with its hard-coded expected value.
    """
    reference = Leaspy.load(example_logisticmodel_path)
    model = AbstractModel("dummy_model")
    model.load_parameters(reference.model.parameters)

    # Expected content of ``model.parameters``, keyed by parameter name.
    expected_parameters = {
        'g': [1.8669992685317993, 2.4921786785125732,
              2.471605062484741, 2.1240732669830322],
        'v0': [-2.8300716876983643, -3.3241398334503174,
               -3.4701175689697266, -2.6136295795440674],
        'betas': [[0.011530596762895584, 0.06039918214082718],
                  [0.008324957452714443, 0.048168670386075974],
                  [0.01144738681614399, 0.0822334811091423]],
        'tau_mean': 75.30111694335938,
        'tau_std': 7.103002071380615,
        'xi_mean': 0.0,
        'xi_std': 0.2835913300514221,
        'sources_mean': 0.0,
        'sources_std': 1.0,
        'noise_std': 0.1988248974084854,
    }

    for param_name, expected_value in expected_parameters.items():
        loaded_value = model.parameters[param_name]
        self.assertTrue(torch.equal(loaded_value, torch.tensor(expected_value)))
def test_estimate(self):
    """
    Check ``Leaspy.estimate`` against hard-coded reference values.

    A fitted multivariate model and saved individual parameters are loaded
    from the test data directory, the model is evaluated at a few ages for
    two subjects, and every estimated value is compared with its reference.

    The comparison is made on the *absolute* difference: a signed
    ``v1 - v2 < tol`` check would accept any estimation that is smaller than
    the reference, however far off it is.
    """
    model_parameters_path = os.path.join(test_data_dir, 'model_parameters',
                                         'fitted_multivariate_model.json')
    leaspy = Leaspy.load(model_parameters_path)

    ip_path = os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json')
    ip = IndividualParameters.load(ip_path)

    timepoints = {'idx1': [78, 81], 'idx2': [91]}
    estimations = leaspy.estimate(timepoints, ip)

    # One row of feature values per requested timepoint.
    expected = {
        'idx1': [[0.9168074, 0.88841885, 0.80543965, 0.9921461],
                 [0.98348546, 0.9773835, 0.9456895, 0.99938035]],
        'idx2': [[0.9999131, 0.9998343, 0.9991264, 0.99999964]]
    }
    tolerance = 1e-7  # same threshold as before (it was spelled 10e-8)

    # Test
    self.assertEqual(estimations.keys(), expected.keys())
    for subject_id, expected_rows in expected.items():
        estimated_rows = estimations[subject_id]
        # zip() silently truncates, so check the number of timepoints first
        self.assertEqual(len(estimated_rows), len(expected_rows))
        for estimated_row, expected_row in zip(estimated_rows, expected_rows):
            self.assertTrue(
                (abs(estimated_row - expected_row) < tolerance).all())
def test_acceptation(self):
    """
    Check the acceptation rate tracked by the Gibbs sampler.

    The sampler built for the ``tau`` variable is repeatedly fed the same
    acceptation vector (10 accepted, 7 rejected out of 17 patients); the mean
    of ``acceptation_temp`` must then be close to 10 / 17.
    """
    n_patients = 17
    n_draw = 200

    path_model_sampler = os.path.join(test_data_dir, "model_parameters",
                                      "multivariate_model_sampler.json")
    leaspy = Leaspy.load(path_model_sampler)

    # Test with taus
    var_name = 'tau'
    gsampler = GibbsSampler(
        leaspy.model.random_variable_informations()[var_name], n_patients)

    for _ in range(n_draw):
        gsampler._update_acceptation_rate(
            torch.tensor([1.0] * 10 + [0.0] * 7, dtype=torch.float32))

    self.assertAlmostEqual(gsampler.acceptation_temp.mean(), 10 / 17,
                           delta=0.05)
def test_append_spaceshifts_to_individual_parameters_dataframe(self):
    """
    Smoke test for ``append_spaceshifts_to_individual_parameters_dataframe``.

    The helper is called on a small hand-written table of individual
    parameters (three subjects, two sources) and the ``test_api.json`` model;
    the test only requires that the call completes, the returned value is
    not inspected.
    """
    subject_ids = ["idx1", "idx2", "idx3"]
    parameter_names = ["xi", "tau", "sources_0", "sources_1"]
    parameter_values = [
        [0.1, 70, 0.1, -0.3],
        [0.2, 73, -0.4, 0.1],
        [0.3, 58, -0.6, 0.2],
    ]
    df = pd.DataFrame(data=parameter_values,
                      index=subject_ids,
                      columns=parameter_names)

    model_path = os.path.join(test_data_dir, 'model_parameters',
                              'test_api.json')
    leaspy = Leaspy.load(model_path)

    df_w = append_spaceshifts_to_individual_parameters_dataframe(df, leaspy)
def test_compute_trajectory_of_population(self):
    """
    Check the shape of the population trajectory.

    The trajectory is computed at seven ages (70 to 76) with the
    ``test_api.json`` model and the saved individual parameters; the result
    must have 7 rows and 4 columns.
    """
    model_path = os.path.join(test_data_dir, 'model_parameters',
                              'test_api.json')
    ip_path = os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json')

    leaspy = Leaspy.load(model_path)
    ip = IndividualParameters.load(ip_path)

    timepoints = list(range(70, 77))
    trajectory = compute_trajectory_of_population(timepoints, ip, leaspy)

    # TODO : choose a convention for output type : Numpy or Torch ?
    # For now it seems numpy in api.estimate, hence no torch.is_tensor check.
    expected_shape = (7, 4)
    for axis, expected_size in enumerate(expected_shape):
        self.assertEqual(trajectory.shape[axis], expected_size)
def test_get_reparametrized_ages(self):
    """
    Check ``get_reparametrized_ages`` against hard-coded reference ages.

    Ages of two subjects are reparametrized with the ``test_api.json`` model
    and the saved individual parameters. The result must expose the same
    subject ids as the input and, per subject, one reparametrized age per
    input age, each close to its reference value.

    Values are compared with a small tolerance instead of exact ``==``:
    the references are floating point results and exact equality on them is
    brittle across platforms / library versions.
    """
    leaspy = Leaspy.load(
        os.path.join(test_data_dir, 'model_parameters', 'test_api.json'))
    ip = IndividualParameters.load(
        os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json'))

    ages = {'idx1': [70, 80], 'idx3': [100]}
    expected = {
        'idx1': [78.02704620361328, 89.0787582397461],
        'idx3': [134.7211151123047],
    }

    reparametrized_ages = get_reparametrized_ages(ages, ip, leaspy)

    self.assertEqual(reparametrized_ages.keys(), ages.keys())
    for subject_id, expected_ages in expected.items():
        computed_ages = reparametrized_ages[subject_id]
        # zip() would silently ignore extra / missing values
        self.assertEqual(len(computed_ages), len(expected_ages))
        for computed_age, expected_age in zip(computed_ages, expected_ages):
            self.assertAlmostEqual(computed_age, expected_age, delta=1e-4)
def test_personalize_scipy(self):
    """
    Personalize the example logistic model with the ``scipy_minimize``
    algorithm (seed 0) on the example data, and check that the returned
    noise standard deviation is close to its reference value.
    """
    # Inputs
    dataset = Data.from_csv_file(example_data_path)

    # Model
    model = Leaspy.load(example_logisticmodel_path)

    # Personalization; only the noise is checked
    settings = AlgorithmSettings('scipy_minimize', seed=0)
    _, noise_std = model.personalize(dataset, settings=settings,
                                     return_noise=True)

    self.assertAlmostEqual(noise_std.item(), 0.1169, delta=0.01)
def test_personalize_scipy_diag_noise(self):
    """
    Personalize the example logistic model with diagonal noise using the
    ``scipy_minimize`` algorithm (seed 0) on the example data, and check
    that the returned noise vector is close to its reference value
    (sum of squared differences below the tolerance).
    """
    # Inputs
    dataset = Data.from_csv_file(example_data_path)

    # Model
    model = Leaspy.load(example_logisticmodel_diag_noise_path)

    # Personalization; only the noise is checked
    settings = AlgorithmSettings('scipy_minimize', seed=0)
    _, noise_std = model.personalize(dataset, settings=settings,
                                     return_noise=True)

    expected_noise_std = torch.tensor([0.3299, 0.1236, 0.1642, 0.2582])
    squared_error = ((noise_std - expected_noise_std) ** 2).sum()
    self.assertAlmostEqual(squared_error, 0., delta=0.01)
def test_personalize_modereal(self):
    """
    Personalize the example logistic model on the example data with the
    algorithm settings stored in ``data/settings_mode_real.json`` (next to
    this test file), and check that the returned noise standard deviation is
    close to its reference value.
    """
    # Inputs
    dataset = Data.from_csv_file(example_data_path)

    # Model
    model = Leaspy.load(example_logisticmodel_path)

    # Algorithm settings are read from a JSON file shipped with the tests
    this_dir = os.path.dirname(__file__)
    settings = AlgorithmSettings.load(
        os.path.join(this_dir, "data/settings_mode_real.json"))

    # Personalization; only the noise is checked
    _, noise_std = model.personalize(dataset, settings=settings,
                                     return_noise=True)

    self.assertAlmostEqual(noise_std.item(), 0.12152, delta=0.01)
def test_check_cofactors(self, get_result=False):
    """
    Test Leaspy.simulate raises a ``ValueError`` if the ``cofactor`` and
    ``cofactor_state`` parameters given in the ``AlgorithmSettings`` are
    invalid.

    Parameters
    ----------
    get_result : bool
        If set to ``True``, return the objects built to run the test.
        Else return nothing.

    Returns
    -------
    model : leaspy.Leaspy
        The loaded model.
    individual_parameters
        Output of ``model.personalize`` with the ``mode_real`` algorithm.
    data
        The example data, with the ``Treatments`` cofactor loaded.
    """
    data = Data.from_csv_file(example_data_path)

    # Attach a "Treatments" cofactor, indexed by subject id given as string
    cofactors_path = os.path.join(test_data_dir,
                                  "io/data/data_tiny_covariate.csv")
    cofactors = pd.read_csv(cofactors_path)
    cofactors.columns = ("ID", "Treatments")
    cofactors['ID'] = cofactors['ID'].apply(str)
    cofactors = cofactors.set_index("ID")
    data.load_cofactors(cofactors, ["Treatments"])

    model_path = os.path.join(
        test_data_dir, "model_parameters/multivariate_model_sampler.json")
    model = Leaspy.load(model_path)

    individual_parameters = model.personalize(data,
                                              AlgorithmSettings('mode_real'))

    # Unknown cofactor name
    settings = AlgorithmSettings('simulation', cofactor="dummy")
    with self.assertRaises(ValueError):
        model.simulate(individual_parameters, data, settings)

    # Known cofactor, unknown state
    settings = AlgorithmSettings('simulation', cofactor="Treatments",
                                 cofactor_state="dummy")
    with self.assertRaises(ValueError):
        model.simulate(individual_parameters, data, settings)

    if not get_result:
        return None
    return model, individual_parameters, data
def get_individual_parameters(data):
    """
    Personalize a leaspy model on the given data.

    ``data`` is converted with ``convert_data``, the model is loaded from
    ``data['model']`` and personalized with the ``scipy_minimize`` algorithm.

    Returns a dict with two entries:

    - ``'individual_parameters'``: the ``"patient"`` entry of the
      personalization output;
    - ``'scores'``: the converted data as a dataframe, transposed and turned
      into nested lists.
    """
    # Data
    leaspy_data = convert_data(data)

    # Algorithm
    settings = AlgorithmSettings('scipy_minimize')

    # Leaspy
    # TODO (was flagged "TO CORRECT"): the model is always loaded from
    # data['model']; a selection based on data['model']['name']
    # ('logistic_parallel' / 'logistic') used to be sketched here.
    leaspy = Leaspy.load(data['model'])

    personalization = leaspy.personalize(leaspy_data, settings=settings)

    return {
        'individual_parameters': personalization["patient"],
        'scores': leaspy_data.to_dataframe().values.T.tolist(),
    }
def test_get_error_distribution_dataframe(self):
    """
    Smoke test: ``get_error_distribution_dataframe`` must run on
    ``self.results`` with the fitted multivariate model; the returned value
    is not inspected.
    """
    fitted_model_path = os.path.join(test_data_dir, "model_parameters",
                                     "fitted_multivariate_model.json")
    fitted_model = Leaspy.load(fitted_model_path).model
    self.results.get_error_distribution_dataframe(fitted_model)
def test_sample(self):
    """
    Test if samples values are the one expected.

    For the ``tau`` then the ``g`` variable, a Gibbs sampler is run
    ``n_draw`` times on the same realizations object. The increments between
    successive draws are computed, and the average of their mean and of
    their standard deviation (both taken over the draws) are compared with
    reference values.
    """
    # TODO change this instanciation
    n_patients = 17
    n_draw = 50
    temperature_inv = 1.0

    path_model_sampler = os.path.join(test_data_dir, "model_parameters",
                                      "multivariate_model_sampler.json")
    path_data = os.path.join(test_data_dir, "io", "data", "data_tiny.csv")

    data = Dataset(Data.from_csv_file(path_data))
    leaspy = Leaspy.load(path_model_sampler)
    realizations = leaspy.model.get_realization_object(n_patients)

    # (variable name, expected mean of increments, expected std of increments)
    cases = (
        ('tau', 0.0160, 0.0861),
        ('g', 4.2792e-05, 0.0045),
    )

    for var_name, expected_mean, expected_std in cases:
        gsampler = GibbsSampler(
            leaspy.model.random_variable_informations()[var_name],
            n_patients)

        draws = []
        for _ in range(n_draw):
            gsampler.sample(data, leaspy.model, realizations, temperature_inv)
            # clone: the realizations are reused by the next call to sample
            draws.append(realizations[var_name].tensor_realizations.clone())

        stacked_draws = torch.stack(draws)
        # draw[k + 1] - draw[k], along the first (draw) dimension
        increments = stacked_draws[1:] - stacked_draws[:-1]
        increments_mean = increments.mean(dim=0)
        increments_std = increments.std(dim=0)

        self.assertAlmostEqual(increments_mean.mean(), expected_mean,
                               delta=0.05)
        self.assertAlmostEqual(increments_std.mean(), expected_std,
                               delta=0.05)
def test_usecase(self):
    """
    Functional test of a basic analysis using leaspy package

    1 - Data loading
    2 - Fit logistic model with MCMC algorithm
    3 - Save parameters & reload (remove created files to keep the repo clean)
    4 - Personalize model with 'mode_real' algorithm
    5 - Plot results (TODO, not done yet)
    6 - Simulate new patients and compare them with a stored simulation
    """
    data = Data.from_csv_file(example_data_path)

    # Fit
    algo_settings = AlgorithmSettings('mcmc_saem', n_iter=10, seed=0)
    leaspy = Leaspy("logistic")
    leaspy.model.load_hyperparameters({'source_dimension': 2})
    leaspy.fit(data, algorithm_settings=algo_settings)
    self.model_values_test(leaspy.model)

    # Save parameters and check its consistency
    path_to_saved_model = os.path.join(test_data_dir, 'model_parameters',
                                       'test_api-copy.json')
    try:
        leaspy.save(path_to_saved_model)

        with open(os.path.join(test_data_dir, "model_parameters",
                               'test_api.json'), 'r') as f1:
            model_parameters = json.load(f1)
        with open(path_to_saved_model) as f2:
            model_parameters_new = json.load(f2)

        self.assertTrue(
            dict_compare_and_display(model_parameters, model_parameters_new))

        # Load data and check its consistency
        leaspy = Leaspy.load(path_to_saved_model)
    finally:
        # Always clean up the temporary copy, even when a check above fails,
        # so that a failing run does not leave a stray file in the repo.
        if os.path.exists(path_to_saved_model):
            os.remove(path_to_saved_model)

    self.assertTrue(leaspy.model.is_initialized)
    self.model_values_test(leaspy.model)

    # Personalize
    algo_personalize_settings = AlgorithmSettings('mode_real', seed=0)
    individual_parameters = leaspy.personalize(
        data, settings=algo_personalize_settings)
    # TODO REFORMAT: compute the noise std afterwards and check it

    # Plot: TODO

    # Simulate
    simulation_settings = AlgorithmSettings('simulation', seed=0)
    simulation_results = leaspy.simulate(individual_parameters, data,
                                         simulation_settings)

    self.assertIsInstance(simulation_results, Result)
    self.assertEqual(simulation_results.data.headers, data.headers)

    n = simulation_settings.parameters['number_of_subjects']
    self.assertEqual(simulation_results.data.n_individuals, n)
    self.assertEqual(
        len(simulation_results.get_parameter_distribution('xi')), n)
    self.assertEqual(
        len(simulation_results.get_parameter_distribution('tau')), n)
    self.assertEqual(
        len(simulation_results.get_parameter_distribution('sources')
            ['sources0']), n)

    # Test the reproducibility of simulate
    # A tolerance is necessary: writing and reading induces numerical errors
    # of magnitude ~ 1e-13, BUT on a different machine errors of magnitude
    # 1e-5 have been observed.
    # TODO: Can we improve this??
    simulation_df = pd.read_csv(
        os.path.join(
            test_data_dir,
            "_outputs/simulation/test_api_simulation_df-post_merge-result_fix.csv"
        ))
    actual_simu_df = simulation_results.data.to_dataframe()

    # Check ID before - str doesn't seem to work with numpy.allclose
    id_simulation_is_reproducible = simulation_df['ID'].equals(
        actual_simu_df['ID'])
    self.assertTrue(id_simulation_is_reproducible)

    round_decimal = 5
    tol = 10**(-round_decimal)
    expected_values = simulation_df.loc[:, simulation_df.columns != 'ID'].values
    actual_values = actual_simu_df.loc[:, actual_simu_df.columns != 'ID'].values

    # Use of numpy.allclose instead of pandas.testing.assert_frame_equal
    # because of buggy behaviour reported in
    # https://github.com/pandas-dev/pandas/issues/22052
    simulation_is_reproducible = allclose(expected_values, actual_values,
                                          atol=tol, rtol=tol)

    # If reproducibility error > tolerance => display it + the visit with
    # the biggest reproducibility error
    error_message = ''
    if not simulation_is_reproducible:
        max_diff = 0.
        # Rows (expected / actual) holding the biggest difference. Lists, so
        # that the report below can always be built, even if no row stands out.
        worst_expected = []
        worst_actual = []
        count = 0
        for v1, v2 in zip(expected_values.tolist(), actual_values.tolist()):
            row_max_diff = max(
                (abs(val1 - val2) for val1, val2 in zip(v1, v2)),
                default=0.)
            if row_max_diff > tol:
                count += 1
            if row_max_diff > max_diff:
                worst_expected = v1
                worst_actual = v2
                max_diff = row_max_diff
        error_message += '\nTolerance error = %.1e' % tol
        error_message += '\nMaximum error = %.3e' % max_diff
        error_message += '\n' + str(
            [round(v, round_decimal + 1) for v in worst_expected])
        error_message += '\n' + str(
            [round(v, round_decimal + 1) for v in worst_actual])
        error_message += '\nNumber of simulated visits above tolerance error = %d / %d \n' \
                         % (count, simulation_df.shape[0])

    # Message is built before the last self.assert - otherwise no display is made
    self.assertTrue(simulation_is_reproducible, error_message)
import os import sys import plotly.graph_objects as go import seaborn as sns import sys import numpy as np from sklearn.decomposition import PCA sys.path.append("../") print(os.getcwd()) from leaspy import IndividualParameters, Data, Leaspy from src.leaspype import get_reparametrized_ages, append_spaceshifts_to_individual_parameters_dataframe #%% Load Data path_datadashboard = "data/" leaspy = Leaspy.load(os.path.join(path_datadashboard, "leaspy.json")) individual_parameters = IndividualParameters.load( os.path.join(path_datadashboard, "ip.csv")) data = Data.from_csv_file(os.path.join(path_datadashboard, "data.csv")) n_sources = leaspy.model.source_dimension sources_name = ["sources_{}".format(i) for i in range(n_sources)] df_data = data.to_dataframe().set_index(["ID", "TIME"]) features = leaspy.model.features # ind parameters df_ind = individual_parameters.to_dataframe() ind_param_names = list(df_ind.columns) + ["pca1", "pca2"] df_ind = df_ind.reset_index() # PCA on the w_i res = append_spaceshifts_to_individual_parameters_dataframe(
# Personalization step of the experiment: prepare the output folder, load
# the model calibrated on each resampling fold and build the algorithm
# settings used for every fold.
name = "personalize_0"
path_output_personalize = os.path.join(output_directory, experiment_folder, "personalize", name)
# Create the output folder on first run only
if not os.path.exists(path_output_personalize):
    os.makedirs(path_output_personalize)

# Get calibrated models paths (one "fold_<i>/model_parameters.json" per resampling iteration)
model_paths = [
    os.path.join(path_output_calibrate, "fold_{}".format(i), "model_parameters.json")
    for i in range(n_resampling_iter)
]

# Load estimated leaspy models, in the same order as model_paths
leaspy_iter = []
for i in range(n_resampling_iter):
    leaspy = Leaspy.load(model_paths[i])
    leaspy_iter.append(leaspy)

# Algo settings iter
# NOTE: every entry of the list is the *same* AlgorithmSettings object
# (it is appended, not copied), so the folds share a single settings instance.
algo_settings_personalize_iter = []
algo_settings_personalize = AlgorithmSettings(personalize_algorithm, seed=seed, n_iter=n_iter_personalize)
for i in range(n_resampling_iter):
    algo_settings_personalize_iter.append(algo_settings_personalize)

# Save algo settings next to the personalization outputs
algo_settings_personalize.save(
    os.path.join(path_output_personalize, "algo_settings_personalize.json"))