def setup_data_sets(seed):
    """Create the antibiotics data source and return it with its data split.

    The training part returned by ``dist.get_data()`` is passed through
    ``split_patients`` before being returned; the test part is returned
    untouched. Progress and the elapsed wall-clock time are printed.

    Args:
        seed: Seed forwarded to ``AntibioticsDatabase``.

    Returns:
        A tuple ``(dist, training_data, test_data)``.
    """
    t0 = time.time()
    print("Generating training and test data")

    # n_x is read from the enclosing module scope.
    dist = AntibioticsDatabase(n_x=n_x, antibiotic_limit=50, seed=seed)
    raw_training, held_out = dist.get_data()
    per_patient_training = split_patients(raw_training)

    elapsed = time.time() - t0
    print("Generating data took {:.3f} seconds".format(elapsed))
    return dist, per_patient_training, held_out
def __init__(self, seed=None):
    """Initialise the base class and fit a doctor approximator.

    Data is pulled from an ``AntibioticsDatabase`` built with ``n_x=6`` and
    ``antibiotic_limit=50``; only the training part (after
    ``split_patients``) is handed to ``DoctorApproximator``.

    Args:
        seed: Seed forwarded to both the parent constructor and the database.
    """
    super().__init__(seed=seed)

    database = AntibioticsDatabase(n_x=6, antibiotic_limit=50, seed=seed)
    # The test part of the split is not needed here.
    train_split, _unused_test = database.get_data()
    patients = split_patients(train_split)

    self.doctor_approximator = DoctorApproximator(
        database.n_x, database.n_a, database.n_y, patients)
    self.statistics = self.doctor_approximator.get_patient_statistics()
def plot_time_vs_effect(values, times, settings):
    """Plot mean efficacy against mean time, one curve per algorithm.

    ``values`` and ``times`` are summed over their first axis and divided by
    the number of data sets reported by ``settings.load_settings()``. The
    resulting figure is written to
    ``saved_values/<file_name_prefix>_time_vs_effect4.pdf``.

    Args:
        values: Per-data-set efficacy values; after averaging it is indexed
            as ``[:, i_alg]`` (presumably ``(n_data_sets, n_deltas,
            n_algorithms)`` -- TODO confirm against the caller).
        times: Per-data-set time values, indexed the same way as ``values``.
        settings: Object providing ``setup_algorithms`` and
            ``load_settings``.
    """
    colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    markers = ['s', 'v', 'P', '1', '2', '3', '4']
    solid = '-'

    build_algorithms = settings.setup_algorithms
    _seed, n_data_sets, n_deltas, file_name_prefix = settings.load_settings()

    # The algorithms are instantiated only to learn their count and labels.
    dist = AntibioticsDatabase(AntibioticsDeltaSweepSettings.n_x, 50, seed=10342)
    train_part, _test_part = dist.get_data()
    train_part = split_patients(train_part)
    algs = build_algorithms(train_part, dist, 0)
    n_algorithms = len(algs)

    mean_values = np.sum(values, 0) / n_data_sets
    mean_times = np.sum(times, 0) / n_data_sets

    # curves[k, 0] holds the x data (time), curves[k, 1] the y data (value).
    curves = np.zeros((n_algorithms, 2, n_deltas))
    for k in range(n_algorithms):
        curves[k, 0, :] = mean_times[:, k]
        curves[k, 1, :] = mean_values[:, k]

    fig, ax1 = plt.subplots(figsize=(6, 4))
    plt.rcParams["font.family"] = "serif"
    for k in range(n_algorithms):
        fmt = colors[k] + markers[k] + solid
        ax1.plot(curves[k, 0], curves[k, 1], fmt,
                 label='{}'.format(algs[k].label))

    ax1.invert_xaxis()
    ax1.legend()
    plt.xlabel("Mean time")
    plt.ylabel("Efficacy")
    ax1.grid(True)
    plt.savefig("saved_values/" + file_name_prefix + "_time_vs_effect4.pdf")
from Database.sql_cocktail_statistics import get_antibiotcsevents
from Database.antibioticsdatabase import AntibioticsDatabase
import matplotlib.pyplot as plt
import numpy as np

# Collect, for every admission, the time in minutes between two consecutive
# antibiotic events whose labels differ.

database = AntibioticsDatabase()
database.cur.execute(get_antibiotcsevents)
data = database.cur.fetchall()

# Not used in the visible part of the script; kept in case later code reads it.
prev_hadm_id = 0

# Group the rows by admission id, keeping the order in which they were
# returned. Each row is read as (hadm_id, label, start_time, ...).
datapoints = {}
for chartevent in data:
    hadm_id, label, start_time = chartevent[0], chartevent[1], chartevent[2]
    datapoints.setdefault(hadm_id, []).append([label, start_time])

times = []
for events in datapoints.values():
    # Walk over consecutive event pairs within one admission.
    for (prev_label, prev_time), (label, event_time) in zip(events, events[1:]):
        if label != prev_label:
            diff_time = event_time - prev_time
            # total_seconds() covers the whole interval; timedelta.seconds
            # only holds the sub-day remainder and would drop full days.
            times.append(diff_time.total_seconds() / 60)
res.append(max_mean_treatment_effects / n_test_samples) res.append(mean_times / n_test_samples) return res if __name__ == '__main__': settings = get_settings() # Settings plot_var = False starting_seed, n_data_sets, delta, file_name_prefix = settings.load_settings( ) # Quick hack to get n_algorithms tmp_dist = AntibioticsDatabase(AntibioticsSettings.n_x, 50, seed=10342) training_data, test_data = tmp_dist.get_data() training_data = training_data n_x = tmp_dist.n_x n_a = tmp_dist.n_a n_y = tmp_dist.n_y algs = settings.setup_algorithms(tmp_dist, training_data, n_x, n_a, n_y, delta) n_algorithms = len(algs) values = np.zeros((n_data_sets, n_algorithms, n_a)) times = np.zeros((n_data_sets, n_algorithms)) main_start = time.time() pool = Pool(processes=n_data_sets) results = []
def plot_data(values, times, settings, plot_var=False):
    """Plot mean treatment effect and mean search time for every algorithm.

    Two figures are produced:

    1. Mean treatment effect against the number of tried treatments, saved to
       ``saved_values/<file_name_prefix>_plotNew.pdf``.
    2. A bar chart of the mean number of treatments tried per policy, which
       is shown with ``plt.show()``.

    Args:
        values: Per-data-set treatment effects; summed over axis 0 and
            divided by ``n_data_sets`` (presumably shaped
            ``(n_data_sets, n_algorithms, n_a)`` -- TODO confirm).
        times: Per-data-set search times, averaged the same way
            (presumably ``(n_data_sets, n_algorithms)`` -- TODO confirm).
        settings: Object providing ``load_settings`` and
            ``setup_algorithms``.
        plot_var: Currently unused; kept so existing callers keep working.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    # NOTE(review): `^` is bitwise XOR, not exponentiation, so the last dash
    # lengths are 2, 3, 0, 1. Left unchanged; use `**` if squares were meant.
    plot_lines = ['-', '--', ':', '-.'] + [(i, (1, 4, i, i ^ 2)) for i in range(0, 4)]

    # Extract settings
    load_settings = settings.load_settings
    setup_algorithms = settings.setup_algorithms
    starting_seed, n_data_sets, delta, file_name_prefix = load_settings()

    # The algorithms are rebuilt here only to obtain their names and labels.
    dist = AntibioticsDatabase(AntibioticsSettings.n_x, 50, seed=10342)
    training_data, test_data = dist.get_data()
    training_data = split_patients(training_data)
    n_x, n_a, n_y = dist.n_x, dist.n_a, dist.n_y
    algs = setup_algorithms(dist, training_data, n_x, n_a, n_y, delta)
    n_algorithms = len(algs)

    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Swap the last two algorithms together with their results. Integer-array
    # indexing copies the right-hand side, so no temporary is needed.
    algs[-1], algs[-2] = algs[-2], algs[-1]
    values_mean[[-1, -2]] = values_mean[[-2, -1]]
    times_mean[[-1, -2]] = times_mean[[-2, -1]]

    # Plot mean treatment effect vs. number of tried treatments
    x = np.arange(0, n_a)
    x_ticks = list(np.arange(1, n_a + 1))
    plt.figure()
    plt.title(r'Treatment effect. $\delta$: {}'.format(delta))
    plt.ylabel('Mean treatment effect')
    plt.xlabel('Number of tried treatments')

    # Average, over the test set, of the maximum of each sample's last entry.
    average_max_treatment_effect = (
        sum(max(data[-1]) for data in test_data) / len(test_data))

    # NOTE(review): mean_lines has n_algorithms entries and is indexed with
    # i_alg + 1 below, which only stays in range if the last algorithm is
    # the one named "Doctor" -- confirm against setup_algorithms.
    mean_lines = np.linspace(0, 1, n_algorithms)
    algs[-3].label = "NDP_F"
    algs[-2].label = "Emulated doctor"

    for i_alg, alg in enumerate(algs):
        color = plot_colors[i_alg]
        fmt = plot_markers[i_alg] + color
        line_style = plot_lines[i_alg % len(plot_lines)]
        if alg.name != "Doctor":
            plt.plot(x, values_mean[i_alg], fmt, linestyle=line_style,
                     label=alg.label)
            # Mark the mean search time: a solid segment in this algorithm's
            # own vertical band plus a faint full-height line.
            plt.axvline(times_mean[i_alg] - 1, ymin=mean_lines[i_alg],
                        ymax=mean_lines[i_alg + 1], color=color)
            plt.axvline(times_mean[i_alg] - 1, ymin=0, ymax=1, color=color,
                        alpha=0.1)
        else:
            # The "Doctor" entry is drawn as one large marker at x = 0.
            plt.plot(0, values_mean[i_alg][0], fmt, markersize=20,
                     linestyle=line_style, linewidth=4, label=alg.label)

    plt.rcParams["font.family"] = "serif"
    plt.grid(True)
    plt.xticks(x, x_ticks)
    plt.plot(x, np.ones(len(x)) * average_max_treatment_effect,
             linestyle=plot_lines[-1], label='MAX_POSS_AVG')
    plt.legend(loc='lower right')
    plt.savefig("saved_values/" + file_name_prefix + "_plotNew.pdf")

    # Plot mean number of treatments tried
    plt.figure()
    plt.title('Search time')
    plt.ylabel('Mean number of treatments tried')
    plt.xlabel('Policy')
    # Break multi-word policy names over several lines so the bars stay narrow.
    x_bars = [alg.name.replace(" ", '\n') for alg in algs]
    rects = plt.bar(x_bars, times_mean)
    for rect in rects:
        h = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2., 0.90 * h, "%f" % h,
                 ha="center", va="bottom")
    plt.show()
plot_mean_treatment_effect, plot_treatment_efficiency, plot_delta_efficiency, plot_search_time, plot_strictly_better ] main_start = time.time() # Generate the data # dist = DiscreteDistribution(n_z, n_x, n_a, n_y, seed=seed, outcome_sensitivity_x_z=1) #dist = DiscreteDistributionWithSmoothOutcomes(n_z, n_x, n_a, n_y, seed=seed, outcome_sensitivity_x_z=1) # dist = DiscreteDistributionWithInformation(n_z, n_x, n_a, n_y, seed=seed) ''' dist.print_moderator_statistics() dist.print_covariate_statistics() dist.print_treatment_statistics() dist.print_detailed_treatment_statistics() ''' dist = AntibioticsDatabase(n_x=1, antibiotic_limit=50, seed=seed) ''' dist = NewDistribution(seed=seed) #dist = NewDistributionSlightlyRandom(seed=seed) n_x = 1 n_a = 3 n_y = 3 ''' ''' dist = FredrikDistribution() n_x = 1 n_a = 3 n_y = 2 ''' if type(dist) != AntibioticsDatabase:
def plot_sweep_delta(values, times, settings, plot_var=False, split_plot=True):
    """Plot mean treatment effect and mean search time as functions of delta.

    The figure is saved to ``saved_values/<file_name_prefix>_plot2.png``.

    Args:
        values: Per-data-set treatment effects, indexed as
            ``[i_data_set][i_delta][i_alg]``.
        times: Per-data-set search times, indexed like ``values``.
        settings: Object providing ``load_settings`` and
            ``setup_algorithms``.
        plot_var: When True, shade a band of plus/minus the sample variance
            around every curve.
        split_plot: When True, effect and time get separate stacked axes;
            otherwise they share one plot through a twin y-axis.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    plot_lines = ['-', '--', ':', '-.', '-', '--', ':']

    # Extract settings
    load_settings = settings.load_settings
    setup_algorithms = settings.setup_algorithms
    starting_seed, n_data_sets, n_deltas, file_name_prefix = load_settings()

    # The algorithms are rebuilt here only to obtain their count and labels.
    dist = AntibioticsDatabase(AntibioticsDeltaSweepSettings.n_x, 50, seed=10342)
    training_data, test_data = dist.get_data()
    training_data = split_patients(training_data)
    algs = setup_algorithms(training_data, dist, 0)
    n_algorithms = len(algs)

    deltas = np.linspace(0.0, 1.0, n_deltas)

    values = np.asarray(values)
    times = np.asarray(times)
    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    if plot_var:
        if n_data_sets > 1:
            # Unbiased sample variance over the data-set axis.
            values_var = np.sum((values_mean - values) ** 2, axis=0) / (n_data_sets - 1)
            times_var = np.sum((times_mean - times) ** 2, axis=0) / (n_data_sets - 1)
        else:
            # A single data set has no spread; avoid dividing by zero.
            values_var = np.zeros_like(values_mean, dtype=float)
            times_var = np.zeros_like(times_mean, dtype=float)

    # Plot mean treatment effect vs delta
    if not split_plot:
        fig, ax1 = plt.subplots(figsize=(6, 4))
        ax2 = ax1.twinx()
    else:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 10))
    ax1.set_title(r'Mean treatment effect/mean search time vs $\delta$')
    ax1.set_xlabel(r'$\delta$')
    ax2.set_xlabel(r'$\delta$')
    ax1.set_ylabel('Efficacy')
    ax2.set_ylabel('Mean search time')

    for i_alg, alg in enumerate(algs):
        color = plot_colors[i_alg]
        marker = plot_markers[i_alg]
        ax1.plot(deltas, values_mean[:, i_alg],
                 color + marker + plot_lines[0],
                 label='{} {}'.format(alg.label, 'effect'), markevery=3)
        ax2.plot(deltas, times_mean[:, i_alg],
                 color + marker + plot_lines[1],
                 label='{} {}'.format(alg.label, 'time'), markevery=3)
        if plot_var:
            # NOTE(review): the band half-width is the variance, not the
            # standard deviation -- confirm this is intended.
            ax1.fill_between(deltas,
                             values_mean[:, i_alg] - values_var[:, i_alg],
                             values_mean[:, i_alg] + values_var[:, i_alg],
                             facecolor=color, alpha=0.3)
            ax2.fill_between(deltas,
                             times_mean[:, i_alg] - times_var[:, i_alg],
                             times_mean[:, i_alg] + times_var[:, i_alg],
                             facecolor=color, alpha=0.3)

    ax1.grid(True)
    ax2.grid(True)
    plt.rcParams["font.family"] = "serif"
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='upper right')
    ax2.legend(lines2, labels2, loc='lower left')
    plt.savefig("saved_values/" + file_name_prefix + "_plot2.png")