def main():
    """
    Command-line entry point for predicting a biomarker and evaluating the result.

    Parses the options, queries ``et.get_biomarker_predictions`` (always with
    ``select_test_set=True``) for the observed, naive and model-based values,
    plots them unless ``--no_plot`` is given and finally runs the analysis.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('visits', nargs='+', type=str,
        help='the viscodes of the visits that are available')
    add('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
        help='the method to collect data for')
    add('-b', '--biomarkers', nargs='+', default=None,
        help='name of the biomarker to be plotted')
    add('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
        help='the phase for which the model is to be trained')
    add('--predict_biomarker', type=str, default='MMSE',
        help='the biomarker to predict')
    add('--recompute_estimates', action='store_true',
        help='recompute the dpi / dpr estimations')
    add('--recompute_predictions', action='store_true',
        help='recompute the biomarker predictions')
    add('--estimate_dprs', action='store_true',
        help='estimate dpis and dprs')
    add('--consistent_data', action='store_true',
        help='use only subjects with bl, m12 and m24 visits')
    add('--exclude_cn', action='store_true',
        help='exclude healthy subjects from analysis')
    add('--use_last_visit', action='store_true',
        help='use only the last visit for prediction')
    add('--naive_use_diagnosis', action='store_true',
        help='use the specific mean change for the diagnosis')
    add('--no_plot', action='store_true',
        help='do not plot the results')
    add('--plot_file', type=str, default=None,
        help='filename of the output file')
    add('--latex_file', type=str, default=None,
        help='add output to a LaTeX file')
    args = parser.parse_args()

    # Everything except the visits and the target biomarker is passed by keyword
    prediction_options = dict(
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        recompute_estimates=args.recompute_estimates,
        recompute_predictions=args.recompute_predictions,
        estimate_dprs=args.estimate_dprs,
        select_test_set=True,
        consistent_data=args.consistent_data,
        exclude_cn=args.exclude_cn,
        use_last_visit=args.use_last_visit,
        naive_use_diagnosis=args.naive_use_diagnosis)
    predictions = et.get_biomarker_predictions(args.visits, args.predict_biomarker,
                                               **prediction_options)
    _, diagnoses, values_observed, values_naive, values_model = predictions

    if not args.no_plot:
        plot_biomarker_predictions(args, diagnoses, values_observed, values_model)
    analyse_biomarker_predictions(args, diagnoses, values_observed, values_naive, values_model)
def generate_csv_files(args, data_handler): """ Generate the CSV file used to call the R script. :param Namespace args: :param DataHandler data_handler: """ assert isinstance(args, argparse.Namespace) assert isinstance(data_handler, DataHandler) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits, select_training_set=True, exclude_deceased=args.exclude_deceased) for biomarker in biomarkers: print log.INFO, 'Generating output CSV for {0}...'.format(biomarker) samples_file = data_handler.get_samples_file(biomarker) writer = csv.writer(open(samples_file, 'wb'), delimiter=',') writer.writerow(['rid', 'progress', 'value', 'diagnosis']) subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) diagnosis = DataHandler.safe_cast(visit_data['DX.scan'], float) if progress is not None and value is not None: writer.writerow([rid, progress, value, diagnosis]) subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Collected {0} samples from {1} subjects.'.format(num_samples, len(subjects))
def main():
    """
    Command-line entry point for the decline classification experiment.

    Parses the options, obtains the progress estimates for the given visits
    from ``et.get_progress_estimates``, selects the two pairs of subject
    groups with ``get_rcds`` and ``get_rfds`` and runs ``analyse_decline``
    on each pair.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str,
                        help='the viscodes to be sampled')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-c', '--classifier', default='svm', choices=['lda', 'svm', 'lsvm', 'rf'],
                        help='the approach used to classify the subjects')
    # Help text used to be a copy of the --recompute_estimates text
    parser.add_argument('--estimate_dprs', action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--num_folds', type=int, default=10,
                        help='number of folds for the n-fold cross validation')
    parser.add_argument('--latex_file', type=str, default=None,
                        help='add output to a LaTeX file')
    args = parser.parse_args()

    # Get estimates
    rids, diagnoses, dpis, dprs, _, _ = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        consistent_data=args.consistent_data)

    # Select the subject groups to be compared
    rcds, non_rcds = get_rcds(args, rids, diagnoses, dpis, dprs)
    rfds, non_rfds = get_rfds(args, rids, diagnoses, dpis, dprs)

    # Analyse output
    analyse_decline(args, rids, dpis, dprs, rcds, non_rcds)
    analyse_decline(args, rids, dpis, dprs, rfds, non_rfds)
def main():
    """
    Command-line entry point for plotting the progress estimates.

    Parses the options, obtains the estimates of the test set from
    ``et.get_progress_estimates`` and, unless ``--no_plot`` is given, plots
    the DPI estimates and (with ``--estimate_dprs``) the DPI/DPR distribution.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str,
                        help='the viscodes to be sampled')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    # Help text used to be a copy of the --recompute_estimates text
    parser.add_argument('--estimate_dprs', action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--no_plot', action='store_true',
                        help='do not plot the results')
    parser.add_argument('--plot_lines', action='store_true',
                        help='plot graphs instead of matrix')
    parser.add_argument('--plot_steps', type=int, default=15,
                        help='number of steps for the DPI scale')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    parser.add_argument('--plot_cmap_jet', action='store_true',
                        help='use the colour map jet')
    args = parser.parse_args()

    # Get estimates
    _, diagnoses, dpis, dprs, mean_min, mean_max = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        select_test_set=True,
        consistent_data=args.consistent_data)

    # Plot results
    if not args.no_plot:
        plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max)
        if args.estimate_dprs:
            plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses)
def main():
    """
    Command-line entry point for plotting the fitted biomarker models.

    Parses the plotting options, creates the data handler for the selected
    method, biomarkers and phase and calls ``plot_model`` once per biomarker.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Data selection
    add('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
        help='the method to collect data for')
    add('-b', '--biomarkers', nargs='+', default=None,
        help='name of the biomarker to be plotted')
    add('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
        help='the phase for which the model is to be trained')
    add('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
        help='the type of extrapolator')

    # Plot appearance; store_true switches default to False on their own
    add('--xlim', type=float, nargs=2, default=None,
        help='force certain x limits for plotting')
    add('--ylim', type=float, nargs=2, default=None,
        help='force certain y limits for plotting')
    add('--no_model', action='store_true',
        help='do not plot the fitted model')
    add('--no_points', action='store_true',
        help='do not plot points')
    add('--points_alpha', type=float, default=0.25,
        help='alpha value of the plotted points')
    add('--no_densities', action='store_true',
        help='do not plot densities')
    add('--no_sample_lines', action='store_true',
        help='do not plot the sample lines')
    add('--only_densities', action='store_true',
        help='only plot densities')
    add('--no_extrapolation', action='store_true',
        help='do not extrapolate the model')
    add('--plot_eta', type=str, choices=['lambda', 'mu', 'sigma'], default=None,
        help='plot a predictor function')
    add('--plot_errors', action='store_true',
        help='plot the errors')
    add('--plot_synth_model', action='store_true',
        help='plot density distributions for synthetic data')
    add('--plot_quantile_label', action='store_true',
        help='plot labels on the quantile curces')
    add('--plot_donohue', action='store_true',
        help='plot the trajectory estimated with Donohue et al.')

    # Output
    add('--save_plots', action='store_true',
        help='save the plots with a default filename')
    add('--plot_file', type=str, default=None,
        help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
def main(): parser = argparse.ArgumentParser() parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp', help='the type of extrapolator') args = parser.parse_args() data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) biomarkers = data_handler.get_biomarker_names() if args.method == 'joint': offsets = np.linspace(500, 3000, 26) else: offsets = np.linspace(-1000, 1000, 21) all_diffs = np.zeros((len(offsets), len(biomarkers))) for i, biomarker in enumerate(biomarkers): diffs = get_model_differences(args, data_handler, biomarker, offsets) all_diffs[:, i] = diffs print biomarker, offsets[np.argmin(diffs)] optimum_index = np.argmin(np.mean(all_diffs, axis=1)) print 'all', offsets[optimum_index] mins = all_diffs[optimum_index, :] # np.min(all_diffs, axis=0) indices = np.argsort(mins) for i in indices: print biomarkers[i], mins[i] fig = plt.figure() ax1 = plt.subplot(1, 1, 1) ax1.plot(offsets, all_diffs, color='r') ax1.plot(offsets, np.mean(all_diffs, axis=1), color='b') plt.show() plt.close(fig)
def main(): parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.') parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp', help='the type of extrapolator') parser.add_argument('--plot_threshold', type=float, default=0.3, help='the threshold above which praphs are plotted') parser.add_argument('--recompute_errors', action='store_true', help='recompute the matrix containing the fitting errors') parser.add_argument('--search_range', nargs=3, default=(1000, 5000, 10), help='the range in which the offset is sought') args = parser.parse_args() # Get the data files and biomarkers data_handler_joint = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='joint') biomarkers, offsets, errors, descriminativeness, overlap = get_fitting_data(args, data_handler_joint) # Plot single biomarker fits fig, ax = plt.subplots() pt.setup_axes(plt, ax, xgrid=False) ax.set_title('Optimal offset between CN/MCI and MCI/AD models') ax.set_xlabel('Offset (days)') ax.set_ylabel('Fitting error') for i, biomarker in enumerate(biomarkers): if descriminativeness[i] > args.plot_threshold: print log.RESULT, 'Min error for {0} at {1}'.format(biomarker, offsets[np.argmin(errors[i, :])]) ax.plot(offsets, errors[i, :], label=biomarker, linestyle='--') # Get optimal offset mean_errors = np.mean(errors, 0) weighted_mean_errors = np.dot(errors.T, descriminativeness) / np.sum(descriminativeness) # Plot joint fit ax.plot(offsets, mean_errors, label='Mean', linewidth=2, color='g') ax.plot(offsets, weighted_mean_errors, label='Weighted mean', linewidth=2, color='r') # Get and lot optimal offset optimal_offset = offsets[np.argmin(mean_errors)] 
optimal_offset_weighted = offsets[np.argmin(weighted_mean_errors)] print log.RESULT, 'Optimal threshold: {0}'.format(optimal_offset) print log.RESULT, 'Optimal threshold (weighted): {0}'.format(optimal_offset_weighted) ax.axvline(optimal_offset, linestyle=':', color='g') ax.axvline(optimal_offset_weighted, linestyle=':', color='r') # Plot overlap ax.axvline(overlap, color='0.15', linestyle=':') ax.legend() plt.show() plt.close(fig)
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv, dprs_nonconv): print log.INFO, 'Analysing classification accuracies...' dpis = np.concatenate((dpis_conv, dpis_nonconv)) dprs = np.concatenate((dprs_conv, dprs_nonconv)) labels = np.concatenate( (np.ones(len(dpis_conv)), np.zeros(len(dpis_nonconv)))) # Assemble features features = np.zeros((len(dpis), 2)) features[:, 0] = dpis if args.estimate_dprs: features[:, 1] = dprs else: # Copy DPIs as second features as LDA needs two features features[:, 1] = dpis features = preprocessing.scale(features) acc, sens, spec = run_classification(args, features, labels) print log.RESULT, '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format( args.num_folds, acc, sens, spec) if args.latex_file is not None: data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) filename = os.path.join(data_handler.get_eval_folder(), args.latex_file) print log.INFO, 'Writing classification results to {0}...'.format( filename) with open(filename, 'a') as latex_file: latex_file.write( '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format( args.method, len(args.visits), acc, sens, spec))
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv, dprs_nonconv): print log.INFO, 'Analysing classification accuracies...' dpis = np.concatenate((dpis_conv, dpis_nonconv)) dprs = np.concatenate((dprs_conv, dprs_nonconv)) labels = np.concatenate((np.ones(len(dpis_conv)), np.zeros(len(dpis_nonconv)))) # Assemble features features = np.zeros((len(dpis), 2)) features[:, 0] = dpis if args.estimate_dprs: features[:, 1] = dprs else: # Copy DPIs as second features as LDA needs two features features[:, 1] = dpis features = preprocessing.scale(features) acc, sens, spec = run_classification(args, features, labels) print log.RESULT, '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(args.num_folds, acc, sens, spec) if args.latex_file is not None: data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) filename = os.path.join(data_handler.get_eval_folder(), args.latex_file) print log.INFO, 'Writing classification results to {0}...'.format(filename) with open(filename, 'a') as latex_file: latex_file.write('{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format( args.method, len(args.visits), acc, sens, spec))
def select_converters(args, rids, diagnoses, dpis, dprs): ''' Select data from subjects that convert within 2 years from MCI to AD. ''' data_handler = DataHandler.get_data_handler(method=args.method) measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], no_regression=True, select_training_set=True, select_complete=True) # Select RIDSs of converters rids_select = set() for rid in measurements: if 0.25 <= measurements[rid]['bl']['DX.scan'] <= 0.75 and measurements[ rid]['m24']['DX.scan'] == 1.0: rids_select.add(rid) selected_rids = [] selected_diagnoses = [] selected_dpis = [] selected_dprs = [] for i, rid in enumerate(rids): if rid in rids_select: selected_rids.append(rid) selected_diagnoses.append(diagnoses[i]) selected_dpis.append(dpis[i]) selected_dprs.append(dprs[i]) print log.RESULT, 'Selected {0} converting subjects.'.format( len(selected_rids)) return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def main():
    """
    Command-line entry point for fitting a sample measurements file to the models.

    Reads the measurements from the csv file, sets up a multi-biomarker
    progression model from the model files of all biomarkers found in it,
    estimates the DPI (and, with ``--estimate_dpr``, the DPR) and plots every
    biomarker model together with the first estimate.
    """
    # Parse input arguments (both help texts used to be copies of an unrelated option)
    parser = argparse.ArgumentParser()
    parser.add_argument('--estimate_dpr', action='store_true',
                        help='estimate dprs in addition to dpis')
    parser.add_argument('--samples_file', type=str, default='measurements_sample.csv',
                        help='the csv file containing the sample measurements')
    args = parser.parse_args()

    # Read the measurements as dict from the csv file
    measurements, biomarkers = read_measurements_from_cvs(args.samples_file)
    visits = measurements[0].keys()

    # Get the data handler providing the model files
    data_handler = DataHandler.get_data_handler(method='all',
                                                biomarkers=biomarkers,
                                                phase='joint')

    # Setup model
    model = MultiBiomarkerProgressionModel()
    for biomarker in biomarkers:
        model_file = data_handler.get_model_file(biomarker)
        model.add_model(biomarker, model_file)
    fitter = ModelFitter(model)

    # Estimate dpis (and dprs)
    if args.estimate_dpr:
        rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase='joint')
    else:
        rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase='joint')
        # Without a DPR estimation a rate of one is used for every subject
        dprs = np.ones(len(dpis)).tolist()

    # Plot the models with the fitted samples
    for biomarker in biomarkers:
        plot_biomarker(data_handler, biomarker, measurements, dpis[0], dprs[0])
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    For compatibility with the library, the dict has the structure

        { <rid> : { <viscode> : { DX.scan : <diagnosis>,
                                  scantime : <days after bl>,
                                  <biomarker1> : <volume>,
                                  ... },
                    <viscode> : ... } }

    All values are stored for a single subject with rid 0 and the diagnosis
    'UNKNOWN'. Every column after the first one is treated as a visit; rows
    are identified by their 'Biomarker Name' entry and ignored if that name
    is not known to the DataHandler. Cells that are missing or cannot be
    converted to float are skipped.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the list of
             biomarkers for which at least one value was read
    :raises KeyError: if a visit column is not one of bl, m12, m24 or m36,
                      or if there is no 'Biomarker Name' column
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}
    # Loop invariant, so it is looked up only once
    known_biomarkers = DataHandler.get_all_biomarker_names()

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        visits = reader.fieldnames[1:]
        for visit in visits:
            measurements[0].update({visit: {'scantime': scantime_dict[visit],
                                            'DX.scan': 'UNKNOWN'}})

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (ValueError, TypeError):
                    # Empty / non-numeric cell, or a row shorter than the
                    # header (DictReader then yields None)
                    continue
                measurements[0][visit].update({biomarker: value})
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)
def main():
    """
    Command-line entry point for comparing the prediction errors of all methods.

    For each method the biomarker predictions are computed twice with
    ``et.get_biomarker_predictions``, without and with DPR estimation. The
    observed and naive values are taken from the first run, the model values
    from both runs. The collected values are passed to ``plot_errors``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('-b', '--biomarkers', nargs='+', default=None,
        help='name of the biomarker to be plotted')
    add('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
        help='the phase for which the model is to be trained')
    add('--predict_biomarker', type=str, default='MMSE',
        help='the biomarker to predict')
    add('--recompute_estimates', action='store_true',
        help='recompute the dpi / dpr estimations')
    add('--recompute_predictions', action='store_true',
        help='recompute the biomarker predictions')
    add('--plot_file', type=str, default=None,
        help='filename of the output file')
    args = parser.parse_args()

    visits = ['bl', 'm12', 'm24']
    methods = ['cog', 'vol', 'ml', 'img', 'all']

    values = {}
    for method in methods:
        # Options shared by both runs; only estimate_dprs differs
        shared_options = dict(method=method,
                              phase=args.phase,
                              recompute_estimates=args.recompute_estimates,
                              recompute_predictions=args.recompute_predictions,
                              exclude_cn=True,
                              select_test_set=True,
                              consistent_data=True)

        _, _, values_observed, values_naive, values_model_dpi = \
            et.get_biomarker_predictions(visits, args.predict_biomarker,
                                         estimate_dprs=False, **shared_options)
        _, _, _, _, values_model_dpi_dpr = \
            et.get_biomarker_predictions(visits, args.predict_biomarker,
                                         estimate_dprs=True, **shared_options)

        values[method] = {'observed': values_observed,
                          'naive': values_naive,
                          'model_dpi': values_model_dpi,
                          'model_dpi_dpr': values_model_dpi_dpr}

    plot_errors(args, values, methods)
def select_nonconverters(args, rids, diagnoses, dpis, dprs): ''' Select data from MCI subjects that do not convert. ''' data_handler = DataHandler.get_data_handler(method=args.method) measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12', 'm24'], no_regression=True, select_test_set=True, select_complete=True) # Select RIDSs of non-converters rids_select = set() for rid in measurements: if 0.25 <= measurements[rid]['bl']['DX.scan'] <= 0.75 and 0.25 <= measurements[rid]['m24']['DX.scan'] <= 0.75: rids_select.add(rid) selected_rids = [] selected_diagnoses = [] selected_dpis = [] selected_dprs = [] for i, rid in enumerate(rids): if rid in rids_select: selected_rids.append(rid) selected_diagnoses.append(diagnoses[i]) selected_dpis.append(dpis[i]) selected_dprs.append(dprs[i]) print log.RESULT, 'Selected {0} non-converting subjects.'.format(len(selected_rids)) return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def main(): # Collect data for test data_handler = DataHandler.get_data_handler() biomarkers = DataHandler.get_all_biomarker_names() mean_changes = {} for biomarker in biomarkers: measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12'], biomarkers=[biomarker], select_complete=True) mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0} num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0} for rid in measurements: diagnosis = measurements[rid]['bl']['DX.scan'] value_bl = measurements[rid]['bl'][biomarker] value_y1 = measurements[rid]['m12'][biomarker] scantime_bl = measurements[rid]['bl']['scantime'] scantime_y1 = measurements[rid]['m12']['scantime'] change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl) mean_changes_biomarker[diagnosis] += change num_subjects[diagnosis] += 1 mean_change_mci_ad = mean_changes_biomarker[ 0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0] num_subjects_mci_ad = num_subjects[0.25] + num_subjects[ 0.75] + num_subjects[1.0] for diagnosis in mean_changes_biomarker: mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis] mean_changes_biomarker.update( {0.66: mean_change_mci_ad / num_subjects_mci_ad}) mean_changes.update({biomarker: mean_changes_biomarker}) print log.RESULT, '{0} CN: {1}, (n={2})'.format( biomarker, mean_changes_biomarker[0.0], num_subjects[0.0]) print log.RESULT, '{0} EMCI: {1}, (n={2})'.format( biomarker, mean_changes_biomarker[0.25], num_subjects[0.25]) print log.RESULT, '{0} LMCI: {1}, (n={2})'.format( biomarker, mean_changes_biomarker[0.75], num_subjects[0.75]) print log.RESULT, '{0} AD: {1}, (n={2})'.format( biomarker, mean_changes_biomarker[1.0], num_subjects[1.0]) mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def main():
    """
    Command-line entry point for printing the terminal decline statistics.

    Parses the options, creates the data handler for the selected method,
    biomarkers and phase and calls ``print_terminal_decline_statistics``.
    """
    parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    add = parser.add_argument
    add('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
        help='the method to collect data for')
    add('-b', '--biomarkers', nargs='+', default=None,
        help='name of the biomarker to be plotted')
    add('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
        help='the phase for which the model is to be trained')
    add('-n', '--nr_threads', type=int, default=1,
        help='number of threads')
    add('--min_visits', type=int, default=0,
        help='the minimal number of visits')
    # store_true switches default to False on their own
    add('--no_regression', action='store_true',
        help='do not perform age regression of biomarker values')
    add('--recompute_models', action='store_true',
        help='recompute the models with new samples')
    args = parser.parse_args()

    # Get the data files and biomarkers
    data_handler = DataHandler.get_data_handler(
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase)

    # Print the statistics
    print_terminal_decline_statistics(args, data_handler)
def main():
    """
    Parse the command line and print the terminal decline statistics.

    The data handler is created for the selected method, biomarkers and phase
    and handed, together with the parsed arguments, to
    ``print_terminal_decline_statistics``.
    """
    parser = argparse.ArgumentParser(
        description='Estimate model curves for biomarkers using VGAM.')
    add_option = parser.add_argument
    add_option('-m', '--method', default='all',
               choices=DataHandler.get_method_choices(),
               help='the method to collect data for')
    add_option('-b', '--biomarkers', nargs='+', default=None,
               help='name of the biomarker to be plotted')
    add_option('-p', '--phase', default=None,
               choices=DataHandler.get_phase_choices(),
               help='the phase for which the model is to be trained')
    add_option('-n', '--nr_threads', type=int, default=1,
               help='number of threads')
    add_option('--min_visits', type=int, default=0,
               help='the minimal number of visits')
    # store_true switches default to False on their own
    add_option('--no_regression', action='store_true',
               help='do not perform age regression of biomarker values')
    add_option('--recompute_models', action='store_true',
               help='recompute the models with new samples')
    args = parser.parse_args()

    # Get the data files and biomarkers
    handler_options = dict(method=args.method,
                           biomarkers=args.biomarkers,
                           phase=args.phase)
    data_handler = DataHandler.get_data_handler(**handler_options)

    # Print the statistics
    print_terminal_decline_statistics(args, data_handler)
def from_string(cls, s):
    """
    Build an instance from its string representation.

    The expected format is ``<id>#<num>x<charm_id> <num>x<charm_id> ...``,
    with the charm entries separated by whitespace. Without a ``#`` the whole
    string is used both as id and as charm list.

    :param cls: callable invoked as ``cls(id, gear)`` to create the result
    :param str s: the string representation
    :return: the result of ``cls(id, gear)``, where gear is a list of
             ``(charm, count)`` tuples in the order of the string
    :raises ValueError: if the string contains more than one ``#``, an entry
                        has no ``x`` or a count is not an integer
    """
    if "#" in s:
        gear_id, charm_list = s.split("#")
    else:
        gear_id = charm_list = s

    gear = []
    for entry in charm_list.split():
        # Split at the first "x" only, so that charm ids may contain an "x"
        num, charm_id = entry.split("x", 1)
        gear.append((DataHandler.load_charm_from_id(charm_id), int(num)))
    return cls(gear_id, gear)
def print_training_samples_statistics(args, data_handler): biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits, select_training_set=True, no_regression=True) for biomarker in biomarkers: subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) if progress is not None and value is not None: subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format(biomarker, num_samples, len(subjects))
def print_to_latex(args, results_naive, results_model, num_subjects):
    """
    Append the prediction results as one table row to the LaTeX file.

    The file ``args.latex_file`` is located in the evaluation folder of the
    data handler and opened in append mode.

    :param Namespace args: parsed arguments; ``method``, ``biomarkers``,
                           ``phase``, ``latex_file``, ``predict_biomarker``
                           and ``visits`` are read
    :param dict results_naive: naive results with the keys 'MEAN', 'STD' and 'CORR'
    :param dict results_model: model results with the keys 'MEAN', 'STD' and 'CORR'
    :param num_subjects: the number of subjects, written as last column
    """
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)

    # The backslash of \pm is escaped explicitly; the previously used '\p' is
    # an invalid escape sequence that only worked by accident.
    row_format = ('{0} & {1} {2} & ${3:.2f}\\pm{4:.2f}$ & ${5:.2f}$ & '
                  '${6:.2f}\\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n')
    row = row_format.format(args.predict_biomarker,
                            args.method,
                            len(args.visits),
                            results_naive['MEAN'],
                            results_naive['STD'],
                            results_naive['CORR'],
                            results_model['MEAN'],
                            results_model['STD'],
                            results_model['CORR'],
                            num_subjects)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main(): # Collect data for test data_handler = DataHandler.get_data_handler() biomarkers = DataHandler.get_all_biomarker_names() mean_changes = {} for biomarker in biomarkers: measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'], biomarkers=[biomarker], select_complete=True) mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0} num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0} for rid in measurements: diagnosis = measurements[rid]['bl']['DX.scan'] value_bl = measurements[rid]['bl'][biomarker] value_y1 = measurements[rid]['m12'][biomarker] scantime_bl = measurements[rid]['bl']['scantime'] scantime_y1 = measurements[rid]['m12']['scantime'] change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl) mean_changes_biomarker[diagnosis] += change num_subjects[diagnosis] += 1 mean_change_mci_ad = mean_changes_biomarker[0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0] num_subjects_mci_ad = num_subjects[0.25] + num_subjects[0.75] + num_subjects[1.0] for diagnosis in mean_changes_biomarker: mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis] mean_changes_biomarker.update({0.66: mean_change_mci_ad / num_subjects_mci_ad}) mean_changes.update({biomarker: mean_changes_biomarker}) print log.RESULT, '{0} CN: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.0], num_subjects[0.0]) print log.RESULT, '{0} EMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.25], num_subjects[0.25]) print log.RESULT, '{0} LMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.75], num_subjects[0.75]) print log.RESULT, '{0} AD: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[1.0], num_subjects[1.0]) mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def print_training_samples_statistics(args, data_handler): biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( min_visits=args.min_visits, select_training_set=True, no_regression=True) for biomarker in biomarkers: subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) if progress is not None and value is not None: subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format( biomarker, num_samples, len(subjects))
def test_vs(name1, gear1, name2, gear2):
    """
    Simulate repeated battles between two players and log the win rates.

    The battle is repeated ``simulating_times`` times with freshly loaded
    players. Every result other than a win of the first player is counted
    for the second player. The battle log is opened for the first battle
    only and re-opened for the final summary.

    :param name1: id of the first player (the attacker)
    :param gear1: gear string of the first player
    :param name2: id of the second player (the defender)
    :param gear2: gear string of the second player
    """
    battlelog.log_close()
    wins_first = 0
    wins_second = 0
    for round_index in xrange(simulating_times):
        if round_index:
            battlelog.log_close()
        else:
            battlelog.log_open()

        # The players are reloaded for every battle
        player1 = DataHandler.load_player_from_id(name1)
        player2 = DataHandler.load_player_from_id(name2)
        player1.import_gear(Gear.from_string(gear1))
        player2.import_gear(Gear.from_string(gear2))

        env = Envioronment([player1], [player2])
        if env.start() == EnvioronmentType.Win:
            wins_first += 1
        else:
            wins_second += 1

    battlelog.log_open()
    summary = "%s vs %s, total %d times: %s win probability = %f%%, %s win probability = %f%%\n"
    battlelog.log(summary % (player1.name, player2.name, simulating_times,
                             player1.name, float(wins_first * 100) / simulating_times,
                             player2.name, float(wins_second * 100) / simulating_times))
def main():
    """
    Command-line entry point for evaluating and sorting the biomarkers.

    Every biomarker of the data handler is evaluated in parallel with the
    evaluation function selected by ``--metric`` ('cover' selects
    ``evaluate_biomarker_cover``, any other value
    ``evaluate_biomarker_disc``). Afterwards ``sort_biomarkers`` is called.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
        help='the method to collect data for')
    add('-b', '--biomarkers', nargs='+', default=None,
        help='name of the biomarker to be plotted')
    add('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
        help='the phase for which the model is to be trained')
    add('-n', '--nr_threads', type=int, default=4,
        help='number of threads')
    add('--recompute_metric', action='store_true',
        help='recompute the metric')
    add('--value_samples', type=int, default=100,
        help='the number of values samples')
    add('--progress_samples', type=int, default=50,
        help='the number of progress samples')
    add('--quantiles', type=float, nargs=2, default=[0.01, 0.99],
        help='the quantiles for the interval computation')
    add('--metric', type=str, default='cover',
        help='the metric used for the evaluation')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)

    # Compute error for each biomarker
    biomarkers = data_handler.get_biomarker_names()
    if args.metric == 'cover':
        evaluation_function = evaluate_biomarker_cover
    else:
        evaluation_function = evaluate_biomarker_disc

    run_parallel = jl.Parallel(n_jobs=args.nr_threads)
    run_parallel(jl.delayed(evaluation_function)(args, data_handler, biomarker)
                 for biomarker in biomarkers)

    sort_biomarkers(args, data_handler, biomarkers)
def analyse_decline(args, rids, dpis, dprs, rds, non_rds): print log.INFO, 'Analysing classification accuracies...' # dpis = np.array(dpis) # dprs = np.array(dprs) # labels = np.array([1 if rid in rds else 0 for rid in rids]) dpis_rds = [] dpis_nonrds = [] dprs_rds = [] dprs_nonrds = [] for rid, dpi, dpr in zip(rids, dpis, dprs): if rid in rds: dpis_rds.append(dpi) dprs_rds.append(dpr) elif rid in non_rds: dpis_nonrds.append(dpi) dprs_nonrds.append(dpr) dpis = np.concatenate((dpis_rds, dpis_nonrds)) dprs = np.concatenate((dprs_rds, dprs_nonrds)) labels = np.concatenate( (np.ones(len(dpis_rds)), np.zeros(len(dpis_nonrds)))) # Assemble features features = np.zeros((len(dpis), 2)) features[:, 0] = dpis if args.estimate_dprs: features[:, 1] = dprs else: # Copy DPIs as second features as LDA needs two features features[:, 1] = dpis features = preprocessing.scale(features) acc, sens, spec = run_classification(args, features, labels) print log.RESULT, '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format( args.num_folds, acc, sens, spec) if args.latex_file is not None: data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) filename = os.path.join(data_handler.get_eval_folder(), args.latex_file) print log.INFO, 'Writing classification results to {0}...'.format( filename) with open(filename, 'a') as latex_file: latex_file.write( '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format( args.method, len(args.visits), acc, sens, spec))
def main():
    """Scatter-plot two biomarkers against each other, coloured by diagnosis.

    All complete measurements of the two requested biomarkers are collected
    over all subjects and visits. Diagnosis values between 0.25 and 0.75
    are mapped to 0.5 before plotting. The figure is saved to
    ``--plot_file`` if given and shown interactively otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                        help='name of the biomarker to be plotted')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(biomarkers=biomarkers,
                                                         select_complete=True)

    # Collect biomarker values of every visit of every subject
    samples = [measurements[rid][visit]
               for rid in measurements
               for visit in measurements[rid]]
    biomarkers_1 = [sample[biomarkers[0]] for sample in samples]
    biomarkers_2 = [sample[biomarkers[1]] for sample in samples]
    diagnoses = np.array([sample['DX.scan'] for sample in samples])

    # Merge the intermediate diagnoses into a single class
    is_intermediate = (0.25 <= diagnoses) & (diagnoses <= 0.75)
    diagnoses[is_intermediate] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    ax.scatter(biomarkers_1, biomarkers_2,
               s=15.0, c=diagnoses, edgecolor='none',
               vmin=0.0, vmax=1.0,
               cmap=pt.progression_cmap, alpha=0.25)
    ax.set_xlabel(biomarkers[0])
    ax.set_ylabel(biomarkers[1])

    # Plot legend
    legend_colours = (pt.color_cn, pt.color_mci, pt.color_ad)
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=colour + (0.25,), linewidth=0)
             for colour in legend_colours]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def print_to_latex(args, results_naive, results_model, num_subjects):
    """Append one row with the prediction results to a LaTeX table file.

    The file ``args.latex_file`` inside the evaluation folder of the data
    handler is opened in append mode, so existing rows are kept.

    :param args: parsed arguments (method, biomarkers, phase, latex_file,
                 predict_biomarker and visits are read)
    :param results_naive: dict with the keys 'MEAN', 'STD' and 'CORR' for
                          the naive prediction
    :param results_model: dict with the keys 'MEAN', 'STD' and 'CORR' for
                          the model-based prediction
    :param num_subjects: value written to the last column of the row
    """
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)

    # The backslash of \pm is escaped explicitly instead of relying on the
    # unrecognised escape sequence '\p'; the resulting string is unchanged.
    row_format = ('{0} & {1} {2} & ${3:.2f}\\pm{4:.2f}$ & ${5:.2f}$ & '
                  '${6:.2f}\\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n')
    row = row_format.format(args.predict_biomarker,
                            args.method,
                            len(args.visits),
                            results_naive['MEAN'],
                            results_naive['STD'],
                            results_naive['CORR'],
                            results_model['MEAN'],
                            results_model['STD'],
                            results_model['CORR'],
                            num_subjects)

    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def analyse_decline(args, rids, dpis, dprs, rds, non_rds): print log.INFO, 'Analysing classification accuracies...' # dpis = np.array(dpis) # dprs = np.array(dprs) # labels = np.array([1 if rid in rds else 0 for rid in rids]) dpis_rds = [] dpis_nonrds = [] dprs_rds = [] dprs_nonrds = [] for rid, dpi, dpr in zip(rids, dpis, dprs): if rid in rds: dpis_rds.append(dpi) dprs_rds.append(dpr) elif rid in non_rds: dpis_nonrds.append(dpi) dprs_nonrds.append(dpr) dpis = np.concatenate((dpis_rds, dpis_nonrds)) dprs = np.concatenate((dprs_rds, dprs_nonrds)) labels = np.concatenate((np.ones(len(dpis_rds)), np.zeros(len(dpis_nonrds)))) # Assemble features features = np.zeros((len(dpis), 2)) features[:, 0] = dpis if args.estimate_dprs: features[:, 1] = dprs else: # Copy DPIs as second features as LDA needs two features features[:, 1] = dpis features = preprocessing.scale(features) acc, sens, spec = run_classification(args, features, labels) print log.RESULT, '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(args.num_folds, acc, sens, spec) if args.latex_file is not None: data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) filename = os.path.join(data_handler.get_eval_folder(), args.latex_file) print log.INFO, 'Writing classification results to {0}...'.format(filename) with open(filename, 'a') as latex_file: latex_file.write('{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format( args.method, len(args.visits), acc, sens, spec))
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    For compatibility with the library, the dict has the structure
    { <rid> : { <viscode> : { 'DX.scan' : <diagnosis>,
                              'scantime' : <days after bl>,
                              <biomarker1> : <volume>,
                              ... },
                <viscode> : ... } }
    All values are stored for a single subject with rid 0 and the
    diagnosis 'UNKNOWN'.

    The first column of the file is not used as a visit; every further
    column header is a viscode. Rows are identified by their
    'Biomarker Name' entry and are skipped if that name is not known to
    the DataHandler. Cells that are missing or cannot be converted to a
    float are skipped.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the list of
             biomarkers for which at least one value was read
    :raises KeyError: if a visit column is not 'bl', 'm12', 'm24' or 'm36'
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        visits = reader.fieldnames[1:]
        for visit in visits:
            measurements[0][visit] = {'scantime': scantime_dict[visit],
                                      'DX.scan': 'UNKNOWN'}

        # Look the known names up once instead of once per row
        known_biomarkers = set(DataHandler.get_all_biomarker_names())

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue

            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # ValueError: empty or non-numeric cell.
                    # TypeError: the row is shorter than the header, so
                    # DictReader filled the cell with None.
                    continue
                measurements[0][visit][biomarker] = value
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)
def get_rfds(args, rids, diagnoses, dpis, dprs): data_handler = DataHandler.get_data_handler() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm24'], biomarkers=['FAQ'], select_complete=True, no_regression=True) rfds = set() non_rfds = set() for rid in rids: if rid in measurements: faq_bl = measurements[rid]['bl']['FAQ'] faq_m24 = measurements[rid]['m24']['FAQ'] rcd = (faq_m24 - faq_bl) >= 10 if rcd: rfds.add(rid) else: non_rfds.add(rid) print log.RESULT, 'Selected {0} subjects with rapid functional decline (RFD).'.format(len(rfds)) print log.RESULT, 'Selected {0} subjects without rapid functional decline (non-RFD).'.format(len(non_rfds)) return rfds, non_rfds
def get_rcds(args, rids, diagnoses, dpis, dprs): data_handler = DataHandler.get_data_handler() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm24'], biomarkers=['MMSE'], select_complete=True, no_regression=True) rcds = set() non_rcds = set() for rid in rids: if rid in measurements: mmse_bl = measurements[rid]['bl']['MMSE'] mmse_m24 = measurements[rid]['m24']['MMSE'] rcd = (mmse_bl - mmse_m24) >= 8 if rcd: rcds.add(rid) else: non_rcds.add(rid) print log.RESULT, 'Selected {0} subjects with rapid cognitive decline (RCD).'.format(len(rcds)) print log.RESULT, 'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format(len(non_rcds)) return rcds, non_rcds
def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('--consistent_data', action='store_true', help='us only subjects with bl, m12 and m24 visits') parser.add_argument('--estimate_dprs', action='store_true', help='estimate dpis and dprs') parser.add_argument('--recompute_estimates', action='store_true', help='recompute the dpi / dpr estimations') parser.add_argument('--recompute_predictions', action='store_true', help='recompute the biomarker predictions') parser.add_argument('--exclude_cn', action='store_true', help='exclude healthy subjects from analysis') args = parser.parse_args() estimates = {} methods = ['cog', 'vol', 'ml', 'img', 'all'] for method in methods: estimates.update({method: {}}) for visits in [['bl'], ['m12'], ['m24'], ['bl', 'm12'], ['m12', 'm24']]: _, diagnoses, dpis, _, _, _ = et.get_progress_estimates(visits, method=method, phase=args.phase, estimate_dprs=args.estimate_dprs) diagnoses = np.array(diagnoses) dpis = np.array(dpis) visits_string = '_'.join(visits) estimates[method].update({visits_string: {}}) estimates[method][visits_string].update({'CN': np.mean(dpis[np.where(diagnoses == 0.0)])}) estimates[method][visits_string].update({'EMCI': np.mean(dpis[np.where(diagnoses == 0.25)])}) estimates[method][visits_string].update({'LMCI': np.mean(dpis[np.where(diagnoses == 0.75)])}) estimates[method][visits_string].update({'AD': np.mean(dpis[np.where(diagnoses == 1.0)])}) for method in methods: print log.INFO, 'Results for {0}'.format(method) for diagnosis in ['CN', 'EMCI', 'LMCI', 'AD']: print log.RESULT, '{0: <4}: {1:.2f} {2:.2f} | {3:.2f} '.format( diagnosis, estimates[method]['m12'][diagnosis] - estimates[method]['bl'][diagnosis], estimates[method]['m24'][diagnosis] - estimates[method]['m12'][diagnosis], estimates[method]['m12_m24'][diagnosis] - estimates[method]['bl_m12'][diagnosis])
def get_rfds(args, rids, diagnoses, dpis, dprs): data_handler = DataHandler.get_data_handler() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm24'], biomarkers=['FAQ'], select_complete=True, no_regression=True) rfds = set() non_rfds = set() for rid in rids: if rid in measurements: faq_bl = measurements[rid]['bl']['FAQ'] faq_m24 = measurements[rid]['m24']['FAQ'] rcd = (faq_m24 - faq_bl) >= 10 if rcd: rfds.add(rid) else: non_rfds.add(rid) print log.RESULT, 'Selected {0} subjects with rapid functional decline (RFD).'.format( len(rfds)) print log.RESULT, 'Selected {0} subjects without rapid functional decline (non-RFD).'.format( len(non_rfds)) return rfds, non_rfds
def get_rcds(args, rids, diagnoses, dpis, dprs): data_handler = DataHandler.get_data_handler() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm24'], biomarkers=['MMSE'], select_complete=True, no_regression=True) rcds = set() non_rcds = set() for rid in rids: if rid in measurements: mmse_bl = measurements[rid]['bl']['MMSE'] mmse_m24 = measurements[rid]['m24']['MMSE'] rcd = (mmse_bl - mmse_m24) >= 8 if rcd: rcds.add(rid) else: non_rcds.add(rid) print log.RESULT, 'Selected {0} subjects with rapid cognitive decline (RCD).'.format( len(rcds)) print log.RESULT, 'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format( len(non_rcds)) return rcds, non_rcds
def test_add_gear(gear):
    """Add a gear to the stored gears unless its id is already present.

    :param gear: object providing ``id`` and ``to_json_obj()``
    """
    gears = DataHandler.load_gears()
    # An existing entry with the same id is never overwritten
    if gear.id not in gears:
        gears[gear.id] = gear.to_json_obj()
    # NOTE(review): the nesting of this call was reconstructed from
    # whitespace-collapsed source -- confirm whether the save is meant to
    # run unconditionally or only after a new gear was added.
    DataHandler.save_gears(gears)
def get_fitting_data(args, data_handler_joint): biomarkers = data_handler_joint.get_biomarker_names() offsets = range(args.search_range[0], args.search_range[1], args.search_range[2]) errors_file = os.path.join(data_handler_joint.get_eval_folder(), 'offset_errors_{0}.p'.format(args.extrapolator)) if os.path.isfile(errors_file) and not args.recompute_errors: print log.INFO, 'Reading errors estimations from file {0}...'.format(errors_file) (errors, descriminativeness, overlap) = pickle.load(open(errors_file, 'rb')) else: data_handler_1 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='cnmci') data_handler_2 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='mciad') errors = np.zeros((len(biomarkers), len(offsets))) descriminativeness = np.zeros(len(biomarkers)) overlap = [] for i, biomarker in enumerate(biomarkers): # Get error matrix for all biomarkers and offsets model_file_1 = data_handler_1.get_model_file(biomarker) model_file_2 = data_handler_2.get_model_file(biomarker) if os.path.isfile(model_file_1) and os.path.isfile(model_file_2): print log.INFO, 'Analysing {0}...'.format(biomarker) # Get discriminativeness for all biomarkers as a scaling factor eval_file_1 = model_file_1.replace('.csv', '_eval_cover.csv') eval_file_2 = model_file_2.replace('.csv', '_eval_cover.csv') if os.path.isfile(eval_file_1) and os.path.isfile(eval_file_2): descriminate_1 = np.mean(mlab.csv2rec(eval_file_1)['error']) descriminate_2 = np.mean(mlab.csv2rec(eval_file_2)['error']) descriminativeness[i] = 0.5 * (descriminate_1 + descriminate_2) else: print log.WARNING, 'Evaluation file missing for {0}'.format(biomarker) continue # Initialise models model_1 = ProgressionModel(biomarker, model_file_1, extrapolator=args.extrapolator) model_2 = ProgressionModel(biomarker, model_file_2, extrapolator=args.extrapolator) # Assemble errors for each offset min_val_1, max_val_1 = model_1.get_value_range([0.1, 0.9]) min_val_2, 
max_val_2 = model_2.get_value_range([0.1, 0.9]) values = np.linspace(min(min_val_1, min_val_2), max(max_val_1, max_val_2), 250) values_delta = (values.max() - values.min()) / len(values) for j, offset in enumerate(offsets): dens_11 = np.array(model_1.get_density_distribution(values, offset + model_2.min_progress)) dens_12 = np.array(model_2.get_density_distribution(values, model_2.min_progress)) dens_21 = np.array(model_1.get_density_distribution(values, model_1.max_progress)) dens_22 = np.array(model_2.get_density_distribution(values, -offset + model_1.max_progress)) errors[i, j] = 0.5 * values_delta * (np.sum(np.abs(dens_11 - dens_12)) + np.sum(np.abs(dens_21 - dens_22))) # Get overlap overlap.append(model_1.max_progress - model_2.min_progress) overlap = np.mean(overlap) print log.INFO, 'Saving errors to file {0}...'.format(errors_file) pickle.dump((errors, descriminativeness, overlap), open(errors_file, 'wb')) return biomarkers, offsets, errors, descriminativeness, overlap
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format( predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format( predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format( predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format( predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' 
mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates( visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict( visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel( predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format( predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit][ 'scantime'] progress_next_visit = ModelFitter.scantime_to_progress( scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][ predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile( progress, value) mean_quantile /= len(visits) value_model = model.get_value_at_quantile( 
progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][ predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format( predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format( predict_biomarker, prediction_file) pickle.dump( (rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model
def plot_model(args, data_handler, biomarker):
    """Plot the progression model of one biomarker.

    Depending on the options in ``args``, the left axes (ax1) show the
    quantile curves, the aligned samples and optional overlays, and the
    right axes (ax2) show the probability density functions at a set of
    sampled progresses. The figure is saved if ``args.save_plots`` or
    ``args.plot_file`` is set and shown interactively otherwise.

    Nothing is plotted if the model file of the biomarker does not exist.

    :param args: parsed command line arguments controlling the plot
    :param data_handler: the data handler providing the model and samples files
    :param biomarker: the name of the biomarker to plot
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)

    # The synthetic model is only drawn for biomarkers it knows about
    plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names()

    #
    # Read model
    #
    pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator)

    # Extend the plotted range by 30% of the model range on both sides
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20)

    # Calc min and max val for the quantiles 0.1 and 0.9
    min_val, max_val = pm.get_value_range([0.1, 0.9])
    # progress_linspace = np.linspace(min_progress_extrapolate, max_progress_extrapolate, 100)
    # min_val = float('inf')
    # max_val = float('-inf')
    # for quantile in [0.1, 0.9]:
    #     curve = pm.get_quantile_curve(progress_linspace, quantile)
    #     min_val = min(min_val, np.min(curve))
    #     max_val = max(max_val, np.max(curve))

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    # Half width if only one of the two axes is drawn
    figure_width = 6 if args.no_densities or args.only_densities else 12
    fig = plt.figure(figsize=(figure_width, 5))
    if args.only_densities:
        ax1 = None
        ax2 = plt.subplot(1, 1, 1)
        pt.setup_axes(plt, ax2, xgrid=False, ygrid=False)
    elif args.no_densities:
        ax1 = plt.subplot(1, 1, 1)
        ax2 = None
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    else:
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
        pt.setup_axes(plt, ax2)

    if not args.only_densities:
        if args.no_model and not args.plot_synth_model:
            ax1.set_title('Aligned samples for {0}'.format(biomarker_string))
        else:
            ax1.set_title('Quantile curves for {0}'.format(biomarker_string))
        if args.phase == 'mciad':
            ax1.set_xlabel('Disease progress (days before/after conversion to AD)')
        else:
            ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
        ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
        if args.xlim is not None:
            ax1.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)
        if args.ylim is not None:
            ax1.set_ylim(args.ylim[0], args.ylim[1])

    #
    # Plot the percentile curves of the fitted model
    #
    if not args.no_model and not args.only_densities:
        # Dotted lines mark the progress range covered by the model
        ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
        ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
        for grey_value, quantile in zip(grey_values, quantiles):
            curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
            ax1.plot(progress_linspace_int, curve_int, color=grey_value)

            # Extrapolated parts of the curve are drawn dashed
            if not args.no_extrapolation:
                curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile)
                curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile)
                ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value)
                ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value)

            if args.plot_quantile_label:
                label = '$q={0}\%$'.format(quantile * 100)
                ax1.text(progress_linspace_int[-1] + 10, curve_int[-1], label, fontsize=10)

        if args.plot_donohue:
            print 'Plotting Donohue'
            donohue_file = os.path.join(data_handler._conf.models_folder,
                                        'donohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.')))
            if not os.path.isfile(donohue_file):
                print log.ERROR, 'Donohue model file not found: {0}'.format(donohue_file)
                return

            r = mlab.csv2rec(donohue_file)
            if args.method == 'joint':
                offset = 2200
            else:
                offset = 300
            # NOTE(review): the factor 30.44 presumably converts months to
            # days -- confirm against the Donohue population files
            progrs = r[r.dtype.names[0]] * 30.44 + offset
            vals = r[r.dtype.names[1]]

            curve_donohue = []
            progr_donohue = []
            for p in progress_linspace_int:
                if progrs[0] < p < progrs[-1]:
                    # Take the first Donohue sample at or beyond p
                    i = 1
                    while p > progrs[i]:
                        i += 1
                    # TODO linear interpolation
                    progr_donohue.append(progrs[i])
                    curve_donohue.append(vals[i])
            ax1.plot(progr_donohue, curve_donohue, '--', color='b', linewidth=2)

    #
    # Plot synthetic model curve
    #
    # NOTE(review): ax1 is None if args.only_densities is set, which this
    # section does not check -- confirm the options cannot be combined
    if plot_synth_model:
        progress_linspace_synth = np.linspace(-2500, 2500, 100)
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        alphas = [0.4, 0.7, 1.0, 0.7, 0.4]
        for quantile, alpha in zip(quantiles, alphas):
            curve_synth = [SynthModel.get_distributed_value(biomarker, p, cdf=quantile) for p in progress_linspace_synth]
            ax1.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha)

    #
    # Plot predictor function
    #
    if args.plot_eta is not None and not args.only_densities:
        # Get second axis of plot 1
        ax1b = ax1.twinx()

        # Plot all progresses
        # ax1b.scatter(pm.all_progresses, pm.all_mus, facecolor='b', marker='o', edgecolor='none', alpha=0.2)
        ax1b.text(pm.progresses[-1], pm.sigmas[-1], '$\mu$', color='b', fontsize=11)

        # Plot binned progresses
        ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x')

        # Plot interpolated model
        mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int]
        ax1b.plot(progress_linspace_int, mus, color='b')

        if not args.no_extrapolation:
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1]
            ax1b.plot(progress_linspace_ex1, mus, '--', color='b')
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2]
            ax1b.plot(progress_linspace_ex2, mus, '--', color='b')
        if args.xlim is not None:
            ax1b.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot errors
    #
    if args.plot_errors and not args.only_densities:
        eval_file = model_file.replace('.csv', '_eval_cover.csv')
        if not os.path.isfile(eval_file):
            print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file)
        else:
            m = mlab.csv2rec(eval_file)
            progresses = m['progress']
            errors = m['error']

            # Get second axis of plot 1
            ax1b = ax1.twinx()
            # ax1b.set_ylim(0, max(150, 1.2 * np.max(errors)))
            ax1b.plot(progresses, errors, color='g', marker='x')
            ax1b.text(progresses[-1], errors[-1], 'Discr.', color='g', fontsize=11)
            ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5)

            # Box spanning the evaluated progresses and the range of the
            # median curve over them
            median_curve = pm.get_quantile_curve(progresses, 0.5)
            min_value = np.min(median_curve)
            max_value = np.max(median_curve)
            rect = mpl.patches.Rectangle((progresses[0], min_value), progresses[-1] - progresses[0],
                                         max_value - min_value,
                                         fc=(0.0, 0.5, 0.0, 0.1), ec=(0.0, 0.5, 0.0, 0.8),
                                         linewidth=1)
            ax1.add_patch(rect)

    #
    # Plot points
    #
    if not args.no_points and not args.only_densities:
        samples_file = data_handler.get_samples_file(biomarker)
        if not os.path.isfile(samples_file):
            print log.ERROR, 'Samples file not found: {0}'.format(samples_file)
        else:
            m = mlab.csv2rec(samples_file)
            progr_points = m['progress']
            value_points = m['value']
            # diagn_points = [0.5 if p < 0 else 1.0 for p in progr_points]
            diagn_points = m['diagnosis']
            # Map all diagnoses between 0.25 and 0.75 to the same colour
            diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5

            print log.INFO, 'Plotting {0} sample points...'.format(len(progr_points))
            ax1.scatter(progr_points, value_points, s=15.0, c=diagn_points, edgecolor='none',
                        vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=args.points_alpha)

            # The legend lists only the diagnoses of the selected phase
            if args.phase == 'cnmci':
                rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha,), linewidth=0),
                         mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0)]
                labels = ['CN', 'MCI']
            elif args.phase == 'mciad':
                rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0),
                         mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha,), linewidth=0)]
                labels = ['MCI', 'AD']
            else:
                rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha,), linewidth=0),
                         mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0),
                         mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha,), linewidth=0)]
                labels = ['CN', 'MCI', 'AD']
            legend = ax1.legend(rects, labels, fontsize=10, ncol=len(rects), loc='upper center', framealpha=0.9)
            legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Plot PDFs
    #
    progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000] if args.phase == 'joint' else \
                    [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000]

    # NOTE(review): vmin / vmax are only assigned for the three phases
    # below; for any other value of args.phase they are unbound when the
    # colour map is created in the next statement
    if args.phase == 'cnmci':
        vmin = -2000
        vmax = 6000
    elif args.phase == 'mciad':
        vmin = -6000
        vmax = 2000
    elif args.phase == 'joint':
        vmin = -2000
        vmax = 4000
    sample_cmap = cmx.ScalarMappable(
        norm=colors.Normalize(vmin=vmin, vmax=vmax),
        cmap=plt.get_cmap(pt.progression_cmap))

    # Mark the sampled progresses in the left plot
    if not args.no_sample_lines and not args.only_densities:
        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                # Dashed if the progress lies outside of the model range
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                ax1.axvline(progr, color=sample_color, linestyle=linestyle, alpha=0.3)

    if not args.no_densities:
        ax2.set_title('Probability density function for {0}'.format(biomarker_string))
        ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker))
        ax2.set_ylabel('Probability')

        # The value axis of the densities follows the y-limits of plot 1
        if args.ylim is None:
            values = np.linspace(min_val, max_val, 250)
            ax2.set_xlim(min_val, max_val)
        else:
            values = np.linspace(args.ylim[0], args.ylim[1], 250)
            ax2.set_xlim(args.ylim[0], args.ylim[1])

        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                probs = pm.get_density_distribution(values, progr)
                ax2.plot(values, probs, label=str(progr), color=sample_color, linestyle=linestyle)

                if plot_synth_model:
                    probs = [SynthModel.get_probability(biomarker, progr, v) for v in values]
                    ax2.plot(values, probs, color='b', linestyle='--')

        legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9)
        legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    if args.save_plots or args.plot_file is not None:
        if args.plot_file is not None:
            plot_filename = args.plot_file
        else:
            # Default: next to the model file, with the extension .pdf
            plot_filename = model_file.replace('.csv', '.pdf')
        plt.savefig(plot_filename, transparent=True)
    else:
        plt.show()
    plt.close(fig)
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr): """ Plot the model of one biomarker with the fitted values :param data_handler: the data handler :param biomarker: the biomarker to plot :param measurements: the measurements containing the biomarker samples of one subject :param dpi: the estimated DPI :param dpr: the estimated DPR """ model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) # # Read model # pm = ProgressionModel(biomarker, model_file) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 fig = plt.figure(figsize=(figure_width, 5)) ax1 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) ax1.set_title('Model for {0} with fitted sample values'.format(biomarker_string)) ax1.set_xlabel('Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot the percentile curves of the fitted model # ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, 
color=grey_value) curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) label = 'q = {0}'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 100, curve_int[-1], label, fontsize=10) # # Plot points # progr_points = [] value_points = [] diagn_points = [] for visit in measurements[0]: if biomarker in measurements[0][visit]: progress = measurements[0][visit]['scantime'] * dpr + dpi value = measurements[0][visit][biomarker] progr_points.append(progress) value_points.append(value) diagn_points.append(1.0) ax1.axvline(progress, color='b', linestyle='--') ax1.text(progress + 150, value, visit, color='b', fontsize=10) ax1.scatter(progr_points, value_points, s=25.0, color='b', edgecolor='none', vmin=0.0, vmax=1.0, alpha=0.9) # # Draw or save the plot # plt.tight_layout() plt.show() plt.close(fig)
def from_json_obj(cls, obj):
    """
    Build an instance from a decoded JSON object

    :param cls: the callable used to construct the result; called as
                ``cls(identifier, charms)``
    :param obj: a mapping with the keys 'id' and 'gear'; every 'gear' entry
                must provide 'charm_id' and 'number'
    :return: the result of ``cls(identifier, charms)``, where ``charms`` is a
             list of ``(charm, number)`` tuples
    """
    identifier = obj['id']
    # Resolve each charm id via the data handler and pair it with its count
    charms = [(DataHandler.load_charm_from_id(entry['charm_id']), entry['number'])
              for entry in obj['gear']]
    return cls(identifier, charms)
def main():
    """
    Scatter-plot two biomarkers against each other, coloured by diagnosis

    Command line options select the two biomarkers and, optionally, a file to
    save the figure to; without ``--plot_file`` the figure is shown on screen.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                        help='name of the biomarker to be plotted')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(biomarkers=biomarkers,
                                                         select_complete=True)

    # Collect one (x, y, diagnosis) triple per subject and visit
    name_x = biomarkers[0]
    name_y = biomarkers[1]
    values_x = []
    values_y = []
    diagnoses = []
    for rid in measurements:
        for visit in measurements[rid]:
            sample = measurements[rid][visit]
            values_x.append(sample[name_x])
            values_y.append(sample[name_y])
            diagnoses.append(sample['DX.scan'])

    # Collapse every diagnosis value in [0.25, 0.75] to the single value 0.5
    diagnoses = np.array(diagnoses)
    in_mid_range = (0.25 <= diagnoses) & (diagnoses <= 0.75)
    diagnoses[in_mid_range] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    ax.scatter(values_x, values_y, s=15.0, c=diagnoses, edgecolor='none',
               vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.25)
    ax.set_xlabel(name_x)
    ax.set_ylabel(name_y)

    # Plot legend
    legend_colors = [pt.color_cn, pt.color_mci, pt.color_ad]
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=color + (0.25, ), linewidth=0)
             for color in legend_colors]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def get_progress_estimates(visits, method=None, biomarkers=None, phase=None, recompute_estimates=False, estimate_dprs=False, consistent_data=False, select_training_set=False, select_test_set=False): # Get data handler and biomarker names data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) # Get filename estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p' if biomarkers is None: estimates_file_basename = estimates_file_trunk.format(method, '_'.join(visits)) else: biomarkers_string = '_'.join(biomarkers).replace(' ', '_') estimates_file_basename = estimates_file_trunk.format(biomarkers_string, '_'.join(visits)) estimates_file = os.path.join(data_handler.get_eval_folder(), estimates_file_basename) # Read if estimates exist, else recompute if os.path.isfile(estimates_file) and not recompute_estimates: # Read test results from file print log.INFO, 'Reading DPI{0} estimations from {1}...'.format('\DPR' if estimate_dprs else '', estimates_file) (rids, diagnoses, dpis, dprs, mean_min, mean_max) = pickle.load(open(estimates_file, 'rb')) else: # Collect data for test biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12', 'm24'], biomarkers=biomarkers, select_complete=True) # Setup model model = MultiBiomarkerProgressionModel() for biomarker in biomarkers: model_file = data_handler.get_model_file(biomarker) model.add_model(biomarker, model_file) fitter = ModelFitter(model) # Calculate mean and max progress mean_min = model.get_mean_min_progress() mean_max = model.get_mean_max_progress() # Estimate dpis (and dprs) and save data if not estimate_dprs or len(visits) == 1: if estimate_dprs and len(visits) == 1: print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)' rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase=phase) dprs = np.ones(len(dpis)).tolist() else: rids, 
diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase=phase) print log.INFO, 'Saving DPI{0} estimations to {1}...'.format('\DPR' if estimate_dprs else '', estimates_file) pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max), open(estimates_file, 'wb')) # Reduce to consistent data sets with bl, m12 and m24 samples if consistent_data or select_training_set or select_test_set: consistent_method = 'all' if consistent_data else method consistent_data_handler = DataHandler.get_data_handler(method=consistent_method) consistent_measurements = consistent_data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], select_training_set=select_training_set, select_test_set=select_test_set, select_complete=True, no_regression=True) consistent_rids = [] consistent_diagnoses = [] consistent_dpis = [] consistent_dprs = [] for i, rid in enumerate(rids): if rid in consistent_measurements: consistent_rids.append(rid) consistent_diagnoses.append(diagnoses[i]) consistent_dpis.append(dpis[i]) consistent_dprs.append(dprs[i]) rids = consistent_rids diagnoses = consistent_diagnoses dpis = consistent_dpis dprs = consistent_dprs print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis)) # Return results return rids, diagnoses, dpis, dprs, mean_min, mean_max
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format(predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format(predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' 
mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit]['scantime'] progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile(progress, value) mean_quantile /= len(visits) value_model = 
model.get_value_at_quantile(progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file) pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model
def plot_predictions(biomarker, model, visits, rid_measurements, dpi, dpr, value_model, value_naive,
                     mean_quantile, change, intercept, rid, plot_folder='/Users/aschmiri/Desktop/temp'):
    """
    Plot the observed samples of one subject with the model-based and the naive prediction

    The figure is written to ``plot_predictions_<rid>_<biomarker>.pdf`` inside
    ``plot_folder``; it is not shown on screen.

    :param biomarker: name of the predicted biomarker
    :param model: progression model of the biomarker
    :param visits: list of viscodes the prediction is based on
    :param rid_measurements: measurements of the subject, indexed by viscode
    :param dpi: the estimated DPI of the subject
    :param dpr: the estimated DPR of the subject
    :param value_model: the model-based prediction at the predicted visit
    :param value_naive: the naive prediction at the predicted visit
    :param mean_quantile: quantile along which the model-based prediction runs
    :param change: slope of the naive prediction line
    :param intercept: intercept of the naive prediction line
    :param rid: ID of the subject, used in the title and the file name
    :param plot_folder: folder the PDF is written to; defaults to the
                        previously hard-coded location
    """
    next_visit = get_predicted_visit(visits)
    scantime_first_visit = rid_measurements[visits[0]]['scantime']
    scantime_next_visit = rid_measurements[next_visit]['scantime']
    progress_first_visit = ModelFitter.scantime_to_progress(scantime_first_visit, scantime_first_visit, dpi, dpr)
    progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr)

    # The quantile curves extend 5% and the axis 10% of the scan time span
    # beyond the first and the predicted visit
    total_scantime = scantime_next_visit - scantime_first_visit
    progress_linspace = np.linspace(progress_first_visit - total_scantime * 0.05,
                                    progress_next_visit + total_scantime * 0.05, 100)

    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False, ygrid=False)
    ax.set_title('{0} predictions for RID {1} (DPI={2}, DPR={3})'.format(
        pt.get_biomarker_string(biomarker), rid, dpi, dpr))
    ax.set_xlabel('Disease progress (days before/after conversion to AD)')
    ax.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax.set_xlim(progress_first_visit - total_scantime * 0.1,
                progress_next_visit + total_scantime * 0.1)
    color_mapper = cm.ScalarMappable(cmap=plt.get_cmap(pt.progression_cmap),
                                     norm=colors.Normalize(vmin=0.0, vmax=1.0))

    # Plot the percentile curves of the fitted model
    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.8', '0.6', '0.4', '0.62', '0.84']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve = model.get_quantile_curve(progress_linspace, quantile)
        ax.plot(progress_linspace, curve, zorder=1, color=grey_value)

    # Collect points
    progr_points = []
    value_points = []
    diagn_points = []
    for visit in visits + [next_visit]:
        value_points.append(rid_measurements[visit][biomarker])
        progr_points.append(ModelFitter.scantime_to_progress(rid_measurements[visit]['scantime'],
                                                             scantime_first_visit, dpi, dpr))
        diagn_points.append(rid_measurements[visit]['DX.scan'])

    # Collect lines
    predict_diagnosis = rid_measurements[next_visit]['DX.scan']
    predict_linspace = np.linspace(progress_first_visit, progress_next_visit, 50)
    curve = [model.get_value_at_quantile(p, mean_quantile) for p in predict_linspace]
    # The naive line is defined over scan time, so map progress back first
    line = [change * ModelFitter.progress_to_scantime(p, scantime_first_visit, dpi, dpr) + intercept
            for p in predict_linspace]

    # Plot model and linear prediction line
    ax.plot(predict_linspace, line, zorder=1, linestyle='--', linewidth=2, color='k',
            label='naive prediction')
    ax.plot(predict_linspace, curve, zorder=1, linestyle='-', linewidth=2, color='k',
            label='model-based prediction')
    ax.scatter(progr_points, value_points, zorder=2, s=50.0,
               c=[color_mapper.to_rgba(d) for d in diagn_points], edgecolor='none')

    # Plot the predicted values
    ax.scatter([progress_next_visit], [value_naive], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))
    ax.scatter([progress_next_visit], [value_model], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))

    plt.tight_layout()
    plt.legend()

    plot_filename = os.path.join(plot_folder, 'plot_predictions_{0}_{1}.pdf'.format(rid, biomarker))
    plt.savefig(plot_filename, transparent=True)
    # plt.show()
    plt.close(fig)
def plot_predictions(biomarker,
                     model,
                     visits,
                     rid_measurements,
                     dpi,
                     dpr,
                     value_model,
                     value_naive,
                     mean_quantile,
                     change,
                     intercept,
                     rid,
                     plot_folder='/Users/aschmiri/Desktop/temp'):
    """
    Plot the observed samples of one subject with the model-based and the naive prediction

    The figure is written to ``plot_predictions_<rid>_<biomarker>.pdf`` inside
    ``plot_folder``; it is not shown on screen.

    :param biomarker: name of the predicted biomarker
    :param model: progression model of the biomarker
    :param visits: list of viscodes the prediction is based on
    :param rid_measurements: measurements of the subject, indexed by viscode
    :param dpi: the estimated DPI of the subject
    :param dpr: the estimated DPR of the subject
    :param value_model: the model-based prediction at the predicted visit
    :param value_naive: the naive prediction at the predicted visit
    :param mean_quantile: quantile along which the model-based prediction runs
    :param change: slope of the naive prediction line
    :param intercept: intercept of the naive prediction line
    :param rid: ID of the subject, used in the title and the file name
    :param plot_folder: folder the PDF is written to; defaults to the
                        previously hard-coded location
    """
    next_visit = get_predicted_visit(visits)
    scantime_first_visit = rid_measurements[visits[0]]['scantime']
    scantime_next_visit = rid_measurements[next_visit]['scantime']
    progress_first_visit = ModelFitter.scantime_to_progress(
        scantime_first_visit, scantime_first_visit, dpi, dpr)
    progress_next_visit = ModelFitter.scantime_to_progress(
        scantime_next_visit, scantime_first_visit, dpi, dpr)

    # The quantile curves extend 5% and the axis 10% of the scan time span
    # beyond the first and the predicted visit
    total_scantime = scantime_next_visit - scantime_first_visit
    progress_linspace = np.linspace(
        progress_first_visit - total_scantime * 0.05,
        progress_next_visit + total_scantime * 0.05, 100)

    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False, ygrid=False)
    ax.set_title('{0} predictions for RID {1} (DPI={2}, DPR={3})'.format(
        pt.get_biomarker_string(biomarker), rid, dpi, dpr))
    ax.set_xlabel('Disease progress (days before/after conversion to AD)')
    ax.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax.set_xlim(progress_first_visit - total_scantime * 0.1,
                progress_next_visit + total_scantime * 0.1)
    color_mapper = cm.ScalarMappable(cmap=plt.get_cmap(pt.progression_cmap),
                                     norm=colors.Normalize(vmin=0.0, vmax=1.0))

    # Plot the percentile curves of the fitted model
    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.8', '0.6', '0.4', '0.62', '0.84']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve = model.get_quantile_curve(progress_linspace, quantile)
        ax.plot(progress_linspace, curve, zorder=1, color=grey_value)

    # Collect points
    progr_points = []
    value_points = []
    diagn_points = []
    for visit in visits + [next_visit]:
        value_points.append(rid_measurements[visit][biomarker])
        progr_points.append(
            ModelFitter.scantime_to_progress(
                rid_measurements[visit]['scantime'], scantime_first_visit,
                dpi, dpr))
        diagn_points.append(rid_measurements[visit]['DX.scan'])

    # Collect lines
    predict_diagnosis = rid_measurements[next_visit]['DX.scan']
    predict_linspace = np.linspace(progress_first_visit, progress_next_visit, 50)
    curve = [
        model.get_value_at_quantile(p, mean_quantile)
        for p in predict_linspace
    ]
    # The naive line is defined over scan time, so map progress back first
    line = [
        change * ModelFitter.progress_to_scantime(p, scantime_first_visit, dpi, dpr) + intercept
        for p in predict_linspace
    ]

    # Plot model and linear prediction line
    ax.plot(predict_linspace, line, zorder=1, linestyle='--', linewidth=2,
            color='k', label='naive prediction')
    ax.plot(predict_linspace, curve, zorder=1, linestyle='-', linewidth=2,
            color='k', label='model-based prediction')
    ax.scatter(progr_points, value_points, zorder=2, s=50.0,
               c=[color_mapper.to_rgba(d) for d in diagn_points],
               edgecolor='none')

    # Plot the predicted values
    ax.scatter([progress_next_visit], [value_naive], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))
    ax.scatter([progress_next_visit], [value_model], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))

    plt.tight_layout()
    plt.legend()

    plot_filename = os.path.join(
        plot_folder, 'plot_predictions_{0}_{1}.pdf'.format(rid, biomarker))
    plt.savefig(plot_filename, transparent=True)
    # plt.show()
    plt.close(fig)
def main():
    """
    Parse the command line options and plot the model of every selected biomarker

    The parsed options are handed to ``plot_model`` unchanged, once per
    biomarker name reported by the data handler.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--xlim', type=float, nargs=2, default=None,
                        help='force certain x limits for plotting')
    parser.add_argument('--ylim', type=float, nargs=2, default=None,
                        help='force certain y limits for plotting')
    parser.add_argument('--no_model', action='store_true', default=False,
                        help='do not plot the fitted model')
    parser.add_argument('--no_points', action='store_true', default=False,
                        help='do not plot points')
    parser.add_argument('--points_alpha', type=float, default=0.25,
                        help='alpha value of the plotted points')
    parser.add_argument('--no_densities', action='store_true', default=False,
                        help='do not plot densities')
    parser.add_argument('--no_sample_lines', action='store_true', default=False,
                        help='do not plot the sample lines')
    parser.add_argument('--only_densities', action='store_true', default=False,
                        help='only plot densities')
    parser.add_argument('--no_extrapolation', action='store_true', default=False,
                        help='do not extrapolate the model')
    parser.add_argument('--plot_eta', type=str, choices=['lambda', 'mu', 'sigma'], default=None,
                        help='plot a predictor function')
    parser.add_argument('--plot_errors', action='store_true', default=False,
                        help='plot the errors')
    parser.add_argument('--plot_synth_model', action='store_true', default=False,
                        help='plot density distributions for synthetic data')
    parser.add_argument('--plot_quantile_label', action='store_true', default=False,
                        help='plot labels on the quantile curves')
    parser.add_argument('--plot_donohue', action='store_true', default=False,
                        help='plot the trajectory estimated with Donohue et al.')
    parser.add_argument('--save_plots', action='store_true', default=False,
                        help='save the plots with a default filename')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max): print log.INFO, 'Plotting estimates...' test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase) dpi_range = float(test_dpi_max - test_dpi_min) dpi_factor = float(args.plot_steps) / dpi_range # Setup plot fig, ax = plt.subplots(figsize=(6, 2)) biomarkers_str = args.method if args.biomarkers is None else ', '.join( args.biomarkers) ax.set_title('DP estimation using {0} at {1}'.format( biomarkers_str, ', '.join(args.visits))) ax.spines['left'].set_position(('outward', 10)) ax.spines['bottom'].set_position(('outward', 10)) ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') xticks = np.linspace(0, args.plot_steps, 7) ax.set_xticks(xticks) ax.set_xticklabels( [int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks]) # Compute matrix diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3} matrix = np.zeros((4, args.plot_steps + 1)) for dpi, diag in zip(dpis, diagnoses): row = diagnosis_indices[diag] dpi_index = round((dpi - test_dpi_min) * dpi_factor) matrix[row, dpi_index] += 1.0 # Draw annotations dpis = np.array(dpis) diagnoses = np.array(diagnoses) medians = [] q25 = [] q75 = [] for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] matrix[row] /= np.sum(matrix[row]) indices = np.where(diagnoses == diag) median = np.median(dpis[indices]) medians.append((median - test_dpi_min) * dpi_factor) q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor) q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor) if args.plot_lines: ax.set_ylim(-0.01, 0.36) sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(0.0, 1.0), cmap=plt.get_cmap( pt.progression_cmap)) for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] plt.plot(matrix[row], color=sample_cmap.to_rgba(diag)) else: ax.set_yticks([0, 1, 2, 3]) ax.set_yticklabels(['CN', 'EMCI', 
'LMCI', 'AD']) cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap( 'Greys') bar_color = 'w' if args.plot_cmap_jet else 'r' plt.errorbar(medians, [0, 1, 2, 3], xerr=[q25, q75], fmt='none', ecolor=bar_color, elinewidth=2, capsize=4, capthick=2) plt.plot(medians, [0, 1, 2, 3], linestyle='', color=bar_color, marker='|', markersize=15, markeredgewidth=2) plt.imshow(matrix, cmap=cmap, interpolation='nearest') plt.axvline((mean_min - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((mean_max - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((0.0 - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) if args.phase == 'joint': data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) plt.axvline( (data_handler.get_model_offset() - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) # Draw or save the plot plt.tight_layout() if args.plot_file is not None: plt.savefig(args.plot_file, transparent=True) else: plt.show() plt.close(fig)
def main():
    """
    Parse the command line options, get the progress estimates and classify the diagnoses

    The estimates are obtained from ``et.get_progress_estimates`` and handed
    to ``classify_diagnoses`` together with the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str,
                        help='the viscodes to be sampled')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-c', '--classifier', default='svm', choices=['lda', 'svm', 'lsvm', 'rf'],
                        help='the approach used to classify the subjects')
    # The help text used to be a copy of the one of --recompute_estimates
    parser.add_argument('--estimate_dprs', action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--num_folds', type=int, default=10,
                        help='number of folds for the n-fold cross validation')
    parser.add_argument('--num_runs', type=int, default=1,
                        help='number of runs the x-fold cross-validation is performed')
    parser.add_argument('--latex_file', type=str, default=None,
                        help='add output to a LaTeX file')
    args = parser.parse_args()

    # Get estimates
    _, diagnoses, dpis, dprs, _, _ = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        consistent_data=args.consistent_data)

    # Analyse estimates
    classify_diagnoses(args, dpis, dprs, diagnoses)
def get_progress_estimates(visits, method=None, biomarkers=None, phase=None, recompute_estimates=False, estimate_dprs=False, consistent_data=False, select_training_set=False, select_test_set=False): # Get data handler and biomarker names data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) # Get filename estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p' if biomarkers is None: estimates_file_basename = estimates_file_trunk.format( method, '_'.join(visits)) else: biomarkers_string = '_'.join(biomarkers).replace(' ', '_') estimates_file_basename = estimates_file_trunk.format( biomarkers_string, '_'.join(visits)) estimates_file = os.path.join(data_handler.get_eval_folder(), estimates_file_basename) # Read if estimates exist, else recompute if os.path.isfile(estimates_file) and not recompute_estimates: # Read test results from file print log.INFO, 'Reading DPI{0} estimations from {1}...'.format( '\DPR' if estimate_dprs else '', estimates_file) (rids, diagnoses, dpis, dprs, mean_min, mean_max) = pickle.load(open(estimates_file, 'rb')) else: # Collect data for test biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], biomarkers=biomarkers, select_complete=True) # Setup model model = MultiBiomarkerProgressionModel() for biomarker in biomarkers: model_file = data_handler.get_model_file(biomarker) model.add_model(biomarker, model_file) fitter = ModelFitter(model) # Calculate mean and max progress mean_min = model.get_mean_min_progress() mean_max = model.get_mean_max_progress() # Estimate dpis (and dprs) and save data if not estimate_dprs or len(visits) == 1: if estimate_dprs and len(visits) == 1: print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)' rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase=phase) dprs = np.ones(len(dpis)).tolist() else: rids, 
diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase=phase) print log.INFO, 'Saving DPI{0} estimations to {1}...'.format( '\DPR' if estimate_dprs else '', estimates_file) pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max), open(estimates_file, 'wb')) # Reduce to consistent data sets with bl, m12 and m24 samples if consistent_data or select_training_set or select_test_set: consistent_method = 'all' if consistent_data else method consistent_data_handler = DataHandler.get_data_handler( method=consistent_method) consistent_measurements = consistent_data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], select_training_set=select_training_set, select_test_set=select_test_set, select_complete=True, no_regression=True) consistent_rids = [] consistent_diagnoses = [] consistent_dpis = [] consistent_dprs = [] for i, rid in enumerate(rids): if rid in consistent_measurements: consistent_rids.append(rid) consistent_diagnoses.append(diagnoses[i]) consistent_dpis.append(dpis[i]) consistent_dprs.append(dprs[i]) rids = consistent_rids diagnoses = consistent_diagnoses dpis = consistent_dpis dprs = consistent_dprs print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis)) # Return results return rids, diagnoses, dpis, dprs, mean_min, mean_max
def main(): parser = argparse.ArgumentParser() parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename') args = parser.parse_args() # Collect data for test data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12'], biomarkers=biomarkers, select_training_set=True, select_complete=True) # Setup plotting folder eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants') # Process all biomarkers for biomarker in biomarkers: print log.INFO, 'Generating quantile correlation plot for {0}...'.format( biomarker) model_file = data_handler.get_model_file(biomarker) pm = ProgressionModel(biomarker, model_file) q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker)) if os.path.isfile(q_file): (q_bl, q_m12) = pickle.load(open(q_file, 'rb')) else: q_bl = [] q_m12 = [] for rid in measurements: val_bl = measurements[rid]['bl'][biomarker] val_m12 = measurements[rid]['m12'][biomarker] p_bl = measurements[rid]['bl']['progress'] p_m12 = measurements[rid]['m12']['progress'] q_bl.append(pm.approximate_quantile(p_bl, val_bl)) q_m12.append(pm.approximate_quantile(p_m12, val_m12)) pickle.dump((q_bl, q_m12), open(q_file, 'wb')) # Setup plot fig, axs = plt.subplots(1, 2) plt.suptitle('Correlation between bl and m12 quantiles') # Plot 1 ax = axs[0] pt.setup_axes(plt, ax, yspine=True) ax.set_xlabel('Quantile bl') ax.set_ylabel('Quantile m12') 
ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5) # Plot 2 q_bl = np.array(q_bl) q_m12 = np.array(q_m12) errors = q_bl - q_m12 loc, scale = norm.fit(errors, floc=0.0) ax = axs[1] pt.setup_axes(plt, ax) ax.set_xlabel('Difference bl to m12') ax.set_ylabel('Probability') ax.set_xlim(-1.05, 1.05) ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3) x = np.linspace(-1.0, 1.0, 100) ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k') # Draw or save the plot plt.tight_layout() if args.save_plots: plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker)) plt.savefig(plot_file, transparent=True) else: plt.show() plt.close(fig)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max): print log.INFO, 'Plotting estimates...' test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase) dpi_range = float(test_dpi_max - test_dpi_min) dpi_factor = float(args.plot_steps) / dpi_range # Setup plot fig, ax = plt.subplots(figsize=(6, 2)) biomarkers_str = args.method if args.biomarkers is None else ', '.join(args.biomarkers) ax.set_title('DP estimation using {0} at {1}'.format(biomarkers_str, ', '.join(args.visits))) ax.spines['left'].set_position(('outward', 10)) ax.spines['bottom'].set_position(('outward', 10)) ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') xticks = np.linspace(0, args.plot_steps, 7) ax.set_xticks(xticks) ax.set_xticklabels([int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks]) # Compute matrix diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3} matrix = np.zeros((4, args.plot_steps + 1)) for dpi, diag in zip(dpis, diagnoses): row = diagnosis_indices[diag] dpi_index = round((dpi - test_dpi_min) * dpi_factor) matrix[row, dpi_index] += 1.0 # Draw annotations dpis = np.array(dpis) diagnoses = np.array(diagnoses) medians = [] q25 = [] q75 = [] for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] matrix[row] /= np.sum(matrix[row]) indices = np.where(diagnoses == diag) median = np.median(dpis[indices]) medians.append((median - test_dpi_min) * dpi_factor) q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor) q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor) if args.plot_lines: ax.set_ylim(-0.01, 0.36) sample_cmap = cmx.ScalarMappable( norm=colors.Normalize(0.0, 1.0), cmap=plt.get_cmap(pt.progression_cmap)) for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] plt.plot(matrix[row], color=sample_cmap.to_rgba(diag)) else: ax.set_yticks([0, 1, 2, 3]) ax.set_yticklabels(['CN', 'EMCI', 
'LMCI', 'AD']) cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap('Greys') bar_color = 'w' if args.plot_cmap_jet else 'r' plt.errorbar(medians, [0, 1, 2, 3], xerr=[q25, q75], fmt='none', ecolor=bar_color, elinewidth=2, capsize=4, capthick=2) plt.plot(medians, [0, 1, 2, 3], linestyle='', color=bar_color, marker='|', markersize=15, markeredgewidth=2) plt.imshow(matrix, cmap=cmap, interpolation='nearest') plt.axvline((mean_min - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((mean_max - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((0.0 - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) if args.phase == 'joint': data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) plt.axvline((data_handler.get_model_offset() - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) # Draw or save the plot plt.tight_layout() if args.plot_file is not None: plt.savefig(args.plot_file, transparent=True) else: plt.show() plt.close(fig)
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr): """ Plot the model of one biomarker with the fitted values :param data_handler: the data handler :param biomarker: the biomarker to plot :param measurements: the measurements containing the biomarker samples of one subject :param dpi: the estimated DPI :param dpr: the estimated DPR """ model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) # # Read model # pm = ProgressionModel(biomarker, model_file) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 fig = plt.figure(figsize=(figure_width, 5)) ax1 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) ax1.set_title( 'Model for {0} with fitted sample values'.format(biomarker_string)) ax1.set_xlabel('Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot the percentile curves of the fitted model # ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, 
color=grey_value) curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) label = 'q = {0}'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 100, curve_int[-1], label, fontsize=10) # # Plot points # progr_points = [] value_points = [] diagn_points = [] for visit in measurements[0]: if biomarker in measurements[0][visit]: progress = measurements[0][visit]['scantime'] * dpr + dpi value = measurements[0][visit][biomarker] progr_points.append(progress) value_points.append(value) diagn_points.append(1.0) ax1.axvline(progress, color='b', linestyle='--') ax1.text(progress + 150, value, visit, color='b', fontsize=10) ax1.scatter(progr_points, value_points, s=25.0, color='b', edgecolor='none', vmin=0.0, vmax=1.0, alpha=0.9) # # Draw or save the plot # plt.tight_layout() plt.show() plt.close(fig)