def main():
    """
    Predict a biomarker from the given visits and evaluate the predictions.

    The configuration is read from the command line.  The observed, naive and
    model-based values are obtained from ``et.get_biomarker_predictions``
    (always with ``select_test_set=True``), plotted unless ``--no_plot`` is
    given, and finally handed to ``analyse_biomarker_predictions``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('visits', nargs='+', type=str,
                     help='the viscodes of the visits that are available')
    cli.add_argument('-m', '--method', default='all',
                     choices=DataHandler.get_method_choices(),
                     help='the method to collect data for')
    cli.add_argument('-b', '--biomarkers', nargs='+', default=None,
                     help='name of the biomarker to be plotted')
    cli.add_argument('-p', '--phase', default=None,
                     choices=DataHandler.get_phase_choices(),
                     help='the phase for which the model is to be trained')
    cli.add_argument('--predict_biomarker', type=str, default='MMSE',
                     help='the biomarker to predict')

    # The boolean switches all follow the same declaration pattern; the
    # order of this table is the order in which they appear in the help.
    switches = (
        ('--recompute_estimates', 'recompute the dpi / dpr estimations'),
        ('--recompute_predictions', 'recompute the biomarker predictions'),
        ('--estimate_dprs', 'estimate dpis and dprs'),
        ('--consistent_data', 'use only subjects with bl, m12 and m24 visits'),
        ('--exclude_cn', 'exclude healthy subjects from analysis'),
        ('--use_last_visit', 'use only the last visit for prediction'),
        ('--naive_use_diagnosis', 'use the specific mean change for the diagnosis'),
        ('--no_plot', 'do not plot the results'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    # Optional output files
    output_files = (
        ('--plot_file', 'filename of the output file'),
        ('--latex_file', 'add output to a LaTeX file'),
    )
    for flag, description in output_files:
        cli.add_argument(flag, type=str, default=None, help=description)
    args = cli.parse_args()

    prediction_options = dict(
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        recompute_estimates=args.recompute_estimates,
        recompute_predictions=args.recompute_predictions,
        estimate_dprs=args.estimate_dprs,
        select_test_set=True,
        consistent_data=args.consistent_data,
        exclude_cn=args.exclude_cn,
        use_last_visit=args.use_last_visit,
        naive_use_diagnosis=args.naive_use_diagnosis)
    predictions = et.get_biomarker_predictions(args.visits,
                                               args.predict_biomarker,
                                               **prediction_options)
    # The first returned element is not needed here.
    _, diagnoses, values_observed, values_naive, values_model = predictions

    if not args.no_plot:
        plot_biomarker_predictions(args, diagnoses, values_observed, values_model)
    analyse_biomarker_predictions(args, diagnoses, values_observed, values_naive, values_model)
def generate_csv_files(args, data_handler):
    """
    Generate the CSV file used to call the R script.

    :param Namespace args:
    :param DataHandler data_handler:
    """
    assert isinstance(args, argparse.Namespace)
    assert isinstance(data_handler, DataHandler)

    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits,
                                                         select_training_set=True,
                                                         exclude_deceased=args.exclude_deceased)
    for biomarker in biomarkers:
        print log.INFO, 'Generating output CSV for {0}...'.format(biomarker)
        samples_file = data_handler.get_samples_file(biomarker)
        writer = csv.writer(open(samples_file, 'wb'), delimiter=',')
        writer.writerow(['rid', 'progress', 'value', 'diagnosis'])

        subjects = set()
        num_samples = 0
        for rid, visits in measurements.items():
            for _, visit_data in visits.items():
                try:
                    progress = DataHandler.safe_cast(visit_data['progress'], int)
                    value = DataHandler.safe_cast(visit_data[biomarker], float)
                    diagnosis = DataHandler.safe_cast(visit_data['DX.scan'], float)
                    if progress is not None and value is not None:
                        writer.writerow([rid, progress, value, diagnosis])
                        subjects.add(rid)
                        num_samples += 1
                except KeyError:
                    pass
        print log.RESULT, 'Collected {0} samples from {1} subjects.'.format(num_samples, len(subjects))
def main():
    """
    Analyse the decline of subjects based on their progress estimates.

    Parses the command-line options, fetches the progress estimates with
    ``et.get_progress_estimates``, partitions the subjects with ``get_rcds``
    and ``get_rfds`` and runs ``analyse_decline`` on both partitions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str, help='the viscodes to be sampled')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-c', '--classifier', default='svm', choices=['lda', 'svm', 'lsvm', 'rf'],
                        help='the approach used to classify the subjects')
    # The help text used to be a copy of the --recompute_estimates text.
    parser.add_argument('--estimate_dprs', action='store_true', help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--num_folds', type=int, default=10,
                        help='number of folds for the n-fold cross validation')
    parser.add_argument('--latex_file', type=str, default=None, help='add output to a LaTeX file')
    args = parser.parse_args()

    # Get estimates (the last two returned elements are not needed here)
    rids, diagnoses, dpis, dprs, _, _ = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        consistent_data=args.consistent_data)

    # Select converters and non-converters sets
    rcds, non_rcds = get_rcds(args, rids, diagnoses, dpis, dprs)
    rfds, non_rfds = get_rfds(args, rids, diagnoses, dpis, dprs)

    # Analyse output
    analyse_decline(args, rids, dpis, dprs, rcds, non_rcds)
    analyse_decline(args, rids, dpis, dprs, rfds, non_rfds)
def main():
    """
    Estimate the disease progress of the test subjects and plot the results.

    Parses the command-line options and fetches the progress estimates with
    ``et.get_progress_estimates`` (always with ``select_test_set=True``).
    Unless ``--no_plot`` is given, the DPI estimates are plotted; the joint
    DPI/DPR distribution is plotted only when ``--estimate_dprs`` is set.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str, help='the viscodes to be sampled')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    # The help text used to be a copy of the --recompute_estimates text.
    parser.add_argument('--estimate_dprs', action='store_true', help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true', help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true', help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--no_plot', action='store_true', help='do not plot the results')
    parser.add_argument('--plot_lines', action='store_true', help='plot graphs instead of matrix')
    parser.add_argument('--plot_steps', type=int, default=15, help='number of steps for the DPI scale')
    parser.add_argument('--plot_file', type=str, default=None, help='filename of the output file')
    parser.add_argument('--plot_cmap_jet', action='store_true', help='use the colour map jet')
    args = parser.parse_args()

    # Get estimates (the first returned element is not needed here)
    _, diagnoses, dpis, dprs, mean_min, mean_max = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        select_test_set=True,
        consistent_data=args.consistent_data)

    # Plot results
    if not args.no_plot:
        plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max)
        if args.estimate_dprs:
            plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses)
def main():
    """
    Plot the progression model of every selected biomarker.

    Parses the command-line options, creates the data handler for the chosen
    method / biomarkers / phase and calls ``plot_model`` once per biomarker
    name reported by the handler.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--xlim', type=float, nargs=2, default=None,
                        help='force certain x limits for plotting')
    parser.add_argument('--ylim', type=float, nargs=2, default=None,
                        help='force certain y limits for plotting')
    # 'store_true' switches default to False, so no explicit default is needed.
    parser.add_argument('--no_model', action='store_true', help='do not plot the fitted model')
    parser.add_argument('--no_points', action='store_true', help='do not plot points')
    parser.add_argument('--points_alpha', type=float, default=0.25,
                        help='alpha value of the plotted points')
    parser.add_argument('--no_densities', action='store_true', help='do not plot densities')
    parser.add_argument('--no_sample_lines', action='store_true', help='do not plot the sample lines')
    parser.add_argument('--only_densities', action='store_true', help='only plot densities')
    parser.add_argument('--no_extrapolation', action='store_true', help='do not extrapolate the model')
    parser.add_argument('--plot_eta', type=str, choices=['lambda', 'mu', 'sigma'], default=None,
                        help='plot a predictor function')
    parser.add_argument('--plot_errors', action='store_true', help='plot the errors')
    parser.add_argument('--plot_synth_model', action='store_true',
                        help='plot density distributions for synthetic data')
    parser.add_argument('--plot_quantile_label', action='store_true',
                        help='plot labels on the quantile curves')
    parser.add_argument('--plot_donohue', action='store_true',
                        help='plot the trajectory estimated with Donohue et al.')
    parser.add_argument('--save_plots', action='store_true',
                        help='save the plots with a default filename')
    parser.add_argument('--plot_file', type=str, default=None, help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
示例#6
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b',
                        '--biomarkers',
                        nargs='+',
                        default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p',
                        '--phase',
                        default='mciad',
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e',
                        '--extrapolator',
                        type=str,
                        choices=['lin', 'sqrt', 'exp'],
                        default='exp',
                        help='the type of extrapolator')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)

    biomarkers = data_handler.get_biomarker_names()
    if args.method == 'joint':
        offsets = np.linspace(500, 3000, 26)
    else:
        offsets = np.linspace(-1000, 1000, 21)
    all_diffs = np.zeros((len(offsets), len(biomarkers)))

    for i, biomarker in enumerate(biomarkers):
        diffs = get_model_differences(args, data_handler, biomarker, offsets)
        all_diffs[:, i] = diffs
        print biomarker, offsets[np.argmin(diffs)]

    optimum_index = np.argmin(np.mean(all_diffs, axis=1))

    print 'all', offsets[optimum_index]

    mins = all_diffs[optimum_index, :]  # np.min(all_diffs, axis=0)
    indices = np.argsort(mins)
    for i in indices:
        print biomarkers[i], mins[i]

    fig = plt.figure()
    ax1 = plt.subplot(1, 1, 1)
    ax1.plot(offsets, all_diffs, color='r')
    ax1.plot(offsets, np.mean(all_diffs, axis=1), color='b')
    plt.show()
    plt.close(fig)
示例#7
0
def main():
    """
    Determine and plot the optimal offset between the CN/MCI and MCI/AD models.

    The fitting data (biomarkers, offsets, errors, discriminativeness and
    overlap) is obtained from ``get_fitting_data`` for the 'joint' phase.
    The error curves of all biomarkers whose discriminativeness exceeds
    ``--plot_threshold`` are plotted together with the mean and the
    discriminativeness-weighted mean error; the offsets minimising the two
    mean curves are printed and marked in the plot.
    """
    parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp', help='the type of extrapolator')
    parser.add_argument('--plot_threshold', type=float, default=0.3, help='the threshold above which graphs are plotted')
    parser.add_argument('--recompute_errors', action='store_true', help='recompute the matrix containing the fitting errors')
    # Without an explicit type, values given on the command line would be
    # strings while the default consists of ints.
    parser.add_argument('--search_range', nargs=3, type=int, default=(1000, 5000, 10), help='the range in which the offset is sought')
    args = parser.parse_args()

    # Get the data files and biomarkers
    data_handler_joint = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='joint')
    biomarkers, offsets, errors, descriminativeness, overlap = get_fitting_data(args, data_handler_joint)

    # Plot single biomarker fits
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False)
    ax.set_title('Optimal offset between CN/MCI and MCI/AD models')
    ax.set_xlabel('Offset (days)')
    ax.set_ylabel('Fitting error')
    for i, biomarker in enumerate(biomarkers):
        if descriminativeness[i] > args.plot_threshold:
            print log.RESULT, 'Min error for {0} at {1}'.format(biomarker, offsets[np.argmin(errors[i, :])])
            ax.plot(offsets, errors[i, :], label=biomarker, linestyle='--')

    # Get the mean error per offset, plain and weighted by discriminativeness
    mean_errors = np.mean(errors, 0)
    weighted_mean_errors = np.dot(errors.T, descriminativeness) / np.sum(descriminativeness)

    # Plot joint fit
    ax.plot(offsets, mean_errors, label='Mean', linewidth=2, color='g')
    ax.plot(offsets, weighted_mean_errors, label='Weighted mean', linewidth=2, color='r')

    # Get and plot optimal offset
    optimal_offset = offsets[np.argmin(mean_errors)]
    optimal_offset_weighted = offsets[np.argmin(weighted_mean_errors)]
    print log.RESULT, 'Optimal threshold: {0}'.format(optimal_offset)
    print log.RESULT, 'Optimal threshold (weighted): {0}'.format(optimal_offset_weighted)
    ax.axvline(optimal_offset, linestyle=':', color='g')
    ax.axvline(optimal_offset_weighted, linestyle=':', color='r')

    # Plot overlap
    ax.axvline(overlap, color='0.15', linestyle=':')

    ax.legend()
    plt.show()
    plt.close(fig)
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv,
                        dprs_nonconv):
    print log.INFO, 'Analysing classification accuracies...'
    dpis = np.concatenate((dpis_conv, dpis_nonconv))
    dprs = np.concatenate((dprs_conv, dprs_nonconv))
    labels = np.concatenate(
        (np.ones(len(dpis_conv)), np.zeros(len(dpis_nonconv))))

    # Assemble features
    features = np.zeros((len(dpis), 2))
    features[:, 0] = dpis
    if args.estimate_dprs:
        features[:, 1] = dprs
    else:
        # Copy DPIs as second features as LDA needs two features
        features[:, 1] = dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    print log.RESULT, '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)

    if args.latex_file is not None:
        data_handler = DataHandler.get_data_handler(method=args.method,
                                                    biomarkers=args.biomarkers,
                                                    phase=args.phase)
        filename = os.path.join(data_handler.get_eval_folder(),
                                args.latex_file)
        print log.INFO, 'Writing classification results to {0}...'.format(
            filename)
        with open(filename, 'a') as latex_file:
            latex_file.write(
                '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
                    args.method, len(args.visits), acc, sens, spec))
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv, dprs_nonconv):
    print log.INFO, 'Analysing classification accuracies...'
    dpis = np.concatenate((dpis_conv, dpis_nonconv))
    dprs = np.concatenate((dprs_conv, dprs_nonconv))
    labels = np.concatenate((np.ones(len(dpis_conv)), np.zeros(len(dpis_nonconv))))

    # Assemble features
    features = np.zeros((len(dpis), 2))
    features[:, 0] = dpis
    if args.estimate_dprs:
        features[:, 1] = dprs
    else:
        # Copy DPIs as second features as LDA needs two features
        features[:, 1] = dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    print log.RESULT, '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(args.num_folds, acc, sens, spec)

    if args.latex_file is not None:
        data_handler = DataHandler.get_data_handler(method=args.method,
                                                    biomarkers=args.biomarkers,
                                                    phase=args.phase)
        filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
        print log.INFO, 'Writing classification results to {0}...'.format(filename)
        with open(filename, 'a') as latex_file:
            latex_file.write('{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
                             args.method,
                             len(args.visits),
                             acc, sens, spec))
def select_converters(args, rids, diagnoses, dpis, dprs):
    ''' Select data from subjects that convert within 2 years from MCI to AD. '''
    data_handler = DataHandler.get_data_handler(method=args.method)
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm12', 'm24'],
        no_regression=True,
        select_training_set=True,
        select_complete=True)

    # Select RIDSs of converters
    rids_select = set()
    for rid in measurements:
        if 0.25 <= measurements[rid]['bl']['DX.scan'] <= 0.75 and measurements[
                rid]['m24']['DX.scan'] == 1.0:
            rids_select.add(rid)

    selected_rids = []
    selected_diagnoses = []
    selected_dpis = []
    selected_dprs = []
    for i, rid in enumerate(rids):
        if rid in rids_select:
            selected_rids.append(rid)
            selected_diagnoses.append(diagnoses[i])
            selected_dpis.append(dpis[i])
            selected_dprs.append(dprs[i])

    print log.RESULT, 'Selected {0} converting subjects.'.format(
        len(selected_rids))
    return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def main():
    """
    Fit the sample measurements of a csv file to the joint model and plot them.

    The measurements are read with ``read_measurements_from_cvs``, a
    ``MultiBiomarkerProgressionModel`` is assembled from the model files of
    all biomarkers found in the file, and the DPI (and optionally the DPR) is
    estimated.  Every biomarker is then plotted with the first estimated
    DPI / DPR.
    """
    # Parse input arguments
    parser = argparse.ArgumentParser()
    # Both help texts used to be copies of an unrelated option's help.
    parser.add_argument('--estimate_dpr', action='store_true', help='estimate dpis and dprs')
    parser.add_argument('--samples_file', type=str, default='measurements_sample.csv',
                        help='the csv file containing the sample measurements')
    args = parser.parse_args()

    # Read the measurements as dict from the csv file
    measurements, biomarkers = read_measurements_from_cvs(args.samples_file)
    visits = measurements[0].keys()

    # Get the data handler that locates the model files
    data_handler = DataHandler.get_data_handler(method='all',
                                                biomarkers=biomarkers,
                                                phase='joint')

    # Setup model
    model = MultiBiomarkerProgressionModel()
    for biomarker in biomarkers:
        model_file = data_handler.get_model_file(biomarker)
        model.add_model(biomarker, model_file)
    fitter = ModelFitter(model)

    # Estimate dpis (and dprs)
    if args.estimate_dpr:
        rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase='joint')
    else:
        rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase='joint')
        # No DPRs were estimated: use 1 for every subject
        dprs = np.ones(len(dpis)).tolist()

    # Plot the models with the fitted samples
    for biomarker in biomarkers:
        plot_biomarker(data_handler, biomarker, measurements, dpis[0], dprs[0])
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    For compatibility with the library, the dict has the structure
    { <rid> : { <viscode> : { DX.scan : <diagnosis>,
                              scantime : <days after bl>,
                              <biomarker1> : <volume>,
                              ... },
                <viscode> : ... }}

    All values are stored under the single rid 0 with the diagnosis
    'UNKNOWN'.  The first column of the file is expected to be
    'Biomarker Name'; every further column is a viscode (one of bl, m12, m24,
    m36).  Rows of unknown biomarkers and cells that do not hold a number are
    ignored.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the list of
             biomarker names for which at least one value was read
    :raises KeyError: if a column header is not a supported viscode
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file; treat that as "no visits"
        visits = (reader.fieldnames or [])[1:]
        for visit in visits:
            measurements[0][visit] = {'scantime': scantime_dict[visit], 'DX.scan': 'UNKNOWN'}

        # The set of valid names does not change while reading the file
        known_biomarkers = DataHandler.get_all_biomarker_names()
        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # ValueError: empty or non-numeric cell.
                    # TypeError: cell missing in a short row (DictReader
                    # fills it with None).
                    continue
                measurements[0][visit][biomarker] = value
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)
def main():
    """
    Compare the biomarker prediction errors of all methods.

    For each of the methods cog, vol, ml, img and all, the predictions of
    ``--predict_biomarker`` are computed twice on the visits bl, m12 and m24:
    once without and once with DPR estimation.  The observed and naive values
    of the first run and the model values of both runs are collected per
    method and handed to ``plot_errors``.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): --biomarkers is parsed but not forwarded to
    # get_biomarker_predictions below.
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--predict_biomarker', type=str, default='MMSE',
                        help='the biomarker to predict')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpi / dpr estimations')
    parser.add_argument('--recompute_predictions', action='store_true',
                        help='recompute the biomarker predictions')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    visits = ['bl', 'm12', 'm24']
    methods = ['cog', 'vol', 'ml', 'img', 'all']

    # Options shared by every prediction run
    shared_options = dict(phase=args.phase,
                          recompute_estimates=args.recompute_estimates,
                          recompute_predictions=args.recompute_predictions,
                          exclude_cn=True,
                          select_test_set=True,
                          consistent_data=True)

    values = {}
    for method in methods:
        # Run 1: DPI only
        _, _, observed, naive, model_dpi = et.get_biomarker_predictions(
            visits, args.predict_biomarker,
            method=method, estimate_dprs=False, **shared_options)

        # Run 2: DPI and DPR; only the model values of this run are kept
        _, _, _, _, model_dpi_dpr = et.get_biomarker_predictions(
            visits, args.predict_biomarker,
            method=method, estimate_dprs=True, **shared_options)

        values[method] = {'observed': observed,
                          'naive': naive,
                          'model_dpi': model_dpi,
                          'model_dpi_dpr': model_dpi_dpr}

    plot_errors(args, values, methods)
def select_nonconverters(args, rids, diagnoses, dpis, dprs):
    ''' Select data from MCI subjects that do not convert. '''
    data_handler = DataHandler.get_data_handler(method=args.method)
    measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12', 'm24'],
                                                         no_regression=True,
                                                         select_test_set=True,
                                                         select_complete=True)
    # Select RIDSs of non-converters
    rids_select = set()
    for rid in measurements:
        if 0.25 <= measurements[rid]['bl']['DX.scan'] <= 0.75 and 0.25 <= measurements[rid]['m24']['DX.scan'] <= 0.75:
            rids_select.add(rid)

    selected_rids = []
    selected_diagnoses = []
    selected_dpis = []
    selected_dprs = []
    for i, rid in enumerate(rids):
        if rid in rids_select:
            selected_rids.append(rid)
            selected_diagnoses.append(diagnoses[i])
            selected_dpis.append(dpis[i])
            selected_dprs.append(dprs[i])

    print log.RESULT, 'Selected {0} non-converting subjects.'.format(len(selected_rids))
    return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def main():
    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'], biomarkers=[biomarker], select_complete=True)

        mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']
            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            mean_changes_biomarker[diagnosis] += change
            num_subjects[diagnosis] += 1

        mean_change_mci_ad = mean_changes_biomarker[
            0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[
            0.75] + num_subjects[1.0]
        for diagnosis in mean_changes_biomarker:
            mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis]
        mean_changes_biomarker.update(
            {0.66: mean_change_mci_ad / num_subjects_mci_ad})

        mean_changes.update({biomarker: mean_changes_biomarker})

        print log.RESULT, '{0} CN:   {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])
        print log.RESULT, '{0} EMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])
        print log.RESULT, '{0} LMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])
        print log.RESULT, '{0} AD:   {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])

    mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                     'mean_changes.p')
    pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def main():
    """
    Print the terminal decline statistics for the selected data.

    Parses the command-line options, creates the data handler for the chosen
    method / biomarkers / phase and passes both to
    ``print_terminal_decline_statistics``.
    """
    parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-n', '--nr_threads', type=int, default=1, help='number of threads')
    parser.add_argument('--min_visits', type=int, default=0, help='the minimal number of visits')
    parser.add_argument('--no_regression', action='store_true', default=False,
                        help='do not perform age regression of biomarker values')
    parser.add_argument('--recompute_models', action='store_true',
                        help='recompute the models with new samples')
    args = parser.parse_args()

    # Get the data files and biomarkers
    handler_options = dict(method=args.method,
                           biomarkers=args.biomarkers,
                           phase=args.phase)
    data_handler = DataHandler.get_data_handler(**handler_options)

    # Currently disabled steps:
    # generate_csv_file(args, data_handler)
    # print_gender_statistics(args, data_handler)
    print_terminal_decline_statistics(args, data_handler)
def main():
    """Parse the command-line options and print terminal decline statistics.

    Options are registered from a table of (flags, keyword options) pairs, in
    the order in which they appear in the help output.
    """
    parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    argument_specs = [
        (('-m', '--method'),
         dict(choices=DataHandler.get_method_choices(), default='all',
              help='the method to collect data for')),
        (('-b', '--biomarkers'),
         dict(nargs='+', default=None,
              help='name of the biomarker to be plotted')),
        (('-p', '--phase'),
         dict(default=None, choices=DataHandler.get_phase_choices(),
              help='the phase for which the model is to be trained')),
        (('-n', '--nr_threads'),
         dict(type=int, default=1, help='number of threads')),
        (('--min_visits',),
         dict(type=int, default=0, help='the minimal number of visits')),
        (('--no_regression',),
         dict(action='store_true', default=False,
              help='do not perform age regression of biomarker values')),
        (('--recompute_models',),
         dict(action='store_true',
              help='recompute the models with new samples')),
    ]
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # Get the data files and biomarkers
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)

    # Only the terminal decline report is active; generate_csv_file() and
    # print_gender_statistics() take the same arguments but are disabled.
    print_terminal_decline_statistics(args, data_handler)
示例#18
0
	def from_string(cls, s):
		"""Build an instance from its textual description.

		The string is either ``"<id>#<charms>"`` or just ``"<charms>"``; in the
		latter case the whole string is used as the id as well.  ``<charms>``
		is a whitespace-separated list of ``<count>x<charm_id>`` entries.

		NOTE(review): takes ``cls`` as first parameter, so presumably decorated
		with @classmethod outside this view -- confirm.

		:param s: textual description of the gear
		:return: ``cls(id, gear)``, where gear is a list of (charm, count) tuples
		:raises ValueError: if an entry lacks the ``x`` separator or its count
			is not an integer
		"""
		# Split on the first separator only: everything after the first '#'
		# is the charm list, and a charm id may itself contain an 'x'.
		if "#" in s:
			gear_id, charm_list = s.split("#", 1)
		else:
			gear_id = charm_list = s
		gear = []
		for entry in charm_list.split():
			num, charm_id = entry.split("x", 1)
			gear.append((DataHandler.load_charm_from_id(charm_id), int(num)))
		return cls(gear_id, gear)
def print_training_samples_statistics(args, data_handler):
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits,
                                                         select_training_set=True,
                                                         no_regression=True)
    for biomarker in biomarkers:
        subjects = set()
        num_samples = 0
        for rid, visits in measurements.items():
            for _, visit_data in visits.items():
                try:
                    progress = DataHandler.safe_cast(visit_data['progress'], int)
                    value = DataHandler.safe_cast(visit_data[biomarker], float)
                    if progress is not None and value is not None:
                        subjects.add(rid)
                        num_samples += 1
                except KeyError:
                    pass

        print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format(biomarker, num_samples, len(subjects))
示例#20
0
def print_to_latex(args, results_naive, results_model, num_subjects):
    """Append one LaTeX table row with the prediction results to a file.

    The row holds the predicted biomarker, the method, the number of visits,
    the 'MEAN', 'STD' and 'CORR' entries of the naive and of the model
    results, and the number of subjects.  The file args.latex_file is located
    in the evaluation folder of the data handler and opened in append mode.

    :param args: parsed arguments (method, biomarkers, phase, latex_file,
                 predict_biomarker and visits are read)
    :param results_naive: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param results_model: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param num_subjects: value written to the last column
    """
    row_template = '{0} & {1} {2} & ${3:.2f}\pm{4:.2f}$ & ${5:.2f}$ & ${6:.2f}\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n'
    stat_keys = ('MEAN', 'STD', 'CORR')

    data_handler = DataHandler.get_data_handler(
        method=args.method, biomarkers=args.biomarkers, phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)

    with open(filename, 'a') as latex_file:
        fields = [args.predict_biomarker, args.method, len(args.visits)]
        fields.extend(results_naive[key] for key in stat_keys)
        fields.extend(results_model[key] for key in stat_keys)
        fields.append(num_subjects)
        latex_file.write(row_template.format(*fields))
def main():
    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'],
                                                             biomarkers=[biomarker],
                                                             select_complete=True)

        mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']
            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            mean_changes_biomarker[diagnosis] += change
            num_subjects[diagnosis] += 1

        mean_change_mci_ad = mean_changes_biomarker[0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[0.75] + num_subjects[1.0]
        for diagnosis in mean_changes_biomarker:
            mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis]
        mean_changes_biomarker.update({0.66: mean_change_mci_ad / num_subjects_mci_ad})

        mean_changes.update({biomarker: mean_changes_biomarker})

        print log.RESULT, '{0} CN:   {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])
        print log.RESULT, '{0} EMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])
        print log.RESULT, '{0} LMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])
        print log.RESULT, '{0} AD:   {1}, (n={2})'.format(biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])

    mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
    pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp', help='the type of extrapolator')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)

    biomarkers = data_handler.get_biomarker_names()
    if args.method == 'joint':
        offsets = np.linspace(500, 3000, 26)
    else:
        offsets = np.linspace(-1000, 1000, 21)
    all_diffs = np.zeros((len(offsets), len(biomarkers)))

    for i, biomarker in enumerate(biomarkers):
        diffs = get_model_differences(args, data_handler, biomarker, offsets)
        all_diffs[:, i] = diffs
        print biomarker, offsets[np.argmin(diffs)]

    optimum_index = np.argmin(np.mean(all_diffs, axis=1))

    print 'all', offsets[optimum_index]

    mins = all_diffs[optimum_index, :]  # np.min(all_diffs, axis=0)
    indices = np.argsort(mins)
    for i in indices:
        print biomarkers[i], mins[i]

    fig = plt.figure()
    ax1 = plt.subplot(1, 1, 1)
    ax1.plot(offsets, all_diffs, color='r')
    ax1.plot(offsets, np.mean(all_diffs, axis=1), color='b')
    plt.show()
    plt.close(fig)
def print_training_samples_statistics(args, data_handler):
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        min_visits=args.min_visits,
        select_training_set=True,
        no_regression=True)
    for biomarker in biomarkers:
        subjects = set()
        num_samples = 0
        for rid, visits in measurements.items():
            for _, visit_data in visits.items():
                try:
                    progress = DataHandler.safe_cast(visit_data['progress'],
                                                     int)
                    value = DataHandler.safe_cast(visit_data[biomarker], float)
                    if progress is not None and value is not None:
                        subjects.add(rid)
                        num_samples += 1
                except KeyError:
                    pass

        print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format(
            biomarker, num_samples, len(subjects))
示例#24
0
def test_vs(name1, gear1, name2, gear2):
	"""Simulate repeated battles between two players and log the win rates.

	The battle is run simulating_times times.  Each run loads both players
	afresh from their ids, equips them with the gear parsed from the given
	strings and starts an environment with player 1 as the only attacker and
	player 2 as the only defender.  Any result other than
	EnvioronmentType.Win is counted as a win for player 2.

	battlelog.log_open() is called for the first run and battlelog.log_close()
	for all later ones; the log is opened again for the final summary line.

	:param name1: id of the attacking player
	:param gear1: gear string of the attacking player
	:param name2: id of the defending player
	:param gear2: gear string of the defending player
	"""
	battlelog.log_close()
	wins_attacker = 0
	wins_defender = 0
	for run in xrange(simulating_times):
		switch_log = battlelog.log_open if run == 0 else battlelog.log_close
		switch_log()

		player1 = DataHandler.load_player_from_id(name1)
		player2 = DataHandler.load_player_from_id(name2)
		player1.import_gear(Gear.from_string(gear1))
		player2.import_gear(Gear.from_string(gear2))

		outcome = Envioronment([player1], [player2]).start()
		if outcome == EnvioronmentType.Win:
			wins_attacker += 1
		else:
			wins_defender += 1

	battlelog.log_open()
	summary = "%s vs %s, total %d times: %s win probability = %f%%, %s win probability = %f%%\n"
	battlelog.log(summary % (
		player1.name, player2.name, simulating_times,
		player1.name, float(wins_attacker*100)/simulating_times,
		player2.name, float(wins_defender*100)/simulating_times))
def main():
    """Evaluate every biomarker with the selected metric and sort the biomarkers.

    The metric 'cover' selects evaluate_biomarker_cover(); any other value
    selects evaluate_biomarker_disc().  The evaluations are dispatched with
    jl.Parallel using args.nr_threads jobs, after which sort_biomarkers() is
    called.
    """
    parser = argparse.ArgumentParser()
    method_choices = DataHandler.get_method_choices()
    phase_choices = DataHandler.get_phase_choices()

    # Data selection
    parser.add_argument('-m', '--method', choices=method_choices, default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=phase_choices,
                        help='the phase for which the model is to be trained')

    # Evaluation options
    parser.add_argument('-n', '--nr_threads', type=int, default=4,
                        help='number of threads')
    parser.add_argument('--recompute_metric', action='store_true',
                        help='recompute the metric')
    parser.add_argument('--value_samples', type=int, default=100,
                        help='the number of values samples')
    parser.add_argument('--progress_samples', type=int, default=50,
                        help='the number of progress samples')
    parser.add_argument('--quantiles', type=float, nargs=2, default=[0.01, 0.99],
                        help='the quantiles for the interval computation')
    parser.add_argument('--metric', type=str, default='cover',
                        help='the metric used for the evaluation')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(
        method=args.method, biomarkers=args.biomarkers, phase=args.phase)

    # Compute error for each biomarker
    biomarkers = data_handler.get_biomarker_names()
    if args.metric == 'cover':
        evaluation_function = evaluate_biomarker_cover
    else:
        evaluation_function = evaluate_biomarker_disc

    jobs = (jl.delayed(evaluation_function)(args, data_handler, biomarker)
            for biomarker in biomarkers)
    jl.Parallel(n_jobs=args.nr_threads)(jobs)

    sort_biomarkers(args, data_handler, biomarkers)
示例#26
0
def analyse_decline(args, rids, dpis, dprs, rds, non_rds):
    print log.INFO, 'Analysing classification accuracies...'
    # dpis = np.array(dpis)
    # dprs = np.array(dprs)
    # labels = np.array([1 if rid in rds else 0 for rid in rids])
    dpis_rds = []
    dpis_nonrds = []
    dprs_rds = []
    dprs_nonrds = []
    for rid, dpi, dpr in zip(rids, dpis, dprs):
        if rid in rds:
            dpis_rds.append(dpi)
            dprs_rds.append(dpr)
        elif rid in non_rds:
            dpis_nonrds.append(dpi)
            dprs_nonrds.append(dpr)

    dpis = np.concatenate((dpis_rds, dpis_nonrds))
    dprs = np.concatenate((dprs_rds, dprs_nonrds))
    labels = np.concatenate(
        (np.ones(len(dpis_rds)), np.zeros(len(dpis_nonrds))))

    # Assemble features
    features = np.zeros((len(dpis), 2))
    features[:, 0] = dpis
    if args.estimate_dprs:
        features[:, 1] = dprs
    else:
        # Copy DPIs as second features as LDA needs two features
        features[:, 1] = dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    print log.RESULT, '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)

    if args.latex_file is not None:
        data_handler = DataHandler.get_data_handler(method=args.method,
                                                    biomarkers=args.biomarkers,
                                                    phase=args.phase)
        filename = os.path.join(data_handler.get_eval_folder(),
                                args.latex_file)
        print log.INFO, 'Writing classification results to {0}...'.format(
            filename)
        with open(filename, 'a') as latex_file:
            latex_file.write(
                '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
                    args.method, len(args.visits), acc, sens, spec))
def main():
    """Scatter-plot two biomarkers against each other, coloured by diagnosis.

    Every visit of every subject with complete measurements contributes one
    point.  Diagnosis codes between 0.25 and 0.75 (inclusive) are merged into
    0.5 before colouring.  The figure is saved to args.plot_file if given and
    shown on screen otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                        help='name of the biomarker to be plotted')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        biomarkers=biomarkers, select_complete=True)

    # Collect biomarker values: one sample per visit
    visit_samples = [visit_data
                     for subject_visits in measurements.values()
                     for visit_data in subject_visits.values()]
    values_x = [sample[biomarkers[0]] for sample in visit_samples]
    values_y = [sample[biomarkers[1]] for sample in visit_samples]
    diagnoses = np.array([sample['DX.scan'] for sample in visit_samples])
    is_mci = (0.25 <= diagnoses) & (diagnoses <= 0.75)
    diagnoses[is_mci] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    ax.scatter(values_x, values_y, s=15.0, c=diagnoses, edgecolor='none',
               vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.25)
    ax.set_xlabel(biomarkers[0])
    ax.set_ylabel(biomarkers[1])

    # Plot legend: one patch per group, with 0.25 appended to each colour
    # tuple (the same value as the alpha of the scatter points)
    legend_colors = (pt.color_cn, pt.color_mci, pt.color_ad)
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=color + (0.25,), linewidth=0)
             for color in legend_colors]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def print_to_latex(args, results_naive, results_model, num_subjects):
    """Append the prediction results as one LaTeX table row.

    The row is appended to args.latex_file inside the evaluation folder of
    the data handler selected by args.method, args.biomarkers and args.phase.
    Its columns are: predicted biomarker, method and number of visits,
    naive mean and std, naive correlation, model mean and std, model
    correlation, and the number of subjects.

    :param args: parsed arguments
    :param results_naive: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param results_model: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param num_subjects: value written to the last column
    """
    row_format = '{0} & {1} {2} & ${3:.2f}\pm{4:.2f}$ & ${5:.2f}$ & ${6:.2f}\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n'

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    eval_folder = data_handler.get_eval_folder()
    filename = os.path.join(eval_folder, args.latex_file)

    with open(filename, 'a') as latex_file:
        setting = (args.predict_biomarker, args.method, len(args.visits))
        naive = (results_naive['MEAN'], results_naive['STD'], results_naive['CORR'])
        model = (results_model['MEAN'], results_model['STD'], results_model['CORR'])
        columns = setting + naive + model + (num_subjects,)
        latex_file.write(row_format.format(*columns))
def analyse_decline(args, rids, dpis, dprs, rds, non_rds):
    print log.INFO, 'Analysing classification accuracies...'
    # dpis = np.array(dpis)
    # dprs = np.array(dprs)
    # labels = np.array([1 if rid in rds else 0 for rid in rids])
    dpis_rds = []
    dpis_nonrds = []
    dprs_rds = []
    dprs_nonrds = []
    for rid, dpi, dpr in zip(rids, dpis, dprs):
        if rid in rds:
            dpis_rds.append(dpi)
            dprs_rds.append(dpr)
        elif rid in non_rds:
            dpis_nonrds.append(dpi)
            dprs_nonrds.append(dpr)

    dpis = np.concatenate((dpis_rds, dpis_nonrds))
    dprs = np.concatenate((dprs_rds, dprs_nonrds))
    labels = np.concatenate((np.ones(len(dpis_rds)), np.zeros(len(dpis_nonrds))))

    # Assemble features
    features = np.zeros((len(dpis), 2))
    features[:, 0] = dpis
    if args.estimate_dprs:
        features[:, 1] = dprs
    else:
        # Copy DPIs as second features as LDA needs two features
        features[:, 1] = dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    print log.RESULT, '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(args.num_folds, acc, sens, spec)

    if args.latex_file is not None:
        data_handler = DataHandler.get_data_handler(method=args.method,
                                                    biomarkers=args.biomarkers,
                                                    phase=args.phase)
        filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
        print log.INFO, 'Writing classification results to {0}...'.format(filename)
        with open(filename, 'a') as latex_file:
            latex_file.write('{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
                             args.method,
                             len(args.visits),
                             acc, sens, spec))
def main():
    """Fit the measurements of a sample csv file to the models and plot the result.

    The measurements are read with read_measurements_from_cvs(), one model
    per biomarker found in the file is added to a
    MultiBiomarkerProgressionModel, and the DPIs (and, with --estimate_dpr,
    the DPRs) are estimated for the 'joint' phase.  Every biomarker is then
    plotted together with the first estimated DPI / DPR.
    """
    # Parse input arguments
    parser = argparse.ArgumentParser()
    # The help texts were copy-pasted ('recompute the dpis estimations') and
    # described neither option; they now state what the options do here.
    parser.add_argument('--estimate_dpr',
                        action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--samples_file',
                        type=str,
                        default='measurements_sample.csv',
                        help='filename of the csv file with the sample measurements')
    args = parser.parse_args()

    # Read the measurements as dict from the csv file
    measurements, biomarkers = read_measurements_from_cvs(args.samples_file)
    # The file describes a single subject, stored under the rid 0
    visits = measurements[0].keys()

    # Get estimates
    data_handler = DataHandler.get_data_handler(method='all',
                                                biomarkers=biomarkers,
                                                phase='joint')

    # Setup model
    model = MultiBiomarkerProgressionModel()
    for biomarker in biomarkers:
        model_file = data_handler.get_model_file(biomarker)
        model.add_model(biomarker, model_file)
    fitter = ModelFitter(model)

    # Estimate dpis (and dprs) and save data
    if args.estimate_dpr:
        rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements,
                                                         visits,
                                                         fitter,
                                                         phase='joint')
    else:
        rids, diagnoses, dpis = estimate_dpis(measurements,
                                              visits,
                                              fitter,
                                              phase='joint')
        # Without estimated DPRs a DPR of one is used for every DPI
        dprs = np.ones(len(dpis)).tolist()

    # Plot the models with the fitted samples
    for biomarker in biomarkers:
        plot_biomarker(data_handler, biomarker, measurements, dpis[0], dprs[0])
def main():
    """Collect biomarker predictions for all methods and plot the errors.

    For each method, et.get_biomarker_predictions() is called twice on the
    visits bl, m12 and m24: once with estimate_dprs=False and once with
    estimate_dprs=True.  The observed, naive and model values of the first
    call and the model values of the second call are stored per method and
    handed to plot_errors().

    NOTE(review): --biomarkers is parsed but not passed to the prediction
    calls in this function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--predict_biomarker', type=str, default='MMSE',
                        help='the biomarker to predict')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpi / dpr estimations')
    parser.add_argument('--recompute_predictions', action='store_true',
                        help='recompute the biomarker predictions')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    visits = ['bl', 'm12', 'm24']
    methods = ['cog', 'vol', 'ml', 'img', 'all']

    # Keyword arguments shared by both prediction calls
    shared_options = dict(phase=args.phase,
                          recompute_estimates=args.recompute_estimates,
                          recompute_predictions=args.recompute_predictions,
                          exclude_cn=True,
                          select_test_set=True,
                          consistent_data=True)

    values = {}
    for method in methods:
        method_values = {}
        values[method] = method_values

        # Predictions without DPR estimation
        _, _, observed, naive, model_dpi = et.get_biomarker_predictions(
            visits, args.predict_biomarker,
            method=method, estimate_dprs=False, **shared_options)
        method_values['observed'] = observed
        method_values['naive'] = naive
        method_values['model_dpi'] = model_dpi

        # Predictions with DPR estimation; only the model values are kept
        _, _, _, _, model_dpi_dpr = et.get_biomarker_predictions(
            visits, args.predict_biomarker,
            method=method, estimate_dprs=True, **shared_options)
        method_values['model_dpi_dpr'] = model_dpi_dpr

    plot_errors(args, values, methods)
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file. For compatibility with the library, the dict has
    the following structure, with the single subject of the file stored under the rid 0:
    { <rid> : { <viscode> : { DX.scan : <diagnosis>,
                              scantime : <days after bl>,
                              <biomarker1> : <volume>,
                              ... },
                <viscode> : ... }}

    The first column of the file holds the biomarker names (header 'Biomarker Name'); every further column header
    is a viscode, which has to be one of bl, m12, m24 or m36. The diagnosis of every visit is set to 'UNKNOWN'.
    Rows with a name not listed by DataHandler.get_all_biomarker_names() are ignored, as are cells that are
    missing or cannot be parsed as float.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the list of biomarkers with at least one parsed value
    :raises KeyError: if a column header is not a known viscode
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        # NOTE(review): assumes the first column is the name column -- confirm.
        visits = reader.fieldnames[1:]
        for visit in visits:
            measurements[0][visit] = {
                'scantime': scantime_dict[visit],
                'DX.scan': 'UNKNOWN'
            }

        # The known names do not change between rows, so look them up once.
        known_biomarkers = DataHandler.get_all_biomarker_names()
        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (ValueError, TypeError):
                    # Empty / non-numeric cell (ValueError) or a row shorter
                    # than the header, which DictReader pads with None
                    # (TypeError): no measurement for this visit.
                    continue
                measurements[0][visit][biomarker] = value
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)
def get_rfds(args, rids, diagnoses, dpis, dprs):
    data_handler = DataHandler.get_data_handler()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['FAQ'],
        select_complete=True,
        no_regression=True)

    rfds = set()
    non_rfds = set()
    for rid in rids:
        if rid in measurements:
            faq_bl = measurements[rid]['bl']['FAQ']
            faq_m24 = measurements[rid]['m24']['FAQ']
            rcd = (faq_m24 - faq_bl) >= 10
            if rcd:
                rfds.add(rid)
            else:
                non_rfds.add(rid)

    print log.RESULT, 'Selected {0} subjects with rapid functional decline (RFD).'.format(len(rfds))
    print log.RESULT, 'Selected {0} subjects without rapid functional decline (non-RFD).'.format(len(non_rfds))
    return rfds, non_rfds
def get_rcds(args, rids, diagnoses, dpis, dprs):
    data_handler = DataHandler.get_data_handler()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['MMSE'],
        select_complete=True,
        no_regression=True)

    rcds = set()
    non_rcds = set()
    for rid in rids:
        if rid in measurements:
            mmse_bl = measurements[rid]['bl']['MMSE']
            mmse_m24 = measurements[rid]['m24']['MMSE']
            rcd = (mmse_bl - mmse_m24) >= 8
            if rcd:
                rcds.add(rid)
            else:
                non_rcds.add(rid)

    print log.RESULT, 'Selected {0} subjects with rapid cognitive decline (RCD).'.format(len(rcds))
    print log.RESULT, 'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format(len(non_rcds))
    return rcds, non_rcds
示例#35
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained')
    parser.add_argument('--consistent_data', action='store_true', help='us only subjects with bl, m12 and m24 visits')
    parser.add_argument('--estimate_dprs', action='store_true', help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true', help='recompute the dpi / dpr estimations')
    parser.add_argument('--recompute_predictions', action='store_true', help='recompute the biomarker predictions')
    parser.add_argument('--exclude_cn', action='store_true', help='exclude healthy subjects from analysis')
    args = parser.parse_args()

    estimates = {}
    methods = ['cog', 'vol', 'ml', 'img', 'all']
    for method in methods:
        estimates.update({method: {}})
        for visits in [['bl'], ['m12'], ['m24'], ['bl', 'm12'], ['m12', 'm24']]:
            _, diagnoses, dpis, _, _, _ = et.get_progress_estimates(visits,
                                                                    method=method,
                                                                    phase=args.phase,
                                                                    estimate_dprs=args.estimate_dprs)

            diagnoses = np.array(diagnoses)
            dpis = np.array(dpis)
            visits_string = '_'.join(visits)
            estimates[method].update({visits_string: {}})
            estimates[method][visits_string].update({'CN': np.mean(dpis[np.where(diagnoses == 0.0)])})
            estimates[method][visits_string].update({'EMCI': np.mean(dpis[np.where(diagnoses == 0.25)])})
            estimates[method][visits_string].update({'LMCI': np.mean(dpis[np.where(diagnoses == 0.75)])})
            estimates[method][visits_string].update({'AD': np.mean(dpis[np.where(diagnoses == 1.0)])})

    for method in methods:
        print log.INFO, 'Results for {0}'.format(method)
        for diagnosis in ['CN', 'EMCI', 'LMCI', 'AD']:
            print log.RESULT, '{0: <4}:   {1:.2f} {2:.2f} | {3:.2f}  '.format(
                              diagnosis,
                              estimates[method]['m12'][diagnosis] - estimates[method]['bl'][diagnosis],
                              estimates[method]['m24'][diagnosis] - estimates[method]['m12'][diagnosis],
                              estimates[method]['m12_m24'][diagnosis] - estimates[method]['bl_m12'][diagnosis])
示例#36
0
def get_rfds(args, rids, diagnoses, dpis, dprs):
    data_handler = DataHandler.get_data_handler()
    measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm24'],
                                                         biomarkers=['FAQ'],
                                                         select_complete=True,
                                                         no_regression=True)

    rfds = set()
    non_rfds = set()
    for rid in rids:
        if rid in measurements:
            faq_bl = measurements[rid]['bl']['FAQ']
            faq_m24 = measurements[rid]['m24']['FAQ']
            rcd = (faq_m24 - faq_bl) >= 10
            if rcd:
                rfds.add(rid)
            else:
                non_rfds.add(rid)

    print log.RESULT, 'Selected {0} subjects with rapid functional decline (RFD).'.format(
        len(rfds))
    print log.RESULT, 'Selected {0} subjects without rapid functional decline (non-RFD).'.format(
        len(non_rfds))
    return rfds, non_rfds
示例#37
0
def get_rcds(args, rids, diagnoses, dpis, dprs):
    data_handler = DataHandler.get_data_handler()
    measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm24'],
                                                         biomarkers=['MMSE'],
                                                         select_complete=True,
                                                         no_regression=True)

    rcds = set()
    non_rcds = set()
    for rid in rids:
        if rid in measurements:
            mmse_bl = measurements[rid]['bl']['MMSE']
            mmse_m24 = measurements[rid]['m24']['MMSE']
            rcd = (mmse_bl - mmse_m24) >= 8
            if rcd:
                rcds.add(rid)
            else:
                non_rcds.add(rid)

    print log.RESULT, 'Selected {0} subjects with rapid cognitive decline (RCD).'.format(
        len(rcds))
    print log.RESULT, 'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format(
        len(non_rcds))
    return rcds, non_rcds
示例#38
0
def main():
    """
    Parse the command line, collect the biomarker predictions for the test
    set, and plot (unless --no_plot is given) and analyse them
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str,
                        help='the viscodes of the visits that are available')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(),
                        default='all', help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--predict_biomarker', type=str, default='MMSE',
                        help='the biomarker to predict')

    # Boolean switches, registered in the order they are listed in the help
    switches = (
        ('--recompute_estimates', 'recompute the dpi / dpr estimations'),
        ('--recompute_predictions', 'recompute the biomarker predictions'),
        ('--estimate_dprs', 'estimate dpis and dprs'),
        ('--consistent_data', 'use only subjects with bl, m12 and m24 visits'),
        ('--exclude_cn', 'exclude healthy subjects from analysis'),
        ('--use_last_visit', 'use only the last visit for prediction'),
        ('--naive_use_diagnosis', 'use the specific mean change for the diagnosis'),
        ('--no_plot', 'do not plot the results'),
    )
    for flag, description in switches:
        parser.add_argument(flag, action='store_true', help=description)

    # Optional output files
    output_files = (
        ('--plot_file', 'filename of the output file'),
        ('--latex_file', 'add output to a LaTeX file'),
    )
    for flag, description in output_files:
        parser.add_argument(flag, type=str, default=None, help=description)

    args = parser.parse_args()

    # Only the test set is evaluated; the returned subject ids are not needed
    predictions = et.get_biomarker_predictions(
        args.visits,
        args.predict_biomarker,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        recompute_estimates=args.recompute_estimates,
        recompute_predictions=args.recompute_predictions,
        estimate_dprs=args.estimate_dprs,
        select_test_set=True,
        consistent_data=args.consistent_data,
        exclude_cn=args.exclude_cn,
        use_last_visit=args.use_last_visit,
        naive_use_diagnosis=args.naive_use_diagnosis)
    _, diagnoses, values_observed, values_naive, values_model = predictions

    if not args.no_plot:
        plot_biomarker_predictions(args, diagnoses, values_observed, values_model)
    analyse_biomarker_predictions(args, diagnoses, values_observed, values_naive, values_model)
	def test_add_gear(gear):
		"""
		Store a gear in the persisted gear collection if its id is not yet known

		The collection is written back in either case.

		:param gear: object providing an ``id`` attribute and ``to_json_obj()``
		"""
		stored = DataHandler.load_gears()
		already_known = gear.id in stored
		if not already_known:
			stored[gear.id] = gear.to_json_obj()
		DataHandler.save_gears(stored)
示例#40
0
def get_fitting_data(args, data_handler_joint):
    biomarkers = data_handler_joint.get_biomarker_names()
    offsets = range(args.search_range[0], args.search_range[1], args.search_range[2])
    errors_file = os.path.join(data_handler_joint.get_eval_folder(),
                               'offset_errors_{0}.p'.format(args.extrapolator))
    if os.path.isfile(errors_file) and not args.recompute_errors:
        print log.INFO, 'Reading errors estimations from file {0}...'.format(errors_file)
        (errors, descriminativeness, overlap) = pickle.load(open(errors_file, 'rb'))
    else:
        data_handler_1 = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='cnmci')
        data_handler_2 = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='mciad')

        errors = np.zeros((len(biomarkers), len(offsets)))
        descriminativeness = np.zeros(len(biomarkers))
        overlap = []
        for i, biomarker in enumerate(biomarkers):
            # Get error matrix for all biomarkers and offsets
            model_file_1 = data_handler_1.get_model_file(biomarker)
            model_file_2 = data_handler_2.get_model_file(biomarker)
            if os.path.isfile(model_file_1) and os.path.isfile(model_file_2):
                print log.INFO, 'Analysing {0}...'.format(biomarker)

                # Get discriminativeness for all biomarkers as a scaling factor
                eval_file_1 = model_file_1.replace('.csv', '_eval_cover.csv')
                eval_file_2 = model_file_2.replace('.csv', '_eval_cover.csv')
                if os.path.isfile(eval_file_1) and os.path.isfile(eval_file_2):
                    descriminate_1 = np.mean(mlab.csv2rec(eval_file_1)['error'])
                    descriminate_2 = np.mean(mlab.csv2rec(eval_file_2)['error'])
                    descriminativeness[i] = 0.5 * (descriminate_1 + descriminate_2)
                else:
                    print log.WARNING, 'Evaluation file missing for {0}'.format(biomarker)
                    continue

                # Initialise models
                model_1 = ProgressionModel(biomarker, model_file_1, extrapolator=args.extrapolator)
                model_2 = ProgressionModel(biomarker, model_file_2, extrapolator=args.extrapolator)

                # Assemble errors for each offset
                min_val_1, max_val_1 = model_1.get_value_range([0.1, 0.9])
                min_val_2, max_val_2 = model_2.get_value_range([0.1, 0.9])
                values = np.linspace(min(min_val_1, min_val_2), max(max_val_1, max_val_2), 250)
                values_delta = (values.max() - values.min()) / len(values)
                for j, offset in enumerate(offsets):
                    dens_11 = np.array(model_1.get_density_distribution(values, offset + model_2.min_progress))
                    dens_12 = np.array(model_2.get_density_distribution(values, model_2.min_progress))

                    dens_21 = np.array(model_1.get_density_distribution(values, model_1.max_progress))
                    dens_22 = np.array(model_2.get_density_distribution(values, -offset + model_1.max_progress))

                    errors[i, j] = 0.5 * values_delta * (np.sum(np.abs(dens_11 - dens_12)) + np.sum(np.abs(dens_21 - dens_22)))

                # Get overlap
                overlap.append(model_1.max_progress - model_2.min_progress)

        overlap = np.mean(overlap)
        print log.INFO, 'Saving errors to file {0}...'.format(errors_file)
        pickle.dump((errors, descriminativeness, overlap), open(errors_file, 'wb'))

    return biomarkers, offsets, errors, descriminativeness, overlap
def get_biomarker_predictions(visits,
                              predict_biomarker,
                              method=None,
                              biomarkers=None,
                              phase=None,
                              recompute_estimates=False,
                              recompute_predictions=False,
                              estimate_dprs=False,
                              select_test_set=False,
                              consistent_data=False,
                              exclude_cn=False,
                              use_last_visit=False,
                              naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, method, '_'.join(visits),
            '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, estimate_biomarkers_string,
            '_'.join(visits), '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(),
                                   predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(
            predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive,
         values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(
            predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                         'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(
            visits,
            method=method,
            biomarkers=biomarkers,
            phase=phase,
            recompute_estimates=recompute_estimates,
            estimate_dprs=estimate_dprs,
            select_test_set=select_test_set,
            consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(
            visits=visits + [predict_visit],
            biomarkers=[predict_biomarker],
            select_test_set=select_test_set,
            select_complete=True)
        model = ProgressionModel(
            predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(
            predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis,
                                            dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit][
                    'scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(
                    scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][
                    predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(
                        scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(
                            scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(
                            progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(
                    progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][
                    predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits,
                                     measurements[rid], dpi, dpr, value_model,
                                     value_naive, mean_quantile, mean_change,
                                     intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(
                    predict_biomarker, rid, value_observed, value_naive,
                    value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(
            predict_biomarker, prediction_file)
        pickle.dump(
            (rids, diagnoses, values_observed, values_naive, values_model),
            open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model
示例#42
0
def plot_model(args, data_handler, biomarker):
    """
    Plot the progression model of one biomarker

    Depending on the arguments, the figure shows the quantile curves of the
    model with optional sample points, errors and reference curves, the
    probability density functions at a set of sampled progresses, or both.
    The figure is saved if args.save_plots or args.plot_file is set and shown
    otherwise. Nothing is plotted if the model file does not exist.

    :param args: the parsed arguments; plot_synth_model, extrapolator,
                 no_densities, only_densities, no_model, phase, xlim, ylim,
                 no_extrapolation, plot_quantile_label, plot_donohue, method,
                 plot_eta, plot_errors, no_points, points_alpha,
                 no_sample_lines, save_plots and plot_file are read
    :param data_handler: the data handler providing the model and samples files
    :param biomarker: the biomarker to plot
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)
    plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names(
    )

    #
    # Read model
    #
    pm = ProgressionModel(biomarker,
                          model_file,
                          extrapolator=args.extrapolator)
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate,
                                        pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress,
                                        max_progress_extrapolate, 20)

    # Calc min and max val in interval between the 10% and 90% quantile
    min_val, max_val = pm.get_value_range([0.1, 0.9])
    #     progress_linspace = np.linspace(min_progress_extrapolate, max_progress_extrapolate, 100)
    #     min_val = float('inf')
    #     max_val = float('-inf')
    #     for quantile in [0.1, 0.9]:
    #         curve = pm.get_quantile_curve(progress_linspace, quantile)
    #         min_val = min(min_val, np.min(curve))
    #         max_val = max(max_val, np.max(curve))

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    figure_width = 6 if args.no_densities or args.only_densities else 12
    fig = plt.figure(figsize=(figure_width, 5))
    if args.only_densities:
        ax1 = None
        ax2 = plt.subplot(1, 1, 1)
        pt.setup_axes(plt, ax2, xgrid=False, ygrid=False)
    elif args.no_densities:
        ax1 = plt.subplot(1, 1, 1)
        ax2 = None
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    else:
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
        pt.setup_axes(plt, ax2)

    if not args.only_densities:
        if args.no_model and not args.plot_synth_model:
            ax1.set_title('Aligned samples for {0}'.format(biomarker_string))
        else:
            ax1.set_title('Quantile curves for {0}'.format(biomarker_string))
        if args.phase == 'mciad':
            ax1.set_xlabel(
                'Disease progress (days before/after conversion to AD)')
        else:
            ax1.set_xlabel(
                'Disease progress (days before/after conversion to MCI)')
        ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
        if args.xlim is not None:
            ax1.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)
        if args.ylim is not None:
            ax1.set_ylim(args.ylim[0], args.ylim[1])

    #
    # Plot the percentile curves of the fitted model
    #
    if not args.no_model and not args.only_densities:
        ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
        ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
        for grey_value, quantile in zip(grey_values, quantiles):
            curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
            ax1.plot(progress_linspace_int, curve_int, color=grey_value)

            if not args.no_extrapolation:
                curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1,
                                                  quantile)
                curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2,
                                                  quantile)
                ax1.plot(progress_linspace_ex1,
                         curve_ex1,
                         '--',
                         color=grey_value)
                ax1.plot(progress_linspace_ex2,
                         curve_ex2,
                         '--',
                         color=grey_value)

            if args.plot_quantile_label:
                label = '$q={0}\%$'.format(quantile * 100)
                ax1.text(progress_linspace_int[-1] + 10,
                         curve_int[-1],
                         label,
                         fontsize=10)

        if args.plot_donohue:
            print 'Plotting Donohue'
            donohue_file = os.path.join(
                data_handler._conf.models_folder, 'donohue',
                'population_{0}.csv'.format(biomarker.replace(' ', '.')))
            if not os.path.isfile(donohue_file):
                print log.ERROR, 'Donohue model file not found: {0}'.format(
                    donohue_file)
                return

            r = mlab.csv2rec(donohue_file)
            if args.method == 'joint':
                offset = 2200
            else:
                offset = 300
            progrs = r[r.dtype.names[0]] * 30.44 + offset
            vals = r[r.dtype.names[1]]
            curve_donohue = []
            progr_donohue = []
            for p in progress_linspace_int:
                if progrs[0] < p < progrs[-1]:
                    # Take the first sample that is not below p
                    i = 1
                    while p > progrs[i]:
                        i += 1
                    # TODO linear interpolation
                    progr_donohue.append(progrs[i])
                    curve_donohue.append(vals[i])
            ax1.plot(progr_donohue,
                     curve_donohue,
                     '--',
                     color='b',
                     linewidth=2)

    #
    # Plot synthetic model curve
    #
    # There is no ax1 to draw on if only the densities are plotted
    if plot_synth_model and not args.only_densities:
        progress_linspace_synth = np.linspace(-2500, 2500, 100)
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        alphas = [0.4, 0.7, 1.0, 0.7, 0.4]
        for quantile, alpha in zip(quantiles, alphas):
            curve_synth = [
                SynthModel.get_distributed_value(biomarker, p, cdf=quantile)
                for p in progress_linspace_synth
            ]
            ax1.plot(progress_linspace_synth,
                     curve_synth,
                     color='b',
                     alpha=alpha)

    #
    # Plot predictor function
    #
    if args.plot_eta is not None and not args.only_densities:
        # Get second axis of plot 1
        ax1b = ax1.twinx()

        # Plot all progresses
        # ax1b.scatter(pm.all_progresses, pm.all_mus, facecolor='b', marker='o', edgecolor='none', alpha=0.2)
        ax1b.text(pm.progresses[-1],
                  pm.sigmas[-1],
                  '$\mu$',
                  color='b',
                  fontsize=11)

        # Plot binned progresses
        ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x')

        # Plot interpolated model
        mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int]
        ax1b.plot(progress_linspace_int, mus, color='b')

        if not args.no_extrapolation:
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1]
            ax1b.plot(progress_linspace_ex1, mus, '--', color='b')
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2]
            ax1b.plot(progress_linspace_ex2, mus, '--', color='b')
        if args.xlim is not None:
            ax1b.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot errors
    #
    if args.plot_errors and not args.only_densities:
        eval_file = model_file.replace('.csv', '_eval_cover.csv')
        if not os.path.isfile(eval_file):
            print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file)
        else:
            m = mlab.csv2rec(eval_file)
            progresses = m['progress']
            errors = m['error']

            # Get second axis of plot 1
            ax1b = ax1.twinx()
            # ax1b.set_ylim(0, max(150, 1.2 * np.max(errors)))
            ax1b.plot(progresses, errors, color='g', marker='x')
            ax1b.text(progresses[-1],
                      errors[-1],
                      'Discr.',
                      color='g',
                      fontsize=11)
            ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5)

            # Mark the area covered by the median curve in the evaluated range
            median_curve = pm.get_quantile_curve(progresses, 0.5)
            min_value = np.min(median_curve)
            max_value = np.max(median_curve)
            rect = mpl.patches.Rectangle((progresses[0], min_value),
                                         progresses[-1] - progresses[0],
                                         max_value - min_value,
                                         fc=(0.0, 0.5, 0.0, 0.1),
                                         ec=(0.0, 0.5, 0.0, 0.8),
                                         linewidth=1)
            ax1.add_patch(rect)

    #
    # Plot points
    #
    if not args.no_points and not args.only_densities:
        samples_file = data_handler.get_samples_file(biomarker)
        if not os.path.isfile(samples_file):
            print log.ERROR, 'Samples file not found: {0}'.format(samples_file)
        else:
            m = mlab.csv2rec(samples_file)
            progr_points = m['progress']
            value_points = m['value']
            # diagn_points = [0.5 if p < 0 else 1.0 for p in progr_points]
            diagn_points = m['diagnosis']
            # Map all diagnoses between 0.25 and 0.75 to one colour
            diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5

            print log.INFO, 'Plotting {0} sample points...'.format(
                len(progr_points))
            ax1.scatter(progr_points,
                        value_points,
                        s=15.0,
                        c=diagn_points,
                        edgecolor='none',
                        vmin=0.0,
                        vmax=1.0,
                        cmap=pt.progression_cmap,
                        alpha=args.points_alpha)
            # Proxy rectangles for the legend, one per diagnosis
            if args.phase == 'cnmci':
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_cn + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['CN', 'MCI']
            elif args.phase == 'mciad':
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_ad + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['MCI', 'AD']
            else:
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_cn + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_ad + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['CN', 'MCI', 'AD']
            legend = ax1.legend(rects,
                                labels,
                                fontsize=10,
                                ncol=len(rects),
                                loc='upper center',
                                framealpha=0.9)
            legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Plot PDFs
    #
    progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000] if args.phase == 'joint' else \
                    [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000]

    # Colour range per phase; any other phase falls back to the span of the
    # sampled progresses so that vmin / vmax are always defined
    colour_ranges = {
        'cnmci': (-2000, 6000),
        'mciad': (-6000, 2000),
        'joint': (-2000, 4000),
    }
    vmin, vmax = colour_ranges.get(args.phase,
                                   (min(progr_samples), max(progr_samples)))
    sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(vmin=vmin,
                                                           vmax=vmax),
                                     cmap=plt.get_cmap(pt.progression_cmap))

    if not args.no_sample_lines and not args.only_densities:
        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                ax1.axvline(progr,
                            color=sample_color,
                            linestyle=linestyle,
                            alpha=0.3)

    if not args.no_densities:
        ax2.set_title(
            'Probability density function for {0}'.format(biomarker_string))
        ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker))
        ax2.set_ylabel('Probability')
        if args.ylim is None:
            values = np.linspace(min_val, max_val, 250)
            ax2.set_xlim(min_val, max_val)
        else:
            values = np.linspace(args.ylim[0], args.ylim[1], 250)
            ax2.set_xlim(args.ylim[0], args.ylim[1])

        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                probs = pm.get_density_distribution(values, progr)
                ax2.plot(values,
                         probs,
                         label=str(progr),
                         color=sample_color,
                         linestyle=linestyle)

                if plot_synth_model:
                    probs = [
                        SynthModel.get_probability(biomarker, progr, v)
                        for v in values
                    ]
                    ax2.plot(values, probs, color='b', linestyle='--')

        legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9)
        legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    if args.save_plots or args.plot_file is not None:
        if args.plot_file is not None:
            plot_filename = args.plot_file
        else:
            plot_filename = model_file.replace('.csv', '.pdf')
        plt.savefig(plot_filename, transparent=True)
    else:
        plt.show()
    plt.close(fig)
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr):
    """
    Plot the model of one biomarker with the fitted values

    The quantile curves of the fitted progression model are drawn solid over
    the model's own progress range and dashed over an extrapolation margin of
    30% of that range on either side. Every visit that contains the biomarker
    is placed on the progress axis at ``scantime * dpr + dpi``. The figure is
    displayed with ``plt.show()`` and closed afterwards; nothing is returned.

    :param data_handler: the data handler
    :param biomarker: the biomarker to plot
    :param measurements: the measurements containing the biomarker samples of
        one subject; only the entry stored under key ``0`` is read
    :param dpi: the estimated DPI
    :param dpr: the estimated DPR
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)

    #
    # Read model
    #
    pm = ProgressionModel(biomarker, model_file)
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20)

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    figure_width = 6
    fig = plt.figure(figsize=(figure_width, 5))
    ax1 = plt.subplot(1, 1, 1)
    pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    ax1.set_title('Model for {0} with fitted sample values'.format(biomarker_string))
    ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
    ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot the percentile curves of the fitted model
    #
    # Dotted vertical lines mark where the model's own range ends and the
    # extrapolated part begins.
    ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
    ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
        ax1.plot(progress_linspace_int, curve_int, color=grey_value)

        curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile)
        curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile)
        ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value)
        ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value)

        # Label each curve just right of the end of the solid segment
        label = 'q = {0}'.format(quantile * 100)
        ax1.text(progress_linspace_int[-1] + 100, curve_int[-1], label, fontsize=10)

    #
    # Plot points
    #
    subject_visits = measurements[0]
    progr_points = []
    value_points = []
    for visit in subject_visits:
        samples = subject_visits[visit]
        if biomarker in samples:
            # Map the scan time onto the disease progress axis
            progress = samples['scantime'] * dpr + dpi
            value = samples[biomarker]
            progr_points.append(progress)
            value_points.append(value)
            ax1.axvline(progress, color='b', linestyle='--')
            ax1.text(progress + 150, value, visit, color='b', fontsize=10)

    ax1.scatter(progr_points, value_points, s=25.0, color='b', edgecolor='none',
                vmin=0.0, vmax=1.0, alpha=0.9)

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    plt.show()
    plt.close(fig)
示例#44
0
	def from_json_obj(cls, obj):
		"""
		Build an instance from a decoded JSON object.

		``obj['id']`` is passed through unchanged. Every entry of
		``obj['gear']`` is turned into a ``(charm, number)`` tuple, where the
		charm is looked up with ``DataHandler.load_charm_from_id`` from the
		entry's ``'charm_id'``.

		:param obj: mapping with the keys ``'id'`` and ``'gear'``
		:return: ``cls(id, gear)`` with the resolved gear list
		"""
		obj_id = obj['id']
		resolved_gear = [
			(DataHandler.load_charm_from_id(entry['charm_id']), entry['number'])
			for entry in obj['gear']
		]
		return cls(obj_id, resolved_gear)
def main():
    """
    Scatter-plot two biomarkers against each other, coloured by diagnosis.

    The two biomarker names come from ``-b/--biomarkers`` (default ``D1`` and
    ``D2``). One point is drawn for every visit of every subject returned by
    the data handler. Diagnosis values in the closed interval [0.25, 0.75]
    are set to 0.5 before they are used as colour values. The figure is
    written to ``--plot_file`` when given, otherwise it is shown on screen.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                        help='name of the biomarker to be plotted')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(biomarkers=biomarkers,
                                                         select_complete=True)

    # Collect biomarker values, one entry per subject visit
    samples = [measurements[rid][visit]
               for rid in measurements
               for visit in measurements[rid]]
    biomarkers_1 = [sample[biomarkers[0]] for sample in samples]
    biomarkers_2 = [sample[biomarkers[1]] for sample in samples]
    diagnoses = np.array([sample['DX.scan'] for sample in samples])

    # Merge the intermediate diagnosis values into a single colour value
    intermediate = (diagnoses >= 0.25) & (diagnoses <= 0.75)
    diagnoses[intermediate] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    scatter_style = dict(s=15.0, c=diagnoses, edgecolor='none',
                         vmin=0.0, vmax=1.0,
                         cmap=pt.progression_cmap, alpha=0.25)
    ax.scatter(biomarkers_1, biomarkers_2, **scatter_style)
    ax.set_xlabel(biomarkers[0])
    ax.set_ylabel(biomarkers[1])

    # Plot legend: one proxy rectangle per group, using the same alpha as
    # the scatter points
    rects = [mpl.patches.Rectangle((0, 0), 1, 1,
                                   fc=group_color + (0.25, ),
                                   linewidth=0)
             for group_color in (pt.color_cn, pt.color_mci, pt.color_ad)]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels,
                       fontsize=10,
                       ncol=len(rects),
                       loc='upper center',
                       framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def get_progress_estimates(visits,
                           method=None, biomarkers=None, phase=None,
                           recompute_estimates=False,
                           estimate_dprs=False, consistent_data=False,
                           select_training_set=False, select_test_set=False):
    # Get data handler and biomarker names
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)

    # Get filename
    estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p'
    if biomarkers is None:
        estimates_file_basename = estimates_file_trunk.format(method, '_'.join(visits))
    else:
        biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        estimates_file_basename = estimates_file_trunk.format(biomarkers_string, '_'.join(visits))
    estimates_file = os.path.join(data_handler.get_eval_folder(), estimates_file_basename)

    # Read if estimates exist, else recompute
    if os.path.isfile(estimates_file) and not recompute_estimates:
        # Read test results from file
        print log.INFO, 'Reading DPI{0} estimations from {1}...'.format('\DPR' if estimate_dprs else '', estimates_file)
        (rids, diagnoses, dpis, dprs, mean_min, mean_max) = pickle.load(open(estimates_file, 'rb'))
    else:
        # Collect data for test
        biomarkers = data_handler.get_biomarker_names()
        measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12', 'm24'],
                                                             biomarkers=biomarkers,
                                                             select_complete=True)

        # Setup model
        model = MultiBiomarkerProgressionModel()
        for biomarker in biomarkers:
            model_file = data_handler.get_model_file(biomarker)
            model.add_model(biomarker, model_file)
        fitter = ModelFitter(model)

        # Calculate mean and max progress
        mean_min = model.get_mean_min_progress()
        mean_max = model.get_mean_max_progress()

        # Estimate dpis (and dprs) and save data
        if not estimate_dprs or len(visits) == 1:
            if estimate_dprs and len(visits) == 1:
                print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)'
            rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase=phase)
            dprs = np.ones(len(dpis)).tolist()
        else:
            rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase=phase)

        print log.INFO, 'Saving DPI{0} estimations to {1}...'.format('\DPR' if estimate_dprs else '', estimates_file)
        pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max), open(estimates_file, 'wb'))

    # Reduce to consistent data sets with bl, m12 and m24 samples
    if consistent_data or select_training_set or select_test_set:
        consistent_method = 'all' if consistent_data else method
        consistent_data_handler = DataHandler.get_data_handler(method=consistent_method)
        consistent_measurements = consistent_data_handler.get_measurements_as_dict(
            visits=['bl', 'm12', 'm24'],
            select_training_set=select_training_set,
            select_test_set=select_test_set,
            select_complete=True,
            no_regression=True)

        consistent_rids = []
        consistent_diagnoses = []
        consistent_dpis = []
        consistent_dprs = []
        for i, rid in enumerate(rids):
            if rid in consistent_measurements:
                consistent_rids.append(rid)
                consistent_diagnoses.append(diagnoses[i])
                consistent_dpis.append(dpis[i])
                consistent_dprs.append(dprs[i])
        rids = consistent_rids
        diagnoses = consistent_diagnoses
        dpis = consistent_dpis
        dprs = consistent_dprs

        print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis))

    # Return results
    return rids, diagnoses, dpis, dprs, mean_min, mean_max
def get_biomarker_predictions(visits, predict_biomarker,
                              method=None, biomarkers=None, phase=None,
                              recompute_estimates=False, recompute_predictions=False, estimate_dprs=False,
                              select_test_set=False, consistent_data=False, exclude_cn=False,
                              use_last_visit=False, naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          method, '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          estimate_biomarkers_string,
                                                          '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits,
                                                                           method=method,
                                                                           biomarkers=biomarkers,
                                                                           phase=phase,
                                                                           recompute_estimates=recompute_estimates,
                                                                           estimate_dprs=estimate_dprs,
                                                                           select_test_set=select_test_set,
                                                                           consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit],
                                                             biomarkers=[predict_biomarker],
                                                             select_test_set=select_test_set,
                                                             select_complete=True)
        model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit]['scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr,
                                     value_model, value_naive,
                                     mean_quantile, mean_change, intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file)
        pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model
def plot_predictions(biomarker, model, visits, rid_measurements, dpi, dpr,
                     value_model, value_naive, mean_quantile, change, intercept, rid,
                     output_dir='/Users/aschmiri/Desktop/temp'):
    """
    Plot the model-based and the naive prediction of one subject and save the figure as PDF.

    The plot contains the quantile curves of the model, the measured values
    at ``visits`` and at the predicted visit (filled, coloured by diagnosis),
    both prediction lines, and the two predicted values (white markers with
    an edge coloured by the diagnosis at the predicted visit).

    :param biomarker: name of the predicted biomarker
    :param model: progression model of the biomarker
    :param visits: viscodes that were used for the prediction
    :param rid_measurements: measurements of the subject, indexed by viscode
    :param dpi: the estimated DPI of the subject
    :param dpr: the estimated DPR of the subject
    :param value_model: model-based prediction at the predicted visit
    :param value_naive: naive prediction at the predicted visit
    :param mean_quantile: quantile followed by the model-based prediction
    :param change: slope of the naive prediction line over scan time
    :param intercept: intercept of the naive prediction line
    :param rid: subject ID, used in the title and the file name
    :param output_dir: folder the PDF is written to; defaults to the
        previously hard-coded location
    """
    next_visit = get_predicted_visit(visits)
    scantime_first_visit = rid_measurements[visits[0]]['scantime']
    scantime_next_visit = rid_measurements[next_visit]['scantime']
    progress_first_visit = ModelFitter.scantime_to_progress(scantime_first_visit, scantime_first_visit, dpi, dpr)
    progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr)
    total_scantime = scantime_next_visit - scantime_first_visit

    # The curves extend 5% beyond the visits, the axis 10%
    progress_linspace = np.linspace(progress_first_visit - total_scantime * 0.05,
                                    progress_next_visit + total_scantime * 0.05, 100)

    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False, ygrid=False)
    ax.set_title('{0} predictions for RID {1} (DPI={2}, DPR={3})'.format(pt.get_biomarker_string(biomarker), rid, dpi, dpr))
    ax.set_xlabel('Disease progress (days before/after conversion to AD)')
    ax.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax.set_xlim(progress_first_visit - total_scantime * 0.1, progress_next_visit + total_scantime * 0.1)

    color_mapper = cm.ScalarMappable(cmap=plt.get_cmap(pt.progression_cmap),
                                     norm=colors.Normalize(vmin=0.0, vmax=1.0))

    # Plot the percentile curves of the fitted model
    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.8', '0.6', '0.4', '0.62', '0.84']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve = model.get_quantile_curve(progress_linspace, quantile)
        ax.plot(progress_linspace, curve, zorder=1, color=grey_value)

    # Collect points
    progr_points = []
    value_points = []
    diagn_points = []
    for visit in visits + [next_visit]:
        value_points.append(rid_measurements[visit][biomarker])
        progr_points.append(ModelFitter.scantime_to_progress(rid_measurements[visit]['scantime'],
                                                             scantime_first_visit, dpi, dpr))
        diagn_points.append(rid_measurements[visit]['DX.scan'])

    # Collect lines
    predict_diagnosis = rid_measurements[next_visit]['DX.scan']
    predict_linspace = np.linspace(progress_first_visit, progress_next_visit, 50)
    curve = [model.get_value_at_quantile(p, mean_quantile) for p in predict_linspace]
    # The naive line is defined over scan time, so convert progress back
    line = [change * ModelFitter.progress_to_scantime(p, scantime_first_visit, dpi, dpr) + intercept for p in predict_linspace]

    # Plot model and linear prediction line
    ax.plot(predict_linspace, line, zorder=1, linestyle='--', linewidth=2, color='k',
            label='naive prediction')
    ax.plot(predict_linspace, curve, zorder=1, linestyle='-', linewidth=2, color='k',
            label='model-based prediction')
    ax.scatter(progr_points, value_points, zorder=2, s=50.0,
               c=[color_mapper.to_rgba(d) for d in diagn_points], edgecolor='none')

    # Plot the predicted values
    ax.scatter([progress_next_visit], [value_naive], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))
    ax.scatter([progress_next_visit], [value_model], zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))

    plt.tight_layout()
    plt.legend()
    plot_filename = os.path.join(output_dir,
                                 'plot_predictions_{0}_{1}.pdf'.format(rid, biomarker))
    plt.savefig(plot_filename, transparent=True)
    # plt.show()
    plt.close(fig)
def plot_predictions(biomarker, model, visits, rid_measurements, dpi, dpr,
                     value_model, value_naive, mean_quantile, change,
                     intercept, rid,
                     output_dir='/Users/aschmiri/Desktop/temp'):
    """
    Save a PDF that compares the naive and the model-based prediction for one subject.

    Drawn are the quantile curves of the model, the subject's measurements at
    ``visits`` and at the predicted visit (coloured by diagnosis), the naive
    prediction line (dashed), the model-based prediction curve (solid) and
    both predicted values as white markers.

    :param biomarker: name of the predicted biomarker
    :param model: progression model of the biomarker
    :param visits: viscodes that were used for the prediction
    :param rid_measurements: measurements of the subject, indexed by viscode
    :param dpi: the estimated DPI of the subject
    :param dpr: the estimated DPR of the subject
    :param value_model: model-based prediction at the predicted visit
    :param value_naive: naive prediction at the predicted visit
    :param mean_quantile: quantile followed by the model-based prediction
    :param change: slope of the naive prediction line over scan time
    :param intercept: intercept of the naive prediction line
    :param rid: subject ID, used in the title and the file name
    :param output_dir: folder the PDF is written to; the default is the
        location that used to be hard-coded
    """
    next_visit = get_predicted_visit(visits)
    scantime_first_visit = rid_measurements[visits[0]]['scantime']
    scantime_next_visit = rid_measurements[next_visit]['scantime']

    def to_progress(scantime):
        # Progress of a scan time relative to the subject's first visit
        return ModelFitter.scantime_to_progress(
            scantime, scantime_first_visit, dpi, dpr)

    progress_first_visit = to_progress(scantime_first_visit)
    progress_next_visit = to_progress(scantime_next_visit)
    total_scantime = scantime_next_visit - scantime_first_visit

    # Curves reach 5% beyond the visits, the x axis 10%
    curve_margin = total_scantime * 0.05
    axis_margin = total_scantime * 0.1
    progress_linspace = np.linspace(progress_first_visit - curve_margin,
                                    progress_next_visit + curve_margin, 100)

    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False, ygrid=False)
    ax.set_title('{0} predictions for RID {1} (DPI={2}, DPR={3})'.format(
        pt.get_biomarker_string(biomarker), rid, dpi, dpr))
    ax.set_xlabel('Disease progress (days before/after conversion to AD)')
    ax.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax.set_xlim(progress_first_visit - axis_margin,
                progress_next_visit + axis_margin)

    color_mapper = cm.ScalarMappable(cmap=plt.get_cmap(pt.progression_cmap),
                                     norm=colors.Normalize(vmin=0.0, vmax=1.0))

    # Plot the percentile curves of the fitted model
    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.8', '0.6', '0.4', '0.62', '0.84']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve = model.get_quantile_curve(progress_linspace, quantile)
        ax.plot(progress_linspace, curve, zorder=1, color=grey_value)

    # Collect points
    progr_points = []
    value_points = []
    diagn_points = []
    for visit in visits + [next_visit]:
        visit_data = rid_measurements[visit]
        value_points.append(visit_data[biomarker])
        progr_points.append(to_progress(visit_data['scantime']))
        diagn_points.append(visit_data['DX.scan'])

    # Collect lines
    predict_diagnosis = rid_measurements[next_visit]['DX.scan']
    predict_linspace = np.linspace(progress_first_visit, progress_next_visit,
                                   50)
    curve = [
        model.get_value_at_quantile(p, mean_quantile) for p in predict_linspace
    ]
    # The naive line lives on the scan time axis, so map progress back first
    line = [
        change *
        ModelFitter.progress_to_scantime(p, scantime_first_visit, dpi, dpr) +
        intercept for p in predict_linspace
    ]

    # Plot model and linear prediction line
    ax.plot(predict_linspace, line,
            zorder=1, linestyle='--', linewidth=2, color='k',
            label='naive prediction')
    ax.plot(predict_linspace, curve,
            zorder=1, linestyle='-', linewidth=2, color='k',
            label='model-based prediction')
    ax.scatter(progr_points, value_points,
               zorder=2, s=50.0,
               c=[color_mapper.to_rgba(d) for d in diagn_points],
               edgecolor='none')

    # Plot the predicted values
    predict_edgecolor_naive = color_mapper.to_rgba(predict_diagnosis)
    ax.scatter([progress_next_visit], [value_naive],
               zorder=2, s=50.0, c='w',
               edgecolor=predict_edgecolor_naive)
    ax.scatter([progress_next_visit], [value_model],
               zorder=2, s=50.0, c='w',
               edgecolor=color_mapper.to_rgba(predict_diagnosis))

    plt.tight_layout()
    plt.legend()
    plot_filename = os.path.join(
        output_dir,
        'plot_predictions_{0}_{1}.pdf'.format(rid, biomarker))
    plt.savefig(plot_filename, transparent=True)
    # plt.show()
    plt.close(fig)
def main():
    """
    Command line entry point: estimate disease progress and plot the result.

    The estimates are obtained with ``et.get_progress_estimates`` for the
    test set (``select_test_set=True``). Unless ``--no_plot`` is given, the
    DPI estimates are plotted, and with ``--estimate_dprs`` the DPI/DPR
    distribution is plotted as well.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits', nargs='+', type=str,
                        help='the viscodes to be sampled')
    parser.add_argument('-m', '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None,
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    # The help text used to be a copy of the one of --recompute_estimates
    parser.add_argument('--estimate_dprs', action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates', action='store_true',
                        help='recompute the dpis estimations')
    parser.add_argument('--consistent_data', action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--no_plot', action='store_true',
                        help='do not plot the results')
    parser.add_argument('--plot_lines', action='store_true',
                        help='plot graphs instead of matrix')
    parser.add_argument('--plot_steps', type=int, default=15,
                        help='number of steps for the DPI scale')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    parser.add_argument('--plot_cmap_jet', action='store_true',
                        help='use the colour map jet')
    args = parser.parse_args()

    # Get estimates
    _, diagnoses, dpis, dprs, mean_min, mean_max = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        select_test_set=True,
        consistent_data=args.consistent_data)

    # Plot results
    if not args.no_plot:
        plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max)
        if args.estimate_dprs:
            plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses)
示例#51
0
def main():
    """
    Command line entry point: plot the progression model of each biomarker.

    Parses the plotting options, creates the data handler for the selected
    method, biomarkers and phase, and calls ``plot_model`` once for every
    biomarker name the data handler reports. The parsed arguments are handed
    to ``plot_model`` unchanged.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad',
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str,
                        choices=['lin', 'sqrt', 'exp'],
                        default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--xlim', type=float, nargs=2, default=None,
                        help='force certain x limits for plotting')
    parser.add_argument('--ylim', type=float, nargs=2, default=None,
                        help='force certain y limits for plotting')
    # 'store_true' flags default to False, so no explicit default is needed
    parser.add_argument('--no_model', action='store_true',
                        help='do not plot the fitted model')
    parser.add_argument('--no_points', action='store_true',
                        help='do not plot points')
    parser.add_argument('--points_alpha', type=float, default=0.25,
                        help='alpha value of the plotted points')
    parser.add_argument('--no_densities', action='store_true',
                        help='do not plot densities')
    parser.add_argument('--no_sample_lines', action='store_true',
                        help='do not plot the sample lines')
    parser.add_argument('--only_densities', action='store_true',
                        help='only plot densities')
    parser.add_argument('--no_extrapolation', action='store_true',
                        help='do not extrapolate the model')
    parser.add_argument('--plot_eta', type=str,
                        choices=['lambda', 'mu', 'sigma'],
                        default=None,
                        help='plot a predictor function')
    parser.add_argument('--plot_errors', action='store_true',
                        help='plot the errors')
    parser.add_argument('--plot_synth_model', action='store_true',
                        help='plot density distributions for synthetic data')
    parser.add_argument('--plot_quantile_label', action='store_true',
                        help='plot labels on the quantile curves')
    parser.add_argument('--plot_donohue', action='store_true',
                        help='plot the trajectory estimated with Donohue et al.')
    parser.add_argument('--save_plots', action='store_true',
                        help='save the plots with a default filename')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max):
    print log.INFO, 'Plotting estimates...'
    test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase)
    dpi_range = float(test_dpi_max - test_dpi_min)
    dpi_factor = float(args.plot_steps) / dpi_range

    # Setup plot
    fig, ax = plt.subplots(figsize=(6, 2))
    biomarkers_str = args.method if args.biomarkers is None else ', '.join(
        args.biomarkers)
    ax.set_title('DP estimation using {0} at {1}'.format(
        biomarkers_str, ', '.join(args.visits)))
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    xticks = np.linspace(0, args.plot_steps, 7)
    ax.set_xticks(xticks)
    ax.set_xticklabels(
        [int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks])

    # Compute matrix
    diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3}
    matrix = np.zeros((4, args.plot_steps + 1))
    for dpi, diag in zip(dpis, diagnoses):
        row = diagnosis_indices[diag]
        dpi_index = round((dpi - test_dpi_min) * dpi_factor)
        matrix[row, dpi_index] += 1.0

    # Draw annotations
    dpis = np.array(dpis)
    diagnoses = np.array(diagnoses)
    medians = []
    q25 = []
    q75 = []
    for diag in [0.0, 0.25, 0.75, 1.0]:
        row = diagnosis_indices[diag]
        matrix[row] /= np.sum(matrix[row])

        indices = np.where(diagnoses == diag)
        median = np.median(dpis[indices])
        medians.append((median - test_dpi_min) * dpi_factor)
        q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor)
        q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor)

    if args.plot_lines:
        ax.set_ylim(-0.01, 0.36)

        sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(0.0, 1.0),
                                         cmap=plt.get_cmap(
                                             pt.progression_cmap))
        for diag in [0.0, 0.25, 0.75, 1.0]:
            row = diagnosis_indices[diag]
            plt.plot(matrix[row], color=sample_cmap.to_rgba(diag))
    else:
        ax.set_yticks([0, 1, 2, 3])
        ax.set_yticklabels(['CN', 'EMCI', 'LMCI', 'AD'])

        cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap(
            'Greys')
        bar_color = 'w' if args.plot_cmap_jet else 'r'
        plt.errorbar(medians, [0, 1, 2, 3],
                     xerr=[q25, q75],
                     fmt='none',
                     ecolor=bar_color,
                     elinewidth=2,
                     capsize=4,
                     capthick=2)
        plt.plot(medians, [0, 1, 2, 3],
                 linestyle='',
                 color=bar_color,
                 marker='|',
                 markersize=15,
                 markeredgewidth=2)
        plt.imshow(matrix, cmap=cmap, interpolation='nearest')
    plt.axvline((mean_min - test_dpi_min) * dpi_factor,
                color='k',
                linestyle=':',
                alpha=0.6)
    plt.axvline((mean_max - test_dpi_min) * dpi_factor,
                color='k',
                linestyle=':',
                alpha=0.6)
    plt.axvline((0.0 - test_dpi_min) * dpi_factor,
                color='k',
                linestyle='-',
                alpha=0.6)
    if args.phase == 'joint':
        data_handler = DataHandler.get_data_handler(method=args.method,
                                                    biomarkers=args.biomarkers,
                                                    phase=args.phase)
        plt.axvline(
            (data_handler.get_model_offset() - test_dpi_min) * dpi_factor,
            color='k',
            linestyle='-',
            alpha=0.6)

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is not None:
        plt.savefig(args.plot_file, transparent=True)
    else:
        plt.show()
    plt.close(fig)
# Example #53
# 0
def main():
    """
    Parse the command-line arguments, collect the progress estimates for the
    requested visits and pass them on to the diagnosis classification.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('visits',
                        nargs='+',
                        type=str,
                        help='the viscodes to be sampled')
    parser.add_argument('-m',
                        '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b',
                        '--biomarkers',
                        nargs='+',
                        default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p',
                        '--phase',
                        default=None,
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-c',
                        '--classifier',
                        default='svm',
                        choices=['lda', 'svm', 'lsvm', 'rf'],
                        help='the approach used to classify the subjects')
    # The help text used to be a copy of the --recompute_estimates text
    parser.add_argument('--estimate_dprs',
                        action='store_true',
                        help='estimate dpis and dprs')
    parser.add_argument('--recompute_estimates',
                        action='store_true',
                        help='recompute the dpi / dpr estimations')
    parser.add_argument('--consistent_data',
                        action='store_true',
                        help='use only subjects with bl, m12 and m24 visits')
    parser.add_argument('--num_folds',
                        type=int,
                        default=10,
                        help='number of folds for the n-fold cross validation')
    parser.add_argument(
        '--num_runs',
        type=int,
        default=1,
        help='number of runs the x-fold cross-validation is performed')
    parser.add_argument('--latex_file',
                        type=str,
                        default=None,
                        help='add output to a LaTeX file')
    args = parser.parse_args()

    # Get estimates (the subject ids and the mean progress bounds that are
    # also returned are not needed for the classification)
    _, diagnoses, dpis, dprs, _, _ = et.get_progress_estimates(
        args.visits,
        method=args.method,
        biomarkers=args.biomarkers,
        phase=args.phase,
        estimate_dprs=args.estimate_dprs,
        recompute_estimates=args.recompute_estimates,
        consistent_data=args.consistent_data)

    # Analyse estimates
    classify_diagnoses(args, dpis, dprs, diagnoses)
def get_progress_estimates(visits,
                           method=None,
                           biomarkers=None,
                           phase=None,
                           recompute_estimates=False,
                           estimate_dprs=False,
                           consistent_data=False,
                           select_training_set=False,
                           select_test_set=False):
    # Get data handler and biomarker names
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)

    # Get filename
    estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p'
    if biomarkers is None:
        estimates_file_basename = estimates_file_trunk.format(
            method, '_'.join(visits))
    else:
        biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        estimates_file_basename = estimates_file_trunk.format(
            biomarkers_string, '_'.join(visits))
    estimates_file = os.path.join(data_handler.get_eval_folder(),
                                  estimates_file_basename)

    # Read if estimates exist, else recompute
    if os.path.isfile(estimates_file) and not recompute_estimates:
        # Read test results from file
        print log.INFO, 'Reading DPI{0} estimations from {1}...'.format(
            '\DPR' if estimate_dprs else '', estimates_file)
        (rids, diagnoses, dpis, dprs, mean_min,
         mean_max) = pickle.load(open(estimates_file, 'rb'))
    else:
        # Collect data for test
        biomarkers = data_handler.get_biomarker_names()
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12', 'm24'],
            biomarkers=biomarkers,
            select_complete=True)

        # Setup model
        model = MultiBiomarkerProgressionModel()
        for biomarker in biomarkers:
            model_file = data_handler.get_model_file(biomarker)
            model.add_model(biomarker, model_file)
        fitter = ModelFitter(model)

        # Calculate mean and max progress
        mean_min = model.get_mean_min_progress()
        mean_max = model.get_mean_max_progress()

        # Estimate dpis (and dprs) and save data
        if not estimate_dprs or len(visits) == 1:
            if estimate_dprs and len(visits) == 1:
                print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)'
            rids, diagnoses, dpis = estimate_dpis(measurements,
                                                  visits,
                                                  fitter,
                                                  phase=phase)
            dprs = np.ones(len(dpis)).tolist()
        else:
            rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements,
                                                             visits,
                                                             fitter,
                                                             phase=phase)

        print log.INFO, 'Saving DPI{0} estimations to {1}...'.format(
            '\DPR' if estimate_dprs else '', estimates_file)
        pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max),
                    open(estimates_file, 'wb'))

    # Reduce to consistent data sets with bl, m12 and m24 samples
    if consistent_data or select_training_set or select_test_set:
        consistent_method = 'all' if consistent_data else method
        consistent_data_handler = DataHandler.get_data_handler(
            method=consistent_method)
        consistent_measurements = consistent_data_handler.get_measurements_as_dict(
            visits=['bl', 'm12', 'm24'],
            select_training_set=select_training_set,
            select_test_set=select_test_set,
            select_complete=True,
            no_regression=True)

        consistent_rids = []
        consistent_diagnoses = []
        consistent_dpis = []
        consistent_dprs = []
        for i, rid in enumerate(rids):
            if rid in consistent_measurements:
                consistent_rids.append(rid)
                consistent_diagnoses.append(diagnoses[i])
                consistent_dpis.append(dpis[i])
                consistent_dprs.append(dprs[i])
        rids = consistent_rids
        diagnoses = consistent_diagnoses
        dpis = consistent_dpis
        dprs = consistent_dprs

        print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis))

    # Return results
    return rids, diagnoses, dpis, dprs, mean_min, mean_max
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b',
                        '--biomarkers',
                        nargs='+',
                        default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p',
                        '--phase',
                        default=None,
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots',
                        action='store_true',
                        default=False,
                        help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm12'],
        biomarkers=biomarkers,
        select_training_set=True,
        select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(),
                                       'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        print log.INFO, 'Generating quantile correlation plot for {0}...'.format(
            biomarker)
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))

        if os.path.isfile(q_file):
            (q_bl, q_m12) = pickle.load(open(q_file, 'rb'))
        else:
            q_bl = []
            q_m12 = []

            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            pickle.dump((q_bl, q_m12), open(q_file, 'wb'))

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')

        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)

        errors = q_bl - q_m12
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max):
    print log.INFO, 'Plotting estimates...'
    test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase)
    dpi_range = float(test_dpi_max - test_dpi_min)
    dpi_factor = float(args.plot_steps) / dpi_range

    # Setup plot
    fig, ax = plt.subplots(figsize=(6, 2))
    biomarkers_str = args.method if args.biomarkers is None else ', '.join(args.biomarkers)
    ax.set_title('DP estimation using {0} at {1}'.format(biomarkers_str, ', '.join(args.visits)))
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    xticks = np.linspace(0, args.plot_steps, 7)
    ax.set_xticks(xticks)
    ax.set_xticklabels([int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks])

    # Compute matrix
    diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3}
    matrix = np.zeros((4, args.plot_steps + 1))
    for dpi, diag in zip(dpis, diagnoses):
        row = diagnosis_indices[diag]
        dpi_index = round((dpi - test_dpi_min) * dpi_factor)
        matrix[row, dpi_index] += 1.0

    # Draw annotations
    dpis = np.array(dpis)
    diagnoses = np.array(diagnoses)
    medians = []
    q25 = []
    q75 = []
    for diag in [0.0, 0.25, 0.75, 1.0]:
        row = diagnosis_indices[diag]
        matrix[row] /= np.sum(matrix[row])

        indices = np.where(diagnoses == diag)
        median = np.median(dpis[indices])
        medians.append((median - test_dpi_min) * dpi_factor)
        q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor)
        q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor)

    if args.plot_lines:
        ax.set_ylim(-0.01, 0.36)

        sample_cmap = cmx.ScalarMappable(
            norm=colors.Normalize(0.0, 1.0),
            cmap=plt.get_cmap(pt.progression_cmap))
        for diag in [0.0, 0.25, 0.75, 1.0]:
            row = diagnosis_indices[diag]
            plt.plot(matrix[row], color=sample_cmap.to_rgba(diag))
    else:
        ax.set_yticks([0, 1, 2, 3])
        ax.set_yticklabels(['CN', 'EMCI', 'LMCI', 'AD'])

        cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap('Greys')
        bar_color = 'w' if args.plot_cmap_jet else 'r'
        plt.errorbar(medians, [0, 1, 2, 3], xerr=[q25, q75], fmt='none',
                     ecolor=bar_color, elinewidth=2,
                     capsize=4, capthick=2)
        plt.plot(medians, [0, 1, 2, 3], linestyle='', color=bar_color, marker='|', markersize=15, markeredgewidth=2)
        plt.imshow(matrix, cmap=cmap, interpolation='nearest')
    plt.axvline((mean_min - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6)
    plt.axvline((mean_max - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6)
    plt.axvline((0.0 - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6)
    if args.phase == 'joint':
        data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase)
        plt.axvline((data_handler.get_model_offset() - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6)

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is not None:
        plt.savefig(args.plot_file, transparent=True)
    else:
        plt.show()
    plt.close(fig)
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr):
    """
    Plot the model of one biomarker with the fitted values

    The quantile curves of the model are drawn solid inside the progress
    range of the model and dashed in the extrapolated range on both sides.
    Every visit that has a value for the biomarker is placed at the progress
    ``scantime * dpr + dpi``.

    :param data_handler: the data handler
    :param biomarker: the biomarker to plot
    :param measurements: the measurements containing the biomarker samples of
                         one subject, stored under the key 0
    :param dpi: the estimated DPI
    :param dpr: the estimated DPR
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)

    #
    # Read model
    #
    pm = ProgressionModel(biomarker, model_file)
    # Extend the plotted range by 30% of the model range on both sides
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate,
                                        pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress,
                                        max_progress_extrapolate, 20)

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    fig = plt.figure(figsize=(6, 5))
    ax1 = plt.subplot(1, 1, 1)
    pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    ax1.set_title(
        'Model for {0} with fitted sample values'.format(biomarker_string))
    ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
    ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot the percentile curves of the fitted model
    #
    ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
    ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
    for grey_value, quantile in zip(grey_values, quantiles):
        curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
        ax1.plot(progress_linspace_int, curve_int, color=grey_value)

        curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile)
        curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile)
        ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value)
        ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value)

        # Label the curve just right of the end of the model range
        label = 'q = {0}'.format(quantile * 100)
        ax1.text(progress_linspace_int[-1] + 100,
                 curve_int[-1],
                 label,
                 fontsize=10)

    #
    # Plot points
    #
    progr_points = []
    value_points = []
    subject_visits = measurements[0]
    for visit in subject_visits:
        sample = subject_visits[visit]
        if biomarker not in sample:
            continue
        progress = sample['scantime'] * dpr + dpi
        value = sample[biomarker]
        progr_points.append(progress)
        value_points.append(value)
        ax1.axvline(progress, color='b', linestyle='--')
        ax1.text(progress + 150, value, visit, color='b', fontsize=10)

    ax1.scatter(progr_points,
                value_points,
                s=25.0,
                color='b',
                edgecolor='none',
                vmin=0.0,
                vmax=1.0,
                alpha=0.9)

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    plt.show()
    plt.close(fig)