Пример #1
0
def run_dummy_baseline(set_name, attribute):
    '''
    Runs a simple baseline that ignores the inputs: it predicts the mean (regression problems) or
    the most frequent value (all other problems) of the given attribute. The baseline is fitted
    and scored on the same set.

    For classification problems whose labels are exactly the 'NMD' / 'no NMD' diagnosis values,
    the ROC AUC of the baseline is additionally printed.

    :param set_name: the name of the dataset to run on
    :param attribute: the attribute to predict
    :return: the mean absolute error for regression problems, otherwise the classification report
    '''
    set_spec_dict = get_default_set_spec_dict()
    set_spec = set_spec_dict[set_name]
    class_values = problem_legal_values[attribute] if attribute in problem_legal_values else None
    patients = get_data_for_spec(set_spec, loader_type='bag', attribute_to_filter=attribute,
                                 legal_attribute_values=class_values,
                                 muscles_to_use=None)

    y_true = [patient.attributes[attribute] for patient in patients]

    kind = problem_kind[attribute]
    is_regression = kind == 'regression'

    if is_regression:
        d = DummyRegressor(strategy='mean')
        scorer = mean_absolute_error
    else:
        d = DummyClassifier(strategy='most_frequent')
        scorer = classification_report

    # The dummy estimators never look at the features, so constant placeholders are sufficient.
    placeholder_inputs = [0] * len(y_true)
    d.fit(placeholder_inputs, y_true)
    train_preds = d.predict(placeholder_inputs)

    if not is_regression:
        mapping = {'NMD': 1, 'no NMD': 0}
        # The numeric mapping only exists for the NMD diagnosis labels. Other categorical
        # attributes used to fail here with a KeyError; they now skip the AUC and still get
        # their classification report below.
        if all(y in mapping for y in y_true):
            y_true_rv = [mapping[y] for y in y_true]
            train_preds_rv = [mapping[y] for y in train_preds]
            print(roc_auc_score(y_true_rv, train_preds_rv))

    return scorer(y_true, train_preds)
Пример #2
0
def export_selected_records(set_name):
    '''
    Collect the meta information of the selected record of every patient in a set.

    Each patient first selects its closest record (``select_closest``); the ``meta_info`` of that
    record then becomes one row of the result.

    :param set_name: the name of the dataset to export
    :return: a DataFrame with one row of record meta information per patient
    '''
    spec = get_default_set_spec_dict()[set_name]
    patients = get_data_for_spec(
        spec,
        loader_type='bag',
        attribute_to_filter='Class',
        legal_attribute_values=problem_legal_values['Class'],
        muscles_to_use=None)

    def _selected_meta_info(patient):
        # The selection has to happen before the selected record is requested.
        patient.select_closest()
        return patient.get_selected_record().meta_info

    return pd.DataFrame([_selected_meta_info(patient) for patient in patients])
Пример #3
0
def extract_y_true(set_name):
    '''
    Extract the ground truth NMD diagnosis of every patient in a set.

    :param set_name: the name of the dataset to read
    :return: a tuple of (list of labels encoded as 1 for 'NMD' and 0 for 'no NMD',
             DataFrame built from the attributes of each patient)
    '''
    label_to_int = {'NMD': 1, 'no NMD': 0}

    spec = get_default_set_spec_dict()[set_name]
    patients = get_data_for_spec(
        spec,
        loader_type='bag',
        attribute_to_filter='Class',
        legal_attribute_values=problem_legal_values['Class'],
        muscles_to_use=None)

    # Single pass over the patients, collecting the label next to the full attribute set.
    collected = [(patient.attributes['Class'], patient.attributes) for patient in patients]

    encoded_labels = [label_to_int[label] for label, _ in collected]
    attribute_rows = [attributes for _, attributes in collected]

    return encoded_labels, pd.DataFrame(attribute_rows)
Пример #4
0
def compute_brightness(set_name, device_name):
    """
    Print the normalization parameters (mean and std) of the images in a set.

    The images are loaded as a single-channel ``SingleImageDataset`` without size limiting and
    handed to ``compute_normalization_parameters``. Nothing is returned; both values are printed.

    :param set_name: the name of the image set, e.g. "ESAOTE_6100_train"
    :param device_name: the device name used to build the transform, e.g. "ESAOTE_6100"
    """
    attribute_specs = make_att_specs()
    spec = get_default_set_spec_dict()[set_name]
    image_frame = get_data_for_spec(spec, loader_type='image', dropna_values=False)
    basic_transform = make_basic_transform(device_name, limit_image_size=False, to_tensor=True)

    dataset = SingleImageDataset(
        image_frame=image_frame,
        root_dir=spec.img_root_path,
        attribute_specs=[attribute_specs['Sex']],
        return_attribute_dict=False,
        transform=basic_transform,
        use_one_channel=True)

    # NOTE(review): the second argument is presumably the channel count (images are loaded with
    # use_one_channel=True) - confirm against compute_normalization_parameters.
    mean, std = compute_normalization_parameters(dataset, 1)

    for statistic in (mean, std):
        print(statistic)
Пример #5
0
def run_rule_based_baseline(set_name, ei_extraction_method):
    """
    Compute the rule-based prediction for every patient in a set.

    :param set_name: The data set to use.
    :param ei_extraction_method: Callable that takes a record and returns a dict containing the
        'EIZ' scores to base the prediction on.
    :return: Array of numeric predictions: 1 for 'NMD', 0 for 'no NMD' and 0.5 for
        'unknown or uncertain'.
    """
    spec = get_default_set_spec_dict()[set_name]
    patients = get_data_for_spec(
        spec,
        loader_type='bag',
        attribute_to_filter='Class',
        legal_attribute_values=problem_legal_values['Class'],
        muscles_to_use=None)

    def _predict_for(patient):
        # Pick the record to judge, then run the rules on its EIZ-based feature representation.
        patient.try_closest_fallback_to_latest()
        selected = patient.get_selected_record()
        z_scores = ei_extraction_method(selected)['EIZ']
        return predict_rule_based(get_feature_rep_for_rule_based(z_scores, selected))

    labels = pd.Series([_predict_for(patient) for patient in tqdm.tqdm(patients)])

    # The rule-based model may also answer with an uncertain disease state. Encoding that as 0.5
    # provides an additional threshold during ROC computation.
    label_to_score = {
        'NMD': 1,
        'no NMD': 0,
        'unknown or uncertain': 0.5
    }
    return labels.replace(label_to_score).values
Пример #6
0
def obtain_feature_rep_ml_experiment(set_name,
                                     use_eiz=True,
                                     ei_extraction_method=None,
                                     additional_features=None):
    '''
    A method that maps the entire provided set into the feature representation used for the Trad ML experiments.

    For every patient the selected record is reduced to one row: each extractor listed in
    ``extractor_frame`` is applied to the (NaN-free) score vector, the requested features from the
    record meta information are added, and the 'Class' label is attached. Patients whose score
    vector contains only NaN values are skipped.

    :param set_name: The name of the dataset to be mapped.
    :param use_eiz: Use EIZ scores? If false, use raw EI scores.
    :param ei_extraction_method: The method for extracting EI scores from records. Can use original scores or recompute.
        Defaults to the original scores.
    :param additional_features: Additional demographic features to be included (extracted from the records)
    :return: A DataFrame of mapped patient records for classification.
    :raises ValueError: If z-scores are requested but the extraction method does not provide them.
    '''
    # use the original scores as default
    if not ei_extraction_method:
        ei_extraction_method = partial(get_original_scores)
    if not additional_features:
        additional_features = []

    set_spec_dict = get_default_set_spec_dict()
    set_spec = set_spec_dict[set_name]
    patients = get_data_for_spec(
        set_spec,
        loader_type='bag',
        attribute_to_filter='Class',
        legal_attribute_values=problem_legal_values['Class'],
        muscles_to_use=None)

    # allow swapping between z_scores and EI values; the key doubles as the feature name prefix
    prefix = 'EIZ' if use_eiz else 'EI'
    # optional smoothing with different bins; empty means only the original scale is used
    smoothing_factors = []
    # The extractor table does not change between patients, so read it once up front.
    extractors = [(row['name'], row['func']) for _, row in extractor_frame.iterrows()]

    feature_reps = []
    for patient in patients:
        patient.try_closest_fallback_to_latest()
        record = patient.get_selected_record()

        return_dict = ei_extraction_method(record)
        if use_eiz and 'EIZ' not in return_dict:
            raise ValueError(
                f'Required z-score computation, but method {ei_extraction_method} did not provide it.'
            )
        vector = return_dict[prefix]

        # drop NaN entries and skip patients that have no usable value left
        vector = vector[~np.isnan(vector)]
        if len(vector) == 0:
            continue

        smoothed_vectors = {
            'smoothed_' + str(smoothing_factor): smooth_vector(vector, (0, 256), smoothing_factor)
            for smoothing_factor in smoothing_factors
        }
        # always use the original scale
        smoothed_vectors['base'] = vector

        # feature extraction starts here
        feature_rep = {}
        for smoothing_name, smoothed_vector in smoothed_vectors.items():
            # features on the original scale carry no smoothing suffix
            suffix = '' if smoothing_name == 'base' else '_' + smoothing_name
            for extractor_name, func in extractors:
                feature_rep[prefix + '_' + extractor_name + suffix] = func(smoothed_vector)

        # demographic features override extracted features of the same name
        feature_rep.update(extract_features_from_meta_info(record, additional_features))
        feature_rep['Class'] = record.meta_info['Class']
        feature_reps.append(feature_rep)

    return pd.DataFrame(feature_reps)
import os

import itk
import numpy

from utils.experiment_utils import get_default_set_spec_dict
from loading.loaders import get_data_for_spec, make_bag_dataset, make_basic_transform
from loading.datasets import make_att_specs, PatientBagDataset
from loading.datasets import problem_legal_values
if __name__ == '__main__':
    set_name = 'ESAOTE_6100_val'
    set_spec_dict = get_default_set_spec_dict()
    set_spec = set_spec_dict[set_name]
    patients = get_data_for_spec(
        set_spec,
        loader_type='bag',
        attribute_to_filter='Class',
        legal_attribute_values=problem_legal_values['Class'],
        muscles_to_use=None)

    att_spec_dict = make_att_specs()

    transform = make_basic_transform(set_spec.device,
                                     normalizer_name=None,
                                     to_tensor=False,
                                     limit_image_size=True)

    ds = PatientBagDataset(patient_list=patients,
                           root_dir=set_spec.img_root_path,
                           attribute_specs=[att_spec_dict['Sex']],
                           transform=transform,
                           use_pseudopatients=False,