Пример #1
0
def main(settingsfname,
         verbose=False,
         store_models=True,
         store_features=False,
         save_training_detailed=False,
         load_pickled=False,
         parallel=0):
    """Train a model for every subject in the settings and report AUC scores.

    Args:
        settingsfname: path of the settings file passed to
            ``utils.get_settings``.
        verbose: flag passed to ``utils.print_verbose`` and forwarded to the
            data-loading and training helpers.
        store_models: forwarded unchanged to the training helpers.
        store_features: forwarded unchanged to ``utils.train_RFE``.
        save_training_detailed: when truthy, used as the path of a pickle
            file that receives the detailed results of the last subject in
            ``settings['SUBJECTS']``.
        load_pickled: when a string, the path of a pickled feature dump to
            load; when otherwise truthy, the dump path is derived from
            ``settingsfname``. The loaded object is handed to the training
            helpers as ``extra_data``.
        parallel: 0 trains the subjects one after another; any other value
            is used as ``n_jobs`` for ``joblib.Parallel``.

    Returns:
        dict of AUC scores: the per-subject entries plus the combined score
        under the key ``'all'``.

    Raises:
        EnvironmentError: none of the requested features is present in the
            loaded data.
        NotImplementedError: ``parallel`` is set and the settings contain
            an ``'RFE'`` entry.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    # Keep only the requested features that are present in the loaded data;
    # the key list is built once instead of once per feature.
    loaded_features = list(data.keys())
    settings['FEATURES'] = [
        feature for feature in settings['FEATURES']
        if feature in loaded_features
    ]

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # if we're loading pickled features then load them
    if load_pickled:
        if isinstance(load_pickled, str):
            pickle_path = load_pickled
        else:
            # NOTE(review): everything after the FIRST '.' of the path is
            # dropped, so a path such as './conf/run.json' yields
            # '_feature_dump.pickle'. Left unchanged because the name
            # presumably has to match the one used when the dump was
            # written - confirm before changing.
            pickle_path = settingsfname.split(".")[0] + "_feature_dump.pickle"
        # SECURITY: unpickling can execute arbitrary code; only load feature
        # dumps from a trusted source.
        with open(pickle_path, "rb") as fh:
            Xtra = pickle.load(fh)
    else:
        Xtra = None

    # dictionary for final scores
    auc_scores = {}

    if parallel:
        if 'RFE' in settings:
            raise NotImplementedError('Parallel RFE is not implemented')

        # NOTE(review): the parallel path always calls utils.train_model,
        # even when 'CUSTOM' is in the settings - confirm this is intended.
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(settings,
                                              data,
                                              metadata,
                                              subject,
                                              model_pipe,
                                              store_models,
                                              load_pickled,
                                              verbose,
                                              extra_data=Xtra,
                                              parallel=parallel)
            for subject in subjects)

        # Each job returns (results, auc); both are merged with
        # dict.update, so they are expected to be mappings (presumably
        # keyed by subject - confirm against utils.train_model).
        for job_output in output:
            subject_predictions.update(job_output[0])
        for job_output in output:
            auc_scores.update(job_output[1])
    else:
        for subject in subjects:
            utils.print_verbose("=====Training {0} Model=====".format(
                str(subject)),
                                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings,
                    data,
                    metadata,
                    subject,
                    model_pipe,
                    transformed_features,
                    store_models,
                    store_features,
                    load_pickled,
                    settingsfname,
                    verbose,
                    extra_data=Xtra)
                # RFE produces no per-subject predictions.
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(settings,
                                                        data,
                                                        metadata,
                                                        subject,
                                                        model_pipe,
                                                        store_models,
                                                        load_pickled,
                                                        verbose,
                                                        extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(settings,
                                                 data,
                                                 metadata,
                                                 subject,
                                                 model_pipe,
                                                 store_models,
                                                 load_pickled,
                                                 verbose,
                                                 extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})

    if save_training_detailed:
        if not subject_predictions:
            # RFE runs (predictions set to None) and empty subject lists
            # have nothing to dump; skip instead of crashing before the AUC
            # scores are written out.
            print("No detailed training results available; not writing "
                  "{0}".format(save_training_detailed))
        else:
            # The serial loop variable is not defined in the parallel path
            # (generator-expression variables do not leak), so the key is
            # taken from the subject list explicitly.
            # NOTE(review): only the LAST subject's results are written,
            # matching the previous serial behaviour - confirm whether the
            # whole dictionary was intended.
            last_subject = list(subjects)[-1]
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))
    auc_scores.update({'all': combined_auc})
    utils.output_auc_scores(auc_scores, settings)

    return auc_scores
Пример #2
0
def main(settings_file='SETTINGS.json', verbose=False):
    """Predict on the test set of every subject and write the results.

    For each subject in ``settings['SUBJECTS']`` the trained model is read
    with ``utils.read_trained_model``, the test matrix is built with
    ``utils.DataAssembler`` and ``predict_proba`` is called on it. The
    predictions are collected per test segment and handed to
    ``utils.output_csv``.

    When every feature name contains ``'10feat'`` several prediction rows
    can belong to one segment; those rows are averaged column-wise so each
    segment ends up with a single prediction.

    Args:
        settings_file: path of the settings file passed to
            ``utils.get_settings``.
        verbose: forwarded to ``utils.read_trained_model`` and
            ``utils.output_csv``.

    Raises:
        EnvironmentError: none of the requested features is present in the
            loaded data.
        ValueError: only some of the feature names contain ``'10feat'``.
    """
    # load the settings
    settings = utils.get_settings(settings_file)

    subjects = settings['SUBJECTS']

    # load the data
    data = utils.get_data(settings)

    # load the metadata
    metadata = utils.get_metadata()

    # keep only the requested features that are present in the loaded data
    features_that_parsed = list(data.keys())
    settings['FEATURES'] = [
        feature for feature in settings['FEATURES']
        if feature in features_that_parsed
    ]

    # Without this guard an empty feature list would be treated as
    # "all minute features" because all() of an empty sequence is True.
    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    # Decide whether the per-segment averaging applies: either every feature
    # name carries the '10feat' marker or none does.
    minute_flags = ['10feat' in feature for feature in settings['FEATURES']]
    if all(minute_flags):
        minutefeatures = True
    elif any(minute_flags):
        raise ValueError("Cannot mix 1-minute and 10-minute features.")
    else:
        minutefeatures = False

    # iterate over subjects
    prediction_dict = {}

    for subject in subjects:
        # load the trained model:
        model = utils.read_trained_model(subject, settings, verbose=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        # build test set
        X = assembler.build_test(subject)

        # make predictions
        predictions = model.predict_proba(X)

        if minutefeatures:
            # Gather all prediction rows that belong to the same segment.
            segmentdict = {}
            for segment, prediction in zip(assembler.test_segments,
                                           predictions):
                segmentdict.setdefault(segment, []).append(prediction)
            # Average each segment's rows along their columns. Iterating the
            # distinct segments (not test_segments, which repeats them)
            # ensures every segment is averaged exactly once, and the
            # average - not the last raw row - is what gets stored.
            for segment, segment_predictions in segmentdict.items():
                prediction_dict[segment] = np.mean(
                    np.vstack(segment_predictions), axis=0)
        else:
            for segment, prediction in zip(assembler.test_segments,
                                           predictions):
                prediction_dict[segment] = prediction

    utils.output_csv(prediction_dict, settings, verbose=verbose)
Пример #3
0
def main(settingsfname, verbose=False, store_models=True,
         store_features=False, save_training_detailed=False,
         load_pickled=False, parallel=0):
    """Train a model for every subject in the settings and report AUC scores.

    Args:
        settingsfname: path of the settings file passed to
            ``utils.get_settings``.
        verbose: flag passed to ``utils.print_verbose`` and forwarded to the
            data-loading and training helpers.
        store_models: forwarded unchanged to the training helpers.
        store_features: forwarded unchanged to ``utils.train_RFE``.
        save_training_detailed: when truthy, used as the path of a pickle
            file that receives the detailed results of the last subject in
            ``settings['SUBJECTS']``.
        load_pickled: when a string, the path of a pickled feature dump to
            load; when otherwise truthy, the dump path is derived from
            ``settingsfname``. The loaded object is handed to the training
            helpers as ``extra_data``.
        parallel: 0 trains the subjects one after another; any other value
            is used as ``n_jobs`` for ``joblib.Parallel``.

    Returns:
        dict of AUC scores: the per-subject entries plus the combined score
        under the key ``'all'``.

    Raises:
        EnvironmentError: none of the requested features is present in the
            loaded data.
        NotImplementedError: ``parallel`` is set and the settings contain
            an ``'RFE'`` entry.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    # Keep only the requested features that are present in the loaded data;
    # the key list is built once instead of once per feature.
    loaded_features = list(data.keys())
    settings['FEATURES'] = [feature for feature in settings['FEATURES']
                            if feature in loaded_features]

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # if we're loading pickled features then load them
    if load_pickled:
        if isinstance(load_pickled, str):
            pickle_path = load_pickled
        else:
            # NOTE(review): everything after the FIRST '.' of the path is
            # dropped, so a path such as './conf/run.json' yields
            # '_feature_dump.pickle'. Left unchanged because the name
            # presumably has to match the one used when the dump was
            # written - confirm before changing.
            pickle_path = settingsfname.split(".")[0] + "_feature_dump.pickle"
        # SECURITY: unpickling can execute arbitrary code; only load feature
        # dumps from a trusted source.
        with open(pickle_path, "rb") as fh:
            Xtra = pickle.load(fh)
    else:
        Xtra = None

    # dictionary for final scores
    auc_scores = {}

    if parallel:
        if 'RFE' in settings:
            raise NotImplementedError('Parallel RFE is not implemented')

        # NOTE(review): the parallel path always calls utils.train_model,
        # even when 'CUSTOM' is in the settings - confirm this is intended.
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(settings,
                                              data,
                                              metadata,
                                              subject,
                                              model_pipe,
                                              store_models,
                                              load_pickled,
                                              verbose,
                                              extra_data=Xtra,
                                              parallel=parallel)
            for subject in subjects)

        # Each job returns (results, auc); both are merged with
        # dict.update, so they are expected to be mappings (presumably
        # keyed by subject - confirm against utils.train_model).
        for job_output in output:
            subject_predictions.update(job_output[0])
        for job_output in output:
            auc_scores.update(job_output[1])
    else:
        for subject in subjects:
            utils.print_verbose(
                "=====Training {0} Model=====".format(str(subject)),
                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings,
                    data,
                    metadata,
                    subject,
                    model_pipe,
                    transformed_features,
                    store_models,
                    store_features,
                    load_pickled,
                    settingsfname,
                    verbose,
                    extra_data=Xtra)
                # RFE produces no per-subject predictions.
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(settings,
                                                        data,
                                                        metadata,
                                                        subject,
                                                        model_pipe,
                                                        store_models,
                                                        load_pickled,
                                                        verbose,
                                                        extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(settings,
                                                 data,
                                                 metadata,
                                                 subject,
                                                 model_pipe,
                                                 store_models,
                                                 load_pickled,
                                                 verbose,
                                                 extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})

    if save_training_detailed:
        if not subject_predictions:
            # RFE runs (predictions set to None) and empty subject lists
            # have nothing to dump; skip instead of crashing before the AUC
            # scores are written out.
            print("No detailed training results available; not writing "
                  "{0}".format(save_training_detailed))
        else:
            # The serial loop variable is not defined in the parallel path
            # (generator-expression variables do not leak), so the key is
            # taken from the subject list explicitly.
            # NOTE(review): only the LAST subject's results are written,
            # matching the previous serial behaviour - confirm whether the
            # whole dictionary was intended.
            last_subject = list(subjects)[-1]
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))
    auc_scores.update({'all': combined_auc})
    utils.output_auc_scores(auc_scores, settings)

    return auc_scores