示例#1
0
def prepare_and_run(subjects, classes, out_dir, options_path,
                    user_feature_paths, user_feature_type, fs_subject_dir,
                    train_perc, num_rep_cv, positive_class, sub_group_list,
                    feature_selection_size, num_procs, grid_search_level,
                    classifier, feat_select_method):
    """Organizes the inputs and prepares them for CV.

    Builds the list of methods via ``make_method_list``, imports the datasets
    via ``import_datasets``, and then, for every sub group in
    ``sub_group_list``, calls ``rhst.run`` with a sub-group-specific output
    folder under ``out_dir`` and produces visualizations from the results file
    it returns.

    Parameters
    ----------
    subjects, classes
        Forwarded unchanged to ``import_datasets``.
    out_dir
        Base output folder; each sub group gets its own sub-folder inside it.
    options_path
        Forwarded to ``rhst.run`` and ``make_visualizations``.
    user_feature_paths, user_feature_type, fs_subject_dir
        Forwarded to ``make_method_list`` (``user_feature_type`` also goes to
        ``import_datasets``).
    train_perc, num_rep_cv, positive_class, feature_selection_size,
    num_procs, grid_search_level, classifier, feat_select_method
        Forwarded to ``rhst.run`` as keyword arguments.
    sub_group_list
        Iterable of sub groups; each sub group is an iterable of strings
        (they are joined with ``','`` for display).

    Returns
    -------
    None
    """

    feature_dir, method_list = make_method_list(fs_subject_dir,
                                                user_feature_paths,
                                                user_feature_type)

    method_names, dataset_paths_file = import_datasets(method_list, out_dir,
                                                       subjects, classes,
                                                       feature_dir,
                                                       user_feature_type)

    print('Requested processing for the following subgroups:'
          '\n{}\n'.format('\n'.join([','.join(sg) for sg in sub_group_list])))

    # iterating through the given set of subgroups
    num_sg = len(sub_group_list)
    for sgi, sub_group in enumerate(sub_group_list):
        print('{}\nProcessing subgroup : {} ({}/{})'
              '\n{}'.format('-' * 80, ','.join(sub_group), sgi + 1, num_sg,
                            '-' * 80))
        # every sub group writes into its own folder below the base out_dir
        out_dir_sg = pjoin(out_dir, sub_group_identifier(sub_group))
        results_file_path = rhst.run(dataset_paths_file,
                                     method_names,
                                     out_dir_sg,
                                     train_perc=train_perc,
                                     num_repetitions=num_rep_cv,
                                     positive_class=positive_class,
                                     sub_group=sub_group,
                                     feat_sel_size=feature_selection_size,
                                     num_procs=num_procs,
                                     grid_search_level=grid_search_level,
                                     classifier_name=classifier,
                                     feat_select_method=feat_select_method,
                                     options_path=options_path)

        # report the folder actually handed to make_visualizations (the
        # sub-group folder), not the base output folder
        print('\n\nSaving the visualizations to \n{}'.format(out_dir_sg))
        make_visualizations(results_file_path, out_dir_sg, options_path)
        print('\n')
示例#2
0
def load_results_from_folder(results_folder):
    """
    Collects the saved results of every sub group found in a base output folder.

    The sub groups are read from the options stored in ``results_folder``
    (key ``'sub_groups'``). For each one, the results file is expected at
    ``<results_folder>/<sub group identifier>/<cfg.file_name_results>``.

    Returns a dictionary mapping each sub group identifier to the object
    returned by ``load_results_dict`` for its results file.

    Raises IOError if a results file is missing or has zero size.
    """

    sub_groups = load_options(results_folder)['sub_groups']

    results_by_group = {}
    for position, group in enumerate(sub_groups):
        # NOTE(review): the index passed here is 0-based; confirm it matches
        # the index used when the sub-group folders were created.
        group_id = sub_group_identifier(group, position)
        path = pjoin(results_folder, group_id, cfg.file_name_results)

        # a usable file must exist and be non-empty
        is_usable = pexists(path) and os.path.getsize(path) > 0
        if not is_usable:
            raise IOError('Results file for sub group {} does not exist'
                          ' or is empty!'.format(group_id))

        results_by_group[group_id] = load_results_dict(path)

    return results_by_group
示例#3
0
def cli():
    """Main entry point.

    Parses the command-line arguments, imports the datasets, and runs one
    ``ClassificationWorkflow`` per requested sub group, each writing to its
    own folder under the output directory.

    Returns
    -------
    dict
        Maps each sub group identifier to the value returned by the
        corresponding ``ClassificationWorkflow.run()`` call.
    """

    print('\nneuropredict version {} for Classification'.format(__version__))
    from datetime import datetime
    init_time = datetime.now()
    print('\tTime stamp : {}\n'.format(
        init_time.strftime('%Y-%m-%d %H:%M:%S')))

    subjects, classes, out_dir, options_path, user_feature_paths, \
    user_feature_type, fs_subject_dir, train_perc, num_rep_cv, positive_class, \
    sub_group_list, feature_selection_size, impute_strategy, num_procs, \
    grid_search_level, classifier, feat_select_method, covar_list, covar_method = \
        parse_args()

    feature_dir, method_list = make_method_list(fs_subject_dir,
                                                user_feature_paths,
                                                user_feature_type)
    # noinspection PyTupleAssignmentBalance
    method_names, outpath_list = import_datasets(method_list, out_dir,
                                                 subjects, classes,
                                                 feature_dir,
                                                 user_feature_type)

    print('Requested processing for the following subgroups:'
          '\n{}\n'.format('\n'.join([','.join(sg) for sg in sub_group_list])))

    # iterating through the given set of subgroups
    num_sg = len(sub_group_list)
    result_paths = dict()
    for sgi, sub_group in enumerate(sub_group_list):
        print('{line}\nProcessing subgroup : {id_} ({idx}/{cnt})\n{line}'
              ''.format(line='-' * 80,
                        id_=','.join(sub_group),
                        idx=sgi + 1,
                        cnt=num_sg))
        sub_group_id = sub_group_identifier(sub_group, sg_index=sgi + 1)
        out_dir_sg = pjoin(out_dir, sub_group_id)

        multi_ds = load_datasets(outpath_list,
                                 task_type='classify',
                                 subgroup=sub_group,
                                 name=sub_group_id)

        covariates, deconfounder = check_covariates(multi_ds, covar_list,
                                                    covar_method)

        print(multi_ds)
        # Resolve the strategy for THIS sub group in a separate name, so the
        # user's requested impute_strategy is passed unchanged to the
        # detection step of every sub group; reassigning it here would let
        # the outcome for one sub group leak into all the following ones.
        impute_strategy_sg = detect_missing_data(multi_ds, impute_strategy)

        clf_expt = ClassificationWorkflow(
            datasets=multi_ds,
            pred_model=classifier,
            impute_strategy=impute_strategy_sg,
            dim_red_method=feat_select_method,
            covariates=covariates,
            deconfounder=deconfounder,
            reduced_dim=feature_selection_size,
            train_perc=train_perc,
            num_rep_cv=num_rep_cv,
            scoring=cfg.default_metric_set_classification,
            positive_class=positive_class,
            grid_search_level=grid_search_level,
            out_dir=out_dir_sg,
            num_procs=num_procs,
            user_options=options_path,
            checkpointing=cfg.default_checkpointing)

        result_paths[sub_group_id] = clf_expt.run()

    timedelta = datetime.now() - init_time
    print('All done. Elapsed time: {} HH:MM:SS\n'.format(timedelta))

    return result_paths