# Exemplo n.º 1 (scraped example header; 0 = vote count)
def run_multiple_cross_validation(feature: CandidateFeature,
                                  splitted_values_train, splitted_target_train,
                                  parameters, model, score):
    """Run repeated grid-search cross-validation over several model seeds.

    For each seed in ``nested_my_globale_module.model_seeds`` a fresh
    stratified fold split is built (seeded from ``splitting_seeds`` at the
    same index — NOTE(review): assumes both seed lists have equal length,
    confirm), a grid search is fitted and three things are recorded: the
    best accuracy score, the AICc-based complexity score of the winning
    candidate, and the winning hyperparameter combination.

    Side effect: the hyperparameters chosen most often across repetitions
    are written back to ``feature.runtime_properties['hyperparameters']``
    with the pipeline prefix ``c__`` stripped from the keys again.

    Returns:
        (mean, std) of the best accuracy scores over all repetitions.
    """
    X_train = splitted_values_train
    y_train = splitted_target_train

    pipeline = generate_pipeline(feature, model)

    best_scores = []
    complexity_scores = []
    # best_params (as hashabledict) -> number of repetitions that chose them
    params_votes = {}

    for seed_idx in range(len(nested_my_globale_module.model_seeds)):
        # Deterministic stratified folds for this repetition.
        splitter = StratifiedKFold(
            n_splits=len(nested_my_globale_module.splitting_seeds),
            shuffle=True,
            random_state=nested_my_globale_module.splitting_seeds[seed_idx])
        folds = list(
            splitter.split(splitted_values_train, splitted_target_train))

        # Rewrite the parameter grid: pin the model seed for this repetition
        # and prefix every key with 'c__' so it addresses the classifier
        # step inside the pipeline.
        grid = copy.deepcopy(parameters)
        grid['random_state'] = [
            int(nested_my_globale_module.model_seeds[seed_idx])
        ]
        for key in list(grid.keys()):
            if not str(key).startswith('c__'):
                grid['c__' + str(key)] = grid.pop(key)

        scoring = {
            'accuracy': score,
            'complexity': make_scorer(customAICc,
                                      greater_is_better=False,
                                      needs_proba=True,
                                      k=feature.get_complexity()),
        }

        search = GridSearchCV(pipeline,
                              param_grid=grid,
                              scoring=scoring,
                              cv=folds,
                              refit='accuracy')
        search.fit(X_train, y_train)

        best_scores.append(search.best_score_)
        complexity_scores.append(
            search.cv_results_['mean_test_complexity'][search.best_index_])

        winner = hashabledict(search.best_params_)
        params_votes[winner] = params_votes.get(winner, 0) + 1

    # Keep the hyperparameter set that won the most repetitions ...
    feature.runtime_properties['hyperparameters'] = max(
        params_votes.items(), key=operator.itemgetter(1))[0]

    # ... and strip the 'c__' pipeline prefix from its keys again so the
    # stored hyperparameters refer to the bare model, not the pipeline.
    chosen = copy.deepcopy(feature.runtime_properties['hyperparameters'])
    for key in list(chosen.keys()):
        if str(key).startswith('c__'):
            chosen[str(key[3:])] = chosen.pop(key)
    feature.runtime_properties['hyperparameters'] = chosen

    print(
        str(feature) + ' AICc: ' + str(np.mean(complexity_scores)))

    return np.mean(best_scores), np.std(best_scores)
# Exemplo n.º 2 (scraped example header; 0 = vote count)
def calc_score(c: CandidateFeature):
	"""Blend predictive quality with simplicity into a single score.

	Combines the squared CV score with the inverse feature complexity via
	``harmonic_mean`` — NOTE(review): this is a two-argument project helper,
	not ``statistics.harmonic_mean`` (which takes an iterable); confirm its
	definition.  A weak value on either axis drags the result down.
	"""
	quality = c.runtime_properties['score'] ** 2
	simplicity = 1 / float(c.get_complexity())
	return harmonic_mean(quality, simplicity)