Python Utilities.get_f_ranking_from_predictions示例

编程语言: Python

类/类型: Utilities

方法/功能: get_f_ranking_from_predictions

hotexamples.com的示例: 2

Python Utilities.get_f_ranking_from_predictions - 已找到2个示例。这些是从开源项目中提取的最受好评的Utilities.get_f_ranking_from_predictions 来自程序包 facebook_page_scraper现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

format_CIK(13)

write_result_to_file(10)

AverageMeter(9)

mkdir(9)

tokenizeFile(7)

init_distribution(7)

get_err_from_predict(7)

sanitize_filing_year(6)

failExecution(6)

setup_db(5)

printFrequencies(5)

connect(5)

replace_zero_label_with_neg_one(5)

insert_entries(5)

pre_compute_threshes(4)

getIndentSize(4)

print_to_file(3)

findNextNonWhiteSpaceCharIndex(3)

getPyQt4ModulesDirectory(3)

get_alpha_numeric_count(3)

pwDecode(3)

get_random_name(3)

get_suffix(3)

printInfo(3)

rmTree(3)

is_CIK_valid(3)

parse_time(3)

isInsideTextLiteral(3)

get_prefix(3)

check_number(3)

check_capital(3)

check_bar(3)

get_f_ranking_from_predictions(2)

from_dungeon_level(2)

rot_center(2)

pwEncode(2)

get_auc_from_predict(2)

BhattacharyaCoeff(2)

checkBlacklistedVersions(2)

maximalElements(2)

collateFrequencies(2)

chat(2)

pre_compute_threshes_uci(2)

pre_compute_threshes_8news(2)

character_counter(2)

isSubList(2)

is_inside_frustum(2)

listMerge(2)

loadAll(2)

normalise_plurk_id(2)

示例#1

显示文件

文件： PB1_B_polluted_data.py 项目： Juncai/CS6140

def main():
    st = time.time()
    # training parameter
    round_limit = 15
    result_path = 'results/PB1_B_spam_2.acc'
    model_name = 'spam_'
    model_path = result_path + '.model'
    threshes_path = 'data/spambase_polluted.threshes'
    train_data_path = 'data/spam_polluted/train/data.pickle'
    test_data_path = 'data/spam_polluted/test/data.pickle'

    # laod and preprocess training data
    tr_data = loader.load_pickle_file(train_data_path)
    te_data = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - st))
    # TODO convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(tr_data)
    util.replace_zero_label_with_neg_one(te_data)
    print('{:.2f} Label converted!'.format(time.time() - st))

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)
    print('{:.2f} Thresholds loaded!'.format(time.time() - st))
    # start training
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    ranked_f = None
    roc = []
    auc = 0.0
    thresh_cs = None

    tr_n, f_d = np.shape(tr_data[0])
    te_n, = np.shape(te_data[1])
    # TODO prepare distribution
    d = util.init_distribution(len(tr_data[0]))

    # TODO compute thresholds cheat sheet (not a solution due to huge thresh_cs table)
    # thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    # print('{:.2f} Thresholds cheat sheet computed!'.format(time.time() - st))

    boost = b.Boosting(d)
    testing_predict = np.zeros((1, te_n)).tolist()[0]
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_te_err = []
    round_model_err = []
    round_te_auc = []
    converged = False
    tol = 1e-5
    te_auc = 2.
    round = 0
    while round < round_limit:  # and not converged:
        round += 1
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        boost.update_predict(te_data[0], testing_predict)
        c_model_err = boost.model[-1].w_err
        round_model_err.append(c_model_err)
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
        # TODO calculate the AUC for testing results
        # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
        round_tr_err.append(c_tr_err)
        round_te_err.append(c_te_err)
        # round_te_auc.append(c_te_auc)
        print('{:.2f} Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(time.time() - st, round, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))
        # converged =  abs(c_te_auc - te_auc) / te_auc <= tol
        # te_auc = c_te_auc

    training_errs.append(round_tr_err[-1])
    testing_errs.append(round_te_err[-1])
    # TODO get feature ranking from the predictions
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)
    round_err_1st_boost = round_model_err
    tr_errs_1st_boost = round_tr_err
    te_errs_1st_boost = round_te_err
    # te_auc_1st_boost = round_te_auc

    # _, te_roc_1st_boost = util.get_auc_from_predict(testing_predict, te_data[1], True)

        # break      # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)

    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, mean_testing_err))
    print('Top 10 features: ')
    # print(ranked_f[:10])

    result = {}
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f

    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # store the model
    loader.save(model_path, boost)
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)

示例#2

显示文件

文件： PB1_A_feature_analysis.py 项目： Juncai/CS6140

def main():
    # training parameter
    k = 10  # fold
    round_limit = 300
    result_path = 'results/PB1_A_spam_final.acc'
    model_name = 'spam_' + str(k) + 'fold'
    threshes_path = 'data/spambase.threshes'
    data_path = 'data/spam/data.pickle'

    # laod and preprocess training data
    training_data = loader.load_pickle_file(data_path)
    # TODO convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(training_data)

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    ranked_f = None
    roc = []
    auc = 0.0

    tr_data = training_data
    tr_n, f_d = np.shape(tr_data[0])
    # TODO prepare distribution
    d = util.init_distribution(len(tr_data[0]))
    # TODO compute thresholds cheat sheet
    thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    boost = b.Boosting(d)
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_te_err = []
    round_model_err = []
    round = 0
    while round < round_limit:  # and not converged:
        round += 1
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        c_model_err = boost.model[-1].w_err
        round_model_err.append(c_model_err)
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        # TODO calculate the AUC for testing results
        round_tr_err.append(c_tr_err)
        print('Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(round, c_f_ind, c_thresh, c_model_err, c_tr_err, 0, 0))

        training_errs.append(round_tr_err[-1])
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)


        # break      # for testing

    mean_training_err = np.mean(training_errs)

    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, 0))
    print('Top 10 features: ')
    print(ranked_f[:10])

    result = {}
    result['Fold'] = k
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f

    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)