def train_test(*, n_folds: int = 10, random_state=None) -> None:
    """Cross-validate ``classify`` on the vectors from ``create_training_vectors``.

    The data is split with a shuffled ``StratifiedKFold``. Every fold is
    handed to ``classify``; the five scores it returns (acc, pre, recall,
    macro_f1, micro_f1) are collected and their means are printed.

    NOTE(review): this file contains a second ``def train_test`` after this
    one. When both live in the same module the later definition replaces
    this one, so this version is unreachable by name -- confirm which one is
    intended.

    Args:
        n_folds: Number of stratified folds. Defaults to 10, the value that
            used to be hard-coded.
        random_state: Seed forwarded to ``StratifiedKFold`` so the shuffled
            split can be reproduced. ``None`` (default) keeps the original
            unseeded shuffling.
    """
    X_features, y = create_training_vectors()
    splitter = StratifiedKFold(
        n_splits=n_folds, shuffle=True, random_state=random_state
    )

    # One list of per-fold scores per metric, in the order classify returns them.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    fold_scores = {name: [] for name in metric_names}

    for train_index, test_index in splitter.split(X_features, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X_features[train_index], X_features[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Explicit 5-way unpacking keeps the original failure mode if
        # classify ever returns a different number of scores.
        acc, pre, recall, macro_f1, micro_f1 = classify(
            train_X=X_train, train_y=y_train, test_X=X_test, test_y=y_test
        )
        for name, score in zip(
            metric_names, (acc, pre, recall, macro_f1, micro_f1)
        ):
            fold_scores[name].append(score)

        # Drop the references to this fold's slices before the next
        # iteration builds new ones.
        del X_train, X_test, y_train, y_test

    print("======================")
    for name in metric_names:
        print(f"avg {name}:", np.mean(fold_scores[name]))
    print("======================")
def train_test(
    *,
    n_folds: int = 10,
    random_state=None,
    tfidf_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/tfidf_model.m",
) -> None:
    """Cross-validate ``classify`` on TF-IDF vectors of the training content.

    Documents and labels come from ``create_training_content()``. The
    documents are turned into feature vectors by ``MYTFIDF.get_tfidf_vector``
    and then split with a shuffled ``StratifiedKFold``. Every fold is handed
    to ``classify``; the five scores it returns (acc, pre, recall, macro_f1,
    micro_f1) are collected and their means are printed.

    Args:
        n_folds: Number of stratified folds. Defaults to 10, the value that
            used to be hard-coded.
        random_state: Seed forwarded to ``StratifiedKFold`` so the shuffled
            split can be reproduced. ``None`` (default) keeps the original
            unseeded shuffling.
        tfidf_model_path: Value assigned to ``MYTFIDF.tfidf_model_path``.
            Defaults to the path that used to be hard-coded.
    """
    contents, y = create_training_content()

    tfidf = MYTFIDF()
    tfidf.tfidf_model_path = tfidf_model_path
    # NOTE(review): the vectors are built from the whole corpus before the
    # folds are split. If get_tfidf_vector fits its model on `corpus`, the
    # test folds influence the features -- confirm against MYTFIDF.
    X_features = tfidf.get_tfidf_vector(corpus=contents)
    print(np.shape(X_features))
    # The raw documents are not used again once the vectors exist.
    del contents

    splitter = StratifiedKFold(
        n_splits=n_folds, shuffle=True, random_state=random_state
    )

    # One list of per-fold scores per metric, in the order classify returns them.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    fold_scores = {name: [] for name in metric_names}

    for train_index, test_index in splitter.split(X_features, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X_features[train_index], X_features[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Explicit 5-way unpacking keeps the original failure mode if
        # classify ever returns a different number of scores.
        acc, pre, recall, macro_f1, micro_f1 = classify(
            train_X=X_train, train_y=y_train, test_X=X_test, test_y=y_test
        )
        for name, score in zip(
            metric_names, (acc, pre, recall, macro_f1, micro_f1)
        ):
            fold_scores[name].append(score)

        # Drop the references to this fold's slices before the next
        # iteration builds new ones.
        del X_train, X_test, y_train, y_test

    print("======================")
    for name in metric_names:
        print(f"avg {name}:", np.mean(fold_scores[name]))
    print("======================")
def train_chi2_model(
    *,
    n_folds: int = 10,
    random_state=None,
    cv_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/cv_model.m",
    chi2_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/chi2_model.m",
) -> None:
    """Cross-validate ``classify`` on chi2 vectors built per fold by ``MYCHI2``.

    Documents and labels come from ``create_training_content()``. The data is
    split with a shuffled ``StratifiedKFold``. Inside each fold
    ``MYCHI2.train_chi2`` is called with the training documents and labels,
    then ``get_chi2_vector`` converts both the training and the test
    documents. The resulting vectors are handed to ``classify``; the five
    scores it returns (acc, pre, recall, macro_f1, micro_f1) are collected
    and their means are printed.

    Args:
        n_folds: Number of stratified folds. Defaults to 10, the value that
            used to be hard-coded.
        random_state: Seed forwarded to ``StratifiedKFold`` so the shuffled
            split can be reproduced. ``None`` (default) keeps the original
            unseeded shuffling.
        cv_model_path: Value assigned to ``MYCHI2.cv_model_path``. Defaults
            to the path that used to be hard-coded.
        chi2_model_path: Value assigned to ``MYCHI2.chi2_model_path``.
            Defaults to the path that used to be hard-coded.
    """
    contents, y = create_training_content()
    # Converted to an ndarray so the fold index arrays can select documents.
    contents = np.array(contents)

    chi2 = MYCHI2()
    chi2.cv_model_path = cv_model_path
    chi2.chi2_model_path = chi2_model_path

    splitter = StratifiedKFold(
        n_splits=n_folds, shuffle=True, random_state=random_state
    )

    # One list of per-fold scores per metric, in the order classify returns them.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    fold_scores = {name: [] for name in metric_names}

    for train_index, test_index in splitter.split(contents, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = contents[train_index], contents[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # train_chi2 only ever receives the training fold; the test fold is
        # just transformed afterwards.
        chi2.train_chi2(X_train, y_train)
        X_chi2_train = chi2.get_chi2_vector(X_train)
        X_chi2_test = chi2.get_chi2_vector(X_test)
        # The raw document slices are not needed once they are vectorized.
        del X_train, X_test

        # Explicit 5-way unpacking keeps the original failure mode if
        # classify ever returns a different number of scores.
        acc, pre, recall, macro_f1, micro_f1 = classify(
            train_X=X_chi2_train,
            train_y=y_train,
            test_X=X_chi2_test,
            test_y=y_test,
        )
        for name, score in zip(
            metric_names, (acc, pre, recall, macro_f1, micro_f1)
        ):
            fold_scores[name].append(score)

        # Drop the references to this fold's vectors and labels before the
        # next iteration builds new ones.
        del X_chi2_train, X_chi2_test, y_train, y_test

    print("======================")
    for name in metric_names:
        print(f"avg {name}:", np.mean(fold_scores[name]))
    print("======================")