import queue

import numpy as np

import handout
import model
import utils

np.random.seed(2019)

if __name__ == '__main__':
    dataset_train, dataset_test = handout.get_text_classification_datasets()
    categories = dataset_train.target_names

    # training data and labels
    training_data = dataset_train.data
    training_labels = np.array(dataset_train.target)
    clean_training_data = utils.clean_dataset(training_data)
    mapping_dict = utils.build_mapping_dict(clean_training_data)
    feature_vector = utils.data2vec(clean_training_data, mapping_dict)
    print(len(feature_vector[0]))  # feature dimensionality (vocabulary size)

    # build model
    softmax_model = model.Softmax_CrossEntropy_model(
        class_num=len(categories),
        feature_length=feature_vector.shape[1],
        learning_rate=learning_rate,
        regularization_rate=regularization_rate)

    present_epoch = 0
    example_num = len(feature_vector)
    step = 0

    # initialize auto-termination: track recent loss differences in a queue
    loss_dif_queue = queue.Queue()
    for i in range(observe_dif_times):
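# --------------------------------------------------------------------------
# Illustrative sketch (assumption): the internals of Softmax_CrossEntropy_model
# are not shown in this excerpt. Below is a minimal, conventional softmax +
# cross-entropy classifier with L2 regularization and a single gradient step,
# to show what such a model typically computes. The class and method names
# here are hypothetical, not the project's actual API.
# --------------------------------------------------------------------------
import numpy as np


class SoftmaxCrossEntropySketch:
    def __init__(self, class_num, feature_length, learning_rate, regularization_rate):
        self.W = np.zeros((feature_length, class_num))
        self.b = np.zeros(class_num)
        self.lr = learning_rate
        self.reg = regularization_rate

    def _softmax(self, logits):
        # subtract the row-wise max for numerical stability
        shifted = logits - logits.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / exp.sum(axis=1, keepdims=True)

    def step(self, X, y):
        """One gradient-descent step on a batch; y holds integer class labels."""
        n = X.shape[0]
        probs = self._softmax(X @ self.W + self.b)
        # cross-entropy loss plus an L2 penalty on the weights
        loss = -np.log(probs[np.arange(n), y] + 1e-12).mean() \
               + 0.5 * self.reg * np.sum(self.W ** 2)
        # gradient of cross-entropy w.r.t. the logits is (probs - one_hot) / n
        probs[np.arange(n), y] -= 1.0
        grad_W = X.T @ probs / n + self.reg * self.W
        grad_b = probs.sum(axis=0) / n
        self.W -= self.lr * grad_W
        self.b -= self.lr * grad_b
        return loss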
# 768*5 = 3840-dimensional features
import numpy as np
import pandas as pd
from keras.callbacks import Callback
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from sklearn.metrics import f1_score, recall_score, precision_score

from utils import load_data, data2vec

train_data, valid_data, test_data = load_data(
    r"D:\learning\competition\人岗匹配\human_co_match\human_co_match\data\text_data\\"
)
data = pd.concat([train_data, valid_data, test_data])
print(data.columns)

# txt2vec & padding (the arguments are Chinese column names in the raw data)
person_list = data2vec(data, "求职者文本内容")        # applicant profile text
intent_list = data2vec(data, "投递意向文本内容")      # application-intent text
work_exp_list = data2vec(data, "工作经历文本内容")    # work-experience text
cert_list = data2vec(data, "证书文本内容")            # certificate text
project_exp_list = data2vec(data, "项目经历文本内容")  # project-experience text
job_desc_list = data2vec(data, "岗位文本内容")        # job-description text
y_list = data["标签"].values                          # labels

# data split: recover the original train/valid/test boundaries
train_idx, valid_idx = train_data.shape[0], train_data.shape[0] + valid_data.shape[0]
person_train, person_eval, person_test = person_list[:train_idx], \
                                         person_list[train_idx:valid_idx], \
                                         person_list[valid_idx:]
intent_train, intent_eval, intent_test = intent_list[:train_idx], \
                                         intent_list[train_idx:valid_idx], \
                                         intent_list[valid_idx:]
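# --------------------------------------------------------------------------
# Hedged sketch (assumption): the imports of Callback together with sklearn's
# f1_score / recall_score / precision_score suggest a per-epoch validation
# metrics callback further down in this file. The class name MetricsSketch and
# the (x_val, y_val) constructor arguments below are illustrative, assuming a
# binary match / no-match label thresholded at 0.5.
# --------------------------------------------------------------------------
class MetricsSketch(Callback):
    def __init__(self, x_val, y_val):
        super(MetricsSketch, self).__init__()
        self.x_val = x_val
        self.y_val = y_val

    def on_epoch_end(self, epoch, logs=None):
        # self.model is attached by Keras before training starts
        y_pred = (self.model.predict(self.x_val) > 0.5).astype(int).ravel()
        print("epoch %d  f1=%.4f  precision=%.4f  recall=%.4f" % (
            epoch,
            f1_score(self.y_val, y_pred),
            precision_score(self.y_val, y_pred),
            recall_score(self.y_val, y_pred)))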