import pickle

import jieba

# The project-local modules used below (of: file I/O helpers, pre: text
# preprocessing, cnn: tokenizer/model code, utils: label helpers, CONFIG,
# and the path_* constants) are assumed to be defined elsewhere in this file.


def test_demo():
    dataa = of.read_txt_and_deal(path_words_deal)
    sents_train_deal = list()
    for s in dataa:
        nr1 = s[0]
        nr2 = s[1]
        x = s[2]
        x = pre.hide_nr(x, nr1, nr2)
        words = jieba.lcut(x)
        # word_str = ' '.join(words)
        sents_train_deal.append(words)
    # Two hand-written sanity-check sentences: a positive pair
    # ("Hu Ting and Hu Lei got married") and a negative one
    # ("Mobike was acquired by Meituan and is run by Meitu").
    sents_train_deal.append(jieba.lcut('胡挺和胡磊结婚了'))
    sents_train_deal.append(jieba.lcut('摩拜单车被美团收购了,由美图经营'))
    # Report any raw rows that do not have the expected 4 columns.
    for s in dataa:
        if len(s) != 4:
            print(s)
    bags_train_deal = [x[3] for x in dataa]
    bags_train_deal.append(1)
    bags_train_deal.append(0)
    data, labels, tokenizer = cnn.fit_tokenizer(sents_train_deal, bags_train_deal)
    data_test, labels_test = cnn.deal_data(tokenizer, sents_train_deal, bags_train_deal)
    # Train without the two hand-written samples, then evaluate on everything.
    model = cnn.fit_model(data[:-2], labels[:-2], tokenizer)
    cnn.evaluate_model(model, data_test, labels_test, bags_train_deal)
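
# A minimal sketch of what pre.hide_nr is assumed to do here: replace the
# two person names (nr1, nr2) in a sentence with fixed placeholder tokens,
# so the model learns the relation pattern rather than the specific names.
# The placeholder strings are illustrative guesses, not the project's own.
def hide_nr_sketch(sentence, nr1, nr2):
    sentence = sentence.replace(nr1, 'NR1')
    sentence = sentence.replace(nr2, 'NR2')
    return sentence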

def train_test_hide_nr():
    # Train and test on one file with an in-memory split;
    # person names are masked before tokenizing.
    train = of.read_txt_and_deal(CONFIG.PATH_TRAIN)
    train = pre.hide_nr_demo(train)
    sent_train_deal = [x[2] for x in train]
    bags_train_deal = [x[3] for x in train]
    data, labels, tokenizer = cnn.fit_tokenizer(sent_train_deal, bags_train_deal)
    x_train, y_train, x_test, y_test = cnn.split_data(data, labels)
    # train_word2vec.word2vec_train(sent_train_deal)
    # data_test, labels_test = cnn.deal_data(tokenizer, x_test, y_test)
    model = cnn.fit_model(x_train, y_train, tokenizer)
    cnn.evaluate_model(model, x_test, y_test)
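
# A plausible stand-in for cnn.split_data, assuming numpy arrays and a
# simple shuffled hold-out split. The 0.8 ratio is illustrative; the return
# order (x_train, y_train, x_test, y_test) is inferred from the call above.
import numpy as np

def split_data_sketch(data, labels, ratio=0.8):
    idx = np.random.permutation(len(data))
    cut = int(len(data) * ratio)
    train_idx, test_idx = idx[:cut], idx[cut:]
    return data[train_idx], labels[train_idx], data[test_idx], labels[test_idx]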

def train_test_from_files():
    # Train and test from pre-split sentence and label files.
    sent_train = of.read_txt_and_deal(CONFIG.PATH_TRAIN_SENT)
    bags_train = of.read_txt_and_deal(CONFIG.PATH_TRAIN_BAG)
    # sent_train, bags_train = pre.delete_line(sent_train, bags_train, 5000)
    sent_test = of.read_txt_and_deal(CONFIG.PATH_TEST_SENT)
    bags_test = of.read_txt_and_deal(CONFIG.PATH_TEST_BAG)
    # sent_test, bags_test = pre.delete_line(sent_test, bags_test, 3000)
    sent_train_deal = [x[3] for x in sent_train]
    bags_train_deal = [x[1] for x in bags_train]
    bags_train_deal = utils.standard_bags(bags_train_deal)
    sent_test_deal = [x[3] for x in sent_test]
    bags_test_deal = [x[1] for x in bags_test]
    bags_test_deal = utils.standard_bags(bags_test_deal)
    data, labels, tokenizer = cnn.fit_tokenizer(sent_train_deal, bags_train_deal)
    data_test, labels_test = cnn.deal_data(tokenizer, sent_test_deal, bags_test_deal)
    model = cnn.fit_model(data, labels, tokenizer)
    cnn.evaluate_model(model, data_test, labels_test)
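
# Hedged sketch of the tokenizer pipeline that cnn.fit_tokenizer and
# cnn.deal_data appear to wrap, assuming Keras-style preprocessing.
# The max-length setting is illustrative, not taken from the project.
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

def fit_tokenizer_sketch(sents, bags, maxlen=100):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sents)  # sents: strings or lists of tokens
    seqs = tokenizer.texts_to_sequences(sents)
    data = pad_sequences(seqs, maxlen=maxlen)
    labels = np.asarray(bags)
    return data, labels, tokenizer

def deal_data_sketch(tokenizer, sents, bags, maxlen=100):
    # Reuse the fitted tokenizer so test data shares the training vocabulary.
    seqs = tokenizer.texts_to_sequences(sents)
    return pad_sequences(seqs, maxlen=maxlen), np.asarray(bags)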

def train_test_pretrained():
    # Evaluate an already-trained model with its saved tokenizer.
    train = of.read_txt_and_deal(CONFIG.PATH_TEST_DEAL)
    train = pre.hide_nr_demo(train)
    sent_train_deal = list()
    for s in train:
        nr1 = s[0]
        nr2 = s[1]
        x = s[2]
        x = pre.hide_nr(x, nr1, nr2)
        words = jieba.lcut(x)
        # word_str = ' '.join(words)
        sent_train_deal.append(words)
    bags_train_deal = [int(x[3]) for x in train]
    with open('model/tokenizer_' + str(CONFIG.VERSION) + '.pickle', 'rb') as f:
        tokenizer = pickle.load(f)
    data_test, labels_test = cnn.deal_data(tokenizer, sent_train_deal, bags_train_deal)
    model = cnn.load_models()
    cnn.evaluate_model(model, data_test, labels_test)
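
# The load above implies the tokenizer was pickled at training time; this is
# a minimal sketch of the matching save step. The path format is copied from
# the loader; everything else is assumed.
import pickle

def save_tokenizer_sketch(tokenizer, version):
    with open('model/tokenizer_' + str(version) + '.pickle', 'wb') as f:
        pickle.dump(tokenizer, f)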

def test_fit_tokenizer():
    sents = of.read_txt_and_deal(path_sent)
    bags = of.read_txt_and_deal(path_bag)
    sents_deal = [x[3] for x in sents]
    bags_deal = [x[1] for x in bags]
    data, labels, tokenizer = cnn.fit_tokenizer(sents_deal, bags_deal)

def test_query_word_count():
    words = pre.query_word_count(of.read_txt_and_deal(path_sent), 3)
    print(len(words))
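
# Sketch of what pre.query_word_count is assumed to compute: the words whose
# frequency reaches the given threshold. Both the sentence column (index 3,
# copied from the functions above) and the threshold direction are guesses.
from collections import Counter
import jieba

def query_word_count_sketch(rows, threshold):
    counts = Counter(w for row in rows for w in jieba.lcut(row[3]))
    return [w for w, c in counts.items() if c >= threshold]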

def test_check_txt_column_number():
    print(pre.check_txt_column_number(of.read_txt_and_deal(path_sent), 4))
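
# Sketch of the column-count check used above, assuming it reports whether
# every parsed row has the expected number of fields.
def check_txt_column_number_sketch(rows, n):
    return all(len(row) == n for row in rows)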

def test_read_txt_and_deal():
    print(of.read_txt_and_deal(path_sent))