def run_keras():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=3, test_size=0.6)
    embedding_matrix = pretreatment.create_embedding_matrix(15000)
    textrnn = TextRNN(pretreatment.nnparam)
    textrnn.train(train_x, train_y, embedding_matrix)
    textrnn.predict(test_x, test_y)
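# For reference, a minimal sketch of what a Keras TextRNN wired to a frozen
# pre-trained embedding matrix could look like. build_textrnn, its parameters,
# and the layer sizes are assumptions for illustration, not the repo's actual
# TextRNN implementation.
from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential


def build_textrnn(vocab_size, embed_dim, num_classes, embedding_matrix=None):
    model = Sequential()
    if embedding_matrix is not None:
        # Initialize from the pre-trained matrix and freeze it during training.
        model.add(Embedding(vocab_size, embed_dim,
                            embeddings_initializer=Constant(embedding_matrix),
                            trainable=False))
    else:
        model.add(Embedding(vocab_size, embed_dim))
    model.add(LSTM(128))                                 # recurrent encoder
    model.add(Dense(num_classes, activation='softmax'))  # class probabilities
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model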
def run_keras():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=2, test_size=0.1)
    # embedding_matrix = pretreatment.create_embedding_matrix(30000)
    textrnn = FastText(pretreatment.nnparam)
    # textrnn.train(train_x, train_y, embedding_matrix)  # accuracy 0.9323043484250149, loss 0.270193725742771
    textrnn.train(train_x, train_y, '')  # accuracy 0.9353858005601531, loss 0.2599002837189978
    textrnn.predict(test_x, test_y)
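# A minimal sketch of a FastText-style Keras classifier: averaged token
# embeddings feeding a softmax layer. The recorded accuracy/loss above came
# from training without a pre-trained matrix. build_fasttext and its sizes
# are assumed for illustration; the original fastText also hashes word
# n-grams, which is omitted here.
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential


def build_fasttext(vocab_size, embed_dim, num_classes):
    model = Sequential()
    model.add(Embedding(vocab_size, embed_dim))
    model.add(GlobalAveragePooling1D())  # average all token embeddings
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model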
def run_keras():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=5, y_one_hot=False, test_size=0.6)
    embedding_matrix = pretreatment.create_embedding_matrix(20000)
    textrnn = TextCNN(pretreatment.nnparam)
    textrnn.train(train_x, train_y, embedding_matrix)
    textrnn.predict(test_x, test_y)
"""
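# A minimal sketch of a TextCNN in Keras: parallel Conv1D branches over the
# embedded sequence, one per kernel size, max-pooled and concatenated.
# build_textcnn and all sizes are assumptions for illustration; the sparse
# loss matches the y_one_hot=False split above.
from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import (Concatenate, Conv1D, Dense, Embedding,
                                     GlobalMaxPooling1D, Input)
from tensorflow.keras.models import Model


def build_textcnn(vocab_size, embed_dim, seq_len, num_classes, embedding_matrix):
    inputs = Input(shape=(seq_len,))
    x = Embedding(vocab_size, embed_dim,
                  embeddings_initializer=Constant(embedding_matrix),
                  trainable=False)(inputs)
    # One branch per kernel size captures n-gram features of that width.
    branches = [GlobalMaxPooling1D()(Conv1D(128, k, activation='relu')(x))
                for k in (3, 4, 5)]
    outputs = Dense(num_classes, activation='softmax')(Concatenate()(branches))
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model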
def run_pytorch():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=3, y_one_hot=False, test_size=0.1)
    train_x, train_y, test_x, test_y = data2tensor(train_x, train_y, test_x, test_y)
    m = TextRNN2(pretreatment.nnparam)
    # m = BiGRUAttention(pretreatment.nnparam)
    print(m)
    get_parameter_number(m)
    for epoch in range(epochs):
        train_model(net=m, train_x=train_x, train_y=train_y, epoch=epoch, lr=0.0001)
    test_model(m, test_x, test_y)
    torch.save(m, os.path.join(corpus_root_path, 'torch_demo', 'textrnn_attention.pkl'))
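# get_parameter_number is defined elsewhere in the repo; a plausible sketch,
# assuming it reports total and trainable parameter counts:
def get_parameter_number_sketch(net):
    total = sum(p.numel() for p in net.parameters())
    trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('total params:', total, 'trainable params:', trainable)
    return {'total': total, 'trainable': trainable}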
def run_pytorch():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=3, y_one_hot=False, test_size=0.1)  # 0.9785
    train_x, train_y, test_x, test_y = data2tensor(train_x, train_y, test_x, test_y)
    fasttext = FastText2(pretreatment.nnparam)
    # Count parameters by hand; reduce comes from functools, and
    # v.numel() would be an equivalent per-tensor element count.
    p = 0
    for k, v in fasttext.named_parameters():
        print('name', k, 'param', v.size())
        p += reduce(lambda x, y: x * y, list(v.size()))
    print(fasttext)
    print('parameter count', p)
    for epoch in range(epochs):
        train_model(net=fasttext, train_x=train_x, train_y=train_y, epoch=epoch, lr=0.0001)
    test_model(fasttext, test_x, test_y)
    torch.save(fasttext, os.path.join(corpus_root_path, 'torch_demo', 'fasttext.pkl'))
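# data2tensor is also defined elsewhere; a minimal sketch, assuming the inputs
# are padded integer id sequences and the labels are class indices (the real
# helper may differ):
import torch


def data2tensor_sketch(train_x, train_y, test_x, test_y):
    train_x = torch.as_tensor(train_x, dtype=torch.long)
    train_y = torch.as_tensor(train_y, dtype=torch.long)
    test_x = torch.as_tensor(test_x, dtype=torch.long)
    test_y = torch.as_tensor(test_y, dtype=torch.long)
    return train_x, train_y, test_x, test_y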
def run_pytorch():
    pretreatment = Pretreatment()
    train_x, test_x, train_y, test_y = pretreatment.train_test_split(c=2, test_size=0.1)
    train_x, train_y, test_x, test_y = data2tensor(train_x, train_y, test_x, test_y)
    m = BiGRUAttention(pretreatment.nnparam)
    print(m)
    get_parameter_number(m)
    train_model = TrainTorch(model=m)
    train_model.train(train_set=(train_x[:1000], train_y[:1000]),
                      test_set=(test_x, test_y),
                      batch_size=batch_size,
                      epochs=epochs,
                      lr=0.0001,
                      use_test=True)
    torch.save(m, os.path.join(corpus_root_path, 'torch_demo',
                               'text_{}.pkl'.format(m.instance_name)))
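# A hypothetical sketch of what the attention step inside BiGRUAttention could
# look like -- all names and sizes below are assumptions, not the repo's actual
# class. The instance_name attribute mirrors its use in the save path above.
import torch
import torch.nn as nn


class BiGRUAttentionSketch(nn.Module):
    def __init__(self, vocab_size=20000, embed_dim=128, hidden=64, num_classes=2):
        super().__init__()
        self.instance_name = 'bigru_attention'
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.gru = nn.GRU(embed_dim, hidden, bidirectional=True, batch_first=True)
        self.attn = nn.Linear(hidden * 2, 1)  # scores each time step
        self.fc = nn.Linear(hidden * 2, num_classes)

    def forward(self, x):
        out, _ = self.gru(self.embedding(x))            # (batch, seq, hidden*2)
        weights = torch.softmax(self.attn(out), dim=1)  # attention over time
        context = (weights * out).sum(dim=1)            # weighted sum of states
        return self.fc(context)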