示例#1
0
print("---------------Training document embedding-----------------")


# %%
# Choose the Doc2Vec variant named by ``embedding_type``. Any value other
# than "LOD" or "Normal" falls through to the plain Doc2Vec class.
if embedding_type == "LOD":
    doc2vec_cls = OnlyLeafDoc2Vec
elif embedding_type == "Normal":
    doc2vec_cls = NoTag_Doc2Vec
else:
    doc2vec_cls = Doc2Vec

# Every variant is built with the same constructor arguments.
doc2vec = doc2vec_cls(
    data_name,
    dataset_train.number_of_classes(),
    size=embedding_size,
    epoch=270,
    batch_size=10000,
)

# Fitting is disabled here; a previously exported model is loaded instead.
# doc2vec.fit(dataset_train.datas, dataset_train.labels, dataset_validate.datas, dataset_validate.labels, early_stopping=False)
doc2vec.load_model('export/%s/doc2vec.model' % data_name)


# %%
# Convert the training and validation splits using the loaded doc2vec model.
dataset_train.change_to_Doc2Vec(doc2vec)
dataset_validate.change_to_Doc2Vec(doc2vec)

# The test split is converted only when at least one test-related flag is set.
needs_test_split = test_split or predict_test or evaluate_test
if needs_test_split:
    dataset_test.change_to_Doc2Vec(doc2vec)


# %%
# Resolve the 'auto' placeholder for the hidden-layer sizes.
if hidden == 'auto' or target_hidden == 'auto':
    # a[i] is the difference between consecutive entries of
    # dataset_train.level, i.e. level[i + 1] - level[i].
    a = np.array([
        upper - lower
        for lower, upper in zip(dataset_train.level, dataset_train.level[1:])
    ])

    if hidden == 'auto':
        # Twice the per-step difference plus 300, capped at 3000.
        hidden = np.minimum(a * 2 + 300, 3000)
示例#2
0
# %%
print("---------------Document embedding-----------------")


# %%
# Choose the Doc2Vec variant named by ``embedding_type``. Any value other
# than "LOD" or "Normal" falls through to the plain Doc2Vec class.
if embedding_type == "LOD":
    doc2vec_cls = OnlyLeafDoc2Vec
elif embedding_type == "Normal":
    doc2vec_cls = NoTag_Doc2Vec
else:
    doc2vec_cls = Doc2Vec

# Every variant is built with the same constructor arguments; the class
# count is taken from the test dataset.
doc2vec = doc2vec_cls(
    data_name,
    dataset_test.number_of_classes(),
    size=embedding_size,
    epoch=270,
    batch_size=10000,
)

# No fitting happens here: the exported model is loaded from disk.
doc2vec.load_model('export/%s/doc2vec.model' % data_name)


# %%
# Convert the test split using the loaded doc2vec model.
dataset_test.change_to_Doc2Vec(doc2vec)


# %%
print("---------------Training classifiers-----------------")


# %%
# Build the ESLNN classifier. dataset_test is passed for both dataset
# arguments of the constructor.
# NOTE(review): start_level=99999 presumably makes train() skip the actual
# per-level training so existing weights are reused — confirm against ESLNN.
model = ESLNN(
    data_name,
    dataset_test,
    "temp",
    dataset_test,
    iteration=2000,
    stopping_time=300,
    batch_size=batch_size,
    hidden_size=hidden,
    target_hidden_size=target_hidden,
    use_dropout=True,
    start_level=99999,
)


# %%
# Run the training entry point, then apply the configured threshold.
model.train()
model.apply_threshold(threshold)