Example #1
#!/usr/bin/env python
import sys
import os
import numpy as np

sys.path.append("./scripts/")
from scripts.classification.textcnn import TextCnn
from data.data_loader import DataLoader

conf_file = sys.argv[1]    # conf file listing the sample files to score

# Rebuild the vocabulary used at training time and load the prediction data.
loader = DataLoader()
loader.load_dict("data/title_dict.json")
loader.build(conf_file)
data = loader.get_predict()

# Model hyper-parameters are taken from the loaded data.
conf2 = {
    "embedding_size": loader.word_vec_len,
    "vocab_size": len(loader.weights),
    "sequence_len": loader.max_len,
    "epochs": 10
}
model = TextCnn(conf2)
model.load_model("model/weights.010-0.9952.hdf5")    # trained checkpoint
result = model.predict(data)                          # per-class scores, one row per query
# Split the raw queries on the positive-class score: queries scored above 0.5
# are printed, the rest are written to the "neg" file.
with open("/mnt/hgfs/share/pornCensor/query.sug.title/query.random.5w", "r") as f, \
     open("neg", "w") as fw:
    lines = f.readlines()
    for i in range(result.shape[0]):
        if result[i][1] > 0.5:
            print(lines[i])
        else:
            fw.write(lines[i])
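
The thresholding loop above can also be written as a vectorized split over the same data; a minimal sketch, assuming `result` is the two-column score matrix returned by predict and `lines` holds the corresponding raw queries:

# Alternative, vectorized split; result[:, 1] is the positive-class score per query.
probs = result[:, 1]
pos_lines = [line for line, p in zip(lines, probs) if p > 0.5]
neg_lines = [line for line, p in zip(lines, probs) if p <= 0.5]

with open("neg", "w") as fw:
    fw.writelines(neg_lines)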
Example #2
import argparse

from data.data_loader import DataLoader
from scripts.classification.textcnn import TextCnn

parser = argparse.ArgumentParser()
parser.add_argument("--conf_file",
                    type=str,
                    default="conf/query_conf",
                    help="conf file that contains sample files and labels")

parser.add_argument("--w2v_path",
                    type=str,
                    default="/mnt/hgfs/share/pornCensor/query.skip.vec.win3",
                    help="w2v file which provide w2v")
FLAGS, unparsed = parser.parse_known_args()
print("unparsed: ", unparsed)

# Data-loading parameters: split ratio, max sequence length, embedding size.
params = {"ratio": 0.2, "max_len": 15, "embedding_size": 100}
loader = DataLoader()
loader.set_params(params)
loader.set_w2v(FLAGS.w2v_path)    # pre-trained word vectors
loader.build(FLAGS.conf_file)     # read the sample files and labels listed in the conf file
#loader.save_dict("data/title_dict.json")
train_data, test_data, train_label, test_label = loader.get_train_test()

conf = {
    "embedding_size": loader.word_vec_len,
    "vocab_size": len(loader.weights),
    "sequence_len": loader.max_len,
    "epochs": 100,
    "classes": loader.classes
}
# Pick one of the available classifiers; TextCnn is used here.
#model = Lr(conf)
model = TextCnn(conf)
#model = Fasttext(conf)
#model = TextRnn(conf)
#model = AttentiveTextRnn(conf)
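
Example #2 ends after constructing the classifier. Once the model has been trained (the project's training call is not shown in this snippet), the held-out split can be scored with the same predict API used in Example #1; a minimal sketch, assuming `test_label` is one-hot encoded:

import numpy as np

# Score the held-out split; predict returns per-class scores as in Example #1.
probs = model.predict(test_data)
pred = probs.argmax(axis=1)

# Assumes test_label is one-hot; use test_label directly if it already holds class ids.
gold = np.asarray(test_label).argmax(axis=1)
print("test accuracy:", (pred == gold).mean())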