Example #1
def test_demo():
    # Parse the training file: each row holds the two person names,
    # the raw sentence, and the bag label.
    dataa = of.read_txt_and_deal(path_words_deal)
    sents_train_deal = list()
    for s in dataa:
        nr1 = s[0]
        nr2 = s[1]
        x = s[2]
        # Mask the two names, then segment the sentence with jieba.
        x = pre.hide_nr(x, nr1, nr2)
        words = jieba.lcut(x)
        # word_str = ' '.join(words)
        sents_train_deal.append(words)

    # Two hand-written probe sentences: "胡挺和胡磊结婚了" ("Hu Ting and Hu Lei
    # got married", labeled 1 below) and a non-marriage news sentence labeled 0.
    sents_train_deal.append(jieba.lcut('胡挺和胡磊结婚了'))
    sents_train_deal.append(jieba.lcut('摩拜单车被美团收购了,由美图经营'))

    # Sanity check: report any row that does not have exactly 4 columns.
    for s in dataa:
        if len(s) != 4:
            print(s)

    # Bag labels come from column 3; add labels for the two probe sentences.
    bags_train_deal = [x[3] for x in dataa]
    bags_train_deal.append(1)
    bags_train_deal.append(0)

    data, labels, tokenizer = cnn.fit_tokenizer(sents_train_deal,
                                                bags_train_deal)
    data_test, labels_test = cnn.deal_data(tokenizer, sents_train_deal,
                                           bags_train_deal)
    # Train on everything except the two probe sentences, then evaluate;
    # note data_test is built from the full set, training rows included.
    model = cnn.fit_model(data[:-2], labels[:-2], tokenizer)
    cnn.evaluate_model(model, data_test, labels_test, bags_train_deal)
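
The masking step relies on pre.hide_nr, whose implementation is not shown. A minimal sketch consistent with the call site, assuming the helper simply swaps each person name for a fixed placeholder token (the marker strings are my assumption):

def hide_nr(sentence, nr1, nr2):
    # Replace the two person names with placeholders so the model
    # learns the relation pattern rather than the specific names.
    # Sketch only; the real pre.hide_nr may use different markers.
    return sentence.replace(nr1, 'NR1').replace(nr2, 'NR2')

# e.g. hide_nr('胡挺和胡磊结婚了', '胡挺', '胡磊') -> 'NR1和NR2结婚了'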
Example #2
def train_test():
    train = of.read_txt_and_deal(CONFIG.PATH_TRAIN)
    # Presumably the batch form of hide_nr: mask the names in every row.
    train = pre.hide_nr_demo(train)
    sent_train_deal = [x[2] for x in train]
    bags_train_deal = [x[3] for x in train]
    data, labels, tokenizer = cnn.fit_tokenizer(sent_train_deal,
                                                bags_train_deal)
    x_train, y_train, x_test, y_test = cnn.split_data(data, labels)
    # train_word2vec.word2vec_train(sent_train_deal)
    # data_test, labels_test = cnn.deal_data(tokenizer, x_test, y_test)
    model = cnn.fit_model(x_train, y_train, tokenizer)
    cnn.evaluate_model(model, x_test, y_test)
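
cnn.split_data is not shown in these examples. A plausible sketch built on scikit-learn's train_test_split; the 4-tuple return order follows the call site above, while the split ratio is an assumption:

from sklearn.model_selection import train_test_split

def split_data(data, labels, test_ratio=0.2):
    # Hold out a fraction of the tokenized data for evaluation.
    # The return order (x_train, y_train, x_test, y_test) matches
    # how train_test() unpacks it above.
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=test_ratio)
    return x_train, y_train, x_test, y_test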
Example #3
def train_test():
    # Sentences and bag labels live in separate files for each split.
    sent_train = of.read_txt_and_deal(CONFIG.PATH_TRAIN_SENT)
    bags_train = of.read_txt_and_deal(CONFIG.PATH_TRAIN_BAG)
    # sent_train, bags_train = pre.delete_line(sent_train, bags_train, 5000)

    sent_test = of.read_txt_and_deal(CONFIG.PATH_TEST_SENT)
    bags_test = of.read_txt_and_deal(CONFIG.PATH_TEST_BAG)
    # sent_test, bags_test = pre.delete_line(sent_test, bags_test, 3000)

    # Column 3 of the sentence file holds the text; column 1 of the
    # bag file holds the raw label, normalized by standard_bags.
    sent_train_deal = [x[3] for x in sent_train]
    bags_train_deal = [x[1] for x in bags_train]
    bags_train_deal = utils.standard_bags(bags_train_deal)

    sent_test_deal = [x[3] for x in sent_test]
    bags_test_deal = [x[1] for x in bags_test]
    bags_test_deal = utils.standard_bags(bags_test_deal)

    data, labels, tokenizer = cnn.fit_tokenizer(sent_train_deal,
                                                bags_train_deal)
    data_test, labels_test = cnn.deal_data(tokenizer, sent_test_deal,
                                           bags_test_deal)
    model = cnn.fit_model(data, labels, tokenizer)
    cnn.evaluate_model(model, data_test, labels_test)
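
utils.standard_bags is opaque here; a guess at its behavior, assuming the raw labels need to become contiguous integer ids before one-hot encoding (the mapping rule is entirely an assumption):

def standard_bags(bags):
    # Map each distinct raw label to a contiguous integer id so the
    # labels are usable downstream regardless of how they are written
    # in the file. Sketch only; the real helper's rule is unknown.
    ids = {label: i for i, label in enumerate(sorted(set(bags)))}
    return [ids[label] for label in bags]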
Example #4
def train_test():
    # Despite the variable name, this reads the test file and
    # evaluates a previously saved model on it.
    train = of.read_txt_and_deal(CONFIG.PATH_TEST_DEAL)
    train = pre.hide_nr_demo(train)
    sent_train_deal = list()

    for s in train:
        nr1 = s[0]
        nr2 = s[1]
        x = s[2]
        x = pre.hide_nr(x, nr1, nr2)
        words = jieba.lcut(x)
        # word_str = ' '.join(words)
        sent_train_deal.append(words)

    bags_train_deal = [int(x[3]) for x in train]

    # Restore the tokenizer persisted at training time so the word
    # index matches the one the saved model was trained with.
    with open('model/tokenizer_' + str(CONFIG.VERSION) + '.pickle', 'rb') as f:
        tokenizer = pickle.load(f)
    data_test, labels_test = cnn.deal_data(tokenizer, sent_train_deal,
                                           bags_train_deal)
    model = cnn.load_models()
    cnn.evaluate_model(model, data_test, labels_test)
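
The write side of that pickle is not shown; presumably the training script persists the fitted tokenizer as the mirror image of the load above:

import pickle

# Presumed counterpart run at training time: save the fitted tokenizer
# so evaluation reuses the exact same word index.
with open('model/tokenizer_' + str(CONFIG.VERSION) + '.pickle', 'wb') as f:
    pickle.dump(tokenizer, f)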
Example #5
def test_fit_tokenizer():
    sents = of.read_txt_and_deal(path_sent)
    bags = of.read_txt_and_deal(path_bag)
    sents_deal = [x[3] for x in sents]
    bags_deal = [x[1] for x in bags]
    model = cnn.fit_model(sents_deal, bags_deal)
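
cnn.fit_tokenizer appears throughout these examples, returning padded integer sequences, labels, and the fitted tokenizer. A minimal sketch with the Keras preprocessing API, assuming the bag labels are already integers and become one-hot vectors; maxlen=100 is my assumption, not a value from the source:

import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def fit_tokenizer(sents, bags, maxlen=100):
    # Fit a word index on the segmented sentences (lists of jieba
    # tokens are accepted directly), then pad to a fixed length.
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sents)
    data = pad_sequences(tokenizer.texts_to_sequences(sents), maxlen=maxlen)
    labels = to_categorical(np.asarray(bags))
    return data, labels, tokenizer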
Example #6
def test_query_word_count():
    words = pre.query_word_count(of.read_txt_and_deal(path_sent), 3)
    print(len(words))
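
pre.query_word_count is called with the parsed rows and a threshold of 3. A sketch assuming it returns the words whose corpus frequency reaches the threshold; column 3 as the sentence field follows Example #3 but is an assumption:

from collections import Counter
import jieba

def query_word_count(rows, min_count):
    # Count token frequencies across the corpus and keep the words
    # seen at least min_count times.
    counts = Counter()
    for row in rows:
        counts.update(jieba.lcut(row[3]))
    return [w for w, c in counts.items() if c >= min_count]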
Example #7
def test_check_txt_column_number():
    print(pre.check_txt_column_number(of.read_txt_and_deal(path_sent), 4))
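
A sketch of the presumed check, given that Example #1 flags rows whose length is not 4:

def check_txt_column_number(rows, n):
    # Return True when every parsed row has exactly n columns.
    return all(len(row) == n for row in rows)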
Example #8
def test_read_txt_and_deal():
    print(of.read_txt_and_deal(path_sent))
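
of.read_txt_and_deal underpins every example here. A sketch under the assumption that the files are UTF-8 text with one tab-separated record per line; the source only shows that each returned row indexes like a list (s[0], s[1], ...):

def read_txt_and_deal(path):
    # Read the file and split each non-empty line into columns.
    # The tab delimiter is an assumption.
    with open(path, encoding='utf-8') as f:
        return [line.rstrip('\n').split('\t') for line in f if line.strip()]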