from sklearn import svm,tree,linear_model
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from read_utils import TextConverter

# Paths of the cnews dataset splits, plus the file name handed to
# TextConverter as its second argument.
train_files = '../data/cnews.train.txt'
val_files = '../data/cnews.val.txt'
test_files = '../data/cnews.test.txt'
save_file = 'cnews.vocab_label.pkl'

# The converter is built from the training split with a 5000-entry vocab cap.
converter = TextConverter(train_files, save_file, max_vocab=5000)
print(converter.vocab_size)
print(converter.label)

# Only the raw texts and labels are loaded here; the texts are turned into
# TF-IDF features below.
train_texts, train_labels = converter.load_data(train_files)
val_texts, val_labels = converter.load_data(val_files)
test_texts, test_labels = converter.load_data(test_files)

# ------------- feature extraction --------------------
# token_pattern matches one word character at a time, so with
# ngram_range=(1, 1) every token is a single character.
# The boolean options are passed as real booleans: recent scikit-learn
# releases validate parameter types and reject integers such as ``1`` here.
vec = TfidfVectorizer(
    ngram_range=(1, 1),
    min_df=3,
    max_df=0.9,
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
    token_pattern=r"(?u)\w",
)

# Fit the vocabulary / IDF weights on the training split only, then reuse
# them unchanged for the validation and test splits.
# NOTE(review): .toarray() materializes dense matrices, which can be
# memory-heavy for large corpora -- confirm downstream code needs dense input.
train_features = vec.fit_transform(train_texts).toarray()
val_features = vec.transform(val_texts).toarray()
test_features = vec.transform(test_texts).toarray()

        os.makedirs(model_path)

    # Paths of the cnews dataset splits, plus the file name handed to
    # TextConverter as its second argument
    # (presumably a vocab/label cache -- TODO confirm against read_utils).
    train_files = '../data/cnews.train.txt'
    val_files = '../data/cnews.val.txt'
    test_files = '../data/cnews.test.txt'
    save_file = 'cnews.vocab_label.pkl'

    # Data processing: build the converter from the training split, with the
    # vocabulary size and sequence length taken from Config.
    converter = TextConverter(train_files,
                              save_file,
                              max_vocab=Config.vocab_size,
                              seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)
    print('labels:', converter.label)

    # Load the test split and convert it into inputs, input lengths and labels.
    test_texts, test_labels = converter.load_data(test_files)
    test_x, test_x_len, test_y = converter.texts_to_arr(
        test_texts, test_labels)

    # Wrap the converted test data in the converter's sample generator, using
    # Config.batch_size (presumably yields batches -- verify in read_utils).
    test_g = converter.val_samples_generator(test_x, test_x_len, test_y,
                                             Config.batch_size)

    model = Model(Config)

    # Load the most recently saved model checkpoint, if one exists.
    # NOTE(review): when no checkpoint is found, testing still runs on the
    # freshly constructed model -- confirm this is intended.
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    # Run the evaluation over the test generator.
    print('start to testing...')
    model.test(test_g)