def main():
    """Run prediction on a test file and hand the result to ``write``.

    Command line:
        sys.argv[1] -- path registered with ``DataManager.add_data`` under
                       the name 'test'
        sys.argv[2] -- first argument passed to ``write`` alongside the
                       value returned by ``predict``

    Exits with a usage message (non-zero status) when fewer than two
    command-line arguments are supplied, instead of failing later with an
    ``IndexError``.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: {} <test_data> <output>'.format(sys.argv[0]))
    test_path = sys.argv[1]
    output_path = sys.argv[2]

    path_pfx = ''
    # NOTE(review): the training entry point uses max_len = 39 -- confirm
    # that 37 is the intended sequence length for prediction.
    max_len = 37

    dm = DataManager()
    # The two trailing positional flags are defined by DataManager.add_data,
    # which is not visible here; they are forwarded unchanged.
    dm.add_data('test', test_path, False, True)
    print(len(dm.data['test'][0]))

    dm.preprocessing()
    dm.load_word2vec(os.path.join(path_pfx, 'model/word2vec'))
    dm.to_sequence(max_len, use_pretrain=True)

    result = predict(dm.data['test'][0], path_pfx)
    write(output_path, result)
    print('finished')
def main():
    """Prepare the training data and start training.

    Command line:
        sys.argv[1] -- path registered with ``DataManager.add_data`` under
                       the name 'train'

    Exits with a usage message (non-zero status) when the training-data
    argument is missing, instead of failing later with an ``IndexError``.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} <training_data>'.format(sys.argv[0]))
    train_path = sys.argv[1]

    # Both values are passed to train() as None; how train() treats None is
    # defined elsewhere.
    voc_size = None
    emb_mat = None
    max_len = 39
    path_pfx = ''

    dm = DataManager()
    dm.add_data('train', train_path)
    dm.preprocessing()
    dm.load_word2vec(os.path.join(path_pfx, 'model/word2vec'))
    dm.to_sequence(max_len, use_pretrain=True)
    print(max_len)

    train(dm, voc_size=voc_size, max_len=max_len, emb_mat=emb_mat)