    emb_size=128,
    hidden_size=128,
    nb_filter=250,
    filter_length=3,
    pool_length=2,
    init_type='random',
    emb_file="../data/unlabeled_corpus.vec",
    tune_emb=True,
)
options, args = parser.parse_args(sys.argv)

print('Loading data...')
(X_train, y_train), (X_test, y_test), (X_dev, y_dev), max_features, E, label_id = \
    aidr.load_and_numberize_data(path=options.data_dir, nb_words=options.max_features,
                                 init_type=options.init_type, embfile=options.emb_file,
                                 dev_train_merge=0, map_labels_to_five_class=0)

# print("Padding sequences....")
X_train = sequence.pad_sequences(X_train, maxlen=options.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=options.maxlen)
X_dev = sequence.pad_sequences(X_dev, maxlen=options.maxlen)

# build model...
nb_classes = np.max(y_train) + 1

print('............................')
print(len(X_train), 'train tweets')
print(len(X_dev), 'dev tweets')
print(max_features - 3, 'vocabulary size')
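# --- Illustrative sketch (not part of the original script) ---
# A minimal guess at the CNN classifier this script presumably builds next,
# inferred from the option names above (nb_filter, filter_length, pool_length,
# emb_size, hidden_size). The layer stack and the Keras 1.x API style are
# assumptions; the real model code may differ.
from keras.models import Sequential
from keras.layers import Embedding, Convolution1D, MaxPooling1D, Flatten, Dense

model = Sequential()
model.add(Embedding(max_features, options.emb_size, input_length=options.maxlen,
                    weights=[E]))  # E: embedding matrix returned by the loader
model.add(Convolution1D(nb_filter=options.nb_filter,
                        filter_length=options.filter_length, activation='relu'))
model.add(MaxPooling1D(pool_length=options.pool_length))
model.add(Flatten())
model.add(Dense(options.hidden_size, activation='relu'))
model.add(Dense(nb_classes, activation='softmax'))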
    nb_filter=250,
    init_type='random',
    filter_length=3,
    pool_length=2,
    add_feat=0,
    maxlen=80,
    tune_emb=True,
    max_features=80,
    map_class=0,
)
options, args = parser.parse_args(sys.argv)

print('Loading data...')
(X_train, y_train), (X_test, y_test), (X_dev, y_dev), vocab_size, E, label_id = \
    aidr.load_and_numberize_data(path=options.data_dir, seed=113,
                                 nb_words=options.max_features,
                                 init_type=options.init_type,
                                 embfile=options.emb_file)

# load features
X_train_f, X_test_f, X_dev_f = aidr_feat.load_tfidf_vectors(path=options.data_dir, seed=113)
assert len(X_train) == X_train_f.shape[0] and len(X_test) == X_test_f.shape[0]

print("Padding sequences....")
X_train = sequence.pad_sequences(X_train, maxlen=options.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=options.maxlen)
X_dev = sequence.pad_sequences(X_dev, maxlen=options.maxlen)

# build model...
nb_classes = len(label_id)
max_features = X_train_f.shape[1]
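# --- Illustrative sketch (not part of the original script) ---
# One plausible way the tf-idf vectors (X_train_f) could be fused with the
# padded word sequences: a two-input model that concatenates the CNN text
# representation with the dense feature vector (Keras 1.x functional API).
# All layer choices here are assumptions; the embedding size 128 is a guess,
# since this script's defaults do not show one.
from keras.models import Model
from keras.layers import Input, Embedding, Convolution1D, MaxPooling1D, Flatten, Dense, merge

seq_in = Input(shape=(options.maxlen,), dtype='int32')
x = Embedding(vocab_size, 128, input_length=options.maxlen)(seq_in)
x = Convolution1D(options.nb_filter, options.filter_length, activation='relu')(x)
x = MaxPooling1D(pool_length=options.pool_length)(x)
x = Flatten()(x)

feat_in = Input(shape=(max_features,))        # tf-idf feature vector
joint = merge([x, feat_in], mode='concat')    # Keras 1.x merge
out = Dense(nb_classes, activation='softmax')(joint)
model = Model(input=[seq_in, feat_in], output=out)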
    emb_size=128,
    hidden_size=128,
    nb_filter=250,
    filter_length=3,
    pool_length=2,
    init_type='random',
    emb_file="../data/unlabeled_corpus.vec",
    tune_emb=True,
)
options, args = parser.parse_args(sys.argv)

print('Loading data...')
(X_train, y_train), (X_test, y_test), (X_dev, y_dev), max_features, E, label_id = \
    aidr.load_and_numberize_data(path=options.data_dir, nb_words=options.max_features,
                                 init_type=options.init_type, embfile=options.emb_file,
                                 dev_train_merge=0, map_labels_to_five_class=0)

# print("Padding sequences....")
X_train = sequence.pad_sequences(X_train, maxlen=options.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=options.maxlen)
X_dev = sequence.pad_sequences(X_dev, maxlen=options.maxlen)

# build model...
nb_classes = np.max(y_train) + 1

print('............................')
print(len(X_train), 'train tweets')
print(len(X_test), 'test tweets')
print(len(X_dev), 'dev tweets')
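# --- Illustrative sketch (not part of the original script) ---
# Before training, the integer labels would presumably be one-hot encoded,
# since Keras's categorical losses expect (nb_samples, nb_classes) targets;
# this step is assumed, not shown in the excerpt above.
from keras.utils import np_utils

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
Y_dev = np_utils.to_categorical(y_dev, nb_classes)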
,learn_alg = "adam" # sgd, adagrad, rmsprop, adadelta, adam (default) ,loss = "hinge"#"binary_crossentropy" # hinge, squared_hinge, binary_crossentropy (default) ,minibatch_size = 32 ,dropout_ratio = 0.2 ,epochs = 25 ,hidden_size = 128 ,nb_layers = 1 ,model_type = 'mlp' ,add_feat = 0 ,map_class = 0 ) options,args = parser.parse_args(sys.argv) print('Loading data...') (X_train, y_train), (X_test, y_test), (X_dev, y_dev), max_features, E, label_id = aidr.load_and_numberize_data(path=options.data_dir, seed=113) X_train_f, X_test_f, X_dev_f = aidr_feat.load_tfidf_vectors(path=options.data_dir, seed=113) # load features assert len(X_train) == X_train_f.shape[0] and len(X_test) == X_test_f.shape[0] #build model... nb_classes = len(label_id) max_features = X_train_f.shape[1] print('............................') print(len(X_train), 'train tweets') print(len(X_test), 'test tweets') print(len(X_dev), 'dev tweets') print(max_features, 'features') print(nb_classes, 'different classes') print('............................')
    recur_type='lstm',  # gru, simplernn, lstm (default)
    init_type='conv_glove',  # 'random', 'word2vec', 'glove', 'conv_word2vec', 'conv_glove', 'meta_conv', 'meta_orig'
    emb_file="../data/unlabeled_corpus.vec",
    tune_emb=True,
    map_class=1,
    numClasses=5,
    evalMinibatches=100,
)
options, args = parser.parse_args(sys.argv)

(X_train, y_train), (X_test, y_test), (X_dev, y_dev), max_features, E, label_id, sequence_len = \
    aidr.load_and_numberize_data(path=options.data_dir, nb_words=options.max_features,
                                 maxlen=options.maxlen, init_type=options.init_type,
                                 dev_train_merge=1, embfile=None,
                                 map_labels_to_five_class=options.map_class)

# Placeholders
input_data = tf.placeholder(tf.int32, [None, options.maxlen], name="input_data")
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name="sequence_lengths")
y_values = tf.placeholder(tf.int32, [None])
labels = tf.one_hot(y_values, options.numClasses)

prediction = forward_propagation_bidirectional(input_data, sequence_lengths, E)
# prediction = forward_propagation_unidirectional(input_data, sequence_lengths, E)
# prediction = forward_propagation_averaging(input_data, sequence_lengths, E)
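# --- Illustrative sketch (not part of the original script) ---
# One plausible shape of forward_propagation_bidirectional: embed the token
# ids, run a bidirectional LSTM, and project the concatenated final hidden
# states to class logits (TF1-style API). The real implementation lives
# elsewhere in this script and may differ; hidden_size=128 is a guess.
import tensorflow as tf

def forward_propagation_bidirectional_sketch(input_data, sequence_lengths, E,
                                             hidden_size=128, num_classes=5):
    embeddings = tf.Variable(E, dtype=tf.float32)              # (vocab, emb_dim)
    embedded = tf.nn.embedding_lookup(embeddings, input_data)  # (batch, maxlen, emb_dim)
    fw_cell = tf.contrib.rnn.LSTMCell(hidden_size)
    bw_cell = tf.contrib.rnn.LSTMCell(hidden_size)
    _, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, embedded,
        sequence_length=sequence_lengths, dtype=tf.float32)
    final = tf.concat([fw_state.h, bw_state.h], axis=1)        # (batch, 2*hidden)
    return tf.layers.dense(final, num_classes)                 # unscaled logits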
    pool_length=2,
    init_type='random',
    emb_file="../data/unlabeled_corpus.vec",
    tune_emb=True,
)
options, args = parser.parse_args(sys.argv)

print('LOADING DATA...')
print('----------------------------------------------------------------')
(X_train, y_train), (X_test, y_test), (X_validate, y_validate), \
    (X_newinput, y_newinput), max_features, E, label_id = aidr.load_and_numberize_data(
        path=options.data_dir, nb_words=options.max_features,
        init_type=options.init_type, embfile=options.emb_file,
        validate_train_merge=0, map_labels_to_five_class=0)

print("Padding sequences....")
X_train = sequence.pad_sequences(X_train, maxlen=options.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=options.maxlen)
X_validate = sequence.pad_sequences(X_validate, maxlen=options.maxlen)
X_newinput = sequence.pad_sequences(X_newinput, maxlen=options.maxlen)  # added by Quan
# print(X_train[0])  # added by Quan

# build model...
nb_classes = np.max(y_train) + 1
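# --- Illustrative sketch (not part of the original script) ---
# After the model is built and trained, the extra X_newinput split would
# presumably be scored like the other splits; the call below assumes a
# trained Keras model compiled with metrics=['accuracy'].
from keras.utils import np_utils

Y_newinput = np_utils.to_categorical(y_newinput, nb_classes)
score, acc = model.evaluate(X_newinput, Y_newinput, batch_size=32, verbose=0)
print('new-input loss:', score)
print('new-input accuracy:', acc)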