def load_data():
    '''Load the IMDB reviews and pad every sequence to a common length.

    Reads two module-level settings: ``max_features`` (forwarded to
    ``imdb.load_data`` as ``num_words``) and ``maxlen`` (forwarded to
    ``sequence.pad_sequences``). Progress and the resulting shapes are
    printed to stdout.

    Returns:
        list: ``[x_train, y_train, x_test, y_test]`` where the two ``x``
        entries are the padded arrays and the two ``y`` entries are the
        labels exactly as returned by ``imdb.load_data``.
    '''
    print('Loading data...')
    train_split, test_split = imdb.load_data(num_words=max_features)
    raw_train, train_labels = train_split
    raw_test, test_labels = test_split
    print(len(raw_train), 'train sequences')
    print(len(raw_test), 'test sequences')

    print('Pad sequences (samples x time)')
    padded_train = sequence.pad_sequences(raw_train, maxlen=maxlen)
    padded_test = sequence.pad_sequences(raw_test, maxlen=maxlen)
    print('x_train shape:', padded_train.shape)
    print('x_test shape:', padded_test.shape)

    return [padded_train, train_labels, padded_test, test_labels]
def main(unused_argv):
    """Load the IMDB dataset and launch the experiment through learn_runner.

    Args:
        unused_argv: Not referenced by this function.
    """
    # Data source: https://keras.io/datasets/
    # 25,000 IMDB movie reviews labeled by sentiment (positive/negative).
    # Each review is already preprocessed into a sequence of integer word
    # indexes, where words are indexed by overall frequency in the dataset.
    print('Loading data...')
    train_split, test_split = imdb.load_data(num_words=FLAGS.num_words)
    x_train, y_train = train_split
    x_test, y_test = test_split
    print('size of the train dataset:', x_train.shape[0])
    print('size of the test dataset:', x_test.shape[0])

    # Run the experiment. The run config is created before the experiment
    # function, matching the original evaluation order.
    run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
    experiment_fn = generate_experiment_fn(x_train, y_train, x_test, y_test)
    learn_runner.run(experiment_fn, run_config=run_config)
def main(unused_argv):
    """Build an RNN estimator for IMDB sentiment and print sample predictions.

    Loads the IMDB reviews, creates either the canned
    ``DynamicRnnEstimator`` or a ``tf.estimator.Estimator`` wrapping
    ``CustomRNNEstimator`` (selected by ``FLAGS.use_canned_estimator``), runs
    prediction over the test split and prints five randomly chosen test
    reviews together with their prediction and label.

    The training and evaluation calls are commented out below, so this
    function only predicts.

    Args:
        unused_argv: Not referenced by this function.
    """
    # Data source: https://keras.io/datasets/
    # 25,000 IMDB movie reviews labeled by sentiment (positive/negative).
    # Each review is already preprocessed into a sequence of integer word
    # indexes, where words are indexed by overall frequency in the dataset.
    print('Loading data...')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(
        num_words=FLAGS.num_words)
    print('size of the train dataset:', x_train.shape[0])
    print('size of the test dataset:', x_test.shape[0])

    # run config
    run_config = tf.estimator.RunConfig()

    # creates estimator
    if FLAGS.use_canned_estimator:
        xc = tf.contrib.layers.sparse_column_with_integerized_feature(
            'x', FLAGS.num_words)
        xc = tf.contrib.layers.embedding_column(xc, FLAGS.embed_dim)
        estimator = tf.contrib.learn.DynamicRnnEstimator(
            config=run_config,
            model_dir=FLAGS.model_dir,
            problem_type=constants.ProblemType.CLASSIFICATION,
            prediction_type=PredictionType.SINGLE_VALUE,
            sequence_feature_columns=[xc],
            context_feature_columns=None,
            num_units=FLAGS.num_rnn_units,
            cell_type=FLAGS.cell_type,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            num_classes=FLAGS.num_classes,
            dropout_keep_probabilities=FLAGS.dropout_keep_probabilities)
    else:
        model_fn = CustomRNNEstimator(
            rnn_cell_sizes=FLAGS.num_rnn_units,
            label_dimension=FLAGS.num_classes,
            num_words=FLAGS.num_words,
            dnn_layer_sizes=FLAGS.num_dnn_units,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            embed_dim=FLAGS.embed_dim)
        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            model_dir=FLAGS.model_dir,
            config=run_config)

    # input functions
    # train_input is only consumed by the disabled training call below; it is
    # still built so re-enabling training is a one-line change.
    train_input = get_input_fn(
        x_train, y_train, FLAGS.train_batch_size,
        epochs=FLAGS.num_epochs,
        max_length=FLAGS.max_len,
        batch_by_seq_len=FLAGS.batch_by_seq_len)
    test_input = get_input_fn(
        x_test, y_test, FLAGS.eval_batch_size,
        epochs=1,
        max_length=FLAGS.max_len)

    # training
    # estimator.train(input_fn=train_input)

    # evaluation
    # estimator.evaluate(input_fn=test_input)

    # predict
    # NOTE(review): predictions are looked up by test-set position below,
    # which assumes test_input yields examples in dataset order and one
    # prediction per example -- confirm against get_input_fn.
    predictions = list(estimator.predict(input_fn=test_input))

    # loading map from index to word (the word-to-index map is not needed)
    _, index_to_word = _load_map_dicts()

    num_test_examples = x_test.shape[0]
    for _ in range(5):
        # randrange excludes the upper bound; randint(0, n) could return n
        # itself and index one past the end of the test set.
        index = random.randrange(num_test_examples)
        print(_ids_to_sentence(x_test[index], index_to_word))
        print('Prediction:', predictions[index])
        print('Label:', y_test[index])
from tensorflow.contrib.keras.python.keras.datasets import imdb
from tensorflow.contrib.keras.python.keras.layers import Embedding, SimpleRNN, Dropout, Dense, Activation, LSTM, GRU
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.preprocessing import sequence

# Hyper-parameters.
max_features = 20000  # forwarded as num_words and used as the Embedding input size
maxlen = 100          # every review is padded to this many tokens
batch_size = 32       # not referenced in this section

# Load the IMDB reviews, then pad the train and test splits (in that order)
# to a fixed length.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_train, X_test = [
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
]
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

# Embedding -> recurrent layer -> dropout -> single sigmoid output.
# To try another recurrent cell, replace LSTM(128) with SimpleRNN(128) or
# GRU(128); both are imported above.
model = Sequential([
    Embedding(max_features, 128, input_length=maxlen),
    LSTM(128),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')
# Global params: NB_WORDS = 20000 SKIP_TOP = 0 TEST_SPLIT = 0.2 INIT_SEED = 2017 GLOBAL_SEED = 2018 MAXLEN = 80 BATCH_SIZE = 128 TEST_BATCH_SIZE = 512 # In[2]: (X_train, Y_train), (X_test, Y_test) = imdb.load_data(num_words=NB_WORDS) print(len(X_train), 'train sequences') print(len(X_test), 'test sequences') print('Pad sequences (samples x time)') X_train = sequence.pad_sequences(X_train, maxlen=MAXLEN) X_test = sequence.pad_sequences(X_test, maxlen=MAXLEN) print('x_train shape:', X_train.shape) print('x_test shape:', X_test.shape) # In[3]: class Model(nn.Module): def __init__(self, nb_words, hidden_size=128, embedding_size=128, n_layers=1, wdrop=0.25, odrop=0.25, edrop=0.1, idrop=0.25, variational=False,
def main(unused_argv):
    """Predict on the IMDB test split and print the first five results.

    Creates either the canned ``DynamicRnnEstimator`` or a
    ``tf.estimator.Estimator`` wrapping ``CustomRNNEstimator`` (selected by
    ``FLAGS.use_canned_estimator``), runs prediction over the test split with
    shuffling disabled, and prints the first five reviews alongside their
    prediction and label. No training happens here.

    Args:
        unused_argv: Not referenced by this function.
    """
    # Load the data; only the test split is used below.
    _train_split, (x_test, y_test) = imdb.load_data(
        maxlen=FLAGS.max_len,
        num_words=FLAGS.num_words,
        index_from=FLAGS.index_from,
    )

    # Maps between words and indexes; only index -> word is needed here.
    word_to_index, index_to_word = _load_map_dicts()

    config = tf.estimator.RunConfig()

    if FLAGS.use_canned_estimator:
        # Integer word ids -> embedding column consumed by the canned RNN.
        word_id_column = tf.contrib.layers.sparse_column_with_integerized_feature(
            'x', FLAGS.num_words)
        embedded_column = tf.contrib.layers.embedding_column(
            word_id_column, FLAGS.embed_dim)
        estimator = tf.contrib.learn.DynamicRnnEstimator(
            config=config,
            model_dir=FLAGS.model_dir,
            problem_type=constants.ProblemType.CLASSIFICATION,
            prediction_type=PredictionType.SINGLE_VALUE,
            sequence_feature_columns=[embedded_column],
            context_feature_columns=None,
            num_units=FLAGS.num_rnn_units,
            cell_type=FLAGS.cell_type,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            num_classes=FLAGS.num_classes,
            dropout_keep_probabilities=FLAGS.dropout_keep_probabilities,
        )
    else:
        custom_model_fn = CustomRNNEstimator(
            rnn_cell_sizes=FLAGS.num_rnn_units,
            label_dimension=FLAGS.num_classes,
            num_words=FLAGS.num_words,
            dnn_layer_sizes=FLAGS.num_dnn_units,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            embed_dim=FLAGS.embed_dim,
        )
        estimator = tf.estimator.Estimator(
            model_fn=custom_model_fn,
            model_dir=FLAGS.model_dir,
            config=config,
        )

    # shuffle=False is passed so predictions can be matched to x_test / y_test
    # by position below.
    test_input = get_input_fn(
        x_test,
        y_test,
        FLAGS.eval_batch_size,
        epochs=1,
        max_length=FLAGS.max_len,
        shuffle=False,
    )
    predictions = list(estimator.predict(input_fn=test_input))

    # Show the first five test reviews with their prediction and label.
    for sample in range(5):
        print(_ids_to_sentence(x_test[sample], index_to_word))
        print('Prediction:', predictions[sample])
        print('Label:', y_test[sample])