def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6], biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2, biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2, biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)

    loss = conv_model_loss(Ylogits, Y_, mode)
    train_op = conv_model_train_op(loss, mode)
    eval_metrics = conv_model_eval_metrics(classes, Y_, mode)

    return learn.ModelFnOps(
        mode=mode,
        predictions={"predictions": predict, "classes": classes},
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics)
def cnn_model(features, target, mode):
    embeds = tf.contrib.layers.embed_sequence(
        features, vocab_size=22, embed_dim=EMBEDDING_SIZE)
    n_classes = len(labels)
    logits = tf.contrib.layers.fully_connected(embeds, n_classes, activation_fn=None)

    predictions_dict = {
        'source': tf.gather(labels, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    learning_rate = 0.001
    # `target` holds the sparse class ids for this batch.
    loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
    train_op = tf.contrib.layers.optimize_loss(
        loss,
        tf.contrib.framework.get_global_step(),
        optimizer='Adam',
        learning_rate=learning_rate)

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)
def linear_model(features, target, mode):
    # make input features numeric
    from tensorflow.contrib import lookup
    table = lookup.index_table_from_file(
        vocabulary_file=WORD_VOCAB_FILE, num_oov_buckets=1,
        vocab_size=N_WORDS, default_value=-1, name="word_to_index")
    titles = tf.squeeze(features['title'], [1])
    words = tf.string_split(titles)
    words = tf.sparse_tensor_to_dense(words, default_value='ZYXW')
    words = table.lookup(words)
    print('lookup_words={}'.format(words))

    # each row has a variable number of words;
    # take the first MAX_DOCUMENT_LENGTH words (pad shorter titles to this)
    padding = tf.stack([tf.zeros_like(titles, dtype=tf.int64),
                        tf.ones_like(titles, dtype=tf.int64) * MAX_DOCUMENT_LENGTH])
    words = tf.pad(words, padding)
    words = tf.slice(words, [0, 0], [-1, MAX_DOCUMENT_LENGTH])
    print('words_sliced={}'.format(words))  # (?, 20)

    # embed the words in a common way
    words = tf.contrib.layers.embed_sequence(
        words, vocab_size=(N_WORDS + 1), embed_dim=EMBEDDING_SIZE, scope='words')
    print('words_embed={}'.format(words))  # (?, 20, 10)

    # now do convolution
    conv = tf.contrib.layers.convolution2d(
        words, 5, [3, EMBEDDING_SIZE], padding='VALID')
    conv = tf.nn.relu(conv)
    words = tf.nn.max_pool(conv,
                           ksize=[1, POOLING_WINDOW, 1, 1],
                           strides=[1, POOLING_STRIDE, 1, 1],
                           padding='SAME')
    print('words_conv={}'.format(words))

    n_classes = len(TARGETS)
    logits = tf.contrib.layers.fully_connected(words, n_classes, activation_fn=None)
    print('logits={}'.format(logits))
    logits = tf.squeeze(logits, squeeze_dims=[1])  # from (?,1,3) to (?,3)

    predictions_dict = {
        'source': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op)
def nn_model(features, target, mode):
    tf.logging.info('Inside nn_model')
    # sigmoid_cross_entropy_with_logits expects raw (pre-activation) logits,
    # so no activation is applied here.
    logits = tf.contrib.layers.fully_connected(features, num_outputs=1,
                                               activation_fn=None)

    predictions_dict = {
        'ethnicity': tf.gather(CLASSES, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or \
            mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=logits)
        loss = tf.reduce_mean(loss)
        # pass the global step so the Estimator can track training progress
        train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(
            loss, global_step=tf.contrib.framework.get_global_step())
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6], biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2, biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2, biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    # to deactivate dropout on the dense layer, set keep_prob=1
    Y5d = layers.dropout(Y5, keep_prob=0.75, noise_shape=None,
                         is_training=mode == learn.ModeKeys.TRAIN)
    Ylogits = layers.linear(Y5d, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)

    loss = conv_model_loss(Ylogits, Y_, mode)
    train_op = conv_model_train_op(loss, mode)
    eval_metrics = conv_model_eval_metrics(classes, Y_, mode)

    return learn.ModelFnOps(
        mode=mode,
        # You can name the fields of your predictions dictionary as you like.
        predictions={"predictions": predict, "classes": classes},
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics
    )
def model(features, targets, mode):
    are_args = functools.partial(util.are_instances, [features, targets])
    def_model_fn = functools.partial(functools.partial, original_model_fn)

    if are_args(tf.Tensor):
        model_fn = def_model_fn(features, targets)
    elif are_args(dict):
        model_fn = def_model_fn(**features, **targets)
    elif isinstance(features, tf.Tensor) and targets is None:
        model_fn = def_model_fn(features)
    elif isinstance(features, dict) and targets is None:
        model_fn = def_model_fn(**features)
    else:
        raise ValueError(
            "features and targets should be both tf.Tensor or dict.")

    results = (
        model_fn(mode=mode)
        if "mode" in inspect.signature(model_fn).parameters.keys()
        else model_fn())

    return (
        results
        if isinstance(results, learn.ModelFnOps)
        else learn.ModelFnOps(
            mode,
            *(results if isinstance(results, typing.Sequence) else (results,))))
def simple_rnn(features, targets, mode):
    x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
    lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    outputs = outputs[-1]

    weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
    bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
    predictions = tf.matmul(outputs, weight) + bias

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.mean_squared_error(targets, predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=0.01,
            optimizer="SGD")
        eval_metric_ops = {
            "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
        }
    else:
        loss = None
        train_op = None
        eval_metric_ops = None

    predictions_dict = {"predicted": predictions}

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op,
                              eval_metric_ops=eval_metric_ops)
def cnn_model(features, target, mode):
    table = lookup.index_table_from_file(vocabulary_file=WORD_VOCAB_FILE,
                                         num_oov_buckets=1,
                                         default_value=-1)

    # string operations
    titles = tf.squeeze(features['title'], [1])
    words = tf.string_split(titles)
    densewords = tf.sparse_tensor_to_dense(words, default_value=PADWORD)
    numbers = table.lookup(densewords)
    padding = tf.constant([[0, 0], [0, MAX_DOCUMENT_LENGTH]])
    padded = tf.pad(numbers, padding)
    sliced = tf.slice(padded, [0, 0], [-1, MAX_DOCUMENT_LENGTH])
    print('words_sliced={}'.format(sliced))  # (?, 20)

    # layer to take the words and convert them into vectors (embeddings)
    embeds = tf.contrib.layers.embed_sequence(sliced,
                                              vocab_size=N_WORDS,
                                              embed_dim=EMBEDDING_SIZE)
    print('words_embed={}'.format(embeds))  # (?, 20, 10)

    # now do convolution
    conv = tf.contrib.layers.conv2d(embeds, 1, WINDOW_SIZE,
                                    stride=STRIDE, padding='SAME')  # (?, 4, 1)
    conv = tf.nn.relu(conv)  # (?, 4, 1)
    words = tf.squeeze(conv, [2])  # (?, 4)
    print('words_conv={}'.format(words))  # (?, 4)

    n_classes = len(TARGETS)
    logits = tf.contrib.layers.fully_connected(words, n_classes, activation_fn=None)
    # print('logits={}'.format(logits))  # (?, 3)

    predictions_dict = {
        'source': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)
def rnn_segment(features, targets, mode, params):
    seq_feature = features['seq_feature']
    seq_length = features['seq_length']
    with tf.variable_scope("emb"):
        embeddings = tf.get_variable(
            "char_emb", shape=[params['num_char'], params['emb_size']])
        seq_emb = tf.nn.embedding_lookup(embeddings, seq_feature)

    batch_size = tf.shape(seq_feature)[0]
    time_step = tf.shape(seq_feature)[1]
    flat_seq_emb = tf.reshape(
        seq_emb,
        shape=[batch_size, time_step, (params['k'] + 1) * params['emb_size']])

    cell = rnn.LSTMCell(params['rnn_units'])
    if mode == ModeKeys.TRAIN:
        cell = rnn.DropoutWrapper(cell, params['input_keep_prob'],
                                  params['output_keep_prob'])
    projection_cell = rnn.OutputProjectionWrapper(cell, params['num_class'])
    logits, _ = tf.nn.dynamic_rnn(projection_cell, flat_seq_emb,
                                  sequence_length=seq_length, dtype=tf.float32)

    weight_mask = tf.to_float(tf.sequence_mask(seq_length))
    loss = seq2seq.sequence_loss(logits, targets, weights=weight_mask)
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer=tf.train.AdamOptimizer,
        clip_gradients=params['grad_clip'],
        summaries=[
            "learning_rate",
            "loss",
            "gradients",
            "gradient_norm",
        ])

    pred_classes = tf.to_int32(tf.argmax(input=logits, axis=2))
    pred_words = tf.logical_or(tf.equal(pred_classes, 0), tf.equal(pred_classes, 3))
    target_words = tf.logical_or(tf.equal(targets, 0), tf.equal(targets, 3))
    precision = metrics.streaming_precision(pred_words, target_words,
                                            weights=weight_mask)
    recall = metrics.streaming_recall(pred_words, target_words,
                                      weights=weight_mask)

    predictions = {"classes": pred_classes}
    eval_metric_ops = {"precision": precision, "recall": recall}
    return learn.ModelFnOps(mode, predictions, loss, train_op,
                            eval_metric_ops=eval_metric_ops)
def model(features, labels, mode):
    # Build a linear model and predict values
    W = tf.get_variable("W", [1], dtype=tf.float64)
    b = tf.get_variable("b", [1], dtype=tf.float64)
    y = W * features['x'] + b
    # Loss sub-graph
    loss = tf.reduce_sum(tf.square(y - labels))
    # Training sub-graph
    global_step = tf.train.get_global_step()
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = tf.group(optimizer.minimize(loss),
                     tf.assign_add(global_step, 1))
    # ModelFnOps connects subgraphs we built to the appropriate functionality
    return learn.ModelFnOps(mode=mode, predictions=y,
                            loss=loss, train_op=train)
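# A model_fn like the one above is normally handed to a tf.contrib.learn
# Estimator. The following is a minimal usage sketch, assuming TF 1.x
# contrib.learn and NumPy; the training arrays and step counts below are
# illustrative assumptions, not part of the original snippet.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn

estimator = learn.Estimator(model_fn=model)

# Hypothetical toy data for a 1-D linear fit.
x_train = np.array([1., 2., 3., 4.])
y_train = np.array([0., -1., -2., -3.])
input_fn = learn.io.numpy_input_fn({'x': x_train}, y_train,
                                   batch_size=4, num_epochs=1000)

estimator.fit(input_fn=input_fn, steps=1000)
print(estimator.evaluate(input_fn=input_fn, steps=10))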
def simple_rnn(features, targets, mode, params):
    print('-' * 100)
    print(features[TIMESERIES_COL])

    # 0. Reformat input shape to become a sequence
    x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
    #print 'x={}'.format(x)

    # 1. configure the RNN
    lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # slice to keep only the last cell of the RNN
    outputs = outputs[-1]
    #print 'last outputs={}'.format(outputs)

    # output is result of linear activation of last layer of RNN
    weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
    bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
    predictions = tf.matmul(outputs, weight) + bias

    # 2. Define the loss function for training/evaluation
    #print 'targets={}'.format(targets)
    #print 'preds={}'.format(predictions)
    loss = tf.losses.mean_squared_error(targets, predictions)
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
    }

    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.01,
        optimizer="SGD")

    # 4. Create predictions
    predictions_dict = {"predicted": predictions}

    # 5. Return ModelFnOps
    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
def _get_predict_ops(self, features):
    """Get ops for prediction.

    Inputs:
        `features`: image batch
    Returns:
        `ModelFnOps` object. Predictions is a dict with
            `heatmaps`: final heatmap inference
            (sometimes) `example_id`: same as the `example_id` passed in via
                features, if present.
    """
    images = features
    self._check_images(images)
    raise NotImplementedError()
    predictions = None
    return learn.ModelFnOps(learn.ModeKeys.INFER, predictions=predictions)
def linear_model(features, target, mode):
    # make input features numeric
    from tensorflow.contrib import lookup
    table = lookup.index_table_from_file(vocabulary_file=WORD_VOCAB_FILE,
                                         num_oov_buckets=1, vocab_size=N_WORDS,
                                         default_value=-1, name="word_to_index")
    word_indexes = table.lookup(features['title'])
    word_vectors = tf.contrib.layers.embed_sequence(word_indexes,
                                                    vocab_size=(N_WORDS + 1),
                                                    embed_dim=EMBEDDING_SIZE,
                                                    scope='words')

    n_classes = len(TARGETS)
    logits = tf.contrib.layers.fully_connected(word_vectors, n_classes,
                                               activation_fn=None)
    logits = tf.squeeze(logits, squeeze_dims=[1])  # from (?,1,3) to (?,3)

    predictions_dict = {
        'source': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)
def _get_eval_ops(self, features, labels, metrics):
    """Get evaluation ops.

    Inputs:
        `features`: per-image standardized images batch,
            dtype tf.float32, shape (None, 299, 299, 3)
        `labels`: int32 label, shape (None,)
    Returns:
        `ModelFnOps` object.
    """
    images = features
    self._check_images(images)
    self._check_labels(labels)
    raise NotImplementedError()
    predictions = None
    loss = None
    return learn.ModelFnOps(learn.ModeKeys.EVAL,
                            predictions=predictions, loss=loss)
def _get_train_ops(self, features, labels):
    """Get training ops.

    Inputs:
        `features`: per-image standardized image batch,
            dtype tf.float32, shape (None, 299, 299, 3)
        `labels`: int32 label, shape (None,)
    Returns:
        `ModelFnOps` object.
    """
    images = features
    self._check_images(images)
    self._check_labels(labels)
    logits = self.get_logits(images, True)
    loss = self._get_loss(logits, labels)
    tf.summary.scalar('loss', loss)

    steps = tf.get_collection(tf.GraphKeys.GLOBAL_STEP)
    if len(steps) == 1:
        step = steps[0]
    else:
        raise Exception('Multiple global steps disallowed')

    with tf.name_scope('train_op_generation'):
        # train_op = tf.contrib.layers.optimize_loss(
        #     loss, step, self.learning_rate, self.optimizer)
        train_op = tf.train.AdamOptimizer().minimize(loss, step)

    predictions = tf.nn.softmax(logits)
    return learn.ModelFnOps(learn.ModeKeys.TRAIN,
                            predictions=predictions,
                            loss=loss,
                            train_op=train_op)
def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
    # MNIST images are 28x28 pixels, and have one color channel
    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Convolutional Layer #1
    # Computes 32 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 28, 28, 1]
    # Output Tensor Shape: [batch_size, 28, 28, 32]
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 28, 28, 32]
    # Output Tensor Shape: [batch_size, 14, 14, 32]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2
    # Computes 64 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 14, 14, 32]
    # Output Tensor Shape: [batch_size, 14, 14, 64]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)

    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 14, 14, 64]
    # Output Tensor Shape: [batch_size, 7, 7, 64]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 7, 7, 64]
    # Output Tensor Shape: [batch_size, 7 * 7 * 64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Dense Layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 7 * 7 * 64]
    # Output Tensor Shape: [batch_size, 1024]
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Add dropout operation; 0.6 probability that element will be kept
    dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                training=mode == learn.ModeKeys.TRAIN)

    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 10]
    logits = tf.layers.dense(inputs=dropout, units=10)

    loss = None
    train_op = None

    # Calculate Loss (for both TRAIN and EVAL modes)
    if mode != learn.ModeKeys.INFER:
        onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
        loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                               logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=0.001,
            optimizer="SGD")

    # Generate Predictions
    classes = tf.argmax(input=logits, axis=1)
    softmax_tensor = tf.nn.softmax(logits, name='softmax_tensor')
    predictions = {
        "classes": classes,
        "probabilities": softmax_tensor,
    }

    # Return a ModelFnOps object
    return learn.ModelFnOps(mode=mode, predictions=predictions,
                            loss=loss, train_op=train_op)
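# A minimal sketch of wiring cnn_model_fn into a contrib.learn Estimator for
# training and evaluation. It assumes TF 1.x contrib.learn; the model_dir
# path, batch size, and step count are illustrative assumptions, not from the
# snippet above.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn

# contrib.learn ships a small helper that returns MNIST as NumPy arrays.
mnist = learn.datasets.load_dataset("mnist")
train_data = mnist.train.images                         # float32, (55000, 784)
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

mnist_classifier = learn.Estimator(model_fn=cnn_model_fn,
                                   model_dir="/tmp/mnist_convnet_model")
mnist_classifier.fit(x=train_data, y=train_labels, batch_size=100, steps=2000)

# Map the accuracy metric onto the "classes" key of the predictions dict.
metrics = {
    "accuracy": learn.MetricSpec(metric_fn=tf.metrics.accuracy,
                                 prediction_key="classes"),
}
print(mnist_classifier.evaluate(x=eval_data, y=eval_labels, metrics=metrics))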
def model_fn(features, targets, mode, params):
    """Model function to be used for `Experiment` object.

    Should not access `flags.FLAGS`.

    Args:
        features: a dictionary of feature tensors.
        targets: a dictionary of target tensors.
        mode: `learn.ModeKeys.TRAIN` or `learn.ModeKeys.EVAL`.
        params: `HParams` object.
    Returns:
        `ModelFnOps` object.
    Raises:
        ValueError: raised if `params.model` is not an appropriate value.
    """
    with tf.variable_scope('model'):
        if params.model == 'feature':
            logits_start, logits_end, tensors = feature_model(
                features, mode, params)
        elif params.model == 'kernel':
            logits_start, logits_end, tensors = kernel_model(
                features, mode, params)
        else:
            raise ValueError(
                '`%s` is an invalid argument for `model` parameter.' %
                params.model)
        no_answer_bias = tf.get_variable('no_answer_bias', shape=[],
                                         dtype='float')
        no_answer_bias = tf.tile(
            tf.reshape(no_answer_bias, [1, 1]),
            [tf.shape(features['context_words'])[0], 1])

    predictions = get_pred_ops(features, params, logits_start, logits_end,
                               no_answer_bias)
    predictions.update(tensors)
    predictions.update(features)

    if mode == learn.ModeKeys.INFER:
        eval_metric_ops, loss = None, None
    else:
        eval_metric_ops = squad_data.get_eval_metric_ops(targets, predictions)
        loss = get_loss(targets['word_answer_starts'],
                        targets['word_answer_ends'], logits_start, logits_end,
                        no_answer_bias)

    emas = {
        decay: tf.train.ExponentialMovingAverage(
            decay=decay, name='EMA_%f' % decay)
        for decay in params.ema_decays
    }
    ema_ops = [ema.apply() for ema in emas.values()]

    if mode == learn.ModeKeys.TRAIN:
        train_op = get_train_op(
            loss,
            learning_rate=params.learning_rate,
            clip_norm=params.clip_norm,
            post_ops=ema_ops)

        # TODO(seominjoon): Checking `Exists` is not the best way to do this.
        if params.restore_dir and not tf.gfile.Exists(params.output_dir):
            assert params.restore_scopes
            checkpoint_dir = params.restore_dir
            if params.restore_step:
                checkpoint_dir = os.path.join(
                    params.restore_dir, 'model.ckpt-%d' % params.restore_step)
            restore_vars = []
            for restore_scope in params.restore_scopes:
                restore_vars.extend(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      restore_scope))
            assignment_map = {var.op.name: var for var in restore_vars}
            tf.contrib.framework.init_from_checkpoint(checkpoint_dir,
                                                      assignment_map)
    else:
        if params.restore_decay < 1.0:
            ema = emas[params.restore_decay]
            assign_ops = []
            for var in tf.trainable_variables():
                assign_op = tf.assign(var, ema.average(var))
                assign_ops.append(assign_op)
            with tf.control_dependencies(assign_ops):
                for key, val in predictions.items():
                    predictions[key] = tf.identity(val)
        train_op = None

    return learn.ModelFnOps(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
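# The docstring above says this model_fn is meant to be driven by an
# `Experiment`. Below is a minimal wiring sketch, assuming TF 1.x
# contrib.learn. The HParams values, the `train_input_fn`/`eval_input_fn`
# callables, and the output_dir path are hypothetical placeholders used only
# for illustration; they are not part of the original code.
from tensorflow.contrib.learn.python.learn import learn_runner

def experiment_fn(output_dir):
    # Hypothetical values; the HParams must cover every field the model_fn
    # reads (model, learning_rate, clip_norm, ema_decays, restore_*, ...).
    hparams = tf.contrib.training.HParams(
        model='feature', learning_rate=0.001, clip_norm=5.0,
        ema_decays=[0.999], restore_decay=1.0, restore_dir='',
        restore_scopes=[], restore_step=0, output_dir=output_dir)
    estimator = tf.contrib.learn.Estimator(model_fn=model_fn,
                                           params=hparams,
                                           model_dir=output_dir)
    # train_input_fn / eval_input_fn are assumed to be defined elsewhere.
    return tf.contrib.learn.Experiment(estimator,
                                       train_input_fn=train_input_fn,
                                       eval_input_fn=eval_input_fn,
                                       train_steps=10000)

learn_runner.run(experiment_fn, output_dir='/tmp/squad_model')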
def model_fn(features, labels, mode, params):
    """Applies the model to the input features to produce predictions."""
    if 'initializer' in params:
        initializer = configurable.Configurable.initialize(
            params['initializer'])
        tf.logging.info('Using %s initializer', params['initializer'])
        tf.get_variable_scope().set_initializer(initializer)
    else:
        tf.logging.info(
            'Not setting a global initializer. TF defaults to Xavier.')

    model_instance = cls(mode=mode, config=params, dataset=dataset)
    predictions = model_instance(features)  # pylint: disable=not-callable

    if labels:
        loss = model_instance.loss(
            predictions=predictions,
            targets=labels,
            multi_answer_loss=params['train_with_multi_answer_loss'])
    else:
        assert mode == MODE_KEYS.PREDICT
        loss = None

    # Always instantiate the optimizer (for exponential moving averages at eval)
    optimizer = configurable.Configurable.load(params['optimizer'])
    optimizer_instance = optimizer(config=params['optimizer'])
    if mode == MODE_KEYS.TRAIN:
        train_op = optimizer_instance(loss, train_steps)
    else:
        train_op = None

    # Initialization
    if params['init_checkpoint']:
        # Checkpoint is from a different model
        latest_checkpoint = tf.train.latest_checkpoint(model_dir)
        # only init if there's no saved checkpoint.
        if not latest_checkpoint:
            misc_util.init_from_checkpoint(
                params['init_checkpoint'], params['fn'])
        else:
            tf.logging.info('Latest checkpoint %s exists. No init from %s.' % (
                latest_checkpoint, params['init_checkpoint']))

    tf.logging.info('mode: %s' % mode)
    tf.logging.info('params: %s' % params)

    if params['optimizer']['ema_decay'] != 1.0 and mode != MODE_KEYS.TRAIN:
        ema = optimizer_instance.exponential_moving_average
        trainable_vars, _, has_partition = misc_util.get_trainable_vars(
            exclude_pattern=params['optimizer']['nograd_var'])
        # Restored variables
        variable_map = ema.variables_to_restore(trainable_vars)
        if has_partition:
            # Update partition info
            _update_partition_info(
                variable_map, params['optimizer']['nograd_var'])
        saver = tf.train.Saver(variable_map)
        scaffold = tf.train.Scaffold(saver=saver)
    else:
        scaffold = None

    # Eval metrics
    eval_metric_ops = None
    if mode in [MODE_KEYS.TRAIN, MODE_KEYS.EVAL]:
        eval_metric_ops = model_instance.metrics(
            predictions=predictions, targets=labels)

    if use_estimator:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            scaffold=scaffold)
    else:
        # Maintain backwards compatibility
        return contrib_learn.ModelFnOps(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            scaffold=scaffold)
def cnn_model(features, classes, mode):
    vocab_table = lookup.index_table_from_file(vocabulary_file=WORD_VOCAB_FILE,
                                               num_oov_buckets=1,
                                               default_value=-1)
    logger.info('mode={}'.format(mode))

    text_tokens = features['text']
    text_ids = vocab_table.lookup(text_tokens)
    text_ids_padded = tf.pad(text_ids,
                             tf.constant([[0, 0], [0, MAX_DOCUMENT_LENGTH]]))
    text_ids_padded = tf.slice(text_ids_padded, [0, 0],
                               [-1, MAX_DOCUMENT_LENGTH])

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        embedding = tf.get_variable('embedding', [N_WORDS, EMBEDDING_SIZE])
        embedding_inputs = tf.nn.embedding_lookup(embedding, text_ids_padded)
        embedded_chars_expanded = tf.expand_dims(embedding_inputs, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(FILTER_SIZES):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, EMBEDDING_SIZE, 1, NUM_FILTERS]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[NUM_FILTERS]), name="b")
            conv = tf.nn.conv2d(embedded_chars_expanded, W,
                                strides=[1, 1, 1, 1], padding="VALID", name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, MAX_DOCUMENT_LENGTH - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = NUM_FILTERS * len(FILTER_SIZES)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool_flat, DROPOUT_KEEP_PROB)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable("W", shape=[num_filters_total, NUM_CLASSES],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        logits = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
        predictions = tf.argmax(logits, 1, name="predictions")

    # Accuracy
    # with tf.name_scope("accuracy"):
    #     correct_predictions = tf.equal(predictions, tf.argmax(target, 1))
    #     accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

    predictions_dict = {
        'source': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(classes, logits)
        train_op = tf.contrib.layers.optimize_loss(loss,
                                                   tf.train.get_global_step(),
                                                   optimizer='Adam',
                                                   learning_rate=LEARNING_RATE)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)
def rnn_model(features, target, mode):
    table = lookup.index_table_from_file(vocabulary_file=str(WORD_VOCAB_FILE),
                                         num_oov_buckets=1, default_value=-1)
    print('features={}'.format(features))  # (?, 20)

    def my_func(x, target):
        # x will be a numpy array with the contents of the placeholder below
        for _x in zip(x, target):
            print(_x)
        return x

    # f = tf.py_func(my_func, [features["text"], target], tf.string)

    # string operations
    # titles = tf.squeeze(features['text'], [1])
    # titles = tf.squeeze(f, [1])
    # words = tf.string_split(titles)
    words = tf.string_split(features["text"])  # TODO: calc sequence_length
    # words = tf.Print(words, [words])
    densewords = tf.sparse_tensor_to_dense(words, default_value=PADWORD)
    numbers = table.lookup(densewords)
    padding = tf.constant([[0, 0], [0, MAX_DOCUMENT_LENGTH]])
    padded = tf.pad(numbers, padding)
    sliced = tf.slice(padded, [0, 0], [-1, MAX_DOCUMENT_LENGTH])
    print('words_sliced={}'.format(sliced))  # (?, 20)

    # layer to take the words and convert them into vectors (embeddings)
    print(N_WORDS)
    embeds = tf.contrib.layers.embed_sequence(sliced,
                                              vocab_size=N_WORDS,
                                              embed_dim=EMBEDDING_SIZE)
    print('words_embed={}'.format(embeds))  # (?, 20, 10)

    n_classes = len(TARGETS)
    print(n_classes, TARGETS)

    with tf.variable_scope("lstm"):
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, embeds, dtype=tf.float32)
    last = outputs[:, -1, :]  # tf.gather(outputs, int(outputs.get_shape()[0]) - 1)

    # ===========
    fc1bn = dense_batch_relu(last, (mode == "train"), "dense1")
    fc1bn_do = tf.contrib.layers.dropout(fc1bn, keep_prob=0.8)

    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    ## cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Network to length of
    # MAX_DOCUMENT_LENGTH and pass word_list as inputs for each unit.
    ## _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)

    logits = tf.contrib.layers.fully_connected(fc1bn_do, n_classes, activation_fn=None)
    print('logits={}'.format(logits))  # (?, 3)

    predictions_dict = {
        'author': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            # optimizer='SGD',
            learning_rate=0.001)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op)
def cnn_model(features, target, mode):
    table = lookup.index_table_from_file(vocabulary_file=str(WORD_VOCAB_FILE),
                                         num_oov_buckets=1, default_value=-1)

    def my_func(x, target):
        # x will be a numpy array with the contents of the placeholder below
        for _x in zip(x, target):
            print(_x)
        return x

    f = tf.py_func(my_func, [features["text"], target], tf.string)

    # string operations
    titles = tf.squeeze(features['text'], [1])
    # titles = tf.squeeze(f, [1])  # features['text']
    words = tf.string_split(titles)
    # words = tf.Print(words, [words])
    densewords = tf.sparse_tensor_to_dense(words, default_value=PADWORD)
    numbers = table.lookup(densewords)
    padding = tf.constant([[0, 0], [0, MAX_DOCUMENT_LENGTH]])
    padded = tf.pad(numbers, padding)
    sliced = tf.slice(padded, [0, 0], [-1, MAX_DOCUMENT_LENGTH])
    print('words_sliced={}'.format(sliced))  # (?, 20)

    # layer to take the words and convert them into vectors (embeddings)
    print(N_WORDS)
    embeds = tf.contrib.layers.embed_sequence(sliced,
                                              vocab_size=N_WORDS,
                                              embed_dim=EMBEDDING_SIZE)
    print('words_embed={}'.format(embeds))  # (?, 20, 10)

    # now do convolution
    with tf.name_scope("convolution"):
        conv = tf.contrib.layers.conv2d(embeds, 1, WINDOW_SIZE,
                                        stride=STRIDE, padding='SAME')  # (?, 4, 1)
        conv = tf.nn.relu(conv)  # (?, 4, 1)
        words = tf.squeeze(conv, [2])  # (?, 4)
    print('words_conv={}'.format(words))  # (?, 4)

    n_classes = len(TARGETS)
    print(n_classes, TARGETS)

    fc1bn = dense_batch_relu(words, (mode == tf.contrib.learn.ModeKeys.TRAIN), "dense1")
    fc1bn_do = tf.contrib.layers.dropout(fc1bn, keep_prob=0.9)
    logits = tf.contrib.layers.fully_connected(fc1bn_do, n_classes, activation_fn=None)
    print('logits={}'.format(logits))  # (?, 3)

    predictions_dict = {
        'author': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            # optimizer='SGD',
            learning_rate=0.001)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op)
def cnn_model(features, target, mode):
    """2 layer ConvNet to predict from sequence of words to a class."""
    # make input features numeric
    from tensorflow.contrib import lookup
    table = lookup.index_table_from_file(vocabulary_file=WORD_VOCAB_FILE,
                                         num_oov_buckets=1, vocab_size=N_WORDS,
                                         default_value=-1, name="word_to_index")
    word_indexes = table.lookup(features['title'])
    word_vectors = tf.contrib.layers.embed_sequence(word_indexes,
                                                    vocab_size=(N_WORDS + 1),
                                                    embed_dim=EMBEDDING_SIZE,
                                                    scope='words')
    word_vectors = tf.expand_dims(word_vectors, 3)  # (1, embedding_size, 1)

    # one-hot encode the targets
    n_classes = len(TARGETS)
    # target = tf.one_hot(target, n_classes, 1, 0)
    # target = tf.squeeze(target, squeeze_dims=[1])

    with tf.variable_scope('CNN_Layer1'):
        # Apply Convolution filtering on input sequence.
        conv1 = tf.contrib.layers.convolution2d(word_vectors, N_FILTERS,
                                                FILTER_SHAPE1, padding='VALID')
        # Add a RELU for non linearity.
        conv1 = tf.nn.relu(conv1)
        # Max pooling across output of Convolution+Relu.
        pool1 = tf.nn.max_pool(conv1,
                               ksize=[1, POOLING_WINDOW, 1, 1],
                               strides=[1, POOLING_STRIDE, 1, 1],
                               padding='SAME')
        # Transpose matrix so that n_filters from convolution becomes width.
        pool1 = tf.transpose(pool1, [0, 1, 3, 2])

    with tf.variable_scope('CNN_Layer2'):
        # Second level of convolution filtering.
        conv2 = tf.contrib.layers.convolution2d(pool1, N_FILTERS,
                                                FILTER_SHAPE2, padding='VALID')
        # Max across each filter to get useful features for classification.
        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])

    # Apply regular WX + B and classification.
    logits = tf.contrib.layers.fully_connected(pool2, n_classes, activation_fn=None)

    predictions_dict = {
        'source': tf.gather(TARGETS, tf.argmax(logits, 1)),
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }

    if mode == tf.contrib.learn.ModeKeys.TRAIN or mode == tf.contrib.learn.ModeKeys.EVAL:
        loss = tf.losses.sparse_softmax_cross_entropy(target, logits)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)
    else:
        loss = None
        train_op = None

    return tflearn.ModelFnOps(mode=mode,
                              predictions=predictions_dict,
                              loss=loss,
                              train_op=train_op)