def __init__(self, cfg):
    """Set up the form selector from the `cfg` configuration dictionary.

    Initializes both base classes, copies the settings out of `cfg`
    (falling back to the defaults listed below), prepares the vocabulary
    holding only the four special symbols, and reseeds NumPy and TensorFlow
    with values drawn from the `rnd` generator.
    """
    FormSelect.__init__(self)
    scope_suffix = cfg.get('scope_suffix', '')
    TFModel.__init__(self, scope_name='formselect-' + scope_suffix)

    # load configuration: (attribute name, configuration key, default value)
    settings = (
        ('_sample', 'form_sample', False),
        ('randomize', 'randomize', True),
        ('emb_size', 'emb_size', 50),
        ('passes', 'passes', 200),
        ('alpha', 'alpha', 1),
        ('batch_size', 'batch_size', 1),
        ('max_sent_len', 'max_sent_len', 32),
        ('cell_type', 'cell_type', 'lstm'),
        ('max_grad_norm', 'max_grad_norm', 100),
        ('optimizer_type', 'optimizer_type', 'adam'),
        ('max_cores', 'max_cores', 4),
        ('alpha_decay', 'alpha_decay', 0.0),
        ('validation_freq', 'validation_freq', 1),
    )
    for attr_name, cfg_key, default in settings:
        setattr(self, attr_name, cfg.get(cfg_key, default))
    # the default depends on the number of passes, so it is resolved last
    self.min_passes = cfg.get('min_passes', self.passes / 2)

    # vocabulary and its inverse start with the special symbols only
    special_symbols = [('<VOID>', self.VOID), ('<GO>', self.GO),
                       ('<STOP>', self.STOP), ('<UNK>', self.UNK)]
    self.vocab = dict(special_symbols)
    self.reverse_dict = dict((idx, symbol) for symbol, idx in special_symbols)
    self.vocab_size = None

    self._checkpoint_params = None
    self._checkpoint_settings = None

    # reseed NumPy first, then TensorFlow (two consecutive draws from `rnd`)
    numpy_seed = rnd.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)
    tf_seed = rnd.randint(-sys.maxint, sys.maxint)
    tf.set_random_seed(tf_seed)
def _init_neural_network(self):
    """Build the classification network selected by self.nn_shape and open a session.

    Shapes starting with 'ff' produce feed-forward layers, shapes starting
    with 'rnn' produce an LSTM-based RNN. The cost, the Adam optimizer, the
    TensorFlow session and the saver are created afterwards.
    """
    # set TensorFlow random seed
    tf_seed = rnd.randint(-sys.maxint, sys.maxint)
    tf.set_random_seed(tf_seed)

    self.targets = tf.placeholder(tf.float32, [None, self.num_outputs], name='targets')

    with tf.variable_scope(self.scope_name):

        if self.nn_shape.startswith('ff'):
            # feedforward networks
            self.inputs = tf.placeholder(tf.float32, [None] + self.input_shape, name='inputs')
            # the depth defaults to 2 unless the shape name ends in another listed digit
            shape_suffix = self.nn_shape[-1]
            num_ff_layers = int(shape_suffix) if shape_suffix in ['0', '1', '3', '4'] else 2
            self.outputs = self._ff_layers('ff', num_ff_layers, self.inputs)

        elif self.nn_shape.startswith('rnn'):
            # RNNs
            self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
            # one int32 placeholder per input position
            input_placeholders = []
            for i in xrange(self.input_shape[0]):
                input_placeholders.append(
                    tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i)))
            self.inputs = input_placeholders
            self.cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_size)
            self.outputs = self._rnn('rnn', self.inputs)

    # The cost as computed by TF actually adds a "fake" sigmoid layer on top
    # (or is computed as if there were a sigmoid layer on top).
    # NB: the "true" cost function with a "real" sigmoid layer on top would have been
    #   tf.reduce_mean(tf.reduce_sum(self.targets * -tf.log(self.outputs)
    #                                + (1 - self.targets) * -tf.log(1 - self.outputs), 1))
    # However, it is not numerically stable in practice, so we have to use the TF function.
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(self.outputs, self.targets, name='CE')
    self.cost = tf.reduce_mean(tf.reduce_sum(cross_entropy, 1))

    self.optimizer = tf.train.AdamOptimizer(self.alpha)
    self.train_func = self.optimizer.minimize(self.cost)

    # initialize session (limit the thread pools only if max_cores is set)
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    else:
        session_config = None
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.all_variables())
def _init_neural_network(self):
    """Build the classification network selected by self.nn_shape and open a session.

    Shapes starting with 'ff' produce feed-forward layers, shapes starting
    with 'rnn' produce an LSTM-based RNN. The cost, the Adam optimizer,
    optional TensorBoard summaries (only if self.train_summary_dir is set),
    the TensorFlow session and the saver are created afterwards.
    """
    # set TensorFlow random seed
    tf_seed = rnd.randint(-sys.maxint, sys.maxint)
    tf.set_random_seed(tf_seed)

    self.targets = tf.placeholder(tf.float32, [None, self.num_outputs], name='targets')

    with tf.variable_scope(self.scope_name):

        if self.nn_shape.startswith('ff'):
            # feedforward networks
            self.inputs = tf.placeholder(tf.float32, [None] + self.input_shape, name='inputs')
            # the depth defaults to 2 unless the shape name ends in another listed digit
            shape_suffix = self.nn_shape[-1]
            num_ff_layers = int(shape_suffix) if shape_suffix in ['0', '1', '3', '4'] else 2
            self.outputs = self._ff_layers('ff', num_ff_layers, self.inputs)

        elif self.nn_shape.startswith('rnn'):
            # RNNs
            self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
            # one int32 placeholder per input position
            input_placeholders = []
            for i in xrange(self.input_shape[0]):
                input_placeholders.append(
                    tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i)))
            self.inputs = input_placeholders
            self.cell = tf.contrib.rnn.BasicLSTMCell(self.emb_size)
            self.outputs = self._rnn('rnn', self.inputs)

    # The cost as computed by TF actually adds a "fake" sigmoid layer on top
    # (or is computed as if there were a sigmoid layer on top).
    # NB: the "true" cost function with a "real" sigmoid layer on top would have been
    #   tf.reduce_mean(tf.reduce_sum(self.targets * -tf.log(self.outputs)
    #                                + (1 - self.targets) * -tf.log(1 - self.outputs), 1))
    # However, it is not numerically stable in practice, so we have to use the TF function.
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.outputs, labels=self.targets, name='CE')
    self.cost = tf.reduce_mean(tf.reduce_sum(cross_entropy, 1))

    self.optimizer = tf.train.AdamOptimizer(self.alpha)
    self.train_func = self.optimizer.minimize(self.cost)

    log_summaries = self.train_summary_dir

    # Tensorboard summaries
    if log_summaries:
        self.loss_summary_reranker = tf.summary.scalar("loss_reranker", self.cost)
        self.train_summary_op = tf.summary.merge([self.loss_summary_reranker])

    # initialize session (limit the thread pools only if max_cores is set)
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    else:
        session_config = None
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.global_variables())

    # Tensorboard summary writer (needs the session's graph, hence created last)
    if log_summaries:
        summary_path = os.path.join(self.train_summary_dir, "reranker")
        self.train_summary_writer = tf.summary.FileWriter(summary_path, self.session.graph)
def __init__(self, cfg):
    """Initialize the frequency-based form selector from the `cfg` dictionary.

    Reads the 'form_sample' setting (default False), starts with no word
    frequencies, and reseeds NumPy with a value drawn from `rnd`.
    """
    super(FrequencyFormSelect, self).__init__(cfg)
    # word frequencies start out unset
    self._word_freq = None
    self._sample = cfg.get('form_sample', False)
    numpy_seed = rnd.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)
def __init__(self, cfg):
    """Initialize the KenLM-based form selector from the `cfg` dictionary.

    Reads the 'form_sample' setting (default False), starts with no trained
    model, and reseeds NumPy with a value drawn from `rnd`.
    """
    super(KenLMFormSelect, self).__init__(cfg)
    # the trained model starts out unset
    self._trained_model = None
    self._sample = cfg.get('form_sample', False)
    numpy_seed = rnd.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)
def _init_neural_network(self):
    """Initializing the NN (building a TensorFlow graph and initializing session).

    Creates the encoder/decoder placeholders, the RNN cell given by
    self.cell_type, the seq2seq network given by self.nn_type (once for
    training and once, with shared variables, for decoding), the cost,
    the optimizer given by self.optimizer_type ('sgd', 'adagrad', anything
    else means Adam), the session and the saver.
    """
    # set TensorFlow random seed
    tf.set_random_seed(rnd.randint(-sys.maxint, sys.maxint))

    # create placeholders for input & output (always batch-size * 1, list of up to num. steps)
    self.enc_inputs = []
    self.enc_inputs_drop = []
    for i in xrange(self.max_da_len):
        enc_input = tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i))
        self.enc_inputs.append(enc_input)
        # the dropout variants of the inputs exist only if dropout is switched on
        # NOTE(review): dropout is applied directly to int32 placeholders here -- confirm intended
        if self.dropout_keep_prob < 1:
            enc_input_drop = tf.nn.dropout(enc_input, self.dropout_keep_prob,
                                           name=('enc_inp-drop-%d' % i))
            self.enc_inputs_drop.append(enc_input_drop)

    self.dec_inputs = []
    for i in xrange(self.max_tree_len):
        self.dec_inputs.append(tf.placeholder(tf.int32, [None], name=('dec_inp-%d' % i)))

    # targets are just decoder inputs shifted by one (+pad with one empty spot)
    self.targets = [self.dec_inputs[i + 1] for i in xrange(len(self.dec_inputs) - 1)]
    self.targets.append(tf.placeholder(tf.int32, [None], name=('target-pad')))

    # prepare cells
    self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
    if self.cell_type.startswith('gru'):
        self.cell = rnn_cell.GRUCell(self.emb_size)
    else:
        self.cell = rnn_cell.BasicLSTMCell(self.emb_size)

    # a '/2' suffix of the cell type stacks the cell twice
    if self.cell_type.endswith('/2'):
        self.cell = rnn_cell.MultiRNNCell([self.cell] * 2)

    # build the actual LSTM Seq2Seq network (for training and decoding)
    with tf.variable_scope(self.scope_name) as scope:

        rnn_func = embedding_rnn_seq2seq
        if self.nn_type == 'emb_attention_seq2seq':
            rnn_func = embedding_attention_seq2seq
        elif self.nn_type == 'emb_attention2_seq2seq':
            rnn_func = partial(embedding_attention_seq2seq, num_heads=2)
        elif self.nn_type == 'emb_attention_seq2seq_context':
            rnn_func = embedding_attention_seq2seq_context
        elif self.nn_type == 'emb_attention2_seq2seq_context':
            rnn_func = partial(embedding_attention_seq2seq_context, num_heads=2)

        # for training: feed_previous == False, using dropout if available
        # outputs = batch_size * num_decoder_symbols ~ i.e. output logits at each steps
        # states = cell states at each steps
        self.outputs, self.states = rnn_func(
            self.enc_inputs_drop if self.enc_inputs_drop else self.enc_inputs,
            self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            scope=scope)

        # the decoding network below shares its variables with the training one
        scope.reuse_variables()

        # for decoding: feed_previous == True
        self.dec_outputs, self.dec_states = rnn_func(
            self.enc_inputs, self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            feed_previous=True, scope=scope)

    # TODO use output projection ???

    # target weights
    # TODO change to actual weights, zero after the end of tree ???
    self.cost_weights = [tf.ones_like(trg, tf.float32, name='cost_weights')
                         for trg in self.targets]

    # cost
    self.tf_cost = sequence_loss(self.outputs, self.targets,
                                 self.cost_weights, self.tree_dict_size)
    self.dec_cost = sequence_loss(self.dec_outputs, self.targets,
                                  self.cost_weights, self.tree_dict_size)
    if self.use_dec_cost:
        self.cost = 0.5 * (self.tf_cost + self.dec_cost)
    else:
        self.cost = self.tf_cost

    self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # optimizer (default to Adam)
    # This must be a single if/elif/else chain: with two independent `if`s, the
    # Adam fallback would overwrite the optimizer chosen for 'sgd'.
    if self.optimizer_type == 'sgd':
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    elif self.optimizer_type == 'adagrad':
        self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
    else:
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_func = self.optimizer.minimize(self.cost)

    # initialize session (limit the thread pools only if max_cores is set)
    session_config = None
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.all_variables())
def _init_neural_network(self):
    """Initializing the NN (building a TensorFlow graph and initializing session).

    Creates the encoder/decoder placeholders, the RNN cell given by
    self.cell_type, the seq2seq network given by self.nn_type (once for
    training and once, with shared variables, for decoding), the cost,
    the optimizer given by self.optimizer_type ('sgd', 'adagrad', anything
    else means Adam), the session and the saver.
    """
    # set TensorFlow random seed
    tf.set_random_seed(rnd.randint(-sys.maxint, sys.maxint))

    # create placeholders for input & output (always batch-size * 1, list of up to num. steps)
    self.enc_inputs = []
    self.enc_inputs_drop = []
    for i in xrange(self.max_da_len):
        enc_input = tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i))
        self.enc_inputs.append(enc_input)
        # the dropout variants of the inputs exist only if dropout is switched on
        # NOTE(review): dropout is applied directly to int32 placeholders here -- confirm intended
        if self.dropout_keep_prob < 1:
            enc_input_drop = tf.nn.dropout(enc_input, self.dropout_keep_prob,
                                           name=('enc_inp-drop-%d' % i))
            self.enc_inputs_drop.append(enc_input_drop)

    self.dec_inputs = []
    for i in xrange(self.max_tree_len):
        self.dec_inputs.append(tf.placeholder(tf.int32, [None], name=('dec_inp-%d' % i)))

    # targets are just decoder inputs shifted by one (+pad with one empty spot)
    self.targets = [self.dec_inputs[i + 1] for i in xrange(len(self.dec_inputs) - 1)]
    self.targets.append(tf.placeholder(tf.int32, [None], name=('target-pad')))

    # prepare cells
    self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
    if self.cell_type.startswith('gru'):
        self.cell = rnn_cell.GRUCell(self.emb_size)
    else:
        self.cell = rnn_cell.BasicLSTMCell(self.emb_size)

    # a '/2' suffix of the cell type stacks the cell twice
    if self.cell_type.endswith('/2'):
        self.cell = rnn_cell.MultiRNNCell([self.cell] * 2)

    # build the actual LSTM Seq2Seq network (for training and decoding)
    with tf.variable_scope(self.scope_name) as scope:

        rnn_func = embedding_rnn_seq2seq
        if self.nn_type == 'emb_attention_seq2seq':
            rnn_func = embedding_attention_seq2seq
        elif self.nn_type == 'emb_attention2_seq2seq':
            rnn_func = partial(embedding_attention_seq2seq, num_heads=2)
        elif self.nn_type == 'emb_attention_seq2seq_context':
            rnn_func = embedding_attention_seq2seq_context
        elif self.nn_type == 'emb_attention2_seq2seq_context':
            rnn_func = partial(embedding_attention_seq2seq_context, num_heads=2)

        # for training: feed_previous == False, using dropout if available
        # outputs = batch_size * num_decoder_symbols ~ i.e. output logits at each steps
        # states = cell states at each steps
        self.outputs, self.states = rnn_func(
            self.enc_inputs_drop if self.enc_inputs_drop else self.enc_inputs,
            self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            scope=scope)

        # the decoding network below shares its variables with the training one
        scope.reuse_variables()

        # for decoding: feed_previous == True
        self.dec_outputs, self.dec_states = rnn_func(
            self.enc_inputs, self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            feed_previous=True, scope=scope)

    # TODO use output projection ???

    # target weights
    # TODO change to actual weights, zero after the end of tree ???
    self.cost_weights = [tf.ones_like(trg, tf.float32, name='cost_weights')
                         for trg in self.targets]

    # cost
    self.tf_cost = sequence_loss(self.outputs, self.targets,
                                 self.cost_weights, self.tree_dict_size)
    self.dec_cost = sequence_loss(self.dec_outputs, self.targets,
                                  self.cost_weights, self.tree_dict_size)
    if self.use_dec_cost:
        self.cost = 0.5 * (self.tf_cost + self.dec_cost)
    else:
        self.cost = self.tf_cost

    self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # optimizer (default to Adam)
    # This must be a single if/elif/else chain: with two independent `if`s, the
    # Adam fallback would overwrite the optimizer chosen for 'sgd'.
    if self.optimizer_type == 'sgd':
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    elif self.optimizer_type == 'adagrad':
        self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
    else:
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_func = self.optimizer.minimize(self.cost)

    # initialize session (limit the thread pools only if max_cores is set)
    session_config = None
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, cfg):
    """Initialize the KenLM-based form selector from the `cfg` dictionary.

    Imports the `kenlm` module on construction, reads the 'form_sample'
    setting (default False), starts with no trained model, and reseeds
    NumPy with a value drawn from `rnd`.
    """
    super(KenLMFormSelect, self).__init__(cfg)
    import kenlm  # needed only if KenLMFormSelect is used
    # the trained model starts out unset
    self._trained_model = None
    self._sample = cfg.get('form_sample', False)
    numpy_seed = rnd.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)
def _init_neural_network(self):
    """Build the classification network selected by self.nn_shape and open a session.

    Shapes starting with 'ff' produce feed-forward layers, shapes ending
    with 'rnn' produce an LSTM-based RNN (bidirectional if the shape starts
    with 'bidi'). The cost, the Adam optimizer, optional TensorBoard
    summaries (only if self.train_summary_dir is set), the TensorFlow
    session and the saver are created afterwards.
    """
    # set TensorFlow random seed
    tf_seed = rnd.randint(-sys.maxsize, sys.maxsize)
    tf.set_random_seed(tf_seed)

    self.targets = tf.placeholder(tf.float32, [None, self.num_outputs], name='targets')

    with tf.variable_scope(self.scope_name):

        if self.nn_shape.startswith('ff'):
            # feedforward networks
            self.inputs = tf.placeholder(tf.float32, [None] + self.input_shape, name='inputs')
            # the depth defaults to 2 unless the shape name ends in another listed digit
            shape_suffix = self.nn_shape[-1]
            num_ff_layers = int(shape_suffix) if shape_suffix in ['0', '1', '3', '4'] else 2
            self.outputs = self._ff_layers('ff', num_ff_layers, self.inputs)

        elif self.nn_shape.endswith('rnn'):
            # RNNs
            self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
            # one int32 placeholder per input position
            input_placeholders = []
            for i in range(self.input_shape[0]):
                input_placeholders.append(
                    tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i)))
            self.inputs = input_placeholders
            self.cell = tf.contrib.rnn.BasicLSTMCell(self.emb_size)
            is_bidirectional = self.nn_shape.startswith('bidi')
            self.outputs = self._rnn('rnn', self.inputs, bidi=is_bidirectional)

    # older versions of the model put the optimizer into the default scope -- we want them in
    # a separate scope (to be able to swap rerankers with the same main generator), but want
    # to keep loading older models -> version setting decides where the variables will be created
    if self.version > 1:
        optimizer_scope = self.scope_name
    else:
        optimizer_scope = tf.get_variable_scope()

    with tf.variable_scope(optimizer_scope):
        # The cost as computed by TF actually adds a "fake" sigmoid layer on top
        # (or is computed as if there were a sigmoid layer on top).
        # NB: the "true" cost function with a "real" sigmoid layer on top would have been
        #   tf.reduce_mean(tf.reduce_sum(self.targets * -tf.log(self.outputs)
        #                                + (1 - self.targets) * -tf.log(1 - self.outputs), 1))
        # However, it is not numerically stable in practice, so we have to use the TF function.
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.outputs, labels=self.targets, name='CE')
        self.cost = tf.reduce_mean(tf.reduce_sum(cross_entropy, 1))

        self.optimizer = tf.train.AdamOptimizer(self.alpha)
        self.train_func = self.optimizer.minimize(self.cost)

    log_summaries = self.train_summary_dir

    # Tensorboard summaries
    if log_summaries:
        self.loss_summary_reranker = tf.summary.scalar("loss_reranker", self.cost)
        self.train_summary_op = tf.summary.merge([self.loss_summary_reranker])

    # initialize session (limit the thread pools only if max_cores is set)
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    else:
        session_config = None
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.global_variables())

    # Tensorboard summary writer (needs the session's graph, hence created last)
    if log_summaries:
        summary_path = os.path.join(self.train_summary_dir, "reranker")
        self.train_summary_writer = tf.summary.FileWriter(summary_path, self.session.graph)