def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size)
    c = wavenet(x, self.dilations, self.filter_widths, self.skip_channels, self.residual_channels)
    h = tf.concat([h, c, x], axis=2)

    self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')
    y_hat = time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense-2')
    y_hat = tf.squeeze(y_hat, 2)

    # index of the last valid timestep in each (possibly padded) sequence
    final_temporal_idx = tf.stack([
        tf.range(tf.shape(self.history_length)[0]),
        self.history_length - 1
    ], axis=1)
    self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
    self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'product_ids': self.product_id,
        'final_states': self.final_states,
        'predictions': self.final_predictions
    }

    return y_hat
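# The snippets in this section all assume a `time_distributed_dense_layer`
# helper that applies one dense projection to every timestep. The sketch below
# is an assumption about its behavior, not the original implementation (some
# snippets use a variant that also returns the weight and bias tensors).
def time_distributed_dense_layer(inputs, output_units, activation=None,
                                 scope='time-distributed-dense', reuse=False):
    # inputs: [batch_size, seq_len, input_units] -> [batch_size, seq_len, output_units]
    with tf.variable_scope(scope, reuse=reuse):
        input_units = inputs.get_shape().as_list()[-1]
        W = tf.get_variable('weights', [input_units, output_units])
        b = tf.get_variable('biases', [output_units],
                            initializer=tf.zeros_initializer())
        z = tf.einsum('btu,uo->bto', inputs, W) + b
        return activation(z) if activation is not None else z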
def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
    h = tf.concat([h, x], axis=2)
    h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')

    n_components = 1
    params = time_distributed_dense_layer(h_final, n_components*2, scope='dense-2', activation=None)
    ps, mixing_coefs = tf.split(params, 2, axis=2)

    # this is implemented incorrectly, but it still helped...
    mixing_coefs = tf.nn.softmax(mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
    ps = tf.nn.sigmoid(ps)

    labels = tf.tile(tf.expand_dims(self.next_is_ordered, 2), (1, 1, n_components))
    losses = tf.reduce_sum(mixing_coefs*log_loss(labels, ps), axis=2)

    sequence_mask = tf.cast(tf.sequence_mask(self.history_length, maxlen=100), tf.float32)
    avg_loss = tf.reduce_sum(losses*sequence_mask) / tf.cast(tf.reduce_sum(self.history_length), tf.float32)

    final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
    self.final_states = tf.gather_nd(h_final, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'product_ids': self.product_id,
        'final_states': self.final_states
    }

    return avg_loss
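# `log_loss` above is not defined in this section; it is assumed to be an
# elementwise binary cross-entropy. A minimal sketch under that assumption:
def log_loss(y_true, y_hat, eps=1e-7):
    # the epsilon keeps tf.log away from log(0)
    return -(y_true * tf.log(y_hat + eps) + (1 - y_true) * tf.log(1 - y_hat + eps))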
def wavenet_logits_target(self):
    x = self.get_inputs(self.opens_, self.highs_, self.lows_, self.closes_,
                        self.volumes_, self.positions_, self.order_prices_,
                        self.current_prices_, self.time_since_, self.todays_)

    inputs, w, b = temporal_convolution_layer(inputs=x, output_units=8,
                                              convolution_width=1, scope='target-CNN-1x1')
    self.w_target["wcnn1"] = w
    self.w_target["bcnn1"] = b

    outputs = lstm_layer(inputs, self.lengths, self.lstm_size, scope="series-lstm-target")

    h, w, b = time_distributed_dense_layer(outputs, 128, scope='target-dense-encode-1',
                                           activation=tf.nn.relu, reuse=tf.AUTO_REUSE)
    self.w_target["wtf1"] = w
    self.w_target["btf1"] = b

    out, w, b = time_distributed_dense_layer(h, 32, scope='target-dense-encode-2',
                                             activation=tf.nn.relu, reuse=tf.AUTO_REUSE)
    self.w_target["wtf2"] = w
    self.w_target["btf2"] = b

    shape = out.get_shape().as_list()
    out_flat = tf.reshape(out, [tf.shape(out)[0], 1, shape[1] * shape[2]])

    out, state = stateful_lstm(out_flat, self.num_lstm_layers, self.lstm_size,
                               tuple([self.lstm_state_target]), scope_name="lstm_target")
    self.state_output_target_c = state[0][0]
    self.state_output_target_h = state[0][1]

    shape = out.get_shape().as_list()
    out = tf.reshape(out, [tf.shape(out)[0], shape[2]])

    out, w, b = fully_connected_layer(out, self.n_actions,
                                      scope_name='target-dense-encode-2', activation=None)
    self.w_target["wout"] = w
    self.w_target["bout"] = b

    self.q_target_out = out
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)
def calculate_outputs(self, x):
    # lstm
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')

    # cnn: stack of causal convolutions with exponentially increasing dilation
    c = time_distributed_dense_layer(x, self.lstm_size, activation=tf.nn.relu, scope='dense-1')
    for i in range(6):
        c_i = temporal_convolution_layer(
            inputs=c,
            output_units=self.lstm_size,
            convolution_width=2,
            activation=tf.nn.relu,
            causal=True,
            dilation_rate=[2**i],
            scope='cnn-exp-{}'.format(i)
        )
        c += c_i

    h = tf.concat([h, c, x], axis=2)
    self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-2')
    y_hat = time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense-3')
    y_hat = tf.squeeze(y_hat, 2)

    final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
    self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
    self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'product_ids': self.product_id,
        'final_states': self.final_states,
        'predictions': self.final_predictions
    }

    return y_hat
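# The dilated stack above assumes a `temporal_convolution_layer` helper: a 1-D
# convolution that is left-padded when `causal=True` so position t never sees
# inputs after t. The sketch below is an assumed implementation, not the
# original (some snippets use a variant that also returns weights and biases).
def temporal_convolution_layer(inputs, output_units, convolution_width,
                               causal=False, dilation_rate=None,
                               activation=None, scope='temporal-conv', reuse=False):
    dilation_rate = dilation_rate or [1]
    with tf.variable_scope(scope, reuse=reuse):
        if causal:
            # left-pad so the convolution only looks backwards in time
            shift = (convolution_width - 1) * dilation_rate[0]
            inputs = tf.pad(inputs, [[0, 0], [shift, 0], [0, 0]])
        W = tf.get_variable('weights', [convolution_width,
                                        inputs.get_shape().as_list()[-1],
                                        output_units])
        b = tf.get_variable('biases', [output_units],
                            initializer=tf.zeros_initializer())
        z = tf.nn.convolution(inputs, W, padding='VALID',
                              dilation_rate=dilation_rate) + b
        return activation(z) if activation is not None else z

# With width-2 filters and dilations 1, 2, 4, ..., 32, the six-layer stack
# above sees 1 + sum(2**i for i in range(6)) = 64 past timesteps.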
def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm1')
    h = tf.concat([h, x], axis=2)

    self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense1')
    y_hat = tf.squeeze(
        time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense2'), 2)

    final_temporal_idx = tf.stack([
        tf.range(tf.shape(self.history_length)[0]),
        self.history_length - 1
    ], axis=1)
    self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
    self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'aisle_ids': self.aisle_id,
        'final_states': self.final_states,
        'predictions': self.final_predictions
    }

    return y_hat
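# `lstm_layer` is likewise assumed rather than shown; a minimal sketch using a
# unidirectional dynamic RNN that stops unrolling at `lengths` per sequence:
def lstm_layer(inputs, lengths, state_size, scope='lstm', reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        cell = tf.nn.rnn_cell.LSTMCell(state_size)
        outputs, _ = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths,
                                       dtype=tf.float32)
        return outputs  # [batch_size, seq_len, state_size]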
def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm1')
    self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense0')

    n_components = 3
    params = time_distributed_dense_layer(self.h_final, n_components * 3, scope='dense1')
    means, variances, mixing_coefs = tf.split(params, 3, axis=2)
    mixing_coefs = tf.nn.softmax(mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
    variances = tf.exp(variances) + 1e-5

    labels = tf.cast(tf.tile(tf.expand_dims(self.next_reorder_size, 2), (1, 1, n_components)), tf.float32)
    n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(-tf.square(labels - means) / (2 * variances))
    nlls = -tf.log(tf.reduce_sum(mixing_coefs * n_likelihoods, axis=2) + 1e-10)

    self.means = means
    self.variances = variances
    self.mixing_coefs = mixing_coefs
    self.nll = nlls

    # evaluate the mixture density at the integers 0..24
    samples = tf.cast(tf.reshape(tf.range(25), (1, 1, 1, 25)), tf.float32)
    means = tf.tile(tf.expand_dims(means, 3), (1, 1, 1, 25))
    variances = tf.tile(tf.expand_dims(variances, 3), (1, 1, 1, 25))
    mixing_coefs = tf.tile(tf.expand_dims(mixing_coefs, 3), (1, 1, 1, 25))
    sample_n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(-tf.square(samples - means) / (2 * variances))
    # note: these are mixture likelihoods, not log-likelihoods, despite the name
    self.sample_log_likelihoods = tf.reduce_sum(mixing_coefs * sample_n_likelihoods, axis=2)

    final_temporal_idx = tf.stack([
        tf.range(tf.shape(self.history_length)[0]),
        self.history_length - 1
    ], axis=1)
    self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'final_states': self.final_states,
        'predictions': self.sample_log_likelihoods
    }

    return self.nll
def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
    h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')

    n_components = 3
    params = time_distributed_dense_layer(h_final, n_components * 3, scope='dense-2')
    means, variances, mixing_coefs = tf.split(params, 3, axis=2)
    mixing_coefs = tf.nn.softmax(mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
    variances = tf.exp(variances) + 1e-5

    labels = tf.cast(tf.tile(tf.expand_dims(self.next_reorder_size, 2), (1, 1, n_components)), tf.float32)
    n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(-tf.square(labels - means) / (2 * variances))
    nlls = -tf.log(tf.reduce_sum(mixing_coefs * n_likelihoods, axis=2) + 1e-10)

    sequence_mask = tf.cast(tf.sequence_mask(self.history_length, maxlen=100), tf.float32)
    nll = tf.reduce_sum(nlls * sequence_mask) / tf.cast(tf.reduce_sum(self.history_length), tf.float32)

    # evaluate likelihood at a sample of discrete points
    samples = tf.cast(tf.reshape(tf.range(25), (1, 1, 1, 25)), tf.float32)
    means = tf.tile(tf.expand_dims(means, 3), (1, 1, 1, 25))
    variances = tf.tile(tf.expand_dims(variances, 3), (1, 1, 1, 25))
    mixing_coefs = tf.tile(tf.expand_dims(mixing_coefs, 3), (1, 1, 1, 25))
    n_sample_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(-tf.square(samples - means) / (2 * variances))
    sample_nlls = -tf.log(tf.reduce_sum(mixing_coefs * n_sample_likelihoods, axis=2) + 1e-10)

    final_temporal_idx = tf.stack([
        tf.range(tf.shape(self.history_length)[0]),
        self.history_length - 1
    ], axis=1)
    final_states = tf.gather_nd(h_final, final_temporal_idx)
    final_sample_nlls = tf.gather_nd(sample_nlls, final_temporal_idx)
    self.final_states = tf.concat([final_states, final_sample_nlls], axis=1)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'final_states': self.final_states
    }

    return nll
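# Quick NumPy sanity check of the mixture likelihood used above (illustrative
# numbers only): one observation scored under a 3-component Gaussian mixture.
import numpy as np
means = np.array([1.0, 5.0, 9.0])
variances = np.array([1.0, 2.0, 1.0])
mixing_coefs = np.array([0.5, 0.3, 0.2])
label = 4.0
likelihoods = np.exp(-(label - means) ** 2 / (2 * variances)) / np.sqrt(2 * np.pi * variances)
nll = -np.log(np.sum(mixing_coefs * likelihoods) + 1e-10)  # ~= 2.69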
def initialize_decode_params(self, x, features):
    # x:        (batch_size, num_decode_steps, 1)
    # features: (batch_size, num_decode_steps, 79)
    # after concat, x has shape (batch_size, num_decode_steps, 80)
    x = tf.concat([x, features], axis=2)

    # project to (batch_size, num_decode_steps, residual_channels)
    inputs = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='x-proj-decode')

    skip_outputs = []
    conv_inputs = [inputs]
    for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
        # dilated causal convolution
        dilated_conv = temporal_convolution_layer(
            inputs=inputs,
            output_units=2 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='dilated-conv-decode-{}'.format(i))

        # split into filter and gate, then combine multiplicatively
        conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
        dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

        # project from (batch_size, num_decode_steps, residual_channels) to
        # (batch_size, num_decode_steps, skip_channels + residual_channels)
        outputs = time_distributed_dense_layer(
            inputs=dilated_conv,
            output_units=self.skip_channels + self.residual_channels,
            scope='dilated-conv-proj-decode-{}'.format(i))

        # skips:     (batch_size, num_decode_steps, skip_channels)
        # residuals: (batch_size, num_decode_steps, residual_channels)
        skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

        inputs += residuals
        conv_inputs.append(inputs)
        skip_outputs.append(skips)

    # turn the concatenated skip outputs into y_hat
    skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
    h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-decode-1', activation=tf.nn.relu)
    y_hat = time_distributed_dense_layer(h, 1, scope='dense-decode-2')
    return y_hat
def encode(self, x, features):
    # shape (batch_size, seq_len, 1 + 17 = 18)
    x = tf.concat([x, features], axis=2)

    # project (batch_size, seq_len, 18) to (batch_size, seq_len, residual_channels)
    inputs = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='x-proj-encode')

    # collected skip connections, used for the encoding result
    skip_outputs = []
    # convolution inputs, layer by layer
    conv_inputs = [inputs]
    for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
        # dilated causal convolution
        dilated_conv = temporal_convolution_layer(
            inputs=inputs,
            output_units=2 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='dilated-conv-encode-{}'.format(i))

        # split the dilated conv into filter and gate, then combine multiplicatively
        conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
        dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

        # project from (batch_size, seq_len, residual_channels) to
        # (batch_size, seq_len, residual_channels + skip_channels)
        outputs = time_distributed_dense_layer(
            inputs=dilated_conv,
            output_units=self.skip_channels + self.residual_channels,
            scope='dilated-conv-proj-encode-{}'.format(i))

        # split into skips and residuals
        skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

        inputs += residuals
        conv_inputs.append(inputs)
        skip_outputs.append(skips)

    # skip_outputs: (batch_size, seq_len, 32*24=768) -> (batch_size, seq_len, 1)
    skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
    h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu)
    y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

    # conv_inputs: 25 tensors of shape (batch_size, seq_len, residual_channels)
    return y_hat, conv_inputs[:-1]
def encode(self, x, features): """ Encode :param x: time series values :param features: extra features :return: """ x = tf.concat([x, features], axis=2) # output from time distributed dense layer, use as the input to convolution layer inputs = time_distributed_dense_layer( inputs=x, output_units=self.residual_channels, activation=tf.nn.tanh, scope='x-proj-encode' ) skip_outputs = [] conv_inputs = [inputs] # stack multiple convolutions for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)): dilated_conv = temporal_convolution_layer( inputs=inputs, output_units=2*self.residual_channels, # double the convolution channels convolution_width=filter_width, causal=True, dilation_rate=[dilation], scope='dilated-conv-encode-{}'.format(i) ) # gated activation units based on output from dilated convolutions conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2) dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate) outputs = time_distributed_dense_layer( inputs=dilated_conv, output_units=self.skip_channels + self.residual_channels, scope='dilated-conv-proj-encode-{}'.format(i) ) skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2) inputs += residuals conv_inputs.append(inputs) skip_outputs.append(skips) # skip connections from each layer to the final dense layer skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2)) h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu) y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2') return y_hat, conv_inputs[:-1]
def encode(self, x, features): """ 返回值: y_hat:skip(每次残差) concat后全连接成输出的预测值 conv_inputs=[inputs] :每层残差与输入的和 组成的数组(去除最后一层) :param x: log_x_encode 销量的对数 :param features: 需要encoding的其他特征 :return: """ # batch,seq,1+17 x = tf.concat([x, features], axis=2) inputs = time_distributed_dense_layer( inputs=x, output_units=self.residual_channels, activation=tf.nn.tanh, scope='x-proj-encode' ) # 保存每一步的skip skip_outputs = [] # 保存每一步的残差 conv_inputs = [inputs] for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)): dilated_conv = temporal_convolution_layer( inputs=inputs, output_units=2 * self.residual_channels, convolution_width=filter_width, causal=True, dilation_rate=[dilation], scope='dilated-conv-encode-{}'.format(i) ) conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2) dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate) outputs = time_distributed_dense_layer( inputs=dilated_conv, output_units=self.skip_channels + self.residual_channels, scope='dilated-conv-proj-encode-{}'.format(i) ) skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2) # 残差网累加作为下一层输入 inputs += residuals conv_inputs.append(inputs) # skip 合并 skip_outputs.append(skips) # skip 合并 skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2)) h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu) y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2') return y_hat, conv_inputs[:-1]
def encode(self, x, features):
    x = tf.concat([x, features], axis=2)
    inputs = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='x-proj-encode',
        reuse=tf.AUTO_REUSE)

    skip_outputs = []
    conv_inputs = [inputs]
    for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
        dilated_conv = temporal_convolution_layer(
            inputs=inputs,
            output_units=2 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='dilated-conv-encode-{}'.format(i),
            reuse=tf.AUTO_REUSE)
        conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
        dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

        outputs = time_distributed_dense_layer(
            inputs=dilated_conv,
            output_units=self.skip_channels + self.residual_channels,
            scope='dilated-conv-proj-encode-{}'.format(i),
            reuse=tf.AUTO_REUSE)
        skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

        inputs += residuals
        conv_inputs.append(inputs)
        skip_outputs.append(skips)

    skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
    h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1',
                                     activation=tf.nn.relu, reuse=tf.AUTO_REUSE)
    y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2', reuse=tf.AUTO_REUSE)
    return y_hat, conv_inputs[:-1]
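# Note on reuse=tf.AUTO_REUSE in the variant above: assuming the helpers
# forward `reuse` to tf.variable_scope, a second call such as
#
#   y_hat_train, _ = model.encode(x_train, features_train)   # hypothetical
#   y_hat_val, _ = model.encode(x_val, features_val)
#
# shares one set of weights instead of raising a "variable already exists"
# error, which the other encode() variants would hit if built twice.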
def calculate_outputs(self, x):
    h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
    h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')
    y_hat = tf.squeeze(time_distributed_dense_layer(h_final, 1, scope='dense2'), 2)

    final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
    self.final_states = tf.gather_nd(h_final, final_temporal_idx)
    self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'final_states': self.final_states,
        'predictions': self.final_predictions
    }

    return y_hat
def initialize_decode_params(self, x, features):
    x = tf.concat([x, features], axis=2)
    h = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='h-init-decode',
    )
    c = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='c-init-decode',
    )

    skip_outputs = []
    conv_inputs = [h]
    for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
        dilated_conv = temporal_convolution_layer(
            inputs=h,
            output_units=4 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='dilated-conv-decode-{}'.format(i),
        )
        # LSTM-style gated update, with the gates computed by a dilated convolution
        input_gate, conv_filter, conv_gate, emit_gate = tf.split(dilated_conv, 4, axis=2)
        c = tf.nn.sigmoid(input_gate) * c + tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)
        h = tf.nn.sigmoid(emit_gate) * tf.nn.tanh(c)

        skip_outputs.append(h)
        conv_inputs.append(h)

    skip_outputs = tf.concat(skip_outputs, axis=2)
    h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-decode-1', activation=tf.nn.relu)
    y_hat = time_distributed_dense_layer(h, 2, scope='dense-decode-2')
    return y_hat
def calculate_loss(self): self.x = tf.placeholder(tf.float32, [None, None, 3]) self.y = tf.placeholder(tf.float32, [None, None, 3]) self.x_len = tf.placeholder(tf.int32, [None]) self.c = tf.placeholder(tf.int32, [None, None]) self.c_len = tf.placeholder(tf.int32, [None]) self.sample_tsteps = tf.placeholder(tf.int32, []) self.num_samples = tf.placeholder(tf.int32, []) self.prime = tf.placeholder(tf.bool, []) self.x_prime = tf.placeholder(tf.float32, [None, None, 3]) self.x_prime_len = tf.placeholder(tf.int32, [None]) self.bias = tf.placeholder_with_default( tf.zeros([self.num_samples], dtype=tf.float32), [None]) cell = LSTMAttentionCell( lstm_size=self.lstm_size, num_attn_mixture_components=self.attention_mixture_components, """ Use attention model on alphabet """ attention_values=tf.one_hot(self.c, len(drawing.alphabet)), attention_values_lengths=self.c_len, num_output_mixture_components=self.output_mixture_components, bias=self.bias ) self.initial_state = cell.zero_state(tf.shape(self.x)[0], dtype=tf.float32) outputs, self.final_state = tf.nn.dynamic_rnn( inputs=self.x, cell=cell, sequence_length=self.x_len, dtype=tf.float32, initial_state=self.initial_state, scope='rnn' ) """ use time distrubuted layer for store time singly """ params = time_distributed_dense_layer(outputs, self.output_units, scope='rnn/gmm') pis, mus, sigmas, rhos, es = self.parse_parameters(params) sequence_loss, self.loss = self.NLL(self.y, self.x_len, pis, mus, sigmas, rhos, es) self.sampled_sequence = tf.cond( self.prime, lambda: self.primed_sample(cell), lambda: self.sample(cell) ) return self.loss
def encode(self, x, features):
    x = tf.concat([x, features], axis=2)
    h = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='x-init',
    )
    c = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='c-init',
    )

    conv_inputs = [h]
    # note: the last (dilation, filter_width) pair is not used here
    for i, (dilation, filter_width) in enumerate(list(zip(self.dilations, self.filter_widths))[:-1]):
        dilated_conv = temporal_convolution_layer(
            inputs=h,
            output_units=4 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='dilated-conv-encode-{}'.format(i),
        )
        # LSTM-style gated update, with the gates computed by a dilated convolution
        input_gate, conv_filter, conv_gate, emit_gate = tf.split(dilated_conv, 4, axis=2)
        c = tf.nn.sigmoid(input_gate) * c + tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)
        h = tf.nn.sigmoid(emit_gate) * tf.nn.tanh(c)
        conv_inputs.append(h)

    return conv_inputs
def calculate_loss(self):
    self.x = tf.placeholder(tf.float32, [None, None, 3])
    self.y = tf.placeholder(tf.float32, [None, None, 3])
    self.x_len = tf.placeholder(tf.int32, [None])
    self.c = tf.placeholder(tf.int32, [None, None])
    self.c_len = tf.placeholder(tf.int32, [None])

    self.sample_tsteps = tf.placeholder(tf.int32, [])
    self.num_samples = tf.placeholder(tf.int32, [])
    self.prime = tf.placeholder(tf.bool, [])
    self.x_prime = tf.placeholder(tf.float32, [None, None, 3])
    self.x_prime_len = tf.placeholder(tf.int32, [None])
    self.bias = tf.placeholder_with_default(
        tf.zeros([self.num_samples], dtype=tf.float32), [None])

    # attention over the one-hot encoded character sequence
    cell = LSTMAttentionCell(
        lstm_size=self.lstm_size,
        num_attn_mixture_components=self.attention_mixture_components,
        attention_values=tf.one_hot(self.c, len(drawing.alphabet)),
        attention_values_lengths=self.c_len,
        num_output_mixture_components=self.output_mixture_components,
        bias=self.bias
    )
    self.initial_state = cell.zero_state(tf.shape(self.x)[0], dtype=tf.float32)
    outputs, self.final_state = tf.nn.dynamic_rnn(
        inputs=self.x,
        cell=cell,
        sequence_length=self.x_len,
        dtype=tf.float32,
        initial_state=self.initial_state,
        scope='rnn'
    )

    # GMM parameters computed independently at each timestep
    params = time_distributed_dense_layer(outputs, self.output_units, scope='rnn/gmm')
    pis, mus, sigmas, rhos, es = self.parse_parameters(params)
    sequence_loss, self.loss = self.NLL(self.y, self.x_len, pis, mus, sigmas, rhos, es)

    self.sampled_sequence = tf.cond(
        self.prime,
        lambda: self.primed_sample(cell),
        lambda: self.sample(cell)
    )
    return self.loss
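# `parse_parameters` is not shown here; for a handwriting-style bivariate GMM
# head with K = output_mixture_components and output_units = 6*K + 1, one
# plausible sketch (the exact ordering and transforms are assumptions):
def parse_parameters(self, z, eps=1e-8):
    K = self.output_mixture_components
    pis, mus, sigmas, rhos, es = tf.split(z, [K, 2 * K, 2 * K, K, 1], axis=2)
    pis = tf.nn.softmax(pis)        # mixture weights sum to 1
    sigmas = tf.exp(sigmas) + eps   # standard deviations > 0
    rhos = tf.nn.tanh(rhos)         # correlations in (-1, 1)
    es = tf.nn.sigmoid(es)          # pen-lift (end-of-stroke) probability
    return pis, mus, sigmas, rhos, es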
def encode(self, x, features): """ Parameters ---------- x: sequence input shape = [batch_size, seq_len] features: features shape = [batch_size, seq_len, num_features] Returns ------- y_hat: Tensor projected skip outputs (thought vector) shape = [batch_size, seq_len, 1] conv_inputs: [Tensor] outputs of convolution length = len(dilations) each element has shape [batch_size, seq_len, residual_channels] """ # x.shape = [batch_size, seq_len, num_features + 1] x = tf.concat([x, features], axis=2) """ Pass initial inputs through dense layer so that they'll have the same shape as the residuals. We're expanding the number of "channels". inputs.shape = [batch_size, seq_len, residual_channels] """ inputs = time_distributed_dense_layer( inputs=x, output_units=self.residual_channels, activation=tf.nn.tanh, scope='x-proj-encode') skip_outputs = [] conv_inputs = [inputs] for i, (dilation, filter_width) in enumerate( zip(self.dilations, self.filter_widths)): # dilated_conv.shape = [batch_size, seq_len, 2*residual_channels] dilated_conv = temporal_convolution_layer( inputs=inputs, output_units=2 * self.residual_channels, convolution_width=filter_width, causal=True, dilation_rate=[dilation], scope='dilated-conv-encode-{}'.format(i)) # dilated_conv.shape = [batch_size, seq_len, residual_channels] conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2) dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate) # Pass dilated_conv through dense layer to expand the number of channels. # outputs.shape = [batch_size, seq_len, skip_channels + residual_channels] outputs = time_distributed_dense_layer( inputs=dilated_conv, output_units=self.skip_channels + self.residual_channels, scope='dilated-conv-proj-encode-{}'.format(i)) # skips.shape = [batch_size, seq_len, skip_channels] # residuals.shape = [batch_size, seq_len, residual_channels] skips, residuals = tf.split( outputs, [self.skip_channels, self.residual_channels], axis=2) inputs += residuals conv_inputs.append(inputs) skip_outputs.append(skips) # skip_output.shape = [batch_size, seq_len, len(dilations)*skip_channels] skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2)) # h.shape = [batch_size, seq_len, 128] h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu) # y_hat.shape = [batch_size, seq_len, 1] y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2') return y_hat, conv_inputs[:-1]
def wavenet_logits_target(self):
    x = self.x
    inputs, w, b = time_distributed_dense_layer(
        inputs=x,
        output_units=self.residual_channels,
        activation=tf.nn.tanh,
        scope='target-x-proj-encode',
        reuse=False)
    self.w_target["wf0"] = w
    self.w_target["bf0"] = b

    skip_outputs = []
    conv_inputs = [inputs]
    for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
        dilated_conv, w, b = temporal_convolution_layer(
            inputs=inputs,
            output_units=2 * self.residual_channels,
            convolution_width=filter_width,
            causal=True,
            dilation_rate=[dilation],
            scope='target-dilated-conv-encode-{}'.format(i),
            reuse=tf.AUTO_REUSE)
        self.w_target["wc{}".format(i)] = w
        self.w_target["wb{}".format(i)] = b

        conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
        dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

        outputs, w, b = time_distributed_dense_layer(
            inputs=dilated_conv,
            output_units=self.skip_channels + self.residual_channels,
            scope='target-dilated-conv-proj-encode-{}'.format(i),
            reuse=tf.AUTO_REUSE)
        self.w_target["wtf-{}".format(i)] = w
        self.w_target["btf-{}".format(i)] = b

        skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)
        inputs += residuals
        conv_inputs.append(inputs)
        skip_outputs.append(skips)

    skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
    h, w, b = time_distributed_dense_layer(skip_outputs, 128, scope='target-dense-encode-1',
                                           activation=tf.nn.relu, reuse=tf.AUTO_REUSE)
    self.w_target["wtf1"] = w
    self.w_target["btf1"] = b

    h, w, b = time_distributed_dense_layer(h, 3, scope='target-dense-encode-2',
                                           activation=tf.nn.relu, reuse=tf.AUTO_REUSE)
    self.w_target["wtf2"] = w
    self.w_target["btf2"] = b

    s = h.get_shape().as_list()
    # flatten (requires `from functools import reduce` on Python 3)
    out_flat = tf.reshape(h, [-1, reduce(lambda x, y: x * y, s[1:])])

    h, w, b = fully_connected_layer(out_flat, 128, scope_name='target-dense-encode-1',
                                    activation=tf.nn.relu)
    self.w_target["wf1"] = w
    self.w_target["bf1"] = b

    out, w, b = fully_connected_layer(h, self.n_actions, scope_name='target-dense-encode-2',
                                      activation=None)
    self.w_target["wout"] = w
    self.w_target["bout"] = b

    self.q_target_out = out
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)
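# Typical use of the w_target dict populated above (assumed, following the
# usual DQN pattern): periodically copying the online network's weights --
# collected in a hypothetical parallel dict, say self.w -- into the target network:
#
#   self.sync_ops = [tf.assign(self.w_target[k], self.w[k])
#                    for k in self.w_target]
#   sess.run(self.sync_ops)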