def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        kernel_size=3,
        n_attn_heads=16,
        dropout=0.9,
    ):
        """Build a TF1 graph: dilated-CNN encoder/decoder with multi-head attention.

        Creates placeholders ``self.X`` (batch, time, size) and ``self.Y``
        (batch, output_size), and exposes ``self.logits``, ``self.cost``
        (MSE) and ``self.optimizer`` (Adam).

        Args:
            learning_rate: Adam step size.
            num_layers: number of conv blocks; block i uses dilation 2**i.
            size: feature dimension of the input series.
            size_layer: hidden width used throughout.
            output_size: regression target dimension.
            kernel_size: convolution kernel width passed to ``cnn_block``.
            n_attn_heads: attention heads per decoder layer.
            dropout: keep probability for ``tf.nn.dropout`` — despite the
                name, a larger value means *less* dropout.
        """
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))

        # Embed inputs and add positional information (position_encoding and
        # cnn_block are defined elsewhere in this module).
        encoder_embedded = tf.layers.dense(self.X, size_layer)
        encoder_embedded += position_encoding(encoder_embedded)

        # Snapshot of the embedded input, used to build the attention memory.
        e = tf.identity(encoder_embedded)
        # Encoder: residual stack of exponentially dilated conv blocks.
        for i in range(num_layers):
            dilation_rate = 2**i
            pad_sz = (kernel_size - 1) * dilation_rate
            with tf.variable_scope('block_%d' % i):
                encoder_embedded += cnn_block(encoder_embedded, dilation_rate,
                                              pad_sz, size_layer, kernel_size)

        # ConvS2S-style: keys come from the encoder output, values from the
        # encoder output plus the input embedding.
        encoder_output, output_memory = encoder_embedded, encoder_embedded + e
        g = tf.identity(encoder_embedded)

        # Decoder: conv block followed by multi-head dot-product attention
        # over the encoder memory, with a residual connection.
        for i in range(num_layers):
            dilation_rate = 2**i
            pad_sz = (kernel_size - 1) * dilation_rate
            with tf.variable_scope('decode_%d' % i):
                attn_res = h = cnn_block(encoder_embedded, dilation_rate,
                                         pad_sz, size_layer, kernel_size)

            # One projection set per head; each head attends in a
            # size_layer // n_attn_heads dimensional subspace.
            C = []
            for j in range(n_attn_heads):
                h_ = tf.layers.dense(h, size_layer // n_attn_heads)
                g_ = tf.layers.dense(g, size_layer // n_attn_heads)
                zu_ = tf.layers.dense(encoder_output,
                                      size_layer // n_attn_heads)
                ze_ = tf.layers.dense(output_memory,
                                      size_layer // n_attn_heads)

                # Attention scores between decoder state and encoder keys.
                d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
                dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                a = tf.nn.softmax(dz)
                c_ = tf.matmul(a, ze_)
                C.append(c_)

            # Concatenate heads, project back, and apply (keep-prob) dropout.
            c = tf.concat(C, 2)
            h = tf.layers.dense(attn_res + c, size_layer)
            h = tf.nn.dropout(h, keep_prob=dropout)
            encoder_embedded += h

        # NOTE(review): [-1] indexes the *batch* axis, keeping every time
        # step of the last sample; if the last time step of every sample was
        # intended this should be encoder_embedded[:, -1] — confirm against
        # how the model is fed.
        encoder_embedded = tf.sigmoid(encoder_embedded[-1])
        self.logits = tf.layers.dense(encoder_embedded, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
 def __init__(self, name, input_size, output_size, size_layer,
              learning_rate):
     """Recurrent critic: scores a (state-sequence, action) pair as a scalar.

     Placeholders: ``self.X`` (batch, time, input_size), ``self.Y``
     (batch, output_size) action, ``self.hidden_layer`` concatenated LSTM
     (c, h) state, ``self.REWARD`` (batch, 1) target. MSE + Adam.
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, None, input_size))
         self.Y = tf.placeholder(tf.float32, (None, output_size))
         self.hidden_layer = tf.placeholder(tf.float32,
                                            (None, 2 * size_layer))
         self.REWARD = tf.placeholder(tf.float32, (None, 1))
         # NOTE(review): this first dense layer is dead code — feed_critic
         # is overwritten below and the RNN consumes self.X directly;
         # possibly inputs=feed_critic was intended. Confirm before removing.
         feed_critic = tf.layers.dense(self.X,
                                       size_layer,
                                       activation=tf.nn.relu)
         cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
         self.rnn, self.last_state = tf.nn.dynamic_rnn(
             inputs=self.X,
             cell=cell,
             dtype=tf.float32,
             initial_state=self.hidden_layer)
         # Last time step -> output_size, shifted by the action input Y.
         feed_critic = tf.layers.dense(
             self.rnn[:, -1], output_size, activation=tf.nn.relu) + self.Y
         feed_critic = tf.layers.dense(feed_critic,
                                       size_layer // 2,
                                       activation=tf.nn.relu)
         self.logits = tf.layers.dense(feed_critic, 1)
         self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
         self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
             self.cost)
    def __init__(self,
                 learning_rate,
                 num_layers,
                 size,
                 size_layer,
                 forget_bias=0.8):
        """Stacked-LSTM sequence model trained with sigmoid cross-entropy.

        Args:
            learning_rate: Adam step size.
            num_layers: number of stacked LSTM cells.
            size: input (and output) feature dimension.
            size_layer: LSTM hidden width.
            forget_bias: despite the name, used as the dropout wrapper's
                *output keep probability*, not an LSTM forget-gate bias.
        """
        def lstm_cell(size_layer):
            # state_is_tuple=False: (c, h) concatenated into a single
            # (batch, 2 * size_layer) tensor per layer.
            return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False)
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, size))
        drop = tf.contrib.rnn.DropoutWrapper(rnn_cells,
                                             output_keep_prob=forget_bias)
        # Concatenated (c, h) state for every layer.
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, num_layers * 2 * size_layer))
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32)
        # NOTE(review): outputs[-1] selects the last *batch* sample (all its
        # time steps); matches Y (time, size) only for batch size 1 —
        # presumably the intended usage, confirm against the caller.
        self.logits = tf.layers.dense(
            self.outputs[-1],
            size,
            kernel_initializer=tf.glorot_uniform_initializer())
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.Y,
                                                    logits=self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
 def __init__(self, state_size, window_size, trend, skip):
     """Dueling recurrent Q-network trading agent (TF1, owns its session).

     Relies on class-level constants LAYER_SIZE, OUTPUT_SIZE and
     LEARNING_RATE defined on the enclosing class (not visible here).

     Args:
         state_size: per-time-step feature dimension.
         window_size: look-back window over the price series.
         trend: the price series the agent trades on.
         skip: step size when sliding over the trend.
     """
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     # Rolling buffer of the 4 most recent state vectors.
     self.INITIAL_FEATURES = np.zeros((4, self.state_size))
     self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
     self.hidden_layer = tf.placeholder(tf.float32,
                                        (None, 2 * self.LAYER_SIZE))
     self.rnn, self.last_state = tf.nn.dynamic_rnn(
         inputs=self.X,
         cell=cell,
         dtype=tf.float32,
         initial_state=self.hidden_layer)
     # Dueling head: split the last RNN output into advantage/value streams
     # and recombine with the mean advantage subtracted.
     tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
     feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
     feed_validation = tf.layers.dense(tensor_validation, 1)
     self.logits = feed_validation + tf.subtract(
         feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
# Example #5
 def __init__(self, state_size, window_size, trend, skip):
     """Softmax policy-network trading agent with a custom likelihood loss.

     Relies on class constants LAYER_SIZE, OUTPUT_SIZE and LEARNING_RATE.
     Owns an InteractiveSession and initializes all variables.
     """
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     self.X = tf.placeholder(tf.float32, (None, self.state_size))
     self.REWARDS = tf.placeholder(tf.float32, (None))
     self.ACTIONS = tf.placeholder(tf.int32, (None))
     feed_forward = tf.layers.dense(self.X,
                                    self.LAYER_SIZE,
                                    activation=tf.nn.relu)
     # Softmax output: action probabilities rather than raw Q-values.
     self.logits = tf.layers.dense(feed_forward,
                                   self.OUTPUT_SIZE,
                                   activation=tf.nn.softmax)
     input_y = tf.one_hot(self.ACTIONS, self.OUTPUT_SIZE)
     # NOTE(review): non-standard surrogate likelihood, not the textbook
     # log pi(a|s) policy gradient — confirm this formula is intentional.
     loglike = tf.log((input_y * (input_y - self.logits) + (1 - input_y) *
                       (input_y + self.logits)) + 1)
     # Broadcast per-step rewards across all action columns.
     rewards = tf.tile(tf.reshape(self.REWARDS, (-1, 1)),
                       [1, self.OUTPUT_SIZE])
     self.cost = -tf.reduce_mean(loglike * (rewards + 1))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
# Example #6
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias = 0.1,
    ):
        """Stacked-GRU regressor: MSE against self.Y, minimized with Adam.

        Args:
            learning_rate: Adam step size.
            num_layers: number of stacked GRU cells.
            size: input feature dimension.
            size_layer: GRU hidden width.
            output_size: regression target dimension.
            forget_bias: despite the name, the dropout wrapper's *output
                keep probability* (0.1 keeps only 10% of activations).
        """
        def lstm_cell(size_layer):
            # Despite the helper's name this builds a GRU cell.
            return tf.nn.rnn_cell.GRUCell(size_layer)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop = tf.contrib.rnn.DropoutWrapper(
            rnn_cells, output_keep_prob = forget_bias
        )
        # GRU keeps a single state vector per layer: num_layers * size_layer.
        self.hidden_layer = tf.placeholder(
            tf.float32, (None, num_layers * size_layer)
        )
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop, self.X, initial_state = self.hidden_layer, dtype = tf.float32
        )
        # NOTE(review): outputs[-1] takes the last *batch* sample (all time
        # steps) — the batch-of-1 convention used elsewhere here; confirm.
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )
    def __init__(self, state_size, window_size, trend, skip, batch_size):
        """Dueling DQN trading agent with replay memory and epsilon-greedy.

        Builds the whole TF1 graph, owns an InteractiveSession, and stores
        replay/exploration hyper-parameters as instance attributes.

        Args:
            state_size: flat state feature dimension.
            window_size: look-back window over the price series.
            trend: price series to trade on.
            skip: step size when sliding over the trend.
            batch_size: replay mini-batch size.
        """
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        self.action_size = 3
        self.batch_size = batch_size
        # Bounded replay buffer: oldest transitions are dropped first.
        self.memory = deque(maxlen=1000)
        self.inventory = []

        # Discount factor and epsilon-greedy exploration schedule.
        self.gamma = 0.95
        self.epsilon = 0.5
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999

        tf.reset_default_graph()
        self.sess = tf.InteractiveSession()
        self.X = tf.placeholder(tf.float32, [None, self.state_size])
        self.Y = tf.placeholder(tf.float32, [None, self.action_size])
        feed = tf.layers.dense(self.X, 512, activation=tf.nn.relu)
        # Dueling head: advantage and value streams recombined around the
        # mean advantage.
        tensor_action, tensor_validation = tf.split(feed, 2, 1)
        feed_action = tf.layers.dense(tensor_action, self.action_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        # NOTE(review): plain SGD with a hard-coded 1e-5 rate, unlike the
        # Adam optimizers used elsewhere in this file — confirm intentional.
        self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
            self.cost)
        self.sess.run(tf.global_variables_initializer())
# Example #8
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias = 0.1,
        lambda_coeff = 0.5
    ):
        """Variational recurrent model: GRU encoder -> (z_mean, z_log_sigma)
        latent -> GRU decoder, trained on MSE plus a weighted KL term.

        Args:
            learning_rate: Adam step size.
            num_layers: stacked GRU cells in encoder and decoder.
            size: input feature dimension (also the latent dimension).
            size_layer: GRU hidden width.
            output_size: regression target dimension.
            forget_bias: despite the name, the dropout wrappers' output
                keep probability.
            lambda_coeff: weight of the KL term in the total cost.
        """
        def lstm_cell(size_layer):
            # Despite the helper's name this builds a GRU cell.
            return tf.nn.rnn_cell.GRUCell(size_layer)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop = tf.contrib.rnn.DropoutWrapper(
            rnn_cells, output_keep_prob = forget_bias
        )
        self.hidden_layer = tf.placeholder(
            tf.float32, (None, num_layers * size_layer)
        )
        _, last_state = tf.nn.dynamic_rnn(
            drop, self.X, initial_state = self.hidden_layer, dtype = tf.float32
        )

        # Latent Gaussian parameters inferred from the encoder's final state.
        self.z_mean = tf.layers.dense(last_state, size)
        self.z_log_sigma = tf.layers.dense(last_state, size)

        # Reparameterization trick: z = mean + sigma * eps, eps ~ N(0, I).
        # BUG FIX: epsilon was previously sampled but never used, which made
        # z_vector the deterministic mean + sigma and broke VAE sampling.
        epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
        self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon

        with tf.variable_scope('decoder', reuse = False):
            rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)], state_is_tuple = False
            )
            drop_dec = tf.contrib.rnn.DropoutWrapper(
                rnn_cells_dec, output_keep_prob = forget_bias
            )
            # NOTE(review): the original also built
            #   x = tf.concat([tf.expand_dims(self.z_vector, 0), self.X], 1)
            # but never fed it anywhere (dead op — removed here). The decoder
            # sees the latent only through last_state; feeding x instead
            # would change tensor shapes, so it is deliberately not done.
            self.outputs, self.last_state = tf.nn.dynamic_rnn(
                drop_dec, self.X, initial_state = last_state, dtype = tf.float32
            )

        # NOTE(review): outputs[-1] selects the last *batch* sample — the
        # batch-of-1 convention used throughout this file; confirm.
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.lambda_coeff = lambda_coeff

        # KL( N(mean, sigma^2) || N(0, I) ), summed over latent dimensions.
        self.kl_loss = -0.5 * tf.reduce_sum(1.0 + 2 * self.z_log_sigma - self.z_mean ** 2 -
                             tf.exp(2 * self.z_log_sigma), 1)
        self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits) + self.kl_loss)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )
 def __init__(self, input_size, output_size, layer_size, learning_rate):
     """Two-layer feed-forward regressor; summed squared error + Adam.

     Exposes self.X, self.Y, self.logits, self.cost and self.optimizer.
     """
     self.X = tf.placeholder(tf.float32, (None, input_size))
     self.Y = tf.placeholder(tf.float32, (None, output_size))
     hidden = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
     self.logits = tf.layers.dense(hidden, output_size)
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=learning_rate).minimize(self.cost)
# Example #10
 def __init__(self, name, input_size, output_size, size_layer, learning_rate):
     """Feed-forward critic: scores a (state, action) pair with one scalar.

     The action ``self.Y`` is added to the hidden representation before the
     final scoring layers; trained on MSE against ``self.REWARD`` with Adam.
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, input_size))
         self.Y = tf.placeholder(tf.float32, (None, output_size))
         self.REWARD = tf.placeholder(tf.float32, (None, 1))
         feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
         # Shift the projected state by the action input.
         feed_critic = tf.layers.dense(feed_critic, output_size, activation = tf.nn.relu) + self.Y
         feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
         self.logits = tf.layers.dense(feed_critic, 1)
         self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
         self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
# Example #11
 def __init__(self, input_size, output_size, layer_size, learning_rate):
     """Dueling feed-forward Q-network: value + advantage streams.

     The hidden layer is split in two; one half produces per-action
     advantages, the other a scalar state value. They are recombined with
     the mean advantage subtracted. Summed squared error + Adam.
     """
     self.X = tf.placeholder(tf.float32, (None, input_size))
     self.Y = tf.placeholder(tf.float32, (None, output_size))
     hidden = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
     advantage_in, value_in = tf.split(hidden, 2, 1)
     advantage = tf.layers.dense(advantage_in, output_size)
     value = tf.layers.dense(value_in, 1)
     self.logits = value + tf.subtract(
         advantage, tf.reduce_mean(advantage, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=learning_rate).minimize(self.cost)
 def __init__(self, input_size, output_size, layer_size, learning_rate, name):
     """LSTM regression model scoped under ``name``; summed squared error + Adam.

     ``self.hidden_layer`` carries the concatenated LSTM (c, h) state,
     hence the 2 * layer_size width (state_is_tuple=False).
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, None, input_size))
         self.Y = tf.placeholder(tf.float32, (None, output_size))
         cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple = False)
         self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
         self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
                                                 dtype=tf.float32,
                                                 initial_state=self.hidden_layer)
         # Predict from the final time step only.
         self.logits = tf.layers.dense(self.rnn[:,-1], output_size)
         self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
         self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
 def __init__(self, name, input_size, output_size, size_layer):
     """Actor: LSTM over the state sequence -> action logits.

     No loss/optimizer is built here; training is driven externally
     (e.g. by gradients supplied from a critic).
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, None, input_size))
         # Concatenated (c, h) LSTM state (state_is_tuple=False).
         self.hidden_layer = tf.placeholder(tf.float32,
                                            (None, 2 * size_layer))
         cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
         self.rnn, self.last_state = tf.nn.dynamic_rnn(
             inputs=self.X,
             cell=cell,
             dtype=tf.float32,
             initial_state=self.hidden_layer)
         # Logits from the final time step only.
         self.logits = tf.layers.dense(self.rnn[:, -1], output_size)
# Example #14
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias=0.1,
    ):
        """Dual-RNN model: two independent stacks over separate forward and
        backward input placeholders; prediction is made from the
        *difference* of their outputs. MSE + Adam.

        Args:
            learning_rate: Adam step size.
            num_layers: stacked cells per direction.
            size: input feature dimension.
            size_layer: hidden width per cell.
            output_size: regression target dimension.
            forget_bias: despite the name, the dropout wrappers' output
                keep probability.
        """
        def lstm_cell(size_layer):
            # Despite the helper's name this builds a plain BasicRNNCell.
            return tf.nn.rnn_cell.BasicRNNCell(size_layer)

        with tf.variable_scope('forward', reuse=False):
            rnn_cells_forward = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)],
                state_is_tuple=False,
            )
            self.X_forward = tf.placeholder(tf.float32, (None, None, size))
            drop_forward = tf.contrib.rnn.DropoutWrapper(
                rnn_cells_forward, output_keep_prob=forget_bias)
            # BasicRNNCell has one state vector per layer.
            self.hidden_layer_forward = tf.placeholder(
                tf.float32, (None, num_layers * size_layer))
            self.outputs_forward, self.last_state_forward = tf.nn.dynamic_rnn(
                drop_forward,
                self.X_forward,
                initial_state=self.hidden_layer_forward,
                dtype=tf.float32,
            )

        with tf.variable_scope('backward', reuse=False):
            rnn_cells_backward = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)],
                state_is_tuple=False,
            )
            self.X_backward = tf.placeholder(tf.float32, (None, None, size))
            drop_backward = tf.contrib.rnn.DropoutWrapper(
                rnn_cells_backward, output_keep_prob=forget_bias)
            self.hidden_layer_backward = tf.placeholder(
                tf.float32, (None, num_layers * size_layer))
            self.outputs_backward, self.last_state_backward = tf.nn.dynamic_rnn(
                drop_backward,
                self.X_backward,
                initial_state=self.hidden_layer_backward,
                dtype=tf.float32,
            )

        # Model signal: backward-stack outputs minus forward-stack outputs.
        self.outputs = self.outputs_backward - self.outputs_forward
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        # NOTE(review): outputs[-1] selects the last *batch* sample (the
        # batch-of-1 convention used throughout this file) — confirm.
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
 def __init__(self, name, input_size, output_size, size_layer):
     """Dueling actor: LSTM over the state sequence; the last step is split
     into advantage/value streams recombined into self.logits.

     No loss/optimizer here — training is driven externally.
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, None, input_size))
         # Concatenated (c, h) LSTM state (state_is_tuple=False).
         self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
         cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)
         self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X, cell=cell,
                                                 dtype=tf.float32,
                                                 initial_state=self.hidden_layer)
         # Dueling head on the final time step.
         tensor_action, tensor_validation = tf.split(self.rnn[:,-1],2,1)
         feed_action = tf.layers.dense(tensor_action, output_size)
         feed_validation = tf.layers.dense(tensor_validation, 1)
         self.logits = feed_validation + tf.subtract(feed_action,
                                                     tf.reduce_mean(feed_action,axis=1,keep_dims=True))
 def __init__(self, state_size, window_size, trend, skip):
     """Curiosity-driven DQN: a forward (next-state) model whose error is an
     intrinsic reward, plus eval/target Q-networks and a copy op.

     Relies on class constants LEARNING_RATE, OUTPUT_SIZE and GAMMA.
     Owns an InteractiveSession and initializes all variables.
     """
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.X = tf.placeholder(tf.float32, (None, self.state_size))
     # Y carries the *next* state here (same width as X).
     self.Y = tf.placeholder(tf.float32, (None, self.state_size))
     self.ACTION = tf.placeholder(tf.float32, (None))
     self.REWARD = tf.placeholder(tf.float32, (None))
     self.batch_size = tf.shape(self.ACTION)[0]
     
     # Forward model: predict next state from (state, action); its squared
     # error per row is the curiosity (intrinsic) reward.
     with tf.variable_scope('curiosity_model'):
         action = tf.reshape(self.ACTION, (-1,1))
         state_action = tf.concat([self.X, action], axis=1)
         save_state = tf.identity(self.Y)
         
         feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
         self.curiosity_logits = tf.layers.dense(feed, self.state_size)
         self.curiosity_cost = tf.reduce_sum(tf.square(save_state - self.curiosity_logits), axis=1)
         
         self.curiosity_optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE)            .minimize(tf.reduce_mean(self.curiosity_cost))
     
     # Total reward = extrinsic reward + curiosity bonus.
     total_reward = tf.add(self.curiosity_cost, self.REWARD)
     
     with tf.variable_scope("q_model"):
         with tf.variable_scope("eval_net"):
             x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
             self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)
         
         with tf.variable_scope("target_net"):
             y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
             y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)
         
         # NOTE(review): q_target is not wrapped in tf.stop_gradient; the
         # target net is excluded from training only via var_list below.
         q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
         action = tf.cast(self.ACTION, tf.int32)
         action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
         # Q-value of the action actually taken, one per batch row.
         q = tf.gather_nd(params=self.logits, indices=action_indices)
         self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
         # Only the eval net is trained; the target net is updated by copy.
         self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
         self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))
         
     # Hard update: copy eval-net weights into the target net.
     t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
     e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
     self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
     
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
# Example #17
 def __init__(self, name, input_size, output_size, size_layer, learning_rate):
     """Dueling feed-forward critic: value + advantage streams, shifted by
     the actor's action (self.Y), scored to one scalar; MSE vs REWARD + Adam.
     """
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, input_size))
         self.Y = tf.placeholder(tf.float32, (None, output_size))
         self.REWARD = tf.placeholder(tf.float32, (None, 1))
         feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
         # Dueling recombination of advantage and value streams.
         tensor_action, tensor_validation = tf.split(feed_critic,2,1)
         feed_action = tf.layers.dense(tensor_action, output_size)
         feed_validation = tf.layers.dense(tensor_validation, 1)
         feed_critic = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
         # Shift by the action input before the scoring layers.
         feed_critic = tf.nn.relu(feed_critic) + self.Y
         feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
         self.logits = tf.layers.dense(feed_critic, 1)
         self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
         self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
# Example #18
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias = 0.1,
    ):
        """Bidirectional stacked-LSTM regressor; MSE + Adam.

        Args:
            learning_rate: Adam step size.
            num_layers: stacked LSTM cells per direction.
            size: input feature dimension.
            size_layer: LSTM hidden width.
            output_size: regression target dimension.
            forget_bias: despite the name, the dropout wrappers' output
                keep probability.
        """
        def lstm_cell(size_layer):
            # state_is_tuple=False: (c, h) concatenated per layer, hence the
            # num_layers * 2 * size_layer state placeholders below.
            return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)

        backward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        forward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop_backward = tf.contrib.rnn.DropoutWrapper(
            backward_rnn_cells, output_keep_prob = forget_bias
        )
        # NOTE(review): misnamed — this wraps the *forward* cells.
        forward_backward = tf.contrib.rnn.DropoutWrapper(
            forward_rnn_cells, output_keep_prob = forget_bias
        )
        self.backward_hidden_layer = tf.placeholder(
            tf.float32, shape = (None, num_layers * 2 * size_layer)
        )
        self.forward_hidden_layer = tf.placeholder(
            tf.float32, shape = (None, num_layers * 2 * size_layer)
        )
        self.outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
            forward_backward,
            drop_backward,
            self.X,
            initial_state_fw = self.forward_hidden_layer,
            initial_state_bw = self.backward_hidden_layer,
            dtype = tf.float32,
        )
        # Concatenate fw/bw outputs along the feature axis.
        self.outputs = tf.concat(self.outputs, 2)
        # NOTE(review): [-1] takes the last *batch* sample — the batch-of-1
        # convention used throughout this file; confirm against the caller.
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )
 def __init__(self, state_size, window_size, trend, skip):
     """DDPG-style agent wiring: actor/critic plus target copies, and the
     deterministic-policy-gradient update for the actor.

     Relies on the Actor and Critic classes defined elsewhere in this
     module and on class constants OUTPUT_SIZE, LAYER_SIZE, LEARNING_RATE.
     Owns an InteractiveSession and initializes all variables.
     """
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     # Rolling buffer of the 4 most recent state vectors.
     self.INITIAL_FEATURES = np.zeros((4, self.state_size))
     self.skip = skip
     tf.reset_default_graph()
     self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE,
                        self.LAYER_SIZE)
     self.actor_target = Actor('actor-target', self.state_size,
                               self.OUTPUT_SIZE, self.LAYER_SIZE)
     self.critic = Critic('critic-original', self.state_size,
                          self.OUTPUT_SIZE, self.LAYER_SIZE,
                          self.LEARNING_RATE)
     self.critic_target = Critic('critic-target', self.state_size,
                                 self.OUTPUT_SIZE, self.LAYER_SIZE,
                                 self.LEARNING_RATE)
     # d(critic)/d(action): the critic receives the action via critic.Y.
     self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
     self.actor_critic_grad = tf.placeholder(tf.float32,
                                             [None, self.OUTPUT_SIZE])
     weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='actor')
     # Chain rule: push the critic's action-gradient through the actor;
     # negated because the optimizer minimizes (we want ascent).
     self.grad_actor = tf.gradients(self.actor.logits, weights_actor,
                                    -self.actor_critic_grad)
     grads = zip(self.grad_actor, weights_actor)
     self.optimizer = tf.train.AdamOptimizer(
         self.LEARNING_RATE).apply_gradients(grads)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
# Example #20
 def __init__(self, name, input_size, output_size, size_layer):
     """Dueling actor head: value + advantage streams combined into
     self.logits (no loss built here; training is driven externally)."""
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, input_size))
         hidden = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
         advantage_in, value_in = tf.split(hidden, 2, 1)
         advantage = tf.layers.dense(advantage_in, output_size)
         value = tf.layers.dense(value_in, 1)
         self.logits = value + tf.subtract(
             advantage, tf.reduce_mean(advantage, axis=1, keep_dims=True))
    def __init__(self,
                 size_layer,
                 embedded_size,
                 learning_rate,
                 size,
                 output_size,
                 num_blocks=2,
                 num_heads=8,
                 min_freq=50):
        """Transformer-style encoder (self-attention + feed-forward blocks)
        for series regression; MSE + Adam.

        Relies on the module-level helpers sinusoidal_position_encoding,
        multihead_attn and pointwise_feedforward.

        Args:
            size_layer: attention hidden width (num_units).
            embedded_size: width of the input embedding.
            learning_rate: Adam step size.
            size: input feature dimension.
            output_size: regression target dimension.
            num_blocks: number of attention + feed-forward blocks.
            num_heads: attention heads per block.
            min_freq: unused in this body — kept for interface compatibility.
        """
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))

        encoder_embedded = tf.layers.dense(self.X, embedded_size)
        # Fixed keep-prob dropout on the embedding.
        encoder_embedded = tf.nn.dropout(encoder_embedded, keep_prob=0.8)
        # Mask: nonzero wherever the mean feature is nonzero.
        x_mean = tf.reduce_mean(self.X, axis=2)
        en_masks = tf.sign(x_mean)
        encoder_embedded += sinusoidal_position_encoding(
            self.X, en_masks, embedded_size)

        for i in range(num_blocks):
            with tf.variable_scope('encoder_self_attn_%d' % i,
                                   reuse=tf.AUTO_REUSE):
                encoder_embedded = multihead_attn(queries=encoder_embedded,
                                                  keys=encoder_embedded,
                                                  q_masks=en_masks,
                                                  k_masks=en_masks,
                                                  future_binding=False,
                                                  num_units=size_layer,
                                                  num_heads=num_heads)

            with tf.variable_scope('encoder_feedforward_%d' % i,
                                   reuse=tf.AUTO_REUSE):
                encoder_embedded = pointwise_feedforward(encoder_embedded,
                                                         embedded_size,
                                                         activation=tf.nn.relu)

        # NOTE(review): [-1] indexes the *batch* axis (batch-of-1 convention
        # used throughout this file) — confirm against the caller.
        self.logits = tf.layers.dense(encoder_embedded[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
    def __init__(self,
                 input_,
                 dimension=2,
                 learning_rate=0.01,
                 hidden_layer=256,
                 epoch=20):
        """Two-layer sigmoid autoencoder that *trains itself on input_*
        inside the constructor (epoch full-batch RMSProp steps).

        After construction, ``self.second_layer_encoder`` is the
        ``dimension``-dimensional encoding of ``self.X``.

        Args:
            input_: 2-D array-like, shape (n_samples, n_features).
            dimension: size of the bottleneck encoding.
            learning_rate: RMSProp step size.
            hidden_layer: width of the two hidden layers.
            epoch: number of full-batch training iterations.
        """
        input_size = input_.shape[1]
        self.X = tf.placeholder("float", [None, input_.shape[1]])

        # Encoder/decoder weights drawn from a standard normal.
        weights = {
            'encoder_h1':
            tf.Variable(tf.random_normal([input_size, hidden_layer])),
            'encoder_h2':
            tf.Variable(tf.random_normal([hidden_layer, dimension])),
            'decoder_h1':
            tf.Variable(tf.random_normal([dimension, hidden_layer])),
            'decoder_h2':
            tf.Variable(tf.random_normal([hidden_layer, input_size])),
        }

        biases = {
            'encoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
            'encoder_b2': tf.Variable(tf.random_normal([dimension])),
            'decoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
            'decoder_b2': tf.Variable(tf.random_normal([input_size])),
        }

        # Encoder: input -> hidden -> bottleneck, all sigmoid.
        first_layer_encoder = tf.nn.sigmoid(
            tf.add(tf.matmul(self.X, weights['encoder_h1']),
                   biases['encoder_b1']))
        self.second_layer_encoder = tf.nn.sigmoid(
            tf.add(tf.matmul(first_layer_encoder, weights['encoder_h2']),
                   biases['encoder_b2']))
        # Decoder: bottleneck -> hidden -> reconstruction.
        first_layer_decoder = tf.nn.sigmoid(
            tf.add(tf.matmul(self.second_layer_encoder, weights['decoder_h1']),
                   biases['decoder_b1']))
        second_layer_decoder = tf.nn.sigmoid(
            tf.add(tf.matmul(first_layer_decoder, weights['decoder_h2']),
                   biases['decoder_b2']))
        # Reconstruction mean squared error.
        self.cost = tf.reduce_mean(tf.pow(self.X - second_layer_decoder, 2))
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(
            self.cost)
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())

        # Full-batch training loop, run at construction time.
        for i in range(epoch):
            last_time = time.time()
            _, loss = self.sess.run([self.optimizer, self.cost],
                                    feed_dict={self.X: input_})
            if (i + 1) % 10 == 0:
                print('epoch:', i + 1, 'loss:', loss, 'time:',
                      time.time() - last_time)
# Example #23
 def __init__(self, name, input_size, output_size, size_layer):
     """Simple two-layer actor: state in, action logits out (no loss built
     here; training is driven externally)."""
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, input_size))
         hidden = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
         self.logits = tf.layers.dense(hidden, output_size)
    def __init__(self, state_size, window_size, trend, skip):
        """Build the TF1 graph for a recurrent Q-network with a curiosity bonus.

        Three sub-graphs are assembled, in this order:
          * ``curiosity_model`` -- an LSTM forward model that predicts the
            next state from (state, action); its per-sample squared error
            doubles as an intrinsic reward added to the environment reward.
          * ``q_model/eval_net`` -- the Q-network that is actually trained.
          * ``q_model/target_net`` -- a frozen copy, synced from the eval net
            only when ``self.target_replace_op`` is run.

        NOTE(review): reads class-level constants ``self.LAYER_SIZE``,
        ``self.OUTPUT_SIZE``, ``self.GAMMA`` and ``self.LEARNING_RATE`` that
        are defined outside this snippet -- confirm they exist on the class.

        Args:
            state_size: width of a single state vector (last axis of X / Y).
            window_size: lookback window used by the surrounding agent logic.
            trend: the series the agent trades on (stored, not used here).
            skip: step between trading decisions (stored, not used here).
        """
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        tf.reset_default_graph()  # fresh graph per instance
        # Rolling buffer of the four most recent states (filled by the caller).
        self.INITIAL_FEATURES = np.zeros((4, self.state_size))
        # (batch, time, state) sequences: current states and successor states.
        self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
        # Flattened LSTM state; 2 * LAYER_SIZE because state_is_tuple=False
        # concatenates the cell state c and the hidden state h.
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, 2 * self.LAYER_SIZE))
        # NOTE(review): shape (None) without a trailing comma is just None,
        # i.e. a fully unconstrained shape -- (None,) was probably intended.
        self.ACTION = tf.placeholder(tf.float32, (None))
        self.REWARD = tf.placeholder(tf.float32, (None))
        self.batch_size = tf.shape(self.ACTION)[0]
        self.seq_len = tf.shape(self.X)[1]

        with tf.variable_scope('curiosity_model'):
            # Broadcast the scalar action over the time axis and append it to
            # each state vector so the LSTM sees (state, action) pairs.
            action = tf.reshape(self.ACTION, (-1, 1, 1))
            repeat_action = tf.tile(action, [1, self.seq_len, 1])
            state_action = tf.concat([self.X, repeat_action], axis=-1)
            save_state = tf.identity(self.Y)
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                           state_is_tuple=False)
            self.rnn, last_state = tf.nn.dynamic_rnn(
                inputs=state_action,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer)
            # Predict the final next-state vector from the last RNN output.
            self.curiosity_logits = tf.layers.dense(self.rnn[:, -1],
                                                    self.state_size)
            # Per-sample squared prediction error == intrinsic reward.
            self.curiosity_cost = tf.reduce_sum(
                tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

            # No var_list needed: only curiosity variables exist at this
            # point in graph construction (q_model is built afterwards).
            self.curiosity_optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    tf.reduce_mean(self.curiosity_cost))

        # Extrinsic reward augmented with the curiosity bonus.
        total_reward = tf.add(self.curiosity_cost, self.REWARD)

        with tf.variable_scope("q_model"):
            with tf.variable_scope("eval_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, self.last_state = tf.nn.dynamic_rnn(
                    inputs=self.X,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                # Q-values per action, computed from the last timestep only.
                self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            with tf.variable_scope("target_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, last_state = tf.nn.dynamic_rnn(
                    inputs=self.Y,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            # Standard DQN target: r + gamma * max_a' Q_target(s', a').
            q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
            # Select Q(s, a) for the action actually taken in each sample.
            action = tf.cast(self.ACTION, tf.int32)
            action_indices = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
            q = tf.gather_nd(params=self.logits, indices=action_indices)
            self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                     predictions=q)
            # Only eval_net variables are updated; target_net stays frozen
            # until explicitly synced via target_replace_op below.
            self.optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    self.cost,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

        # Ops that copy eval_net weights into target_net (run periodically
        # by the caller).
        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/eval_net')
        self.target_replace_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
Пример #25
0
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        kernel_size=3,
        n_attn_heads=16,
        dropout=0.9,
    ):
        """Build a convolutional seq2seq forecaster (ConvS2S-style).

        A stack of ``num_layers`` conv encoder blocks, then ``num_layers``
        decoder blocks each followed by multi-head dot-product attention over
        the encoder outputs, then a dense regression head trained with MSE
        and Adam.

        NOTE(review): relies on module-level helpers ``layer``,
        ``encoder_block`` and ``decoder_block`` defined elsewhere in the
        file; their exact semantics are not visible here.

        Args:
            learning_rate: Adam step size.
            num_layers: number of encoder and of decoder blocks (must be
                >= 1 -- ``z`` is referenced after the encoder loop).
            size: feature width of each input timestep.
            size_layer: model width (should be divisible by n_attn_heads).
            output_size: width of the regression target ``self.Y``.
            kernel_size: conv kernel length passed to the blocks.
            n_attn_heads: number of attention heads.
            dropout: KEEP probability (fed to ``keep_prob``), so 0.9 keeps
                90% of activations -- despite the parameter name.
        """
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))

        # Project inputs up to the model width.
        encoder_embedded = tf.layers.dense(self.X, size_layer)

        e = tf.identity(encoder_embedded)  # saved embedding for the residual below
        for i in range(num_layers):
            z = layer(
                encoder_embedded,
                encoder_block,
                kernel_size,
                size_layer * 2,  # doubled width, presumably split by a gate inside encoder_block -- confirm
                encoder_embedded,
            )
            z = tf.nn.dropout(z, keep_prob=dropout)
            encoder_embedded = z

        # Attention keys come from z, values from z + e (residual that mixes
        # the original embedding back in, as in ConvS2S).
        encoder_output, output_memory = z, z + e
        g = tf.identity(encoder_embedded)  # frozen copy of the encoder output

        for i in range(num_layers):
            attn_res = h = layer(
                encoder_embedded,
                decoder_block,
                kernel_size,
                size_layer * 2,
                residual=tf.zeros_like(encoder_embedded),
            )
            # Multi-head dot-product attention: each head projects into a
            # size_layer // n_attn_heads subspace.
            C = []
            for j in range(n_attn_heads):
                h_ = tf.layers.dense(h, size_layer // n_attn_heads)
                g_ = tf.layers.dense(g, size_layer // n_attn_heads)
                zu_ = tf.layers.dense(encoder_output,
                                      size_layer // n_attn_heads)
                ze_ = tf.layers.dense(output_memory,
                                      size_layer // n_attn_heads)

                # Query = re-projected decoder state + encoder summary.
                d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
                # Attention scores: (batch, t_dec, t_enc).
                dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                a = tf.nn.softmax(dz)
                c_ = tf.matmul(a, ze_)  # per-head context vector
                C.append(c_)

            c = tf.concat(C, 2)  # concatenate heads back to full width
            h = tf.layers.dense(attn_res + c, size_layer)
            h = tf.nn.dropout(h, keep_prob=dropout)
            encoder_embedded = h

        # NOTE(review): [-1] indexes the BATCH axis (last example), giving a
        # (time, size_layer) tensor; with batch size 1 that is per-timestep
        # features. If "last timestep" was intended, [:, -1] would be the
        # form -- confirm against the training loop before changing.
        encoder_embedded = tf.sigmoid(encoder_embedded[-1])
        self.logits = tf.layers.dense(encoder_embedded, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)