def __init__(self, l2_regularization=0.0, l1_regularization=0.0, scope='eieeoutput', summary_labels=()):
    """
    Set up the EIIE output layer with a fixed convolution configuration.

    Only the regularization weights, the scope and the summary labels are
    configurable. Filter count, stride, padding, bias usage and the
    activation are hard-coded below.

    Args:
        l2_regularization: L2 regularization weight
        l1_regularization: L1 regularization weight
        scope: Scope name forwarded to the parent constructor
        summary_labels: Summary labels forwarded to the nonlinearity and to the parent constructor
    """
    # Caller-supplied regularization weights.
    self.l1_regularization = l1_regularization
    self.l2_regularization = l2_regularization

    # Fixed settings: one filter, unit stride, no padding, bias enabled.
    self.size, self.stride = 1, 1
    self.padding = 'VALID'
    self.bias = True

    # The activation is always relu for this layer.
    self.nonlinearity = Nonlinearity(name='relu', summary_labels=summary_labels)

    super(EIIE_OutPut, self).__init__(scope=scope, summary_labels=summary_labels)
def __init__(self, size=20, bias=True, activation='relu', l2_regularization=0.0, l1_regularization=0.0, scope='EIIE', summary_labels=()):
    """
    Build the three convolutions and two nonlinearities of the EIIE layer.

    Args:
        size: Number of filters of the second convolution
        bias: Bias flag forwarded to every convolution
        activation: Nonlinearity name forwarded to both nonlinearities
        l2_regularization: L2 regularization weight forwarded to every convolution
        l1_regularization: L1 regularization weight forwarded to every convolution
        scope: Scope name forwarded to the parent constructor
        summary_labels: Summary labels forwarded to all sub-layers and the parent constructor
    """
    self.size = size

    # Keyword arguments shared by all three convolutions.
    shared = dict(
        bias=bias,
        l2_regularization=l2_regularization,
        l1_regularization=l1_regularization,
        summary_labels=summary_labels,
    )

    # First convolution: 3 filters, window of 3 along the last spatial axis.
    self.conv1 = Conv2d(size=3, stride=(1, 1), window=(1, 3), padding='VALID', **shared)

    # NOTE: window_length is not defined in this method; it must be
    # resolvable from the enclosing scope at call time.
    span = window_length - 2 - 1
    # Second convolution: window and stride both equal to span.
    self.conv2 = Conv2d(size=size, stride=(1, span), window=(1, span), padding='VALID', **shared)

    # Third convolution: 1x1 window, single filter (padding left at Conv2d's default).
    self.conv3 = Conv2d(size=1, stride=(1, 1), window=(1, 1), **shared)

    self.nonlinearity = Nonlinearity(name=activation, summary_labels=summary_labels)
    self.nonlinearity2 = Nonlinearity(name=activation, summary_labels=summary_labels)

    super(EIIE, self).__init__(scope=scope, summary_labels=summary_labels)
def __init__(self, scope='ddpg-critic-network', summary_labels=(), size_t0=400, size_t1=300):
    """
    Create and register the layers of the DDPG critic network.

    Args:
        scope: Scope name forwarded to the parent constructor
        summary_labels: Summary labels forwarded to the parent constructor
        size_t0: Output size of the first linear layer
        size_t1: Output size of the second linear layer
    """
    super(DDPGCriticNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

    # First stage: linear -> batch normalization -> relu.
    self.t0l = Linear(size=size_t0, scope='linear0')
    self.t0b = TFLayer(layer='batch_normalization', scope='batchnorm0', center=True, scale=True)
    self.t0n = Nonlinearity(name='relu', scope='relu0')

    # Second stage: linear -> batch normalization -> relu.
    self.t1l = Linear(size=size_t1, scope='linear1')
    self.t1b = TFLayer(layer='batch_normalization', scope='batchnorm1', center=True, scale=True)
    self.t1n = Nonlinearity(name='relu', scope='relu1')

    # Output stage: single tanh unit with small uniform weight initialization.
    output_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
    self.t2d = Dense(size=1, activation='tanh', scope='dense0', weights=output_init)

    # Register the layers in the order they were created.
    for layer in (self.t0l, self.t0b, self.t0n, self.t1l, self.t1b, self.t1n, self.t2d):
        self.add_layer(layer)
class EIIE(Layer):
    """
    EIIE layer.

    Applies three stacked 2D convolutions to the input (with the first slice
    along axis 2 split off and re-injected before the last convolution) and
    prepends an all-zero "cash bias" entry along axis 1.
    """

    def __init__(self, size=20, bias=True, activation='relu', l2_regularization=0.0, l1_regularization=0.0,
                 scope='EIIE', summary_labels=()):
        """
        Build the three convolutions and two nonlinearities of the layer.

        Args:
            size: Number of filters of the second convolution
            bias: Bias flag forwarded to every convolution
            activation: Nonlinearity name forwarded to both nonlinearities
            l2_regularization: L2 regularization weight forwarded to every convolution
            l1_regularization: L1 regularization weight forwarded to every convolution
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to all sub-layers and the parent constructor
        """
        self.size = size
        self.conv1 = Conv2d(size=3, bias=bias, stride=(1, 1), window=(1, 3), padding='VALID',
                            l2_regularization=l2_regularization, l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        # NOTE: window_length is not defined in this class; it must be
        # resolvable from the enclosing scope when the layer is constructed.
        self.conv2 = Conv2d(size=size, bias=bias, stride=(1, window_length - 2 - 1),
                            window=(1, window_length - 2 - 1), padding='VALID',
                            l2_regularization=l2_regularization, l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        self.conv3 = Conv2d(size=1, bias=bias, stride=(1, 1), window=(1, 1),
                            l2_regularization=l2_regularization, l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        self.nonlinearity = Nonlinearity(name=activation, summary_labels=summary_labels)
        self.nonlinearity2 = Nonlinearity(name=activation, summary_labels=summary_labels)
        super(EIIE, self).__init__(scope=scope, summary_labels=summary_labels)

    def tf_apply(self, x0, update):
        """
        Apply the layer to a rank-4 input tensor.

        Args:
            x0: Input tensor, indexed with four axes below
            update: Update flag forwarded to the sub-layers

        Returns:
            The output of the third convolution with one all-zero entry
            prepended along axis 1.
        """
        # Example shapes (from the original author's notes), for
        # window_size=50, actions=4 (giving the 3), data cols=5:
        #   x0      = (None, 3, 50, 5)
        #   x       = (None, 3, 49, 5)
        #   w0      = (None, 3, 1, 1)
        #   conv1  => (None, 3, 47, 3)
        #   conv2  => (None, 3, 1, 20)
        #   concat => (None, 3, 1, 21)
        #   conv3  => (None, 3, 1, 1)
        #   concat => (None, 4, 1, 1)  (one zero row prepended on axis 1)

        # Split off the first slice along axis 2 (first channel only); it is
        # concatenated back onto the features before the last convolution.
        w0 = x0[:, :, :1, :1]
        x = x0[:, :, 1:, :]

        # NOTE: self.nonlinearity / self.nonlinearity2 are constructed but not
        # applied after conv1 / conv2 (those calls were disabled in the original).
        x = self.conv1.apply(x, update=update)
        x = self.conv2.apply(x, update=update)

        x = tf.concat([x, w0], 3)
        x = self.conv3.apply(x, update=update)

        # Prepend the cash bias. Multiplying a slice of x by zero yields a
        # zero tensor with a batch dimension matching x without needing to
        # know the batch size statically.
        cash_bias = x[:, :1, :1, :1] * 0
        x = tf.concat([cash_bias, x], 1)

        if 'activations' in self.summary_labels:
            summary = tf.summary.histogram(name='activations', values=x)
            self.summaries.append(summary)

        return x

    def tf_regularization_loss(self):
        """
        Sum the regularization losses of this layer and its three convolutions.

        Returns:
            The summed loss tensor, or None if no component reports a loss.
        """
        losses = list()

        base_loss = super(EIIE, self).tf_regularization_loss()
        if base_loss is not None:
            losses.append(base_loss)

        # Each convolution is queried exactly once and checked against its
        # own result (the original tested conv1 before appending conv3's loss).
        for conv in (self.conv1, self.conv2, self.conv3):
            conv_loss = conv.regularization_loss()
            if conv_loss is not None:
                losses.append(conv_loss)

        if len(losses) > 0:
            return tf.add_n(inputs=losses)
        return None

    def get_variables(self, include_non_trainable=False):
        """
        Collect the variables of this layer and of all its sub-layers.

        Args:
            include_non_trainable: Forwarded to every get_variables call

        Returns:
            List of variables of the layer, the three convolutions and both
            nonlinearities.
        """
        layer_variables = super(EIIE, self).get_variables(include_non_trainable=include_non_trainable)

        # Both nonlinearities are included once each (the original added
        # self.nonlinearity twice and skipped self.nonlinearity2).
        sub_layers = (self.conv1, self.conv2, self.conv3, self.nonlinearity, self.nonlinearity2)
        for sub_layer in sub_layers:
            layer_variables += sub_layer.get_variables(include_non_trainable=include_non_trainable)

        return layer_variables
class DDPGCriticNetwork(LayerBasedNetwork):
    """
    Critic network for DDPG.

    The state input passes through a linear / batch-norm / relu stage, is
    concatenated with the action input, passes through a second such stage
    and ends in a single tanh dense unit.
    """

    def __init__(self, scope='ddpg-critic-network', summary_labels=(), size_t0=400, size_t1=300):
        """
        Create and register the layers of the network.

        Args:
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the parent constructor
            size_t0: Output size of the first linear layer
            size_t1: Output size of the second linear layer
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        # First stage (state input only).
        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(layer='batch_normalization', scope='batchnorm0', center=True, scale=True)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        # Second stage (first-stage output concatenated with the action input).
        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(layer='batch_normalization', scope='batchnorm1', center=True, scale=True)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        # Output unit with small uniform weight initialization.
        output_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.t2d = Dense(size=1, activation='tanh', scope='dense0', weights=output_init)

        # Register the layers in creation order.
        for layer in (self.t0l, self.t0b, self.t0n, self.t1l, self.t1b, self.t1n, self.t2d):
            self.add_layer(layer)

    def tf_apply(self, x, internals, update, return_internals=False):
        """
        Compute the critic output for a state/action pair.

        Args:
            x: Mapping with 'states' and 'actions' entries; each entry is
                either a single input or a dict holding exactly one input
            internals: Not used by this network
            update: Update flag forwarded to the layers
            return_internals: If true, a (output, None) pair is returned

        Returns:
            The output of the final dense layer, or (output, None) when
            return_internals is set.

        Raises:
            TensorForceError: If the states or actions dict does not contain
                exactly one entry.
        """
        # NOTE(review): only x['states'] is tested here; x['actions'] is the
        # assertion message, not a second condition. Kept as in the original.
        assert x['states'], x['actions']

        states = x['states']
        if not isinstance(states, dict):
            x_states = states
        else:
            if len(states) != 1:
                raise TensorForceError(
                    'DDPG critic network must have only one state input, but {} given.'.format(len(states)))
            # Single entry: take the value under the (only) sorted key.
            x_states = states[sorted(states)[0]]

        actions = x['actions']
        if not isinstance(actions, dict):
            x_actions = actions
        else:
            if len(actions) != 1:
                raise TensorForceError(
                    'DDPG critic network must have only one action input, but {} given.'.format(len(actions)))
            x_actions = actions[sorted(actions)[0]]

        # First stage on the state input.
        hidden = self.t0l.apply(x=x_states, update=update)
        hidden = self.t0b.apply(x=hidden, update=update)
        hidden = self.t0n.apply(x=hidden, update=update)

        # Actions join the network at the second linear layer.
        joined = tf.concat([hidden, x_actions], axis=1)
        hidden = self.t1l.apply(x=joined, update=update)
        hidden = self.t1b.apply(x=hidden, update=update)
        hidden = self.t1n.apply(x=hidden, update=update)

        # The last dimension is deliberately kept (no squeeze is applied).
        out = self.t2d.apply(x=hidden, update=update)

        if not return_internals:
            return out
        # Todo: Internals management
        return out, None
class EIIE_OutPut(Layer):
    """
    EIIE output layer based on a 2-dimensional convolution.

    A single-filter convolution whose window spans the whole of input axis 2,
    followed by a bias and a relu nonlinearity.

    NOTE(review): the original notes describe two inputs (the preceding
    features and the last weights from the environment); this implementation
    receives a single tensor in tf_apply.
    """

    def __init__(self, l2_regularization=0.0, l1_regularization=0.0, scope='eieeoutput', summary_labels=()):
        """
        Set up the layer with a fixed convolution configuration.

        Filter count (1), stride (1), padding ('VALID'), bias (enabled) and
        activation ('relu') are hard-coded; the convolution window is derived
        from the input in tf_apply.

        Args:
            l2_regularization: L2 regularization weight
            l1_regularization: L1 regularization weight
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the nonlinearity and the parent constructor
        """
        # Caller-supplied regularization weights.
        self.l1_regularization = l1_regularization
        self.l2_regularization = l2_regularization

        # Fixed convolution settings.
        self.size, self.stride = 1, 1
        self.padding = 'VALID'
        self.bias = True

        self.nonlinearity = Nonlinearity(name='relu', summary_labels=summary_labels)
        super(EIIE_OutPut, self).__init__(scope=scope, summary_labels=summary_labels)

    def tf_apply(self, x, update):
        """
        Apply the convolution, bias and nonlinearity to a rank-4 input.

        Args:
            x: Input tensor; must have rank 4
            update: Update flag forwarded to the nonlinearity

        Returns:
            The activated convolution output.

        Raises:
            TensorForceError: If the input rank is not 4.
        """
        input_rank = util.rank(x)
        if input_rank != 4:
            raise TensorForceError('Invalid input rank for conv2d layer: {}, must be 4'.format(input_rank))

        # The window covers the full extent of input axis 2.
        self.window = (1, x.shape[2])
        in_channels = x.shape[3].value
        kernel_shape = self.window + (in_channels, self.size)

        # Filter weights 'W': normal initialization with capped standard deviation.
        init_stddev = min(0.1, sqrt(2.0 / self.size))
        kernel_init = tf.random_normal_initializer(mean=0.0, stddev=init_stddev, dtype=tf.float32)
        self.filters = tf.get_variable(name='W', shape=kernel_shape, dtype=tf.float32, initializer=kernel_init)

        # A scalar stride applies to both spatial axes.
        if type(self.stride) is tuple:
            stride_h, stride_w = self.stride
        else:
            stride_h = stride_w = self.stride

        x = tf.nn.conv2d(input=x, filter=self.filters, strides=(1, stride_h, stride_w, 1), padding=self.padding)

        if self.bias:
            # The boolean flag is replaced by the bias variable 'b' itself.
            self.bias = tf.get_variable(
                name='b',
                shape=(self.size, ),
                dtype=tf.float32,
                initializer=tf.zeros_initializer(dtype=tf.float32),
            )
            x = tf.nn.bias_add(value=x, bias=self.bias)

        x = self.nonlinearity.apply(x=x, update=update)

        if 'activations' in self.summary_labels:
            self.summaries.append(tf.summary.histogram(name='activations', values=x))

        return x

    def tf_regularization_loss(self):
        """
        Sum the regularization losses of the parent, the filters/bias and the nonlinearity.

        Returns:
            The summed loss tensor, or None if there is no loss term.
        """
        losses = list()

        parent_loss = super(EIIE_OutPut, self).tf_regularization_loss()
        if parent_loss is not None:
            losses.append(parent_loss)

        # L2 terms on the filters and, when present, the bias.
        if self.l2_regularization > 0.0:
            losses.append(self.l2_regularization * tf.nn.l2_loss(t=self.filters))
            if self.bias is not None:
                losses.append(self.l2_regularization * tf.nn.l2_loss(t=self.bias))

        # L1 terms on the filters and, when present, the bias.
        if self.l1_regularization > 0.0:
            losses.append(self.l1_regularization * tf.reduce_sum(input_tensor=tf.abs(x=self.filters)))
            if self.bias is not None:
                losses.append(self.l1_regularization * tf.reduce_sum(input_tensor=tf.abs(x=self.bias)))

        activation_loss = self.nonlinearity.regularization_loss()
        if activation_loss is not None:
            losses.append(activation_loss)

        if len(losses) > 0:
            return tf.add_n(inputs=losses)
        return None

    def get_variables(self, include_non_trainable=False):
        """
        Return the variables of this layer followed by those of its nonlinearity.

        Args:
            include_non_trainable: Forwarded to both get_variables calls
        """
        own_variables = super(EIIE_OutPut, self).get_variables(include_non_trainable=include_non_trainable)
        activation_variables = self.nonlinearity.get_variables(include_non_trainable=include_non_trainable)
        return own_variables + activation_variables

    def get_summaries(self):
        """
        Return the summaries of this layer followed by those of its nonlinearity.
        """
        own_summaries = super(EIIE_OutPut, self).get_summaries()
        activation_summaries = self.nonlinearity.get_summaries()
        return own_summaries + activation_summaries