def loss(self, truth, prediction):
    """Return the mean negative Gaussian log-likelihood of the prediction.

    ``prediction`` is indexed as a pair: element 0 is the predicted value
    and element 1 is passed through ``op.get`` together with
    ``self.inputs.actions`` to obtain ``sigma``. ``truth`` plays the role
    of the Gaussian mean. The constant ``-0.5 * log(2 * pi)`` term of the
    log-likelihood is not included.

    NOTE(review): ``op.get`` presumably selects the per-action entries of
    its first argument -- confirm against the ``op`` module.
    """
    predicted = prediction[0]
    target = truth
    sigma = op.get(prediction[1], self.inputs.actions)

    # Gaussian log-likelihood
    residual = op.tofloat(predicted - target)
    # The explicit float32 cast is kept from the original, whose note says it
    # is primarily there to prevent under/overflow in the scaled residual.
    standardized = tf.cast(residual, 'float32') * tf.inv(sigma)
    log_likelihood = -tf.square(standardized) / 2 + tf.log(tf.inv(sigma))

    # Minimising the loss maximises the likelihood, hence the sign flip.
    return tf.reduce_mean(-log_likelihood)
def image():
    """Build a downloadable response from the ``size``, ``size_all`` and
    ``num_child`` query parameters.

    All three parameters are required and must parse as integers. When one
    is missing or malformed, a JSON error body is returned with HTTP status
    400. Otherwise the bytes produced by ``ops.get`` are sent as an
    attachment named ``image.jpg`` with the ``image/jpeg`` mimetype.
    """
    query = request.args
    try:
        size = int(query.get('size'))
        size_all = int(query.get('size_all'))
        num_child = int(query.get('num_child'))
    except (TypeError, ValueError) as e:
        # TypeError: the parameter is absent, so int(None) was attempted.
        # ValueError: the parameter is present but is not an integer literal.
        # Anything else is not a client parameter problem and must not be
        # reported as one.
        return (jsonify({
            'error': "parameter error",
            'code': 400,
            'detail': str(e)
        }), 400)

    response = make_response()
    response.data = ops.get(size, size_all, num_child)
    response.headers['Content-Disposition'] = 'attachment; filename=image.jpg'
    response.mimetype = 'image/jpeg'
    return response
def prediction(self, train_output_states):
    """Return ``op.get`` applied to the float-converted training output and
    ``self.inputs.actions``.

    NOTE(review): this presumably picks, for each sample, the output that
    belongs to the action stored in ``self.inputs.actions`` -- confirm
    against the ``op`` module.
    """
    outputs_as_float = op.tofloat(train_output_states)
    actions = self.inputs.actions
    return op.get(outputs_as_float, actions)
def truth(self, train_output_states, train_output_next_states,
          target_output_next_states):
    """Return the Double DQN training target.

    The index chosen by ``op.argmax`` over ``train_output_next_states`` is
    used to read a value out of ``target_output_next_states``. That value
    is scaled by ``self.args.discount``, zeroed where
    ``self.inputs.terminals`` is 1, and added to ``self.inputs.rewards``.

    ``train_output_states`` is accepted but not used by this computation.
    """
    # Double DQN - http://arxiv.org/pdf/1509.06461v3.pdf
    selected_next = op.argmax(train_output_next_states)
    double_q_next = op.get(target_output_next_states, selected_next)

    rewards = op.tofloat(self.inputs.rewards)
    # 1.0 for transitions that continue, 0.0 for terminal ones, so that no
    # future value is added after a terminal transition.
    not_terminal = 1.0 - op.tofloat(self.inputs.terminals)

    return (rewards +
            self.args.discount * not_terminal * op.tofloat(double_q_next))