def __init__(self, handicappers=None):
     """Store ``handicappers`` and create and set up the delegate.

     Builds a ``LinearModelScaledScalarFeaturesValueFunction`` from the
     module-level ``blind_structure`` and the given ``handicappers``,
     keeps it as ``self.delegate`` and calls its ``setup()``.
     """
     # NOTE(review): super() is anchored at
     # DeepQLearningApproxActionValueFunction, so the MRO lookup starts
     # *after* that class and any __init__ it defines itself is skipped
     # -- confirm this is intentional.
     super(DeepQLearningApproxActionValueFunction, self).__init__()
     self.prediction_cache = {}  # (features, prediction)
     self._handicappers = handicappers
     delegate = LinearModelScaledScalarFeaturesValueFunction(
         blind_structure, self._handicappers)
     self.delegate = delegate
     delegate.setup()
class ApproxActionValueFunction(DeepQLearningApproxActionValueFunction):
    """Deep Q-learning value function backed by a delegate.

    Feature construction and network building are forwarded to a
    ``LinearModelScaledScalarFeaturesValueFunction`` held in
    ``self.delegate``.  Network objects are only used through ``layers``,
    ``predict_on_batch``, ``train_on_batch``, ``save_weights`` and
    ``load_weights`` (presumably Keras models -- TODO confirm against
    ``delegate.build_model``).
    """

    # File names of the two weight files written into a save directory.
    Q_NET_SAVE_NAME = "q_weight.h5"
    Q_HAT_NET_SAVE_NAME = "q_hat_weight.h5"

    def __init__(self, handicappers=None):
        """Store ``handicappers`` and create and set up the delegate."""
        # NOTE(review): super() is anchored at the parent class, so the
        # MRO lookup starts *after* DeepQLearningApproxActionValueFunction
        # and any __init__ that class defines itself is skipped --
        # confirm this is intentional.
        super(DeepQLearningApproxActionValueFunction, self).__init__()
        self.prediction_cache = {}  # (features, prediction)
        self._handicappers = handicappers
        delegate = LinearModelScaledScalarFeaturesValueFunction(
            blind_structure, self._handicappers)
        self.delegate = delegate
        delegate.setup()

    def initialize_network(self):
        """Return a new network built by the delegate's ``build_model``."""
        network = self.delegate.build_model()
        return network

    def deepcopy_network(self, q_network):
        """Return a fresh network carrying the weights of ``q_network``.

        A new network is created with ``initialize_network`` and weights
        are copied layer by layer, pairing layers positionally.
        """
        duplicate = self.initialize_network()
        layer_pairs = zip(q_network.layers, duplicate.layers)
        for source_layer, target_layer in layer_pairs:
            target_layer.set_weights(source_layer.get_weights())
        return duplicate

    def predict_value_by_network(self, network, state, action):
        """Return the value ``network`` predicts for ``action`` in ``state``.

        The delegate builds the features and also returns the action that
        is then used to select the output via ``action_index``.
        """
        features, action = self.delegate.construct_features(state, action)
        single_item_batch = np.array([features])
        action_values = network.predict_on_batch(single_item_batch)[0].tolist()
        return action_values[action_index(action)]

    def backup_on_minibatch(self, q_network, backup_minibatch):
        """Train ``q_network`` on one minibatch of backup targets.

        ``backup_minibatch`` is iterated twice and must yield
        ``(state, action, target)`` triples.  The current predictions are
        used as training labels, except that for every sample the entry
        of the taken action is overwritten with its backup target.
        """
        inputs = np.array([
            self.delegate.construct_features(state, action)[0]
            for state, action, _target in backup_minibatch
        ])
        action_targets = [
            (action, target) for _state, action, target in backup_minibatch
        ]
        labels = q_network.predict_on_batch(inputs)
        assert len(labels) == len(action_targets)
        for label_row, (action, target) in zip(labels, action_targets):
            # Only the output of the taken action receives a new target.
            label_row[action_index(action)] = target
        q_network.train_on_batch(inputs, labels)

    def save_networks(self, q_network, q_hat_network, save_dir_path):
        """Write the weights of both networks into ``save_dir_path``."""
        q_weight_path = os.path.join(save_dir_path, self.Q_NET_SAVE_NAME)
        q_network.save_weights(q_weight_path)
        q_hat_weight_path = os.path.join(
            save_dir_path, self.Q_HAT_NET_SAVE_NAME)
        q_hat_network.save_weights(q_hat_weight_path)

    def load_networks(self, load_dir_path):
        """Return ``(q_network, q_hat_network)`` loaded from a directory.

        Each network is created with ``initialize_network`` before its
        weight file is read from ``load_dir_path``.
        """
        restored = []
        for weight_file in (self.Q_NET_SAVE_NAME, self.Q_HAT_NET_SAVE_NAME):
            network = self.initialize_network()
            network.load_weights(os.path.join(load_dir_path, weight_file))
            restored.append(network)
        q_network, q_hat_network = restored
        return q_network, q_hat_network

    def visualize_feature_weights(self):
        """Return the delegate's ``visualize_feature_weights()`` result."""
        delegate = self.delegate
        return delegate.visualize_feature_weights()
 def setup(self):
     """Create the delegate value function and run its ``setup()``.

     The delegate is built from the module-level ``blind_structure`` and
     ``self._handicappers`` and stored as ``self.delegate``.
     """
     value_function = LinearModelScaledScalarFeaturesValueFunction(
         blind_structure, self._handicappers)
     self.delegate = value_function
     value_function.setup()
class ApproxActionValueFunction(SarsaApproxActionValueFunction):
    """Sarsa value function that forwards its work to a delegate.

    ``setup()`` must be called before any other method: it creates the
    ``LinearModelScaledScalarFeaturesValueFunction`` stored in
    ``self.delegate`` that every other method forwards to.
    """

    def __init__(self, handicappers=None):
        """Remember ``handicappers`` for the delegate built in ``setup``."""
        # NOTE(review): super() is anchored at the parent class, so the
        # MRO lookup starts *after* SarsaApproxActionValueFunction and any
        # __init__ that class defines itself is skipped -- confirm this
        # is intentional.
        super(SarsaApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Create the delegate value function and run its ``setup()``."""
        value_function = LinearModelScaledScalarFeaturesValueFunction(
            blind_structure, self._handicappers)
        self.delegate = value_function
        value_function.setup()

    def construct_features(self, state, action):
        """Return the delegate's features for ``state`` and ``action``."""
        delegate = self.delegate
        return delegate.construct_features(state, action)

    def approx_predict_value(self, features):
        """Return the delegate's predicted value for ``features``."""
        delegate = self.delegate
        return delegate.approx_predict_value(features)

    def approx_backup(self, features, backup_target, alpha):
        """Forward one backup step to the delegate; returns ``None``."""
        delegate = self.delegate
        delegate.approx_backup(features, backup_target, alpha)

    def visualize_feature_weights(self):
        """Return the delegate's ``visualize_feature_weights()`` result."""
        delegate = self.delegate
        return delegate.visualize_feature_weights()

    def save(self, save_dir_path):
        """Ask the delegate to save itself under ``save_dir_path``."""
        delegate = self.delegate
        delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Ask the delegate to load itself from ``load_dir_path``."""
        delegate = self.delegate
        delegate.load(load_dir_path)