def __init__(self, handicappers=None):
    """Initialize the value function: prediction cache plus a linear
    binary/one-hot feature delegate built from the module-level
    ``blind_structure``.

    :param handicappers: optional handicapper config forwarded to the
        delegate (``None`` by default).
    """
    # NOTE(review): the explicit-class super() skips
    # DeepQLearningApproxActionValueFunction's own __init__ and runs the
    # grandparent's instead — presumably intentional; confirm.
    super(DeepQLearningApproxActionValueFunction, self).__init__()
    self._handicappers = handicappers
    # Cache of previously computed predictions, keyed by features.
    self.prediction_cache = {}
    delegate = LinearModelBinaryOnehotFeaturesValueFunction(
        blind_structure,
        self._handicappers,
    )
    delegate.setup()
    self.delegate = delegate
class ApproxActionValueFunction(DeepQLearningApproxActionValueFunction):
    """Deep Q-learning action-value function.

    Delegates feature construction and model building to a
    ``LinearModelBinaryOnehotFeaturesValueFunction`` and manages the
    Q-network / target (Q-hat) network pair, including persistence.
    """

    # File names used when saving/loading network weights.
    Q_NET_SAVE_NAME = "q_weight.h5"
    Q_HAT_NET_SAVE_NAME = "q_hat_weight.h5"

    def __init__(self, handicappers=None):
        # NOTE(review): explicit-class super() skips
        # DeepQLearningApproxActionValueFunction's own __init__ and runs the
        # grandparent's — presumably intentional (setup happens here); confirm.
        super(DeepQLearningApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers
        self.prediction_cache = {}  # maps features -> prediction
        self.delegate = LinearModelBinaryOnehotFeaturesValueFunction(
            blind_structure, self._handicappers)
        self.delegate.setup()

    def initialize_network(self):
        """Build and return a fresh (untrained) Q-network model."""
        return self.delegate.build_model()

    def deepcopy_network(self, q_network):
        """Return a new network whose weights are copied layer-by-layer
        from ``q_network`` (used to refresh the target network)."""
        q_hat_network = self.initialize_network()
        for original_layer, copy_layer in zip(
                q_network.layers, q_hat_network.layers):
            copy_layer.set_weights(original_layer.get_weights())
        return q_hat_network

    def predict_value_by_network(self, network, state, action):
        """Predict the Q-value of ``action`` in ``state`` using ``network``."""
        # construct_features also returns the action to score with (it may
        # differ from the raw input), so bind it under a distinct name
        # instead of shadowing the parameter.
        features, resolved_action = self.delegate.construct_features(
            state, action)
        values = network.predict_on_batch(np.array([features]))[0].tolist()
        # Fixed typo: was `valur_for_action`.
        value_for_action = values[action_index(resolved_action)]
        return value_for_action

    def backup_on_minibatch(self, q_network, backup_minibatch):
        """Train ``q_network`` on a minibatch of (state, action, target).

        Targets are written only into the slot of the taken action; the
        other action slots keep the network's own predictions so their
        gradients are zero.

        :return: the training loss from ``train_on_batch`` (previously
            computed but discarded).
        """
        X = np.array([
            self.delegate.construct_features(state, action)[0]
            for state, action, target in backup_minibatch
        ])
        Y_info = [(action, target) for _state, action, target in backup_minibatch]
        Y = q_network.predict_on_batch(X)
        assert len(Y) == len(Y_info)
        for y, (action, target) in zip(Y, Y_info):
            y[action_index(action)] = target
        return q_network.train_on_batch(X, Y)

    def save_networks(self, q_network, q_hat_network, save_dir_path):
        """Persist both networks' weights under ``save_dir_path``."""
        q_network.save_weights(
            os.path.join(save_dir_path, self.Q_NET_SAVE_NAME))
        q_hat_network.save_weights(
            os.path.join(save_dir_path, self.Q_HAT_NET_SAVE_NAME))

    def load_networks(self, load_dir_path):
        """Build two fresh networks, load saved weights into them, and
        return ``(q_network, q_hat_network)``."""
        q_network = self.initialize_network()
        q_network.load_weights(
            os.path.join(load_dir_path, self.Q_NET_SAVE_NAME))
        q_hat_network = self.initialize_network()
        q_hat_network.load_weights(
            os.path.join(load_dir_path, self.Q_HAT_NET_SAVE_NAME))
        return q_network, q_hat_network

    def visualize_feature_weights(self):
        """Delegate feature-weight visualization to the linear model."""
        return self.delegate.visualize_feature_weights()
def setup(self):
    """Create the linear binary/one-hot feature delegate from the
    module-level ``blind_structure`` and initialize it."""
    value_fn = LinearModelBinaryOnehotFeaturesValueFunction(
        blind_structure,
        self._handicappers,
    )
    value_fn.setup()
    self.delegate = value_fn
class ApproxActionValueFunction(QLearningApproxActionValueFunction):
    """Q-learning action-value function that forwards every feature,
    prediction, update, and persistence call to a
    ``LinearModelBinaryOnehotFeaturesValueFunction`` delegate.
    """

    def __init__(self, handicappers=None):
        # NOTE(review): explicit-class super() skips
        # QLearningApproxActionValueFunction's own __init__ and runs the
        # grandparent's — presumably intentional; confirm.
        super(QLearningApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Build and initialize the delegate value function."""
        self.delegate = LinearModelBinaryOnehotFeaturesValueFunction(
            blind_structure, self._handicappers)
        self.delegate.setup()

    def construct_features(self, state, action):
        """Feature vector for (state, action), via the delegate."""
        return self.delegate.construct_features(state, action)

    def approx_predict_value(self, features):
        """Predicted value for the given feature vector."""
        return self.delegate.approx_predict_value(features)

    def approx_backup(self, features, backup_target, alpha):
        """One learning update toward ``backup_target`` with step ``alpha``."""
        self.delegate.approx_backup(features, backup_target, alpha)

    def visualize_feature_weights(self):
        """Readable representation of the learned feature weights."""
        return self.delegate.visualize_feature_weights()

    def save(self, save_dir_path):
        """Persist the delegate's state under ``save_dir_path``."""
        self.delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Restore the delegate's state from ``load_dir_path``."""
        self.delegate.load(load_dir_path)