示例#1
0
class ApproxActionValueFunction(DeepSarsaApproxActionValueFunction):
    """Deep-Sarsa action-value function that forwards its work to a delegate.

    Feature construction, value prediction and persistence are all handed to
    a ``MLPOneLayerActionRecordScaledScalarFeaturesValueFunction`` instance
    that is created in ``setup``. Only the minibatch backup is implemented
    here, on top of the delegate's ``model``.
    """

    def __init__(self, handicappers=None):
        """Remember the handicappers that ``setup`` passes to the delegate.

        Args:
            handicappers: Forwarded unchanged to the delegate's constructor;
                defaults to ``None``.
        """
        # super() must name *this* class. Naming the parent starts the MRO
        # lookup after the parent, which skips the parent's own __init__.
        super(ApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Create the delegate value function and run its ``setup``."""
        self.delegate = MLPOneLayerActionRecordScaledScalarFeaturesValueFunction(
            NB_UNIT, blind_structure, self._handicappers)
        self.delegate.setup()

    def construct_features(self, state, action):
        """Return the delegate's features for ``(state, action)``."""
        return self.delegate.construct_features(state, action)

    def approx_predict_value(self, features):
        """Return the delegate's value prediction for ``features``."""
        return self.delegate.approx_predict_value(features)

    def backup_on_minibatch(self, backup_minibatch):
        """Train the delegate's model on one minibatch of backup targets.

        Args:
            backup_minibatch: Sequence of ``(state, action, target)`` triples.
                It is iterated twice, so it must not be a one-shot iterator.
        """
        # Only element 0 of each construct_features result is fed to the model
        # (presumably the result is a one-row batch -- TODO confirm).
        X = np.array([
            self.delegate.construct_features(state, action)[0]
            for state, action, _target in backup_minibatch
        ])
        Y_info = [(action, target) for _state, action, target in backup_minibatch]
        # Start from the model's current outputs so that only the entry of the
        # action in each sample is replaced by its backup target.
        Y = self.delegate.model.predict_on_batch(X)
        assert len(Y) == len(Y_info)
        for y, (action, target) in zip(Y, Y_info):
            y[action_index(action)] = target
        loss = self.delegate.model.train_on_batch(X, Y)
        self.delegate.loss_history.append(loss)
        # Reset the delegate's prediction cache (presumably because cached
        # predictions are stale once the model has been trained -- confirm).
        self.delegate.prediction_cache = (None, None)

    def save(self, save_dir_path):
        """Ask the delegate to save itself under ``save_dir_path``."""
        self.delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Ask the delegate to load itself from ``load_dir_path``."""
        self.delegate.load(load_dir_path)
示例#2
0
class ApproxActionValueFunction(SarsaApproxActionValueFunction):
    """Sarsa action-value function that forwards every call to a delegate.

    The delegate is a
    ``MLPOneLayerActionRecordScaledScalarFeaturesValueFunction`` instance
    created in ``setup``.
    """

    def __init__(self, handicappers=None):
        """Remember the handicappers that ``setup`` passes to the delegate.

        Args:
            handicappers: Forwarded unchanged to the delegate's constructor;
                defaults to ``None``.
        """
        # super() must name *this* class. Naming the parent starts the MRO
        # lookup after the parent, which skips the parent's own __init__.
        super(ApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Create the delegate value function and run its ``setup``."""
        self.delegate = MLPOneLayerActionRecordScaledScalarFeaturesValueFunction(
            NB_UNIT, blind_structure, self._handicappers)
        self.delegate.setup()

    def construct_features(self, state, action):
        """Return the delegate's features for ``(state, action)``."""
        return self.delegate.construct_features(state, action)

    def approx_predict_value(self, features):
        """Return the delegate's value prediction for ``features``."""
        return self.delegate.approx_predict_value(features)

    def approx_backup(self, features, backup_target, alpha):
        """Forward a single backup step to the delegate.

        Args:
            features: Features as produced by ``construct_features``.
            backup_target: Target value handed to the delegate.
            alpha: Passed through to the delegate (presumably the learning
                rate -- confirm against the delegate's implementation).
        """
        self.delegate.approx_backup(features, backup_target, alpha)

    def save(self, save_dir_path):
        """Ask the delegate to save itself under ``save_dir_path``."""
        self.delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Ask the delegate to load itself from ``load_dir_path``."""
        self.delegate.load(load_dir_path)
示例#3
0
 def setup(self):
     """Build the delegate value function, attach it, and run its setup."""
     value_function = MLPOneLayerActionRecordScaledScalarFeaturesValueFunction(
         NB_UNIT,
         blind_structure,
         self._handicappers,
     )
     # Attach before calling setup() so the attribute is set even if setup raises.
     self.delegate = value_function
     self.delegate.setup()