def __init__(self, args):
    """Set up a contextual policy that runs on artificially generated contexts.

    :param args: policy arguments. The original note describes them as the
        arm-count parameter plus the per-arm "travel" parameters;
        ``args[1]`` is stored as ``alpha`` and ``args[2:]`` as
        ``travel_args``.
    """
    Policy.__init__(self, args)
    self.alpha = args[1]
    # Arm mean parameters passed through from the caller.
    self.travel_args = args[2:]

    context_dim = 3  # context dimension
    # NOTE(review): n_bandits is presumably set by Policy.__init__ -- confirm.
    arm_count = self.n_bandits
    self.d = context_dim
    # One identity matrix and one zero vector per arm.
    self.A = np.array([np.identity(context_dim) for _ in range(arm_count)])
    self.b = np.array([np.zeros(context_dim) for _ in range(arm_count)])
    self.context = None
def __init__(self, args):
    """Initialise the base policy and read the tries-per-slot setting.

    :param args: policy arguments; ``args[1]`` is converted to ``int`` and
        stored as ``try_perSlot``.
    """
    Policy.__init__(self, args)
    tries = int(args[1])
    self.try_perSlot = tries
def __init__(self, args):
    """Initialise the base policy and read the temperature settings.

    :param args: policy arguments; ``args[1]`` is the temperature and
        ``args[2]`` enables annealing when it is greater than ``0.0``.
    """
    Policy.__init__(self, args)
    # Cooling parameter: the higher the temperature, the more random the
    # molecules (a gas); at low temperature they are orderly (a solid).
    self.temperature = args[1]
    anneal_switch = args[2]
    self.anneal = anneal_switch > 0.0
def __init__(self, args):
    """Initialise the policy by delegating entirely to ``Policy.__init__``.

    :param args: policy arguments, forwarded unchanged to the base class.
    """
    Policy.__init__(self, args)
def __init__(self, args):
    """Initialise the base policy and read the decay settings.

    :param args: policy arguments; ``args[1]`` is stored as ``decay`` and
        ``args[2]`` enables annealing when it is greater than ``0.0``.
    """
    Policy.__init__(self, args)
    anneal_switch = args[2]
    self.anneal = anneal_switch > 0.0
    self.decay = args[1]
def __init__(self, args):
    """Initialise the base policy and the per-arm Beta distribution parameters.

    :param args: policy arguments; ``args[1]`` and ``args[2]`` form the
        initial two-element parameter pair given to every arm.
    """
    Policy.__init__(self, args)
    # NOTE(review): n_bandits is presumably set by Policy.__init__ -- confirm.
    arm_count = self.n_bandits
    # Record the Beta distribution parameters of each arm; every arm gets
    # its own fresh two-element list.
    self.betaArgs = [[args[1], args[2]] for _arm in range(arm_count)]
def __init__(self, args):
    """Initialise the base policy, ``gamma`` and the per-arm weights.

    :param args: policy arguments; ``args[1]`` is stored as ``gamma``.
    """
    Policy.__init__(self, args)
    self.gamma = args[1]
    # One weight per arm, all starting at 1. The array is float on purpose:
    # an integer array would truncate any fractional weight written back
    # into it and reject in-place float multiplication.
    # NOTE(review): assumes weights are later updated with non-integer
    # factors -- confirm against the update code.
    self._weights = np.ones(self.n_bandits, dtype=float)
    self._probs = None
def __init__(self, args):
    """Initialise the base policy and a per-arm ``squared_reward`` list.

    :param args: policy arguments, forwarded unchanged to the base class.
    """
    Policy.__init__(self, args)
    # One 0.0 entry per arm.
    # NOTE(review): presumably accumulates squared rewards per arm -- confirm.
    self.squared_reward = [0.0 for _ in range(self.n_bandits)]