示例#1
0
 def algorithm(action):
     """Score ``action`` from its reward/cost ratio plus an exploration bonus.

     Returns ``MAX`` when the action's mean cost is exactly zero, which
     avoids dividing by zero in the reward/cost ratio. Otherwise returns
     ``mean_reward / mean_cost`` plus a bonus built from ``ucb(parent.N, N)``,
     scaled by ``c`` and ``min_cost``.

     ``MAX``, ``ucb``, ``c`` and ``min_cost`` are resolved from the
     surrounding scope; they are not defined in this snippet.
     """
     cost = action.mean_cost
     if cost == 0.0:
         return MAX

     exploration = ucb(action.parent.N, action.N)

     # Exploitation term: average reward obtained per unit of cost.
     ratio = action.mean_reward / cost

     # Bonus numerator: the ucb term inflated by (1 + 1 / min_cost).
     inflated = (1. + 1. / min_cost) * exploration

     # NOTE(review): the denominator is zero or negative whenever the ucb
     # term reaches min_cost -- confirm callers guarantee ucb < min_cost.
     bonus = c * inflated / (min_cost - exploration)
     return ratio + bonus
示例#2
0
 def algorithm(action):
     """Score ``action`` as its value plus a budget-scaled exploration term.

     Returns ``MAX`` when the action's mean cost is exactly zero. Otherwise
     returns ``action.V + c0 * parent.budget * ucb(parent.N, N)``.

     ``MAX``, ``ucb`` and ``c0`` are resolved from the surrounding scope;
     they are not defined in this snippet.
     """
     if action.mean_cost == 0.0:
         return MAX

     estimate = action.V
     # The exploration weight grows with the parent's budget attribute.
     weight = c0 * action.parent.budget
     bonus = weight * ucb(action.parent.N, action.N)
     return estimate + bonus
示例#3
0
 def algorithm(action):
     """Score ``action`` as its value plus a weighted exploration term.

     Returns ``action.V + c * ucb(parent.N, N)``. ``c`` and ``ucb`` are
     resolved from the surrounding scope; they are not defined in this
     snippet.
     """
     estimate = action.V
     bonus = c * ucb(action.parent.N, action.N)
     return estimate + bonus