def initScript():
    """Initialize the simulation session.

    Builds the model-parameter object, opens the worker socket, creates
    the reward object, and performs the initial handshake with the remote
    worker.  All state is stored in the module-level GDict; relies on the
    globals GObject, GSolver, MVec3 and the modules mp, wrk, reward being
    in scope.
    """
    # Get parameters and inputs lists
    GDict['parameters'] = mp.ModelParameters()
    GDict['parameters'].make_param(GObject, GSolver, GDict)
    GDict['parameters'].get_parameters()
    GDict['parameters'].get_model_inputs()
    GDict['script'] = GDict['parameters'].script_name
    GDict['script_input'] = GDict['parameters'].script_input
    # Create socket to the remote worker
    GDict['worker'] = wrk.Worker(GDict['parameters'].port)
    # Get reward object
    GDict['reward'] = reward.Reward(GObject, GSolver, GDict, MVec3)
    # Get observation params
    param_values = GDict['parameters'].get_parameters_values()
    # Send request with init worker state.
    # NOTE(review): 'in' looks like an "init" opcode and 0/True the initial
    # time/reset flags — confirm against the worker protocol.
    request = [param_values, 0, True, 'in']
    GDict['worker'].send(request)
    # Get reply that reset successful (reply is currently unused)
    reply = GDict['worker'].recv()
    #print("Finished", reply)
    # Per-episode bookkeeping state
    GDict['done'] = False
    GDict['episodeTime'] = GDict['parameters'].episode_duration
    GDict['routerState'] = 'st'
    GDict['delay'] = GDict['parameters'].delay
    GDict['oldTime'] = 0
    # Let the reward object run its custom episode-start hook
    GDict['reward'].custom_start()
def reputationUp(self):
    """Rank the player up with the Pirate's Alliance when eligible.

    Spends 2000 reputation once the player has at least 2000 reputation at
    the 'Neutral' rank, announces the promotion, and builds the rank-up
    reward (100 XP, 50 gold, and an item).

    Returns the created Reward on promotion, otherwise None (callers that
    ignored the original implicit None return are unaffected).
    """
    # NOTE(review): reputationName is never advanced past 'Neutral' here,
    # so every later call keeps deducting 2000 while eligible — confirm
    # whether the rank name should be updated as part of the promotion.
    if self.inventory.reputation >= 2000 and self.inventory.reputationName == 'Neutral':
        self.inventory.reputation -= 2000
        print(
            "You have become friendly with the Pirate's Alliance. As a token of their generosity, they gave 50 gold and 100 experience, as well as a cool looking hat."
        )
        # Bug fix: the original assigned to a local named `reward`, which
        # shadowed the `reward` module for the whole function body and made
        # the call `reward.Reward(...)` raise UnboundLocalError.  Use a
        # distinct local name for the constructed object.
        rank_reward = reward.Reward(
            "Pirate's Alliance", 100, 50, 0,
            [item.Item("old pirate hat", "old but still gold", 22, 35)])
        # NOTE(review): the original discarded this object — presumably
        # Reward.__init__ applies itself to the player; returning it keeps
        # it reachable either way.
        return rank_reward
def init():
    """Set up pygame, open the main window, and build the shared game actors."""
    global playerObj, aiObj, rewardObj, screen

    # Ask SDL to center the window before pygame brings up the display.
    os.environ["SDL_VIDEO_CENTERED"] = '1'
    pygame.init()

    window_size = (st.windowWidth, st.windowHeight)
    screen = pygame.display.set_mode(window_size)

    # Module-level game entities used by the rest of the program.
    playerObj = player.Player()
    aiObj = ai.AI()
    rewardObj = rwd.Reward()
def main():
    """Run the mouse-controlled breakout game loop.

    Handles the pygame event queue (quit, mouse drag, power-up timers),
    moves the balls, resolves wall/paddle/brick collisions, spawns and
    applies falling power-ups, and redraws the frame until the process
    exits via the window-close event.
    """
    score = 0
    running = True
    moving = False
    is_invincible = False
    is_lengthenboard = False
    score_font = pygame.font.Font("font/font.ttf", 36)
    creat_brick()
    # Create the animated sprites (paddle and first ball)
    boards = board.Board(bg_size)
    group_board.add(boards)
    balls = ball.Ball(bg_size)
    group_ball.add(balls)
    # Define the power-up expiry timer events
    INVINCIBLE_TIME = USEREVENT
    LENGTHENBOARD_TIME = USEREVENT + 1
    SPEEDUPBALL_TIME = USEREVENT + 2
    # Loop the background music forever
    pygame.mixer.music.play(-1)
    # Main loop
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                pygame.quit()
                sys.exit()
            if event.type == MOUSEBUTTONDOWN:
                if event.button == 1:
                    # Only start dragging when the click lands inside the paddle
                    mouse_position = pygame.mouse.get_pos()
                    if boards.rect[0] < mouse_position[0] < boards.rect[0] + boards.rect[2] and \
                            boards.rect[1] < mouse_position[1] < boards.rect[1] + boards.rect[3]:
                        moving = True
            if event.type == MOUSEBUTTONUP:
                if event.button == 1:
                    moving = False
            # Power-up expiry: invincibility ends
            if event.type == INVINCIBLE_TIME:
                is_invincible = False
                pygame.time.set_timer(INVINCIBLE_TIME, 0)
            # Power-up expiry: paddle returns to normal length
            if event.type == LENGTHENBOARD_TIME:
                boards.change_object()
                is_lengthenboard = False
                pygame.time.set_timer(LENGTHENBOARD_TIME, 0)
            # Power-up expiry: restore normal ball speed (halve the doubled speed)
            if event.type == SPEEDUPBALL_TIME:
                for balls in group_ball:
                    if balls.ball_speed not in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                        balls.ball_speed[0] //= 2
                        balls.ball_speed[1] //= 2
                pygame.time.set_timer(SPEEDUPBALL_TIME, 0)
        screen.blit(background, (0, 0))
        # Drag the paddle; offsets differ between normal and lengthened paddle
        if moving:
            mouse_position = pygame.mouse.get_pos()
            if 450 <= mouse_position[1] <= 573 and 800 >= mouse_position[0] >= 50 and not is_lengthenboard:
                boards.rect = (mouse_position[0] - 62, 535, boards.rect[2], boards.rect[3])
            elif 450 <= mouse_position[1] <= 573 and 825 >= mouse_position[0] >= 65 and is_lengthenboard:
                boards.rect = (mouse_position[0] - 90, 535, boards.rect[2], boards.rect[3])
            group_board.add(boards)
        # Draw the paddle (both branches draw the same way; kept as written)
        if not is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        if is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        # Collision handling for every ball.
        # NOTE(review): this loop reuses the name `balls` and removes sprites
        # from group_ball while iterating it — pygame Group iteration copes,
        # but the aliasing is fragile.
        for balls in group_ball:
            # Speed-up power-up: double this ball's velocity once
            if balls.is_speedupball:
                balls.ball_speed[0] *= 2
                balls.ball_speed[1] *= 2
                balls.is_speedupball = False
            # Ball vs. window edges
            if balls.rect.left < 0:
                # Clamp to the edge so the ball can't get stuck re-colliding
                balls.rect.left = 0
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.right > width:
                balls.rect.right = width
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.bottom > height:
                if is_invincible:
                    # Invincibility: bounce off the bottom instead of dying
                    balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
                elif not is_invincible:
                    # Ball lost
                    group_ball.remove(balls)
            elif balls.rect.top < 0:
                balls.rect.top = 0
                balls.ball_speed[1] = -balls.ball_speed[1]
                balls.is_hit = False
            # Ball vs. paddle
            hit_ball = pygame.sprite.spritecollide(balls, group_board, False, pygame.sprite.collide_mask)
            if hit_ball:
                # Determine which face of the paddle was hit
                board_hit_top = board_collide(balls.rect, boards.rect, balls.ball_speed)
                if not balls.is_hit:
                    if not board_hit_top:
                        # Side hit: reflect both components
                        balls.ball_speed[0] = -balls.ball_speed[0]
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    else:
                        # Top hit: reflect vertically only
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = True
            # Ball vs. bricks
            hit_board = pygame.sprite.spritecollide(balls, group_brick, False, pygame.sprite.collide_mask)
            if hit_board:
                for each in hit_board:
                    brick_hit_top = brick_collide(balls.rect, each.rect, balls.ball_speed)
                    if not brick_hit_top:
                        balls.ball_speed[0] = -balls.ball_speed[0]
                    else:
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    # Bricks downgrade image3 -> image2 -> image1 -> removed,
                    # scoring 30/20/10 respectively
                    if each.curimage == each.image1:
                        score += 10
                        group_brick.remove(each)
                    elif each.curimage == each.image2:
                        score += 20
                        each.curimage = each.image1
                    elif each.curimage == each.image3:
                        score += 30
                        each.curimage = each.image2
                    # Roll a random power-up for this brick hit
                    rewards = reward.Reward(bg_size)
                    rewards.generate_reward()
                    balls.is_hit = False
                    # image5 presumably means "no drop" — TODO confirm
                    if rewards.current_image != rewards.image5:
                        rewards.rect = balls.rect
                        group_reward.add(rewards)
            # Move and draw the ball
            balls.rect = balls.rect.move(balls.ball_speed)
            screen.blit(balls.image1, balls.rect)
        # Power-up (reward) handling
        for rewards in group_reward:
            hit_reward = pygame.sprite.spritecollide(rewards, group_board, False, pygame.sprite.collide_mask)
            if hit_reward:
                # Invincibility (10s)
                if rewards.current_image == rewards.image1:
                    is_invincible = True
                    pygame.time.set_timer(INVINCIBLE_TIME, 10 * 1000)
                # Lengthened paddle (10s)
                elif rewards.current_image == rewards.image2:
                    if is_lengthenboard == False:
                        boards.change_object()
                        is_lengthenboard = True
                    pygame.time.set_timer(LENGTHENBOARD_TIME, 10 * 1000)
                # Extra ball
                elif rewards.current_image == rewards.image3:
                    balls = ball.Ball(bg_size)
                    group_ball.add(balls)
                # Ball speed-up (10s); only balls at base speed are doubled
                elif rewards.current_image == rewards.image4:
                    for balls in group_ball:
                        if balls.ball_speed in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                            balls.is_speedupball = True
                    pygame.time.set_timer(SPEEDUPBALL_TIME, 10 * 1000)
                group_reward.remove(rewards)
            # Drop off the bottom of the screen: discard
            if rewards.rect.bottom > height:
                group_reward.remove(rewards)
            screen.blit(rewards.current_image, rewards.rect)
            rewards.rect = rewards.rect.move(reward_speed)
        # Draw the bricks (image4 marks a destroyed brick)
        for bricks in group_brick:
            if bricks.curimage == bricks.image4:
                group_brick.remove(bricks)
            screen.blit(bricks.curimage, bricks.rect)
        # Draw the score
        score_text = score_font.render("Score : %s" % str(score), True, (255, 255, 255))
        screen.blit(score_text, (5, 530))
        pygame.display.flip()
        clock.tick(FPS)
def main():
    """Run the keyboard-controlled breakout game loop (variant with lives).

    The ball launches on SPACE, the paddle moves with LEFT/RIGHT, and the
    player loses a life (a sprite in group_healths) each time all balls are
    lost; the process exits when the last life is gone or the window closes.
    """
    score = 0
    is_remove = False          # flag: remove one life sprite on next draw pass
    running = True
    is_invincible = False
    is_lengthenboard = False
    score_font = pygame.font.Font("font/font.ttf", 36)
    creat_brick()
    creat_health()
    # Create the animated sprites
    boards = board.Board(bg_size)  # the paddle
    group_board.add(boards)
    balls = ball.Ball(boards)      # the ball, parked on the paddle
    balls.ball_speed = [0, 0]      # stationary until launched with SPACE
    is_moving = False
    group_ball.add(balls)
    # Define the power-up expiry timer events
    INVINCIBLE_TIME = USEREVENT  #24
    LENGTHENBOARD_TIME = USEREVENT + 1
    SPEEDUPBALL_TIME = USEREVENT + 2
    # Loop the background music (disabled)
    # pygame.mixer.music.play(-1)
    # Main loop
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                #pygame.quit()
                sys.exit()
            # SPACE launches the ball
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_SPACE and not is_moving:
                    is_moving = True
                    balls.ball_speed = [3, -3]
            # Power-up expiry: invincibility ends
            if event.type == INVINCIBLE_TIME:
                is_invincible = False
                pygame.time.set_timer(INVINCIBLE_TIME, 0)
            # Power-up expiry: paddle returns to normal length
            if event.type == LENGTHENBOARD_TIME:
                boards.change_object(boards.rect.left)
                is_lengthenboard = False
                pygame.time.set_timer(LENGTHENBOARD_TIME, 0)
            # Power-up expiry: restore normal ball speed (halve the doubled speed)
            if event.type == SPEEDUPBALL_TIME:
                for balls in group_ball:
                    if balls.ball_speed not in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                        balls.ball_speed[0] //= 2
                        balls.ball_speed[1] //= 2
                pygame.time.set_timer(SPEEDUPBALL_TIME, 0)
        screen.blit(background, (0, 0))
        # Paddle movement: reuses the LAST event from the queue above.
        # NOTE(review): if the event queue was empty on the very first frame,
        # `event` is unbound here (NameError) — confirm and guard upstream.
        boardSpeed = 0
        if event.type == pygame.KEYDOWN and is_moving:
            if event.key == pygame.K_LEFT:
                boardSpeed = -8 if boards.rect.left > 0 else 0
            elif event.key == pygame.K_RIGHT:
                boardSpeed = 8 if boards.rect.right < width else 0
        boards.rect.left += boardSpeed
        group_board.add(boards)
        # Draw the paddle (both branches draw the same way; kept as written)
        if not is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        if is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        # Collision handling for every ball.
        # NOTE(review): the loop reuses the name `balls` and mutates
        # group_ball while iterating it — fragile aliasing, kept as-is.
        for balls in group_ball:
            # Speed-up power-up: double this ball's velocity once
            if balls.is_speedupball:
                balls.ball_speed[0] *= 2
                balls.ball_speed[1] *= 2
                balls.is_speedupball = False
            # Ball vs. window edges
            if balls.rect.left < 0:
                ball_wall_sound.play()
                # Clamp to the edge so the ball can't get stuck re-colliding
                balls.rect.left = 0
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.right > width:
                ball_wall_sound.play()
                balls.rect.right = width
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.bottom > height:
                if is_invincible:
                    # Invincibility: bounce off the bottom instead of dying
                    balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
                elif not is_invincible:
                    # Ball lost
                    group_ball.remove(balls)
                    if len(group_ball) == 0:
                        if len(group_healths) > 1:
                            # Spend one life and re-park a fresh ball on the paddle
                            is_remove = True
                            balls = ball.Ball(boards)
                            group_ball.add(balls)
                            is_moving = False
                            balls.ball_speed = [0, 0]
                            group_board.remove(boards)
                            group_board.add(boards)
                        else:
                            # Last life gone: quit
                            sys.exit()
                    #balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
            elif balls.rect.top < 0:
                ball_wall_sound.play()
                balls.rect.top = 0
                balls.ball_speed[1] = -balls.ball_speed[1]
                balls.is_hit = False
            # Ball vs. paddle
            hit_ball = pygame.sprite.spritecollide(balls, group_board, False, pygame.sprite.collide_mask)
            if hit_ball:
                # Determine which face of the paddle was hit
                board_hit_top = board_collide(balls.rect, boards.rect, balls.ball_speed)
                if not balls.is_hit:
                    ball_wall_sound.play()
                    if not board_hit_top:
                        # Side hit: reflect both components
                        balls.ball_speed[0] = -balls.ball_speed[0]
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    else:
                        # Top hit: reflect vertically only
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = True
            # Ball vs. bricks
            hit_board = pygame.sprite.spritecollide(balls, group_brick, False, pygame.sprite.collide_mask)
            if hit_board:
                for each in hit_board:
                    brick_hit_top = brick_collide(balls.rect, each.rect, balls.ball_speed)
                    if not brick_hit_top:
                        balls.ball_speed[0] = -balls.ball_speed[0]
                    else:
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    ball_brick_sound.play()
                    # Bricks downgrade image3 -> image2 -> image1 -> removed,
                    # scoring 30/20/10 respectively
                    if each.curimage == each.image1:
                        score += 10
                        group_brick.remove(each)
                        if len(group_brick) == 0:
                            pass  #new_game
                    elif each.curimage == each.image2:
                        score += 20
                        each.curimage = each.image1
                    elif each.curimage == each.image3:
                        score += 30
                        each.curimage = each.image2
                    # Roll a random power-up for this brick hit
                    rewards = reward.Reward(bg_size)
                    rewards.generate_reward()
                    balls.is_hit = False
                    # image5 presumably means "no drop" — TODO confirm
                    if rewards.current_image != rewards.image5:
                        rewards.rect = balls.rect
                        group_reward.add(rewards)
            # Move and draw the ball
            balls.rect = balls.rect.move(balls.ball_speed)
            screen.blit(balls.image1, balls.rect)
        # Power-up (reward) handling
        for rewards in group_reward:
            hit_reward = pygame.sprite.spritecollide(
                rewards, group_board, False, pygame.sprite.collide_mask)
            if hit_reward:
                # Invincibility (10s)
                if rewards.current_image == rewards.image1:
                    is_invincible = True
                    pygame.time.set_timer(INVINCIBLE_TIME, 10 * 1000)
                # Lengthened paddle (10s)
                elif rewards.current_image == rewards.image2:
                    if is_lengthenboard == False:
                        boards.change_object(boards.rect.left)
                        is_lengthenboard = True
                    pygame.time.set_timer(LENGTHENBOARD_TIME, 10 * 1000)
                # Extra ball
                elif rewards.current_image == rewards.image3:
                    balls = ball.Ball(boards)
                    group_ball.add(balls)
                # Ball speed-up (10s); only balls at base speed are doubled
                elif rewards.current_image == rewards.image4:
                    for balls in group_ball:
                        if balls.ball_speed in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                            balls.is_speedupball = True
                    pygame.time.set_timer(SPEEDUPBALL_TIME, 10 * 1000)
                group_reward.remove(rewards)
            # Drop off the bottom of the screen: discard
            if rewards.rect.bottom > height:
                group_reward.remove(rewards)
            screen.blit(rewards.current_image, rewards.rect)
            rewards.rect = rewards.rect.move(reward_speed)
        # Draw the life counter; drop one sprite if a life was just spent
        for healths in group_healths:
            if is_remove:
                group_healths.remove(healths)
                is_remove = False
            screen.blit(healths.image, healths.rect)
        # Draw the bricks (image4 marks a destroyed brick)
        for bricks in group_brick:
            if bricks.curimage == bricks.image4:
                group_brick.remove(bricks)
            screen.blit(bricks.curimage, bricks.rect)
        # Draw the score
        score_text = score_font.render("Score : %s" % str(score), True,
                                       (255, 255, 255))
        screen.blit(score_text, (5, 530))
        pygame.display.flip()
        clock.tick(FPS)
startingX = World_width / 2 startingY = World_height / 2 #breeding conditions initiation_max_change = [0.5, 1] # list of all the bots that where generated, a bot is added when they die all_bots = [] # create a visualiser visWin = vis.Display(World_width, World_height) #rewards #create the cirlce for the reward apple = reward.Reward(World_width, World_height) vis_apple = visWin._createCircle(apple.position[0], apple.position[1], reward.Radius, reward.Colour) # genereate the initial group of bots initialising_time = 0 alive_bots = [] i = 0 while i < number_of_bots_alive: brainNum = "_yellow" colour = 'yellow' if i % 2 == 0: brainNum = "_blue" colour = 'blue' initial_bot = bot.Bot(initialising_time,
def world_highway_truck_cut_in(initial_states='truck_cut_in_far_overtaking',
                               interaction_data=None, init_planner=True):
    """Build the highway truck-cut-in world: lanes, robot/human/truck cars,
    their reward functions, and (for hierarchical robots) strategic values.

    Args:
        initial_states: key passed to get_initial_states() selecting the
            initial robot/human/truck states.
        interaction_data: forwarded to the World constructor.
        init_planner: when True, calls init_planner() on every car that has one.

    Returns:
        The constructed World.

    NOTE(review): the planner-initialization section uses Python-2 print
    statements (`print 'x'`) while the rest uses print(...) — this module
    appears to target Python 2.
    """
    # If very long horizon, increase bracket depth in Theano compilation to avoid
    # "fatal error: bracket nesting level exceeded maximum of 256."
    if config.HORIZON > 10:
        th.config.gcc.cxxflags = '-fbracket-depth=1024'
    # lanes
    left_lane = lane.StraightLane([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS)
    right_lane = left_lane.shifted(-1)
    lanes = [left_lane, right_lane]
    # roads
    roads = [left_lane]
    # fences (road boundaries)
    right_fence = lane.Fence([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS, side=1)
    left_fence = lane.Fence([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS, side=-1)
    fences = [right_fence.shifted(-1.5), left_fence.shifted(0.5)]
    # dynamics
    dyn = dynamics.CarDynamics
    # asynchronous setting
    config.ASYNCHRONOUS = False
    # MATLAB file setup: pick the strategic-value .mat file from the
    # fine-behind flag and the human rationality parameter beta.
    # NOTE(review): when human_beta is None, mat_name is never assigned here
    # but is used below (HierarchicalCar / StrategicValue) — potential
    # NameError on that path; confirm the intended configuration space.
    fine_behind_h = config.FINE_BEHIND_H
    fine_behind_r = config.FINE_BEHIND_R
    human_beta = config.HUMAN_BETA
    strat_val_data_dir = config.TRUCK_CUT_IN_STRATEGIC_VALUE_DATA_DIR
    if fine_behind_h == True:
        if human_beta is None:
            print('Not using strategic value.')
        elif human_beta == float('inf'):
            # then rational human (= deterministic).
            mat_name = strat_val_data_dir + 'DSG_fine_det.mat'
        else:
            beta_str = '%.2E' % Decimal(config.HUMAN_BETA)
            mat_name = strat_val_data_dir + 'beta_{0}/DSG_fine_beta_{0}.mat'.format(
                beta_str)
    else:
        if human_beta is None:
            print('Not using strategic value.')
        elif human_beta == float('inf'):
            # then rational human (= deterministic).
            mat_name = strat_val_data_dir + 'DSG_not_fine_det.mat'
        else:
            # NOTE(review): this not-fine branch reuses the 'DSG_fine_beta'
            # filename — looks like it should be a 'not_fine' variant; confirm.
            beta_str = '%.2E' % Decimal(config.HUMAN_BETA)
            mat_name = strat_val_data_dir + 'beta_{0}/DSG_fine_beta_{0}.mat'.format(
                beta_str)
    # Strategic state projection (use strategic grid dimension to get the correct
    # projection)
    proj = eval('projection.ProjectionTruckCutInStrategicValue{0}D'.format(config.STRAT_DIM))()
    # Initial states
    x0_r, x0_h, x0_t = get_initial_states(initial_states)
    # Robot car setup
    #ref_speed_r = constants.METERS_TO_VIS * 35.0
    ref_speed_r = constants.METERS_TO_VIS * 34.0  # speed for gap creation
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        robot_car = car.HierarchicalCar(x0_r, constants.DT, dyn,
                                        constants.CAR_CONTROL_BOUNDS,
                                        horizon=config.HORIZON,
                                        color=constants.COLOR_R,
                                        name=constants.NAME_R,
                                        mat_name=mat_name,
                                        use_second_order=config.USE_SECOND_ORDER,
                                        proj=proj,
                                        strat_dim=config.STRAT_DIM)
    elif config.ROBOT_CAR == 'car.NestedCar':
        robot_car = car.NestedCar(x0_r, constants.DT, dyn,
                                  constants.CAR_CONTROL_BOUNDS,
                                  horizon=config.HORIZON,
                                  color=constants.COLOR_R,
                                  name=constants.NAME_R,
                                  use_second_order=config.USE_SECOND_ORDER)
    elif config.ROBOT_CAR == 'car.PredictReactCar':
        robot_car = car.PredictReactCar(x0_r, constants.DT, dyn,
                                        constants.CAR_CONTROL_BOUNDS,
                                        horizon=config.HORIZON,
                                        color=constants.COLOR_R,
                                        name=constants.NAME_R)
    elif config.ROBOT_CAR == 'car.IteratedBestResponseCar':
        robot_car = car.IteratedBestResponseCar(x0_r, constants.DT, dyn,
                                                constants.CAR_CONTROL_BOUNDS,
                                                horizon=config.HORIZON,
                                                color=constants.COLOR_R,
                                                name=constants.NAME_R)
    else:
        print('"{0}" is currently an unsupported robot car type'.format(config.ROBOT_CAR))
        sys.exit()
    # Human car setup
    ref_speed_h = constants.METERS_TO_VIS * 31.
    # x0_h[3]
    human_car = eval(config.HUMAN_CAR)(x0_h, constants.DT, dyn,
                                       constants.CAR_CONTROL_BOUNDS,
                                       horizon=config.HORIZON,
                                       color=constants.COLOR_H,
                                       name=constants.NAME_H)
    # Truck setup
    truck = car.Truck(x0_t, constants.DT, dyn, constants.CAR_CONTROL_BOUNDS,
                      horizon=config.HORIZON, color=constants.COLOR_TRUCK,
                      name=constants.NAME_TRUCK)
    # Information structure
    robot_car.human = human_car  # robot creates its own traj for human
    if human_car.is_follower:
        # give follower car access to the robot car
        human_car.robot = robot_car
        human_car.traj_r = robot_car.traj  # human knows the robot traj
    robot_car.truck = truck
    human_car.truck = truck
    # world setup
    cars = [robot_car, human_car, truck]
    name = 'world_highway_truck_cut_in'
    world = World(name, constants.DT, cars, robot_car, human_car, lanes, roads,
                  fences, interaction_data=interaction_data)
    # rewards
    w_lanes = [4., 2.]
    w_control = -0.1
    w_bounded_control_h = -50.0  # bounded control weight for human
    w_bounded_control_r = -50.0  # bounded control weight for robot
    # Rewards and strategic value modeled by the robot
    # (for both human and robot, respectively)
    if config.R_BELIEF_H_KNOWS_TRAJ_R:
        # robot believes human knows robot trajectory
        robot_r_h_traj = robot_car.traj
    else:
        # robot believes human doesn't know robot trajectory
        robot_r_h_traj = robot_car.traj_linear
    robot_r_h = reward.Reward(world, [robot_r_h_traj],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_h,
                              speed=ref_speed_h,
                              fine_behind=fine_behind_h, is_human=True)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    robot_r_r = reward.Reward(world, [robot_car.traj_h],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_r,
                              speed=ref_speed_r,
                              fine_behind=fine_behind_r)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    if config.PREDICT_HUMAN_IGNORES_ROBOT:
        # Reward for a human that ignores the existence of the robot.
        robot_r_h = reward.Reward(world, other_car_trajs=[], w_lanes=w_lanes,
                                  w_control=w_control,
                                  w_bounded_control=w_bounded_control_h,
                                  speed=ref_speed_h,
                                  fine_behind=fine_behind_h, is_human=True)
    robot_car.reward = robot_r_r
    robot_car.reward_h = robot_r_h
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        # Robot's model of the human strategic value
        robot_strat_val_h = StrategicValue(robot_car.traj, robot_car.traj_h,
                                           proj, mat_name, config.STRAT_DIM,
                                           config.STRATEGIC_VALUE_SCALE,
                                           min_val=config.MIN_STRAT_VAL,
                                           max_val=config.MAX_STRAT_VAL,
                                           traj_truck=truck.traj)
        # Robot's strategic value
        # TODO: just trying out human_car.traj to debug heatmap vis
        # robot_strat_val = StrategicValue(robot_car.traj, human_car.traj,
        #     proj, mat_name, config.STRAT_DIM, config.STRATEGIC_VALUE_SCALE,
        #     min_val=config.MIN_STRAT_VAL, max_val=config.MAX_STRAT_VAL)
        robot_strat_val = StrategicValue(robot_car.traj, robot_car.traj_h,
                                         proj, mat_name, config.STRAT_DIM,
                                         config.STRATEGIC_VALUE_SCALE,
                                         min_val=config.MIN_STRAT_VAL,
                                         max_val=config.MAX_STRAT_VAL,
                                         traj_truck=truck.traj)
        robot_car.strat_val = robot_strat_val
        robot_car.strat_val_h = robot_strat_val_h
    # Rewards and strategic value modeled by the human WHEN SIMULATED
    # (for both human and robot, respectively)
    human_r_h = reward.Reward(world, [human_car.traj_r],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_h,
                              speed=ref_speed_h,
                              fine_behind=fine_behind_h, is_human=True)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    human_r_r = reward.Reward(world, [human_car.traj],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_r,
                              speed=ref_speed_r,
                              fine_behind=fine_behind_r)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    if config.HUMAN_IGNORES_ROBOT:
        human_r_h = reward.Reward(world, [],
                                  other_truck_trajs=[truck.traj],
                                  w_lanes=w_lanes, w_control=w_control,
                                  w_bounded_control=w_bounded_control_h,
                                  speed=ref_speed_h,
                                  fine_behind=fine_behind_h, is_human=True)#,
    human_car.reward = human_r_h
    human_car.reward_r = human_r_r
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        # Human's strategic value
        human_strat_val = StrategicValue(human_car.traj_r, human_car.traj,
                                         proj, mat_name, config.STRAT_DIM,
                                         config.STRATEGIC_VALUE_SCALE,
                                         min_val=config.MIN_STRAT_VAL,
                                         max_val=config.MAX_STRAT_VAL,
                                         traj_truck=truck.traj)
        # Human's model of the robot strategic value
        human_strat_val_r = StrategicValue(human_car.traj_r, human_car.traj,
                                           proj, mat_name, config.STRAT_DIM,
                                           config.STRATEGIC_VALUE_SCALE,
                                           min_val=config.MIN_STRAT_VAL,
                                           max_val=config.MAX_STRAT_VAL,
                                           traj_truck=truck.traj)
        human_car.strat_val = human_strat_val
        human_car.strat_val_r = human_strat_val_r
        # set min and max strategic values over all four value grids
        config.MIN_STRAT_VAL = config.STRATEGIC_VALUE_SCALE * min(
            [min(robot_strat_val_h.vH_grid.flatten()),
             min(robot_strat_val_h.vR_grid.flatten()),
             min(robot_strat_val.vH_grid.flatten()),
             min(robot_strat_val.vR_grid.flatten()),
             min(human_strat_val.vH_grid.flatten()),
             min(human_strat_val.vR_grid.flatten()),
             min(human_strat_val_r.vH_grid.flatten()),
             min(human_strat_val_r.vR_grid.flatten())])
        config.MAX_STRAT_VAL = config.STRATEGIC_VALUE_SCALE * max(
            [max(robot_strat_val_h.vH_grid.flatten()),
             max(robot_strat_val_h.vR_grid.flatten()),
             max(robot_strat_val.vH_grid.flatten()),
             max(robot_strat_val.vR_grid.flatten()),
             max(human_strat_val.vH_grid.flatten()),
             max(human_strat_val.vR_grid.flatten()),
             max(human_strat_val_r.vH_grid.flatten()),
             max(human_strat_val_r.vR_grid.flatten())])
    # print the configuration
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(world.get_config())
    # initialize planners
    if init_planner:
        print('Initializing planners.')
        for c in world.cars:
            if hasattr(c, 'init_planner'):
                print 'Initializing planner for ' + c.name
                c.init_planner(config.INIT_PLAN_SCHEME[c.name])
        print '\n'
    return world
# -*- coding: utf-8 -*- """ Created on Sat Aug 5 16:26:47 2017 @author: Leonardo """ #O programa ira percorrerer uma matriz 10x10 e receberá recompensas. #Utilzando Orientação a Objetos OO import robo as r import reward as re import random #import robo3d as r3d rw1_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Bateria') rw2_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Oleo') rw3_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Munição') rewards = [rw1_random, rw2_random, rw3_random] robot = r.Robo(random.randint(0, 10), random.randint(0, 10)) print(robot) print(rewards) for i in range(10): moviment = input('Digite up, down, left ou right para o movimento: ') if moviment == 'up': robot.move_up() elif moviment == 'down': robot.move_down()
import robo
import robo3d
import reward


def check_reward(robot, rewards):
    """Report every reward sharing the robot's grid cell.

    Prints a message for each match and returns True if at least one
    reward was found at the robot's position.
    """
    found = False
    # Name the loop variable `prize` so it does not shadow the `reward` module.
    for prize in rewards:
        if prize.x == robot.x and prize.y == robot.y:
            print('O robo achou a recompensa %s' % prize.name)
            found = True
    return found


# Demo: two rewards, a 2D robot, and a 3D robot.
rew1 = reward.Reward(5, 5, 'Moeda')
rew2 = reward.Reward(2, 2, 'Gasolina')
rewardList = [rew1, rew2]

robo1 = robo.Robo(5, 7)
print(type(robo1))
print('X igual a %s' % robo1.x)
print('Y igual a %s' % robo1.y)

robo2 = robo3d.Robo3D(5, 5, 10)
print(type(robo2))
print('X igual a %s' % robo2.x)
print('Y igual a %s' % robo2.y)
print('Z igual a %s' % robo2.z)

# Walk the 2D robot down twice.
robo1.move_down()
robo1.move_down()
data = read.read2(data_path) # Estimate the policy parameters policy = gp.GibbsPolicy(env, T, 2.) #trace = policy.fit(data, 200) #print(trace[-1]) #print(policy.get_theta()) #plt.plot([t[0] for t in trace]) #plt.plot([t[2] for t in trace]) #plt.show() policy.set_theta(np.array([-18, -1, 18])) dx = 10 reward = rew.Reward(dx, dx, env) girl = irl.GIRL(reward, policy) trajs = girl.import_data(data) alphas = girl.solve(trajs) reward.set_params(alphas) reward.plot() reward.export_to_file(write_path) #plot_reward(reward, 'GIRL')
import matplotlib.pyplot as plt
import math as mt
# Bug fix: `np` was used below (linspace/zeros/arange) without ever being
# imported, so the demo crashed with NameError.
import numpy as np


class Reward(object):
    """Piecewise sigmoid reward over a small scalar input.

    The input is rescaled by 1000; below the 0.3 threshold the reward is a
    rising sigmoid centered at 0.20, above it a falling sigmoid centered at
    0.9.  Output lies in (-1, 1).
    """

    def __init__(self):
        # Last computed reward value.
        self.reward = 0

    def get_reward(self, x):
        """Return the reward for input x (also cached on self.reward)."""
        x = 1000 * x  # rescale to the sigmoid's working range
        if x < 0.3:
            self.reward = 2 / (1 + mt.exp(-20 * (x - 0.20))) - 1
        else:
            self.reward = -2 / (1 + mt.exp(-20 * (x - 0.9))) + 1
        return self.reward


# NOTE(review): imports this module by name — presumably this file is
# reward.py and the demo below exercises the class through the module.
import reward as rw

# Demo: plot the reward curve over a small input range.
plt.close('all')
r = rw.Reward()
x = np.linspace(0.00001, 0.002, 100)
y = np.zeros(len(x))
for i, xv in enumerate(x):
    y[i] = r.get_reward(xv)
plt.plot(x, y)
plt.grid()
plt.show()
import reward import location oldMan = questGiver.QuestGiver( "Old Man", "Doesn't matter where I came from. Doesn't matter where you came from, for that matter. All that matters is you're here to help me.", [ quest.Quest( 'Destroy the Brush', 'An evil enemy known as the brush has taken over that hill over there. Go clear it for me, will you? I will know you did the deed if you bring me 12 grass clumps. There might even \nbe something in it for you.', 'Have you destroyed the brush over on that hill over there and brought me my 12 grass clumps?', "Not bad, not bad. Here's the reward I promised you. You can also keep half the grass you collected.", reward.Reward("oldMan", globals.player, 50, 100, 50, [item.Item("old gold coin", "it's a coin", 5, 15)], logs=10, grass=6), lambda player: player.level >= 1, lambda player: player.inventory.removeGrass(12)) ], globals.player.pLocation) class ForgottenShore(location.Location): def __init__(self): location.Location.__init__( self, "Abandoned Shoals", "The Forgotten Shore", "The first thing you feel when you awake is a splitting headache. You slowly open your eyes, taking in the sand below you, the palm trees ringing your vision, and the small\ncrabs scuttling around you. You shut your eyes and massage your temples, trying to recall what happened. The last thing you remember was sailing the Blue Seas with your crew, being\nuniversally feared, and then the crash. You open your eyes up again, this time noticing the wreckage around you.\n" ) def update(self):