def initScript():
    """Initialize the simulation session.

    Builds the model-parameter object, opens the worker socket, creates
    the reward object, and performs the initial handshake with the remote
    worker.  All state is stored in the module-level GDict; relies on the
    globals GObject, GSolver, MVec3 and the modules mp, wrk, reward being
    in scope.
    """
    # Get parameters and inputs lists
    GDict['parameters'] = mp.ModelParameters()
    GDict['parameters'].make_param(GObject, GSolver, GDict)
    GDict['parameters'].get_parameters()
    GDict['parameters'].get_model_inputs()
    GDict['script'] = GDict['parameters'].script_name
    GDict['script_input'] = GDict['parameters'].script_input
    # Create socket to the remote worker
    GDict['worker'] = wrk.Worker(GDict['parameters'].port)
    # Get reward object
    GDict['reward'] = reward.Reward(GObject, GSolver, GDict, MVec3)
    # Get observation params
    param_values = GDict['parameters'].get_parameters_values()
    # Send request with init worker state.
    # NOTE(review): 'in' looks like an "init" opcode and 0/True the initial
    # time/reset flags — confirm against the worker protocol.
    request = [param_values, 0, True, 'in']
    GDict['worker'].send(request)
    # Get reply that reset successful (reply is currently unused)
    reply = GDict['worker'].recv()
    #print("Finished", reply)
    # Per-episode bookkeeping state
    GDict['done'] = False
    GDict['episodeTime'] = GDict['parameters'].episode_duration
    GDict['routerState'] = 'st'
    GDict['delay'] = GDict['parameters'].delay
    GDict['oldTime'] = 0
    # Let the reward object run its custom episode-start hook
    GDict['reward'].custom_start()
def reputationUp(self):
    """Rank the player up with the Pirate's Alliance when eligible.

    Spends 2000 reputation once the player has at least 2000 reputation at
    the 'Neutral' rank, announces the promotion, and builds the rank-up
    reward (100 XP, 50 gold, and an item).

    Returns the created Reward on promotion, otherwise None (callers that
    ignored the original implicit None return are unaffected).
    """
    # NOTE(review): reputationName is never advanced past 'Neutral' here,
    # so every later call keeps deducting 2000 while eligible — confirm
    # whether the rank name should be updated as part of the promotion.
    if self.inventory.reputation >= 2000 and self.inventory.reputationName == 'Neutral':
        self.inventory.reputation -= 2000
        print(
            "You have become friendly with the Pirate's Alliance. As a token of their generosity, they gave 50 gold and 100 experience, as well as a cool looking hat."
        )
        # Bug fix: the original assigned to a local named `reward`, which
        # shadowed the `reward` module for the whole function body and made
        # the call `reward.Reward(...)` raise UnboundLocalError.  Use a
        # distinct local name for the constructed object.
        rank_reward = reward.Reward(
            "Pirate's Alliance", 100, 50, 0,
            [item.Item("old pirate hat", "old but still gold", 22, 35)])
        # NOTE(review): the original discarded this object — presumably
        # Reward.__init__ applies itself to the player; returning it keeps
        # it reachable either way.
        return rank_reward
def init():
    """Set up pygame, open the main window, and build the shared game actors."""
    global playerObj, aiObj, rewardObj, screen

    # Ask SDL to center the window before pygame brings up the display.
    os.environ["SDL_VIDEO_CENTERED"] = '1'
    pygame.init()

    window_size = (st.windowWidth, st.windowHeight)
    screen = pygame.display.set_mode(window_size)

    # Module-level game entities used by the rest of the program.
    playerObj = player.Player()
    aiObj = ai.AI()
    rewardObj = rwd.Reward()
def main():
    """Run the mouse-controlled breakout game loop.

    Handles the pygame event queue (quit, mouse drag, power-up timers),
    moves the balls, resolves wall/paddle/brick collisions, spawns and
    applies falling power-ups, and redraws the frame until the process
    exits via the window-close event.
    """
    score = 0
    running = True
    moving = False
    is_invincible = False
    is_lengthenboard = False
    score_font = pygame.font.Font("font/font.ttf", 36)
    creat_brick()
    # Create the animated sprites (paddle and first ball)
    boards = board.Board(bg_size)
    group_board.add(boards)
    balls = ball.Ball(bg_size)
    group_ball.add(balls)
    # Define the power-up expiry timer events
    INVINCIBLE_TIME = USEREVENT
    LENGTHENBOARD_TIME = USEREVENT + 1
    SPEEDUPBALL_TIME = USEREVENT + 2
    # Loop the background music forever
    pygame.mixer.music.play(-1)
    # Main loop
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                pygame.quit()
                sys.exit()
            if event.type == MOUSEBUTTONDOWN:
                if event.button == 1:
                    # Only start dragging when the click lands inside the paddle
                    mouse_position = pygame.mouse.get_pos()
                    if boards.rect[0] < mouse_position[0] < boards.rect[0] + boards.rect[2] and \
                            boards.rect[1] < mouse_position[1] < boards.rect[1] + boards.rect[3]:
                        moving = True
            if event.type == MOUSEBUTTONUP:
                if event.button == 1:
                    moving = False
            # Power-up expiry: invincibility ends
            if event.type == INVINCIBLE_TIME:
                is_invincible = False
                pygame.time.set_timer(INVINCIBLE_TIME, 0)
            # Power-up expiry: paddle returns to normal length
            if event.type == LENGTHENBOARD_TIME:
                boards.change_object()
                is_lengthenboard = False
                pygame.time.set_timer(LENGTHENBOARD_TIME, 0)
            # Power-up expiry: restore normal ball speed (halve the doubled speed)
            if event.type == SPEEDUPBALL_TIME:
                for balls in group_ball:
                    if balls.ball_speed not in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                        balls.ball_speed[0] //= 2
                        balls.ball_speed[1] //= 2
                pygame.time.set_timer(SPEEDUPBALL_TIME, 0)
        screen.blit(background, (0, 0))
        # Drag the paddle; offsets differ between normal and lengthened paddle
        if moving:
            mouse_position = pygame.mouse.get_pos()
            if 450 <= mouse_position[1] <= 573 and 800 >= mouse_position[0] >= 50 and not is_lengthenboard:
                boards.rect = (mouse_position[0] - 62, 535, boards.rect[2], boards.rect[3])
            elif 450 <= mouse_position[1] <= 573 and 825 >= mouse_position[0] >= 65 and is_lengthenboard:
                boards.rect = (mouse_position[0] - 90, 535, boards.rect[2], boards.rect[3])
            group_board.add(boards)
        # Draw the paddle (both branches draw the same way; kept as written)
        if not is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        if is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        # Collision handling for every ball.
        # NOTE(review): this loop reuses the name `balls` and removes sprites
        # from group_ball while iterating it — pygame Group iteration copes,
        # but the aliasing is fragile.
        for balls in group_ball:
            # Speed-up power-up: double this ball's velocity once
            if balls.is_speedupball:
                balls.ball_speed[0] *= 2
                balls.ball_speed[1] *= 2
                balls.is_speedupball = False
            # Ball vs. window edges
            if balls.rect.left < 0:
                # Clamp to the edge so the ball can't get stuck re-colliding
                balls.rect.left = 0
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.right > width:
                balls.rect.right = width
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.bottom > height:
                if is_invincible:
                    # Invincibility: bounce off the bottom instead of dying
                    balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
                elif not is_invincible:
                    # Ball lost
                    group_ball.remove(balls)
            elif balls.rect.top < 0:
                balls.rect.top = 0
                balls.ball_speed[1] = -balls.ball_speed[1]
                balls.is_hit = False
            # Ball vs. paddle
            hit_ball = pygame.sprite.spritecollide(balls, group_board, False, pygame.sprite.collide_mask)
            if hit_ball:
                # Determine which face of the paddle was hit
                board_hit_top = board_collide(balls.rect, boards.rect, balls.ball_speed)
                if not balls.is_hit:
                    if not board_hit_top:
                        # Side hit: reflect both components
                        balls.ball_speed[0] = -balls.ball_speed[0]
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    else:
                        # Top hit: reflect vertically only
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = True
            # Ball vs. bricks
            hit_board = pygame.sprite.spritecollide(balls, group_brick, False, pygame.sprite.collide_mask)
            if hit_board:
                for each in hit_board:
                    brick_hit_top = brick_collide(balls.rect, each.rect, balls.ball_speed)
                    if not brick_hit_top:
                        balls.ball_speed[0] = -balls.ball_speed[0]
                    else:
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    # Bricks downgrade image3 -> image2 -> image1 -> removed,
                    # scoring 30/20/10 respectively
                    if each.curimage == each.image1:
                        score += 10
                        group_brick.remove(each)
                    elif each.curimage == each.image2:
                        score += 20
                        each.curimage = each.image1
                    elif each.curimage == each.image3:
                        score += 30
                        each.curimage = each.image2
                    # Roll a random power-up for this brick hit
                    rewards = reward.Reward(bg_size)
                    rewards.generate_reward()
                    balls.is_hit = False
                    # image5 presumably means "no drop" — TODO confirm
                    if rewards.current_image != rewards.image5:
                        rewards.rect = balls.rect
                        group_reward.add(rewards)
            # Move and draw the ball
            balls.rect = balls.rect.move(balls.ball_speed)
            screen.blit(balls.image1, balls.rect)
        # Power-up (reward) handling
        for rewards in group_reward:
            hit_reward = pygame.sprite.spritecollide(rewards, group_board, False, pygame.sprite.collide_mask)
            if hit_reward:
                # Invincibility (10s)
                if rewards.current_image == rewards.image1:
                    is_invincible = True
                    pygame.time.set_timer(INVINCIBLE_TIME, 10 * 1000)
                # Lengthened paddle (10s)
                elif rewards.current_image == rewards.image2:
                    if is_lengthenboard == False:
                        boards.change_object()
                        is_lengthenboard = True
                    pygame.time.set_timer(LENGTHENBOARD_TIME, 10 * 1000)
                # Extra ball
                elif rewards.current_image == rewards.image3:
                    balls = ball.Ball(bg_size)
                    group_ball.add(balls)
                # Ball speed-up (10s); only balls at base speed are doubled
                elif rewards.current_image == rewards.image4:
                    for balls in group_ball:
                        if balls.ball_speed in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                            balls.is_speedupball = True
                    pygame.time.set_timer(SPEEDUPBALL_TIME, 10 * 1000)
                group_reward.remove(rewards)
            # Drop off the bottom of the screen: discard
            if rewards.rect.bottom > height:
                group_reward.remove(rewards)
            screen.blit(rewards.current_image, rewards.rect)
            rewards.rect = rewards.rect.move(reward_speed)
        # Draw the bricks (image4 marks a destroyed brick)
        for bricks in group_brick:
            if bricks.curimage == bricks.image4:
                group_brick.remove(bricks)
            screen.blit(bricks.curimage, bricks.rect)
        # Draw the score
        score_text = score_font.render("Score : %s" % str(score), True, (255, 255, 255))
        screen.blit(score_text, (5, 530))
        pygame.display.flip()
        clock.tick(FPS)
def main():
    """Run the keyboard-controlled breakout game loop (variant with lives).

    The ball launches on SPACE, the paddle moves with LEFT/RIGHT, and the
    player loses a life (a sprite in group_healths) each time all balls are
    lost; the process exits when the last life is gone or the window closes.
    """
    score = 0
    is_remove = False          # flag: remove one life sprite on next draw pass
    running = True
    is_invincible = False
    is_lengthenboard = False
    score_font = pygame.font.Font("font/font.ttf", 36)
    creat_brick()
    creat_health()
    # Create the animated sprites
    boards = board.Board(bg_size)  # the paddle
    group_board.add(boards)
    balls = ball.Ball(boards)      # the ball, parked on the paddle
    balls.ball_speed = [0, 0]      # stationary until launched with SPACE
    is_moving = False
    group_ball.add(balls)
    # Define the power-up expiry timer events
    INVINCIBLE_TIME = USEREVENT  #24
    LENGTHENBOARD_TIME = USEREVENT + 1
    SPEEDUPBALL_TIME = USEREVENT + 2
    # Loop the background music (disabled)
    # pygame.mixer.music.play(-1)
    # Main loop
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                #pygame.quit()
                sys.exit()
            # SPACE launches the ball
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_SPACE and not is_moving:
                    is_moving = True
                    balls.ball_speed = [3, -3]
            # Power-up expiry: invincibility ends
            if event.type == INVINCIBLE_TIME:
                is_invincible = False
                pygame.time.set_timer(INVINCIBLE_TIME, 0)
            # Power-up expiry: paddle returns to normal length
            if event.type == LENGTHENBOARD_TIME:
                boards.change_object(boards.rect.left)
                is_lengthenboard = False
                pygame.time.set_timer(LENGTHENBOARD_TIME, 0)
            # Power-up expiry: restore normal ball speed (halve the doubled speed)
            if event.type == SPEEDUPBALL_TIME:
                for balls in group_ball:
                    if balls.ball_speed not in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                        balls.ball_speed[0] //= 2
                        balls.ball_speed[1] //= 2
                pygame.time.set_timer(SPEEDUPBALL_TIME, 0)
        screen.blit(background, (0, 0))
        # Paddle movement: reuses the LAST event from the queue above.
        # NOTE(review): if the event queue was empty on the very first frame,
        # `event` is unbound here (NameError) — confirm and guard upstream.
        boardSpeed = 0
        if event.type == pygame.KEYDOWN and is_moving:
            if event.key == pygame.K_LEFT:
                boardSpeed = -8 if boards.rect.left > 0 else 0
            elif event.key == pygame.K_RIGHT:
                boardSpeed = 8 if boards.rect.right < width else 0
        boards.rect.left += boardSpeed
        group_board.add(boards)
        # Draw the paddle (both branches draw the same way; kept as written)
        if not is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        if is_lengthenboard:
            screen.blit(boards.current_image, boards.rect)
        # Collision handling for every ball.
        # NOTE(review): the loop reuses the name `balls` and mutates
        # group_ball while iterating it — fragile aliasing, kept as-is.
        for balls in group_ball:
            # Speed-up power-up: double this ball's velocity once
            if balls.is_speedupball:
                balls.ball_speed[0] *= 2
                balls.ball_speed[1] *= 2
                balls.is_speedupball = False
            # Ball vs. window edges
            if balls.rect.left < 0:
                ball_wall_sound.play()
                # Clamp to the edge so the ball can't get stuck re-colliding
                balls.rect.left = 0
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.right > width:
                ball_wall_sound.play()
                balls.rect.right = width
                balls.ball_speed[0] = -balls.ball_speed[0]
                balls.is_hit = False
            elif balls.rect.bottom > height:
                if is_invincible:
                    # Invincibility: bounce off the bottom instead of dying
                    balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
                elif not is_invincible:
                    # Ball lost
                    group_ball.remove(balls)
                    if len(group_ball) == 0:
                        if len(group_healths) > 1:
                            # Spend one life and re-park a fresh ball on the paddle
                            is_remove = True
                            balls = ball.Ball(boards)
                            group_ball.add(balls)
                            is_moving = False
                            balls.ball_speed = [0, 0]
                            group_board.remove(boards)
                            group_board.add(boards)
                        else:
                            # Last life gone: quit
                            sys.exit()
                    #balls.rect.bottom = height
                    balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = False
            elif balls.rect.top < 0:
                ball_wall_sound.play()
                balls.rect.top = 0
                balls.ball_speed[1] = -balls.ball_speed[1]
                balls.is_hit = False
            # Ball vs. paddle
            hit_ball = pygame.sprite.spritecollide(balls, group_board, False, pygame.sprite.collide_mask)
            if hit_ball:
                # Determine which face of the paddle was hit
                board_hit_top = board_collide(balls.rect, boards.rect, balls.ball_speed)
                if not balls.is_hit:
                    ball_wall_sound.play()
                    if not board_hit_top:
                        # Side hit: reflect both components
                        balls.ball_speed[0] = -balls.ball_speed[0]
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    else:
                        # Top hit: reflect vertically only
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    balls.is_hit = True
            # Ball vs. bricks
            hit_board = pygame.sprite.spritecollide(balls, group_brick, False, pygame.sprite.collide_mask)
            if hit_board:
                for each in hit_board:
                    brick_hit_top = brick_collide(balls.rect, each.rect, balls.ball_speed)
                    if not brick_hit_top:
                        balls.ball_speed[0] = -balls.ball_speed[0]
                    else:
                        balls.ball_speed[1] = -balls.ball_speed[1]
                    ball_brick_sound.play()
                    # Bricks downgrade image3 -> image2 -> image1 -> removed,
                    # scoring 30/20/10 respectively
                    if each.curimage == each.image1:
                        score += 10
                        group_brick.remove(each)
                        if len(group_brick) == 0:
                            pass  #new_game
                    elif each.curimage == each.image2:
                        score += 20
                        each.curimage = each.image1
                    elif each.curimage == each.image3:
                        score += 30
                        each.curimage = each.image2
                    # Roll a random power-up for this brick hit
                    rewards = reward.Reward(bg_size)
                    rewards.generate_reward()
                    balls.is_hit = False
                    # image5 presumably means "no drop" — TODO confirm
                    if rewards.current_image != rewards.image5:
                        rewards.rect = balls.rect
                        group_reward.add(rewards)
            # Move and draw the ball
            balls.rect = balls.rect.move(balls.ball_speed)
            screen.blit(balls.image1, balls.rect)
        # Power-up (reward) handling
        for rewards in group_reward:
            hit_reward = pygame.sprite.spritecollide(
                rewards, group_board, False, pygame.sprite.collide_mask)
            if hit_reward:
                # Invincibility (10s)
                if rewards.current_image == rewards.image1:
                    is_invincible = True
                    pygame.time.set_timer(INVINCIBLE_TIME, 10 * 1000)
                # Lengthened paddle (10s)
                elif rewards.current_image == rewards.image2:
                    if is_lengthenboard == False:
                        boards.change_object(boards.rect.left)
                        is_lengthenboard = True
                    pygame.time.set_timer(LENGTHENBOARD_TIME, 10 * 1000)
                # Extra ball
                elif rewards.current_image == rewards.image3:
                    balls = ball.Ball(boards)
                    group_ball.add(balls)
                # Ball speed-up (10s); only balls at base speed are doubled
                elif rewards.current_image == rewards.image4:
                    for balls in group_ball:
                        if balls.ball_speed in ([3, 3], [3, -3], [-3, 3], [-3, -3]):
                            balls.is_speedupball = True
                    pygame.time.set_timer(SPEEDUPBALL_TIME, 10 * 1000)
                group_reward.remove(rewards)
            # Drop off the bottom of the screen: discard
            if rewards.rect.bottom > height:
                group_reward.remove(rewards)
            screen.blit(rewards.current_image, rewards.rect)
            rewards.rect = rewards.rect.move(reward_speed)
        # Draw the life counter; drop one sprite if a life was just spent
        for healths in group_healths:
            if is_remove:
                group_healths.remove(healths)
                is_remove = False
            screen.blit(healths.image, healths.rect)
        # Draw the bricks (image4 marks a destroyed brick)
        for bricks in group_brick:
            if bricks.curimage == bricks.image4:
                group_brick.remove(bricks)
            screen.blit(bricks.curimage, bricks.rect)
        # Draw the score
        score_text = score_font.render("Score : %s" % str(score), True,
                                       (255, 255, 255))
        screen.blit(score_text, (5, 530))
        pygame.display.flip()
        clock.tick(FPS)
startingX = World_width / 2 startingY = World_height / 2 #breeding conditions initiation_max_change = [0.5, 1] # list of all the bots that where generated, a bot is added when they die all_bots = [] # create a visualiser visWin = vis.Display(World_width, World_height) #rewards #create the cirlce for the reward apple = reward.Reward(World_width, World_height) vis_apple = visWin._createCircle(apple.position[0], apple.position[1], reward.Radius, reward.Colour) # genereate the initial group of bots initialising_time = 0 alive_bots = [] i = 0 while i < number_of_bots_alive: brainNum = "_yellow" colour = 'yellow' if i % 2 == 0: brainNum = "_blue" colour = 'blue' initial_bot = bot.Bot(initialising_time,
def world_highway_truck_cut_in(initial_states='truck_cut_in_far_overtaking',
                               interaction_data=None, init_planner=True):
    """Build the highway truck-cut-in world: lanes, robot/human/truck cars,
    their reward functions, and (for hierarchical robots) strategic values.

    Args:
        initial_states: key passed to get_initial_states() selecting the
            initial robot/human/truck states.
        interaction_data: forwarded to the World constructor.
        init_planner: when True, calls init_planner() on every car that has one.

    Returns:
        The constructed World.

    NOTE(review): the planner-initialization section uses Python-2 print
    statements (`print 'x'`) while the rest uses print(...) — this module
    appears to target Python 2.
    """
    # If very long horizon, increase bracket depth in Theano compilation to avoid
    # "fatal error: bracket nesting level exceeded maximum of 256."
    if config.HORIZON > 10:
        th.config.gcc.cxxflags = '-fbracket-depth=1024'
    # lanes
    left_lane = lane.StraightLane([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS)
    right_lane = left_lane.shifted(-1)
    lanes = [left_lane, right_lane]
    # roads
    roads = [left_lane]
    # fences (road boundaries)
    right_fence = lane.Fence([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS, side=1)
    left_fence = lane.Fence([0., -1.], [0., 1.], constants.LANE_WIDTH_VIS, side=-1)
    fences = [right_fence.shifted(-1.5), left_fence.shifted(0.5)]
    # dynamics
    dyn = dynamics.CarDynamics
    # asynchronous setting
    config.ASYNCHRONOUS = False
    # MATLAB file setup: pick the strategic-value .mat file from the
    # fine-behind flag and the human rationality parameter beta.
    # NOTE(review): when human_beta is None, mat_name is never assigned here
    # but is used below (HierarchicalCar / StrategicValue) — potential
    # NameError on that path; confirm the intended configuration space.
    fine_behind_h = config.FINE_BEHIND_H
    fine_behind_r = config.FINE_BEHIND_R
    human_beta = config.HUMAN_BETA
    strat_val_data_dir = config.TRUCK_CUT_IN_STRATEGIC_VALUE_DATA_DIR
    if fine_behind_h == True:
        if human_beta is None:
            print('Not using strategic value.')
        elif human_beta == float('inf'):
            # then rational human (= deterministic).
            mat_name = strat_val_data_dir + 'DSG_fine_det.mat'
        else:
            beta_str = '%.2E' % Decimal(config.HUMAN_BETA)
            mat_name = strat_val_data_dir + 'beta_{0}/DSG_fine_beta_{0}.mat'.format(
                beta_str)
    else:
        if human_beta is None:
            print('Not using strategic value.')
        elif human_beta == float('inf'):
            # then rational human (= deterministic).
            mat_name = strat_val_data_dir + 'DSG_not_fine_det.mat'
        else:
            # NOTE(review): this not-fine branch reuses the 'DSG_fine_beta'
            # filename — looks like it should be a 'not_fine' variant; confirm.
            beta_str = '%.2E' % Decimal(config.HUMAN_BETA)
            mat_name = strat_val_data_dir + 'beta_{0}/DSG_fine_beta_{0}.mat'.format(
                beta_str)
    # Strategic state projection (use strategic grid dimension to get the correct
    # projection)
    proj = eval('projection.ProjectionTruckCutInStrategicValue{0}D'.format(config.STRAT_DIM))()
    # Initial states
    x0_r, x0_h, x0_t = get_initial_states(initial_states)
    # Robot car setup
    #ref_speed_r = constants.METERS_TO_VIS * 35.0
    ref_speed_r = constants.METERS_TO_VIS * 34.0  # speed for gap creation
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        robot_car = car.HierarchicalCar(x0_r, constants.DT, dyn,
                                        constants.CAR_CONTROL_BOUNDS,
                                        horizon=config.HORIZON,
                                        color=constants.COLOR_R,
                                        name=constants.NAME_R,
                                        mat_name=mat_name,
                                        use_second_order=config.USE_SECOND_ORDER,
                                        proj=proj,
                                        strat_dim=config.STRAT_DIM)
    elif config.ROBOT_CAR == 'car.NestedCar':
        robot_car = car.NestedCar(x0_r, constants.DT, dyn,
                                  constants.CAR_CONTROL_BOUNDS,
                                  horizon=config.HORIZON,
                                  color=constants.COLOR_R,
                                  name=constants.NAME_R,
                                  use_second_order=config.USE_SECOND_ORDER)
    elif config.ROBOT_CAR == 'car.PredictReactCar':
        robot_car = car.PredictReactCar(x0_r, constants.DT, dyn,
                                        constants.CAR_CONTROL_BOUNDS,
                                        horizon=config.HORIZON,
                                        color=constants.COLOR_R,
                                        name=constants.NAME_R)
    elif config.ROBOT_CAR == 'car.IteratedBestResponseCar':
        robot_car = car.IteratedBestResponseCar(x0_r, constants.DT, dyn,
                                                constants.CAR_CONTROL_BOUNDS,
                                                horizon=config.HORIZON,
                                                color=constants.COLOR_R,
                                                name=constants.NAME_R)
    else:
        print('"{0}" is currently an unsupported robot car type'.format(config.ROBOT_CAR))
        sys.exit()
    # Human car setup
    ref_speed_h = constants.METERS_TO_VIS * 31.
    # x0_h[3]
    human_car = eval(config.HUMAN_CAR)(x0_h, constants.DT, dyn,
                                       constants.CAR_CONTROL_BOUNDS,
                                       horizon=config.HORIZON,
                                       color=constants.COLOR_H,
                                       name=constants.NAME_H)
    # Truck setup
    truck = car.Truck(x0_t, constants.DT, dyn, constants.CAR_CONTROL_BOUNDS,
                      horizon=config.HORIZON, color=constants.COLOR_TRUCK,
                      name=constants.NAME_TRUCK)
    # Information structure
    robot_car.human = human_car  # robot creates its own traj for human
    if human_car.is_follower:
        # give follower car access to the robot car
        human_car.robot = robot_car
        human_car.traj_r = robot_car.traj  # human knows the robot traj
    robot_car.truck = truck
    human_car.truck = truck
    # world setup
    cars = [robot_car, human_car, truck]
    name = 'world_highway_truck_cut_in'
    world = World(name, constants.DT, cars, robot_car, human_car, lanes, roads,
                  fences, interaction_data=interaction_data)
    # rewards
    w_lanes = [4., 2.]
    w_control = -0.1
    w_bounded_control_h = -50.0  # bounded control weight for human
    w_bounded_control_r = -50.0  # bounded control weight for robot
    # Rewards and strategic value modeled by the robot
    # (for both human and robot, respectively)
    if config.R_BELIEF_H_KNOWS_TRAJ_R:
        # robot believes human knows robot trajectory
        robot_r_h_traj = robot_car.traj
    else:
        # robot believes human doesn't know robot trajectory
        robot_r_h_traj = robot_car.traj_linear
    robot_r_h = reward.Reward(world, [robot_r_h_traj],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_h,
                              speed=ref_speed_h,
                              fine_behind=fine_behind_h, is_human=True)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    robot_r_r = reward.Reward(world, [robot_car.traj_h],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_r,
                              speed=ref_speed_r,
                              fine_behind=fine_behind_r)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    if config.PREDICT_HUMAN_IGNORES_ROBOT:
        # Reward for a human that ignores the existence of the robot.
        robot_r_h = reward.Reward(world, other_car_trajs=[], w_lanes=w_lanes,
                                  w_control=w_control,
                                  w_bounded_control=w_bounded_control_h,
                                  speed=ref_speed_h,
                                  fine_behind=fine_behind_h, is_human=True)
    robot_car.reward = robot_r_r
    robot_car.reward_h = robot_r_h
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        # Robot's model of the human strategic value
        robot_strat_val_h = StrategicValue(robot_car.traj, robot_car.traj_h,
                                           proj, mat_name, config.STRAT_DIM,
                                           config.STRATEGIC_VALUE_SCALE,
                                           min_val=config.MIN_STRAT_VAL,
                                           max_val=config.MAX_STRAT_VAL,
                                           traj_truck=truck.traj)
        # Robot's strategic value
        # TODO: just trying out human_car.traj to debug heatmap vis
        # robot_strat_val = StrategicValue(robot_car.traj, human_car.traj,
        #     proj, mat_name, config.STRAT_DIM, config.STRATEGIC_VALUE_SCALE,
        #     min_val=config.MIN_STRAT_VAL, max_val=config.MAX_STRAT_VAL)
        robot_strat_val = StrategicValue(robot_car.traj, robot_car.traj_h,
                                         proj, mat_name, config.STRAT_DIM,
                                         config.STRATEGIC_VALUE_SCALE,
                                         min_val=config.MIN_STRAT_VAL,
                                         max_val=config.MAX_STRAT_VAL,
                                         traj_truck=truck.traj)
        robot_car.strat_val = robot_strat_val
        robot_car.strat_val_h = robot_strat_val_h
    # Rewards and strategic value modeled by the human WHEN SIMULATED
    # (for both human and robot, respectively)
    human_r_h = reward.Reward(world, [human_car.traj_r],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_h,
                              speed=ref_speed_h,
                              fine_behind=fine_behind_h, is_human=True)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    human_r_r = reward.Reward(world, [human_car.traj],
                              other_truck_trajs=[truck.traj],
                              w_lanes=w_lanes, w_control=w_control,
                              w_bounded_control=w_bounded_control_r,
                              speed=ref_speed_r,
                              fine_behind=fine_behind_r)#,
                              # strategic_value_mat_name=mat_name, robot_car=robot_car,
                              # proj_np=proj_np, proj_th=proj_th)
    if config.HUMAN_IGNORES_ROBOT:
        human_r_h = reward.Reward(world, [],
                                  other_truck_trajs=[truck.traj],
                                  w_lanes=w_lanes, w_control=w_control,
                                  w_bounded_control=w_bounded_control_h,
                                  speed=ref_speed_h,
                                  fine_behind=fine_behind_h, is_human=True)#,
    human_car.reward = human_r_h
    human_car.reward_r = human_r_r
    if config.ROBOT_CAR == 'car.HierarchicalCar':
        # Human's strategic value
        human_strat_val = StrategicValue(human_car.traj_r, human_car.traj,
                                         proj, mat_name, config.STRAT_DIM,
                                         config.STRATEGIC_VALUE_SCALE,
                                         min_val=config.MIN_STRAT_VAL,
                                         max_val=config.MAX_STRAT_VAL,
                                         traj_truck=truck.traj)
        # Human's model of the robot strategic value
        human_strat_val_r = StrategicValue(human_car.traj_r, human_car.traj,
                                           proj, mat_name, config.STRAT_DIM,
                                           config.STRATEGIC_VALUE_SCALE,
                                           min_val=config.MIN_STRAT_VAL,
                                           max_val=config.MAX_STRAT_VAL,
                                           traj_truck=truck.traj)
        human_car.strat_val = human_strat_val
        human_car.strat_val_r = human_strat_val_r
        # set min and max strategic values over all four value grids
        config.MIN_STRAT_VAL = config.STRATEGIC_VALUE_SCALE * min(
            [min(robot_strat_val_h.vH_grid.flatten()),
             min(robot_strat_val_h.vR_grid.flatten()),
             min(robot_strat_val.vH_grid.flatten()),
             min(robot_strat_val.vR_grid.flatten()),
             min(human_strat_val.vH_grid.flatten()),
             min(human_strat_val.vR_grid.flatten()),
             min(human_strat_val_r.vH_grid.flatten()),
             min(human_strat_val_r.vR_grid.flatten())])
        config.MAX_STRAT_VAL = config.STRATEGIC_VALUE_SCALE * max(
            [max(robot_strat_val_h.vH_grid.flatten()),
             max(robot_strat_val_h.vR_grid.flatten()),
             max(robot_strat_val.vH_grid.flatten()),
             max(robot_strat_val.vR_grid.flatten()),
             max(human_strat_val.vH_grid.flatten()),
             max(human_strat_val.vR_grid.flatten()),
             max(human_strat_val_r.vH_grid.flatten()),
             max(human_strat_val_r.vR_grid.flatten())])
    # print the configuration
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(world.get_config())
    # initialize planners
    if init_planner:
        print('Initializing planners.')
        for c in world.cars:
            if hasattr(c, 'init_planner'):
                print 'Initializing planner for ' + c.name
                c.init_planner(config.INIT_PLAN_SCHEME[c.name])
        print '\n'
    return world
# -*- coding: utf-8 -*- """ Created on Sat Aug 5 16:26:47 2017 @author: Leonardo """ #O programa ira percorrerer uma matriz 10x10 e receberá recompensas. #Utilzando Orientação a Objetos OO import robo as r import reward as re import random #import robo3d as r3d rw1_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Bateria') rw2_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Oleo') rw3_random = re.Reward(random.randint(0, 10), random.randint(0, 10), 'Munição') rewards = [rw1_random, rw2_random, rw3_random] robot = r.Robo(random.randint(0, 10), random.randint(0, 10)) print(robot) print(rewards) for i in range(10): moviment = input('Digite up, down, left ou right para o movimento: ') if moviment == 'up': robot.move_up() elif moviment == 'down': robot.move_down()
import robo
import robo3d
import reward


def check_reward(robot, rewards):
    """Report every reward sharing the robot's grid cell.

    Prints a message for each match and returns True if at least one
    reward was found at the robot's position.
    """
    found = False
    # Name the loop variable `prize` so it does not shadow the `reward` module.
    for prize in rewards:
        if prize.x == robot.x and prize.y == robot.y:
            print('O robo achou a recompensa %s' % prize.name)
            found = True
    return found


# Demo: two rewards, a 2D robot, and a 3D robot.
rew1 = reward.Reward(5, 5, 'Moeda')
rew2 = reward.Reward(2, 2, 'Gasolina')
rewardList = [rew1, rew2]

robo1 = robo.Robo(5, 7)
print(type(robo1))
print('X igual a %s' % robo1.x)
print('Y igual a %s' % robo1.y)

robo2 = robo3d.Robo3D(5, 5, 10)
print(type(robo2))
print('X igual a %s' % robo2.x)
print('Y igual a %s' % robo2.y)
print('Z igual a %s' % robo2.z)

# Walk the 2D robot down twice.
robo1.move_down()
robo1.move_down()
data = read.read2(data_path) # Estimate the policy parameters policy = gp.GibbsPolicy(env, T, 2.) #trace = policy.fit(data, 200) #print(trace[-1]) #print(policy.get_theta()) #plt.plot([t[0] for t in trace]) #plt.plot([t[2] for t in trace]) #plt.show() policy.set_theta(np.array([-18, -1, 18])) dx = 10 reward = rew.Reward(dx, dx, env) girl = irl.GIRL(reward, policy) trajs = girl.import_data(data) alphas = girl.solve(trajs) reward.set_params(alphas) reward.plot() reward.export_to_file(write_path) #plot_reward(reward, 'GIRL')
import matplotlib.pyplot as plt
import math as mt
# Bug fix: `np` was used below (linspace/zeros/arange) without ever being
# imported, so the demo crashed with NameError.
import numpy as np


class Reward(object):
    """Piecewise sigmoid reward over a small scalar input.

    The input is rescaled by 1000; below the 0.3 threshold the reward is a
    rising sigmoid centered at 0.20, above it a falling sigmoid centered at
    0.9.  Output lies in (-1, 1).
    """

    def __init__(self):
        # Last computed reward value.
        self.reward = 0

    def get_reward(self, x):
        """Return the reward for input x (also cached on self.reward)."""
        x = 1000 * x  # rescale to the sigmoid's working range
        if x < 0.3:
            self.reward = 2 / (1 + mt.exp(-20 * (x - 0.20))) - 1
        else:
            self.reward = -2 / (1 + mt.exp(-20 * (x - 0.9))) + 1
        return self.reward


# NOTE(review): imports this module by name — presumably this file is
# reward.py and the demo below exercises the class through the module.
import reward as rw

# Demo: plot the reward curve over a small input range.
plt.close('all')
r = rw.Reward()
x = np.linspace(0.00001, 0.002, 100)
y = np.zeros(len(x))
for i, xv in enumerate(x):
    y[i] = r.get_reward(xv)
plt.plot(x, y)
plt.grid()
plt.show()
import reward import location oldMan = questGiver.QuestGiver( "Old Man", "Doesn't matter where I came from. Doesn't matter where you came from, for that matter. All that matters is you're here to help me.", [ quest.Quest( 'Destroy the Brush', 'An evil enemy known as the brush has taken over that hill over there. Go clear it for me, will you? I will know you did the deed if you bring me 12 grass clumps. There might even \nbe something in it for you.', 'Have you destroyed the brush over on that hill over there and brought me my 12 grass clumps?', "Not bad, not bad. Here's the reward I promised you. You can also keep half the grass you collected.", reward.Reward("oldMan", globals.player, 50, 100, 50, [item.Item("old gold coin", "it's a coin", 5, 15)], logs=10, grass=6), lambda player: player.level >= 1, lambda player: player.inventory.removeGrass(12)) ], globals.player.pLocation) class ForgottenShore(location.Location): def __init__(self): location.Location.__init__( self, "Abandoned Shoals", "The Forgotten Shore", "The first thing you feel when you awake is a splitting headache. You slowly open your eyes, taking in the sand below you, the palm trees ringing your vision, and the small\ncrabs scuttling around you. You shut your eyes and massage your temples, trying to recall what happened. The last thing you remember was sailing the Blue Seas with your crew, being\nuniversally feared, and then the crash. You open your eyes up again, this time noticing the wreckage around you.\n" ) def update(self):