Example #1
def test_outofbounds():
    env = World(10, 10, 0.2)  # 10x10 grid, 20% of tiles are walls
    free_tiles = env.list_available_tiles()
    new_nodes = [Node(i, 84, 0, None, env) for i in free_tiles]

    for elem in new_nodes:
        # Valid flat tile indices run from 0 to L*H - 1, so the upper bound is strict.
        assert 0 <= elem.tile_pos < env.L * env.H
Example #2
def main(args):
    with tf.Session() as sess:
        env = World()

        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric (checked element-wise on the array bounds).
        assert np.all(env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        saver = tf.train.Saver(max_to_keep=5)
        latest_checkpoint = tf.train.latest_checkpoint('./models/ddpg/')
        saver.restore(sess, latest_checkpoint)

        while True:
            observation, reward, done = env.reset(), 0, False
            throw_away_action = actor.predict([observation])  # prime the network

            while True:
                action = actor.predict([observation])
                observation, reward, done = env.step(action)
                if observation is False:  # error communicating with the Arduino
                    done = False  # already False, but be explicit
                    break  # abandon this rollout and start another
                if done:
                    break
Example #3
class BasicWorldTest( WorldSetup ):
	""" Basic World behaviour tests. """

	def setUp(self):
		""" Build a World from the shared test fixtures. """

		WorldSetup.setUp( self )
		self.world = World( self.savePath, self.settings, self.blockTypes
				, self.blockTypeIds, self.players, self.playerIds
				)

		return None

	def test_toChunkPos(self):
		""" World coordinates should map to chunk coordinates by truncation. """

		p = Point3( 17, 20, 9 )
		newP = self.world.toChunkPos( p )
		self.assertEqual( newP, Point3(0, 0, 0) )

		p = Point3( 66, 30, 201 )
		newP = self.world.toChunkPos( p )
		self.assertEqual( newP, Point3(1, 0, 3) )

		p = Point3( -17, -20, -9 )
		newP = self.world.toChunkPos( p )
		self.assertEqual( newP, Point3(0, 0, 0) )

		p = Point3( -66, -30, -201 )
		newP = self.world.toChunkPos( p )
		self.assertEqual( newP, Point3(-1, 0, -3) )

		return None
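The expected values are consistent with truncation toward zero at a fixed chunk edge length. A minimal sketch of the conversion, assuming a chunk size of 64 (the actual size is not shown in the snippet; any edge length from 51 to 66 would satisfy these four cases):

CHUNK_SIZE = 64  # assumed, not shown in the original snippet

def toChunkPos(p):
    # int() truncates toward zero, which maps -17 -> 0 and -66 -> -1 as asserted above.
    return Point3(int(p.x / CHUNK_SIZE),
                  int(p.y / CHUNK_SIZE),
                  int(p.z / CHUNK_SIZE))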
Example #4
def test_availability():
    env = World(10, 10, 0.2)
    free_tiles = env.list_available_tiles()

    new_node = [Node(i, 84, 0, None, env) for i in free_tiles]

    for elem in new_node:
        # A grid value of 1 presumably marks a wall; a free tile must not be one.
        assert env.w[elem.tile_pos] != 1
Example #5
  def __init__(self, args):
    self.args = args
    self.env = World()

    # Pin Setup:
    self.buttonPin = 4
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(self.buttonPin, GPIO.IN, pull_up_down=GPIO.PUD_UP)
Example #6
def simulate_cruise_control(controller, hill=False):
    # Setup
    car = Car(constants.CAR_MASS)
    traj = Reference(CRUISE_CONTROL_TEST, offset=car.length/2)
    if hill:
        world = World(constants.HILL_SLOPE, hill=True)
    else:
        world = World(constants.GROUND_HEIGHT)

    # Simulate and animate
    results = simulate(car, world, traj, controller, slope=hill)
    animate(world, [results])

    return results
Example #7
def test_uniqueness():
    env = World(10, 10, 0.2)
    free_tiles = env.list_available_tiles()
    nodes = [Node(i, 84, 0, None, env) for i in free_tiles]

    # Collect the node positions and check that none of them repeats.
    pos = [elem.tile_pos for elem in nodes]
    assert len(pos) == len(set(pos))
Example #8
def generate_world():
    length = random.randint(10, 50)
    height = random.randint(10, 50)
    w_percentage = random.random() / 4  # wall density in [0, 0.25)
    env = World(length, height, w_percentage)
    print("{}x{}x{}".format(length, height, w_percentage))
    return env
Example #9
    def __init__(self, start, target, allow_diagonals, World):
        self.start = Node(start, target, 0, None, World, True, allow_diagonals)
        self.target = Node(target, target, -1, None, World, True,
                           allow_diagonals)

        self.open_nodes = [self.start]
        self.closed_nodes = []

        self.last_node = None

        self.reached = False
        self.available_tiles = World.list_available_tiles()

        while not self.start.is_accessible():
            stdout.write("\033[;1m" + "\033[1;31m")
            stdout.write('START tile has no children, choose another one! ')
            stdout.write("\033[0;0m")
            start = int(input("New START tile --->  "))
            self.start = Node(start, target, 0, None, World, False,
                              allow_diagonals, True)
        while not self.target.is_accessible():
            stdout.write("\033[;1m" + "\033[1;31m")
            stdout.write('TARGET tile has no children, choose another one! ')
            stdout.write("\033[0;0m")
            target = int(input("New TARGET tile --->  "))
            self.target = Node(target, target, 0, None, World, False,
                               allow_diagonals, True)

        self.path = [self.start.tile_pos]
        self.costs = [0]
Example #10
	def setUp(self):
		""" Build a World from the shared test fixtures. """

		WorldSetup.setUp( self )
		self.world = World( self.savePath, self.settings, self.blockTypes
				, self.blockTypeIds, self.players, self.playerIds
				)

		return None
Example #11
    def runAll(self, n_times):
        env = self.env

        # One cost list and one timing list per algorithm.
        costLists = {alg: [] for alg in self.algorithms}
        timeLists = {alg: [] for alg in self.algorithms}

        for _ in range(n_times):
            # Run every algorithm on the current world.
            for alg in self.algorithms:
                path, time = self.algorithms[alg]()
                costLists[alg].append(path["Costs"][-1])
                timeLists[alg].append(time)
                print(alg + " successfully computed.")

            # Regenerate a world of the same size and wall density for the next round.
            self.setEnv(World(env.L, env.H, env.pWalls), self.diagonals)

        self.plotPaths()

        return costLists, timeLists
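A hedged usage sketch for runAll: aggregating the returned per-algorithm lists with numpy (the benchmark object and its construction are assumed, not shown here):

import numpy as np

costs, times = benchmark.runAll(20)  # 'benchmark' is whatever object owns runAll
for alg in costs:
    print("{}: mean cost {:.2f}, mean time {:.4f}s".format(
        alg, np.mean(costs[alg]), np.mean(times[alg])))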
Example #12
def computeAndDisplayAStar(pathfinder):
    path, time = pathfinder.computePathAStar()  # assumed name, matching computePathDijkstra etc.
    pathfinder.plotPaths()


def computeAndDisplayDijkstra(pathfinder):
    path, time = pathfinder.computePathDijkstra()
    pathfinder.plotPaths()


def computeAndDisplayDFS(pathfinder):
    path, time = pathfinder.computePathDFS()
    pathfinder.displayEnv()


def computeAndDisplayBidirAStar(pathfinder):
    path, time = pathfinder.computePathBidirAStar()
    pathfinder.plotPaths()


def showComparisonPlots(pathfinder, test_samples):
    # Use the passed-in pathfinder rather than the global pathfinder_api.
    pathfinder.benchmark(test_samples, True, True)


if __name__ == "__main__":
    env = World(filename="worlds/colliders.csv")
    pathfinder_api = PathFinder(env)
    #pathfinder_api.displayEnvFigure()
    showComparisonPlots(pathfinder_api, 10)
    #computeAndDisplayDFS(pathfinder_api)
    #computeAndDisplayAStar(pathfinder_api)
    #computeAndDisplayDijkstra(pathfinder_api)
    #computeAndDisplayBidirAStar(pathfinder_api)
Example #13
import sys
import time

import pygame

if __name__ == '__main__':
    pygame.init()

    WIDTH = 800
    HEIGHT = 800

    SCREEN_SIZE = (WIDTH, HEIGHT)

    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(SCREEN_SIZE, 0, 32)
    pixels_per_sec = 10

    world = World(SCREEN_SIZE)

    gen_h = True
    gen_c = True

    # Add Creatures
    for i in range(50):
        world.add_creture(Plant(world))
        time.sleep(0.01)

    for i in range(15):
        world.add_creture(Herbivore(world, gender=gen_h))
        time.sleep(0.01)
        world.add_creture(Carnivore(world, gender=gen_c))
        time.sleep(0.01)
        gen_c = not gen_c
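The snippet ends before the display loop. A minimal continuation sketch; World.update() and World.draw(screen) are hypothetical names, since the real API is not shown:

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

        world.update()      # hypothetical: advance the simulation one tick
        world.draw(screen)  # hypothetical: render creatures onto the surface
        pygame.display.update()
        clock.tick(30)      # cap the frame rate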
Example #14
class TestWorld(unittest.TestCase):
    """
    Welcome to the test class for Conway's Game Of Life.

    The test cases laid out here are optional, but they
    might make it easier to get started. It's recommended
    to work from top to bottom, as the later test cases require a more
    complete implementation. Try to figure out WHAT you wish to test
    before figuring out the implementation.

    The basic idea here:

    1) WRITE TEST CASE.
    2) WRITE IMPLEMENTATION.
    3) RUN TEST.
    4) GOTO 1.

    The target class (test subject) where we'll implement the expected code
    should live in the ./lib/ folder, whereas this file is expected to
    live in the ./tests/lib/ folder. Run your tests from "." (the root
    of the project folder).

    Read more about the unittest module by visiting:
    https://docs.python.org/3.5/library/unittest.html
    """

    def setUp(self):
        """
        The "setUp" method runs before every test,
        usefull for initializing test subjects.
        """
        self.world = World()

    def test_canary_test(self):
        """
        The canary test is nice for checking that unit tests execute correctly.

        Run all tests:
        $ python -m unittest discover
        """
        self.assertTrue(True)
        self.assertIsInstance(self.world, World)

    def test_world_is_x_width(self):
        """
        Implement the constructor in the World class so
        the class gets this property, then remove the decorator.

        You can test this specific test alone by running:
        $ python -m unittest tests.lib.test_world.TestWorld.test_world_is_x_width
        """
        self.assertEqual(self.world.x_width, 50, "World width not 50")

    def test_world_is_y_height(self):
        self.assertEqual(self.world.y_height, 50, "World height not 50")

    def test_world_has_set_for_live_cells(self):
        self.assertIsInstance(self.world.live_cells, set, 'Live cells set missing')

    def test_world_supports_setting_initial_set_of_live_cells(self):
        world = World(live_cells = set([(1,2), (3,4)]))
        self.assertEqual(len(world.live_cells), 2, "Wrong number of cells")
        myset = set([(1,2), (3,4)])
        self.assertEqual(world.live_cells, myset, "Cells not equal")

    def test_autopopulate_world(self):
        self.world.autopopulate()
        self.assertEqual(len(self.world.live_cells), 25, "Not autopopulated")

    def test_world_can_be_erased(self):
        self.world.autopopulate()
        self.world.clearcells()
        self.assertEqual(len(self.world.live_cells), 0, "Not cleared")

    def test_add_single_cell_to_the_world(self):
        self.world.addcell(5,5)
        self.assertIn((5,5), self.world.live_cells)

    def test_add_single_cell_outside_world_boundaries_should_fail(self):
        with self.assertRaises(WorldBoundariesError):
            self.world.addcell(-2,-2)
        with self.assertRaises(WorldBoundariesError):
            self.world.addcell(999,999)

    def test_world_can_check_if_cell_coordinate_is_legal(self):
        self.assertTrue(self.world.checkpos(2,3))
        self.assertIsInstance(self.world.checkpos(-2,-3), str)
        self.assertIsInstance(self.world.checkpos(999,999), str)

    def test_world_can_count_neighbors_of_a_cell(self):
        self.world.clearcells()
        self.world.addcell(5,5)
        self.world.addcell(6,5)
        self.world.addcell(5,6)
        self.assertEqual(self.world.countneighbours(6,6), 3)
        self.assertEqual(self.world.countneighbours(5,5), 2)
        self.assertEqual(self.world.countneighbours(5,7), 1)
        self.assertEqual(self.world.countneighbours(7,7), 0)

    def test_world_correctly_count_neighbors_even_at_edge_of_map(self):
        self.world.clearcells()
        self.world.addcell(0,0)
        self.assertEqual(self.world.countneighbours(49,49), 1)
        self.world.clearcells()
        self.world.addcell(49,49)
        self.assertEqual(self.world.countneighbours(0,0), 1)

    def test_world_generate_new_population_glider_pattern(self):
        """
        Glider is a famous game of life pattern.

        .O.
        ..O => O.O
        OOO    .OO
               .O.

        http://www.conwaylife.com/wiki/Glider
        """
        glider_step_one = set([
            (5, 5),
            (6, 6),
            (4, 7),
            (5, 7),
            (6, 7)
            ])
        glider_step_two = set([
            (4, 6),
            (6, 6),
            (5, 7),
            (6, 7),
            (5, 8)
            ])
        self.world.live_cells = glider_step_one
        self.world.generate_new_generation()
        self.assertEqual(self.world.live_cells, glider_step_two, "New population didn't match expected pattern")

    def test_world_generate_new_population_claw_with_tail(self):
        """
        Claw with tail is a still life pattern that doesn't change.

        OO        OO
        .O        .O
        .O.OO  => .O.OO
        ..O..O    ..O..O
        ....OO    ....OO

        http://www.conwaylife.com/wiki/Claw_with_tail
        """
        claw_step_one = set([
            (2,2),
            (3,2),
            (3,3),
            (3,4),
            (5,4),
            (6,4),
            (4,5),
            (7,5),
            (6,6),
            (7,6)
            ])
        claw_step_two = set([
            (2,2),
            (3,2),
            (3,3),
            (3,4),
            (5,4),
            (6,4),
            (4,5),
            (7,5),
            (6,6),
            (7,6)
            ])
        self.world.live_cells = claw_step_one
        self.world.generate_new_generation()
        self.assertEqual(self.world.live_cells, claw_step_two, "New population didn't match expected pattern")
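The edge-of-map test above implies a toroidal world: neighbors wrap around the 50x50 grid. A minimal sketch of the generation step under that assumption, using the set-of-live-cells representation the tests rely on:

from collections import Counter

def generate_new_generation(self):
    # Count live neighbors for every cell adjacent to at least one live cell,
    # wrapping coordinates at the world edges.
    neighbour_counts = Counter(
        ((x + dx) % self.x_width, (y + dy) % self.y_height)
        for (x, y) in self.live_cells
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    )
    # Conway's rules: a cell survives with 2 or 3 neighbors, is born with exactly 3.
    self.live_cells = {
        cell for cell, n in neighbour_counts.items()
        if n == 3 or (n == 2 and cell in self.live_cells)
    }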
Example #15
 def setUp(self):
     """
     The "setUp" method runs before every test,
     usefull for initializing test subjects.
     """
     self.world = World()
Example #16
import numpy as np
from lib.world import World
world = World()
print(world.getObservation())
Example #17
import numpy as np
import time
print("Loading world...")
from lib.world import World
print("instantiating world...")
world = World()
world.updateMotors(0.20)  # drive the motors briefly...
time.sleep(1)
world.updateMotors(0)     # ...then stop
Example #18
import signal
import sys
import threading

import lib.planner

debug = False
lib.planner.debug = debug


def int_handler(signum, frame):
    # SIGINT is treated the same as the planner running out of time.
    planner_timeout()


signal.signal(signal.SIGINT, int_handler)

timer = threading.Timer(140, planner_timeout)
timer.start()

try:
    w_init = World()
    w_init.parse(sys.stdin.readlines())
    if debug:
        w_init.show_full()

    w = w_init.clone()
    goals = [(dist(l, w.robot), l) for l in w.lambdas]
    plan, w2 = make_plan(w)

    print(plan)
except:
    # Swallow any failure so the timer below is always cancelled.
    pass

timer.cancel()

# plan = "LDRDDUULLLDDL"
Example #19
 def __init__(self, args):
     self.args = args
     self.env = World()
Example #20
import numpy as np
from lib.world import World

world = World()
world.arduino.give_robot_slack()
Example #21
	savePath = Filename( 'saveData/' )

	files = { 'main': Filename('config/settings.cfg') }
	configs = {}

	for s in ['players', 'playerIds', 'blockTypes', 'blockTypeIds']:
		files[ s ] = savePath + s + '.dat'

	for name, filename in files.items():
		files[ name ] = FileObject( filename )
	for name, fileObj in files.items():
		configs[ name ] = Config( fileObj )
	for config in configs.values():
		config.process()

	scene = Scene()

	playerFactory = PlayerFactory( configs['players'], configs['playerIds'] )
	blockTypeFactory = BlockTypeFactory( configs['blockTypes']
			, configs['blockTypeIds'], scene.loader
			)

	playerFactory.process()
	blockTypeFactory.process()

	world = World( configs['main'], playerFactory, blockTypeFactory )
	world.setup()

	# assume we are at position (0, 0, 0)
	for chunk in world.getChunks( Vec3(0, 0, 0) ):
		chunk.load( scene.render )

	scene.run()
Example #22
class Agent:
  def __init__(self, args):
    self.args = args
    self.env = World()

    # Pin Setup:
    self.buttonPin = 4
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(self.buttonPin, GPIO.IN, pull_up_down=GPIO.PUD_UP)

  def print_episode_results(self, ep_index, action_lengths):
    print("Episode {0}. Steps {1}. Avg {2:.2f}".format(
      ep_index,
      action_lengths[-1],
      np.average(action_lengths[-10:])
    ))

  def run(self):
    with tf.Graph().as_default(), tf.Session() as session:
      # policy = Policy(session, self.env.observation_space.shape[0], self.env.action_space.low[0], self.env.action_space.high[0])
      policy = Policy(session, self.env.observation_space.shape[0], -0.7, 0.7)
      session.run(tf.global_variables_initializer())
      log_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H_%M_%S')
      writer = tf.summary.FileWriter("./logs/" + log_name, graph=tf.get_default_graph())
      saver = tf.train.Saver(max_to_keep=5)
      ep_index = 0
      action_lengths = []
      if self.args['restore'] or self.args['resume']:
        print("Restoring from saved model.")
        saver.restore(session, tf.train.latest_checkpoint('./models/'))
        file = open("./models/at_episode.txt", "r")
        ep_index = int(file.read())
        print("Restoring to episode: {}".format(ep_index))
        file.close()
        file = open("./models/avg_length.txt", "r")
        avg_length = float(file.read())
        print("Restoring to avg length: {}".format(avg_length))
        file.close()
        for _ in range(10):
          action_lengths.append(avg_length)
      while True:
        observations, actions, rewards = self.policy_rollout(policy)
        if observations is False:
          print("Errored on getObservation. Restarting episode...")
          continue
        if len(actions) < 5:
          print("Skipping short {}-step rollout".format(len(actions)))
          continue # this was almost certainly a run with robot not ready
        if len(action_lengths) < 10:
          action_lengths.append(len(actions))
          print("Skipping until we have 10 episode lengths saved. {} Saved so far.".format(len(action_lengths)))
          continue
        returns = self.discount_rewards(rewards)
        returns = returns / returns[0]
        relative = len(actions) - np.average(action_lengths[-10:])
        returns = returns * relative
        # returns = (returns - np.mean(returns)) / (np.std(returns) + 1e-10)
        print('\a') # episode done bell
        print("waiting on Beaker button to update Params...")
        while True:
            if GPIO.input(self.buttonPin):  # button is released
              pass  # do nothing; keep polling
            else:  # button is pressed
              print("button pressed! Updating params!")
              print('\a')
              break
        time.sleep(0.5)  # crude debounce
        summaries = policy.update_parameters(observations, actions, returns)
        action_lengths.append(len(actions))
        avg_length = np.average(action_lengths[-10:])
        self.log_scalar('avg_length', avg_length, ep_index, writer)
        writer.add_summary(summaries, global_step=ep_index)
        writer.flush()
        self.print_episode_results(ep_index, action_lengths)
        ep_index = ep_index + 1
        if ep_index > 0:
          print("=====> Saving model")
          saver.save(
            session,
            './models/my_model',
            global_step=ep_index,
            write_meta_graph=False)
          file = open("./models/at_episode.txt","w")
          file.write(str(ep_index))
          file.close
          file = open("./models/avg_length.txt","w")
          file.write(str(avg_length))
          file.close


  def policy_rollout(self, policy):
    while True:
      observation, reward, done = self.env.reset(), 0, False
      observations, actions, rewards  = [], [], []
      throw_away_action = policy.select_action(observation) # prime network.

      step = 0
      last_time = current_milli_time()
      while not done:
        time_delta = current_milli_time() - last_time
        print("Local Loop Time: {}, Arduino Loop Time: {}".format(time_delta, self.env.outerDt))
        last_time = current_milli_time()
        action = policy.select_action(observation)
        time_delta = current_milli_time() - last_time
        observations.append(observation)
        actions.append(action)
        print("obsv: {}, action: {}, oldTargetRadPerSec: {}".format(observation, action, self.env.targetRPS))
        observation, reward, done = self.env.step(action)
        if observation is False:  # error communicating with the Arduino
            done = False  # already False, but be explicit
            break  # abandon this rollout and try another
        time_delta = current_milli_time() - last_time
        rewards.append(reward)
        step = step + 1

      if done:
        self.env.updateMotors(0)
        return observations, actions, rewards

  def discount_rewards(self, rewards):
    discounted_rewards = np.zeros_like(rewards)
    running_add = 0
    for t in reversed(range(0, len(rewards))):
      running_add = running_add * 0.99 + rewards[t]
      discounted_rewards[t] = running_add
    return discounted_rewards

  def log_scalar(self, tag, value, step, writer):
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    writer.add_summary(summary, step)
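For reference, discount_rewards computes reward-to-go with a 0.99 discount. A standalone restatement of the same recurrence with a hand-checked example:

import numpy as np

def discount_rewards(rewards, gamma=0.99):
    # Same recurrence as Agent.discount_rewards above.
    discounted = np.zeros_like(rewards)
    running_add = 0
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted[t] = running_add
    return discounted

print(discount_rewards([1.0, 1.0, 1.0]))
# t=2: 1.0; t=1: 1.0 + 0.99*1.0 = 1.99; t=0: 1.0 + 0.99*1.99 = 2.9701
# -> [2.9701 1.99   1.    ]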
Example #23
import numpy as np
import time
from lib.world import World
world = World()
world.reset()
Example #24
import time
import datetime
from lib.world import World

world = World()

while True:
    observation = world.getObservation()
    print('.')
    if world.isDone():
        print("\nDone!\n")
        world.episodeStartTime = datetime.datetime.now()
        world.arduino.resetRobot()
    time.sleep(0.25)
Example #25
def main():
    """
    Prepara las ventanas, define modelos, controlador y vista y corre el programa.
    :return: void
    """

    # Compute the game checksum
    checksum = [
        path_checksum('lib', VERBOSE),
        '8e1fd1c03d2bfe89d7dbdab8b0c4c69a'.upper(),
        path_checksum('bin', VERBOSE)
    ]

    # Load the configuration files
    control_config = Configloader(DIR_CONFIG + 'control.ini', verbose=VERBOSE)
    game_config = Configloader(DIR_CONFIG + 'game.ini', verbose=VERBOSE)
    map_config = Configloader(DIR_CONFIG + 'map.ini', verbose=VERBOSE)
    score_config = Configloader(DIR_CONFIG + 'scoreboard.ini', verbose=VERBOSE)
    user_config = Configloader(DIR_CONFIG + 'user.ini', verbose=VERBOSE)
    view_config = Configloader(DIR_CONFIG + 'view.ini', verbose=VERBOSE)
    window_config = Configloader(DIR_CONFIG + 'window.ini', verbose=VERBOSE)
    world_config = Configloader(DIR_CONFIG + 'world.ini', verbose=VERBOSE)

    # Load the language pack
    lang = langs.Langloader(game_config.getValue('LANG'))

    # Load the client's display information
    display_info = pygame.display.Info()

    # Check that the player name is not "Player"; if it is invalid, request a new one
    if not username.validate(user_config.getValue('NAME')):
        new_name = username.request(lang.get(111), lang.get(112))
        if new_name is not username.NO_VALID_NAME:
            user_config.setParameter('NAME', new_name)
            user_config.export()
        else:
            utils.destroy_process()

    # Create the window
    window = Window(window_config, lang.get(10),
                    pygame.image.load(getIcons('icon')), display_info)
    clock = pygame.time.Clock()  # game clock
    fps = int(game_config.getValue('FPS'))  # target frames per second to draw

    # Create the world
    world = World(world_config,
                  map_config,
                  window,
                  checksum,
                  score_config,
                  user_config,
                  lang,
                  game_config,
                  verbose=VERBOSE)
    # world.load_map(1)

    # Create the start and pause menus
    menus = Createuimenu(lang, window, world, game_config, user_config,
                         view_config, window_config, world_config, map_config)

    # Create the view
    vista = View(window, clock, world, lang, view_config, menus)
    menus.addView(vista)

    # Create the controller
    control = Controller(world,
                         clock,
                         lang,
                         control_config,
                         window,
                         menus,
                         verbose=VERBOSE)
    menus.addController(control)
    vista.add_controller(control)

    # Run the main loop
    while True:
        clock.tick(fps)
        vista.draw(control.event_loop())
Example #26
class Agent:
    def __init__(self, args):
        self.args = args
        self.env = World()

    def print_episode_results(self, ep_index, action_lengths):
        print("Episode {0}. Steps {1}. Avg {2:.2f}".format(
            ep_index, action_lengths[-1], np.average(action_lengths[-10:])))

    def run(self):
        with tf.Graph().as_default(), tf.Session() as session:
            # policy = Policy(session, self.env.observation_space.shape[0], self.env.action_space.low[0], self.env.action_space.high[0])
            policy = Policy(session, self.env.observation_space.shape[0], -0.7,
                            0.7)
            session.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter("./logs/",
                                           graph=tf.get_default_graph())
            saver = tf.train.Saver(max_to_keep=5)
            batch = 0
            if self.args['restore'] or self.args['resume']:
                print("Restoring from saved model.")
                saver.restore(session, tf.train.latest_checkpoint('./models/'))
                file = open("./models/at_batch.txt", "r")
                batch = int(file.read())
                file.close()

            while True:
                print('=====\nBATCH {}\n====='.format(batch))
                batch_observations, batch_actions, batch_rewards = [], [], []
                ep_lengths = []
                global_episode = 0
                for ep_index in range(10):
                    observations, actions, rewards = self.policy_rollout(
                        policy)
                    batch_observations.extend(observations)
                    batch_actions.extend(actions)
                    advantages = [len(rewards)] * len(rewards)
                    batch_rewards.extend(advantages)
                    ep_length = len(actions)
                    ep_lengths.append(ep_length)
                    global_episode = (ep_index + 1) + (10 * batch)
                    print('Episode {} steps: {}'.format(
                        global_episode, ep_length))
                batch_rewards = (batch_rewards - np.mean(batch_rewards)) / (
                    np.std(batch_rewards) + 1e-10)
                policy.update_parameters(batch_observations, batch_actions,
                                         batch_rewards)
                print("AVG: {0:.2f}".format(np.mean(np.array(ep_lengths))))
                saver.save(session,
                           './models/my_model',
                           global_step=global_episode,
                           write_meta_graph=False)
                file = open("./models/at_batch.txt", "w")
                batch = batch + 1
                file.write(str(batch))
                file.close

    def policy_rollout(self, policy):
        observation, reward, done = self.env.reset(), 0, False
        observations, actions, rewards = [], [], []
        throw_away_action = policy.select_action(observation)  # prime network.

        step = 0
        last_time = current_milli_time()
        while not done:
            time_delta = current_milli_time() - last_time
            print("Loop Time: {}".format(time_delta))
            last_time = current_milli_time()
            if step == 2:
                self.env.arduino.give_robot_slack()
                time_delta = current_milli_time() - last_time
            action = policy.select_action(observation)
            time_delta = current_milli_time() - last_time
            observations.append(observation)
            actions.append(action)
            print("obsv: {}, action: {}".format(observation, action))
            observation, reward, done = self.env.step(action)
            time_delta = current_milli_time() - last_time
            rewards.append(reward)
            step = step + 1

        return observations, actions, rewards

    def discount_rewards(self, rewards):
        discounted_rewards = np.zeros_like(rewards)
        running_add = 0
        for t in reversed(range(0, len(rewards))):
            running_add = running_add * 0.99 + rewards[t]
            discounted_rewards[t] = running_add
        return discounted_rewards