# NOTE(review): the next three statements look like the tail of a definition
# that begins above this chunk (its header is not visible here). They are
# reproduced in their original order; re-indent them if they belong to a
# function body. Only the print call was changed, to the parenthesised form
# that prints the same text on Python 2 and also parses on Python 3.
print("Resetting")
long_press(0)
long_press(0)

# Main loop
#
# Each step records exactly one entry in each of the four buffers
# (images, actions, terminals, rewards), whether or not the agent died.
for epoch in range(MAX_EPOCHS):
    print("New epoch: %d\n" % epoch)
    reset()

    for step in range(MAX_STEPS):
        # Keep note of the fact that we don't have the concept of an episode,
        # unlike nathan's implementation.
        image = get_observation()
        # Get the best possible action from the current neural network.
        best_action = choose_action(step)

        images.push(image)
        actions.push(best_action)

        # If the current state is dead, push 0 reward and mark the state as
        # terminal, then reset and continue with the next step. The chosen
        # action is recorded above but is not pressed in this case.
        if am_i_dead():
            terminals.push(1)
            rewards.push(0)
            reset()
            continue

        # Still alive, still alive!
        terminals.push(0)

        # Long press the best action because humans press keys for longer
        # durations; the value returned by long_press() is stored as the
        # reward for this step.
        reward = long_press(best_action)
        rewards.push(reward)
# Last lookup on the existing list1 object (created above this chunk).
list1.Find("3")

# Rebuild the ring buffer with zero capacity and attempt a single insert.
print("Reseting the RingBuffer to capacity 0")
list1 = RingBuffer(0)
print("Trying to insert_keep_new(1)")
list1.insert_keep_new("1")

print("-----------------------")
print("Test Case of Stack")
print(
    "Assumed Stack capacity is 5 for testing. "
    "All the test cases below are according to size 5"
)

# The stack size is read interactively; list1 is rebound to the new Stack.
sizeOfS = int(input("Enter size of Stack"))
list1 = Stack(sizeOfS)

# First batch: announce and push 1, 2, 3 in order.
for announcement, item in (("Adding 1", 1), ("Adding 2", 2), ("Adding 3", 3)):
    print(announcement)
    list1.push(item)
# NOTE(review): the return value of __str__() is discarded here and below;
# presumably the method prints the contents itself -- confirm against Stack.
list1.__str__()

# Show the top element, then remove it.
print("poping ", list1.peek())
list1.pop()
list1.__str__()

# Second batch: announce and push 4 and 5.
for announcement, item in (("Pushing 4", 4), ("Pushing 5", 5)):
    print(announcement)
    list1.push(item)
list1.__str__()

# One more push after the previous batch.
print("Pushing 6")
list1.push(6)
#long_press(0)
# NOTE(review): the disabled call above looks like the tail of a definition
# that begins above this chunk; it is kept as found.

# Main loop
#
# Each step records exactly one entry in each of the four buffers
# (images, actions, terminals, rewards), whether or not the agent died.
for epoch in range(MAX_EPOCHS):
    # Parenthesised single-argument print: same output on Python 2, and it
    # also parses on Python 3.
    print("New epoch: %d\n" % epoch)
    reset(epoch)

    for step in range(MAX_STEPS):
        # Keep note of the fact that we don't have the concept of an episode,
        # unlike nathan's implementation.
        image = get_observation()
        # Get the best possible action from the current neural network.
        best_action = choose_action(image, step, epoch)

        images.push(image)
        actions.push(best_action)

        # If the current state is dead, push 0 reward and mark the state as
        # terminal, then reset and continue with the next step. The chosen
        # action is recorded above but is not pressed in this case.
        if am_i_dead():
            terminals.push(1)
            rewards.push(0)
            reset(epoch)
            continue

        # Still alive, still alive!
        terminals.push(0)

        # Long press the best action because humans press keys for longer
        # durations; the value returned by long_press() is stored as the
        # reward for this step.
        reward = long_press(best_action)
        rewards.push(reward)