def run(self, agent):
    """Drive ``agent`` until the screen reports the run is over, then train.

    Resets the input, then repeatedly asks ``agent`` for an action on the
    current screen state, applies it through ``Input.Act`` and grabs the next
    screen with ``getScrn()`` until ``getScrn()`` reports ``running`` as
    false.  The total reward is derived from the elapsed wall-clock time,
    spread over the recorded steps, handed to ``agent.observe`` and followed
    by one ``agent.replay()`` call.

    Args:
        agent: object providing ``act(state)``, ``step()``,
            ``observe(state, action, reward)`` and ``replay()``.

    Reads ``self.i`` only to label the printed reward.
    """
    Input.Reset(keyboard)
    # NOTE(review): presumably gives the game time to restart -- confirm.
    time.sleep(3)
    start = time.time()
    states = []
    actions = []
    s, running = getScrn()
    while True:
        a = agent.act(s)
        actions.append(a)
        states.append([s])
        Input.Act(keyboard, a)
        s_, running = getScrn()
        agent.step()
        # The frame grabbed after the run ended is never stored or fed back
        # to the agent, so no placeholder terminal state is needed.
        if not running:
            break
        s = s_

    # NOTE(review): 1.75 looks like a fixed time offset removed from the
    # elapsed seconds before scaling by 100 -- confirm its meaning.
    R = (time.time() - start - 1.75) * 100
    # Single-argument call: same output as the old print statement, and it
    # parses on both Python 2 and Python 3.
    print('Reward On Try {}:'.format(self.i) + ' ' + str(R))

    total = len(states)  # always >= 1: the loop above runs at least once
    r = R / total
    # Float literal on purpose: under Python 2 integer division ``1 / 8`` is
    # 0, which turned every weight into 1 and disabled the decay entirely.
    exponent = 1.0 / 8
    norm = total ** exponent
    for j, (state, action) in enumerate(zip(states, actions)):
        # Weight falls from 1 at the first step to (1/total)**(1/8) at the
        # last one, so the final steps before the end earn the least reward.
        weight = ((total - j) ** exponent) / norm
        agent.observe(state, action, r * weight)
    agent.replay()
    time.sleep(0.5)
# Main loop: each pass plays one game with an epsilon value computed from
# ``steps`` and records what happened in ``cgame``.
while True:
    running = True
    # Epsilon decays exponentially from MAX_EPSILON toward MIN_EPSILON as
    # ``steps`` grows; LAMBDA sets the decay rate.
    epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * math.exp(
        -LAMBDA * steps)
    cgame = []  # one [image, elapsed_seconds, action] entry per frame
    steps += 1
    Input.Reload(keyboard)
    start = time.time()
    while running:
        im, running = getScrn(sess)
        ctime = time.time() - start
        a = act(sess, im, ctime, epsilon)
        Input.Act(keyboard, a)
        cgame.append([im, ctime, a])

    # Drop the last five recorded frames (or all of them when fewer exist).
    # Slice deletion never raises, so no bare ``except`` is needed.
    # NOTE(review): presumably these are the frames just before the game
    # ended -- confirm.
    del cgame[-5:]
    score = len(cgame)
    # True when a game of exactly this length is already listed in ``lens``.
    same = score in lens