# NOTE(review): fragment of a DQN-style training step — the enclosing
# episode/step loop is NOT visible in this chunk (the `continue` below
# targets it), and this line arrived with all indentation collapsed.
# The nesting below is a reconstruction; token content is unchanged.
# External names assumed defined by the surrounding script: episode, step,
# epsilon, time, Memory, model, target_model, gamma — TODO confirm.

# Periodic progress report; `time / 100.` presumably averages a counter
# accumulated over the last 100 steps — verify against the outer loop.
print 'episode:', episode, 'step:', step, 'eps:', epsilon, 'ave:', time / 100., 'Q:', Q[ 0]
time = 0.
#t = deepcopy(Q)
# Warm-up: skip learning until the replay memory has at least 100 entries.
if step < 100:
    continue
# Trains on only the single most recent transition (the sampling
# alternatives below are commented out). Replay memory is a ring buffer
# of capacity 10**6; each entry appears to be
# (state, action, reward, next_state, done) — TODO confirm with writer.
sample = [Memory.ReplayMemory[(Memory.count - 1) % 10**6]]  #sample(16)
#sample = []
#for i in range(10):
#    sample.append(Memory.ReplayMemory[np.random.randint(0,min(10**6,Memory.count-1))])
#sample = Memory.sample(32)
#print len(sample)
loss = 0
for s in sample:
    # Forward pass on the stored state; t becomes the TD target, equal to
    # the prediction everywhere except the taken action's entry, so the
    # (Q - t) error is zero for all other actions.
    Q = model(s[0])
    t = deepcopy(Q)
    if s[4]:
        # Terminal transition: target is the raw reward.
        t[0][s[1]] = s[2]
        loss = Q - t
        model.update(loss)
    else:
        # Non-terminal: bootstrap from the target network
        # (r + gamma * max_a' Q_target(s', a')).
        next_Q = target_model(s[3], save=False)
        t[0][s[1]] = s[2] + gamma * np.max(next_Q[0])
        loss = Q - t
        model.update(loss)
    #model.update(loss)
# NOTE(review): placement of this sync relative to the `for s in sample`
# loop is inferred — indentation was lost. Syncing the target network
# every 10 steps via deepcopy; `step % 10` is unusually frequent for DQN,
# verify intent.
if step % 10 == 0:
    target_model = deepcopy(model)
# NOTE(review): MNIST training loop (6,000,000 iterations cycling over a
# 60,000-sample set, i.e. ~100 epochs). This line arrived with all
# indentation collapsed; the nesting below is a reconstruction — token
# content is unchanged. External names assumed defined earlier in the
# script: model, train_data, train_label, count, tqdm — TODO confirm.
loss = 0
for i in tqdm(range(6000000)):
    #if train_label[i%60000]>1:
    #    continue
    #count2 += 1
    #inp = randint(0,2,(1,2))
    # One flattened 28x28 image per step as a (1, 784) batch; dtype is
    # whatever np.zeros defaults to (float64) — presumably fine for model.
    inp = np.zeros((1, 784))
    inp[0] = train_data[i % 60000]
    y = model(inp)
    # One-hot target over the 10 digit classes.
    t = np.zeros((1, 10))
    #t[0][0] = train_label[i%60000]
    t[0][train_label[i % 60000]] = 1.
    #t = np.zeros((1,1))
    #if int(inp[0][0]) ^ int(inp[0][1]):
    #    t[0][0] = 1.
    #inp = inp.astype(np.float32)
    # Accumulate the raw (prediction - target) error, apparently for a
    # 100-step minibatch update.
    loss += y - t
    # NOTE(review): suspected bug — `if i % 100:` is true for 99 of every
    # 100 iterations (all i NOT divisible by 100), so the "averaged"
    # update fires nearly every step with a 1-2 sample accumulator.
    # The minibatch intent suggests `if i % 100 == 0:` (or `== 99`);
    # left unchanged pending confirmation.
    if i % 100:
        model.update(loss / 100.)
        loss = 0
    #print loss
    #if y[0][0] > 0.5 and t[0][0] > 0.5 or y[0][0] < 0.5 and t[0][0] < 0.5:
    #    count += 1
    #print np.argmax(y[0])
    #print y[0]
    # Running count of correct argmax predictions for the epoch accuracy
    # report below (training accuracy, measured on the pre-update output).
    if np.argmax(y[0]) == train_label[i % 60000]:
        count += 1
    # Epoch boundary: print accuracy over the last 60k steps and reset.
    # NOTE(review): Python 2 print statement; `/ 60000.` forces float
    # division. The i == 0 guard skips the empty first report.
    if i % 60000 == 0 and i != 0:
        print count / 60000.
        count = 0