Exemplo n.º 1
0
        print 'episode:', episode, 'step:', step, 'eps:', epsilon, 'ave:', time / 100., 'Q:', Q[
            0]
        time = 0.

    #t = deepcopy(Q)
    # Learning step, executed inside an enclosing loop whose header is not
    # visible in this excerpt.
    #
    # While step < 100 the rest of the iteration is skipped, so neither the
    # model update nor the target_model refresh below happens.
    # NOTE(review): presumably a warm-up so the replay memory holds some
    # transitions before training starts -- confirm.
    if step < 100:
        continue
    # Train on exactly one stored entry: slot (Memory.count - 1) mod 10**6.
    # NOTE(review): this looks like "the most recently stored transition" in a
    # ring buffer of capacity 10**6 -- confirm against the Memory class.
    # The commented-out lines below are alternative sampling strategies that
    # are currently disabled.
    sample = [Memory.ReplayMemory[(Memory.count - 1) % 10**6]]  #sample(16)
    #sample = []
    #for i in range(10):
    #    sample.append(Memory.ReplayMemory[np.random.randint(0,min(10**6,Memory.count-1))])
    #sample = Memory.sample(32)
    #print len(sample)
    loss = 0  # overwritten for every entry processed in the loop below
    # Each entry `s` is read at indices 0..4. From how they are used they
    # appear to be (state, action, reward, next_state, done) -- TODO confirm
    # against the code that fills Memory.ReplayMemory.
    for s in sample:
        Q = model(s[0])
        # The target starts as a copy of the prediction; only index s[1] is
        # changed below, so Q - t is non-zero only at that index.
        t = deepcopy(Q)
        if s[4]:
            # s[4] truthy: the target at index s[1] is s[2] alone.
            t[0][s[1]] = s[2]
            loss = Q - t
            model.update(loss)
        else:
            # Otherwise the target adds gamma times the largest value that
            # target_model outputs for s[3].
            # NOTE(review): save=False presumably keeps target_model from
            # recording state for a later update() -- confirm in the model
            # class.
            next_Q = target_model(s[3], save=False)
            t[0][s[1]] = s[2] + gamma * np.max(next_Q[0])
            loss = Q - t
            model.update(loss)
    #model.update(loss)

    # On every step that is a multiple of 10, replace target_model with a deep
    # copy of the current model.
    if step % 10 == 0:
        target_model = deepcopy(model)
Exemplo n.º 2
0
# Training loop: 6,000,000 single-sample forward passes, cycling through
# indices 0..59999 of train_data / train_label, and printing the fraction of
# correct argmax predictions once per complete 60,000-sample window.
NUM_TRAIN = 60000  # number of training indices the loop cycles through
loss = 0
count = 0  # correct predictions in the current window; reset after each report
for i in tqdm(range(6000000)):
    idx = i % NUM_TRAIN
    inp = np.zeros((1, 784))
    inp[0] = train_data[idx]
    y = model(inp)
    # One-hot target: 1.0 at the label's index, 0.0 everywhere else.
    t = np.zeros((1, 10))
    t[0][train_label[idx]] = 1.
    loss += y - t
    # NOTE(review): `i % 100` is truthy for every i that is NOT a multiple of
    # 100, so update() runs on 99 of every 100 iterations with the (mostly
    # single-sample) difference scaled by 1/100. If the intent was one update
    # per 100 accumulated samples, the condition should be `i % 100 == 99`.
    # Left unchanged because switching alters the training dynamics and its
    # correctness depends on how model.update() uses state from the forward
    # pass -- confirm before changing.
    if i % 100:
        model.update(loss / 100.)
        loss = 0
    if np.argmax(y[0]) == train_label[idx]:
        count += 1
    # Report at the END of each window (i = 59999, 119999, ...). Checking
    # `i % NUM_TRAIN == 0` instead would put 60,001 samples in the first
    # window and never report the final one.
    if (i + 1) % NUM_TRAIN == 0:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(count / float(NUM_TRAIN))
        count = 0