Example #1
 def train(self, trainSeq, validSeq, nEpochs=800, epochLen=175, validateEvery=25, vbs=500, printEvery=5, noiseSigma=0.4):
     print('-- Starting Training (nE=' + str(nEpochs) + ',eL=' + str(epochLen) + ') --')
     optimizer = optim.Adam(self.parameters(), lr = 0.03 * epochLen / 150.0)
     ns, na, tenv = self.stateSize, self.actionSize, trainSeq.env
     for epoch in range(nEpochs):
         if epoch % printEvery == 0: print('Epoch:',epoch, end='')
         loss = 0.0
         self.zero_grad() # Zero out gradients
         for i in range(epochLen):
             self.reInitialize() # Reset LSTM hidden state
             seq,label = trainSeq.randomTrainingPair() # Current value
             seq = [ s + npr.randn(len(s))*noiseSigma for s in seq ]
             seq = [ avar(torch.from_numpy(s).float()) for s in seq] 
             seq = [ torch.cat([self.stateSoftmax(sa[0:ns], tenv), F.softmax(sa[ns:ns+na])]) for sa in seq ]
             seqn = torch.cat(seq).view(len(seq), 1, -1) # [seqlen x batchlen x featureLen]
             prediction = self.forward(seqn)#[-1,:]
             label = avar(torch.from_numpy(label).float())
             loss += self._lossFunction(prediction, label, env=tenv)
         loss.backward()
         optimizer.step()
         if epoch % printEvery == 0: print(" -> AvgLoss",str(loss.data[0] / epochLen))
         if epoch % validateEvery == 0:
             bdata,blabels,bseqlen = validSeq.next(vbs,nopad=True)
             acc1, _ = self._accuracyBatch(bdata,blabels,validSeq.env)
             bdata,blabels,bseqlen = trainSeq.next(vbs,nopad=True)
             acc2, _ = self._accuracyBatch(bdata,blabels,tenv)
             print('\tCurrent Training Acc (est) =', acc2)
             print('\tCurrent Validation Acc (est) =', acc1)
     # Check training & final validation accuracy
     print('----')
     nmax = 5000 # Num from total to check at the end
     totalTrainAcc,nt = self._accuracyBatch(trainSeq.unpaddedData()[0:nmax],trainSeq.labels[0:nmax],trainSeq.env)
     print('Final Train Acc ('+str(nt)+'):',totalTrainAcc)
     totalValidAcc,nv = self._accuracyBatch(validSeq.unpaddedData()[0:nmax],validSeq.labels[0:nmax],validSeq.env)
     print('Final Validation Acc ('+str(nv)+'):',totalValidAcc)
 def getLossFromAllNodes(self,
                         alpha=0.5,
                         lambda_h=-0.025,
                         useHolder=False,
                         holderp=-5.0,
                         useOnlyLeaves=False,
                         gamma=0.01):
     targetNodes = self.allNodes
     if useOnlyLeaves: targetNodes = self.leaves
     totalInverseValue = avar(torch.FloatTensor([0.0]))
     totalEntropy = avar(torch.FloatTensor([0.0]))
     totalBranching = avar(torch.FloatTensor([0.0]))
     if not useHolder: holderp = 1.0
     nNodes = len(targetNodes)
     for i, node in enumerate(targetNodes):
         if i == 0:
             node.loss = avar(torch.FloatTensor([float('inf')]))
             continue
         if not node.branchingBreadth is None:
             totalBranching += node.branchingBreadth.type(
                 torch.FloatTensor)  # IGNORES PARENT TODO
         node.loss = -self.valueF(node.state)
         totalInverseValue += node.loss.pow(holderp)
         if not node.action is None:
             totalEntropy += -torch.sum(
                 node.action[0] * torch.log(node.action[0]))
     # Penalize negative reward and entropy
     totalLosses = alpha * (totalInverseValue / nNodes).pow(
         1.0 / holderp) + lambda_h * totalEntropy
     # Penalize too many branches
     totalLosses += gamma * totalBranching / nNodes
     return totalLosses
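
The aggregation above is a power (Hölder) mean of the per-node losses: each loss is raised to holderp, averaged, and the result taken to the 1/holderp power; for positive losses a negative exponent pulls the mean toward the smallest per-node loss. A minimal, self-contained sketch of that statistic (the loss values and exponents below are illustrative only):

    import torch

    def holder_mean(values, p):
        # Power (Hölder) mean: (mean(values**p))**(1/p).
        # p = 1 is the arithmetic mean; large negative p approaches the minimum.
        return (values.pow(p).sum() / len(values)) ** (1.0 / p)

    node_losses = torch.FloatTensor([0.5, 1.0, 4.0])  # hypothetical per-node losses
    print(holder_mean(node_losses, 1.0))   # arithmetic mean, about 1.83
    print(holder_mean(node_losses, -5.0))  # pulled toward the smallest loss, about 0.62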
Example #3
 def train(self, trainSet, validSet, nEpochs=1500, batch_size=200, validateEvery=200, vbs=500, printEvery=200):
     optimizer = optim.Adam(self.parameters(), lr = 0.0003)
     state_size = self.stateSize
     lossFunction = nn.BCELoss()
     
     train_x, train_y = trainSet
     train_x = avar( torch.FloatTensor(train_x), requires_grad=False)
     train_y = avar( torch.FloatTensor(train_y), requires_grad=False)
     valid_x, valid_y = validSet 
     valid_x = avar( torch.FloatTensor(valid_x), requires_grad=False)
     valid_y = avar( torch.FloatTensor(valid_y), requires_grad=False)
     ntrain, nvalid = len(train_x), len(valid_x)
     
     def getRandomMiniBatch(dsx,dsy,mbs,nmax):
         choices = torch.LongTensor( np.random.choice(nmax, size=mbs, replace=False) )
         return dsx[choices], dsy[choices]
     for epoch in range(nEpochs):
         if epoch % printEvery == 0: print('Epoch:',epoch, end='')
         loss = 0.0
         self.zero_grad() # Zero out gradients
         batch_x, batch_y = getRandomMiniBatch(train_x,train_y,batch_size,ntrain)
         prediction = self.forward(batch_x) #[-1,:]
         label = batch_y.unsqueeze(dim=1)
         #print(label.shape, prediction.shape)
         loss = lossFunction(prediction, label)
         loss.backward()
         optimizer.step()
         if epoch % printEvery == 0: print(" -> AvgLoss",str(loss.data[0]/ batch_size))
         if epoch % validateEvery == 0:
             batch_vx, batch_vy = getRandomMiniBatch(valid_x,valid_y,batch_size,nvalid)
             predv = self.forward(batch_vx) #[-1,:]
             vy = batch_vy.unsqueeze(dim=1)
             acc = self._accuracyBatch(vy,predv)
             print("VACC (noiseless) =",'%.4f' % acc,end=', ')
             print('\n')
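
Note that nn.BCELoss, as used above, expects predictions already squashed into (0, 1), typically by a final sigmoid layer. A self-contained sketch of that pairing in current PyTorch (the layer sizes and data are placeholders, not the model from this example):

    import torch
    import torch.nn as nn

    # Hypothetical binary classifier with a sigmoid head so outputs lie in (0, 1).
    model = nn.Sequential(nn.Linear(74, 32), nn.ReLU(), nn.Linear(32, 1), nn.Sigmoid())
    loss_fn = nn.BCELoss()

    x = torch.randn(200, 74)                # placeholder minibatch of features
    y = (torch.rand(200, 1) > 0.5).float()  # placeholder binary labels
    loss = loss_fn(model(x), y)
    loss.backward()
    print(loss.item())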
Example #4
    def train(self,trainSet,validSet,minibatch_size=200,maxIters=30000,testEvery=250,noiseSigma=0.2,
            noisyDataSetTxLoc=None,f_model_name=None):
        optimizer = optim.Adam(self.parameters(), lr = 0.0000025 * minibatch_size)
        lossf = nn.MSELoss() # nn.L1Loss() # nn.MSELoss() 
        train_x, train_y = trainSet 
        np.set_printoptions(precision=3)
        
        if not noisyDataSetTxLoc is None and os.path.exists(noisyDataSetTxLoc):
            print('Loading noised data (Note this ignores any changes to sigma)')
            with open(noisyDataSetTxLoc,'rb') as fff:
                train_x_noisy = pickle.load(fff)
        else:
            print('Noisifying data')
            train_x_noisy = self.noisify(train_x,noiseSigma)
            if not noisyDataSetTxLoc is None:
                print('Saving noised data to',noisyDataSetTxLoc)
                with open(noisyDataSetTxLoc,'wb') as fff:
                    pickle.dump(train_x_noisy, fff)
        np.set_printoptions()
        train_x = avar( torch.FloatTensor(train_x), requires_grad=False)
        train_x_noisy = avar( torch.FloatTensor(train_x_noisy), requires_grad=False)
        train_y = avar( torch.FloatTensor(train_y), requires_grad=False)
        valid_x, valid_y = validSet 
        valid_x = avar( torch.FloatTensor(valid_x), requires_grad=False)
        valid_y = avar( torch.FloatTensor(valid_y), requires_grad=False)
        ntrain, nvalid = len(train_x), len(valid_x)
        def getRandomMiniBatch(dsx,dsy,mbs,nmax):
            choices = torch.LongTensor( np.random.choice(nmax, size=mbs, replace=False) )
            return dsx[choices], dsy[choices]
        print('Starting training')
        switchTime = 0
        noiselessProb = 0.1
        for i in range(0,maxIters):
            self.zero_grad()
            if i == switchTime: print('Changing to noisy dataset')
            train = train_x_noisy if i > switchTime and npr.uniform() > noiselessProb else train_x
            x, y = getRandomMiniBatch(train,train_y,minibatch_size,ntrain)
            y_hat = self.forward(x)
            loss = lossf(y_hat, y)
            loss.backward()
            optimizer.step()
            if i % testEvery == 0:
                print('Epoch', str(i) + ': L_t =', '%.4f' % loss.data[0], end=', ')
                vx, vy = getRandomMiniBatch(valid_x,valid_y,2000,nvalid)
                predv = self.forward(vx)
                lossv = lossf(predv, vy)
                print('L_v =','%.4f' % lossv.data[0],end=', ')
                acc = self._accuracyBatch(vy,predv)
                print("VACC (noiseless) =",'%.4f' % acc,end=', ')

                tx, ty = getRandomMiniBatch(train_x_noisy,train_y,2000,ntrain)
                predt = self.forward(tx)
                acctn = self._accuracyBatch(ty,predt)
                print("TACC (noisy) =",'%.4f' % acctn)

                if not f_model_name is None:
                    torch.save(self.state_dict(), f_model_name)
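
self.noisify is not shown in this example. A plausible minimal version, assuming (consistent with the Gaussian noising used elsewhere in these examples) that it simply adds zero-mean Gaussian noise to every feature:

    import numpy as np

    def noisify(xs, sigma):
        # Assumed behaviour: add i.i.d. zero-mean Gaussian noise with std sigma.
        xs = np.asarray(xs, dtype=np.float32)
        return xs + sigma * np.random.randn(*xs.shape).astype(np.float32)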
 def getLossFromLeaves(self, lambda_h=-0.0):
     totalLosses = avar(torch.FloatTensor([0.0]))
     totalEntropy = avar(torch.FloatTensor([0.0]))
     for leaf in self.leaves:
         totalLosses += -self.valueF(leaf.state)
         totalEntropy += -torch.sum(
             leaf.action[0] * torch.log(leaf.action[0]))
     loss = totalLosses + lambda_h * totalEntropy
     return loss / len(self.leaves)
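
The totalEntropy term accumulated above is the Shannon entropy of each soft action, -sum(p * log p); because it enters the loss scaled by lambda_h, a negative lambda_h rewards more uniform (higher-entropy) actions. A short illustration on a plain tensor (the logits are placeholders):

    import torch
    import torch.nn.functional as F

    probs = F.softmax(torch.FloatTensor([4.0, 0.0, 0.0, 0.0]), dim=0)
    entropy = -torch.sum(probs * torch.log(probs))
    print(entropy)  # low for a peaked distribution; a uniform one gives log(4) ~ 1.39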
Example #6
        def getRandomMiniBatch(dsx,
                               dsy,
                               mbs,
                               nmax,
                               noiseType=1,
                               maxUniformNoiseLevel=0.001,
                               gaussianSigma=0.001):
            choices = npr.choice(nmax, size=mbs, replace=False)
            xs, ys = dsx[choices], dsy[choices]
            newMB_x = np.zeros((mbs, self.inputSize))
            newMB_y = np.zeros(mbs)
            if noiseType == 0:
                for i in range(mbs):
                    jx, jy = int(xs[i, 0]), int(ys[i, 0])
                    newMB_x[i, jx] = 1.0
                    newMB_x[i, -10:] = xs[i, -10:]
                    newMB_y[i] = jy
            elif noiseType == 1:
                if self.stateSize * maxUniformNoiseLevel > 1:
                    print('Noise level is untenable! Max =',
                          1.0 / self.stateSize)
                    sys.exit(0)
                for i in range(mbs):
                    unifNoisedState = npr.uniform(low=0.0,
                                                  high=maxUniformNoiseLevel,
                                                  size=self.stateSize)
                    jx, jy = int(xs[i, 0]), int(ys[i, 0])
                    spikeValue = 1.0 - np.sum(
                        unifNoisedState) + unifNoisedState[jx]
                    newMB_x[i, 0:self.stateSize] = unifNoisedState
                    newMB_x[i, jx] = spikeValue
                    newMB_x[i, 0:self.stateSize] /= np.sum(
                        newMB_x[i, 0:self.stateSize])
                    newMB_x[i, -10:] = xs[i, -10:]
                    newMB_y[i] = jy
                    # print(np.argmax(newMB_x[i,0:self.stateSize]),',',
                    #      np.sum(newMB_x[i, 0:self.stateSize]),',',
                    #      newMB_x[i,np.argmax(newMB_x[i,0:self.stateSize])])
            elif noiseType == 2:
                for i in range(mbs):
                    jx, jy = int(xs[i, 0]), int(ys[i, 0])
                    noise = gaussianSigma * npr.normal(size=self.stateSize)
                    newMB_x[i, jx] = 1.0
                    newMB_x[i, 0:self.stateSize] += noise
                    newMB_x[i, 0:self.stateSize] = Utils.softmax(
                        newMB_x[i, 0:self.stateSize])
                    newMB_x[i, -10:] = xs[i, -10:]
                    newMB_y[i] = jy
                    # print(newMB_x[i,0:self.stateSize])
                    # sys.exit(0)

            newMB_x = avar(torch.FloatTensor(newMB_x), requires_grad=False)
            newMB_y = avar(torch.LongTensor(newMB_y), requires_grad=False)
            return newMB_x, newMB_y
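
The noiseType == 1 branch spreads a small uniform floor over the one-hot state, puts the remaining mass on the true index, and renormalizes, so the noised state is still a probability vector with the correct argmax. A stripped-down version of that construction (state size and noise level are illustrative):

    import numpy as np

    def noisy_one_hot(index, size, max_noise=0.001):
        # Uniform floor plus a spike on the true index, renormalized to sum to 1.
        floor = np.random.uniform(0.0, max_noise, size=size)
        vec = floor.copy()
        vec[index] = 1.0 - floor.sum() + floor[index]
        return vec / vec.sum()

    v = noisy_one_hot(3, 64)
    print(v.argmax(), round(float(v.sum()), 6))  # 3 1.0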
 def grow(self, node, d, b, verbose=False):
     if verbose: print('Grow depth: ', d)
     if verbose: self.env.printState(node.state[0].data.numpy())
     if d == self.maxDepth: return node
     if type(b) is int: b = avar(torch.LongTensor([b]))
     i = 0
     while (i < b.data).all():
         # Sample the current action
         hard_action, soft_a_s, new_branching_breadth, softBranching = self.simPolicy.sample(
             node.state)
         a_s = [torch.squeeze(hard_action)]
         inital_state = torch.squeeze(node.state)
         self.forwardModel.setHiddenState(node.hidden)
         current_state, _, current_hidden = self.forwardModel.forward(
             inital_state, a_s, 1)
         # Build the next subtree
         current_state = current_state.unsqueeze(dim=0)
         self.allStates.append(current_state)
         self.allActions.append(a_s)
         if verbose:
             print("int_state at depth", d)
             self.env.printState(node.state[0].data.numpy())
             print("a_s at depth ", d, " and breath", i)
             self.env.printAction(a_s[0])
             self.env.printAction(a_s[0])
             print("curr_state at depth", d)
             self.env.printState(current_state[0].data.numpy())
         childNode = Node(node, current_state, [soft_a_s], [hard_action],
                          current_hidden)
         self.allNodes.append(childNode)
         childNode.branchingBreadth = new_branching_breadth
         childNode.softBranching = F.softmax(softBranching, dim=1)
         node.addChild(self.grow(childNode, d + 1, new_branching_breadth))
         i += 1
     return node
Example #8
 def measureLossAtTestTime(self, useOnlyLeaves=False):
     targetNodes = self.allNodes
     if useOnlyLeaves: targetNodes = self.leaves
     for i, node in enumerate(targetNodes):
         if i == 0:
             node.loss = avar(torch.FloatTensor([float('inf')]))
             continue
         node.loss = -self.valueF(node.state)
Example #9
 def test(self, x, y=None):
     if not type(x) is avar:
         x = avar(torch.FloatTensor(x))
     print('Input State')
     s_0 = x[0:-10]
     self.printState(s_0, '\t')
     print('Input Action')
     self.printAction(x[-10:], '\t')
     print('Predicted Final State')
     yhat = self.forward(x)
     self.printState(yhat, '\t')
     if not y is None:
         if not type(y) is avar:
             y = avar(torch.FloatTensor(y))
         print('Actual Final State')
         self.printState(y, '\t')
         print('Acc: ', self._accuracySingle(y, yhat))
 def getBestPlanFromLeaves(self):
     bestInd, bestVal = 0, avar(torch.FloatTensor([float('-inf')
                                                   ]))  #float('-inf')
     for i, leaf in enumerate(self.leaves):
         currVal = self.valueF(leaf.state)
         if currVal.data.numpy() > bestVal.data.numpy():
             bestInd = i
             bestVal = currVal
     return self.getPathFromLeaf(bestInd)
Example #11
 def getLossFromLeaves(self, lambda_h=0.001):
     totalLosses = avar(torch.FloatTensor([0.0]))
     #totalLosses = avar(torch.FloatTensor(len(self.leaves)))
     for i, leaf in enumerate(self.leaves):
         #totalLosses[i] = -self.valueF( leaf.state )
         totalLosses += -self.valueF(leaf.state) + lambda_h * torch.sum(
             leaf.action[0] * torch.log(leaf.action[0]))
         #print(leaf.action[0].data.numpy().argmax(),-self.valueF( leaf.state ).data[0])
     return totalLosses / len(self.leaves)  #torch.min(totalLosses)
Example #12
def main():

    ###
    runTraining = True

    ###

    f_model_name = 'LSTM_FM_1_99'
    s = 'navigation'  # 'transport'
    # Read training/validation data
    print('Reading Data')
    trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
    train, valid = SeqData(trainf), SeqData(validf)
    # Load forward model
    ForwardModel = LSTMForwardModel(train.lenOfInput, train.lenOfState)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    # Initialize forward policy
    exampleEnv = generateTask(
        0, 0, 0, 3, 0)  # This takes about 10 sec to train & solve on my comp
    SimPolicy = SimulationPolicy(exampleEnv)
    # Run training
    if runTraining:
        maxDepth = 3
        SimPolicy.trainSad(
            exampleEnv,
            ForwardModel,
            printActions=True,
            maxDepth=maxDepth,
            # treeBreadth=2,
            eta_lr=0.001,  #0.000375,
            trainIters=500,
            alpha=0.5,
            lambda_h=-0.005,  #-0.0125, # negative = encourage entropy
            useHolder=True,
            holderp=-2.0,
            useOnlyLeaves=False,
            gamma=0.9  #1.5
        )

        # NOTE: the branching factor parameter here is merely the branching level AT THE PARENT
        # It has no effect anywhere else
        s_0 = torch.unsqueeze(avar(torch.FloatTensor(
            exampleEnv.getStateRep())),
                              dim=0)
        tree = Tree(s_0,
                    ForwardModel,
                    SimPolicy,
                    greedy_valueF,
                    exampleEnv,
                    maxDepth=maxDepth)  #, branchingFactor=2)
        tree.measureLossAtTestTime()
        states, actions = tree.getBestPlan()
        print('Final Actions')
        for i in range(len(actions)):
            jq = actions[i][0].data.numpy().argmax()
            print('A' + str(i) + ':', jq, NavigationTask.actions[jq])
 def getBestPlan(self, useOnlyLeaves=False):
     bestInd, bestVal = 0, avar(torch.FloatTensor([float('inf')]))
     targetNodes = self.allNodes
     if useOnlyLeaves: targetNodes = self.leaves
     for i, node in enumerate(targetNodes):
         currVal = node.loss
         if currVal.data.numpy() < bestVal.data.numpy():
             bestInd = i
             bestVal = currVal
     return self.getPathFromNode(bestInd)
Example #14
 def runOnActionSequence(self, start_state, actions, hidden=None):
     steps = len(actions)
     outputs = avar(torch.zeros(
         steps, 1, self.stateSize))  # seqlen x batchlen x stateSize
     output = start_state
     for i in range(steps):
         action = actions[i]
         inputv = torch.cat([output, action.unsqueeze(0)], dim=1)
         output, hidden = self.step(inputv, hidden)
         outputs[i] = output
     return outputs, hidden
Example #15
    def train(self,
              trainSet,
              validSet,
              minibatch_size=120,
              maxIters=4000,
              testEvery=150):
        optimizer = optim.Adam(self.parameters(), lr=0.000002 * minibatch_size)
        lossf = nn.MSELoss()  # nn.L1Loss() # nn.MSELoss()
        train_x, train_y = trainSet
        train_x = avar(torch.FloatTensor(train_x), requires_grad=False)
        train_y = avar(torch.FloatTensor(train_y), requires_grad=False)
        valid_x, valid_y = validSet
        valid_x = avar(torch.FloatTensor(valid_x), requires_grad=False)
        valid_y = avar(torch.FloatTensor(valid_y), requires_grad=False)
        ntrain, nvalid = len(train_x), len(valid_x)

        def getRandomMiniBatch(dsx, dsy, mbs, nmax):
            choices = torch.LongTensor(
                np.random.choice(nmax, size=mbs, replace=False))
            return dsx[choices], dsy[choices]

        print('Starting training')
        for i in range(0, maxIters):
            self.zero_grad()
            x, y = getRandomMiniBatch(train_x, train_y, minibatch_size, ntrain)
            y_hat = self.forward(x)
            loss = lossf(y_hat, y)
            #print(i,loss)
            loss.backward()
            optimizer.step()
            if i % testEvery == 0:
                print('Epoch',
                      str(i) + ': L_t =',
                      '%.4f' % loss.data[0],
                      end=', ')
                vx, vy = getRandomMiniBatch(valid_x, valid_y, 2000, nvalid)
                predv = self.forward(vx)
                lossv = lossf(predv, vy)
                print('L_v =', '%.4f' % lossv.data[0], end=', ')
                acc = self._accuracyBatch(vy, predv)
                print("VACC =", '%.4f' % acc)
Example #16
    def generatePlanOld(self,start_state,env,eta=0.05,niters=None):
        x_t = avar( torch.randn(self.nacts,self.action_size) * self.start_sigma, requires_grad=True )
        deconStartState = env.deconcatenateOneHotStateVector(start_state)
        lossf = nn.CrossEntropyLoss()
        gx, gy = avar(torch.FloatTensor(deconStartState[-2])), avar(torch.FloatTensor(deconStartState[-1]))
        _,sindx = avar(torch.FloatTensor(deconStartState[0])).max(0)
        _,sindy = avar(torch.FloatTensor(deconStartState[1])).max(0)
        _,indx = gx.max(0)
        _,indy = gy.max(0)
        niters = self.niters if niters is None else niters
        for i in range(niters):
            # Generate soft action sequence
            epsilon = avar( torch.randn(self.nacts, self.action_size) * self.sigma )
            y_t = x_t + epsilon
            a_t = F.softmax( y_t, dim=1 )
            # Compute predicted state
            self.f.reInitialize() # Reset LSTM hidden state
            currState = avar(torch.FloatTensor(start_state))
            for k in range(0,self.nacts):
                action = a_t[k,:]
                currState = self.f.stateSoftmax(currState,env)
                currInput = torch.cat([currState,action],0)
                currInput = currInput.view(1, 1, -1) # [seqlen x batchlen x feat_size]
                lstm_out, self.f.hidden = self.f.lstm( currInput, self.f.hidden )
                currState = self.f.hiddenToState( lstm_out[-1,0,:] ) # [seqlen x batchlen x hidden_size]
            # Compute loss
            predFinal = env.deconcatenateOneHotStateVector( self.f.stateSoftmax(currState,env) )
            pvx = predFinal[0]
            pvy = predFinal[1]
            #
            lossx = lossf(pvx.view(1,len(pvx)), indx) 
            lossy = lossf(pvy.view(1,len(pvy)), indy)
            loss = lossx + lossy
            #
            print(i, '-> L =', lossx.data[0],' + ',lossy.data[0])
            print(indx.data[0],indy.data[0],end='  ###  ')
            print( pvx.max(0)[1].data[0], pvy.max(0)[1].data[0] )
            print('--')
            loss.backward()
            x_t.data -= eta * x_t.grad.data
            print('g_t',x_t.grad.data)
            print('x_t',x_t.data)
            print('Predicted End:',pvx.max(0)[1].data[0],pvy.max(0)[1].data[0])
            x_t.grad.data.zero_()

        print('\nEnd\n')
        print(F.softmax( x_t, dim=1 ))
        for k in range(0,self.nacts):
            action = x_t[k,:]
            print(action.max(0)[1].data[0],end=' -> ')
            print(NavigationTask.actions[action.max(0)[1].data[0]])
        print('--')
        print('START ',sindx.data[0],sindy.data[0])
        print('TARGET END ',indx.data[0],indy.data[0])
        print('--')
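
generatePlanOld is gradient-based planning in action space: a matrix of action logits is perturbed and softmaxed into soft actions, rolled through the frozen forward model, the predicted end state is scored against the goal, and the gradient is applied directly to the logits. A minimal sketch of that loop with a stand-in differentiable dynamics function (toy_dynamics and all constants below are hypothetical, not the LSTM model used here):

    import torch
    import torch.nn.functional as F
    import torch.optim as optim

    def toy_dynamics(state, action):
        # Stand-in differentiable model: actions 0..3 nudge the state +x, +y, -x, -y.
        return state + action[:2] - action[2:4]

    n_steps, n_actions = 4, 4
    logits = torch.zeros(n_steps, n_actions, requires_grad=True)
    goal = torch.FloatTensor([3.0, 1.0])
    opt = optim.SGD([logits], lr=0.5)

    for it in range(200):
        state = torch.zeros(2)
        for t in range(n_steps):
            soft_action = F.softmax(logits[t], dim=0)
            state = toy_dynamics(state, soft_action)
        loss = F.mse_loss(state, goal)
        opt.zero_grad()
        loss.backward()
        opt.step()

    print(logits.argmax(dim=1))  # most likely action per step after optimization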
 def trainSad(self,
              taskEnv,
              forwardModel,
              printActions=False,
              maxDepth=5,
              treeBreadth=2,
              eta_lr=0.0005,
              trainIters=500,
              alpha=0.5,
              lambda_h=-0.025,
              useHolder=False,
              holderp=-6.0,
              useOnlyLeaves=False,
              gamma=0.01,
              temperature=2,
              branching_temperature=1):
     optimizer = optim.Adam(self.parameters(), lr=eta_lr)
     for p in forwardModel.parameters():
         p.requires_grad = False
     s0 = avar(torch.FloatTensor([self.env.getStateRep()]),
               requires_grad=False)
     for i in range(0, trainIters):
         tree = Tree(s0,
                     forwardModel,
                     self,
                     greedy_valueF,
                     self.env,
                     maxDepth,
                     treeBreadth,
                     temperature=temperature,
                     branching_temperature=branching_temperature)
         loss = tree.getLossFromAllNodes(alpha=alpha,
                                         lambda_h=lambda_h,
                                         useHolder=useHolder,
                                         holderp=holderp,
                                         useOnlyLeaves=useOnlyLeaves,
                                         gamma=gamma)
         loss.backward()
         optimizer.step()
         optimizer.zero_grad()
         if i % 50 == 0:
             # print('Loss',i,":",loss.data[0])
             # print('NumTreeNodes:', len(tree.allNodes))
             if printActions:
                 plan = tree.getBestPlan()
                 # print(plan)
                 print("\n".join([
                     "A" + str(qi) + ": " + ",".join([
                         ",".join(["%.3f" % q for q in qq])
                         for qq in a[0].data.numpy()
                     ]) for qi, a in enumerate(plan[1])
                 ]))
Example #18
 def getBestPlan(self):
     bestInd, bestVal = 0, avar(torch.FloatTensor([float('-inf')
                                                   ]))  #float('-inf')
     for i, leaf in enumerate(self.leaves):
         currVal = self.valueF(leaf.state)
         #print('State')
         #self.forwardModel.printState(leaf.state[0])
         #print('Value',currVal)
         if currVal.data.numpy() > bestVal.data.numpy():
             bestInd = i
             bestVal = currVal
     #print(bestVal)
     return self.getPathFromLeaf(bestInd)
Example #19
 def _lossFunction(self, outputs, targets, useMSE=False, env=None):
     if useMSE:
         loss = nn.MSELoss()
         return loss(outputs, targets)
     else:  # Use Cross-entropy
         loss = nn.CrossEntropyLoss()
         cost = avar(torch.FloatTensor([0]))
         predVec = env.deconcatenateOneHotStateVector(outputs)
         labelVec = env.deconcatenateOneHotStateVector(targets)
         for pv, lv in zip(predVec, labelVec):
             val, ind = lv.max(0)
             cost += loss(pv.view(1, len(pv)), ind)
         return cost / len(predVec)
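
The cross-entropy branch above scores each one-hot block of the state separately against the index of the true value in that block. A self-contained sketch with two hypothetical blocks (sizes 16 and 4) standing in for env.deconcatenateOneHotStateVector:

    import torch
    import torch.nn as nn

    ce = nn.CrossEntropyLoss()
    block_sizes = [16, 4]                  # hypothetical one-hot segment sizes

    pred = torch.randn(sum(block_sizes))   # raw scores for the whole state vector
    label = torch.zeros(sum(block_sizes))  # one-hot target state
    label[3], label[16 + 2] = 1.0, 1.0

    cost, offset = 0.0, 0
    for size in block_sizes:
        block_pred = pred[offset:offset + size]
        block_true = label[offset:offset + size]
        cost = cost + ce(block_pred.view(1, size), block_true.argmax().view(1))
        offset += size
    print(cost / len(block_sizes))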
Example #20
 def _accuracySingle(self,seq,label,env):
     seq = [avar(torch.from_numpy(s).float()) for s in seq] 
     seq = torch.cat(seq).view(len(seq), 1, -1) # [seqlen x batchlen x hidden_size]
     self.reInitialize() # Reset LSTM hidden state
     prediction = self.forward(seq) # Only retrieves final time state
     predVec = env.deconcatenateOneHotStateVector(prediction)
     labelVec = env.deconcatenateOneHotStateVector(label)
     locAcc = 0.0
     for pv, lv in zip(predVec, labelVec):
         _, ind_pred = pv.max(0)
         ind_label = np.argmax(lv)
         locAcc += 1.0 if ind_pred.data[0] == ind_label else 0.0
     return locAcc / len(predVec)
Example #21
 def forward(self, inputs, hidden=None, force=True, steps=0):
     if force or steps == 0: steps = len(inputs)
     outputs = avar(torch.zeros(steps, 1, self.stateSize))
     for i in range(steps):
         if force or i == 0:
             inputv = inputs[i]
         else:
             trueInput = inputs[
                 i]  # Even if not teacher forcing, still take true action
             inputv = torch.cat(
                 [output, trueInput[-self.actionSize:].unsqueeze(0)], dim=1)
         output, hidden = self.step(inputv, hidden)
         outputs[i] = output
     return outputs, hidden
Example #22
 def train(self,
           trainSeq,
           validSeq,
           nEpochs=1500,
           epochLen=500,
           validateEvery=20,
           vbs=500,
           printEvery=5,
           noiseSigma=0.4):
     optimizer = optim.Adam(self.parameters(), lr=0.003)
     state_size, action_size, tenv = self.stateSize, self.actionSize, trainSeq.env
     for epoch in range(nEpochs):
         if epoch % printEvery == 0: print('Epoch:', epoch, end='')
         loss = 0.0
         self.zero_grad()  # Zero out gradients
         for i in range(epochLen):
             self.reInitialize(1)  # Reset LSTM hidden state
             seq, label = trainSeq.randomTrainingPair()  # Current value
             actions = [s[64:74] for s in seq]
             actions = [avar(torch.from_numpy(s).float()) for s in actions]
             intial_state = seq[0][0:64]
             seqn = len(seq)
             prediction, _ = self.forward(intial_state, actions,
                                          seqn)  #[-1,:]
             label = avar(torch.from_numpy(label).float())
             loss += self._lossFunction(prediction, label, env=tenv)
         loss.backward()
         optimizer.step()
         if epoch % printEvery == 0:
             print(" -> AvgLoss", str(loss.data[0] / epochLen))
         if epoch % validateEvery == 0:
             bdata, blabels, bseqlen = validSeq.next(vbs, nopad=True)
             acc1, _ = self._accuracyBatch(bdata, blabels, validSeq.env)
             bdata, blabels, bseqlen = trainSeq.next(vbs, nopad=True)
             acc2, _ = self._accuracyBatch(bdata, blabels, tenv)
             print('\tCurrent Training Acc (est) =', acc2)
             print('\tCurrent Validation Acc (est) =', acc1)
Example #23
def main():
    f_model_name = 'LSTM_FM_1_99'
    gvp_model_name = "greedy_value_predictor_3"

    numRepeats = 5
    tasks = [[6, generateTask(0, 0, 0, 12, 10)]]

    exampleEnv = NavigationTask()
    ForwardModel = LSTMForwardModel(74, 64)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    GreedyVP = GreedyValuePredictor(exampleEnv)
    GreedyVP.load_state_dict(torch.load(gvp_model_name))

    print("Running the tasks")
    for i, task in enumerate(tasks):
        for j in range(numRepeats):
            task_state = task[1].getStateRep(oneHotOutput=False)
            px = int(task_state[0])
            py = int(task_state[1])
            orien = np.argmax(task_state[2:6])
            gx = int(task_state[-2])
            gy = int(task_state[-1])
            print("$$###############################")
            print("Repeat " + str(j) + " for " + str(gx) + " , " + str(gy))
            #print('www',px,py,orien,gx,gy)
            cenv = generateTask(px, py, orien, gx, gy)
            SimPolicy = SimulationPolicy(cenv)
            SimPolicy.trainSad(ForwardModel,
                               GreedyVP,
                               maxDepth=task[0],
                               niters=2000)

            s_0 = torch.unsqueeze(avar(torch.FloatTensor(cenv.getStateRep())),
                                  dim=0)
            tree = Tree(s_0, ForwardModel, SimPolicy, greedy_valueF, cenv,
                        task[0], 2)
            states, actions = tree.getBestPlan()

            for i in range(len(actions)):
                cenv.performAction(actions[i][0].data.numpy().argmax())
            r = cenv.getReward()
            correct = (r == 1)

            #print('Correct?',correct)
            if correct:
                print('Correct final state', str(gx), str(gy))
                torch.save(
                    SimPolicy.state_dict(), "SimPolicy_solve_" + str(gx) +
                    "_" + str(gy) + "_" + str(j))
 def __init__(self, env, layerSizes=[100, 100], maxBranchingFactor=3):
     super(SimulationPolicy, self).__init__()
     self.actionSize = len(env.actions)
     self.stateSize = len(env.getStateRep(oneHotOutput=True))
     self.env = env
     self.maxBranchingFactor = maxBranchingFactor
     self.intvec = avar(
         torch.LongTensor(list(range(maxBranchingFactor + 1)))).unsqueeze(0)
     #print("State Size: " , self.stateSize, "\nAction Size: ", self.actionSize)
     # Input space: [Batch, observations], output:[Batch, action_space]
     self.layer1 = nn.Linear(self.stateSize, layerSizes[0])
     self.layer2 = nn.Linear(layerSizes[0], layerSizes[1])
     self.layer3 = nn.Linear(layerSizes[1], self.actionSize)
     # Layer to sample branching factor
     self.intSamplingLayer = nn.Linear(layerSizes[1],
                                       self.maxBranchingFactor + 1)
    def getLossFromAllNodes(self,
                            alpha=0.5,
                            lambda_h=-0.025,
                            useHolder=False,
                            holderp=-5.0,
                            useOnlyLeaves=False,
                            gamma=0.01,
                            xi=0.01):
        targetNodes = self.allNodes
        if useOnlyLeaves: targetNodes = self.leaves
        totalInverseValue = avar(torch.FloatTensor([0.0])).unsqueeze(0)
        totalEntropy = avar(torch.FloatTensor([0.0]))
        totalBranching = avar(torch.FloatTensor([0.0]))
        totalEntropyB = avar(torch.FloatTensor([0.0]))  # For branching sampler
        if not useHolder: holderp = 1.0
        nNodes = len(targetNodes)
        mbf = avar(
            torch.FloatTensor(
                np.array(list(range(1,
                                    self.simPolicy.maxBranchingFactor + 1)))))
        for i, node in enumerate(targetNodes):
            if i == 0:
                node.loss = avar(torch.FloatTensor([float('inf')]))
                continue
            if not node.branchingBreadth is None:
                expectedBranching = torch.sum(node.softBranching * mbf)
                totalBranching += expectedBranching
                # print('----')
                # print('Exp',expectedBranching)
                # print('TrueBB',node.branchingBreadth)
                # totalBranching += node.branchingBreadth.type(torch.FloatTensor) # IGNORES PARENT TODO
            currloss = -self.valueF(node.state)
            node.loss = currloss
            # print('closs', currloss)
            # print('Holderp', holderp)
            currloss_pow = currloss.pow(holderp)
            # print('clp', currloss_pow)
            # print('totinv', totalInverseValue)

            totalInverseValue += currloss_pow
            if not node.action is None:
                totalEntropy += -torch.sum(
                    node.action[0] * torch.log(node.action[0]))
            if not node.softBranching is None:
                totalEntropyB += -torch.sum(
                    node.softBranching * torch.log(node.softBranching))
        # Penalize negative reward and entropy
        totalLosses = alpha * (totalInverseValue / nNodes).pow(
            1.0 / holderp) + lambda_h * totalEntropy / nNodes
        # Penalize too many branches
        totalLosses += gamma * totalBranching / nNodes
        # Penalize entropy in the branching sampler
        totalLosses += xi * totalEntropyB / nNodes
        return totalLosses
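
expectedBranching above is simply the expectation of the branching factor under the sampler's softmax, sum_k p_k * k for k = 1..maxBranchingFactor. A two-line illustration (the probabilities are placeholders):

    import torch

    soft_branching = torch.FloatTensor([0.2, 0.5, 0.3])  # P(b=1), P(b=2), P(b=3)
    mbf = torch.FloatTensor([1.0, 2.0, 3.0])
    print(torch.sum(soft_branching * mbf))                # expected branching = 2.1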
Example #26
def main():
    f_model_name = 'LSTM_FM_1_99'
    s = 'navigation'  # 'transport'
    trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
    print('Reading Data')
    train, valid = SeqData(trainf), SeqData(validf)
    exampleEnv = generateTask(0, 0, 0, 14, 14)
    ForwardModel = LSTMForwardModel(train.lenOfInput, train.lenOfState)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    SimPolicy = SimulationPolicy(exampleEnv)
    SimPolicy.trainSad(ForwardModel)
    s_0 = torch.unsqueeze(avar(torch.FloatTensor(exampleEnv.getStateRep())),
                          dim=0)
    tree = Tree(s_0, ForwardModel, SimPolicy, greedy_cont_valueF, exampleEnv,
                5, 2)
    states, actions = tree.getBestPlan()
    for i in range(len(actions)):
        print(actions[i][0].data.numpy().argmax())
Example #27
    def forward(self, inital_state, actions, seqn):
        #initalState [1*1*state_size] actions[batch*noOfActions*Action_size]
        #print(actions[0].shape)
        #print(seqn)
        int_states = []

        current_state = avar(torch.from_numpy(inital_state).float())
        #print(current_state.shape)
        #print(torch.cat((current_state, actions[0]),0))
        for i in range(seqn):
            concat_vec = torch.cat((current_state, actions[i]),
                                   0).view(1, 1, -1)
            lstm_out, self.hidden = self.lstm(concat_vec, self.hidden)
            output_state = self.hiddenToState(lstm_out[0, 0, :])
            int_states.append(output_state)
            current_state = output_state

        return current_state, int_states
Example #28
 def getBestPlan(self):
     bestInd, bestVal = 0, avar(torch.FloatTensor([float('inf')]))
     currpath = None
     for i, node in enumerate(self.allNodes):
         if i == 0: continue
         currVal = node.loss
         if currVal.data.numpy() < bestVal.data.numpy():
             putPath = self.getPathFromNode_branchingAndActions(i)
             bestInd = i
             bestVal = currVal
             currpath = putPath
         elif currVal.data.numpy() == bestVal.data.numpy():
             # On ties, prefer the shorter plan; compute this node's path first.
             putPath = self.getPathFromNode_branchingAndActions(i)
             if (currpath is None) or (len(putPath) < len(currpath)):
                 bestInd = i
                 bestVal = currVal
                 currpath = putPath
     return currpath
Example #29
 def _accuracySingle(self, seq, label, env):
     seq = [avar(torch.from_numpy(s).float()) for s in seq]
     seq = torch.cat(seq).view(len(seq), 1,
                               -1)  # [seqlen x batchlen x hidden_size]
     self.reInitialize(1)  # Reset LSTM hidden state
     #print(seq.shape)
     actions = [s[0][64:74] for s in seq]
     #actions = [ avar(torch.from_numpy(s).float()) for s in actions]
     intial_state = seq[0][0][0:64].data.numpy()
     seqn = len(seq)
     prediction, _ = self.forward(intial_state, actions, seqn)  #[-1,:]
     #prediction = self.forward(seq) # Only retrieves final time state
     predVec = env.deconcatenateOneHotStateVector(prediction)
     labelVec = env.deconcatenateOneHotStateVector(label)
     locAcc = 0.0
     for pv, lv in zip(predVec, labelVec):
         _, ind_pred = pv.max(0)
         ind_label = np.argmax(lv)
         locAcc += 1.0 if ind_pred.data[0] == ind_label else 0.0
     return locAcc / len(predVec)
Example #30
def main():
    ts = "navigation-data-state_to_reward-train.pickle"
    vs = "navigation-data-state_to_reward-valid.pickle"
    ############
    print('Reading Data')
    with open(ts,'rb') as inFile:
        print('\tReading',ts); trainSet = pickle.load(inFile)
    with open(vs,'rb') as inFile:
        print('\tReading',vs); validSet = pickle.load(inFile)
    env = NavigationTask()
    greedyvp = GreedyValuePredictor(env)
    greedyvp.train( trainSet, validSet)
    def generateTask(px,py,orien,gx,gy):
        direction = NavigationTask.oriens[orien]
        gs = np.array([gx, gy])
        env = NavigationTask(agent_start_pos=[np.array([px,py]), direction],goal_pos=gs)
        return env
    env = generateTask(0,1,2,3,2)
    state = avar( torch.FloatTensor(env.getStateRep()), requires_grad=False).view(1,-1)
    print(state.shape)
    greedyvp.forward(state).data.numpy()
    torch.save(greedyvp.state_dict(), "greedy_value_predictor")