Exemplo n.º 1
0
    def test(self, name):
        '''
        Run the model over the held-out test partition and record predictions.

        Writes a two-column csv (y_true, y_pred) to
        ../model_train_results/{name}.csv.

        name: (str) file stem for the results csv
        '''
        loader, iteration = data_util.load_data(partition='test')
        data_iter = data_util.inf_generator(loader)
        results = []
        for _ in range(iteration):
            X, y = next(data_iter)  # idiomatic next() over __next__()

            # unwrap each tensor in the batch to a plain numpy row
            X = [x.numpy()[0] for x in X]

            predX = Variable(torch.FloatTensor([X]),
                             requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)

            y_pred = self.model(predX)

            # record ground truth and the argmax over the class dimension
            results.append(
                [y.cpu().numpy()[0],
                 y_pred.max(-1)[1].cpu().numpy()[0]])
        pd.DataFrame(results,
                     columns=['y_true', 'y_pred'
                              ]).to_csv(f'../model_train_results/{name}.csv',
                                        index=False)
Exemplo n.º 2
0
    def test(self, name):
        '''
        Run the model over the held-out test partition and record predictions.

        NOTE: despite the name, this does not return an accuracy — it writes
        a two-column csv (y_true, y_pred) to ../model_train_results/{name}.csv
        and returns None.

        name: (str) file stem for the results csv
        '''
        loader, iteration = data_util.load_data(partition='test')

        data_iter = data_util.inf_generator(loader)
        results = []
        for _ in range(iteration):
            X, y = next(data_iter)  # idiomatic next() over __next__()

            # unwrap each tensor in the batch to a plain numpy row
            X = [x.numpy()[0] for x in X]

            predX = Variable(torch.FloatTensor([X]),
                             requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)

            y_pred = self.model(predX)

            # record ground truth and the argmax over the class dimension
            results.append(
                [y.cpu().numpy()[0],
                 y_pred.max(-1)[1].cpu().numpy()[0]])
        pd.DataFrame(results,
                     columns=['y_true', 'y_pred'
                              ]).to_csv(f'../model_train_results/{name}.csv',
                                        index=False)
Exemplo n.º 3
0
 def score(self):
     '''
     Returns the top-1 accuracy (float in [0, 1]) over the test partition.
     '''
     loader, iteration = data_util.load_data(partition='test')

     data_iter = data_util.inf_generator(loader)
     correct = 0
     for _ in range(iteration):
         X, y = next(data_iter)  # idiomatic next() over __next__()

         # unwrap each tensor in the batch to a plain numpy row
         X = [x.numpy()[0] for x in X]

         predX = Variable(torch.FloatTensor([X]), requires_grad=True).to(device)
         y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)

         y_pred = self.model(predX)

         # count a hit when the argmax class matches the ground truth
         if y_pred.max(-1)[1] == y:
             correct += 1
     return correct / iteration
Exemplo n.º 4
0
    def score(self):
        '''
        Returns the top-1 accuracy (float in [0, 1]) on an unseen test dataset.
        '''
        loader, iteration = data_util.load_data(partition='test')
        data_iter = data_util.inf_generator(loader)
        correct = 0
        for _ in range(iteration):
            X, y = next(data_iter)  # idiomatic next() over __next__()

            # unwrap each tensor in the batch to a plain numpy row
            X = [x.numpy()[0] for x in X]

            predX = Variable(torch.FloatTensor([X]),
                             requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)

            y_pred = self.model(predX)

            # count a hit when the class with the max log softmax is the
            # ground truth class
            if y_pred.max(-1)[1] == y:
                correct += 1
        return correct / iteration
Exemplo n.º 5
0
    def fit(self, name, save_weights=False):
        '''
        Trains model using predefined number of epochs, learning rate and
        number of neurons in each hidden layer. Saves per-epoch results to
        ../model_train_results/{name}.csv.

        name: (str) The name of the file to save the results of this run
        save_weights: (bool) Saves the weights of the best iteration based on
            validation accuracy
        '''
        print(name)
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        loss = nn.CrossEntropyLoss()  # cross-entropy loss
        lossVal = []  # to create dataframe for logging
        bestValAcc = 0  # best validation accuracy seen so far
        for i in range(self.epochs):
            start = time.time()
            loader, iteration = data_util.load_data()
            data_iter = data_util.inf_generator(
                loader
            )  # inf generator taken from rtiqchen implementation of NODE
            train_size = int(iteration *
                             0.8)  # takes 80% of data as train and 20 as test
            val_size = int(iteration * 0.2)
            epoch_train_loss = [
            ]  # collect values to update log for post-processing
            epoch_val_loss = []
            train_correct = 0
            val_correct = 0

            for j in range(
                    train_size
            ):  # calculated train size to do train dev split; mean loss at end
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]), requires_grad=True).to(
                    device)  # have to convert to tensor

                y = Variable(torch.LongTensor([y]),
                             requires_grad=False).to(device)

                optimizer.zero_grad()
                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_train_loss.append(output.cpu().detach().numpy())
                if y_pred.max(
                        -1
                )[1] == y:  # if max log softmax corresponds to correct class add 1
                    train_correct += 1

                output.backward()
                optimizer.step()

            for k in range(val_size):
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]), requires_grad=True).to(
                    device)  # have to convert to tensor

                y = Variable(torch.LongTensor([y]),
                             requires_grad=False).to(device)
                optimizer.zero_grad()
                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_val_loss.append(output.cpu().detach().numpy())
                if y_pred.max(
                        -1
                )[1] == y:  # if max log softmax corresponds to correct class add 1
                    val_correct += 1
            # compute once after the loop (was recomputed every iteration)
            valAcc = val_correct / val_size if val_size else 0
            if save_weights and valAcc > bestValAcc:
                # BUGFIX: track the best accuracy so later, worse epochs do
                # not overwrite the saved best weights
                bestValAcc = valAcc
                torch.save(self.model.state_dict(),
                           f'../model_weights/{name}.pt'
                           )  # save if we do better than current best

            end = time.time()
            lossVal.append([(end - start) / 60,
                            np.mean(epoch_train_loss),
                            np.mean(epoch_val_loss),
                            train_correct / train_size, val_correct / val_size
                            ])  # save values for reporting
            print('epoch time:', (end - start) / 60, 'min', 'epoch:',
                  '{0}/{1}'.format(i, self.epochs), 'train accuracy:',
                  train_correct / train_size, ', val accuracy:',
                  val_correct / val_size)
            print(
                f'Train loss: {np.mean(epoch_train_loss)}     Val loss: {np.mean(epoch_val_loss)}'
            )
        if 'model_train_results' not in os.listdir('../'):
            os.mkdir('../model_train_results')

        pd.DataFrame(lossVal,
                     columns=[
                         'epoch_time', 'mean_train_loss', 'mean_val_loss',
                         'train_acc', 'val_acc'
                     ]).to_csv('../model_train_results/' + name + '.csv',
                               index=False)  # add epoch length
Exemplo n.º 6
0
    def fit(self, name):
        '''
        Trains the model with SGD for the configured number of epochs,
        logging per-epoch mean losses and accuracies to
        ../model_train_results/{name}.csv and saving final weights to
        ../model_weights/VDCNN.pt.

        name: (str) file stem for the results csv
        '''
        optimizer = optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.4)
        loss = nn.CrossEntropyLoss()  # cross-entropy loss
        lossVal = []
        for i in range(self.epochs):
            start = time.time()
            loader, iteration = data_util.load_data()
            data_iter = data_util.inf_generator(loader)
            train_size = int(iteration * 0.8)  # 80/20 train/val split
            val_size = int(iteration * 0.2)
            epoch_train_loss = []
            epoch_val_loss = []
            train_correct = 0
            val_correct = 0

            for j in range(train_size):  # mean loss calculated at end
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]), requires_grad=True).to(device)  # have to convert to tensor

                y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)

                # reshape (1, features, steps) -> (1, steps, features)
                X = X.squeeze().t().unsqueeze(0)
                # (removed leftover debug print of X.shape in the hot loop)
                optimizer.zero_grad()
                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_train_loss.append(output.cpu().detach().numpy())
                if y_pred.max(-1)[1] == y:
                    train_correct += 1

                output.backward()
                optimizer.step()

            for k in range(val_size):
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]), requires_grad=True).to(device)  # have to convert to tensor

                y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)
                optimizer.zero_grad()
                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_val_loss.append(output.cpu().detach().numpy())
                if y_pred.max(-1)[1] == y:
                    val_correct += 1

            lossVal.append([np.mean(epoch_train_loss), np.mean(epoch_val_loss), train_correct/train_size, val_correct/val_size])
            print('epoch time:', time.time()-start, 'seconds', 'epoch:', '{0}/{1}'.format(i, self.epochs), 'train accuracy:', train_correct/train_size, ', val accuracy:', val_correct/val_size)
        if 'model_train_results' not in os.listdir('../'):
            os.mkdir('../model_train_results')

        pd.DataFrame(lossVal, columns=['mean_train_loss', 'mean_val_loss', 'train_acc', 'val_acc']).to_csv('../model_train_results/'+name+'.csv', index=False)

        torch.save(self.model.state_dict(), '../model_weights/VDCNN.pt')
Exemplo n.º 7
0
    def fit(self, name, save_weights=False):
        '''
        Trains model using predefined number of epochs, learning rate and
        number of neurons in each hidden layer. Saves per-epoch results to
        ../model_train_results/{name}.csv.

        name: (str) The name of the file to save the results of this run
        save_weights: (bool) Saves the weights of the best iteration based on
            validation accuracy
        '''

        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        loss = nn.CrossEntropyLoss()
        lossVal = []
        bestValAcc = 0  # best validation accuracy seen so far
        for i in range(self.epochs):
            start = time.time()
            loader, iteration = data_util.load_data()
            data_iter = data_util.inf_generator(loader)
            train_size = int(iteration * 0.8)
            val_size = int(iteration * 0.2)
            epoch_train_loss = []
            epoch_val_loss = []
            train_correct = 0
            val_correct = 0

            c = 0
            for j in range(train_size):
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]),
                             requires_grad=True).to(device)

                y = Variable(torch.LongTensor([y]),
                             requires_grad=False).to(device)

                optimizer.zero_grad()

                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_train_loss.append(output.cpu().detach().numpy())

                if y_pred.max(-1)[1] == y:
                    train_correct += 1

                output.backward()
                optimizer.step()

                # progress heartbeat every 1000 iterations
                if not c % 1000:
                    print('train time:', (time.time() - start) / 60, 'min', c,
                          'loops')
                c += 1

            for k in range(val_size):
                X, y = next(data_iter)

                X = [x.numpy()[0] for x in X]

                X = Variable(torch.FloatTensor([X]), requires_grad=True).to(
                    device)  # have to convert to tensor

                y = Variable(torch.LongTensor([y]),
                             requires_grad=False).to(device)
                optimizer.zero_grad()
                y_pred = self.model(X)
                output = loss(y_pred, y)
                epoch_val_loss.append(output.cpu().detach().numpy())
                if y_pred.max(-1)[1] == y:
                    val_correct += 1
            # compute once after the loop (was recomputed every iteration)
            valAcc = val_correct / val_size if val_size else 0
            if save_weights and valAcc > bestValAcc:
                # BUGFIX: track the best accuracy so later, worse epochs do
                # not overwrite the saved best weights
                bestValAcc = valAcc
                torch.save(self.model.state_dict(),
                           f'../model_weights/{name}.pt'
                           )  # save if we do better than current best

            end = time.time()
            lossVal.append([(end - start) / 60,
                            np.mean(epoch_train_loss),
                            np.mean(epoch_val_loss),
                            train_correct / train_size, val_correct / val_size
                            ])  # save values for reporting
            print('epoch time:', (end - start) / 60, 'min', 'epoch:',
                  '{0}/{1}'.format(i, self.epochs), 'train accuracy:',
                  train_correct / train_size, ', val accuracy:',
                  val_correct / val_size)
            print(
                f'Train loss: {np.mean(epoch_train_loss)}\t\tVal loss: {np.mean(epoch_val_loss)}'
            )
        if 'model_train_results' not in os.listdir('../'):
            os.mkdir('../model_train_results')

        pd.DataFrame(lossVal,
                     columns=[
                         'epoch_time', 'mean_train_loss', 'mean_val_loss',
                         'train_acc', 'val_acc'
                     ]).to_csv('../model_train_results/' + name + '.csv',
                               index=False)  # add epoch length