Example #1

# Assumed context: `parser` (the argparse parser), `ConvNet`, and
# `Prepare_Data` are defined elsewhere in this script.
import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import utils


def main():

    ## get model/training params ##
    args = parser.parse_args()
    if args.debug:
        print('==== DEBUGGING MODE ====')
    
    # get name of script for saving models
    script_name = os.path.basename(__file__)

    ## Initialize metrics ##
    TrainingEval = utils.TrainingMetrics(script_name)
    working_dir = TrainingEval.working_dir
    valid = Prepare_Data(args.data, 'valid/valid')
    valid_batches = DataLoader(valid, args.batch_size,
                               drop_last=True, shuffle=True)
    Validation = utils.Metrics(valid_batches, working_dir, 'validation')
    
    # cp running script to working dir. 
    os.system('cp {} {}'.format(script_name, working_dir))  
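    # (shutil.copy(script_name, working_dir) would do the same without a
    #  shell call, at the cost of an extra `import shutil`)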

    
    ## Initialize model ##
    model = ConvNet(args.kernel_size, args.stride, args.padding,
                    args.ks_pool, args.str_pool, args.pad_pool)
    if torch.cuda.is_available():
        model = model.cuda()
    
    ## log model/training params to file 
    LogFile = utils.LogFile(args, model, working_dir)
    
    ## Loss and optimizer ##
    criterion = nn.CrossEntropyLoss()  # does not ignore padding (0)
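    # To exclude padded positions (label 0) from the loss, PyTorch offers
    # the ignore_index argument:
    #     criterion = nn.CrossEntropyLoss(ignore_index=0)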
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    
    
    # Train the model
    step = -1  # number of batches seen so far
    loss_list = []
    acc_list = []
    valid_loss_list = []
    valid_acc_list = []
    
    
    for epoch in range(args.num_epochs):
        
        for train_ds in range(0, 10):
            f = args.data
            name = 'train/train_{}'.format(train_ds)
            train = Prepare_Data(f, name)
            train_batches = DataLoader(train, batch_size=args.batch_size,
                                       drop_last=True, shuffle=True)
           
            for i, batch in enumerate(train_batches):
                step += 1
    
                # one hot encode
                batch = utils.to_one_hot(batch)

                # transpose dims 1 and 2 so channels = amino acids
                batch = torch.transpose(batch, 1, 2)

                # move the batch to the GPU if the model lives there
                if torch.cuda.is_available():
                    batch = batch.cuda()

                ## switch model to training mode, clear gradient accumulators ##
                # (must come before the forward pass so dropout/batchnorm
                #  run in training mode)
                model.train()
                optimizer.zero_grad()

                ## Run the forward pass ##
                out = model(batch)  # class scores; expected shape [batch, 25, 502], the 25 being per-residue class scores

                # convert back to aa labels from one hot for loss
                batch_labels = utils.from_one_hot(batch)  # integer labels (certain, since input is one-hot)

                ## loss ##
                # CrossEntropyLoss accepts [N, C, L] scores with [N, L] targets
                loss = criterion(out, batch_labels)
                loss_list.append(loss.item())

                ##  Backprop and perform Adam optimisation  ##
                loss.backward()
                optimizer.step()
    
                ##  Track the accuracy  ##
                if i % 50 == 0:
                    acc = TrainingEval.get_acc(out, batch_labels)
                    acc_list.append(acc)
                    TrainingEval.save_metrics(acc, loss.item(), step, epoch)
                    print('Epoch [{}/{}], Step: {}, Loss: {:.4f}, Accuracy: {:.4f}%'
                          .format(epoch + 1, args.num_epochs, step,
                                  loss.item(), acc * 100))
    
                ## Validation ##
                if i % 1000 == 0:
                    val_loss, val_acc, conf_matrix = \
                        Validation.get_performance(model, criterion,
                                                   confusion_matrix=True)
                    Validation.save(val_acc, val_loss, epoch, step)

                    # add to list for fast plotting
                    valid_loss_list.append(val_loss)
                    valid_acc_list.append(val_acc)
                    print('Validation:  Loss: {:.4f}, Accuracy: {:.4f}%\n'
                            .format(val_loss, val_acc*100))  
                    # plot 
                    TrainingEval.plot_metrics(acc_list, loss_list,
                            valid_acc_list, valid_loss_list, epoch)

                    Validation.plot_confusion_matrix(conf_matrix)
                    Validation.plot_per_class(conf_matrix)
    

            # Save the model every 5 sub training sets
            if train_ds % 5 == 0:
                utils.save_checkpoint(model, optimizer, epoch, train_ds,
                                      loss_list, acc_list, working_dir)
                utils.save_final_model(model, working_dir)
                LogFile.log_saved_model(step)
    
    LogFile.log_performance(acc, loss.item(), ds_type='Training')

    
    if args.testing: 

        f = args.data
        name = 'test/test_1'
        test = Prepare_Data(f, name)
        test_batches = DataLoader(test, batch_size=args.batch_size,
                                  drop_last=True, shuffle=True)

        Test = utils.Metrics(test_batches, working_dir, 'test')
        test_loss, test_acc, conf_matrix = Test.get_performance(
            model, criterion, confusion_matrix=True)
        Test.save(test_acc, test_loss, epoch=-1, step=-1)
        Test.plot_confusion_matrix(conf_matrix)
        Test.save_conf_matrix(conf_matrix)
        Test.plot_per_class(conf_matrix)
        LogFile.log_performance(test_acc, test_loss, ds_type='Test')
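
Both examples round-trip each batch through `utils.to_one_hot` / `utils.from_one_hot`, which are not shown in this file. A minimal, hypothetical sketch of that pair, assuming integer-encoded sequences of shape [batch, seq_len] and a 25-symbol alphabet (an illustration, not the project's actual implementation):

import torch
import torch.nn.functional as F

NUM_CLASSES = 25  # assumed alphabet size: amino acids plus special/padding symbols

def to_one_hot(batch):
    # [batch, seq_len] integer labels -> [batch, seq_len, NUM_CLASSES] floats
    return F.one_hot(batch.long(), num_classes=NUM_CLASSES).float()

def from_one_hot(batch):
    # invert the encoding; the training loops call this on the transposed
    # [batch, NUM_CLASSES, seq_len] tensor, so take argmax over dim 1
    return batch.argmax(dim=1)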
Example #2

# Assumed context: `parser` (the argparse parser) is defined elsewhere in
# this script; the ConvNet architecture itself is loaded dynamically from
# nn_models/ below.
import importlib.util
import os

import h5py
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import utils


def main():

    ## get model/training params ##
    args = parser.parse_args()

    ## specify name of output dir ##
    # dir to be created once initializing TrainingMetrics
    if args.debug:
        top_working_dir = 'debugging'

    elif args.out_dir is not None:
        top_working_dir = args.out_dir

    else:
        top_working_dir = args.nn_model.split(".py")[0]

    ## Initialize training metrics ##

    # simultaneously creates working_dir
    TrainingEval = utils.TrainingMetrics(top_working_dir, args.restart)

    # get name of output/working dir
    working_dir = TrainingEval.working_dir

    ## Initialize Validation metrics ##
    Validation = utils.PerformMetrics(args.data, working_dir, args.batch_size,
                                      'validation')

    ## Initialise Test metrics: ##
    if args.testing:
        Test = utils.PerformMetrics(args.data, working_dir, args.batch_size,
                                    'test')

    ## Logging of scripts, models and params ##
    # cp nn_model script to working dir.
    os.system('cp nn_models/{} {}'.format(args.nn_model, working_dir))

    ## Load nn model architecture ##
    path = './nn_models/' + args.nn_model
    spec = importlib.util.spec_from_file_location('nn_module', path)
    nn_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(nn_module)
    model = nn_module.ConvNet(args.kernel_size, args.stride, args.padding,
                              args.ks_pool, args.str_pool, args.pad_pool)

    # Alternative, if nn_models is a package:
    #     nn_model = importlib.import_module('.{}'.format(args.nn_model),
    #                                        package='nn_models')
    #     model = nn_model.ConvNet(args.kernel_size, args.stride, args.padding,
    #                              args.ks_pool, args.str_pool, args.pad_pool)
    # CUDA
    if torch.cuda.is_available():
        model = model.cuda()

    # initialise optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # load from restart file; parameters are moved to CUDA during loading
    if args.restart is not None:
        model, optimizer, epoch_start, train_idx, loss_list, acc_list = \
            utils.load_checkpoint(model, optimizer, filename=args.restart)
        print('loaded checkpoint model', flush=True)
    else:
        loss_list = []
        acc_list = []
        epoch_start = 0

    # log model/training params to file
    LogFile = utils.LogFile(args, model, working_dir)

    ## Loss ##
    criterion = nn.CrossEntropyLoss()  # does not ignore padding (0); ignore_index=0 would change that

    # Train the model
    nr_of_batches = -1  # count batches for logging
    valid_loss_list = []
    valid_acc_list = []

    # randomly shuffle the order of the training sub-datasets
    with h5py.File(args.data, 'r') as data_file:
        random_ds = np.array(list(data_file['train'].keys()))  # sub-dataset names
    np.random.shuffle(random_ds)  # shuffle in place
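    # (calling np.random.seed(0) beforehand would make this shuffle, and thus
    #  the sub-dataset order, reproducible across runs)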

    # loop over entire training set multiple times
    for epoch in range(epoch_start, args.num_epochs):

        # loop over sub training sets (for memory reasons)
        for train_idx, sub_name in enumerate(random_ds):
            # load data
            f = args.data
            name = 'train/{}'.format(sub_name)
            train = utils.Prepare_Data(f, name, debug=args.debug)

            # make batches of the data
            train_batches = DataLoader(train,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True)

            for i, batch in enumerate(train_batches):
                nr_of_batches += 1

                # one hot encode
                batch = utils.to_one_hot(batch)

                # transpose dims 1 and 2 so channels = amino acids
                batch = torch.transpose(batch, 1, 2)

                # move the batch to the GPU if the model lives there
                if torch.cuda.is_available():
                    batch = batch.cuda()

                ## switch model to training mode, clear gradient accumulators ##
                # (must come before the forward pass so dropout/batchnorm
                #  run in training mode)
                model.train()
                optimizer.zero_grad()

                ## Run the forward pass ##
                out = model(batch)  # class scores; expected shape [batch, 25, 502], the 25 being per-residue class scores

                # convert back to aa labels from one hot for loss
                batch_labels = utils.from_one_hot(batch)  # integer labels (certain, since input is one-hot)

                ## loss ##
                loss = criterion(out, batch_labels)
                loss_list.append(loss.item())

                ##  Backprop and perform Adam optimisation  ##
                loss.backward()
                optimizer.step()

            ##  Track the training accuracy after every sub training set  ##
            acc = TrainingEval.get_acc(out, batch_labels)
            acc_list.append(acc)
            TrainingEval.save_metrics(acc, loss.item(), nr_of_batches, epoch)
            print(
                'Epoch [{}/{}], sub training set: {}, nr_batches: {}, Loss: {:.4f}, Accuracy: {:.4f}%'
                .format(epoch, args.num_epochs, train_idx, nr_of_batches,
                        loss.item(), acc * 100),
                flush=True)

            ## Validation (every 5 sub training sets) ##
            if train_idx % 5 == 0:
                # get nn model performance on valid set
                val_loss, val_acc, val_acc_pad, N_term, C_term, N_pad = Validation.get_performance(
                    model, criterion, pos_acc=True, debug=args.debug)

                # save validation metrics to file
                Validation.save(val_acc, val_loss, val_acc_pad, epoch,
                                nr_of_batches)

                # add to list for fast plotting
                valid_loss_list.append(val_loss)
                valid_acc_list.append(val_acc)
                print('Validation:  Loss: {:.4f}, Accuracy: {:.4f}%\n'.format(
                    val_loss, val_acc * 100),
                      flush=True)
                # plot
                TrainingEval.plot_metrics(acc_list, loss_list, valid_acc_list,
                                          valid_loss_list, epoch)

        # Save the model every 2 epochs
        if epoch % 2 == 0:
            # save nn model as checkpoint to restart from
            utils.save_checkpoint(model, optimizer, epoch, train_idx,
                                  loss_list, acc_list, working_dir)

            # log current training status to log file
            LogFile.log_saved_model(steps=nr_of_batches)
            LogFile.log_performance(acc, loss.item(), ds_type='Training')

            # test nn model on test data set
            if args.testing:

                # get performance of current nn model on test data
                test_loss, test_acc, test_acc_pad, conf_matrix, N_term, C_term, N_pad = \
                    Test.get_performance(model, criterion,
                                         confusion_matrix=True,
                                         pos_acc=True,
                                         debug=args.debug)

                # save test set metrics of nn model
                Test.save(test_acc,
                          test_loss,
                          test_acc_pad,
                          epoch=epoch,
                          step=nr_of_batches)

                # plots different model analyses
                Test.plot_confusion_matrix(conf_matrix)
                Test.save_conf_matrix(conf_matrix)

                # plot performance prediction on each aa type
                Test.plot_per_class(conf_matrix)

                # plot positional accuracy, i.e. how well the model predicts
                # residues near the N- and C-termini
                Test.plot_pos_acc(N_term, C_term, N_pad)

                # log test metrics in log file
                LogFile.log_performance(test_acc, test_loss, ds_type='Test')
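
Example #2 restarts training through `utils.save_checkpoint` / `utils.load_checkpoint`, which are also not shown here. One plausible shape for that pair, sketched under the assumption that a checkpoint bundles model state, optimizer state, and the bookkeeping lists into a single file (the real helpers may differ, e.g. in file naming or device handling):

import torch

def save_checkpoint(model, optimizer, epoch, train_idx,
                    loss_list, acc_list, working_dir):
    # bundle everything needed to resume training into one file
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'train_idx': train_idx,
                'loss_list': loss_list,
                'acc_list': acc_list},
               '{}/checkpoint.pt'.format(working_dir))

def load_checkpoint(model, optimizer, filename):
    # restore model/optimizer state plus the bookkeeping lists,
    # matching the 6-tuple unpacked in Example #2
    state = torch.load(filename)
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])
    return (model, optimizer, state['epoch'], state['train_idx'],
            state['loss_list'], state['acc_list'])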