Example #1
import torch
from torch.utils.data import DataLoader

# resnet50 (the detection variant used in this project), VocDetectorDataset, and
# currentdir are assumed to be provided by the surrounding project modules.


def init_pred():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # YOLO network hyperparameters
    B = 2  # number of bounding box predictions per cell
    S = 14  # width/height of network output grid (larger than 7x7 from paper since we use a different network)
    # load_network_path = '/mnt/c/Users/herbe/CS242/fa18-cs242-final/yolo/best_detector.pth'
    load_network_path = '/home/herbertwangwrt/fa18-cs242-final/yolo/best_detector.pth'
    pretrained = True

    # use to load a previously trained network
    if load_network_path is not None:
        # print('Loading saved network from {}'.format(load_network_path))
        net = resnet50().to(device)
        net.load_state_dict(
            torch.load(load_network_path,
                       map_location=lambda storage, loc: storage))
    else:
        # print('Load pre-trained model')
        net = resnet50(pretrained=pretrained).to(device)
    batch_size = 24
    file_root_train = 'VOCdevkit_2007/VOC2007/JPEGImages/'
    annotation_file_train = currentdir + '/voc2007.txt'

    train_dataset = VocDetectorDataset(root_img_dir=file_root_train,
                                       dataset_file=annotation_file_train,
                                       train=True,
                                       S=S)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4)
    file_root_test = 'VOCdevkit_2007/VOC2007test/JPEGImages/'
    annotation_file_test = currentdir + '/voc2007test.txt'

    test_dataset = VocDetectorDataset(root_img_dir=file_root_test,
                                      dataset_file=annotation_file_test,
                                      train=False,
                                      S=S)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=4)

    net.eval()
    return net
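
The function above only returns the network, so a minimal usage sketch follows. It assumes the detector expects 448x448 RGB inputs and emits an SxSx(B*5+C) grid (14x14x30 for the 20 VOC classes); this matches the comments in the code but is not confirmed by the snippet alone.

net = init_pred()
device = next(net.parameters()).device

# one fake 448x448 image batch; replace with a real test image in practice
dummy = torch.randn(1, 3, 448, 448, device=device)
with torch.no_grad():
    pred = net(dummy)
# expected under the assumptions above: torch.Size([1, 14, 14, 30])
print(pred.shape)
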
Example #2
import numpy as np
import torch
from torch.utils.data import DataLoader

# YoloLoss and VocDetectorDataset come from the surrounding project; S, B, net,
# and learning_rate are assumed to be defined as in Example #1 (S = 14, B = 2).

num_epochs = 50
batch_size = 24

# Yolo loss component coefficients (as given in Yolo v1 paper)
lambda_coord = 5
lambda_noobj = 0.5


criterion = YoloLoss(S, B, lambda_coord, lambda_noobj)
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)


file_root_train = 'VOCdevkit_2007/VOC2007/JPEGImages/'
annotation_file_train = 'voc2007.txt'

train_dataset = VocDetectorDataset(root_img_dir=file_root_train,dataset_file=annotation_file_train,train=True, S=S)
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=4)
print('Loaded %d train images' % len(train_dataset))


file_root_test = 'VOCdevkit_2007/VOC2007test/JPEGImages/'
annotation_file_test = 'voc2007test.txt'

test_dataset = VocDetectorDataset(root_img_dir=file_root_test,dataset_file=annotation_file_test,train=False, S=S)
test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=4)
print('Loaded %d test images' % len(test_dataset))



best_test_loss = np.inf
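
A minimal sketch of how these pieces are typically wired together is shown below; the project's actual train/eval routine may differ (for example in learning-rate scheduling or logging). It assumes each batch yields (images, encoded target grids) and that a CUDA device is used when available.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for epoch in range(num_epochs):
    net.train()
    for images, targets in train_loader:  # assumes (image batch, encoded target grid)
        images, targets = images.to(device), targets.to(device)
        loss = criterion(net(images), targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # evaluate on the test split and keep the best checkpoint seen so far
    net.eval()
    test_loss = 0.0
    with torch.no_grad():
        for images, targets in test_loader:
            images, targets = images.to(device), targets.to(device)
            test_loss += criterion(net(images), targets).item()
    test_loss /= len(test_loader)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(net.state_dict(), 'best_detector.pth')
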
Example #3
# parser, device, the dataset paths, and helpers such as resnet50, VocDetectorDataset,
# evaluate, output_submission_csv, and train are module-level names defined elsewhere
# in this script.
def main():
    global args
    args = parser.parse_args()

    load_network_path = args.model_path
    batch_size = args.batch_size
    S = args.S
    ''' To implement YOLO we rely on a pretrained classifier as the backbone for our detection network.
    PyTorch offers a variety of models pretrained on ImageNet in the [`torchvision.models`]
    (https://pytorch.org/docs/stable/torchvision/models.html) package. In particular, we use the ResNet50
    architecture as the base for our detector. This differs from the base architecture in the YOLO paper
    and results in a different output grid size (14x14 instead of 7x7). Models are typically pretrained on
    ImageNet since the dataset is very large, and the pretrained weights provide a very useful initialization
    for our detector, so that the network is able to learn quickly and effectively.
    '''

    if args.eval:
        if load_network_path is None:
            print("Model path not specified!!")
            exit(0)
        else:
            print('Loading saved network from {}'.format(load_network_path))
            net = resnet50().to(device)
            net.load_state_dict(torch.load(load_network_path))
        # To evaluate detection results we use mAP (mean of average precision over each class)
        net.eval()
        test_aps = evaluate(net, test_dataset_file=annotation_file_test)
        output_submission_csv('my_solution.csv', test_aps)
    else:
        pretrained = True
        # use to load a previously trained network
        if load_network_path is not None:
            print('Loading saved network from {}'.format(load_network_path))
            net = resnet50().to(device)
            net.load_state_dict(torch.load(load_network_path))
        else:
            print('Load pre-trained model')
            net = resnet50(pretrained=pretrained).to(device)
        ''' Since Pascal VOC is a small dataset (about 5000 images in train+val), we combine the train and val
        splits to train our detector. The train dataset loader also uses a variety of data augmentation
        techniques, including random shift, scaling, crop, and flips. Data augmentation is slightly more
        complicated for a detection dataset since the bounding box annotations must be kept consistent through
        the transformations. Since the output of the detector network we train is an SxSx(B*5+C) tensor, we use
        an encoder to convert the original bounding box coordinates into relative grid bounding box coordinates
        corresponding to the expected output. We also use a decoder to convert in the opposite direction, back
        into image-coordinate bounding boxes. (A minimal sketch of this grid encoding appears after this example.)
        '''
        train_dataset = VocDetectorDataset(root_img_dir=file_root_train,
                                           dataset_file=annotation_file_train,
                                           train=True,
                                           S=S)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=4)
        print('Loaded %d train images' % len(train_dataset))

        test_dataset = VocDetectorDataset(root_img_dir=file_root_test,
                                          dataset_file=annotation_file_test,
                                          train=False,
                                          S=S)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=4)
        print('Loaded %d test images' % len(test_dataset))

        loss_history = train(args, net, train_loader, test_loader)

        plt.plot(np.squeeze(loss_history))
        plt.ylabel('loss')
        plt.xlabel('iterations')
        plt.title("Training Loss")
        plt.savefig('training_loss.png')
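
To make the encoder/decoder description in the docstring above concrete, here is an illustrative encoder, not the project's actual one. It assumes 448x448 inputs, S = 448 // 32 = 14 because the ResNet-50 backbone downsamples by a factor of 32, B = 2 boxes per cell, C = 20 VOC classes, and a (x_offset, y_offset, w, h, confidence) layout per box slot; the real encoder's conventions may differ.

import torch

def encode_boxes(boxes, labels, S=14, B=2, C=20):
    """Illustrative grid encoder: boxes is an (N, 4) tensor of [x1, y1, x2, y2]
    normalized to [0, 1]; labels is an (N,) tensor of class indices."""
    target = torch.zeros(S, S, B * 5 + C)
    for box, label in zip(boxes, labels):
        x1, y1, x2, y2 = [float(v) for v in box]
        cx, cy = (x1 + x2) / 2, (y1 + y2) / 2        # box center
        w, h = x2 - x1, y2 - y1                      # box size
        i = min(int(cy * S), S - 1)                  # responsible grid row
        j = min(int(cx * S), S - 1)                  # responsible grid column
        x_off, y_off = cx * S - j, cy * S - i        # center offset inside the cell
        for b in range(B):
            target[i, j, b * 5:b * 5 + 5] = torch.tensor([x_off, y_off, w, h, 1.0])
        target[i, j, B * 5 + int(label)] = 1.0       # one-hot class vector
    return target

# example: one box covering the image center, class 0
# t = encode_boxes(torch.tensor([[0.4, 0.4, 0.6, 0.6]]), torch.tensor([0]))
# t.shape -> torch.Size([14, 14, 30])
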