from dataset import yoloDataset
from visualize import Visualizer
import numpy as np

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# True when PyTorch reports a usable CUDA device.
use_gpu = torch.cuda.is_available()

# Machine-specific absolute path; presumably the VOC2012 image folder that is
# handed to the dataset loader later in the script -- TODO confirm.
file_root = '/home/xzh/data/VOCdevkit/VOC2012/allimgs/'

# Training settings; the code that consumes them is outside this section.
learning_rate = 0.001
num_epochs = 50
batch_size = 24

# ---------------------------------------------------------------------------
# Backbone selection
# ---------------------------------------------------------------------------

# Flip to False to build the VGG16-BN network instead of ResNet-50.
use_resnet = True
net = resnet50() if use_resnet else vgg16_bn()

# Disabled experiments, kept for reference only (none of this executes).
#
# Replacement classifier head:
# net.classifier = nn.Sequential(
#     nn.Linear(512 * 7 * 7, 4096),
#     nn.ReLU(True),
#     nn.Dropout(),
#     # nn.Linear(4096, 4096),
#     # nn.ReLU(True),
#     # nn.Dropout(),
#     nn.Linear(4096, 1470),
# )
#
# ResNet-18 variant with a new fully connected layer:
# net = resnet18(pretrained=True)
# net.fc = nn.Linear(512, 1470)
#
# Initial Linear weights:
# for m in net.modules():
#     if isinstance(m, nn.Linear):
#         m.weight.data.normal_(0, 0.01)
cls_index = cls_indexs[i] cls_index = int(cls_index) # convert LongTensor to int prob = probs[i] prob = float(prob) result.append([(x1, y1), (x2, y2), VOC_CLASSES[cls_index], image_name, prob]) return result if __name__ == '__main__': # model = resnet50() if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") model = vgg16_bn() print('load model...') model.load_state_dict(torch.load('best.pth')) model.eval() model.to(device) image_name = 'person.jpg' image = cv2.imread(image_name) print('predicting...') result = predict_gpu(model, image_name) for left_up, right_bottom, class_name, _, prob in result: color = Color[VOC_CLASSES.index(class_name)] cv2.rectangle(image, left_up, right_bottom, color, 2) label = class_name + str(round(prob, 2)) text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1) p1 = (left_up[0], left_up[1] - text_size[1])