def main():
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    test_dir = "./input/deepfake-detection-challenge/test_videos"
    csv_path = "./input/deepfake-detection-challenge/sample_submission.csv"

    face_detector = FaceDetector()
    face_detector.load_checkpoint(
        "./input/dfdc-pretrained-2/RetinaFace-Resnet50-fixed.pth")
    loader = DFDCLoader(test_dir, face_detector, T.ToTensor())

    model1 = xception(num_classes=2, pretrained=False)
    ckpt = torch.load("./input/dfdc-pretrained-2/xception-hg-2.pth",
                      map_location=torch.device('cpu'))
    model1.load_state_dict(ckpt["state_dict"])
    model1 = model1.cpu()
    model1.eval()

    model2 = WSDAN(num_classes=2, M=8, net="xception", pretrained=False).cpu()
    ckpt = torch.load("./input/dfdc-pretrained-2/ckpt_x.pth",
                      map_location=torch.device('cpu'))
    model2.load_state_dict(ckpt["state_dict"])
    model2.eval()

    model3 = WSDAN(num_classes=2, M=8, net="efficientnet",
                   pretrained=False).cpu()
    ckpt = torch.load("./input/dfdc-pretrained-2/ckpt_e.pth",
                      map_location=torch.device('cpu'))
    model3.load_state_dict(ckpt["state_dict"])
    model3.eval()

    # per-channel normalization statistics for the WS-DAN models
    zhq_nm_avg = torch.Tensor([.4479, .3744, .3473]).view(1, 3, 1, 1).cpu()
    zhq_nm_std = torch.Tensor([.2537, .2502, .2424]).view(1, 3, 1, 1).cpu()

    for batch in loader:
        batch = batch.cpu()

        # Xception expects 299x299 inputs scaled to [-1, 1]
        i1 = F.interpolate(batch, size=299, mode="bilinear")
        i1.sub_(0.5).mul_(2.0)
        o1 = model1(i1).softmax(-1)[:, 1].cpu().numpy()

        # WS-DAN (Xception backbone) on mean/std-normalized inputs
        i2 = (batch - zhq_nm_avg) / zhq_nm_std
        o2, _, _ = model2(i2)
        o2 = o2.softmax(-1)[:, 1].cpu().numpy()

        # WS-DAN (EfficientNet backbone) expects 300x300 inputs
        i3 = F.interpolate(i2, size=300, mode="bilinear")
        o3, _, _ = model3(i3)
        o3 = o3.softmax(-1)[:, 1].cpu().numpy()

        # weighted ensemble of the three models
        out = 0.2 * o1 + 0.7 * o2 + 0.1 * o3
        loader.feedback(out)

    # copy the header from the sample submission, then write one
    # prediction per video
    with open(csv_path) as fin, open("submission.csv", "w") as fout:
        fout.write(next(fin))
        for line in fin:
            fname = line.split(",", 1)[0]
            pred = loader.score[fname]
            print("%s,%.6f" % (fname, pred), file=fout)
def test_on_image_directory(path):
    net = SimpleConvNet()
    # dummy forward pass to materialize the network before loading weights
    net.forward(Variable(torch.FloatTensor(1, 1, 28, 28)))
    net.load_state_dict(torch.load(open(MODEL_FILE, "rb")))
    # the ground-truth digit is encoded in the file name, e.g. "7.png"
    label_regex = re.compile(r"([0-9]+)\..*?")
    correct = 0
    total = 0
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))])
    for file_path in glob.glob(os.path.join(path, "*.png")):
        cur_correct = int(label_regex.findall(os.path.basename(file_path))[0])
        image = PIL.Image.open(file_path).convert("L")
        transformed_image = transform(image)
        transformed_image = Variable(transformed_image.view(1, 1, 28, 28))
        cur_predicted = net.forward(transformed_image).data.max(1)[1].item()
        print(os.path.basename(file_path) + " - " + str(cur_predicted))
        total += 1
        if cur_correct == cur_predicted:
            correct += 1
    print("Identified {} of {}, {:.2%}".format(
        correct, total, float(correct) / float(total)))
def show_saved_net_accuracy():
    train_dataset, test_dataset = load_normalized_datasets()
    test_dataset_loader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)
    net = SimpleConvNet()
    # dummy forward pass to materialize the network before loading weights
    net.forward(Variable(torch.FloatTensor(1, 1, 28, 28)))
    net.load_state_dict(torch.load(open(MODEL_FILE, "rb")))
    test_training_accuracy(net, test_dataset_loader, 0)
def single_run(corpus, index, title, overwrite, only_test=False):
    if cfg.BATCH_TYPE == "multi":
        collate_fn = multi_batchify
    else:
        collate_fn = lambda x: \
            (x[0].X, x[0].C, x[0].POS, x[0].REL, x[0].DEP, x[0].Y)

    model_save_path = os.path.join(cfg.MODEL_SAVE_DIR, title + ".m")
    plot_save_path = os.path.join(cfg.PLOT_SAVE_DIR, title + ".png")

    if not only_test:
        the_model = build_model(corpus.train, corpus.dev, corpus.test,
                                collate_fn, corpus.tag_idx, corpus.is_oov,
                                corpus.embedding_matrix, model_save_path,
                                plot_save_path)
    else:
        the_model = torch.load(model_save_path)

    print("Testing ...")
    test_loader = DataLoader(corpus.test, batch_size=cfg.BATCH_SIZE,
                             num_workers=28, collate_fn=collate_fn)
    test_eval, only_ent_eval, pred_list, true_list = test(
        "test", test_loader, corpus.tag_idx, the_model)

    print("Writing Brat File ...")
    bratfile_full = Writer(cfg.CONF_DIR, os.path.join(cfg.BRAT_DIR, title),
                           "full_out", corpus.tag_idx)
    bratfile_inc = Writer(cfg.CONF_DIR, os.path.join(cfg.BRAT_DIR, title),
                          "inc_out", corpus.tag_idx)

    # convert idx to label and report results
    test_eval.print_results()
    only_ent_eval.print_results()

    txt_res_file = os.path.join(cfg.TEXT_RESULT_DIR, title + ".txt")
    csv_res_file = os.path.join(cfg.CSV_RESULT_DIR, title + ".csv")
    test_eval.write_results(txt_res_file,
                            title + " g={0}".format(cfg.LM_GAMMA), overwrite)
    only_ent_eval.write_results(txt_res_file,
                                title + " g={0}".format(cfg.LM_GAMMA),
                                overwrite)
    test_eval.write_csv_results(csv_res_file,
                                title + " g={0}".format(cfg.LM_GAMMA),
                                overwrite)

    test_loader = DataLoader(corpus.test, batch_size=cfg.BATCH_SIZE,
                             num_workers=28, collate_fn=collate_fn)
    sents = [(sent, p)
             for SENT, X, C, POS, Y, P in test_loader
             for sent, p in zip(SENT, P)]
    bratfile_full.from_labels(sents, true_list, pred_list, doFull=True)
    bratfile_inc.from_labels(sents, true_list, pred_list, doFull=False)
    return test_eval
def load_pretrained_rnn(rnn_params, rnn_path):
    # load pretrained rnn
    rnn_net = Text_RNN(*rnn_params)
    rnn_net.load_state_dict(torch.load(rnn_path))
    rnn_net.eval()
    # freeze weights so the encoder is not updated during training
    for param in rnn_net.parameters():
        param.requires_grad = False
    return rnn_net
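# Hedged usage sketch for the load-and-freeze pattern above, using a plain
# nn.GRU as a stand-in because Text_RNN's constructor signature is
# project-specific; the checkpoint path here is a throwaway temp file.
import torch
import torch.nn as nn

encoder = nn.GRU(input_size=300, hidden_size=128, batch_first=True)
torch.save(encoder.state_dict(), "/tmp/rnn_demo.pt")  # stand-in checkpoint

restored = nn.GRU(input_size=300, hidden_size=128, batch_first=True)
restored.load_state_dict(torch.load("/tmp/rnn_demo.pt"))
restored.eval()  # switch to inference behavior (dropout off, etc.)
for param in restored.parameters():
    param.requires_grad = False  # frozen: skipped by the optimizer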
def merge_checkpoints(checkpoint_paths, output_path):
    """Average the float tensors of several checkpoints into one."""
    count = 0 if checkpoint_paths is None else len(checkpoint_paths)
    if count < 1:
        raise ValueError(
            'Need to specify at least one checkpoint, %d provided.' % count)
    if count < 2:
        # a single checkpoint needs no averaging; just copy it
        shutil.copyfile(checkpoint_paths[0], output_path)
        return

    def __sum(source, destination):
        # recursively add every float tensor in `source` into `destination`
        for key, value in source.items():
            if isinstance(value, dict):
                node = destination.setdefault(key, {})
                __sum(value, node)
            elif isinstance(value, torch.FloatTensor):
                destination[key] = torch.add(destination[key], value)
        return destination

    def __divide(source, denominator):
        # recursively divide every float tensor by the checkpoint count
        for key, value in source.items():
            if isinstance(value, dict):
                __divide(value, denominator)
            elif isinstance(value, torch.FloatTensor):
                source[key] = torch.div(value, denominator)
        return source

    output_checkpoint = torch.load(checkpoint_paths[0])
    for checkpoint_path in checkpoint_paths[1:]:
        checkpoint = torch.load(checkpoint_path)
        output_checkpoint = __sum(checkpoint, output_checkpoint)
    output_checkpoint = __divide(output_checkpoint, count)
    torch.save(output_checkpoint, output_path)
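# Hedged usage sketch for merge_checkpoints: average the last few epoch
# snapshots into one file (simple checkpoint averaging). The glob pattern
# and file names are hypothetical placeholders.
import glob

epoch_ckpts = sorted(glob.glob("checkpoints/epoch_*.pt"))[-3:]
merge_checkpoints(epoch_ckpts, "checkpoints/averaged.pt")
# the averaged file loads like any other checkpoint:
# model.load_state_dict(torch.load("checkpoints/averaged.pt"))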
def loadModel(optional=True):
    model_exists = os.path.isfile(MODEL_PATH_BEST)
    if model_exists:
        checkpoint = torch.load(MODEL_PATH_BEST)
        net.load_state_dict(checkpoint['state_dict'])
        return "TRAINING AVG LOSS: {}\n" \
               "TRAINING AVG DIFF: {}".format(
                   checkpoint["epoch_avg_loss"], checkpoint["epoch_avg_diff"])
    elif not optional:
        # loading was mandatory, so a missing checkpoint is an error
        raise Exception("model couldn't be found:", MODEL_PATH_BEST)
    # otherwise model loading was optional, so nothing to do
def load_checkpoint(self, gpu_arg, checkpoint_file):
    """
    Method to load a checkpoint file and reassign required variables.
    INPUT:
        1. GPU user selection: <bool>
        2. Checkpoint file: <string>
    RETURNS:
        1. Selected model definition: <model object>
        2. Gradient descent def: <optimizer object>
    """
    # check if the GPU is currently available and set the device flag appropriately
    _ = self.gpu_status(gpu_arg)

    # load the old model state
    checkpoint = torch.load(checkpoint_file, map_location=self.device_location)

    # load a pre-trained network based on the architecture recorded in the checkpoint
    if checkpoint['arch'] == 'vgg13':
        model = models.vgg13(pretrained=True)
    elif checkpoint['arch'] == 'vgg16':
        model = models.vgg16(pretrained=True)
    elif checkpoint['arch'] == 'densenet121':
        model = models.densenet121(pretrained=True)
    else:
        model = models.vgg16(pretrained=True)
        print("Checkpoint model architecture not recognized or supported.\n"
              "Using default VGG16 instead. Available architectures: VGG13,\n"
              "VGG16, and DenseNet121.\n")

    # freeze the network's parameters so no backprop occurs
    for param in model.parameters():
        param.requires_grad = False

    # in case more training is desired, reassign the needed values
    self.arch = checkpoint['arch']
    self.epochs = checkpoint['epochs']
    self.training_loss = checkpoint['loss']
    model.classifier = checkpoint['classifier']
    #criterion = checkpoint['criterion']

    # prepare to train the model using NLLLoss and Adam (for momentum),
    # with the learning rate stored in the checkpoint
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=checkpoint['learning_rate'])

    # reassign the state dictionaries and label indices
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model, optimizer
def restore_from(model: nn.Module, restore_from: str):
    '''Restore a model from a checkpoint.

    Args:
        model (nn.Module): the model to restore
        restore_from (str): path to the checkpoint

    Returns:
        model (nn.Module): the restored model
        epoch (int): the epoch recorded in the checkpoint
    '''
    assert os.path.exists(restore_from), \
        'Path does not exist! {}'.format(restore_from)
    ckpt = torch.load(restore_from)
    model.load_state_dict(ckpt['model_state_dict'])
    return model, ckpt['epoch']
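# Hedged usage sketch for restore_from: resume training at the epoch stored
# in the checkpoint. The model and path below are stand-ins, not values
# from this project.
import torch
import torch.nn as nn

demo = nn.Linear(10, 2)
torch.save({'model_state_dict': demo.state_dict(), 'epoch': 5},
           '/tmp/restore_demo.pt')
demo, last_epoch = restore_from(demo, '/tmp/restore_demo.pt')
start_epoch = last_epoch + 1  # continue from the next epoch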
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = BiLSTM_CRF(embedding_matrix, tag_idx)

    # turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # skip parameters that have requires_grad = False
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    best_res_val_0 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)
        random.seed(epoch)
        train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN, num_workers=28,
                                  collate_fn=collate_fn)
        train_eval, model = train_a_epoch(name="train", data=train_loader,
                                          tag_idx=tag_idx, model=model,
                                          optimizer=optimizer)
        dev_loader = DataLoader(dev_dataset, batch_size=cfg.BATCH_SIZE,
                                num_workers=28, collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=cfg.BATCH_SIZE,
                                 num_workers=28, collate_fn=collate_fn)
        dev_eval, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _ = test("test", test_loader, tag_idx, model)
        dev_eval.verify_results()
        test_eval.verify_results()

        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs",
                   "fscore", "epoch learning curve", plot_save_path)
        pickle.dump((dev_eval_history, test_eval_history),
                    open("plot_data.p", "wb"))

        # pick the best epoch: always save during the warm-up period, then
        # only when the dev selector metric improves
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]
            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(str(best_epoch)))

        # early stopping: quit if the dev score has not improved for
        # MAX_EPOCH_IMP consecutive epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break

    print("Loading Best Model ...")
    model = torch.load(model_save_path)
    return model
def detect(save_img=False):
    out, source, weights, half, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.half, opt.view_img, \
        opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # Initialize
    #device = torch_utils.select_device(opt.device)
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Load model
    '''
    The original method needs exactly the same folder structure and imports
    it was trained on; this is a pickle limitation of the torch model.
    The alternative would be loading from GitHub, which is not working.
    Another issue arises when training and detection use different devices (CPU/GPU).
    '''
    # DB 20201018 = Original method
    #google_utils.attempt_download(weights)
    attempt_download(weights)
    model = torch.load(weights, map_location=device)['model']  # ORIGINAL
    # torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    model.to(device).eval()  # ATTENTION! UNCOMMENT THIS IF YOU UNCOMMENT model = torch.load(weights, map_location=device)['model']  # ORIGINAL
    #model.to(device).float().eval()  # DB 20201018: detect on CPU using a GPU-trained model

    # DB 20201018: Load-from-GitHub method
    #model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).to(device).eval()  # DB 20201016 MODEL IMPORT FIX
    #model = torch.hub.load('danfbento/SIB2', 'mod5_test_weight', pretrained=True).to(device).eval()  # DB 20201016 MODEL IMPORT FIX

    # Second-stage classifier
    classify = False
    if classify:
        #modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.names if hasattr(model, 'names') else model.modules.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()) \
        if device.type != 'cpu' else None  # run once to warm up
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        #t1 = torch_utils.time_synchronized()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        #t2 = torch_utils.time_synchronized()
        t2 = time_synchronized()

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   fast=True, classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label,
                                     color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = MultiBatchSeqNet(embedding_matrix, batch_size=cfg.BATCH_SIZE,
                             isCrossEnt=False, char_level=cfg.CHAR_LEVEL,
                             pos_feat=cfg.POS_FEATURE,
                             dep_rel_feat=cfg.DEP_LABEL_FEATURE,
                             dep_word_feat=cfg.DEP_WORD_FEATURE)

    # turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # skip parameters that have requires_grad = False
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    # init loss criteria (summed over the batch, not averaged)
    seq_criterion = nn.NLLLoss(size_average=False)
    lm_f_criterion = nn.NLLLoss(size_average=False)
    lm_b_criterion = nn.NLLLoss(size_average=False)
    att_loss = nn.CosineEmbeddingLoss(margin=1)

    best_res_val_0 = 0.0
    best_res_val_1 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)
        random.seed(epoch)
        train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN, num_workers=28,
                                  collate_fn=collate_fn)
        train_eval, model = train_a_epoch(name="train", data=train_loader,
                                          tag_idx=tag_idx, is_oov=is_oov,
                                          model=model, optimizer=optimizer,
                                          seq_criterion=seq_criterion,
                                          lm_f_criterion=lm_f_criterion,
                                          lm_b_criterion=lm_b_criterion,
                                          att_loss=att_loss,
                                          gamma=cfg.LM_GAMMA)
        dev_loader = DataLoader(dev_dataset, batch_size=cfg.BATCH_SIZE,
                                num_workers=28, collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=cfg.BATCH_SIZE,
                                 num_workers=28, collate_fn=collate_fn)
        dev_eval, _, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _, _ = test("test", test_loader, tag_idx, model)
        dev_eval.verify_results()
        test_eval.verify_results()

        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs",
                   "fscore", "epoch learning curve", plot_save_path)
        pickle.dump((dev_eval_history, test_eval_history),
                    open("plot_data.p", "wb"))

        # pick the best epoch: always save during the warm-up period, then
        # only when the dev selector metric improves
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]
            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(str(best_epoch)))

        # early stopping: quit if the dev score has not improved for
        # MAX_EPOCH_IMP consecutive epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break

    print("Loading Best Model ...")
    model = torch.load(model_save_path)
    return model
def load_checkpoint(self, path):
    self.net.load_state_dict(torch.load(path))
def main():
    ###########################################################################
    # preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0  # assert aborts the program when its condition is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    global confirm
    global person

    fps = 0.0
    count = 0
    frame = 0
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #out = cv2.VideoWriter('output/testwrite_normal.avi', fourcc, 15.0, (640,480), True)

    cap = cv2.VideoCapture(0)

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []

    while True:
        start = time.time()
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break

        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        #######################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        time_a = time.time()

        if count_yolo % 3 == 0:  # detect people every 3 frames
            # run the resized image through the YOLO network to get detections
            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                fps = (fps + (1. / (time.time() - start))) / 2
                print("fps= %f" % (fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # clamp the tensor values to the valid coordinate range
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= color_image.shape[1]
            output[:, [2, 4]] *= color_image.shape[0]
            output = output.cpu().numpy()
            # drop detections whose label is not "person" to cut computation
            output = sellect_person(output)
            output = np.array(output)
            output_update = output
        else:
            output = output_update
        count_yolo += 1

        # draw the results on the original image;
        # output[:, 1:5] holds the top-left and bottom-right box corners
        list(map(lambda x: write(x, orig_im), output))
        detect_time.append(time.time() - time_a)

        #######################################################################
        # kalman filter part
        time_a = time.time()

        # convert output into the format the Kalman tracker expects
        outputs_tlwh = to_tlwh(output)
        features = encoder(orig_im, outputs_tlwh)
        detections = [Detection(output_tlwh, 1.0, feature)
                      for output_tlwh, feature in zip(outputs_tlwh, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 255, 255), 2)
            cv2.putText(orig_im, str(track.track_id),
                        (int(box[0]), int(box[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        kalman_time.append(time.time() - time_a)

        #######################################################################
        # face recognition part
        time_a = time.time()
        if not confirm:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users', i, '%s.txt' % (i))
                         for i in name_list]
            total_features = np.empty((128,), np.float32)
            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features, temp))
            total_features = total_features[1:]

            #threshold = 0.30896
            # that threshold is not appropriate, probably because of the
            # mismatch between the training and test sets!!!
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(
                torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")  # is_cuda_available

            trans = transforms.Compose([
                transforms.Resize((112, 112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)  # detect faces first

            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    # crop the face box out of the frame
                    person_img = color_image[int(bbox[1]):int(bbox[3]),
                                             int(bbox[0]):int(bbox[2])].copy()
                    # compute the feature vector for the cropped face
                    feature = np.squeeze(get_feature(person_img,
                                                     model_facenet,
                                                     trans, device))
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # draw the name and the box here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)  # add the name
                    cv2.rectangle(orig_im, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 0, 255))  # add the box

            #cv2.imshow("frame", orig_im)
            ###################################################################
            # confirm part
            print('confirmation rate: {} %'.format(count * 10))
            cv2.putText(orig_im,
                        'confirmation rate: {} %'.format(count * 10),
                        (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
            if len(bboxes) != 0 and len(output) != 0:
                # count a hit when the face box lies inside the person box
                if bboxes[0, 0] > output[0, 1] and bboxes[0, 1] > output[0, 2] \
                        and bboxes[0, 2] < output[0, 3] \
                        and bboxes[0, 3] < output[0, 4] and person:
                    count += 1
            frame += 1
            if count >= 10 and frame <= 30:
                confirm = True
                print('confirmed that the face belongs to that person')
            elif frame >= 30:
                print('confirmation failed, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm:
                cv2.putText(orig_im, 'confirmation failed, starting again',
                            (10, 60), cv2.FONT_HERSHEY_PLAIN, 2,
                            [0, 255, 0], 2)

        #######################################################################
        recogn_time.append(time.time() - time_a)
        time_a = time.time()

        # show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed',
                        (output[0, 1].astype(np.int32) + 100,
                         output[0, 2].astype(np.int32) + 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
        # overlay the recognized name
        if confirm:
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person,
                                (int(bbox[0]) + 100, int(bbox[1]) + 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)

        #rate.sleep()
        cv2.imshow("frame", orig_im)
        #out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break

        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))

    # calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1 / (avg_detect_time + avg_recogn_time +
                                    avg_kalman_time + avg_aux_time)))
def __init__(self, text_params, audio_params, fusion_params, paths,
             p_drop=0.15, post_tfn_subnet=128):
    super(Hierarchy_Attn, self).__init__()

    # define text & audio recurrent subnets
    self.text_rnn = Text_Encoder(*text_params)
    self.audio_rnn = Audio_Encoder(*audio_params)

    # load pretrained encoders and freeze their weights
    self.text_rnn.load_state_dict(torch.load(paths["text"]))
    self.text_rnn.eval()
    for p in self.text_rnn.parameters():
        p.requires_grad = False
    self.audio_rnn.load_state_dict(torch.load(paths["audio"]))
    self.audio_rnn.eval()
    for p in self.audio_rnn.parameters():
        p.requires_grad = False

    # define fusion RNN net
    self.fusion_rnn = Text_RNN(*fusion_params)

    # define cat-fusion attention level
    text_dim_tuple, audio_dim_tuple = \
        self.get_rnn_tuples(text_params[1], audio_params[1])
    self.fusion_net = Attn_Fusion(text_dim_tuple, audio_dim_tuple)

    # define mul-fusion layer
    self.mul_fusion = Mul_Fusion(text_dim_tuple, audio_dim_tuple)

    # get text hidden size and audio hidden size
    H = text_dim_tuple[2]
    D = audio_dim_tuple[2]

    # fused reps dimensionality reduction
    '''
    self.fusion_transform = nn.Sequential(nn.Linear(2*H+D, H),
                                          nn.Dropout(p_drop),
                                          nn.ReLU(),
                                          nn.Linear(H, H//2),
                                          nn.Dropout(p_drop),
                                          nn.ReLU())
    '''

    # deep representations
    self.deep_audio = nn.Sequential(nn.Linear(D, D),
                                    nn.Dropout(p_drop),
                                    nn.ReLU(),
                                    nn.Linear(D, D),
                                    nn.Dropout(p_drop),
                                    nn.ReLU())
    self.deep_text = nn.Sequential(nn.Linear(H, H),
                                   nn.ReLU(),
                                   nn.Linear(H, H),
                                   nn.ReLU())
    W = fusion_params[1] * 2
    _W = W + H + D
    self.deep_fused = nn.Sequential(nn.Linear(_W, _W),
                                    nn.Dropout(p_drop),
                                    nn.ReLU(),
                                    nn.Linear(_W, W),
                                    nn.Dropout(p_drop),
                                    nn.ReLU(),
                                    nn.Linear(W, W),
                                    nn.Dropout(p_drop),
                                    nn.ReLU())

    ################################
    ## deep feature + reps networks
    ###############################
    '''
    self.deep_audio_2 = nn.Sequential(nn.Linear(2*D, D),
                                      nn.Dropout(p_drop),
                                      nn.ReLU())
    self.deep_text_2 = nn.Sequential(nn.Linear(2*H, H),
                                     nn.ReLU())
    self.deep_fusion_2 = nn.Sequential(nn.Linear(2*D, D),
                                       nn.Dropout(p_drop),
                                       nn.ReLU())
    '''

    ###################################
    ### final fusion layer
    ###################################
    self.deep_mul = nn.Sequential(nn.Linear(D, D),
                                  nn.Dropout(p_drop),
                                  nn.ReLU(),
                                  nn.Linear(D, D),
                                  nn.Dropout(p_drop),
                                  nn.ReLU())

    # define dense layers
    cat_size = H + D + W
    self.dense = nn.Sequential(nn.Linear(cat_size, cat_size),
                               nn.Dropout(p_drop),
                               nn.ReLU(),
                               nn.Linear(cat_size, cat_size // 2),
                               nn.Dropout(p_drop),
                               nn.ReLU(),
                               nn.Linear(cat_size // 2, cat_size // 2),
                               nn.Dropout(p_drop),
                               nn.ReLU(),
                               nn.Linear(cat_size // 2, H),
                               nn.Dropout(p_drop),
                               nn.ReLU(),
                               nn.Linear(H, D),
                               nn.Dropout(p_drop),
                               nn.ReLU())
    self.softmax = nn.Softmax(dim=-1)

    # map according to task
    self.fusion_mapping = nn.Linear(D, 1)
    self.audio_mapping = nn.Linear(D, 1)
    self.text_mapping = nn.Linear(H, 1)