def __init__(self, fps=12, width=176, height=100, use_gpu=True, model_data="model_best.pth.tar"):
    """Set up the camera, the frame/prediction queues and the gesture model.

    Args:
        fps: target capture rate; used to derive the per-frame sleep time.
        width, height: size every captured frame is resized to.
        use_gpu: run inference on CUDA when a GPU is actually available.
        model_data: path to the trained checkpoint file.

    Raises:
        FileNotFoundError: if ``model_data`` does not exist.
    """
    Thread.__init__(self)
    self.isRunning = True
    self._capture = cv.VideoCapture(0)
    self._target_frame_size = (width, height)
    self._sleeping_time = 1 / fps
    self._event_queue = queue.Queue()
    # 18 frames form one clip fed to the network; 3 recent predictions are
    # kept for smoothing.
    self._frame_queue = queue.Queue(maxsize=18)
    self._predict_queue = queue.Queue(maxsize=3)
    self._model = ConvColumn(8)
    # BUG FIX: previously .cuda() was called whenever use_gpu was set, which
    # crashes on machines without CUDA; guard on actual availability.
    if use_gpu and torch.cuda.is_available():
        self._model.cuda()
    if not os.path.isfile(model_data):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), model_data)
    last_checkpoint = torch.load(model_data, map_location='cpu')
    # The checkpoint was saved from a DataParallel model, so every weight key
    # carries a 'module.' prefix.  BUG FIX: build the renamed dict directly
    # instead of deleting/re-inserting 'state_dict' while iterating over the
    # checkpoint dict (mutation during .items() iteration).
    new_state_dict = OrderedDict(
        (key[7:], value) for key, value in last_checkpoint['state_dict'].items())
    self._model.load_state_dict(new_state_dict)
    self._transform = Compose([
        ToPILImage(),
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    self._device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    # Bidirectional mapping: class index <-> gesture name.
    self._gestures = {0: GestureDetectorThread.SWIPE_LEFT,
                      1: GestureDetectorThread.SWIPE_RIGHT,
                      2: GestureDetectorThread.SWIPE_DOWN,
                      3: GestureDetectorThread.SWIPE_UP,
                      4: GestureDetectorThread.THUMB_OK,
                      5: GestureDetectorThread.THUMB_NOT,
                      6: GestureDetectorThread.NO_GESTURE,
                      7: GestureDetectorThread.OTHER_GESTURE,
                      GestureDetectorThread.SWIPE_LEFT: 0,
                      GestureDetectorThread.SWIPE_RIGHT: 1,
                      GestureDetectorThread.SWIPE_DOWN: 2,
                      GestureDetectorThread.SWIPE_UP: 3,
                      GestureDetectorThread.THUMB_OK: 4,
                      GestureDetectorThread.THUMB_NOT: 5,
                      GestureDetectorThread.NO_GESTURE: 6,
                      GestureDetectorThread.OTHER_GESTURE: 7}
    # Minimum softmax confidence before a prediction is accepted.
    # (Name kept as-is — 'TRESHOLD' is public and may be referenced elsewhere.)
    self.TRESHOLD = 0.7
def test():
    """Validate a trained ConvColumn checkpoint on the validation split.

    Relies on module-level globals: ``config``, ``args``, ``gpus``,
    ``save_dir`` and the ``validate`` helper.
    """
    # adds a handler for Ctrl+C
    def signal_handler(signal, frame):
        """Remove the output dir on Ctrl+C when it is still empty, so
        aborted experimental runs leave no noise behind."""
        num_files = len(glob.glob(save_dir + "/*"))
        if num_files < 1:
            shutil.rmtree(save_dir)
        print('You pressed Ctrl+C!')
        sys.exit(0)

    # assign Ctrl+C signal handler
    signal.signal(signal.SIGINT, signal_handler)

    # create model, multi GPU setting
    model = ConvColumn(config['num_classes'])
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # Loading the model.
    # BUG FIX: the message previously printed args.resume (a boolean flag)
    # instead of the checkpoint path that is actually loaded.
    print("=> loading checkpoint '{}'".format(config['checkpoint']))
    checkpoint = torch.load(config['checkpoint'])
    args.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        config['checkpoint'], checkpoint['epoch']))

    transform = Compose([
        ToPILImage(),
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().cuda()

    # Load the data
    val_data = VideoFolder(
        root=config['val_data_folder'],
        csv_file_input=config['val_data_csv'],
        csv_file_labels=config['labels_csv'],
        clip_size=config['clip_size'],
        nclips=1,
        step_size=config['step_size'],
        is_val=True,
        transform=transform,
    )
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=config['num_workers'],
                                             pin_memory=True,
                                             drop_last=False)
    validate(val_loader, model, criterion)
# Report the inference device chosen via the command-line flag.
print('Using %s for inference' % ('GPU' if args.use_gpu else 'CPU'))

# initialise some variables
verbose = args.verbose
device = torch.device("cuda" if args.use_gpu and torch.cuda.is_available() else "cpu")

# Per-frame preprocessing (ImageNet normalization statistics).
transform = Compose([
    ToPILImage(),
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

model = ConvColumn(num_classes)

# read in configuration file for mapping of gestures to keyboard keys
mapping = configparser.ConfigParser()
action = {}
if os.path.isfile(args.mapping):
    mapping.read(args.mapping)
    for m in mapping['MAPPING']:
        val = mapping['MAPPING'][m].split(',')
        # 'fn' selects the pyautogui-style action: hotkey, press, typewrite
        action[m] = {'fn': val[0], 'keys': val[1:]}
else:
    # (Removed a commented-out print that duplicated this error path.)
    raise FileNotFoundError(
        errno.ENOENT, os.strerror(errno.ENOENT), args.mapping)
# Set up some storage variables t = time() seq_len = 18 imgs = [] pred = 27 # Load model print('Loading model...') state_dict = torch.load('model_best.pth.tar', map_location='cpu')['state_dict'] state_dict_rename = OrderedDict() for k, v in state_dict.items(): name = k[7:] # remove 'module.' state_dict_rename[name] = v model = ConvColumn(27,(3, 3, 3)) model.load_state_dict(state_dict_rename) transform = Compose([CenterCrop(84), ToTensor()]) print('Starting prediction') # Run program till q is pressed while(True): # Capture frame-by-frame ret, frame = cam.read() #print(np.shape(frame)) # Set up input for model resized_frame = cv2.resize(frame, (149, 84))
# This script performs inference on the CPU only.
device = torch.device("cpu")

# The dataset object is loaded solely for its class-index -> label mapping.
dataset_object = JpegDataset(config['train_data_csv'],
                             config['labels_csv'],
                             config['train_data_folder'])
label_dict = dataset_object.classes_dict

# Per-frame preprocessing (ImageNet normalization statistics).
transform = Compose([
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Build the network, wrap it for multi-GPU-style state dicts, restore weights
# and switch to evaluation mode.
model = torch.nn.DataParallel(ConvColumn(config['num_classes'])).to(device)
checkpoint = torch.load(config['checkpoint'], map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.eval()


def model_caculate(input):
    """Classify one clip tensor and return ``(label_name, label_index)``."""
    clip = input.to(device)
    scores = model(clip)
    index = np.argmax(scores.detach().cpu().numpy())
    label = label_dict[index]
    print(index)
    print(label)
    return label, index
def main():
    """Train (or, with --eval_only, evaluate) a ConvColumn gesture classifier.

    Uses module-level globals: ``args``, ``config``, ``device``, ``gpus`` and
    ``best_prec1``; writes checkpoints and plots under the run output folder.
    """
    global args, best_prec1
    # set run output folder
    model_name = config["model_name"]
    output_dir = config["output_dir"]
    print("=> Output folder for this run -- {}".format(model_name))
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        os.makedirs(os.path.join(save_dir, 'plots'))

    # adds a handler for Ctrl+C
    def signal_handler(signal, frame):
        """Remove the output dir when interrupted while it is still empty,
        so aborted experimental runs leave no noise behind.

        NOTE(review): the original docstring said "less then 3 files" but the
        code checks ``num_files < 1`` — confirm the intended threshold.
        """
        num_files = len(glob.glob(save_dir + "/*"))
        if num_files < 1:
            shutil.rmtree(save_dir)
        print('You pressed Ctrl+C!')
        sys.exit(0)

    # assign Ctrl+C signal handler
    signal.signal(signal.SIGINT, signal_handler)

    # create model
    model = ConvColumn(config['num_classes'])

    # multi GPU setting
    if args.use_gpu:
        model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(config['checkpoint']):
            # NOTE(review): prints args.resume (a flag) rather than the
            # checkpoint path that is actually loaded.
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(config['checkpoint'])
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(config['checkpoint'], checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(
                config['checkpoint']))

    # Per-frame preprocessing (ImageNet normalization statistics).
    transform = Compose([
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_data = VideoFolder(root=config['train_data_folder'],
                             csv_file_input=config['train_data_csv'],
                             csv_file_labels=config['labels_csv'],
                             clip_size=config['clip_size'],
                             nclips=1,
                             step_size=config['step_size'],
                             is_val=False,
                             transform=transform,
                             )

    print(" > Using {} processes for data loader.".format(
        config["num_workers"]))
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config['batch_size'], shuffle=True,
        num_workers=config['num_workers'], pin_memory=True,
        drop_last=True)

    val_data = VideoFolder(root=config['val_data_folder'],
                           csv_file_input=config['val_data_csv'],
                           csv_file_labels=config['labels_csv'],
                           clip_size=config['clip_size'],
                           nclips=1,
                           step_size=config['step_size'],
                           is_val=True,
                           transform=transform,
                           )

    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config['batch_size'], shuffle=False,
        num_workers=config['num_workers'], pin_memory=True,
        drop_last=False)

    # the dataset's discovered classes must match the configured head size
    assert len(train_data.classes) == config["num_classes"]

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    # define optimizer
    lr = config["lr"]
    last_lr = config["last_lr"]
    momentum = config['momentum']
    weight_decay = config['weight_decay']
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    if args.eval_only:
        validate(val_loader, model, criterion, train_data.classes_dict)
        return

    # set callbacks
    plotter = PlotLearning(os.path.join(
        save_dir, "plots"), config["num_classes"])
    lr_decayer = MonitorLRDecay(0.6, 3)
    # seed the decay monitor with a huge loss so the first epoch never decays
    val_loss = 9999999

    # set end condition by num epochs (-1 means "train until LR bottoms out")
    num_epochs = int(config["num_epochs"])
    if num_epochs == -1:
        num_epochs = 999999

    print(" > Training is getting started...")
    print(" > Training takes {} epochs.".format(num_epochs))
    start_epoch = args.start_epoch if args.resume else 0

    for epoch in range(start_epoch, num_epochs):

        lr = lr_decayer(val_loss, lr)
        print(" > Current LR : {}".format(lr))

        if lr < last_lr and last_lr > 0:
            print(" > Training is done by reaching the last learning rate {}".
                  format(last_lr))
            # NOTE(review): exits with status 1 on what looks like normal
            # completion — confirm callers rely on this exit code.
            sys.exit(1)

        # train for one epoch
        train_loss, train_top1, train_top5 = train(
            train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)

        # plot learning
        plotter_dict = {}
        plotter_dict['loss'] = train_loss
        plotter_dict['val_loss'] = val_loss
        plotter_dict['acc'] = train_top1
        plotter_dict['val_acc'] = val_top1
        plotter_dict['learning_rate'] = lr
        plotter.plot(plotter_dict)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_prec1
        best_prec1 = max(val_top1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': "Conv4Col",
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, config)
print('Using %s for inference' % ('GPU' if args.use_gpu else 'CPU')) # initialise some variables verbose = args.verbose device = torch.device( "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu") transform = Compose([ ToPILImage(), CenterCrop(84), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) model = ConvColumn(num_classes) # read in configuration file for mapping of gestures to keyboard keys mapping = configparser.ConfigParser() action = {} if os.path.isfile(args.mapping): mapping.read(args.mapping) for m in mapping['MAPPING']: val = mapping['MAPPING'][m].split(',') action[m] = { 'fn': val[0], 'keys': val[1:] } # fn: hotkey, press, typewrite else:
def FrameCapture(path):
    """Load saved frames, run the gesture network on them and return the
    top-1/top-5 accuracy result from ``accuracy`` against a fixed target.

    NOTE(review): ``path`` is currently ignored — frames are always read
    from the hard-coded 'test_img' directory.  Left unchanged to preserve
    caller-visible behavior; confirm whether ``path`` should be used.
    """
    def str2bool(x):
        # argparse type-converter: 'true' (any case) -> True
        return str(x).lower() == 'true'

    parser = argparse.ArgumentParser(
        description='PyTorch Jester Training using JPEG')
    parser.add_argument('--use_gpu', default=False, type=str2bool,
                        help="flag to use gpu or not.")
    parser.add_argument('--config', '-c', help='json config file path')
    parser.add_argument('--resume', '-r', default=False, type=str2bool,
                        help="resume training from given checkpoint.")
    parser.add_argument('--gpus', '-g', help="gpu ids for use.")
    args = parser.parse_args()

    device = torch.device(
        "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu")
    if args.use_gpu:
        gpus = [int(i) for i in args.gpus.split(',')]
        print("=> active GPUs: {}".format(args.gpus))

    with open("configs/config.json") as data_file:
        config = json.load(data_file)

    # Per-frame preprocessing (ImageNet normalization statistics).
    transform = Compose([
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    model = ConvColumn(8)
    if args.use_gpu:
        model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    # (Removed the pointless 'if 1:' wrapper around this whole section.)
    if os.path.isfile(config['checkpoint']):
        # BUG FIX: the message previously printed args.resume (a boolean
        # flag) instead of the checkpoint path that is actually loaded.
        print("=> loading checkpoint '{}'".format(config['checkpoint']))
        checkpoint = torch.load(config['checkpoint'], map_location='cpu')
        # The checkpoint comes from a DataParallel model, so weight keys
        # carry a 'module.' prefix.  BUG FIX: build the renamed state dict
        # directly instead of deleting/re-inserting 'state_dict' while
        # iterating over the checkpoint dict.
        if 'state_dict' in checkpoint:
            checkpoint['state_dict'] = OrderedDict(
                (key[7:], value)
                for key, value in checkpoint['state_dict'].items())
        args.start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            config['checkpoint'], checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(
            config['checkpoint']))

    img_paths = get_frame_names('test_img')
    imgs = []
    for img_path in img_paths:
        img = Image.open(img_path).convert('RGB')
        img = transform(img)
        imgs.append(torch.unsqueeze(img, 0))

    # format data to torch: (T, C, H, W) -> (1, C, T, H, W)
    data = torch.cat(imgs)
    data = data.permute(1, 0, 2, 3)
    data = data[None, :, :, :, :]
    target = torch.tensor([2])
    data = data.to(device)

    model.eval()
    output = model(data)
    print("\nOutput values for all the 8 classes: ")
    print(output.detach())
    gesture_label_int = accuracy(output.detach(), target.detach().cpu(),
                                 topk=(1, 5))
    return gesture_label_int
class GestureDetectorThread(Thread):
    """Background thread that grabs webcam frames, classifies 18-frame clips
    with a ConvColumn network and publishes recognized gestures on a queue."""

    SWIPE_LEFT = 'Swiping Left'
    SWIPE_RIGHT = 'Swiping Right'
    SWIPE_UP = 'Swiping Up'
    SWIPE_DOWN = 'Swiping Down'
    THUMB_OK = 'Thumb Up'
    THUMB_NOT = 'Thumb Down'
    NO_GESTURE = 'No gesture'
    OTHER_GESTURE = 'Doing other things'

    def __init__(self, fps=12, width=176, height=100, use_gpu=True, model_data="model_best.pth.tar"):
        """Set up the camera, the frame/prediction queues and the model.

        Raises:
            FileNotFoundError: if ``model_data`` does not exist.
        """
        Thread.__init__(self)
        self.isRunning = True
        self._capture = cv.VideoCapture(0)
        self._target_frame_size = (width, height)
        self._sleeping_time = 1 / fps
        self._event_queue = queue.Queue()
        # 18 frames form one clip; 3 recent predictions are kept for smoothing
        self._frame_queue = queue.Queue(maxsize=18)
        self._predict_queue = queue.Queue(maxsize=3)
        self._model = ConvColumn(8)
        # BUG FIX: previously .cuda() was called whenever use_gpu was set,
        # which crashes on machines without CUDA; guard on availability.
        if use_gpu and torch.cuda.is_available():
            self._model.cuda()
        if not os.path.isfile(model_data):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), model_data)
        last_checkpoint = torch.load(model_data, map_location='cpu')
        # Checkpoint was saved from a DataParallel model: strip the 'module.'
        # prefix.  BUG FIX: build the renamed dict directly instead of
        # mutating the checkpoint dict while iterating over .items().
        new_state_dict = OrderedDict(
            (key[7:], value) for key, value in last_checkpoint['state_dict'].items())
        self._model.load_state_dict(new_state_dict)
        self._transform = Compose([
            ToPILImage(),
            CenterCrop(84),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self._device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
        # Bidirectional mapping: class index <-> gesture name.
        self._gestures = {0: GestureDetectorThread.SWIPE_LEFT,
                          1: GestureDetectorThread.SWIPE_RIGHT,
                          2: GestureDetectorThread.SWIPE_DOWN,
                          3: GestureDetectorThread.SWIPE_UP,
                          4: GestureDetectorThread.THUMB_OK,
                          5: GestureDetectorThread.THUMB_NOT,
                          6: GestureDetectorThread.NO_GESTURE,
                          7: GestureDetectorThread.OTHER_GESTURE,
                          GestureDetectorThread.SWIPE_LEFT: 0,
                          GestureDetectorThread.SWIPE_RIGHT: 1,
                          GestureDetectorThread.SWIPE_DOWN: 2,
                          GestureDetectorThread.SWIPE_UP: 3,
                          GestureDetectorThread.THUMB_OK: 4,
                          GestureDetectorThread.THUMB_NOT: 5,
                          GestureDetectorThread.NO_GESTURE: 6,
                          GestureDetectorThread.OTHER_GESTURE: 7}
        # Minimum softmax confidence before a prediction is accepted.
        # (Name kept as-is — 'TRESHOLD' is public and may be used elsewhere.)
        self.TRESHOLD = 0.7

    def run(self):
        """Capture/classify loop; runs until stop_detector() is called."""
        while self.isRunning:
            start_time = time.time()
            _, frame = self._capture.read()
            frame = cv.resize(frame, self._target_frame_size)
            # keep only the newest 18 frames (one network clip)
            try:
                self._frame_queue.put_nowait(frame)
            except queue.Full:
                _ = self._frame_queue.get()
                self._frame_queue.put_nowait(frame)
            frames = [torch.unsqueeze(self._transform(img), 0)
                      for img in list(self._frame_queue.queue)]
            # (T, C, H, W) -> (1, C, T, H, W) as the network expects
            data = torch.cat(frames)
            data = data.permute(1, 0, 2, 3)
            data = data[None, :, :, :, :]
            data = data.to(self._device)
            self._model.eval()
            nn_output = self._model(data)
            nn_output = torch.nn.functional.softmax(nn_output, dim=1)
            pred, class_index = nn_output.max(1)
            pred = pred.item()
            class_index = class_index.item()
            g = self._gestures[class_index]
            if (pred > self.TRESHOLD
                    and g != GestureDetectorThread.OTHER_GESTURE
                    and g != GestureDetectorThread.NO_GESTURE):
                try:
                    self._predict_queue.put_nowait((pred, g))
                except queue.Full:
                    self._predict_queue.get()
                    self._predict_queue.put_nowait((pred, g))
                # emit the most confident of the recent predictions
                predictions = sorted(list(self._predict_queue.queue))
                print(predictions)
                g = predictions[-1][1]
                self._event_queue.put(g)
                # Clear queues so the next clip starts fresh
                while not self._frame_queue.empty():
                    self._frame_queue.get_nowait()
                while not self._predict_queue.empty():
                    self._predict_queue.get_nowait()
            else:
                while not self._predict_queue.empty():
                    self._predict_queue.get_nowait()
            # Pace the loop to the requested fps.  BUG FIX: previously a bare
            # 'except: pass' swallowed the ValueError raised by time.sleep on
            # a negative argument; compute the remainder explicitly instead.
            remaining = self._sleeping_time - (time.time() - start_time)
            if remaining > 0:
                time.sleep(remaining)
        self._capture.release()

    def get_event(self) -> 'str | None':
        """Return the next recognized gesture name, or None if none pending.

        BUG FIX: the annotation said '-> int' although gesture name strings
        are queued; also narrowed the bare except to queue.Empty.
        """
        try:
            return self._event_queue.get(block=False)
        except queue.Empty:
            return None

    def stop_detector(self):
        """Ask run() to exit its loop; the camera is released on exit."""
        self.isRunning = False