def initNets(self): """ Init network in current process :return: """ # Force network to compile output in the beginning if isinstance(self.poseNet, PoseRegNetParams): self.poseNet = PoseRegNet(numpy.random.RandomState(23455), cfgParams=self.poseNet) self.poseNet.computeOutput( numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32')) elif isinstance(self.poseNet, ResNetParams): self.poseNet = ResNet(numpy.random.RandomState(23455), cfgParams=self.poseNet) self.poseNet.computeOutput( numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32')) else: raise RuntimeError("Unknown pose estimation method!") if self.comrefNet is not None: if isinstance(self.comrefNet, ScaleNetParams): self.comrefNet = ScaleNet(numpy.random.RandomState(23455), cfgParams=self.comrefNet) self.comrefNet.computeOutput([ numpy.zeros(sz, dtype='float32') for sz in self.comrefNet.cfgParams.inputDim ]) else: raise RuntimeError("Unknown refine method!")
def _init_net(self): if self.args.net == 'ConvNet': self.network = ConvNet(self.args) elif self.args.net == 'ResNet': self.network = ResNet(self.args) self.args.hidden_channels = 640 self.network.train() self.network.cuda() return None
def load_net(net_name):
    """Instantiate a network by its (exact, case-sensitive) name.

    :param net_name: one of "simplenet", "resnet", "resnet152"
    :return: the constructed network, or None for any other name
    """
    if net_name == "simplenet":
        return SimpleNet()
    if net_name == "resnet":
        return ResNet()
    if net_name == "resnet152":
        return ResNet152()
    # Unknown names are not an error here; callers must handle None.
    return None
def __init__(self, out_channels, num_anchors, num_classes):
    """RetinaNet: ResNet-50 backbone + FPN neck + prediction subnet.

    :param out_channels: channel width of the prediction subnet
    :param num_anchors: anchors per feature-map location
    :param num_classes: number of object classes
    """
    super(RetinaNet, self).__init__()
    self.resnet = ResNet(50)
    self.fpn = FPN()
    self.subnet = SubNet(out_channels=out_channels,
                         num_anchors=num_anchors,
                         num_classes=num_classes)
def retinanet(inputs, out_channels, num_classes, num_anchors):
    """Build a functional RetinaNet model: ResNet-50 backbone + FPN + shared subnets.

    :param inputs: keras input tensor
    :param out_channels: channel width of the prediction subnets
    :param num_classes: number of object classes
    :param num_anchors: anchors per feature-map location
    :return: keras.Model mapping inputs -> [box_results, class_results]
    """
    resnet = ResNet(50)
    C2, C3, C4, C5 = resnet(inputs)
    # Top-down FPN pathway: 1x1 lateral reductions, upsample-and-add merges,
    # then a 3x3 conv to smooth each merged map.
    P5 = keras.layers.Conv2D(256, kernel_size=1, strides=1, padding='same', name='C5_reduced')(C5)  # 38x38x256
    P5_upsampled = layers.UpSampling2D(name='P5_upsampled')(P5)
    P5 = keras.layers.Conv2D(256, kernel_size=3, strides=1, padding='same', name='P5')(P5)  # 38x38x256
    P4 = keras.layers.Conv2D(256, kernel_size=1, strides=1, padding='same', name='C4_reduced')(C4)
    P4 = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
    P4_upsampled = layers.UpSampling2D(name='P4_upsampled')(P4)
    P4 = keras.layers.Conv2D(256, kernel_size=3, strides=1, padding='same', name='P4')(P4)  # 75x75x256
    P3 = keras.layers.Conv2D(256, kernel_size=1, strides=1, padding='same', name='C3_reduced')(C3)
    P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
    P3 = keras.layers.Conv2D(256, kernel_size=3, strides=1, padding='same', name='P3')(P3)
    # P6/P7: extra coarse pyramid levels strided down from C5.
    P6 = keras.layers.Conv2D(256, kernel_size=3, strides=2, padding='same', name='P6')(C5)
    P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
    P7 = keras.layers.Conv2D(256, kernel_size=3, strides=2, padding='same', name='P7')(P7)
    features = [P3, P4, P5, P6, P7]
    class_results = []
    box_results = []
    # One subnet instance applied to every level -> weights shared across levels.
    classi_model = class_subnet(out_channels=out_channels,
                                num_anchors=num_anchors,
                                num_classes=num_classes)
    box_model = box_subnet(out_channels=out_channels, num_anchors=num_anchors)
    for feature in features:
        class_results.append(classi_model(feature))
        box_results.append(box_model(feature))
    # concatenate -> (batch, 52*52*9, 4), (batch, 52*52*9, num_classes)
    class_results = layers.Concatenate(axis=1)(class_results)
    box_results = layers.Concatenate(axis=1)(box_results)
    results = [box_results, class_results]
    return keras.Model(inputs, results)
elif model_name == model_zoo[4]:  # VGG16 with ImageNet pretrained weights
    model = Vgg16_imageNet.build(input_shape, CLASS_NUM,
                                 dense_layers=2,
                                 hidden_units=512,
                                 dropout_rate=0.5,
                                 weights='imagenet')
elif model_name == model_zoo[5]:  # GoogLeNet V1
    model = GoogLeNetV1.build(input_shape, CLASS_NUM,
                              dense_layers=1,
                              hidden_units=512,
                              subsample_initial_block=False)
elif model_name == model_zoo[6]:  # ResNet V1
    model = ResNet.build(input_shape, CLASS_NUM)
elif model_name == model_zoo[7]:  # DenseNet V1
    model = DenseNet.build(input_shape, CLASS_NUM,
                           dense_layers=2,
                           hidden_units=512,
                           dropout_rate=0.5,
                           subsample_initial_block=False)
else:  # default lenet
    model = LeNet.build(input_shape, CLASS_NUM)

# Start training process
# Tag the run with architecture, timestamp, optimizer and key hyper-parameters
# so saved artifacts are uniquely named.
model_name = '{}_{}_{}_{}_{}'.format(model_name, time_stamp, op, INIT_LR, EPOCHS)
train(aug, trainX, trainY, testX, testY, BS, EPOCHS, model, model_name, optimizer)
# Channels-last layout: single-channel images, channel axis is the last dim.
input_shape = (img_rows, img_cols, 1)
chanDim = -1
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Scale pixel values from [0, 255] into [0, 1].
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = ResNet.build(chanDim, input_shape, num_classes)
# keras.utils.plot_model(model, "resnet.png")
# model.compile(loss='mse', optimizer=SGD(lr=0.1), metrics=['accuracy'])
# model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# start train-----------------------------------
# model.fit(x_train, y_train, batch_size=1000, epochs=20)
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
class GBML:
    '''
    Gradient-Based Meta-Learning
    '''
    def __init__(self, args):
        # args: experiment configuration namespace (argparse-style).
        self.args = args
        self.batch_size = self.args.batch_size
        return None

    def _init_net(self):
        """Build the backbone named by args.net, set train mode, move to GPU.

        NOTE(review): an unrecognized args.net falls through silently and
        self.network stays unset — confirm callers always pass a valid name.
        """
        if self.args.net == 'ConvNet':
            self.network = ConvNet(self.args)
        elif self.args.net == 'ResNet':
            self.network = ResNet(self.args)
            # ResNet backbone emits 640-channel features.
            self.args.hidden_channels = 640
        elif self.args.net == 'functional_net':
            self.network = functional_net(self.args)
        self.network.train()
        self.network.cuda()
        return None

    def _init_opt(self):
        """Create inner/outer optimizers on the shared network parameters
        plus a plateau-based LR scheduler for the outer loop."""
        if self.args.inner_opt == 'SGD':
            self.inner_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.inner_lr)
        elif self.args.inner_opt == 'Adam':
            # betas=(0.0, 0.9): no first-moment averaging for the inner steps.
            self.inner_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.inner_lr,
                                                    betas=(0.0, 0.9))
        else:
            raise ValueError('Not supported inner optimizer.')

        if self.args.outer_opt == 'SGD':
            self.outer_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.outer_lr,
                                                   nesterov=True,
                                                   momentum=0.9)
        elif self.args.outer_opt == 'Adam':
            self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.outer_lr)
        else:
            raise ValueError('Not supported outer optimizer.')

        # self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
        #     self.outer_optimizer, step_size=10, gamma=0.5)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.outer_optimizer)
        return None

    def _init_opt_MAML_native(self):
        # MAML variant: only an outer Adam optimizer + plateau scheduler.
        self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                lr=self.args.outer_lr)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.outer_optimizer)

    def _init_opt_MAML_george(self):
        # Same optimizer setup as _init_opt_MAML_native.
        self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                lr=self.args.outer_lr)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.outer_optimizer)

    def _init_opt_ttsa(self):
        """TTSA variant: one deep copy of the network (phi) and one inner
        optimizer per task in the batch; a single shared outer optimizer."""
        self.phi = [None] * self.batch_size
        self.inner_optimizer = [None] * self.batch_size
        for i in range(self.batch_size):
            # Inner parameters start from the current meta-parameters theta.
            self.phi[i] = copy.deepcopy(self.network)
            if self.args.inner_opt == 'SGD':
                self.inner_optimizer[i] = torch.optim.SGD(
                    self.phi[i].parameters(), lr=self.args.inner_lr)
            elif self.args.inner_opt == 'Adam':
                self.inner_optimizer[i] = torch.optim.Adam(
                    self.phi[i].parameters(),
                    lr=self.args.inner_lr,
                    betas=(0.0, 0.9))
            else:
                raise ValueError('Not supported inner optimizer.')

        if self.args.outer_opt == 'SGD':
            self.outer_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.outer_lr,
                                                   nesterov=True,
                                                   momentum=0.9)
        elif self.args.outer_opt == 'Adam':
            self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.outer_lr)
        else:
            raise ValueError('Not supported outer optimizer.')

        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.outer_optimizer)
        return None

    def init_opt_Reptile(self):
        """Reptile variant: a (batch_size x n_inner) grid of network copies,
        each with its own inner optimizer; one shared outer optimizer."""
        self.phi = [[None] * self.args.n_inner for _ in range(self.batch_size)]
        self.inner_optimizer = [[None] * self.args.n_inner
                                for _ in range(self.batch_size)]
        for i in range(self.batch_size):
            for j in range(self.args.n_inner):
                self.phi[i][j] = copy.deepcopy(self.network)
                if self.args.inner_opt == 'SGD':
                    self.inner_optimizer[i][j] = torch.optim.SGD(
                        self.phi[i][j].parameters(),
                        lr=self.args.inner_lr
                    )  # inner parameters initialized with theta parameters
                elif self.args.inner_opt == 'Adam':
                    self.inner_optimizer[i][j] = torch.optim.Adam(
                        self.phi[i][j].parameters(),
                        lr=self.args.inner_lr,
                        betas=(0.0, 0.9))
                else:
                    raise ValueError('Not supported inner optimizer.')

        if self.args.outer_opt == 'SGD':
            self.outer_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.outer_lr,
                                                   nesterov=True,
                                                   momentum=0.9)
        elif self.args.outer_opt == 'Adam':
            self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.outer_lr)
        else:
            raise ValueError('Not supported outer optimizer.')

        # self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
        #     self.outer_optimizer, step_size=10, gamma=0.5)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.outer_optimizer)
        return None

    def unpack_batch(self, batch):
        """Split a task batch into train/test tensors and move them to GPU.

        :param batch: dict with 'train' and 'test' (inputs, targets) pairs
        :return: train_inputs, train_targets, test_inputs, test_targets
        """
        train_inputs, train_targets = batch['train']
        train_inputs = train_inputs.cuda()
        train_targets = train_targets.cuda()
        test_inputs, test_targets = batch['test']
        test_inputs = test_inputs.cuda()
        test_targets = test_targets.cuda()
        return train_inputs, train_targets, test_inputs, test_targets

    def inner_loop(self):
        # Implemented by concrete meta-learning algorithms.
        raise NotImplementedError

    def outer_loop(self):
        # Implemented by concrete meta-learning algorithms.
        raise NotImplementedError

    def lr_sched(self, loss):
        # ReduceLROnPlateau steps on the monitored loss value.
        self.lr_scheduler.step(loss)
        return None

    def load(self):
        # Checkpoints live under result_path/alg/<shot>_<way>/.
        path = os.path.join(
            self.args.result_path, self.args.alg,
            str(self.args.num_shot) + '_' + str(self.args.num_way),
            self.args.load_path)
        self.network.load_state_dict(torch.load(path))

    def save(self):
        # Mirror of load(): same directory layout, save_path filename.
        path = os.path.join(
            self.args.result_path, self.args.alg,
            str(self.args.num_shot) + '_' + str(self.args.num_way),
            self.args.save_path)
        torch.save(self.network.state_dict(), path)
class GBML:
    '''
    Gradient-Based Meta-Learning
    '''
    def __init__(self, args):
        # args: experiment configuration namespace (argparse-style).
        self.args = args
        self.batch_size = self.args.batch_size
        return None

    def _init_net(self):
        """Build the backbone named by args.net, set train mode, move to GPU.

        NOTE(review): an unrecognized args.net falls through silently and
        self.network stays unset — confirm callers always pass a valid name.
        """
        if self.args.net == 'ConvNet':
            self.network = ConvNet(self.args)
        elif self.args.net == 'ResNet':
            self.network = ResNet(self.args)
            # ResNet backbone emits 640-channel features.
            self.args.hidden_channels = 640
        self.network.train()
        self.network.cuda()
        return None

    def _init_opt(self):
        """Create inner/outer optimizers on the shared network parameters
        plus a fixed-step LR decay schedule for the outer loop."""
        if self.args.inner_opt == 'SGD':
            self.inner_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.inner_lr)
        elif self.args.inner_opt == 'Adam':
            # betas=(0.0, 0.9): no first-moment averaging for the inner steps.
            self.inner_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.inner_lr,
                                                    betas=(0.0, 0.9))
        else:
            raise ValueError('Not supported inner optimizer.')

        if self.args.outer_opt == 'SGD':
            self.outer_optimizer = torch.optim.SGD(self.network.parameters(),
                                                   lr=self.args.outer_lr,
                                                   nesterov=True,
                                                   momentum=0.9)
        elif self.args.outer_opt == 'Adam':
            self.outer_optimizer = torch.optim.Adam(self.network.parameters(),
                                                    lr=self.args.outer_lr)
        else:
            raise ValueError('Not supported outer optimizer.')

        # Halve the outer LR every 10 scheduler steps.
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.outer_optimizer,
                                                            step_size=10,
                                                            gamma=0.5)
        return None

    def unpack_batch(self, batch):
        """Split a task batch into train/test tensors and move them to GPU.

        :param batch: dict with 'train' and 'test' (inputs, targets) pairs
        :return: train_inputs, train_targets, test_inputs, test_targets
        """
        train_inputs, train_targets = batch['train']
        train_inputs = train_inputs.cuda()
        train_targets = train_targets.cuda()
        test_inputs, test_targets = batch['test']
        test_inputs = test_inputs.cuda()
        test_targets = test_targets.cuda()
        return train_inputs, train_targets, test_inputs, test_targets

    def inner_loop(self):
        # Implemented by concrete meta-learning algorithms.
        raise NotImplementedError

    def outer_loop(self):
        # Implemented by concrete meta-learning algorithms.
        raise NotImplementedError

    def lr_sched(self):
        # StepLR advances on every call (no metric needed).
        self.lr_scheduler.step()
        return None

    def load(self):
        # Checkpoints live under result_path/alg/.
        path = os.path.join(self.args.result_path, self.args.alg,
                            self.args.load_path)
        self.network.load_state_dict(torch.load(path))

    def load_encoder(self):
        # Load weights for the encoder sub-module only.
        path = os.path.join(self.args.result_path, self.args.alg,
                            self.args.load_path)
        self.network.encoder.load_state_dict(torch.load(path))

    def save(self):
        path = os.path.join(self.args.result_path, self.args.alg,
                            self.args.save_path)
        torch.save(self.network.state_dict(), path)
# -*- coding:utf-8 -*- # author:平手友梨奈ii # e-mail:[email protected] # datetime:1993/12/01 # filename:test_net.py # software: PyCharm import tensorflow.keras as keras from net.resnet import ResNet from net.fpn import FPN from net.subnet import class_subnet, box_subnet inputs = keras.Input(shape=(416, 416, 3)) resnet = ResNet(depth=50) c2, c3, c4, c5 = resnet(inputs) fpn = FPN() p3, p4, p5, p6, p7 = fpn([c2, c3, c4, c5]) box_subnet_model = box_subnet(out_channels=256, num_anchors=9) class_subnet_model = class_subnet(out_channels=256, num_classes=80, num_anchors=9) results = [[box_subnet_model(x) for x in [p3, p4, p5, p6, p7]], [class_subnet_model(y) for y in [p3, p4, p5, p6, p7]]] retinanet_model = keras.Model(inputs, results) # retinanet_model.summary() # layers that have trainable parameters length = len([layer for layer in retinanet_model.layers if len(layer.trainable_weights) > 0]) print(length)
    collate_fn=collate_function,
    batch_size=cfg.train.batchSize,
    shuffle=True,
    num_workers=cfg.train.workers,
    pin_memory=True,  # can be enabled when the machine has enough resources
)
datay = DataY(
    inputHW=cfg.model.netInput,  # network input size (images are resized to this)
    gride=cfg.model.featSize,  # how many grid cells the input is divided into
    stride=cfg.model.stride,
    boxNum=cfg.model.bboxPredNum,
    clsNum=cfg.model.clsNum)

"""准备网络"""
# Build the detection backbone; output channels = 5 values per predicted box
# plus one score per class.
# network = ResNet(ResnetBasic, [2, 2, 2, 2], channel_out = 15)
network = ResNet(ResnetBasicSlim, [2, 2, 2, 2],
                 channel_out=(cfg.model.bboxPredNum * 5 + cfg.model.clsNum))
network.to(device)
if cfg.dir.modelReloadPath is not None:
    weights = torch.load(cfg.dir.modelReloadPath)  # load saved parameters
    network.load_state_dict(weights)  # restore them into our model

"""指定loss"""
lossF = yoloLoss(boxNum=cfg.model.bboxPredNum, clsNum=cfg.model.clsNum)

"""其余"""
optimizer = torch.optim.Adam(network.parameters(), lr=cfg.train.lr0)
# Warm-up only when a warm-up batch count is configured.
warmUpFlag = True if cfg.train.warmupBatch is not None else False
warmUpIter = 0

"""log"""
writer = SummaryWriter()

for e in range(1, 1 + cfg.train.epoch):
class RealtimeHandposePipeline(object):
    """
    Realtime pipeline for handpose estimation
    """

    # states of pipeline
    STATE_IDLE = 0
    STATE_INIT = 1
    STATE_RUN = 2
    # different hands
    HAND_LEFT = 0
    HAND_RIGHT = 1
    # different detectors
    DETECTOR_COM = 0

    def __init__(self, poseNet, config, di, verbose=False, comrefNet=None):
        """
        Initialize data
        :param poseNet: network for pose estimation
        :param config: configuration
        :param di: depth importer
        :param verbose: print additional info
        :param comrefNet: refinement network from center of mass detection
        :return: None
        """
        # handpose CNN
        self.importer = di
        self.poseNet = poseNet
        self.comrefNet = comrefNet
        # configuration
        self.initialconfig = copy.deepcopy(config)
        # synchronization between processes (producer/consumer share this dict)
        self.sync = Manager().dict(config=config, fid=0,
                                   crop=numpy.ones((128, 128), dtype='float32'),
                                   com3D=numpy.asarray([0, 0, 300]),
                                   frame=numpy.ones((240, 320), dtype='float32'),
                                   M=numpy.eye(3))
        self.start_prod = Value(c_bool, False)
        self.start_con = Value(c_bool, False)
        self.stop = Value(c_bool, False)
        # for calculating FPS (running average over the last 100 frames)
        self.lastshow = time.time()
        self.runningavg_fps = deque(100*[0], 100)
        self.verbose = verbose
        # hand left/right
        self.hand = Value('i', self.HAND_LEFT)
        # initial state
        self.state = Value('i', self.STATE_IDLE)
        # detector
        self.detection = Value('i', self.DETECTOR_COM)
        # hand size estimation
        self.handsizes = []
        self.numinitframes = 50
        # hand tracking or detection
        self.tracking = Value(c_bool, False)
        self.lastcom = (0, 0, 0)
        # show different results
        self.show_pose = False
        self.show_crop = False

    def initNets(self):
        """
        Init network in current process
        :return:
        """
        # Force network to compile output in the beginning
        if isinstance(self.poseNet, PoseRegNetParams):
            self.poseNet = PoseRegNet(numpy.random.RandomState(23455),
                                      cfgParams=self.poseNet)
            self.poseNet.computeOutput(
                numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        elif isinstance(self.poseNet, ResNetParams):
            self.poseNet = ResNet(numpy.random.RandomState(23455),
                                  cfgParams=self.poseNet)
            self.poseNet.computeOutput(
                numpy.zeros(self.poseNet.cfgParams.inputDim, dtype='float32'))
        else:
            raise RuntimeError("Unknown pose estimation method!")

        if self.comrefNet is not None:
            if isinstance(self.comrefNet, ScaleNetParams):
                self.comrefNet = ScaleNet(numpy.random.RandomState(23455),
                                          cfgParams=self.comrefNet)
                self.comrefNet.computeOutput(
                    [numpy.zeros(sz, dtype='float32')
                     for sz in self.comrefNet.cfgParams.inputDim])
            else:
                raise RuntimeError("Unknown refine method!")

    def threadProducer(self, device):
        """
        Thread that produces frames from video capture
        :param device: device
        :return: None
        """
        device.start()

        self.initNets()
        # Nets are compiled, ready to run
        self.start_prod.value = True

        fid = 0
        while True:
            # Wait until the consumer is up before producing frames.
            if self.start_con.value is False:
                time.sleep(0.1)
                continue
            if self.stop.value is True:
                break
            # Capture frame-by-frame
            start = time.time()
            ret, frame = device.getDepth()
            if ret is False:
                print "Error while reading frame."
                time.sleep(0.1)
                continue
            if self.verbose is True:
                print("{}ms capturing".format((time.time() - start)*1000.))

            startd = time.time()
            crop, M, com3D = self.detect(frame.copy())
            if self.verbose is True:
                print("{}ms detection".format((time.time() - startd)*1000.))

            # Publish the latest detection for the consumer process.
            self.sync.update(fid=fid, crop=crop, com3D=com3D, frame=frame, M=M)
            fid += 1

        # we are done
        print "Exiting producer..."
        device.stop()
        return True

    def threadConsumer(self):
        """
        Thread that consumes the frames, estimate the pose and display
        :return: None
        """
        self.initNets()
        # Nets are compiled, ready to run
        self.start_con.value = True

        while True:
            # Wait until the producer has published at least one frame.
            if self.start_prod.value is False:
                time.sleep(0.1)
                continue
            if self.stop.value is True:
                break

            frm = copy.deepcopy(self.sync)

            startp = time.time()
            pose = self.estimatePose(frm['crop'], frm['com3D'])
            # Scale normalized pose back to mm and translate to the crop center.
            pose = pose * self.sync['config']['cube'][2]/2. + frm['com3D']
            if self.verbose is True:
                print("{}ms pose".format((time.time() - startp)*1000.))

            # Display the resulting frame
            starts = time.time()
            img, poseimg = self.show(frm['frame'], pose, frm['com3D'])
            img = self.addStatusBar(img)
            cv2.imshow('frame', img)
            self.lastshow = time.time()
            if self.show_pose:
                cv2.imshow('pose', poseimg)
            if self.show_crop:
                cv2.imshow('crop', numpy.clip((frm['crop'] + 1.)*128., 0, 255).astype('uint8'))
            self.processKey(cv2.waitKey(1) & 0xFF)
            if self.verbose is True:
                print("{}ms display".format((time.time() - starts)*1000.))

        cv2.destroyAllWindows()
        # we are done
        print "Exiting consumer..."
        return True

    def processVideoThreaded(self, device):
        """
        Use video as input
        :param device: device id
        :return: None
        """
        print "Create producer process..."
        p = Process(target=self.threadProducer, args=[device])
        p.daemon = True
        print "Create consumer process..."
        c = Process(target=self.threadConsumer, args=[])
        c.daemon = True
        p.start()
        c.start()

        c.join()
        p.join()

    def processVideo(self, device):
        """
        Use video as input
        :param device: device id
        :return: None
        """
        device.start()

        self.initNets()

        i = 0
        while True:
            i += 1
            if self.stop.value is True:
                break
            # Capture frame-by-frame
            start = time.time()
            ret, frame = device.getDepth()
            if ret is False:
                print "Error while reading frame."
                time.sleep(0.1)
                continue
            if self.verbose is True:
                print("{}ms capturing".format((time.time() - start)*1000.))

            startd = time.time()
            crop, M, com3D = self.detect(frame.copy())
            if self.verbose is True:
                print("{}ms detection".format((time.time() - startd)*1000.))

            startp = time.time()
            pose = self.estimatePose(crop, com3D)
            # Scale normalized pose back to mm and translate to the crop center.
            pose = pose*self.sync['config']['cube'][2]/2. + com3D
            if self.verbose is True:
                print("{}ms pose".format((time.time() - startp)*1000.))

            # Display the resulting frame
            starts = time.time()
            img, poseimg = self.show(frame, pose, com3D)
            img = self.addStatusBar(img)
            cv2.imshow('frame', img)
            self.lastshow = time.time()
            if self.show_pose:
                cv2.imshow('pose', poseimg)
            if self.show_crop:
                cv2.imshow('crop', numpy.clip((crop + 1.)*128., 0, 255).astype('uint8'))
            self.processKey(cv2.waitKey(1) & 0xFF)
            if self.verbose is True:
                print("{}ms display".format((time.time() - starts)*1000.))
                print("-> {}ms per frame".format((time.time() - start)*1000.))

        # When everything done, release the capture
        cv2.destroyAllWindows()
        device.stop()

    def detect(self, frame):
        """
        Detect the hand
        :param frame: image frame
        :return: cropped image, transformation, center
        """
        hd = HandDetector(frame, self.sync['config']['fx'], self.sync['config']['fy'],
                          importer=self.importer, refineNet=self.comrefNet)
        # Hand-size estimation runs only while in the INIT state.
        doHS = (self.state.value == self.STATE_INIT)
        if self.tracking.value and not numpy.allclose(self.lastcom, 0):
            loc, handsz = hd.track(self.lastcom, self.sync['config']['cube'], doHandSize=doHS)
        else:
            loc, handsz = hd.detect(size=self.sync['config']['cube'], doHandSize=doHS)

        self.lastcom = loc

        if self.state.value == self.STATE_INIT:
            self.handsizes.append(handsz)
            if self.verbose is True:
                print numpy.median(numpy.asarray(self.handsizes), axis=0)
        else:
            self.handsizes = []

        # After enough INIT frames: fix the cube to the median hand size and run.
        if self.state.value == self.STATE_INIT and len(self.handsizes) >= self.numinitframes:
            cfg = self.sync['config']
            cfg['cube'] = tuple(numpy.median(numpy.asarray(self.handsizes), axis=0).astype('int'))
            self.sync.update(config=cfg)
            self.state.value = self.STATE_RUN
            self.handsizes = []

        if numpy.allclose(loc, 0):
            # No hand found: return an empty crop of the network's input size.
            return numpy.zeros((self.poseNet.cfgParams.inputDim[2],
                                self.poseNet.cfgParams.inputDim[3]),
                               dtype='float32'), numpy.eye(3), loc
        else:
            crop, M, com = hd.cropArea3D(com=loc, size=self.sync['config']['cube'],
                                         dsize=(self.poseNet.layers[0].cfgParams.inputDim[2],
                                                self.poseNet.layers[0].cfgParams.inputDim[3]))
            com3D = self.importer.jointImgTo3D(com)
            sc = (self.sync['config']['cube'][2] / 2.)
            # Fill holes with the far plane, then normalize depth to [-1, 1].
            crop[crop == 0] = com3D[2] + sc
            crop.clip(com3D[2] - sc, com3D[2] + sc)
            crop -= com3D[2]
            crop /= sc
            return crop, M, com3D

    def estimatePose(self, crop, com3D):
        """
        Estimate the hand pose
        :param crop: cropped hand depth map
        :param com3D: com detection crop position
        :return: joint positions
        """
        # mirror hand if left/right changed
        if self.hand.value == self.HAND_LEFT:
            inp = crop[None, None, :, :].astype('float32')
        else:
            inp = crop[None, None, :, ::-1].astype('float32')

        jts = self.poseNet.computeOutput(inp)
        jj = jts[0].reshape((-1, 3))

        if 'invX' in self.sync['config']:
            if self.sync['config']['invX'] is True:
                # mirror coordinates
                jj[:, 1] *= (-1.)

        if 'invY' in self.sync['config']:
            if self.sync['config']['invY'] is True:
                # mirror coordinates
                jj[:, 0] *= (-1.)

        # mirror pose if left/right changed
        if self.hand.value == self.HAND_RIGHT:
            # mirror coordinates
            jj[:, 0] *= (-1.)

        return jj

    def show(self, frame, handpose, com3D):
        """
        Show depth with overlaid joints
        :param frame: depth frame
        :param handpose: joint positions
        :return: image
        """
        upsample = 1.
        if 'upsample' in self.sync['config']:
            upsample = self.sync['config']['upsample']

        # plot depth image with annotations
        imgcopy = frame.copy()
        # display hack to hide nd depth
        msk = numpy.logical_and(32001 > imgcopy, imgcopy > 0)
        msk2 = numpy.logical_or(imgcopy == 0, imgcopy == 32001)
        min = imgcopy[msk].min()
        max = imgcopy[msk].max()
        imgcopy = (imgcopy - min) / (max - min) * 255.
        imgcopy[msk2] = 255.
        imgcopy = imgcopy.astype('uint8')
        imgcopy = cv2.cvtColor(imgcopy, cv2.COLOR_GRAY2BGR)

        if not numpy.allclose(upsample, 1):
            imgcopy = cv2.resize(imgcopy, dsize=None, fx=upsample, fy=upsample,
                                 interpolation=cv2.INTER_LINEAR)

        # Pick the evaluation helper (and its joint connectivity) by joint count.
        if handpose.shape[0] == 16:
            hpe = ICVLHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        elif handpose.shape[0] == 14:
            hpe = NYUHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        elif handpose.shape[0] == 21:
            hpe = MSRAHandposeEvaluation(numpy.zeros((3, 3)), numpy.zeros((3, 3)))
        else:
            raise ValueError("Invalid number of joints {}".format(handpose.shape[0]))

        # Project joints to image space, re-centered for the upsampled image.
        jtI = self.importer.joints3DToImg(handpose)
        jtI[:, 0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        jtI[:, 0:2] *= upsample
        jtI[:, 0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        for i in xrange(handpose.shape[0]):
            cv2.circle(imgcopy, (jtI[i, 0], jtI[i, 1]), 3, (255, 0, 0), -1)
        for i in xrange(len(hpe.jointConnections)):
            cv2.line(imgcopy,
                     (jtI[hpe.jointConnections[i][0], 0], jtI[hpe.jointConnections[i][0], 1]),
                     (jtI[hpe.jointConnections[i][1], 0], jtI[hpe.jointConnections[i][1], 1]),
                     255.*hpe.jointConnectionColors[i], 2)

        comI = self.importer.joint3DToImg(com3D)
        comI[0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        comI[0:2] *= upsample
        comI[0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        cv2.circle(imgcopy, (comI[0], comI[1]), 3, (0, 255, 0), 1)

        poseimg = numpy.zeros_like(imgcopy)
        # rotate 3D pose and project to 2D
        jtP = self.importer.joints3DToImg(
            rotatePoints3D(handpose, handpose[self.importer.crop_joint_idx], 0., 90., 0.))
        jtP[:, 0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        jtP[:, 0:2] *= upsample
        jtP[:, 0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        for i in xrange(handpose.shape[0]):
            cv2.circle(poseimg, (jtP[i, 0], jtP[i, 1]), 3, (255, 0, 0), -1)
        for i in xrange(len(hpe.jointConnections)):
            cv2.line(poseimg,
                     (jtP[hpe.jointConnections[i][0], 0], jtP[hpe.jointConnections[i][0], 1]),
                     (jtP[hpe.jointConnections[i][1], 0], jtP[hpe.jointConnections[i][1], 1]),
                     255.*hpe.jointConnectionColors[i], 2)

        comP = self.importer.joint3DToImg(
            rotatePoint3D(com3D, handpose[self.importer.crop_joint_idx], 0., 90., 0.))
        comP[0:2] -= numpy.asarray([frame.shape[0]//2, frame.shape[1]//2])
        comP[0:2] *= upsample
        comP[0:2] += numpy.asarray([imgcopy.shape[0]//2, imgcopy.shape[1]//2])
        cv2.circle(poseimg, (comP[0], comP[1]), 3, (0, 255, 0), 1)

        return imgcopy, poseimg

    def addStatusBar(self, img):
        """
        Add status bar to image
        :param img: image
        :return: image with status bar
        """
        barsz = 20
        retimg = numpy.ones((img.shape[0]+barsz, img.shape[1], img.shape[2]),
                            dtype='uint8')*255
        retimg[barsz:img.shape[0]+barsz, 0:img.shape[1], :] = img

        # FPS text
        fps = 1./(time.time()-self.lastshow)
        self.runningavg_fps.append(fps)
        avg_fps = numpy.mean(self.runningavg_fps)
        cv2.putText(retimg, "FPS {0:2.1f}".format(avg_fps), (20, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))
        # hand text
        cv2.putText(retimg, "Left" if self.hand.value == self.HAND_LEFT else "Right",
                    (80, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))
        # hand size
        ss = "HC-{0:d}".format(self.sync['config']['cube'][0])
        cv2.putText(retimg, ss, (120, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))
        # hand tracking mode, tracking or detection
        cv2.putText(retimg, "T" if self.tracking.value else "D", (260, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))
        # hand detection mode, COM or CNN
        if self.detection.value == self.DETECTOR_COM:
            mode = "COM"
        else:
            mode = "???"
        cv2.putText(retimg, mode, (280, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0))
        # status symbol (red=idle, yellow=init, green=run)
        if self.state.value == self.STATE_IDLE:
            col = (0, 0, 255)
        elif self.state.value == self.STATE_INIT:
            col = (0, 255, 255)
        elif self.state.value == self.STATE_RUN:
            col = (0, 255, 0)
        else:
            col = (0, 0, 255)
        cv2.circle(retimg, (5, 5), 5, col, -1)
        return retimg

    def processKey(self, key):
        """
        Process key
        :param key: key value
        :return: None
        """
        if key == ord('q'):
            # quit
            self.stop.value = True
        elif key == ord('h'):
            # toggle left/right hand
            if self.hand.value == self.HAND_LEFT:
                self.hand.value = self.HAND_RIGHT
            else:
                self.hand.value = self.HAND_LEFT
        elif key == ord('+'):
            # grow the crop cube by 10 in each dimension
            cfg = self.sync['config']
            cfg['cube'] = tuple([lst + 10 for lst in list(cfg['cube'])])
            self.sync.update(config=cfg)
        elif key == ord('-'):
            # shrink the crop cube by 10 in each dimension
            cfg = self.sync['config']
            cfg['cube'] = tuple([lst - 10 for lst in list(cfg['cube'])])
            self.sync.update(config=cfg)
        elif key == ord('r'):
            self.reset()
        elif key == ord('i'):
            # restart hand-size initialization
            self.state.value = self.STATE_INIT
        elif key == ord('t'):
            self.tracking.value = not self.tracking.value
        elif key == ord('s'):
            # toggle the auxiliary crop/pose windows
            self.show_crop = not self.show_crop
            self.show_pose = not self.show_pose
        else:
            pass

    def reset(self):
        """
        Reset stateful parts
        :return: None
        """
        self.state.value = self.STATE_IDLE
        self.sync.update(config=copy.deepcopy(self.initialconfig))
        self.detection.value = self.DETECTOR_COM
    # Tail of the FPN neck: merge P3 down into P2, then smooth all levels.
    p3_up = layers.UpSampling2D(name='fpn_p3upsampled')(p3)
    p2 = layers.Conv2D(256, 1, name='fpn_c2p2')(c2)
    p2 = layers.Add(name='fpn_p2add')([p3_up, p2])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    p2 = layers.Conv2D(256, kernel_size=3, padding='same', name='fpn_p2')(p2)
    p3 = layers.Conv2D(256, kernel_size=3, padding='same', name='fpn_p3')(p3)
    p4 = layers.Conv2D(256, kernel_size=3, padding='same', name='fpn_p4')(p4)
    p5 = layers.Conv2D(256, kernel_size=3, padding='same', name='fpn_p5')(p5)
    # p6 is generated by subsampling from p5 with stride of 2
    p6 = layers.MaxPool2D(pool_size=1, strides=2, name='fpn_p6')(p5)
    return p2, p3, p4, p5, p6


if __name__ == '__main__':
    # Smoke test: run a dummy input through backbone + neck and print a summary.
    resnet_50 = ResNet(50)
    inputs_ = keras.Input(shape=(608, 608, 3))
    c2_, c3_, c4_, c5_ = resnet_50(inputs_)
    p2_, p3_, p4_, p5_, p6_ = neck_network(c2_, c3_, c4_, c5_)
    # Total # params: 26, 905, 536
    # Trainable # params: 26, 852, 416
    # Non - trainable # params: 53, 120
    model = keras.Model(inputs_, [p2_, p3_, p4_, p5_, p6_])
    model.summary()
class_mode="categorical", target_size=(64, 64), color_mode="rgb", shuffle=False, batch_size=BS) # initialize the testing generator testGen = valAug.flow_from_directory(config.TEST_PATH, class_mode="categorical", target_size=(64, 64), color_mode="rgb", shuffle=False, batch_size=BS) # initialize our ResNet model and compile it model = ResNet.build(64, 64, 3, 2, (3, 4, 6), (64, 128, 256, 512), reg=0.0005) opt = SGD(lr=INIT_LR, momentum=0.9) model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) # define our set of callbacks and fit the model callbacks = [LearningRateScheduler(poly_decay)] H = model.fit_generator(trainGen, steps_per_epoch=totalTrain // BS, validation_data=valGen, validation_steps=totalVal // BS, epochs=NUM_EPOCHS, callbacks=callbacks) # reset the testing generator and then use our trained model to # make predictions on the data print("[INFO] evaluating network...")
    batch_size=cfg.train.batchSize,
    shuffle=cfg.data.shuffle,
    num_workers=0,
    pin_memory=True,  # can be enabled when the machine has enough resources
)
datay = DataY(
    inputHW=cfg.model.netInput,  # network input size (images are resized to this)
    gride=cfg.model.featSize,  # how many grid cells the input is divided into
    stride=cfg.model.stride,
    boxNum=cfg.model.bboxPredNum,
    clsNum=cfg.model.clsNum)

"""准备网络"""
# Backbone output channels = 5 values per predicted box plus one per class.
network = ResNet(
    ResnetBasicSlim,
    # [2, 2, 2, 2],
    # [1,1,1,1],
    [3, 4, 6, 3],
    channel_in=cfg.data.imgChannelNumber,
    channel_out=(cfg.model.bboxPredNum * 5 + cfg.model.clsNum))
# network = YOLOv1(params={"dropout": 0.5, "num_class": cfg.model.clsNum})
# network = Number(cfg.data.imgChannelNumber, cfg.model.bboxPredNum * 5 + cfg.model.clsNum)
network.to(device)
startEpoch = 1
if cfg.dir.modelReloadFlag:
    # Resume: checkpoint stores both the weights and the last finished epoch.
    savedDict = torch.load(cfg.dir.logSaveDir + "weight/" + cfg.dir.modelName)  # load checkpoint
    weights = savedDict['savedModel']
    startEpoch = savedDict['epoch'] + 1
    network.load_state_dict(weights)  # restore weights into our model

"""指定loss"""
lossF = yoloLoss(boxNum=cfg.model.bboxPredNum,
    shuffle=False,
    num_workers=cfg.train.workers,
    pin_memory=True,  # can be enabled when the machine has enough resources
)
datay = DataY(
    inputHW=cfg.model.netInput,  # network input size (images are resized to this)
    gride=cfg.model.featSize,  # how many grid cells the input is divided into
    stride=cfg.model.stride,
    boxNum=cfg.model.bboxPredNum,
    clsNum=cfg.model.clsNum)

"""准备网络"""
# Backbone output channels = 5 values per predicted box plus one per class.
# network = ResNet(ResnetBasic, [2, 2, 2, 2], channel_out = 15)
network = ResNet(
    ResnetBasicSlim,
    # [2, 2, 2, 2],
    [3, 4, 6, 3],
    channel_in=cfg.data.imgChannelNumber,
    channel_out=(cfg.model.bboxPredNum * 5 + cfg.model.clsNum))
# network = Number(cfg.data.imgChannelNumber, cfg.model.bboxPredNum * 5 + cfg.model.clsNum)
# Inference only: switch off dropout/batch-norm updates.
network = network.eval()
# network = YOLOv1(params={"dropout": 0.5, "num_class": cfg.model.clsNum})
network.to(device)
weights = torch.load(cfg.dir.logSaveDir + "weight/" + cfg.dir.modelName)  # load checkpoint
network.load_state_dict(weights["savedModel"])  # restore weights into our model

""" video Writer"""
fourcc = cv2.VideoWriter_fourcc(*'XVID')
writer = cv2.VideoWriter(videoSavePath, fourcc, 1.0, videoSaveSize, True)

with torch.no_grad():
    if modeDict[mode] == "camMode":