def main():
    parser = argparse.ArgumentParser(description='Train Network')
    parser.add_argument('--data-dir', default='data', help='data directory')
    # The arguments below are referenced later in this function; the
    # defaults here are assumptions added to make the script runnable.
    parser.add_argument('--vgg-dir', default='vgg_graph', help='VGG checkpoint directory')
    parser.add_argument('--tensorboard-dir', default='tb', help='TensorBoard log directory')
    parser.add_argument('--name', default='ssd', help='checkpoint directory name')
    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
    parser.add_argument('--batch-size', type=int, default=8, help='batch size')
    parser.add_argument('--checkpoint-interval', type=int, default=5,
                        help='epochs between checkpoints')
    args = parser.parse_args()

    td = TrainingData(args.data_dir)
    with tf.Session() as sess:
        net = SSD(sess)
        net.create_from_vgg(args.vgg_dir, td.num_classes, td.conf)
        labels = tf.placeholder(tf.float32,
                                shape=[None, None, td.num_classes + 5])
        optimizer, loss = net.get_optimizer(labels)
        summary_writer = tf.summary.FileWriter(args.tensorboard_dir, sess.graph)
        saver = tf.train.Saver(max_to_keep=10)

        n_batches = int(math.ceil(td.num_train / args.batch_size))
        init_vars(sess)

        validation_loss = tf.placeholder(tf.float32)
        validation_loss_summary_op = tf.summary.scalar('validation_loss',
                                                       validation_loss)
        training_loss = tf.placeholder(tf.float32)
        training_loss_summary_op = tf.summary.scalar('training_loss',
                                                     training_loss)

        for e in range(args.epochs):
            # Training pass.
            generator = td.train_generator(args.batch_size)
            description = 'Epoch {}/{}'.format(e + 1, args.epochs)
            training_loss_total = 0
            for x, y in tqdm(generator, total=n_batches,
                             desc=description, unit='batches'):
                feed = {net.image_input: x, labels: y, net.keep_prob: 1}
                loss_batch, _ = sess.run([loss, optimizer], feed_dict=feed)
                training_loss_total += loss_batch * x.shape[0]
            training_loss_total /= td.num_train

            # Validation pass: only the loss, no optimizer step.
            generator = td.valid_generator(args.batch_size)
            validation_loss_total = 0
            for x, y in generator:
                feed = {net.image_input: x, labels: y, net.keep_prob: 1}
                loss_batch = sess.run(loss, feed_dict=feed)
                validation_loss_total += loss_batch * x.shape[0]
            validation_loss_total /= td.num_valid

            feed = {validation_loss: validation_loss_total,
                    training_loss: training_loss_total}
            loss_summary = sess.run([validation_loss_summary_op,
                                     training_loss_summary_op],
                                    feed_dict=feed)
            summary_writer.add_summary(loss_summary[0], e)
            summary_writer.add_summary(loss_summary[1], e)

            if (e + 1) % args.checkpoint_interval == 0:
                checkpoint = '{}/e{}.ckpt'.format(args.name, e + 1)
                saver.save(sess, checkpoint)

        checkpoint = '{}/final.ckpt'.format(args.name)
        saver.save(sess, checkpoint)
    return 0
def __init__(self):
    self.node_name = "ssd_keras"
    rospy.init_node(self.node_name)
    self.class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
                        "bottle", "bus", "car", "cat", "chair", "cow",
                        "diningtable", "dog", "horse", "motorbike", "person",
                        "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    self.num_classes = len(self.class_names)
    self.input_shape = (300, 300, 3)
    self.model = SSD(self.input_shape, num_classes=self.num_classes)
    self.model.load_weights(pkg_path + '/resources/ssd_keras/weights_SSD300.hdf5')
    self.bbox_util = BBoxUtility(self.num_classes)
    self.conf_thresh = 0.25
    self.model._make_predict_function()
    self.graph = tf.get_default_graph()
    self.detection_index = DL_msgs_boxes()

    # Create unique and somewhat visually distinguishable bright
    # colors for the different classes.
    self.class_colors = []
    for i in range(self.num_classes):
        # This can probably be written in a more elegant manner.
        hue = 255 * i / self.num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # saturation
        col[0][0][2] = 255  # value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        self.class_colors.append(col)

    self.bridge = CvBridge()  # create the cv_bridge object
    self.Image_Status = "Not_Ready"
    self.StartImage = cv2.imread(pkg_path + '/resources/start.jpg')
    self.to_draw = cv2.resize(self.StartImage, (640, 480))
    # Subscribe / publish / serve with the appropriate callbacks.
    self.image_sub = rospy.Subscriber("/floating_sensor/camera/rgb/image_raw",
                                      Image, self.detect_image, queue_size=1)
    self.box_coordinate_pub = rospy.Publisher("/ssd_detction/box",
                                              DL_msgs_boxes, queue_size=5)
    self.SSD_Serv = rospy.Service('SSD_Detection', DL_box,
                                  self.SSD_Detection_Server)
def build():
    model = SSD()
    image = fluid.layers.data(
        name='image', shape=[3, 300, 300], dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    return model(image, gt_box, gt_label)
def detect_img(indir, outdir):
    ssd = SSD()
    # Iterate over all image files in the directory.
    for filename in glob.glob(indir):
        print("Start, the detect image is:", filename)
        img = Image.open(filename)
        img = ssd.detect_image(img)
        # img.show()  # display the image
        img.save(os.path.join(outdir, os.path.basename(filename)))
        print("End, the detection of this image")
        print('---------------------------------')
def load_model():
    global file_num
    file_num = 0

    global class_model
    class_model = feature_extractor()
    model_path = './class_model.pki'
    tmp = torch.load(model_path, map_location={'cuda:0': 'cpu'})
    class_model.load_state_dict(tmp)
    class_model.eval()
    del tmp

    global object_model
    object_model = SSD(depth=50, width=1)
    model_path = './ssd_patch.pki'
    tmp = torch.load(model_path, map_location={'cuda:0': 'cpu'})
    object_model.load_state_dict(tmp)
    object_model.eval()
def __init__(self, modelfile, shape=(300, 300, 3), num_classes=21,
             conf_thresh=0.6):
    self.input_shape = shape
    self.num_classes = num_classes
    self.conf_thresh = conf_thresh
    # Build the model.
    model = SSD(shape, num_classes=num_classes)
    model.load_weights(modelfile)
    self.model = model
    # Utility for creating / decoding bounding boxes.
    self.bbox_util = BBoxUtility(self.num_classes)
def __init__(self, dirname=DEFAULT_MODEL_DIR, gpu=-1,
             nms_thresh=DEFAULT_NMS_THRESH,
             score_thresh=DEFAULT_SCORE_THRESH):
    with open(os.path.join(dirname, "model.json"), 'r') as fp:
        metadata = json.load(fp)
    n_class = metadata['n_class']
    n_channel = metadata['n_channel']
    npz_file = metadata['file']
    self.class_labels = metadata['class_labels']
    self.model = SSD(n_class=n_class, n_channel=n_channel,
                     nms_thresh=nms_thresh, score_thresh=score_thresh,
                     grids=DEFAULT_GRIDS, aspect_ratios=DEFAULT_ASPECT_RATIOS,
                     variance=DEFAULT_VARIANCE)
    chainer.serializers.load_npz(os.path.join(dirname, npz_file), self.model)
    if gpu >= 0:
        chainer.backends.cuda.get_device_from_id(gpu).use()
        self.model.to_gpu(gpu)
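# A sketch of the "model.json" metadata this loader expects. The keys match
# the metadata written out by the training script at the end of this
# collection; the values below are hypothetical examples.
# {
#     "file": "model.npz",
#     "n_channel": 3,
#     "n_class": 2,
#     "class_labels": ["label_a", "label_b"]
# }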
def initialize_net() -> SSD:
    global ssd_net
    # If already initialized, return the cached network.
    if ssd_net is not None:
        print('use cached ssd_net')
        return ssd_net

    # Get the device (CPU / GPU) to be used.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print(f'device : {device}')

    ssd_cfg = {
        'num_classes': num_classes,  # number of classes including background class
        'input_size': Parameters.IMG_SIZE,
        'bbox_aspect_num': Parameters.BBOX_ASPECT_NUM,
        'feature_maps': Parameters.FEATURE_MAPS,
        'steps': Parameters.STEPS,
        'min_sizes': Parameters.MIN_SIZES,
        'max_sizes': Parameters.MAX_SIZES,
        'aspect_ratios': Parameters.ASPECT_RATIOS,
        'conf_thresh': Parameters.CONF_THRESHOLD,
        'top_k': Parameters.TOP_K,
        'nms_thresh': Parameters.NMS_THRESHOLD
    }
    print(f'initializing ssd with : {ssd_cfg}')
    ssd_net = SSD(phase="inference", cfg=ssd_cfg)

    # Load the weights created in training.
    weight_file_path = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                                    'model.pth')
    print(f'weight_file_path : {weight_file_path}')
    # cf. https://pytorch.org/tutorials/beginner/saving_loading_models.html#save-on-gpu-load-on-gpu
    weight = torch.load(weight_file_path, map_location=device)
    ssd_net.load_state_dict(weight)
    ssd_net = ssd_net.to(device)
    ssd_net.eval()
    return ssd_net
def main():
    try:
        checkpoint = torch.load(config.PATH_TO_CHECKPOINT,
                                map_location=torch.device('cpu'))
        start_epoch = checkpoint['epoch'] + 1
        print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
    except FileNotFoundError:
        print('PATH_TO_CHECKPOINT not specified in SSDConfig.\n'
              'Making new model and optimizer.')
        start_epoch = 0
        model = SSD(config)
        model_parameters = utils.get_model_params(model)
        # Biases get twice the base learning rate, the usual SSD recipe.
        optimizer = SGD(params=[{'params': model_parameters['biases'],
                                 'lr': 2 * config.LEARNING_RATE},
                                {'params': model_parameters['not_biases']}],
                        lr=config.LEARNING_RATE,
                        momentum=config.MOMENTUM,
                        weight_decay=config.WEIGHT_DECAY)

    # Dataloader.
    df = get_dataframe(config.PATH_TO_ANNOTATIONS)
    dataset = ShelfImageDataset(df, config.PATH_TO_IMAGES, train=True)
    dataloader = DataLoader(dataset,
                            shuffle=True,
                            collate_fn=collate_fn,
                            batch_size=config.TRAIN_BATCH_SIZE,
                            num_workers=config.NUM_DATALOADER_WORKERS)

    # Move to device.
    model.to(device)
    criterion = MultiBoxLoss(model.priors_cxcy, config).to(device)

    # Number of epochs to train.
    epochs = config.NUM_ITERATIONS_TRAIN // len(dataloader)
    # Epochs at which the LR is decayed.
    decay_at_epoch = [int(epochs * x) for x in config.DECAY_LR_AT]

    for epoch in range(start_epoch, epochs):
        if epoch in decay_at_epoch:
            utils.adjust_learning_rate(optimizer, config.DECAY_FRAC)
        train(dataloader, model, criterion, optimizer, epoch)
        utils.save_checkpoint(epoch, model, optimizer, config,
                              config.PATH_TO_CHECKPOINT)
def create_mobilenetv2_ssd_lite(num_classes, width_mult=1.0,
                                use_batch_norm=True, onnx_compatible=False,
                                is_test=False):
    base_net = MobileNetV2(width_mult=width_mult,
                           use_batch_norm=use_batch_norm,
                           onnx_compatible=onnx_compatible).features

    source_layer_indexes = [
        GraphPath(14, 'conv', 3),
        19,
    ]
    extras = ModuleList([
        InvertedResidual(1280, 512, stride=2, expand_ratio=0.2),
        InvertedResidual(512, 256, stride=2, expand_ratio=0.25),
        InvertedResidual(256, 256, stride=2, expand_ratio=0.5),
        InvertedResidual(256, 64, stride=2, expand_ratio=0.25)
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=round(576 * width_mult), out_channels=6 * 4,
                        kernel_size=3, padding=1, onnx_compatible=False),
        SeperableConv2d(in_channels=1280, out_channels=6 * 4,
                        kernel_size=3, padding=1, onnx_compatible=False),
        SeperableConv2d(in_channels=512, out_channels=6 * 4,
                        kernel_size=3, padding=1, onnx_compatible=False),
        SeperableConv2d(in_channels=256, out_channels=6 * 4,
                        kernel_size=3, padding=1, onnx_compatible=False),
        SeperableConv2d(in_channels=256, out_channels=6 * 4,
                        kernel_size=3, padding=1, onnx_compatible=False),
        Conv2d(in_channels=64, out_channels=6 * 4, kernel_size=1),
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=round(576 * width_mult),
                        out_channels=6 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=1280, out_channels=6 * num_classes,
                        kernel_size=3, padding=1),
        SeperableConv2d(in_channels=512, out_channels=6 * num_classes,
                        kernel_size=3, padding=1),
        SeperableConv2d(in_channels=256, out_channels=6 * num_classes,
                        kernel_size=3, padding=1),
        SeperableConv2d(in_channels=256, out_channels=6 * num_classes,
                        kernel_size=3, padding=1),
        Conv2d(in_channels=64, out_channels=6 * num_classes, kernel_size=1),
    ])

    return SSD(num_classes, base_net, source_layer_indexes,
               extras, classification_headers, regression_headers,
               is_test=is_test, config=config)
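# Minimal usage sketch (not part of the original code, assumes a 300x300
# input, which is what this configuration is laid out for): build the
# network in test mode and run a forward pass on a dummy batch.
# import torch
# net = create_mobilenetv2_ssd_lite(num_classes=21, is_test=True)
# net.eval()
# with torch.no_grad():
#     confidences, boxes = net(torch.randn(1, 3, 300, 300))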
cv2.imshow("SSD result", orig_image) if cv2.waitKey(5) & 0xFF == ord('s'): if len(image_stack) == frame_number: if not os.path.exists(save_path + str(sample_count + 1)): os.mkdir(save_path + str(sample_count + 1)) for pic in range(frame_number): cv2.imwrite( save_path + str(sample_count + 1) + '/' + str(1000 + pic) + '.jpg', image_stack[pic]) print('saving ' + save_path + str(sample_count + 1) + '/' + str(1000 + pic) + '.jpg') image_stack = [] empty_count = 0 sample_count += 1 if __name__ == '__main__': action_class = 'stand/' root_path = 'images/' save_path = root_path + action_class if not os.path.exists(root_path): os.mkdir(root_path) if not os.path.exists(save_path): os.mkdir(save_path) save_frames = 16 input_shape = (300, 300, 3) ssd_model = SSD(input_shape, num_classes=21) ssd_model.load_weights('weights_SSD300.hdf5') run_camera(input_shape, ssd_model, save_path, save_frames)
def create_mobilenetv1_ssd(num_classes, is_test=False):
    base_net = MobileNetV1(1001).model  # disable dropout layer

    source_layer_indexes = [
        12,
        14,
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=256, out_channels=512, kernel_size=3,
                   stride=2, padding=1),
            ReLU()
        ),
        Sequential(
            Conv2d(in_channels=512, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3,
                   stride=2, padding=1),
            ReLU()
        ),
        Sequential(
            Conv2d(in_channels=256, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3,
                   stride=2, padding=1),
            ReLU()
        ),
        Sequential(
            Conv2d(in_channels=256, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3,
                   stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
    ])

    classification_headers = ModuleList([
        Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
    ])

    return SSD(num_classes, base_net, source_layer_indexes,
               extras, classification_headers, regression_headers,
               is_test=is_test, config=config)
def train(train_file, test_file, num_epoch):
    use_gpu = torch.cuda.is_available()
    Loss = MultiBoxLoss_2()  # loss
    learning_rate = 0.01
    num_epochs = num_epoch
    batch_size = 16

    model = SSD(depth=50, width=1)
    # optimizer = torch.optim.SGD([{"params": model.parameters()}],
    #                             lr=learning_rate, momentum=0.9,
    #                             weight_decay=5e-4)
    optimizer = torch.optim.Adam([{"params": model.parameters()}],
                                 lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer)
    if use_gpu:
        model.cuda()
    model.train()

    train_dataset = ListDataset(root='GUN/WeaponS/', list_file=train_file,
                                train=True, transform=transforms.ToTensor())
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=2)
    test_dataset = ListDataset(root='GUN/WeaponS/', list_file=test_file,
                               train=True, transform=transforms.ToTensor())
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=True, num_workers=2)

    for epoch in range(num_epochs):
        t1 = time.time()
        model.train()
        total_loss, valid_loss = 0, 0

        # Train the model.
        print("Train {} epoch: ".format(epoch + 1))
        for i, (imgs, loc, conf) in enumerate(train_loader):
            imgs, loc, conf = Variable(imgs), Variable(loc), Variable(conf)
            if use_gpu:
                imgs = imgs.cuda()
                loc = loc.cuda()
                conf = conf.cuda()
            loc_pred, con_pred = model(imgs)
            loss = Loss(loc_pred, loc, con_pred, conf)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('\rEpoch [%d/%d], Training loss: %.4f'
              % (epoch + 1, num_epochs, total_loss / len(train_loader)),
              end='\n')

        # Evaluate the model.
        model.eval()
        with torch.no_grad():
            for i, (imgs, loc, conf) in enumerate(test_loader):
                imgs, loc, conf = Variable(imgs), Variable(loc), Variable(conf)
                if use_gpu:
                    imgs = imgs.cuda()
                    loc = loc.cuda()
                    conf = conf.cuda()
                loc_pred, con_pred = model(imgs)
                loss = Loss(loc_pred, loc, con_pred, conf)
                valid_loss += loss.item()
        print('\rEpoch [%d/%d], Validation loss: %.4f'
              % (epoch + 1, num_epochs, valid_loss / len(test_loader)),
              end='\n')
        print('\n')
        scheduler.step(valid_loss)
        t2 = time.time()
        # print('epoch elapsed time %f secs' % (t2 - t1))

    # Save the model weights.
    # PATH_1 = 'drive/My Drive/BootCamp4/SSD/ssd_2.pki'
    # torch.save(model, PATH_1)
    PATH = 'drive/My Drive/BootCamp4/SSD/ssd_state_dict.pki'
    torch.save(model.state_dict(), PATH)
help=help_) help_ = "Number of layers" parser.add_argument("-l", "--layers", default=6, type=int, help=help_) help_ = "Camera index" parser.add_argument("--camera", default=0, type=int, help=help_) help_ = "Record video" parser.add_argument("-r", "--record", default=False, action='store_true', help=help_) help_ = "Video filename" parser.add_argument("-f", "--filename", default="demo.mp4", help=help_) args = parser.parse_args() if args.tiny: ssd = SSD(n_layers=args.layers, normalize=args.normalize) else: ssd = SSD(n_layers=args.layers, build_basenet=build_resnet, normalize=args.normalize) if args.weights: ssd.load_weights(args.weights) videodemo = VideoDemo(detector=ssd, camera=args.camera, record=args.record, filename=args.filename) videodemo.loop()
from matplotlib import pyplot as plt
%matplotlib inline

# Workaround to avoid errors with newer TensorFlow versions.
import tensorflow as tf
tf.to_float = lambda x: tf.cast(x, tf.float32)
tf.to_int32 = lambda x: tf.cast(x, tf.int32)

from ssd import SSD

# SSD training.
# Specify the model name; here we use SSD7.
# To train, pass 'training' to ``mode`` so the model is built in training mode.
model_name = 'ssd_7'
ssd = SSD(model_name, mode='training')

# Show the model structure.
# ssd.model.summary()

# Specify the directory containing the images and the corresponding
# annotation metadata for the training and validation data.
train_images_dir = os.path.join('train_data', 'train_images')
val_images_dir = os.path.join('train_data', 'train_images')
train_annotation_path = os.path.join('train_data', 'train_annotations.json')
val_annotation_path = os.path.join('train_data', 'val_annotations.json')

# Create the data generators for training and validation with the
# ``set_generator`` method. Decide ``batch_size`` beforehand; here it is 16.
batch_size = 16
ssd.set_generator(train_images_dir, train_annotation_path, batch_size,
                  val_images_dir, val_annotation_path)
               'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
               'train', 'tvmonitor']

# SSD300 configuration.
ssd_cfg = {
    'num_classes': 21,                           # total classes incl. background
    'input_size': 300,                           # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],       # aspect-ratio variants per source
    'feature_maps': [38, 19, 10, 5, 3, 1],       # feature map size of each source
    'steps': [8, 16, 32, 64, 100, 300],          # controls default box placement
    'min_sizes': [30, 60, 111, 162, 213, 264],   # controls default box size
    'max_sizes': [60, 111, 162, 213, 264, 315],  # controls default box size
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# SSD network model.
net = SSD(phase="inference", cfg=ssd_cfg)

# device = torch.device('cpu')

# Load the trained SSD weights (note: the map_location key is 'cuda:0').
# net_weights = torch.load('./weights/ssd300_600.pth', map_location=device)
# net_weights = torch.load('./weights/ssd300_50.pth', map_location={'cuda:0': 'cpu'})
net_weights = torch.load('./weights/ssd300_1.pth', map_location={'cuda:0': 'cpu'})
# net_weights = torch.load('./weights/ssd300_mAP_77.43_v2.pth', map_location={'cuda:0': 'cpu'})
# net_weights = torch.load('./weights/ssd300_mAP_77.43_v2.pth', map_location=device)

net.load_state_dict(net_weights)
print('Network ready: loaded the trained weights.')
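# Sanity check (a sketch, not in the original script): the total number of
# default boxes follows directly from 'feature_maps' and 'bbox_aspect_num';
# for the standard SSD300 settings above it comes to
# 38^2*4 + 19^2*6 + 10^2*6 + 5^2*6 + 3^2*4 + 1^2*4 = 8732.
num_dbox = sum(f * f * n for f, n in zip(ssd_cfg['feature_maps'],
                                         ssd_cfg['bbox_aspect_num']))
print('default boxes:', num_dbox)  # 8732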
        dataset = VOC_Dataset(train_roots, val_roots, test_roots)
    else:
        raise ValueError(
            "Wrong or unsupported dataset. Available 'COCO' or 'VOC'")
    print("\nTesting on %s dataset" % (DATASET_NAME + TESTSET_YEAR))
    dataset.show_info()
    _ = input("Press Enter to continue...")

    ## 2. Dataloader initialization
    print("\t2. Dataloader initialization...")
    dataloader = Dataloader(dataset, TEST_SIZE)
    test_generator = dataloader.generate_batch("test")

    ## 3. Network initialization
    print("\t3. Network initialization...")
    ssd = SSD(num_classes=len(dataset.label_ids) + 1, input_shape=INPUT_SHAPE)
    latest = tf.train.latest_checkpoint(CHECKPOINT_DIR)
    ssd.load_weights(latest)
    ssd.summary()
    _ = input("Press Enter to continue...")

    ## 4. Generate default boxes
    print("\t4. Default boxes generation...")
    fm_shapes = ssd.output_shape
    aspect_ratios = ASPECT_RATIOS
    scales = SCALES
    default_boxes = Image.generate_default_boxes(fm_shapes, aspect_ratios,
                                                 scales)

    # ---------------------------------------------------------------- #
    print("Initialization completed!")
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

# SSD300 configuration.
ssd_cfg = {
    # 'num_classes': 21,  # total classes including background
    'num_classes': 12,                           # total classes incl. background
    'input_size': 300,                           # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],       # aspect-ratio variants per source
    'feature_maps': [38, 19, 10, 5, 3, 1],       # feature map size of each source
    'steps': [8, 16, 32, 64, 100, 300],          # controls default box placement
    'min_sizes': [30, 60, 111, 162, 213, 264],   # controls default box size
    'max_sizes': [60, 111, 162, 213, 264, 315],  # controls default box size
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# SSD network model.
net = SSD(phase="train", cfg=ssd_cfg)

# Set the initial SSD weights:
# load pretrained weights into the VGG part of the SSD.
vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
net.vgg.load_state_dict(vgg_weights)


# Initialize the remaining layers with He initialization.
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight.data)
        if m.bias is not None:  # if the layer has a bias term
            nn.init.constant_(m.bias, 0.0)


# Apply the He initial values.
net.extras.apply(weights_init)
    for i, _ in enumerate(heading_cls):
        h, w, l, tx, ty, tz, ry = provider.from_prediction_to_label_format(
            centers[i], heading_cls[i], heading_res[i],
            size_cls[i], size_res[i], frustum_angles[i])
        detection = Detection(xyz=np.array((tx, ty, tz)), angle=ry,
                              lwh=np.array((l, w, h)),
                              confidence=detection_conf[i])
        scene.detections.append(detection)
    return scene


if __name__ == '__main__':
    ssd = SSD('')
    detector = FrustumPointnetsDetector(ssd_detector=ssd,
                                        ssd_threshold=0.3,
                                        frustum_pointnet=inference,
                                        frustum_batch_size=1,
                                        frustum_num_pts=256)
    dataset = kitti_object(root_dir='kitti_hw_dataset')
    idx = 8
    img = dataset.get_image(idx)
    lidar = dataset.get_lidar(idx)
    calib = dataset.get_calibration(idx)
    labels = dataset.get_label_objects(idx)
    result = detector.predict(lidar, img, calib)
"VOC2007/ImageSets/Main/test.txt")).read().strip().split() if not os.path.exists(map_out_path): os.makedirs(map_out_path) if not os.path.exists(os.path.join(map_out_path, 'ground-truth')): os.makedirs(os.path.join(map_out_path, 'ground-truth')) if not os.path.exists(os.path.join(map_out_path, 'detection-results')): os.makedirs(os.path.join(map_out_path, 'detection-results')) if not os.path.exists(os.path.join(map_out_path, 'images-optional')): os.makedirs(os.path.join(map_out_path, 'images-optional')) class_names, _ = get_classes(classes_path) if map_mode == 0 or map_mode == 1: print("Load model.") ssd = SSD(confidence=confidence, nms_iou=nms_iou) print("Load model done.") print("Get predict result.") for image_id in tqdm(image_ids): image_path = os.path.join( VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg") image = Image.open(image_path) if map_vis: image.save( os.path.join(map_out_path, "images-optional/" + image_id + ".jpg")) ssd.get_map_txt(image_id, image, class_names, map_out_path) print("Get predict result done.") if map_mode == 0 or map_mode == 2:
if __name__ == '__main__':
    parser = ssd_parser()
    help_ = "Camera index"
    parser.add_argument("--camera", default=0, type=int, help=help_)
    help_ = "Record video"
    parser.add_argument("--record", default=False, action='store_true',
                        help=help_)
    help_ = "Video filename"
    parser.add_argument("--filename", default="demo.mp4", help=help_)
    args = parser.parse_args()

    ssd = SSD(args)
    if args.restore_weights:
        ssd.restore_weights()
    videodemo = VideoDemo(detector=ssd, camera=args.camera,
                          record=args.record, filename=args.filename)
    videodemo.loop()
def handler(context):
    print(f'start training with parameters : {Parameters.as_dict()}, '
          f'context : {context}')

    try:
        dataset_alias = context.datasets
    except AttributeError:  # for older versions
        dataset_alias = context['datasets']
    train_dataset_id, val_dataset_id = get_dataset_ids(dataset_alias)

    id2index, _ = set_categories(list(dataset_alias.values()))
    num_classes = len(id2index)
    num_classes += 1  # add one for the background class
    print(f'number of classes : {num_classes}')

    print("Start downloading datasets.")
    dataset_items = list(
        load_dataset_from_api(train_dataset_id, max_num=Parameters.MAX_ITEMS))
    print("Finish downloading datasets.")
    random.shuffle(dataset_items)

    if val_dataset_id is not None:
        val_dataset_items = list(
            load_dataset_from_api(val_dataset_id,
                                  max_num=Parameters.MAX_ITEMS))
        random.shuffle(val_dataset_items)
        train_dataset_items = dataset_items
    else:
        test_size = int(len(dataset_items) * Parameters.TEST_SIZE)
        train_dataset_items = dataset_items[test_size:]
        val_dataset_items = dataset_items[:test_size]

    train_dataset = ABEJAPlatformDataset(train_dataset_items, phase="train",
                                         transform=DataTransform(
                                             Parameters.IMG_SIZE,
                                             Parameters.MEANS))
    val_dataset = ABEJAPlatformDataset(val_dataset_items, phase="val",
                                       transform=DataTransform(
                                           Parameters.IMG_SIZE,
                                           Parameters.MEANS))
    print(f'train dataset : {len(train_dataset)}')
    print(f'val dataset : {len(val_dataset)}')

    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=Parameters.BATCH_SIZE,
                                       shuffle=Parameters.SHUFFLE,
                                       collate_fn=od_collate_fn)
    val_dataloader = data.DataLoader(val_dataset,
                                     batch_size=Parameters.BATCH_SIZE,
                                     shuffle=False,
                                     collate_fn=od_collate_fn)
    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}
    print(f'data loaders : {dataloaders_dict}')

    ssd_cfg = {
        'num_classes': num_classes,  # number of classes including background class
        'input_size': Parameters.IMG_SIZE,
        'bbox_aspect_num': Parameters.BBOX_ASPECT_NUM,
        'feature_maps': Parameters.FEATURE_MAPS,
        'steps': Parameters.STEPS,
        'min_sizes': Parameters.MIN_SIZES,
        'max_sizes': Parameters.MAX_SIZES,
        'aspect_ratios': Parameters.ASPECT_RATIOS,
        'conf_thresh': Parameters.CONF_THRESHOLD,
        'top_k': Parameters.TOP_K,
        'nms_thresh': Parameters.NMS_THRESHOLD
    }
    net = SSD(phase="train", cfg=ssd_cfg)

    # TODO: better to host this file by ourselves
    # https://github.com/amdegroot/ssd.pytorch#training-ssd
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                               'vgg16_reducedfc.pth')
    download(url, weight_file)
    vgg_weights = torch.load(weight_file)
    print('finish loading base network...')
    net.vgg.load_state_dict(vgg_weights)

    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:  # in case of bias
                nn.init.constant_(m.bias, 0.0)

    # Apply initial values of He.
    net.extras.apply(weights_init)
    net.loc.apply(weights_init)
    net.conf.apply(weights_init)

    # Configure the loss function.
    criterion = MultiBoxLoss(jaccard_thresh=Parameters.OVERLAP_THRESHOLD,
                             neg_pos=Parameters.NEG_POS,
                             device=device)

    # Configure the optimizer.
    optimizer = optim.SGD(net.parameters(),
                          lr=Parameters.LR,
                          momentum=Parameters.MOMENTUM,
                          dampening=Parameters.DAMPENING,
                          weight_decay=Parameters.WEIGHT_DECAY,
                          nesterov=Parameters.NESTEROV)

    # Move the network to the device.
    net.to(device)

    # NOTE: This flag enables the inbuilt cudnn auto-tuner
    # to find the best algorithm to use for your hardware.
    # cf. https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/2
    torch.backends.cudnn.benchmark = True

    iteration = 1
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    latest_epoch_train_loss = epoch_train_loss
    latest_epoch_val_loss = epoch_val_loss

    for epoch in range(Parameters.EPOCHS):
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch + 1, Parameters.EPOCHS))
        print('-------------')

        # Loop over the train and validation phases for each epoch.
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
                print('(train)')
            else:
                if (epoch + 1) % 10 == 0:
                    net.eval()
                    print('-------------')
                    print('(val)')
                else:
                    # Perform validation only once every ten epochs.
                    continue

            # Loop over mini-batches from the data loader.
            for images, targets in dataloaders_dict[phase]:
                images = images.to(device)
                targets = [ann.to(device) for ann in targets]

                # Initialize the optimizer.
                optimizer.zero_grad()

                # Forward pass.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    # Calculate the loss.
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        # Backpropagate when training.
                        loss.backward()  # compute gradients
                        nn.utils.clip_grad_value_(
                            net.parameters(),
                            clip_value=Parameters.CLIP_VALUE)
                        optimizer.step()  # update parameters

                        if iteration % 10 == 0:
                            # Display the loss once every ten iterations.
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('iter {} || Loss: {:.4f} || 10iter: {:.4f} sec.'
                                  .format(iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1
                    else:
                        epoch_val_loss += loss.item()

        # Loss and accuracy for each phase of the epoch.
        t_epoch_finish = time.time()

        # Keep the latest epoch losses.
        if epoch_train_loss != 0.0:
            num_total = len(dataloaders_dict['train'])
            latest_epoch_train_loss = epoch_train_loss / num_total
        if epoch_val_loss != 0.0:
            num_total = len(dataloaders_dict['val'])
            latest_epoch_val_loss = epoch_val_loss / num_total

        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} || Epoch_VAL_Loss:{:.4f}'
              .format(epoch + 1, latest_epoch_train_loss,
                      latest_epoch_val_loss))
        print('timer: {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
        t_epoch_start = time.time()

        statistics(epoch + 1, latest_epoch_train_loss, None,
                   latest_epoch_val_loss, None)
        writer.add_scalar('main/loss', latest_epoch_train_loss, epoch + 1)
        if (epoch + 1) % 10 == 0:
            writer.add_scalar('test/loss', latest_epoch_val_loss, epoch + 1)
            model_path = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                                      f'ssd300_{str(epoch + 1)}.pth')
            torch.save(net.state_dict(), model_path)
        writer.flush()

        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

    torch.save(net.state_dict(),
               os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                            'model.pth'))
    writer.close()
import keras
import pickle
from videotest import VideoTest

import sys
sys.path.append("..")
from ssd import SSD300 as SSD

input_shape = (300, 300, 3)

# Change this if you run with other classes than VOC.
class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant",
               "sheep", "sofa", "train", "tvmonitor"]
NUM_CLASSES = len(class_names)

model = SSD(input_shape, num_classes=NUM_CLASSES)

# Change this path if you want to use your own trained weights.
model.load_weights('../weights_SSD300.hdf5')

vid_test = VideoTest(class_names, model, input_shape)

# To test on webcam 0, remove the parameter (or change it to another number
# to test on that webcam).
vid_test.run('path/to/your/video.mkv')
def train(train_config):
    logger = Logger(HOME + '/log', train_config.basenet)

    if train_config.dataset_name == 'VOC':
        cfg = voc_config
        dataset = VOCDataset(DATA_DIR,
                             transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif train_config.dataset_name == 'COCO':
        cfg = coco_config
        dataset = COCODataset(DATA_DIR,
                              transform=SSDAugmentation(cfg['min_dim'], MEANS))

    if train_config.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = SSD('train', train_config.basenet, cfg['min_dim'],
                  cfg['num_classes'], with_fpn=train_config.with_fpn)
    net = ssd_net

    if train_config.cuda:
        net = nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if train_config.resume:
        logger('Loading {} ...'.format(train_config.resume))
        load_weights = torch.load(train_config.resume,
                                  map_location=lambda storage, loc: storage)
        ssd_net.load_state_dict(load_weights)

    if train_config.cuda:
        net = net.cuda()

    if not train_config.resume:
        logger('Initializing weights ...')
        ssd_net.topnet.apply(weights_init)
        ssd_net.loc_layers.apply(weights_init)
        ssd_net.conf_layers.apply(weights_init)

    optimizer = optim.Adam(net.parameters(), lr=train_config.lr,
                           weight_decay=train_config.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3,
                             0.5, False, train_config.cuda)

    net.train()

    # Loss counters.
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    logger('Loading the dataset...')

    epoch_size = len(dataset) // train_config.batch_size
    logger('Training SSD on:{}'.format(dataset.name))
    # logger('using the specified args:')

    step_index = 0

    if train_config.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, train_config.batch_size,
                                  num_workers=train_config.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    # Create the batch iterator.
    batch_iterator = iter(data_loader)
    t0 = time.time()
    for iteration in range(train_config.start_iter, cfg['max_iter']):
        if train_config.visdom and iteration != 0 \
                and (iteration % epoch_size == 0):
            # loc_loss and conf_loss are plain floats here (accumulated via
            # .item()), so they are passed directly, without .item().
            update_vis_plot(epoch, loc_loss, conf_loss,
                            epoch_plot, None, 'append', epoch_size)
            logger('epoch = {} : loss = {}, loc_loss = {}, conf_loss = {}'
                   .format(epoch, loc_loss + conf_loss, loc_loss, conf_loss))
            # Reset the epoch loss counters.
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, train_config.lr,
                                 train_config.gamma, step_index)

        # Load a batch of training data.
        images, targets = next(batch_iterator)
        if iteration // epoch_size > 0 and iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
            print(iteration)

        if train_config.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        # else:
        #     images = torch.tensor(images)
        #     targets = torch.tensor(targets)

        # Forward pass.
        out = net(images)

        # Backprop.
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()

        if train_config.visdom:
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

        if iteration % 50 == 0:
            t1 = time.time()
            logger('timer: %.4f sec. || ' % (t1 - t0) +
                   'iter ' + repr(iteration) +
                   ' || Loss: %.4f ||' % (loss.item()) +
                   ' || loc_loss: %.4f ||' % (loss_l.item()) +
                   ' || conf_loss: %.4f ||' % (loss_c.item()))
            t0 = time.time()

        if train_config.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            logger('Saving state, iter:%d' % iteration)
            torch.save(ssd_net.state_dict(),
                       train_config.save_folder + 'ssd224_VOC_' +
                       repr(iteration) + '.pth')

    torch.save(ssd_net.state_dict(),
               train_config.save_folder + 'ssd224_VOC.pth')
#%%
# ssd = SSD(filt_params_signal=dict(l_freq=freqs_sig[0], h_freq=freqs_sig[1],
#                                   l_trans_bandwidth=1, h_trans_bandwidth=1,
#                                   fir_design='firwin'),
#           filt_params_noise=dict(l_freq=freqs_noise[0], h_freq=freqs_noise[1],
#                                  l_trans_bandwidth=1, h_trans_bandwidth=1,
#                                  fir_design='firwin'),
#           filt_params_noise_stop=dict(l_freq=freqs_noise2[1], h_freq=freqs_noise2[0],
#                                       l_trans_bandwidth=1, h_trans_bandwidth=1,
#                                       fir_design='firwin'),
#           sampling_freq=sf, picks=picks, rank="full")

ssd = SSD(filt_params_signal=dict(l_freq=freqs_sig[0], h_freq=freqs_sig[1],
                                  l_trans_bandwidth=1, h_trans_bandwidth=1,
                                  fir_design='firwin'),
          filt_params_noise=dict(l_freq=freqs_noise[0], h_freq=freqs_noise[1],
                                 l_trans_bandwidth=1, h_trans_bandwidth=1,
                                 fir_design='firwin'),
          sampling_freq=sf, picks=picks, rank="full", n_fft=4096)

#%%
ssd.fit(raw.copy().crop(0, 120))

#%%
ssd_sources = ssd.transform(raw)

#%%
psd, freqs = mne.time_frequency.psd_array_welch(ssd_sources,
                                                sfreq=raw.info['sfreq'],
                                                n_fft=4096)
# psd, freqs = mne.time_frequency.psd_array_welch(
#     raw.get_data(), sfreq=raw.info['sfreq'],
#     n_fft=int(np.ceil(raw.info['sfreq'] / 2)))

#%%
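#%%
# A quick visual check (a sketch, not in the original script): plot the
# Welch PSD of each SSD source computed above; the leading components
# should show the strongest peak in the signal band.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(freqs, psd.T)
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power')
plt.title('PSD of SSD sources')
plt.show()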
parser.add_argument('--weights',
                    default='checkpoints/model_300_VGG16_final_logos.pth.tar',
                    type=str, help='Checkpoint of the model')
parser.add_argument('--cuda', default=True, type=str2bool,
                    help='Enable or not cuda')
parser.add_argument('--test_filenames', default='test_images/*.jpg',
                    type=str, help='Glob pattern of filenames')
args = parser.parse_args()

net = SSD(cuda=args.cuda, architecture='300_VGG16',
          num_classes=len(LogoDataset.CLASSES))
has_cuda = args.cuda and torch.cuda.is_available()
if has_cuda:
    weights = torch.load(args.weights)['model']
else:
    weights = torch.load(args.weights, map_location='cpu')['model']
net = SSD.load(weights=weights)

COLORMAP = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]

images = [cv2.imread(filename) for filename in glob.glob(args.test_filenames)]
results = net.predict(images)
for im, result_image in zip(images, results):
image_files = sorted(os.listdir(image_dir))

# Load the annotation data for visualization.
annotations_dir = os.path.join('competition_data', 'val_annotations')
annotations_files = sorted(os.listdir(annotations_dir))
annotation = []
for file in annotations_files:
    with open(os.path.join(annotations_dir, file)) as f:
        data = json.load(f)
    annotation.append(data)

# Import the SSD model.
from ssd import SSD
ssd = SSD("ssd_7")
models_dir = os.path.join(".", "trained_models")
model_path = os.path.join(models_dir, "ssd7.h5")
ssd.load_weights(model_path)


def plot_bbox(img, gt, out):
    # Set the figure size.
    plt.figure(figsize=(11, 11))
    # Show the satellite image (BGR -> RGB).
    plt.imshow(img[:, :, ::-1])
    # Select the axis (image) to draw on.
    current_axis = plt.gca()
    # Visualize the ground-truth bboxes (drawn in red).
                    default=True)
parser.add_argument('-n', "--num2show", type=int, help='num img 2 show',
                    default=1)
parser.add_argument('-r', "--root", type=str,
                    help='root dir filled with *.jpg',
                    default='VOCdevkit/VOC2007/JPEGImages')
parser.add_argument('-i', "--filename", type=str, help='filename',
                    default='')
args = parser.parse_args()

model = SSD(args.model_path, args.conf, args.cuda)
if args.num2show == 1:
    image = Image.open(os.path.join(args.root, args.filename))
    res, cls, score = model.detect_image(image)
    print(cls, score)
    # r_image.show()
else:
    print('Results will be saved to temp.png')
    files = os.listdir(args.root)
    idx = [int(len(os.listdir(args.root)) * random.random())
           for i in range(args.num2show)]
    imgs = [Image.open(os.path.join(args.root, files[id])) for id in idx]
# -------------------------------------#
#   Webcam detection
# -------------------------------------#
from keras.layers import Input
from ssd import SSD
from PIL import Image
import numpy as np
import cv2

ssd = SSD()

# Open the webcam.
capture = cv2.VideoCapture(0)
# capture = cv2.VideoCapture("1.mp4")

while True:
    # Read one frame.
    ref, frame = capture.read()
    # Convert the format from BGR to RGB.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Convert to a PIL Image.
    frame = Image.fromarray(np.uint8(frame))
    # Run detection.
    frame = np.array(ssd.detect_image(frame))
    # RGB back to BGR to satisfy the OpenCV display format.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    cv2.imshow("video", frame)
    c = cv2.waitKey(30) & 0xff
    if c == 27:  # Esc to quit
        capture.release()
        break
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--channel', type=int, default=DEFAULT_CHANNEL)
    parser.add_argument('--batchsize', type=int, default=DEFAULT_BATCHSIZE)
    parser.add_argument('--epoch', type=int, default=DEFAULT_EPOCH)
    parser.add_argument('--frequency', type=int, default=DEFAULT_FREQUENCY)
    parser.add_argument('--alpha', type=float, default=DEFAULT_ALPHA)
    parser.add_argument('--opt',
                        choices=('adam', 'adabound', 'amsgrad', 'amsbound'),
                        default=DEFAULT_OPTIMIZER)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--model', default='model')
    parser.add_argument('--resume', action='store_true', default=False)
    parser.add_argument('--retrain', action='store_true', default=False)
    args = parser.parse_args()

    if args.resume and args.retrain:
        print('--resume and --retrain are exclusive')
        exit(1)

    dataset = Dataset(DEFAULT_DATASET_DIR)
    n_data = len(dataset)
    thresh = int(n_data * 0.9 + 0.5)
    print("{} records found in the dataset. "
          "{} records will be used for training".format(n_data, thresh))
    n_class = dataset.n_class
    class_ids = dataset.class_ids
    class_labels = dataset.class_labels

    model = SSD(n_class=n_class, n_channel=args.channel,
                grids=DEFAULT_GRIDS, aspect_ratios=DEFAULT_ASPECT_RATIOS,
                nms_thresh=DEFAULT_NMS_THRESH,
                score_thresh=DEFAULT_SCORE_THRESH,
                variance=DEFAULT_VARIANCE)
    train_chain = MultiboxTrainChain(model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(dataset[:thresh], Transform(model.coder))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test = dataset[thresh:]
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Map --opt onto the Adam variants
    # ('adam', 'adabound', 'amsgrad', 'amsbound').
    if args.opt == 'adam':
        adabound = False
        amsgrad = False
    elif args.opt == 'adabound':
        adabound = True
        amsgrad = False
    elif args.opt == 'amsgrad':
        adabound = False
        amsgrad = True
    elif args.opt == 'amsbound':
        adabound = True
        amsgrad = True
    else:
        raise ValueError('invalid optimizer')

    optimizer = chainer.optimizers.Adam(alpha=args.alpha, adabound=adabound,
                                        amsgrad=amsgrad)
    optimizer.setup(train_chain)

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
         'main/loss/conf', 'validation/main/acc', 'elapsed_time']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(args.frequency, 'epoch'))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'main/loss/loc', 'main/loss/conf'],
        x_key='epoch', file_name='loss.png'))

    model_file = os.path.join(args.model, "model.npz")
    if args.retrain:
        if not os.path.isfile(model_file):
            print("{}: not found".format(model_file))
            exit(1)
        print("Loading pretrained model from {}...".format(model_file))
        chainer.serializers.load_npz(model_file, model)

    if args.resume:
        # Find the snapshot with the highest epoch number.
        maxnum = -1
        for s in glob.glob(os.path.join(args.model, "snapshot_epoch_*")):
            m = re.search('[0-9]+$', s)
            if m:
                maxnum = max(maxnum, int(m.group(0)))
        if maxnum < 0:
            print("No snapshot file found. Ignore --resume option")
        else:
            snapshot_file = os.path.join(args.model,
                                         "snapshot_epoch_{}".format(maxnum))
            print("Loading the snapshot data from {}.".format(snapshot_file))
            chainer.serializers.load_npz(snapshot_file, trainer)

    trainer.run()

    print("Saving the model to {}.".format(model_file))
    chainer.serializers.save_npz(model_file, model)

    metadata = {
        'file': "model.npz",
        'n_channel': args.channel,
        'n_class': n_class,
        'class_labels': class_labels
    }
    with open(os.path.join(args.model, "model.json"), "w") as fp:
        json.dump(metadata, fp, sort_keys=True)
    return