def load_model():
    """Load the classifier and the SSD detector weights into module globals.

    Side effects: sets the module-level globals ``file_num`` (reset to 0),
    ``class_model`` and ``object_model``; both networks are put in eval mode.

    Raises:
        FileNotFoundError: if either checkpoint file is missing.
    """
    global file_num
    file_num = 0

    def _load_into(net, model_path):
        # map_location='cpu' remaps tensors saved on ANY cuda device, not only
        # 'cuda:0' as the previous {'cuda:0': 'cpu'} dict mapping did.
        state = torch.load(model_path, map_location='cpu')
        net.load_state_dict(state)
        net.eval()  # inference mode: disable dropout / freeze batch-norm stats
        return net

    global class_model
    class_model = _load_into(feature_extractor(), './class_model.pki')

    global object_model
    object_model = _load_into(SSD(depth=50, width=1), './ssd_patch.pki')
def initialize_net() -> None:
    """Return the SSD inference network, building and caching it on first use.

    The constructed network is stored in the module-level global ``ssd_net``
    so subsequent calls return the cached instance instead of reloading
    weights from disk.
    """
    global ssd_net

    # Fast path: reuse the already-initialized network.
    if ssd_net is not None:
        print('use cached ssd_net')
        return ssd_net

    # Pick the compute device (GPU when available, otherwise CPU).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'device : {device}')

    ssd_cfg = {
        'num_classes': num_classes,  # number of classes including background class
        'input_size': Parameters.IMG_SIZE,
        'bbox_aspect_num': Parameters.BBOX_ASPECT_NUM,
        'feature_maps': Parameters.FEATURE_MAPS,
        'steps': Parameters.STEPS,
        'min_sizes': Parameters.MIN_SIZES,
        'max_sizes': Parameters.MAX_SIZES,
        'aspect_ratios': Parameters.ASPECT_RATIOS,
        'conf_thresh': Parameters.CONF_THRESHOLD,
        'top_k': Parameters.TOP_K,
        'nms_thresh': Parameters.NMS_THRESHOLD,
    }
    print(f'initializing ssd with : {ssd_cfg}')
    ssd_net = SSD(phase="inference", cfg=ssd_cfg)

    # Load the weights produced by training.
    weight_file_path = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                                    'model.pth')
    print(f'weight_file_path : {weight_file_path}')
    # cf. https://pytorch.org/tutorials/beginner/saving_loading_models.html#save-on-gpu-load-on-gpu
    weight = torch.load(weight_file_path, map_location=device)
    ssd_net.load_state_dict(weight)

    ssd_net = ssd_net.to(device)
    ssd_net.eval()
    return ssd_net
#元画像の表示 plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) plt.show() #前処理クラスの作成 color_mean = (104, 117, 123) #(BGR)の色の平均値 input_size = 300 #画像のinputサイズを300×300にする transform = DataTransform(input_size, color_mean) #前処理 phase = 'val' img_transformed, boxes, labels = transform(img, phase, "", "") #アノテーションはないので、""にする img = torch.from_numpy(img_transformed[:, :, (2, 1, 0)]).permute(2, 0, 1) #SSDで予測 net.eval() #ネットワークを推論モードに x = img.unsqueeze(0) #ミニバッチ化:torch.Size([1, 3, 300, 300]) detections = net(x) print(detections.shape) print(detections) #output : torch.Size([batch_num, 21, 200, 5]) # = (batch_num, クラス, confのtop200, 規格化されたBBoxの情報) #規格化されたBBoxの情報 (確信度, xmin, ymin, xmax, ymax) #ファイルパス image_file_path = "./data/cowboy-757575_640.jpg" #予測と、予測結果を画像で描画する. ssd = SSDPredictShow(eval_categories=voc_classes, net=net)
def handler(context):
    """Training entry point: build datasets/loaders, train SSD, save weights.

    Downloads the datasets referenced by ``context``, trains an SSD300
    network for ``Parameters.EPOCHS`` epochs (validating once every ten
    epochs), logs losses, and saves checkpoints plus a final ``model.pth``
    into ``Parameters.ABEJA_TRAINING_RESULT_DIR``.

    NOTE(review): relies on module-level names not visible in this block —
    ``device``, ``writer``, ``statistics``, ``get_dataset_ids``,
    ``set_categories``, ``load_dataset_from_api``, ``download`` — confirm
    they are defined at import time.
    """
    print(
        f'start training with parameters : {Parameters.as_dict()}, context : {context}'
    )
    try:
        dataset_alias = context.datasets
    # for older version
    except AttributeError:
        dataset_alias = context['datasets']
    train_dataset_id, val_dataset_id = get_dataset_ids(dataset_alias)

    # Map category ids to contiguous indices; +1 reserves the background class.
    id2index, _ = set_categories(list(dataset_alias.values()))
    num_classes = len(id2index)
    num_classes += 1  # add for background class
    print(f'number of classes : {num_classes}')

    print("Start downloading datasets.")
    dataset_items = list(
        load_dataset_from_api(train_dataset_id, max_num=Parameters.MAX_ITEMS))
    print("Finish downloading datasets.")
    random.shuffle(dataset_items)

    # Use a dedicated validation dataset when given; otherwise split the
    # training items by Parameters.TEST_SIZE.
    if val_dataset_id is not None:
        val_dataset_items = list(
            load_dataset_from_api(val_dataset_id,
                                  max_num=Parameters.MAX_ITEMS))
        random.shuffle(val_dataset_items)
        train_dataset_items = dataset_items
    else:
        test_size = int(len(dataset_items) * Parameters.TEST_SIZE)
        train_dataset_items, val_dataset_items = dataset_items[
            test_size:], dataset_items[:test_size]

    train_dataset = ABEJAPlatformDataset(train_dataset_items,
                                         phase="train",
                                         transform=DataTransform(
                                             Parameters.IMG_SIZE,
                                             Parameters.MEANS))
    val_dataset = ABEJAPlatformDataset(val_dataset_items,
                                       phase="val",
                                       transform=DataTransform(
                                           Parameters.IMG_SIZE,
                                           Parameters.MEANS))
    print(f'train dataset : {len(train_dataset)}')
    print(f'val dataset : {len(val_dataset)}')

    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=Parameters.BATCH_SIZE,
                                       shuffle=Parameters.SHUFFLE,
                                       collate_fn=od_collate_fn)
    val_dataloader = data.DataLoader(val_dataset,
                                     batch_size=Parameters.BATCH_SIZE,
                                     shuffle=False,
                                     collate_fn=od_collate_fn)
    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}
    print(f'data loaders : {dataloaders_dict}')

    ssd_cfg = {
        'num_classes': num_classes,  # number of classes including background class
        'input_size': Parameters.IMG_SIZE,
        'bbox_aspect_num': Parameters.BBOX_ASPECT_NUM,
        'feature_maps': Parameters.FEATURE_MAPS,
        'steps': Parameters.STEPS,
        'min_sizes': Parameters.MIN_SIZES,
        'max_sizes': Parameters.MAX_SIZES,
        'aspect_ratios': Parameters.ASPECT_RATIOS,
        'conf_thresh': Parameters.CONF_THRESHOLD,
        'top_k': Parameters.TOP_K,
        'nms_thresh': Parameters.NMS_THRESHOLD
    }
    net = SSD(phase="train", cfg=ssd_cfg)

    # Download pretrained VGG16 base-network weights.
    # TODO: better to host this file by ourselves
    # https://github.com/amdegroot/ssd.pytorch#training-ssd
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                               'vgg16_reducedfc.pth')
    download(url, weight_file)
    vgg_weights = torch.load(weight_file)
    print('finish loading base network...')
    net.vgg.load_state_dict(vgg_weights)

    def weights_init(m):
        # He (Kaiming) initialization for conv layers; zero their biases.
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:  # in case of bias
                nn.init.constant_(m.bias, 0.0)

    # apply initial values of He
    net.extras.apply(weights_init)
    net.loc.apply(weights_init)
    net.conf.apply(weights_init)

    # configure loss function
    criterion = MultiBoxLoss(jaccard_thresh=Parameters.OVERLAP_THRESHOLD,
                             neg_pos=Parameters.NEG_POS,
                             device=device)

    # configure optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=Parameters.LR,
                          momentum=Parameters.MOMENTUM,
                          dampening=Parameters.DAMPENING,
                          weight_decay=Parameters.WEIGHT_DECAY,
                          nesterov=Parameters.NESTEROV)

    # move network to device
    net.to(device)

    # NOTE: This flag allows to enable the inbuilt cudnn auto-tuner
    # to find the best algorithm to use for your hardware.
    # cf. https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/2
    torch.backends.cudnn.benchmark = True

    iteration = 1
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    # Keep the last non-zero epoch losses so logging on non-validation epochs
    # still reports a value.
    latest_epoch_train_loss = epoch_train_loss
    latest_epoch_val_loss = epoch_val_loss

    for epoch in range(Parameters.EPOCHS):
        t_epoch_start = time.time()
        t_iter_start = time.time()
        print('-------------')
        print('Epoch {}/{}'.format(epoch + 1, Parameters.EPOCHS))
        print('-------------')

        # loop of train and validation for each epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
                print('(train)')
            else:
                if (epoch + 1) % 10 == 0:
                    net.eval()
                    print('-------------')
                    print('(val)')
                else:
                    # perform validation once every ten times
                    continue

            # loop each mini-batch from data loader
            for images, targets in dataloaders_dict[phase]:
                images = images.to(device)
                targets = [ann.to(device) for ann in targets]

                # initialize optimizer
                optimizer.zero_grad()

                # calculate forward (gradients only in the train phase)
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    # calculate loss (localization + confidence)
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        # back propagate when training
                        loss.backward()  # calculate gradient
                        # clip gradients to avoid exploding updates
                        nn.utils.clip_grad_value_(
                            net.parameters(),
                            clip_value=Parameters.CLIP_VALUE)
                        optimizer.step()  # update parameters

                        if iteration % 10 == 0:
                            # display loss once every ten iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print(
                                'iter {} || Loss: {:.4f} || 10iter: {:.4f} sec.'
                                .format(iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1
                    else:
                        epoch_val_loss += loss.item()

        # loss and accuracy rate of each phase of epoch
        t_epoch_finish = time.time()

        # keep latest epoch loss
        if epoch_train_loss != 0.0:
            num_total = len(dataloaders_dict['train'])
            latest_epoch_train_loss = epoch_train_loss / num_total
        if epoch_val_loss != 0.0:
            num_total = len(dataloaders_dict['val'])
            latest_epoch_val_loss = epoch_val_loss / num_total

        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} || Epoch_VAL_Loss:{:.4f}'.
              format(epoch + 1, latest_epoch_train_loss,
                     latest_epoch_val_loss))
        print('timer: {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
        t_epoch_start = time.time()

        statistics(epoch + 1, latest_epoch_train_loss, None,
                   latest_epoch_val_loss, None)
        writer.add_scalar('main/loss', latest_epoch_train_loss, epoch + 1)
        # Every ten epochs: log validation loss and save an intermediate checkpoint.
        if (epoch + 1) % 10 == 0:
            writer.add_scalar('test/loss', latest_epoch_val_loss, epoch + 1)
            model_path = os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                                      f'ssd300_{str(epoch + 1)}.pth')
            torch.save(net.state_dict(), model_path)
        writer.flush()
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

    # Save the final model weights.
    torch.save(net.state_dict(),
               os.path.join(Parameters.ABEJA_TRAINING_RESULT_DIR,
                            'model.pth'))
    writer.close()
def train(train_file, test_file, num_epoch):
    """Train the SSD weapon detector and save its state dict.

    Args:
        train_file: list file describing training images/annotations.
        test_file: list file describing validation images/annotations.
        num_epoch: number of epochs to train for.

    Side effects: writes the trained weights to
    'drive/My Drive/BootCamp4/SSD/ssd_state_dict.pki'.
    """
    use_gpu = torch.cuda.is_available()
    Loss = MultiBoxLoss_2()  # SSD multibox loss (localization + confidence)
    learning_rate = 0.01
    num_epochs = num_epoch
    # The original code declared an unused batch_size = 4 while both loaders
    # hard-coded 16; the variable now drives the loaders (value kept at 16).
    batch_size = 16

    model = SSD(depth=50, width=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Reduce LR when the validation loss stops improving.
    scheduler = ReduceLROnPlateau(optimizer)
    if use_gpu:
        model.cuda()
    model.train()

    train_dataset = ListDataset(root='GUN/WeaponS/',
                                list_file=train_file,
                                train=True,
                                transform=transforms.ToTensor())
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)
    test_dataset = ListDataset(root='GUN/WeaponS/',
                               list_file=test_file,
                               train=True,
                               transform=transforms.ToTensor())
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)

    def _to_device(imgs, loc, conf):
        # Shared device transfer for train/valid batches. The deprecated
        # torch.autograd.Variable wrappers were removed (no-ops since 0.4).
        if use_gpu:
            return imgs.cuda(), loc.cuda(), conf.cuda()
        return imgs, loc, conf

    for epoch in range(num_epochs):
        t1 = time.time()
        model.train()
        total_loss, valid_loss = 0, 0

        # ---- training phase ----
        print("Train {} epoch: ".format(epoch + 1))
        for imgs, loc, conf in train_loader:
            imgs, loc, conf = _to_device(imgs, loc, conf)
            loc_pred, con_pred = model(imgs)
            loss = Loss(loc_pred, loc, con_pred, conf)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('\rEpoch [%d/%d], Training loss: %.4f' %
              (epoch + 1, num_epochs, total_loss / len(train_loader)),
              end='\n')

        # ---- validation phase ----
        model.eval()
        with torch.no_grad():
            for imgs, loc, conf in test_loader:
                imgs, loc, conf = _to_device(imgs, loc, conf)
                loc_pred, con_pred = model(imgs)
                loss = Loss(loc_pred, loc, con_pred, conf)
                valid_loss += loss.item()
        print('\rEpoch [%d/%d], Validing loss: %.4f' %
              (epoch + 1, num_epochs, valid_loss / len(test_loader)),
              end='\n')
        print('\n')
        # Step the plateau scheduler on the accumulated validation loss.
        scheduler.step(valid_loss)
        t2 = time.time()  # NOTE(review): epoch time is measured but never reported

    # Save only the state dict (smaller, version-robust) rather than the
    # whole pickled model object.
    PATH = 'drive/My Drive/BootCamp4/SSD/ssd_state_dict.pki'
    torch.save(model.state_dict(), PATH)