raise ValueError(f'{args.dataset_dir} does not exist.') if torch.cuda.device_count() == 0: device = torch.device("cpu") print('[Training] Running on CPU.') else: device = torch.device("cuda:0") print('[Training] Running on GPU.') image_files = read_instances_with_box(args.mask_dir, args.dataset_dir, args.box_file) print('Instantiating neural network...') cf = Configuration() if args.test_run: cf.batch_size = 2 cf.batches_per_print = 1 cf.epoches_per_save = 1 net = UNet(has_sigmoid=cf.has_sigmoid, multiplier=cf.multiplier).float() # criterion = torch.nn.BCELoss() # criterion = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([cf.pos_weight])) criterion = torch.nn.BCEWithLogitsLoss( pos_weight=torch.tensor([cf.pos_weight]).to(device)) if cf.optimizer == 'adam': print('Using Adam optimizer.') optimizer = optim.Adam(net.parameters(), lr=cf.learning_rate) else: print('Using SGD optimizer.') optimizer = optim.SGD(net.parameters(),
def _trainable_params(module):
    """Return an iterator over the parameters of `module` with requires_grad set."""
    return ifilter(lambda p: p.requires_grad, module.parameters())


def _checkpoint_parts(model_type):
    """Return the (module, file suffix) pairs persisted for `model_type`.

    The feature extractor and encoder are always included; the remaining
    entries depend on the model type. The order of the list is the order in
    which run_model saves and loads the files. Reads the module-level model
    globals, so it must only be called after run_model has built them.
    """
    parts = [(feature, '_feature'), (encoder, '_encoder')]
    if model_type == 'INDP':
        parts.append((indp, '_predictor'))
    elif model_type == 'CRF':
        parts.append((crf, '_predictor'))
    elif model_type in ('TF-RNN', 'SS-RNN'):
        parts.append((mldecoder, '_predictor'))
    elif model_type == 'AC-RNN':
        parts.append((mldecoder, '_predictor'))
        parts.append((rltrain, '_critic'))
    return parts


def _load_weights(module, file_name):
    """Load the state dict stored at `file_name` into `module`.

    Without CUDA the tensors are remapped to CPU storage, so that a
    checkpoint written on a GPU machine can still be loaded.
    """
    if hasCuda:
        state = torch.load(file_name)
    else:
        state = torch.load(file_name,
                           map_location=lambda storage, loc: storage)
    module.load_state_dict(state)


def run_model(mode, path, in_file, o_file):
    """Build the model selected by the configuration and train or test it.

    The constructed modules and their optimizers are published through
    module-level globals (feature, encoder, indp, crf, mldecoder, rltrain and
    the matching *_opt names).

    Args:
        mode: 'train' or 'test'. Any other value only builds the model.
        path: Prefix prepended to every checkpoint file name
            (path + model type + suffix).
        in_file: Raw input file; only used in 'test' mode, where it is
            stored as cfg.test_raw.
        o_file: File the predictions are written to. In 'train' mode it is
            ignored and replaced by './temp.predicted_<model type>'.

    Returns:
        None.
    """
    global feature, encoder, indp, crf, mldecoder, rltrain, f_opt, e_opt, i_opt, c_opt, m_opt, r_opt

    cfg = Configuration()

    # General mode has two values: 'train' or 'test'
    cfg.mode = mode

    # Set random seeds
    random.seed(cfg.seed)
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    if hasCuda:
        torch.cuda.manual_seed_all(cfg.seed)

    # Load embeddings
    load_embeddings(cfg)

    # Only for testing
    if mode == 'test':
        cfg.test_raw = in_file

    # Construct models. Each optimizer is created before the module is moved
    # to the GPU, which keeps the original construction order.
    model_type = cfg.model_type
    is_actor_critic = model_type == 'AC-RNN'

    feature = Feature(cfg)
    if is_actor_critic:
        f_opt = optim.SGD(_trainable_params(feature), lr=cfg.actor_step_size)
    else:
        f_opt = optim.Adam(_trainable_params(feature), lr=cfg.learning_rate)
    if hasCuda:
        feature.cuda()

    encoder = Encoder(cfg)
    if is_actor_critic:
        e_opt = optim.SGD(_trainable_params(encoder), lr=cfg.actor_step_size)
    else:
        e_opt = optim.Adam(_trainable_params(encoder), lr=cfg.learning_rate)
    if hasCuda:
        encoder.cuda()

    if model_type == 'INDP':
        indp = INDP(cfg)
        i_opt = optim.Adam(_trainable_params(indp), lr=cfg.learning_rate)
        if hasCuda:
            indp.cuda()

    elif model_type == 'CRF':
        crf = CRF(cfg)
        c_opt = optim.Adam(_trainable_params(crf), lr=cfg.learning_rate)
        if hasCuda:
            crf.cuda()

    elif model_type == 'TF-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.Adam(_trainable_params(mldecoder), lr=cfg.learning_rate)
        if hasCuda:
            mldecoder.cuda()
        cfg.mldecoder_type = 'TF'

    elif model_type == 'SS-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.Adam(_trainable_params(mldecoder), lr=cfg.learning_rate)
        if hasCuda:
            mldecoder.cuda()
        cfg.mldecoder_type = 'SS'

    elif model_type == 'AC-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.SGD(_trainable_params(mldecoder), lr=cfg.actor_step_size)
        if hasCuda:
            mldecoder.cuda()
        cfg.mldecoder_type = 'TF'

        rltrain = RLTrain(cfg)
        r_opt = optim.Adam(_trainable_params(rltrain),
                           lr=cfg.learning_rate,
                           weight_decay=0.001)
        if hasCuda:
            rltrain.cuda()
        cfg.rltrain_type = 'AC'

        # For RL, the network should be pre-trained with teacher forced ML
        # decoder. In 'test' mode these weights would be overwritten by the
        # AC-RNN checkpoint right below, so the TF-RNN files are not needed.
        if mode != 'test':
            _load_weights(feature, path + 'TF-RNN' + '_feature')
            _load_weights(encoder, path + 'TF-RNN' + '_encoder')
            _load_weights(mldecoder, path + 'TF-RNN' + '_predictor')

    # NOTE: print(...) with a single argument produces the same output as the
    # print statement on Python 2 and also parses on Python 3.
    if mode == 'train':
        o_file = './temp.predicted_' + cfg.model_type
        best_val_cost = float('inf')
        best_val_epoch = 0
        first_start = time.time()
        epoch = 0
        while epoch < cfg.max_epochs:
            print('')
            print('Model:{} | Epoch:{}'.format(cfg.model_type, epoch))

            if cfg.model_type == 'SS-RNN':
                # Specify the decaying schedule for sampling probability.
                # Inverse sigmoid schedule:
                cfg.sampling_p = float(
                    cfg.k) / float(cfg.k + np.exp(float(epoch) / cfg.k))

            start = time.time()
            run_epoch(cfg)

            print('\nValidation:')
            predict(cfg, o_file)
            # evaluate() is turned into a cost (lower is better) by
            # subtracting it from 100.
            val_cost = 100 - evaluate(cfg, cfg.dev_ref, o_file)
            print('Validation score:{}'.format(100 - val_cost))

            if val_cost < best_val_cost:
                # New best validation result: checkpoint every module.
                best_val_cost = val_cost
                best_val_epoch = epoch
                for module, suffix in _checkpoint_parts(cfg.model_type):
                    torch.save(module.state_dict(),
                               path + cfg.model_type + suffix)

            # For early stopping
            if epoch - best_val_epoch > cfg.early_stopping:
                break

            print('Epoch training time:{} seconds'.format(time.time() - start))
            epoch += 1

        print('Total training time:{} seconds'.format(time.time() -
                                                      first_start))

    elif mode == 'test':
        cfg.batch_size = 256
        for module, suffix in _checkpoint_parts(cfg.model_type):
            _load_weights(module, path + cfg.model_type + suffix)

        print('')
        print('Model:{} Predicting'.format(cfg.model_type))
        start = time.time()
        predict(cfg, o_file)
        print('Total prediction time:{} seconds'.format(time.time() - start))
    return
print(f"Box for {bname} does not exist. Skipping.") continue image_file = os.path.join(args.dataset_dir, bname) if not os.path.exists(image_file): print(f"{image_file} does not exist. Skipping.") continue image_files.append((image_file, mask_file, box)) # Random shuffle if args.max_images > 0: if args.random_seed > 0: random.Random(args.random_seed).shuffle(image_files) image_files = image_files[0:args.max_images] configs = Configuration() configs.batch_size = 1 # For memory considerations dataset = COCOTextSegmentationDataset(image_files, configs.im_size, configs.random_scale, configs.random_displacement, configs.random_flip) dataloader = DataLoader(dataset, batch_size=configs.batch_size, shuffle=False, num_workers=0) os.makedirs(args.output_dir, exist_ok=True) for i_batch, sample_batched in enumerate(dataloader): for i in range(configs.batch_size): if i >= sample_batched['image'].shape[0]: break img = sample_batched['image'][i].numpy().transpose((1,2,0)) mask = sample_batched['mask'][i].numpy().transpose((1,2,0)) overlay_img = draw_overlay_image(img, mask, threshold=0.5)