def iterate(self, data):
    """Run one optimization step on a single batch.

    Moves every tensor in ``data`` onto ``self.device``, performs the
    profiled forward pass, evaluates the per-output weighted losses,
    backpropagates, optionally clips gradients, and steps the optimizer.

    Args:
        data: dict of batch items; must contain 'tensor' (network input)
            and 'image' (used only for its spatial size).

    Returns:
        dict with the image size, batch data, raw network outputs and
        all loss terms (total, per-output, and hyperparameter-weighted).
    """
    # Move every tensor in the batch onto the training device in place.
    for key in data:
        t = data[key]
        if torch.is_tensor(t):
            data[key] = t.to(self.device)
    tensor = data['tensor']
    outputs = pybenchmark.profile('inference')(self.inference)(tensor)
    # NOTE(review): assumes 'image' is NHWC so dims 1:3 are (height, width)
    # -- confirm against the data loader.
    height, width = data['image'].size()[1:3]
    loss = pybenchmark.profile('loss')(model.Loss(self.config, data, self.limbs_index, height, width))
    losses = [loss(**output) for output in outputs]
    # Renamed the comprehension variable so it no longer shadows the
    # outer `loss` callable.
    losses_hparam = [
        {name: self.loss_hparam(i, name, value) for name, value in loss_dict.items()}
        for i, loss_dict in enumerate(losses)
    ]
    loss_total = sum(sum(loss_dict.values()) for loss_dict in losses_hparam)
    self.optimizer.zero_grad()
    loss_total.backward()
    try:
        clip = self.config.getfloat('train', 'clip')
        # clip_grad_norm_ is the in-place replacement for the removed
        # (deprecated since torch 0.4) nn.utils.clip_grad_norm.
        nn.utils.clip_grad_norm_(self.inference.parameters(), clip)
    except configparser.NoOptionError:
        # Clipping is optional; a missing config option means "do not clip".
        pass
    self.optimizer.step()
    return dict(
        height=height, width=width,
        data=data, outputs=outputs,
        loss_total=loss_total, losses=losses, losses_hparam=losses_hparam,
    )
def get_model(model_file=os.path.join(args.result_dir, 'mimic-kg-gp.ckpt')):
    """Build the FC model and restore its weights from ``model_file``.

    Also populates ``args.vocab`` / ``args.relation`` from the validation
    split, which the model constructor reads off the global ``args``.
    """
    valid_set = data_loader.DataBowl(args, phase='valid')
    args.vocab = valid_set.vocab
    args.relation = valid_set.relation
    # model.Loss() is constructed (and immediately discarded) exactly as
    # in the original code.
    net, _ = model.FCModel(args), model.Loss()
    net = _cuda(net)
    state = torch.load(model_file)
    net.load_state_dict(state)
    return net
def loss(smooth, lam, gamma, loss_type):
    """Construct the combined (dice + bce/focal) loss object.

    Args:
        smooth: additive term on the dice-loss numerator and denominator.
        lam: weight balancing the dice loss inside the combined loss.
        gamma: focusing parameter for the focal loss.
        loss_type: 'bce' or 'focal'.

    Returns:
        A ``model.Loss`` instance configured with the given parameters.
    """
    return model.Loss(smooth, lam, gamma, loss_type)
def main():
    """Train (phase 'train') or evaluate (phase 'test') the FC model.

    Builds the train/valid loaders, trains for ``args.epochs`` epochs
    while tracking the best AUC, dumps the concept-connection dict per
    epoch, and appends the final AUC to the shared log file.
    """
    dataset = data_loader.DataBowl(args, phase='train')
    train_loader = DataLoader(dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.workers,
                              pin_memory=True)
    dataset = data_loader.DataBowl(args, phase='valid')
    valid_loader = DataLoader(dataset, batch_size=args.batch_size,
                              shuffle=False, num_workers=args.workers,
                              pin_memory=True)
    # The model constructor reads these attributes off the global args.
    args.vocab = dataset.vocab
    args.relation = dataset.relation
    net, loss = model.FCModel(args), model.Loss()
    net = _cuda(net, 0)
    loss = _cuda(loss, 0)
    optimizer = torch.optim.Adam(list(net.parameters()), args.lr)
    best_auc = [0, 0, 0, 0, 0, 0]
    cui_con_dict = {}
    if args.phase == 'train':
        for epoch in range(args.epochs):
            train(train_loader, net, loss, epoch, optimizer, best_auc)
            best_auc, cui_con_dict = test(valid_loader, net, loss, epoch,
                                          best_auc, 'valid', cui_con_dict)
            # Python-3 print call (was a Python-2 print statement).
            print(args.words)
            # Dump the concept-connection dict for this epoch under a
            # fresh sequential file name.
            cons_dir = '../result/cons/{:s}/{:d}'.format(args.model, args.predict_day)
            py_op.mkdir(cons_dir)
            num = len(os.listdir(cons_dir))
            py_op.mywritejson(os.path.join(cons_dir, '{:d}.json'.format(num)), cui_con_dict)
        print('best auc', best_auc)
        auc = best_auc[0]
        with open('../result/log.txt', 'a') as f:
            f.write('#model {:s} #auc {:3.4f}\n'.format(args.model, auc))
    elif args.phase == 'test':
        net.load_state_dict(torch.load(args.resume))
        test(valid_loader, net, loss, 0, best_auc, 'valid', cui_con_dict)
def get_model(model_file, use_kg):
    """Build an FCModel (optionally knowledge-graph aware) and load weights.

    Args:
        model_file: path to a checkpoint saved with ``torch.save``.
        use_kg: forwarded to ``model.FCModel`` to toggle the KG branch.

    Returns:
        The network with the restored state dict.
    """
    dataset = data_loader.DataBowl(args, phase='valid')
    args.vocab = dataset.vocab
    args.relation = dataset.relation
    net, _ = model.FCModel(args, use_kg), model.Loss()
    net = _cuda(net)
    try:
        net.load_state_dict(torch.load(model_file))
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate; anything else falls back to a CPU-mapped load
    # (checkpoint was likely saved on a GPU that is unavailable here).
    except Exception:
        state = torch.load(model_file, map_location=torch.device('cpu'))
        for key, value in state.items():
            state[key] = value.cpu()
        net.load_state_dict(state)
    return net
def main(): """ main(): the default wrapper for training and inference process Please prepare config.py and model.py """ # arguments initialization args = nii_arg_parse.f_args_parsed() # initialization torch.manual_seed(args.seed) use_cuda = not args.no_cuda and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") # prepare data io if not args.inference: params = { 'batch_size': args.batch_size, 'shuffle': args.shuffle, 'num_workers': args.num_workers } # Load file list and create data loader trn_lst = nii_list_tool.read_list_from_text(prj_conf.trn_list) trn_set = nii_dset.NIIDataSetLoader( prj_conf.trn_set_name, \ trn_lst, prj_conf.input_dirs, \ prj_conf.input_exts, \ prj_conf.input_dims, \ prj_conf.input_reso, \ prj_conf.input_norm, \ prj_conf.output_dirs, \ prj_conf.output_exts, \ prj_conf.output_dims, \ prj_conf.output_reso, \ prj_conf.output_norm, \ './', params = params, truncate_seq = prj_conf.truncate_seq, min_seq_len = prj_conf.minimum_len, save_mean_std = True, wav_samp_rate = prj_conf.wav_samp_rate) if prj_conf.val_list is not None: val_lst = nii_list_tool.read_list_from_text(prj_conf.val_list) val_set = nii_dset.NIIDataSetLoader( prj_conf.val_set_name, val_lst, prj_conf.input_dirs, \ prj_conf.input_exts, \ prj_conf.input_dims, \ prj_conf.input_reso, \ prj_conf.input_norm, \ prj_conf.output_dirs, \ prj_conf.output_exts, \ prj_conf.output_dims, \ prj_conf.output_reso, \ prj_conf.output_norm, \ './', \ params = params, truncate_seq= prj_conf.truncate_seq, min_seq_len = prj_conf.minimum_len, save_mean_std = False, wav_samp_rate = prj_conf.wav_samp_rate) else: val_set = None # initialize the model and loss function model = prj_model.Model(trn_set.get_in_dim(), \ trn_set.get_out_dim(), \ args, trn_set.get_data_mean_std()) loss_wrapper = prj_model.Loss(args) # initialize the optimizer optimizer_wrapper = nii_op_wrapper.OptimizerWrapper(model, args) # if necessary, resume training if args.trained_model == "": checkpoint = None else: 
checkpoint = torch.load(args.trained_model) # start training nii_nn_wrapper.f_train_wrapper(args, model, loss_wrapper, device, optimizer_wrapper, trn_set, val_set, checkpoint) # done for traing else: # for inference # default, no truncating, no shuffling params = { 'batch_size': args.batch_size, 'shuffle': False, 'num_workers': args.num_workers } if type(prj_conf.test_list) is list: t_lst = prj_conf.test_list else: t_lst = nii_list_tool.read_list_from_text(prj_conf.test_list) test_set = nii_dset.NIIDataSetLoader( prj_conf.test_set_name, \ t_lst, \ prj_conf.test_input_dirs, prj_conf.input_exts, prj_conf.input_dims, prj_conf.input_reso, prj_conf.input_norm, prj_conf.test_output_dirs, prj_conf.output_exts, prj_conf.output_dims, prj_conf.output_reso, prj_conf.output_norm, './', params = params, truncate_seq = None, min_seq_len = None, save_mean_std = False, wav_samp_rate = prj_conf.wav_samp_rate) # initialize model model = prj_model.Model(test_set.get_in_dim(), \ test_set.get_out_dim(), \ args) if args.trained_model == "": print("No model is loaded by ---trained-model for inference") print("By default, load %s%s" % (args.save_trained_name, args.save_model_ext)) checkpoint = torch.load( "%s%s" % (args.save_trained_name, args.save_model_ext)) else: checkpoint = torch.load(args.trained_model) # do inference and output data nii_nn_wrapper.f_inference_wrapper(args, model, device, \ test_set, checkpoint) # done return
def main():
    """Train the MNET detector on the TFRecord train/val datasets.

    Enables GPU memory growth, builds the input pipelines, compiles the
    model with one loss and one mAP metric per anchor scale, and fits
    with early stopping, weight checkpointing and TensorBoard logging.
    """
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # do not pre-allocate all GPU memory
    session = tf.compat.v1.Session(config=config)
    # K.set_epsilon(1e-4)
    # K.backend.set_floatx('float16')

    model = mnet.MNET_complete(416, training=True)
    anchors = mnet.mnet_anchors
    anchor_masks = mnet.mnet_anchor_masks
    batch_size = 8

    # Get the training set
    train_dataset = dataset.load_tfrecord_dataset(train_path)

    # Allows you to filter out a class of one type; kept for reference
    # (the filter call below is intentionally disabled).
    @tf.function
    def filter_person(x, y):
        if tf.reduce_any(tf.math.equal(tf.constant([1]), tf.cast(y, dtype='int32'))):
            return False
        else:
            return True

    # train_dataset = train_dataset.filter(filter_person)
    train_dataset = train_dataset.shuffle(buffer_size=1024)
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, 416),
        dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_tfrecord_dataset(val_path)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, 416),
        dataset.transform_targets(y, anchors, anchor_masks, 80)))

    # 'learning_rate' replaces the deprecated 'lr' keyword argument.
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    loss = [mnet.Loss(anchors[mask], classes=80) for mask in anchor_masks]
    mAP = [mnet.map(anchors[mask]) for mask in anchor_masks]

    logdir = "logs/" + datetime.now().strftime("%Y-%m-%d-%H:%M")
    tensorboard_callback = K.callbacks.TensorBoard(log_dir=logdir)

    model.compile(optimizer=optimizer, loss=loss, run_eagerly=False,
                  metrics=[*mAP])

    callbacks = [
        # ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1),
        EarlyStopping(monitor='val_loss', patience=3, verbose=1),
        ModelCheckpoint('checkpoints/mnet_train_{epoch}_'
                        + datetime.now().strftime("%m-%d") + '.tf',
                        verbose=1, save_weights_only=True),
        tensorboard_callback,
    ]

    history = model.fit(train_dataset,
                        validation_data=val_dataset,
                        epochs=100,
                        callbacks=callbacks,
                        validation_steps=int(val_size / batch_size))
def trainer(cfgs, train_dataset): dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfgs['batch_size'], shuffle=True, num_workers=0) # model net = model.DRNet(cfgs).train() # loss criterion = model.Loss(cfgs) # optimal if cfgs['method'] == 'Adam': optimizer = torch.optim.Adam([{ 'params': net.parameters() }, { 'params': criterion.parameters() }], weight_decay=cfgs['weight_decay']) elif cfgs['method'] == 'SGD': optimizer = torch.optim.SGD([{ 'params': net.parameters() }, { 'params': criterion.parameters() }], lr=cfgs['lr'], momentum=cfgs['momentum'], weight_decay=cfgs['weight_decay']) # GPU device = torch.device( cfgs['device'] if torch.cuda.is_available() else "cpu") net.to(device) criterion.to(device) # train for epoch in range( cfgs['max_epoch']): # loop over the dataset multiple times model.learning_rate_decay(optimizer, epoch, decay_rate=cfgs['decay_rate'], decay_steps=cfgs['decay_steps']) running_loss = 0.0 data_collects = [] idx = 0 for i, data in enumerate(dataloader, 0): data_collects.append(data) if len(data_collects ) == cfgs['acc_grad'] or i == train_dataset.length - 1: duration, loss = acc_grad(net, criterion, optimizer, data_collects, device) data_collects.clear() idx += cfgs['batch_size'] running_loss += loss else: continue print_epoch = 10 * cfgs['acc_grad'] if i % print_epoch == print_epoch - 1: examples_per_sec = 10 / duration sec_per_batch = float(duration) format_str = '%s: step [%d, %5d], loss = %.3f (%.1f examples/sec; %.3f sec/batch)' print(format_str % (datetime.now(), epoch + 1, i + 1, running_loss / 10, examples_per_sec, sec_per_batch)) running_loss = 0.0 save_name = utils.cfgs2name(cfgs) if not os.path.exists(save_name): os.mkdir(save_name) torch.save(net.state_dict(), './' + save_name + '/' + save_name + '.pth') print('Finished Training')
def train(net, batch_size, epochs, lr):
    """Train ``net`` and evaluate on the test split after every epoch.

    Args:
        net: the network to optimize.
        batch_size: mini-batch size for both loaders.
        epochs: number of passes over the training set.
        lr: learning rate forwarded to ``model.Optimizer``.

    Returns:
        (losses, val_losses): per-batch training and validation loss
        histories accumulated across all epochs.
    """
    print('-' * 30)
    print(" HYPERPARAMETERS ")
    print('-' * 30)
    print("Batch size = ", batch_size)
    print("Epochs = ", epochs)
    print("Learning rate = ", lr)
    print('-' * 30)
    # function to call in data
    train_loader, test_loader = data.getTrainingSets(filename1, filename2,
                                                     batch_size)
    # Create loss and optimizer functions
    loss = model.Loss()
    optimizer = model.Optimizer(net, lr)
    trainingStartTime = time.time()
    # Start training
    totalStep = len(train_loader)
    losses = []
    val_losses = []
    for epoch in range(epochs):
        runningLoss = 0.0
        totalTrainLoss = 0.0
        startTime = time.time()
        for i, (input, target) in enumerate(train_loader):
            # torch.autograd.Variable is deprecated (an identity since
            # torch 0.4); plain tensors track gradients directly.
            outputs = net(input)
            loss_size = loss(outputs, target)
            losses.append(loss_size.item())
            optimizer.zero_grad()
            loss_size.backward()
            optimizer.step()
            runningLoss += loss_size.item()
            totalTrainLoss += loss_size.item()
        # Validation: no gradient tracking needed -- saves memory/time
        # without changing the computed values.
        with torch.no_grad():
            for i, (pred, real) in enumerate(test_loader):
                val_outputs = net(pred)
                val_loss_size = loss(val_outputs, real)
                val_losses.append(val_loss_size.item())
        # NOTE(review): this reports only the LAST batch's losses for the
        # epoch, as the original did.
        print('Epoch: {}/{}, Loss: {:.4f}, Val loss: {:0.4f}, Time: {:0.2f}s'.
              format(epoch + 1, epochs, loss_size.item(),
                     val_loss_size.item(), time.time() - startTime))
    return losses, val_losses