def main(args):
    """Full train/evaluate/save pipeline for a text classifier.

    Loads the data from a database, fits the configured model on an
    80/20 split, evaluates on the held-out portion, and persists the
    trained model.

    :param args: parsed CLI namespace with database_filepath,
        model_filepath, model_name, and save_results ("yes"/other).
    """
    database_filepath = args.database_filepath
    model_filepath = args.model_filepath
    model_name = args.model_name
    # Only the literal string "yes" enables result saving.
    save_results = args.save_results == "yes"

    logging.info('Loading data...\n DATABASE: {}'.format(database_filepath))
    X, Y, category_names = load_data(database_filepath)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    logging.info('Build model: {}'.format(model_name))
    model = build_model(model_name)

    logging.info('Training model on {} training samples'.format(X_train.shape))
    model.fit(X_train, Y_train)

    logging.info('Evaluating model...')
    evaluate_model(model, X_test, Y_test, category_names, save_results,
                   model_filepath)

    logging.info('Saving model...\n MODEL: {}'.format(model_name))
    save_model(model, model_filepath, model_name)

    logging.info('Trained model saved!')
def build_with(filename, oldtype, nres):
    """Build a quantized CIFAR-10 ResNet, load weights, and scan activations.

    Constructs the network described by ``oldtype``/``nres``, loads weights
    from ``filename``, then runs the test set through selected intermediate
    conv layers and reports any activation whose magnitude exceeds 63
    (presumably the representable range of the quantized format — confirm).

    :param filename: path of the weights file to load
    :param oldtype: legacy network-type string, converted via convert_netw_type
    :param nres: number of residual blocks
    :return: activations of the last inspected layer over the test set
    """
    newp = convert_netw_type(oldtype)
    # FIX: renamed local from `type` — it shadowed the builtin.
    net_type, wbits, abits = get_netw_type_wbits_abits(newp)
    print(net_type, wbits, abits)
    cf = {
        "architecture": "RESNET",
        "dim": 32,
        "channels": 3,
        "classes": 10,
        "nres": nres,
        "kernel_initializer": 'he_normal',
        "kernel_regularizer": 1e-4,
        "dataset": "CIFAR-10",
        "network_type": net_type,
        "wbits": wbits,
        "abits": abits,
        "pfilt": 1
    }
    cf = obj(cf)
    model = build_model(cf)
    # FIX: dropped the redundant `wname = filename` alias.
    model.load_weights(filename)
    train_data, val_data, test_data = load_dataset("CIFAR-10", cf)
    # NOTE(review): range(20, 21) inspects only layer conv2d_21; widen the
    # range to audit more layers.
    for la in range(20, 21):
        layer_name = f"conv2d_{la+1}"
        intermediate_layer_model = Model(
            inputs=model.input,
            outputs=model.get_layer(layer_name).output)
        score = intermediate_layer_model.predict(test_data.X, verbose=1)
        for elem in range(score.shape[0]):
            for i in range(score.shape[1]):
                for j in range(score.shape[2]):
                    for k in range(score.shape[3]):
                        if abs(score[elem][i][j][k]) > 63:
                            print(
                                f"SO SAAAAAAAAAd: element:{elem} layer:{la+1} value:{abs(score[elem][i][j][k])}"
                            )
                            # Report only the first offender per (elem, i, j);
                            # `break` exits just the innermost channel loop.
                            break
    return score
def train():
    """Train a CNN speech classifier and export it for TensorFlow.js.

    Loads train/eval/test splits from .npy files, runs the training loop,
    prints per-epoch accuracy/loss for the eval and test sets, checkpoints
    the model, and converts the saved model for tfjs.

    FIXES vs. original:
    - removed a leftover ``import ipdb; ipdb.set_trace()`` debugger trap;
    - the duplicated evaluation loop is factored into ``_evaluate``;
    - rejoined a print format string that was broken mid-literal.
    """
    ds_train = DataSource('audio_files/speech/train_data.npy',
                          'audio_files/speech/train_labels.npy',
                          classes_dict='audio_files/speech/classes')
    ds_eval = DataSource('audio_files/speech/eval_data.npy',
                         'audio_files/speech/eval_labels.npy',
                         classes_dict='audio_files/speech/classes')
    ds_test = DataSource('audio_files/speech/test_data.npy',
                         'audio_files/speech/test_labels.npy',
                         classes_dict='audio_files/speech/classes')
    model = mf.build_model('CNN', input_shape=ds_train.input_shape,
                           num_classes=ds_train.num_classes)
    optimizer = Optimizer('Adam')
    EPOCHS = 1
    for epoch in range(EPOCHS):
        train_losses = []
        train_accuracy = 0
        train_instances = 0
        for (batch, (img_tensor, label)) in enumerate(ds_train.dataset):
            train_loss, predicted = optimizer.compute_and_apply_gradients(
                model, img_tensor, label)
            train_losses.append(train_loss)
            train_accuracy += sum(np.argmax(predicted, axis=1) == label.numpy())
            train_instances += label.numpy().shape[0]

        # Validation split.
        eval_losses, accuracy, instances = _evaluate(model, ds_eval)
        print('Epoch {} Train Accuracy: {:.6f} | Test Accuracy: {:.6f}'.format(
            epoch + 1, train_accuracy / train_instances, accuracy / instances))
        print('Epoch {} Train Loss {:.6f} | Eval Loss {:.6f}'.format(
            epoch + 1, sum(train_losses) / len(train_losses),
            sum(eval_losses) / len(eval_losses)))

        # Held-out test split (same report format as the original).
        eval_losses, accuracy, instances = _evaluate(model, ds_test)
        print('Epoch {} Train Accuracy: {:.6f} | Test Accuracy: {:.6f}'.format(
            epoch + 1, train_accuracy / train_instances, accuracy / instances))
        print('Epoch {} Train Loss {:.6f} | Eval Loss {:.6f}'.format(
            epoch + 1, sum(train_losses) / len(train_losses),
            sum(eval_losses) / len(eval_losses)))

    # NOTE(review): original indentation was lost; checkpointing is placed
    # after the loop (behavior is identical with EPOCHS == 1) — confirm.
    saver = tf.train.Checkpoint(model=model, optimizer=optimizer.optimizer)
    saver.save('/home/marianne/deep-audio/saver/save')
    tfjs.converters.convert_tf_saved_model(
        '/home/marianne/deep-audio/saver/save.ckpt',
        output_node_names=['test'],
        output_dir='/home/marianne/deep-audio/')


def _evaluate(model, ds):
    """Return (per-batch losses, correct-prediction count, instance count)
    for ``model`` over ``ds.dataset`` without updating weights."""
    losses = []
    accuracy = 0
    instances = 0
    for (batch, (img_tensor, label)) in enumerate(ds.dataset):
        loss, predicted = model.compute_loss(img_tensor, label, training=False)
        accuracy += sum(np.argmax(predicted, axis=1) == label.numpy())
        instances += label.numpy().shape[0]
        losses.append(loss)
    return losses, accuracy, instances
def _build_model(self):
    """Construct the base model, then attach the bias-predictor network.

    Dimensions and architecture come from ``self.option``; both models are
    moved to GPU when CUDA is enabled.
    """
    super()._build_model()

    opt = self.option
    self.bias_predictor = build_model(
        opt,
        opt.bias_predictor_name,
        in_dims=opt.bias_predictor_in_dims,
        hid_dims=opt.bias_predictor_hid_dims,
        out_dims=opt.num_bias_classes)
    logging.getLogger().info(f"Bias predictor {self.bias_predictor}")

    if opt.cuda:
        self.model.cuda()
        self.bias_predictor.cuda()
def _build_model(self):
    """Construct the base model plus an auxiliary bias model with GCE loss.

    The bias model shares the main architecture name and dimensions from
    ``self.option``; everything is moved to GPU when CUDA is enabled.
    """
    super()._build_model()

    opt = self.option
    self.bias_model = build_model(opt,
                                  opt.model_name,
                                  out_dims=opt.num_classes,
                                  in_dims=opt.in_dims,
                                  hid_dims=opt.hid_dims,
                                  freeze_layers=opt.freeze_layers)

    log = logging.getLogger()
    log.info("Bias model")
    log.info(self.bias_model)

    # NOTE(review): GCELoss(q=...) presumably implements generalized
    # cross-entropy for bias amplification — confirm against its definition.
    self.bias_amplification_loss = GCELoss(q=opt.bias_loss_gamma)

    if opt.cuda:
        for module in (self.model, self.bias_model, self.loss):
            module.cuda()
def _build_model(self):
    """
    Constructs the model using the model factory
    :return:
    """
    # All architecture hyperparameters come from the option object.
    self.model = build_model(
        self.option,
        self.option.model_name,
        out_dims=self.option.num_classes,
        in_dims=self.option.in_dims,
        hid_dims=self.option.hid_dims,
        freeze_layers=self.option.freeze_layers)
    logging.getLogger().info(f"Model {self.model}")
    # SECURITY NOTE(review): eval() instantiates a loss class from a
    # config-supplied string — safe only while option.loss_type comes from
    # trusted configuration; an explicit name->class mapping would be safer.
    # reduction='none' keeps per-sample losses (callers presumably reduce
    # with their own weighting — confirm).
    self.loss = eval(self.option.loss_type)(reduction='none')
    if self.option.cuda:
        self.model.cuda()
        self.loss.cuda()
def _build_model(self):
    """Build the base model, an auxiliary bias model, and a bias retriever.

    When no dedicated bias architecture is configured, the main model
    architecture is reused. Models are moved to GPU when CUDA is enabled.
    """
    super()._build_model()

    # Fall back to the primary architecture if none was configured.
    if self.option.bias_model_name is None:
        self.option.bias_model_name = self.option.model_name

    self.bias_model = model_factory.build_model(
        self.option,
        self.option.bias_model_name,
        in_dims=self.option.bias_variable_dims,
        hid_dims=self.option.bias_model_hid_dims,
        out_dims=self.option.num_classes)

    log = logging.getLogger()
    log.info("Bias Model")
    log.info(self.bias_model)

    self.bias_retriever = build_bias_retriever(self.option.bias_variable_name)

    if self.option.cuda:
        for module in (self.model, self.bias_model, self.loss):
            module.cuda()
def main():
    """Restore a trained model from a checkpoint, export to ONNX, and verify.

    Builds the architecture, loads checkpoint weights (onto CPU first, then
    explicitly onto GPU), converts to ONNX, schema-checks the exported
    graph, and compares torch vs. ONNX outputs.

    Relies on module-level ``args``, ``batch_size`` and ``onnx_model_path``.
    """
    torch_model = build_model(model_name=args.model_name,
                              num_classes=args.num_classes,
                              global_pool=args.global_pool)
    if args.best_checkpoint is not None:
        assert os.path.isfile(args.best_checkpoint), '{} not found'.format(
            args.best_checkpoint)
        # BUG FIX: map_location=None loads tensors back onto the device they
        # were saved from, contradicting the original "load onto CPU" intent;
        # remap to CPU here — the model is moved to GPU explicitly below.
        checkpoint = torch.load(args.best_checkpoint,
                                map_location=lambda storage, loc: storage)
        print('threshold: {} '.format(checkpoint['threshold']))
        print('Restoring model with {} architecture...'.format(
            checkpoint['arch']))
        # Multi-GPU checkpoints store keys under module.*, so wrap first.
        if checkpoint['num_gpu'] > 1:
            torch_model = torch.nn.DataParallel(torch_model).cuda()
        else:
            torch_model.cuda()
        # load model weights
        torch_model.load_state_dict(checkpoint['state_dict'])
    # NOTE(review): original indentation was lost; conversion is placed
    # outside the checkpoint branch — confirm it should run unconditionally.
    # convert to onnx
    convert_onnx(torch_model, batch_size=batch_size,
                 onnx_model_path=onnx_model_path)
    # verify the model's structure and confirm that the model has a valid schema
    check_onnx(onnx_model_path)
    compare_torch_and_onnx_model(torch_model, onnx_model_path)
# NOTE(review): this fragment continues a loop that parses "key=value"
# override strings; the loop header defining `s_s` and `k` is outside this
# view, so the indentation of the first two statements is reconstructed.
v = "=".join(s_s[1:]).strip()
override_dir[k] = v
# Collected CLI overrides feed the Config object below.
arguments.override = override_dir
cfg = arguments.config_path
cf = Config(cfg, cmd_args=arguments.override)
# if necessary, only use the CPU for debugging
if cf.cpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = cf.cuda
# ## Construct the network
print('Construct the Network\n')
model = build_model(cf)
print('loading data\n')
train_data, val_data, test_data = load_dataset(cf.dataset, cf)
print('setting up the network and creating callbacks\n')
# Keep only the best-val_acc weights; check once per epoch.
checkpoint = ModelCheckpoint(cf.out_wght_path,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             mode='max',
                             period=1)
tensorboard = TensorBoard(log_dir=str(cf.tensorboard_name),
                          histogram_freq=0,
                          write_graph=True,
                          write_images=False)
def build_model(self, args):
    """Delegate network construction to the model factory and cache its
    outputs (logits, predictions, variables, training flag) on self."""
    built = model_factory.build_model(self.inputs, self.nclasses, args)
    self.net_output, self.predictions, self.model_variables, self.is_training = built
def inference(data_path, threshold=0.3):
    """Run the restored model over the test set and write prediction CSVs.

    Produces results_raw.csv (probabilities), results_thr.csv (thresholded
    labels) and submission.csv under a checkpoint-derived directory.

    :param data_path: root directory of the test images
    :param threshold: per-class decision threshold, overridden by the
        checkpoint's stored thresholds when present
    :raises AssertionError: when no checkpoint is specified

    FIXES vs. original: no-CUDA branch no longer calls model.cuda();
    fallback threshold is a tensor (a float broke .cuda()/unsqueeze);
    ``assert False and "msg"`` replaced by a raise carrying the message;
    F.softmax given an explicit dim; final message says Inference.
    """
    name_index, index_name = read_class_names(args.classes)
    output_col = ['image_name'] + list(name_index.keys())
    submission_col = ['image_name', 'tags']
    inference_dir = None

    # ----------------------------dataset generator--------------------------------
    test_transform = get_transform(size=args.image_size, mode='test')
    test_dataset = PlanetDataset(image_root=data_path,
                                 phase='test',
                                 img_type=args.image_type,
                                 img_size=args.image_size,
                                 transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_works)

    # ---------------------------------load model and param--------------------------------
    model = build_model(model_name=args.model_name,
                        num_classes=args.num_classes,
                        global_pool=args.global_pool)
    if args.best_checkpoint is not None:
        assert os.path.isfile(args.best_checkpoint), '{} not found'.format(
            args.best_checkpoint)
        checkpoint = torch.load(args.best_checkpoint)
        print('Restoring model with {} architecture...'.format(
            checkpoint['arch']))
        # load model weights
        if use_cuda:
            if checkpoint['num_gpu'] > 1:
                model = torch.nn.DataParallel(model, device_ids=gpu_ids).cuda()
            else:
                model.cuda()
        else:
            # BUG FIX: the original called model.cuda() here, crashing on
            # CUDA-less hosts; only wrap so state_dict keys match
            # multi-GPU checkpoints.
            if checkpoint['num_gpu'] > 1:
                model = torch.nn.DataParallel(model)
        model.load_state_dict(checkpoint['state_dict'])
        # update threshold
        if 'threshold' in checkpoint:
            threshold = checkpoint['threshold']
            threshold = torch.tensor(threshold, dtype=torch.float32)
            print('Using thresholds:', threshold)
        else:
            # BUG FIX: keep the fallback as a tensor (a bare float broke
            # threshold.cuda() and torch.unsqueeze below); honor the
            # caller-supplied value instead of hard-coding 0.3.
            threshold = torch.tensor(threshold, dtype=torch.float32)
        if use_cuda:
            threshold = threshold.cuda()
        # generate save path
        inference_dir = os.path.join(
            os.path.normcase(args.inference_path),
            '{}-f{}-{:.6f}'.format(checkpoint['arch'], checkpoint['fold'],
                                   checkpoint['f2']))
        os.makedirs(inference_dir, exist_ok=True)
        print('Model restored from file: {}'.format(args.best_checkpoint))
    else:
        # BUG FIX: `assert False and "..."` raised without a message.
        raise AssertionError('No checkpoint specified')

    # -------------------------------------inference---------------------------------------
    model.eval()
    batch_time_meter = AverageMeter()
    results_raw = []
    results_label = []
    results_submission = []
    since_time = time.time()
    pbar = tqdm(enumerate(test_loader))
    try:
        with torch.no_grad():
            start = time.time()
            for batch_idx, (inputs, _, indices) in pbar:
                if use_cuda:
                    inputs = inputs.cuda()
                input_var = inputs
                outputs = model(input_var)
                if args.multi_label:
                    if args.loss == 'nll':
                        # BUG FIX: softmax without dim is deprecated and
                        # ambiguous; normalize over the class dimension.
                        outputs = F.softmax(outputs, dim=1)
                    else:
                        outputs = torch.sigmoid(outputs)
                    expand_threshold = torch.unsqueeze(
                        threshold, 0).expand_as(outputs)
                    output_labels = (outputs.data > expand_threshold).byte()
                # NOTE(review): output_labels is undefined when
                # args.multi_label is False — same as the original; confirm
                # this path is never taken for single-label runs.
                # move data to CPU and collect
                outputs = outputs.cpu().data.numpy()
                output_labels = output_labels.cpu().numpy()
                indices = indices.cpu().numpy().flatten()
                for index, output, output_label in zip(indices, outputs,
                                                       output_labels):
                    image_name = os.path.splitext(
                        os.path.basename(test_dataset.images[index]))[0]
                    results_raw.append([image_name] + list(output))
                    results_label.append([image_name] + list(output_label))
                    results_submission.append(
                        [image_name] +
                        [index_to_tag(output_label, index_name)])
                batch_time_meter.update(time.time() - start)
                if batch_idx % args.summary_iter == 0:
                    print(
                        'Inference: [{}/{} ({:.0f}%)] '
                        'Time: {batch_time.val:.3f}s, {rate:.3f}/s '
                        '({batch_time.avg:.3f}s, {rate_avg:.3f}/s) '.format(
                            batch_idx * len(inputs),
                            len(test_loader.sampler),
                            100. * batch_idx / len(test_loader),
                            batch_time=batch_time_meter,
                            rate=input_var.size(0) / batch_time_meter.val,
                            rate_avg=input_var.size(0) /
                            batch_time_meter.avg))
                start = time.time()
    except KeyboardInterrupt:
        # Partial results collected so far are still written out below.
        pass

    results_raw_df = pd.DataFrame(results_raw, columns=output_col)
    results_raw_df.to_csv(os.path.join(inference_dir, 'results_raw.csv'),
                          index=False)
    results_label_df = pd.DataFrame(results_label, columns=output_col)
    results_label_df.to_csv(os.path.join(inference_dir, 'results_thr.csv'),
                            index=False)
    results_submission_df = pd.DataFrame(results_submission,
                                         columns=submission_col)
    results_submission_df.to_csv(os.path.join(inference_dir, 'submission.csv'),
                                 index=False)
    time_elapsed = time.time() - since_time
    # BUG FIX: this function performs inference, not training.
    print('*** Inference complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
def main():
    """Train the model with checkpointing and TensorBoard logging.

    Builds loaders for the train/eval folds, constructs model, criterion,
    optimizer and LR scheduler (optionally resuming from a checkpoint), then
    trains for args.epochs, logging metrics and saving a checkpoint each
    epoch with best-loss / best-f2 tracking.

    FIXES vs. original: best-f2 is tracked independently of best-loss (the
    `elif` skipped it whenever loss improved); the final summary prints had
    their value/epoch arguments swapped and crashed if interrupted before
    the first epoch; the scheduler-step condition now matches the condition
    used to construct the scheduler; `assert False and "msg"` replaced by a
    raise carrying the message.
    """
    # --------------------------------config-------------------------------
    global use_cuda
    global gpu_ids
    threshold = args.threshold
    best_loss = None  # (loss, epoch) of the best eval loss so far
    best_f2 = None    # (f2, epoch) of the best eval f2 so far
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    # ------------------------------ load dataset---------------------------
    print('==> Loader dataset {}'.format(args.train_data))
    train_transform = get_transform(size=args.image_size, mode='train')
    train_dataset = PlanetDataset(image_root=args.train_data,
                                  target_path=args.labels,
                                  phase='train',
                                  fold=args.fold,
                                  img_type=args.image_type,
                                  img_size=args.image_size,
                                  transform=train_transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_works)
    eval_transform = get_transform(size=args.image_size, mode='eval')
    eval_dataset = PlanetDataset(image_root=args.train_data,
                                 target_path=args.labels,
                                 phase='eval',
                                 fold=args.fold,
                                 img_type=args.image_type,
                                 img_size=args.image_size,
                                 transform=eval_transform)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_works)

    # ---------------------------------model---------------------------------
    model = build_model(model_name=args.model_name,
                        num_classes=args.num_classes,
                        pretrained=args.pretrained,
                        global_pool=args.global_pool)
    if use_cuda:
        if len(gpu_ids) > 1:
            model = torch.nn.DataParallel(
                model, device_ids=gpu_ids).cuda()  # load model to cuda
        else:
            model.cuda()
    # show model size
    print('\t Total params volumes: {:.2f} M'.format(
        sum(param.numel() for param in model.parameters()) / 1000000.0))

    # --------------------------------criterion-----------------------
    criterion = None
    if args.reweight:
        class_weights = torch.from_numpy(
            train_dataset.get_class_weights()).float()
        class_weights_norm = class_weights / class_weights.sum()
        if use_cuda:
            class_weights = class_weights.cuda()
            class_weights_norm = class_weights_norm.cuda()
    else:
        class_weights = None
        class_weights_norm = None
    if args.loss.lower() == 'nll':
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
    elif args.loss.lower() == 'mlsm':
        assert args.multi_label
        criterion = torch.nn.MultiLabelSoftMarginLoss(weight=class_weights)
    else:
        # BUG FIX: `assert False and "..."` asserted without a message.
        raise AssertionError('Invalid loss function')

    #---------------------------------optimizer----------------------------
    optimizer = get_optimizer(model, args)
    # lr scheduler: plateau-based unless a fixed decay interval is given
    if not args.decay_epoch:
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.1, patience=8, verbose=False)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=args.decay_epoch, gamma=0.1)

    # Resume model
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            exit(-1)

    since = time.time()
    try:
        for epoch in range(start_epoch, args.epochs):
            train_metrics = train(loader=train_loader,
                                  model=model,
                                  epoch=epoch,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  threshold=threshold,
                                  class_weights=class_weights_norm,
                                  use_cuda=use_cuda)
            eval_metrics, latest_threshold = eval(loader=eval_loader,
                                                  model=model,
                                                  epoch=epoch,
                                                  criterion=criterion,
                                                  threshold=threshold,
                                                  use_cuda=use_cuda)
            # BUG FIX: step condition now mirrors the construction condition
            # (`not args.decay_epoch`); `is None` diverged for decay_epoch=0.
            if not args.decay_epoch:
                lr_scheduler.step(eval_metrics['loss'])
            else:
                lr_scheduler.step()

            # save train and eval metric
            writer.add_scalars(main_tag='epoch/loss',
                               tag_scalar_dict={
                                   'train': train_metrics['loss'],
                                   'val': eval_metrics['loss']
                               },
                               global_step=epoch)
            if args.multi_label:
                writer.add_scalars(main_tag='epoch/acc',
                                   tag_scalar_dict={
                                       'train': train_metrics['acc'],
                                       'val': eval_metrics['acc']
                                   },
                                   global_step=epoch)
            else:
                writer.add_scalars(main_tag='epoch/acc_top1',
                                   tag_scalar_dict={
                                       'train': train_metrics['acc_top1'],
                                       'val': eval_metrics['acc_top1']
                                   },
                                   global_step=epoch)
                writer.add_scalars(main_tag='epoch/acc_top5',
                                   tag_scalar_dict={
                                       'train': train_metrics['acc_top5'],
                                       'val': eval_metrics['acc_top5']
                                   },
                                   global_step=epoch)
            writer.add_scalar(tag='epoch/f2_score',
                              scalar_value=eval_metrics['f2'],
                              global_step=epoch)
            # add learning_rate to logs
            writer.add_scalar(tag='lr',
                              scalar_value=optimizer.param_groups[0]['lr'],
                              global_step=epoch)

            # -----------------------------save model every epoch -----------------------------
            # get param state dict (unwrap DataParallel when multi-GPU)
            if len(args.gpu_id) > 1:
                model_weights = model.module.state_dict()
            else:
                model_weights = model.state_dict()

            # -------------------------- save model state--------------------------
            is_best = False
            if best_loss is None:
                # First completed epoch seeds both trackers.
                best_loss = (eval_metrics['loss'], epoch)
                best_f2 = (eval_metrics['f2'], epoch)
                is_best = True
            else:
                # BUG FIX: track loss and f2 independently — the original
                # `elif` never updated best_f2 when the loss also improved.
                if eval_metrics['loss'] < best_loss[0]:
                    best_loss = (eval_metrics['loss'], epoch)
                    if args.score_metric == 'loss':
                        is_best = True
                if eval_metrics['f2'] > best_f2[0]:
                    best_f2 = (eval_metrics['f2'], epoch)
                    if args.score_metric == 'f2':
                        is_best = True
            state = {
                'epoch': epoch + 1,
                'arch': args.model_name,
                'state_dict': model_weights,
                'optimizer': optimizer.state_dict(),
                'threshold': latest_threshold,
                'loss': eval_metrics['loss'],
                'f2': eval_metrics['f2'],
                'fold': args.fold,
                'num_gpu': len(gpu_ids)
            }
            save_checkpoint(state,
                            os.path.join(
                                args.checkpoint,
                                'ckpt-{}-f{}-{:.6f}.pth.tar'.format(
                                    epoch, args.fold, eval_metrics['f2'])),
                            is_best=is_best)
    except KeyboardInterrupt:
        pass
    writer.close()
    time_elapsed = time.time() - since
    print('*** Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # BUG FIX: value/epoch were swapped in the original summary prints, and
    # they crashed when training was interrupted before the first epoch.
    if best_loss is not None:
        print('*** Eval best loss: {0} (epoch {1})'.format(
            best_loss[0], best_loss[1]))
    if best_f2 is not None:
        print('*** Eval best f2_score: {0} (epoch {1})'.format(
            best_f2[0], best_f2[1]))