def test(task=0):
    """Score the fine-tuned model on the test set and return the metric report."""
    # Restore the fine-tuned weights before scoring.
    state = torch.load(rx_output_model_file)
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    pred_chunks = []
    true_chunks = []
    for batch in tqdm(test_dataloader, desc="Testing"):
        batch = tuple(t.to(device) for t in batch)
        input_ids, dx_labels, rx_labels = batch
        input_ids = input_ids.squeeze()
        dx_labels = dx_labels.squeeze()
        rx_labels = rx_labels.squeeze(dim=0)
        with torch.no_grad():
            loss, rx_logits = model(input_ids,
                                    dx_labels=dx_labels,
                                    rx_labels=rx_labels)
            pred_chunks.append(t2n(torch.sigmoid(rx_logits)))
            true_chunks.append(t2n(rx_labels))
    print('')

    acc_container = metric_report(np.concatenate(pred_chunks, axis=0),
                                  np.concatenate(true_chunks, axis=0),
                                  args.therhold)

    # save report — the TensorBoard writer only exists when training ran
    if args.do_train:
        for k, v in acc_container.items():
            writer.add_scalar('test/{}'.format(k), v, 0)
    return acc_container
def compute_split_loss(S_log, S, points, blob_dict):
    """Penalize predicted blobs that cover more than one annotated point.

    For each multi-point blob, a watershed split seeded at the blob's points
    yields boundary pixels; those boundaries are pushed toward the background
    class with an NLL loss scaled by the blob's point count.

    Args:
        S_log: log-softmax model output (assumed (1, K, H, W) — inferred from
            how S is indexed below).
        S: softmax model output of the same shape; only S[0] is used.
        points: point-annotation map with class ids (0 = background).
        blob_dict: result of get_blob_dict with "blobs" and "blobList".

    Returns:
        Accumulated split loss (0. when no blob holds >= 2 points).
    """
    blobs = blob_dict["blobs"]
    S_numpy = ut.t2n(S[0])
    points_numpy = ut.t2n(points).squeeze()

    loss = 0.
    for b in blob_dict["blobList"]:
        if b["n_points"] < 2:
            # Single-point blobs need no splitting.
            continue

        l = b["class"] + 1  # point labels are offset by 1 (0 is background)
        probs = S_numpy[l]

        points_class = (points_numpy == l).astype("int")
        blob_ind = blobs[b["class"]] == b["label"]

        # Watershed boundaries restricted to this blob; invert so the
        # boundary pixels become background targets.
        T = watersplit(probs, points_class * blob_ind) * blob_ind
        T = 1 - T

        scale = b["n_points"] + 1
        # BUGFIX: reduction='elementwise_mean' was deprecated and later removed
        # from PyTorch; 'mean' is the equivalent modern spelling.
        loss += float(scale) * F.nll_loss(S_log,
                                          torch.LongTensor(T).cuda()[None],
                                          ignore_index=1,
                                          reduction='mean')
    return loss
def compute_split_loss(S_log, S, points, blob_dict):
    """Split loss: push watershed borders of multi-point blobs to background."""
    class_probs = ut.t2n(S[0])            # predictions as a numpy array
    point_map = ut.t2n(points).squeeze()  # annotations as a squeezed numpy array
    labelled_blobs = blob_dict["blobs"]

    total = 0.
    for blob in blob_dict["blobList"]:
        # A blob covering a single point is already correct; skip it.
        if blob["n_points"] < 2:
            continue

        cls = blob["class"] + 1                 # +1: channel 0 is background
        probs = class_probs[blob["class"] + 1]  # this class' prediction map

        this_class = (point_map == cls).astype("int")
        this_blob = labelled_blobs[blob["class"]] == blob["label"]

        # Watershed segmentation on the probabilities, restricted to this
        # blob; 1 - borders turns the computed borders into background (0).
        borders = watersplit(probs, this_class * this_blob) * this_blob
        borders = 1 - borders

        weight = blob["n_points"] + 1  # loss scale
        total += float(weight) * F.nll_loss(S_log,
                                            torch.LongTensor(borders).cuda()[None],
                                            ignore_index=1,
                                            reduction='mean')
    return total
def lc_loss(model, batch):
    """LC-FCN style localization loss (point + rectification + split terms).

    Args:
        model: segmentation model; also records seen images in
            model.trained_images.
        batch: dict with "images", "points", "counts", "name", "image_path";
            batch size must be 1.

    Returns:
        Scalar loss tensor (divided by batch size N, which is asserted to be 1).
    """
    model.train()
    N = batch["images"].size(0)
    assert N == 1  # the loss formulation assumes batch size 1

    blob_dict = get_blob_dict(model, batch)

    # put variables in cuda
    images = batch["images"].cuda()
    points = batch["points"].cuda()
    counts = batch["counts"].cuda()
    # NOTE: removed unused locals `name` and `blobs` from the original.

    O = model(images)
    S = F.softmax(O, 1)  # softmax in class channel
    S_log = F.log_softmax(O, 1)

    # Point LOSS: every annotated point must be predicted as its class.
    loss = F.nll_loss(S_log, points, ignore_index=0, reduction='sum')

    # Rectification LOSS: penalize blobs containing no point.
    if blob_dict["n_fp"] > 0:
        loss += compute_fp_loss(S_log, blob_dict)

    # Separate LOSS: penalize blobs containing several points.
    if blob_dict["n_multi"] > 0:
        loss += compute_split_loss(S_log, S, points, blob_dict)

    # Global watershed term over all points of each class.
    S_npy = ut.t2n(S.squeeze())
    points_npy = ut.t2n(points).squeeze()
    for l in range(1, S.shape[1]):
        points_class = (points_npy == l).astype(int)
        if points_class.sum() == 0:
            continue

        T = watersplit(S_npy[l], points_class)
        T = 1 - T
        scale = float(counts.sum())
        # BUGFIX: reduction='elementwise_mean' was deprecated and removed from
        # PyTorch; 'mean' is the equivalent modern spelling.
        loss += float(scale) * F.nll_loss(S_log,
                                          torch.LongTensor(T).cuda()[None],
                                          ignore_index=1,
                                          reduction='mean')

    # Add to trained images
    model.trained_images.add(batch["image_path"][0])

    return loss / N
def lc_loss(model, batch):
    """LC-FCN style localization loss (image + point + FP + split + global terms).

    Args:
        model: segmentation model; also records seen images in
            model.trained_images.
        batch: dict with "images", "points", "counts", "image_path";
            batch size must be 1.

    Returns:
        Scalar loss tensor (divided by batch size N, which is asserted to be 1).
    """
    model.train()
    N = batch["images"].size(0)
    assert N == 1  # the loss formulation assumes batch size 1

    blob_dict = get_blob_dict(model, batch)

    # put variables in cuda
    images = batch["images"].cuda()
    points = batch["points"].cuda()
    counts = batch["counts"].cuda()

    O = model(images)
    S = F.softmax(O, 1)
    S_log = F.log_softmax(O, 1)

    # IMAGE LOSS
    loss = compute_image_loss(S, counts)

    # POINT LOSS: every annotated point must be predicted as its class.
    loss += F.nll_loss(S_log, points, ignore_index=0, reduction='sum')

    # FP loss: penalize blobs containing no point.
    if blob_dict["n_fp"] > 0:
        loss += compute_fp_loss(S_log, blob_dict)

    # split_mode loss: penalize blobs containing several points.
    if blob_dict["n_multi"] > 0:
        loss += compute_split_loss(S_log, S, points, blob_dict)

    # Global loss: watershed term over all points of each class.
    S_npy = ut.t2n(S.squeeze())
    points_npy = ut.t2n(points).squeeze()
    for l in range(1, S.shape[1]):
        points_class = (points_npy == l).astype(int)
        if points_class.sum() == 0:
            continue

        T = watersplit(S_npy[l], points_class)
        T = 1 - T
        scale = float(counts.sum())
        # BUGFIX: reduction='elementwise_mean' was deprecated and removed from
        # PyTorch; 'mean' is the equivalent modern spelling.
        loss += float(scale) * F.nll_loss(S_log,
                                          torch.LongTensor(T).cuda()[None],
                                          ignore_index=1,
                                          reduction='mean')

    # Add to trained images
    model.trained_images.add(batch["image_path"][0])

    return loss / N
def lc_loss(model, batch): model.train( ) #set model to training mode (batchnorm and dropout layers work accordingly) N = batch["images"].size(0) #get the number of images in batch assert N == 1 #make sure batch size is 1 blob_dict = get_blob_dict(model, batch) #get all blob information # put variables in gpu memory images = batch["images"].cuda() points = batch["points"].cuda() counts = batch["counts"].cuda() O = model(images) #output of model (1, k, h, w) S = F.softmax(O, 1) #softmax along the k dimension S_log = F.log_softmax(O, 1) #log softmax along k dimension # IMAGE LOSS loss = compute_image_loss(S, counts) # POINT LOSS loss += F.nll_loss(S_log, points, ignore_index=0, reduction='sum') # FP loss if blob_dict["n_fp"] > 0: loss += compute_fp_loss(S_log, blob_dict) # split_mode loss if blob_dict["n_multi"] > 0: loss += compute_split_loss(S_log, S, points, blob_dict) # Global loss S_npy = ut.t2n(S.squeeze()) #convert to numpy points_npy = ut.t2n(points).squeeze() #convert to numpy and squeeze for l in range(1, S.shape[1]): #iterate over all classes points_class = (points_npy == l).astype( int) #locations of that particular class if points_class.sum() == 0: #if no points in the class skip continue T = watersplit( S_npy[l], points_class) #watershed segmentation on the probabilities T = 1 - T #convert the borders to 0 (background) scale = float(counts.sum()) loss += float(scale) * F.nll_loss(S_log, torch.LongTensor(T).cuda()[None], ignore_index=1, reduction='mean') return loss
def get_blob_dict(model, batch, training=False): blobs = model.predict(batch, method="blobs").squeeze( ) #predicted blobs of shape (k, h, w) with labeled blobs points = ut.t2n(batch["points"]).squeeze() #actual location of objects if blobs.ndim == 2: #If k == 1 squeeze would also remove that blobs = blobs[None] #so to add an extra dimension if k==1 blobList = [] n_multi = 0 n_single = 0 n_fp = 0 for l in range(blobs.shape[0]): #iterating through k classes class_blobs = blobs[ l] #class_blobs now contains labelled blobs of that specific class points_mask = points == ( l + 1 ) #points_mask now contains actual location of object of that particular class # Intersecting blob_uniques, blob_counts = np.unique( class_blobs * (points_mask), return_counts=True ) #blob_uniques now only contains labelled points that are inside a predicted blob uniques = np.delete(np.unique(class_blobs), blob_uniques) #Delete correctly predicted blobs for u in uniques: #iterate over falsely predicted blobs blobList += [{"class": l, "label": u, "n_points": 0}] n_fp += 1 #counter to increment false positives for i, u in enumerate(blob_uniques): #iterate over the labelled blobs if u == 0: #ignore if background continue blob_ind = class_blobs == u #work with one specific labelled blob locs = np.where( blob_ind * (points_mask) ) #get location of all the points in the labelled blob if blob_counts[i] == 1: #If only one point in blob n_single += 1 else: #if multiple points in blob n_multi += 1 blobList += [{"class": l, "label": u, "n_points": blob_counts[i]}] blob_dict = { "blobs": blobs, #labelled blobs from morph.label "blobList": blobList, #information on each blob "n_fp": n_fp, #number of false positives "n_single": n_single, #number of blobs with only one point "n_multi": n_multi, } #number of blobs with multiple points return blob_dict
def addBatch(self, model, batch, **options):
    """Accumulate a class-confusion histogram for one batch of predictions."""
    n_classes = 21

    predicted_annList = model.predict(batch, method="annList")
    gt_annList = batch["annList"][0]

    # Rasterize both annotation lists into class masks.
    # preds = ms.t2n(model.predict(batch, metric="maskClasses"))
    # maskClasses = ms.t2n(batch["maskClasses"])
    preds = ut.t2n(au.annList2mask(predicted_annList)["mask"])
    maskClasses = ut.t2n(au.annList2mask(gt_annList)["mask"])

    # An empty prediction yields no mask; score it as all-background.
    if preds is None:
        preds = maskClasses * 0

    # Lazily create the running histogram on first use.
    if self.hist is None:
        self.hist = np.zeros((n_classes, n_classes))
    self.hist += fast_hist(maskClasses.flatten(), preds.flatten(), n_classes)
def predict(self, batch, method="probs"):
    """Run inference and post-process into per-class counts or labelled blobs.

    Args:
        batch: dict with an "images" tensor (moved to GPU here).
        method: "counts" -> array of shape (1, n_classes - 1) with the number
            of connected blobs predicted per foreground class;
            "blobs" -> array of shape (1, n_classes - 1, h, w) with
            morph.label-style labelled blobs per foreground class.

    NOTE(review): the default method="probs" has no branch and falls through,
    implicitly returning None — confirm callers only pass "counts" or "blobs".
    """
    if method == "counts":
        images = batch["images"].cuda()
        origin_mask = F.softmax(self(images), 1)
        mask_numpy = ut.t2n(origin_mask[0])
        pred_mask = np.argmax(mask_numpy, axis=0)  # hard per-pixel class map

        counts = np.zeros(self.n_classes - 1)
        for category_id in np.unique(pred_mask):
            if category_id == 0:  # 0 is background
                continue
            blobs_category = morph.label(pred_mask == category_id)
            # number of distinct non-zero blob labels
            n_blobs = (np.unique(blobs_category) != 0).sum()
            counts[category_id - 1] = n_blobs
        # BUGFIX: removed leftover debug print of counts[None].
        return counts[None]

    elif method == "blobs":
        images = batch["images"].cuda()
        origin_mask = F.softmax(self(images), 1)
        mask_numpy = ut.t2n(origin_mask[0])
        pred_mask = np.argmax(mask_numpy, axis=0)  # hard per-pixel class map

        h, w = pred_mask.shape
        blobs = np.zeros((self.n_classes - 1, h, w), int)
        for category_id in np.unique(pred_mask):
            if category_id == 0:  # 0 is background
                continue
            blobs[category_id - 1] = morph.label(pred_mask == category_id)
        return blobs[None]
def get_blob_dict(model, batch, training=False):
    """Collect predicted blobs, their sizes, and their overlap with point annotations."""
    blobs = model.predict(batch, method="blobs").squeeze()
    points = ut.t2n(batch["points"]).squeeze()

    # A single-class prediction loses its class axis on squeeze; restore it.
    if blobs.ndim == 2:
        blobs = blobs[None]

    blobList = []
    n_multi = 0
    n_single = 0
    n_fp = 0
    total_size = 0

    for cls in range(blobs.shape[0]):
        class_blobs = blobs[cls]
        points_mask = points == (cls + 1)

        # Blob labels hit by at least one annotated point, with point counts.
        blob_uniques, blob_counts = np.unique(class_blobs * (points_mask),
                                              return_counts=True)
        # Remaining blobs count as false positives.
        # (NOTE: np.delete interprets blob_uniques as indices here.)
        uniques = np.delete(np.unique(class_blobs), blob_uniques)
        for label in uniques:
            blobList.append({
                "class": cls,
                "label": label,
                "n_points": 0,
                "size": 0,
                "pointsList": []
            })
            n_fp += 1

        for idx, label in enumerate(blob_uniques):
            if label == 0:  # background, not a blob label
                continue

            blob_ind = class_blobs == label
            ys, xs = np.where(blob_ind * (points_mask))
            pointsList = [{"y": y, "x": x} for y, x in zip(ys, xs)]
            assert len(pointsList) == blob_counts[idx]

            if blob_counts[idx] == 1:
                n_single += 1
            else:
                n_multi += 1

            size = blob_ind.sum()
            total_size += size
            blobList.append({
                "class": cls,
                "size": size,
                "label": label,
                "n_points": blob_counts[idx],
                "pointsList": pointsList
            })

    return {
        "blobs": blobs,
        "blobList": blobList,
        "n_fp": n_fp,
        "n_single": n_single,
        "n_multi": n_multi,
        "total_size": total_size
    }
def main():
    """CLI entry point: train, evaluate, and test the GBERT prediction model."""
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--model_name",
                        default='GBert-predict',
                        type=str,
                        required=False,
                        help="model name")
    parser.add_argument("--data_dir",
                        default='../data',
                        type=str,
                        required=False,
                        help="The input data dir.")
    parser.add_argument("--pretrain_dir",
                        default='../saved/GBert-pretraining',
                        type=str,
                        required=False,
                        help="pretraining model")
    parser.add_argument("--train_file",
                        default='data-multi-visit.pkl',
                        type=str,
                        required=False,
                        help="training data file.")
    parser.add_argument(
        "--output_dir",
        default='../saved/',
        type=str,
        required=False,
        help="The output directory where the model checkpoints will be written."
    )

    # Other parameters
    parser.add_argument("--use_pretrain",
                        default=False,
                        action='store_true',
                        help="is use pretrain")
    parser.add_argument("--graph",
                        default=False,
                        action='store_true',
                        help="if use ontology embedding")
    parser.add_argument("--therhold",
                        default=0.3,
                        type=float,
                        help="therhold.")
    parser.add_argument(
        "--max_seq_length",
        default=55,
        type=int,
        help="The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        default=True,
                        action='store_true',
                        help="Whether to run on the dev set.")
    parser.add_argument("--do_test",
                        default=True,
                        action='store_true',
                        help="Whether to run on the test set.")
    parser.add_argument("--train_batch_size",
                        default=1,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--learning_rate",
                        default=5e-4,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=20.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=1203,
                        help="random seed for initialization")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help="Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")

    args = parser.parse_args()
    args.output_dir = os.path.join(args.output_dir, args.model_name)

    # Seed every RNG used so runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train:
    #     raise ValueError(
    #         "Output directory ({}) already exists and is not empty.".format(args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    print("Loading Dataset")
    tokenizer, (train_dataset, eval_dataset, test_dataset) = load_dataset(args)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=RandomSampler(train_dataset),
                                  batch_size=1)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=SequentialSampler(eval_dataset),
                                 batch_size=1)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=SequentialSampler(test_dataset),
                                 batch_size=1)

    print('Loading Model: ' + args.model_name)
    # config = BertConfig(vocab_size_or_config_json_file=len(tokenizer.vocab.word2idx), side_len=train_dataset.side_len)
    # config.graph = args.graph
    # model = SeperateBertTransModel(config, tokenizer.dx_voc, tokenizer.rx_voc)
    if args.use_pretrain:
        logger.info("Use Pretraining model")
        model = GBERT_Predict.from_pretrained(args.pretrain_dir,
                                              tokenizer=tokenizer)
    else:
        config = BertConfig(
            vocab_size_or_config_json_file=len(tokenizer.vocab.word2idx))
        config.graph = args.graph
        model = GBERT_Predict(config, tokenizer)
    logger.info('# of model parameters: ' + str(get_n_params(model)))

    model.to(device)

    model_to_save = model.module if hasattr(
        model, 'module') else model  # Only save the model it-self
    rx_output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")

    # Prepare optimizer
    # num_train_optimization_steps = int(
    #     len(train_dataset) / args.train_batch_size) * args.num_train_epochs
    # param_optimizer = list(model.named_parameters())
    # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # optimizer_grouped_parameters = [
    #     {'params': [p for n, p in param_optimizer if not any(
    #         nd in n for nd in no_decay)], 'weight_decay': 0.01},
    #     {'params': [p for n, p in param_optimizer if any(
    #         nd in n for nd in no_decay)], 'weight_decay': 0.0}
    # ]
    # optimizer = BertAdam(optimizer_grouped_parameters,
    #                      lr=args.learning_rate,
    #                      warmup=args.warmup_proportion,
    #                      t_total=num_train_optimization_steps)
    optimizer = Adam(model.parameters(), lr=args.learning_rate)

    global_step = 0
    if args.do_train:
        writer = SummaryWriter(args.output_dir)

        logger.info("***** Running training *****")
        logger.info(" Num examples = %d", len(train_dataset))
        logger.info(" Batch size = %d", 1)

        dx_acc_best, rx_acc_best = 0, 0
        acc_name = 'prauc'
        dx_history = {'prauc': []}
        rx_history = {'prauc': []}

        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            print('')
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            prog_iter = tqdm(train_dataloader, leave=False, desc='Training')
            model.train()
            for _, batch in enumerate(prog_iter):
                batch = tuple(t.to(device) for t in batch)
                input_ids, dx_labels, rx_labels = batch
                input_ids, dx_labels, rx_labels = input_ids.squeeze(
                    dim=0), dx_labels.squeeze(dim=0), rx_labels.squeeze(dim=0)
                loss, rx_logits = model(input_ids,
                                        dx_labels=dx_labels,
                                        rx_labels=rx_labels,
                                        epoch=global_step)
                loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += 1
                nb_tr_steps += 1

                # Display running mean loss in the progress bar
                prog_iter.set_postfix(loss='%.4f' % (tr_loss / nb_tr_steps))

                optimizer.step()
                optimizer.zero_grad()

            writer.add_scalar('train/loss', tr_loss / nb_tr_steps, global_step)
            global_step += 1

            if args.do_eval:
                print('')
                logger.info("***** Running eval *****")
                model.eval()
                dx_y_preds = []
                dx_y_trues = []
                rx_y_preds = []
                rx_y_trues = []
                for eval_input in tqdm(eval_dataloader, desc="Evaluating"):
                    eval_input = tuple(t.to(device) for t in eval_input)
                    input_ids, dx_labels, rx_labels = eval_input
                    input_ids, dx_labels, rx_labels = input_ids.squeeze(
                    ), dx_labels.squeeze(), rx_labels.squeeze(dim=0)
                    with torch.no_grad():
                        loss, rx_logits = model(input_ids,
                                                dx_labels=dx_labels,
                                                rx_labels=rx_labels)
                        rx_y_preds.append(t2n(torch.sigmoid(rx_logits)))
                        rx_y_trues.append(t2n(rx_labels))
                        # dx_y_preds.append(t2n(torch.sigmoid(dx_logits)))
                        # dx_y_trues.append(
                        #     t2n(dx_labels.view(-1, len(tokenizer.dx_voc.word2idx))))
                        # rx_y_preds.append(t2n(torch.sigmoid(rx_logits))[
                        #     :, tokenizer.rx_singe2multi])
                        # rx_y_trues.append(
                        #     t2n(rx_labels)[:, tokenizer.rx_singe2multi])

                print('')
                # dx_acc_container = metric_report(np.concatenate(dx_y_preds, axis=0), np.concatenate(dx_y_trues, axis=0),
                #                                  args.therhold)
                rx_acc_container = metric_report(
                    np.concatenate(rx_y_preds, axis=0),
                    np.concatenate(rx_y_trues, axis=0), args.therhold)
                for k, v in rx_acc_container.items():
                    writer.add_scalar('eval/{}'.format(k), v, global_step)

                # Checkpoint whenever the tracked metric (prauc) improves.
                if rx_acc_container[acc_name] > rx_acc_best:
                    rx_acc_best = rx_acc_container[acc_name]
                    # save model
                    torch.save(model_to_save.state_dict(),
                               rx_output_model_file)

        with open(os.path.join(args.output_dir, 'bert_config.json'),
                  'w',
                  encoding='utf-8') as fout:
            fout.write(model.config.to_json_string())

    if args.do_test:
        logger.info("***** Running test *****")
        logger.info(" Num examples = %d", len(test_dataset))
        logger.info(" Batch size = %d", 1)

        def test(task=0):
            # Load a trained model that you have fine-tuned
            model_state_dict = torch.load(rx_output_model_file)
            model.load_state_dict(model_state_dict)
            model.to(device)
            model.eval()

            y_preds = []
            y_trues = []
            for test_input in tqdm(test_dataloader, desc="Testing"):
                test_input = tuple(t.to(device) for t in test_input)
                input_ids, dx_labels, rx_labels = test_input
                input_ids, dx_labels, rx_labels = input_ids.squeeze(
                ), dx_labels.squeeze(), rx_labels.squeeze(dim=0)
                with torch.no_grad():
                    loss, rx_logits = model(input_ids,
                                            dx_labels=dx_labels,
                                            rx_labels=rx_labels)
                    y_preds.append(t2n(torch.sigmoid(rx_logits)))
                    y_trues.append(t2n(rx_labels))

            print('')
            acc_container = metric_report(np.concatenate(y_preds, axis=0),
                                          np.concatenate(y_trues, axis=0),
                                          args.therhold)

            # save report — the TensorBoard writer only exists when training ran
            if args.do_train:
                for k, v in acc_container.items():
                    writer.add_scalar('test/{}'.format(k), v, 0)

            return acc_container

        test(task=0)