def ranging(model_path, x, y, win_len, col="k", model_pred=None):
    """
    Plot a scattergram of the F1 score for each patient and show it.

    :param model_path: path to a saved model; only loaded when model_pred is None
    :param x: input dataset, indexed by patient along the first axis
    :param y: ground-truth annotation; cropped along axis 1 before scoring
    :param win_len: window length used to derive the crop margins
    :param col: colour passed to plt.scatter
    :param model_pred: optional precomputed predictions for x (same layout as y)
    :return: tuple of F1 scores, one per patient, in patient order
             (empty tuple when x is empty)
    """
    # NOTE(review): 5000 is presumably the number of samples per record - confirm.
    offsets = (5000 - win_len) // 2
    Y = y[:, offsets:5000 - offsets, :]

    # Identity test on purpose: `model_pred == None` on an ndarray is an
    # elementwise comparison, and using its result in `if` raises ValueError.
    if model_pred is None:
        model = load_model(model_path)
        prediction = np.array(model.predict(x))
    else:
        prediction = model_pred
    prediction = prediction[:, offsets:5000 - offsets, :]

    scores = []
    for i in range(len(x)):
        # statistics() is fed batches of size one, hence the expand_dims.
        stat = statistics(np.expand_dims(Y[i, :, :], axis=0),
                          np.expand_dims(prediction[i, :, :], axis=0))
        scores.append(F_score(stat))

    patient_ids = tuple(range(len(scores)))
    scores = tuple(scores)
    plt.scatter(patient_ids, scores, c=col, alpha=0.3)
    plt.show()
    return scores
def draw_one(model_path, x, y, pacient, win_len):
    """
    Plot one record with its ground-truth and predicted annotation, print scores.

    The signal (channel 0 of x) is drawn in black, the ground-truth channels
    of y as bands starting at -50 and the predicted classes as bands starting
    at 0. The result of statistics() and the F score for this record are
    printed before the figure is shown.

    :param model_path: path to the saved model
    :param x: input dataset, indexed by record along the first axis
    :param y: ground-truth annotation with at least three channels on the last axis
    :param pacient: index of the record to draw
    :param win_len: window length used to derive the crop margins
    """
    # NOTE(review): 5000 is presumably the record length in samples and 500
    # the sampling rate used for the time axis - confirm.
    offsets = (5000 - win_len) // 2
    model = load_model(model_path)

    X = np.expand_dims(x[pacient, :, :], axis=0)
    Y = np.expand_dims(y[pacient, offsets:5000 - offsets, :], axis=0)
    prediction = np.array(model.predict(X))
    prediction = prediction[:, offsets:5000 - offsets, :]

    # Integer arange scaled afterwards: a float-step arange may return one
    # element too many because of rounding, which breaks the plots below.
    x_axis = (offsets + np.arange(win_len)) / 500

    plt.figure(figsize=(20, 5))
    plt.plot(x_axis, x[pacient, offsets:5000 - offsets, 0], 'k')

    # The batch holds exactly one record, so it lives at index 0.
    predict_rounded = np.argmax(prediction, axis=2)[0]
    # Width comes from the model output, not from max()+1: otherwise a class
    # that is never predicted has no column and indexing it raises IndexError.
    one_hot = np.zeros((predict_rounded.size, prediction.shape[2]))
    one_hot[np.arange(predict_rounded.size), predict_rounded] = 1

    bands = ((1, 'r'), (2, 'g'), (0, 'b'))
    for channel, colour in bands:
        plt.fill_between(x_axis, Y[0, :win_len, channel] * 40 - 50, -50,
                         color=colour, alpha=0.3)
    for channel, colour in bands:
        plt.fill_between(x_axis, one_hot[:win_len, channel] * 40, 0,
                         color=colour, alpha=0.3)

    stat = statistics(Y, prediction)
    F = F_score(stat)
    print(stat)
    print(F)
    plt.show()
def trim(model, xtrain, ytrain, name, threshold, path_to_data, win_len):
    """
    Remove from xtrain, ytrain the elements on which the model already has an
    F1 score greater than or equal to threshold, and pickle the remainder.

    The result is written to ``<path_to_data>/trim_<name>.pkl`` as a dict with
    keys "x" and "y"; the folder is created when missing.

    :param model: trained model exposing predict()
    :param xtrain: training inputs, indexed by element along the first axis
    :param ytrain: training annotation aligned with xtrain
    :param name: suffix used in the output file name
    :param threshold: elements scoring >= threshold are dropped
    :param path_to_data: path to the folder where the trimmed dataset will be saved
    :param win_len: window length; win_len // 2 samples are cut from both ends
    :return: trimmed dataset as (xtrain_new, ytrain_new)
    """
    pred_train = np.array(model.predict(xtrain))

    # NOTE(review): the margins here are win_len // 2, whereas ranging() and
    # draw_*() use (5000 - win_len) // 2 - confirm which one is intended.
    start, stop = win_len // 2, 5000 - win_len // 2

    drop = []
    for i in range(len(xtrain)):
        pred = pred_train[i, start:stop, :]
        y = ytrain[i, start:stop, :]
        stat = statistics(np.expand_dims(y, axis=0), np.expand_dims(pred, axis=0))
        if F_score(stat) >= threshold:
            drop.append(i)

    # One delete per array instead of one per hit: each np.delete copies the
    # whole array, so deleting inside the loop was quadratic.
    xtrain_new = np.delete(xtrain, drop, axis=0)
    ytrain_new = np.delete(ytrain, drop, axis=0)

    os.makedirs(path_to_data, exist_ok=True)
    # os.path.join instead of a hard-coded "\\" so the file lands inside
    # path_to_data on every platform; `with` closes the handle on errors too.
    with open(os.path.join(path_to_data, "trim_" + name + ".pkl"), 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
def loss_function(weights: np.ndarray) -> float:
    """
    Return the negated F2 score of the weighted blend of level-1 predictions.

    Every model's out-of-fold predictions are scaled by its weight, scattered
    back to their rows via ``fold_num`` and summed over models. The blend is
    scored against ``all_labels`` with ``F_score(beta=2, threshold=0)``; the
    score and weights are printed and the negated score is returned.

    Reads ``all_labels``, ``level1_train_predicts`` and ``fold_num`` from the
    enclosing scope.
    """
    blended = np.zeros_like(all_labels)

    for per_fold_predicts, weight in zip(level1_train_predicts, weights):
        # Re-assemble this model's predictions in the row order of all_labels.
        weighted = np.zeros_like(all_labels)
        for fold_idx, fold_predict in enumerate(per_fold_predicts):
            weighted[fold_num == fold_idx] = fold_predict * weight
        blended += weighted

    score = F_score(blended, all_labels, beta=2, threshold=0)
    print('score', score, 'weights', weights)
    return -score
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    Count, per model, the elements of x whose F score is >= threshold.

    :param model_paths_list: paths to saved models; each file name is parsed
        as "ens_model_<number>" followed by a three-character suffix
    :param x: dataset fed to every model
    :param y: ground-truth annotation aligned with x
    :param win_len: window length; win_len // 2 samples are cut from both ends
    :param threshold: minimum F score for an element to be counted
    :return: dict {model number: number of elements scoring >= threshold}
    """
    start = win_len // 2
    stop = 5000 - win_len // 2
    prefix_len = len("ens_model_")

    hits_per_model = {}
    for model_path in model_paths_list:
        filename = split(model_path)[1]
        model_num = int(filename[prefix_len:-3])
        hits_per_model[model_num] = 0

        predicted = np.array(load_model(model_path).predict(x))
        for idx in range(len(x)):
            pred_window = np.expand_dims(predicted[idx, start:stop, :], axis=0)
            true_window = np.expand_dims(y[idx, start:stop, :], axis=0)
            score = F_score(statistics(true_window, pred_window))
            if not score >= threshold:
                continue
            hits_per_model[model_num] += 1
    return hits_per_model
def validate(data_loader: Any, model: Any) -> float:
    '''
    Performs validation, returns validation score.

    Runs the model over data_loader without gradients, applies a sigmoid and,
    when the dataset uses test-time augmentation (num_ttas != 1), combines the
    crops of each sample with config.test.tta_combine_func ('max' or 'mean').
    Then scans 100 thresholds in [0.05, 0.25] and returns the best F2 score;
    the best score and threshold are printed.

    Raises AssertionError when config.test.tta_combine_func is not supported.
    '''
    model.eval()
    sigmoid = nn.Sigmoid()
    predicts_list, targets_list = [], []

    with torch.no_grad():
        for input_data in tqdm(data_loader):
            if data_loader.dataset.mode != 'test':
                input_, target = input_data
            else:
                # NOTE(review): with target=None the torch.cat over targets
                # below presumably fails - confirm test mode is never used here.
                input_, target = input_data, None

            if data_loader.dataset.num_ttas != 1:
                # Fold the crop axis into the batch axis for the forward pass.
                bs, ncrops, c, h, w = input_.size()
                input_ = input_.view(-1, c, h, w)
                # NOTE(review): unlike the branch below, the input is not moved
                # with .cuda() here - confirm this is intentional.
                output = model(input_)
                output = sigmoid(output)

                combine = config.test.tta_combine_func
                if combine == 'max':
                    output = output.view(bs, ncrops, -1).max(1)[0]
                elif combine == 'mean':
                    output = output.view(bs, ncrops, -1).mean(1)
                else:
                    # Explicit raise instead of `assert False`: asserts are
                    # stripped under -O, which would let un-combined crops
                    # flow into the score silently.
                    raise AssertionError(
                        f'unsupported tta_combine_func: {combine!r}')
            else:
                output = model(input_.cuda())
                output = sigmoid(output)

            predicts_list.append(output.detach().cpu())
            targets_list.append(target)

    predicts, targets = torch.cat(predicts_list), torch.cat(targets_list)

    best_score, best_thresh = 0.0, 0.0
    for threshold in tqdm(np.linspace(0.05, 0.25, 100)):
        score = F_score(predicts, targets, beta=2, threshold=threshold)
        if score > best_score:
            best_score, best_thresh = score, threshold.item()

    print(f'F2 {best_score:.4f} threshold {best_thresh:.4f}')
    return best_score
def validate(val_loader: Any, model: Any, epoch: int) -> Tuple[float, float, np.ndarray]:
    '''
    Computes the validation score.

    Predictions and targets come from inference(); 100 thresholds evenly
    spaced over [0.05, 0.25] are tried and the first one giving the highest
    F2 score wins. Returns (best score, best threshold, predictions as a
    numpy array).
    '''
    logger.info('validate()')

    raw_predicts, raw_targets = inference(val_loader, model)
    predicts = torch.tensor(raw_predicts)
    targets = torch.tensor(raw_targets)

    best_score = 0.0
    best_thresh = 0.0
    candidates = np.linspace(0.05, 0.25, 100)
    for threshold in tqdm(candidates, disable=IN_KERNEL):
        score = F_score(predicts, targets, beta=2, threshold=threshold)
        # Strict comparison: ties keep the earlier (lower) threshold.
        if not score > best_score:
            continue
        best_score = score
        best_thresh = threshold.item()

    logger.info(f'{epoch} F2 {best_score:.4f} threshold {best_thresh:.4f}')
    logger.info(f' * F2 on validation {best_score:.4f}')
    return best_score, best_thresh, predicts.numpy()
def draw_all(model_path, x, y, win_len, model2=None):
    """
    Save one annotated plot per record to "ill<i>.png" and print the scores.

    Each figure shows channel 0 of the signal in black, the ground-truth
    channels of y as bands starting at -50, the classes predicted by the first
    model as bands starting at 0 and, when model2 is given, the classes
    predicted by the second model as bands starting at 50.

    The statistics and F score printed for every record are computed over the
    whole dataset with the first model, not per record.

    :param model_path: path to the first saved model
    :param x: input dataset, indexed by record along the first axis
    :param y: ground-truth annotation with at least three channels on the last axis
    :param win_len: window length used to derive the crop margins
    :param model2: optional path to a second saved model to compare against
    """
    # NOTE(review): 5000 is presumably the record length in samples and 500
    # the sampling rate used for the time axis - confirm.
    offsets = (5000 - win_len) // 2
    model = load_model(model_path)
    X = x
    Y = y[:, offsets:5000 - offsets, :]
    prediction = np.array(model.predict(X))[:, offsets:5000 - offsets, :]
    # Argmax once for the whole dataset instead of once per plotted record.
    labels = np.argmax(prediction, axis=2)

    labels2 = None
    n_classes2 = 0
    if model2 is not None:
        second_model = load_model(model2)
        prediction2 = np.array(second_model.predict(X))[:, offsets:5000 - offsets, :]
        labels2 = np.argmax(prediction2, axis=2)
        n_classes2 = prediction2.shape[2]

    if len(X) == 0:
        return

    # Integer arange scaled afterwards: a float-step arange may return one
    # element too many because of rounding, which breaks the plots below.
    x_axis = (offsets + np.arange(win_len)) / 500

    # Loop-invariant (whole-dataset arguments), so computed once.
    # NOTE(review): assumes statistics()/F_score() have no side effects.
    stat = statistics(Y, prediction)
    F = F_score(stat)

    bands = ((1, 'r'), (2, 'g'), (0, 'b'))
    for i in range(len(X)):
        fig = plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[i, offsets:5000 - offsets, 0], 'k')

        # Width comes from the model output, not from max()+1: otherwise a
        # class that is never predicted has no column and indexing it fails.
        one_hot = np.zeros((labels[i].size, prediction.shape[2]))
        one_hot[np.arange(labels[i].size), labels[i]] = 1

        for channel, colour in bands:
            plt.fill_between(x_axis, Y[i, :win_len, channel] * 40 - 50, -50,
                             color=colour, alpha=0.3)
        for channel, colour in bands:
            plt.fill_between(x_axis, one_hot[:win_len, channel] * 40, 0,
                             color=colour, alpha=0.3)

        if labels2 is not None:
            one_hot2 = np.zeros((labels2[i].size, n_classes2))
            one_hot2[np.arange(labels2[i].size), labels2[i]] = 1
            for channel, colour in bands:
                plt.fill_between(x_axis, one_hot2[:win_len, channel] * 40 + 50, 50,
                                 color=colour, alpha=0.3)

        print(stat)
        print(F)
        plt.savefig("ill" + str(i) + ".png")
        # Close instead of clf(): a cleared figure stays registered with
        # pyplot, so one figure per record would pile up in memory.
        plt.close(fig)
def trim(model, xtrain, ytrain, data_name, threshold, path_to_data, win_len):
    """
    Drop the training elements the model already scores well on and pickle
    what is left.

    An element is dropped when its F score is >= threshold. The remainder is
    written to ``<path_to_data>/trim_<data_name>.pkl`` as a dict with keys
    "x" and "y"; the folder is created when missing.

    :param model: trained model exposing predict()
    :param xtrain: training inputs, indexed by element along the first axis
    :param ytrain: training annotation aligned with xtrain
    :param data_name: suffix used in the output file name
    :param threshold: elements scoring >= threshold are dropped
    :param path_to_data: folder where the trimmed dataset is saved
    :param win_len: window length; win_len // 2 samples are cut from both ends
    :return: trimmed dataset as (xtrain_new, ytrain_new)
    """
    import os  # local import: the original block did not rely on a module-level one

    pred_train = np.array(model.predict(xtrain))
    start, stop = win_len // 2, 5000 - win_len // 2

    drop = []
    for i in range(len(xtrain)):
        pred = pred_train[i, start:stop, :]
        y = ytrain[i, start:stop, :]
        stat = statistics(np.expand_dims(y, axis=0), np.expand_dims(pred, axis=0))
        if F_score(stat) >= threshold:
            drop.append(i)

    # One delete per array instead of one per hit: each np.delete copies the
    # whole array, so deleting inside the loop was quadratic.
    xtrain_new = np.delete(xtrain, drop, axis=0)
    ytrain_new = np.delete(ytrain, drop, axis=0)

    os.makedirs(path_to_data, exist_ok=True)
    # os.path.join instead of a hard-coded "\\" so the file lands inside
    # path_to_data on every platform; `with` closes the handle on errors too.
    with open(os.path.join(path_to_data, "trim_" + data_name + ".pkl"), 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
def ranging(model_path, x, y, win_len, col="k", is_path=True):
    """
    Scatter-plot the F score of every element of x and return the scores.

    :param model_path: path to a saved model when is_path is true, otherwise
        the already computed predictions themselves
    :param x: dataset, indexed by element along the first axis
    :param y: ground-truth annotation aligned with x
    :param win_len: window length used to derive the crop margins
    :param col: colour passed to plt.scatter
    :param is_path: whether model_path is a path (True) or predictions (False)
    :return: tuple of F scores in element order
    """
    margin = (5000 - win_len) // 2
    window = slice(margin, 5000 - margin)
    truth = y[:, window, :]

    if is_path:
        raw_prediction = np.array(load_model(model_path).predict(x))
    else:
        raw_prediction = model_path
    predicted = raw_prediction[:, window, :]

    # (index, score) pairs are produced in ascending index order already.
    pairs = []
    for idx in range(len(x)):
        true_i = np.expand_dims(truth[idx, :, :], axis=0)
        pred_i = np.expand_dims(predicted[idx, :, :], axis=0)
        pairs.append((idx, F_score(statistics(true_i, pred_i))))

    indices, scores = zip(*pairs)
    plt.scatter(indices, scores, c=col, alpha=0.3)
    return scores
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    Build a dictionary {model number: number of patients from x whose
    F1 score is >= threshold}.

    The model number is parsed from the file name, which is expected to look
    like "ens_model_<number>" followed by a three-character suffix.

    :param model_paths_list: list of paths to the saved models
    :param x: dataset
    :param y: GT annotation
    :param win_len: window length; win_len // 2 samples are cut from both ends
    :param threshold: minimum F1 score for a patient to be counted
    :return: dictionary of counts keyed by model number
    """
    start, stop = win_len // 2, 5000 - win_len // 2

    counts = {}
    for model_path in model_paths_list:
        _, fname = split(model_path)
        number = int(fname[len("ens_model_"):-3])
        counts[number] = 0

        model = load_model(model_path)
        predicted = np.array(model.predict(x))

        for patient in range(len(x)):
            pred_i = predicted[patient, start:stop, :]
            true_i = y[patient, start:stop, :]
            stat = statistics(np.expand_dims(true_i, axis=0),
                              np.expand_dims(pred_i, axis=0))
            if F_score(stat) >= threshold:
                counts[number] = counts[number] + 1
    return counts
def train_epoch(train_loader: Any, model: Any, criterion: Any, optimizer: Any, epoch: int, lr_scheduler: Any, lr_scheduler2: Any, max_steps: Optional[int]) -> None:
    '''
    Trains the model for one epoch with gradient accumulation.

    Runs at most `max_steps` batches (all of them when `max_steps` is falsy),
    rounded down to a multiple of config.train.accum_batches_num. For every
    batch it tracks the loss and an F2 score computed on the output
    thresholded at 0.1, and logs progress every config.train.log_freq batches.
    The optimizer steps once per accumulation group.

    NOTE(review): the statement nesting below was reconstructed from a
    source whose indentation was lost; in particular the scheduler step is
    assumed to run on every batch rather than only on optimizer steps, and the
    rounding of num_steps is assumed to apply regardless of max_steps - confirm.
    '''
    logger.info(f'epoch: {epoch}')
    logger.info(f'learning rate: {get_lr(optimizer)}')

    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_score = AverageMeter()

    model.train()
    optimizer.zero_grad()

    num_steps = len(train_loader)
    if max_steps:
        num_steps = min(max_steps, num_steps)

    # Drop the trailing batches that would not fill a whole accumulation group.
    num_steps -= num_steps % config.train.accum_batches_num
    logger.info(f'total batches: {num_steps}')

    end = time.time()
    lr_str = ''

    for i, (input_, target) in enumerate(train_loader):
        if i >= num_steps:
            break

        input_ = input_.cuda()

        if config.train.mixup.enable:
            input_, target = mixup(input_, target)

        output = model(input_)
        loss = criterion(output, target.cuda())

        # Hard-coded 0.1 cut-off on the model output, used for monitoring only.
        predict = (output.detach() > 0.1).type(torch.FloatTensor)
        avg_score.update(F_score(predict, target, beta=2))

        losses.update(loss.data.item(), input_.size(0))
        # Gradients accumulate across batches until the optimizer steps.
        loss.backward()

        if (i + 1) % config.train.accum_batches_num == 0:
            optimizer.step()
            optimizer.zero_grad()

        # Only one of the two schedulers is stepped here; the first one wins.
        if is_scheduler_continuous(lr_scheduler):
            lr_scheduler.step()
            lr_str = f'\tlr {get_lr(optimizer):.02e}'
        elif is_scheduler_continuous(lr_scheduler2):
            lr_scheduler2.step()
            lr_str = f'\tlr {get_lr(optimizer):.08f}'

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.train.log_freq == 0:
            logger.info(f'{epoch} [{i}/{num_steps}]\t' f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' f'loss {losses.val:.4f} ({losses.avg:.4f})\t' f'F2 {avg_score.val:.4f} ({avg_score.avg:.4f})' + lr_str)

    logger.info(f' * average F2 on train {avg_score.avg:.4f}')
def lr_finder(train_loader: Any, model: Any, criterion: Any, optimizer: Any) -> None:
    '''
    Runs a learning-rate range test and reports a suggested LR range.

    The learning rate grows geometrically from config.train.lr_finder.init_value
    to final_value over at most num_steps batches while the model is trained.
    A bias-corrected exponential moving average of the loss is recorded per
    step, and the sweep stops early once it exceeds four times the best value
    seen. The recorded curve is saved to
    <experiment_dir>/lr_finder_<version>.npz and plotted to
    <experiment_dir>/lr_finder_plot.png; the suggested bounds are logged.

    Only the steps that were actually recorded are saved, analysed and
    plotted. Nothing is saved when no step was recorded.
    '''
    logger.info('lr_finder called')

    batch_time = AverageMeter()
    num_steps = min(len(train_loader), config.train.lr_finder.num_steps)
    logger.info(f'total batches: {num_steps}')

    end = time.time()
    lr_str = ''
    model.train()

    init_value = config.train.lr_finder.init_value
    final_value = config.train.lr_finder.final_value
    beta = config.train.lr_finder.beta

    # With a single step there is no range to sweep (and num_steps - 1 == 0
    # would divide by zero), so the rate simply stays at init_value.
    if num_steps > 1:
        mult = (final_value / init_value) ** (1 / (num_steps - 1))
    else:
        mult = 1.0
    lr = init_value

    avg_loss = best_loss = 0.0
    losses = np.zeros(num_steps)
    logs = np.zeros(num_steps)
    recorded = 0  # number of leading entries of losses/logs that hold real data

    for i, (input_, target) in enumerate(train_loader):
        if i >= num_steps:
            break

        set_lr(optimizer, lr)

        output = model(input_.cuda())
        loss = criterion(output, target.cuda())
        loss_val = loss.data.item()

        predict = (output.detach() > 0.1).type(torch.FloatTensor)
        f2 = F_score(predict, target, beta=2)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_str = f'\tlr {lr:.08f}'

        # compute the smoothed loss
        avg_loss = beta * avg_loss + (1 - beta) * loss_val
        smoothed_loss = avg_loss / (1 - beta ** (i + 1))

        # stop if the loss is exploding
        if i > 0 and smoothed_loss > 4 * best_loss:
            break

        # record the best loss
        if smoothed_loss < best_loss or i == 0:
            best_loss = smoothed_loss

        # store the values
        losses[i] = smoothed_loss
        logs[i] = math.log10(lr)
        recorded = i + 1

        # update the lr for the next step
        lr *= mult

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.train.log_freq == 0:
            logger.info(f'lr_finder [{i}/{num_steps}]\t'
                        f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'loss {loss:.4f} ({smoothed_loss:.4f})\t'
                        f'F2 {f2:.4f} {lr_str}')

    if recorded == 0:
        logger.info('lr_finder: no steps were recorded, nothing to analyse')
        return

    # After an early stop the tail of both arrays is still zero-filled. Those
    # zeros would win both argmin calls below (and log10(lr) == 0 means
    # lr == 1), so only the recorded prefix is kept.
    losses, logs = losses[:recorded], logs[:recorded]

    np.savez(os.path.join(config.experiment_dir, f'lr_finder_{config.version}'),
             logs=logs, losses=losses)

    # Step-to-step change of the smoothed loss; its minimum marks the
    # steepest descent.
    d1 = np.zeros_like(losses)
    d1[1:] = losses[1:] - losses[:-1]
    first, last = np.argmin(d1), np.argmin(losses)

    MAGIC_COEFF = 4

    highest_lr = 10 ** logs[last]
    best_high_lr = highest_lr / MAGIC_COEFF
    best_low_lr = 10 ** logs[first]
    logger.info(f'best_low_lr={best_low_lr} best_high_lr={best_high_lr} '
                f'highest_lr={highest_lr}')

    def find_nearest(array: np.ndarray, value: float) -> int:
        return (np.abs(array - value)).argmin()

    # Move the upper marker to the recorded step closest to best_high_lr.
    last = find_nearest(logs, math.log10(best_high_lr))
    logger.info(f'first={first} last={last}')

    import matplotlib.pyplot as plt

    plt.plot(logs, losses, '-D', markevery=[first, last])
    plt.savefig(os.path.join(config.experiment_dir, 'lr_finder_plot.png'))
join(path_to_ensemble_models, f) for f in listdir(path_to_ensemble_models) if isfile(join(path_to_ensemble_models, f)) ] xy = load_dataset() X = xy["x"] Y = xy["y"] offsets = (5000 - win_len) // 2 xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.33, random_state=42) model = load_model(path_to_ensemble_models + "\\ens_model_1.h5") pred_e = ensemble_predict(model_paths_list, xtest) pred_ = model.predict(xtest) stat = statistics(ytest[:, win_len // 2:5000 - win_len // 2, :], pred_e[:, win_len // 2:5000 - win_len // 2, :]) print(F_score(stat)) #stat.to_csv("stats_one_test.csv", sep = ';') ranging(pred_e, xtest, ytest, win_len, col="k", is_path=False) plt.show() dict = histogram(model_paths_list, xtrain, ytrain, win_len, threshold=0.99) plt.bar(list(dict.keys()), dict.values(), color='g', alpha=0.5) plt.show() plot_two_prediction(pred_e, pred_, xtest, ytest, win_len, [5])
def draw_one(model_path, x, y, patients, win_len):
    """
    Print the F1 score and plot the ECG with the network annotation and the
    ground truth for each requested patient.

    Each figure shows channel 0 of the ECG in black, the ground-truth channels
    as bands starting at -50 and the predicted classes as bands starting at 0.
    The figure is shown after the scores of that patient are printed.

    :param model_path: path to the trained model
    :param x: array of ECG
    :param y: array of annotation
    :param patients: list of patient numbers to be plotted
    :param win_len: window length used to derive the crop margins
    """
    # Loop-invariant work is done once instead of once per patient.
    offsets = (5000 - win_len) // 2
    model = load_model(model_path)
    # Integer arange scaled afterwards: a float-step arange may return one
    # element too many because of rounding, which breaks the plots below.
    x_axis = (offsets + np.arange(win_len)) / 500
    bands = ((1, 'r'), (2, 'g'), (0, 'b'))

    for pacient in patients:
        X = np.expand_dims(x[pacient, :, :], axis=0)
        Y = np.expand_dims(y[pacient, offsets:5000 - offsets, :], axis=0)
        prediction = np.array(model.predict(X))
        prediction = prediction[:, offsets:5000 - offsets, :]

        plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[pacient, offsets:5000 - offsets, 0], 'k')

        # The batch holds exactly one record, so the prediction lives at
        # index 0; indexing it with the patient number raised IndexError for
        # every patient other than 0.
        predict_rounded = np.argmax(prediction, axis=2)[0]
        # Width comes from the model output, not from max()+1: otherwise a
        # class that is never predicted has no column and indexing it fails.
        one_hot = np.zeros((predict_rounded.size, prediction.shape[2]))
        one_hot[np.arange(predict_rounded.size), predict_rounded] = 1

        for channel, colour in bands:
            plt.fill_between(x_axis, Y[0, :win_len, channel] * 40 - 50, -50,
                             color=colour, alpha=0.3)
        for channel, colour in bands:
            plt.fill_between(x_axis, one_hot[:win_len, channel] * 40, 0,
                             color=colour, alpha=0.3)

        stat = statistics(Y, prediction)
        F = F_score(stat)
        print(stat)
        print(F)
        # NOTE(review): the original indentation was lost; show() is assumed
        # to run once per patient - confirm.
        plt.show()