Пример #1
0
 def __init__(self, path, s, size, shuffle=False, transform=None):
     """Index the patch files of every slide directory listed in ``path``.

     Args:
         path: iterable of slide directory paths (e.g. ``./Predict/10``).
             A path containing ``'pos'`` is labelled 1; otherwise a path
             containing ``'neg'`` is labelled 0.
         s: stored unchanged as ``self.s``.
         size: stored unchanged as ``self.size``.
         shuffle: stored unchanged as ``self.shuffle``.
         transform: stored unchanged as ``self.transform``.
     """
     names = []
     labels = []
     patches_per_slide = []  # one list of patch paths per slide
     for slide_dir in path:
         # 'pos' takes precedence; a path with neither marker gets no label
         # and is caught by the length assertion below.
         if 'pos' in slide_dir:
             labels.append(1)
         elif 'neg' in slide_dir:
             labels.append(0)
         names.append(slide_dir.split('/')[-1])
         patches_per_slide.append(
             [os.path.join(slide_dir, patch) for patch in os.listdir(slide_dir)])
     cp('(#g)Total length: {}(#) (#y)grid: {}(#)'.format(len(labels), len(patches_per_slide)))
     print(patches_per_slide[0])
     assert len(labels) == len(names), cp('(#r) targets and slidenames not match (#)')
     self.slidenames = names
     self.targets = labels
     self.grid = patches_per_slide
     self.s = s
     self.size = size
     self.shuffle = shuffle
     self.transform = transform
     self.level = 0
Пример #2
0
def train_single(epoch, embedder, rnn, loader, criterion, optimizer):
    """Train ``rnn`` for one epoch on sequences of embedded inputs.

    Each batch is a sequence of input tensors; every step is passed through
    ``embedder`` and fed to ``rnn`` together with the running hidden state.
    The loss is computed on the output of the last step only.

    Returns:
        ``(loss, fpr, fnr)``: the sample-weighted mean loss, and the error
        counts reported by ``errors`` divided by the number of dataset
        targets equal to 0 and 1 respectively.
    """
    rnn.train()
    loss_sum = 0.
    fp_sum = 0.
    fn_sum = 0.

    for batch_no, (inputs, target) in enumerate(loader, start=1):
        cp('(#y)Training - Epoch: [{}/{}](#)\t(#g)Batch: [{}/{}](#)'.format(epoch+1, args.nepochs, batch_no, len(loader)))
        print(inputs[0].size(), len(inputs), len(target))
        n_samples = inputs[0].size(0)
        rnn.zero_grad()

        # Fresh hidden state per batch, threaded through every sequence step.
        state = rnn.module.init_hidden(n_samples).cuda()
        for step_input in inputs:
            _, features = embedder(step_input.cuda())
            output, state = rnn(features, state)

        target = target.cuda()
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * target.size(0)
        batch_fps, batch_fns = errors(output.detach(), target.cpu())
        fp_sum += batch_fps
        fn_sum += batch_fns

    mean_loss = loss_sum / len(loader.dataset)
    all_targets = np.array(loader.dataset.targets)
    fpr = fp_sum / (all_targets == 0).sum()
    fnr = fn_sum / (all_targets == 1).sum()
    cp('(#y)Training - Epoch: [{}/{}](#)\t(#r)Loss: {}(#)\t(#g)FPR: {}(#)\t(#b)FNR: {}(#)'
       .format(epoch+1, args.nepochs, mean_loss, fpr, fnr))
    return mean_loss, fpr, fnr
Пример #3
0
    def __init__(self, data_path, transform=None):
        """Build a flat index of every patch file under the given slides.

        Args:
            data_path: iterable of slide directory paths. A slide is
                labelled 1 when its path contains ``'pos'`` and 0 otherwise.
            transform: stored unchanged as ``self.transform``.
        """
        grid = []  # path of every patch, all slides concatenated
        slideIDX = []  # slide index of each entry in ``grid``
        slidenames = []
        targets = []
        slideLen = [0]  # cumulative patch counts; entry i is slide i's offset in ``grid``
        for idx, each_file in enumerate(data_path):
            slide_name = each_file.split('/')[-1]
            # List the directory once so the cumulative length, the indexed
            # paths and the logged count can never disagree.
            patch_files = os.listdir(each_file)
            slidenames.append(slide_name)
            targets.append(1 if 'pos' in each_file else 0)
            slideLen.append(slideLen[-1] + len(patch_files))
            grid.extend(
                os.path.join(each_file, each_patch)
                for each_patch in patch_files)
            slideIDX.extend([idx] * len(patch_files))
            cp('(#g)index: {}(#)\t(#r)name: {}(#)\t(#y)len: {}(#)'.format(
                idx + 1, slide_name, len(patch_files)))
        cp('(#g)total: {}(#)'.format(len(grid)))

        # Plain string messages: the previous ``print(...)`` argument
        # evaluated to None, leaving the AssertionError without a message.
        assert len(targets) == len(slidenames), "targets and names not match"
        assert len(slideIDX) == len(grid), "idx and mask not match"

        # Augmentation pipeline; the augmenters are applied in random order.
        seq = iaa.Sequential(
            [
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
                iaa.Crop(percent=(0.1, 0.3)),
                # iaa.Sometimes(0.4, iaa.GaussianBlur(sigma=(0, 0.5))),
                # iaa.Sometimes(0.6, iaa.ContrastNormalization((0.75, 1.5))),
                # iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
                iaa.Rot90((1, 3))
            ],
            random_order=True)

        self.slidenames = slidenames
        self.targets = targets
        self.grid = grid
        self.slideIDX = slideIDX
        self.transform = transform
        self.mode = None
        self.slideLen = slideLen  # cumulative patch counts per slide
        self.size = config.DATASET.PATCHSIZE
        self.seq = seq
        self.multi_scale = config.DATASET.MULTISCALE
        self.unit = self.size
        self.overlap = config.DATASET.OVERLAP
        self.step = self.unit
        self.blocks_per_slide = 1
        # The multi-scale views start out as aliases of the single-scale lists.
        self.ms_slideLen = self.slideLen
        self.ms_slideIDX = self.slideIDX
Пример #4
0
def rename():
    """Shorten ``.tif`` file names under ``cfg.data_append_path``.

    Every file whose name contains ``.tif`` but not ``.enp`` is renamed to
    the part of its name before the first ``_`` (with any ``.tif`` removed)
    plus a ``.tif`` suffix. Files already carrying that name are skipped.
    """
    for folder, _subdirs, file_names in os.walk(cfg.data_append_path):
        for file_name in file_names:
            if '.tif' not in file_name or '.enp' in file_name:
                continue
            stem = file_name.split('_')[0].replace(".tif", "")
            src = os.path.join(folder, file_name)
            dst = os.path.join(folder, stem + (".tif"))
            if src != dst:
                os.rename(src, dst)
                cp('(#r){}(#)\t(#g){}(#)'.format(src, dst))
Пример #5
0
def main():
    """Score the first two positive training slides and export top patches.

    For every slide the ``args.k`` patches returned by ``group_topk`` are
    copied below ``cfg.patch_predict`` and handed to ``plot_label`` together
    with the slide's ``*_orign.jpg`` image and their probabilities.
    """
    # load model
    with procedure('load model'):
        net = models.resnet34(True)
        net.fc = nn.Linear(net.fc.in_features, cfg.n_classes)
        net = torch.nn.DataParallel(net.cuda())
        if args.resume:
            checkpoint = torch.load(args.model)
            net.load_state_dict(checkpoint['state_dict'])
        cudnn.benchmark = True

    with procedure('prepare dataset'):
        # normalization
        normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
        trans = transforms.Compose([transforms.ToTensor(), normalize])
        # load data
        with open(cfg.data_split) as split_file:
            split = json.load(split_file)
        dset = MILdataset(split['train_pos'][:2], args.patch_size, trans)
        loader = torch.utils.data.DataLoader(dset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    dset.setmode(1)
    patch_probs = inference(loader, net)
    maxs, index = group_topk(np.array(dset.slideIDX), patch_probs, args.k)
    if not os.path.isdir(cfg.color_img_path):
        os.makedirs(cfg.color_img_path)

    per_slide = zip(dset.slidenames, dset.targets, maxs, index)
    for name, target, slide_probs, slide_idxs in per_slide:
        assert len(dset.slidenames) == len(maxs), print("length, error")
        if target == 1:
            flag = 'pos'
        else:
            flag = 'neg'
        orign_img_path = os.path.join(cfg.data_path, flag, name,
                                      name + '_orign.jpg')
        color_img_path = os.path.join(cfg.color_img_path, name + '.jpg')
        orign_img = cv2.imread(orign_img_path)

        patch_names = []
        for rank in range(args.k):
            src = dset.grid[slide_idxs[rank]]
            src_parts = src.split('/')
            # Patches are grouped by their parent (slide) directory name.
            dst = os.path.join(cfg.patch_predict, flag, src_parts[-2])
            if not os.path.isdir(dst):
                os.makedirs(dst)
            shutil.copy(src, dst)
            cp('(#r){}(#)\t(#g){}(#)'.format(src, dst))
            patch_names.append(src_parts[-1])

        plot_label(orign_img, patch_names, slide_probs, color_img_path)
Пример #6
0
def inference(run, loader, model):
    """Return the class-1 softmax probability of every item served by ``loader``.

    Args:
        run: zero-based epoch index, used only in the progress message.
        loader: iterable of input batches that also exposes ``dataset``;
            results are stored in the order the batches arrive.
        model: network whose output has the class scores on dim 1.

    Returns:
        1-D numpy array with ``len(loader.dataset)`` entries.
    """
    model.eval()
    probs = torch.FloatTensor(len(loader.dataset))
    # Running write position. Deriving it from the batches actually received
    # (rather than ``i * args.batch_size``) stays correct when the loader was
    # built with a different batch size than the global ``args``.
    offset = 0
    with torch.no_grad():
        for i, batch in enumerate(loader):
            if (i + 1) % 50 == 0:
                cp('(#y)Inference\t(#)(#b)Epoch:[{}/{}]\t(#)(#g)Batch: [{}/{}](#)'
                   .format(run + 1, args.nepochs, i + 1, len(loader)))
            batch = batch.cuda()
            output = F.softmax(model(batch), dim=1)
            count = batch.size(0)
            probs[offset:offset + count] = output.detach()[:, 1].clone()
            offset += count
    return probs.cpu().numpy()
Пример #7
0
def group_topk(groups, data, k=1):
    """Return the ``k`` largest values of every group and their positions.

    Args:
        groups: 1-D array-like of group ids, one per element of ``data``.
        data: 1-D array-like of scores, same length as ``groups``.
        k: number of entries to keep per group (default 1).

    Returns:
        ``(out, out_index)``: two lists with one row per group, ordered by
        ascending group id. ``out[g]`` holds the group's ``k`` largest
        scores in ascending order and ``out_index[g]`` the positions of
        those scores in the original ``data``.

    Raises:
        ValueError: if ``k`` is smaller than 1 or any group has fewer than
            ``k`` members. Previously such input either tripped an assert
            without a usable message or silently produced rows mixing
            several groups.
    """
    groups = np.asarray(groups)
    data = np.asarray(data)
    if k < 1:
        raise ValueError('k must be at least 1, got {}'.format(k))
    _, counts = np.unique(groups, return_counts=True)
    if counts.size and counts.min() < k:
        raise ValueError(
            'every group needs at least k={} members, smallest has {}'.format(
                k, int(counts.min())))

    # Sort by group first, then by score, so each group's top-k sit at the
    # end of its run.
    order = np.lexsort((data, groups))
    groups = groups[order]
    data = data[order]

    # An element is kept when the element k places later belongs to another
    # group; the last k elements overall are always kept.
    keep = np.ones(len(groups), dtype=bool)
    keep[:-k] = groups[k:] != groups[:-k]
    top_data = data[keep]
    top_order = order[keep]

    out = [list(top_data[i:i + k]) for i in range(0, len(top_data), k)]
    out_index = [list(top_order[i:i + k]) for i in range(0, len(top_order), k)]
    return out, out_index
Пример #8
0
def work(args):
    """Turn one slide file into a preview image, a mask, patch JPEGs and a JSON summary.

    ``args`` is a single 8-item sequence; its first five items are described
    in the string literal below, and ``nums``, ``bin`` and ``thresh`` are
    only forwarded to ``get_threshold``.

    Stage 1 ('Tiff2jpeg') runs when either ``<img>_mask.jpg`` or
    ``<img>_orign.jpg`` is missing; stage 2 ('Cut image') runs only when
    ``output_patch_path`` does not exist yet.
    """
    img_path, size, scale, output_patch_path, patch_size, nums, bin, thresh = args
    '''
    img_path: path of tif  (e.g. ./data_append/1/1.tif)
    size: size of patch (from tiff to jpeg) (e.g. 20000)
    scale: scale (riff2jpeg)  (e.g. 4)
    output_patch_path: path of patch (e.g. ./Patch/pos/1)
    patch_size: during cut_image (2048)
    '''
    # Both outputs live next to the slide: strip the 4-character extension.
    output_mask_path = img_path[:-4] + '_mask.jpg'
    output_img_path = img_path[:-4] + '_orign.jpg'

    slide = opsl.OpenSlide(img_path)
    # n is used below as the horizontal extent and m as the vertical extent.
    [n, m] = slide.dimensions

    with procedure('Tiff2jpeg'):
        if not os.path.isfile(output_mask_path) or not os.path.isfile(output_img_path) :
            # Read the slide in size x size tiles, shrink each by `scale`,
            # and stitch the tiles back together row by row.
            blocks_pre_col = math.ceil(m / size)
            blocks_pre_row = math.ceil(n / size)
            row_cache = []
            img_cache = []
            for i in range(blocks_pre_col):
                for j in range(blocks_pre_row):
                    # x is the vertical offset, y the horizontal one; the
                    # last tile in each direction is clipped to the slide.
                    x = i * size
                    y = j * size
                    height = min(x + size, m) - x
                    width = min(y + size, n) - y
                    img = np.array(slide.read_region((y, x), 0, (width, height)))
                    img = cv2.resize(img, (width // scale, height // scale))
                    row_cache.append(img)
                img_cache.append(np.concatenate(row_cache, axis=1))
                row_cache = []
            img = np.concatenate(img_cache, axis=0)
            # NOTE(review): read_region output is not converted to RGB here,
            # unlike in the patch stage below - confirm BGR2GRAY is intended.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Otsu threshold, then invert the result to obtain the mask.
            ret1, th1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
            mask = 255 - th1
            cv2.imwrite(output_mask_path, mask)
            cv2.imwrite(output_img_path,img)
            cp('(#g)save_mask_path:{}(#)'.format(output_mask_path))
            cp('(#g)save_orign_path:{}(#)'.format(output_img_path))

    with procedure('Cut image'):
        if not os.path.isdir(output_patch_path):
            # Always reload the mask from disk (flag 0 = grayscale).
            mask = cv2.imread(output_mask_path,0)
            assert  len(mask.shape) == 2 ,print('size error')
            # Patch size expressed in mask pixels; windows advance by half a
            # patch, so neighbouring windows overlap by 50%.
            mask_patch_size = patch_size // scale
            step = mask_patch_size // 2
            # NOTE(review): the bare except also hides real failures such as
            # permission errors, not only "directory already exists".
            try:
                os.makedirs(output_patch_path)
            except:
                pass
            data = {}
            #patch_overlap_count = []
            data['roi'] = []
            h, w =mask.shape[0], mask.shape[1]
            # The slide id is the name of the directory holding the slide file.
            threshold = get_threshold(img_path.split('/')[-2], nums, bin, thresh)

            data['threshold'] = threshold
            cp('(#r)Processinf:{}\tThreshold:{}'.format(img_path.split('/')[-2],threshold))
            for i in range(0, h, step):
                for j in range(0, w, step):
                    # Clamp the window so it ends at the mask border.
                    si = min(i, h - mask_patch_size)
                    sj = min(j, w - mask_patch_size)
                    si = max(0, si)  # h may be smaller than the patch size
                    sj = max(0, sj)
                    # Matching top-left corner in full-resolution coordinates.
                    x = min(scale * si, m - patch_size)
                    y = min(scale * sj, n - patch_size)
                    sub_img = mask[si: si + mask_patch_size, sj: sj + mask_patch_size]
                    # Approximate fraction of the window covered by mask
                    # pixels (sum of values / 255, divided by the area).
                    cur_scale = (np.sum(sub_img) // 255) / (sub_img.shape[0] * sub_img.shape[1])
                    #patch_overlap_count.append([x, y, cur_threshold])
                    if cur_scale > threshold:
                        data['roi'].append([x, y,cur_scale])
                        # NOTE(review): the array is RGB while cv2.imwrite
                        # presumably expects BGR - confirm the channel order.
                        patch = np.array(slide.read_region((y, x), 0, (patch_size, patch_size)).convert('RGB'))
                        patch_name = "{}_{}.jpg".format(x, y)
                        patch_path = os.path.join(output_patch_path, patch_name)
                        cv2.imwrite(patch_path, patch)
                        cp('(#y)save_path:\t{}(#)'.format(patch_path))
                    # The window was clamped, i.e. it already touched the
                    # right border: this row of windows is finished.
                    if sj != j:
                        break
                # Same for the bottom border: the last row has been processed.
                if si != i:
                    break
            # Drop the 9-character '_mask.jpg' suffix to rebuild the base path.
            json_path = output_mask_path[:-9] + '_mask.json'
            data['id'] = img_path.split('/')[-2]
            with open(json_path, 'w') as f:
                json.dump(data, f)
            cp('(#g)save_json:\t{}(#)'.format(json_path))
            '''
Пример #9
0
    labels = df.values
    m = {}
    for val in labels:
        m[val[0]] = val[1]
    params = []
    idx = 0
    for root, dirs, filenames in os.walk(cfg.data_neg_path):
        for each_tif in filenames:
            if '.tif' in each_tif:
                name = each_tif.split('.')[0]
                flag = 'pos' if m[int(name)] == 'Positive' else 'neg'
                path = os.path.join(root, each_tif)  # ./EDCP/data_append/1/1.tif
                out_patch_path = os.path.join(cfg.patch_data, flag, name)  # ./EDCP_PATCH/pos/1/
                idx += 1
                params.append([path, args.size, args.scale, out_patch_path, args.patch_size, args.nums, args.bin, args.threshold])

    # print(idx)
    cp('(#b)total_img:\t{}(#)'.format(idx))
    pool = threadpool.ThreadPool(args.poolsize)
    requests = threadpool.makeRequests(work, params)
    [pool.putRequest(req) for req in requests]
    pool.wait()








Пример #10
0
def main():
    """Train a ResNet-34 patch classifier with per-slide patch selection.

    Every epoch scores all training patches, lets ``group_argtopk`` pick
    patches per slide from those scores, trains on the selection and
    appends the loss to ``convergence.csv`` in ``args.output``. When
    ``args.val`` is set, every ``args.test_every`` epochs the validation
    slides are scored and the checkpoint is saved whenever
    ``1 - (FPR + FNR) / 2`` is at least the best value seen so far.
    """
    global args, best_acc
    args = get_args()

    # cnn
    with procedure('init model'):
        model = models.resnet34(True)
        model.fc = nn.Linear(model.fc.in_features, 2)
        model = torch.nn.parallel.DataParallel(model.cuda())

    with procedure('loss and optimizer'):
        if cfg.weights == 0.5:
            criterion = nn.CrossEntropyLoss().cuda()
        else:
            # Class weights: (1 - w) for class 0 and w for class 1.
            w = torch.Tensor([1 - cfg.weights, cfg.weights])
            criterion = nn.CrossEntropyLoss(w).cuda()
        optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

    cudnn.benchmark = True

    # normalization
    normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
    trans = transforms.Compose([transforms.ToTensor(), normalize])

    with procedure('prepare dataset'):
        # load data
        with open(cfg.data_split) as f:
            data = json.load(f)
        train_dset = MILdataset(data['train_neg'][:14] + data['train_pos'],
                                args.patch_size, trans)
        train_loader = torch.utils.data.DataLoader(train_dset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        if args.val:
            val_dset = MILdataset(data['val_pos'] + data['val_neg'],
                                  args.patch_size, trans)
            val_loader = torch.utils.data.DataLoader(
                val_dset,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True)
    with procedure('init tensorboardX'):
        tensorboard_path = os.path.join(args.output, 'tensorboard')
        os.makedirs(tensorboard_path, exist_ok=True)
        summary = TensorboardSummary(tensorboard_path, args.dis_slide)
        writer = summary.create_writer()

    # Start a fresh convergence log. Context managers guarantee the handle
    # is closed even when a write fails.
    convergence_path = os.path.join(args.output, 'convergence.csv')
    with open(convergence_path, 'w') as fconv:
        fconv.write('epoch,metric,value\n')

    # loop through epochs
    for epoch in range(args.nepochs):
        # Score every training patch, then train on the selected ones.
        train_dset.setmode(1)
        probs = inference(epoch, train_loader, model)
        topk = group_argtopk(np.array(train_dset.slideIDX), probs, args.k)
        images, names, labels = train_dset.getpatchinfo(topk)
        summary.plot_calsses_pred(writer, images, names, labels,
                                  np.array([probs[k] for k in topk]), args.k,
                                  epoch)
        slidenames, length = train_dset.getslideinfo()
        summary.plot_histogram(writer, slidenames, probs, length, epoch)
        train_dset.maketraindata(topk)
        train_dset.shuffletraindata()
        train_dset.setmode(2)
        loss = train(epoch, train_loader, model, criterion, optimizer, writer)
        cp('(#r)Training(#)\t(#b)Epoch: [{}/{}](#)\t(#g)Loss:{}(#)'.format(
            epoch + 1, args.nepochs, loss))
        with open(convergence_path, 'a') as fconv:
            fconv.write('{},loss,{}\n'.format(epoch + 1, loss))

        # Validation
        if args.val and (epoch + 1) % args.test_every == 0:
            val_dset.setmode(1)
            probs = inference(epoch, val_loader, model)
            maxs = group_max(np.array(val_dset.slideIDX), probs,
                             len(val_dset.targets))
            pred = [1 if x >= 0.5 else 0 for x in maxs]
            err, fpr, fnr = calc_err(pred, val_dset.targets)
            cp('(#y)Vaildation\t(#)(#b)Epoch: [{}/{}]\t(#)(#g)Error: {}\tFPR: {}\tFNR: {}(#)'
               .format(epoch + 1, args.nepochs, err, fpr, fnr))
            with open(convergence_path, 'a') as fconv:
                fconv.write('{},error,{}\n'.format(epoch + 1, err))
                fconv.write('{},fpr,{}\n'.format(epoch + 1, fpr))
                fconv.write('{},fnr,{}\n'.format(epoch + 1, fnr))
            # Save best model; selection uses the mean of FPR and FNR rather
            # than the raw error logged above.
            err = (fpr + fnr) / 2.
            if 1 - err >= best_acc:
                best_acc = 1 - err
                obj = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(obj, os.path.join(args.output,
                                             'checkpoint_best.pth'))
Пример #11
0
def main():
    """Train the multi-scale model and validate it with a mean DSC score.

    Every epoch scores the training set once per entry of
    ``config.DATASET.MULTISCALE``, lets ``group_argtopk`` select samples for
    each scale and trains on the combined selection. When
    ``config.TRAIN.VAL`` is set, every ``config.TRAIN.VALGAP`` epochs the
    validation set is scored, ``get_mask`` is evaluated per image, and the
    checkpoint is saved whenever the mean of the returned values is at
    least the best value seen so far.
    """
    args = get_args()
    global best_dsc
    # cnn
    with procedure('init model'):
        model = get_model(config)
        model = torch.nn.parallel.DataParallel(model.cuda())

    with procedure('loss and optimizer'):
        criterion = FocalLoss(config.TRAIN.LOSS.GAMMA,
                              config.DATASET.ALPHA).cuda()
        # NOTE(review): weight_decay is set to the learning rate - confirm
        # this is intended and not a missing dedicated config entry.
        optimizer = optim.Adam(model.parameters(),
                               lr=config.TRAIN.LR,
                               weight_decay=config.TRAIN.LR)
    start_epoch = 0

    if config.TRAIN.RESUME:
        with procedure('resume model'):
            # NOTE(review): the restored ``best_acc`` is never used and
            # ``best_dsc`` is not restored on resume - verify against
            # ``load_model``.
            start_epoch, best_acc, model, optimizer = load_model(
                model, optimizer)

    cudnn.benchmark = True
    # normalization
    normalize = transforms.Normalize(mean=config.DATASET.MEAN,
                                     std=config.DATASET.STD)
    trans = transforms.Compose([transforms.ToTensor(), normalize])

    with procedure('prepare dataset'):
        # load data
        data_split = config.DATASET.SPLIT
        with open(data_split) as f:
            data = json.load(f)

        train_dset = MILdataset(data['train_neg'] + data['train_pos'], trans)
        train_loader = DataLoader(train_dset,
                                  batch_size=config.TRAIN.BATCHSIZE,
                                  shuffle=False,
                                  num_workers=config.WORKERS,
                                  pin_memory=True)
        if config.TRAIN.VAL:
            val_dset = MILdataset(data['val_pos'] + data['val_neg'], trans)
            val_loader = DataLoader(val_dset,
                                    batch_size=config.TEST.BATCHSIZE,
                                    shuffle=False,
                                    num_workers=config.WORKERS,
                                    pin_memory=True)

    with procedure('init tensorboardX'):
        # One timestamped output directory per run.
        train_log_path = os.path.join(
            config.TRAIN.OUTPUT,
            time.strftime('%Y%m%d_%H%M%S', time.localtime()))
        if not os.path.isdir(train_log_path):
            os.makedirs(train_log_path)
        tensorboard_path = os.path.join(train_log_path, 'tensorboard')
        with open(os.path.join(train_log_path, 'cfg.yaml'), 'w') as f:
            print(config, file=f)
        if not os.path.isdir(tensorboard_path):
            os.makedirs(tensorboard_path)
        summary = TensorboardSummary(tensorboard_path)
        writer = summary.create_writer()

    for epoch in range(start_epoch, config.TRAIN.EPOCHS):
        # Collect [sample index, scale index] pairs across all scales.
        index = []
        for idx, each_scale in enumerate(config.DATASET.MULTISCALE):
            train_dset.setmode(idx)
            probs = inference(epoch, train_loader, model)
            topk = group_argtopk(train_dset.ms_slideIDX[:], probs,
                                 train_dset.targets[:],
                                 train_dset.ms_slideLen[:], each_scale)
            index.extend([each, idx] for each in topk)
        train_dset.maketraindata(index)
        train_dset.shuffletraindata()
        train_dset.setmode(-1)
        loss = trainer(epoch, train_loader, model, criterion, optimizer,
                       writer)
        cp('(#r)Training(#)\t(#b)Epoch: [{}/{}](#)\t(#g)Loss:{}(#)'.format(
            epoch + 1, config.TRAIN.EPOCHS, loss))

        if config.TRAIN.VAL and (epoch + 1) % config.TRAIN.VALGAP == 0:
            # Merge the parsed predictions of every scale per image key.
            patch_info = {}
            for idx, each_scale in enumerate(config.DATASET.MULTISCALE):
                val_dset.setmode(idx)
                probs, img_idxs, rows, cols = inference_vt(
                    epoch, val_loader, model)
                res = probs_parser(probs, img_idxs, rows, cols, val_dset,
                                   each_scale)

                for key, val in res.items():
                    if key not in patch_info:
                        patch_info[key] = val
                    else:
                        patch_info[key].extend(val)

            # ``Pool.apply`` blocks until each call returns, so the previous
            # loop ran the tasks one at a time. ``starmap`` spreads them
            # over the workers and returns the results in task order.
            tasks = [(each_img, each_labels, None, False)
                     for each_img, each_labels in patch_info.items()]
            with multiprocessing.Pool(processes=16) as pool:
                res = pool.starmap(get_mask, tasks)
            pool.join()

            dsc = []
            for each_res in res:
                dsc.extend(each_res.values())

            dsc = np.array(dsc).mean()
            cp('(#y)Vaildation\t(#)(#b)Epoch: [{}/{}]\t(#)(#g)DSC: {}(#)'.
               format(epoch + 1, config.TRAIN.EPOCHS, dsc))
            writer.add_scalar('Val/dsc', dsc, epoch)
            if dsc >= best_dsc:
                best_dsc = dsc
                obj = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_dsc': best_dsc,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(obj,
                           os.path.join(train_log_path, 'BestCheckpoint.pth'))