Пример #1
0
def get_train_test_loader(anno_csv, croppath, testfold, transform_train, transform_test,
                          luna_root='/data/LUNA16', batch_size=24, num_workers=8):
    """Build the train and test DataLoaders for one cross-validation fold.

    Every annotated nodule whose scan id (the part of ``seriesuid`` before the
    first ``'-'``) has a ``.mhd`` file in ``<luna_root>/subset<testfold>`` goes
    to the test split; every other nodule goes to the training split.  Nodules
    whose scan id is in the module-level ``blklst`` are dropped.

    Args:
        anno_csv: Path of the annotation CSV.  Its header row is replaced by
            the column names seriesuid, coordZ, coordY, coordX, diameter_mm,
            malignant.
        croppath: Directory holding one ``<seriesuid>.npy`` crop per nodule.
        testfold: Index of the subset used as the test split.
        transform_train: Transform handed to the training ``lunanod`` dataset.
        transform_test: Transform handed to the test ``lunanod`` dataset.
        luna_root: Directory containing the ``subset<N>`` folders.
        batch_size: Batch size of both loaders.
        num_workers: Worker process count of both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple; only the training loader
        shuffles.
    """
    dataframe = pd.read_csv(anno_csv,
                            names=['seriesuid', 'coordZ', 'coordY', 'coordX', 'diameter_mm', 'malignant'], header=0)
    alllst = dataframe['seriesuid'].tolist()
    labellst = dataframe['malignant'].tolist()
    dimlst = dataframe['diameter_mm'].tolist()

    # Make a test dataset
    print('Using subset{} as test split.'.format(testfold))
    subset_dir = os.path.join(luna_root, 'subset' + str(testfold))
    # A set keeps the per-row membership test below O(1).
    teidlst = {fname[:-4] for fname in os.listdir(subset_dir) if fname.endswith('.mhd')}

    trfnamelst, trlabellst, trfeatlst = [], [], []
    tefnamelst, telabellst, tefeatlst = [], [], []

    for srsid, label, d in zip(alllst, labellst, dimlst):
        scan_id = srsid.split('-')[0]
        if scan_id in blklst:
            continue

        # crop raw pixel as feature
        data = np.load(os.path.join(croppath, srsid + '.npy'))
        # Floor division keeps the offsets integral; with true division they
        # become floats on Python 3 and cannot be used as slice bounds.
        bgx = data.shape[2] // 2 - CROPSIZE // 2
        bgy = data.shape[1] // 2 - CROPSIZE // 2
        bgz = data.shape[0] // 2 - CROPSIZE // 2
        # The stored crops are expected to already be CROPSIZE wide.
        assert bgx == bgy == bgz == 0
        data = np.array(data[bgz:bgz + CROPSIZE, bgy:bgy + CROPSIZE, bgx:bgx + CROPSIZE])
        # Feature vector: flattened voxels divided by 255, then the diameter.
        feat = np.hstack((np.reshape(data, (-1,)) / 255, float(d)))

        if scan_id in teidlst:
            tefnamelst.append(srsid + '.npy')
            telabellst.append(int(label))
            tefeatlst.append(feat)
        else:
            trfnamelst.append(srsid + '.npy')
            trlabellst.append(int(label))
            trfeatlst.append(feat)

    trainset = lunanod(croppath, trfnamelst, trlabellst, trfeatlst,
                       train=True, transform=transform_train, target_transform=None, download=True)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                              num_workers=num_workers)

    testset = lunanod(croppath, tefnamelst, telabellst, tefeatlst, train=False, transform=transform_test,
                      target_transform=None, download=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,
                                             num_workers=num_workers)

    return trainloader, testloader
Пример #2
0
def load_data(trained_data_path, test_data_path, fold, batch_size,
              num_workers, anno_csv='./data/annotationdetclsconvfnl_v3.csv'):
    """Build the train and test DataLoaders for one cross-validation fold.

    The pixel mean and standard deviation over every ``.npy`` file in
    ``trained_data_path`` are computed first and fed to the ``Normalize``
    step of both transform pipelines.  Each annotated nodule then receives a
    ``(32, 32, 32)`` float mask as its feature and is routed to the test
    split when its scan id (the part of ``seriesuid`` before the first
    ``'-'``) has a ``.mhd`` file in ``test_data_path + str(fold)``.

    Args:
        trained_data_path: Directory holding the ``<seriesuid>.npy`` crops.
        test_data_path: Path prefix; ``str(fold)`` is appended to it to get
            the directory listing the test scans.
        fold: Index of the fold used as the test split.
        batch_size: Batch size of both loaders.
        num_workers: Worker process count of both loaders.
        anno_csv: Path of the annotation CSV (first row is a header).

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles.

    Raises:
        ValueError: If ``trained_data_path`` contains no usable ``.npy`` file.
        FileNotFoundError: If an annotated nodule has no crop file.
    """
    crop_size = 32
    black_list = []

    preprocess_path = trained_data_path

    # List the directory once; both statistics passes read the same files.
    npy_paths = [
        os.path.join(preprocess_path, file_name)
        for file_name in os.listdir(preprocess_path)
        if file_name.endswith('.npy') and file_name[:-4] not in black_list
    ]
    if not npy_paths:
        # Without this guard the mean below fails with a bare division error.
        raise ValueError('no .npy crops found in ' + str(preprocess_path))

    # Pass 1: mean over every voxel.
    pix_value, npix = 0, 0
    for path in npy_paths:
        data = np.load(path)
        pix_value += np.sum(data)
        npix += np.prod(data.shape)
    pix_mean = pix_value / float(npix)

    # Pass 2: standard deviation around that mean.
    pix_value = 0
    for path in npy_paths:
        data = np.load(path) - pix_mean
        pix_value += np.sum(data * data)
    pix_std = np.sqrt(pix_value / float(npix))
    print(pix_mean, pix_std)

    transform_train = transforms.Compose([
        # transforms.RandomScale(range(28, 38)),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomYFlip(),
        transforms.RandomZFlip(),
        transforms.ZeroOut(4),
        transforms.ToTensor(),
        transforms.Normalize(
            (pix_mean),
            (pix_std)),  # need to cal mean and std, revise norm func
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((pix_mean), (pix_std)),
    ])

    # The CSV is read without header handling, so row 0 is the header line
    # and is dropped from every column.
    data_frame = pd.read_csv(anno_csv,
                             names=[
                                 'seriesuid', 'coordX', 'coordY', 'coordZ',
                                 'diameter_mm', 'malignant'
                             ])
    all_list = data_frame['seriesuid'].tolist()[1:]
    label_list = data_frame['malignant'].tolist()[1:]
    dim_list = data_frame['diameter_mm'].tolist()[1:]

    # Scan ids belonging to the test fold (set for O(1) membership tests).
    test_id_list = {
        file_name[:-4]
        for file_name in os.listdir(test_data_path + str(fold) + '/')
        if file_name.endswith('.mhd')
    }

    # The coordinate grid does not depend on the nodule, so build it once.
    half = crop_size / 2
    grid_y, grid_x, grid_z = np.ogrid[-half:half, -half:half, -half:half]
    # NOTE(review): cubes rather than squares are summed here, so the mask is
    # not a sphere of diameter d -- confirm this is intended.
    cubic_term = abs(grid_y**3 + grid_x**3 + grid_z**3)

    train_file_name_list = []
    train_label_list = []
    train_feat_list = []
    test_file_name_list = []
    test_label_list = []
    test_feat_list = []

    mxd = 0
    for srsid, label, d in zip(all_list, label_list, dim_list):
        diameter = abs(float(d))
        # The maximum includes black-listed rows, as before.
        mxd = max(diameter, mxd)
        if srsid in black_list:
            continue
        # The feature is derived from the diameter alone, so the crop itself
        # is not loaded here; only its presence is verified.
        crop_path = os.path.join(preprocess_path, srsid + '.npy')
        if not os.path.isfile(crop_path):
            raise FileNotFoundError(crop_path)

        feat = (cubic_term <= diameter**3).astype(float)
        if srsid.split('-')[0] in test_id_list:
            test_file_name_list.append(srsid + '.npy')
            test_label_list.append(int(label))
            test_feat_list.append(feat)
        else:
            train_file_name_list.append(srsid + '.npy')
            train_label_list.append(int(label))
            train_feat_list.append(feat)

    # NOTE(review): feat is a 3-D mask, so feat[-1] is its last slice along
    # axis 0, not a trailing diameter entry.  The scaling is kept unchanged
    # to preserve the values fed to the dataset -- confirm whether it is
    # still wanted.
    for feat in train_feat_list:
        feat[-1] /= mxd
    for feat in test_feat_list:
        feat[-1] /= mxd

    train_set = lunanod(preprocess_path,
                        train_file_name_list,
                        train_label_list,
                        train_feat_list,
                        train=True,
                        download=True,
                        transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    test_set = lunanod(preprocess_path,
                       test_file_name_list,
                       test_label_list,
                       test_feat_list,
                       train=False,
                       download=True,
                       transform=transform_test)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    return train_loader, test_loader
Пример #3
0
        tefeatlst.append(feat)
    else:
        trfnamelst.append(srsid + '.npy')
        trlabellst.append(int(label))
        trfeatlst.append(feat)
# Divide the last entry of every training feature by mxd.  mxd is assigned
# before this excerpt (NOTE(review): presumably the largest diameter seen in
# the annotations -- confirm against the code above).
for idx in range(len(trfeatlst)):
    # Scaling of the first three entries by mxx/mxy/mxz is disabled:
    # trfeatlst[idx][0] /= mxx
    # trfeatlst[idx][1] /= mxy
    # trfeatlst[idx][2] /= mxz
    trfeatlst[idx][-1] /= mxd
# Same scaling for the test features.
for idx in range(len(tefeatlst)):
    # tefeatlst[idx][0] /= mxx
    # tefeatlst[idx][1] /= mxy
    # tefeatlst[idx][2] /= mxz
    tefeatlst[idx][-1] /= mxd
# Wrap both splits in lunanod datasets; only the training loader shuffles.
trainset = lunanod(preprocesspath, trfnamelst, trlabellst, trfeatlst, train=True, download=True,
                   transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=20)

testset = lunanod(preprocesspath, tefnamelst, telabellst, tefeatlst, train=False, download=True,
                  transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=20)
# One checkpoint directory per fold.
savemodelpath = './checkpoint-' + str(fold) + '/'
# Model
print(args.resume)
if args.resume:
    print('==> Resuming from checkpoint..')
    print(args.savemodel)
    # With no explicit model path given, load 'ckpt.t7' from the fold's
    # checkpoint directory, which must already exist.
    if args.savemodel == '':
        logging.info('==> Resuming from checkpoint..')
        assert os.path.isdir(savemodelpath), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(savemodelpath + 'ckpt.t7')
Пример #4
0
    def __init__(self):
        """Set up data loaders, network, loss and optimizer.

        Configuration is taken from names that are not defined in this
        method (``preprocesspath``, ``blklst``, ``csvfilepath``,
        ``luna16path``, ``fold``, ``CROPSIZE``, ``savemodelpath``, ``args``).

        Steps:
          1. Compute the pixel mean/std over every non-black-listed ``.npy``
             file in ``preprocesspath`` for the ``Normalize`` transform.
          2. Build one feature vector per annotated nodule (flattened crop
             divided by 255, plus the diameter) and split the nodules into
             train/test by whether their scan id has a ``.mhd`` file in
             ``<luna16path>/subset<fold>``.
          3. Create the ``lunanod`` datasets and their DataLoaders.
          4. Restore the network from ``savemodelpath + 'ckpt.t7'`` when
             ``args.resume`` is set, otherwise build ``dpn3d.DPN92_3D()``.
          5. Create the cross-entropy loss and the SGD optimizer.

        Annotated nodules whose crop file is missing are skipped with a
        warning.
        """
        self.best_acc = 0
        self.best_acc_gbt = 0
        # Stays 0 unless a checkpoint is restored below.
        self.start_epoch = 0
        self.use_cuda = torch.cuda.is_available()

        # List the crop directory once; both statistics passes read the same
        # files.
        npy_paths = [
            os.path.join(preprocesspath, fname)
            for fname in os.listdir(preprocesspath)
            if fname.endswith('.npy') and fname[:-4] not in blklst
        ]
        # Pass 1: mean over every voxel.
        pixvlu, npix = 0, 0
        for path in npy_paths:
            data = np.load(path)
            pixvlu += np.sum(data)
            npix += np.prod(data.shape)
        pixmean = pixvlu / float(npix)
        # Pass 2: standard deviation around that mean.
        pixvlu = 0
        for path in npy_paths:
            data = np.load(path) - pixmean
            pixvlu += np.sum(data * data)
        pixstd = np.sqrt(pixvlu / float(npix))
        print('pixmean:%.3f, pixstd:%.3f' % (pixmean, pixstd))
        logging.info('mean ' + str(pixmean) + ' std ' + str(pixstd))

        # Datatransforms
        logging.info(
            '==> Preparing data..')  # Random Crop, Zero out, x z flip, scale,
        transform_train = transforms.Compose([
            # transforms.RandomScale(range(28, 38)),
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomYFlip(),
            transforms.RandomZFlip(),
            transforms.ZeroOut(4),
            transforms.ToTensor(),
            transforms.Normalize(
                (pixmean),
                (pixstd)),  # need to cal mean and std, revise norm func
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((pixmean), (pixstd)),
        ])

        # load data list
        self.trfnamelst = []
        trlabellst = []
        trfeatlst = []
        self.tefnamelst = []
        telabellst = []
        tefeatlst = []
        # The CSV is read without header handling, so row 0 is the header
        # line and is dropped from every column.
        dataframe = pd.read_csv(csvfilepath,
                                names=[
                                    'seriesuid', 'coordX', 'coordY', 'coordZ',
                                    'diameter_mm', 'malignant'
                                ])
        alllst = dataframe['seriesuid'].tolist()[1:]
        labellst = dataframe['malignant'].tolist()[1:]
        dimlst = dataframe['diameter_mm'].tolist()[1:]

        # test id (set for O(1) membership tests)
        teidlst = {
            fname[:-4]
            for fname in os.listdir(luna16path + '/subset' + str(fold) + '/')
            if fname.endswith('.mhd')
        }

        mxd = 0
        for srsid, label, d in zip(alllst, labellst, dimlst):
            # The maximum includes black-listed rows, as before.
            mxd = max(abs(float(d)), mxd)
            if srsid in blklst:
                continue
            # crop raw pixel as feature
            npy_path = os.path.join(preprocesspath, srsid + '.npy')
            if not os.path.exists(npy_path):
                # Previously a missing file silently reused the crop loaded
                # for the preceding nodule; skip the sample instead.
                logging.warning('skipping %s: crop file %s not found', srsid,
                                npy_path)
                continue
            data = np.load(npy_path)
            # Floor division keeps the offsets usable as slice bounds on
            # Python 3 and matches integer '/' on Python 2.
            bgx = data.shape[0] // 2 - CROPSIZE // 2
            bgy = data.shape[1] // 2 - CROPSIZE // 2
            bgz = data.shape[2] // 2 - CROPSIZE // 2
            data = np.array(data[bgx:bgx + CROPSIZE, bgy:bgy + CROPSIZE,
                                 bgz:bgz + CROPSIZE])
            # Feature vector: flattened voxels divided by 255, then the
            # diameter as the last entry.
            feat = np.hstack((np.reshape(data, (-1, )) / 255, float(d)))
            if srsid.split('-')[0] in teidlst:
                self.tefnamelst.append(srsid + '.npy')
                telabellst.append(int(label))
                tefeatlst.append(feat)
            else:
                self.trfnamelst.append(srsid + '.npy')
                trlabellst.append(int(label))
                trfeatlst.append(feat)

        # Scale the trailing diameter entry by the largest diameter seen.
        for feat in trfeatlst:
            feat[-1] /= mxd
        for feat in tefeatlst:
            feat[-1] /= mxd

        trainset = lunanod(preprocesspath,
                           self.trfnamelst,
                           trlabellst,
                           trfeatlst,
                           train=True,
                           download=True,
                           transform=transform_train)
        self.trainloader = torch.utils.data.DataLoader(trainset,
                                                       batch_size=16,
                                                       shuffle=True,
                                                       num_workers=30)

        testset = lunanod(preprocesspath,
                          self.tefnamelst,
                          telabellst,
                          tefeatlst,
                          train=False,
                          download=True,
                          transform=transform_test)
        self.testloader = torch.utils.data.DataLoader(testset,
                                                      batch_size=16,
                                                      shuffle=False,
                                                      num_workers=30)

        # Model
        if args.resume:
            # Load checkpoint.
            logging.info('==> Resuming from checkpoint..')
            # assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
            checkpoint = torch.load(savemodelpath + 'ckpt.t7')
            self.net = checkpoint['net']
            # Keep the restored values on the instance; as plain locals they
            # were discarded and self.best_acc stayed at 0.
            self.best_acc = checkpoint['acc']
            self.start_epoch = checkpoint['epoch']
        else:
            logging.info('==> Building model..')
            self.net = dpn3d.DPN92_3D()

        if self.use_cuda:
            self.net.cuda()
            self.net = torch.nn.DataParallel(
                self.net,
                device_ids=list(range(torch.cuda.device_count())))
            cudnn.benchmark = False  # True

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.net.parameters(),
                                   lr=args.lr,
                                   momentum=0.9,
                                   weight_decay=5e-4)