def get_train_test_loader(anno_csv, croppath, testfold, transform_train, transform_test):
    """Build train/test DataLoaders for LUNA16 nodule classification.

    Reads nodule annotations from ``anno_csv``, crops the central
    CROPSIZE^3 cube from each pre-extracted ``.npy`` volume in ``croppath``,
    and assigns each series to the test split iff it appears in the LUNA16
    ``subset<testfold>`` directory.

    Args:
        anno_csv: CSV with seriesuid/coordZ/coordY/coordX/diameter_mm/malignant columns.
        croppath: directory containing one ``<seriesuid>.npy`` volume per nodule.
        testfold: LUNA16 subset index used as the held-out test split.
        transform_train: transform pipeline applied to training samples.
        transform_test: transform pipeline applied to test samples.

    Returns:
        (trainloader, testloader) pair of torch DataLoaders.
    """
    trfnamelst, trlabellst, trfeatlst = [], [], []
    tefnamelst, telabellst, tefeatlst = [], [], []
    dataframe = pd.read_csv(
        anno_csv,
        names=['seriesuid', 'coordZ', 'coordY', 'coordX', 'diameter_mm', 'malignant'],
        header=0)
    alllst = dataframe['seriesuid'].tolist()
    labellst = dataframe['malignant'].tolist()
    crdxlst = dataframe['coordX'].tolist()
    crdylst = dataframe['coordY'].tolist()
    crdzlst = dataframe['coordZ'].tolist()
    dimlst = dataframe['diameter_mm'].tolist()

    # Series ids found in subset<testfold> form the held-out test split.
    print('Using subset{} as test split.'.format(testfold))
    teidlst = []
    for fname in os.listdir('/data/LUNA16/subset' + str(testfold) + '/'):
        if fname.endswith('.mhd'):
            teidlst.append(fname[:-4])

    for srsid, label, x, y, z, d in zip(alllst, labellst, crdxlst, crdylst, crdzlst, dimlst):
        # `blklst` is a module-level blacklist of series ids to skip.
        if srsid.split('-')[0] in blklst:
            continue
        # Crop the central CROPSIZE^3 cube of raw voxels as the feature volume.
        data = np.load(os.path.join(croppath, srsid + '.npy'))
        # BUGFIX: use floor division — under Python 3 `/` yields floats,
        # which are not valid slice indices.
        bgx = data.shape[2] // 2 - CROPSIZE // 2
        bgy = data.shape[1] // 2 - CROPSIZE // 2
        bgz = data.shape[0] // 2 - CROPSIZE // 2
        # The pre-cropped volumes are expected to be exactly CROPSIZE^3.
        assert bgx == bgy == bgz == 0
        data = np.array(data[bgz:bgz + CROPSIZE, bgy:bgy + CROPSIZE, bgx:bgx + CROPSIZE])
        # Feature = flattened voxel cube scaled to [0, 1], plus the nodule diameter.
        feat = np.hstack((np.reshape(data, (-1,)) / 255, float(d)))
        # print(feat.shape)
        if srsid.split('-')[0] in teidlst:
            tefnamelst.append(srsid + '.npy')
            telabellst.append(int(label))
            tefeatlst.append(feat)
        else:
            trfnamelst.append(srsid + '.npy')
            trlabellst.append(int(label))
            trfeatlst.append(feat)

    trainset = lunanod(croppath, trfnamelst, trlabellst, trfeatlst, train=True,
                       transform=transform_train, target_transform=None, download=True)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=24,
                                              shuffle=True, num_workers=8)
    testset = lunanod(croppath, tefnamelst, telabellst, tefeatlst, train=False,
                      transform=transform_test, target_transform=None, download=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=24,
                                             shuffle=False, num_workers=8)
    return trainloader, testloader
def load_data(trained_data_path, test_data_path, fold, batch_size, num_workers):
    """Create train/test DataLoaders for one LUNA16 cross-validation fold.

    Computes a global voxel mean/std over every ``.npy`` crop in
    ``trained_data_path`` (used for normalisation), then splits the annotated
    nodules into train/test according to which series appear in
    ``test_data_path + str(fold)``.  Each sample's auxiliary feature is a
    binary occupancy mask of the nodule within the central crop.

    Args:
        trained_data_path: directory of pre-cropped ``<seriesuid>.npy`` volumes.
        test_data_path: path prefix of the LUNA16 subset directories; the fold
            index is appended to it.
        fold: cross-validation fold used as the held-out test split.
        batch_size: DataLoader batch size for both splits.
        num_workers: DataLoader worker count for both splits.

    Returns:
        (train_loader, test_loader) pair of torch DataLoaders.
    """
    crop_size = 32
    black_list = []

    preprocess_path = trained_data_path
    # Pass 1 over the dataset: global voxel mean.
    pix_value, npix = 0, 0
    for file_name in os.listdir(preprocess_path):
        if file_name.endswith('.npy'):
            if file_name[:-4] in black_list:
                continue
            data = np.load(os.path.join(preprocess_path, file_name))
            pix_value += np.sum(data)
            npix += np.prod(data.shape)
    pix_mean = pix_value / float(npix)
    # Pass 2: (biased) standard deviation about that mean.
    pix_value = 0
    for file_name in os.listdir(preprocess_path):
        if file_name.endswith('.npy'):
            if file_name[:-4] in black_list:
                continue
            data = np.load(os.path.join(preprocess_path, file_name)) - pix_mean
            pix_value += np.sum(data * data)
    pix_std = np.sqrt(pix_value / float(npix))
    print(pix_mean, pix_std)

    # Augmentations: random crop, axis flips, random zero-out, then
    # normalisation with the statistics computed above.  These are the
    # project's own 3-D transforms (RandomYFlip/RandomZFlip/ZeroOut are not
    # torchvision's).
    transform_train = transforms.Compose([
        # transforms.RandomScale(range(28, 38)),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomYFlip(),
        transforms.RandomZFlip(),
        transforms.ZeroOut(4),
        transforms.ToTensor(),
        transforms.Normalize(
            (pix_mean), (pix_std)),  # need to cal mean and std, revise norm func
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((pix_mean), (pix_std)),
    ])

    # load data list
    train_file_name_list = []
    train_label_list = []
    train_feat_list = []
    test_file_name_list = []
    test_label_list = []
    test_feat_list = []
    # Passing explicit `names` makes pandas read the file's header row as data
    # row 0 — hence the [1:] slices below.
    data_frame = pd.read_csv('./data/annotationdetclsconvfnl_v3.csv',
                             names=[
                                 'seriesuid', 'coordX', 'coordY', 'coordZ',
                                 'diameter_mm', 'malignant'
                             ])
    all_list = data_frame['seriesuid'].tolist()[1:]
    label_list = data_frame['malignant'].tolist()[1:]
    crdx_list = data_frame['coordX'].tolist()[1:]
    crdy_list = data_frame['coordY'].tolist()[1:]
    crdz_list = data_frame['coordZ'].tolist()[1:]
    dim_list = data_frame['diameter_mm'].tolist()[1:]

    # Series ids present in the fold's subset directory form the test split.
    test_id_list = []
    for file_name in os.listdir(test_data_path + str(fold) + '/'):
        if file_name.endswith('.mhd'):
            test_id_list.append(file_name[:-4])

    for srsid, label, x, y, z, d in zip(all_list, label_list, crdx_list,
                                        crdy_list, crdz_list, dim_list):
        if srsid in black_list:
            continue
        # Crop the central crop_size^3 cube of the volume.
        data = np.load(os.path.join(preprocess_path, srsid + '.npy'))
        bgx = int(data.shape[0] / 2 - crop_size / 2)
        bgy = int(data.shape[1] / 2 - crop_size / 2)
        bgz = int(data.shape[2] / 2 - crop_size / 2)
        data = np.array(data[bgx:bgx + crop_size, bgy:bgy + crop_size,
                             bgz:bgz + crop_size])
        # Binary occupancy mask of the nodule used as the auxiliary feature.
        # NOTE(review): this tests |y^3 + x^3 + z^3| <= d^3 rather than the
        # usual sphere test y^2 + x^2 + z^2 <= (d/2)^2 (d is a diameter, not
        # a radius) — confirm this shape is intentional before changing it.
        y, x, z = np.ogrid[-crop_size / 2:crop_size / 2,
                           -crop_size / 2:crop_size / 2,
                           -crop_size / 2:crop_size / 2]
        mask = abs(y**3 + x**3 + z**3) <= abs(float(d))**3
        feat = np.zeros((crop_size, crop_size, crop_size), dtype=float)
        feat[mask] = 1
        if srsid.split('-')[0] in test_id_list:
            test_file_name_list.append(srsid + '.npy')
            test_label_list.append(int(label))
            test_feat_list.append(feat)
        else:
            train_file_name_list.append(srsid + '.npy')
            train_label_list.append(int(label))
            train_feat_list.append(feat)

    # BUGFIX: the original ran `feat_list[idx][-1] /= mxd` over every sample —
    # a leftover from a sibling variant where `feat` is a 1-D vector whose
    # last element is the nodule diameter.  Here `feat` is a 3-D binary mask,
    # so that division silently rescaled the mask's last z-slice to 1/mxd.
    # The loops (and the now-unused mxx/mxy/mxz/mxd tracking) are removed.

    train_set = lunanod(preprocess_path,
                        train_file_name_list,
                        train_label_list,
                        train_feat_list,
                        train=True,
                        download=True,
                        transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    test_set = lunanod(preprocess_path,
                       test_file_name_list,
                       test_label_list,
                       test_feat_list,
                       train=False,
                       download=True,
                       transform=transform_test)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    return train_loader, test_loader
# NOTE(review): fragment — this chunk begins inside the train/test split
# `if/else` of a data-listing loop whose header is NOT visible in this file;
# it duplicates the flow of load_data()/__init__() elsewhere in the file.
# The enclosing function cannot be reconstructed from here, so only comments
# are added.
            tefeatlst.append(feat)
        else:
            trfnamelst.append(srsid + '.npy')
            trlabellst.append(int(label))
            trfeatlst.append(feat)
    # Normalise the trailing diameter entry of each 1-D feature vector by the
    # dataset-wide maximum diameter (coordinate normalisation is disabled).
    for idx in range(len(trfeatlst)):
        # trfeatlst[idx][0] /= mxx
        # trfeatlst[idx][1] /= mxy
        # trfeatlst[idx][2] /= mxz
        trfeatlst[idx][-1] /= mxd
    for idx in range(len(tefeatlst)):
        # tefeatlst[idx][0] /= mxx
        # tefeatlst[idx][1] /= mxy
        # tefeatlst[idx][2] /= mxz
        tefeatlst[idx][-1] /= mxd
    # Wrap the file/label/feature lists in the project dataset and loaders.
    trainset = lunanod(preprocesspath, trfnamelst, trlabellst, trfeatlst,
                       train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True, num_workers=20)
    testset = lunanod(preprocesspath, tefnamelst, telabellst, tefeatlst,
                      train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False, num_workers=20)
    # Per-fold checkpoint directory.
    savemodelpath = './checkpoint-' + str(fold) + '/'
    # Model
    print(args.resume)
    if args.resume:
        print('==> Resuming from checkpoint..')
        print(args.savemodel)
        if args.savemodel == '':
            logging.info('==> Resuming from checkpoint..')
        # presumably the directory check and checkpoint load sit directly
        # under `args.resume`; the exact nesting is ambiguous in the
        # collapsed original — confirm against the upstream source.
        assert os.path.isdir(savemodelpath), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(savemodelpath + 'ckpt.t7')
def __init__(self):
    """Set up data loaders, model, loss and optimiser for one training fold.

    Relies on module-level configuration: ``preprocesspath``, ``blklst``,
    ``csvfilepath``, ``luna16path``, ``fold``, ``savemodelpath``,
    ``CROPSIZE``, ``args``, and the project ``transforms``/``lunanod``/
    ``dpn3d`` helpers (none are visible in this chunk).
    """
    self.best_acc = 0      # best plain-classifier accuracy seen so far
    self.best_acc_gbt = 0  # best accuracy of the GBT head
    self.use_cuda = torch.cuda.is_available()

    # Pass 1 over all (non-blacklisted) crops: global voxel mean.
    pixvlu, npix = 0, 0
    for fname in os.listdir(preprocesspath):
        if fname.endswith('.npy'):
            if fname[:-4] in blklst:
                continue
            data = np.load(os.path.join(preprocesspath, fname))
            pixvlu += np.sum(data)
            npix += np.prod(data.shape)
    pixmean = pixvlu / float(npix)
    # Pass 2: standard deviation about that mean.
    pixvlu = 0
    for fname in os.listdir(preprocesspath):
        if fname.endswith('.npy'):
            if fname[:-4] in blklst:
                continue
            data = np.load(os.path.join(preprocesspath, fname)) - pixmean
            pixvlu += np.sum(data * data)
    pixstd = np.sqrt(pixvlu / float(npix))
    print('pixmean:%.3f, pixstd:%.3f' % (pixmean, pixstd))
    logging.info('mean ' + str(pixmean) + ' std ' + str(pixstd))

    # Datatransforms
    logging.info(
        '==> Preparing data..')  # Random Crop, Zero out, x z flip, scale,
    transform_train = transforms.Compose([
        # transforms.RandomScale(range(28, 38)),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomYFlip(),
        transforms.RandomZFlip(),
        transforms.ZeroOut(4),
        transforms.ToTensor(),
        transforms.Normalize(
            (pixmean), (pixstd)),  # need to cal mean and std, revise norm func
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((pixmean), (pixstd)),
    ])

    # load data list
    self.trfnamelst = []
    trlabellst = []
    trfeatlst = []
    self.tefnamelst = []
    telabellst = []
    tefeatlst = []
    # Passing explicit `names` makes pandas read the file's header row as
    # data row 0 — hence the [1:] slices below.
    dataframe = pd.read_csv(csvfilepath,
                            names=[
                                'seriesuid', 'coordX', 'coordY', 'coordZ',
                                'diameter_mm', 'malignant'
                            ])
    alllst = dataframe['seriesuid'].tolist()[1:]
    labellst = dataframe['malignant'].tolist()[1:]
    crdxlst = dataframe['coordX'].tolist()[1:]
    crdylst = dataframe['coordY'].tolist()[1:]
    crdzlst = dataframe['coordZ'].tolist()[1:]
    dimlst = dataframe['diameter_mm'].tolist()[1:]

    # test id
    teidlst = []
    for fname in os.listdir(luna16path + '/subset' + str(fold) + '/'):
        if fname.endswith('.mhd'):
            teidlst.append(fname[:-4])

    mxx = mxy = mxz = mxd = 0
    for srsid, label, x, y, z, d in zip(alllst, labellst, crdxlst, crdylst,
                                        crdzlst, dimlst):
        mxx = max(abs(float(x)), mxx)
        mxy = max(abs(float(y)), mxy)
        mxz = max(abs(float(z)), mxz)
        mxd = max(abs(float(d)), mxd)
        if srsid in blklst:
            continue
        # crop raw pixel as feature
        if os.path.exists(os.path.join(preprocesspath, srsid + '.npy')):
            data = np.load(os.path.join(preprocesspath, srsid + '.npy'))
            # BUGFIX: `/` yields floats under Python 3 and floats are not
            # valid slice indices; use floor division instead.
            bgx = data.shape[0] // 2 - CROPSIZE // 2
            bgy = data.shape[1] // 2 - CROPSIZE // 2
            bgz = data.shape[2] // 2 - CROPSIZE // 2
            data = np.array(data[bgx:bgx + CROPSIZE, bgy:bgy + CROPSIZE,
                                 bgz:bgz + CROPSIZE])
            # Feature = flattened voxel cube scaled to [0, 1] + diameter.
            feat = np.hstack((np.reshape(data, (-1, )) / 255, float(d)))
            if srsid.split('-')[0] in teidlst:
                self.tefnamelst.append(srsid + '.npy')
                telabellst.append(int(label))
                tefeatlst.append(feat)
            else:
                self.trfnamelst.append(srsid + '.npy')
                trlabellst.append(int(label))
                trfeatlst.append(feat)

    # Normalise the trailing diameter entry by the dataset-wide maximum.
    # BUGFIX: `xrange` does not exist in Python 3; use `range`.
    for idx in range(len(trfeatlst)):
        trfeatlst[idx][-1] /= mxd
    for idx in range(len(tefeatlst)):
        tefeatlst[idx][-1] /= mxd

    trainset = lunanod(preprocesspath,
                       self.trfnamelst,
                       trlabellst,
                       trfeatlst,
                       train=True,
                       download=True,
                       transform=transform_train)
    self.trainloader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=16,
                                                   shuffle=True,
                                                   num_workers=30)
    testset = lunanod(preprocesspath,
                      self.tefnamelst,
                      telabellst,
                      tefeatlst,
                      train=False,
                      download=True,
                      transform=transform_test)
    self.testloader = torch.utils.data.DataLoader(testset,
                                                  batch_size=16,
                                                  shuffle=False,
                                                  num_workers=30)

    # Model
    if args.resume:
        # Load checkpoint.
        logging.info('==> Resuming from checkpoint..')
        # assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(savemodelpath + 'ckpt.t7')
        self.net = checkpoint['net']
        # BUGFIX: the original assigned these to throwaway locals
        # (`best_acc`, `start_epoch`), so a resume never restored them.
        # Keep them on the instance; `self.start_epoch` is a new attribute —
        # confirm the training loop reads it.
        self.best_acc = checkpoint['acc']
        self.start_epoch = checkpoint['epoch']
    else:
        logging.info('==> Building model..')
        self.net = dpn3d.DPN92_3D()

    if self.use_cuda:
        self.net.cuda()
        self.net = torch.nn.DataParallel(
            self.net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = False  # True

    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=args.lr,
                               momentum=0.9,
                               weight_decay=5e-4)