def split_dataset(fnamesp, fnamesn, index_labelsp, index_labelsn):
    """Shuffle positive/negative samples, split each in half, and build
    train/validation datasets.

    Parameters are the filename lists and matching label-index lists for
    the positive (``p``) and negative (``n``) classes.

    Returns
    -------
    (train, valid) : pair of TransformDataset, both using the
        module-level ``transform`` callable.
    """
    # Independent shuffles for the positive and the negative sets.
    permp = np.random.permutation(len(fnamesp))
    permn = np.random.permutation(len(fnamesn))
    # NOTE(review): np.split(..., 2) requires len(fnamesp) to be even,
    # otherwise it raises; the negative set below uses an explicit split
    # index, which has no such restriction.
    split_p = np.split(np.array(fnamesp)[permp], 2)
    t_indexp = np.split(np.array(index_labelsp)[permp], 2)
    split_n = np.split(np.array(fnamesn)[permn], [int(len(fnamesn) / 2)])
    t_indexn = np.split(
        np.array(index_labelsn)[permn], [int(len(fnamesn) / 2)])
    # Joint shuffles over the concatenated positive+negative halves.
    # The 9500 / 15000 caps bound how many samples of each class enter
    # the training split — presumably dataset-specific; TODO confirm.
    permt = np.random.permutation(
        len(split_p[0][:9500]) + len(split_n[0][:15000]))
    permv = np.random.permutation(len(split_p[1]) + len(split_n[1]))
    # Filenames and labels are indexed by the same permutation, so each
    # file stays paired with its own label.
    d1 = LabeledImageDataset(
        list(
            zip(list(np.r_[split_p[0][:9500], split_n[0][:15000]][permt]),
                list(np.r_[t_indexp[0][:9500], t_indexn[0][:15000]][permt]))))
    d2 = LabeledImageDataset(
        list(
            zip(list(np.r_[split_p[1], split_n[1]][permv]),
                list(np.r_[t_indexp[1], t_indexn[1]][permv]))))
    train = TransformDataset(d1, transform)
    valid = TransformDataset(d2, transform)
    return train, valid
def transform(self, x):
    """Wrap *x* in a LabeledImageDataset and apply this object's
    image-processing pipeline to every example on access."""
    base = LabeledImageDataset(x)

    def augumentaion(in_data):
        # Labels pass through untouched; only the image is processed.
        image, target = in_data
        return self._image_process(image), target

    return TransformDataset(base, augumentaion)
def transform(self, x):
    """Wrap *x* in a dataset whose pixel values are scaled into [0, 1].

    Parameters
    ----------
    x : pairs accepted by LabeledImageDataset (path/array, label).

    Returns
    -------
    TransformDataset yielding ``(img / 255., label)``.
    """
    dataset = LabeledImageDataset(x)

    def normarize(in_data):
        img, label = in_data
        # BUG FIX: the original divided by .255 (i.e. 0.255), which
        # *scales up* 0-255 pixel data by ~3.9x instead of normalizing
        # it into [0, 1]. Dividing by 255. matches the sibling
        # transforms elsewhere in this file.
        img = img / 255.
        return img, label

    return TransformDataset(dataset, normarize)
def transform(self, x):
    """Wrap *x* in a dataset that applies chainer's VGG ``prepare``
    preprocessing to every image; labels are untouched."""
    base = LabeledImageDataset(x)

    def normarize(in_data):
        image, target = in_data
        image = chainer.links.model.vision.vgg.prepare(image)
        return image, target

    return TransformDataset(base, normarize)
def __init__(self, pair, root, crop_size, scales, mean, random=True):
    """Store the base dataset plus cropping configuration.

    ``random=True`` is used for quick validation-style sampling; a
    proper test pass uses dedicated classes (per the original author's
    note).
    """
    self.base = LabeledImageDataset(pair, root)
    self.mean = mean
    self.scales = scales
    self.crop_size = crop_size
    self.random = random
def load_dataset(tsv, aug=False):
    """Read a tab-separated ``label<TAB>image_file`` listing and build a
    labeled dataset rooted at DATA_DIR.

    When *aug* is true the augmenting dataset wrapper is returned
    instead of the plain one.
    """
    with open(tsv) as fp:
        pairs = [
            (DATA_DIR + image_file, numpy.int32(label))
            for label, image_file in
            (line.strip().split('\t') for line in fp)
        ]
    if aug:
        return LabeledImageDatasetWithAugmentation(pairs)
    return LabeledImageDataset(pairs)
def transform(self, x):
    """Dataset view of *x* with pixels scaled by 1/255 and images
    resized to 224x224; labels pass through."""
    base = LabeledImageDataset(x)

    def normarize(in_data):
        image, target = in_data
        image = image / 255
        image = resize(image, (224, 224))
        return image, target

    return TransformDataset(base, normarize)
def transform(self, x):
    """Training-time pipeline: random sized crop, random horizontal
    flip, then chainer's VGG ``prepare`` preprocessing."""
    base = LabeledImageDataset(x)

    def _transform(in_data):
        image, target = in_data
        image = random_sized_crop(image, scale_ratio_range=(0.3, 1))
        image = random_flip(image, x_random=True)
        image = chainer.links.model.vision.vgg.prepare(image)
        return image, target

    return TransformDataset(base, _transform)
def __init__(self, path, root, mean, crop_size=224, scales=[256, 384, 512]):
    """Multi-crop evaluation dataset.

    # assumes *mean* is a BGR pixel mean — TODO confirm against caller
    """
    self.base = LabeledImageDataset(path, root)
    self.mean = mean  # BGR
    self.crop_size = crop_size
    self.scales = scales
    # Enumerates every (position, mirror, scale) preprocessing combo;
    # the IndexExtractor walks this table.
    self.process_dict = {
        'horizontal': 5,
        'vertical': 5,
        'mirror': 2,
        'scales': len(scales),
    }
    self.index_extractor = IndexExtractor(self.process_dict)
def __init__(self, path, root, mean, crop_size, random=True, normalize=False):
    """Cropping dataset configuration around a LabeledImageDataset."""
    self.base = LabeledImageDataset(path, root)
    self.crop_size = crop_size
    self.random = random
    self.normalize = normalize
    # Keep the mean as float32 so later arithmetic stays single precision.
    self.mean = mean.astype('f')
def __init__(self, path, root, mean, crop_size=224, scales=(256, 384, 512)):
    """Multi-crop evaluation dataset.

    FIX: the original body assigned ``self.scales = scales`` but
    ``scales`` was neither a parameter nor defined anywhere visible,
    so construction raised NameError. It is now a keyword parameter
    defaulting to the same 256/384/512 scales the sibling class uses,
    which keeps existing call sites working unchanged.

    # assumes *mean* is a BGR pixel mean — TODO confirm against caller
    """
    self.base = LabeledImageDataset(path, root)
    self.mean = mean  # BGR
    self.crop_size = crop_size
    self.scales = scales
    self.process_dict = {
        '1st_preprocess': 3,  # left, center, right cropping
        '2nd_preprocess': 5,  # 4-corner and center cropping, resize
        'mirror': 2,
        'scales': len(scales)
    }
    self.index_extractor = IndexExtractor(self.process_dict)
    # Total number of crops produced per image.
    self.num_crop = len(self.index_extractor)
def load_dataset_train(datatype = 'train'):
    """Build a transformed dataset from ``<args.dataset>/<datatype>/``.

    Each class lives in its own sub-directory of .bmp files; the label
    is the index of the directory name. Pixels are scaled into [0, 1].
    """
    def transform(data):
        image, target = data
        return image / 255., target

    img_dir = str(args.dataset) + datatype + '/'
    dnames = glob.glob('{}/*'.format(img_dir))
    fnames = list(chain.from_iterable(
        glob.glob('{}/*.bmp'.format(d)) for d in dnames))
    # Map each file's parent-directory name to a unique integer id.
    owner_dirs = [os.path.basename(os.path.dirname(fn)) for fn in fnames]
    class_names = [os.path.basename(d) for d in dnames]
    labels = [class_names.index(name) for name in owner_dirs]
    dataset = LabeledImageDataset(list(zip(fnames, labels)))
    return chainer.datasets.TransformDataset(dataset, transform)
def main():
    """Evaluate a saved classifier on a directory-parsed dataset and
    write its confusion matrix to ``args.save_dir``.

    Loads (or computes and caches) the dataset mean, builds the test
    TransformDataset, restores the model weights from ``args.load_npz``
    and delegates evaluation to ``confusion_matrix_cocoa``.
    """
    args = parser()
    save_dir = Path(args.save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)
    root = args.dataset
    dataset = DirectoryParsingLabelDataset(root)
    mean_path = root + '/mean.npy'
    if os.path.exists(mean_path):
        mean = np.load(mean_path)
    else:
        # BUG FIX: the original called compute_mean(datasets, root) with
        # the undefined name `datasets` — the variable here is `dataset`
        # — which raised NameError whenever mean.npy was missing.
        mean = compute_mean(dataset, root)
        np.save(mean_path, mean)
    use_mean = args.use_mean
    print('use mean flag is ', use_mean)
    if not use_mean:
        print('not using mean')
    X = np.array([image_paths for image_paths in dataset.img_paths])
    y = np.array([label for label in dataset.labels])
    test_data = LabeledImageDataset([(x, y) for x, y in zip(X, y)])
    test = chainer.datasets.TransformDataset(
        test_data,
        partial(_transform2, mean=mean, train=False,
                mean_flag=args.use_mean))
    class_num = len(set(dataset.labels))
    model = L.Classifier(archs[args.arch](output=class_num)).to_gpu()
    serializers.load_npz(args.load_npz, model)
    # Class display names come from the sub-directory names (minus the
    # cached mean file).
    dnames = glob.glob('{}/*'.format(root))
    labels_list = []
    for d in dnames:
        p_dir = Path(d)
        labels_list.append(p_dir.name)
    if 'mean.npy' in labels_list:
        labels_list.remove('mean.npy')
    confusion_matrix_cocoa(test, args.gpu, class_num, model, save_dir, 1,
                           labels_list)
def load_dataset(lines):
    """Build a TransformDataset from CSV lines of ``folder,label``.

    Each folder under /data/ is globbed for images; files that PIL
    cannot open are skipped (deliberate best-effort filtering).

    # NOTE(review): labels are appended as numpy *strings* (elements of
    # a string array) — confirm downstream code expects that rather
    # than integer labels.
    """
    pathsAndLabels = []
    for line in lines:
        words = line.replace("\n", "").split(",")
        pathsAndLabels.append(
            np.asarray(["/data/" + words[0] + "/", words[1]]))
    # Make data for chainer
    fnames = []
    labels = []
    for pathAndLabel in pathsAndLabels:
        path = pathAndLabel[0]
        label = pathAndLabel[1]
        imagelist = glob.glob(path + "*")
        for imgName in imagelist:
            try:
                # FIX: use a context manager so the file handle opened
                # by Image.open is always closed (the original leaked
                # one handle per image).
                with Image.open(imgName) as file_check:
                    np.array(file_check, dtype=np.uint8)
            except Exception:
                continue  # unreadable image: skip it silently
            fnames.append(imgName)
            labels.append(label)
    dataset = LabeledImageDataset(list(zip(fnames, labels)))
    return TransformDataset(dataset, transform)
def __init__(self, pair, root, transformer, crop_size, random=True):
    """Keep the base labeled dataset with its transformer and cropping
    configuration."""
    self.crop_size = crop_size
    self.random = random
    self.transformer = transformer
    self.base = LabeledImageDataset(pair, root)
def main():
    """Train an MLP classifier on a labeled image dataset (Chainer
    MNIST-example layout): parse CLI args, build model/optimizer,
    set up the trainer with evaluation/snapshot/report extensions,
    run training and save the final model to ``my_model.npz``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=5,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    # Deprecated --gpu spelling maps onto the same `device` destination.
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    # train, test = chainer.datasets.get_mnist()
    # Label files list (path, label) pairs; images live under images/.
    # NOTE(review): `data_directory` is not defined in this chunk —
    # presumably a module-level constant; verify.
    train = LabeledImageDataset(
        os.path.join(data_directory, 'train/train_labels.txt'),
        os.path.join(data_directory, 'train/images'))
    test = LabeledImageDataset(
        os.path.join(data_directory, 'test/test_labels.txt'),
        os.path.join(data_directory, 'test/images'))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume is not None:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Persist the trained model separately from trainer snapshots.
    chainer.serializers.save_npz('my_model.npz', model, compression=True)
def transform(self, x):
    """Return *x* wrapped as a LabeledImageDataset, with no extra
    preprocessing applied."""
    dataset = LabeledImageDataset(x)
    return dataset
import datetime as dt
import numpy as np
import scipy.stats
import matplotlib.pylab as plt
import chainer
from chainer import cuda, Function, gradient_check, Variable, optimizers, optimizer, serializers, utils, Link, Chain, ChainList
from chainer.datasets import LabeledImageDataset
#from chainercv.transforms import resize
from chainer.datasets import TransformDataset
import chainer.functions as F
import chainer.links as L
import csv
import category_encoders as ce

#train,test = chainer.datasets.get_mnist(ndim=3)
# train_master.txt lists (filename, label) pairs; images live under
# train_images/.
dataset = LabeledImageDataset('train_master.txt', 'train_images')

def transform(in_data):
    """Currently an identity transform; the resize step is disabled."""
    img, label = in_data
    #img = resize(img, (96, 96))
    return img, label

dataset = TransformDataset(dataset, transform)
#print(dataset[1])

# Ordered 80/20 split into train and test (no shuffling).
split_at = int(len(dataset) * 0.8)
train, test = chainer.datasets.split_dataset(dataset, split_at)
# 画像フォルダのパス IMG_DIR = 'classed_image' # 各キャラクターごとのフォルダ dnames = glob.glob('{}/*'.format(IMG_DIR)) # 画像ファイルパス一覧 fnames = [glob.glob('{}/*.tif'.format(d)) for d in dnames] fnames = list(chain.from_iterable(fnames)) # それぞれにフォルダ名から一意なIDを付与 labels = [os.path.basename(os.path.dirname(fn)) for fn in fnames] dnames = [os.path.basename(d) for d in dnames] labels = [dnames.index(l) for l in labels] d = LabeledImageDataset(list(zip(fnames, labels))) print("labels :", labels) print("dnames :", dnames) #print("fnames :", fnames) #exit() def transform(data): img, label = data np_img = Image.open(img) np_img = numpy.asarray(np_img, dtype="float32") img_rgb = convert_RGB.convert_RGB(np_img, 0, 5) img = Image.fromarray(img_rgb) img = cuda.to_gpu(img, device=0) label = numpy.asarray(label, dtype="int")
def __init__(self, dataset_dir, model_name, train=True):
    """Collect (path, label) pairs from *dataset_dir* and keep both the
    raw pairs and the wrapped dataset."""
    pairs = get_pairs(dataset_dir, train=train)
    self.pairs = pairs
    self.base = LabeledImageDataset(pairs)
    self.model_name = model_name
    self.train = train
dnames_test = sorted(dnames_test, key=lambda s: int(re.search(r'\d+', s).group())) #画像ファイルパス一覧 fnames_train = [glob.glob('{}/*.jpg'.format(d)) for d in dnames_train] fnames_train = list(chain.from_iterable(fnames_train)) fnames_test = [glob.glob('{}/*.jpg'.format(d)) for d in dnames_test] fnames_test = list(chain.from_iterable(fnames_test)) # それぞれにフォルダ名から一意なIDを付与し、画像を読み込んでデータセット作成 labels_train = [ os.path.basename(os.path.dirname(fn)) for fn in fnames_train ] dnames_train = [os.path.basename(d) for d in dnames_train] labels_train = [dnames_train.index(l) for l in labels_train] d_train = LabeledImageDataset(list(zip(fnames_train, labels_train))) labels_test = [ os.path.basename(os.path.dirname(fn)) for fn in fnames_test ] dnames_test = [os.path.basename(d) for d in dnames_test] labels_test = [dnames_test.index(l) for l in labels_test] d_test = LabeledImageDataset(list(zip(fnames_test, labels_test))) #VGG用の前処理関数 def transform(data): img, label = data img = L.model.vision.vgg.prepare(img, size=(h, w)) img = img / 255. #正規化する.0〜1に落とし込む return img, label
def __init__(self, path, root, transformer, crop_size):
    """Keep the base labeled dataset together with its transformer and
    the crop size."""
    self.crop_size = crop_size
    self.transformer = transformer
    self.base = LabeledImageDataset(path, root)
def __init__(self, path, root, mean, crop_size):
    """Base labeled dataset plus a float32 mean and crop size."""
    self.base = LabeledImageDataset(path, root)
    self.crop_size = crop_size
    # float32 copy so later subtraction stays in single precision
    self.mean = mean.astype('f')
import numpy
import numpy as np
import chainer.links as L
import chainer.functions as F
from chainer import optimizers
from chainer.cuda import to_cpu
from chainer.datasets import mnist
from chainer.datasets import LabeledImageDataset
from chainer.datasets import split_dataset_random
from chainer.datasets import TransformDataset
from chainer.dataset import concat_examples
from chainer import iterators
from chainer import serializers

# Label files list (path, label) pairs relative to root='./'.
train_val = LabeledImageDataset('./after/cut_set.txt',root='./')
test = LabeledImageDataset('./test/cut_set.txt',root='./')

def transform(data):
    """Scale raw 0-255 pixel values into [0, 1]; labels pass through."""
    img,label = data
    img = img/255
    return img,label

train_val = chainer.datasets.TransformDataset(train_val,transform)
test = chainer.datasets.TransformDataset(test,transform)

# NOTE(review): the bare names `chainer` and `cf` are not bound by this
# chunk's imports — presumably imported elsewhere in the file; verify.
train, valid = split_dataset_random(train_val, int(len(cf.afterImageElement)*cf.modelCreateLearningPercentage), seed=0)
print('Training dataset size:', len(train))
def __init__(self, pair, root, transformer):
    """Wrap (pair, root) in a LabeledImageDataset and remember the
    transformer applied on access."""
    self.transformer = transformer
    self.base = LabeledImageDataset(pair, root)
return False else: if (((misalignment * i) >= left and (cutSize + (misalignment * i) <= right)) or ((misalignment * i) >= (left - cutSize / 2) and (cutSize + (misalignment * i) <= (right - cutSize / 2))) or ((misalignment * i) >= (left + cutSize / 2) and (cutSize + (misalignment * i) <= (right + cutSize / 2)))): return True else: return False test = LabeledImageDataset('./test/cut_set.txt', root='./') #切り取りサイズ(px) cutSize = cf.cutSize #一度にズラす値(px) misalignment = cf.misalignment imageWidth = (350 - cutSize) / misalignment imageHeight = (400 - cutSize) / misalignment with open('blood_test.csv') as f: reader = csv.reader(f) csvResult = [row for row in reader] teahcerAverage = [] for j in range(len(csvResult)): tmp = []
def __init__(self, pair, root, mean, crop_size, random=True):
    """Store the base dataset, mean and crop configuration, plus a
    fixed (256, 480) scale range."""
    self.base = LabeledImageDataset(pair, root)
    self.mean = mean
    self.crop_size = crop_size
    self.random = random
    # Fixed scale bounds kept on the instance for later use.
    self.scales = (256, 480)
def main():
    """Run stratified k-fold training of an image classifier.

    For each fold: build train/validation LabeledImageDatasets, train
    with MomentumSGD + cosine LR schedule, snapshot the best model by
    validation accuracy, move logs/snapshots into per-kind directories,
    save the final model, and emit a confusion matrix.
    """
    args = parser()
    # Timestamp used for the result directory and saved model names.
    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # Output directory layout
    save_dir = Path('result') / now
    log_dir = save_dir / 'log'
    model_dir = save_dir / 'model'
    snap_dir = save_dir / 'snap'
    matrix_dir = save_dir / 'matrix'
    # Create the output directories
    save_dir.mkdir(exist_ok=True, parents=True)
    log_dir.mkdir(exist_ok=True, parents=True)
    model_dir.mkdir(exist_ok=True, parents=True)
    snap_dir.mkdir(exist_ok=True, parents=True)
    matrix_dir.mkdir(exist_ok=True, parents=True)
    # Load the dataset
    root = args.dataset
    dir_list = os.listdir(root)
    dir_list.sort()
    # The cached mean file is not a class directory.
    if 'mean.npy' in dir_list:
        dir_list.remove('mean.npy')
    # Read image files and labels into the dataset
    print('dataset loading ...')
    datasets = DirectoryParsingLabelDataset(root)
    print('finish!')
    # Number of classes
    class_num = len(set(datasets.labels))
    print('class number : {}'.format(class_num))
    # Number of folds
    k_fold = args.kfold
    print('k_fold : {}'.format(k_fold))
    X = np.array([image_paths for image_paths in datasets.img_paths])
    y = np.array([label for label in datasets.labels])
    kfold = StratifiedKFold(n_splits=k_fold, shuffle=True,
                            random_state=402).split(X, y)
    for k, (train_idx, val_idx) in enumerate(kfold):
        print("============= {} fold training =============".format(k + 1))
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]
        # Pair each image path with its label into datasets
        train = LabeledImageDataset([(x, y) for x, y in zip(X_train, y_train)])
        validation = LabeledImageDataset([(x, y) for x, y in zip(X_val, y_val)])
        train, validation, mean = get_dataset(train, validation, root,
                                              datasets, use_mean=False)
        # model setup
        model = StabilityClassifer(archs[args.arch](output=class_num))
        #model = ABNClassifier(archs[args.arch](output=class_num))
        lr = args.lr
        optimizer = chainer.optimizers.MomentumSGD(lr)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))
        # using GPU
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()
        # setup iterators
        train_iter = chainer.iterators.MultithreadIterator(
            train, args.batchsize, n_threads=8)
        validation_iter = chainer.iterators.MultithreadIterator(
            validation, args.batchsize, repeat=False, shuffle=False,
            n_threads=8)
        # setup updater and trainer
        updater = training.StandardUpdater(
            train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=save_dir)
        # set extensions
        log_trigger = (1, 'epoch')
        target = 'lr'
        # Cosine-annealed learning-rate schedule over args.epoch epochs.
        trainer.extend(CosineShift(target, args.epoch, 1),
                       trigger=(1, "epoch"))
        trainer.extend(extensions.Evaluator(validation_iter, model,
                                            device=args.gpu),
                       trigger=log_trigger)
        # Keep only the snapshot with the best validation accuracy.
        snap_name = '{}-{}_fold_model.npz'.format(k_fold, k+1)
        trainer.extend(extensions.snapshot_object(model, str(snap_name)),
                       trigger=chainer.training.triggers.MaxValueTrigger(
                           key='validation/main/accuracy',
                           trigger=(1, 'epoch')))
        log_name = '{}-{}_fold_log.json'.format(k_fold, k+1)
        trainer.extend(extensions.LogReport(
            log_name=str(log_name), trigger=log_trigger))
        trainer.extend(extensions.observe_lr(), trigger=log_trigger)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss','main/lossL2',
            'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time', 'lr'
        ]), trigger=(1, 'epoch'))
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch',file_name='loss{}.png'.format(k+1)))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'], 'epoch',
            file_name='accuracy{}.png'.format(k+1)))
        trainer.extend(extensions.ProgressBar(update_interval=10))
        #if args.resume:
        #chainer.serializers.load_npz(args.resume, trainer)
        trainer.run()
        # Move the fold's snapshot and log into their own directories.
        snap_file = save_dir / snap_name
        shutil.move(str(snap_file), str(snap_dir))
        log_file = save_dir / log_name
        shutil.move(str(log_file), str(log_dir))
        # model save
        save_model = model_dir / "{}_{}-{}_fold.npz".format(now, k_fold,
                                                            k + 1)
        chainer.serializers.save_npz(str(save_model), model)
        print("============= {} fold Evaluation =============".format(k + 1))
        # Image folders give the class display names.
        dnames = glob.glob('{}/*'.format(root))
        labels_list = []
        for d in dnames:
            p_dir = Path(d)
            labels_list.append(p_dir.name)
        if 'mean.npy' in labels_list:
            labels_list.remove('mean.npy')
        # NOTE(review): the class count is hard-coded to 8 here while
        # class_num is computed above — confirm whether this should be
        # class_num.
        confusion_matrix_cocoa(validation, args.gpu, 8, model, matrix_dir,
                               k, labels_list)
width, height = 224, 224  # target input resolution (any size works)

# Per-example conversion applied to every dataset item.
def transform(data):
    """Resize a CHW image to (width, height) and scale pixels to [0, 1]."""
    img, label = data
    img = img.astype(np.uint8)
    pil_img = Image.fromarray(img.transpose(1, 2, 0))
    pil_img = pil_img.resize((width, height))
    img = np.asarray(pil_img).transpose(2, 0, 1).astype(np.float32) / 255.
    return img, label

if __name__ == '__main__':
    # Preprocess the labeled train/validation image sets.
    train = LabeledImageDataset('data/train/train_labels.txt',
                                root='data/train/images')
    train = TransformDataset(train, transform)
    valid = LabeledImageDataset('data/valid/valid_labels.txt',
                                root='data/valid/images')
    valid = TransformDataset(valid, transform)
    epoch = 5
    batch = 9
    model = L.Classifier(Chainer())
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    train_iter = iterators.SerialIterator(train, batch)
    updater = training.StandardUpdater(train_iter, optimizer)