# The downloaded MNIST files are raw idx binaries, not images, so this script
# decodes them and writes every training sample out as a .jpg plus an index
# file mapping image path -> label.
# Workflow: extract the .gz archives into `root` first, then run this script.
import os
from skimage import io
import torchvision.datasets.mnist as mnist

# Directory holding the extracted idx files (note: Windows-style path).
root = "F:/python_project/demo05pytorch训练和测试自己的图片数据/"
train_set = (mnist.read_image_file(
    os.path.join(root, 'train-images-idx3-ubyte')),
             mnist.read_label_file(
                 os.path.join(root, 'train-labels-idx1-ubyte')))
test_set = (mnist.read_image_file(os.path.join(root, 't10k-images-idx3-ubyte')),
            mnist.read_label_file(os.path.join(root, 't10k-labels-idx1-ubyte')))
print("training set :", train_set[0].size())
print("test set :", test_set[0].size())


def convert_to_img(train=True):
    """Dump the selected split to individual .jpg files plus an index file.

    Each line of the index file is ``<image path> <label>``.  Only the
    training branch is implemented here, matching the original script.
    """
    if train:
        data_path = root + '/train/'
        if not os.path.exists(data_path):
            os.makedirs(data_path)
        # `with` guarantees the index file is closed even if imsave raises.
        with open(root + 'train.txt', 'w') as f:
            for i, (img, label) in enumerate(zip(train_set[0], train_set[1])):
                img_path = data_path + str(i) + '.jpg'
                io.imsave(img_path, img.numpy())
                # int(label): write "5", not "tensor(5)" -- calling str() on a
                # torch scalar tensor embeds the tensor repr and corrupts the
                # index file for any downstream parser.
                f.write(img_path + ' ' + str(int(label)) + '\n')
def test_to_superpixels():
    """End-to-end check of the MNIST -> superpixel-graph (SLIC) pipeline."""
    # Work in a unique throwaway directory so parallel runs don't collide.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    raw_folder = osp.join(root, 'MNIST', 'raw')
    processed_folder = osp.join(root, 'MNIST', 'processed')
    makedirs(raw_folder)
    makedirs(processed_folder)
    for resource in resources:
        path = download_url(resource, raw_folder)
        # BUG FIX: the original passed `osp.join(root, raw_folder)`, re-joining
        # `root` onto a path that is already rooted at it; it only worked
        # because os.path.join discards the first part when the second is
        # absolute.  Pass the destination directly.
        extract_gz(path, raw_folder)

    # Use the small t10k split for BOTH training.pt and test.pt so the test
    # stays fast; MNIST() merely requires both files to exist.
    test_set = (
        read_image_file(osp.join(raw_folder, 't10k-images-idx3-ubyte')),
        read_label_file(osp.join(raw_folder, 't10k-labels-idx1-ubyte')),
    )
    torch.save(test_set, osp.join(processed_folder, 'training.pt'))
    torch.save(test_set, osp.join(processed_folder, 'test.pt'))

    dataset = MNIST(root, download=False)

    # Plain ToSLIC: one node per superpixel with 2D position and 1 feature.
    dataset.transform = T.Compose([T.ToTensor(), ToSLIC()])
    data, y = dataset[0]
    assert len(data) == 2
    assert data.pos.dim() == 2 and data.pos.size(1) == 2
    assert data.x.dim() == 2 and data.x.size(1) == 1
    assert data.pos.size(0) == data.x.size(0)
    assert y == 7

    loader = DataLoader(dataset, batch_size=2, shuffle=False)
    for data, y in loader:
        assert len(data) == 4
        assert data.pos.dim() == 2 and data.pos.size(1) == 2
        assert data.x.dim() == 2 and data.x.size(1) == 1
        assert data.batch.dim() == 1
        assert data.ptr.dim() == 1
        assert data.pos.size(0) == data.x.size(0) == data.batch.size(0)
        assert y.tolist() == [7, 2]
        break

    # With add_seg/add_img the Data object also carries the segmentation map
    # and the original image tensor.
    dataset.transform = T.Compose(
        [T.ToTensor(), ToSLIC(add_seg=True, add_img=True)])
    data, y = dataset[0]
    assert len(data) == 4
    assert data.pos.dim() == 2 and data.pos.size(1) == 2
    assert data.x.dim() == 2 and data.x.size(1) == 1
    assert data.pos.size(0) == data.x.size(0)
    assert data.seg.size() == (1, 28, 28)
    assert data.img.size() == (1, 1, 28, 28)
    # Segment ids are 0..K-1, so max()+1 must equal the node count.
    assert data.seg.max().item() + 1 == data.x.size(0)
    assert y == 7

    loader = DataLoader(dataset, batch_size=2, shuffle=False)
    for data, y in loader:
        assert len(data) == 6
        assert data.pos.dim() == 2 and data.pos.size(1) == 2
        assert data.x.dim() == 2 and data.x.size(1) == 1
        assert data.batch.dim() == 1
        assert data.ptr.dim() == 1
        assert data.pos.size(0) == data.x.size(0) == data.batch.size(0)
        assert data.seg.size() == (2, 28, 28)
        assert data.img.size() == (2, 1, 28, 28)
        assert y.tolist() == [7, 2]
        break

    shutil.rmtree(root)
def main():
    """Entry point: prepare MNIST, train the STN-augmented network on it,
    and visualize the learned spatial transformations."""
    parser = get_parser()
    args = parser.parse_args()

    # ------------------------------------------------------------------
    # Loading the data: the classic MNIST dataset, fed to a standard
    # convolutional network augmented with a spatial transformer network.
    # ------------------------------------------------------------------
    data_dir = args.data_dir
    training_dir = args.training_dir

    training_file = path.join(training_dir, mnist.MNIST.processed_folder,
                              mnist.MNIST.training_file)
    test_file = path.join(training_dir, mnist.MNIST.processed_folder,
                          mnist.MNIST.test_file)
    # Re-process only if either torch file is missing.
    # BUG FIX: the second condition previously read `not path.join(...)`,
    # which is always False (a non-empty string is truthy), so a missing
    # test file never triggered reprocessing.  It must test path.exists().
    if not path.exists(training_file) or not path.exists(test_file):
        # process and save as torch files
        LOG.info('Processing dataset...')
        # Decompress every .gz archive from data_dir into training_dir.
        for file in os.listdir(data_dir):
            full_path = path.join(data_dir, file)
            save_path = path.join(training_dir, file.replace('.gz', ''))
            with open(save_path, 'wb') as out_f, \
                    gzip.GzipFile(full_path) as zip_f:
                out_f.write(zip_f.read())

        training_set = (
            mnist.read_image_file(
                path.join(training_dir, 'train-images-idx3-ubyte')),
            mnist.read_label_file(
                path.join(training_dir, 'train-labels-idx1-ubyte')))
        test_set = (
            mnist.read_image_file(
                path.join(training_dir, 't10k-images-idx3-ubyte')),
            mnist.read_label_file(
                path.join(training_dir, 't10k-labels-idx1-ubyte')))

        # exist_ok: the folder may survive a partial earlier run now that
        # the fixed guard can fire with the directory already present.
        os.makedirs(path.join(training_dir, mnist.MNIST.processed_folder),
                    exist_ok=True)
        with open(training_file, 'wb') as f:
            torch.save(training_set, f)
        with open(test_file, 'wb') as f:
            torch.save(test_set, f)
        LOG.info('Dataset processing done!')

    # Shared, stateless preprocessing pipeline for both splits.
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    # Training dataset
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root=training_dir, train=True, download=False,
                       transform=normalize),
        batch_size=args.batch_size, shuffle=True, num_workers=4)
    # Test dataset
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root=training_dir, train=False, transform=normalize),
        batch_size=args.batch_size, shuffle=True, num_workers=4)

    model = Net()
    if use_cuda:
        model.cuda()

    # ------------------------------------------------------------------
    # Train with plain SGD; the STN is learned end-to-end alongside the
    # supervised classification objective.
    # ------------------------------------------------------------------
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    for epoch in range(1, args.epochs + 1):
        train(epoch, train_loader, model, optimizer,
              use_mlboard=not args.skip_mlboard)
        test(test_loader, model, use_mlboard=not args.skip_mlboard)

    # Visualize the STN transformation on some input batch
    visualize_stn(test_loader, model, args.training_dir)
def download(self):
    """Download the EMNIST data if it doesn't exist in processed_folder already.

    Fetches the single EMNIST zip archive, extracts the per-split gzip
    files, decodes every split's idx files and saves them as torch
    ``(images, labels)`` tuples under ``processed_folder``.
    """
    import errno
    import gzip
    import shutil
    import zipfile
    from contextlib import closing

    from six.moves import urllib

    if self._check_exists():
        return

    # Create the raw/processed directories; only "already exists" is benign.
    try:
        os.makedirs(os.path.join(self.root, self.raw_folder))
        os.makedirs(os.path.join(self.root, self.processed_folder))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    print('Downloading ' + self.url)
    filename = self.url.rpartition('/')[2]
    raw_folder = os.path.join(self.root, self.raw_folder)
    file_path = os.path.join(raw_folder, filename)
    # BUG FIX: the urlopen handle was never closed (socket leak).  Use
    # contextlib.closing since urlopen results are not reliably context
    # managers on Python 2 (six is in use here); the output file also
    # gains a `with` for the same reason.
    with closing(urllib.request.urlopen(self.url)) as data, \
            open(file_path, 'wb') as f:
        f.write(data.read())

    print('Extracting zip archive')
    with zipfile.ZipFile(file_path) as zip_f:
        zip_f.extractall(raw_folder)
    os.unlink(file_path)

    # The archive contains a gzip/ folder of per-split .gz idx files;
    # decompress each next to raw_folder, then drop the gzip/ folder.
    gzip_folder = os.path.join(raw_folder, 'gzip')
    for gzip_file in os.listdir(gzip_folder):
        if gzip_file.endswith('.gz'):
            print('Extracting ' + gzip_file)
            with open(os.path.join(raw_folder,
                                   gzip_file.replace('.gz', '')),
                      'wb') as out_f, \
                    gzip.GzipFile(os.path.join(gzip_folder,
                                               gzip_file)) as zip_f:
                out_f.write(zip_f.read())
    shutil.rmtree(gzip_folder)

    # process and save as torch files
    for split in self.splits:
        print('Processing ' + split)
        training_set = (
            read_image_file(os.path.join(
                raw_folder,
                'emnist-{}-train-images-idx3-ubyte'.format(split))),
            read_label_file(os.path.join(
                raw_folder,
                'emnist-{}-train-labels-idx1-ubyte'.format(split))))
        test_set = (
            read_image_file(os.path.join(
                raw_folder,
                'emnist-{}-test-images-idx3-ubyte'.format(split))),
            read_label_file(os.path.join(
                raw_folder,
                'emnist-{}-test-labels-idx1-ubyte'.format(split))))
        with open(os.path.join(self.root, self.processed_folder,
                               self._training_file(split)), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.root, self.processed_folder,
                               self._test_file(split)), 'wb') as f:
            torch.save(test_set, f)
    print('Done!')
# 调用一些和操作系统相关的函数 import os # 输入输出相关 from skimage import io # dataset相关 import torchvision.datasets.mnist as mnist # 路径 # root="/home/s/PycharmProjects/untitled/fashion-mnist/data/fashion" root = "H:/Paper Code/fashion-mnist" # 读取二进制文件,这里不知道是不是必须使用mnist读 train_set = ( mnist.read_image_file(os.path.join( root, 'train-images-idx3-ubyte')), # 路径拼接,split()是分割路径与文件名,和这个正好相反 mnist.read_label_file(os.path.join(root, 'train-labels-idx1-ubyte'))) test_set = (mnist.read_image_file(os.path.join(root, 't10k-images-idx3-ubyte')), mnist.read_label_file(os.path.join(root, 't10k-labels-idx1-ubyte'))) # 打印test_set类型 print(type(test_set)) # 打印test_set中元素个数 print(len(test_set)) # 打印第元素类型,都是tensor print(type(test_set[0])) print(type(test_set[1]))