def _tf_save_data(self, path, value):
    """Write ``value.np_array`` to ``path`` in IDX format.

    A zero-dimensional array is wrapped into a one-element array before it
    is handed to ``idx2np.convert_to_file``.
    """
    data = value.np_array
    payload = np.array([data]) if data.shape == () else data
    with open(path, "wb") as out_file:
        idx2np.convert_to_file(out_file, payload)
    logger.info("saving %s", path)
def save_div(self, steps):
    """Compute the divergence of every stored step and save the vector.

    For each ``i`` in ``range(steps)`` the coefficients named ``str(i)`` are
    loaded through ``self.read_coef`` and the value of ``self.calc_div()`` is
    recorded and printed. The resulting float vector is written in IDX format
    to ``self.path + "Results/div"``.
    """
    # np.float_ was removed in NumPy 2.0; np.float64 is the very same dtype.
    div = np.zeros(steps, dtype=np.float64)
    for i in range(steps):
        self.read_coef(str(i))
        div[i] = self.calc_div()
        print("Calculated ", i, "th divergence:", div[i])
    idx2numpy.convert_to_file(self.path + "Results/div", div)
def generate_idx_files(datas, types):
    """Write every entry of ``datas`` once for each dtype name in ``types``.

    Each dtype name is resolved as an attribute of ``np``. The output file is
    named ``<dtype name>_<two-digit index>.idx``.
    """
    for dtype_name in types:
        dtype = getattr(np, dtype_name)
        for index, item in enumerate(datas):
            converted = np.array(item, dtype=dtype)
            out_name = dtype_name + '_' + str(index).zfill(2) + '.idx'
            idx2numpy.convert_to_file(out_name, converted)
def calc(number):
    """Accumulate pairwise pixel/label statistics for one 10000-image slice.

    Reads the curve, the spin training images and the training labels from
    ``../MNIST`` and processes images ``10000 * number`` up to (but not
    including) ``10000 * (number + 1)``. The 410x410 result is written to
    ``"../Data/double" + str(number)``:

    * ``[i][j]`` with ``i, j < 400`` holds the summed product of the pixels
      addressed by ``curve[i]`` and ``curve[j]``;
    * ``[i][400 + k]`` and ``[400 + k][i]`` add the pixel addressed by
      ``curve[i]`` when the image's label equals ``k`` and subtract it
      otherwise.

    Every entry is finally divided by 60000.
    """
    curve = idx2numpy.convert_from_file('../MNIST/curve')
    images = idx2numpy.convert_from_file("../MNIST/spin_train_images")
    labels = idx2numpy.convert_from_file("../MNIST/train_labels")
    # np.float_ was removed in NumPy 2.0; np.float64 is the very same dtype.
    double = np.zeros((410, 410), dtype=np.float64)
    first, last = 10000 * number, 10000 * (number + 1)

    # Pixel-pixel block.
    for n in range(first, last):
        # Progress output, one line per processed image.
        print(n)
        image = images[n]
        for i in range(400):
            # The i-th pixel does not depend on j, so look it up once.
            pixel_i = image[curve[i][0]][curve[i][1]]
            for j in range(400):
                double[i][j] += pixel_i * image[curve[j][0]][curve[j][1]]

    # Pixel-label block (kept symmetric).
    for i in range(400):
        row, col = curve[i][0], curve[i][1]
        for n in range(first, last):
            pixel = images[n][row][col]
            for j in range(10):
                if labels[n] == j:
                    double[i][400 + j] += pixel
                    double[400 + j][i] += pixel
                else:
                    double[i][400 + j] -= pixel
                    double[400 + j][i] -= pixel

    double /= 60000
    idx2numpy.convert_to_file("../Data/double" + str(number), double)
def make_steps(self, n):
    """Run ``n`` consecutive steps, persisting the step counter as it goes.

    The number of the first step is read from ``self.path + "current_step"``.
    After each call to ``self.make_step`` the number of the following step is
    written back to that same file.
    """
    stored = idx2numpy.convert_from_file(self.path + "current_step")
    upcoming = np.zeros((1), dtype=np.int32)
    for offset in range(n):
        current = stored[0] + offset
        self.make_step(current)
        upcoming[0] = current + 1
        idx2numpy.convert_to_file(self.path + "current_step", upcoming)
def pcaReduction(trainIdxPath, testIdxPath, outTrainPath, outTestIdx,
                 outRatioFile, pcaEigenDir):
    """Fit a PCA on the training set and save reduced train/test sets.

    Arguments:
        trainIdxPath: IDX file with the training samples.
        testIdxPath: IDX file with the test samples.
        outTrainPath: destination IDX file for the reduced training data.
        outTestIdx: destination IDX file for the reduced test data.
        outRatioFile: destination pickle file for the cumulative explained
            variance ratio (entries before the 0.99 index).
        pcaEigenDir: directory receiving one ``<i>.png`` per retained
            component (rendered as a 28x28 image).

    The number of retained components is the first index at which the
    cumulative explained variance ratio reaches 0.95, plus one.
    """
    trainData = idx2numpy.convert_from_file(trainIdxPath)
    testData = idx2numpy.convert_from_file(testIdxPath)

    # Flatten every sample to one row; -1 also copes with data that is
    # already 2-D (the former shape[1] * shape[2] raised IndexError there).
    if trainData.ndim > 1:
        trainData = trainData.reshape(trainData.shape[0], -1)
    if testData.ndim > 1:
        testData = testData.reshape(testData.shape[0], -1)

    pca = PCA()
    pca.fit(trainData)
    cumSumRatio = numpy.cumsum(pca.explained_variance_ratio_)
    indOf09 = numpy.argmax(cumSumRatio >= 0.9)
    indOf095 = numpy.argmax(cumSumRatio >= 0.95)
    indOf099 = numpy.argmax(cumSumRatio >= 0.99)
    print('explained variance ratio: ',
          pca.explained_variance_ratio_[0:indOf099])
    print('cumsum of explained variance ratio: ', cumSumRatio[0:indOf099])
    print('indexes of 0.9, 095, 0.99 ', indOf09, indOf095, indOf099)

    # NOTE(review): this takes *columns* of components_, while scikit-learn
    # stores one component per row - confirm this is the intended basis.
    A = pca.components_[:, 0:indOf095 + 1]
    for i in range(0, indOf095 + 1):
        v = abs(A[:, i]) * 255
        v = v.reshape(28, 28)
        im = Image.fromarray(~v.astype('uint8'), 'L')
        im.save(pcaEigenDir + os.sep + str(i) + '.png', 'png')

    trainData = numpy.dot(trainData, A)
    print('train shape', trainData.shape, trainData.dtype)
    testData = numpy.dot(testData, A)
    # Report the dtype of the test data (previously printed the train dtype).
    print('test shape', testData.shape, testData.dtype)

    # Context managers make sure every output file is flushed and closed.
    with open(outTrainPath, 'wb') as f_write:
        idx2numpy.convert_to_file(f_write, trainData)
    with open(outTestIdx, 'wb') as f_write:
        idx2numpy.convert_to_file(f_write, testData)
    with open(outRatioFile, 'wb') as f_write:
        pickle.dump(cumSumRatio[0:indOf099], f_write)
def test_correct(self):
    """An unsigned-byte vector is serialised to the expected IDX bytes."""
    # Unsigned byte.
    source = np.array([0x0A, 0x0B, 0xFF], dtype='uint8')
    expected = (b'\x00\x00\x08\x01\x00\x00\x00\x03' +
                b'\x0A' + b'\x0B' + b'\xFF')
    with contextlib.closing(BytesIO()) as stream:
        idx2numpy.convert_to_file(stream, source)
        produced = stream.getvalue()
    self.assertEqual(produced, expected)
def save_idx(arr, fname):
    """Save ``arr`` to ``fname`` in IDX format.

    * A zero-dimensional array is wrapped into a one-element array.
    * ``int64`` data is converted to ``int32`` (with a warning).
    * The parent directory of ``fname`` is created when it is missing.
    """
    if arr.shape == ():
        arr = np.array([arr], dtype=arr.dtype)
    if arr.dtype == np.int64:
        logger.warning(
            "unsupported int format for idx detected: %s, using int32 instead",
            arr.dtype)
        arr = arr.astype(np.int32)
    out_dir = os.path.dirname(fname)
    if out_dir:
        # exist_ok avoids the race between an existence check and makedirs.
        os.makedirs(out_dir, exist_ok=True)
    with open(fname, "wb") as fid:
        idx2np.convert_to_file(fid, arr)
    logger.info("%s saved", fname)
def _make_stripe_split(size, width, shuffle):
    """Build one split: ``size`` 28x28 uint8 images for each of 10 classes.

    Class ``i`` is a vertical stripe of ``width`` columns starting at column
    ``2 * i + 3``, filled with random values from ``[150, 240)``. Returns the
    ``(images, labels)`` pair, shuffled in unison when ``shuffle`` is true.
    """
    images = []
    labels = []
    for cls in range(10):
        x = np.zeros((size, 28, 28), dtype=np.uint8)
        start = cls * 2 + 3
        x[:, :, start:start + width] = np.random.randint(
            150, 240, (size, 28, width))
        images.append(x)
        labels.append(np.full((size, ), cls, dtype=np.uint8))
    images = np.concatenate(images, axis=0)
    labels = np.concatenate(labels, axis=0)
    if shuffle:
        # random.shuffle needs a mutable sequence; a bare range() object
        # raises TypeError on Python 3.
        perm = list(range(images.shape[0]))
        random.shuffle(perm)
        images = images[perm]
        labels = labels[perm]
    return images, labels


def create_data(shuffle=True):
    """Generate the synthetic BASICPROP dataset in MNIST-style IDX files.

    Creates 10000 training and 1000 evaluation images per class (10 classes)
    and writes images and labels into the ``BASICPROP`` directory.

    Arguments:
        shuffle: when true, samples and labels of each split are shuffled
            with the same permutation.
    """
    from idx2numpy import convert_to_file
    width = 4
    train_data, train_labels = _make_stripe_split(10000, width, shuffle)
    eval_data, eval_labels = _make_stripe_split(1000, width, shuffle)
    mkdir_p('BASICPROP')
    convert_to_file('BASICPROP/t10k-images-idx3-ubyte', eval_data)
    convert_to_file('BASICPROP/t10k-labels-idx1-ubyte', eval_labels)
    convert_to_file('BASICPROP/train-images-idx3-ubyte', train_data)
    convert_to_file('BASICPROP/train-labels-idx1-ubyte', train_labels)
def _save_data(self, path, value, tf_dtype):
    """Cast ``value`` according to ``tf_dtype`` and write it to ``path``.

    uint8/qint8/quint8 map to ``np.uint8``, int32/qint32 map to ``np.int32``
    and every other dtype maps to ``np.float32``. A zero-dimensional value is
    wrapped into a one-element array.
    """
    byte_types = (tf.uint8, tf.qint8, tf.quint8)
    int_types = (tf.int32, tf.qint32)
    if tf_dtype in byte_types:
        target = np.uint8
    elif tf_dtype in int_types:
        target = np.int32
    else:
        target = np.float32

    if value.shape != ():
        value = value.astype(target)
    else:
        value = np.array([value], dtype=target)

    with open(path, "wb") as out_file:
        idx2np.convert_to_file(out_file, value)
    print("saving {}".format(path))
def csv2idx(csvpath):
    """Convert a CSV file of pictures (one per row) into IDX files.

    The first row is treated as a header and skipped. When that header has
    ``28 * 28 + 1`` columns, the first column of every following row is taken
    as the label and the remaining columns as pixels. Otherwise the whole row
    is pixel data.

    Output is written to the current directory, named after the part of the
    CSV file name before the first dot: ``<name>-pic.idx3-ubyte`` for the
    pictures and, when labels are present, ``<name>-lab.idx3-ubyte``.
    """
    filename = os.path.basename(csvpath).split('.')[0]
    picname = filename + '-pic.idx3-ubyte'
    labname = filename + '-lab.idx3-ubyte'
    labelList = []
    pictureList = []

    with open(csvpath, newline='') as csvfile:
        picturereader = csv.reader(csvfile)
        # Consume the header row; an empty file simply yields no pictures.
        header = next(picturereader, None)
        withLabels = header is not None and len(header) == 28 * 28 + 1
        for row in picturereader:
            if withLabels:
                labelList.append(row[0])
                row = row[1:]
            pictureList.append(numpy.array(row).astype('uint8'))

    # Context managers make sure the output files are flushed and closed.
    with open(picname, 'wb') as picfile:
        idx2numpy.convert_to_file(picfile, numpy.array(pictureList))
    if withLabels:
        with open(labname, 'wb') as labfile:
            idx2numpy.convert_to_file(
                labfile, numpy.array(labelList).astype('uint8'))
def export_tensor(self, node_name, path="./", outdir="out"):
    """
    Evaluate a tensor of the graph and dump it as an ``.idx`` file

    Arguments
    =========
    - node_name <`str`>: name of the tensor to export
    - path <`str`>: root path, which must already exist, default './'
    - outdir <`str`>: output directory under the root path (created when
      missing), default 'out'

    Returns
    =======
    `bool`: `True` on success, `False` when the root path does not exist or
    the output directory path is not a directory
    """
    print(node_name)
    # e.g. node_name = 'import/Variable_quint8_const:0'
    tensor = self._graph.get_tensor_by_name(node_name)
    with tf.Session(graph=self._graph):
        tf.global_variables_initializer().run()
        values = tensor.eval(self._feed_dict)
    # work-around for idx2numpy, which doesn't play well with single values
    if values.shape == ():
        values = np.array([values])

    # the file name is the prepared node name with ".idx" appended
    file_name = self._prepare_name(node_name) + ".idx"
    print("outputName: " + file_name)

    root = Path(path)
    if not root.exists():
        print("invalid path")
        return False
    target_dir = root / outdir
    if not target_dir.exists():
        os.makedirs(target_dir)
    elif not target_dir.is_dir():
        print("invalid path")
        return False

    if tensor.dtype == tf.uint8 or tensor.dtype == tf.quint8:
        np_dtype = np.uint8
    elif tensor.dtype == tf.int32 or tensor.dtype == tf.qint32:
        np_dtype = np.int32
    else:
        np_dtype = np.float32

    with open(str(target_dir / file_name), 'wb') as fid:
        idx2numpy.convert_to_file(fid, values.astype(np_dtype))
    return True
def save_answer(self, number="test"):
    """Save the answer, its occurrences and the layer lengths as IDX files.

    The three files live under ``self.path + "Resp/"`` and are named
    ``<number>.samples``, ``<number>.occs`` and ``<number>.lens``; the last
    one holds ``[self.hidlen, self.vislen]`` as int16.
    """
    prefix = self.path + "Resp/" + number
    samples_file = prefix + ".samples"
    occs_file = prefix + ".occs"
    lens_file = prefix + ".lens"

    lens = np.zeros((2), dtype="int16")
    lens[0] = self.hidlen
    lens[1] = self.vislen

    idx2numpy.convert_to_file(samples_file, self.answer)
    idx2numpy.convert_to_file(occs_file, self.answer_occ)
    idx2numpy.convert_to_file(lens_file, lens)
import pandas as pd
import idx2numpy
import numpy as np
from math import *
import random

# Build 410-column rows: the first 400 columns are copied from the curved
# test images, the last 10 columns are -1 except for a 1 in the column that
# corresponds to the sample's label.
images = idx2numpy.convert_from_file('../MNIST/curved_test_images')
labels = idx2numpy.convert_from_file('../MNIST/test_labels')

newi = np.ones((10000, 410), dtype = np.int8) * -1
for i in range(10000):
    if i % 1000 == 0:
        print(i)
    newi[i][:400] = images[i][:400]
    # Convert the label to a Python int first: with unsigned-byte labels,
    # `400 + labels[i]` raises OverflowError under NumPy 2 promotion rules.
    newi[i][400 + int(labels[i])] = 1

idx2numpy.convert_to_file('../MNIST/complete_test_images', newi)
def dump(width, height, file_name, variable):
    """Write ``variable`` to ``<path>IDX_<file_name><width>by<height>.idx``.

    ``path`` is the module-level output prefix.
    """
    pieces = [path, 'IDX_', file_name, str(width), 'by', str(height), '.idx']
    idx2numpy.convert_to_file(''.join(pieces), variable)
import pandas as pd
import idx2numpy
import numpy as np
from math import *
import random

# The four possible rows, indexed by the random draw.
patterns = [
    [-1, -1, 1, -1, -1, -1],
    [1, -1, -1, 1, -1, -1],
    [-1, 1, -1, -1, 1, -1],
    [1, 1, -1, -1, -1, 1],
]

# 1000 rows, each one picked uniformly among the four patterns.
dset = np.zeros((1000, 6), dtype = np.int32)
for i in range(1000):
    coef = random.randrange(4)
    dset[i] = patterns[coef]

idx2numpy.convert_to_file("../Data/tests/data", dset)
import pandas as pd
import idx2numpy
import numpy as np
from math import *
from random import randrange as rnd, choice

# Turn the test images into spins: a zero pixel becomes -1, anything else +1.
images = idx2numpy.convert_from_file("../MNIST/new_test_images")

spin = np.zeros((10000, 20, 20), dtype=np.int8)
# One vectorised pass instead of four million Python-level iterations; the
# slice keeps the same 10000 x 20 x 20 window the loops used to visit.
spin[...] = np.where(images[:10000, :20, :20] == 0, -1, 1)

idx2numpy.convert_to_file("../MNIST/spin_test_images", spin)
img_dir = train_dir + c img_fnames = [(img_dir + "/" + f) for f in listdir(img_dir) if isfile(img_dir + "/" + f)] for i in range(NUM_TRAINING_IMAGES_IN_CLASS, NUM_TRAINING_IMAGES_IN_CLASS + NUM_TEST_IMAGES_IN_CLASS): iindex = i - NUM_TRAINING_IMAGES_IN_CLASS if i < len(img_fnames): print("class", c, "image", i) img = cv2.imread(img_fnames[i]) img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH)) gimg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #nimg = cv2.normalize(gimg, None, 0, 1, cv2.NORM_MINMAX, dtype=cv2.CV_32F) np_img_reshape = np.zeros((NUM_CHANNELS, IMG_HEIGHT, IMG_WIDTH)) for ch in range(0, NUM_CHANNELS): np_img_reshape[ch, 0:IMG_HEIGHT, 0:IMG_WIDTH] = gimg[:, :] np_test_features[ind, :, :, :] = np_img_reshape np_test_labels[ind, :] = cid ind += 1 cid += 1 # Convert to idx format np.save('mio_training_images.npy', np_features) np.save('mio_training_labels.npy', np_labels) np.save('mio_test_images.npy', np_test_features) np.save('mio_test_labels.npy', np_test_labels) idx2numpy.convert_to_file('mio_training_images.idx', np_features) idx2numpy.convert_to_file('mio_training_labels.idx', np_labels) idx2numpy.convert_to_file('mio_test_images.idx', np_test_features) idx2numpy.convert_to_file('mio_test_labels.idx', np_test_labels)
args = parser.parse_args()
if args.cmd == "eval":
    # Report the best "accuracy = <number>" value found in a log file.
    with open(args.log, 'r') as file:
        content = file.read()
    title = args.title if args.title else splitext(basename(abspath(args.log)))[0]
    # Collect the matches directly instead of driving a global-mutating
    # callback through re.sub; 0.0 remains the floor when nothing matches.
    accuracies = [float(value)
                  for value in re.findall(r"accuracy = ([\d\.]+)", content)]
    best_acc = max([0.0] + accuracies)
    print('\n'.join([
        "[{}]",
        "Best Test Accuracy = {}%",
        "Best Test Error Rate = {}%"]).format(title, best_acc*100, (1-best_acc)*100))
elif args.cmd == "rotate":
    # Resize every source image to size x size and rotate it by a random
    # whole number of degrees in [0, 360].
    src = idx2numpy.convert_from_file(args.src_idx)
    size = args.size
    count = src.shape[0]
    dst = np.empty((count, size, size), dtype="uint8")
    # NOTE(review): `imresize` presumably comes from scipy.misc, which no
    # longer ships it in recent SciPy releases - confirm the import.
    for i in range(0, count):
        resized = imresize(src[i], size=(size, size), interp='bilinear')
        dst[i] = rotate(resized, random.randint(0, 360), reshape=False)
    idx2numpy.convert_to_file(args.dst_idx, dst)
else:
    print("Invalid cmd [{}]".format(args.cmd))
import idx2numpy
import numpy as np
from __main__ import *

# Fill a 3x3x3x3 int32 tensor, in C order, with the values 4**k - 1; the
# running power restarts from 1 once it exceeds 2**32.
arr = np.zeros([3, 3, 3, 3], dtype=np.int32)
tmp = 1
for position in np.ndindex(*arr.shape):
    # NOTE(review): the last value before the restart is 2**32 - 1, which
    # does not fit in int32 - confirm the intended threshold.
    arr[position] = tmp - 1
    tmp = tmp * 4
    if tmp > 2**32:
        tmp = 1

outpath = mkdir(TEST_DATA_DIR, 'idxImport')
out_file_name = str(outpath / 'int32_4d_power2.idx')
with open(out_file_name, 'wb') as f_write:
    idx2numpy.convert_to_file(f_write, arr)

print("int32_4d_power2 sum: ", np.sum(arr))
def dump(width, height, file_name, variable):
    """Save ``variable`` as an IDX file named after its dimensions.

    The target is ``<path>IDX_<file_name><width>by<height>.idx``, where
    ``path`` is the module-level output prefix.
    """
    dimensions = str(width) + "by" + str(height)
    target = path + "IDX_" + file_name + dimensions + ".idx"
    idx2numpy.convert_to_file(target, variable)
size = int(sys.argv[1]) folder_name = "./MNIST_" + str(size) try: os.mkdir(folder_name) except OSError as err: print(err) print("folder created\n---------------------") print("partitioning images\n--------------------") nump = idx2numpy.convert_from_file("train-images-idx3-ubyte") print("Before images:", nump.shape) nump = nump[:size] #nump=numpy.concatenate((nump,nump),axis=0) print("After images:", nump.shape) nump = idx2numpy.convert_to_file(folder_name + "/train-images-idx3-ubyte", nump) print("partitioning labels\n----------------------") nump = idx2numpy.convert_from_file("train-labels-idx1-ubyte") print("Before labels:", nump.shape) nump = nump[:size] #nump=numpy.concatenate((nump,nump),axis=0) print("After labels:", nump.shape) nump = idx2numpy.convert_to_file(folder_name + "/train-labels-idx1-ubyte", nump) print("Done\n------------------------------------") print("Copying validation file\n-------------------------") shutil.copy("./t10k-images-idx3-ubyte", folder_name + "/t10k-images-idx3-ubyte") shutil.copy("./t10k-labels-idx1-ubyte",
def test_correct(self):
    """Serialising the fixture array to a stream yields the expected bytes."""
    with contextlib.closing(BytesIO()) as stream:
        idx2numpy.convert_to_file(stream, self._ndarr_to_convert)
        produced = stream.getvalue()
    self.assertEqual(produced, self._expected)
def writeData(self):
    """Write the stored images and labels to two IDX files in ``data_path``.

    Both file names start with ``<startTimeStr>_<number of images>``; the
    image file ends in ``_img.idx`` and the label file in
    ``_lbl-<labelsStr>.idx``.
    """
    images = np.array(self.imgStorage, dtype=np.uint8)
    labels = np.array(self.lblStorage, dtype=np.uint8)
    stem = startTimeStr + "_" + str(len(images))
    image_file = os.path.join(data_path, stem + "_img.idx")
    label_file = os.path.join(data_path, stem + "_lbl-" + labelsStr + ".idx")
    convert_to_file(image_file, images)
    convert_to_file(label_file, labels)
def idx_export(args, data_matrix, ext):
    """Write ``data_matrix`` in idx format to ``<out_dir>/<setname><ext>``."""
    file_name = args.setname + ext
    idx.convert_to_file(args.out_dir + '/' + file_name, data_matrix)
import idx2numpy
import numpy as np
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageOps

# Placeholders: fill these in before running the script.
NUM_IMAGES = 0                    # number of images to process
IMAGE_NAME_TEMPLATE = "{0}"       # image name; {0} is the image index
OUTPUT_FILE = "File Name"         # destination idx file

coll = np.array([])

# TBD
# 1. Take a file list
# 2. Example Data Images
#
for n in range(NUM_IMAGES):
    # Open the image in its monochrome version.
    img = Image.open(IMAGE_NAME_TEMPLATE.format(n)).convert("L")
    # Enhance contrast and brightness.
    contr = ImageEnhance.Contrast(img)
    img = contr.enhance(3)
    bright = ImageEnhance.Brightness(img)
    img = bright.enhance(4)
    # Invert the image (white on black).
    img = ImageOps.invert(img)
    # Convert the resolution to 28 x 28.
    img = img.resize((28, 28))
    # Convert the image to a 2D numpy array.
    arr = np.array(img)
    # Convert to the MNIST dataset format: zero pixels become 1, all other
    # pixels become 255.
    arr = np.where(arr == 0, 1, 255).astype(arr.dtype)
    # Convert the 2D numpy array to idx format.
    # NOTE(review): every image is written to the same file, so only the
    # last one survives - confirm whether the images should be collected in
    # `coll` and written once.
    idx2numpy.convert_to_file(OUTPUT_FILE, arr)
import numpy as np
import idx2numpy
from PIL import Image, ImageDraw, ImageFilter
from random import random

size = width, height = 20, 20
max_radius = 10
n = 10000
output_file = 'circles.idx'


def generate_random_circle():
    """Return one blurred, randomly placed circle outline as a uint8 array.

    The array has shape ``(height, width)``. ``radius`` is used as the side
    of the ellipse's bounding box and is drawn from ``[5, 5 + max_radius)``.
    """
    image = Image.new('L', size)  # grayscale
    draw = ImageDraw.Draw(image)
    radius = 5 + max_radius * random()
    x, y = (width - radius) * random(), (height - radius) * random()
    draw.ellipse((x, y, x + radius, y + radius), outline=255)
    image = image.filter(ImageFilter.GaussianBlur(radius=1))
    # PIL sizes are (width, height) but getdata() is flattened row by row,
    # so the array must be shaped (height, width); this only coincides with
    # `size` for square images.
    return np.array(image.getdata(), dtype=np.uint8).reshape(height, width)


data = np.array([generate_random_circle() for _ in range(n)])
idx2numpy.convert_to_file(output_file, data)
def save_coef(self, filename="last"):
    """Write ``self.coef`` to ``<self.path>Coef/<filename>.coef`` as IDX."""
    target = "".join([self.path, "Coef/", filename, ".coef"])
    idx2numpy.convert_to_file(target, self.coef)
def test_correct_with_filename_argument(self):
    """Passing a file name (not a stream) writes the expected bytes."""
    target = self._test_output_file
    idx2numpy.convert_to_file(target, self._ndarr_to_convert)
    with open(target, 'rb') as written:
        content = written.read()
    self.assertEqual(content, self._expected)
def applyDeskew(inputDir, outPutDir, listOfPicture):
    """Deskew every file below ``inputDir``, mirroring the tree in ``outPutDir``.

    Sub-directories are processed recursively and created on the output side
    when missing. For each file, the value returned by ``skewAngle(input
    path, output path)`` is appended to ``listOfPicture``.

    Returns:
        ``listOfPicture``, the same list object that was passed in.
    """
    for name in os.listdir(inputDir):
        outPath = outPutDir + os.sep + name
        inPath = inputDir + os.sep + name
        if os.path.isdir(inPath):
            if not os.path.exists(outPath):
                os.makedirs(outPath)
            applyDeskew(inPath, outPath, listOfPicture)
        else:
            listOfPicture.append(skewAngle(inPath, outPath))
    return listOfPicture


print('Deskew train data...')
res = applyDeskew(in_dir_train, out_train, [])
print("Train len=", len(res))
# Context managers make sure the idx files are flushed and closed.
with open(out_train_idx, 'wb') as f_write:
    idx2numpy.convert_to_file(f_write, numpy.array(res))

print('Deskew test data...')
res = applyDeskew(in_dir_test, out_test, [])
print("Test len=", len(res))
with open(out_test_idx, 'wb') as f_write:
    idx2numpy.convert_to_file(f_write, numpy.array(res))
import pandas as pd
import idx2numpy
import numpy as np
from math import *
from random import randrange as rnd,choice

# Flatten each spin image along the curve: column i of the output holds the
# pixel located at (curve[i][0], curve[i][1]).
curve = idx2numpy.convert_from_file('../MNIST/curve')
images = idx2numpy.convert_from_file("../MNIST/spin_test_images")

curved = np.zeros((10000, 400), dtype = np.int8)
# A single fancy-indexing gather replaces four million Python-level
# iterations; it selects the same 400 pixels for each of the 10000 images.
curved[...] = images[:10000, curve[:400, 0], curve[:400, 1]]

idx2numpy.convert_to_file("../MNIST/curved_test_images", curved)
import idx2numpy
from numpy import vstack
from tensorflow.examples.tutorials.mnist import input_data

# Merge the train, test and validation splits (in that order) into a single
# image file and a single one-hot label file.
mnist = input_data.read_data_sets('./MNIST', one_hot=True)
mnist_images = vstack([
    mnist.train.images,
    mnist.test.images,
    mnist.validation.images,
])
mnist_labels = vstack([
    mnist.train.labels,
    mnist.test.labels,
    mnist.validation.labels,
])

idx2numpy.convert_to_file('./MNIST/mnist-images.idx', mnist_images)
idx2numpy.convert_to_file('./MNIST/mnist-labels.idx', mnist_labels)

print('images:', len(mnist_images), '/ labels:', len(mnist_labels))