def shuffle(list_of_files):
    """Shuffle several line-aligned files with one shared permutation.

    Line *k* of every input file is joined into a single record, the records
    are shuffled by the external ``shuf`` program, and each record is split
    back into one output file per input. Corresponding lines therefore stay
    aligned across the outputs.

    Lines are stripped of surrounding whitespace. The first file drives the
    iteration: shorter files contribute empty strings, and extra lines in
    longer files are ignored.

    Args:
        list_of_files: Paths of the text files to shuffle together.

    Returns:
        A list of output paths under ``/tmp``, one per input file and in the
        same order. An empty input list yields an empty list.

    Raises:
        subprocess.CalledProcessError: If ``shuf`` exits with an error.
        OSError: If ``shuf`` cannot be started or a file cannot be opened.
    """
    import subprocess

    if not list_of_files:
        return []

    separator = "<CONCATE4SHUF>"
    tf_os, tpath = mkstemp()
    shuffled_path = tpath + '.shuf'
    fnames = [
        '/tmp/{}.{}.{}.shuf'.format(i, os.getpid(), time.time())
        for i, ff in enumerate(list_of_files)
    ]
    try:
        # Reuse the descriptor mkstemp() returned so it is closed, not leaked.
        with os.fdopen(tf_os, 'w') as tf:
            sources = []
            try:
                for name in list_of_files:
                    sources.append(open(name))
                for l in sources[0]:
                    lines = [l.strip()]
                    lines += [ff.readline().strip() for ff in sources[1:]]
                    print(separator.join(lines), file=tf)
            finally:
                for ff in sources:
                    ff.close()

        # Argument list instead of a shell string; a failing shuf now raises
        # rather than silently producing empty outputs.
        with open(shuffled_path, 'w') as shuffled:
            subprocess.check_call(['shuf', tpath], stdout=shuffled)

        targets = []
        try:
            for fn in fnames:
                targets.append(open(fn, 'w'))
            with open(shuffled_path) as shuffled:
                for l in shuffled:
                    s = l.strip().split(separator)
                    for i, fd in enumerate(targets):
                        print(s[i], file=fd)
        finally:
            for fd in targets:
                fd.close()
    finally:
        # Remove the intermediate files on success and on failure alike.
        for leftover in (tpath, shuffled_path):
            if os.path.exists(leftover):
                os.remove(leftover)
    return fnames
def shuffle(list_of_files):
    """Shuffle several line-aligned files with one shared permutation.

    Line *k* of every input file is joined with ``|||||`` into one record,
    the records are shuffled by the external ``shuf`` program, and each
    record is split back into one output file per input, so corresponding
    lines stay aligned.

    Lines are stripped of surrounding whitespace. The first file drives the
    iteration: shorter files contribute empty strings, and extra lines in
    longer files are ignored.

    Args:
        list_of_files: Paths of the text files to shuffle together.

    Returns:
        A list of output paths, one per input and in the same order, each
        named ``<input>.<pid>.shuf`` next to its input. An empty input list
        yields an empty list.

    Raises:
        subprocess.CalledProcessError: If ``shuf`` exits with an error.
        OSError: If ``shuf`` cannot be started or a file cannot be opened.
    """
    import subprocess

    if not list_of_files:
        return []

    separator = '|||||'
    tf_os, tpath = mkstemp()
    shuffled_path = tpath + '.shuf'
    # Computed once so the files written and the names returned cannot differ.
    out_names = [ff + '.{}.shuf'.format(os.getpid()) for ff in list_of_files]
    try:
        # Reuse the descriptor mkstemp() returned so it is closed, not leaked.
        with os.fdopen(tf_os, 'w') as tf:
            sources = []
            try:
                for name in list_of_files:
                    sources.append(open(name))
                for l in sources[0]:
                    lines = [l.strip()]
                    lines += [ff.readline().strip() for ff in sources[1:]]
                    print(separator.join(lines), file=tf)
            finally:
                for ff in sources:
                    ff.close()

        # Argument list instead of a shell string; a failing shuf now raises
        # rather than silently producing empty outputs.
        with open(shuffled_path, 'w') as shuffled:
            subprocess.check_call(['shuf', tpath], stdout=shuffled)

        targets = []
        try:
            for name in out_names:
                targets.append(open(name, 'w'))
            with open(shuffled_path) as shuffled:
                for l in shuffled:
                    s = l.strip().split(separator)
                    for i, fd in enumerate(targets):
                        print(s[i], file=fd)
        finally:
            for fd in targets:
                fd.close()
    finally:
        # Remove the intermediate files on success and on failure alike.
        for leftover in (tpath, shuffled_path):
            if os.path.exists(leftover):
                os.remove(leftover)
    return out_names
def untar_data(path):
    """Extract every ``.zip``, ``.gz`` and ``.tar`` archive found in *path*.

    Only the entries directly inside *path* are considered. Each archive is
    announced on stdout with its size and name, then extracted into *path*.
    After all archives are processed, the names of the entries now in *path*
    are printed.

    Args:
        path: Directory that holds the archives and receives their contents.

    Raises:
        tarfile.ReadError: If a ``.gz`` or ``.tar`` entry is not a tar
            archive (for example a plain gzip-compressed file).
        zipfile.BadZipFile: If a ``.zip`` entry is not a valid zip archive.
    """
    # Snapshot the listing first: extraction adds entries to the very
    # directory being scanned.
    entries = list(os.scandir(path))
    for file in entries:
        # file.path already joins the directory and name correctly, so this
        # works whatever the current directory is and whether or not *path*
        # ends with a separator.
        if file.name.endswith('.zip'):
            print(file.stat().st_size, file.name, "this is zip file")
            with zipfile.ZipFile(file.path, 'r') as z:
                z.extractall(path)
        elif file.name.endswith('.gz'):
            # A special condition still needs to be added for .bin files.
            print(file.stat().st_size, file.name, "this is gz file")
            # NOTE(review): extractall() trusts member paths inside the
            # archive; only run this on archives from a trusted source.
            with tarfile.open(file.path, "r") as tf:
                tf.extractall(path)
        elif file.name.endswith('.tar'):
            print(file.stat().st_size, file.name, "this is tar file")
            with tarfile.open(file.path, "r") as tf:
                tf.extractall(path)
    for file in os.scandir(path):
        print(file.name)
def shuffle(file):
    """Return a temporary file holding the lines of *file* in random order.

    The whole file is read into memory, shuffled with ``random.shuffle`` and
    written to an anonymous temporary file created in the same directory as
    *file*. Every output line ends with a newline, even if the last input
    line did not.

    Args:
        file: Path of the text file to shuffle.

    Returns:
        An open text-mode temporary file positioned at its start. It is
        deleted automatically when closed.
    """
    with open(file, "r") as fd:
        lines = [l.strip("\n") for l in fd]
    random.shuffle(lines)

    path, filename = os.path.split(os.path.realpath(file))
    # Text mode is required: the default binary mode rejects str on Python 3.
    shuffled = tempfile.TemporaryFile(
        mode='w+', prefix=filename + '.shuf', dir=path)
    try:
        shuffled.writelines(s + "\n" for s in lines)
        shuffled.seek(0)
    except Exception:
        # Do not leak the handle when the write fails part-way.
        shuffled.close()
        raise
    return shuffled
def promptuser(lines):
    """Show *lines* to the user in their editor and return what they leave.

    The lines are written to a temporary file, which is opened in the
    program named by the ``EDITOR`` environment variable (``vi`` when it is
    unset or empty). After the editor exits, the file is read back and
    removed.

    Args:
        lines: Iterable of strings to pre-fill the editor with, one per line.

    Returns:
        A list of the edited lines with surrounding whitespace stripped,
        omitting empty lines and lines that begin with ``#``.
    """
    # Text mode: the default binary mode rejects str on Python 3.
    tf = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        with tf:
            for line in lines:
                tf.write(line + '\n')

        editor = os.getenv('EDITOR') or 'vi'
        subprocess.Popen([editor, tf.name]).wait()

        with open(tf.name) as f:
            res = f.readlines()
    finally:
        # Remove the scratch file even if the editor could not be started.
        os.remove(tf.name)

    stripped = (x.strip() for x in res)
    # A real list, so the result can be indexed and iterated more than once.
    return [x for x in stripped if x and x[0] != '#']
# scikit-image package import skimage import numpy as np import random from sklearn.model_selection import train_test_split tf.enable_eager_execution() tf.set_random_seed(0) np.random.seed(0) # unzip tar file import tarfile tf = tarfile.open("drive/My Drive/Colab Notebooks/notMNIST_large.tar") tf.extractall() tf.close() train_dir = "notMNIST_large/" imgs = [] labels = [] imageSize = 28 # load data and labels def get_data(folder): imgs = [] labels = [] for folderName in os.listdir(folder): if not folderName.startswith('.'):