def process_data_greyscale(args): files=['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch'] for file in files: labels, images = unpickle(os.path.join(args.data_path,file)) num_examples=images.shape[0] images = 0.21 * images[:, :, :, 0] + 0.72 * images[:, :, :, 1] + 0.07 * images[:, :, :, 2] images = np.asarray(images, dtype=np.int32) print num_examples try: writer.close() except: pass writer = tf.python_io.TFRecordWriter(os.path.join(args.target_path, file + '.tfrecord')) for index in range(num_examples): label = int(labels[index]) image_raw = imresize(images[index], [args.size, args.size]).tostring() example = tf.train.Example(features=tf.train.Features(feature={ 'height': tfrecord_utils.int64_feature([args.size]), 'width': tfrecord_utils.int64_feature([args.size]), 'depth': tfrecord_utils.int64_feature([1]), 'label': tfrecord_utils.int64_feature([label]), 'image_raw': tfrecord_utils.bytes_feature([image_raw])})) writer.write(example.SerializeToString()) try: writer.close() except: pass
def write_to_tfrecords(filename, destination_dir, responses, prompts, q_ids,
                       grades, speakers, targets, predictions, debug=False):
    """Serialize aligned example lists into a single TFRecord file.

    Each example becomes a tf.train.SequenceExample whose context carries the
    scalar metadata (target, grade, teacher predictions, speaker, question id,
    example index) and whose feature lists carry the response and prompt
    token-id sequences.
    """
    # All inputs must be parallel lists of identical length.
    lengths = {len(responses), len(prompts), len(q_ids), len(grades),
               len(speakers), len(targets), len(predictions)}
    assert len(lengths) == 1

    print('Writing: ', filename)
    writer = tf.python_io.TFRecordWriter(os.path.join(destination_dir, filename))
    rows = zip(responses, prompts, q_ids, grades, speakers, targets, predictions)
    for idx, (response, prompt, q_id, grd, spkr, tgt, example_pred) in enumerate(rows):
        context = tf.train.Features(feature={
            'targets': tfrecord_utils.float_feature([tgt]),
            'grade': tfrecord_utils.float_feature([float(grd)]),
            'teacher_pred': tfrecord_utils.float_feature(list(example_pred)),
            'spkr': tfrecord_utils.bytes_feature([spkr]),
            'q_id': tfrecord_utils.int64_feature([q_id]),
            # Stores the example number for easy back-reference to txt files
            # even when examples get shuffled (0 indexed).
            'example_idx': tfrecord_utils.int64_feature([idx])})
        sequences = tf.train.FeatureLists(feature_list={
            'response': tfrecord_utils.int64_feature_list(response),
            'prompt': tfrecord_utils.int64_feature_list(prompt)})
        example = tf.train.SequenceExample(context=context,
                                           feature_lists=sequences)
        if debug:
            # Print out the data that is going to be saved:
            print("-----------------\n", "EXAMPLE: \n", "Response: {}\nPrompt: {}\nQ_id: {}\n\ntarget: {}\ngrade: {}\n,teacher_pred: {}\nexample_num: {}\n\n".format(response, prompt, q_id, tgt, grd, example_pred, idx))
        writer.write(example.SerializeToString())
    writer.close()
    return
def process_data(args): if args.cifar100 == True: files = ['test', 'train'] else: files=['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch'] for file in files: labels, images = unpickle(os.path.join(args.data_path,file)) num_examples=images.shape[0] print num_examples for index in range(num_examples): label = int(labels[index]) if index % 10000 == 0: print index try: writer.close() except: pass writer = tf.python_io.TFRecordWriter(os.path.join(args.target_path, file + '_'+ str(index/10000)+'.tfrecord')) image_raw = images[index].tostring() example = tf.train.Example(features=tf.train.Features(feature={ 'height': tfrecord_utils.int64_feature([32]), 'width': tfrecord_utils.int64_feature([32]), 'depth': tfrecord_utils.int64_feature([3]), 'label': tfrecord_utils.int64_feature([label]), 'image_raw': tfrecord_utils.bytes_feature([image_raw])})) writer.write(example.SerializeToString()) try: writer.close() except: pass
def process_data(args): files = os.listdir(os.path.join(args.data_path, 'images')) num_examples = len(files) #Make sure there is a good global shuffle random.shuffle(files) for file, i in zip(files, range(num_examples)): file_path = os.path.join(args.data_path, 'images/' + file) if i % 5000 == 0: print i try: writer.close() except: pass writer = tf.python_io.TFRecordWriter( os.path.join(args.target_path, 'LSUN' + '_' + str(i / 5000) + '.tfrecord')) if os.path.isfile(file_path) and os.stat(file_path).st_size != 0: im = Image.open(file_path) im = im.resize((args.size, args.size), resample=Image.BICUBIC) image_raw = im.convert("RGB").tostring("raw", "RGB") example = tf.train.Example(features=tf.train.Features( feature={ 'height': tfrecord_utils.int64_feature([args.size]), 'width': tfrecord_utils.int64_feature([args.size]), 'depth': tfrecord_utils.int64_feature([3]), 'label': tfrecord_utils.int64_feature([-1]), 'image_raw': tfrecord_utils.bytes_feature([image_raw]) })) writer.write(example.SerializeToString()) try: writer.close() except: pass
def process_data(args): data = np.loadtxt(args.data_path) data_X = data[:, :256] data_y = np.argmax(np.asarray(data[:, 256:], dtype=np.int32), axis=1) data_X = np.reshape(data_X, [-1, 16, 16]) num_examples = data.shape[0] print num_examples for index in range(num_examples): if index % 10000 == 0: try: writer.close() except: pass writer = tf.python_io.TFRecordWriter( os.path.join(args.target_path, 'semeion_' + str(index / 10000) + '.tfrecord')) image_raw = imresize(data_X[index], size=[28, 28]).tostring() example = tf.train.Example(features=tf.train.Features( feature={ 'height': tfrecord_utils.int64_feature([28]), 'width': tfrecord_utils.int64_feature([28]), 'depth': tfrecord_utils.int64_feature([1]), 'label': tfrecord_utils.int64_feature([data_y[index]]), 'image_raw': tfrecord_utils.bytes_feature([image_raw]) })) writer.write(example.SerializeToString()) try: writer.close() except: pass
def process_data(data, args):
    """Convert MNIST-style train/valid/test splits to TFRecords.

    `data` is indexed by split name and exposes `.images`, `.labels` and
    `.num_examples` per split (presumably a tf.contrib.learn-style Datasets
    mapping -- TODO confirm against caller). Shards roll over every 10000
    examples within each split.

    Fix: a ``writer = None`` sentinel replaces the bare
    ``try: writer.close() except: pass`` NameError hack.
    """
    sets = ['train', 'valid', 'test']
    writer = None  # current shard writer
    for set in sets:
        images = data[set].images
        labels = data[set].labels
        num_examples = data[set].num_examples
        for index in range(num_examples):
            label = int(labels[index])
            if index % 10000 == 0:
                # Roll over to a new shard file.
                if writer is not None:
                    writer.close()
                writer = tf.python_io.TFRecordWriter(
                    os.path.join(args.target_path,
                                 set + '_' + str(index / 10000) + '.tfrecord'))
            image_raw = images[index].tostring()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': tfrecord_utils.int64_feature([28]),
                'width': tfrecord_utils.int64_feature([28]),
                'depth': tfrecord_utils.int64_feature([1]),
                'label': tfrecord_utils.int64_feature([label]),
                'image_raw': tfrecord_utils.bytes_feature([image_raw])}))
            writer.write(example.SerializeToString())
    if writer is not None:
        writer.close()
def process_data_XVAL(args):
    """Build leave-one-digit-out cross-validation TFRecords from SVHN .mat files.

    For every fold f (one per digit class), examples whose true label == f
    are written to a '_heldout_' (unseen) record with their label remapped to
    max_digits, while labels > f are shifted down by one so the seen classes
    stay contiguous. Shards roll over every 42660 examples.
    """
    files = [
        'test_32x32.mat',
        'train_32x32.mat',
        'extra_32x32.mat',
    ]
    for file in files:
        data_y, data_X = load_svhn_mat(os.path.join(args.data_path, file))
        set = file.split('.')[0]
        max_digits = np.max(data_y)
        num_examples = data_X.shape[0]
        for fold in range(max_digits + 1):
            print 'Fold', fold
            fname = 'fold_' + str(fold)
            path = os.path.join(args.target_path, fname)
            if not os.path.isdir(path):
                os.makedirs(path)
            for index in range(num_examples):
                label = int(data_y[index])
                # Remap labels: seen classes become 0..max_digits-1 and the
                # held-out class becomes max_digits.
                if label > fold:
                    label -= 1
                elif label == fold:
                    label = max_digits
                if index % 42660 == 0:
                    # Roll over shard writers; the bare except deliberately
                    # swallows the NameError before the first writers exist.
                    try:
                        writer_seen.close()
                        writer_unseen.close()
                    except:
                        pass
                    writer_seen = tf.python_io.TFRecordWriter(
                        os.path.join(
                            path,
                            set + '_' + fname + '_' + str(index / 42660) + '.tfrecord'))
                    writer_unseen = tf.python_io.TFRecordWriter(
                        os.path.join(
                            path,
                            set + '_heldout_' + fname + '_' + str(index / 42660) + '.tfrecord'))
                image_raw = data_X[index].tostring()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': tfrecord_utils.int64_feature([32]),
                    'width': tfrecord_utils.int64_feature([32]),
                    'depth': tfrecord_utils.int64_feature([3]),
                    'label': tfrecord_utils.int64_feature([label]),
                    'image_raw': tfrecord_utils.bytes_feature([image_raw])}))
                # Route on the ORIGINAL label, not the remapped one.
                if int(data_y[index]) == fold:
                    writer_unseen.write(example.SerializeToString())
                else:
                    writer_seen.write(example.SerializeToString())
    # Close the final pair of writers (none exist if there were no examples).
    try:
        writer_seen.close()
        writer_unseen.close()
    except:
        pass
def process_data(args):
    """Convert Tiny-ImageNet-style train/val/test folders to TFRecords.

    Labels come from index.txt (class code -> class id/name); validation
    labels are resolved through val/val_index.txt; test images get label -1.
    Images are shuffled globally, resized with bicubic resampling and stored
    as raw RGB bytes. Shards roll over every 10000 images.
    """
    sets = ['train', 'val', 'test']
    index = {}
    # index.txt lines: "<class_code> <class_name>"; class id is the line number.
    with open(os.path.join(args.data_path, 'index.txt'), 'r') as f:
        ind_list = [line[:-1].split() for line in f.readlines()]
    for i in xrange(len(ind_list)):
        index[ind_list[i][0]] = {'class': i, 'name': ind_list[i][1]}
    for set in sets:
        print set
        files = os.listdir(os.path.join(args.data_path, set + '/images/'))
        num_examples = len(files)
        # Make sure there is a good global shuffle
        random.shuffle(files)
        if set == 'val':
            # val_index.txt maps each validation filename to its class code.
            with open(os.path.join(args.data_path, 'val/val_index.txt'), 'r') as f:
                val_list = [line[:-1].split() for line in f.readlines()]
            val_index = {}
            for item in val_list:
                val_index[item[0]] = item[1]
        for file, i in zip(files, range(num_examples)):
            file_path = os.path.join(args.data_path, set + '/images/' + file)
            if set == 'train':
                # Train filenames embed the class code before the underscore.
                label = index[file.split('_')[0]]['class']
                name = index[file.split('_')[0]]['name']
            elif set == 'val':
                code = val_index[file]
                label = index[code]['class']
                name = index[code]['name']
            else:
                # Test labels are unknown.
                label = -1
                name = 'NA'
            if i % 10000 == 0:
                print i
                # Roll over the shard writer; the bare except deliberately
                # swallows the NameError before the first writer exists.
                try:
                    writer.close()
                except:
                    pass
                writer = tf.python_io.TFRecordWriter(
                    os.path.join(args.target_path,
                                 set + '_' + str(i / 10000) + '.tfrecord'))
            # Skip missing or empty image files.
            if os.path.isfile(file_path) and os.stat(file_path).st_size != 0:
                im = Image.open(file_path)
                im = im.resize((args.size, args.size), resample=Image.BICUBIC)
                image_raw = im.convert("RGB").tostring("raw", "RGB")
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': tfrecord_utils.int64_feature([args.size]),
                    'width': tfrecord_utils.int64_feature([args.size]),
                    'depth': tfrecord_utils.int64_feature([3]),
                    'label': tfrecord_utils.int64_feature([label]),
                    'image_raw': tfrecord_utils.bytes_feature([image_raw])}))
                writer.write(example.SerializeToString())
    # Close the final writer (none exists if there were no files).
    try:
        writer.close()
    except:
        pass
def process_data_XVAL(data, args):
    """Build leave-one-digit-out cross-validation TFRecords from MNIST splits.

    For every fold f in 0..9, examples whose true label == f are written to a
    '_heldout_' (unseen) record with their label remapped to 9, while labels
    > f are shifted down by one so seen classes stay contiguous 0..8. Shards
    roll over every 10000 examples.
    """
    sets = ['train', 'valid', 'test']
    for set in sets:
        images = data[set].images
        labels = data[set].labels
        num_examples = data[set].num_examples
        for fold in xrange(10):
            print 'Fold', fold
            name = 'fold_' + str(fold)
            path = os.path.join(args.target_path, name)
            if not os.path.isdir(path):
                os.makedirs(path)
            for index in range(num_examples):
                label = int(labels[index])
                if index % 10000 == 0:
                    # Roll over shard writers; the bare except deliberately
                    # swallows the NameError before the first writers exist.
                    try:
                        writer_seen.close()
                        writer_unseen.close()
                    except:
                        pass
                    writer_seen = tf.python_io.TFRecordWriter(
                        os.path.join(
                            path,
                            set + '_' + name + '_' + str(index / 10000) + '.tfrecord'))
                    writer_unseen = tf.python_io.TFRecordWriter(
                        os.path.join(
                            path,
                            set + '_heldout_' + name + '_' + str(index / 10000) + '.tfrecord'))
                # Remap: seen classes 0..8; the held-out class becomes 9.
                if label > fold:
                    label -= 1
                elif label == fold:
                    label = 9
                image_raw = images[index].tostring()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': tfrecord_utils.int64_feature([28]),
                    'width': tfrecord_utils.int64_feature([28]),
                    'depth': tfrecord_utils.int64_feature([1]),
                    'label': tfrecord_utils.int64_feature([label]),
                    'image_raw': tfrecord_utils.bytes_feature([image_raw])}))
                # Route on the ORIGINAL label, not the remapped one.
                if int(labels[index]) == fold:
                    writer_unseen.write(example.SerializeToString())
                else:
                    writer_seen.write(example.SerializeToString())
    # Close the final pair of writers (none exist if there were no examples).
    try:
        writer_seen.close()
        writer_unseen.close()
    except:
        pass
def process_data_greyscale(args): files = [ 'test_32x32.mat', 'train_32x32.mat', 'extra_32x32.mat', ] for file in files: data_y, data_X = load_svhn_mat(os.path.join(args.data_path, file)) data_X = 0.21 * data_X[:, :, :, 0] + 0.72 * data_X[:, :, :, 1] + 0.07 * data_X[:, :, :, 2] data_X = np.asarray(data_X, dtype=np.int32) print data_X.shape fname = file.split('.')[0] num_examples = data_X.shape[0] print num_examples for index in range(num_examples): label = int(data_y[index]) if index % 10000 == 0: try: writer.close() except: pass writer = tf.python_io.TFRecordWriter( os.path.join( args.target_path, fname + '_gs_' + str(index / 10000) + '.tfrecord')) image_raw = imresize(data_X[index], [args.size, args.size]).tostring() example = tf.train.Example(features=tf.train.Features( feature={ 'height': tfrecord_utils.int64_feature([args.size]), 'width': tfrecord_utils.int64_feature([args.size]), 'depth': tfrecord_utils.int64_feature([1]), 'label': tfrecord_utils.int64_feature([label]), 'image_raw': tfrecord_utils.bytes_feature([image_raw]) })) writer.write(example.SerializeToString()) try: writer.close() except: pass
def process_data_XVAL(args): files = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch'] for file in files: labels, images = unpickle(os.path.join(args.data_path,file)) max_digits=np.max(labels) num_examples=images.shape[0] for fold in range(max_digits+1): print 'Fold', fold fname='fold_'+str(fold) path = os.path.join(args.target_path, fname) if not os.path.isdir(path): os.makedirs(path) for index in range(num_examples): label = int(labels[index]) if label > fold: label -= 1 elif label == fold: label = max_digits try: writer_seen.close() writer_unseen.close() except: pass writer_seen = tf.python_io.TFRecordWriter(os.path.join(path, file+'_'+fname +'.tfrecord')) writer_unseen = tf.python_io.TFRecordWriter(os.path.join(path, file + '_heldout_' + fname + '.tfrecord')) image_raw = images[index].tostring() example = tf.train.Example(features=tf.train.Features(feature={ 'height': tfrecord_utils.int64_feature([32]), 'width': tfrecord_utils.int64_feature([32]), 'depth': tfrecord_utils.int64_feature([3]), 'label': tfrecord_utils.int64_feature([label]), 'image_raw': tfrecord_utils.bytes_feature([image_raw])})) if int(labels[index]) == fold: writer_unseen.write(example.SerializeToString()) else: writer_seen.write(example.SerializeToString()) try: writer_seen.close() writer_unseen.close() except: pass
def process_data(args): files = [ 'test_32x32.mat', 'train_32x32.mat', 'extra_32x32.mat', ] for file in files: data_y, data_X = load_svhn_mat(os.path.join(args.data_path, file)) fname = file.split('.')[0] num_examples = data_X.shape[0] print num_examples for index in range(num_examples): label = int(data_y[index]) if index % 10000 == 0: try: writer.close() except: pass writer = tf.python_io.TFRecordWriter( os.path.join( args.target_path, fname + '_' + str(index / 10000) + '.tfrecord')) image_raw = data_X[index].tostring() example = tf.train.Example(features=tf.train.Features( feature={ 'height': tfrecord_utils.int64_feature([32]), 'width': tfrecord_utils.int64_feature([32]), 'depth': tfrecord_utils.int64_feature([1]), 'label': tfrecord_utils.int64_feature([label]), 'image_raw': tfrecord_utils.bytes_feature([image_raw]) })) writer.write(example.SerializeToString()) try: writer.close() except: pass
def main(argv=None): """Converts a dataset to tfrecords.""" args = commandLineParser.parse_args() if os.path.isdir(args.destination_dir): print 'destination directory exists. Exiting...' else: os.makedirs(args.destination_dir) if not os.path.isdir('CMDs'): os.makedirs('CMDs') with open('CMDs/step_preprocess_data.cmd', 'a') as f: f.write(' '.join(sys.argv) + '\n') f.write('--------------------------------\n') # Load responses and prompts as sequences of word ids responses, _ = load_text(args.input_data_path, args.input_wlist_path) prompts, _ = load_text(args.input_prompt_path, args.input_wlist_path) # Load up the prompts as sequences of words with open(args.input_prompt_path, 'r') as file: topics = [line.replace('\n', '') for line in file.readlines()] # Get unique set of topics and topic counts (and sort tem) unique_topics, topic_counts = np.unique(topics, return_counts=True) topics = unique_topics[np.flip(np.argsort(topic_counts), 0)] topic_counts = np.flip(np.sort(topic_counts), 0) # Create dictionary for topics mapping sentence to topic id # Also create file of sorted topics and unigrams file # Unigram file later used for training topic_dict = {} with open(os.path.join(args.destination_dir, 'unigrams.txt'), 'w') as ufile: with open(os.path.join(args.destination_dir, 'sorted_topics.txt'), 'w') as tfile: for i, topic, count in zip(xrange(topics.shape[0]), topics, topic_counts): topic_dict[topic] = i ufile.write(str(i) + ',' + str(int(count)) + '\n') tfile.write(topic + '\n') # Load up the speakers and speakers grades = np.loadtxt(args.input_grade_path) with open(args.input_spkr_path, 'r') as file: speakers = np.asarray( [line.replace('\n', '') for line in file.readlines()]) # Create a list of topic IDs for every response with open(args.input_prompt_path, 'r') as file: q_ids = np.asarray( [topic_dict[line.replace('\n', '')] for line in file.readlines()]) ### Split data into train and validation data sets n = len(responses) train_size = int(n * (1.0 - 
args.valid_fraction)) valid_size = n - train_size print 'Total dataset size', n, 'Train dataset size', train_size, 'Valid dataset size', valid_size np.random.seed(1000) permutation = np.random.choice(np.arange(n), n, replace=False) index_train = permutation[:train_size] inded_valid = permutation[train_size:] trn_responses = responses[index_train] trn_prompts = prompts[index_train] trn_q_ids = q_ids[index_train] trn_speakers = speakers[index_train] trn_grades = grades[index_train] valid_responses = responses[inded_valid] valid_prompts = prompts[inded_valid] valid_q_ids = q_ids[inded_valid] valid_speakers = speakers[inded_valid] valid_grades = grades[inded_valid] # Create the training TF Record file filename = 'relevance.train.tfrecords' print 'Writing', filename writer = tf.python_io.TFRecordWriter( os.path.join(args.destination_dir, filename)) for response, prompt, q_id, grd, spkr in zip(trn_responses, trn_prompts, trn_q_ids, trn_grades, trn_speakers): example = tf.train.SequenceExample( context=tf.train.Features( feature={ 'targets': tfrecord_utils.float_feature([1.0]), 'grade': tfrecord_utils.float_feature([grd]), 'spkr': tfrecord_utils.bytes_feature([spkr]), 'q_id': tfrecord_utils.int64_feature([q_id]) }), feature_lists=tf.train.FeatureLists( feature_list={ 'response': tfrecord_utils.int64_feature_list(response), 'prompt': tfrecord_utils.int64_feature_list(prompt) })) writer.write(example.SerializeToString()) writer.close() # Create the validation TF Record file filename = 'relevance.valid.tfrecords' print 'Writing', filename writer = tf.python_io.TFRecordWriter( os.path.join(args.destination_dir, filename)) for response, prompt, q_id, grd, spkr in zip(valid_responses, valid_prompts, valid_q_ids, valid_grades, valid_speakers): example = tf.train.SequenceExample( context=tf.train.Features( feature={ 'targets': tfrecord_utils.float_feature([1.0]), 'grade': tfrecord_utils.float_feature([grd]), 'spkr': tfrecord_utils.bytes_feature([spkr]), 'q_id': 
tfrecord_utils.int64_feature([q_id]) }), feature_lists=tf.train.FeatureLists( feature_list={ 'response': tfrecord_utils.int64_feature_list(response), 'prompt': tfrecord_utils.int64_feature_list(prompt) })) writer.write(example.SerializeToString()) writer.close()
def main(argv=None): """Converts a dataset to tfrecords.""" args = commandLineParser.parse_args() if not os.path.isdir(args.destination_dir): os.makedirs(args.destination_dir) if not os.path.isdir('CMDs'): os.makedirs('CMDs') with open('CMDs/step_preprocess_test_data.cmd', 'a') as f: f.write(' '.join(sys.argv) + '\n') f.write('--------------------------------\n') # Load responses and prompts as sequences of word ids responses, _ = load_text(args.input_data_path, args.input_wlist_path) prompts, _ = load_text(args.input_prompt_path, args.input_wlist_path) # Load up the grades, targets and speakers grades = np.loadtxt(args.input_grade_path) targets = np.loadtxt(args.input_tgt_path, dtype=np.float32) with open(args.input_spkr_path, 'r') as file: speakers = np.asarray( [line.replace('\n', '') for line in file.readlines()]) # Load up sorted topics and (re)construct the topic dict so that I map each prompt word sequence to its q_id topic_dict = {} i = 0 with open(os.path.join(args.sorted_topics_path), 'r') as tfile: for topic in tfile.readlines(): topic_dict[topic.replace('\n', '')] = i i += 1 # Load up the prompts as sequences of words and convert to q_id try: with open(args.input_prompt_path, 'r') as file: q_ids = np.asarray([ topic_dict[line.replace('\n', '')] for line in file.readlines() ]) except: with open(args.input_prompt_path, 'r') as file: q_ids = np.asarray([-1 for line in file.readlines()]) # Create the training TF Record file filename = args.name + '.tfrecords' print 'Writing', filename writer = tf.python_io.TFRecordWriter( os.path.join(args.destination_dir, filename)) for response, prompt, q_id, grd, spkr, tgt in zip(responses, prompts, q_ids, grades, speakers, targets): example = tf.train.SequenceExample( context=tf.train.Features( feature={ 'targets': tfrecord_utils.float_feature([tgt]), 'grade': tfrecord_utils.float_feature([grd]), 'spkr': tfrecord_utils.bytes_feature([spkr]), 'q_id': tfrecord_utils.int64_feature([q_id]) }), 
feature_lists=tf.train.FeatureLists( feature_list={ 'response': tfrecord_utils.int64_feature_list(response), 'prompt': tfrecord_utils.int64_feature_list(prompt) })) writer.write(example.SerializeToString()) writer.close()
def write_to_tfrecords(filename, destination_dir, responses, prompts, q_ids,
                       grades, speakers, targets=1.0, debug=False):
    """Serialize aligned example lists into a single TFRecord file.

    `targets` may be a scalar (broadcast to every example) or a list aligned
    with the other inputs.

    Fix: scalar detection uses isinstance() instead of exact ``type() is``
    checks, so int/float subclasses are handled consistently.
    """
    # TEMP debugging of input sizes.
    print(len(responses))
    print(len(prompts))
    print(len(q_ids))
    print(len(grades))
    print(len(speakers))
    # All inputs must be parallel lists of identical length.
    assert len(
        {len(responses), len(prompts), len(q_ids), len(grades),
         len(speakers)}) == 1

    if isinstance(targets, (float, int)):
        # Scalar target: broadcast the same value to every example.
        targets = [float(targets)] * len(responses)
    else:
        assert isinstance(targets, list)
        assert len(targets) == len(responses)

    # Create the training TF Record file
    print('Writing: ', filename)
    writer = tf.python_io.TFRecordWriter(
        os.path.join(destination_dir, filename))
    for response, prompt, q_id, grd, spkr, tgt in zip(responses, prompts,
                                                      q_ids, grades,
                                                      speakers, targets):
        if debug:
            # Print out the data that is going to be saved:
            print(
                "-----------------\n", "EXAMPLE: \n",
                "Response: {}\nPrompt: {}\nQ_id: {}\n\ntarget: {}\ngrade: {}\n\n"
                .format(response, prompt, q_id, tgt, grd))
        example = tf.train.SequenceExample(
            context=tf.train.Features(feature={
                'targets': tfrecord_utils.float_feature([tgt]),
                'grade': tfrecord_utils.float_feature([float(grd)]),
                'spkr': tfrecord_utils.bytes_feature([spkr]),
                'q_id': tfrecord_utils.int64_feature([q_id])}),
            feature_lists=tf.train.FeatureLists(feature_list={
                'response': tfrecord_utils.int64_feature_list(response),
                'prompt': tfrecord_utils.int64_feature_list(prompt)}))
        writer.write(example.SerializeToString())
    writer.close()
    return
def main(argv=None):
    """Convert an Omniglot image directory to TFRecords, sharded every 15000.

    Also assembles the first 32 images into a 4x8 preview collage saved as
    omniglot.png. Images smaller than args.size are skipped; files that fail
    to process are appended to an 'errors' file and processing continues.
    """
    args = commandLineParser.parse_args()
    if not os.path.isdir('CMDs'):
        os.mkdir('CMDs')
    # Log the exact command line for reproducibility.
    with open('CMDs/step_process_omniglot_data.txt', 'a') as f:
        f.write(' '.join(sys.argv) + '\n')
        f.write('--------------------------------\n')
    if not os.path.isdir(args.target_path):
        os.makedirs(args.target_path)
    # 4 rows x 8 columns preview collage of the first 32 images.
    collage = np.zeros(shape=[4 * args.size, 8 * args.size])
    dirs = os.listdir(args.data_path)
    len_dirs = len(dirs)
    for item, j in zip(dirs, xrange(len_dirs)):
        if j % 15000 == 0:
            # Roll over the shard writer; the bare except deliberately
            # swallows the NameError before the first writer exists.
            try:
                writer.close()
            except:
                pass
            writer = tf.python_io.TFRecordWriter(
                os.path.join(args.target_path,
                             'omniglot_' + str(j / 15000) + '.tfrecord'))
            print j
        img_file = os.path.join(args.data_path, item)
        # Skip missing or empty files.
        if os.path.isfile(img_file) and os.stat(img_file).st_size != 0:
            try:
                im = Image.open(img_file)
                width, height = im.size
                size = np.min([width, height])
                if size < args.size:
                    # Too small to upsample sensibly; skip.
                    continue
                imResize = im.resize((args.size, args.size),
                                     resample=Image.NEAREST)
                imResize = np.array(imResize.getdata(),
                                    dtype=np.uint8).reshape(
                                        args.size, args.size)
                if j < 32:
                    # Place this image into its collage grid cell.
                    i = j % 8
                    k = j / 8
                    collage[k * args.size:(k + 1) * args.size,
                            i * args.size:(i + 1) * args.size] = imResize
                elif j == 32:
                    # Collage complete: render and save it once.
                    fig = plt.imshow(np.asarray(collage, dtype=np.uint8),
                                     cmap='gray')
                    fig.axes.get_xaxis().set_visible(False)
                    fig.axes.get_yaxis().set_visible(False)
                    path = os.path.join(args.target_path, 'omniglot.png')
                    plt.savefig(path, bbox_inches='tight')
                    plt.close()
                # Flatten to a single row of pixels before serializing.
                imResize = np.reshape(imResize, (args.size * args.size))
                imResize_raw = imResize.tostring()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': tfrecord_utils.int64_feature([args.size]),
                    'width': tfrecord_utils.int64_feature([args.size]),
                    'depth': tfrecord_utils.int64_feature([1]),
                    'label': tfrecord_utils.int64_feature([-1]),
                    'image_raw': tfrecord_utils.bytes_feature([imResize_raw])}))
                writer.write(example.SerializeToString())
            except:
                # Record problem files and keep going.
                with open('errors', 'a') as handle:
                    handle.write(item + '\n')
                    print 'here'
    writer.close()