def write_batches(target_dir, name, start_batch_num, labels, tasks, jpeg_files):
    """Scan JPEG batches and record the batches whose resize output is unexpected.

    Despite its name, this variant writes no data batches: the batch-writing
    code is commented out below. It only runs every batch through
    ``resizeJPEG`` and collects the name of the first file of each batch for
    which the number of returned strings is not exactly 1. The collected
    names are handed to the project's ``pickle`` helper under the name
    ``'car_raw_corrupted_list_' + name`` and printed.

    Args:
        target_dir: Directory passed to ``makedir`` before scanning.
        name: Label used in the progress message and as the suffix of the
            pickled list's name.
        start_batch_num: Only referenced by the commented-out writing code.
        labels: Partitioned alongside ``jpeg_files``; here it only limits
            how many batches are visited (``zip`` stops at the shortest).
        tasks: Same role as ``labels`` in this variant.
        jpeg_files: Sequence of file-like objects providing ``read()`` and
            a ``name`` attribute.

    Returns:
        The number of batches visited; 0 when there were none (the original
        raised UnboundLocalError on ``i`` in that case).
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    tasks = partition_list(tasks, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    corrupt_list = []
    batches_seen = 0
    for i, (labels_batch, tasks_batch, jpeg_file_batch) in enumerate(
            zip(labels, tasks, jpeg_files)):
        raw_images = [jpeg.read() for jpeg in jpeg_file_batch]
        jpeg_strings = list(itertools.chain.from_iterable(
            resizeJPEG(raw_images, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                       CROP_TO_SQUARE)))

        # NOTE(review): comparing against 1 presumably assumes
        # OUTPUT_BATCH_SIZE == 1 for this scan, so that one batch is one
        # image -- confirm. Only the first file of the batch is reported.
        if len(jpeg_strings) != 1:
            first_name = jpeg_file_batch[0].name
            # Single-argument print behaves the same on Python 2 and 3.
            print("%s %s %s" % (i, first_name, len(jpeg_strings)))
            corrupt_list.append(first_name)

        # Batch writing is disabled in this variant; kept for reference.
        #batch_path = os.path.join(target_dir, 'data_batch_%d' % (start_batch_num + i))
        #makedir(batch_path)
        #assert len(labels_batch) == len(tasks_batch), "tasks dim should be the same as labels dim"
        #for j in xrange(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
        #    pickle(os.path.join(batch_path, 'data_batch_%d.%d' % (start_batch_num + i, j/OUTPUT_SUB_BATCH_SIZE)),
        #           {'data': jpeg_strings[j:j+OUTPUT_SUB_BATCH_SIZE],
        #            'labels': labels_batch[j:j+OUTPUT_SUB_BATCH_SIZE],
        #            'tasks':tasks_batch[j:j+OUTPUT_SUB_BATCH_SIZE]})
        #print "Wrote %s (%s batch %d of %d) (%.2f sec)" % (batch_path, name, i+1, len(jpeg_files), time() - t)
        batches_seen = i + 1

    pickle('car_raw_corrupted_list_' + name, corrupt_list)
    print(corrupt_list)
    print(len(corrupt_list))
    return batches_seen
def write_batches(target_dir, name, start_batch_num, labels, jpeg_files):
    """Resize JPEGs batch by batch and pickle them with their labels.

    ``jpeg_files`` and ``labels`` are each split with ``partition_list``
    into chunks of ``OUTPUT_BATCH_SIZE``. For every batch, a directory
    ``<target_dir>/data_batch_<N>`` is created (``N = start_batch_num + i``)
    and the batch is stored in slices of ``OUTPUT_SUB_BATCH_SIZE`` as
    ``data_batch_<N>.<k>`` via the project's ``pickle`` helper, each holding
    a dict with ``'data'`` and ``'labels'`` entries.

    Args:
        target_dir: Root output directory; created via ``makedir``.
        name: Label used only in progress messages.
        start_batch_num: Number given to the first batch directory.
        labels: Sequence of labels, partitioned in step with ``jpeg_files``.
        jpeg_files: Sequence of file-like objects providing ``read()``.

    Returns:
        The number of batches written; 0 when there were none (the original
        raised UnboundLocalError on ``i`` in that case).
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    # Single-argument print behaves the same on Python 2 and 3.
    print("Writing %s batches..." % name)

    batches_written = 0
    for i, (labels_batch, jpeg_file_batch) in enumerate(zip(labels, jpeg_files)):
        t = time()
        batch_num = start_batch_num + i

        raw_images = [jpeg.read() for jpeg in jpeg_file_batch]
        jpeg_strings = list(itertools.chain.from_iterable(
            resizeJPEG(raw_images, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                       CROP_TO_SQUARE)))

        batch_path = os.path.join(target_dir, 'data_batch_%d' % batch_num)
        makedir(batch_path)

        # NOTE(review): 'data' and 'labels' are sliced with the same offsets,
        # which presumes resizeJPEG yields exactly one string per input
        # image, in order -- confirm; otherwise images and labels misalign.
        for j in range(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
            # Floor division keeps the sub-batch index an int regardless of
            # Python version or `from __future__ import division`.
            sub_path = os.path.join(
                batch_path,
                'data_batch_%d.%d' % (batch_num, j // OUTPUT_SUB_BATCH_SIZE))
            pickle(sub_path, {
                'data': jpeg_strings[j:j + OUTPUT_SUB_BATCH_SIZE],
                'labels': labels_batch[j:j + OUTPUT_SUB_BATCH_SIZE],
            })

        print("Wrote %s (%s batch %d of %d) (%.2f sec)" % (
            batch_path, name, i + 1, len(jpeg_files), time() - t))
        batches_written = i + 1

    return batches_written
def write_batches(target_dir, name, start_batch_num, labels, tasks, jpeg_files):
    """Scan JPEG batches and record the batches whose resize output is unexpected.

    Despite its name, this variant writes no data batches: the batch-writing
    code is commented out below. It only runs every batch through
    ``resizeJPEG`` and collects the name of the first file of each batch for
    which the number of returned strings is not exactly 1. The collected
    names are handed to the project's ``pickle`` helper under the name
    ``'car_raw_corrupted_list_' + name`` and printed.

    Args:
        target_dir: Directory passed to ``makedir`` before scanning.
        name: Label used in the progress message and as the suffix of the
            pickled list's name.
        start_batch_num: Only referenced by the commented-out writing code.
        labels: Partitioned alongside ``jpeg_files``; here it only limits
            how many batches are visited (``zip`` stops at the shortest).
        tasks: Same role as ``labels`` in this variant.
        jpeg_files: Sequence of file-like objects providing ``read()`` and
            a ``name`` attribute.

    Returns:
        The number of batches visited; 0 when there were none (the original
        raised UnboundLocalError on ``i`` in that case).
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    tasks = partition_list(tasks, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    corrupt_list = []
    batches_seen = 0
    for i, (labels_batch, tasks_batch, jpeg_file_batch) in enumerate(
            zip(labels, tasks, jpeg_files)):
        raw_images = [jpeg.read() for jpeg in jpeg_file_batch]
        jpeg_strings = list(itertools.chain.from_iterable(
            resizeJPEG(raw_images, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                       CROP_TO_SQUARE)))

        # NOTE(review): comparing against 1 presumably assumes
        # OUTPUT_BATCH_SIZE == 1 for this scan, so that one batch is one
        # image -- confirm. Only the first file of the batch is reported.
        if len(jpeg_strings) != 1:
            first_name = jpeg_file_batch[0].name
            # Single-argument print behaves the same on Python 2 and 3.
            print("%s %s %s" % (i, first_name, len(jpeg_strings)))
            corrupt_list.append(first_name)

        # Batch writing is disabled in this variant; kept for reference.
        #batch_path = os.path.join(target_dir, 'data_batch_%d' % (start_batch_num + i))
        #makedir(batch_path)
        #assert len(labels_batch) == len(tasks_batch), "tasks dim should be the same as labels dim"
        #for j in xrange(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
        #    pickle(os.path.join(batch_path, 'data_batch_%d.%d' % (start_batch_num + i, j/OUTPUT_SUB_BATCH_SIZE)),
        #           {'data': jpeg_strings[j:j+OUTPUT_SUB_BATCH_SIZE],
        #            'labels': labels_batch[j:j+OUTPUT_SUB_BATCH_SIZE],
        #            'tasks':tasks_batch[j:j+OUTPUT_SUB_BATCH_SIZE]})
        #print "Wrote %s (%s batch %d of %d) (%.2f sec)" % (batch_path, name, i+1, len(jpeg_files), time() - t)
        batches_seen = i + 1

    pickle('car_raw_corrupted_list_' + name, corrupt_list)
    print(corrupt_list)
    print(len(corrupt_list))
    return batches_seen
def write_batches(target_dir, name, start_batch_num, labels, jpeg_files):
    """Resize JPEGs batch by batch and pickle them with their labels.

    ``jpeg_files`` and ``labels`` are each split with ``partition_list``
    into chunks of ``OUTPUT_BATCH_SIZE``. For every batch, a directory
    ``<target_dir>/data_batch_<N>`` is created (``N = start_batch_num + i``)
    and the batch is stored in slices of ``OUTPUT_SUB_BATCH_SIZE`` as
    ``data_batch_<N>.<k>`` via the project's ``pickle`` helper, each holding
    a dict with ``'data'`` and ``'labels'`` entries.

    Args:
        target_dir: Root output directory; created via ``makedir``.
        name: Label used only in progress messages.
        start_batch_num: Number given to the first batch directory.
        labels: Sequence of labels, partitioned in step with ``jpeg_files``.
        jpeg_files: Sequence of file-like objects providing ``read()``.

    Returns:
        The number of batches written; 0 when there were none (the original
        raised UnboundLocalError on ``i`` in that case).
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    # Single-argument print behaves the same on Python 2 and 3.
    print("Writing %s batches..." % name)

    batches_written = 0
    for i, (labels_batch, jpeg_file_batch) in enumerate(zip(labels, jpeg_files)):
        t = time()
        batch_num = start_batch_num + i

        raw_images = [jpeg.read() for jpeg in jpeg_file_batch]
        jpeg_strings = list(itertools.chain.from_iterable(
            resizeJPEG(raw_images, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                       CROP_TO_SQUARE)))

        batch_path = os.path.join(target_dir, 'data_batch_%d' % batch_num)
        makedir(batch_path)

        # NOTE(review): 'data' and 'labels' are sliced with the same offsets,
        # which presumes resizeJPEG yields exactly one string per input
        # image, in order -- confirm; otherwise images and labels misalign.
        for j in range(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
            # Floor division keeps the sub-batch index an int regardless of
            # Python version or `from __future__ import division`.
            sub_path = os.path.join(
                batch_path,
                'data_batch_%d.%d' % (batch_num, j // OUTPUT_SUB_BATCH_SIZE))
            pickle(sub_path, {
                'data': jpeg_strings[j:j + OUTPUT_SUB_BATCH_SIZE],
                'labels': labels_batch[j:j + OUTPUT_SUB_BATCH_SIZE],
            })

        print("Wrote %s (%s batch %d of %d) (%.2f sec)" % (
            batch_path, name, i + 1, len(jpeg_files), time() - t))
        batches_written = i + 1

    return batches_written