def write_batches(target_dir, name, start_batch_num, labels, tasks,
                  jpeg_files):
    """Scan the JPEG batches and record those with an unexpected result count.

    Despite its name, this variant writes no data batches. It partitions the
    inputs, runs every batch of files through ``resizeJPEG`` and, whenever
    the flattened result does not contain exactly one string, records the
    name of the first file of that batch. The collected names are stored via
    the project's ``pickle(path, obj)`` helper under
    ``'car_raw_corrupted_list_' + name`` (no directory component is added)
    and printed together with their count.

    Args:
        target_dir: Directory passed to ``makedir``; nothing else is written
            into it by this variant.
        name: Label used in the progress message and in the output filename.
        start_batch_num: Unused here; kept for interface compatibility.
        labels: Sequence partitioned alongside ``jpeg_files``; only bounds
            the number of iterations through ``zip``.
        tasks: Same role as ``labels``.
        jpeg_files: File-like objects exposing ``read()`` and ``name``.

    Returns:
        The number of batches iterated over; 0 when there were none.
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    tasks = partition_list(tasks, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    corrupt_list = []
    # Tracked separately from the loop index so that an empty input returns 0
    # instead of failing on an unbound loop variable.
    num_batches = 0

    # labels and tasks stay in the zip so that the shortest input still
    # decides how many batches are visited.
    for i, (_, _, jpeg_file_batch) in enumerate(zip(labels, tasks,
                                                    jpeg_files)):
        raw_images = [jpeg.read() for jpeg in jpeg_file_batch]
        # resizeJPEG yields an iterable of iterables; flatten it to one list.
        jpeg_strings = list(
            itertools.chain.from_iterable(
                resizeJPEG(raw_images, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                           CROP_TO_SQUARE)))
        # NOTE(review): expecting exactly one string per batch presumably
        # relies on OUTPUT_BATCH_SIZE == 1 -- confirm.
        if len(jpeg_strings) != 1:
            first_name = jpeg_file_batch[0].name
            print("%s %s %s" % (i, first_name, len(jpeg_strings)))
            corrupt_list.append(first_name)
        num_batches = i + 1

    pickle('car_raw_corrupted_list_' + name, corrupt_list)
    print(corrupt_list)
    print(len(corrupt_list))
    return num_batches
# Example #2
# 0
def write_batches(target_dir, name, start_batch_num, labels, jpeg_files):
    """Resize JPEGs batch by batch and store them as pickled sub-batches.

    The inputs are partitioned into batches of ``OUTPUT_BATCH_SIZE``. For
    batch ``i`` a directory ``data_batch_<start_batch_num + i>`` is created
    under ``target_dir`` and filled with files named
    ``data_batch_<start_batch_num + i>.<k>``, each holding a dict with the
    keys ``'data'`` (resized JPEG strings) and ``'labels'`` for up to
    ``OUTPUT_SUB_BATCH_SIZE`` consecutive items.

    Args:
        target_dir: Directory that receives one sub-directory per batch.
        name: Label used in the progress messages.
        start_batch_num: Number assigned to the first batch written.
        labels: Sequence of labels, partitioned in step with ``jpeg_files``.
        jpeg_files: File-like objects exposing ``read()``.

    Returns:
        The number of batches written; 0 when there were none.
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    # Tracked separately from the loop index so that an empty input returns 0
    # instead of failing on an unbound loop variable.
    num_batches = 0
    for i, (labels_batch,
            jpeg_file_batch) in enumerate(zip(labels, jpeg_files)):
        t = time()
        batch_num = start_batch_num + i
        # resizeJPEG yields an iterable of iterables; flatten it to one list.
        jpeg_strings = list(
            itertools.chain.from_iterable(
                resizeJPEG([jpeg.read() for jpeg in jpeg_file_batch],
                           OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                           CROP_TO_SQUARE)))
        batch_path = os.path.join(target_dir, 'data_batch_%d' % batch_num)
        makedir(batch_path)
        # NOTE(review): slices are driven by the label count; if resizeJPEG
        # ever returns fewer strings than it was given, 'data' and 'labels'
        # would no longer line up -- confirm against resizeJPEG.
        for j in xrange(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
            # j is a multiple of OUTPUT_SUB_BATCH_SIZE, so floor division
            # yields the exact sub-batch index.
            sub_batch_path = os.path.join(
                batch_path,
                'data_batch_%d.%d' % (batch_num, j // OUTPUT_SUB_BATCH_SIZE))
            pickle(sub_batch_path, {
                'data': jpeg_strings[j:j + OUTPUT_SUB_BATCH_SIZE],
                'labels': labels_batch[j:j + OUTPUT_SUB_BATCH_SIZE],
            })
        print("Wrote %s (%s batch %d of %d) (%.2f sec)" % (
            batch_path, name, i + 1, len(jpeg_files), time() - t))
        num_batches = i + 1
    return num_batches
def write_batches(target_dir, name, start_batch_num, labels, tasks, jpeg_files):
    """Check every JPEG batch and collect the ones that resize unexpectedly.

    No data batches are written by this variant. Each partitioned batch of
    files is read and passed to ``resizeJPEG``; if the flattened output does
    not hold exactly one string, the name of the batch's first file is added
    to a list. That list is saved with the project's ``pickle(path, obj)``
    helper as ``'car_raw_corrupted_list_' + name`` and then printed along
    with its length.

    Args:
        target_dir: Directory handed to ``makedir``; otherwise unused here.
        name: Label for the progress message and the output filename.
        start_batch_num: Not used by this variant; kept so callers need not
            change.
        labels: Partitioned and zipped with the files; only limits how many
            batches are visited.
        tasks: Same role as ``labels``.
        jpeg_files: File-like objects exposing ``read()`` and ``name``.

    Returns:
        The count of batches visited; 0 when the inputs produce no batch.
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    tasks = partition_list(tasks, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    corrupt_list = []
    # Explicit counter: the loop variable stays unbound when nothing is
    # iterated, which used to make the final return fail.
    batches_seen = 0

    for i, (_, _, jpeg_file_batch) in enumerate(zip(labels, tasks, jpeg_files)):
        contents = [jpeg.read() for jpeg in jpeg_file_batch]
        resized = resizeJPEG(contents, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                             CROP_TO_SQUARE)
        # The result is an iterable of iterables; flatten before counting.
        jpeg_strings = list(itertools.chain.from_iterable(resized))
        # NOTE(review): a count of exactly one per batch presumably assumes
        # OUTPUT_BATCH_SIZE == 1 -- confirm.
        if len(jpeg_strings) != 1:
            suspect = jpeg_file_batch[0].name
            print("%s %s %s" % (i, suspect, len(jpeg_strings)))
            corrupt_list.append(suspect)
        batches_seen = i + 1

    pickle('car_raw_corrupted_list_' + name, corrupt_list)
    print(corrupt_list)
    print(len(corrupt_list))
    return batches_seen
# Example #4
# 0
def write_batches(target_dir, name, start_batch_num, labels, jpeg_files):
    """Write resized JPEGs and their labels to disk as pickled sub-batches.

    ``labels`` and ``jpeg_files`` are split into batches of
    ``OUTPUT_BATCH_SIZE``. Batch ``i`` gets its own directory
    ``data_batch_<start_batch_num + i>`` inside ``target_dir``; within it,
    every run of up to ``OUTPUT_SUB_BATCH_SIZE`` items is stored as
    ``data_batch_<start_batch_num + i>.<k>`` containing a dict with the keys
    ``'data'`` and ``'labels'``.

    Args:
        target_dir: Root directory for the per-batch directories.
        name: Label shown in the progress messages.
        start_batch_num: Batch number used for the first batch.
        labels: Sequence of labels matching ``jpeg_files``.
        jpeg_files: File-like objects exposing ``read()``.

    Returns:
        How many batches were written; 0 when the inputs produce no batch.
    """
    jpeg_files = partition_list(jpeg_files, OUTPUT_BATCH_SIZE)
    labels = partition_list(labels, OUTPUT_BATCH_SIZE)
    makedir(target_dir)
    print("Writing %s batches..." % name)

    # Explicit counter: the loop variable stays unbound when nothing is
    # iterated, which used to make the final return fail.
    written = 0
    for i, (labels_batch, jpeg_file_batch) in enumerate(zip(labels, jpeg_files)):
        t = time()
        batch_id = start_batch_num + i
        contents = [jpeg.read() for jpeg in jpeg_file_batch]
        resized = resizeJPEG(contents, OUTPUT_IMAGE_SIZE, NUM_WORKER_THREADS,
                             CROP_TO_SQUARE)
        # The result is an iterable of iterables; flatten to a single list.
        jpeg_strings = list(itertools.chain.from_iterable(resized))
        batch_path = os.path.join(target_dir, 'data_batch_%d' % batch_id)
        makedir(batch_path)
        # NOTE(review): slicing follows the label count; should resizeJPEG
        # return fewer strings than inputs, 'data' and 'labels' would drift
        # apart -- confirm against resizeJPEG.
        for j in xrange(0, len(labels_batch), OUTPUT_SUB_BATCH_SIZE):
            stop = j + OUTPUT_SUB_BATCH_SIZE
            # Floor division keeps the sub-batch index an integer regardless
            # of the interpreter's `/` semantics.
            file_name = 'data_batch_%d.%d' % (batch_id,
                                              j // OUTPUT_SUB_BATCH_SIZE)
            pickle(os.path.join(batch_path, file_name),
                   {'data': jpeg_strings[j:stop],
                    'labels': labels_batch[j:stop]})
        print("Wrote %s (%s batch %d of %d) (%.2f sec)" % (
            batch_path, name, i + 1, len(jpeg_files), time() - t))
        written = i + 1
    return written