def ParseUint64KeyPair(key):
    """Decode a 16-byte key into a pair of ids.

    The key is checked (via CHECK_EQ) to be exactly 16 long; bytes 0-7 and
    bytes 8-15 are each decoded with py_base.KeyToUint64, and both decoded
    values are checked (via CHECK_GT) to be greater than zero.

    Returns:
      A tuple (id_a, id_b) holding the two decoded values, in key order.
    """
    CHECK_EQ(len(key), 16)
    # Decode both halves before validating either, first half first.
    decoded = []
    for offset in (0, 8):
        decoded.append(py_base.KeyToUint64(key[offset:offset + 8]))
    for value in decoded:
        CHECK_GT(value, 0)
    return tuple(decoded)
def GetImageSizes(uri):
    """Build a map from image id to (width, height) for a shard set.

    Opens `uri` with a py_pert.StringTableShardSetReader, parses every value
    as an iw_pb2.JpegImage and records its width and height under the id
    decoded from the entry's key. A progress bar is advanced once per entry.

    Args:
      uri: Location handed to the reader's Open(); the open is CHECKed.

    Returns:
      dict mapping decoded image id -> (width, height).
    """
    sizes_by_id = {}
    shard_reader = py_pert.StringTableShardSetReader()
    CHECK(shard_reader.Open(uri))
    # One message instance is reused; ParseFromString overwrites it each time.
    image_proto = iw_pb2.JpegImage()
    bar = iwutil.MakeProgressBar(shard_reader.Entries())
    for index, (raw_key, raw_value) in enumerate(shard_reader):
        decoded_id = py_base.KeyToUint64(raw_key)
        image_proto.ParseFromString(raw_value)
        dimensions = (image_proto.width, image_proto.height)
        sizes_by_id[decoded_id] = dimensions
        bar.update(index)
    return sizes_by_id
def main():
    """Queue one ResizeAndUploadImageJob per image in a fixed pert table.

    Connects to S3 with boto, creates (optionally resets) the
    'tide_image_cache' bucket, then iterates the image table and hands each
    image's JPEG bytes, together with the `sizes` map, to a JobQueue. Blocks
    until the queue reports all jobs done.
    """
    dataset_name = 'tide_v08'
    # Values are passed through to ResizeAndUploadImageJob unchanged
    # (presumably target pixel areas -- confirm against that class).
    sizes = {
        'thumbnail': 100 * 100,
        'small': 640 * 480,
    }
    reset_bucket = False
    #dataset_base_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/%s/' % (dataset_name)
    #images_uri = '%s/cropped_scaled_photoid_to_image.pert' % (dataset_base_uri)
    images_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
    bucket_name = 'tide_image_cache'

    s3 = boto.connect_s3()
    bucket = s3.create_bucket(bucket_name)
    if reset_bucket:
        # Empty the bucket, delete it, then recreate it.
        LOG(INFO, 'listing contents of bucket...')
        all_keys = []
        for entry in bucket.list():
            all_keys.append(entry.name)
        LOG(INFO, 'deleting contents of bucket...')
        bucket.delete_keys(all_keys)
        s3.delete_bucket(bucket_name)
        bucket = s3.create_bucket(bucket_name)
    # NOTE(review): confirm whether this call belongs inside the reset branch
    # above; here it applies to whichever bucket handle is in use.
    bucket.set_acl('public-read')

    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_uri))
    progress = iwutil.MakeProgressBar(reader.Entries())

    num_workers = 200
    max_queue_size = 200
    job_queue = JobQueue(num_workers, max_queue_size)
    for index, (key, value) in enumerate(reader):
        image_id = py_base.KeyToUint64(key)
        # A fresh message per entry, as each job receives that entry's data.
        image_proto = iw_pb2.JpegImage()
        image_proto.ParseFromString(value)
        upload_job = ResizeAndUploadImageJob(
            bucket, sizes, image_id, image_proto.data)
        job_queue.AddJob(upload_job)
        progress.update(index)
    job_queue.WaitForJobsDone()
def ExportImages(self):
    """Write every image to <output_path>/images/ and record its size.

    If <output_path>/images/size_cache.pickle already exists, the export is
    skipped and self.imageid_to_size is loaded from that file instead.
    Otherwise each entry of the table at self.images_uri is parsed as an
    iw_pb2.JpegImage, its `data` is written to '<JSPad(image_id)>.jpg', and
    (width, height) is stored in self.imageid_to_size under the image id.
    The size map is saved to the cache file once all images are written.

    Assumes self.imageid_to_size is already a dict when the export path
    runs -- it is only assigned here on the cache-hit path.
    """
    image_size_cache_filename = '%s/images/size_cache.pickle' % self.output_path
    if os.path.exists(image_size_cache_filename):
        self.imageid_to_size = iwutil.LoadObject(image_size_cache_filename)
        return

    base_path = '%s/images/' % (self.output_path)
    # The cache file is written last, so a run interrupted mid-export leaves
    # the directory behind without it. Reuse the directory in that case
    # instead of failing in os.mkdir on every subsequent run.
    if not os.path.isdir(base_path):
        os.mkdir(base_path)

    LOG(INFO, 'exporting images...')
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(self.images_uri))
    # One message instance is reused; ParseFromString overwrites it each time.
    jpeg_image = iw_pb2.JpegImage()
    progress = iwutil.MakeProgressBar(reader.Entries())
    for i, (k, v) in enumerate(reader):
        image_id = py_base.KeyToUint64(k)
        jpeg_image.ParseFromString(v)
        filename = '%s/%s.jpg' % (base_path, JSPad(image_id))
        # `with` closes the file even if the write raises.
        with open(filename, 'wb') as f:
            f.write(jpeg_image.data)
        self.imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
        progress.update(i)
    iwutil.SaveObject(self.imageid_to_size, image_size_cache_filename)
    return
def ParseUint64Key(key):
    """Decode an 8-byte key with py_base.KeyToUint64.

    The key length is checked (via CHECK_EQ) to be exactly 8 before decoding.

    Returns:
      Whatever py_base.KeyToUint64 returns for `key`.
    """
    key_length = len(key)
    CHECK_EQ(key_length, 8)
    decoded = py_base.KeyToUint64(key)
    return decoded
def KeyToUint64(key):
    """Forward `key` to py_base.KeyToUint64 and return its result.

    No validation is performed here; the key is passed through unchanged.
    """
    decoded = py_base.KeyToUint64(key)
    return decoded