def get_test_image_resource_descriptions(
        self, filenames, size=None, min_num_images_for_batch_loading=100):
    """Return a dict mapping each test filename to its S3BatchResource.

    For size 'scaled_256', filenames are grouped by their test batch; any
    batch contributing at least ``min_num_images_for_batch_loading`` names
    is served from that batch's single pickle object, otherwise each image
    is fetched as an individual S3 object. Size 'scaled_500' always fetches
    individual objects. Any other size raises NotImplementedError.

    The final positional argument to S3BatchResource looks like an
    estimated resource size in bytes — TODO confirm against utils.
    """
    if not filenames:
        return {}
    result = {}
    if size == 'scaled_256':
        grouped = {}
        for name in filenames:
            grouped.setdefault(self.test_batch_by_filename[name], []).append(name)
        for batch_name, batch_files in grouped.items():
            if len(batch_files) >= min_num_images_for_batch_loading:
                # Enough hits in this batch: load the whole pickled batch once.
                pickle_key = 'imagenet-test-batches/' + batch_name + '.pickle'
                for name in batch_files:
                    result[name] = utils.S3BatchResource(
                        pickle_key, name, 'pickle_dict', 50 * 1000 * 1000)
            else:
                # Too few hits: fetch each scaled image individually.
                for name in batch_files:
                    result[name] = utils.S3BatchResource(
                        'imagenet-test-scaled/' + name, None,
                        'object_bytes', 50 * 1000)
    elif size == 'scaled_500':
        for name in filenames:
            result[name] = utils.S3BatchResource(
                'imagenet-test/' + name, None, 'object_bytes', 150 * 1000)
    else:
        raise NotImplementedError()
    # Postcondition: every requested filename got exactly one description.
    assert len(result) == len(filenames)
    for name in filenames:
        assert name in result
    return result
def get_train_image_resource_descriptions(
        self, filenames, size=None, min_num_images_for_batch_loading=100):
    """Return a dict mapping each train filename to its S3BatchResource.

    Only size 'scaled_256' is supported; anything else raises
    NotImplementedError. Filenames are grouped by wnid: any wnid with at
    least ``min_num_images_for_batch_loading`` names is served from that
    wnid's scaled tarball, otherwise each image is fetched individually.

    The final positional argument to S3BatchResource looks like an
    estimated resource size in bytes — TODO confirm against utils.
    """
    if not filenames:
        return {}
    if size != 'scaled_256':
        raise NotImplementedError()
    result = {}
    by_wnid = {}
    for name in filenames:
        by_wnid.setdefault(self.get_wnid_from_train_filename(name), []).append(name)
    for wnid, wnid_files in by_wnid.items():
        if len(wnid_files) >= min_num_images_for_batch_loading:
            # Enough hits for this class: read from the per-wnid tarball.
            tarball_key = 'imagenet-train/' + wnid + '-scaled.tar'
            for name in wnid_files:
                result[name] = utils.S3BatchResource(
                    tarball_key, wnid + '/' + name, 'tarball', 50 * 1300 * 1000)
        else:
            # Too few hits: fetch each image as its own S3 object.
            for name in wnid_files:
                result[name] = utils.S3BatchResource(
                    'imagenet-train-individual/' + wnid + '/' + name,
                    None, 'object_bytes', 50 * 1000)
    # Postcondition: every requested filename got exactly one description.
    assert len(result) == len(filenames)
    for name in filenames:
        assert name in result
    return result
def get_image_resource_descriptions(
        self, candidate_ids, size=None, min_num_images_for_batch_loading=100000):
    """Return a dict mapping each candidate id to its S3BatchResource.

    Every id must already be present in ``self.all_candidates``. For size
    'scaled_256', candidates with a recorded 'batch' are grouped by it;
    batches with at least ``min_num_images_for_batch_loading`` ids are
    served from a single batch pickle, otherwise per-image objects are
    used. Candidates without a batch always get per-image objects. Sizes
    'scaled_500' and 'original' always use per-image objects; any other
    size raises NotImplementedError.

    NOTE(review): the default threshold here is 100000 while the sibling
    test/train methods use 100, and unbatched candidates use a 150k size
    hint where in-batch singles use 50k — looks intentional but confirm.
    """
    if not candidate_ids:
        return {}
    for cid in candidate_ids:
        assert cid in self.all_candidates
    result = {}
    if size == 'scaled_256':
        by_batch = {}
        unbatched = []
        for cid in candidate_ids:
            candidate = self.all_candidates[cid]
            if 'batch' in candidate:
                by_batch.setdefault(candidate['batch'], []).append(cid)
            else:
                unbatched.append(cid)
        for batch_name, batch_cids in by_batch.items():
            if len(batch_cids) >= min_num_images_for_batch_loading:
                # Enough hits in this batch: load the whole pickle once.
                pickle_key = ('imagenet2candidates_batches/'
                              + batch_name + '.pickle')
                for cid in batch_cids:
                    result[cid] = utils.S3BatchResource(
                        pickle_key, cid, 'pickle_dict', 1000 * 50 * 1000)
            else:
                # Too few hits: fetch each scaled candidate individually.
                for cid in batch_cids:
                    result[cid] = utils.S3BatchResource(
                        'imagenet2candidates_scaled/' + cid + '.jpg',
                        None, 'object_bytes', 50 * 1000)
        for cid in unbatched:
            result[cid] = utils.S3BatchResource(
                'imagenet2candidates_scaled/' + cid + '.jpg',
                None, 'object_bytes', 150 * 1000)
    elif size == 'scaled_500':
        for cid in candidate_ids:
            result[cid] = utils.S3BatchResource(
                'imagenet2candidates_mturk/' + cid + '.jpg',
                None, 'object_bytes', 150 * 1000)
    elif size == 'original':
        for cid in candidate_ids:
            result[cid] = utils.S3BatchResource(
                'imagenet2candidates_original/' + cid + '.jpg',
                None, 'object_bytes', 2 * 1000 * 1000)
    else:
        raise NotImplementedError()
    # Postcondition: every requested id got exactly one description.
    assert len(result) == len(candidate_ids)
    for cid in candidate_ids:
        assert cid in result
    return result
def get_test_feature_resource_descriptions(self, filenames):
    """Return a dict mapping each test filename to its featurized .npy resource.

    Each filename maps to an individual numpy-bytes object under
    'imagenet-test-featurized/'. The 8 * 4096 argument looks like an
    estimated size in bytes — TODO confirm against utils.
    """
    if not filenames:
        return {}
    result = {
        name: utils.S3BatchResource(
            'imagenet-test-featurized/' + name + '.npy',
            None, 'numpy_bytes', 8 * 4096)
        for name in filenames
    }
    # Postcondition: every requested filename got exactly one description.
    assert len(result) == len(filenames)
    for name in filenames:
        assert name in result
    return result
def get_feature_resource_descriptions(self, candidate_ids):
    """Return a dict mapping each candidate id to its featurized .npy resource.

    Every id must already be present in ``self.all_candidates``. Each id
    maps to an individual numpy-bytes object under
    'imagenet2candidates_featurized/'. The 8 * 4096 argument looks like an
    estimated size in bytes — TODO confirm against utils.
    """
    if not candidate_ids:
        return {}
    for cid in candidate_ids:
        assert cid in self.all_candidates
    result = {
        cid: utils.S3BatchResource(
            'imagenet2candidates_featurized/' + cid + '.npy',
            None, 'numpy_bytes', 8 * 4096)
        for cid in candidate_ids
    }
    # Postcondition: every requested id got exactly one description.
    assert len(result) == len(candidate_ids)
    for cid in candidate_ids:
        assert cid in result
    return result
def get_val_feature_resource_descriptions(self, filenames):
    """Return a dict mapping each validation filename to its feature resource.

    Filenames are grouped by wnid (via ``self.wnid_by_val_filename``); all
    features for one wnid live in a single per-wnid pickle, keyed by the
    filename's stem (name without extension). The 32 * 50 * 1000 argument
    looks like an estimated size in bytes — TODO confirm against utils.
    """
    if not filenames:
        return {}
    by_wnid = {}
    for name in filenames:
        by_wnid.setdefault(self.wnid_by_val_filename[name], []).append(name)
    result = {}
    for wnid, wnid_files in by_wnid.items():
        batch_key = 'imagenet-validation-featurized/val-' + wnid + '-fc7.pkl'
        for name in wnid_files:
            # The pickle dict is keyed by the bare stem, not the full name.
            result[name] = utils.S3BatchResource(
                batch_key, pathlib.Path(name).stem,
                'pickle_dict', 32 * 50 * 1000)
    # Postcondition: every requested filename got exactly one description.
    assert len(result) == len(filenames)
    for name in filenames:
        assert name in result
    return result