def load_word_embedding(vse):
    checkpoint = torch.load(vse)
    opt = checkpoint['opt']
    vocab = Vocab.from_pickle(pjoin(opt.vocab_path, '%s_vocab.pkl' % opt.data_name))

    if not args.glove_only:
        # Concatenate the pretrained GloVe vectors with the fine-tuned VSE embedding.
        embed_weights = checkpoint['model'][1]['embed.weight'].cpu().numpy()
        _, glove_weights = io.load('data/snli/glove.pkl')
        embed_weights = np.concatenate((glove_weights, embed_weights), axis=1)
    else:
        _, embed_weights = io.load('data/snli/glove.pkl')

    embedding = nn.Embedding(embed_weights.shape[0], embed_weights.shape[1], padding_idx=0)
    embedding.weight.data.copy_(torch.from_numpy(embed_weights))
    return vocab, embedding

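# A minimal usage sketch for load_word_embedding. The checkpoint path is
# hypothetical, `args.glove_only` must come from the surrounding script's
# argument parser, and we assume Vocab exposes a `word2idx` mapping:
#
#   vocab, embedding = load_word_embedding('runs/vse/model_best.pth.tar')
#   ids = torch.tensor([[vocab.word2idx['a'], vocab.word2idx['dog']]])
#   vectors = embedding(ids)  # shape: (1, 2, embedding_dim)
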
def load_yml_config(root, bash_file):
    yml_filename = osp.join(root, 'jacinle.yml')
    if osp.isfile(yml_filename):
        logger.critical('Loading jacinle config: {}.'.format(osp.abspath(yml_filename)))
        config = io.load(yml_filename)
        load_vendors(root, config, bash_file)

def load_weights(model, filename, include=None, exclude=None):
    if osp.isfile(filename):
        try:
            weights = io.load(filename)

            # Hack for checkpoint.
            if 'model' in weights and 'optimizer' in weights:
                weights = weights['model']

            matcher = IENameMatcher(include, exclude)
            with matcher:
                weights = {k: v for k, v in weights.items() if matcher.match(k)}
            stat = matcher.get_last_stat()
            if len(stat[1]) > 0:
                logger.critical('Weights {}: {}.'.format(stat[0], ', '.join(sorted(list(stat[1])))))

            # Build the tensors.
            for k, v in weights.items():
                if isinstance(v, np.ndarray):
                    weights[k] = torch.from_numpy(v)

            try:
                if isinstance(model, nn.DataParallel):
                    model = model.module
                load_state_dict(model, weights)
            except KeyError as e:
                logger.warning('Unexpected or missing weights found: {}.'.format(str(e)))
            logger.critical('Weights loaded: {}.'.format(filename))
            return True
        except Exception:
            logger.exception('Error occurred when loading weights {}.'.format(filename))
    else:
        logger.warning('No weights file found at specified position: {}.'.format(filename))
    return None

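# Usage sketch for load_weights above. The checkpoint path and the exclude
# pattern are hypothetical; the pattern syntax is whatever IENameMatcher
# accepts for its include/exclude filters:
#
#   if load_weights(model, 'dumps/checkpoints/epoch_100.pth', exclude=['*fc*']) is None:
#       raise RuntimeError('failed to load the checkpoint')
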
def load_weights(model, filename, include=None, exclude=None, return_raw=True):
    if osp.isfile(filename):
        try:
            raw = weights = io.load(filename)

            # Hack for checkpoint.
            if 'model' in weights and 'optimizer' in weights:
                weights = weights['model']

            try:
                load_state_dict(model, weights, include=include, exclude=exclude)
            except KeyError as e:
                logger.warning('Unexpected or missing weights found:\n' + e.args[0])
            logger.critical('Weights loaded: {}.'.format(filename))

            if return_raw:
                return raw
            return True
        except Exception:
            logger.exception('Error occurred when loading weights {}.'.format(filename))
    else:
        logger.warning('No weights file found at specified position: {}.'.format(filename))
    return None

def load_weights(model, filename):
    if osp.isfile(filename):
        try:
            weights = io.load(filename)

            # Hack for checkpoint.
            if 'model' in weights and 'optimizer' in weights:
                weights = weights['model']

            # Build the tensors.
            for k, v in weights.items():
                if isinstance(v, np.ndarray):
                    weights[k] = torch.from_numpy(v)

            try:
                if isinstance(model, nn.DataParallel):
                    model = model.module
                load_state_dict(model, weights)
            except KeyError as e:
                logger.warning('Unexpected or missing weights found: {}.'.format(str(e)))
            logger.critical('Weights loaded: {}.'.format(filename))
            return True
        except Exception:
            logger.exception('Error occurred when loading weights {}.'.format(filename))
    else:
        logger.warning('No weights file found at specified position: {}.'.format(filename))
    return None

def main():
    scenes = io.load_json(args.scene_json)['scenes']
    preds = io.load(args.preds_json)
    if isinstance(preds, dict):
        preds = list(preds.values())
    if False:  # Disabled: transpose per-scene predictions.
        preds = [transpose_scene(s) for s in preds]

    # flattened_objs = [o for s in scenes for o in s['objects']]
    # flattened_preds = {
    #     k: np.concatenate([np.array(p[k]) for p in preds], axis=0)
    #     for k in preds[0]
    # }

    meter = GroupMeters()
    # Disabled variant: iterate over scenes instead of predictions.
    # for i, scene in tqdm_gofor(scenes, mininterval=0.5):
    #     for j in range(len(scene['objects'])):
    #         test(j, scene['objects'], preds[i], meter)
    for i, pred in tqdm_gofor(preds, mininterval=0.5):
        scene = scenes[i]
        for j in range(len(scene['objects'])):
            test(j, scene['objects'], pred, meter)
    print(meter.format_simple('Results:', compressed=False))

def main():
    scenes = io.load_json(args.scene_json)['scenes']
    preds = io.load(args.preds_json)
    if isinstance(preds, dict):
        preds = list(preds.values())
    if False:  # Disabled: transpose per-scene predictions.
        preds = [transpose_scene(s) for s in preds]

    meter = GroupMeters()
    flattened_objs = [o for s in scenes for o in s['objects']]
    flattened_preds = {
        k: np.concatenate([np.array(p[k]) for p in preds], axis=0)
        for k in preds[0]
    }
    # The loop variable is named concept_preds to avoid shadowing `preds` above.
    for k, concept_preds in flattened_preds.items():
        kk = def_.word2lemma.get(k, k)
        for i, o in tqdm_gofor(flattened_objs, desc='{} (lemma: {})'.format(k, kk), leave=False):
            correct = (concept_preds[i] > 0) == (kk == o[def_.concept2attribute[kk]])
            meter.update('acc', correct)
            meter.update(f'acc/{k}', correct)
    print(meter.format_simple('Results:', compressed=False))

def wrapped_func(*args, **kwargs):
    if not force_update and osp.exists(filename):
        cached_value = io.load(filename)
        if cached_value is not None:
            return cached_value
    computed_value = func(*args, **kwargs)
    io.dump(filename, computed_value)
    return computed_value

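# A sketch of the decorator factory that likely encloses `wrapped_func` above.
# The factory name `cached_result` and its signature are assumptions, not
# necessarily the library's actual API; `io` is the same load/dump module used
# in the snippets above.
import functools
import os.path as osp

def cached_result(filename, force_update=False):
    def decorator(func):
        @functools.wraps(func)
        def wrapped_func(*args, **kwargs):
            # Serve the on-disk cache unless an update is forced.
            if not force_update and osp.exists(filename):
                cached_value = io.load(filename)
                if cached_value is not None:
                    return cached_value
            computed_value = func(*args, **kwargs)
            io.dump(filename, computed_value)
            return computed_value
        return wrapped_func
    return decorator

# Usage: place @cached_result('cache/features.pkl') above an expensive function.
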
def load_files(self):
    print('Loading captions and precomputed image embeddings')
    self.image_embeddings = io.load(self.image_embeddings)
    self.captions = list(open(self.captions))
    image_list = list(open(self.image_list))
    # Every image comes with 5 captions, so repeat each image path 5 times to
    # align the two lists index-by-index.
    assert len(image_list) * 5 == len(self.captions)
    self.image_list = [img for img in image_list for _ in range(5)]

def wrapped_func(*args, **kwargs):
    if not force_update and osp.exists(filename):
        if verbose:
            logger.info('Using cached results from "{}".'.format(filename))
        cached_value = io.load(filename)
        if cached_value is not None:
            return cached_value
    computed_value = func(*args, **kwargs)
    if verbose:
        logger.info('Writing result cache to "{}".'.format(filename))
    io.dump(filename, computed_value)
    return computed_value

def main():
    logger.critical('Loading the dataset.')
    data = io.load(args.caption)

    # Step 1: filter out images.
    images = {c['image_id'] for c in data['annotations']}
    # Step 2: build a reverse mapping for images.
    id2image = {i['id']: i for i in data['images']}
    images = [id2image[i] for i in images]

    import torchvision.transforms as T
    image_transform = T.Compose([
        T.Resize((args.image_size, args.image_size)),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    dataset = COCOImageDataset(images, args.image_root, image_transform)

    logger.critical('Building the model.')
    model = FeatureExtractor()
    if args.use_gpu:
        model.cuda()
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            model = JacDataParallel(model, device_ids=args.gpus).cuda()
        cudnn.benchmark = True
    model.eval()

    dataloader = dataset.make_dataloader(args.batch_size, shuffle=False, drop_last=False, nr_workers=args.data_workers)
    output_file = io.open_h5(args.output, 'w')
    writer = AsyncWriter(output_file, total_size=len(dataset))

    for feed_dict in tqdm(dataloader, total=len(dataloader), desc='Extracting features'):
        if args.use_gpu:
            feed_dict = async_copy_to(feed_dict, 0)
        with torch.no_grad():
            output_dict = model(feed_dict)
        writer.feed(output_dict)
    writer.join()
    output_file.close()

    io.dump(args.output_images_json, images)

def main():
    io.set_fs_verbose(True)
    concepts = io.load('./gqa_concepts.json')
    synonyms = dict()
    for dataset, word2freq in concepts.items():
        this_synonyms = dict()
        for word in word2freq:
            syn, ant = get_synonyms_and_antonyms(word)
            if len(syn) > 1:
                this_synonyms[word] = syn
        synonyms[dataset] = this_synonyms
    io.dump('./gqa_synonyms.json', synonyms, compressed=False)

def load_files(self):
    print('Loading captions and precomputed image embeddings')
    self.image_embeddings = load(self.image_embeddings)
    self.captions = load_json(self.captions)
    assert len(self.captions) == len(self.image_embeddings)
    # self.captions = [self.captions[i] for i in non_empty_inds]
    # self.image_embeddings = self.image_embeddings[non_empty_inds]

    if self.mode is CompletionDatasetMode.SAMPLE:
        self.non_empty_inds = [
            i for i, c in enumerate(self.captions) if len(c['replace']) > 0
        ]
    else:
        if self.mode is CompletionDatasetMode.ALL:
            replace = lambda c: c['replace']
        elif self.mode is CompletionDatasetMode.NOUN:
            replace = lambda c: c['replace_noun']
        elif self.mode is CompletionDatasetMode.PREP:
            replace = lambda c: c['replace_prep']
        self.all_inds = [(i, r) for i, c in enumerate(self.captions) for r in replace(c)]

def main():
    scenes = io.load_json(args.scene_json)['scenes']
    preds = io.load(args.preds_json)
    if isinstance(preds, dict):
        preds = list(preds.values())
    if False:  # Disabled: transpose per-scene predictions.
        preds = [transpose_scene(s) for s in preds]

    # Evaluate on the first 1000 scenes only.
    scenes = scenes[:1000]
    preds = preds[:1000]
    flattened_objs = [o for s in scenes for o in s['objects']]
    flattened_preds = {
        k: np.concatenate([np.array(p[k]) for p in preds], axis=0)
        for k in preds[0]
    }
    meter = GroupMeters()
    for i, obj in tqdm_gofor(flattened_objs, mininterval=0.5):
        test(i, flattened_objs, flattened_preds, meter)
    print(meter.format_simple('Results:', compressed=False))

@classmethod
def from_pickle(cls, path):
    vocab = io.load(path)
    return cls(sync=vocab)

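# Usage sketch for from_pickle (hypothetical path; the pickle is expected to
# hold whatever object the Vocab constructor accepts as `sync`):
#
#   vocab = Vocab.from_pickle('data/snli/snli_vocab.pkl')
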
def main():
    for i, filename in enumerate(args.filename):
        globals()[f'f{i + 1}'] = io.load(filename)
    from IPython import embed
    embed()
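
# Example invocation (hypothetical script name): each positional file is loaded
# with io.load and bound to f1, f2, ... in the IPython session for inspection.
#
#   $ python peek.py preds.pkl scenes.json
#   In [1]: type(f1), len(f2)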