def score(args):
    """Score every model listed in ``args.model`` and write the results as JSON.

    Attributes read from ``args``:
        dataset: one of ``'coco'``, ``'places'`` or ``'flickr8k'``; selects
            which provider module is imported.
        root: passed through to ``getDataProvider`` as ``root``.
        text: truthy to tokenize with ``characters`` and encode with
            ``encode_texts``; falsy to use the ``'audio'`` entry of each item.
        split, batch_size: forwarded to ``Scorer`` through its config dict.
        model: iterable of model paths, each loaded with ``load``.
        output: path of the JSON file to write.

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
    """
    # Provider modules are imported lazily so only the selected one is loaded.
    if args.dataset == 'coco':
        import vg.vendrov_provider as dp
    elif args.dataset == 'places':
        import vg.places_provider as dp
    elif args.dataset == 'flickr8k':
        import vg.flickr8k_provider as dp
    else:
        # Without this branch `dp` would be unbound and fail later with an
        # opaque UnboundLocalError.
        raise ValueError('Unsupported dataset: {!r}'.format(args.dataset))

    logging.info('Loading data')
    prov = dp.getDataProvider(args.dataset, root=args.root, audio_kind='mfcc')

    tokenize = characters if args.text else lambda x: x['audio']
    config = dict(split=args.split, tokenize=tokenize,
                  batch_size=args.batch_size)
    if args.text:
        config['encode_sentences'] = encode_texts
    scorer = Scorer(prov, config)

    output = []
    for path in args.model:
        task = load(path)
        # NOTE(review): presumably a torch module (eval mode, moved to GPU) — confirm.
        task.eval().cuda()
        rsa = scorer.rsa_image(task)
        para = scorer.retrieval_para(task)
        result = dict(path=path, rsa=rsa, para=para)
        # Speaker identification is only computed for non-text input.
        if not args.text:
            result['speaker_id'] = scorer.speaker_id(task)
        output.append(result)

    # Context manager guarantees the file is flushed and closed.
    with open(args.output, 'w') as out_file:
        json.dump(output, out_file, indent=2)
import numpy

import vg.simple_data as sd
import vg.experiment as E
import vg.vendrov_provider as dp
import vg.defn.baseline_mfcc as D

# Script: run D.eval_para on the 'val' split of the 'coco' provider (MFCC
# audio) and store whatever it returns in "scores.1.npy".
dataset = 'coco'
prov = dp.getDataProvider(dataset, root='../..', audio_kind='mfcc')


def audio(sent):
    """Return the ``'audio'`` entry of a sentence record."""
    return sent['audio']


# Evaluation settings handed to D.eval_para; sentences are encoded with
# D.aggregate_mean.
eval_config = {
    'tokenize': audio,
    'split': 'val',
    'encode_sentences': D.aggregate_mean,
    'para': True,
}

numpy.save("scores.1.npy", D.eval_para(prov, eval_config))
def __init__(self, root='.', truncate=None, load_images=True):
    """Create the Places and COCO providers and merge their speakers.

    Args:
        root: forwarded as ``root`` to both ``getDataProvider`` calls.
        truncate: forwarded as ``truncate`` to both calls.
        load_images: forwarded as ``load_images`` to both calls.
    """
    # Both underlying providers receive the same keyword arguments.
    shared = dict(root=root, truncate=truncate, load_images=load_images)
    self.places = places.getDataProvider('places', **shared)
    self.coco = vendrov.getDataProvider('coco', **shared)

    # Combined speakers of both providers (presumably sets — confirm).
    places_speakers = self.places.speakers
    self.speakers = places_speakers.union(self.coco.speakers)