示例#1
0
def score(args):
    """Score every model path in ``args.model`` and write the results as JSON.

    Reads the following attributes from ``args``:

    * ``dataset`` -- one of ``'coco'``, ``'places'`` or ``'flickr8k'``;
      selects which provider module is imported.
    * ``root`` -- forwarded to ``getDataProvider`` as ``root``.
    * ``text`` -- when truthy, sentences are tokenized with ``characters``
      and encoded with ``encode_texts``; otherwise the ``'audio'`` entry of
      each sentence is used and a ``speaker_id`` score is also computed.
    * ``split``, ``batch_size`` -- forwarded to ``Scorer`` through its config.
    * ``model`` -- iterable of paths, each passed to ``load``.
    * ``output`` -- path of the JSON file to write.

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
    """
    if args.dataset == 'coco':
        import vg.vendrov_provider as dp
    elif args.dataset == 'places':
        import vg.places_provider as dp
    elif args.dataset == 'flickr8k':
        import vg.flickr8k_provider as dp
    else:
        # Without this branch ``dp`` would stay unbound and the failure would
        # surface later as an opaque UnboundLocalError.
        raise ValueError(
            "Unknown dataset {!r}; expected 'coco', 'places' or "
            "'flickr8k'".format(args.dataset))
    logging.info('Loading data')
    prov = dp.getDataProvider(args.dataset, root=args.root, audio_kind='mfcc')
    tokenize = characters if args.text else lambda x: x['audio']
    config = dict(split=args.split, tokenize=tokenize, batch_size=args.batch_size)
    if args.text:
        config['encode_sentences'] = encode_texts
    scorer = Scorer(prov, config)
    output = []
    for path in args.model:
        task = load(path)
        task.eval().cuda()
        rsa = scorer.rsa_image(task)
        para = scorer.retrieval_para(task)
        result = dict(path=path, rsa=rsa, para=para)
        if not args.text:
            result['speaker_id'] = scorer.speaker_id(task)
        output.append(result)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(args.output, 'w') as handle:
        json.dump(output, handle, indent=2)
示例#2
0
文件: run.py 项目: gchrupala/vgs
import numpy
import vg.simple_data as sd
import vg.experiment as E
import vg.vendrov_provider as dp
import vg.defn.baseline_mfcc as D
# Name of the dataset handed to the provider factory below.
dataset = 'coco'

# Data provider for the chosen dataset, requesting 'mfcc' audio.
prov = dp.getDataProvider(dataset, audio_kind='mfcc', root='../..')

def audio(sent):
    """Return the value stored under the ``'audio'`` key of ``sent``."""
    audio_entry = sent['audio']
    return audio_entry

# Evaluation settings passed to D.eval_para: sentences are tokenized with
# `audio`, encoded with D.aggregate_mean, and taken from the 'val' split.
eval_config = {
    'tokenize': audio,
    'split': 'val',
    'encode_sentences': D.aggregate_mean,
    'para': True,
}

# Run the evaluation and store its result in scores.1.npy.
para_scores = D.eval_para(prov, eval_config)
numpy.save("scores.1.npy", para_scores)


示例#3
0
 def __init__(self, root='.', truncate=None, load_images=True):
     """Create the 'places' and 'coco' providers and merge their speakers.

     ``root``, ``truncate`` and ``load_images`` are forwarded unchanged to
     both ``getDataProvider`` calls. ``self.speakers`` is the union of the
     two providers' ``speakers`` attributes.
     """
     # Both providers are built with exactly the same keyword arguments.
     shared = dict(root=root, truncate=truncate, load_images=load_images)
     self.places = places.getDataProvider('places', **shared)
     self.coco = vendrov.getDataProvider('coco', **shared)
     places_speakers = self.places.speakers
     self.speakers = places_speakers.union(self.coco.speakers)