예제 #1
0
def state_stack(net, audio):
    """Collect speech-encoder activations for `audio` from both encoder
    halves and return them as one tensor.

    Returns a tensor laid out as batch x length x layer x feature
    (per the original author's note).
    """
    from vg.scorer import testing
    with testing(net):
        bottom = net.SpeechImage.SpeechEncoderBottom.states(audio)
        # The top encoder consumes the last bottom layer's states.
        top = net.SpeechImage.SpeechEncoderTop.states(bottom[-1])
    stacked = torch.cat([bottom, top], dim=0)
    return stacked.permute(1, 2, 0, 3)
예제 #2
0
파일: audio2vecc.py 프로젝트: gchrupala/vgs
def experiment(net, data, prov, model_config, run_config):
    """Train `net` on audio batches, logging costs and periodic metrics.

    Per batch: prints the running average training cost; every
    `validate_period` batches prints the mean validation loss. Per epoch:
    saves the model as model.<epoch>.pkl and appends a JSON line with RSA,
    paraphrase-retrieval and (optionally) speaker-id scores to result.json.
    A final full model is saved as model.pkl.

    Args:
        net: network acting as its own single task (exposes .args, .step,
            .test_cost, .optimizer).
        data: batcher with iter_train_batches / iter_valid_batches.
        prov: data provider handed to the Scorer.
        model_config: unused here; kept for interface compatibility.
        run_config: dict with 'epochs', 'validate_period' and optional
            'speaker_id' flag.
    """
    def valid_loss(task):
        # Per-batch validation costs. torch.no_grad() replaces the
        # Variable(..., volatile=True) idiom removed in PyTorch >= 0.4.
        result = []
        with torch.no_grad():
            for item in data.iter_valid_batches():
                args = [torch.from_numpy(x).cuda() for x in task.args(item)]
                result.append(task.test_cost(*args).data.cpu().numpy())
        return result

    net.cuda()
    net.train()
    scorer = Scorer(
        prov,
        dict(split='val',
             tokenize=lambda x: x['audio'],
             batch_size=data.batch_size))
    net.optimizer.zero_grad()
    last_epoch = 0  # no resume support: training always starts at epoch 1
    with open("result.json", "w") as out:
        for epoch in range(last_epoch + 1, run_config['epochs'] + 1):
            costs = Counter()  # running sums for averaged cost reporting
            net.train()
            for _j, item in enumerate(data.iter_train_batches()):
                j = _j + 1  # 1-based batch counter
                name = "Aud"
                task = net
                # Report the speaker only when the batch is homogeneous.
                spk = item['speaker'][0] if len(set(
                    item['speaker'])) == 1 else 'MIXED'
                args = [torch.from_numpy(x).cuda() for x in task.args(item)]
                loss = task.optimizer.step(lambda: task.step(*args))
                # loss.item() replaces loss.data[0], which raises on 0-dim
                # tensors in modern PyTorch (consistent with the rest of
                # this project).
                costs += Counter({'cost': loss.item(), 'N': 1})
                print(epoch, j, j * data.batch_size, name, spk, "train",
                      str(costs['cost'] / costs['N']))
                if j % run_config['validate_period'] == 0:
                    print(epoch, j, 0, name, spk, "valid",
                          str(numpy.mean(valid_loss(task))))
                sys.stdout.flush()
            torch.save(net, "model.{}.pkl".format(epoch))
            # Evaluate with the net in testing mode (see vg.scorer.testing).
            with testing(net):
                result = dict(epoch=epoch,
                              rsa=scorer.rsa_image(net),
                              para=scorer.retrieval_para(net))
                if run_config.get('speaker_id', True):
                    result['speaker_id'] = scorer.speaker_id(net)
                out.write(json.dumps(result))
                out.write("\n")
                out.flush()
    torch.save(net, "model.pkl")
예제 #3
0
def valid_loss(net, name, task, data):
    """Return the per-batch test costs of `task` on `data[name]`'s
    validation set, as a list of numpy arrays.

    Args:
        net: network passed to the testing() context (presumably switches
            it to eval mode — confirm in vg.scorer).
        name: key selecting the batcher in `data`.
        task: task module exposing .args and .test_cost.
        data: dict of batchers.
    """
    result = []
    with testing(net):  # net.eval()
        # Gradients are never needed for validation; no_grad() saves memory
        # and replaces the deprecated Variable wrapping (consistent with the
        # modern torch.from_numpy(...).cuda() style used in this project).
        with torch.no_grad():
            for item in data[name].iter_valid_batches():
                args = [torch.from_numpy(x).cuda() for x in task.args(item)]
                result.append(task.test_cost(*args).data.cpu().numpy())
    return result
예제 #4
0
 def predict_beam(self, audio, audio_len, beam_size):
     """Transcribe audio to character strings using beam-search decoding.

     Each decoded sequence is truncated at the first END_ID token (or kept
     whole when no END_ID occurs) and mapped back to characters.
     """
     with testing(self):
         encoded = self.SpeechEncoderBottom(audio, audio_len)
         hyps = self.TextDecoder.beam_search(encoded, beam_size)
         pred_chars = []
         for hyp in hyps:
             # Positions of end-of-sequence markers within this hypothesis.
             eos_positions = (hyp == self.mapper.END_ID).nonzero()[0]
             cutoff = eos_positions[0] if eos_positions.shape[0] > 0 \
                 else hyp.shape[0]
             chars = [self.mapper.ids.from_id(tok.item())
                      for tok in hyp[:cutoff]]
             pred_chars.append(''.join(chars))
     return pred_chars
예제 #5
0
 def predict(self, audio):
     """Encode audio through the bottom then top speech encoders, in
     testing mode."""
     with testing(self):
         bottom_out = self.SpeechImage.SpeechEncoderBottom(audio)
         encoded = self.SpeechImage.SpeechEncoderTop(bottom_out)
     return encoded
예제 #6
0
 def encode_images(self, images):
     """Embed images with the SpeechImage image encoder, in testing mode."""
     with testing(self):
         embedded = self.SpeechImage.ImageEncoder(images)
     return embedded
예제 #7
0
 def predict(self, text):
     """Encode text through the bottom then top text encoders, in
     testing mode."""
     with testing(self):
         bottom_out = self.TextEncoderBottom(text)
         encoded = self.TextEncoderTop(bottom_out)
     return encoded
예제 #8
0
 def test_cost(self, *args):
     """Evaluate self.cost with the model in testing mode."""
     with testing(self):
         cost = self.cost(*args)
     return cost
예제 #9
0
 def predict(self, audio, audio_len):
     """Run the forward pass in testing mode and decode the logits into
     predictions on the CPU."""
     with testing(self):
         logits, _state = self.forward(audio, audio_len)
     logits_cpu = logits.detach().cpu()
     return self.logits2pred(logits_cpu)
예제 #10
0
파일: audio2vecc.py 프로젝트: gchrupala/vgs
 def predict(self, speech):
     """Apply the model to `speech` in testing mode."""
     with testing(self):
         prediction = self(speech)
     return prediction
예제 #11
0
 def predict(self, speech):
     """Forward `speech` through the model in testing mode."""
     with testing(self):
         # eval() is called explicitly in addition to the testing()
         # context — presumably redundant, kept for identical behavior.
         self.eval()
         return self(speech)
예제 #12
0
 def test_cost(self, beg, end):
     """Compute self.cost(beg, end) with the model in eval/testing mode."""
     with testing(self):
         # Explicit eval() kept alongside testing() for identical behavior.
         self.eval()
         return self.cost(beg, end)
예제 #13
0
def experiment(net, data, run_config):
    """Train `net` on interleaved 'Text' and 'Image' task batches.

    Per batch and task: one optimization step with gradient clipping and a
    running-average cost printout; every `validate_period` batches, the mean
    validation loss. Per epoch: saves model.<epoch>.pkl and appends a JSON
    line with RSA, retrieval and speaker-id scores to result.json. A final
    full model is saved as model.pkl.

    Args:
        net: multi-task network; its task modules are listed in
            run_config['tasks'] as (name, task) pairs.
        data: dict of batchers keyed by 'Text' and 'Image'.
        run_config: dict with 'tasks', 'epochs' and 'validate_period'.
    """
    def valid_loss(name, task):
        # Per-batch lists of validation cost components. torch.no_grad()
        # replaces the Variable(..., volatile=True) idiom removed in
        # PyTorch >= 0.4.
        result = []
        with torch.no_grad():
            for item in data[name].iter_valid_batches():
                args = [torch.from_numpy(x).cuda() for x in task.args(item)]
                result.append(
                    [x.data.cpu().numpy() for x in task.test_cost(*args)])
        return result

    net.cuda()
    net.train()
    scorer = Scorer(
        data['Image'].provider,
        dict(split='val',
             tokenize=lambda x: x['audio'],
             batch_size=data['Image'].batch_size))
    last_epoch = 0  # no resume support: training always starts at epoch 1
    for _, task in run_config['tasks']:
        task.optimizer.zero_grad()

    with open("result.json", "w") as out:
        for epoch in range(last_epoch + 1, run_config['epochs'] + 1):
            # Running cost/N accumulators per task for averaged reporting.
            costs = dict(Text=Counter(), Image=Counter())

            # zip truncates to the shorter stream: an epoch ends when either
            # batcher is exhausted.
            for _j, items in enumerate(
                    zip(data['Image'].iter_train_batches(reshuffle=True),
                        data['Text'].iter_train_batches(reshuffle=True))):
                j = _j + 1  # 1-based batch counter
                item = dict(Image=items[0], Text=items[1])
                for name, task in run_config['tasks']:
                    # Report the speaker only when the batch is homogeneous.
                    spk = item[name]['speaker'][0] if len(
                        set(item[name]['speaker'])) == 1 else 'MIXED'
                    args = [torch.from_numpy(x).cuda()
                            for x in task.args(item[name])]

                    loss = task.cost(*args)
                    task.optimizer.zero_grad()
                    loss.backward()
                    # clip_grad_norm_ is the in-place, non-deprecated form
                    # already used elsewhere in this project.
                    nn.utils.clip_grad_norm_(task.parameters(),
                                             task.config['max_norm'])
                    task.optimizer.step()
                    # loss.item() replaces loss.data[0], which raises on
                    # 0-dim tensors in modern PyTorch.
                    costs[name] += Counter({'cost': loss.item(), 'N': 1})
                    print(
                        epoch, j, j * data[name].batch_size, name, spk,
                        "train",
                        str(costs[name]['cost'] / costs[name]['N']))

                    if j % run_config['validate_period'] == 0:
                        loss = valid_loss(name, task)
                        print(epoch, j, 0, name, "VALID", "valid",
                              str(numpy.mean(loss)))

                    sys.stdout.flush()
            torch.save(net, "model.{}.pkl".format(epoch))
            # Evaluate with the net in testing mode (see vg.scorer.testing).
            with testing(net):
                result = dict(epoch=epoch,
                              rsa=scorer.rsa_image(net),
                              retrieval=scorer.retrieval(net),
                              speaker_id=scorer.speaker_id(net))
                out.write(json.dumps(result))
                out.write("\n")
                out.flush()

    torch.save(net, "model.pkl")
예제 #14
0
 def predict(self, audio):
     """Encode `audio` with the Image task's speech encoder, in testing
     mode."""
     with testing(self):
         encoded = self.Image.SpeechEncoder(audio)
     return encoded
예제 #15
0
def layer_states(net, audio):
    """Return the bottom speech-encoder activations for `audio`, computed
    with `net` in testing mode."""
    from vg.scorer import testing
    with testing(net):
        activations = net.SpeechImage.SpeechEncoderBottom(audio)
    return activations
예제 #16
0
def experiment(net, data, run_config):
    """Train a speech-transcription `net`, keeping the best model by WER.

    Each epoch: one optimization step per batch and task (with gradient
    clipping), periodic validation-loss printouts, then CER/WER scoring.
    The best-WER state_dict is checkpointed to model.pkl; on a WER
    regression the weights are rolled back and, if configured, the
    optimizer's epsilon is decayed. Results are appended as JSON lines to
    result.json. Finally the full model object is saved for inference.

    Args:
        net: network whose trainable tasks are listed in
            run_config['tasks'] as (name, task) pairs.
        data: batcher exposing iter_train_batches(reshuffle=...).
        run_config: dict with 'Scorer', 'epochs', 'validate_period',
            'debug', 'save_path' and optional 'epsilon_decay'.
    """
    net.cuda()
    net.train()
    scorer = run_config['Scorer']
    last_epoch = 0  # no resume support: training always starts at epoch 1
    result_fpath = "result.json"
    model_fpath_tmpl = "model.{}.pkl"
    model_fpath = "model.pkl"
    if run_config['debug']:
        wdump_fpath = "weights.csv"
    if run_config['save_path'] is not None:
        result_fpath = os.path.join(run_config['save_path'], result_fpath)
        model_fpath_tmpl = os.path.join(run_config['save_path'],
                                        model_fpath_tmpl)
        model_fpath = os.path.join(run_config['save_path'], model_fpath)
        if run_config['debug']:
            wdump_fpath = os.path.join(run_config['save_path'], wdump_fpath)
    if run_config['debug']:
        # BUG FIX: the weight dump file used to be opened only when
        # save_path was also set, causing a NameError at wdump.write()
        # when debugging without a save_path. Open it whenever debug is on.
        wdump = open(wdump_fpath, "w")

    for _, task in run_config['tasks']:
        task.optimizer.zero_grad()

    with open(result_fpath, "w") as out:
        if run_config['debug']:
            t = time.time()
        best_wer = None
        for epoch in range(last_epoch + 1, run_config['epochs'] + 1):
            # FIXME: avoid end of epoch with small batch?
            for _j, item in enumerate(data.iter_train_batches(reshuffle=True)):
                j = _j + 1  # 1-based batch counter
                for name, task in run_config['tasks']:
                    # Report the speaker only for homogeneous batches.
                    spkr = item['speaker']
                    spkr = spkr[0] if len(set(spkr)) == 1 else 'MIXED'
                    args = task.args(item)
                    args = [torch.from_numpy(x).cuda() for x in args]

                    loss = task.cost(*args)

                    task.optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_norm_(task.parameters(),
                                             task.config['max_norm'])
                    task.optimizer.step()

                    print(epoch, j, j * data.batch_size, spkr, "train",
                          str(loss.item()))

                    if j % run_config['validate_period'] == 0:
                        loss = valid_loss(net, task, data)
                        print(epoch, j, 0, "VALID", "valid",
                              str(np.mean(loss)))
                        # Dump the first scalar of every parameter tensor,
                        # as a cheap fingerprint for debugging.
                        if run_config['debug']:
                            weights = [
                                str(p.view(-1)[0].item())
                                for p in task.parameters()
                            ]
                            wdump.write(",".join(weights))
                            wdump.write("\n")
                            wdump.flush()

                    sys.stdout.flush()

            if run_config['debug']:
                t2 = time.time()
                # BUG FIX: '{:3f}' means minimum-width 3, not 3 decimals;
                # '.3f' was clearly intended.
                print("Elapsed time: {:.3f}".format(t2 - t))
                t = t2
            with testing(net):
                scorer.set_net(net)
                result = dict(epoch=epoch, cer=scorer.cer(), wer=scorer.wer())
                cer = result['cer']['CER']
                wer = result['wer']['WER']
                print(epoch, j, 0, "CER", "valid", cer, "WER", "valid", wer)
                out.write(json.dumps(result))
                out.write("\n")
                out.flush()
                # Keep the best-WER weights; roll back on regression.
                if best_wer is None or wer < best_wer:
                    torch.save(net.state_dict(), model_fpath)
                    best_wer = wer
                else:
                    net.load_state_dict(torch.load(model_fpath))
                    if 'epsilon_decay' in run_config.keys():
                        for p in net.SpeechTranscriber.optimizer.param_groups:
                            p["eps"] *= run_config['epsilon_decay']
                            print('Epsilon decay - new value: ', p["eps"])
            if run_config['debug']:
                t2 = time.time()
                print("Elapsed time: {:.3f}".format(t2 - t))
                t = t2

    if run_config['debug']:
        wdump.close()

    # Save full model for inference (overwrites the best state_dict file).
    torch.save(net, model_fpath)
예제 #17
0
def experiment(net, data, run_config):
    """Jointly train `net` on the SpeechImage, SpeechText, SpeechCorrText
    and TextImage tasks, interleaving one batch per task per step.

    Per batch and task: one optimization step with gradient clipping and a
    running-average cost printout; every `validate_period` batches, the mean
    validation loss. Per epoch: saves model.<epoch>.pkl and appends a JSON
    line with RSA, retrieval and speaker-id scores to result.json. A final
    full model is saved as model.pkl.

    Args:
        net: multi-task network; its task modules are listed in
            run_config['tasks'] as (name, task) pairs.
        data: dict of batchers keyed by the four task names.
        run_config: dict with 'tasks', 'epochs', 'validate_period' and a
            'Scorer' instance.
    """
    net.cuda()
    net.train()
    scorer = run_config['Scorer']
    last_epoch = 0  # no resume support: training always starts at epoch 1

    for _, task in run_config['tasks']:
        task.optimizer.zero_grad()

    with open("result.json", "w") as out:
        for epoch in range(last_epoch + 1, run_config['epochs'] + 1):
            # Running cost/N accumulators per task for averaged reporting.
            costs = dict(SpeechText=Counter(),
                         SpeechCorrText=Counter(),
                         SpeechImage=Counter(),
                         TextImage=Counter())

            # zip truncates to the shortest stream: an epoch ends when any
            # of the four batchers is exhausted.
            for _j, items in enumerate(
                    zip(
                        data['SpeechImage'].iter_train_batches(reshuffle=True),
                        data['SpeechText'].iter_train_batches(reshuffle=True),
                        data['SpeechCorrText'].iter_train_batches(
                            reshuffle=True),
                        data['TextImage'].iter_train_batches(reshuffle=True))):
                j = _j + 1  # 1-based batch counter
                item = dict(SpeechImage=items[0],
                            SpeechText=items[1],
                            SpeechCorrText=items[2],
                            TextImage=items[3])
                for name, task in run_config['tasks']:
                    # Report the speaker only when the batch is homogeneous.
                    spk = item[name]['speaker'][0] if len(
                        set(item[name]['speaker'])) == 1 else 'MIXED'
                    args = task.args(item[name])
                    args = [torch.from_numpy(x).cuda() for x in args]

                    loss = task.cost(*args)

                    task.optimizer.zero_grad()
                    loss.backward()
                    # In-place gradient clipping to the task's max_norm.
                    nn.utils.clip_grad_norm_(task.parameters(),
                                             task.config['max_norm'])
                    task.optimizer.step()
                    costs[name] += Counter({'cost': loss.data.item(), 'N': 1})
                    print(
                        epoch, j, j * data[name].batch_size, name, spk,
                        "train",
                        "".join([str(costs[name]['cost'] / costs[name]['N'])]))

                    if j % run_config['validate_period'] == 0:
                        loss = valid_loss(net, name, task, data)
                        print(epoch, j, 0, name, "VALID", "valid",
                              "".join([str(numpy.mean(loss))]))

                    sys.stdout.flush()
            torch.save(net, "model.{}.pkl".format(epoch))

            # Evaluate with the net in testing mode (see vg.scorer.testing).
            with testing(net):
                result = dict(epoch=epoch,
                              rsa=scorer.rsa_image(net),
                              retrieval=scorer.retrieval(net),
                              speaker_id=scorer.speaker_id(net))
                out.write(json.dumps(result))
                out.write("\n")
                out.flush()

    torch.save(net, "model.pkl")
예제 #18
0
파일: audio2vecc.py 프로젝트: gchrupala/vgs
 def test_cost(self, speech1_prev, speech1, speech2, speech3_prev, speech3):
     """Cost of predicting the context segments from the encoded middle
     segment, computed in eval/testing mode."""
     with testing(self):
         # Explicit eval() kept alongside testing() for identical behavior.
         self.eval()
         middle_rep = self(speech2)
         return self.cost(speech1_prev, speech1, middle_rep, speech3_prev,
                          speech3)