示例#1
0
def encode_sentences(task, audios, batch_size=128):
    """Run ``task.predict`` over ``audios`` in batches and stack the outputs.

    Every group produced by ``util.grouper(audios, batch_size)`` is passed
    through ``vector_padder``, wrapped in a ``Variable``, moved to the GPU
    and handed to ``task.predict``.  Each prediction is copied back to the
    CPU as a numpy array and all of them are combined with
    ``numpy.vstack``.

    NOTE(review): ``.cuda()`` is called unconditionally, so a CUDA device is
    required; ``numpy.vstack`` raises ``ValueError`` if no batch is produced.
    """
    encoded = []
    for batch in util.grouper(audios, batch_size):
        padded = torch.from_numpy(vector_padder(batch))
        inputs = torch.autograd.Variable(padded).cuda()
        prediction = task.predict(inputs)
        encoded.append(prediction.data.cpu().numpy())
    return numpy.vstack(encoded)
示例#2
0
def encode_sentences_SpeechText(task, audios, batch_size=128):
    """Encode ``audios`` with the two speech-encoder stages of ``task.SpeechText``.

    Each group from ``util.grouper(audios, batch_size)`` is passed through
    ``vector_padder``, wrapped in a ``Variable`` and moved to the GPU.  The
    batch is fed to ``SpeechEncoderBottom`` and its output to
    ``SpeechEncoderTop``; the results are copied to the CPU as numpy arrays
    and combined with ``numpy.vstack``.

    NOTE(review): requires a CUDA device; ``numpy.vstack`` raises
    ``ValueError`` if no batch is produced.
    """
    encoded = []
    for batch in util.grouper(audios, batch_size):
        padded = torch.from_numpy(vector_padder(batch))
        inputs = torch.autograd.Variable(padded).cuda()
        bottom = task.SpeechText.SpeechEncoderBottom(inputs)
        top = task.SpeechText.SpeechEncoderTop(bottom)
        encoded.append(top.data.cpu().numpy())
    return numpy.vstack(encoded)
示例#3
0
def embed(net, audios, batch_size=32):
    """Compute one embedding per audio with ``net.predict`` and stack them.

    Batches come from ``util.grouper(audios, batch_size)``; each is passed
    through ``vector_padder`` and moved to the device of ``net``'s first
    parameter before prediction.  The rows of every batch prediction are
    collected in order and combined with ``np.stack``.
    """
    device = next(net.parameters()).device
    rows = [
        row
        for batch in util.grouper(audios, batch_size)
        for row in net.predict(
            torch.from_numpy(vector_padder(batch)).to(device)
        ).cpu().numpy()
    ]
    return np.stack(rows)
示例#4
0
def get_attn_weights(speech_transcriber, sentences):
    """Return the attention weights of ``speech_transcriber`` as a numpy array.

    The audio for every sentence is obtained with ``get_audio``, the batch is
    passed through ``sd.vector_padder(..., pad_end=True)`` and moved to the
    GPU together with the per-item lengths (``shape[0]`` of each audio).
    ``speech_transcriber`` is called with both tensors and the second element
    of its result is returned.

    NOTE(review): requires a CUDA device because the inputs are moved with
    ``.cuda()``.
    """
    sent_audio = [get_audio(s) for s in sentences]
    # The loop variable must not be called ``sd``: that name is the module
    # providing ``vector_padder`` below.
    sent_len = [audio.shape[0] for audio in sent_audio]
    v_audio = torch.from_numpy(
        sd.vector_padder(sent_audio, pad_end=True)).cuda()
    v_audio_len = torch.from_numpy(numpy.array(sent_len)).cuda()
    _, attn_weights = speech_transcriber(v_audio, v_audio_len)
    # ``Tensor.numpy()`` only works on CPU tensors that do not track
    # gradients, so detach and copy to host memory first.
    return attn_weights.detach().cpu().numpy()
示例#5
0
def transcribe(speech_transcriber, sentences):
    """Return ``speech_transcriber.predict`` applied to the audio of ``sentences``.

    The audio for every sentence is obtained with ``get_audio``, the batch is
    passed through ``sd.vector_padder(..., pad_end=True)`` and moved to the
    GPU together with the per-item lengths (``shape[0]`` of each audio).

    NOTE(review): requires a CUDA device because the inputs are moved with
    ``.cuda()``.
    """
    sent_audio = [get_audio(s) for s in sentences]
    # The loop variable must not be called ``sd``: that name is the module
    # providing ``vector_padder`` below (and on Python 2 a list-comprehension
    # variable leaks into the function scope).
    sent_len = [audio.shape[0] for audio in sent_audio]
    v_audio = torch.from_numpy(
        sd.vector_padder(sent_audio, pad_end=True)).cuda()
    v_audio_len = torch.from_numpy(numpy.array(sent_len)).cuda()
    return speech_transcriber.predict(v_audio, v_audio_len)
示例#6
0
def iter_layer_states(model, audios, batch_size=128):
    """Yield, for each audio, the trailing slice of its stacked layer states.

    Batches from ``util.grouper(audios, batch_size)`` are passed through
    ``vector_padder`` and fed to ``model.task.pile``; the items of each result
    are paired, in order, with a per-audio length computed as
    ``(len(audio) + model.config['filter_length']) // model.config['stride']``.
    For every pair the last ``length`` entries along the first axis of the
    3-d state array are yielded.

    NOTE(review): a computed length of 0 makes ``r[-0:]`` return the whole
    array rather than an empty one -- confirm this cannot occur for real
    inputs.
    """
    # ``itertools.izip`` only exists on Python 2; the builtin ``zip`` is the
    # lazy equivalent on Python 3.  Either way the pairing stays lazy.
    lazy_zip = getattr(itertools, 'izip', zip)
    # Build the lengths from a real list: on Python 3 ``map`` returns an
    # iterator, which ``numpy.array`` would wrap as a 0-d object array.
    lens = (numpy.array([len(audio) for audio in audios]) +
            model.config['filter_length']) // model.config['stride']
    rs = (r for batch in util.grouper(audios, batch_size)
          for r in model.task.pile(vector_padder(batch)))
    for (r, l) in lazy_zip(rs, lens):
        yield r[-l:, :, :]
示例#7
0
def encode_sentences(model, audios, batch_size=128):
    """Encode audios with ``model.task.predict`` and stack the results.

    Batches come from ``util.grouper(audios, batch_size)``.  Each one is
    passed through ``vector_padder``, wrapped in a ``Variable`` on the GPU
    and given to ``model.task.predict``; the predictions are copied to the
    CPU as numpy arrays and combined with ``numpy.vstack``.

    NOTE(review): requires a CUDA device; ``numpy.vstack`` raises
    ``ValueError`` if no batch is produced.
    """
    def encode_batch(batch):
        padded = torch.from_numpy(vector_padder(batch))
        inputs = torch.autograd.Variable(padded).cuda()
        return model.task.predict(inputs).data.cpu().numpy()

    per_batch = []
    for batch in util.grouper(audios, batch_size):
        per_batch.append(encode_batch(batch))
    return numpy.vstack(per_batch)
示例#8
0
def get_state_stack(net, audios, batch_size=128):
    """Return, for each audio, the trailing slice of its stacked states.

    Batches from ``util.grouper(audios, batch_size)`` are passed through
    ``vector_padder``, moved to the device of ``net``'s first parameter and
    given to ``state_stack``.  The items of each result are paired, in order,
    with ``inout(lengths)`` (presumably the input-to-output length mapping
    -- TODO confirm) and the last ``length`` entries along the first axis are
    kept.  The slices are returned as a list.
    """
    device = next(net.parameters()).device
    lengths = inout(np.array([len(audio) for audio in audios]))

    def batch_states():
        # Lazily walk the batches so only one batch is processed at a time.
        for batch in util.grouper(audios, batch_size):
            padded = torch.from_numpy(vector_padder(batch)).to(device)
            for states in state_stack(net, padded).cpu().numpy():
                yield states

    return [states[-n:, :] for states, n in zip(batch_states(), lengths)]
示例#9
0
def get_state_stack(net, audios, batch_size=128):
    """Return, for each audio, the trailing slice of its stacked states.

    Batches from ``util.grouper(audios, batch_size)`` are passed through
    ``vector_padder``, moved to the GPU and given to ``state_stack``.  The
    items of each result are paired, in order, with ``inout(lengths)``
    (presumably the input-to-output length mapping -- TODO confirm) and the
    last ``length`` entries along the first axis are kept.  The slices are
    returned as a list.

    NOTE(review): requires a CUDA device because the inputs are moved with
    ``.cuda()``.
    """
    # Imported locally, as in the original; the docstring has to come before
    # these statements to be recognised as the function's ``__doc__``.
    import onion.util as util
    from vg.simple_data import vector_padder

    lens = inout(np.array(list(map(len, audios))))
    rs = (r for batch in util.grouper(audios, batch_size) for r in state_stack(
        net,
        torch.from_numpy(vector_padder(batch)).cuda()).cpu().numpy())
    return [r[-l:, :] for (r, l) in zip(rs, lens)]