Пример #1
0
def get_denoised(prob, ctc_bs=False):
    """Turn `prob` into text and pass that text through the denoiser.

    The text is first obtained from `prob` with `get_beam_search` (when
    `ctc_bs` is true) or `get_arg_max` (default), then encoded with
    `encode_char`, run through `denoiser.encode`, and finally handed to
    `generator.generate_sequences` together with the initial decoder state.

    Args:
        prob: Input forwarded unchanged to the decoding helper
            (presumably per-step character probabilities -- confirm).
        ctc_bs: Use the beam-search helper instead of arg-max.

    Returns:
        The generator output with surrounding whitespace stripped.
    """
    # Using CTC beam search before denoising yields only limited
    # improvements and is very slow, hence arg-max is the default.
    text = get_beam_search(prob) if ctc_bs else get_arg_max(prob)

    encoded, encoded_len = encode_char(text)
    # Wrap the encoded sequence in a list so it gets a leading axis of size 1.
    batch = mx.nd.array([encoded], ctx=ctx)
    batch_len = mx.nd.array(encoded_len, ctx=ctx)

    enc_out, _ = denoiser.encode(batch, valid_length=batch_len)
    decoder_state = denoiser.decoder.init_state_from_encoder(
        enc_out, encoder_valid_length=batch_len)

    # Single start token (BOS) placed on the same device as the input batch.
    start_token = mx.nd.full(
        shape=(1,), ctx=batch.context, dtype=np.float32, val=BOS)

    denoised = generator.generate_sequences(start_token, decoder_state, text)
    return denoised.strip()
Пример #2
0
def get_sentence(net, sentence):
    """Decode `sentence` with `net` using beam search.

    The sentence is encoded with `encode_char`, passed through
    `net.encode`, and decoded by a `BeamSearchSampler` (beam size 5,
    at most 150 steps) driven by `net.decode_logprob`.

    Args:
        net: Model exposing `encode`, `decoder.init_state_from_encoder`
            and `decode_logprob`.
        sentence: Input accepted by `encode_char` (presumably a string --
            confirm against that helper).

    Returns:
        `decode_char` applied to the first beam of the first batch entry.
        NOTE(review): GluonNLP documents beams as sorted by descending
        score, which would make this the best hypothesis -- confirm for
        the library version in use.
    """
    scorer = nlp.model.BeamSearchScorer(alpha=0, K=2, from_logits=False)
    beam_sampler = nlp.model.BeamSearchSampler(beam_size=5,
                                               decoder=net.decode_logprob,
                                               eos_id=EOS,
                                               scorer=scorer,
                                               max_length=150)

    src_seq, src_valid_length = encode_char(sentence)
    # Wrap the encoded sequence in a list so it gets a leading axis of size 1.
    src_seq = mx.nd.array([src_seq], ctx=ctx)
    src_valid_length = mx.nd.array(src_valid_length, ctx=ctx)

    encoder_outputs, _ = net.encode(src_seq, valid_length=src_valid_length)
    states = net.decoder.init_state_from_encoder(
        encoder_outputs, encoder_valid_length=src_valid_length)

    # Single start token (BOS) placed on the same device as the input batch.
    inputs = mx.nd.full(shape=(1,), ctx=src_seq.context, dtype=np.float32,
                        val=BOS)

    # Only the sampled sequences are needed; the scores and valid lengths
    # returned by the sampler are not used by this function.
    samples, _, _ = beam_sampler(inputs, states)
    first_batch_beams = samples[0].asnumpy()
    return decode_char(first_batch_beams[0])


# Demo: push one misspelled sentence through the denoiser and print the result.
sentence = "This sentnce has an eror"

# Encode the sentence and move it (plus its valid length) onto `ctx`.
src_seq, src_valid_length = encode_char(sentence)
src_seq = mx.nd.array([src_seq], ctx=ctx)
src_valid_length = mx.nd.array(src_valid_length, ctx=ctx)

# Run the encoder and derive the initial decoder state from its output.
encoder_outputs, _ = denoiser.encode(
    src_seq,
    valid_length=src_valid_length,
)
states = denoiser.decoder.init_state_from_encoder(
    encoder_outputs,
    encoder_valid_length=src_valid_length,
)

# Single start token (BOS) on the same device as the encoded input.
inputs = mx.nd.full(shape=(1,), ctx=src_seq.context, dtype=np.float32, val=BOS)

# Show the raw input followed by the generated output.
print(sentence)
print("Choice")
print(generator.generate_sequences(inputs, states, sentence))

# NOTE(review): the same sentence is encoded a second time here and the
# snippet appears to be cut off afterwards; `src_seq` is left as the raw
# `encode_char` result rather than an NDArray -- confirm intent.
sentence = "This sentnce has an eror"
src_seq, src_valid_length = encode_char(sentence)
Пример #4
0
def transform(data, label):
    """Encode a (data, label) pair and keep the raw inputs alongside.

    `data` is encoded with `encode_char(..., src=True)` and `label` with
    `encode_char(..., src=False)`.

    Returns:
        A 6-tuple: encoded source, its valid length, encoded target, its
        valid length, the original `data`, and the original `label`.
    """
    source_ids, source_len = encode_char(data, src=True)
    target_ids, target_len = encode_char(label, src=False)
    return (
        source_ids,
        source_len,
        target_ids,
        target_len,
        data,
        label,
    )