Exemplo n.º 1
0
def reward_progressive_match_label(sample, batch, sample_score=None):
    """Reward the leading run of positions where the sample equals the label.

    The sample is masked, compared element-wise with ``batch.labels.label``
    and, per column, only the matches kept by ``util.masked_full_like`` with
    ``num_non_padding`` set to the first non-matching row survive. That
    first non-matching row is then given a penalty of -0.1, except for
    columns that have at least one match and whose first non-match index
    is 0.

    Args:
        sample: 2-D array of sampled ids. Axis 0 is the one padded or
            truncated against the label; axis 1 is iterated column by
            column (presumably time-major ``[step, batch]`` -- confirm).
        batch: Object exposing ``batch.labels.label``, a 2-D array that is
            compared with ``sample``.
        sample_score: Not used by this function.

    Returns:
        A tuple ``(match, avgmatch)``: the per-position reward array, with
        as many rows as ``sample``, and the masked fraction of matching
        positions computed before the progressive masking.
    """
    # Opaque helper: presumably the row index of the first zero in each
    # column -- confirm against util.
    first_zero = util.find_first_min_zero(sample)
    keep, _ = util.masked_full_like(sample,
                                    1,
                                    num_non_padding=first_zero + 1,
                                    dtype=np.int32)
    keep = keep * (first_zero > 0)

    raw_sample = sample
    masked = raw_sample * keep
    raw_label = batch.labels.label
    target = raw_label

    n_sample = len(masked)
    n_label = len(raw_label)

    # Zero-pad whichever of the two is shorter along axis 0 so that they
    # can be subtracted element-wise.
    extra_rows = ((0, abs(n_sample - n_label)), (0, 0))
    if n_label < n_sample:
        target = np.pad(raw_label, extra_rows, 'constant', constant_values=0)
    elif n_sample < n_label:
        masked = np.pad(masked, extra_rows, 'constant', constant_values=0)

    hits = (np.abs(masked - target) == 0).astype(np.float32)

    # Bring the match array back to the row count of the original sample.
    if n_label < n_sample:
        hits[n_label - 1:, :] = 0
    elif n_sample < n_label:
        hits = hits[:n_sample, :]

    mean_hits = np.sum(hits * keep) / np.sum(keep)
    hits_per_col = np.sum(hits, axis=0)
    # argmin returns the first row holding the minimum value, i.e. the
    # first 0 when a column contains one.
    first_miss = np.argmin(hits, axis=0)
    prefix, _ = util.masked_full_like(hits, 1, num_non_padding=first_miss)
    hits = hits * prefix

    for col in range(masked.shape[1]):
        exempt = hits_per_col[col] > 0 and first_miss[col] == 0
        if not exempt:
            hits[first_miss[col], col] = -0.1
    return hits, mean_hits
Exemplo n.º 2
0
def reward_match_label(sample, batch, partial_match=True, sample_score=None):
    """Reward a sample for agreeing with ``batch.labels.label``.

    Args:
        sample: 2-D array of sampled ids. Axis 0 is the one padded or
            truncated against the label; axis 1 is iterated column by
            column (presumably time-major ``[step, batch]`` -- confirm).
        batch: Object exposing ``batch.labels.label``, a 2-D array that is
            compared element-wise with ``sample``.
        partial_match: When true, every matching position earns
            ``1 / (seq_len + 1)``. When false, a column earns a single
            reward of 1, written at row ``seq_len`` of that column, and
            only if every position of the column matches.
        sample_score: Not used by this function.

    Returns:
        A tuple ``(match, avgmatch)``: the masked per-position reward
        array, with as many rows as ``sample``, and the sum of rewards
        divided by the number of columns whose ``seq_len`` is positive.
    """
    # Opaque helper: presumably the row index of the first zero in each
    # column -- confirm against util.
    seq_len = util.find_first_min_zero(sample)
    mask, __ = util.masked_full_like(sample,
                                     1,
                                     num_non_padding=seq_len + 1,
                                     dtype=np.int32)
    mask = mask * (seq_len > 0)
    sample, _sample = sample * mask, sample
    label = _label = batch.labels.label

    # Zero-pad whichever of the two is shorter along axis 0 so that they
    # can be subtracted element-wise.
    pad_width = ((0, abs(len(sample) - len(label))), (0, 0))
    if len(label) < len(sample):
        label = np.pad(label, pad_width, 'constant', constant_values=0)
    elif len(sample) < len(label):
        sample = np.pad(sample, pad_width, 'constant', constant_values=0)
    diff = np.abs(sample - label)

    if partial_match:
        match = (diff == 0).astype(np.float32) / (seq_len + 1)
        # Bring the match array back to the row count of the original sample.
        if len(_label) < len(_sample):
            match[len(_label) - 1:, :] = 0
        elif len(_sample) < len(_label):
            match = match[:len(_sample), :]
    else:
        sumdiff = np.sum(diff, axis=0)
        # Allocate from the unpadded sample: `sample` may have been padded
        # to the label length above, and an array of that shape cannot be
        # multiplied with `mask` below.
        match = np.zeros_like(_sample, dtype=np.float32)
        for ib in range(seq_len.shape[0]):
            if sumdiff[ib] == 0:
                match[seq_len[ib], ib] = 1
    match = match * mask
    avgmatch = np.sum(match) / np.sum(seq_len > 0)
    return match, avgmatch
Exemplo n.º 3
0
def reward_constant(sample, batch, constant=-0.1, sample_score=None):
    """Assign the same reward ``constant`` to every unmasked position.

    Args:
        sample: Array of sampled ids, passed to ``util.find_first_min_zero``
            and ``util.masked_full_like`` to build the position mask.
        batch: Not used by this function.
        constant: Reward given to each position kept by the mask.
        sample_score: Not used by this function.

    Returns:
        A tuple ``(reward, avg)``: the mask scaled by ``constant``, and
        ``np.mean(seq_len + 1) * constant``.
    """
    # Opaque helper: presumably the row index of the first zero in each
    # column -- confirm against util.
    seq_len = util.find_first_min_zero(sample)
    mask, __ = util.masked_full_like(sample,
                                     1,
                                     num_non_padding=seq_len + 1,
                                     dtype=np.int32)
    mask = mask * (seq_len > 0)
    # Scale by `constant` exactly once, so that the per-position reward is
    # `constant` itself and is on the same scale as the returned average.
    return mask * constant, np.mean(seq_len + 1) * constant
Exemplo n.º 4
0
def reward_global_ngram_stat(sample,
                             batch,
                             global_count,
                             current_count,
                             update_fn,
                             ngram_fn,
                             sample_score=None):
    """Score each n-gram of a sample by a negative log count ratio.

    For every column, the rows before ``seq_len`` are turned into n-grams by
    ``ngram_fn``; the i-th n-gram receives
    ``-log((current_count[ngram] + 1) / (global_count[ngram] + 1))`` at
    row i. After scoring, ``update_fn`` is called with the batch and the
    n-grams collected for all columns.

    Args:
        sample: 2-D array of sampled ids, sliced as ``sample[:length, col]``.
        batch: Passed through to ``update_fn`` unchanged.
        global_count: Mapping from n-gram to its count in the denominator.
        current_count: Mapping from n-gram to its count in the numerator.
        update_fn: Callable invoked as ``update_fn(batch, batch_ngrams)``.
        ngram_fn: Callable returning an iterable of n-grams for a sequence.
        sample_score: Not used by this function.

    Returns:
        A tuple ``(scores, avg)``: the per-position score array and the sum
        of scores divided by ``np.sum(seq_len + 1)``.
    """
    # Opaque helper: presumably the row index of the first zero in each
    # column -- confirm against util.
    lengths = util.find_first_min_zero(sample)
    scores = np.zeros_like(sample, dtype=np.float32)
    collected = []
    num_cols = sample.shape[1]
    for col in range(num_cols):
        grams = tuple(ngram_fn(sample[:lengths[col], col]))
        collected.append(grams)
        for pos, gram in enumerate(grams):
            # Look up the current count before the global one, as the
            # mappings may create entries on access.
            numerator = current_count[gram] + 1
            denominator = global_count[gram] + 1
            scores[pos, col] = -np.log(numerator / denominator)
    update_fn(batch, collected)
    total_positions = np.sum(lengths + 1)
    return scores, np.sum(scores) / total_positions
Exemplo n.º 5
0
def reward_bleu(sample,
                batch,
                ref_fn,
                reward_incomplete=False,
                sample_score=None):
    """Give each column a single sentence-level BLEU reward at its last step.

    Args:
        sample: 2-D array of sampled ids, sliced as ``sample[:step, col]``.
        batch: Passed to ``ref_fn`` to obtain the references.
        ref_fn: Callable returning, per column, the references handed to
            ``util.sentence_bleu``, or ``None`` when a column has none.
        reward_incomplete: When true, columns whose length exceeds the
            number of rows in ``sample`` are still scored, using all rows.
        sample_score: Not used by this function.

    Returns:
        A tuple ``(scores, avg)``: an array that is zero except for the
        BLEU value at row ``step - 1`` of each scored column, and the sum of
        scores divided by the number of columns that have references.
    """
    # Opaque helper: presumably the row index of the first zero in each
    # column -- confirm against util. Adding 1 turns it into a length.
    lengths = util.find_first_min_zero(sample) + 1
    scores = np.zeros_like(sample, dtype=np.float32)
    all_refs = ref_fn(batch)
    num_rows = sample.shape[0]
    with_refs = 0
    for col, length in enumerate(lengths):
        refs = all_refs[col]
        if refs is None:
            continue
        with_refs += 1
        fits = length <= num_rows
        if not (fits or reward_incomplete):
            continue
        # A length beyond the array is clamped to the last available row.
        end = length if fits else num_rows
        scores[end - 1, col] = util.sentence_bleu(refs, sample[:end, col])
    return scores, np.sum(scores) / with_refs