示例#1
0
def batchop(datapoints,
            VOCAB,
            GENDER,
            config,
            for_prediction=False,
            *args,
            **kwargs):
    """Collate datapoints into an ``(ids, inputs, target)`` batch.

    Every datapoint must expose ``id``, ``gender``, ``in_sequence`` and
    ``out_sequence``. Tokens are looked up in ``VOCAB`` and each sequence is
    wrapped in ``VOCAB['GO']`` ... ``VOCAB['EOS']``; genders are looked up in
    ``GENDER``.

    ``for_prediction``, ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)``. Both sequences
        are ``LongVar(config, pad_seq(...))`` with axes 0 and 1 swapped.
        The last element is the bare value, not a 1-tuple.
    """

    def wrap(tokens):
        # GO id, then the token ids, then EOS id.
        return [VOCAB['GO']] + [VOCAB[tok] for tok in tokens] + [VOCAB['EOS']]

    indices = [point.id for point in datapoints]

    gender_ids, source_ids, target_ids = [], [], []
    for point in datapoints:
        gender_ids.append(GENDER[point.gender])
        source_ids.append(wrap(point.in_sequence))
        target_ids.append(wrap(point.out_sequence))

    gender = LongVar(config, gender_ids)
    in_sequence = LongVar(config, pad_seq(source_ids)).transpose(0, 1)
    out_sequence = LongVar(config, pad_seq(target_ids)).transpose(0, 1)

    return indices, (gender, in_sequence), out_sequence
示例#2
0
def batchop2(datapoints,
             VOCAB,
             GENDER,
             config,
             for_prediction=False,
             *args,
             **kwargs):
    """Collate datapoints into a batch, optionally including targets.

    Every datapoint must expose ``id``, ``gender`` and ``in_sequence``;
    ``out_sequence`` is read only when ``for_prediction`` is truthy.
    Sequences are looked up in ``VOCAB`` and wrapped in ``VOCAB['GO']`` ...
    ``VOCAB['EOS']``; genders are looked up in ``GENDER``.

    NOTE(review): targets are built only when ``for_prediction`` is truthy,
    which looks inverted relative to the flag's name -- confirm against the
    callers before relying on it.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)`` when
        ``for_prediction`` is truthy, otherwise
        ``(indices, (gender, in_sequence), ())``. Sequences are
        ``LongVar(config, pad_seq(...))`` with axes 0 and 1 swapped.
    """

    def wrap(tokens):
        # GO id, then the token ids, then EOS id.
        return [VOCAB['GO']] + [VOCAB[tok] for tok in tokens] + [VOCAB['EOS']]

    indices = [point.id for point in datapoints]

    gender_ids = []
    source_ids = []
    target_ids = []
    for point in datapoints:
        gender_ids.append(GENDER[point.gender])
        source_ids.append(wrap(point.in_sequence))
        if for_prediction:
            target_ids.append(wrap(point.out_sequence))

    gender = LongVar(config, gender_ids)
    in_sequence = LongVar(config, pad_seq(source_ids)).transpose(0, 1)

    if not for_prediction:
        return indices, (gender, in_sequence), ()

    out_sequence = LongVar(config, pad_seq(target_ids)).transpose(0, 1)
    return indices, (gender, in_sequence), out_sequence
示例#3
0
def predict_batchop(datapoints, VOCAB, LABELS, config, *args, **kwargs):
    """Collate datapoints into a target-less ``(ids, inputs, ())`` batch.

    Every datapoint must expose ``id``, ``story`` and ``q``; their tokens
    are looked up in ``VOCAB``. ``LABELS``, ``*args`` and ``**kwargs`` are
    accepted but not used.

    Returns:
        ``(indices, (story, question), ())`` where ``story`` and
        ``question`` are ``LongVar(config, pad_seq(...))`` of the id lists.
    """

    def to_ids(tokens):
        return [VOCAB[tok] for tok in tokens]

    indices = [point.id for point in datapoints]

    story_ids, question_ids = [], []
    for point in datapoints:
        story_ids.append(to_ids(point.story))
        question_ids.append(to_ids(point.q))

    story = LongVar(config, pad_seq(story_ids))
    question = LongVar(config, pad_seq(question_ids))

    return indices, (story, question), ()
示例#4
0
def batchop(datapoints, VOCAB, LABELS, *args, **kwargs):
    """Collate datapoints into an ``(ids, inputs, target)`` batch.

    Every datapoint must expose ``id``, ``story``, ``q`` and ``a``. Story and
    question tokens are looked up in ``VOCAB``; the answer is looked up in
    ``LABELS``. ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (story, question), answer)``. ``story`` and ``question``
        are ``LongVar(pad_seq(...))`` of the id lists and ``answer`` is
        ``LongVar`` of the label ids. The last element is the bare value,
        not a 1-tuple.
    """

    def to_ids(tokens):
        return [VOCAB[tok] for tok in tokens]

    indices = [point.id for point in datapoints]

    story_ids, question_ids, answer_ids = [], [], []
    for point in datapoints:
        story_ids.append(to_ids(point.story))
        question_ids.append(to_ids(point.q))
        answer_ids.append(LABELS[point.a])

    story = LongVar(pad_seq(story_ids))
    question = LongVar(pad_seq(question_ids))
    answer = LongVar(answer_ids)

    return indices, (story, question), answer
def plot_accuracies(
        epoch_limit,
        min_epoch_count,
        max_epoch_count,
        accuracies,
        plot_title='Combined Accuracy',
        plot_filepath='combined_accuracy_heatmap.png',
        labels=None,
        y_offsets=None,
        ylabel='Accuracy',
        xlabel='Epoch',
        ylim=(0, 1),
        moving_avg=0,
):
    """Plot one mean-accuracy curve per task, save the figure and show it.

    Args:
        epoch_limit: Accepted for interface compatibility; not used.
        min_epoch_count: Accepted for interface compatibility; not used.
        max_epoch_count: Accepted for interface compatibility; not used.
        accuracies: Iterable of ``(task_name, acc)`` pairs. ``acc`` is passed
            through ``pad_seq`` and averaged over axis 0 to get the curve.
        plot_title: Figure title.
        plot_filepath: Path the figure is saved to.
        labels: Optional mapping from ``os.path.basename(task_name)`` to the
            text shown in the legend. Tasks without an entry keep their
            original ``task_name`` as the legend text.
        y_offsets: Accepted for interface compatibility; not used, because
            no per-line text labels are drawn.
        ylabel: Y-axis label.
        xlabel: X-axis label.
        ylim: ``(low, high)`` passed to ``plt.ylim``; falsy leaves the
            limits automatic.
        moving_avg: When truthy, each curve is passed through
            ``calc_moving_avg(curve, moving_avg)`` before plotting.
    """
    # None replaces the shared mutable ``{}`` default.
    if labels is None:
        labels = {}

    fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    for side in ('top', 'bottom', 'right', 'left'):
        ax.spines[side].set_visible(False)

    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Epochs are whole numbers, so keep the x ticks on integers.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    plotted_any = False
    for i, (task_name, acc) in enumerate(accuracies):
        p = np.asarray(pad_seq(acc)).mean(axis=0)
        log.debug('p.shape: {}'.format(p.shape))
        if moving_avg:
            p = calc_moving_avg(p, moving_avg)

        # Use the caller-supplied display name when there is one.
        short_name = os.path.basename(task_name)
        legend_label = labels[short_name] if short_name in labels else task_name

        plt.plot(p, lw=2.5, color=colors[i + 1], label=legend_label)
        plotted_any = True

    # Build the legend once, and only when there is something to list.
    if plotted_any:
        plt.legend(loc='lower right')

    fig.suptitle(plot_title, fontsize=18, ha='center')

    if ylim:
        plt.ylim(*ylim)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.savefig(plot_filepath, bbox_inches='tight')
    plt.show()
示例#6
0
def batchop(datapoints, WORD2INDEX, *args, **kwargs):
    """Collate datapoints into a batch with extended-vocabulary ids.

    Every datapoint must expose ``id``, ``story``, ``q`` and ``a``. Tokens
    are looked up in ``WORD2INDEX`` and each sequence gets
    ``WORD2INDEX['EOS']`` appended.

    A word counts as out-of-vocabulary (OOV) when its index equals
    ``WORD2INDEX['UNK']``. In the extended-vocabulary sequences each OOV word
    is replaced by ``len(WORD2INDEX) + rank``, where ``rank`` is the position
    of its first occurrence among the distinct OOV words of that datapoint's
    ``story + q + a``. Ranks are therefore reproducible across runs.

    ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (story, question),
        (answer, extvocab_story, extvocab_answer, extvocab_size))``. The
        sequences are ``LongVar(pad_seq(...))`` of the id lists, and
        ``extvocab_size`` is the largest per-datapoint OOV count (0 when
        there is none).
    """
    indices = [d.id for d in datapoints]
    story = []
    question = []
    answer = []
    extvocab_story = []
    extvocab_answer = []

    UNK = WORD2INDEX['UNK']

    def build_oov(d):
        # word -> rank in first-occurrence order; dict lookup keeps the
        # per-token cost constant.
        oov = {}
        for w in d.story + d.q + d.a:
            if WORD2INDEX[w] == UNK and w not in oov:
                oov[w] = len(oov)
        return oov

    def extend(words, oov):
        # OOV words get ids past the end of the regular vocabulary.
        base = len(WORD2INDEX)
        return [base + oov[w] if WORD2INDEX[w] == UNK else WORD2INDEX[w]
                for w in words] + [WORD2INDEX['EOS']]

    extvocab_size = 0
    for d in datapoints:
        story.append([WORD2INDEX[w] for w in d.story] + [WORD2INDEX['EOS']])
        question.append([WORD2INDEX[w] for w in d.q] + [WORD2INDEX['EOS']])
        answer.append([WORD2INDEX[w] for w in d.a] + [WORD2INDEX['EOS']])

        oov = build_oov(d)
        extvocab_story.append(extend(d.story, oov))
        extvocab_answer.append(extend(d.a, oov))

        extvocab_size = max(extvocab_size, len(oov))

    story = LongVar(pad_seq(story))
    question = LongVar(pad_seq(question))
    answer = LongVar(pad_seq(answer))
    extvocab_answer = LongVar(pad_seq(extvocab_answer))
    extvocab_story = LongVar(pad_seq(extvocab_story))

    batch = indices, (story, question), (answer, extvocab_story, extvocab_answer, extvocab_size)
    return batch
示例#7
0
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Collate datapoints into a target-less ``(ids, inputs, ())`` batch.

    Every datapoint must expose ``id`` and ``sequence``; tokens are looked
    up in ``VOCAB``. ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (sequence,), ())`` where ``sequence`` is
        ``LongVar(config, pad_seq(...))`` of the id lists.
    """
    indices = [d.id for d in datapoints]
    sequence = [[VOCAB[w] for w in d.sequence] for d in datapoints]

    sequence = LongVar(config, pad_seq(sequence))
    batch = indices, (sequence, ), ()
    return batch
示例#8
0
def batchop(datapoints, VOCAB, GENDER, config, *args, **kwargs):
    """Collate datapoints into an ``(ids, inputs, target)`` batch.

    Every datapoint must expose ``id``, ``gender``, ``in_sequence`` and
    ``out_sequence``. Sequences are looked up in ``VOCAB`` and wrapped in
    ``VOCAB['GO']`` ... ``VOCAB['EOS']``; genders are looked up in
    ``GENDER``. ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (gender, in_sequence), out_sequence)``. Both sequences
        are ``LongVar(config, pad_seq(...))`` with axes 0 and 1 swapped.
        The last element is the bare value, not a 1-tuple.
    """

    def wrap(tokens):
        # GO id, then the token ids, then EOS id.
        return [VOCAB['GO']] + [VOCAB[tok] for tok in tokens] + [VOCAB['EOS']]

    indices = [point.id for point in datapoints]

    source_ids, target_ids, gender_ids = [], [], []
    for point in datapoints:
        source_ids.append(wrap(point.in_sequence))
        target_ids.append(wrap(point.out_sequence))
        gender_ids.append(GENDER[point.gender])

    in_sequence = LongVar(config, pad_seq(source_ids)).transpose(0, 1)
    out_sequence = LongVar(config, pad_seq(target_ids)).transpose(0, 1)
    gender = LongVar(config, gender_ids)

    return indices, (gender, in_sequence), out_sequence
示例#9
0
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Collate word-pair datapoints into an ``(ids, inputs, target)`` batch.

    Every datapoint must expose ``id``, ``pair`` (unpacked into two
    iterables whose items are looked up in ``VOCAB``) and ``existence``.
    ``max_token_len`` is no longer read: the value was computed and then
    discarded. ``*args`` and ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (word1, word2), existence)``. ``word1`` and ``word2``
        are ``LongVar(config, pad_seq(...))`` of the id lists and
        ``existence`` is ``LongVar(config, ...)`` of the collected values.
        The last element is the bare value, not a 1-tuple.
    """
    indices = [d.id for d in datapoints]

    word1 = []
    word2 = []
    existence = []
    for d in datapoints:
        w1, w2 = d.pair
        word1.append([VOCAB[i] for i in w1])
        word2.append([VOCAB[i] for i in w2])

        existence.append(d.existence)

    word1 = LongVar(config, pad_seq(word1))
    word2 = LongVar(config, pad_seq(word2))

    existence = LongVar(config, existence)

    batch = indices, (word1, word2), existence
    return batch
示例#10
0
def batchop(datapoints, VOCAB, config, *args, **kwargs):
    """Collate datapoints into a shifted input/target batch.

    Every datapoint must expose ``id`` and ``sequence``; tokens are looked
    up in ``VOCAB``. The padded ids are wrapped in ``LongVar`` and axes 0
    and 1 are swapped before slicing. ``*args`` and ``**kwargs`` are
    accepted but not used.

    Returns:
        ``(indices, swapped[:-1], swapped[1:])`` -- the input drops the last
        slice along axis 0 and the target drops the first. Neither is
        wrapped in a tuple.
    """
    indices = [point.id for point in datapoints]

    token_ids = []
    for point in datapoints:
        token_ids.append([VOCAB[tok] for tok in point.sequence])

    swapped = LongVar(config, pad_seq(token_ids)).transpose(1, 0)

    return indices, swapped[:-1], swapped[1:]
示例#11
0
def batchop(datapoints, WORD2INDEX, *args, **kwargs):
    """Collate datapoints into a NumPy-array batch with answer positions.

    Every datapoint must expose ``id``, ``context``, ``q`` and
    ``a_positions``. Context tokens are looked up in ``WORD2INDEX`` and
    followed by ``WORD2INDEX['EOS']``; question tokens are looked up without
    an EOS. Each answer-position list gets ``len(context)`` appended, and the
    recorded answer length counts that extra entry. ``*args`` and
    ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (context, question, answer_lengths),
        (answer_positions,))`` with every member converted by ``np.array``;
        context, question and answer positions go through ``pad_seq`` first.
    """
    indices = [point.id for point in datapoints]

    context_ids, question_ids = [], []
    position_rows, answer_lengths = [], []
    for point in datapoints:
        context_ids.append(
            [WORD2INDEX[tok] for tok in point.context] + [WORD2INDEX['EOS']])
        question_ids.append([WORD2INDEX[tok] for tok in point.q])

        # +1 accounts for the position appended below.
        n_positions = len(point.a_positions) + 1
        position_rows.append(list(point.a_positions) + [len(point.context)])
        answer_lengths.append(n_positions)

    padded_context = pad_seq(context_ids)
    padded_question = pad_seq(question_ids)
    padded_positions = pad_seq(position_rows)

    inputs = (np.array(padded_context), np.array(padded_question),
              np.array(answer_lengths))
    targets = (np.array(padded_positions), )
    return indices, inputs, targets
示例#12
0
def batchop(datapoints, VOCAB, GENDER, config, *args, **kwargs):
    """Collate datapoints into a target-less ``(ids, inputs, ())`` batch.

    Every datapoint must expose ``id``, ``sequence`` and ``gender``. The
    sequence is looked up in ``VOCAB`` and wrapped in ``VOCAB['GO']`` ...
    ``VOCAB['EOS']``; the gender is looked up in ``GENDER``. ``*args`` and
    ``**kwargs`` are accepted but not used.

    Returns:
        ``(indices, (gender, sequence), ())`` where ``sequence`` is
        ``LongVar(config, pad_seq(...))`` and ``gender`` is
        ``LongVar(config, ...)`` of the gender ids.
    """
    indices = [point.id for point in datapoints]

    token_ids, gender_ids = [], []
    for point in datapoints:
        wrapped = [VOCAB['GO']]
        wrapped += [VOCAB[tok] for tok in point.sequence]
        wrapped += [VOCAB['EOS']]
        token_ids.append(wrapped)
        gender_ids.append(GENDER[point.gender])

    sequence = LongVar(config, pad_seq(token_ids))
    gender = LongVar(config, gender_ids)

    return indices, (gender, sequence), ()
示例#13
0
def batchop(datapoints, VOCAB, LABELS, *args, **kwargs):
    """Collate datapoints into an ``(ids, inputs, targets)`` batch.

    Every datapoint must expose ``id``, ``sentence`` and ``label``. Sentence
    tokens are looked up in ``VOCAB`` and followed by ``VOCAB['EOS']``; the
    label is looked up in ``LABELS``. ``*args`` and ``**kwargs`` are
    accepted but not used.

    Returns:
        ``(indices, (sentence,), (label,))`` where ``sentence`` is
        ``LongVar(pad_seq(...))`` and ``label`` is ``LongVar`` of the label
        ids.
    """
    indices = [point.id for point in datapoints]

    sentence_ids, label_ids = [], []
    for point in datapoints:
        encoded = [VOCAB[tok] for tok in point.sentence]
        encoded.append(VOCAB['EOS'])
        sentence_ids.append(encoded)
        label_ids.append(LABELS[point.label])

    sentence = LongVar(pad_seq(sentence_ids))
    label = LongVar(label_ids)

    return indices, (sentence, ), (label, )
示例#14
0
def batchop(datapoints, VOCAB, LABELS, config,  for_prediction=False, *args, **kwargs):
    """Collate datapoints into a batch, with labels unless predicting.

    Every datapoint must expose ``id`` and ``sequence``; ``label`` is read
    (and looked up in ``LABELS``) only when ``for_prediction`` is falsy.
    Tokens are looked up in ``VOCAB``. ``*args`` and ``**kwargs`` are
    accepted but not used.

    Returns:
        ``(indices, (sequence,), label)``. ``sequence`` is
        ``LongVar(config, pad_seq(...))``. ``label`` is
        ``LongVar(config, ...)`` of the label ids, or an empty list when
        ``for_prediction`` is truthy. It is the bare value, not a 1-tuple.
    """
    with_labels = not for_prediction
    indices = [point.id for point in datapoints]

    token_ids = []
    label = []
    for point in datapoints:
        token_ids.append([VOCAB[tok] for tok in point.sequence])
        if with_labels:
            label.append(LABELS[point.label])

    sequence = LongVar(config, pad_seq(token_ids))
    if with_labels:
        label = LongVar(config, label)

    return indices, (sequence, ), label