Example #1
def build_bidir_model(inp,
                      inp_mask,
                      tparams,
                      options,
                      sfx=None,
                      nsteps=None,
                      use_dropout=False,
                      use_noise=None,
                      truncate=None,
                      name=None):
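    """
    Encode `inp` with a forward RNN and a backward RNN: the forward pass
    runs over the embedded input, the backward pass over the time-reversed
    embeddings and mask. Returns the forward and reverse projections.
    """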

    if use_dropout:
        assert use_noise is not None

    assert name is not None
    assert sfx is not None

    # Reverse the mask in time for the backward pass.
    inpr_mask = inp_mask[::-1]

    n_timesteps = inp.shape[0]
    n_samples = inp.shape[1]

    emb = dot(inp, tparams['Wemb_%s' % sfx])
    emb = emb.reshape([n_timesteps, n_samples, -1])

    if use_dropout:
        emb = dropout_layer(emb, use_noise,
                            p=options['dropout_rate'])

    """
    Forward RNN
    """
    proj = get_layer(options[name])[1](tparams=tparams,
                                       state_below=emb,
                                       options=options,
                                       prefix=name,
                                       nsteps=nsteps,
                                       truncate=truncate,
                                       mask=inp_mask)

    """
    Reverse RNN.
    """
    #embr = dot(inpr, tparams['Wemb_%s' % sfx])
    embr = emb[::-1]#embr.reshape([n_timesteps, n_samples, -1])
    projr = get_layer(options[name])[1](tparams=tparams,
                                        state_below=embr,
                                        options=options,
                                        prefix=name + "_r",
                                        nsteps=nsteps,
                                        truncate=truncate,
                                        mask=inpr_mask)
    return proj, projr
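
A small self-contained NumPy sketch (not part of the original snippets; shapes are illustrative) of how the forward and reverse outputs returned above are typically recombined, matching the concatenate([proj[0], projr[0][::-1]], axis=-1) pattern used in Example #2:

import numpy as np

n_steps, n_samples, n_hid = 5, 2, 3
rng = np.random.RandomState(0)

# Stand-ins for proj[0] (forward hidden states) and projr[0] (hidden states
# of the RNN run over the time-reversed input).
fwd_states = rng.rand(n_steps, n_samples, n_hid).astype('float32')
bwd_states = rng.rand(n_steps, n_samples, n_hid).astype('float32')

# Reverse the backward states in time so step t of both halves refers to the
# same input position, then concatenate along the feature axis.
bidir = np.concatenate([fwd_states, bwd_states[::-1]], axis=-1)
print(bidir.shape)  # (5, 2, 6)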
Example #2
def build_model(tparams,
                options,
                prepare_data_fn,
                valid=None,
                cost_mask=None):
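    """
    Build the full question-answering computation graph: encode the
    description (optionally over sentence representations), encode the
    question, attend over the description conditioned on the question,
    and compute answer probabilities and the training cost.

    Returns the random streams, the dropout switch, the input variables,
    auxiliary outputs, the cost, the error measures and the answer
    probabilities.
    """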

    opt_ret = dict()

    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    # description: #words x #samples
    if options['use_sent_reps']:
        x = tensor.tensor3('desc', dtype='uint32')
        word_mask = tensor.tensor3('desc_mask', dtype='float32')
        sent_mask = tensor.cast(word_mask.sum(0) > 0, "float32")
        slen = tensor.scalar('slen', dtype='uint32')
    else:
        x = tensor.matrix('desc', dtype="uint32")
        word_mask = tensor.matrix('desc_mask', dtype='float32')

    q = tensor.matrix('q', dtype="uint32")
    q_mask = tensor.matrix('q_mask', dtype="float32")
    y = tensor.vector('ans', dtype='uint32')
    em = tensor.matrix('entity_mask', dtype="float32")

    wlen = tensor.scalar('wlen', dtype='uint32')
    qlen = tensor.scalar('qlen', dtype='uint32')
    if options['debug']:
        assert valid is not None, "Debug mode requires a validation iterator."
        if valid.done:
            valid.reset()

        valid_d = next(valid)
        d_, q_, a_, em_ = valid_d[0], valid_d[1], valid_d[2], valid_d[3]

        if options['use_sent_reps']:
            d_, d_mask_, q_, q_mask_, wlen_, slen_, qlen_ = prepare_data_fn(d_, q_)
        else:
            d_, d_mask_, q_, q_mask_, wlen_, qlen_ = prepare_data_fn(d_, q_)

        print "Debugging is enabled."

        theano.config.compute_test_value = 'warn'
        x.tag.test_value = numpy.array(d_).astype("uint32")
        word_mask.tag.test_value = numpy.array(d_mask_).astype("float32")
        q.tag.test_value = numpy.array(q_).astype("uint32")
        q_mask.tag.test_value = numpy.array(q_mask_).astype("float32")
        y.tag.test_value = numpy.array(a_).astype("uint32")
        em.tag.test_value = numpy.array(em_).astype("float32")
        wlen.tag.test_value = numpy.array(wlen_).astype("uint32")
        qlen.tag.test_value = numpy.array(qlen_).astype("uint32")

        if options['use_sent_reps']:
            slen.tag.test_value = numpy.array(slen_).astype("uint32")
            sent_mask.tag.test_value = numpy.array(d_mask_.sum(0) > 0, dtype="float32")

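    # When sentence representations are used, collapse the trailing two axes
    # so the word-level encoder sees a 2-D (time x sample) input.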
    if x.ndim == 3:
        x_rshp = x.reshape((x.shape[0], x.shape[1]*x.shape[2]))
    else:
        x_rshp = x

    """
        Bidirectional for the description.
    """
    if options['use_bidir']:
        proj_wx, proj_wxr = build_bidir_model(x_rshp,
                                              word_mask,
                                              tparams,
                                              options,
                                              sfx="word",
                                              nsteps=wlen,
                                              truncate=options['truncate'],
                                              use_dropout=options['use_dropout'],
                                              use_noise=use_noise,
                                              name="encoder_desc_word")

        desc_wrep = concatenate([proj_wx[0],
                                proj_wxr[0][::-1]],
                                axis=-1)
    else:
        proj_wx = build_nonbidir_model(x_rshp,
                                       word_mask,
                                       tparams,
                                       options,
                                       sfx="word",
                                       nsteps=wlen,
                                       truncate=options['truncate'],
                                       use_dropout=options['use_dropout'],
                                       use_noise=use_noise,
                                       name="encoder_desc_word")
        desc_wrep = proj_wx

    if options['use_bidir']:
        if options['use_sent_reps']:
            desc_wrep = desc_wrep.reshape((x.shape[0],
                                           x.shape[1],
                                           x.shape[2],
                                           -1))

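            # Masked mean of word-level states within each sentence; the small
            # epsilon avoids division by zero for fully padded sentences.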
            mean_desc_wrep = ((desc_wrep * word_mask.dimshuffle(0, 1, 2, 'x')).sum(0) /
                (word_mask.sum(0).dimshuffle(0, 1, 'x') + 1e-8))

            proj_sx, proj_sxr = build_bidir_model(mean_desc_wrep,
                                                  sent_mask,
                                                  tparams,
                                                  options,
                                                  sfx="sent",
                                                  nsteps=slen,
                                                  truncate=options['truncate'],
                                                  name="encoder_desc_sent")

            proj_x, proj_xr = proj_sx, proj_sxr
            desc_mask = sent_mask.dimshuffle(0, 1, 'x')
        else:
            proj_x, proj_xr = proj_wx, proj_wxr
            desc_mask = word_mask.dimshuffle(0, 1, 'x')

        """
        Build question bidir RNN
        """
        proj_q, proj_qr = build_bidir_model(q, q_mask,
                                            tparams,
                                            options, sfx="word",
                                            nsteps=qlen,
                                            truncate=options['truncate'],
                                            use_dropout=options['use_dropout'],
                                            use_noise=use_noise,
                                            name="encoder_q")

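        # Concatenate forward states with the time-reversed backward states
        # so both directions are aligned at each position.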
        desc_rep = concatenate([proj_x[0],
                                proj_xr[0][::-1]],
                                axis=-1)

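        # Question representation: the forward RNN's final state concatenated
        # with the reverse RNN's final state.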
        q_rep = concatenate([proj_q[0][-1],
                            proj_qr[0][::-1][0]],
                            axis=-1)

    else:
        if options['use_sent_reps']:
            desc_wrep = desc_wrep.reshape((x.shape[0],
                                           x.shape[1],
                                           x.shape[2],
                                           -1))

            mean_desc_wrep = ((desc_wrep * word_mask.dimshuffle(0, 1, 2, 'x')).sum(0) /
                (word_mask.sum(0).dimshuffle(0, 1, 'x') + 1e-8))

            proj_sx = build_nonbidir_model(mean_desc_wrep,
                                           sent_mask,
                                           tparams,
                                           options,
                                           sfx="sent",
                                           nsteps=slen,
                                           truncate=options['truncate'],
                                           name="encoder_desc_sent")
            proj_x = proj_sx
            desc_mask = sent_mask.dimshuffle(0, 1, 'x')
        else:
            proj_x = proj_wx
            desc_mask = word_mask.dimshuffle(0, 1, 'x')
        """
        Build question bidir RNN
        """
        proj_q = build_nonbidir_model(q, q_mask,
                                            tparams,
                                            options, sfx="word",
                                            nsteps=qlen,
                                            truncate=options['truncate'],
                                            use_dropout=options['use_dropout'],
                                            use_noise=use_noise,
                                            name="encoder_q")

        desc_rep = proj_x
        q_rep = proj_q[-1]

    g_desc_ave = 0.

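    # Optional skip connection: a linear projection of the masked mean of the
    # description representation, added into the output below.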
    if options['use_desc_skip_c_g']:
        desc_mean = (desc_rep * desc_mask).sum(0) / \
                tensor.cast(desc_mask.sum(0), 'float32')

        g_desc_ave = get_layer('ff')[1](tparams,
                                        desc_mean,
                                        options,
                                        prefix='ff_out_mean_d',
                                        use_bias=False,
                                        activ='Linear')

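    # Attend over the description states, conditioned on the question
    # representation; alphas are the attention weights.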
    desc_ctx, alphas = build_attention(tparams,
                                       options,
                                       desc_rep,
                                       sent_mask if options['use_sent_reps'] else word_mask,
                                       slen if options['use_sent_reps'] else wlen,
                                       q=q_rep)

    opt_ret['dec_alphas'] = alphas
    opt_ret['desc_ctx'] = desc_ctx

    g_ctx = get_layer('ff')[1](tparams,
                               desc_ctx,
                               options,
                               prefix='ff_out_ctx',
                               use_bias=False,
                               activ='Linear')

    g_q = get_layer('ff')[1](tparams,
                             q_rep,
                             options,
                             prefix='ff_out_q',
                             activ='Linear')

    if options['use_elu_g']:
        g_out = ELU(g_ctx + g_q + g_desc_ave)
    else:
        g_out = Tanh(g_ctx + g_q + g_desc_ave)

    if options['use_dropout']:
        g_out = dropout_layer(g_out, use_noise,
                              p=options['dropout_rate'])

    logit = get_layer('ff')[1](tparams,
                               g_out,
                               options,
                               prefix='ff_logit',
                               activ='Linear')

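    # Softmax over candidate answers; a multiclass hinge loss is also
    # computed but not used in the final cost.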
    probs = Softmax(logit)
    hinge_cost = multiclass_hinge_loss(probs, y)

    # compute the cost
    cost, errors, ent_errors, ent_derrors = nll_simple(y,
                                                       probs,
                                                       cost_ent_mask=cost_mask,
                                                       cost_ent_desc_mask=em)
    # Alternative costs (unused): cost + 1e-2 * hinge_cost, or hinge_cost alone.
    vals = OrderedDict([('desc', x),
                        ('word_mask', word_mask),
                        ('q', q),
                        ('q_mask', q_mask),
                        ('ans', y),
                        ('wlen', wlen),
                        ('ent_mask', em),
                        ('qlen', qlen)])

    if options['use_sent_reps']:
        vals['slen'] = slen

    return trng, use_noise, vals, opt_ret, \
            cost, errors, ent_errors, ent_derrors, \
            probs
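
A minimal usage sketch (not from the original source): assuming tparams, options, prepare_data_fn and an iterator such as valid_iter are set up as the surrounding codebase expects, the returned symbolic variables can be compiled into Theano functions roughly as follows (names like f_cost and f_grad are illustrative):

import theano
import theano.tensor as tensor

trng, use_noise, vals, opt_ret, cost, errors, ent_errors, ent_derrors, probs = \
    build_model(tparams, options, prepare_data_fn, valid=valid_iter, cost_mask=cost_mask)

# Inputs in the order declared in vals: desc, word_mask, q, q_mask, ans,
# wlen, ent_mask, qlen (plus slen when sentence representations are used).
inps = list(vals.values())

# Cost and error rate for a batch.
f_cost = theano.function(inps, [cost, errors], name='f_cost')

# Gradients w.r.t. the model parameters, to be fed to an optimizer.
grads = tensor.grad(cost, wrt=list(tparams.values()))
f_grad = theano.function(inps, grads, name='f_grad')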