Example #1
    # Assumes `import numpy as np` and the FBBasicCompositeEncoder class are
    # available at module level; the import path depends on the project layout.
    def test_output_shape(self):
        batsize = 100        # batch size
        wordembdim = 50      # word embedding dimension
        wordencdim = 20      # word encoding dimension
        innerdim = 40        # inner (hidden) dimension
        datanuments = 77     # number of entities, used as the output dimension
        vocnumwords = 100    # word vocabulary size
        numchars = 10        # character vocabulary size
        wseqlen = 3          # word sequence length
        cseqlen = 5          # character sequence length per word

        m = FBBasicCompositeEncoder(
            wordembdim=wordembdim,
            wordencdim=wordencdim,
            innerdim=innerdim,
            outdim=datanuments,
            numchars=numchars,
            numwords=vocnumwords,
            glovepath="../../../data/glove/miniglove.%dd.txt",   # %d placeholder, presumably filled with the embedding dimension
        )

        # Input layout: for every word position, slot 0 on the last axis holds
        # the word index and the remaining cseqlen slots hold character indices.
        worddata = np.random.randint(0, vocnumwords, (batsize, wseqlen, 1))
        chardata = np.random.randint(0, numchars, (batsize, wseqlen, cseqlen))
        data = np.concatenate([worddata, chardata], axis=2)

        # The encoder should produce one value per output entity for each example.
        predshape = m.predict(data).shape
        self.assertEqual(predshape, (batsize, datanuments))
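The concatenation above packs, for every word position, the word index and that
word's character indices into a single row along the last axis. A small
standalone numpy sketch of that input layout (sizes are illustrative, not taken
from the original example):

import numpy as np

batsize, wseqlen, cseqlen = 4, 3, 5
vocnumwords, numchars = 100, 10

worddata = np.random.randint(0, vocnumwords, (batsize, wseqlen, 1))     # word ids
chardata = np.random.randint(0, numchars, (batsize, wseqlen, cseqlen))  # char ids per word
data = np.concatenate([worddata, chardata], axis=2)

print(data.shape)      # (4, 3, 6): slot 0 is the word id, slots 1..5 are char ids
print(data[0, 0, 0])   # word id of the first word of the first example
print(data[0, 0, 1:])  # character ids of that word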
Example #2
# Requires the project's loadlexdata, FBBasicCompositeEncoder and ticktock helpers
# (import paths depend on the repository layout) plus the listed data files.
def run(
        epochs=100,
        lr=0.5,            # Adagrad learning rate
        wreg=0.0001,       # weight regularization (not used in this script)
        numbats=100,       # number of batches
        fblexpath="../../data/freebase/labelsrevlex.map.sample",
        glovepath="../../data/glove/glove.6B.50d.txt",
        fbentdicp="../../data/freebase/entdic.all.map",
        numwords=10,       # passed to loadlexdata
        numchars=30,       # passed to loadlexdata
        wordembdim=50,     # word embedding dimension
        wordencdim=100,    # word encoding dimension
        innerdim=300,      # inner (hidden) dimension
        wordoffset=1,      # passed to loadlexdata
        validinter=3,      # validation interval (epochs)
        gradnorm=1.0,      # gradient total-norm clipping threshold
        validsplit=100,    # number of splits for the automatic validation split
    ):
    tt = ticktock("fblextransrun")

    traindata, golddata, vocnuments, vocnumwords, datanuments = \
        loadlexdata(glovepath, fbentdicp, fblexpath, wordoffset, numwords, numchars)

    tt.tock("made data").tick()

    # define model
    m = FBBasicCompositeEncoder(
        wordembdim=wordembdim,
        wordencdim=wordencdim,
        innerdim=innerdim,
        outdim=datanuments,
        numchars=128,               # ASCII
        numwords=vocnumwords,
    )

    #wenc = WordEncoderPlusGlove(numchars=numchars, numwords=vocnumwords, encdim=wordencdim, embdim=wordembdim)

    # train the model: Adagrad with gradient norm clipping and cross-entropy loss,
    # using an automatic validation split with accuracy as the validation metric
    tt.tick("training")
    m.train([traindata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).cross_entropy()\
        .autovalidate(splits=validsplit, random=True).validinter(validinter).accuracy()\
        .train(numbats, epochs)
    tt.tock("trained").tick("predicting")
    print(m.predict(traindata).shape)
    tt.tock("predicted sample")