Пример #1
0
def main(path, draw=True):
    """Load the Brown-cluster vocabulary and run ``evalparser`` on ``path``.

    Args:
        path: Forwarded unchanged to ``evalparser`` as its ``path`` argument.
        draw: Forwarded unchanged to ``evalparser`` as its ``draw`` argument.
            Defaults to True.
    """
    with gzip.open("resources/bc3200.pickle.gz") as fin:
        # Call form of print: a single parenthesized argument prints the same
        # text on Python 2 and Python 3, whereas the bare ``print '...'``
        # statement is a SyntaxError on Python 3.
        print('Load Brown clusters for creating features ...')
        # NOTE(review): ``load`` comes from elsewhere in the file, presumably
        # a pickle loader -- only point it at trusted resource files.
        bcvocab = load(fin)
    # Reporting and the ``withdp`` option are always switched off here; only
    # ``path`` and ``draw`` are caller-controlled.
    evalparser(path=path, report=False, draw=draw,
               bcvocab=bcvocab,
               withdp=False)
Пример #2
0
def main(path, draw=True):
    """Run ``evalparser`` over ``path`` with the stored Brown clusters.

    The cluster vocabulary is read from ``resources/bc3200.pickle.gz`` and
    handed to ``evalparser`` together with ``path`` and ``draw``; ``report``
    and ``withdp`` are fixed to False.

    Args:
        path: Passed through as ``evalparser``'s ``path`` argument.
        draw: Passed through as ``evalparser``'s ``draw`` argument.
    """
    cluster_file = "resources/bc3200.pickle.gz"
    with gzip.open(cluster_file) as stream:
        print('Load Brown clusters for creating features ...')
        # NOTE(review): ``load`` is defined outside this block, presumably a
        # pickle loader -- confirm against the file's imports.
        brown_clusters = load(stream)
    evalparser(
        path=path,
        report=False,
        draw=draw,
        bcvocab=brown_clusters,
        withdp=False,
    )
Пример #3
0
    print "len(vocab) = {}".format(len(vocab))
    data = Data()
    trnM, trnL = data.loadmatrix(fdata, flabel)
    print "trnM.shape = {}".format(trnM.shape)
    idxlabelmap = reversedict(labelidxmap)
    pm = ParsingModel(vocab=vocab, idxlabelmap=idxlabelmap)
    pm.train(trnM, trnL)
    pm.savemodel("model/parsing-model.pickle.gz")


if __name__ == "__main__":
    # Script entry point: load the Brown clusters, then run whichever
    # pipeline stage below is left uncommented.
    bcvocab = None
    ## Use Brown clusters
    with gzip.open("resources/bc3200.pickle.gz") as fin:
        # Call form of print: identical output on Python 2, and not a
        # SyntaxError on Python 3 like the bare ``print "..."`` statement.
        print("Load Brown clusters for creating features ...")
        # NOTE(review): ``load`` is defined elsewhere, presumably a pickle
        # loader -- only use it on trusted resource files.
        bcvocab = load(fin)
    ## Create training data
    # (optional stage, enable by uncommenting)
    # createtrndata(path="data/training/", topn=8000, bcvocab=bcvocab)
    ## Train model
    # (optional stage, enable by uncommenting)
    # trainmodel()
    ## Evaluate model on the RST-DT test set
    evalparser(
        path="data/test/",
        report=True,
        bcvocab=bcvocab,
        draw=False,
        withdp=WITHDP,
        fdpvocab="data/resources/word-dict.pickle.gz",
        fprojmat="data/resources/projmat.pickle.gz",
    )
Пример #4
0
    D = load(gzip.open(fvocab))
    vocab, labelidxmap = D['vocab'], D['labelidxmap']
    print 'len(vocab) = {}'.format(len(vocab))
    data = Data()
    trnM, trnL = data.loadmatrix(fdata, flabel)
    print 'trnM.shape = {}'.format(trnM.shape)
    idxlabelmap = reversedict(labelidxmap)
    pm = ParsingModel(vocab=vocab, idxlabelmap=idxlabelmap)
    pm.train(trnM, trnL)
    pm.savemodel("model/parsing-model.pickle.gz")


if __name__ == '__main__':
    # Script entry point: load the Brown clusters, then run whichever
    # pipeline stage below is left uncommented.
    bcvocab = None
    ## Use Brown clusters
    with gzip.open("resources/bc3200.pickle.gz") as fin:
        # Call form of print: identical output on Python 2, and not a
        # SyntaxError on Python 3 like the bare ``print '...'`` statement.
        print('Load Brown clusters for creating features ...')
        # NOTE(review): ``load`` is defined elsewhere, presumably a pickle
        # loader -- only use it on trusted resource files.
        bcvocab = load(fin)
    ## Create training data
    # (optional stage, enable by uncommenting)
    # createtrndata(path="data/training/", topn=8000, bcvocab=bcvocab)
    ## Train model
    # (optional stage, enable by uncommenting)
    # trainmodel()
    ## Evaluate model on the RST-DT test set
    evalparser(path="data/test/",
               report=True,
               bcvocab=bcvocab,
               draw=False,
               withdp=WITHDP,
               fdpvocab="data/resources/word-dict.pickle.gz",
               fprojmat="data/resources/projmat.pickle.gz")