Exemplo n.º 1
0
 def __init__(self, fold):
     """Track checkpoint bookkeeping for one cross-validation fold.

     fold -- 1-based fold index; used to build the per-fold results paths.
     """
     # All saved models for this fold live under resultsDir/fold_<n>/.
     foldDir = Config.Instance().resultsDir + '/fold_' + str(fold)
     self.bestEpoch = None
     self.maxF1 = 0
     self.persistence = ModelPersistence()
     # NOTE(review): 'model_' feeds patternRegular while 'regular_' feeds
     # patternEpoch -- the names look swapped; confirm against callers.
     self.patternRegular = foldDir + '/model_'
     self.patternEpoch = foldDir + '/regular_'
     # Retention counts for the two checkpoint families.
     self.keepBestEpoch = 5
     self.keepFixedPoint = 3
     # Paths already written, oldest first, so pruning can drop extras.
     self.savedByBestEpoch = []
     self.savedForFixedPoint = []
Exemplo n.º 2
0
def readConfig(file='/srl-config.json'):
    """Initialise and return the (Config, ModelConfig) singleton pair.

    file -- path suffix of the model-config JSON, appended to srlConfig.
    """
    appConfig = Config.Instance()
    appConfig.prepare(Utils.getWorkingDirectory())

    networkConfig = ModelConfig.Instance()
    networkConfig.prepare(appConfig.srlConfig + file)
    return appConfig, networkConfig
Exemplo n.º 3
0
def prepareEmbeddings(useWiki=False):
    config = Config.Instance()
    modelConfig = ModelConfig.Instance()

    tokens = extractAllTokens(config.convertedCorpusDir+'/propbank_full.csv')
    if useWiki:
        tokens.update(extractAllTokens(config.convertedCorpusDir+'/wiki.csv'))

    print '{} tokens found'.format(len(tokens))

    predicates = extractAllTokens(config.convertedCorpusDir+'/propbank_full.csv', 'predicate')


    w2vFiles = {
        "npzFile":config.embeddingsDir+"/wordEmbeddings.npy",
        "npzModel":config.embeddingsDir+"/wordEmbeddings",
        "vecFile":__getVecFile(config.embeddingsDir, modelConfig.embeddingSize),
        "w2idxFile":config.embeddingsDir+"/vocabulary.json"
    }

    w2v = W2VModel()
    w2v.setResources(w2vFiles)
    loader = EmbeddingLoader(w2v)
    word2idx, idx2word, weights = loader.process()

    if modelConfig.embeddingType == 'w2v':
        return loader, loader

    sentHybridFiles = {
        "npzFile":config.embeddingsDir+"/sent_hybrid.npy",
        "npzModel":config.embeddingsDir+"/sent_hybrid",
        "w2idxFile":config.embeddingsDir+"/sent_hybrid.json"
    }

    sentHybrid = HybridModel()
    sentHybrid.setResources(sentHybridFiles)
    print 'creating sentence corpus'
    sentHybrid.generateCorpus(tokens, weights, word2idx)
    Hloader = EmbeddingLoader(sentHybrid)
    Hword2idx, Hidx2word, Hweights = Hloader.process()


    predHybridFiles = {
        "npzFile":config.embeddingsDir+"/pred_hybrid.npy",
        "npzModel":config.embeddingsDir+"/pred_hybrid",
        "w2idxFile":config.embeddingsDir+"/pred_hybrid.json"
    }

    predHybrid = HybridModel()
    predHybrid.setResources(predHybridFiles)
    print 'creating predicate corpus'
    predHybrid.generateCorpus(predicates, weights, word2idx)
    Ploader = EmbeddingLoader(predHybrid)
    Pword2idx, Pidx2word, Pweights = Ploader.process()


    return Hloader, Ploader
Exemplo n.º 4
0
def deleteTrainingResources(k_folds=20):
    """Delete cached feature files and embedding artifacts for a run.

    k_folds -- number of per-fold feature files to remove (folds 1..k).
    """
    # Hoist the singleton lookup instead of re-resolving it per delete.
    resourceDir = Config.Instance().resourceDir

    __delete(resourceDir + '/feature_file.npy')
    for i in xrange(1, k_folds + 1):
        __delete(resourceDir + '/feature_file_' + str(i) + '.npy')

    # Embedding artifacts produced by the hybrid/w2v loaders.
    for name in ('vocabulary.json',
                 'pred_hybrid.json', 'pred_hybrid.npy',
                 'sent_hybrid.json', 'sent_hybrid.npy',
                 'wordEmbeddings.npy'):
        __delete(resourceDir + '/embeddings/' + name)
Exemplo n.º 5
0
def getEmbeddings():
    """Load previously-prepared embedding loaders from disk.

    Returns a (sentence loader, predicate loader) tuple.  For the plain
    'w2v' embedding type a single loader serves both roles.
    """
    config = Config.Instance()
    modelConfig = ModelConfig.Instance()

    if modelConfig.embeddingType == 'w2v':
        w2vFiles = {
            "npzFile":config.embeddingsDir+"/wordEmbeddings.npy",
            "npzModel":config.embeddingsDir+"/wordEmbeddings",
            "vecFile":__getVecFile(config.embeddingsDir, modelConfig.embeddingSize),
            "w2idxFile":config.embeddingsDir+"/vocabulary.json"
        }

        w2v = W2VModel()
        w2v.setResources(w2vFiles)
        loader = EmbeddingLoader(w2v)
        # Called for its side effects (populates the loader); the
        # returned mappings were never used.
        loader.process()
        return loader, loader

    # Hybrid embeddings: separate loaders for sentences and predicates.
    sentHybridFiles = {
        "npzFile":config.embeddingsDir+"/sent_hybrid.npy",
        "npzModel":config.embeddingsDir+"/sent_hybrid",
        "w2idxFile":config.embeddingsDir+"/sent_hybrid.json"
    }

    sentHybrid = HybridModel()
    sentHybrid.setResources(sentHybridFiles)
    Hloader = EmbeddingLoader(sentHybrid)
    Hloader.process()

    predHybridFiles = {
        "npzFile":config.embeddingsDir+"/pred_hybrid.npy",
        "npzModel":config.embeddingsDir+"/pred_hybrid",
        "w2idxFile":config.embeddingsDir+"/pred_hybrid.json"
    }

    predHybrid = HybridModel()
    predHybrid.setResources(predHybridFiles)
    Ploader = EmbeddingLoader(predHybrid)
    Ploader.process()

    return Hloader, Ploader
Exemplo n.º 6
0
from model.configuration import Config
from model.configuration.model_config import ModelConfig
from utils.function_utils import Utils
from utils import extractFeaturesFromSentence, toNNFormat
from embeddings import getEmbeddings
import pandas as pd

print 'loading configuration'
config = Config.Instance()
config.prepare(Utils.getWorkingDirectory())

modelConfig = ModelConfig.Instance()
modelConfig.prepare(config.srlConfig + '/srl-config.json')
print 'configuration loaded'

sentenceLoader, predicateLoader = getEmbeddings(config,
                                                modelConfig.embeddingType)

wikiFile = pd.read_csv(config.convertedCorpusDir + '/wiki.csv')

for i in xrange(0, len(wikiFile)):
    predicate = wikiFile['predicate'][i]
    sentence = wikiFile['sentence'][i]
    convertedSentence, convertedPredicate, allCaps, firstCaps, noCaps, context, distance = extractFeaturesFromSentence(
        sentence, predicate, sentenceLoader.word2idx, predicateLoader.word2idx)
    inputSentence, inputPredicate, inputAux = toNNFormat(
        convertedSentence, convertedPredicate, allCaps, firstCaps, noCaps,
        context, distance)

    print inputSentence.shape, inputPredicate.shape, inputAux.shape
    break
Exemplo n.º 7
0
# Resume-training script fragment: reloads a saved model and continues
# training from a given epoch with a given learning rate.
# NOTE(review): 'clr' is built but never used in this visible span, and
# 'lrReducer'/'container'/'time' are defined outside it -- confirm upstream.
clr = CyclicLearningRate(base_lr=0.00020,
                         max_lr=0.0012,
                         step_size=(204. * 3),
                         mode='exp_range',
                         gamma=0.99996)
msaver = ModelEvaluation()
print 'prepared'

print 'creating neural network model'
# CLI: argv[1] = model file stem, argv[2] = starting epoch, argv[3] = lr.
# NOTE(review): 'file' shadows the Python builtin of the same name.
file = str(sys.argv[1])
startingEpoch = int(sys.argv[2])
lrDefault = float(sys.argv[3])
print 'model to load : {} - epoch : {} - lr : {}'.format(
    file, startingEpoch, lrDefault)
model = LSTMModel(ModelConfig.Instance())
# Architecture from the .json, weights from the .h5py checkpoint.
nn = model.load(Config.Instance().resultsDir + '/' + file + '.json',
                Config.Instance().resultsDir + '/' + file + '.h5py')
nn.summary()
lrReducer.setNetwork(nn)
es = EarlyStopper()
lrReducer.setLearningRate(lrDefault)
print 'model loaded'

print 'start training'

# Continue from startingEpoch up to the configured total epoch count.
number_of_epochs = ModelConfig.Instance().trainingEpochs
for epoch in xrange(startingEpoch, number_of_epochs):
    print "--------- Epoch %d -----------" % (epoch)
    start_time = time.time()
    numIterations = len(container)
Exemplo n.º 8
0
# Continue-training script fragment: batches the training data, reloads
# the epoch-50 checkpoint, and resumes training for the configured number
# of additional epochs.
# NOTE(review): 'trainingData', 'testData', 'tagMap', 'tagList', 'nnUtils',
# 'config', 'modelConfig' and 'time' are defined outside this visible span.
batcher = Batcher()
batcher.addAll(trainingData[0], trainingData[1], trainingData[2],
               trainingData[3])
container = batcher.getBatches()

inference = SRLInference(tagMap, tagList)
evaluator = Evaluator(testData, inference, nnUtils,
                      config.resultsDir + '/finalResult.json')
lrReducer = RateBasedLrReducer(modelConfig.trainingEpochs)
msaver = ModelEvaluation()
print 'prepared'

print 'creating neural network model'
# Reload the checkpoint saved after epoch 50 (architecture + weights).
mp = ModelPersistence()
nn = mp.load(
    Config.Instance().resultsDir + '/model_50.json',
    Config.Instance().resultsDir + '/model_50.h5py',
)
# Recompile: optimizer state is not restored with the weights.
nn.compile(optimizer=modelConfig.optimizer,
           loss=modelConfig.lossFunction,
           metrics=['accuracy'])
nn.summary()
lrReducer.setNetwork(nn)
print 'model loaded'

print 'start training'

# Epoch numbering resumes at 50 to line up with the loaded checkpoint.
number_of_epochs = ModelConfig.Instance().trainingEpochs
for epoch in xrange(50, 50 + number_of_epochs):
    print "--------- Epoch %d -----------" % (epoch + 1)
    start_time = time.time()