Пример #1
0
def run_experiment(dataset_id, dataset_dict, task, embeddings, mappings, data):
    """Configure, build and train a BiLSTM for one dataset/task pair.

    The network is created from the module-level ``network_params``. The
    model save path (under ``models_dir``) and the results file (under
    ``results_dir``) share the stem ``<dataset_id>_<task lowercased>``.
    """
    network = BiLSTM(network_params)
    network.setMappings(mappings, embeddings)
    network.setDataset(dataset_dict, data)

    # One stem for both output files: "<stem>.h5" and "<stem>.csv".
    stem = f'{dataset_id}_{task.lower()}'
    network.modelSavePath = models_dir / (stem + '.h5')
    network.storeResults(results_dir / (stem + '.csv'))

    network.buildModel()
    # Per the original author's note, 500 epochs is only an upper bound:
    # training is meant to be ended by early stopping, not the epoch count.
    network.fit(epochs=500)
Пример #2
0
def run_experiment(datasets_dict, lang, task, embeddings, mappings, data):
    """Configure, build and train a BiLSTM on a group of datasets.

    The experiment name is ``datasets``, prefixed with ``<lang lowercased>_``
    when ``lang`` is not None and suffixed with ``_<task lowercased>`` when
    ``task`` is not None. That name is used for both the model save path
    (under ``models_dir``) and the results file (under ``results_dir``).
    """
    network = BiLSTM(network_params)
    network.setMappings(mappings, embeddings)
    network.setDataset(datasets_dict, data)

    # Grow the name outwards from the fixed core; prefix first, then suffix.
    experiment_name = 'datasets'
    if lang is not None:
        experiment_name = f'{lang.lower()}_{experiment_name}'
    if task is not None:
        experiment_name = f'{experiment_name}_{task.lower()}'

    network.modelSavePath = models_dir / (experiment_name + '.h5')
    network.storeResults(results_dir / (experiment_name + '.csv'))

    network.buildModel()
    # Per the original author's note, 500 epochs is only an upper bound:
    # training is meant to be ended by early stopping, not the epoch count.
    network.fit(epochs=500)
# TODO: replace customClassifier with main task + auxiliary tasks
# Target task entry: ('LSTM', 100) followed by 'CRF'; each auxiliary task: 'CRF' only.
# An auxiliary task with the same name as the target task overrides its entry.
custom_classifier = {target_task: [('LSTM', 100), 'CRF']}
for task in aux_task:
    custom_classifier[task] = ['CRF']

params = {'classifier': ['CRF'], 'LSTM-Size': [100], 'dropout': (0.25, 0.25)}
params.update({
    'charEmbeddings': 'CNN',
    'customClassifier': custom_classifier,
})

model = BiLSTM(params)

model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# All outputs live under <root_dir_result>/<directory_name>/.
result_dir_parts = [args.root_dir_result, args.directory_name]

# Performance scores for dev / test
model.storeResults("/".join(result_dir_parts + ["performance.out"]))

# Predictions
model.predictionSavePath = "/".join(
    result_dir_parts + ["predictions", "[ModelName]_[Data].conll"])

# Saved models
model.modelSavePath = "/".join(result_dir_parts + ["models/[ModelName].h5"])

model.fit(epochs=args.nb_epoch)
Пример #4
0
# Preprocess the datasets; the result is handed to loadDatasetPickle below.
pickleFile = perpareDataset(embeddingsPath, datasets)

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters: core settings first, character-embedding settings after.
params = {'classifier': ['CRF'], 'LSTM-Size': [500], 'dropout': (0.25, 0.25)}
params.update({
    'charEmbeddings': 'LSTM',
    'maxCharLength': 150,
    'charEmbeddingsSize': 200,
    'charLSTMSize': 200,
    'charFilterLength': 20,
})

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# Performance scores for dev / test are written here.
model.storeResults('results/sentiment_results.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=70)
        'evaluate': True,  # Set true always for single task setups
        'commentSymbol': None
    }  # Lines in the input data starting with this string will be skipped
}

# Local path of the word-embeddings file. According to the original note,
# the Komninos et al. embeddings are downloaded automatically if missing.
embeddingsPath = 'komninos_english_embeddings.gz'

# Preprocess the data for the LSTM network. According to the original note,
# this creates and stores cPickle files in the pkl/ folder.
pickleFile = prepareDataset(embeddingsPath, datasets)

# ---------------------------------------------------
# 2. Network training
# ---------------------------------------------------

# Load the word embeddings and the training data from the pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# Performance scores for dev/test are written here.
model.storeResults('results/unidep_pos_results.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=10)
Пример #6
0
# Local path of the word-embeddings file. According to the original note,
# the Komninos et al. embeddings are downloaded automatically if missing.
embeddingsPath = 'komninos_english_embeddings.gz'

# Preprocess the data for the LSTM network. According to the original note,
# this creates and stores cPickle files in the pkl/ folder.
pickleFile = perpareDataset(embeddingsPath, datasets)

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100, 100],
    'dropout': (0.25, 0.25),
}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# Performance scores for dev / test are written here.
model.storeResults('results/conll2000_chunking.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=25)



Пример #7
0
# Alternative embeddings file (disabled):
# embeddingsPath =  'embeddings.vec'

# Preprocess the data for the LSTM network. According to the original note,
# this creates and stores cPickle files in the pkl/ folder.
# useExistent=False is passed explicitly here.
pickleFile = perpareDataset(embeddingsPath, datasets, useExistent=False)

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100, 100],
    'dropout': (0.5, 0.5),
    'charEmbeddings': 'LSTM',
    'optimizer': 'adam',
    'featureNames': ['tokens', 'casing'],
}

MODEL = BiLSTM(params)
MODEL.setMappings(mappings, embeddings)
MODEL.setDataset(datasets, data)

# Performance scores for dev / test are written here.
MODEL.storeResults('results/Jurica_NER.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
MODEL.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

MODEL.fit(epochs=100)



Пример #8
0
# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters: core and training settings first,
# character-embedding settings after.
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [1024],
    'dropout': 0,
    'featureNames': ['tokens'],
    'miniBatchSize': 8,
    'earlyStopping': 10,
}
params.update({
    'charEmbeddings': 'lstm',
    'charEmbeddingsSize': 500,
    'maxCharLength': 500,
    'charLSTMSize': 50,
})

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# Performance scores for dev / test are written here.
model.storeResults('results/emo_01_results_laser_0.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=50)
# Local path of the word-embeddings file. According to the original note,
# the Komninos et al. embeddings are downloaded automatically if missing.
embeddingsPath = 'komninos_english_embeddings.gz'

# Preprocess the data for the LSTM network. According to the original note,
# this creates and stores cPickle files in the pkl/ folder.
pickleFile = perpareDataset(embeddingsPath, datasets)

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters
params = {'classifier': ['CRF'], 'LSTM-Size': [100, 100], 'dropout': (0.25, 0.25)}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

# Performance scores for dev / test are written here.
model.storeResults('results/quote_direct.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

model.fit(epochs=20)
Пример #10
0
# Network parameters; most values come from variables defined earlier in the script.
params = {
    'dropout': dropout, 'classifier': 'CRF', 'LSTM-Size': layers,
    'optimizer': optimizer, 'charEmbeddings': charEmbedding,
    'miniBatchSize': 32, 'detailedOutput': detailedPath,
}

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Load the embeddings and the datasets from the already created pickle file,
# then pick the dataset to work on.
embeddings, word2Idx, datasets = loadDatasetPickle(pickledData)
data = datasets[datasetName]

model = BiLSTM(params)
# NOTE(review): the arguments here are (embeddings, mappings), the reverse of
# the setMappings(mappings, embeddings) order used by other scripts in this
# collection. Presumably a different BiLSTM version; confirm.
model.setMappings(embeddings, data['mappings'])
model.setTrainDataset(data, labelKey)
model.verboseBuild = True

# Enable the next line to save the model to disk:
#model.modelSavePath = "models/%s/[DevScore]_[TestScore]_[Epoch].h5" % modelName

model.storeResults(resultsPath)

# Per the original note, the argument is the batch size.
model.evaluate(50)
# Preprocess the data for the LSTM network. According to the original note,
# this creates and stores cPickle files in the pkl/ folder.
# useExistent=False is passed explicitly here.
pickleFile = perpareDataset(embeddingsPath, datasets, useExistent=False)

# ---------------------------------------------------
# Network training
# ---------------------------------------------------

# Embeddings, mappings and data all come from the prepared pickle.
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters
params = {'classifier': ['CRF'], 'LSTM-Size': [100, 100], 'dropout': (0.5, 0.5),
          'charEmbeddings': 'LSTM', 'optimizer': 'adam',
          'featureNames': ['tokens', 'casing']}

MODEL = BiLSTM(params)
MODEL.setMappings(mappings, embeddings)
MODEL.setDataset(datasets, data)

# Performance scores for dev / test are written here.
MODEL.storeResults('results/Conseil_NER.csv')

# Where models are stored; the bracketed tokens look like placeholders
# substituted by BiLSTM when saving (confirm against the library).
MODEL.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"

MODEL.fit(epochs=100)