Example #1
    def on_epoch_end(self, epoch, logs=None):
        y_prob = self.model.predict(self.X_test)
        np.save(self.saveDir + 'epoch{:0>2}.npy'.format(epoch), y_prob)

        y_predict = MyCorpus.topK(y_prob, topK=1)

        predictions = MyCorpus.oneHotDecode(y_predict)
        ground_truth = MyCorpus.oneHotDecode(self.y_test)
        metrics = evaluate(predictions, ground_truth)
        logging.info('acc, ma_f1, mi_f1: {}, {}, {}'.format(
            metrics[0], metrics[3], metrics[6]))
        with open(self.saveDir + 'metric.txt', 'a') as metricFile:
            metricFile.write('\t'.join(map(str, metrics)) + '\n')
        self.history.append(metrics)
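
The callback above delegates decoding to MyCorpus.topK and MyCorpus.oneHotDecode, whose implementations are not included in these excerpts. A minimal sketch of what they are assumed to do, inferred only from how they are called here (the function bodies are assumptions, not the project's actual code):

import numpy as np

def topK(y_prob, topK=1):
    # Assumed behaviour: keep the topK highest-probability labels per row
    # as a multi-hot matrix, zeros elsewhere.
    y_predict = np.zeros_like(y_prob)
    rows = np.arange(y_prob.shape[0])[:, None]
    cols = np.argsort(y_prob, axis=1)[:, -topK:]
    y_predict[rows, cols] = 1
    return y_predict

def oneHotDecode(y_onehot):
    # Assumed behaviour: turn each multi-hot row into the list of active
    # label indices, so predictions and ground truth can be compared.
    return [list(np.nonzero(row)[0]) for row in y_onehot]
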
Example #2
def getRawLabels():
    global trainFilePath, testFilePath, corpus
    _, y_train = corpus.loadFile(filePath=trainFilePath)
    predictions = MyCorpus.oneHotDecode(y_train)
    with open('raw.pkl', 'wb') as outputFile:
        pickle.dump(predictions, outputFile, pickle.HIGHEST_PROTOCOL)
    with open(dataSetName + 'Label2idx.pkl', 'wb') as outputFile:
        pickle.dump(corpus.label2idx, outputFile, pickle.HIGHEST_PROTOCOL)
Example #3
def stat1():
    from util.corpus import MyCorpus
    from eval.evaluation import evaluate

    dataSetName = 'BBN'
    trainFilePath = 'data/{}/train.json'.format(dataSetName)
    testFilePath = 'data/{}/test.json'.format(dataSetName)

    corpus = MyCorpus(filePathList=[trainFilePath, testFilePath])
    X_train, y_train = corpus.loadFile(filePath=trainFilePath)
    X_test, y_test = corpus.loadFile(filePath=testFilePath)

    y = np.load('10_24_17_32/epoch17.npy')
    ground = MyCorpus.oneHotDecode(y_test)

    scores = []
    outputFile = open('a.txt', 'w', encoding='utf-8')
    for threshold in np.arange(0.3, 0.6, 0.01):
        y1 = MyCorpus.threshold(y, threshold)
        y1 = MyCorpus.oneHotDecode(y1)
        score = evaluate(y1, ground)
        scores.append((threshold, score))
        outputFile.write('%.3f\t%.3f\t%.3f\t%.3f\n' %
                         (threshold, score[0], score[3], score[6]))

    print(scores)
    print('Max: ' +
          str(max(scores, key=lambda kv: sum([kv[1][0], kv[1][3], kv[1][6]]))))
    outputFile.close()
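
stat1 sweeps a decision threshold over probabilities saved by the callback and reports score[0], score[3] and score[6], which the logging calls elsewhere label acc, ma_f1 and mi_f1. MyCorpus.threshold itself is not shown; a hedged sketch of the assumed binarisation (the argmax fallback for empty rows is a guess, not confirmed by the source):

import numpy as np

def threshold(y_prob, threshold=0.5):
    # Assumed behaviour: mark every label whose probability exceeds the
    # cut-off.
    y_predict = (y_prob > threshold).astype(y_prob.dtype)
    # Guessed fallback: rows left empty keep their single most probable
    # label, so every mention still receives at least one type.
    empty = y_predict.sum(axis=1) == 0
    y_predict[empty, y_prob[empty].argmax(axis=1)] = 1
    return y_predict
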
Example #4
    def on_epoch_end(self, epoch, logs=None):
        y_prob = self.model.predict(self.X_test)
        np.save(self.saveDir + 'epoch{:0>2}.npy'.format(epoch), y_prob)

        scores = []
        for threshold in np.arange(0.3, 0.6, 0.1):
            y_predict = MyCorpus.threshold(y_prob, threshold=threshold)

            predictions = MyCorpus.oneHotDecode(y_predict)
            ground_truth = MyCorpus.oneHotDecode(self.y_test)
            metrics = evaluate(predictions, ground_truth)
            scores.append(metrics)
        maxScore = max(scores, key=lambda kv: sum([kv[0], kv[3], kv[6]]))
        logging.info('acc, ma_f1, mi_f1: {}, {}, {}'.format(
            maxScore[0], maxScore[3], maxScore[6]))
        with open(self.saveDir + 'metric.txt', 'a') as metricFile:
            metricFile.write('\t'.join(map(str, maxScore)) + '\n')
        self.history.append(maxScore)
Example #5
def trainLSTMSingle():
    global trainFilePath, testFilePath, corpus

    X_train, y_train = corpus.loadFileSingleSent(filePath=trainFilePath)
    X_test, y_test = corpus.loadFileSingleSent(filePath=testFilePath)
    model = lstmSingle(corpus)
    metricHistory = MetricHistory(X_test, y_test)
    model.fit(X_train,
              y_train,
              epochs=10,
              batch_size=128,
              validation_split=0.1,
              shuffle=True,
              callbacks=[metricHistory])
    y_prob = model.predict(X_train)
    y_predict = MyCorpus.hybrid(y_prob, threshold=0.5)

    predictions = MyCorpus.oneHotDecode(y_predict)
    with open(metricHistory.saveDir + 'LSTMSingle.pkl', 'wb') as outputFile:
        pickle.dump(predictions, outputFile, pickle.HIGHEST_PROTOCOL)
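
trainLSTMSingle decodes the training-set predictions with MyCorpus.hybrid(y_prob, threshold=0.5). That method is also not shown; judging by its name it likely mixes top-1 and threshold decoding. A short sketch of that guess, not the project's actual implementation:

import numpy as np

def hybrid(y_prob, threshold=0.5):
    # Guessed behaviour: always keep the single most probable label, plus
    # any additional labels whose probability exceeds the threshold.
    y_predict = (y_prob > threshold).astype(y_prob.dtype)
    y_predict[np.arange(y_prob.shape[0]), y_prob.argmax(axis=1)] = 1
    return y_predict
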
Example #6
from util.corpus import MyCorpus
from util.callback import MetricHistory

# :: Logging level ::
logging.basicConfig(
    level=logging.INFO,
    format=
    '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

dataSetName = 'Wiki'
trainFilePath = 'data/{}/train.json'.format(dataSetName)
testFilePath = 'data/{}/test.json'.format(dataSetName)

corpus = MyCorpus(filePathList=[trainFilePath, testFilePath])
X_train, y_train = corpus.loadFile(filePath=trainFilePath)
X_test, y_test = corpus.loadFile(filePath=testFilePath)

modelName = dataSetName + '_bilstm.h5'

if len(sys.argv) > 1 and sys.argv[1] == 'eval':
    model = load_model(modelName)
else:
    if os.path.exists(modelName):
        model = load_model(modelName)
    else:
        model = lstm(corpus)
    metricHistory = MetricHistory(X_test, y_test)
    model.fit(X_train,
              y_train,
Example #7
from util.corpus import MyCorpus
from util.callback import MetricHistory, MetricHistorySoftmax

# :: Logging level ::
logging.basicConfig(
    level=logging.INFO,
    format=
    '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

dataSetName = 'baike'
trainFilePath = 'data/{}/train.json'.format(dataSetName)
testFilePath = 'data/{}/test.json'.format(dataSetName)

corpus = MyCorpus(filePathList=[trainFilePath, testFilePath])


def getRawLabels():
    global trainFilePath, testFilePath, corpus
    _, y_train = corpus.loadFile(filePath=trainFilePath)
    predictions = MyCorpus.oneHotDecode(y_train)
    with open('raw.pkl', 'wb') as outputFile:
        pickle.dump(predictions, outputFile, pickle.HIGHEST_PROTOCOL)
    with open(dataSetName + 'Label2idx.pkl', 'wb') as outputFile:
        pickle.dump(corpus.label2idx, outputFile, pickle.HIGHEST_PROTOCOL)


def trainLSTM():
    global trainFilePath, testFilePath, corpus