def on_epoch_end(self, epoch, logs={}):
    """Score the model on the stored test set at the end of an epoch.

    Steps performed, in order:
      1. Predict probabilities for ``self.X_test`` and save them to
         ``<saveDir>epochNN.npy`` (epoch number zero-padded to two digits).
      2. Reduce the probabilities with ``MyCorpus.topK(..., topK=1)`` and
         decode both predictions and ``self.y_test`` with
         ``MyCorpus.oneHotDecode``.
      3. Score them with ``evaluate`` and log entries 0, 3 and 6 of the
         result (labelled ``acc``, ``ma_f1``, ``mi_f1`` in the log line).
      4. Append the full metric row, tab-separated, to
         ``<saveDir>metric.txt`` and to ``self.history``.

    Args:
        epoch: Index of the epoch that just finished; only used to name the
            saved probability file.
        logs: Not read by this method; accepted so the signature matches what
            the training loop passes to the callback.
    """
    y_prob = self.model.predict(self.X_test)
    np.save(self.saveDir + 'epoch{:0>2}.npy'.format(epoch), y_prob)

    y_predict = MyCorpus.topK(y_prob, topK=1)
    predictions = MyCorpus.oneHotDecode(y_predict)
    ground_truth = MyCorpus.oneHotDecode(self.y_test)
    metrics = evaluate(predictions, ground_truth)

    # One placeholder per value: the previous single '{}' silently dropped
    # the second and third arguments, so only the first metric was logged.
    logging.info('acc, ma_f1, mi_f1: {}, {}, {}'.format(
        metrics[0], metrics[3], metrics[6]))

    with open(self.saveDir + 'metric.txt', 'a') as metricFile:
        metricFile.write('\t'.join(map(str, metrics)) + '\n')
    self.history.append(metrics)
def getRawLabels():
    """Pickle the decoded training labels and the corpus label index.

    Reads the training file through the module-level ``corpus``, decodes its
    label matrix with ``MyCorpus.oneHotDecode`` and writes the result to
    ``raw.pkl``. The ``corpus.label2idx`` mapping is then written to
    ``<dataSetName>Label2idx.pkl``. Both files use the highest pickle
    protocol. Returns nothing.
    """
    global trainFilePath, testFilePath, corpus

    def _dump(payload, path):
        # Serialise one object to ``path`` with the highest pickle protocol.
        with open(path, 'wb') as sink:
            pickle.dump(payload, sink, pickle.HIGHEST_PROTOCOL)

    # Only the label half of the training data is needed here.
    _, encoded_labels = corpus.loadFile(filePath=trainFilePath)
    decoded_labels = MyCorpus.oneHotDecode(encoded_labels)

    _dump(decoded_labels, 'raw.pkl')
    _dump(corpus.label2idx, dataSetName + 'Label2idx.pkl')
def stat1():
    """Sweep decision thresholds over saved BBN test predictions.

    Loads the probabilities stored in ``10_24_17_32/epoch17.npy`` and, for
    every threshold in ``np.arange(0.3, 0.6, 0.01)``, applies
    ``MyCorpus.threshold``, decodes the result and scores it with
    ``evaluate`` against the decoded test labels. One tab-separated row per
    threshold (threshold, score[0], score[3], score[6]) is written to
    ``a.txt``. Afterwards the full score list and the entry with the largest
    ``score[0] + score[3] + score[6]`` are printed.
    """
    from util.corpus import MyCorpus
    from eval.evaluation import evaluate

    dataSetName = 'BBN'
    trainFilePath = 'data/{}/train.json'.format(dataSetName)
    testFilePath = 'data/{}/test.json'.format(dataSetName)
    corpus = MyCorpus(filePathList=[trainFilePath, testFilePath])

    # The training split is still loaded, as before, in case loadFile has
    # side effects on the corpus; its return value is not needed here.
    corpus.loadFile(filePath=trainFilePath)
    _, y_test = corpus.loadFile(filePath=testFilePath)

    y = np.load('10_24_17_32/epoch17.npy')
    ground = MyCorpus.oneHotDecode(y_test)

    scores = []
    # 'with' guarantees a.txt is flushed and closed even if evaluation
    # raises part-way through the sweep.
    with open('a.txt', 'w', encoding='utf-8') as outputFile:
        for threshold in np.arange(0.3, 0.6, 0.01):
            y1 = MyCorpus.threshold(y, threshold)
            y1 = MyCorpus.oneHotDecode(y1)
            score = evaluate(y1, ground)
            scores.append((threshold, score))
            outputFile.write('%.3f\t%.3f\t%.3f\t%.3f\n'
                             % (threshold, score[0], score[3], score[6]))

    print(scores)
    print('Max: ' + str(max(
        scores, key=lambda kv: sum([kv[1][0], kv[1][3], kv[1][6]]))))
def on_epoch_end(self, epoch, logs={}):
    """Score the model on the stored test set, picking the best threshold.

    Steps performed, in order:
      1. Predict probabilities for ``self.X_test`` and save them to
         ``<saveDir>epochNN.npy`` (epoch number zero-padded to two digits).
      2. For each threshold in ``np.arange(0.3, 0.6, 0.1)``, apply
         ``MyCorpus.threshold``, decode, and score with ``evaluate``.
      3. Keep the metric row whose entries 0, 3 and 6 have the largest sum,
         and log those three entries (labelled ``acc``, ``ma_f1``,
         ``mi_f1`` in the log line).
      4. Append that row, tab-separated, to ``<saveDir>metric.txt`` and to
         ``self.history``.

    Args:
        epoch: Index of the epoch that just finished; only used to name the
            saved probability file.
        logs: Not read by this method; accepted so the signature matches what
            the training loop passes to the callback.
    """
    y_prob = self.model.predict(self.X_test)
    np.save(self.saveDir + 'epoch{:0>2}.npy'.format(epoch), y_prob)

    # The reference labels do not depend on the threshold, so decode once.
    # NOTE(review): assumes MyCorpus.oneHotDecode has no side effects.
    ground_truth = MyCorpus.oneHotDecode(self.y_test)

    scores = []
    for threshold in np.arange(0.3, 0.6, 0.1):
        y_predict = MyCorpus.threshold(y_prob, threshold=threshold)
        predictions = MyCorpus.oneHotDecode(y_predict)
        scores.append(evaluate(predictions, ground_truth))

    maxScore = max(scores, key=lambda kv: sum([kv[0], kv[3], kv[6]]))

    # One placeholder per value: the previous single '{}' silently dropped
    # the second and third arguments, so only the first metric was logged.
    logging.info('acc, ma_f1, mi_f1: {}, {}, {}'.format(
        maxScore[0], maxScore[3], maxScore[6]))

    with open(self.saveDir + 'metric.txt', 'a') as metricFile:
        metricFile.write('\t'.join(map(str, maxScore)) + '\n')
    self.history.append(maxScore)
def trainLSTMSingle():
    """Train the single-sentence LSTM and pickle its training-set predictions.

    Loads train and test data through ``corpus.loadFileSingleSent``, builds
    the model with ``lstmSingle(corpus)`` and fits it for 10 epochs (batch
    size 128, 10% validation split, shuffled) with a ``MetricHistory``
    callback constructed from the test data. The fitted model then predicts
    on the training inputs; the probabilities go through
    ``MyCorpus.hybrid(..., threshold=0.5)`` and ``MyCorpus.oneHotDecode``, and
    the decoded result is pickled to ``<saveDir>LSTMSingle.pkl``, where
    ``saveDir`` comes from the callback. Returns nothing.
    """
    global trainFilePath, testFilePath, corpus

    train_inputs, train_labels = corpus.loadFileSingleSent(filePath=trainFilePath)
    test_inputs, test_labels = corpus.loadFileSingleSent(filePath=testFilePath)

    network = lstmSingle(corpus)
    tracker = MetricHistory(test_inputs, test_labels)
    network.fit(
        train_inputs,
        train_labels,
        epochs=10,
        batch_size=128,
        validation_split=0.1,
        shuffle=True,
        callbacks=[tracker],
    )

    # Predictions are taken over the training inputs, not the test inputs.
    train_probabilities = network.predict(train_inputs)
    decoded = MyCorpus.oneHotDecode(
        MyCorpus.hybrid(train_probabilities, threshold=0.5)
    )

    target_path = tracker.saveDir + 'LSTMSingle.pkl'
    with open(target_path, 'wb') as sink:
        pickle.dump(decoded, sink, pickle.HIGHEST_PROTOCOL)
from util.corpus import MyCorpus from util.callback import MetricHistory # :: Logging level :: logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', ) dataSetName = 'Wiki' trainFilePath = 'data/{}/train.json'.format(dataSetName) testFilePath = 'data/{}/test.json'.format(dataSetName) corpus = MyCorpus(filePathList=[trainFilePath, testFilePath]) X_train, y_train = corpus.loadFile(filePath=trainFilePath) X_test, y_test = corpus.loadFile(filePath=testFilePath) modelName = dataSetName + '_bilstm.h5' if len(sys.argv) > 1 and sys.argv[1] == 'eval': model = load_model(modelName) else: if os.path.exists(modelName): model = load_model(modelName) else: model = lstm(corpus) metricHistory = MetricHistory(X_test, y_test) model.fit(X_train, y_train,
from util.corpus import MyCorpus from util.callback import MetricHistory, MetricHistorySoftmax # :: Logging level :: logging.basicConfig( level=logging.INFO, format= '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', ) dataSetName = 'baike' trainFilePath = 'data/{}/train.json'.format(dataSetName) testFilePath = 'data/{}/test.json'.format(dataSetName) corpus = MyCorpus(filePathList=[trainFilePath, testFilePath]) def getRawLabels(): global trainFilePath, testFilePath, corpus _, y_train = corpus.loadFile(filePath=trainFilePath) predictions = MyCorpus.oneHotDecode(y_train) with open('raw.pkl', 'wb') as outputFile: pickle.dump(predictions, outputFile, pickle.HIGHEST_PROTOCOL) with open(dataSetName + 'Label2idx.pkl', 'wb') as outputFile: pickle.dump(corpus.label2idx, outputFile, pickle.HIGHEST_PROTOCOL) def trainLSTM(): global trainFilePath, testFilePath, corpus