def main():
    """Load the data, predict test tag sequences, and save them to CSV.

    Both loaders are called with ``HMM=True``.  Only the word lists and the
    training label list are passed to ``getTestPreds``; the two POS lists are
    unpacked but unused by the active call (they appear only in the
    commented-out alternatives below).
    """
    train_words, train_pos, train_labels = getTrainingData(HMM=True)
    test_words, test_pos, test_indices = getTestData(HMM=True)

    # Alternative calls kept for reference:
    # predicted = getTestPreds(train_pos, train_labels, test_pos, smooth=True)
    # predicted = getTestPreds(train_words, train_labels, test_words, smooth=True)
    predicted = getTestPreds(
        train_words,
        train_labels,
        test_words,
        smooth=True,
        similarity=True,
    )

    # Format the predictions against the test index list, then write them out.
    savePredictionsToCSV(formatTestPreds(predicted, test_indices))
def main():
    """Run the similarity-based prediction pipeline and save the result to CSV.

    Steps, in order:
      1. Load training and test data (both loaders are called with
         ``HMM=True``); the middle value returned by ``getTrainingData`` is
         ignored.
      2. Read the JSON file ``Training_Test_Data/text_features`` located
         under ``dir_path``.
      3. Call ``getTestPreds`` with ``smooth=None`` and
         ``similarity_based=True``, passing the loaded JSON object and the
         test POS list.
      4. Format the predictions with the test index list and write them out.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    # Use a context manager so the file handle is closed as soon as the JSON
    # has been parsed instead of being left to the garbage collector.
    with open(dir_path + 'Training_Test_Data/{0}'.format(feature_type)) as prob_file:
        low_frequency_probabilities = json.load(prob_file)

    tag_seq_preds = getTestPreds(
        train_word_list,
        train_ne_list,
        test_word_list,
        low_frequency_probabilities,
        smooth=None,
        similarity_based=True,
        test_pos_list=test_pos_list,
    )
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    """Run the similarity-based prediction pipeline and save the result to CSV.

    Both loaders are called with ``HMM=True``; the second value returned by
    ``getTrainingData`` is discarded.  The JSON file
    ``Training_Test_Data/text_features`` under ``dir_path`` is parsed and
    handed to ``getTestPreds`` together with the word lists, the training
    label list and the test POS list (``smooth=None``,
    ``similarity_based=True``).  The predictions are then formatted with the
    test index list and written out by ``savePredictionsToCSV``.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    probabilities_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # The original passed a bare open() to json.load and never closed it;
    # the with-block releases the handle deterministically.
    with open(probabilities_path) as probabilities_file:
        low_frequency_probabilities = json.load(probabilities_file)

    tag_seq_preds = getTestPreds(
        train_word_list,
        train_ne_list,
        test_word_list,
        low_frequency_probabilities,
        smooth=None,
        similarity_based=True,
        test_pos_list=test_pos_list,
    )
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
__author__ = 'Jonathan Simon'

''' Repurposed plotting code found here: http://matplotlib.org/examples/pylab_examples/barchart_demo.html '''

__author__ = 'Jonathan Simon'

from DataProcessing.LoadData import getTrainingData
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Load the training split; only the third list is consumed below.
train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True)

# Tally every label across all training sentences.
ne_counter = Counter(ne for sentence in train_ne_list for ne in sentence)

# Parallel tuples of labels and their counts, most frequent first.
ne_names, ne_counts = zip(*ne_counter.most_common())
ne_count_total = sum(ne_counter.values())

# Relative frequency of each label, in the same order as ne_names.
ne_pos_probs = [float(x) / ne_count_total for x in ne_counts]

# fig, ax = plt.subplots()
fig = plt.figure(figsize=(14, 6))

# One slot fewer than there are distinct labels -- presumably because the most
# frequent label is dropped when plotting (see the commented-out
# `ne_pos_probs[1:]` below); TODO confirm.
index = np.arange(len(ne_names) - 1)
bar_width = 0.8
opacity = 0.4

# rects1 = plt.bar(index, train_pos_counts, bar_width,
# rects1 = plt.bar(index, ne_pos_probs[1:], bar_width,
def main():
    """Load the training and test data, make predictions, and save them to CSV.

    ``getTrainingData`` is unpacked into two values and ``getTestData`` into
    three; all five are forwarded to ``makePredictions``, whose result goes
    straight to ``savePredictionsToCSV``.
    """
    train_data, key_size_limit = getTrainingData()
    test_context, test_pos, test_index = getTestData()

    savePredictionsToCSV(
        makePredictions(
            train_data,
            test_context,
            test_pos,
            test_index,
            key_size_limit,
        )
    )
def main():
    """Load the training and test data, make predictions, and save them to CSV.

    The single value returned by ``getTrainingData`` and the three values
    unpacked from ``getTestData`` are forwarded to ``makePredictions``; its
    result is written out by ``savePredictionsToCSV``.
    """
    train_data = getTrainingData()
    test_context, test_pos, test_index = getTestData()

    savePredictionsToCSV(
        makePredictions(train_data, test_context, test_pos, test_index)
    )
__author__ = 'Jonathan Simon'

''' Repurposed plotting code found here: http://matplotlib.org/examples/pylab_examples/barchart_demo.html '''

__author__ = 'Jonathan Simon'

from DataProcessing.LoadData import getTrainingData
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Load the training split; only the third list is consumed below.
train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True)

# Tally every label across all training sentences.
ne_counter = Counter(ne for sentence in train_ne_list for ne in sentence)

# Parallel tuples of labels and their counts, most frequent first.
ne_names, ne_counts = zip(*ne_counter.most_common())
ne_count_total = sum(ne_counter.values())

# Relative frequency of each label, in the same order as ne_names.
ne_pos_probs = [float(x) / ne_count_total for x in ne_counts]

# fig, ax = plt.subplots()
fig = plt.figure(figsize=(14, 6))

# One slot fewer than there are distinct labels -- TODO confirm which label
# is meant to be left out of the plot.
index = np.arange(len(ne_names) - 1)
bar_width = 0.8
opacity = 0.4

# rects1 = plt.bar(index, train_pos_counts, bar_width,