def main():
    # Load the training and test sets in the word / POS / NE list format
    # expected by the HMM pipeline.
    train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    # Earlier variants: predict from POS tags alone, or from words without
    # the similarity option.
    # tag_seq_preds = getTestPreds(train_pos_list, train_ne_list, test_pos_list, smooth=True)
    # tag_seq_preds = getTestPreds(train_word_list, train_ne_list, test_word_list, smooth=True)
    tag_seq_preds = getTestPreds(train_word_list,
                                 train_ne_list,
                                 test_word_list,
                                 smooth=True,
                                 similarity=True)

    # Convert the predicted tag sequences into rows keyed by test index and save.
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    # Pre-computed probabilities for low-frequency words, stored as JSON under
    # Training_Test_Data/; dir_path is assumed to be defined at module level.
    feature_type = 'text_features'
    with open(dir_path + 'Training_Test_Data/{0}'.format(feature_type)) as f:
        low_frequency_probabilities = json.load(f)

    # Similarity-based prediction: no smoothing, with the test POS tags and the
    # low-frequency probabilities passed in as additional evidence.
    tag_seq_preds = getTestPreds(train_word_list,
                                 train_ne_list,
                                 test_word_list,
                                 low_frequency_probabilities,
                                 smooth=None,
                                 similarity_based=True,
                                 test_pos_list=test_pos_list)
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
'''
Repurposed plotting code found here:
http://matplotlib.org/examples/pylab_examples/barchart_demo.html
'''

__author__ = 'Jonathan Simon'

from DataProcessing.LoadData import getTrainingData
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True)

ne_counter = Counter([ne for sentence in train_ne_list for ne in sentence])
ne_names, ne_counts = zip(*ne_counter.most_common())

ne_count_total = sum(ne_counts)
ne_pos_probs = [1.0 * x / ne_count_total for x in ne_counts]

# fig, ax = plt.subplots()
fig = plt.figure(figsize=(14, 6))

index = np.arange(len(ne_counter) - 1)
bar_width = 0.80

opacity = 0.4

# rects1 = plt.bar(index, train_pos_counts, bar_width,
# rects1 = plt.bar(index, ne_pos_probs[1:], bar_width,
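
# A minimal completion sketch (not from the original file): finish the bar
# chart started by the commented-out lines above. It assumes the intent was to
# plot ne_pos_probs for every tag except the most frequent one (presumably the
# dominant 'O' tag), matching index = np.arange(len(ne_counter) - 1).
rects1 = plt.bar(index, ne_pos_probs[1:], bar_width,
                 alpha=opacity, color='b')
plt.xticks(index + bar_width / 2, ne_names[1:], rotation=45)
plt.xlabel('Named entity tag')
plt.ylabel('Proportion of training tokens')
plt.title('Named entity tag distribution (excluding the most common tag)')
plt.tight_layout()
plt.show()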
def main():
    # Variant that also passes the largest key size found during training
    # through to the prediction step.
    training_data, largest_key_size = getTrainingData()
    context, pos, index = getTestData()
    predictions = makePredictions(training_data, context, pos, index,
                                  largest_key_size)
    savePredictionsToCSV(predictions)
def main():
    # Variant without the largest-key-size argument.
    training_data = getTrainingData()
    context, pos, index = getTestData()
    predictions = makePredictions(training_data, context, pos, index)
    savePredictionsToCSV(predictions)
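
# Illustrative entry-point guard (not in the original snippets). Each main()
# above presumably lives in its own script; with a single definition per file,
# it would be invoked like this when the script is run directly.
if __name__ == '__main__':
    main()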