Exemplo n.º 1
0
'''Author: Shawon Ashraf'''

import corpus.corpus as ec
from collections import Counter
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Relative paths of the four ISEAR CSV files, listed in the order in
    # which they are handed to the Corpus constructor below.
    path_to_train = '../data/isear/isear-train.csv'
    path_to_test = '../data/isear/isear-test.csv'
    path_to_gold = '../data/isear/isear-val.csv'
    path_to_pred = '../data/isear/isear-val-prediction.csv'

    c = ec.Corpus(path_to_train, path_to_test, path_to_gold, path_to_pred)

    # Header for the training section; the trailing empty string produces
    # the blank separator line.
    for banner_line in ("===========================",
                        "Statistics for training data",
                        "============================",
                        ""):
        print(banner_line)

    # Tally how often each label occurs among the training items.
    train_data = c.train_data
    train_labels = [item.label for item in train_data]
    train_counts = Counter(train_labels)

    # One "label => count" line per distinct label, followed by the total.
    for label, count in train_counts.items():
        print(f"{label} => {count}")
    print(f"total = {sum(train_counts.values())}")
    print()

    print("===========================")
    print("Statistics for testing data")
Exemplo n.º 2
0
from corpus import corpus
import sys

if __name__ == '__main__':
    # Build a Corpus named 'test'; its second constructor argument is the
    # first command-line argument.
    test_corpus = corpus.Corpus('test', sys.argv[1])

    # Create the 'f1' frequency object, then feed it the file named by the
    # second command-line argument.
    freq_record = test_corpus.frequency(
        'f1',
        [200500, 200600, 200700],
        ['terrorist'],
        'Filtered',
    )
    freq_record.frequency_from_file(sys.argv[2])

    # Take the frequencies, display them, and write them out as JSON.
    # NOTE(review): the output path is hard-coded to one user's desktop.
    freq_result = freq_record.take_freq()
    freq_result.display()
    freq_result.write_to_json('/Users/ben/Desktop/graph_data')
Exemplo n.º 3
0
    parser.add_argument("-y", action="store", help="year ranges")
    parser.add_argument("-n", action="store", help="frequency record name")
    parser.add_argument("-d",
                        action="store",
                        help="publication date key name for volumes",
                        default="Date")
    parser.add_argument("-txt", action="store", help="output text filepath")

    return parser.parse_args()


if __name__ == '__main__':

    args = setup_parser()

    corp = corpus.Corpus('corp', args.i)

    # -y arrives as a single comma-separated string; convert it to ints.
    # NOTE(review): -y has no default, so omitting it fails on .split().
    year_values = [int(token) for token in args.y.split(",")]
    freq = corp.frequency('freq', year_values, args.t, args.d)

    # Each statistic below is written twice: as JSON under the -o prefix
    # and as text under the -txt prefix, with a per-statistic suffix.
    # The -k string is split afresh for every call so that no list object
    # is shared between the three calls.

    # Plain frequency.
    freq1 = freq.take_freq(args.k.split(","), args.n)
    freq1.write_to_json(f"{args.o}_global.json")
    freq1.write(f"{args.txt}_global.txt")

    # Average frequency.
    avg = freq.take_average_freq(args.k.split(","), args.n)
    avg.write_to_json(f"{args.o}_avg.json")
    avg.write(f"{args.txt}_avg.txt")

    # Variance.
    var = freq.take_variance(args.k.split(","), args.n)
    var.write_to_json(f"{args.o}_var.json")
    var.write(f"{args.txt}_var.txt")
Exemplo n.º 4
0
    parser.add_argument("-t",
                        action="store",
                        help="text field to analyze",
                        default="Filtered")
    parser.add_argument("-d",
                        action="store",
                        help="publication date key name for volumes",
                        default="Year Published")
    parser.add_argument("-y", action="store", help="year range")

    return parser.parse_args()


if __name__ == '__main__':

    args = setup_parser()

    if not args.y:
        # -y absent or empty: use the widest range sys.maxsize allows.
        y_min = -sys.maxsize
        y_max = sys.maxsize
    else:
        # -y is "min,max"; only the first two comma-separated fields are read.
        bounds = args.y.split(",")
        y_min, y_max = int(bounds[0]), int(bounds[1])

    corp = corpus.Corpus("corp", args.i)

    # The inclusive/exclusive treatment of the bounds is decided by
    # build_sub_corpus, which is not visible here.
    year_range = [y_min, y_max]
    sub = corp.build_sub_corpus('sub', args.o, args.k.split(','), args.t,
                                args.d, int(args.l), year_range)