'''Author: Shawon Ashraf

Print per-label counts for the training data of the corpus.
'''
import corpus.corpus as ec
from collections import Counter
import matplotlib.pyplot as plt


if __name__ == "__main__":
    # Input files, listed in the order the Corpus constructor receives them.
    path_to_train = '../data/isear/isear-train.csv'
    path_to_test = '../data/isear/isear-test.csv'
    path_to_gold = '../data/isear/isear-val.csv'
    path_to_pred = '../data/isear/isear-val-prediction.csv'

    c = ec.Corpus(path_to_train, path_to_test, path_to_gold, path_to_pred)

    # Section banner followed by one blank line.
    print(
        "===========================",
        "Statistics for training data",
        "============================",
        "",
        sep="\n",
    )

    # Tally how often each label occurs among the training instances.
    train_data = c.train_data
    train_labels = [emo.label for emo in train_data]
    train_counts = Counter(train_labels)

    for label, count in train_counts.items():
        print(f"{label} => {count}")
    print(f"total = {sum(train_counts.values())}")

    # Blank line, then the banner that opens the testing-data section.
    print(
        "",
        "===========================",
        "Statistics for testing data",
        sep="\n",
    )
"""Compute keyword frequencies for a corpus, display them and export to JSON.

Usage: <script> CORPUS_SOURCE FREQUENCY_FILE [OUTPUT_PATH]

CORPUS_SOURCE is handed to ``corpus.Corpus`` and FREQUENCY_FILE to
``frequency_from_file``. OUTPUT_PATH is optional and is handed to
``write_to_json``; when omitted, the originally hard-coded location is used.
"""
from corpus import corpus
import sys


if __name__ == '__main__':
    # Report missing positional arguments with a usage message rather than
    # letting sys.argv[...] raise a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit(
            "usage: {} CORPUS_SOURCE FREQUENCY_FILE [OUTPUT_PATH]".format(sys.argv[0])
        )

    corpus_source = sys.argv[1]
    frequency_file = sys.argv[2]
    # Optional third argument overrides the user-specific default location,
    # so the script can run on other machines without editing the source.
    if len(sys.argv) > 3:
        output_path = sys.argv[3]
    else:
        output_path = '/Users/ben/Desktop/graph_data'

    c = corpus.Corpus('test', corpus_source)
    f = c.frequency('f1', [200500, 200600, 200700], ['terrorist'], 'Filtered')
    f.frequency_from_file(frequency_file)

    e = f.take_freq()
    e.display()
    e.write_to_json(output_path)
parser.add_argument("-y", action="store", help="year ranges") parser.add_argument("-n", action="store", help="frequency record name") parser.add_argument("-d", action="store", help="publication date key name for volumes", default="Date") parser.add_argument("-txt", action="store", help="output text filepath") return parser.parse_args() if __name__ == '__main__': args = setup_parser() corp = corpus.Corpus('corp', args.i) freq = corp.frequency('freq', [int(y) for y in args.y.split(",")], args.t, args.d) freq1 = freq.take_freq(args.k.split(","), args.n) freq1.write_to_json("{}_global.json".format(args.o)) freq1.write("{}_global.txt".format(args.txt)) avg = freq.take_average_freq(args.k.split(","), args.n) avg.write_to_json("{}_avg.json".format(args.o)) avg.write("{}_avg.txt".format(args.txt)) var = freq.take_variance(args.k.split(","), args.n) var.write_to_json("{}_var.json".format(args.o)) var.write("{}_var.txt".format(args.txt))
parser.add_argument("-t", action="store", help="text field to analyze", default="Filtered") parser.add_argument("-d", action="store", help="publication date key name for volumes", default="Year Published") parser.add_argument("-y", action="store", help="year range") return parser.parse_args() if __name__ == '__main__': args = setup_parser() if args.y: r = args.y.split(",") y_min = int(r[0]) y_max = int(r[1]) else: y_min = -1 * sys.maxsize y_max = sys.maxsize corp = corpus.Corpus("corp", args.i) sub = corp.build_sub_corpus('sub', args.o, args.k.split(','), args.t, args.d, int(args.l), [y_min, y_max])