Exemplo n.º 1
0
from __future__ import division
from Annotation import Annotation
from nltk import FreqDist
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns
# Script: bar chart of how often each label was assigned, as a percentage
# of all annotations.
sns.set(color_codes=True, style="whitegrid")

A = Annotation('data/exportMedium.json')

# Take element [0][0] of every record returned for the 'labels' field.
# NOTE(review): presumably the first label of each annotation -- confirm
# against Annotation.ofAll.
# A list (not a lazy map object) so that len() works on Python 2 and 3.
allLabels = [a[0][0] for a in A.ofAll(['labels'])]

dist = FreqDist(allLabels)

# Fixed display order of the label categories on the x axis.
keys = [u'Neg', u'Neut', u'Pos', u'No Sent', u'Undecided', u'Irrelevant']

index = np.arange(len(keys))

bar_width = 0.5

# Scale to real percentages so the values match the '% of annotations'
# axis label; an empty data set plots zeros instead of dividing by zero.
total = len(allLabels)
shares = [100.0 * dist[k] / total if total else 0.0 for k in keys]

# plt.figure() (lower case) registers the figure with pyplot, so the calls
# below draw onto it; plt.Figure() would create a detached, unused object.
fig = plt.figure()

# Explicit centre alignment keeps every bar directly above its tick
# regardless of the matplotlib version's default bar alignment.
plt.bar(index, shares, bar_width, align='center')

plt.xlabel('Label')
plt.ylabel('% of annotations')
plt.xticks(index, keys)
# No legend: the chart has a single, unlabelled series, so plt.legend()
# would have nothing to show.
Exemplo n.º 2
0
from operator import itemgetter, concat, add
from itertools import groupby
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import copy

import seaborn as sns
# Script: load the annotation timestamps and group them into sessions.
sns.set(style="whitegrid", color_codes=True)

A = Annotation('data/exportMedium.json')

# One (timestamp, duration, proposalFlag) tuple per annotation, sorted by
# timestamp (oldest first).
dateDurations = sorted(
    ((datetime.strptime(dateString, "%Y-%m-%d %H:%M:%S"),
      int(durationString), flag)
     for dateString, durationString, flag in A.ofAll(
         ['dateTime', 'duration', 'proposalFlag'])),
    key=itemgetter(0))

# Maximum gap between two consecutive annotations of the same session.
# Subtracting two datetimes yields a timedelta; here it is five minutes.
threshold = datetime(2016, 12, 1, 13, 5, 0) - datetime(2016, 12, 1, 13, 0, 0)

# Discover sessions: walk the records in chronological order and open a new
# session whenever the gap to the previous record exceeds the threshold.
# A plain loop replaces the former reduce() with a tuple-unpacking lambda,
# which is a syntax error on Python 3, and no longer fails on empty data.
sessions = []
for record in dateDurations:
    if sessions and record[0] - sessions[-1][-1][0] <= threshold:
        # Close enough to the previous record: same session.
        sessions[-1].append(record)
    else:
        # First record overall, or the gap is too large: new session.
        sessions.append([record])
Exemplo n.º 3
0
from itertools import groupby

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns; sns.set(color_codes=True, style="whitegrid")

def save(name, value, directory='/home/kai/Dropbox/MA/thesis/const'):
    """Write ``value`` to ``<directory>/<name>.tex``, replacing any old file.

    Args:
        name: Base name of the target file, without the ``.tex`` extension.
        value: Text to store in the file.
        directory: Folder that receives the file. Defaults to the folder
            that used to be hard-coded, so existing two-argument calls
            write to the same path as before.

    Raises:
        IOError/OSError: If the file cannot be opened for writing, e.g.
            because ``directory`` does not exist.
    """
    path = '{}/{}.tex'.format(directory, name)
    # Plain write mode is enough: the file is only written, never read back.
    with open(path, 'w') as handle:
        handle.write(value)


# Script: export basic corpus counts through save(), one .tex file each.
A = Annotation('data/exportMedium.json')

# One record per annotation; index 1 of each record is its proposalFlag
# (second requested field).
annotations = A.ofAll(['labels', 'proposalFlag'])

# NOTE(review): presumably one entry per document, with 3 being a
# per-document annotation count -- confirm against Annotation.perDocument.
perDocument = A.perDocument(3, ['dateTime'])

save('numberOfAnnotations', str(len(annotations)))

save('numberOfDocuments', str(len(perDocument)))

# Group the annotations by proposal flag in a single pass; within each
# group the records keep their original relative order.
flags = {}
for annotation in annotations:
    flags.setdefault(annotation[1], []).append(annotation)

# A flag that never occurs is written as "0" instead of raising KeyError.
save('numberOfProposals', str(len(flags.get('proposal', []))))
save('numberOfNoProposals', str(len(flags.get('no proposal', []))))
save('numberOfWrongProposals', str(len(flags.get('wrong proposal', []))))