Exemplo n.º 1
0
from Annotation import Annotation
from itertools import groupby
from operator import itemgetter
from sklearn.neighbors import KernelDensity

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns
sns.set(color_codes=True, style="whitegrid")

# Load the exported annotation data.
A = Annotation('data/exportMedium.json')

# Bucket the annotations by their 'proposalFlag' value. groupby only merges
# neighbouring items, hence the sort on the very same key beforehand.
key = itemgetter('proposalFlag')
getDuration = itemgetter('duration')
annotationsByFlag = groupby(sorted(A.all, key=key), key=key)

# g: proposalFlag -> A.durationToSec(<durations of that flag's annotations>)
g = {flag: A.durationToSec(map(getDuration, group))
     for flag, group in annotationsByFlag}


def stats(label):
    """Summarise the values stored in the module-level dict g under label.

    Returns a 3-tuple (mean, standard deviation, median) computed with
    numpy; np.std is called with its defaults. Raises KeyError when label
    is not a key of g.
    """
    values = g[label]
    mean = np.mean(values)
    deviation = np.std(values)
    median = np.median(values)
    return mean, deviation, median


# Proposal flags that are looked up in g (via stats) below.
labels = ['proposal', 'no proposal', 'wrong proposal']

# (mean, std, median) per flag.
# NOTE(review): the result is not stored, so this only has a visible effect
# in an interactive session; on Python 3 map() is lazy and stats() would
# not even be called here.
map(stats, labels)

# 1000 evenly spaced x positions from -10 to 300 as a column vector of
# shape (1000, 1).
# NOTE(review): presumably the evaluation grid for the imported
# KernelDensity estimator -- confirm against the plotting code that follows.
xPlot = np.linspace(-10, 300, 1000)[:, np.newaxis]

# One single-letter colour per proposal flag.
c = {'no proposal': 'g', 'proposal': 'b', 'wrong proposal': 'r'}

# plt.figure() (lower case) registers the new figure with pyplot and makes
# it the current figure. The previous plt.Figure() instantiated the Figure
# class directly, which pyplot neither tracks nor shows.
fig = plt.figure()
Exemplo n.º 2
0
# Disabled code kept for reference (it works on tmpDates, which is not
# used by the live code below):
# now = datetime.now()
# deltas = reduce(lambda lst, d: lst+[d-lst.pop()]+[d],
#                 tmpDates[1:], [tmpDates[0]])[:-1]+[now-now]

# Largest gap allowed between two consecutive annotations of one session.
# Subtracting two datetimes yields a datetime.timedelta -- here 5 minutes.
threshold = datetime(2016, 12, 1, 13, 5, 0) - datetime(2016, 12, 1, 13, 0, 0)


def _splitIntoSessions(records, maxGap):
    """Split (date, duration, flag) records into sessions.

    The records are processed in the order given. A record whose date lies
    more than maxGap after the date of the record before it opens a new
    session; otherwise it joins the current one.

    Returns a list of sessions, each a non-empty list of
    (date, duration, flag) tuples. An empty input gives an empty list.
    """
    result = []
    for date, duration, flag in records:
        if result and (date - result[-1][-1][0]) <= maxGap:
            result[-1].append((date, duration, flag))
        else:
            result.append([(date, duration, flag)])
    return result


def secToHour(sec):
    """Convert seconds to hours.

    Divides by a float so that integer input is not floor-divided on
    Python 2.
    """
    return sec / 3600.0


sessions = _splitIntoSessions(dateDurations, threshold)

# Length of a session: date of its last record minus date of its first.
sesssionDurations = [session[-1][0] - session[0][0] for session in sessions]

# threshold - threshold is a zero timedelta; used as the start value so an
# empty session list sums to zero instead of making reduce() raise.
totalSessionDuration = reduce(
    add, sesssionDurations, threshold - threshold).total_seconds()
totalAnnotationDuration = A.durationToSec(
    sum(map(itemgetter(1), dateDurations)))

# plt.figure() (lower case) makes the new figure pyplot's current figure, so
# the plt.* calls below draw on it. The previous plt.Figure() created a
# figure that pyplot does not manage.
fig = plt.figure()

bar_width = 0.5
label = ['Total annotation time', 'Time spent to pick label']

# Both bars start at x=0 and are drawn on top of each other; the second
# call paints over the lower part of the first. width/align are passed
# explicitly so each bar spans exactly [0, bar_width], which is what the
# tick position and the x-limits below assume, independent of the
# library's default bar width and alignment.
plt.bar(0, secToHour(totalSessionDuration), width=bar_width, align='edge',
        label=label[0], color='r')
plt.bar(0, secToHour(totalAnnotationDuration), width=bar_width, align='edge',
        label=label[1], color='b')

plt.ylabel('Hours')
# Single tick in the middle of the bar.
plt.xticks([0 + bar_width / 2], ['Annotation time'])
# Keep the automatic y-limits, clamp the x-range to the bar itself.
x1, x2, y1, y2 = plt.axis()
plt.axis((0, bar_width, y1, y2))
plt.legend(frameon=True, loc='best', fancybox=True, prop={'size': 12})
docAnnos = A.perDocument(1, ['document', 'annotations'])

# One (word count, proposalFlag, duration) record per annotation, sorted by
# proposalFlag so that groupby sees each flag as one consecutive run.
# NOTE(review): len(preprocessing(document)) is taken to be the number of
# words in the document -- confirm against preprocessing().
annotationRecords = sorted(
    ((len(preprocessing(document)),
      annotation['proposalFlag'],
      annotation['duration'])
     for document, annotations in docAnnos
     for annotation in annotations),
    key=itemgetter(1))

# blob maps every proposalFlag that occurs in the data (expected:
# 'proposal', 'no proposal', 'wrong proposal') to the list of its
# (word count, proposalFlag, duration) tuples.
blob = {flag: list(records)
        for flag, records in groupby(annotationRecords, key=itemgetter(1))}


def smooth(c, maxSeconds=100):
    """Return the (count, seconds) pairs of c with seconds < maxSeconds.

    Despite the name nothing is averaged: pairs whose second element is
    maxSeconds or more are simply dropped. The result is a new list; c is
    left untouched.
    """
    return [point for point in c if point[1] < maxSeconds]


# Per flag: (word count, duration converted with A.durationToSec) pairs.
# Written without tuple-parameter lambdas and dict.iteritems(), which only
# exist on Python 2.
plotContent = {
    flag: [(wordCount, A.durationToSec(duration))
           for wordCount, _, duration in records]
    for flag, records in blob.items()
}
plotContent = {flag: smooth(points) for flag, points in plotContent.items()}

# Bigger fonts, then a fresh pyplot figure scaled to 1.5x its initial size.
sns.set(font_scale=1.5)
fig = plt.figure()
enlargedSize = fig.get_size_inches() * 1.5
fig.set_size_inches(enlargedSize)

# Proposal flags iterated by the loop that follows.
labels = ['proposal', 'no proposal', 'wrong proposal']

for label in labels:
    count, sec = zip(*plotContent[label])
    data = pd.DataFrame()