Exemplo n.º 1
0
def update_statistics():
    """Retrain the classifiers and refresh the per-day tweet statistics.

    The job runs in three stages:

    1. Every ``Story`` whose label is not 0 is collected as training data
       (``content`` as the sample, ``int(label)`` as the target).
    2. One instance each of ``SVM``, ``Bagging``, ``Boosting`` and
       ``Stacking`` is created and fitted on those stories.
    3. For every distinct calendar date found in ``monitor_tweet``, each
       classifier predicts a label for every tweet of that date.  The number
       of ``1`` predictions is stored as ``depressed_count_<model>`` and the
       number of ``-1`` predictions as ``not_depressed_count_<model>`` on the
       ``Stats`` row for that date (a new row is created when none exists),
       and each prediction is stored on the tweet as ``label_<model>``.
       ``<model>`` is the lower-cased classifier class name.

    Progress messages are written to stdout.  Returns ``None``.

    Raises:
        Stats.MultipleObjectsReturned: if more than one ``Stats`` row exists
            for the same date.  Any other lookup error (e.g. a database
            failure) also propagates instead of being silently treated as
            "row missing".
    """
    print("update_statistics => BEGIN")
    classifier_classes = (SVM, Bagging, Boosting, Stacking)

    # collect the training data
    print("update_statistics => collecting training data")
    labels, stories = [], []
    for story in Story.objects.exclude(label=0):
        labels.append(int(story.label))
        stories.append(story.content)

    # train the models
    print("update_statistics => training the classifiers on %d stories" % len(stories))
    clf = {}
    for klass in classifier_classes:
        # SVM is constructed without arguments; the other classifiers are
        # sized by settings.N_MODELS.
        if klass is SVM:
            clf[klass] = klass()
        else:
            clf[klass] = klass(n_models=settings.N_MODELS)
        print("update_statistics => initializing %s" % klass.__name__)
        clf[klass].fit(stories, labels)

    # collect all the unique dates
    print("update_statistics => collecting unique dates")
    cursor = connection.cursor()
    cursor.execute("select distinct(date(created_at)) from monitor_tweet")
    dates = [row[0] for row in cursor.fetchall()]
    print("update_statistics => collected %d unique dates" % len(dates))

    # iterate over dates and store labels
    for day in dates:
        # Materialise the tweets once: the very same model instances must be
        # labelled below and then saved, which a lazy QuerySet only
        # guarantees as a side effect of its result cache.
        tweets = list(Tweet.objects.filter(
            created_at__year=day.year,
            created_at__month=day.month,
            created_at__day=day.day,
        ))
        tweets_text = [tweet.text for tweet in tweets]
        print("update_statistics => Updating statistics for %s (%d tweets)" % (str(day), len(tweets)))

        # Fetch the statistics row for this date, creating it only when it
        # is genuinely missing; every other failure is allowed to surface.
        try:
            stats = Stats.objects.get(created_at=day)
        except Stats.DoesNotExist:
            stats = Stats()
            stats.created_at = day

        for klass in classifier_classes:
            suffix = klass.__name__.lower()
            plabels = [int(label) for label in clf[klass].predict(tweets_text).tolist()]
            # update overall statistics (1 => depressed, -1 => not depressed)
            setattr(stats, "depressed_count_" + suffix, plabels.count(1))
            setattr(stats, "not_depressed_count_" + suffix, plabels.count(-1))
            # update label of each tweet
            for tweet, plabel in zip(tweets, plabels):
                setattr(tweet, "label_" + suffix, plabel)

        # write overall statistics back to the database
        stats.save()
        # write label of each tweet back to the database
        for tweet in tweets:
            tweet.save()
    print("update_statistics => DONE")
Exemplo n.º 2
0
def stats(request, name):
    """Render ``monitor/index.html`` for the model called ``name``.

    The template context carries ``name`` itself, the result of
    ``Stats.for_model(name)`` under ``data`` and the result of
    ``Tweet.labelled_by_date(name)`` under ``labelled_tweets``.
    ``request`` is accepted but not used.
    """
    data = Stats.for_model(name)
    labelled_tweets = Tweet.labelled_by_date(name)
    return render_to_response(
        "monitor/index.html",
        dict(name=name, data=data, labelled_tweets=labelled_tweets),
    )
Exemplo n.º 3
0
def stats(request, name):
    """Build the statistics page context for ``name`` and render it.

    Looks up ``Stats.for_model(name)`` and ``Tweet.labelled_by_date(name)``
    and passes them, together with ``name``, to the ``monitor/index.html``
    template.  The ``request`` argument is not read.
    """
    context = {}
    context["name"] = name
    context["data"] = Stats.for_model(name)
    context["labelled_tweets"] = Tweet.labelled_by_date(name)
    return render_to_response("monitor/index.html", context)