db = a
        elif o == "-f":
            filename = a
        else:
            usage()

    if filename is None:
        usage()

    count = 0
    buf = []
    progressbar = ProgressBar(widgets = [ AnimatedMarker()
                                        , ' '
                                        , Timer()
                                        , ', # of Tweets: '
                                        , Counter()
                                        ],
                              maxval = UnknownLength)
    progressbar.start()

    ts = TweetStore(db)
    with open(filename) as f:
        for line in islice(f, 1, None, 2):
            buf.append(json.loads(line))
            if len(buf) > CHUNK_SIZE:
                ts.put(buf)
                del buf[:]

            count += 1
            progressbar.update(count)
    classifier = "classifier.pickle"

    opts, args = getopt.getopt(sys.argv[1:], "hc:d:k:s:e:")
    for o, a in opts:
        if o == "-d":
            db = a
        elif o == "-c":
            classifier = a
        elif o == "-k":
            keywords.append(a)
        elif o == "-s":
            start = datetime.strptime(a, "%Y-%M-%d")
        elif o == "-e":
            end = datetime.strptime(a, "%Y-%M-%d")
        else:
            usage()
            sys.exit(0)

    classifier = Classifier.load(classifier)
    aggregator = RetweetWeightedAggregator()

    ts = TweetStore(db)
    for t in ts.get(keywords, start, end):
        s = classifier.classify(t)
        print("%s -- sentiment: %s" % (tweet.to_ascii(t)[tweet.TEXT], "positive" if (s == 1) else "negative"))
        aggregator.add(t, s)

    print("Aggregated sentiment: %f" % aggregator.get_sentiment())
    print("ID of last tweet: %d" % aggregator.get_last_id())
    print("Total number of tweets: %d" % aggregator.get_num())
Exemplo n.º 3
0
# 
# Jun Jen CHAN    (341759)
# Daniel TEH    (558424)
# Tou LEE    (656128)
# David MONROY    (610346)
# Jaime MARTINEZ    (642231)

import json, csv, sys
from tweetstore import TweetStore

# Python script for importing tweets from text file;
# Usage: importTweets.py *sourcefile *couchdbname

# Daniel Teh

# Import tweets from a CSV export into a TweetStore.
#   argv[1]: path of the CSV file; each row's 'value' column holds one tweet
#            encoded as JSON
#   argv[2]: name passed to TweetStore
filename = sys.argv[1]
storage = TweetStore(sys.argv[2])

i = 0         # rows read so far; also the number shown in progress messages
inserted = 0  # rows that save_tweet() accepted without raising
# NOTE(review): 'rb' is the mode the Python 2 csv module expects; under
# Python 3 the csv module needs a text-mode file opened with newline=''.
with open(filename, 'rb') as tweetfile:
    reader = csv.DictReader(tweetfile)
    for tweet in reader:
        decoded = json.loads(tweet['value'])
        # Announce the attempt first so a following "skipping" line reads
        # as the outcome of this row.
        print("Importing tweet no. %d" % i)
        try:
            storage.save_tweet(decoded)
        except Exception:
            # Best-effort import: any failure from save_tweet() is still
            # reported as a duplicate and skipped. Catching Exception rather
            # than using a bare except lets Ctrl-C and SystemExit stop the run.
            print("Tweet %d already exists.. skipping" % i)
        else:
            inserted += 1
        i += 1
# Report only the rows that were actually stored, not every row read.
print("Inserted %d tweets" % inserted)
Exemplo n.º 4
0
## Tweet Harvester - Search API
##
## The Twitter Search API generally only allows searches up to 1 week into the
## past when scraping for tweets.
## The decision to use only a single node for the Search API was made because the
## city of Adelaide does not get enough volume to warrant several harvesters at once.
## The decision not to delve into single users' timelines stems from the desire
## to keep the data as unbiased as possible.
##
## Daniel Teh
import tweepy, json
from tweetstore import TweetStore
import time
import datetime

# Module-level TweetStore handle opened on 'tweets_adelaide'
# (presumably the database name -- confirm against the tweetstore module).
storage = TweetStore('tweets_adelaide')


@classmethod
def parse(cls, api, raw):
    """Parse ``raw`` with the original parser and keep its JSON text.

    Delegates to ``cls.first_parse(api, raw)``, then stores
    ``json.dumps(raw)`` on the returned object as a ``json`` attribute
    and returns that object.
    """
    parsed = cls.first_parse(api, raw)
    parsed.json = json.dumps(raw)
    return parsed


# Install the wrapper: keep tweepy's stock Status.parse reachable as
# Status.first_parse (the wrapper calls it), then replace Status.parse.
# The guard makes the patch idempotent: if this code ran a second time in the
# same interpreter (e.g. a module reload), first_parse would otherwise be
# rebound to the wrapper itself and every parse() call would recurse forever.
if not hasattr(tweepy.models.Status, 'first_parse'):
    tweepy.models.Status.first_parse = tweepy.models.Status.parse
    tweepy.models.Status.parse = parse

## OAuth Keys
# Application Key
# NOTE(review): this credential is hard-coded in the source file; consider
# loading it from the environment or an untracked config file instead.
consumer_key = "kuPrFsWufTx87nCSc4HKJ6HVU"