예제 #1
0
 def on_success(self, tweet):
     """Handle one incoming tweet payload, then enforce the run-time limit.

     A non-empty ``tweet`` is passed to ``ProcessedTweet.process_raw``
     together with ``self.userinfo`` and ``requiregeo=True``. When that call
     reports success, the processed record is written to ``o``; if the
     record has reply information, a line of the form
     ``user/status_id: <inreply joined by '/'>`` is written to ``orep``.

     Whether or not a tweet was handled, the connection is dropped once more
     than ``maxsecs`` seconds have passed since ``self.start_time``.
     """
     if tweet:
         record = ProcessedTweet()
         if record.process_raw(tweet, self.userinfo, requiregeo=True):
             o.write(record.__str__())
             if record.inreply:
                 author_path = '/'.join([record.user, record.status_id])
                 reply_path = '/'.join(record.inreply)
                 orep.write(author_path + ': ' + reply_path + '\n')

     # Stop once the allotted running time has been used up.
     elapsed = time.time() - self.start_time
     if elapsed > maxsecs:
         self.disconnect()
예제 #2
0
 def on_success(self, tweet):
     """Process a received tweet and disconnect when the time budget is spent.

     For a truthy ``tweet`` a new ``ProcessedTweet`` is built and
     ``process_raw`` is called with ``self.userinfo`` and
     ``requiregeo=True``. On success the record's ``__str__()`` output goes
     to ``o``, and records with a truthy ``inreply`` also produce a
     ``user/status_id: inreply-path`` line in ``orep``.

     After every call the elapsed time since ``self.start_time`` is compared
     with ``maxsecs``; exceeding it triggers ``self.disconnect()``.
     """
     if tweet:
         parsed = ProcessedTweet()
         accepted = parsed.process_raw(tweet, self.userinfo, requiregeo=True)
         if accepted:
             o.write(parsed.__str__())
             if parsed.inreply:
                 # Build "<user>/<status_id>: <inreply parts>/..." piece by piece.
                 reply_line = '/'.join([parsed.user, parsed.status_id])
                 reply_line = reply_line + ': '
                 reply_line = reply_line + '/'.join(parsed.inreply)
                 orep.write(reply_line + '\n')

     # Check whether the maximum running time has lapsed.
     time_is_up = time.time() - self.start_time > maxsecs
     if time_is_up:
         self.disconnect()
예제 #3
0
    # Open the replies output file (UTF-8), named after args.basename.
    orep = codecs.open(args.basename+'.replies.txt', 'w', 'utf-8')
    
    # Header row of the tab-separated status output; `o` is opened outside this excerpt.
    o.write('userid\ttweet\tstatusid\tdate\tlat\tlon\n')
    
    # Handed to every process_raw call and written out at the end;
    # presumably process_raw fills it in -- confirm against ProcessedTweet.
    userinfo = {}
    searcher = Twython(CONSUMER_KEY, CONSUMER_SECRET)
    # Paging cursor: lowered below to the smallest processed status id, so each
    # search asks for max_id=until_id-1. The start value is presumably meant to
    # exceed any real status id.
    # NOTE(review): the first request sends a float (1e30-1) as max_id -- confirm the API accepts it.
    until_id = 1e30
    # One search request of up to 100 tweets per iteration.
    # NOTE(review): range() over a division result relies on Python 2 integer
    # division (the print statement below is Python 2 syntax).
    for batch in range(args.numtweets/100):  
        results = searcher.search(q=args.searchterm, count=100, max_id=until_id-1, result_type='recent')  #can change result_type to popular or mixed
        # Reports the cursor value that was used for the request just made.
        print "Searching until status", until_id

        # Per the original author's note, a single-status result usually means
        # the search has started repeating because the data ran out.
        if len(results['statuses'])==1:  #usually repeat after data runs out
            break
        
        for tweet in results['statuses']:
            ptweet = ProcessedTweet()
            success = ptweet.process_raw(tweet, userinfo, requiregeo = False, lang = args.lang, requireword = args.searchterm) 
            if success:
                o.write(ptweet.__str__())
                if ptweet.inreply:
                    # Reply line format: "<user>/<status_id>: <inreply parts joined by '/'>".
                    orep.write('/'.join([ptweet.user, ptweet.status_id])
                               +': '
                               +'/'.join(ptweet.inreply)+'\n')
                # Only successfully processed tweets move the cursor down.
                # NOTE(review): if no tweet in a batch succeeds, the next request repeats the same max_id.
                until_id = min(until_id, int(ptweet.status_id))

        time.sleep(5) #throttling: pause 5 seconds between search requests
    
    orep.close()
    o.close()
    
    # Write the collected user info to <basename>.userinfo.tsv.
    write_dict_tsv(userinfo, args.basename+'.userinfo.tsv')    
예제 #4
0
    # NOTE(review): API credentials are hard-coded in the source; consider loading
    # them from the environment or a config file kept out of version control.
    CONSUMER_KEY = "JksOBh39nyd95jagJQTZ8Q"
    CONSUMER_SECRET = "kx87N1Ge8iWuzwcWUH55PhUDOFCqBju6UqUtroYFo"

    # Command-line arguments: the search word and the number of search batches.
    word = sys.argv[1]
    n = int(sys.argv[2])
    
    # UTF-8 output files for statuses and replies, both named after the search word.
    o = codecs.open(word+'.statuses.tsv', 'w', 'utf-8')
    orep = codecs.open(word+'.replies.txt', 'w', 'utf-8')
    
    # Handed to every process_raw call and dumped as JSON at the end;
    # presumably process_raw fills it in -- confirm against ProcessedTweet.
    userinfo = {}
    # OAUTH_TOKEN and OAUTH_SECRET are not defined in this excerpt.
    searcher = Twython(CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_SECRET)
    # Paging cursor: lowered below to the smallest processed status id, so each
    # search asks for max_id=until_id-1.
    # NOTE(review): the first request sends a float (1e30-1) as max_id -- confirm the API accepts it.
    until_id = 1e30
    for batch in range(n):  #at most n*100 tweets
        # Underscores in the command-line word become ' OR ' in the query string.
        results = searcher.search(q=word.replace('_', ' OR '), count=100, max_id=until_id-1)
        for tweet in results['statuses']:
            ptweet = ProcessedTweet()
            success = ptweet.process_raw(tweet, userinfo, requiregeo = False)
            if success:
                o.write(ptweet.__str__())
                if ptweet.inreply:
                    # Reply line format: "<user>/<status_id>: <inreply parts joined by '/'>".
                    orep.write('/'.join([ptweet.user, ptweet.status_id])
                               +': '
                               +'/'.join(ptweet.inreply)+'\n')
                # Only successfully processed tweets move the cursor down.
                until_id = min(until_id, int(ptweet.status_id))
        print "Searching until status", until_id
        time.sleep(5) #throttling: pause 5 seconds between search requests
    
    # Dump the collected user info as JSON to <word>.userinfo.json.
    # NOTE(review): `o` and `orep` are not closed in the lines shown here.
    oj = open(word+'.userinfo.json', 'w')
    json.dump(userinfo, oj)
    oj.close()