Exemplo n.º 1
0
def get_tweets(keyword, maxtweets=10):
    """Search Twitter for *keyword* and return a list of tweet texts.

    Uses the module-level CONSUMER_KEY / CONSUMER_SECRET / ACCESS_TOKEN /
    ACCESS_TOKEN_SECRET credentials. Returns an empty list on any error.

    :param keyword: search term passed to the Twitter search API
    :param maxtweets: results requested per page (default 10)
    :returns: list of tweet text strings (possibly empty)
    """
    tweets = []
    try:
        searchOrder = TwitterSearchOrder()
        searchOrder.setKeywords([keyword])
        searchOrder.setLanguage('en')
        searchOrder.setCount(maxtweets)  # results per page
        searchOrder.setIncludeEntities(False)  # skip entity metadata

        # credentials come from the Twitter Developer Site application
        ts = TwitterSearch(consumer_key=CONSUMER_KEY,
                           consumer_secret=CONSUMER_SECRET,
                           access_token=ACCESS_TOKEN,
                           access_token_secret=ACCESS_TOKEN_SECRET)

        for tweet in ts.searchTweetsIterable(searchOrder):
            tweets.append(tweet['text'])

    except Exception as e:
        # fixed: Python 2 print statement -> Python 3 print() call
        print("Error in retrieving tweets !!\n")
        print(e)

    return tweets
Exemplo n.º 2
0
    def search(self):
        """Run the configured Twitter search and persist every tweet via self.io.

        Searches English tweets for self.keywords within 300 km of the centre
        of Ireland and writes each result to the database.
        """
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            tso.setKeywords(self.keywords)  # words to search for
            tso.setLanguage('en')  # English tweets only
            tso.setCount(100)  # results per page
            tso.setIncludeEntities(False)  # skip entity metadata
            tso.setGeocode(53.3333328, -8.0, 300, True)
            # fixed: removed the no-op "tso.url" expression and the unused
            # "done" local that the original carried

            ts = TwitterSearch(
                consumer_key=self.config.get('twitter_keys', 'consumer_key'),
                consumer_secret=self.config.get('twitter_keys', 'consumer_secret'),
                access_token=self.config.get('twitter_keys', 'access_token'),
                access_token_secret=self.config.get('twitter_keys', 'access_token_secret')
            )

            count = 0
            for tweet in ts.searchTweetsIterable(tso):  # save to db
                count += 1
                self.io.write_tweet(tweet)

            # fixed: Python 2 print statements -> print() calls
            print('Search complete.. flushed %d tweets into db.' % count)
        except TwitterSearchException as e:
            print('haha')
            print(e)
Exemplo n.º 3
0
def search_twitter(credentials,keywords):
    """Performs a search against the twitter search API.

        @param dict credentials     The auth credentials
        @param list keywords        The list of keywords to search

        @returns TwitterSearch.searchTweetsIterable()   The search results

    """

    tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
    tso.setKeywords(keywords)   # words to search for
    tso.setCount(100)           # results per page
    tso.setIncludeEntities(False)  # skip entity metadata
    # fixed: Python 2 print statement -> print() call
    print(tso.createSearchURL())

    # create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = credentials["tw_consumer_key"],
        consumer_secret = credentials["tw_consumer_secret"],
        access_token = credentials["tw_access_key"],
        access_token_secret = credentials["tw_access_secret"]
     )

    return ts.searchTweetsIterable(tso)
Exemplo n.º 4
0
def fetch_tweets(search_request):
    """
    Fetch tweets from the Twitter API, extract urls and update the db.

    Saves a SearchKeyWord record for *search_request* and returns the
    extracted urls, or None when the search fails.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([search_request])  # define search request
        tso.setCount(settings.tweets_per_page)  # results per page
        tso.setIncludeEntities(True)  # entity info carries the urls

        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key=twitter.TWITTER_CONSUMER_KEY,
            consumer_secret=twitter.TWITTER_CONSUMER_SECRET,
            access_token=twitter.TWITTER_ACCESS_TOKEN,
            access_token_secret=twitter.TWITTER_ACCESS_TOKEN_SECRET
        )

        ts.authenticate()  # user must authenticate first
        tweets = ts.searchTweetsIterable(tso)
        found_urls = extract_urls(tweets)
        search_keyword_object = SearchKeyWord()
        search_keyword_object.gifs = found_urls
        search_keyword_object.search_keyword = search_request
        search_keyword_object.updated_at = datetime.now()
        print(search_keyword_object)
        search_keyword_object.save()
        return found_urls

    # fixed: Python 2 "except X, e" syntax; e.message no longer exists in Py3
    except TwitterSearchException as e:
        message = str(e)
Exemplo n.º 5
0
def fetch_tweets(search_request):
    """
    Fetch tweets from the Twitter API, extract urls and update the db.

    Saves a SearchKeyWord record for *search_request* and returns the
    extracted urls, or None when the search fails.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([search_request])  # define search request
        tso.setCount(settings.tweets_per_page)  # results per page
        tso.setIncludeEntities(True)  # entity info carries the urls

        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key=twitter.TWITTER_CONSUMER_KEY,
            consumer_secret=twitter.TWITTER_CONSUMER_SECRET,
            access_token=twitter.TWITTER_ACCESS_TOKEN,
            access_token_secret=twitter.TWITTER_ACCESS_TOKEN_SECRET)

        ts.authenticate()  # user must authenticate first
        tweets = ts.searchTweetsIterable(tso)
        found_urls = extract_urls(tweets)
        search_keyword_object = SearchKeyWord()
        search_keyword_object.gifs = found_urls
        search_keyword_object.search_keyword = search_request
        search_keyword_object.updated_at = datetime.now()
        print(search_keyword_object)
        search_keyword_object.save()
        return found_urls

    # fixed: Python 2 "except X, e" syntax; e.message no longer exists in Py3
    except TwitterSearchException as e:
        message = str(e)
Exemplo n.º 6
0
def twit_search(keywords):
    """Return up to six recent English tweets matching *keywords*.

    Each result is a dict with screen_name, text, full_name, url and
    created_at keys. API errors are printed and yield an empty list.
    """
    try:
        order = TwitterSearchOrder()
        order.setKeywords(keywords)      # words to search for
        order.setLanguage('en')          # English tweets only
        order.setCount(7)                # results requested per page
        order.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        client = TwitterSearch(
            consumer_key = 'asX13sgNL5fVbVfSwyaLCw',
            consumer_secret = 'Y0SkBfcxZ5Q4AVmmXEMCcWI5lfUD3JBdgtd1fioJwU',
            access_token = '956472907-NGjoV82C6UwGu4xXLod1R3SKsWG9hfCXntt8Smxr',
            access_token_secret = '98S3jvUx5TZQxHYfBcP971ow02mTzeyQUdILamHp3Oee1'
        )

        results = []
        site = "https://twitter.com"
        for status in client.searchTweetsIterable(order):
            handle = status['user']['screen_name']
            results.append({
                "screen_name": handle,
                "text": status['text'],
                "full_name": status['user']['name'],
                "url": "/".join([site, handle, "status", status['id_str']]),
                "created_at": status['created_at'],
            })
            if len(results) >= 6:  # cap at six tweets
                break

        return results

    except TwitterSearchException as e:
        print(e)
        return []
Exemplo n.º 7
0
def recent_tweets(term,amt):
    """Count '@' and '#' characters across up to *amt* recent English tweets.

    :param term: search term(s) passed to setKeywords
        (NOTE(review): setKeywords normally expects a list — confirm callers)
    :param amt: maximum number of tweets to scan
    :returns: (tweet_count, at_count, hash_count), or None when the
        Twitter API raises (e.g. rate-limited)
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords(term)       # words to search for
        tso.setLanguage('en')       # English tweets only
        tso.setCount(7)             # results per page
        tso.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        ts = TwitterSearch(
            consumer_key = 'anOyC9WPt8qP82BkKGt34A',
            consumer_secret = 'FzAFLwXEunP34fwu3VItB3zr1P8MTOg4URuNVEI1U',
            access_token = '307461472-FZDgkwOuqLnKXYUtUaJzyJYZpFp1Nhy4IrlBURz1',
            access_token_secret = 'hoiFrBIe85VbtyMbYcxrXjbFhqUF4a6Qjolw5qbKXc'
         )

        tweet_count = 0
        at_count = 0
        hash_count = 0
        for tweet in ts.searchTweetsIterable(tso):
            for char in tweet['text']:
                if char == "@":
                    at_count += 1
                if char == "#":
                    hash_count += 1

            tweet_count += 1
            if tweet_count >= amt:
                break
        return tweet_count, at_count, hash_count
    except TwitterSearchException as e:
        # fixed: Python 2 print statement -> print() call
        print("Over-exerting Twittter!! Come back in a few, you bad, bad warrior.")
Exemplo n.º 8
0
  def get_new_tweets(self, keywords: list) -> None:
    '''
    Use the TwitterSearch lib to fetch tweets that match the given keywords.
    Pass tweets to the _store method to update the database.
    '''
    fetched = []
    if self.DEBUG:
      print("Searching for tweets with {} as keywords.".format(keywords)) # DEBUG
    try:
      order = TwitterSearchOrder()
      order.setKeywords(keywords)
      order.setLanguage('en')
      order.setCount(1)
      order.setIncludeEntities(False)

      # placeholder credentials — replace before use
      api = TwitterSearch(
          consumer_key = 'YOUR STUFF HERE',
          consumer_secret = 'YOUR STUFF HERE',
          access_token = 'YOUR STUFF HERE',
          access_token_secret = 'YOUR STUFF HERE'
        )
      api.authenticate()
      for status in api.searchTweetsIterable(order):
        fetched.append(status)
    except TwitterSearchException as e:
      self.report_error(["TwitterSearchException", e])

    if self.DEBUG:
      print("Fetched {} new tweets with {} as keywords.".format(len(fetched), keywords)) # DEBUG
    self._store(fetched, keywords)
Exemplo n.º 9
0
def city_tweet():
    """Count '@' and '#' characters across up to 11 recent English retweets.

    Geocode filtering is disabled (could not get setGeocode working via the
    API). Returns (tweet_count, at_count, hash_count), or None on API error.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.addKeyword("RT")  # match retweets
        # tso.setGeocode(41.8819,87.6278,30,'km')  # disabled: see docstring
        tso.setLanguage("en")  # English tweets only
        tso.setCount(7)  # results per page
        tso.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        ts = TwitterSearch(
            consumer_key="anOyC9WPt8qP82BkKGt34A",
            consumer_secret="FzAFLwXEunP34fwu3VItB3zr1P8MTOg4URuNVEI1U",
            access_token="307461472-FZDgkwOuqLnKXYUtUaJzyJYZpFp1Nhy4IrlBURz1",
            access_token_secret="hoiFrBIe85VbtyMbYcxrXjbFhqUF4a6Qjolw5qbKXc",
        )

        tweet_count = 0
        at_count = 0
        hash_count = 0
        for tweet in ts.searchTweetsIterable(tso):
            for char in tweet["text"]:
                if char == "@":
                    at_count += 1
                if char == "#":
                    hash_count += 1
            tweet_count += 1
            if tweet_count > 10:
                break
        # fixed: Python 2 print statement -> print() call
        print(tweet_count, at_count, hash_count)
        return tweet_count, at_count, hash_count
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 10
0
def execute_twitter():
    """Fetch German tweets about 'Tourismus' and store previously-unseen
    ones in both MySQL (webcrawler table) and MongoDB (ktidashboard.crawler).

    Relies on the module-level MySQL connection `conn`; deduplicates via the
    'identifer' field already stored in Mongo.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords(['Tourismus'])  # words to search for
        tso.setLanguage('de')  # German tweets only
        tso.setCount(2)  # results per page
        tso.setIncludeEntities(False)  # skip entity metadata
        # set up the mysql connection (conn is module-level)
        conn.set_character_set('utf8')
        cursor = conn.cursor()
        cursor.execute('SET NAMES utf8;')
        cursor.execute('SET CHARACTER SET utf8;')
        cursor.execute('SET character_set_connection=utf8;')
        # set up the mongo connection
        client = MongoClient()
        db = client.ktidashboard
        items = db.crawler
        stored = items.distinct('identifer')

        ts = TwitterSearch(
            consumer_key = 'Z3rU3WMQnvg5xUsox7Rfg',
            consumer_secret = 'yQGMdqA9M25V5g2tsmg6GPdZuR9dr73chErWBx94Jk',
            access_token = '216641262-67CaLeYKWOPz54qObHHe9UESTlncdMyEP4zhl0bI',
            access_token_secret = 'H827lhCwUQka9TiMG7IwSAJkHVBC3f9DG78sy8uNA'
         )

        for tweet in ts.searchTweetsIterable(tso):
            url = ''
            # fixed: bare "except:" swallowed every error; only a missing
            # match (None.group) needs tolerating here
            try:
                url = re.search(r"(?P<url>https?://[^\s]+)", tweet['text']).group("url")
            except AttributeError:
                pass
            # reformat Twitter's "Mon Jan 01 00:00:00 +0000 2014" timestamp
            # (fixed: removed a dead duplicate assignment of `time`)
            time = tweet['created_at'][4:]
            time = time[:15] + time[-5:]
            fmt = "%b %d %H:%M:%S %Y"
            time = datetime.strptime(time, fmt).strftime('%Y-%m-%d %H:%M:%S')
            crawltime= datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            indeti =  getIdentifier(tweet['text'],time)
            if (indeti not in stored):
                cursor.execute("""INSERT INTO webcrawler (url,site,author,texts,time,crawltime,source,location) VALUES (%s, %s, %s, %s,%s, %s, %s,%s)""" , (url,'Twitter',tweet['user']['screen_name'].encode('utf-8'),tweet['text'].encode('utf-8'),time,crawltime,'tweet','world'))
                conn.commit()
                tweet = {"source": "tweet", "author":tweet['user'],"site":"Twitter","texts": tweet['text'],"location":"welt", "time": time, "crawltime":crawltime, "identifer":indeti }
                items.insert(tweet)
                stored.append(indeti)

    except TwitterSearchException as e:
        print(e)
Exemplo n.º 11
0
    def getTwitterMentions(self):
        """Search Twitter for self.twitterSearchTerm, record every linked url
        in the url db, and advance the stored last-seen tweet id.

        Writes the info db unconditionally at the end, even after API errors.
        """
        try:
            # fixed throughout: Python 2 print statements -> print() calls
            print('last id = %s' % self.status['twitter']['last_id'])

            tso = TwitterSearchOrder()
            tso.setKeywords([self.twitterSearchTerm])
            tso.setCount(100)
            tso.setIncludeEntities(True)  # entities carry the urls we need
            tso.setResultType('recent')
            if self.status['twitter']['last_id']:
                # only fetch tweets newer than the last one processed
                tso.setSinceID(self.status['twitter']['last_id'])

            ts = TwitterSearch(
                consumer_key=self.conf['twitter_app']['consumer_key'],
                consumer_secret=self.conf['twitter_app']['consumer_secret'],
                access_token=self.conf['twitter_app']['access_token'],
                access_token_secret=self.conf['twitter_app']['access_token_secret']
             )
            i = 1

            for tweet in ts.searchTweetsIterable(tso):
                print("#%s.  #%s" % ( i , tweet['id'] ))
                print('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ))
                # remember the highest tweet id seen so far
                if self.status['twitter']['last_id'] < tweet['id']:
                    self.status['twitter']['last_id'] = tweet['id']

                for u in tweet['entities']['urls']:
                    x = self.expandUrl(u['expanded_url'])
                    print(x)
                    data = self.readUrlDb(x)
                    data['tweets'][tweet['id']] = {
                        'id': tweet['id'],
                        'user_screen_name': tweet['user']['screen_name'],
                        'user_name': tweet['user']['name'],
                        'user_id': tweet['user']['id'],
                        'profile_image_url': tweet['user']['profile_image_url'],
                        'profile_image_local': self.cacheImage(tweet['user']['profile_image_url']),
                        'text': tweet['text'],
                        'created_at': tweet['created_at'],
                        }
                    self.writeUrlDb(data)
                    self.touchBucket3PostByURL(x)
                i += 1

        except TwitterSearchException as e:
            print(e)

        self.writeInfoDb()
Exemplo n.º 12
0
def check_twitter(query):
    """Summarise retweet activity for *query* over a recent time window.

    Scans English tweets until one falls outside the window (previous hour,
    within 2 minutes of now) and returns a summary string with the biggest
    and average retweet counts, or 'No tweets for specified keyword'.
    Returns None on an API error.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([query])    # words to search for
        tso.setLanguage('en')       # English tweets only
        tso.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        ts = TwitterSearch(
            consumer_key = 'A4yyJgy9yY0mcJDQn4LXhrjcz',
            consumer_secret = 'ID6luLGLOUCx9ADVl77IysKibEblDhuwS6sehQ3SUuEB3ZNsoW',
            access_token = '2689652840-K2dw8nIKu7VJHrW6snsOOeFZFiEGqd5wPAaLm9V',
            access_token_secret = 'oB0KjuzAv9bGSaPbDA0Ate7mXfnmhh94ff9x2EGQjcY0e'
         )

        now_time = delorean.parse(str(datetime.utcnow()))
        now_time_minute = now_time._dt.time().minute
        now_time_hour = now_time._dt.time().hour

        i = 0
        average_retweets = 0
        biggest_no_of_retweets = 0

        for tweet in ts.searchTweetsIterable(tso):
            tweet_time_minute = delorean.parse(ts.getMetadata()['date'])._dt.time().minute
            tweet_time_hour = delorean.parse(ts.getMetadata()['date'])._dt.time().hour
            # only count tweets from the previous hour, within 2 minutes of now
            if (tweet_time_hour + 1 == now_time_hour) and ((now_time_minute - tweet_time_minute) <= 2):
                average_retweets += tweet['retweet_count']
                if tweet['retweet_count'] >= biggest_no_of_retweets:
                    biggest_no_of_retweets = tweet['retweet_count']
                i += 1
            else:
                break

        # fixed: this block was tab-indented while the rest of the function
        # used spaces — a TabError under Python 3; also dropped unused `cont`
        try:
            average_retweets = average_retweets / i
            return 'The biggest number of retweets was ' + str(biggest_no_of_retweets) + ' and the average number of retweets was ' + str(average_retweets)
        except ZeroDivisionError:
            return 'No tweets for specified keyword'

    except TwitterSearchException as e:
        print(e)
Exemplo n.º 13
0
def twit_search(keywords):
    """Return up to five recent English tweets matching *keywords*.

    Each entry is a dict with screen_name, text, full_name, url and
    created_at keys. API errors are printed and yield an empty list.
    """
    try:
        order = TwitterSearchOrder()
        order.setKeywords(keywords)      # words to search for
        order.setLanguage('en')          # English tweets only
        order.setCount(7)                # results requested per page
        order.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        api = TwitterSearch(
            consumer_key='asX13sgNL5fVbVfSwyaLCw',
            consumer_secret='Y0SkBfcxZ5Q4AVmmXEMCcWI5lfUD3JBdgtd1fioJwU',
            access_token='956472907-NGjoV82C6UwGu4xXLod1R3SKsWG9hfCXntt8Smxr',
            access_token_secret='98S3jvUx5TZQxHYfBcP971ow02mTzeyQUdILamHp3Oee1'
        )

        found = []
        root = "https://twitter.com"
        for status in api.searchTweetsIterable(order):
            handle = status['user']['screen_name']
            found.append({
                "screen_name": handle,
                "text": status['text'],
                "full_name": status['user']['name'],
                "url": "/".join([root, handle, "status", status['id_str']]),
                "created_at": status['created_at'],
            })
            if len(found) >= 5:  # cap at five tweets
                break

        return found

    except TwitterSearchException as e:
        print(e)
        return []
Exemplo n.º 14
0
def gatherTwitterData():
    """Interactively prompt for search terms, fetch matching English tweets
    (retweets filtered out) and save them to a timestamped file under data/.

    :returns: the output filename on success, "" on a search error, or
        None when the user declines the confirmation prompt.
    """
    # fixed: raw_input() and print statements were Python 2 only
    inputVar = input("Enter serach terms (seperated by space):")
    inputVar = re.sub(' +', ' ', inputVar)  # remove double spaces
    inputWords = inputVar.split(" ")

    # build the display string and a filename stem from the words
    fn = ""
    disp = ""
    for w in inputWords:
        if disp == "":
            disp = "'" + w + "'"
            fn = w
        else:
            disp = disp + ", '" + w + "'"
            fn = fn + "_" + w

    print("You entered [" + disp + "]")

    inputVar = input("Would you like to start fetching tweets? This can only be done once per hour! (y/n default is n):")
    inputVar = inputVar.lower()
    if inputVar == "y" or inputVar == "yes" or inputVar == "j" or inputVar == "ja":
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            inputWords.append("-RT")  # filter retweets
            tso.setKeywords(inputWords)  # words to search for
            tso.setLanguage('en')  # English tweets only
            tso.setIncludeEntities(False)  # skip entity metadata

            # NOTE(review): credentials are hard-coded in source — should live in config
            ts = TwitterSearch(consumer_key = 'LI7bi7oui3FFUXHCSGcw',consumer_secret = 'QCBmNKVSXvkt7ioU4TQaf6XVL9pKBifD8ch3zQvY',access_token = '2353654315-7LkOG9CFFUewoeIezPloEEY2vHmpE0Mo8vFhkkB',access_token_secret = 'XKyVTRLQB9jDTlXteQbsoULKTqdI6w79IP3HokVI9R1Iu')

            filename = "data/" + fn + '_raw_search_' + str(time.time()) + ".txt"
            # fixed: the file handle leaked on exception; write text (utf-8)
            # instead of bytes, which fails on a text-mode file in Python 3
            with open(filename, 'w', encoding='utf8') as f:
                for tweet in ts.searchTweetsIterable(tso):
                    text = tweet['user']['screen_name'] + "\t" + tweet['text']
                    # collapse embedded newlines so each tweet is one line
                    text = " ".join(text.splitlines()) + "\n"
                    f.write(text)
                    print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))

            print("tweets saved to: " + filename)
            return filename
        except TwitterSearchException as e:
            print(e)
            return ""
Exemplo n.º 15
0
def twitterSearch(self,keyword,tweetCount):
    """Collect up to *tweetCount* unique English tweets for *keyword*.

    URL tokens are stripped from each tweet, and near-duplicates (same
    text, or same first 10 characters, as anything already collected)
    are skipped. Returns the accumulated list of joined tweet strings.
    """
    tweetList = []
    try:
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])     # words to search for
        tso.setIncludeEntities(False)  # skip entity metadata
        tso.setLanguage("en")          # English tweets only

        # NOTE(review): credentials are hard-coded in source — should live in config
        ts = TwitterSearch(
            consumer_key = 'RqDsNdw2qc482AaTyAQg',
            consumer_secret = '971GEjUVvlzyYotOX44Q1wm2lx6T8QhAWcXaX2ZTz8',
            access_token = '97889597-zq5BNAWr1jrUkMb2G2iH0XGTvAfbIi8cRK2DejtxO',
            access_token_secret = 'eGCTcpvcOTdlUBfIFQ6f7IJIOrwxeYU9bX1nK27KJOw'
            )

        cnt = 0
        totcnt = 0
        var = "Start:"
        for tweet in ts.searchTweetsIterable(tso):
            if cnt < tweetCount:
                totcnt = totcnt + 1
                # drop any whitespace-separated token containing a url
                # fixed: "is -1" identity comparison -> "== -1" equality
                chk = ' '.join(x for x in tweet['text'].split(' ') if x.find('http') == -1).strip()
                if chk not in var and chk[:10] not in var:
                    # fixed: Python 2 print statement -> print() call
                    print(chk)
                    var = var + chk + "||"
                    tweetList.append(var.strip())
                    cnt = cnt + 1
            else:
                break

    except TwitterSearchException as e:
        print(e)

    return tweetList
Exemplo n.º 16
0
def search():
    """Return coordinates extracted from tweets tagged 'preventebola2014'.

    Prints the error and returns None when the Twitter API raises.
    """
    try:
        order = TwitterSearchOrder()
        order.setKeywords(['preventebola2014',])  # hashtag to look for
        order.setCount(7)                         # results per page
        order.setIncludeEntities(False)           # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        client = TwitterSearch(
            consumer_key = 'OLLpwcWlXf3vKPzr3qMxUJkUO',
            consumer_secret = 'zDBSRAZLrvmouX5XInvgu8eObEjAfxM7iDCreUqyEbjPMyze03',
            access_token = '1974376242-XNEKCtqSrkiHnUfzf4dFjFCf61fmbzyWXnN8QNy',
            access_token_secret = 'IMCNi88B7Jz0WSODVq1e5wszeCgR63VsyuiVUf9Ns0GFa'
         )
        return returnCoordinate(client.searchTweetsIterable(order))
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 17
0
    def __init__(self, term, outfile):
        """Scrape English tweets matching *term* and append their cleaned
        text to *outfile*.

        Consumer credentials come from the environment; user tokens are
        cached in '.app_credentials' via an OAuth dance on first run.
        """
        # from venv vars
        CONSUMER_KEY = os.environ['CONSUMER_KEY']
        CONSUMER_SECRET = os.environ['CONSUMER_SECRET']

        try:
            tso = TwitterSearchOrder()
            tso.setKeywords([term])
            tso.setLanguage('en')
            tso.setCount(99)
            tso.setIncludeEntities(False)  # don't give us all those entity information

            MY_TWITTER_CREDS = os.path.expanduser('.app_credentials')

            # run the OAuth dance once and cache the resulting tokens
            if not os.path.exists(MY_TWITTER_CREDS):
                oauth_dance("emotiscrape", CONSUMER_KEY, CONSUMER_SECRET, MY_TWITTER_CREDS)

            oauth_token, oauth_secret = read_token_file(MY_TWITTER_CREDS)

            t = TwitterSearch(
                consumer_key = CONSUMER_KEY,
                consumer_secret = CONSUMER_SECRET,
                access_token = oauth_token,
                access_token_secret = oauth_secret
            )

            # fixed: the output file was opened before the try and never
            # closed (leak); "file" also shadowed the builtin of that name.
            # Text is written as utf-8 str rather than bytes.
            with open(outfile, 'a', encoding='utf-8') as out:
                for tweet in t.searchTweetsIterable(tso):
                    cleanTweet = self.cleanup_tweet(tweet['text'], term)
                    out.write(cleanTweet)
                    # fixed: Python 2 print statement -> print() call
                    print("pulled 99 tweets for %s, until %s [%s]" % (term, tweet['created_at'], tweet['id']))

        except TwitterSearchException as e:
            # take care of all those ugly errors if there are some
            print(e)
Exemplo n.º 18
0
def searchTweets(query):
    """Return the text of the first English tweet found for *query*.

    Returns None when the search yields no tweets, and a fallback error
    string when the Twitter API raises.
    """
    try:
        order = TwitterSearchOrder()
        order.setKeywords([query])
        order.setLanguage('en')
        order.setCount(1)
        order.setIncludeEntities(False)

        # module-level credentials
        client = TwitterSearch(
            consumer_key = consumer_key,
            consumer_secret = consumer_secret,
            access_token = access_token,
            access_token_secret = access_token_secret
         )

        for status in client.searchTweetsIterable(order):
            return status['text']

    except TwitterSearchException:
        return 'Error @Vireo no tweets loaded #fail #lame'
Exemplo n.º 19
0
def main():
    """Search English tweets for 'noodles' and append each author's
    expanded profile url to extractedUri.txt (one url per line)."""
    try:
        tso = TwitterSearchOrder()
        tso.setKeywords(['noodles'])   # search key word
        tso.setLanguage('en')          # English tweets only
        tso.setCount(100)              # 100 tweets per page
        tso.setIncludeEntities(False)  # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        ts = TwitterSearch(
            consumer_key='fpTauqKqCRj4Gp8m9jb9WCilk',
            consumer_secret=
            'OrDd7NssqrvLgOXnzuDkGcS8UbTNoY1jFYJF0HS6daxELfyI2k',
            access_token='2822384568-jleRlhWap2Y7SMDW9y9tXkji95GHYDJPHK2IZ0b',
            access_token_secret='eVWGqNuLEk7xG1t47vLSkwBhJ6cQyNbeiZGShdRZXKF2A'
        )
        # fixed: the output file was re-opened and re-closed for every url;
        # open it once (append mode) for the whole run
        with open('extractedUri.txt', 'a') as saveFile:
            for tweet in ts.searchTweetsIterable(tso):
                # a tweet's url lives at user->entities->url->urls->expanded_url
                try:
                    for sea in tweet['user']['entities']['url']['urls']:
                        data = sea['expanded_url']
                        if data:
                            saveFile.write(data)
                            saveFile.write('\n')
                # not every tweet has an expanded url, so catch the KeyError
                except KeyError:
                    # fixed: Python 2 print statement -> print() call
                    print('error')
    # catch all the search exceptions
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 20
0
class TwitterSearcher:
  """Thin wrapper around TwitterSearch, preconfigured for English tweets."""

  def __init__(self):
    try:
        # default search order; the keyword is replaced on every search()
        self.tso = TwitterSearchOrder()
        self.tso.setKeywords(['Ukraine'])
        self.tso.setLanguage('en')
        self.tso.setCount(100)             # results per page
        self.tso.setIncludeEntities(False) # skip entity metadata

        # NOTE(review): credentials are hard-coded in source — should live in config
        self.ts = TwitterSearch(
            consumer_key = 'ZewlOk302H9ydKF0KGcLJQGz8',
            consumer_secret = 'pGBybPshs2G1ncC6S3cYkAF5N2M2lG4x5uh7LHHt4yjdDTsClO',
            access_token = '12981092-z3ZP0PlQj42h9tZk2UUdtDmUqh5iNZsC9uQWfkOah',
            access_token_secret = 'd5wjPYGvFTAsTGGwekcKpQCXi8uthmzdkI77CPEm2e3MW'
         )

    except TwitterSearchException as e:
        print(e)

  def getMetadata(self):
    """Return the metadata recorded by the most recent search() call."""
    return self.metadata

  def search(self, keyword, maxCount = 1000):
    """Collect tweets matching *keyword*, stopping after maxCount+1 results."""
    collected = []
    seen = 0
    try:
      self.tso.setKeywords([keyword])
      for status in self.ts.searchTweetsIterable(self.tso):
        if seen > maxCount:
          break
        collected.append(status)
        seen += 1
    except TwitterSearchException as e:
      print(e)

    self.metadata = self.ts.getMetadata()
    return collected
Exemplo n.º 21
0
    def pegatweets(self, key):
            """Search Portuguese tweets for *key* and return their
            accent-stripped texts as a list."""
            collected = []
            order = TwitterSearchOrder()
            order.setKeywords([key])   # term we would like to search for
            order.setLanguage('pt')    # Portuguese tweets only
            order.setCount(1)          # results per page

            # Twitter API access tokens
            api = TwitterSearch(
                consumer_key = 'iaSmYN1LBd9mhMQgxBwFSw',
                consumer_secret = 'cnZsGPlyQqpllOPb9vVHFI0g8QI5CZ96XrsN5cgHw',
                access_token = '60440481-8XyGdX7GSFYjgzYlp7twaehGFlpN0BV7HGIKXvAad',
                access_token_secret = 'hTRTO2y7VTUn3x9eaCqyXABtD4VWVwQR8sHXbrX4QcFD4'
             )

            for tweet in api.searchTweetsIterable(order):
                cleaner = processamentoTexto()  # text-processing helper
                stripped = cleaner.remover_acentos(tweet['text'].encode('utf-8').strip())
                print( '@%s Twittou: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
                collected.append(stripped)
            return collected
Exemplo n.º 22
0
def main():
    """Search Twitter for 'noodles' tweets and append each author's expanded
    profile URL (when present) to extractedUri.txt, one URL per line."""
    try:
        # create and configure a TwitterSearchOrder object
        tso = TwitterSearchOrder()
        tso.setKeywords(['noodles'])
        tso.setLanguage('en')          # English tweets only
        tso.setCount(100)              # 100 tweets per page
        tso.setIncludeEntities(False)  # tweet-level entities are not needed here

        # keys to interact with the twitter API
        ts = TwitterSearch(
            consumer_key = 'fpTauqKqCRj4Gp8m9jb9WCilk',
            consumer_secret = 'OrDd7NssqrvLgOXnzuDkGcS8UbTNoY1jFYJF0HS6daxELfyI2k',
            access_token = '2822384568-jleRlhWap2Y7SMDW9y9tXkji95GHYDJPHK2IZ0b',
            access_token_secret = 'eVWGqNuLEk7xG1t47vLSkwBhJ6cQyNbeiZGShdRZXKF2A'
         )

        # open the output file once instead of reopening it for every URL;
        # the with-block guarantees it is closed even if the search raises
        with open('extractedUri.txt', 'a') as saveFile:
            for tweet in ts.searchTweetsIterable(tso):
                # path: tweet -> user -> entities -> url -> urls -> expanded_url
                try:
                    for sea in tweet['user']['entities']['url']['urls']:
                        data = sea['expanded_url']
                        # if there is some data then write it to file
                        if data:
                            saveFile.write(data)
                            saveFile.write('\n')
                # not every user carries an expanded url, so the nested lookup
                # can raise KeyError -- skip those tweets
                except KeyError:
                    print('error')
    # catch all the search exceptions
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 23
0
def get_photos(search_terms):
    """Search tweets for *search_terms* and return a JSON array holding the
    first media (photo) URL of every tweet that carries media entities.

    :param search_terms: list of keywords to search for
    :return: JSON string (list of media URLs), or None on TwitterSearchException
    """
    try:
        tso = TwitterSearchOrder()
        tso.setKeywords(search_terms)
        tso.setLanguage('en')        # English tweets only (old comment wrongly said German)
        tso.setCount(40)             # 40 results per page (old comment wrongly said 7)
        tso.setIncludeEntities(True) # entities ARE needed: they hold the media URLs

        # NOTE(review): placeholder credentials -- fill in real tokens before use
        ts = TwitterSearch(consumer_key='CONSUMER_KEY',
                           consumer_secret='CONSUME_SECRET',
                           access_token='ACCESS_TOKEN',
                           access_token_secret='ACCESS_TOKEN_SECRET')

        total = 0

        list_of_media = []

        for tweet in ts.searchTweetsIterable(tso):
            total += 1
            # only catch the lookup errors a malformed tweet can raise,
            # instead of a bare except that would hide real bugs
            try:
                ent = tweet[u'entities']
                if u'media' in ent:
                    media = ent[u'media']
                    photo_url = media[0]['media_url']
                    list_of_media.append(photo_url)
            except (KeyError, IndexError):
                pass

        print(total)

        return json.dumps(list_of_media)

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
Exemplo n.º 24
0
def getTweetsForKeyword(keyword, last_id=None):
    """
    Get the (recent) tweets for a given keyword
    :param keyword: the query keyword
    :param last_id: if given, only tweets newer than this tweet id are fetched
    :return: a list of tweets. List is empty if an error occurs
    """
    tweet_list = []

    try:
        print('*** Searching tweets for keyword: %s ...' % keyword)
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setResultType('recent')
        tso.setCount(100)
        tso.setIncludeEntities(True)

        # resume after the newest tweet we already have
        if last_id is not None:
            tso.setSinceID(last_id)

        ts = TwitterSearch(
            consumer_key=params.CONSUMER_KEY,
            consumer_secret=params.CONSUMER_SECRET,
            access_token=params.ACCESS_TOKEN,
            access_token_secret=params.ACCESS_TOKEN_SECRET
        )

        ts.authenticate()

        counter = 0

        for tweet in ts.searchTweetsIterable(tso):
            counter += 1
            tweet_list.append(tweet)
        print('*** Found a total of %i tweets for keyword: %s' % (counter, keyword))
        return tweet_list

    # `except X, e` is Python-2-only syntax; `as` works on 2.6+ and 3.x.
    # Exception objects have no portable .message attribute -- use str(e).
    except TwitterSearchException as e:
        print('[ERROR] %s' % e)
        return tweet_list
Exemplo n.º 25
0
def search_tweets(query):
    """Return the text of every English tweet matching *query*.

    On any TwitterSearchException an empty list is returned instead.
    """
    try:
        order = TwitterSearchOrder()
        order.setKeywords([query])
        order.setLanguage('en')
        order.setCount(100)
        order.setIncludeEntities(False)

        client = TwitterSearch(
                consumer_key = consumer_key,
                consumer_secret = consumer_secret,
                access_token = access_token,
                access_token_secret = access_token_secret
                )

        return [status['text'] for status in client.searchTweetsIterable(order)]

    except TwitterSearchException:
        return []
Exemplo n.º 26
0
def searchTwitter(pThisTerm):
    """Search English tweets containing links plus *pThisTerm* and append each
    author's profile URL to C:/Python27/myFiles/tweetFile.txt.

    Terminates the interpreter via exit() on TwitterSearchException.
    """
    try:
        # create a TwitterSearchOrder object
        tso = TwitterSearchOrder()
        # keywords must be passed URL encoded; filter%3Alinks keeps only tweets with links
        tso.setKeywords(["filter%3Alinks", pThisTerm])
        tso.setLanguage('en')        # English tweets only
        tso.setCount(100)            # maximum number of tweets per page
        tso.setIncludeEntities(True) # include the entity information
        #tso.setResultType('recent')

        # create a TwitterSearch object with my secret tokens (@CorrenMcCoy)
        ts = TwitterSearch(
            consumer_key = 'LrA1DdH1QJ5cfS8gGaWp0A',
            consumer_secret = '9AX14EQBLjRjJM4ZHt2kNf0I4G77sKsYX1bEXQCW8',
            access_token = '1862092890-FrKbhD7ngeJtTZFZwf2SMjOPwgsCToq2A451iWi',
            access_token_secret = 'AdMQmyfaxollI596G82FBipfSMhagv6hjlNKoLYjeg8'
         )

        # the with-block closes the file on success AND on error; the original
        # referenced tweetFile inside the except handler, which raised NameError
        # when the failure happened before the file was opened
        with codecs.open('C:/Python27/myFiles/tweetFile.txt', 'a', 'utf-8') as tweetFile:
            for tweet in ts.searchTweetsIterable(tso):
                if tweet['user']['url'] is not None:
                    print(tweet['user']['url'])
                    tweetFile.write(tweet['user']['url'] + '\n')

    # Error handling. Terminate.
    except TwitterSearchException as e:
        print(e)
        exit()
Exemplo n.º 27
0
def index(request):
	"""Django view: answer with the hashtag entities of the FIRST 'basketball'
	tweet found, or a fallback/error message as an HttpResponse."""
	try:
		tso = TwitterSearchOrder() # create a TwitterSearchOrder object
		tso.setKeywords(['basketball']) # the words we would like to search for
		tso.setLanguage('en') # we want to see English tweets only
		tso.setCount(7) # 7 results per page
		tso.setIncludeEntities(False) # skip the extra entity information

		# it's about time to create a TwitterSearch object with our secret tokens
		ts = TwitterSearch(
			consumer_key = 'rv01lKOzYhKSDdoNv5PQ',
			consumer_secret = '2OmHBCuQPYyXWtec2yI1NDykzaDvz4f9oTpVay8QDoI',
			access_token = '47210904-9aMkHRVrv4kHmEb3WG7Tph3L7JRpjLT2Q0hX3J9uw',
			access_token_secret = 'TzdKpq2EDdhPZZF2Z8tMxbvqlFnowySmUvCNvJONqgbh0'
		)


		for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
			# the view deliberately responds with the first tweet only
			# return HttpResponse( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
			return HttpResponse('%s' % (tweet['entities']['hashtags']))

		# no tweet found: the original fell through and returned None, which is
		# not a valid Django view response
		return HttpResponse('no tweets found')

	except TwitterSearchException as e: # take care of all those ugly errors if there are some
		return HttpResponse(e)
Exemplo n.º 28
0
def scrape():
    try:
        tso = TwitterSearchOrder() # TwitterSearchOrder object
        keyword = raw_input('Enter disease to query: ')
        tso.setKeywords([keyword]) # word queries (command line)
        tso.setLanguage('en') # English
        tso.setIncludeEntities(False) # ignore entity information
        tso.setCount(100) #page count

        ts = TwitterSearch(
            consumer_key = 'GJ4i8BuOTyvHw6HwLnF2kyvNI',
            consumer_secret = 'Zfks3yXnJ9f2rVCHaQJ9mdbjsQwECgEHihck5DC0aSyO0ibV9E',
            access_token = '286429860-Y9DQdFcnOhv9Dyzi16HTBs0h0E9g9ArcLTvWKZfr',
            access_token_secret = 'RT3f3iAP8Pdn5xkYWyOfOXzvrWvOwbdWDGMsY4F0yBXap'
         )
        counter = 0
        for tweet in ts.searchTweetsIterable(tso):
            format_row(tweet['user']['screen_name'], tweet['text'])
            counter += 1
            if counter % 100 == 0:
                print counter, " tweets written"
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 29
0
  def getTweetsText(self):
    """Collect tweets mentioning self.cName (falling back to 'Airtel') and
    append a formatted '@user tweeted: text' line per tweet to self.textTweet.

    :return: the accumulated self.textTweet list
    """
    wordList = []
    # the original used `self.cName is ''`, an identity comparison that only
    # worked by accident of string interning; test emptiness/falsiness instead
    if not self.cName:
      wordList.append('Airtel')
    else:
      wordList.append(self.cName)

    # it's about time to create a TwitterSearch object with our secret tokens
    tso = TwitterSearchOrder() # create a TwitterSearchOrder object
    tso.setKeywords(wordList)
    tso.setLanguage('en')
    tso.setCount(10) # only give us 10 results per page
    tso.setIncludeEntities(False)
    ts = TwitterSearch(
      consumer_key = self.api_key,
      consumer_secret = self.api_secret,
      access_token = self.access_token_key,
      access_token_secret = self.access_token_secret
      )
    for tweet in ts.searchTweetsIterable(tso):
      self.textTweet.append('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

    return self.textTweet
Exemplo n.º 30
0
def get_photos(search_terms):
	"""Search tweets for *search_terms* and return a JSON array holding the
	first media (photo) URL of every tweet that carries media entities.

	:param search_terms: list of keywords to search for
	:return: JSON string (list of media URLs), or None on TwitterSearchException
	"""
	try:
		tso = TwitterSearchOrder() # create a TwitterSearchOrder object
		tso.setKeywords(search_terms)
		tso.setLanguage('en') # English tweets only (old comment wrongly said German)
		tso.setCount(40) # 40 results per page (old comment wrongly said 7)
		tso.setIncludeEntities(True) # entities ARE needed: they hold the media URLs

		# it's about time to create a TwitterSearch object with our secret tokens
		ts = TwitterSearch(
			consumer_key = 'b3RrA25ftevuRjUAtsiOv1ZKG',
			consumer_secret = '80rD93QtkiQphVR6ueJ3PPtaieoixOTb0EJWQ6YQh9eDMCVx9z',
			access_token = '16329367-vBuNh4kcRk1ID6S3sANokf0HDYRoAzBu9mVdVA5CU',
			access_token_secret = '9wlkuq4YeCS8QP61huKIG7fy10Gi6SrKe6OhkBm9R9hpX'
		 )

		total = 0

		list_of_media = []

		for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
			total += 1
			# only catch the lookup errors a malformed tweet can raise,
			# instead of a bare except that would hide real bugs
			try:
				ent = tweet[u'entities']
				if u'media' in ent:
					media = ent[u'media']
					photo_url = media[0]['media_url']
					list_of_media.append(photo_url)
			except (KeyError, IndexError):
				pass

		print(total)

		return json.dumps(list_of_media)

	except TwitterSearchException as e: # take care of all those ugly errors if there are some
		print(e)
Exemplo n.º 31
0
def get_photos(search_terms):
	"""Search tweets for *search_terms* and return a JSON array holding the
	first media (photo) URL of every tweet that carries media entities.

	:param search_terms: list of keywords to search for
	:return: JSON string (list of media URLs), or None on TwitterSearchException
	"""
	try:
		tso = TwitterSearchOrder() # create a TwitterSearchOrder object
		tso.setKeywords(search_terms)
		tso.setLanguage('en') # English tweets only (old comment wrongly said German)
		tso.setCount(40) # 40 results per page (old comment wrongly said 7)
		tso.setIncludeEntities(True) # entities ARE needed: they hold the media URLs

		# NOTE(review): placeholder credentials -- fill in real tokens before use
		ts = TwitterSearch(
			consumer_key = 'CONSUMER_KEY',
			consumer_secret = 'CONSUME_SECRET',
			access_token = 'ACCESS_TOKEN',
			access_token_secret = 'ACCESS_TOKEN_SECRET'
		 )

		total = 0

		list_of_media = []

		for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
			total += 1
			# only catch the lookup errors a malformed tweet can raise,
			# instead of a bare except that would hide real bugs
			try:
				ent = tweet[u'entities']
				if u'media' in ent:
					media = ent[u'media']
					photo_url = media[0]['media_url']
					list_of_media.append(photo_url)
			except (KeyError, IndexError):
				pass

		print(total)

		return json.dumps(list_of_media)

	except TwitterSearchException as e: # take care of all those ugly errors if there are some
		print(e)
Exemplo n.º 32
0
def getTweets(keyword):
    """Fetch up to roughly 1000 English tweets for *keyword*, classify each
    text's polarity and return the collected Tweet entities as JSON.

    :param keyword: search term
    :return: JSON string (via encodeTweet), or None on TwitterSearchException
    """
    sizeLimit = 1000
    curSize = 0
    tweetList = []

    try:
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage("en")
        tso.setCount(100)
        tso.setIncludeEntities(False)

        ts = TwitterSearch(
            consumer_key = 'L70HW9enbuZU16KUGVLWXQ',
            consumer_secret = 'C0bEJVSBlM5MK3wtjUMdfNEW1N7WUivHkoWCI8icNA0',
            access_token = '803704459-RyWDnsKaMUYz3ciF6JgMAyViRCm5fKyULQxKLsRD',
            access_token_secret = 'z2XWKWkvjZTv7eDUqnKu53aDY6ZwAisQIIxOKxz42p0wi'
        )

        for tweet in ts.searchTweetsIterable(tso):
            # NOTE(review): getStatistics()["tweets"] looks cumulative, so adding
            # it every iteration overcounts -- confirm against the TwitterSearch
            # library before relying on sizeLimit being exact
            tweetsSize = ts.getStatistics()["tweets"]
            curSize += tweetsSize
            # 'Sat Jan 01 12:00:00 +0000 2000' -> '2000-01-01 12:00:00'
            tweetTime = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))
            polarity = bayesClassifier.classifyTweet(tweet["text"])

            tweetEntity = Tweet(tweet["user"]["screen_name"], tweet["text"], tweetTime, polarity)
            tweetList.append(tweetEntity)

            # (removed dead `polarity = not(polarity)`: polarity was reassigned
            # at the top of every iteration and never read after the append)

            if curSize > sizeLimit:
                break
        return json.dumps(tweetList, default = encodeTweet)

    except TwitterSearchException as e:
        print(e)
def get_tweets(keyword, maxtweets=10):
    """Return the texts of tweets matching *keyword*.

    :param keyword: search term
    :param maxtweets: tweets requested per result page (not a page count;
        the API maximum is 100)
    :return: list of tweet texts; whatever was collected so far on error
    """
    tweets = []
    try:
        searchOrder = TwitterSearchOrder()
        searchOrder.setKeywords([keyword])
        searchOrder.setLanguage('en')
        searchOrder.setCount(maxtweets) # tweets per page (old comment wrongly said pages)
        searchOrder.setIncludeEntities(False)
        # don't give us all that entity information.
        # Create the Twitter Search API object; the credentials come from your
        # Twitter Application on the Twitter Developer Site.
        ts = TwitterSearch(consumer_key = CONSUMER_KEY,consumer_secret = CONSUMER_SECRET,access_token = ACCESS_TOKEN,access_token_secret = ACCESS_TOKEN_SECRET)

        for tweet in ts.searchTweetsIterable(searchOrder):
            tweets.append(tweet['text'])

    # best-effort: log any failure and fall through to return what we have
    except Exception as e:
        print("Error in retrieving tweets !!\n")
        print(e)

    return tweets
Exemplo n.º 34
0
def scrape():
    try:
        tso = TwitterSearchOrder()  # TwitterSearchOrder object
        keyword = raw_input('Enter disease to query: ')
        tso.setKeywords([keyword])  # word queries (command line)
        tso.setLanguage('en')  # English
        tso.setIncludeEntities(False)  # ignore entity information
        tso.setCount(100)  #page count

        ts = TwitterSearch(
            consumer_key='GJ4i8BuOTyvHw6HwLnF2kyvNI',
            consumer_secret=
            'Zfks3yXnJ9f2rVCHaQJ9mdbjsQwECgEHihck5DC0aSyO0ibV9E',
            access_token='286429860-Y9DQdFcnOhv9Dyzi16HTBs0h0E9g9ArcLTvWKZfr',
            access_token_secret='RT3f3iAP8Pdn5xkYWyOfOXzvrWvOwbdWDGMsY4F0yBXap'
        )
        counter = 0
        for tweet in ts.searchTweetsIterable(tso):
            format_row(tweet['user']['screen_name'], tweet['text'])
            counter += 1
            if counter % 100 == 0:
                print counter, " tweets written"
    except TwitterSearchException as e:
        print(e)
Exemplo n.º 35
0
 # NOTE(review): fragment of a larger script -- `tso`, KEY_WORDS, UNTIL, the
 # credential constants, DB_URL, PROJECT_ID and needs_sleep() are defined above
 # this excerpt.
 tso.setKeywords(KEY_WORDS) # let's define all words we would like to have a look for
 #tso.setLanguage('es') # we want to see Spanish tweets only
 tso.setCount(100) # results per page
 tso.setIncludeEntities(True)
 if UNTIL:
     tso.setUntil(UNTIL)
 # it's about time to create a TwitterSearch object with our secret tokens
 ts = TwitterSearch(consumer_key = CONSUMER_KEY,consumer_secret = CONSUMER_SECRET,access_token = ACCESS_TOKEN,access_token_secret = ACCESS_TOKEN_SECRET,verify = True)
 #ts.authenticate()
 count = 0
 # connect to mongo
 connection = pymongo.Connection("mongodb://{0}".format(DB_URL), safe=True)
 db=connection.twitter
 users = db.users
 new_users = 0
 response = ts.searchTweetsIterable(tso)
 for tweet in response: # this is where the fun actually starts :)
     # rate-limit headers of the last HTTP response
     limit_remaining = ts.getMetadata()['x-rate-limit-remaining']
     limit_reset = ts.getMetadata()['x-rate-limit-reset']
     limit = ts.getMetadata()['x-rate-limit-limit']
     sleep = needs_sleep(limit_remaining,limit_reset)
     if sleep:
         print 'Sleeping {0} seconds to avoid reaching rate limit.'.format(sleep)
         time.sleep(sleep)
     #tweets.insert(tweet)
     # register each tweeting user once per project for later processing
     if users.find({"screen_name": tweet['user']['screen_name'], 'twitteranalytics_project_id':PROJECT_ID}).count() == 0:
         users.insert({"screen_name":tweet['user']['screen_name'],"processed":"no","created_at":datetime.datetime.utcnow(),
             "twitteranalytics_project_id": PROJECT_ID})
         new_users += 1
     print tweet['user']['screen_name'],tweet['created_at'],count
     count += 1
Exemplo n.º 36
0
    # NOTE(review): fragment of a larger function -- `tso` and `i` are defined
    # earlier in the original script.
    # it's about time to create a TwitterSearch object with our secret tokens
    CONSUMER_KEY = 'SKxOLVcsxlPN68V3g2hAA'
    CONSUMER_SECRET = 'jBc0MUUNebHiIEkkraM7IruUyoSY2OZZyZ7eW6qqYw'
    TOKEN_FILE = 'out/twitter.oauth'
    APP_NAME = ''
    # interactive OAuth flow yields the user's access tokens
    (oauth_token, oauth_token_secret) = oauth_dance(APP_NAME, CONSUMER_KEY,
                                                    CONSUMER_SECRET)
    #print oauth_token
    ts = TwitterSearch(consumer_key=CONSUMER_KEY,
                       consumer_secret=CONSUMER_SECRET,
                       access_token=oauth_token,
                       access_token_secret=oauth_token_secret)
    tweets = []

    for tweet in ts.searchTweetsIterable(
            tso):  # this is where the fun actually starts :)
        #print tweet['text']
        txt = tweet['text']
        words = txt.split()
        #print txt
        #print words[0]
        # skip retweets; keep at most the first 100 originals
        if (words[0] != "RT"):
            if (i <= 100):

                tweets.append(tweet['text'])
                #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
                i = i + 1
            else:
                break

    print 'Total Tweets:', len(tweets)
Exemplo n.º 37
0
def analysis_keyword(request, keyword):
    """Django view: fetch up to 200 English tweets for *keyword*, classify each
    with tagger(), bucket the results into a 20-slot sentiment timeline and
    render classifier/alys_result.html (or classifier/error.html on failure).
    """
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setCount(100)
        tso.setIncludeEntities(False)
        # fetch at most `count` tweets in total
        count = 200
        i = 0
        # sentinels; narrowed to the actual min/max tweet times below
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            consumer_key='argHv5V9fa175ygapOHf1g',
            consumer_secret='pms9x6kFJ57WIz4SASnJQ6sMioCugsK2dnuMaD9CNo',
            access_token='167017116-jonEZIB9hyFH0waEsISJooIrat05RaZkDmFdCB41',
            access_token_secret='A9cCFgrHuRt2sgBhtyiWhmktFSot1SkdlVckkJ477ZpSi'
        )
        # fetch
        for tweet in ts.searchTweetsIterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at])
            if i >= count - 1:
                break
            else:
                i += 1

        # tagging: keep only tweets tagger() scores as non-neutral
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append(
                    [tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        # NOTE(review): if format_tweets is empty, start/end keep their
        # datetime.max/min sentinels and time_intvl goes negative -- confirm
        # get_slot() copes with that (and with a zero interval).
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data (x-axis label on every 4th slot only)
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(
            request, {
                'format_tweets': format_tweets,
                'len': len(format_tweets),
                'neg': negative,
                'pos': len(format_tweets) - negative,
                'keyword': keyword,
                'timeline': time_timeline,
                'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
OAuth = TwitterSearch(
    consumer_key='EFBFU5OyPJ8eogw98nubFYqrv',
    consumer_secret='LH8NdCZcGi5fgWe9tWrbTdbnB5EdFs05XV03fG1x8guleeu0S5',
    access_token='2828646955-lGyZgUyrBoeoWguK9BzumA5tYKHmbchm9IOIcXB',
    access_token_secret='15OrYXdvW1BcaQZYHSqFfKcn37uDuSYwlOig5T7VwgHWr')

for ii in range(1, 100):
    try:
        congress_tweets = TwitterSearchOrder()
        congress_tweets.setKeywords(['#ca17', '#CA17'])
        congress_tweets.setLanguage('en')
        congress_tweets.setCount(100)
        congress_tweets.setIncludeEntities(False)
        if ii > 1:
            congress_tweets.setSinceID(since_id)
        json_list = list()
        for tweet in OAuth.searchTweetsIterable(congress_tweets):
            json_list.append(tweet)
            json_list.append(tweet)
        #probably not the best way to do it. Once the rate limit comes back up. Sort the list by ids
        #and then pick up the most recent id. for the since id
        since_id = int(json_list[-1]['id'])
        with open("tweets_data/test/%s.json" % ("hashtag_CA17_" + str(ii)),
                  "w") as output:
            json.dump(json_list, output)
    except TwitterSearchException as e:
        print e
        print "Something went wrong"
    time.sleep(2)
Exemplo n.º 39
0
    # NOTE(review): tail of load_config() -- the def line is above this excerpt.
    config_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'config')
    config_file = os.path.join(config_dir, 'config.yml')
    with open(config_file) as f:
        # NOTE(review): yaml.load without an explicit Loader can execute
        # arbitrary constructors on untrusted files -- prefer yaml.safe_load.
        config = yaml.load(f)

    return config


try:
    config = load_config()
    tso = TwitterSearchOrder()
    tso.setKeywords(['#thankyou'])
    # tso.setLanguage('de')
    tso.setCount(10)
    # search within a 20000-unit radius around 52.5085378, 13.4557724
    tso.setGeocode(52.5085378, 13.4557724, 20000)
    tso.setIncludeEntities(False)

    # it's about time to create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(consumer_key=config['api_key'],
                       consumer_secret=config['api_secret'],
                       access_token=config['access_token'],
                       access_token_secret=config['access_token_secret'])

    # print only tweets that carry coordinates
    for tweet in ts.searchTweetsIterable(tso):
        if tweet['coordinates']:
            print tweet['coordinates'], tweet['text']

except TwitterSearchException as e:
    print(e)
Exemplo n.º 40
0
def analysis_keyword(request, keyword):
    """Django view: fetch up to 200 English tweets for *keyword*, classify each
    with tagger(), bucket the results into a 20-slot sentiment timeline and
    render classifier/alys_result.html (or classifier/error.html on failure).
    """
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0]*time_slot
        neg_timeline = [0]*time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setCount(100)
        tso.setIncludeEntities(False)
        # fetch at most `count` tweets in total
        count = 200
        i = 0
        # sentinels; narrowed to the actual min/max tweet times below
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            consumer_key = 'argHv5V9fa175ygapOHf1g',
            consumer_secret ='pms9x6kFJ57WIz4SASnJQ6sMioCugsK2dnuMaD9CNo',
            access_token = '167017116-jonEZIB9hyFH0waEsISJooIrat05RaZkDmFdCB41',
            access_token_secret = 'A9cCFgrHuRt2sgBhtyiWhmktFSot1SkdlVckkJ477ZpSi'
            )
        # fetch
        for tweet in ts.searchTweetsIterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at])
            if i >= count-1:
                break
            else:
                i += 1

        # tagging: keep only tweets tagger() scores as non-neutral
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append([tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        # NOTE(review): if format_tweets is empty, start/end keep their
        # datetime.max/min sentinels and time_intvl goes negative -- confirm
        # get_slot() copes with that (and with a zero interval).
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data (x-axis label on every 4th slot only)
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time+i*time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(request, {
            'format_tweets':format_tweets,
            'len':len(format_tweets),
            'neg': negative,
            'pos': len(format_tweets) - negative,
            'keyword': keyword,
            'timeline': time_timeline,
            'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
            })
        return HttpResponse(template.render(context))
Exemplo n.º 41
0
        consumer_secret=auth["API_SECRET"],
        access_token=auth["ACCESS_TOKEN"],
        access_token_secret=auth["ACCESS_TOKEN_SECRET"])

# NOTE(review): fragment -- `search` (the TwitterSearch client), SEARCH_SPEC,
# RESULT_COUNT, tablib and puts/indent are defined above this excerpt, and the
# try-block below is truncated (its except clause is missing here).
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
tso.setKeywords(SEARCH_SPEC.all) # let's define all words we would like to have a look for
tso.setLanguage('en')
tso.setCount(RESULT_COUNT)
tso.setIncludeEntities(False)

# columns exported per tweet
user_fields = ['screen_name','utc_offset','description','location']
tweet_fields = ['created_at','text','retweet_count','favorite_count']

sheet = tablib.Dataset(headers=['id']+user_fields+tweet_fields)

search.searchTweetsIterable(tso)
queries = 0
puts(u"Fetching results:")
with indent(3):
    try:
        for tweet in search:
            # report progress whenever a new API query was issued
            if search.getStatistics()['queries'] != queries:
                puts('Fetched {0} tweets'.format(search.getStatistics()['tweets']))
                queries = search.getStatistics()['queries']
            data = [tweet['id_str']]
            for key in user_fields:
                data.append(tweet['user'][key])
            for key in tweet_fields:
                data.append(tweet[key])

            sheet.append(data)
Exemplo n.º 42
0
        consumer_secret = '',
        access_token = '',
        access_token_secret = ''
     )
    # NOTE(review): fragment -- `ts` was constructed just above this excerpt and
    # `f` (the keyword file) plus `tso` come from earlier in the original script.
    tso.setLanguage('en') # we want to see English tweets only
    tso.setCount(7) # 7 results per page
    tso.setIncludeEntities(False) # and don't give us all those entity information
    # ', XX' suffixes used to match 'City, STATE' profile locations
    stateszip = [', CA',', CO',', CT',', GA',', IN',', KY',', ME',', MO',', NV',', NH',', NY',', OH',', VA',', WI']
    # it's about time to create a TwitterSearch object with our secret tokens



    for lineKeyWord in f:
        tso.setKeywords([lineKeyWord])
        print lineKeyWord
        for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
            count = 0
            # keep only tweets whose author's profile location names a tracked state
            for state in stateszip:
                if (tweet['user']['location'].find(state) > 0):
                    count = count + 1
                    if count > 0 :
                        dic = {}
     #   dic["name"] = tweet['user']['screen_name']
                        dic["location"] = tweet['user']['location']
     #   dic["time"]=  tweet['created_at'] 
                        dic["tweettext"] = tweet['text'] 
        #Test1.insert(dic)
                        print dic["location"].split(',')[1]+ ":" + dic["tweettext"]

Exemplo n.º 43
0
def gatherTwitterData():
    """Interactively prompt for search terms, fetch matching English tweets
    (retweets filtered out via '-RT') and save 'screenname<TAB>text' lines to a
    timestamped file under data/.

    :return: the filename written, "" if the search failed, or None if the
        user declined to fetch
    """
    inputVar = raw_input("Enter serach terms (seperated by space):")
    inputVar = re.sub(' +', ' ', inputVar)  #remove double spaces
    inputWords = inputVar.split(" ")

    # build both a display string ('a', 'b') and a filename stem (a_b)
    fn = ""
    disp = ""
    for w in inputWords:
        if disp == "":
            disp = "'" + w + "'"
            fn = w
        else:
            disp = disp + ", '" + w + "'"
            fn = fn + "_" + w

    print("You entered [" + disp + "]")

    inputVar = raw_input(
        "Would you like to start fetching tweets? This can only be done once per hour! (y/n default is n):"
    )
    inputVar = inputVar.lower()
    if inputVar == "y" or inputVar == "yes" or inputVar == "j" or inputVar == "ja":
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            inputWords.append("-RT")  # '-RT' filters out retweets
            tso.setKeywords(inputWords)
            tso.setLanguage('en')  # English tweets only
            #tso.setCount(noTweets)
            tso.setIncludeEntities(False)  # no entity information needed

            ts = TwitterSearch(
                consumer_key='LI7bi7oui3FFUXHCSGcw',
                consumer_secret='QCBmNKVSXvkt7ioU4TQaf6XVL9pKBifD8ch3zQvY',
                access_token=
                '2353654315-7LkOG9CFFUewoeIezPloEEY2vHmpE0Mo8vFhkkB',
                access_token_secret=
                'XKyVTRLQB9jDTlXteQbsoULKTqdI6w79IP3HokVI9R1Iu')

            filename = "data/" + fn + '_raw_search_' + str(
                time.time()) + ".txt"
            # with-block replaces the manual open()/close() pair: the original
            # leaked the file handle when the search raised mid-loop
            with open(filename, 'w') as f:
                for tweet in ts.searchTweetsIterable(tso):
                    # collapse multi-line tweet text onto one output line
                    text = tweet['user']['screen_name'] + "\t" + tweet['text']
                    textList = text.splitlines()
                    text = " ".join(textList)
                    text = text + "\n"
                    f.write(text.encode('utf8'))

                    print('@%s tweeted: %s' %
                          (tweet['user']['screen_name'], tweet['text']))
            print("tweets saved to: " + filename)
            return filename
        except TwitterSearchException as e:  # take care of all those ugly errors if there are some
            print(e)
            return ""