def check_user_info(self):
     """
     Look up the Twitter profile for ``self.username`` and store it on ``self.user``.

     When the lookup yields a profile entry, a TwitterUser is built from it
     and saved through TwitterUserDA. When the response is None or yields no
     entries at all, ``self.user`` is set to an unsaved placeholder user
     instead of failing on an unbound profile variable.

     :return: None
     """
     # connect to the api
     api = Tapi.TwitterAPI(cons_key, cons_secret, token, token_secret)
     # get request
     out = api.request('users/lookup', {'Name': '{}'.format(self.username),
                                        'screen_name': '{}'.format(self.username)})

     # Keep the last entry the response yields; stays None when the response
     # is missing or iterates over nothing.
     info = None
     if out is not None:
         for entry in out:
             info = entry

     if info is None:
         # nothing usable came back: fall back to a manually created placeholder
         self.user = TwitterUser(username=self.username, location="", description="*manually created*",
                                 date_joined="Joined June 2009", following='', followers='')
         return

     following = info['friends_count']
     followers = info['followers_count']
     join_date = info['created_at']
     location = info['location']
     description = info['description']
     user_id = info['id_str']
     # create user
     self.user = TwitterUser(username=info['screen_name'], location=location, description=description,
                             date_joined=join_date, following=following, followers=followers, id=user_id)
     # save this user
     TwitterUserDA().save_user(self.user)
Exemplo n.º 2
0
 def test_save_and_load_multiple_users(self):
     """Save a two-user list through TwitterUserDA, then load all stored users back."""
     members = [
         TwitterUser('@SCOTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120'),
         TwitterUser('@FLOTUS', 'white house', 'first lady of the united states', 'June 2010', '1k', '30'),
     ]
     roster = TwitterUserList('white house')
     for member in members:
         roster.add_user(member)

     store = TwitterUserDA()
     store.save_list(roster)
     # NOTE(review): the loaded collection is not compared against anything here
     reloaded = store.load_all()
Exemplo n.º 3
0
 def test_save_and_load_user(self):
     """Round-trip a single user through TwitterUserDA and check it loads back equal."""
     usr = TwitterUser('@POTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120')
     DA = TwitterUserDA()
     # save
     DA.save_user(usr)
     # load by the username that was just saved, so the lookup key cannot
     # drift away from the fixture
     load_usr = DA.load_user(username=usr.username)
     self.assertEqual(usr, load_usr)
Exemplo n.º 4
0
    def test_save_and_load_tweet(self):
        """Round-trip a single tweet through TweetDA and check it loads back equal."""
        author = TwitterUser('@POTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120')
        original = Tweet(
            'Donald Trump', author.username, author.user_id, '2020-02-01T06:33:23.000Z', 'Rocketman!',
            120, 3929, '', 300, 'image url', 'URL')

        store = TweetDA()
        store.save_tweet(original)
        # fetch the stored copy back by the id of the tweet that was saved
        reloaded = store.load_tweet(original.tweet_id)
        self.assertEqual(original, reloaded)
Exemplo n.º 5
0
 def test_save_and_load_multiple_tweets(self):
     """Save a two-tweet list through TweetDA and check the reloaded list equals it."""
     usr = TwitterUser('@POTUS', 'white house', 'president of the united states', 'June 2009', '14k', '120')
     twt1 = Tweet('Donald Trump', usr.username, usr.user_id, '2020-02-01T06:33:23.000Z', 'Rocketman!', 120, 3929, '',
                  300, 'image url', 'URL')
     twt2 = Tweet('Donald Trump', usr.username, usr.user_id, '2020-03-01T08:33:23.000Z', 'NO!', 120, 3929, '',
                  300, 'image url', 'URL')
     twt_list = TweetList('donny')
     twt_list.add_tweet(twt1)
     twt_list.add_tweet(twt2)
     DA = TweetDA()
     DA.save_list(twt_list)
     # load by the username the tweets were created with rather than a
     # hard-coded literal that does not match the fixture
     # NOTE(review): assumes load_all filters on the tweets' username field - confirm
     load_list = DA.load_all(username=usr.username)
     self.assertEqual(twt_list, load_list)
Exemplo n.º 6
0
        # Ask for the next account to monitor; entering "." clears `repeat`
        # and leaves the enclosing loop.
        recipient_name = raw_input(
            '\nPlease enter username to monitor (or "." to finish):')
        if (recipient_name == '.'):
            repeat = False
            break

        # Get a basic profile for @person being monitored to ensure they exist.
        # A single leading '@' is removed before the lookup.
        recipient_name = re.sub("^@", "", recipient_name)
        found, recipient_info = ts.get_user_info(recipient_name, None)

        if not found:
            print "I cannot locate user " + recipient_name + " , please retry\n"
            continue

        # Analyse the timeline for the monitored user to get a view of their language.
        recipient = TwitterUser.TwitterUser(recipient_info, verbose, unigram,
                                            bigram, trigram)
        # Timeline is requested by user id (first argument is None), with 200
        # passed as the last argument.
        ufound, timeline = ts.get_user_timeline(None, recipient.get_userid(),
                                                200)
        if ufound:
            for idx, post in enumerate(timeline):
                # NOTE(review): `hack` is assigned but never read in the visible code
                hack = None
                # posts are only analysed when `shoot` (set outside this view) is truthy
                if (shoot):
                    recipient.analyse(idx, post)
        else:
            print "I cannot locate user " + recipient.get_name(
            ) + " , please retry\n"
            continue

        recipient.pprint_basics()

        # Now get their [historical] mentions
Exemplo n.º 7
0
       prompt = 'Please enter a user id (or "." to finish):'
       not_found = 'User id not found\n'
   else:
       prompt = 'Please enter a user name (or "." to finish):'
       not_found = 'User not found\n'

   # Keep prompting for users until "." is entered; each user found is
   # analysed post by post and then summarised with pprint_basics().
   while repeat:
      user_handle = raw_input( prompt)
      if (user_handle == '.'):
         repeat = False
      else:
         # Look the user up by id when args.ids is set, otherwise by name;
         # a leading '@' is stripped in the by-name case only.
         if args.ids:
             found, user_info_json = ts.get_user_info( None, user_handle )
         else:
             user_handle = re.sub( "^@", "", user_handle )
             found, user_info_json = ts.get_user_info( user_handle, None )

         if found:
             user = TwitterUser.TwitterUser( user_info_json, args.verbose, botw )
             # `found` is reused here for the result of the timeline request
             found, timeline = ts.get_user_timeline( None, user.get_userid(), 200 )
             if found:
                 for idx, post in enumerate(timeline):
                     user.analyse( idx, post )
             else:
                 print "   Timeline not accessible"
             # basics are printed whether or not the timeline could be read
             user.pprint_basics()
         else:
             print not_found

Exemplo n.º 8
0
import TwitterUser

# Build a user from a numeric account id, show the text of its most recent
# tweet, then report how many tweets could be retrieved.
donnie = TwitterUser.TwitterUser(25073877)

# Single-argument call form prints the same on Python 2 and is valid on
# Python 3, matching the print call at the end of the script.
print(donnie.most_recent_tweet().tweet_text)
donnietweets = donnie.get_available_tweets()

print(len(donnietweets))
Exemplo n.º 9
0
def for2016():
    """Build a TwitterUser for each 2016 candidate handle and combine the API-pulled subset.

    Every handle gets a TwitterUser created with the module-level
    credentials, in the listed order; only the accounts named in the
    API-pull subset are handed to combine2016CSV.
    """
    # 2016
    handles = (
        'realDonaldTrump', 'JebBush', 'RealBenCarson', 'ChrisChristie',
        'TedCruz', 'CarlyFiorina', 'LindseyGraham', 'GovMikeHuckabee',
        'JohnKasich', 'RandPaul', 'GovernorPerry', 'MarcoRubio',
        'RickSantorum', 'ScottWalker',
    )

    accounts = {}
    for handle in handles:
        accounts[handle] = TwitterUser(handle, consumer_key, consumer_secret,
                                       access_key, access_secret)

    # full field, kept in handle order
    cands_2016 = [accounts[handle] for handle in handles]

    # subset of accounts passed on for combining
    api_pull_handles = (
        'RealBenCarson', 'ChrisChristie', 'CarlyFiorina', 'LindseyGraham',
        'GovernorPerry', 'RickSantorum',
    )
    api_pull = [accounts[handle] for handle in api_pull_handles]

    combine2016CSV(api_pull)
Exemplo n.º 10
0
def for2020():
    """Build a TwitterUser for each 2020 candidate handle and combine them all.

    Every handle gets a TwitterUser created with the module-level
    credentials, in the listed order, and the full list is handed to
    combinedUncleanedCSV.
    """
    # 2020
    handles = (
        'JoeBiden', 'CoryBooker', 'PeteButtigieg', 'JulianCastro',
        'TulsiGabbard', 'SenGillibrand', 'KamalaHarris', 'amyklobuchar',
        'BetoORourke', 'BernieSanders', 'ewarren', 'AndrewYang',
    )

    cands_2020 = []
    for handle in handles:
        cands_2020.append(
            TwitterUser(handle, consumer_key, consumer_secret, access_key,
                        access_secret))

    combinedUncleanedCSV(cands_2020)
Exemplo n.º 11
0
    def __init__(self, jsn_or_string,
                 do_tokenize=True,
                 do_parse_created_at=True,
                 store_json=False,
                 store_full_retweet_and_quote=True,
                 noise_tokens=None,
                 do_parse_source=False,
                 **kwargs):
        """
        Build a Tweet from its JSON representation.

        If the JSON is a delete notice, or has neither a 'text' nor a 'full_text' field, only
        ``self.id`` is set (to None) and no other attribute is populated.

        :param jsn_or_string: A json representation of a tweet, i.e. the output of json.loads(line) for a line of a file with
         tweets in it in json format, or a string that can be loaded with json.loads
        :param do_tokenize: whether or not to perform tokenization (which is very slow), default True
        :param do_parse_created_at: whether or not to parse the tweet date into a twitter datetime object
            (which is kind of slow), default True. When False, ``created_at`` holds the raw field value
            and ``local_time`` is None
        :param store_json: Whether or not to store the raw JSON for the tweet (weird but useful in some cases)
        :param store_full_retweet_and_quote: If true will store a Tweet() representation of retweets and quoted tweets
                if available in the tweet text. These tweets have the same arguments as those passed into the function
        :param noise_tokens: A collection of noise tokens to ignore during tokenization (if you're tokenizing);
            None (the default) means an empty set
        :param do_parse_source: whether or not to parse the 'source' field into ``source_link`` and ``source_name``;
            both are None when parsing is off or the field has no anchor element
        :param kwargs: Any other keyword arguments to pass into the tokenization function
        :return:
        """

        # a fresh set per call, so no default object is shared between instances
        if noise_tokens is None:
            noise_tokens = set()

        if isinstance(jsn_or_string, dict):
            jsn = jsn_or_string
        else:
            jsn = json.loads(jsn_or_string)

        if 'delete' in jsn or ('text' not in jsn and 'full_text' not in jsn):
            # not actually a tweet
            self.id = None
            return

        ################ Basic Stuff #######################################
        # store raw json (yuck, but useful in some random cases)
        if store_json:
            self.raw_json = jsn
        else:
            self.raw_json = None

        # Get tweet id
        self.id = get_id(jsn)

        # Get User info
        self.user = None
        if 'user' in jsn:
            self.user = TwitterUser.TwitterUser(user_data_object=jsn['user'])

        # so I don't have to try to remember each time whether its an int or string
        self.id_int = int(self.id)
        self.id_str = str(self.id)

        # get new lang field in tweet
        self.lang = lookup(jsn, 'lang')

        # get overall retweet count (i.e. ignore whether this is an original tweet)
        self.retweet_count = jsn.get('retweet_count', 0)
        self.quote_count = lookup(jsn, "quote_count", 0)
        self.reply_count = lookup(jsn, "reply_count", 0)
        self.favorited_count = jsn.get('favorite_count', 0)

        #######################################################

        ################ Text Stuff #######################################
        # Basic replacement of html characters
        tweet_text = get_text_from_tweet_json(jsn)
        self.text = HTMLParser.HTMLParser().unescape(tweet_text)

        # TOKEN EXTRACTION (runs on the raw text, before html unescaping)
        if do_tokenize:
            self.tokens = Tokenize.extract_tokens_twokenize_and_regex(tweet_text,
                                                                      noise_tokens,
                                                                      **kwargs)
        else:
            self.tokens = None
        #######################################################


        ################ (Extended) Entities #######################################
        self.entities = get_ext_status_ents(jsn)
        self.hashtags = get_hashtags(jsn)
        self.mentions = get_mentions(jsn, True)
        self.mentions_sns = get_mentions(jsn, False)
        # tolerate entities without a "urls" entry instead of iterating over None
        self.urls = [x['expanded_url'] for x in (self.entities.get("urls") or [])]
        self.media = lookup(jsn, 'extended_entities.media', []) + lookup(jsn, 'entities.media', [])
        #######################################################

        ################ GEO #######################################
        self.geo = None
        self.coordinates = None
        self.latitude = None
        self.longitude = None
        if 'coordinates' in jsn and jsn['coordinates']:
            self.coordinates = jsn['coordinates']
            if self.coordinates['type'] == 'Point':
                self.longitude = self.coordinates['coordinates'][0]
                self.latitude = self.coordinates['coordinates'][1]
        elif 'geo' in jsn and jsn['geo']:
            self.geo = jsn['geo']
            if 'coordinates' in self.geo:
                # read from the geo field itself: self.coordinates is None in
                # this branch. The pair is indexed in the opposite order to
                # the 'coordinates' field above.
                self.longitude = self.geo['coordinates'][1]
                self.latitude = self.geo['coordinates'][0]

        self.place = lookup(jsn, 'place')

        self.geocode_info = get_geo_record_for_tweet(jsn)
        #######################################

        ################ Source Field #######################################
        self.source = jsn['source']
        # always defined, so callers can read them without parsing enabled
        self.source_link = None
        self.source_name = None
        if do_parse_source:
            source_info = BeautifulSoup(self.source, 'html.parser').a
            try:
                self.source_link = source_info.get("href")
                self.source_name = source_info.text
            except AttributeError:
                # no usable anchor element in the source field
                self.source_link = None
                self.source_name = None
        #######################################################

        ################ Datetime stuff #######################################
        # always defined, including when the date is not parsed
        self.local_time = None
        if do_parse_created_at:
            self.created_at = get_created_at(jsn)
            # weird junk date
            # NOTE(review): the hard-coded upper bound discards every tweet
            # dated after 2020 - confirm whether that is still intended
            if self.created_at.year < 2000 or self.created_at.year > 2020:
                self.created_at = None

            # a local time can only be derived from a usable creation date
            if self.created_at is not None and lookup(jsn, 'user.utc_offset', None):
                self.local_time = arrow.get(arrow.get(self.created_at).timestamp + jsn['user']['utc_offset'])

        else:
            self.created_at = jsn.get('created_at', None)
        ####################################################################

        ################### Retweet Stuff###################################
        self.retweeted_tweet = None
        if 'retweeted_status' in jsn and store_full_retweet_and_quote:
            self.retweeted_tweet = Tweet(jsn['retweeted_status'],
                                         do_tokenize=do_tokenize,
                                         do_parse_created_at=do_parse_created_at,
                                         store_json=store_json,
                                         store_full_retweet_and_quote=store_full_retweet_and_quote,
                                         noise_tokens=noise_tokens,
                                         **kwargs)

        # ids are only requested when the user object carries one; a tweet
        # without a user object is treated as having no id
        has_user_id = 'id' in (jsn.get('user') or {})

        # TRY NOT TO USE THIS STUFF ANYMORE, DO EVERYTHING THROUGH THE RT OBJECT
        # See if this tweet was the user's own and it got retweeted
        self.retweeted_user_tweet_count = get_retweeted_count(jsn)
        self.retweeted = get_retweeted_user(jsn, return_id=has_user_id)
        self.retweeted_sn = get_retweeted_user(jsn, return_id=False)
        ####################################################################

        ################### Reply Stuff###################################
        self.reply_to = get_reply_to(jsn, return_id=has_user_id)
        # this is a better name but keeping both for backwards compatability
        self.reply_to_user_id = self.reply_to
        self.reply_to_sn = get_reply_to(jsn, return_id=False)
        # this is a better name but keeping both for backwards compatability
        self.reply_to_user_screenname = self.reply_to_sn
        # always defined; stays None when the tweet is not a reply
        self.in_reply_to_status_id = None
        if self.reply_to:
            self.in_reply_to_status_id = jsn.get('in_reply_to_status_id', None)
        ####################################################################

        ################### Quote Stuff ###################################
        # Quoted tweet stuff - we will only get a quote of a RT if we're storing the RT!
        self.is_retweet_of_quote = (jsn.get('is_quote_status', False) and
                                    'quoted_status' not in jsn and
                                    'retweeted_status' in jsn)
        self.quoted_tweet = None
        self.quoted_status_id = None
        # There are some inexplicable conditions in which it is a quote tweet but we don't get sent the quoted tweet info
        if 'quoted_status' in jsn:
            self.quoted_status_id = jsn.get('quoted_status_id', None)
            if store_full_retweet_and_quote:
                self.quoted_tweet = Tweet(jsn['quoted_status'],
                                          do_tokenize=do_tokenize,
                                          do_parse_created_at=do_parse_created_at,
                                          store_json=store_json,
                                          store_full_retweet_and_quote=store_full_retweet_and_quote,
                                          noise_tokens=noise_tokens,
                                          **kwargs)
        ####################################################################

        ################### All connected users stuff ###################################
        # Entries equal to self.id are filtered out while building the set, so
        # no separate removal is needed (removing an absent element raises KeyError).
        # NOTE(review): self.id is the tweet id - confirm whether the author's
        # user id was meant to be excluded here instead
        self.all_connected_users = set(
            x for x in get_all_associated_users_for_tweet(self) if x != self.id)