示例#1
0
def insertOrUpdateProfileBatch(screenNames):
    """
    Fetch a batch of Twitter profiles and persist each one locally.

    Every screen name is looked up on the Twitter API in turn. A fetched
    profile is passed to `insertOrUpdateProfile`, which creates the record
    or updates the existing one. A failure for one screen name is printed
    and recorded, and processing carries on with the next name.

    :param screenNames: list of user screen names as strings, to be fetched
        from the Twitter API.

    :return successScreenNames: list of screen names, as strings, which were
        fetched and then inserted/updated in the db without error.
    :return failedScreenNames: list of screen names, as strings, for which
        either the API fetch or the insert/update step raised an error.
    """
    apiConn = authentication.getAPIConnection()

    succeeded, failed = [], []

    for handle in screenNames:
        try:
            remoteProfile = _getProfile(apiConn, screenName=handle)
        except TweepError as fetchErr:
            # Missing or suspended profiles end up here. There is no data
            # to store, so report it and move on to the next name.
            print("Could not fetch user: @{name}. {error}. {msg}".format(
                name=handle,
                error=type(fetchErr).__name__,
                msg=str(fetchErr)))
            failed.append(handle)
            continue

        try:
            record = insertOrUpdateProfile(remoteProfile)
            # One star per order of magnitude of followers. A count of zero
            # is mapped to no stars, since log10(0) is undefined.
            followers = record.followersCount
            magnitude = int(math.log10(followers)) if followers else 0
            print("Inserted/updated user: {name:20} {stars}".format(
                name=u"@" + record.screenName, stars="*" * magnitude))
            succeeded.append(handle)
        except Exception as storeErr:
            print(("Could not insert/update user: @{name}. {error}. {msg}".
                   format(name=handle,
                          error=type(storeErr).__name__,
                          msg=str(storeErr))))
            failed.append(handle)

    return succeeded, failed
 def test_getAPIConnection(self):
     """
     Check that an API connection can be requested without the user flow.

     Calls `authentication.getAPIConnection` with `userFlow=False`, i.e.
     the App Access token route rather than a user-authorised one.
     """
     connection = authentication.getAPIConnection(userFlow=False)
示例#3
0
def insertTrendsForWoeid(woeid, userApi=None, delete=False, verbose=True):
    """
    Fetch the current trends for one place and store them in the database.

    The Twitter API is queried once for the given WOEID (the API is noted
    to return up to 50 trends for a place). For each trend in the response
    a `db.Trend` is created from the trend's name and tweet volume and is
    then assigned to the place using the WOEID. Other fields of the
    response, such as the location and time, are not read.

    When verbose output is on, the topic is printed exactly as stored; no
    ASCII re-encoding is applied in this function.

    :param woeid: Integer for WOEID value of a Place.
    :param userApi: tweepy API connection object. Supply a user-authorised
        connection to use it instead of the module's app-authorised
        connection, which is otherwise created on first use and reused.
    :param delete: Boolean, default False. If True, each Trend is deleted
        again straight after it has been inserted. Useful for testing.
    :param verbose: Boolean, default True. Print a line of details for each
        trend added (and for each one removed, when `delete` is set).

    :return: Count of trends in the API response, as an integer.
    """
    global appApi

    now = datetime.datetime.now()
    print(f"{now.strftime('%x %X')} Inserting trend data for WOEID {woeid}")

    assert isinstance(
        woeid, int
    ), f"Expected WOEID as type `int` but got type `{type(woeid).__name__}`."

    # Prefer the caller's connection. Otherwise fall back to the shared
    # app-token connection, creating it the first time it is needed.
    api = userApi
    if not api:
        if not appApi:
            appApi = authentication.getAPIConnection()
        api = appApi

    trends = api.trends_place(woeid)[0]["trends"]

    rowTemplate = (
        "Added trend: {tweetID:4d} | {topic:25} - {volume:7,d} K |"
        " {woeid:10} - {place}."
    )

    for item in trends:
        name = item["name"]
        tweetVolume = item["tweet_volume"]
        trendRec = db.Trend(topic=name, volume=tweetVolume).setPlace(woeid)

        if verbose:
            # Volume is shown in thousands; a missing volume is shown as 0.
            details = dict(
                tweetID=trendRec.id,
                topic=trendRec.topic,
                volume=(trendRec.volume // 1000 if trendRec.volume else 0),
                woeid=trendRec.place.woeid,
                place=trendRec.place.name,
            )
            print(rowTemplate.format(**details))

        if delete:
            db.Trend.delete(trendRec.id)
            if verbose:
                print(" - removed from db.")

    return len(trends)
示例#4
0
    Get my own tweets

    Do bar graph.
    Do tag cloud.

    Store tweets in DB with mix of columns I want and JSON column
    for full object.
"""
import json

import tweepy

from lib.twitter_api import authentication as twitterAuth


# Twitter API connection, created once when this module is imported.
api = twitterAuth.getAPIConnection()


def _writeJSON(data, filename):
    """
    Write data to a file as JSON, indented by 4 spaces.

    :param data: JSON-serialisable object to write out.
    :param filename: Path of the file to write. An existing file is
        overwritten.

    :return: True once the data has been written.
    """
    print("Write")
    with open(filename, mode="w") as outFile:
        json.dump(data, fp=outFile, indent=4)
    return True


def _readJSON(filename):
    """
    Read a JSON file and return its parsed content.

    :param filename: Path of the JSON file to read.

    :return: The object decoded from the file's JSON text.
    """
    print("Read")
    with open(filename) as inFile:
        return json.loads(inFile.read())
示例#5
0
def insertOrUpdateTweetBatch(
    profileRecs,
    tweetsPerProfile=200,
    verbose=False,
    writeToDB=True,
    campaignRec=None,
    onlyUpdateEngagements=True,
):
    """
    Get Twitter tweet data from the Twitter API for a batch of profiles
    and store their tweets in the database.

    Profiles are processed one at a time. If fetching tweets for a profile
    raises a TweepError, the error is printed and that profile is skipped.
    If handling a single tweet raises any exception, the error is printed
    and counted, and processing continues with the next tweet.

    The verbose and writeToDB flags can be used together to print tweet
    data which would be inserted into the database without actually inserting
    it. This can be used to preview tweet data without increasing storage or
    using time to do inserts and updates.

    :param profileRecs: list of Profile objects, to create or update
        tweets for. This might be a list from the Profile table which
        has been filtered based on a job schedule, or Profiles which
        match criteria such as high follower count.
    :param tweetsPerProfile: Default 200. Count of tweets to get for each
        profile, as an integer. If this is 200 or less, then page limit is
        left at 1 and the items per page count is reduced. If this is
        more than 200, then the items per page count is left at 200
        and page limit is adjusted to get a number of tweets as the
        next multiple of 200.
        e.g. 550 tweets needs 2 pages to get the first 400 tweets,
            plus a 3rd page to get the additional 150 tweets.
            We simplify to get 200*3 = 600 tweets, to keep the count
            consistent on each query.

        Note that even if 200 tweets are requested, the API sometimes returns
        only 199 and the user may have posted fewer than the requested tweets.

        The limit for a single request to the API is 200, therefore any
        number up to 200 has the same rate limit cost. It may be useful to set
        a number here as 200 or less if we want to get through all the users
        quickly, as this takes fewer API queries and fewer db inserts
        or updates. Also, consider that a very low number may lead to deadtime,
        where the script takes a fixed time to get 200 or 1 tweets and
        now that it has processed the 1 requested and the window limit is
        hit, it has no Tweet processing to do while waiting for the next rate
        limited window. Though a low value will mean less storage space
        is required.
    :param verbose: Default False. If True, print the data used to create
        a local Tweet record. This data can be printed regardless of whether
        the data is written to the db record or not.
    :param writeToDB: Default True. If True, write the fetched tweets
        to local database, otherwise print and discard them. This is useful
        when used in combination with verbose flag which prints the data.
    :param campaignRec: Campaign record to assign to the local Tweet records.
        Default None to not assign any Campaign.
    :param onlyUpdateEngagements: Default True to only update the favorite
        and retweet count of the tweet in the local db. If False, update
        other fields too. Those are expected to be static on the Twitter API,
        but if rules change on this repo then it is useful to apply them
        historically on existing Tweet records. This flag only affects
        existing records.

    :return: None
    """
    APIConn = authentication.getAPIConnection()

    # Translate the requested tweet count into a page size and page count.
    if tweetsPerProfile <= 200:
        tweetsPerPage = tweetsPerProfile
        pageLimit = 1
    else:
        tweetsPerPage = 200
        # Round up to get the last page which might have fewer items
        pageLimit = math.ceil(tweetsPerProfile / tweetsPerPage)

    for p in profileRecs:
        try:
            fetchedTweets = _getTweets(APIConn,
                                       userID=p.guid,
                                       tweetsPerPage=tweetsPerPage,
                                       pageLimit=pageLimit)
        except TweepError as e:
            # Report the failed fetch and carry on with the next profile.
            print("Could not fetch tweets for user: @{screenName}."
                  " {type}. {msg}".format(screenName=p.screenName,
                                          type=type(e).__name__,
                                          msg=str(e)))
        else:
            print("User: {0}".format(p.screenName))

            if writeToDB:
                print("Inserting/updating tweets in db...")
            else:
                print("Displaying tweets but not inserting/updating...")

            # Per-profile counters. `added` counts every tweet handled
            # without an exception, including when writeToDB is False.
            added = errors = 0
            for f in fetchedTweets:
                try:
                    data, tweetRec = insertOrUpdateTweet(
                        tweet=f,
                        profileID=p.id,
                        writeToDB=writeToDB,
                        onlyUpdateEngagements=onlyUpdateEngagements,
                    )
                    # Only link to the campaign when a record was returned.
                    if tweetRec and campaignRec:
                        try:
                            campaignRec.addTweet(tweetRec)
                        except DuplicateEntryError:
                            # Ignore error if Tweet was already assigned.
                            pass
                    if verbose:
                        if tweetRec:
                            tweetRec.prettyPrint()
                        else:
                            # No record was created, so use data dict.
                            m = data["message"]
                            created = data["createdAt"]
                            data["message"] = lib.text_handling.flattenText(m)
                            data["createdAt"] = str(lib.set_tz(created))
                            # TODO: Check if this will raise an error
                            # on unicode symbols in message.
                            print(json.dumps(data, indent=4))
                    added += 1
                except Exception as e:
                    # Best effort: report and count the failure, then
                    # continue with the next tweet.
                    print("Could not insert/update tweet `{id}` for user"
                          " @{screenName}. {type}. {msg}".format(
                              id=f.id,
                              screenName=p.screenName,
                              type=type(e).__name__,
                              msg=str(e),
                          ))
                    errors += 1

                total = added + errors
                # Print stats on every 10 processed and on the last item.
                # NOTE(review): indexing with [-1] assumes fetchedTweets is
                # a sequence such as a list - confirm against _getTweets.
                if total % 10 == 0 or f == fetchedTweets[-1]:
                    print("Total: {total:2,d}. Added: {added:2,d}. "
                          "Errors: {errors:2,d}.".format(total=total,
                                                         added=added,
                                                         errors=errors))