Пример #1
0
def getProfileInfo(username):
    """Look up a Twitter user and return selected profile fields as a dict.

    Args:
        username: Screen name (or id) passed straight to ``api.get_user``.

    Returns:
        A dict with the keys ``name``, ``username``, ``location``,
        ``profile_location``, ``geo_enabled``, ``statuses_count``,
        ``followers_count``, ``friends_count``, ``verified`` and
        ``profile_image_url``.  If the lookup raises ``TweepError`` the
        result is ``{"Error": <the exception instance>}`` instead.
    """
    started = time.perf_counter()

    twitter = c.setupTwitterAuth()

    try:
        account = twitter.get_user(username)
    except TweepError as err:
        # The exception object itself (not its message) is handed back.
        return {"Error": err}

    # Strip the "_normal" marker from the profile image URL
    # (presumably this selects the full-size variant -- confirm with Twitter docs).
    imageUrl = re.sub('_normal', '', account.profile_image_url_https)

    profile = dict(
        name=account.name,
        username=account.screen_name,
        location=str(account.location),
        profile_location=str(account.profile_location),
        geo_enabled=account.geo_enabled,
        statuses_count=account.statuses_count,
        followers_count=account.followers_count,
        friends_count=account.friends_count,
        verified=account.verified,
        profile_image_url=imageUrl,
    )

    elapsed = time.perf_counter() - started
    debugPrint(f"getProfileInfo in {elapsed:0.4f} seconds")
    return profile
Пример #2
0
def putDataForUserGermany():
    """Score users who tweet from Germany and store them in ``germany_users``.

    Resolves the place id for "Germany" through ``api.geo_search``, iterates
    over English tweets tagged with that place and, for the author of each
    tweet, downloads up to 150 timeline tweets.  The timeline is scored with
    the ``m`` helpers and one row (``score``, ``min``, ``max``) indexed by
    ``username`` is appended to the ``germany_users`` table.  After every
    insert a self-join ``DELETE`` keeps only the row with the highest
    ``ctid`` per username.

    Users with an empty timeline, or for whom ``m.getOverallScore`` returns
    ``-1``, are skipped.

    The module-level ``engine`` is only read here (no ``global`` statement is
    needed) and is disposed when the function ends -- including when an
    exception interrupts the crawl, which the previous version did not do.
    """
    api = configscript.setupTwitterAuth()
    places = api.geo_search(query="Germany", granularity="country")
    place_id = places[0].id
    tweets = tw.Cursor(api.search,
                       q="place:%s" % place_id,
                       tweet_mode='extended',
                       lang='en').items()

    try:
        for tweet in tweets:
            username = tweet.user.screen_name
            timeline = tw.Cursor(api.user_timeline,
                                 screen_name=username,
                                 tweet_mode="extended",
                                 exclude_replies=False,
                                 include_rts=False,
                                 lang='en').items(150)
            listAllTweets = list(timeline)
            if not listAllTweets:
                continue

            tweetsDict = m.getTweetsDict(listAllTweets)
            score = m.getOverallScore(tweetsDict)
            if score == -1:
                # -1 is the value this code treats as "no usable score".
                continue

            tweetsonlyscore = m.tweetsOnlyScore(tweetsDict)
            scoremax = m.getHappiestTweet(tweetsonlyscore)
            scoremin = m.getSaddestTweet(tweetsonlyscore)

            # One-row mapping; named ``row`` so the builtin ``dict`` is not shadowed.
            row = {
                username: {
                    "score": score,
                    "min": scoremin["score"],
                    "max": scoremax["score"]
                }
            }

            df = pd.DataFrame.from_dict(row, orient='index')
            df.index.name = 'username'
            df.to_sql('germany_users', con=engine, if_exists='append')

            # Drop older duplicates so each username keeps a single row.
            engine.execute(
                "DELETE FROM germany_users T1 USING germany_users T2 WHERE  T1.ctid  < T2.ctid AND  T1.username = T2.username;"
            )
    finally:
        # Release pooled connections even if the crawl fails part-way.
        engine.dispose()
Пример #3
0
def putDataDB():
    """Store tweets posted from Denmark in the ``tweets`` table.

    Resolves the place id for "Denmark" through ``api.geo_search``, feeds the
    English tweets tagged with that place to ``m.getTweetsDictRaw`` and
    appends the resulting frame (indexed by its ``id`` column) to the
    ``tweets`` table.  Afterwards a self-join ``DELETE`` removes rows that
    share ``id``, ``score`` and ``created`` with a row of higher ``ctid``.

    If no tweets were collected the function returns without touching the
    table; previously ``set_index('id')`` raised ``KeyError`` on the empty,
    column-less frame.

    The module-level ``engine`` is only read here and is disposed when the
    function ends, also when an exception is raised along the way.
    """
    api = configscript.setupTwitterAuth()
    places = api.geo_search(query="Denmark", granularity="country")
    place_id = places[0].id
    tweets = tw.Cursor(api.search,
                       q="place:%s" % place_id,
                       tweet_mode='extended',
                       lang='en').items()

    try:
        df = pd.DataFrame.from_dict(m.getTweetsDictRaw(tweets), orient='index')
        if df.empty:
            # Nothing to store, and an empty frame has no 'id' column to index on.
            return

        df = df.set_index('id')
        df.to_sql('tweets', con=engine, if_exists='append')

        # Remove exact duplicates (same id, score and created timestamp).
        engine.execute(
            "DELETE FROM tweets T1 USING tweets T2 WHERE  T1.ctid  < T2.ctid AND  T1.id    = T2.id AND  T1.score = T2.score AND  T1.created = T2.created;"
        )
    finally:
        # Release pooled connections whether or not the write succeeded.
        engine.dispose()
Пример #4
0
def celebrityScore(username):
    """Score one account's timeline and store the result in ``celebrity``.

    Downloads the user's timeline (no item limit is passed to the cursor),
    scores it with the ``m`` helpers and appends one row holding ``score``
    and ``pic`` (the profile image URL with "_normal" removed), indexed by
    ``username``, to the ``celebrity`` table.  A self-join ``DELETE`` then
    keeps only the row with the highest ``ctid`` per username.

    Nothing is written when the timeline is empty or when
    ``m.getOverallScore`` returns ``-1``.

    Args:
        username: Screen name to score; it is also printed to stdout.

    The module-level ``engine`` is only read here and is disposed once the
    database work is finished, also when that work raises.
    """
    print(username)
    api = configscript.setupTwitterAuth()
    timeline = tw.Cursor(api.user_timeline,
                         screen_name=username,
                         tweet_mode="extended",
                         exclude_replies=False,
                         include_rts=False,
                         lang='en').items()
    listAllTweets = list(timeline)

    if not listAllTweets:
        return

    tweetsDict = m.getTweetsDict(listAllTweets)
    score = m.getOverallScore(tweetsDict)

    if score == -1:
        # -1 is the value this code treats as "no usable score".
        return

    user = api.get_user(username)
    # Remove _normal from profile image URL
    url = re.sub('_normal', '', user.profile_image_url_https)

    # One-row mapping; named ``row`` so the builtin ``dict`` is not shadowed.
    row = {username: {"score": score, "pic": url}}

    try:
        df = pd.DataFrame.from_dict(row, orient='index')
        df.index.name = 'username'
        df.to_sql('celebrity', con=engine, if_exists='append')

        # Drop older duplicates so each username keeps a single row.
        engine.execute(
            "DELETE FROM celebrity T1 USING celebrity T2 WHERE  T1.ctid  < T2.ctid AND  T1.username = T2.username;"
        )
    finally:
        # Release pooled connections whether or not the write succeeded.
        engine.dispose()
Пример #5
0
def _downloadTimeline(api, username, count):
    """Page backwards through a user's timeline until ``count`` tweets are held.

    Requests pages of up to 200 tweets with ``api.user_timeline`` and stops as
    soon as at least ``count`` tweets were collected or a page comes back
    empty.  Returns the collected tweets as a list (possibly empty, possibly
    longer than ``count`` because whole pages are kept).
    """
    collected = []
    max_id = None
    while True:
        params = dict(screen_name=username,
                      exclude_replies=False,
                      include_rts=False,
                      lang="en",
                      tweet_mode='extended',
                      count=200)
        if max_id is not None:
            params["max_id"] = max_id

        page = api.user_timeline(**params)
        if len(page) == 0:
            break

        collected.extend(page)
        if len(collected) >= count:
            break
        # Continue strictly below the oldest tweet seen so far.
        max_id = page[-1].id - 1
    return collected


def getData(username, count):
    """Download a user's tweets and assemble the full analysis payload.

    Args:
        username: Twitter screen name to analyse.
        count: Number of tweets to aim for; whole pages of up to 200 tweets
            are downloaded until at least this many are held or the timeline
            is exhausted.

    Returns:
        On success, a dict with the keys ``userinfo``, ``overallscore``,
        ``tweets`` (``happiest`` / ``saddest``), ``alltweets``,
        ``topfivewords``, ``wordsmatched``, ``highestweekscore``,
        ``lowestweekscore``, ``weekscores``, ``tweetstart``, ``tweetend``,
        ``tweetsamount``, ``celebrityscore``, ``allcelebrities``,
        ``danishuserscore``, ``nationalAverages``, ``monthlyaverages`` and
        ``averagesRange``.

        ``{"Error": <message>}`` if ``api.get_user`` raises ``TweepError``,
        and ``{"Error": "No tweets"}`` if the timeline is empty.

    Raises:
        Errors raised by the timeline download itself are not caught here
        and propagate to the caller.

    Notes:
        Reads the module-level ``lastDate`` for ``tweetend``; it is not set
        in this function (presumably ``getTweetsDict`` updates it -- confirm).
    """
    # Set up Twitter API
    api = c.setupTwitterAuth()

    tic = time.perf_counter()

    print("Count: " + str(count))

    # An empty timeline used to crash on ``tweets[-1]`` before the
    # "No tweets" check below could ever run.
    alltweets = _downloadTimeline(api, username, count)

    debugPrint(f"{len(alltweets)} Tweets downloaded")

    try:
        # Only called to validate the account; the result itself is unused.
        api.get_user(username)
    except TweepError as e:
        print(e)
        try:
            message = e.args[0][0]['message']
        except (IndexError, KeyError, TypeError):
            # The reason is not always a list of {'message': ...} dicts.
            message = str(e)
        return {"Error": message}

    if not alltweets:
        return {"Error": "No tweets"}

    toc = time.perf_counter()
    print(f"Downloaded data in {toc - tic:0.4f} seconds")

    tic = time.perf_counter()

    engine = create_engine(config('POSTGRESS'))
    try:
        tweetsDict, wordDict = getTweetsDict(alltweets)
        tweetsOnlyScores = tweetsOnlyScore(tweetsDict)

        userinfo = getProfileInfo(username)
        overallScore = getOverallScore(tweetsDict)
        topWords = {
            "top": nlargest(5, wordDict, key=wordDict.get),
            "bottom": nsmallest(5, wordDict, key=wordDict.get)
        }
        wordsAmount = len(wordDict)
        highest, lowest, week = getWeekScores(tweetsDict)
        # The entry at index len - 1 is used as the earliest tweet.
        dateobjectEaliest = tweetsDict[len(tweetsDict) - 1]["created"]
        formattedEarliestDate = formatDate(dateobjectEaliest)
        formattedLatestDate = formatDate(
            str(lastDate.strftime('%Y-%m-%d %H:%M:%S')))
        celebrityscore = getClosestsCelebrities(username, overallScore, engine)
        allcelebrities = getAllCelebrities(engine)
        # NOTE(review): the original line ended with a trailing comma, so this
        # value has always been a 1-tuple.  Kept as-is because consumers may
        # index into it; confirm before unwrapping.
        danishuserscore = (getDanishUsersScore(overallScore, engine),)
        nationalAverages = getNationalScores(engine)
        scoreEvolutionData = scoreEvolution(tweetsDict)
        averagesRange = getLowestAndHighestAverages(scoreEvolutionData)

        data = {
            "userinfo": userinfo,
            "overallscore": overallScore,
            "tweets": {
                "happiest": getHappiestTweet(tweetsOnlyScores),
                "saddest": getSaddestTweet(tweetsOnlyScores)
            },
            "alltweets": tweetsDict,
            "topfivewords": topWords,
            "wordsmatched": wordsAmount,
            "highestweekscore": highest,
            "lowestweekscore": lowest,
            "weekscores": week,
            "tweetstart": formattedEarliestDate,
            "tweetend": formattedLatestDate,
            "tweetsamount": len(tweetsDict),
            "celebrityscore": celebrityscore,
            "allcelebrities": allcelebrities,
            "danishuserscore": danishuserscore,
            "nationalAverages": nationalAverages,
            "monthlyaverages": scoreEvolutionData,
            "averagesRange": averagesRange
        }
    finally:
        # The engine is created per call, so release its pool on every exit.
        engine.dispose()

    toc2 = time.perf_counter()
    print(f"Done in {toc2 - tic:0.4f} seconds")

    return data