Example #1
def run_keyword(keyword, older):
    """Iteratively search over a given keyword"""
    lg.info(f"Searching tweets for keywords {keyword.valeur}")
    keyword.nb_rech += 1
    min_id = keyword.plus_ancien_tweet
    max_id = keyword.plus_recent_tweet

    keep_looking = True
    i = 0

    while keep_looking:
        i += 1
        try:
            lg.info(
                f"Searching for keyword {keyword.valeur}  ; loop number {i}")
            if older is True:
                lg.debug("Older is true")
                res = scrap_older_tweets(keyword, min_id)
            else:
                lg.debug("Older is false")
                res = scrap_newer_tweets(keyword, max_id)
                # then we search for tweets older than those just collected
                older = True
            min_id = manage_result(res, keyword)
            if not min_id:
                keep_looking = False

        except TwythonRateLimitError:
            lg.warning(
                "Twitter limit reached." +
                "Wait 15 minutes before moving to next keyword")
            # time.sleep(900)
            break
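
The helpers scrap_older_tweets and scrap_newer_tweets are not part of this listing. Below is a minimal sketch of what they could look like on top of Twython's search endpoint; the client set-up and the credential constants are placeholders, not the project's real configuration:

from twython import Twython

# Placeholder credentials; the real project presumably reads them from its config.
APP_KEY = "your-app-key"
ACCESS_TOKEN = "your-access-token"
twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)


def scrap_older_tweets(keyword, min_id):
    """Fetch up to 100 tweets older than min_id for the keyword (sketch)."""
    return twitter.search(q=keyword.valeur, count=100, max_id=min_id)


def scrap_newer_tweets(keyword, max_id):
    """Fetch up to 100 tweets newer than max_id for the keyword (sketch)."""
    return twitter.search(q=keyword.valeur, count=100, since_id=max_id)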
Example #2
def main():
    tweets_to_process = (
        session.query(TWEET).filter(TWEET.texte_retraite == None))  # noqa
    lg.info(f"Tweet to process : {tweets_to_process.count()}")
    while tweets_to_process.count() > 0:
        for tweet in tweets_to_process.all()[0:1000]:
            tweet.texte_retraite = processing(tweet.texte)
        session.commit()
        session.close()
        tweets_to_process = (session.query(TWEET).filter(
            TWEET.texte_retraite == ""))
Example #3
def main(older=True):
    keywords = (
        session.query(KEYWORD)
        .filter(KEYWORD.active == True)  # noqa
        .order_by(KEYWORD.nb_rech)
        .all()
    )
    print(keywords)
    for keyword in keywords:
        run_keyword(keyword, older)
    lg.info("Program over")
Example #4
def common_hashtags(export=True, n=1000):
    freq = nltk.FreqDist(hashtag_generator())
    if export is True:
        with open(
                "outputs/hashtags.csv",
                "w", newline='',
                encoding="utf-8") as csvfile:
            writer = csv.writer(
                csvfile,
                delimiter=',',
                quotechar='"',
                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(freq.most_common(n))
        lg.info("Fichier des hasthags exporté")
    return(freq.most_common(n))
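
hashtag_generator() is assumed to yield one hashtag per occurrence across the collected tweets. One possible implementation, reading back the stored texts through the same session and TWEET model as the other examples (the tokenisation rules are an assumption):

def hashtag_generator():
    """Yield every hashtag found in the stored tweet texts (sketch)."""
    for tweet in session.query(TWEET).all():
        for word in (tweet.texte or "").split():
            if word.startswith("#") and len(word) > 1:
                yield word.lower().rstrip(".,!?;:")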
Example #5
def manage_result(res, keyword):
    """Manage the twitter response according to wether or not its empty"""

    if res['statuses']:
        # case where there are some results to write in database
        lg.info(f"Number of tweets : {len(res['statuses'])}")
        min_id = min([tweet['id'] for tweet in res['statuses']])-1
        lg.debug(f"min_id : {min_id}")

        # Formatting and writing data
        write_data(res, keyword)
        return min_id
    else:
        lg.info(
            "No more results for keyword '%s' ; moving to next keyword",
            keyword.valeur)
        return None
Example #6
def main():
    tweets_to_process = session.query(TWEET).filter(
        TWEET.nb_rt == None)  # noqa
    while tweets_to_process.count() > 0:
        lg.info(f"""Searching influence data. Tweets remaining :
            {tweets_to_process.count()}""")
        tweet_list = tweets_to_process.all()[0:100]
        ids = [tweet.tweet_id for tweet in tweet_list]
        results = api_query(ids)

        for tweet in tweet_list:
            tweet.nb_favori = results[tweet.tweet_id]["nb_favori"]
            tweet.nb_rt = results[tweet.tweet_id]["nb_rt"]
            tweet.date_influence = auj  # 'auj' (aujourd'hui) presumably holds today's date

        session.commit()
        session.close()
        tweets_to_process = session.query(TWEET).filter(
            TWEET.nb_rt == None)  # noqa
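
As with KEYWORD, the TWEET and COMPTE models are used but never shown. A rough sketch covering only the attributes these examples touch; the constructors are inferred from the COMPTE(tw['user']) and TWEET(tw) calls in write_data, and everything else is an assumption:

from sqlalchemy import BigInteger, Column, Date, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship

Base = declarative_base()


class COMPTE(Base):                 # "compte" = Twitter account
    __tablename__ = 'compte'

    user_id = Column(BigInteger, primary_key=True)
    nom = Column(String)

    def __init__(self, user):       # built from the raw 'user' dict of a status
        self.user_id = user['id']
        self.nom = user.get('screen_name')


class TWEET(Base):
    __tablename__ = 'tweet'

    tweet_id = Column(BigInteger, primary_key=True)
    texte = Column(String)
    texte_retraite = Column(String)  # cleaned text, filled by processing()
    nb_rt = Column(Integer)          # retweet count
    nb_favori = Column(Integer)      # favourite count
    date_influence = Column(Date)    # when the influence data was refreshed
    user_id = Column(BigInteger, ForeignKey('compte.user_id'))
    compte = relationship('COMPTE')

    def __init__(self, tw):          # built from a raw status dict
        self.tweet_id = tw['id']
        self.texte = tw['text']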
Example #7
def write_data(res, keyword):
    """
    Reorganize result data and write them to sqlite database
    """

    for tw in res['statuses']:
        tweet_id = tw['id']
        user_id = tw['user']['id']

        # Writing User data:
        compte = session.query(COMPTE).filter(COMPTE.user_id == user_id)
        if not compte.all():    # create the account if it does not exist yet
            compte = COMPTE(tw['user'])
            session.add(compte)
        else:
            compte = compte.one()

        # Writing Tweet_data:
        tweet = session.query(TWEET).filter(TWEET.tweet_id == tweet_id)
        if not tweet.all():
            tweet = TWEET(tw)
            tweet.compte = compte
            session.add(tweet)
            lg.info(f"adding tweet {tweet_id}")

        # Updating Keyword data :
        if keyword.plus_ancien_tweet:
            if tweet_id < keyword.plus_ancien_tweet:
                keyword.plus_ancien_tweet = tweet_id
        else:
            keyword.plus_ancien_tweet = tweet_id

        if keyword.plus_recent_tweet:
            if tweet_id > keyword.plus_recent_tweet:
                keyword.plus_recent_tweet = tweet_id
        else:
            keyword.plus_recent_tweet = tweet_id

        session.commit()


def main():
    tweets_to_process = session.query(TWEET).filter(
        TWEET.nb_rt == None)  # noqa
    while tweets_to_process.count() > 0:
        lg.info(f"""Searching influence data. Tweets remaining :
            {tweets_to_process.count()}""")
        tweet_list = tweets_to_process.all()[0:100]
        ids = [tweet.tweet_id for tweet in tweet_list]
        results = api_query(ids)

        for tweet in tweet_list:
            tweet.nb_favori = results[tweet.tweet_id]["nb_favori"]
            tweet.nb_rt = results[tweet.tweet_id]["nb_rt"]
            tweet.date_influence = auj

        session.commit()
        session.close()
        tweets_to_process = session.query(TWEET).filter(
            TWEET.nb_rt == None)  # noqa


if __name__ == '__main__':

    for annee in range(2006, 2018):
        lg.info(f"Collecting influence data for {annee}")
        main()
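
api_query() is the missing piece of this influence script. Below is a sketch built on Twython's lookup_status (the statuses/lookup endpoint accepts up to 100 ids per call); it reuses the placeholder twitter client assumed for the search helpers above:

def api_query(ids):
    """Return {tweet_id: {"nb_rt": ..., "nb_favori": ...}} for up to 100 ids (sketch)."""
    statuses = twitter.lookup_status(id=",".join(str(i) for i in ids))
    results = {}
    for status in statuses:
        results[status["id"]] = {
            "nb_rt": status["retweet_count"],
            "nb_favori": status["favorite_count"],
        }
    return results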