Пример #1
0
def add_or_update_user(name):
    """
    Fetch a Twitter user by handle and store them with their latest Tweets.

    An existing ``User`` row is reused, otherwise a new one is created. Only
    original Tweets (replies and retweets excluded) newer than the user's
    ``newest_tweet_id`` are requested, and each is stored together with its
    Basilica embedding. The session is committed only when every step
    succeeded; any exception is printed and re-raised.
    """
    try:
        account = TWITTER.get_user(name)

        record = User.query.get(account.id)
        if not record:
            record = User(id=account.id, name=name)
        DB.session.add(record)

        timeline = account.timeline(
            count=200,
            exclude_replies=True,
            include_rts=False,
            since_id=record.newest_tweet_id,
        )
        if timeline:
            # The first Tweet in the response becomes the next since_id.
            record.newest_tweet_id = timeline[0].id

        for status in timeline:
            vector = BASILICA.embed_sentence(status.text, model='twitter')
            stored = Tweet(id=status.id, text=status.text, embedding=vector)
            record.tweets.append(stored)
            DB.session.add(stored)
    except Exception as e:
        print(f'Error processing {name}: {e}')
        raise e
    else:
        DB.session.commit()
Пример #2
0
def add_or_update_user(username):
    """Store a Twitter user and any of their Tweets not fetched before.

    The handle is looked up through ``TWITTER``; the matching ``User`` row is
    reused or a new one is created. Up to 200 original Tweets (replies and
    retweets excluded) newer than ``newest_tweet_id`` are requested in
    extended mode. Each Tweet is embedded from its full text, while the
    stored text is cut to 300 characters. Nothing is committed unless every
    step succeeds; on failure the error is printed and re-raised.
    """
    try:
        profile = TWITTER.get_user(username)

        user_row = User.query.get(profile.id)
        if not user_row:
            user_row = User(id=profile.id, name=username)
        DB.session.add(user_row)

        fetched = profile.timeline(
            count=200,
            exclude_replies=True,
            include_rts=False,
            tweet_mode='extended',
            since_id=user_row.newest_tweet_id,
        )
        if fetched:
            # The first Tweet in the response becomes the next since_id.
            user_row.newest_tweet_id = fetched[0].id

        for status in fetched:
            body = status.full_text
            vector = BASILICA.embed_sentence(body, model='twitter')
            tweet_row = Tweet(id=status.id, text=body[:300], embedding=vector)
            user_row.tweets.append(tweet_row)
            DB.session.add(tweet_row)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        raise e
    else:
        DB.session.commit()
Пример #3
0
def add_or_update_user(username):
    """Add a Twitter user to the DB, or update them with their newest tweets.

    The user is fetched through ``TWITTER``; an existing ``User`` row is
    reused, otherwise one is created. Original tweets (no replies, no
    retweets) are requested in extended mode, vectorized with
    ``vectorize_tweet`` and attached to the user.

    Args:
        username: Twitter handle passed to ``TWITTER.get_user``.

    Raises:
        Exception: Any error raised while fetching, vectorizing or staging
            the data is printed and re-raised; nothing is committed then.
    """
    try:
        twitter_user = TWITTER.get_user(username)

        # Reuse the stored user when there is one, otherwise create it.
        db_user = (User.query.get(twitter_user.id)
                   or User(id=twitter_user.id, name=username))
        DB.session.add(db_user)

        # Only ask for tweets newer than the last one recorded for this user.
        # Without since_id every update would fetch the already stored tweets
        # again and try to insert Tweet rows whose ids are already taken.
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)

        # The first tweet of the response becomes the next since_id.
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            vectorized_tweet = vectorize_tweet(tweet.full_text)
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text,
                             vect=vectorized_tweet)
            db_user.tweets.append(db_tweet)
            DB.session.add(db_tweet)

    except Exception as e:
        print('Error Processing {}: {}'.format(username, e))
        raise

    # Commit only when every step above succeeded.
    else:
        DB.session.commit()
Пример #4
0
    def create_data():
        """Store the Twitter user named in the submitted form and their tweets.

        Reads the ``user`` form field, upserts the matching ``User`` row, then
        fetches the user's timeline (replies and retweets excluded), embeds
        every status with Basilica and upserts the ``Tweet`` rows.

        Returns:
            A redirect to ``/new`` on success, otherwise a JSON error message.
        """
        try:
            screen_name = request.form["user"]
            twitter_user = twitter_api_client.get_user(screen_name)

            db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
            db_user.screen_name = twitter_user.screen_name
            db_user.name = twitter_user.name
            db_user.location = twitter_user.location
            db_user.followers_count = twitter_user.followers_count
            db.session.add(db_user)
            db.session.commit()

            statuses = twitter_api_client.user_timeline(screen_name,
                                                        tweet_mode="extended",
                                                        count=300,
                                                        exclude_replies=True,
                                                        include_rts=False)
            for status in statuses:
                print(status.full_text)
                print("----")

                # Find or create database tweet:
                db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
                db_tweet.user_id = status.author.id  # or db_user.id
                db_tweet.full_text = status.full_text
                # TODO: prefer a single Basilica request with all tweet texts
                # instead of one request per tweet.
                db_tweet.embedding = basilica_client.embed_sentence(
                    status.full_text, model="twitter")
                db.session.add(db_tweet)
            db.session.commit()
            return redirect("/new")
        except Exception as e:
            # Catch Exception rather than everything, so SystemExit and
            # KeyboardInterrupt still propagate. Discard whatever was staged
            # and surface the real cause, since the response text is generic.
            db.session.rollback()
            print("Error creating data:", e)
            return jsonify({"message": "OOPS User Not Found!"})
Пример #5
0
def get_tweets(screen_name="elonmusk"):
    """Return a user's recent tweets as JSON and record their ids in the DB.

    Args:
        screen_name: Twitter handle whose timeline is fetched. Defaults to
            "elonmusk", the account this function used to be hard-coded to.

    Returns:
        A JSON response holding a list of ``{"id", "message"}`` objects, one
        per status returned by the Twitter client.
    """
    client = current_app.config["TWITTER_API_CLIENT"]
    statuses = client.user_timeline(screen_name, tweet_mode='extended')

    tweets = []
    for status in statuses:
        tweets.append({"id": status.id_str, "message": status.full_text})
        # Find or create the row for this status.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db.session.add(db_tweet)
    # Without a commit the rows staged above are never persisted.
    db.session.commit()

    print(tweets)
    return jsonify(tweets)
Пример #6
0
def add(user_id):
    """Show the add-tweet form and store the tweet once the form validates.

    When ``validate_on_submit`` is false the form template is rendered.
    Otherwise a ``Tweet`` for ``user_id`` is created from the form text,
    committed, and the client is redirected to that user's tweet index.
    """
    form = AddForm()

    if not form.validate_on_submit():
        return render_template("add_tweet.html", form=form)

    new_tweet = Tweet(user_id=user_id, text=form.text.data)
    db.session.add(new_tweet)
    db.session.commit()

    return redirect(url_for("tweets.index", user_id=user_id))
Пример #7
0
def store_twitter_user_data(screen_name=None):
    """Upsert a Twitter user and their recent statuses, with embeddings.

    The profile is fetched and written to ``Twuser`` (committed right away).
    Then up to 42 statuses, replies and retweets excluded, are fetched,
    embedded with Basilica in a single batch call, and upserted into
    ``Tweet`` with one commit at the end.

    Returns:
        A ``(db_twuser, statuses)`` tuple: the stored user row and the
        statuses as returned by the Twitter client.
    """
    print(screen_name)

    profile = twitter_api_client.get_user(screen_name)

    # Find or create the user row, then mirror the profile fields onto it.
    user_row = Twuser.query.get(profile.id)
    if not user_row:
        user_row = Twuser(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    statuses = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode='extended',
        count=42,
        exclude_replies=True,
        include_rts=False,
    )
    print('Status count:', len(statuses))

    # One batch request to Basilica for all status texts.
    embedder = basilica_api_client()
    texts = [status.full_text for status in statuses]
    embeddings = list(embedder.embed_sentences(texts, model='twitter'))
    print('Number of embeddings:', len(embeddings))

    for position, status in enumerate(statuses):
        print(status.full_text)
        print('----')

        # Find or create the tweet row; embeddings line up by position.
        tweet_row = Tweet.query.get(status.id) or Tweet(id=status.id)
        tweet_row.user_id = status.author.id
        tweet_row.full_text = status.full_text
        tweet_row.embedding = embeddings[position]
        db.session.add(tweet_row)
    db.session.commit()

    return user_row, statuses
Пример #8
0
def create_tweet():
    """Create a tweet from the submitted form data.

    Requires a ``status`` form field; when it is absent a JSON "oops"
    message is returned. Otherwise a ``Tweet`` is stored with that status
    and the integer ``user_id`` from the form, and a JSON confirmation
    echoing the status is returned.
    """
    print("CREATING A NEW TWEET...")
    print("FORM DATA:", dict(request.form))

    if "status" not in request.form:
        return jsonify({"message": "oops"})

    status_text = request.form["status"]
    author_id = int(request.form["user_id"])
    new_tweet = Tweet(status=status_text, user_id=author_id)
    db.session.add(new_tweet)
    db.session.commit()
    return jsonify({"message": "CREATED OK", "status": status_text})
Пример #9
0
def add_user_history(username):
    """Add a user's available tweet history to the database.

    The timeline is paged 200 tweets at a time (replies and retweets
    excluded), walking backwards with ``max_id`` until a page comes back
    empty. Every collected tweet is vectorized and attached to the user.
    The Twitter API caps the reachable history (3200 tweets, per the
    original author's note).

    Args:
        username: Twitter handle passed to ``TWITTER.get_user``.

    Raises:
        Exception: Any error raised along the way is printed and re-raised;
            nothing is committed in that case.
    """
    try:
        # Get user info from tweepy
        twitter_user = TWITTER.get_user(username)
        # Add to User table (or reuse the existing row)
        db_user = (User.query.get(twitter_user.id)
                   or User(id=twitter_user.id,
                           username=username,
                           followers=twitter_user.followers_count))
        db.session.add(db_user)

        # A single paging loop covers the first request too, so a user with
        # no qualifying tweets yields an empty history instead of failing on
        # tweets[-1].
        timeline_kwargs = {'count': 200,
                           'exclude_replies': True,
                           'include_rts': False,
                           'tweet_mode': 'extended'}
        tweet_history = []
        while True:
            page = twitter_user.timeline(**timeline_kwargs)
            if not page:
                break
            tweet_history += page
            # Next page: strictly older than the oldest tweet seen so far.
            timeline_kwargs['max_id'] = page[-1].id - 1

        # The first tweet of the first page is recorded as the newest one.
        if tweet_history:
            db_user.newest_tweet_id = tweet_history[0].id

        print(f'Total Tweets collected for {username}: {len(tweet_history)}')
        # Vectorize each tweet and add it to the Tweet table
        for tweet in tweet_history:
            embedding = vectorize_tweet(nlp, tweet.full_text)
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            db.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        raise
    else:
        # Commit only when no error happened
        db.session.commit()
        print('Successfully saved tweets to db!')
Пример #10
0
def add_or_update_user(name):
    """Create or refresh a user and store their not-yet-seen tweets.

    The user is fetched through ``TWITTER``; the matching ``User`` row is
    reused or a new one is created. Up to 200 original tweets (no replies,
    no retweets) newer than ``newest_tweet_id`` are requested in extended
    mode, vectorized with ``vectorize_tweet`` and attached to the user.
    The session is committed only on success; any exception is printed and
    re-raised.
    """
    try:
        account = TWITTER.get_user(name)

        # Reuse the stored row when there is one, otherwise start a new one.
        user_row = User.query.get(account.id)
        if not user_row:
            user_row = User(id=account.id, name=name)
        DB.session.add(user_row)

        recent = account.timeline(
            count=200,
            exclude_replies=True,
            include_rts=False,
            tweet_mode='extended',
            since_id=user_row.newest_tweet_id,
        )
        if recent:
            # The first tweet in the response becomes the next since_id.
            user_row.newest_tweet_id = recent[0].id

        for status in recent:
            vector = vectorize_tweet(status.full_text)
            tweet_row = Tweet(id=status.id, text=status.full_text, vect=vector)
            user_row.tweets.append(tweet_row)
            DB.session.add(tweet_row)

    except Exception as e:
        print(f'Encountered error while processing {name}: {e}')
        raise e
    else:
        DB.session.commit()
Пример #11
0
def fetch_user(screen_name=None):
    """Upsert a Twitter user and their timeline, embedding every tweet.

    The profile is written to ``User`` and committed first. The timeline is
    then fetched in extended mode, all texts are embedded with Basilica in
    one batch call, and each tweet is upserted into ``Tweet`` before a final
    commit.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    profile = twitter_api_client.get_user(screen_name)

    # Find or create the user row, then copy the profile fields over.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    statuses = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=300,
    )
    print("TWEETS COUNT:", len(statuses))

    texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(index)
        print(status.full_text)
        print("----")

        # Embeddings line up with the statuses by position.
        vector = embeddings[index]

        # Find or create the tweet row.
        tweet_row = Tweet.query.get(status.id) or Tweet(id=status.id)
        tweet_row.user_id = status.author.id
        tweet_row.full_text = status.full_text
        tweet_row.embedding = vector
        db.session.add(tweet_row)

    db.session.commit()
    return "OK"
Пример #12
0
def add_or_update_user(username):
    """Create or refresh a user and store the tweets not fetched before.

    The handle is looked up through ``TWITTER``. An existing ``User`` row is
    reused as is; a new row is filled with the handle, display name, profile
    image URL and follower/following counts. Up to 200 original tweets (no
    replies, no retweets) newer than ``newest_tweet_id`` are requested, each
    embedded from its full text and stored with the text cut to 300
    characters. The session is committed only on success; any exception is
    printed and re-raised.
    """
    try:
        profile = TWITTER.get_user(username)

        user_row = User.query.get(profile.id)
        if not user_row:
            user_row = User(
                id=profile.id,
                handle=username,
                name=profile.name,
                profile_image_url=profile.profile_image_url_https,
                followers_count=profile.followers_count,
                following_count=profile.friends_count,
            )
        db.session.add(user_row)

        fetched = profile.timeline(
            count=200,
            exclude_replies=True,
            include_rts=False,
            tweet_mode='extended',
            since_id=user_row.newest_tweet_id,
        )
        if fetched:
            # The first tweet in the response becomes the next since_id.
            user_row.newest_tweet_id = fetched[0].id

        for status in fetched:
            body = status.full_text
            # Embed the complete text; only the stored copy is truncated.
            vector = BASILICA.embed_sentence(body, model='twitter')
            tweet_row = Tweet(id=status.id, text=body[:300], embedding=vector)
            user_row.tweets.append(tweet_row)
            db.session.add(tweet_row)

    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        raise e
    else:
        db.session.commit()
Пример #13
0
def fetch_user(screen_name=None):
    """Upsert a Twitter user and their recent tweets, embedding every tweet.

    The profile is written to ``User`` and committed first. The timeline is
    then fetched in extended mode, all texts are embedded with Basilica in
    one batch call, and each tweet is upserted into ``Tweet`` before a final
    commit.

    Args:
        screen_name: Twitter handle to fetch.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    twitter_user = twitter_api.get_user(screen_name)

    # Find the existing row by the Twitter id, or start a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    tweets = twitter_api.user_timeline(screen_name,
                                       tweet_mode="extended",
                                       count=150)
    print("TWEETS COUNT:", len(tweets))

    # One batch request to Basilica; results line up with `tweets` by position.
    all_tweet_texts = [status.full_text for status in tweets]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, (status, embedding) in enumerate(zip(tweets, embeddings)):
        print(index)
        print(status.full_text)
        print("____")
        print(len(embedding))

        # Find or create the tweet row.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embedding
        db.session.add(db_tweet)

    db.session.commit()

    return "OK"
Пример #14
0
def show_user(screen_name=None):
    # NOTE(review): the next statement is corrupted in this copy of the file.
    # The text between "SHOWING USER:" and "extended" was replaced by
    # "******"; apparently the opening `try:` and the code defining `db_user`
    # and `statuses` went with it (both are used below, and an `except`
    # follows). As written this function does not parse. Restore the missing
    # code from the original source before relying on it.
    print("SHOWING USER:"******"extended",
                                        count=50,
                                        exclude_replies=True,
                                        include_rts=False)
        for status in statuses:
            print(status.full_text)

            # Find or create the tweet row for this status.
            db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
            print(db_tweet)

            db_tweet.user_id = status.author.id
            db_tweet.full_text = status.full_text
            # One Basilica request per status.
            embedding = basilica_client.embed_sentence(status.full_text,
                                                       model="twitter")
            print("Embedding length:", len(embedding))
            db_tweet.embedding = embedding
            db.session.add(db_tweet)
            print("TWEET ADDED TO DB")
        print("COMMITTING TWEET TO DB...")
        # Single commit for all tweets staged in the loop above.
        db.session.commit()

        return render_template("user_profile.html",
                               user=db_user,
                               tweets=db_user.tweets)
    except Exception as e:
        # Any failure is printed and reported to the client as a generic
        # JSON message.
        print(e, "This is the exception")
        return jsonify(
            {"message": "OOPS THERE WAS AN ERROR. PLEASE TRY ANOTHER USER"})
Пример #15
0
def add_user_tweepy(username):
    """Add a user and their not-yet-stored tweets to the database.

    The user is fetched through ``TWITTER``; the matching ``User`` row is
    reused or a new one is created. Original tweets (no replies, no
    retweets) newer than ``newest_tweet_id`` are requested in extended mode.
    Each tweet is vectorized from its own full text and stored with the text
    cut to 300 characters.

    Args:
        username: Twitter handle passed to ``TWITTER.get_user``.

    Raises:
        Exception: Any error raised along the way is printed and re-raised;
            nothing is committed in that case.
    """
    try:
        # Get user info from tweepy
        twitter_user = TWITTER.get_user(username)
        # Add to User table (or reuse the existing row)
        db_user = (User.query.get(twitter_user.id)
                   or User(id=twitter_user.id,
                           username=username,
                           followers=twitter_user.followers_count))
        db.session.add(db_user)
        # Get tweets ignoring re-tweets and replies
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)
        # The first tweet of the response becomes the next since_id.
        if tweets:
            db_user.newest_tweet_id = tweets[0].id
        for tweet in tweets:
            # Embed this tweet's own text. Using tweets[0] here would give
            # every row the embedding of the first tweet.
            embedding = vectorize_tweet(nlp, tweet.full_text)

            # Add tweet info to Tweet table
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            db.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        raise
    else:
        # Commit only when no error happened
        db.session.commit()
Пример #16
0
# Seed script: inserts two sample users with three tweets each.
# To run in flask shell: exec(open("./add_users.py").read())
from twitoff.models import DB, User, Tweet

# NOTE(review): drop_all() followed by create_all() presumably wipes and
# rebuilds every table known to DB, so existing data is lost. Confirm before
# running against a database that matters.
DB.drop_all()
DB.create_all()

# First sample user and tweets.
u1 = User(name='Mort')
t1 = Tweet(text='What time is it?')
t2 = Tweet(text='Are these my shoes?')
t3 = Tweet(text='Not my chair, not my problem.')

# Attach the tweets to the user through the `tweets` relationship.
u1.tweets += [t1, t2, t3]

# Second sample user and tweets.
u2 = User(name='Jack')
t4 = Tweet(text='Rocket 88')
t5 = Tweet(text='Deep Deep Sleep')
t6 = Tweet(text="Somethin' Else")

u2.tweets += [t4, t5, t6]

# Stage every object, then persist them all with a single commit.
adds = [u1, u2, t1, t2, t3, t4, t5, t6]
for add in adds:
    DB.session.add(add)
DB.session.commit()
Пример #17
0
def get_tweeter(screen_name=None):
    """Render the tweeter page for ``screen_name``, loading data on demand.

    The account is looked up in the local ``Tweeter`` table first and, when
    absent, through ``get_user``; a newly found, non-protected account is
    saved. For a usable account the timeline from ``get_timeline`` is
    embedded with Basilica in one batch and every status is upserted into
    ``Tweet``. Lookup problems (unknown or protected account, empty
    timeline) are reported to the template through ``messages``.

    Args:
        screen_name: Twitter handle to show.

    Returns:
        The rendered ``tweeter.html`` template, given ``tweeter`` (profile
        summary dict), ``tweets`` (list of tweet texts) and ``messages``.
    """
    tweets = []
    messages = []
    tweeter = {
        'screen_name': screen_name,
        'name': '',
        'followers_count': 0,
        'statuses_count': 0,
        'stored_count': 0,
        'id': 0,
        'id_str': 0,
        'latest_status_id': 0
    }
    # Fields copied into the summary; present on both the stored Tweeter row
    # and the user object returned by get_user.
    profile_fields = ('screen_name', 'name', 'followers_count',
                      'statuses_count', 'id')

    # Prefer the locally stored account; fall back to the Twitter API.
    source = Tweeter.query.filter_by(screen_name=screen_name).first()
    if source is None:
        user = get_user(screen_name=screen_name)
        if user is None:
            # user not found through twitter api
            messages.append(f'Unable to find Twitter account: {screen_name}')
        elif user.protected:
            messages.append(f'Unable to load tweets for {screen_name}.')
            messages.append('Account is protected.')
        else:
            # found a public user, add to database
            db.session.add(Tweeter(
                screen_name=user.screen_name,
                name=user.name,
                id=user.id,
                id_str=user.id_str,
                followers_count=user.followers_count,
                statuses_count=user.statuses_count
            ))
            db.session.commit()
            source = user

    if source is not None:
        for field in profile_fields:
            tweeter[field] = getattr(source, field)

    # valid tweeter
    if tweeter['id'] != 0:
        timeline = get_timeline(tweeter['screen_name'])
        tweets = [status.full_text for status in timeline]
        if tweets:
            b_api = basilica_api()
            embeddings = list(b_api.embed_sentences(tweets, model='twitter'))
            for status, embedding in zip(timeline, embeddings):
                # Find or create the tweet row.
                new_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
                new_tweet.id_str = status.id_str
                new_tweet.text = status.full_text
                new_tweet.tweeter_id = status.author.id
                new_tweet.embedding = embedding
                db.session.add(new_tweet)
            # One commit for the whole batch instead of one per tweet.
            db.session.commit()
        else:
            # `tweets` is built one-to-one from `timeline`, so the only
            # remaining case is an empty timeline.
            messages.append(f'No tweets loaded for {screen_name}')

    return render_template(
        'tweeter.html',
        tweeter=tweeter,
        tweets=tweets,
        messages=messages
    )