def add_or_update_user(name):
    """Add or update a user and their Tweets.

    Looks the account up through ``TWITTER``, reuses the stored ``User`` row
    with the same id (or creates one), then stores each timeline Tweet newer
    than ``db_user.newest_tweet_id`` together with its Basilica embedding.

    Args:
        name: Twitter screen name to look up.

    Raises:
        Exception: Whatever the Twitter, Basilica or database call raised
            (e.g. the user doesn't exist or is private).  The error is
            printed, the session is rolled back and the original exception
            is re-raised.
    """
    try:
        twitter_user = TWITTER.get_user(name)
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id, name=name))
        DB.session.add(db_user)

        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       since_id=db_user.newest_tweet_id)
        if tweets:
            # NOTE(review): assumes the timeline is returned newest-first,
            # so element 0 is the high-water mark for the next update.
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            embedding = BASILICA.embed_sentence(tweet.text, model='twitter')
            db_tweet = Tweet(id=tweet.id, text=tweet.text,
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            DB.session.add(db_tweet)
    except Exception as e:
        print(f'Error processing {name}: {e}')
        # Discard the half-built user/tweets so a later commit on this
        # session cannot persist partial data.
        DB.session.rollback()
        raise
    else:
        DB.session.commit()
def add_or_update_user(username):
    """Add or update a user and their Tweets, error if not a Twitter user.

    Fetches the account through ``TWITTER``, reuses or creates the matching
    ``User`` row, and stores every primary Tweet (no retweets/replies) newer
    than ``db_user.newest_tweet_id``.  The embedding is computed on the full
    text; the stored text is cut to 300 characters.

    Args:
        username: Twitter screen name to look up.

    Raises:
        Exception: Any error from the Twitter, Basilica or database call.
            It is printed, the session is rolled back, and the original
            exception is re-raised.
    """
    try:
        twitter_user = TWITTER.get_user(username)
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id, name=username))
        DB.session.add(db_user)

        # Lets get the tweets - focusing on primary (not retweet/reply)
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            embedding = BASILICA.embed_sentence(tweet.full_text,
                                                model='twitter')
            db_tweet = Tweet(id=tweet.id, text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            DB.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        # Drop pending objects so partial results are never committed later.
        DB.session.rollback()
        raise
    else:
        DB.session.commit()
def add_or_update_user(username):
    """Allows us to add/update users to our DB.

    Fetches the account through ``TWITTER``, reuses the ``User`` row with the
    same id or creates one, and stores each timeline Tweet together with the
    vector returned by ``vectorize_tweet``.

    Args:
        username: Twitter screen name to look up.

    Raises:
        Exception: Any error from the Twitter, vectorizer or database call.
            It is printed, the session is rolled back, and the original
            exception is re-raised.
    """
    try:
        twitter_user = TWITTER.get_user(username)

        # either updates or adds a user depending upon if they are in the DB
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id, name=username))
        DB.session.add(db_user)

        # Only ask for tweets newer than the last one stored; otherwise an
        # update would try to insert Tweet ids that already exist.
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)

        # will update the most recent tweet id to the user
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            vectorized_tweet = vectorize_tweet(tweet.full_text)
            db_tweet = Tweet(id=tweet.id, text=tweet.full_text,
                             vect=vectorized_tweet)
            db_user.tweets.append(db_tweet)
            DB.session.add(db_tweet)
    except Exception as e:
        print('Error Processing {}: {}'.format(username, e))
        # Discard pending objects so nothing partial is committed later.
        DB.session.rollback()
        raise
    else:
        # last thing done is committing changes
        DB.session.commit()
def create_data():
    """Store the Twitter user named in the submitted form, plus their tweets.

    Reads ``request.form["user"]``, upserts the matching ``User`` row and
    commits it, then upserts one ``Tweet`` row per timeline status with a
    Basilica embedding and commits again.

    Returns:
        A redirect to ``/new`` on success, otherwise a JSON body with the
        message ``"OOPS User Not Found!"``.
    """
    try:
        screen_name = request.form["user"]
        twitter_user = twitter_api_client.get_user(screen_name)

        db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
        db_user.screen_name = twitter_user.screen_name
        db_user.name = twitter_user.name
        db_user.location = twitter_user.location
        db_user.followers_count = twitter_user.followers_count
        db.session.add(db_user)
        db.session.commit()

        statuses = twitter_api_client.user_timeline(screen_name,
                                                    tweet_mode="extended",
                                                    count=300,
                                                    exclude_replies=True,
                                                    include_rts=False)

        # One batched Basilica request for all texts instead of one request
        # per tweet; skipped entirely when the timeline is empty.
        texts = [status.full_text for status in statuses]
        embeddings = []
        if texts:
            embeddings = list(
                basilica_client.embed_sentences(texts, model="twitter"))

        for status, embedding in zip(statuses, embeddings):
            print(status.full_text)
            print("----")
            # Find or create database tweet:
            db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
            db_tweet.user_id = status.author.id  # or db_user.id
            db_tweet.full_text = status.full_text
            db_tweet.embedding = embedding
            db.session.add(db_tweet)
        db.session.commit()
        return redirect("/new")
    except Exception as err:
        # Any failure is reported to the client with the same message, but
        # the cause is logged and the session is left clean.
        print("Error creating data:", err)
        db.session.rollback()
        return jsonify({"message": "OOPS User Not Found!"})
def get_tweets():
    """Return the "elonmusk" timeline as JSON and stage a Tweet row per status.

    Each status becomes ``{"id": <id_str>, "message": <full_text>}`` in the
    response.  A ``Tweet`` row is looked up or created for every status id
    and added to the session.

    NOTE(review): nothing here commits the session -- confirm whether the
    staged rows are meant to be persisted elsewhere.
    """
    api = current_app.config["TWITTER_API_CLIENT"]
    timeline = api.user_timeline("elonmusk", tweet_mode='extended')

    payload = []
    for entry in timeline:
        payload.append({"id": entry.id_str, "message": entry.full_text})
        row = Tweet.query.get(entry.id)
        if not row:
            row = Tweet(id=entry.id)
        db.session.add(row)

    print(payload)
    return jsonify(payload)
def add(user_id):
    """Show the add-tweet form, or store the submitted tweet for ``user_id``.

    When ``form.validate_on_submit()`` is falsy the ``add_tweet.html``
    template is rendered with the form.  Otherwise a ``Tweet`` built from the
    form text is committed and the client is redirected to ``tweets.index``.
    """
    form = AddForm()
    if not form.validate_on_submit():
        return render_template("add_tweet.html", form=form)

    new_tweet = Tweet(user_id=user_id, text=form.text.data)
    db.session.add(new_tweet)
    db.session.commit()
    return redirect(url_for("tweets.index", user_id=user_id))
def store_twitter_user_data(screen_name=None):
    """Upsert a Twitter user and their recent statuses, with embeddings.

    The ``Twuser`` row is committed first.  Up to 42 statuses (no replies,
    no retweets) are then fetched, embedded in one Basilica request, upserted
    as ``Tweet`` rows and committed.

    Args:
        screen_name: Screen name passed to the Twitter client.

    Returns:
        Tuple ``(db_twuser, statuses)``.
    """
    print(screen_name)
    twitter_user = twitter_api_client.get_user(screen_name)

    # Find or create database user (entry):
    db_twuser = Twuser.query.get(twitter_user.id)
    if not db_twuser:
        db_twuser = Twuser(id=twitter_user.id)
    for field in ('screen_name', 'name', 'location', 'followers_count'):
        setattr(db_twuser, field, getattr(twitter_user, field))
    db.session.add(db_twuser)
    db.session.commit()

    # Get tweets (statuses)
    statuses = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode='extended',
        count=42,
        exclude_replies=True,
        include_rts=False,
    )
    print('Status count:', len(statuses))

    # Transform the data via Basilica
    basilica_api = basilica_api_client()
    texts = [item.full_text for item in statuses]
    # If Basilica is not working, `embeddings = texts` is a stand-in.
    embeddings = list(basilica_api.embed_sentences(texts, model='twitter'))
    print('Number of embeddings:', len(embeddings))

    for position, status in enumerate(statuses):
        print(status.full_text)
        print('----')
        # Find or create the database tweet row:
        db_tweet = Tweet.query.get(status.id)
        if not db_tweet:
            db_tweet = Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embeddings[position]
        db.session.add(db_tweet)

    db.session.commit()
    return db_twuser, statuses
def create_tweet():
    """Create a Tweet from the submitted form and answer with JSON.

    Without a ``"status"`` field the reply is ``{"message": "oops"}``.
    Otherwise a ``Tweet`` with that status and the integer ``"user_id"`` is
    committed and the reply echoes the status text.
    """
    print("CREATING A NEW TWEET...")
    print("FORM DATA:", dict(request.form))

    if "status" not in request.form:
        return jsonify({"message": "oops"})

    status_text = request.form["status"]
    owner_id = int(request.form["user_id"])
    new_tweet = Tweet(status=status_text, user_id=owner_id)
    db.session.add(new_tweet)
    db.session.commit()
    return jsonify({"message": "CREATED OK", "status": status_text})
def add_user_history(username):
    '''Add max tweet history (API limit of 3200) to database.

    Pages backwards through the user's timeline (200 per request, no
    retweets or replies) until a request returns nothing, then stores every
    collected tweet with the vector from ``vectorize_tweet``.  A user whose
    timeline is empty is stored without tweets instead of failing.

    Args:
        username: Twitter screen name to look up.

    Raises:
        Exception: Any error from the Twitter, vectorizer or database call.
            It is printed, the session is rolled back, and the original
            exception is re-raised.
    '''
    try:
        # Get user info from tweepy
        twitter_user = TWITTER.get_user(username)

        # Add to User table (or check if existing)
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id,
                        username=username,
                        followers=twitter_user.followers_count))
        db.session.add(db_user)

        # Collect tweets page by page; `max_id` is only sent once a page has
        # been seen, so the first request starts at the newest tweet.
        page_options = dict(count=200,
                            exclude_replies=True,
                            include_rts=False,
                            tweet_mode='extended')
        tweet_history = []
        while True:
            tweets = twitter_user.timeline(**page_options)
            if not tweets:
                break
            tweet_history += tweets
            # Next page: everything strictly older than the oldest seen.
            page_options['max_id'] = tweets[-1].id - 1

        # Add newest_tweet_id to the User table
        if tweet_history:
            db_user.newest_tweet_id = tweet_history[0].id

        print(f'Total Tweets collected for {username}: {len(tweet_history)}')

        # Loop over tweets, get embedding and add to Tweet table
        for tweet in tweet_history:
            embedding = vectorize_tweet(nlp, tweet.full_text)
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            db.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        # Discard pending objects so nothing partial is committed later.
        db.session.rollback()
        raise
    else:
        # If no errors happened then commit the records
        db.session.commit()
        print('Successfully saved tweets to db!')
def add_or_update_user(name):
    """Add or update a user and their tweets.

    Fetches the account through ``TWITTER``, reuses or creates the matching
    ``User`` row, and stores each non-retweet/non-reply tweet newer than
    ``db_user.newest_tweet_id`` with the vector from ``vectorize_tweet``.

    Args:
        name: Twitter screen name to look up.

    Raises:
        Exception: Any error from the Twitter, vectorizer or database call
            (e.g. the user doesn't exist or is private).  It is printed, the
            session is rolled back, and the original exception is re-raised.
    """
    try:
        # Gets user through tweepy API
        twitter_user = TWITTER.get_user(name)

        # Adds db_user to user table
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id, name=name))
        DB.session.add(db_user)

        # adds recent non-retweet/reply tweets
        # twitter API has a limit of 200 per request
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)

        # Remember the newest tweet so the next update only fetches what is
        # new.  TODO: backfill older history to give the model more data.
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            # Tweet gets vectorized by the model, and added to the DB
            vectorized_tweet = vectorize_tweet(tweet.full_text)
            db_tweet = Tweet(id=tweet.id, text=tweet.full_text,
                             vect=vectorized_tweet)
            db_user.tweets.append(db_tweet)
            DB.session.add(db_tweet)
    except Exception as e:
        print(f'Encountered error while processing {name}: {e}')
        # Discard pending objects so nothing partial is committed later.
        DB.session.rollback()
        raise
    else:
        DB.session.commit()
def fetch_user(screen_name=None):
    """Upsert a Twitter user and their timeline tweets, with embeddings.

    The ``User`` row is committed first.  The timeline is then fetched,
    embedded in one Basilica request, upserted as ``Tweet`` rows and
    committed.

    Args:
        screen_name: Screen name passed to the Twitter client.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)
    twitter_user = twitter_api_client.get_user(screen_name)

    # get existing user from the db or initialize a new one:
    db_user = User.query.get(twitter_user.id)
    if not db_user:
        db_user = User(id=twitter_user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(db_user, field, getattr(twitter_user, field))
    db.session.add(db_user)
    db.session.commit()

    tweets = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=300,
    )
    print("TWEETS COUNT:", len(tweets))

    texts = [item.full_text for item in tweets]
    vectors = basilica_api_client.embed_sentences(texts, model="twitter")
    embeddings = list(vectors)
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for position, tweet in enumerate(tweets):
        print(position)
        print(tweet.full_text)
        print("----")
        vector = embeddings[position]
        # get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(tweet.id)
        if not db_tweet:
            db_tweet = Tweet(id=tweet.id)
        db_tweet.user_id = tweet.author.id  # or db_user.id
        db_tweet.full_text = tweet.full_text
        db_tweet.embedding = vector
        db.session.add(db_tweet)

    db.session.commit()
    return "OK"
def add_or_update_user(username):
    """Add or update a user and their tweets, else error if not a Twitter user.

    Fetches the account through ``TWITTER``, reuses the stored ``User`` row
    or creates one with the profile fields, and stores every tweet (no
    retweets/replies) newer than ``db_user.newest_tweet_id``.  The embedding
    is computed on the full text; the stored text is cut to 300 characters.

    Args:
        username: Twitter handle to look up.

    Raises:
        Exception: Any error from the Twitter, Basilica or database call.
            It is printed, the session is rolled back, and the original
            exception is re-raised.
    """
    try:
        # Fetch twitter user handle
        twitter_user = TWITTER.get_user(username)

        # Reuse the stored user, or build a new SQLAlchemy User instance
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id,
                        handle=username,
                        name=twitter_user.name,
                        profile_image_url=twitter_user.profile_image_url_https,
                        followers_count=twitter_user.followers_count,
                        following_count=twitter_user.friends_count))
        db.session.add(db_user)

        # Fetch as many recent tweets as possible with no RT's/Replies.
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)

        # If anything new came back, remember the most recent tweet id
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        for tweet in tweets:
            # Calculate embedding on the full tweet, but truncate for storing
            embedding = BASILICA.embed_sentence(tweet.full_text,
                                                model='twitter')
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            db.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        # Discard pending objects so nothing partial is committed later.
        db.session.rollback()
        raise
    else:
        # If no errors happened then commit the records
        db.session.commit()
def fetch_user(screen_name=None):
    """Upsert a Twitter user and their timeline tweets, with embeddings.

    The ``User`` row is committed first.  Up to 150 timeline tweets are then
    fetched, embedded in a single Basilica request, upserted as ``Tweet``
    rows and committed.

    Args:
        screen_name: Screen name passed to the Twitter client.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)
    twitter_user = twitter_api.get_user(screen_name)

    # get existing user from the db or initialize a new one
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    tweets = twitter_api.user_timeline(screen_name,
                                       tweet_mode="extended",
                                       count=150)
    print("TWEETS COUNT:", len(tweets))

    all_tweet_texts = [status.full_text for status in tweets]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(tweets):
        print(index)
        print(status.full_text)
        print("____")
        # The batched result already holds this tweet's embedding, so no
        # extra per-tweet request is made.
        embedding = embeddings[index]
        print(len(embedding))

        # get existing tweet from the db or initialize a new one
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embedding
        db.session.add(db_tweet)

    db.session.commit()
    return "OK"
def show_user(screen_name=None): print("SHOWING USER:"******"extended", count=50, exclude_replies=True, include_rts=False) for status in statuses: print(status.full_text) db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id) print(db_tweet) db_tweet.user_id = status.author.id db_tweet.full_text = status.full_text embedding = basilica_client.embed_sentence(status.full_text, model="twitter") print("Embedding length:", len(embedding)) db_tweet.embedding = embedding db.session.add(db_tweet) print("TWEET ADDED TO DB") print("COMMITTING TWEET TO DB...") db.session.commit() return render_template("user_profile.html", user=db_user, tweets=db_user.tweets) except Exception as e: print(e, "This is the exception") return jsonify( {"message": "OOPS THERE WAS AN ERROR. PLEASE TRY ANOTHER USER"})
def add_user_tweepy(username):
    '''Add a user and their tweets to database.

    Fetches the account through ``TWITTER``, reuses or creates the matching
    ``User`` row, and stores every tweet (no retweets/replies) newer than
    ``db_user.newest_tweet_id``.  Each tweet gets the vector computed from
    its own full text; the stored text is cut to 300 characters.

    Args:
        username: Twitter screen name to look up.

    Raises:
        Exception: Any error from the Twitter, vectorizer or database call.
            It is printed, the session is rolled back, and the original
            exception is re-raised.
    '''
    try:
        # Get user info from tweepy
        twitter_user = TWITTER.get_user(username)

        # Add to User table (or check if existing)
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id,
                        username=username,
                        followers=twitter_user.followers_count))
        db.session.add(db_user)

        # Get tweets ignoring re-tweets and replies
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)

        # Add newest_tweet_id to the User table
        if tweets:
            db_user.newest_tweet_id = tweets[0].id

        # Loop over tweets, get embedding and add to Tweet table
        for tweet in tweets:
            # Embed this tweet's own text (not the first tweet's).
            embedding = vectorize_tweet(nlp, tweet.full_text)
            db_tweet = Tweet(id=tweet.id,
                             text=tweet.full_text[:300],
                             embedding=embedding)
            db_user.tweets.append(db_tweet)
            db.session.add(db_tweet)
    except Exception as e:
        print('Error processing {}: {}'.format(username, e))
        # Discard pending objects so nothing partial is committed later.
        db.session.rollback()
        raise
    else:
        # If no errors happened then commit the records
        db.session.commit()
# to run in flask shell: exec(open("./add_users.py").read())
# Seed script: drops and recreates every table, then inserts two sample
# users with three tweets each.
from twitoff.models import DB, User, Tweet

# Start from an empty schema (this destroys existing data).
DB.drop_all()
DB.create_all()

# First sample user and tweets.
u1 = User(name='Mort')
t1 = Tweet(text='What time is it?')
t2 = Tweet(text='Are these my shoes?')
t3 = Tweet(text='Not my chair, not my problem.')
u1.tweets.extend([t1, t2, t3])

# Second sample user and tweets.
u2 = User(name='Jack')
t4 = Tweet(text='Rocket 88')
t5 = Tweet(text='Deep Deep Sleep')
t6 = Tweet(text="Somethin' Else")
u2.tweets.extend([t4, t5, t6])

# Stage every object, then persist them in a single commit.
adds = [u1, u2, t1, t2, t3, t4, t5, t6]
for add in adds:
    DB.session.add(add)
DB.session.commit()
def get_tweeter(screen_name=None):
    """Render the tweeter page for ``screen_name``, loading data as needed.

    The account is looked up in the ``Tweeter`` table first.  When it is not
    stored, ``get_user`` is asked; an unknown or protected account only
    produces messages, otherwise a new ``Tweeter`` row is committed.  For a
    valid tweeter the timeline from ``get_timeline`` is embedded in one
    Basilica request and upserted as ``Tweet`` rows.

    Args:
        screen_name: Twitter screen name to show.

    Returns:
        The rendered ``tweeter.html`` template with ``tweeter`` (dict),
        ``tweets`` (list of tweet texts) and ``messages`` (list of strings).
    """
    tweets = []
    messages = []
    tweeter = {
        'screen_name': screen_name,
        'name': '',
        'followers_count': 0,
        'statuses_count': 0,
        'stored_count': 0,
        'id': 0,
        'id_str': 0,
        'latest_status_id': 0
    }
    # Fields copied onto the template dict from either source of truth.
    shared_fields = ('screen_name', 'name', 'followers_count',
                     'statuses_count', 'id')

    db_tweeter = Tweeter.query.filter_by(screen_name=screen_name).first()

    if db_tweeter is not None:
        # found screen_name in database
        for field in shared_fields:
            tweeter[field] = getattr(db_tweeter, field)
    else:
        # not in database, query twitter
        user = get_user(screen_name=screen_name)
        if user is None:
            # user not found through twitter api
            messages.append(f'Unable to find Twitter account: {screen_name}')
        elif user.protected:
            # found user, but the account is protected
            messages.append(f'Unable to load tweets for {screen_name}.')
            messages.append('Account is protected.')
        else:
            # found user, add to database
            for field in shared_fields:
                tweeter[field] = getattr(user, field)
            new_tweeter = Tweeter(
                screen_name=user.screen_name,
                name=user.name,
                id=user.id,
                id_str=user.id_str,
                followers_count=user.followers_count,
                statuses_count=user.statuses_count
            )
            db.session.add(new_tweeter)
            db.session.commit()

    # valid tweeter (id stays 0 when the account was unknown or protected)
    if tweeter['id'] != 0:
        timeline = get_timeline(tweeter['screen_name'])
        tweets = [status.full_text for status in timeline]
        if tweets:
            b_api = basilica_api()
            embeddings = list(b_api.embed_sentences(tweets, model='twitter'))
            for status, embedding in zip(timeline, embeddings):
                new_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
                new_tweet.id_str = status.id_str
                new_tweet.text = status.full_text
                new_tweet.tweeter_id = status.author.id
                new_tweet.embedding = embedding
                db.session.add(new_tweet)
            db.session.commit()
        else:
            # `tweets` has one entry per timeline status, so its length can
            # never differ from the timeline's; an empty list is the only
            # other case to report.
            messages.append(f'No tweets loaded for {screen_name}')

    return render_template(
        'tweeter.html',
        tweeter=tweeter,
        tweets=tweets,
        messages=messages
    )