def predict():
    """Train a per-request decision-tree pipeline on two users' stored tweet
    embeddings and predict which user more likely wrote the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # todo: wrap in a try block in case the users don't exist in the database
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X values: tweet embeddings; y values: the authoring screen name
    embeddings = []
    labels = []
    for author, tweets in ((user_a, user_a_tweets), (user_b, user_b_tweets)):
        for tweet in tweets:
            labels.append(author.screen_name)
            embeddings.append(tweet.embedding)
    pipeline = make_pipeline(
        ce.OrdinalEncoder(),
        DecisionTreeClassifier(min_samples_leaf=3, random_state=42, max_depth=9),
    )
    pipeline.fit(embeddings, labels)
    print("classifier training score:", pipeline.score(embeddings, labels))

    print("-----------------")
    print("MAKING A PREDICTION...")
    # embed the submitted text with the same twitter model used for training data
    example_embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = pipeline.predict([example_embedding])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Fit a logistic regression on two users' tweet embeddings and render
    the prediction page naming the more likely author of the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    print(screen_name_a)
    screen_name_b = request.form["screen_name_b"]
    print(screen_name_b)
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # h/t: https://flask-sqlalchemy.palletsprojects.com/en/2.x/queries/
    # get the embeddings (from the database)
    user_a = User.query.filter_by(screen_name=screen_name_a).first()
    user_b = User.query.filter_by(screen_name=screen_name_b).first()
    user_a_tweets = user_a.tweets
    print(user_a_tweets)
    user_b_tweets = user_b.tweets
    print(user_b_tweets)
    print("FETCHED TWEETS", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X values / inputs: embeddings; Y values / labels: screen_names
    classifier = LogisticRegression()
    embeddings = []
    labels = []
    for name, tweets in ((screen_name_a, user_a_tweets), (screen_name_b, user_b_tweets)):
        for tweet in tweets:
            embeddings.append(tweet.embedding)
            labels.append(name)  # equivalently: tweet.user.screen_name
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Train a logistic regression on two users' stored tweet embeddings and
    predict which screen name more likely authored the submitted tweet text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # todo: wrap in a try block in case the users don't exist in the database
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # build (embedding, label) training pairs, then split them into X and y
    training_pairs = [(tweet.embedding, user_a.screen_name) for tweet in user_a_tweets]
    training_pairs += [(tweet.embedding, user_b.screen_name) for tweet in user_b_tweets]
    embeddings = [pair[0] for pair in training_pairs]
    labels = [pair[1] for pair in training_pairs]
    classifier = LogisticRegression()
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # embed the submitted text with the same Basilica twitter model
    example_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def twitoff_predict():
    """Train a logistic regression on two users' tweet embeddings and render
    the results page naming the more likely author of the submitted text.

    Reads screen_name_a, screen_name_b and tweet_text from the POSTed form.
    """
    print(f"INFO: begin predict route processing")
    # Grab form data
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(
        f"INFO: name1: {screen_name_a} name2: {screen_name_b} tweet: {tweet_text}"
    )

    # Training data: embeddings are the inputs, screen names the labels
    tweet_embeddings = []
    tweet_labels = []

    # Fetch the objects for user a and user b from the database
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    tweets_a = user_a.tweets
    tweets_b = user_b.tweets
    all_tweets = tweets_a + tweets_b

    # Iterate through tweets, skipping any without a usable embedding
    for tweet in all_tweets:
        # BUGFIX: test for None with `is`, not `==` — identity comparison is
        # the correct (PEP 8) check for the None singleton
        if tweet.embedding is None or tweet.embedding == '':
            # Missing embedding value... let's skip
            continue
        tweet_embeddings.append(tweet.embedding)
        tweet_labels.append(tweet.user.screen_name)
    print("EMBEDDINGS:", len(tweet_embeddings), "LABELS:", len(tweet_labels))

    # Define and fit a model
    print(f"INFO: generating a Logistic Regression model")
    classifier = LogisticRegression(random_state=0, solver="lbfgs",
                                    multi_class="multinomial")
    print(f"INFO: fitting the Logistic Regression model")
    print(f"INFO: just before the fit step")
    classifier.fit(tweet_embeddings, tweet_labels)
    print(f"INFO: just after the fit step")

    # Generate a prediction for the submitted text
    print(f"INFO: just before the basilica step")
    example_tweet_embedding = basilica_conn.embed_sentence(tweet_text, model="twitter")
    print(f"INFO: just before the prediction step")
    result = classifier.predict([example_tweet_embedding])
    print(f"INFO: just after the prediction step")
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Predict which of two stored users is the more likely author of the
    submitted tweet text, using a per-request logistic regression."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # todo: wrap in a try block in case the users don't exist in the database
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))
    # consider returning a warning message / redirect if data isn't in the database

    print("-----------------")
    print("TRAINING THE MODEL...")
    embeddings = []
    labels = []
    for author, tweets in ((user_a, user_a_tweets), (user_b, user_b_tweets)):
        for tweet in tweets:
            labels.append(author.screen_name)
            embeddings.append(tweet.embedding)
    classifier = LogisticRegression()  # for example
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    example_embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def twitoff_predict():
    """Fit a multinomial logistic regression on two users' tweet embeddings,
    then render the page naming the more likely author of the given text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 'Cardstud', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(screen_name_a, screen_name_b, tweet_text)

    #
    # train a model: classifier.fit(X, y)
    #
    # from db we need users and tweets
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    all_tweets = user_a.tweets + user_b.tweets
    tweet_embeddings = [tweet.embedding for tweet in all_tweets]
    tweet_labels = [tweet.user.screen_name for tweet in all_tweets]
    print("EMBEDDINGS", len(tweet_embeddings), "LABELS", len(tweet_labels))
    classifier = LogisticRegression(random_state=0, solver='lbfgs',
                                    multi_class="multinomial")
    classifier.fit(tweet_embeddings, tweet_labels)

    #
    # make and return a prediction
    #
    example_tweet_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_tweet_embedding])
    print("RESULT:", result[0])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def twitoff_predict():
    """Train a multinomial logistic regression on both users' tweet embeddings
    and predict the more likely author of the submitted tweet text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(screen_name_a, screen_name_b, tweet_text)

    #
    # train a model
    #
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    # concatenating the relationships gives one combined training set
    all_tweets = user_a.tweets + user_b.tweets
    # embeddings and the corresponding author name for each tweet
    tweet_embeddings = [tweet.embedding for tweet in all_tweets]
    tweet_labels = [tweet.user.screen_name for tweet in all_tweets]
    print('EMBEDDINGS:', len(tweet_embeddings), 'LABELS:', len(tweet_labels))
    classifier = LogisticRegression(random_state=42, solver='lbfgs',
                                    multi_class='multinomial')
    classifier.fit(tweet_embeddings, tweet_labels)

    #
    # make a prediction and return it
    #
    example_tweet_embedding = basilica_connection.embed_sentence(tweet_text, model='twitter')
    # note: predict() expects a 2-D input, hence wrapping the embedding in a list
    result = classifier.predict([example_tweet_embedding])
    print('RESULT:', result[0])
    return render_template('prediction_results.html',
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Fit a logistic regression on two users' tweet embeddings and render
    the results page with the screen name most likely to have written the text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # get the tweets from the database:
    user_a = User.query.filter_by(screen_name=screen_name_a).first()
    user_b = User.query.filter_by(screen_name=screen_name_b).first()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("fetched tweets", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # x values / inputs: embeddings; y values / labels: screen_names
    classifier = LogisticRegression()
    embeddings = []
    labels = []
    for name, tweets in ((screen_name_a, user_a_tweets), (screen_name_b, user_b_tweets)):
        for tweet in tweets:
            embeddings.append(tweet.embedding)
            labels.append(name)
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def get_user(screen_name=None):
    """Fetch a Twitter user and their recent tweets, embed each tweet via
    Basilica, and upsert the user + tweets into the database.

    :param screen_name: Twitter handle to fetch (from the route).
    :returns: the string "OK" on success.
    """
    print(screen_name)
    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(screen_name, tweet_mode="extended",
                                                count=150, exclude_replies=True,
                                                include_rts=False)
    print("STATUSES COUNT:", len(statuses))

    # get existing user from the db or initialize a new one:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # PERF FIX (resolves the original todo): embed all tweet texts in a single
    # Basilica request instead of one HTTP round-trip per tweet
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[index]  # pre-computed batch embedding
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()
    return "OK"
def twitoff_prediction():
    """Train a logistic regression on both users' stored tweet embeddings and
    render the prediction-results page for the submitted tweet text."""
    # dict(request.form) is handy for inspecting the submitted form fields
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    ## TRAIN MODEL:
    # instantiate the model
    model = LogisticRegression()
    # get the users, the same way the twitter routes look them up
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    # combine both users' tweets into one training set
    all_tweets = user_a.tweets + user_b.tweets
    # .embedding comes from the Tweet model class
    embeddings = [tweet.embedding for tweet in all_tweets]
    labels = [tweet.user.screen_name for tweet in all_tweets]
    model.fit(embeddings, labels)

    ## MAKE PREDICTION
    example_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    result = model.predict([example_embedding])
    screen_name_most_likely = result[0]
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
def twitoff_prediction():
    """Predict which of two users more likely wrote the submitted tweet text.

    Inputs are each tweet's Basilica embedding; labels are the authoring
    screen names.
    """
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    #
    # TRAIN THE MODEL
    #
    model = LogisticRegression(max_iter=1000)
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    all_tweets = user_a.tweets + user_b.tweets
    embeddings = [tweet.embedding for tweet in all_tweets]
    labels = [tweet.user.screen_name for tweet in all_tweets]
    model.fit(embeddings, labels)

    #
    # MAKE PREDICTION
    #
    example_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    screen_name_most_likely = model.predict([example_embedding])[0]
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
def tweetoff_prediction_results():
    """Fit a logistic regression on two users' tweet embeddings and render the
    results page naming the more likely author of the submitted tweet text."""
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    model = LogisticRegression(max_iter=1000)
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()

    # training data: one (embedding, screen_name) pair per tweet, per user
    embeddings = []
    labels = []
    for author in (user_a, user_b):
        for tweet in author.tweets:
            labels.append(author.screen_name)
            embeddings.append(tweet.embedding)
    model.fit(embeddings, labels)

    example_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    screen_name_most_likely = model.predict([example_embedding])[0]
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
def fetch_user_data(screen_name):
    """Fetch a user's profile and latest tweets from Twitter, embed all tweet
    texts in one Basilica request, and upsert user + tweets into the database.

    :param screen_name: Twitter handle to fetch.
    :returns: a short confirmation string.
    """
    print("FETCHING...", screen_name)

    # fetch user info
    user = twitter_api.get_user(screen_name)

    # store user in database (reuse an existing row when possible)
    db_user = User.query.get(user.id) or User(id=user.id)
    db_user.screen_name = user.screen_name
    db_user.name = user.name
    db_user.location = user.location
    db_user.followers_count = user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # fetch their latest tweets (full text mode)
    statuses = twitter_api.user_timeline(screen_name, tweet_mode="extended", count=50)
    print("STATUSES", len(statuses))

    # one batched Basilica call embeds every tweet text (a list of lists)
    tweet_texts = [status.full_text for status in statuses]
    embeddings = list(basilica_connection.embed_sentences(tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # store tweets in database (w/embeddings); enumerate keeps each status
    # aligned with its embedding
    for i, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embeddings[i]
        db.session.add(db_tweet)
    db.session.commit()
    return f"FETCHED {screen_name} OK"
def predict():
    """Train a logistic regression on two users' tweet embeddings and predict
    which user more likely authored the submitted tweet text.

    Returns a 404 message when either screen name is not in the database.
    """
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(screen_name_a, screen_name_b, tweet_text)

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    user_a = User.query.filter_by(screen_name=screen_name_a).one_or_none()
    user_b = User.query.filter_by(screen_name=screen_name_b).one_or_none()
    # BUGFIX: one_or_none() returns None for a missing user, and the code then
    # crashed with "'NoneType' object has no attribute 'tweets'". Guard and
    # return a friendly 404 instead of a 500 error.
    if user_a is None or user_b is None:
        missing = screen_name_a if user_a is None else screen_name_b
        return f"User '{missing}' was not found in the database.", 404
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X: tweet embeddings; y: the screen name that authored each tweet
    embeddings = []
    labels = []
    for tweet in user_a_tweets:
        labels.append(user_a.screen_name)
        embeddings.append(tweet.embedding)
    for tweet in user_b_tweets:
        labels.append(user_b.screen_name)
        embeddings.append(tweet.embedding)
    classifier = LogisticRegression()  # for example
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # Use the Basilica twitter model to embed the input tweet text
    example_embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])
    # Direct user to prediction result page after running prediction
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def get_user(screen_name=None):
    """Fetch a user's profile and latest 150 tweets from Twitter, embed every
    tweet text in one Basilica request, upsert user + tweets into the database,
    and render the user page.

    :param screen_name: Twitter handle to fetch (from the route).
    """
    print(screen_name)
    # Use the input screen name to get user info via the twitter api
    twitter_user = twitter_api_client.get_user(screen_name)
    # Collect the 150 latest tweets (full-text mode); note that excluding
    # replies/retweets would cause a different tweet count per user
    statuses = twitter_api_client.user_timeline(screen_name, tweet_mode="extended", count=150)
    print("STATUSES COUNT:", len(statuses))

    ## ADD USER TABLE INFORMATION TO DB
    # Get existing user from the db or initialize a new one if it doesn't exist yet:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    # Add and commit changes to the database user table
    db.session.add(db_user)
    db.session.commit()

    # ADD TWEETS TO TWEET TABLE IN DB
    # Embed all tweet texts as numeric vectors in a single Basilica request,
    # using the twitter-specific model
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # Store each tweet in the database, paired with its batch embedding.
    # BUGFIX: the original also called basilica_api_client.embed_sentence() for
    # every tweet and immediately discarded the result — one wasted HTTP
    # request per tweet. Only the batch embeddings were ever stored.
    for counter, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # Get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[counter]
        print(len(embedding))
        # Set the corresponding embedding from our list of embeddings
        db_tweet.embedding = embedding
        # Add the tweet to the database
        db.session.add(db_tweet)
    # Commit changes to database table
    db.session.commit()
    # This is what the front end website will see
    return render_template("user.html", user=db_user, tweets=statuses)
def predict():
    """Predict which of two users more likely authored the submitted tweet
    text; users missing from the database are fetched from Twitter first.

    Renders the results page with the winning screen name and that user's
    profile picture.
    """
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # h/t: https://flask-sqlalchemy.palletsprojects.com/en/2.x/queries/
    # get the embeddings from the database, or fetch from Twitter if missing
    if User.query.filter_by(screen_name=screen_name_a).first() is None:
        fetch_user(screen_name_a)
        print("User ", screen_name_a, "fetched")
    user_a = User.query.filter_by(screen_name=screen_name_a).first()
    print("user_a", user_a)
    if User.query.filter_by(screen_name=screen_name_b).first() is None:
        fetch_user(screen_name_b)
        # BUGFIX: this log line previously printed screen_name_a (copy-paste error)
        print("User ", screen_name_b, "fetched")
    user_b = User.query.filter_by(screen_name=screen_name_b).first()
    print("user_b", user_b)
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("FETCHED TWEETS", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X values / inputs: embeddings; Y values / labels: screen_names
    classifier = LogisticRegression()
    embeddings = []
    labels = []
    for tweet in user_a_tweets:
        embeddings.append(tweet.embedding)
        labels.append(screen_name_a)  # or tweet.user.screen_name
    for tweet in user_b_tweets:
        embeddings.append(tweet.embedding)
        labels.append(screen_name_b)  # or tweet.user.screen_name
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])
    # look up the predicted user's profile picture for the results page
    image_screen_name = result[0]
    image_user = User.query.filter_by(screen_name=image_screen_name).first()
    image = image_user.picture
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0],
                           image=image)
def predict():
    """Train a logistic regression on two users' stored tweet embeddings and
    render the results page naming the more likely author of the given text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # todo: wrap in a try block in case the users don't exist in the database
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))
    # todo: consider sampling the same number of tweets from each user

    print("-----------------")
    print("TRAINING THE MODEL...")
    embeddings = []
    labels = []
    for author, tweets in ((user_a, user_a_tweets), (user_b, user_b_tweets)):
        for tweet in tweets:
            labels.append(author.screen_name)
            embeddings.append(tweet.embedding)
    classifier = LogisticRegression(random_state=0, solver='lbfgs')  # for example
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    example_embedding = basilica_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])
    return render_template("results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])