import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import database_helper  # project-local db access module


def get_tweet_sentiments_scores(movieId):
    """
    Function to assign sentiment scores to movie tweets using movieId.
    :param movieId: integer containing movieId from the movies table
    :return: pandas DataFrame of movie tweets with sentiment scores and classifications
    """
    # initialise VADER
    analyser = SentimentIntensityAnalyzer()

    # get movie tweets from db
    tweets = database_helper.select_geo_tweets(movieId)

    # list to convert to df for return
    sent_lst = []
    for index, row in tweets.iterrows():
        # score the tweet text with VADER
        sentiment = analyser.polarity_scores(row['msg'])
        lst_row = {
            "id": row["id"],
            "negative_scr": sentiment["neg"],
            "positive_scr": sentiment["pos"],
            "neutral_scr": sentiment["neu"],
            "compound_scr": sentiment["compound"],
            "senti_class": classify_sentiment(sentiment)
        }
        sent_lst.append(lst_row)

    # return df of movie tweets with their scores
    return pd.DataFrame(sent_lst)
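# classify_sentiment() is called above but not defined in this excerpt.
# A minimal sketch, assuming the conventional VADER compound-score cut-offs
# (>= 0.05 positive, <= -0.05 negative, neutral otherwise); the project's
# actual classification rules may differ.
def classify_sentiment(sentiment):
    if sentiment["compound"] >= 0.05:
        return "positive"
    elif sentiment["compound"] <= -0.05:
        return "negative"
    return "neutral"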
def get_tweet_regions(movieId):
    """
    Function to retrieve movie tweets with their respective region names.
    :param movieId: integer containing movieId from the movies table
    :return: geopandas GeoDataFrame of region-tagged movie tweets
    """
    # get geodataframe of movie tweets and drop rows with no geometry
    tweets = database_helper.select_geo_tweets(movieId)
    tweets.dropna(subset=["geombng"], inplace=True)

    # use Ordnance Survey shapefile to attach region names
    gb = gpd.read_file("../../ProjectData/Data/GB/european_region_region.shp")
    gb_tweets = sjoin(tweets, gb, how='inner')
    gb_tweets["region"] = gb_tweets["NAME"].str.replace("Euro Region", "").str.strip()

    # return geodataframe of region-tagged tweets
    return gb_tweets
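# Usage sketch for get_tweet_regions(): aggregate the region-tagged tweets
# into a per-region tweet count. The movieId of 1 is an illustrative value,
# not a known id from the movies table.
region_tweets = get_tweet_regions(1)
region_counts = region_tweets.groupby("region").size().sort_values(ascending=False)
print(region_counts)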
# earlier seaborn kde density plot, left commented out (sns and self come
# from the original class context):
# test = sns.kdeplot(gb_tweets['lng'], gb_tweets['lat'],
#                    shade=True, shade_lowest=False, cmap='viridis',
#                    ax=ax)
# ax.set_axis_off()
# plt.axis('equal')
# plt.title(self.title + " Tweet Density")
# plt.show()
# plt.clf()
# plt.cla()
# plt.close()

fig, ax = plt.subplots(1, figsize=(9, 9))

gb = gpd.read_file("../../ProjectData/Data/GB/european_region_region.shp")
tweets = database_helper.select_geo_tweets(1)
tweets.dropna(subset=["geombng"], inplace=True)
gb_tweets = sjoin(tweets, gb, how='inner')

# geombng holds British National Grid eastings/northings, so these "lat"
# and "lng" columns are projected y/x coordinates rather than true
# latitude/longitude values
gb_tweets["lat"] = gb_tweets["geombng"].y
gb_tweets["lng"] = gb_tweets["geombng"].x

# bounding box of each fishnet grid cell
grid = database_helper.select_uk_fishnet()
grid["minx"] = grid["geombng"].bounds.minx
grid["miny"] = grid["geombng"].bounds.miny
grid["maxx"] = grid["geombng"].bounds.maxx
grid["maxy"] = grid["geombng"].bounds.maxy

# bounding box of each region polygon
gb["minx"] = gb["geometry"].bounds.minx
gb["miny"] = gb["geometry"].bounds.miny
gb["maxx"] = gb["geometry"].bounds.maxx
gb["maxy"] = gb["geometry"].bounds.maxy
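# The per-cell bounds computed above suggest assigning tweets to fishnet
# cells. A minimal sketch that makes that assignment with a spatial join
# rather than manual bounds checks; it assumes geombng can be set as the
# active geometry of the fishnet frame, that both layers share the same
# CRS, and that geopandas exposes the joined index as "index_right".
cell_tweets = sjoin(tweets, grid.set_geometry("geombng"), how="inner")
tweets_per_cell = cell_tweets.groupby("index_right").size()
print(tweets_per_cell.sort_values(ascending=False).head())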
import numpy as np
import scipy.signal
import scipy.ndimage as ndi
from colour import Color
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nameparser import HumanName
from datetime import datetime
from datetime import timedelta

import movie_helper  # project-local helper module

movie = movie_helper.get_movie_by_id(28)

#start_date = datetime.combine((movie.ukReleaseDate - timedelta(days=14)), datetime.min.time())
#end_date = datetime.combine((movie.first_run_end - timedelta(days=14)), datetime.min.time())
start_date = None
end_date = None

tweets = database_helper.select_geo_tweets(movie.movieId, start_date, end_date)
tweets = tweets.sort_values(by=['created_at']).reset_index()

times = tweets['created_at']

# minutes remaining in the day at each tweet's timestamp (unused below)
times_tot_mins = 24*60 - (60*np.array([t.hour for t in times]) + np.array([t.minute for t in times]))

# seconds between consecutive tweets
seps = np.array([(times[i] - times[i-1]).total_seconds() for i in range(1, len(times))])
seps[seps == 0] = 1  # convert zero-second separations to 1-second separations
seps = seps.astype(int)

# pair each separation with the one that follows it
sep_array = np.zeros((len(seps) - 1, 2))  # 1st column: x-coords, 2nd column: y-coords
sep_array[:, 0] = seps[:-1]
sep_array[:, 1] = seps[1:]
sep_array = sep_array.astype(int)
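# sep_array pairs each inter-tweet gap with the gap that follows it, which
# is the input for a 2D density of consecutive separations. A minimal
# sketch that bins the pairs on log10 axes and smooths the counts with the
# scipy.ndimage import above; the bin count of 50 and sigma of 1 are
# illustrative choices, not values taken from the project.
log_seps = np.log10(sep_array)  # gaps are >= 1 second, so log10 is safe
heatmap, xedges, yedges = np.histogram2d(log_seps[:, 0], log_seps[:, 1], bins=50)
smoothed = ndi.gaussian_filter(heatmap, sigma=1)

plt.imshow(smoothed.T, origin='lower', cmap='viridis')
plt.xlabel('log10 gap to previous tweet (s)')
plt.ylabel('log10 gap to next tweet (s)')
plt.show()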