def main(result_dict, df, chat_members, running_all_functions):
    """Record every member's average word length in ``result_dict``.

    One value per entry of ``chat_members`` (same order) is appended to
    ``result_dict['average word length']``, rounded to one decimal place.
    Only rows that are not flagged as reaction, attachment or link count
    towards a member's mean; a member with no such rows is reported as 0.

    Side effects on ``df``: the ``'word length'`` column is always written;
    the ``'is attachment?'`` and ``'is link?'`` columns are written only
    when ``running_all_functions`` is falsy. ``'is reaction?'`` is read but
    not created in this function.
    """
    column = 'average word length'
    result_dict[column] = []

    if not running_all_functions:
        # NOTE(review): when all analyses run together these flag columns
        # are presumably already on df - confirm against the caller.
        df['is attachment?'] = df['type'].apply(helpers.is_attachment)
        df['is link?'] = df['text'].apply(helpers.is_link)
    df['word length'] = df['text'].apply(helpers.average_word_length)

    excluded_flags = ('is reaction?', 'is attachment?', 'is link?')
    for member_name in chat_members:
        initialize_result_dict(member_name, df, result_dict)

        # Start from the member's rows and drop every flagged kind of row.
        keep = df['sender'] == member_name
        for flag in excluded_flags:
            keep = keep & (~df[flag])

        mean_length = df[keep]['word length'].mean()
        # The mean of an empty selection is NaN; report it as 0 instead.
        if math.isnan(mean_length):
            mean_length = 0
        result_dict[column].append(round(mean_length, 1))
def main(result_dict, df, chat_members):
    """Collect per-member statistics about game messages.

    Four lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'messages that are games'`` - rows of the member flagged as game
      messages;
    * ``'% of messages that are games'`` - that count relative to the
      member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals;
    * ``'messages that are game starts'`` - rows of the member flagged as
      game starts;
    * ``'% of game starts that are by this person'`` - the member's share
      of all game starts, as a percentage with two decimals.

    Side effect: adds the ``'is game message?'`` and ``'is game start?'``
    columns to ``df``, computed row by row from each row's text and type.
    """
    games_key = 'messages that are games'
    games_pct_key = '% of messages that are games'
    starts_key = 'messages that are game starts'
    starts_pct_key = '% of game starts that are by this person'
    result_dict[games_key] = []
    result_dict[games_pct_key] = []
    result_dict[starts_key] = []
    result_dict[starts_pct_key] = []

    df['is game message?'] = df.apply(
        lambda row: helpers.is_game_message(row.text, row.type), axis=1)
    df['is game start?'] = df.apply(
        lambda row: helpers.is_game_start(row.text, row.type), axis=1)

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        sent_by_member = df['sender'] == member_name
        game_count = len(df[(df['is game message?']) & sent_by_member])
        start_count = len(df[(df['is game start?']) & sent_by_member])

        result_dict[games_key].append(game_count)
        game_ratio = helpers.safe_divide(game_count, non_reaction_messages)
        result_dict[games_pct_key].append(round(game_ratio * 100, 2))
        result_dict[starts_key].append(start_count)

    # Shares need the grand total, so they are filled in a second pass.
    starts_per_member = result_dict[starts_key]
    all_starts = sum(starts_per_member)
    result_dict[starts_pct_key].extend(
        round(helpers.safe_divide(member_starts, all_starts) * 100, 2)
        for member_starts in starts_per_member
    )
def main(result_dict, df, chat_members, minutes_threshold):
    """Count the conversation starters sent by each chat member.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'conversation starters'`` - the member's non-reaction rows that are
      flagged as conversation starters;
    * ``'% of all conversation starters that are by this person'`` - that
      count as a percentage of the sum over all members, two decimals.

    Side effect: adds an ``'is conversation starter?'`` column to ``df``.
    Each row's flag comes from ``helpers.is_conversation_starter`` applied
    to the difference between consecutive ``'time'`` values and
    ``minutes_threshold``; the first row is always flagged.
    """
    count_key = 'conversation starters'
    share_key = '% of all conversation starters that are by this person'
    result_dict[count_key] = []
    result_dict[share_key] = []

    def starts_conversation(gap):
        return helpers.is_conversation_starter(gap, minutes_threshold)

    df['is conversation starter?'] = df['time'].diff().apply(
        starts_conversation)
    # The first row has no predecessor, so it is flagged unconditionally.
    flag_position = df.columns.get_loc('is conversation starter?')
    df.iloc[0, flag_position] = True

    for member_name in chat_members:
        initialize_result_dict(member_name, df, result_dict)
        selection = (
            (df['is conversation starter?'])
            & (df['sender'] == member_name)
            & (~df['is reaction?'])
        )
        result_dict[count_key].append(len(df[selection]))

    # Shares need the grand total, so they are filled in a second pass.
    starters_per_member = result_dict[count_key]
    all_starters = sum(starters_per_member)
    result_dict[share_key].extend(
        round(helpers.safe_divide(starters, all_starters) * 100, 2)
        for starters in starters_per_member
    )
def main(result_dict, df, chat_members):
    """Record each member's message count and share of all messages.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'total messages'`` - the first value returned by
      ``initialize_result_dict`` for the member;
    * ``'% of all messages that are by this person'`` - that value as a
      percentage of the sum over all members, rounded to two decimals.
    """
    count_key = 'total messages'
    share_key = '% of all messages that are by this person'
    result_dict[count_key] = []
    result_dict[share_key] = []

    for member_name in chat_members:
        member_total, _ = initialize_result_dict(member_name, df, result_dict)
        result_dict[count_key].append(member_total)

    # Shares need the grand total, so they are filled in a second pass.
    totals_per_member = result_dict[count_key]
    grand_total = sum(totals_per_member)
    result_dict[share_key].extend(
        round(helpers.safe_divide(member_total, grand_total) * 100, 2)
        for member_total in totals_per_member
    )
def main(result_dict, df, chat_members):
    """Count, per member, the messages flagged as tweets.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'messages that are tweets'`` - rows of the member for which
      ``helpers.is_tweet`` returned a truthy flag;
    * ``'% of messages that are tweets'`` - that count relative to the
      member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals.

    Side effect: adds an ``'is tweet?'`` column to ``df``.
    """
    count_key = 'messages that are tweets'
    percent_key = '% of messages that are tweets'
    result_dict[count_key] = []
    result_dict[percent_key] = []

    df['is tweet?'] = df['text'].apply(helpers.is_tweet)

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        member_tweets = df[(df['is tweet?']) & (df['sender'] == member_name)]
        tweet_count = len(member_tweets)

        result_dict[count_key].append(tweet_count)
        tweet_ratio = helpers.safe_divide(tweet_count, non_reaction_messages)
        result_dict[percent_key].append(round(tweet_ratio * 100, 2))
def main(result_dict, df, chat_members):
    """Count, per member, the messages flagged as containing emoji.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'messages that contain emoji'`` - rows of the member for which
      ``helpers.includes_emoji`` returned a truthy flag;
    * ``'% of messages that include emoji'`` - that count relative to the
      member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals.

    Side effect: adds an ``'includes emoji?'`` column to ``df``.
    """
    count_key = 'messages that contain emoji'
    percent_key = '% of messages that include emoji'
    result_dict[count_key] = []
    result_dict[percent_key] = []

    df['includes emoji?'] = df['text'].apply(helpers.includes_emoji)

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        is_member_emoji = (df['includes emoji?']) & (df['sender'] == member_name)
        emoji_count = len(df[is_member_emoji])

        result_dict[count_key].append(emoji_count)
        emoji_ratio = helpers.safe_divide(emoji_count, non_reaction_messages)
        result_dict[percent_key].append(round(emoji_ratio * 100, 2))
def main(result_dict, df, chat_members):
    """Count, per member, the messages flagged as all caps.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'all caps messages'`` - rows of the member for which
      ``helpers.is_all_caps`` returned a truthy flag;
    * ``'% of messages that are all caps'`` - that count relative to the
      member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals.

    Side effect: adds an ``'is all caps?'`` column to ``df``.
    """
    count_key = 'all caps messages'
    percent_key = '% of messages that are all caps'
    result_dict[count_key] = []
    result_dict[percent_key] = []

    df['is all caps?'] = df['text'].apply(helpers.is_all_caps)

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        shouted = df[(df['is all caps?']) & (df['sender'] == member_name)]
        shouted_count = len(shouted)

        result_dict[count_key].append(shouted_count)
        shouted_ratio = helpers.safe_divide(
            shouted_count, non_reaction_messages)
        result_dict[percent_key].append(round(shouted_ratio * 100, 2))
def main(result_dict, df, chat_members):
    """Count, per member, the messages flagged as attachments.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'attachment messages'`` - rows of the member for which
      ``helpers.is_attachment`` (applied to the ``'type'`` column) returned
      a truthy flag;
    * ``'% of messages that are attachments'`` - that count relative to the
      member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals.

    Side effect: adds an ``'is attachment?'`` column to ``df``.
    """
    count_key = 'attachment messages'
    percent_key = '% of messages that are attachments'
    result_dict[count_key] = []
    result_dict[percent_key] = []

    df['is attachment?'] = df['type'].apply(helpers.is_attachment)

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        is_member_attachment = (
            (df['is attachment?']) & (df['sender'] == member_name)
        )
        attachment_count = len(df[is_member_attachment])

        result_dict[count_key].append(attachment_count)
        attachment_ratio = helpers.safe_divide(
            attachment_count, non_reaction_messages)
        result_dict[percent_key].append(round(attachment_ratio * 100, 2))
def main(result_dict, df, chat_members, phrase, case_sensitive, separate,
         regex):
    """Count, per member, the messages that contain ``phrase``.

    Two lists, ordered like ``chat_members``, are written to
    ``result_dict`` (the keys embed the phrase itself):

    * ``'messages that contain {phrase}'`` - rows of the member for which
      ``helpers.is_phrase_in`` returned a truthy flag;
    * ``'% of messages that contain {phrase}'`` - that count relative to
      the member's non-reaction message count (second value returned by
      ``initialize_result_dict``), as a percentage with two decimals.

    ``case_sensitive``, ``separate`` and ``regex`` are forwarded unchanged
    to ``helpers.is_phrase_in`` together with the phrase and each message
    text.

    Side effect: adds an ``'includes {phrase}?'`` column to ``df``.

    Raises:
        ValueError: if ``phrase`` is ``None``.
    """
    if phrase is None:
        # ValueError is a subclass of Exception, so callers that catch the
        # previously raised bare Exception keep working.
        raise ValueError('Function is phrase but not given a phrase')

    count_key = f'messages that contain {phrase}'
    percent_key = f'% of messages that contain {phrase}'
    flag_column = f'includes {phrase}?'
    result_dict[count_key] = []
    result_dict[percent_key] = []

    df[flag_column] = df['text'].apply(
        lambda msg: helpers.is_phrase_in(
            phrase, msg, case_sensitive, separate, regex))

    for member_name in chat_members:
        _, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        phrase_count = len(
            df[(df[flag_column]) & (df['sender'] == member_name)])

        result_dict[count_key].append(phrase_count)
        phrase_ratio = helpers.safe_divide(phrase_count, non_reaction_messages)
        result_dict[percent_key].append(round(phrase_ratio * 100, 2))
def main(result_dict, df, chat_members, running_all_functions,
         minutes_threshold):
    """Compute how many message series each member sent and their length.

    A row begins a new series when its sender differs from the previous
    row's sender or when the row is flagged as a conversation starter.

    Three lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'total # of message series'`` - series-starting rows of the member;
    * ``'total messages'`` - the first value returned by
      ``initialize_result_dict`` for the member;
    * ``'average messages per series'`` - total messages divided by the
      number of series (via ``helpers.safe_divide``), two decimals.

    Side effects on ``df``: always writes the ``'is new message series?'``
    column; writes ``'is conversation starter?'`` only when
    ``running_all_functions`` is falsy.
    """
    result_dict['total # of message series'] = []
    result_dict['total messages'] = []
    result_dict['average messages per series'] = []

    if not running_all_functions:
        # NOTE(review): when all analyses run together this column is
        # presumably already on df - confirm against the caller.
        df['is conversation starter?'] = df['time'].diff().apply(
            lambda diff: helpers.is_conversation_starter(
                diff, minutes_threshold))
        # The first row has no predecessor, so it is flagged unconditionally.
        df.iloc[0, df.columns.get_loc('is conversation starter?')] = True

    # Comparing each sender with the previous row's sender yields the flag
    # directly as a bool column. The first row compares against the missing
    # value introduced by shift() and is therefore always True, so no
    # first-row patch-up is needed (and an empty frame is handled too).
    df['is new message series?'] = df['sender'] != df['sender'].shift()

    for member_name in chat_members:
        total_messages, _ = initialize_result_dict(member_name, df, result_dict)
        starts_series = (
            (df['is new message series?']) | (df['is conversation starter?'])
        )
        message_series = len(
            df[starts_series & (df['sender'] == member_name)])

        result_dict['total # of message series'].append(message_series)
        result_dict['total messages'].append(total_messages)
        result_dict['average messages per series'].append(
            round(helpers.safe_divide(total_messages, message_series), 2))
def main(result_dict, df, chat_members):
    """Collect per-member reaction statistics.

    Per-member lists, ordered like ``chat_members``, are written to
    ``result_dict``:

    * ``'total messages'`` / ``'non-reaction messages'`` - the two values
      returned by ``initialize_result_dict``;
    * ``'reaction messages'`` - their difference;
    * ``'% of messages that are reactions'`` - reaction messages relative
      to total messages;
    * ``'reactions'`` - sum of the member's ``'reaction action'`` values;
    * for each of like, love, dislike, laugh, emphasis and question:
      ``'<kind> reacts'`` (sum of the member's ``'<kind> react action'``
      values) and ``'% of reactions that are <kind> reacts'``;
    * ``'% of all non-reaction messages that are by this person'`` and
      ``'% of all reaction messages that are by this person'`` - the
      member's share of the respective sum over all members.

    All percentages are rounded to two decimals.

    Side effect: adds ``'reaction action'`` and one ``'<kind> react
    action'`` column per reaction kind to ``df``, each computed from the
    ``'text'`` column by the matching ``helpers`` function.
    """
    # (kind, helper) pairs; the order fixes both the df column order and
    # the result_dict key order.
    react_helpers = (
        ('like', helpers.like_react_action),
        ('love', helpers.love_react_action),
        ('dislike', helpers.dislike_react_action),
        ('laugh', helpers.laugh_react_action),
        ('emphasis', helpers.emphasis_react_action),
        ('question', helpers.question_react_action),
    )
    non_reaction_share_key = (
        '% of all non-reaction messages that are by this person')
    reaction_share_key = '% of all reaction messages that are by this person'

    result_dict['total messages'] = []
    result_dict['non-reaction messages'] = []
    result_dict[non_reaction_share_key] = []
    result_dict['reaction messages'] = []
    result_dict[reaction_share_key] = []
    result_dict['% of messages that are reactions'] = []
    result_dict['reactions'] = []
    for kind, _ in react_helpers:
        result_dict[f'{kind} reacts'] = []
        result_dict[f'% of reactions that are {kind} reacts'] = []

    df['reaction action'] = df['text'].apply(helpers.reaction_action)
    for kind, react_action in react_helpers:
        df[f'{kind} react action'] = df['text'].apply(react_action)

    for member_name in chat_members:
        total_messages, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        reaction_messages = total_messages - non_reaction_messages

        result_dict['total messages'].append(total_messages)
        result_dict['reaction messages'].append(reaction_messages)
        result_dict['non-reaction messages'].append(non_reaction_messages)
        # Divide the reaction count directly. The former
        # ``1 - safe_divide(non_reaction, total)`` form reported 100 % for a
        # member without any messages (assuming safe_divide yields 0 for a
        # zero denominator, as its use for every other percentage implies).
        result_dict['% of messages that are reactions'].append(
            round(
                helpers.safe_divide(reaction_messages, total_messages) * 100,
                2))

        # Select the member's rows once and reuse them for every sum.
        member_rows = df[df['sender'] == member_name]
        reactions = int(member_rows['reaction action'].sum())
        result_dict['reactions'].append(reactions)
        for kind, _ in react_helpers:
            kind_reacts = int(member_rows[f'{kind} react action'].sum())
            result_dict[f'{kind} reacts'].append(kind_reacts)
            result_dict[f'% of reactions that are {kind} reacts'].append(
                round(helpers.safe_divide(kind_reacts, reactions) * 100, 2))

    # Shares need the grand totals, so they are filled in a second pass.
    for share_key, count_key in (
        (non_reaction_share_key, 'non-reaction messages'),
        (reaction_share_key, 'reaction messages'),
    ):
        counts = result_dict[count_key]
        overall = sum(counts)
        result_dict[share_key].extend(
            round(helpers.safe_divide(count, overall) * 100, 2)
            for count in counts
        )
def main(result_dict, df, chat_members, args):
    """Build line-graph data of message counts per time period.

    ``args`` supplies the flags ``day``, ``week``, ``month`` and ``year``
    (the first truthy one, in that order, selects the period length) and
    ``graph_individual``. With ``graph_individual`` set, one dataset is
    produced per member whose total message count (first value returned by
    ``initialize_result_dict``) is positive; otherwise a single
    ``'Total Messages'`` dataset is produced.

    The result is stored in ``result_dict['graphData']`` as a dict with

    * ``'labels'`` - one label per time period: the period string itself
      for day/week, ``'<first field>/<third field>'`` of the
      slash-separated period string for month, and ``'20<third field>'``
      for year;
    * ``'datasets'`` - a list of dicts with ``'label'``, ``'data'`` (count
      of rows per period), ``'fill'`` and ``'borderColor'``.

    Side effect: adds a ``'time_period'`` column to ``df``. The first and
    last values of that column are parsed with ``'%m/%d/%y'`` to bound the
    list of periods obtained from ``helpers.get_time_periods``.

    Raises:
        ValueError: if none of the four period flags is set.
    """
    if not (args.day or args.week or args.month or args.year):
        # ValueError is a subclass of Exception, so callers that catch the
        # previously raised bare Exception keep working.
        raise ValueError('Must give time period length for graph')

    message_freqs = {}
    if args.graph_individual:
        members = []
        for member_name in chat_members:
            total_messages, _ = initialize_result_dict(
                member_name, df, result_dict)
            if total_messages > 0:
                members.append(member_name)
        for member in members:
            message_freqs[member] = []
    else:
        message_freqs['Total Messages'] = []

    if args.day:
        time_period_name, to_period = 'day', helpers.get_day
    elif args.week:
        time_period_name, to_period = 'week', helpers.get_week
    elif args.month:
        time_period_name, to_period = 'month', helpers.get_month
    else:
        # Only args.year can be set here; the check above rules out "none".
        time_period_name, to_period = 'year', helpers.get_year
    df['time_period'] = df['time'].apply(to_period)

    day_fmt = '%m/%d/%y'
    begin_date = datetime.datetime.strptime(df['time_period'].iloc[0], day_fmt)
    end_date = datetime.datetime.strptime(df['time_period'].iloc[-1], day_fmt)
    time_periods = helpers.get_time_periods(
        begin_date, end_date, time_period_name)

    for time_period in time_periods:
        # The period mask does not depend on the member; build it once.
        in_period = df['time_period'] == time_period
        if args.graph_individual:
            for member_name in members:
                message_freqs[member_name].append(
                    len(df[in_period & (df['sender'] == member_name)]))
        else:
            message_freqs['Total Messages'].append(len(df[in_period]))

    colors = [
        'rgba(31, 120, 180, 1)',
        'rgba(51, 160, 44, 1)',
        'rgba(227, 26, 28, 1)',
        'rgba(255, 127, 0, 1)',
        'rgba(106, 61, 154, 1)',
        'rgba(177, 89, 40, 1)',
        'rgba(166, 206, 227, 1)',
        'rgba(178, 223, 138, 1)',
        'rgba(251, 154, 153, 1)',
        'rgba(253, 191, 111, 1)',
        'rgba(202, 178, 214, 1)',
        'rgba(255, 255, 153, 1)',
    ]

    if time_period_name in ('day', 'week'):
        labels = time_periods
    elif time_period_name == 'month':
        labels = []
        for time_period in time_periods:
            fields = time_period.split('/')
            labels.append(f'{fields[0]}/{fields[2]}')
    else:
        # NOTE(review): the century is hard-coded as "20" - confirm that no
        # data from other centuries needs to be graphed.
        labels = [
            f'20{time_period.split("/")[2]}' for time_period in time_periods
        ]

    result_dict['graphData'] = {}
    result_dict['graphData']['labels'] = labels
    result_dict['graphData']['datasets'] = [
        {
            'label': name,
            'data': message_freqs[name],
            'fill': False,
            # Wrap around the palette. The old modulus used the number of
            # datasets, which never wraps and overran the list as soon as
            # there were more datasets than colors.
            'borderColor': colors[i % len(colors)],
        }
        for i, name in enumerate(message_freqs)
    ]