def conversation_stats(number, convo_gap=timedelta(hours=14)):
    """Compute conversation-level statistics for one contact.

    A "conversation" boundary is any silence between consecutive messages
    longer than *convo_gap*.

    Args:
        number: contact identifier passed through to data_manager.messages.
        convo_gap: minimum gap of silence that separates two conversations.

    Returns:
        dict with keys:
            'started', 'ended'             -- percentage of conversations
                                              started/ended by me.
            'sent_response_time',
            'received_response_time'       -- mean reply latency in seconds.
    """
    info_dict = {}
    messages = data_manager.messages(number)

    # Assumes messages are sorted by time: align each message with the one
    # immediately before it.  NOTE: `prior` must be sliced before `messages`
    # is re-bound on the next line.
    prior = messages[:-1].reset_index()
    messages = messages[1:].reset_index()
    delta = messages.date - prior.date

    # Hoisted: the converted gap threshold is used in four comparisons below.
    gap = val_from_delta(convo_gap)

    # Conversations started / ended by me, as a percentage of the total.
    new_convos = messages[delta > gap]
    end_convos = prior[delta > gap]
    my_new_convos = new_convos[new_convos.is_from_me == 1]
    my_end_convos = end_convos[end_convos.is_from_me == 1]
    # max(1, ...) guards the division when no conversation boundary exists.
    info_dict['started'] = round(
        (my_new_convos.shape[0] / max(1, new_convos.shape[0])) * 100, 1)
    info_dict['ended'] = round(
        (my_end_convos.shape[0] / max(1, end_convos.shape[0])) * 100, 1)

    # Response time in seconds: gaps where the sender flips and the silence
    # is short enough to still be within one conversation.
    rt_sent = delta[(messages.is_from_me == 1) &
                    (prior.is_from_me == 0) &
                    (delta < gap)]
    rt_received = delta[(messages.is_from_me == 0) &
                        (prior.is_from_me == 1) &
                        (delta < gap)]
    # max(0, mean) keeps the result at 0 when the selection is empty
    # (an empty Series' mean() is NaN, and max(0, NaN) returns 0).
    info_dict['sent_response_time'] = delta_from_value(max(
        0, rt_sent.mean())).total_seconds()
    info_dict['received_response_time'] = delta_from_value(
        max(0, rt_received.mean())).total_seconds()
    return info_dict
def simple_stats(number):
    """Return counts of messages sent vs. received for a contact."""
    sent, received = split_sender(data_manager.messages(number))
    return {'sent': sent.shape[0], 'received': received.shape[0]}
def diction_summary(number):
    """Word-usage summary for one contact over an optional date range."""
    args = request.args
    msg = data_manager.messages(number=number,
                                start=args.get('start'),
                                end=args.get('end'))
    return make_response(language_stats.contact_summary(msg), OK, headers)
def summary():
    """Overall summary across all messages in an optional date range."""
    start, end = request.args.get('start'), request.args.get('end')
    msg = data_manager.messages(start=start, end=end)
    return make_response(general_stats.summary(msg), OK, headers)
def sentiment_summary(number=None):
    """Sentiment statistics for a contact (or everyone) in a date range.

    Fix: removed the unused local `n` parsed from the query string -- it was
    never passed on, and calling int() on a non-numeric ?n= value would have
    raised ValueError for no benefit.
    """
    start = request.args.get('start', None)
    end = request.args.get('end', None)
    msg = data_manager.messages(number=number, start=start, end=end)
    result = sentiment_stats.sentiment(msg)
    return make_response(result, OK, headers)
def frequency(number=None):
    """Message frequency over time, grouped by the requested period."""
    args = request.args
    msg = data_manager.messages(number=number,
                                start=args.get('start'),
                                end=args.get('end'))
    result = general_stats.frequency(msg, period=args.get('period', 'M'))
    return make_response(result, OK, headers)
def contact_summary():
    """Per-contact summary; ?n caps how many contacts (-1 means no cap)."""
    n = int(request.args.get('n', 100))
    if n == -1:
        n = None
    msg = data_manager.messages(start=request.args.get('start'),
                                end=request.args.get('end'))
    content = jsonify(general_stats.contacts_summary(msg, n))
    return make_response(content, OK, headers)
def emoji(self, number, start=None, end=None, n=5):
    """Top-n emoji summary for one contact's messages.

    Args:
        number: contact identifier.
        start, end: optional date-range bounds.
        n: how many top emoji to report; defaults to 5, preserving the
           previously hard-coded value (now a backward-compatible parameter).
    """
    msg = data_manager.messages(number=number, start=start, end=end)
    return emoji_stats.contact_summary(msg, n)
def sentiment(self, number, start=None, end=None):
    """Sentiment summary for one contact's messages in a date range."""
    msg = data_manager.messages(number=number, start=start, end=end)
    return sentiment_stats.contact_summary(msg)
def summary(self, start=None, end=None):
    """General summary over all messages in the given date range."""
    return general_stats.summary(data_manager.messages(start=start, end=end))
def frequency(self, number=None, start=None, end=None, period='M'):
    """Message-frequency series for a contact (or everyone).

    Args:
        number: optional contact identifier.
        start, end: optional date-range bounds.
        period: grouping period; defaults to 'M' (monthly), preserving the
            previously hard-coded value and matching the module-level
            frequency endpoint, which already accepts a period.
    """
    msg = data_manager.messages(number=number, start=start, end=end)
    return general_stats.frequency(msg, period=period)
result['label'] = text if len(text.split()) > 5: vals.append(result) x.append(time) y.append(result['neg']) pos = sorted(vals, key=lambda x: x['pos']) com = sorted(vals, key=lambda x: x['compound']) neg = sorted(vals, key=lambda x: x['neg']) pdb.set_trace() f = interpolate.interp1d(x, y) num = 70 xx = np.linspace(x[0], x[-1], num) yy = f(xx) # plt.plot(x,y, 'bo-') plt.plot(xx, yy, 'g.-') plt.show() # plt.plot(x, y, linewidth=2.0) # plt.show() msg = dm.messages(handle=1066, start=None, end=None) process(msg)
def _all_emojis():
    """Extract every emoji appearing anywhere in the message history."""
    combined = data_manager.messages().text.str.cat(sep=" ")
    return _extract_emoji(combined)
def unique_words(messages):
    """Words used in *messages* but not elsewhere, as chart-ready records.

    Returns at most 100 {'name': word, 'value': count} dicts.
    """
    words, _ = extract_words(messages)
    all_words, _ = extract_words(data_manager.messages())
    uni = unique(words, all_words)
    frame = pd.DataFrame({'name': uni.index, 'value': uni.values})
    return frame[:100].to_dict(orient='records')