def balance_contacts(records, weighted=True):
    """
    The balance of interactions per contact.

    For every contact, the balance is the number of outgoing interactions
    divided by the total number of interactions (in+out).

    .. math::

       \\forall \\,\\text{contact}\\,c,\\;\\text{balance}\\,(c) = \\frac{\\bigl|\\text{outgoing}\\,(c)\\bigr|}{\\bigl|\\text{outgoing}\\,(c)\\bigr|+\\bigl|\\text{incoming}\\,(c)\\bigr|}

    Parameters
    ----------
    weighted : bool, optional
        If ``True`` (the default), the number of outgoing interactions with
        each contact is divided by the user's total number of interactions
        over all contacts, i.e. the per-contact balance is weighted by the
        share of interactions the user had with this contact. If ``False``,
        it is divided by the number of interactions with that contact only.
    """
    counter_out = defaultdict(int)
    counter = defaultdict(int)

    for r in records:
        if r.direction == 'out':
            counter_out[r.correspondent_id] += 1
        counter[r.correspondent_id] += 1

    if not weighted:
        balance = [float(counter_out[c]) / float(counter[c]) for c in counter]
    else:
        # The grand total is the same for every contact: compute it once
        # instead of once per contact inside the comprehension.
        total = float(sum(counter.values()))
        balance = [float(counter_out[c]) / total for c in counter]

    return summary_stats(balance, 0.99)
def interevents_time(records):
    """
    The interevent time between two consecutive records of the user.

    Consecutive ``datetime`` values are paired with ``pairwise`` and each
    difference (newer minus older) is converted with ``total_seconds``
    before being summarised.
    """
    timestamps = (record.datetime for record in records)

    gaps = []
    for previous, current in pairwise(timestamps):
        gaps.append(total_seconds(current - previous))

    return summary_stats(gaps, 0.99)
def interactions_per_contact(records, direction=None):
    """
    The number of interactions a user had with each of its contacts.

    Parameters
    ----------
    direction : str, optional
        Restricts the count to records with this direction: ``'in'`` for
        incoming, ``'out'`` for outgoing. With ``None`` (the default) every
        record is counted.
    """
    if direction is not None:
        # Lazily narrow the records to the requested direction.
        records = (r for r in records if r.direction == direction)

    tally = Counter(r.correspondent_id for r in records)
    return summary_stats(tally.values())
def call_duration(records, direction=None):
    """
    The duration of the user's calls.

    Parameters
    ----------
    direction : str, optional
        Restricts the calls to records with this direction: ``'in'`` for
        incoming, ``'out'`` for outgoing. With ``None`` (the default) every
        record is used.
    """
    durations = []
    for record in records:
        # The direction is only inspected when a filter was requested.
        if direction is None or record.direction == direction:
            durations.append(record.call_duration)

    return summary_stats(durations)
def call_duration(records, direction=None):
    """
    The duration of the user's calls.

    Parameters
    ----------
    direction : str, optional
        Selects which records contribute: ``None`` keeps all of them,
        ``'in'`` keeps incoming ones and ``'out'`` keeps outgoing ones.
    """
    def _selected(record):
        # Without a direction filter every record is kept.
        return direction is None or record.direction == direction

    return summary_stats(
        [rec.call_duration for rec in records if _selected(rec)]
    )
def response_delay_text(records):
    """
    The response delay of the user within a conversation (in seconds)

    The following sequence of messages defines conversations (``I`` for an
    incoming text, ``O`` for an outgoing text, ``-`` for a one minute
    delay):

    ::

        I-O--I----O, we have a 60 seconds response delay and a 240 seconds response delay
        O--O---I--O, we have a 120 seconds response delay
        I--II---I-I, we don't have a response delay. The user hasn't answered

    For this user, the distribution of response delays will be
    ``[60, 240, 120]``

    Returns ``None`` when no positive response delay is found.

    Notes
    -----
    See :ref:`Using bandicoot <conversations-label>` for a definition of
    conversations.

    Conversations are defined to be a series of text messages each sent no
    more than an hour after the previous. The response delay can thus not
    be greater than one hour.
    """
    # Group the records by correspondent, keeping their original order.
    interactions = defaultdict(list)
    for r in records:
        interactions[r.correspondent_id].append(r)

    def _response_delay(grouped):
        # A response is an outgoing message directly following an incoming
        # one inside the same conversation.
        return (
            (b.datetime - a.datetime).total_seconds()
            for conv in _conversations(grouped)
            for a, b in pairwise(conv)
            if b.direction == 'out' and a.direction == 'in'
        )

    # Only strictly positive delays are kept.
    delays = [
        delay
        for grouped in interactions.values()
        for delay in _response_delay(grouped)
        if delay > 0
    ]

    if not delays:
        return None

    return summary_stats(delays)
def response_delay_text(records):
    """
    The response delay of the user within a conversation (in seconds)

    The following sequence of messages defines conversations (``I`` for an
    incoming text, ``O`` for an outgoing text, ``-`` for a one minute
    delay):

    ::

        I-O--I----O, we have a 60 seconds response delay and a 240 seconds response delay
        O--O---I--O, we have a 120 seconds response delay
        I--II---I-I, we don't have a response delay. The user hasn't answered

    For this user, the distribution of response delays will be
    ``[60, 240, 120]``

    Returns ``None`` when no positive response delay is found.

    Notes
    -----
    See :ref:`Using bandicoot <conversations-label>` for a definition of
    conversations.

    Conversations are defined to be a series of text messages each sent no
    more than an hour after the previous. The response delay can thus not
    be greater than one hour.
    """
    # One chronological list of records per correspondent.
    interactions = defaultdict(list)
    for r in records:
        interactions[r.correspondent_id].append(r)

    def _response_delay(grouped):
        # Yield the gap for every incoming message that is immediately
        # followed by an outgoing one within a conversation.
        ts = ((b.datetime - a.datetime).total_seconds()
              for conv in _conversations(grouped)
              for a, b in pairwise(conv)
              if b.direction == 'out' and a.direction == 'in')
        return ts

    # Zero and negative gaps are discarded.
    delays = [
        r for i in interactions.values()
        for r in _response_delay(i) if r > 0
    ]

    if not delays:
        return None

    return summary_stats(delays)
def call_duration(records):
    """
    The duration of the user's calls (in and out).

    The ``call_duration`` of every record is collected, without any
    filtering, and handed to ``summary_stats`` together with ``0.99``.
    """
    durations = []
    for record in records:
        durations.append(record.call_duration)

    return summary_stats(durations, 0.99)
def interactions_per_contact(records):
    """
    The number of interactions a user had with each of its contacts.

    Records are counted per ``correspondent_id``; the resulting counts are
    handed to ``summary_stats`` together with ``1``.
    """
    per_contact = Counter()
    for record in records:
        per_contact[record.correspondent_id] += 1

    return summary_stats(per_contact.values(), 1)