Python summary_stats示例，bandicoot.helper.tools.summary_stats Python示例

示例#1

0

显示文件

def balance_contacts(records, weighted=True):
    r"""
    The balance of interactions per contact. For every contact,
    the balance is the number of outgoing interactions divided by the total
    number of interactions (in+out).

    .. math::

       \forall \,\text{contact}\,c,\;\text{balance}\,(c) = \frac{\bigl|\text{outgoing}\,(c)\bigr|}{\bigl|\text{outgoing}\,(c)\bigr|+\bigl|\text{incoming}\,(c)\bigr|}

    Parameters
    ----------
    records : iterable
        Records exposing a ``direction`` attribute (compared against
        ``'out'``) and a ``correspondent_id`` attribute.
    weighted : bool, optional
        If ``True`` (the default), the balance for each contact is weighted
        by the number of interactions the user had with this contact: the
        value for a contact is its outgoing count divided by the total
        number of interactions over all contacts. If ``False``, the plain
        per-contact balance defined above is used.

    Returns
    -------
    The result of ``summary_stats`` applied to the list of per-contact
    values (called with ``0.99`` as its second argument).
    """

    counter_out = defaultdict(int)
    counter = defaultdict(int)

    for r in records:
        if r.direction == 'out':
            counter_out[r.correspondent_id] += 1
        counter[r.correspondent_id] += 1

    if not weighted:
        balance = [float(counter_out[c]) / float(counter[c]) for c in counter]
    else:
        # The grand total is the same for every contact, so compute it once
        # instead of re-summing all counts inside the comprehension.
        total = float(sum(counter.values()))
        balance = [float(counter_out[c]) / total for c in counter]

    return summary_stats(balance, 0.99)

示例#2

0

显示文件

def interevents_time(records):
    """
    Summarize the time elapsed between consecutive records of the user.

    The ``datetime`` of each record is paired with the next one via
    ``pairwise``; each difference is converted with ``total_seconds`` and
    the resulting list is handed to ``summary_stats``.
    """
    timestamps = (record.datetime for record in records)

    gaps = []
    for earlier, later in pairwise(timestamps):
        gaps.append(total_seconds(later - earlier))

    return summary_stats(gaps, 0.99)

示例#3

0

显示文件

文件： individual.py 项目： ThomasRoca/bandicoot

def interactions_per_contact(records, direction=None):
    """
    Summarize how many interactions the user had with each contact.

    Parameters
    ----------
    direction : str, optional
        Restricts the records that are counted: ``None`` keeps every
        record, ``'in'`` keeps incoming ones, and ``'out'`` keeps
        outgoing ones.
    """
    if direction is None:
        selected = records
    else:
        selected = (r for r in records if r.direction == direction)

    # One tally entry per correspondent among the selected records.
    tally = Counter(r.correspondent_id for r in selected)
    return summary_stats(tally.values())

示例#4

0

显示文件

文件： individual.py 项目： ThomasRoca/bandicoot

def call_duration(records, direction=None):
    """
    Summarize the duration of the user's calls.

    Parameters
    ----------
    direction : str, optional
        Restricts the records that are considered: ``None`` keeps every
        record, ``'in'`` keeps incoming ones, and ``'out'`` keeps
        outgoing ones.
    """
    keep_all = direction is None

    durations = []
    for record in records:
        # The direction is only inspected when a filter was requested.
        if keep_all or record.direction == direction:
            durations.append(record.call_duration)

    return summary_stats(durations)

示例#5

0

显示文件

文件： individual.py 项目： chrisjbrooks/bandicoot

def interactions_per_contact(records, direction=None):
    """
    Summarize how many interactions the user had with each contact.

    Parameters
    ----------
    direction : str, optional
        Restricts the records that are counted: ``None`` keeps every
        record, ``'in'`` keeps incoming ones, and ``'out'`` keeps
        outgoing ones.
    """
    count_everything = direction is None

    per_contact = Counter()
    for record in records:
        # The direction is only inspected when a filter was requested.
        if count_everything or record.direction == direction:
            per_contact[record.correspondent_id] += 1

    return summary_stats(per_contact.values())

示例#6

0

显示文件

文件： individual.py 项目： chrisjbrooks/bandicoot

def call_duration(records, direction=None):
    """
    Summarize the duration of the user's calls.

    Parameters
    ----------
    direction : str, optional
        Restricts the records that are considered: ``None`` keeps every
        record, ``'in'`` keeps incoming ones, and ``'out'`` keeps
        outgoing ones.
    """
    if direction is not None:
        # Narrow the stream lazily to the requested direction.
        records = (r for r in records if r.direction == direction)

    return summary_stats([r.call_duration for r in records])

示例#7

0

显示文件

文件： individual.py 项目： ThomasRoca/bandicoot

def response_delay_text(records):
    """
    The response delay of the user within a conversation (in seconds)

    The following sequence of messages defines conversations (``I`` for an
    incoming text, ``O`` for an outgoing text, ``-`` for a one minute
    delay): ::

        I-O--I----O, we have a 60 seconds response delay and a 240 seconds response delay
        O--O---I--O, we have a 120 seconds response delay
        I--II---I-I, we don't have a response delay. The user hasn't answered

    For this user, the distribution of response delays will be ``[60, 240, 120]``

    Parameters
    ----------
    records : iterable
        Records exposing ``correspondent_id``, ``direction`` and
        ``datetime`` attributes.

    Returns
    -------
    ``None`` when no response delay was found, otherwise the result of
    ``summary_stats`` applied to the list of delays.

    Notes
    -----
    See :ref:`Using bandicoot <conversations-label>` for a definition of conversations.
    Conversations are defined to be a series of text messages each sent no more than an hour
    after the previous. The response delay can thus not be greater than one hour.
    """

    # Group the records per correspondent; conversations are built from
    # each group separately.
    interactions = defaultdict(list)
    for r in records:
        interactions[r.correspondent_id].append(r)

    delays = []
    for grouped in interactions.values():
        for conv in _conversations(grouped):
            for a, b in pairwise(conv):
                # A response is an outgoing record directly following an
                # incoming one within the same conversation.
                if b.direction == 'out' and a.direction == 'in':
                    delay = (b.datetime - a.datetime).total_seconds()
                    # Only strictly positive delays are kept.
                    if delay > 0:
                        delays.append(delay)

    if not delays:
        return None

    return summary_stats(delays)

示例#8

0

显示文件

文件： individual.py 项目： chrisjbrooks/bandicoot

def response_delay_text(records):
    """
    The response delay of the user within a conversation (in seconds)

    The following sequence of messages defines conversations (``I`` for an
    incoming text, ``O`` for an outgoing text, ``-`` for a one minute
    delay): ::

        I-O--I----O, we have a 60 seconds response delay and a 240 seconds response delay
        O--O---I--O, we have a 120 seconds response delay
        I--II---I-I, we don't have a response delay. The user hasn't answered

    For this user, the distribution of response delays will be ``[60, 240, 120]``

    Parameters
    ----------
    records : iterable
        Records exposing ``correspondent_id``, ``direction`` and
        ``datetime`` attributes.

    Returns
    -------
    ``None`` when no response delay was found, otherwise the result of
    ``summary_stats`` applied to the list of delays.

    Notes
    -----
    See :ref:`Using bandicoot <conversations-label>` for a definition of conversations.
    Conversations are defined to be a series of text messages each sent no more than an hour
    after the previous. The response delay can thus not be greater than one hour.
    """

    # Group the records per correspondent; conversations are built from
    # each group separately.
    interactions = defaultdict(list)
    for r in records:
        interactions[r.correspondent_id].append(r)

    def _response_delay(grouped):
        # Yield the delay, in seconds, for every incoming record directly
        # followed by an outgoing one within the same conversation.
        for conv in _conversations(grouped):
            for a, b in pairwise(conv):
                if b.direction == 'out' and a.direction == 'in':
                    yield (b.datetime - a.datetime).total_seconds()

    delays = []
    for grouped in interactions.values():
        # Only strictly positive delays are kept.
        delays.extend(d for d in _response_delay(grouped) if d > 0)

    if not delays:
        return None

    return summary_stats(delays)

示例#9

0

显示文件

def call_duration(records):
    """
    Summarize the duration of the user's calls, incoming and outgoing alike.
    """
    durations = []
    for record in records:
        durations.append(record.call_duration)

    return summary_stats(durations, 0.99)

示例#10

0

显示文件

def interactions_per_contact(records):
    """
    Summarize how many interactions the user had with each contact.
    """
    per_contact = Counter()
    for record in records:
        per_contact[record.correspondent_id] += 1

    return summary_stats(per_contact.values(), 1)