Exemplo n.º 1
0
    def generate(self, filepath=None, hformat=None, last_timestamp=None):
        """Build a randomly generated chat and optionally export it to a text file.

        The underlying DataFrame comes from ``self._generate_df`` and is wrapped in a
        :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`.

        Args:
            filepath (str, optional): Local path where the generated chat is written. When omitted (or otherwise
                                    falsy), nothing is written to disk.
            hformat (str, optional): :ref:`Format of the header <The header format>`, e.g.
                                    ``'[%y-%m-%d %H:%M:%S] - %name:'``. Only used when ``filepath`` is given.
            last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.

        Returns:
            WhatsAppChat: Chat with random messages.

        ..  seealso::

            * :func:`WhatsAppChat.to_txt <whatstk.whatsapp.objects.WhatsAppChat.to_txt>`

        """
        chat = WhatsAppChat(self._generate_df(last_timestamp=last_timestamp))
        # Nothing to export: hand the chat back right away.
        if not filepath:
            return chat
        chat.to_txt(filepath=filepath, hformat=hformat)
        return chat
Exemplo n.º 2
0
def get_response_matrix(df=None, chat=None, zero_own=True, norm=NORMS.ABSOLUTE):
    """Get response matrix for given chat.

    Obtains a DataFrame of shape `[n_users, n_users]` counting the number of responses between members. Responses can
    be counted in different ways, e.g. using absolute values or normalised values. Responses are counted based solely
    on consecutive messages. That is, if :math:`user_i` sends a message right after :math:`user_j`, it will be counted
    as a response from :math:`user_i` to :math:`user_j`.

    Axis 0 lists senders and axis 1 lists receivers. That is, the value in cell (i, j) denotes the number of times
    :math:`user_i` responded to a message from :math:`user_j`.

    **Note**: Either ``df`` or ``chat`` must be provided.

    Args:
        df (pandas.DataFrame, optional): Chat data. Attribute `df` of a chat loaded using Chat. If a value is given,
                                            ``chat`` is ignored.
        chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
        zero_own (bool, optional): Set to True to avoid counting own responses. Defaults to True.
        norm (str, optional): Specifies the type of normalization used for response count. Can be:

                                - ``'absolute'``: Absolute count of responses (no normalization).
                                - ``'joint'``: Every cell is divided by the total number of counted responses.
                                - ``'sender'``: Each row is divided by its sum, i.e. by the number of counted
                                  responses sent by that user.
                                - ``'receiver'``: Each column is divided by its sum, i.e. by the number of counted
                                  responses received by that user.

                              When the relevant total is zero (e.g. a user that never received a response with
                              ``'receiver'``), the division yields NaN entries.

    Returns:
        pandas.DataFrame: Response matrix.

    Raises:
        ValueError: If ``norm`` is not one of the supported normalization types. This is checked before any
                    chat data is processed.

    Example:
            Get absolute count on responses (consecutive messages) between users.

            ..  code-block:: python

                >>> from whatstk import WhatsAppChat
                >>> from whatstk.analysis import get_response_matrix
                >>> from whatstk.data import whatsapp_urls
                >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
                >>> responses = get_response_matrix(chat=chat)
                >>> responses
                                Ash Ketchum  Brock  ...  Raichu  Wobbuffet
                Ash Ketchum               0      0  ...       1          0
                Brock                     1      0  ...       0          0
                Jessie & James            0      1  ...       0          0
                Meowth                    0      0  ...       0          0
                Misty                     2      1  ...       1          0
                Prof. Oak                 0      1  ...       0          0
                Raichu                    1      0  ...       0          0
                Wobbuffet                 0      0  ...       0          0

    """
    # Fail fast: reject an unsupported norm before doing any work on the chat data
    if norm not in (NORMS.ABSOLUTE, NORMS.JOINT, NORMS.RECEIVER, NORMS.SENDER):
        raise ValueError("norm not valid. See NORMS variable in whatstk.analysis.resposes")

    # Get chat df and users
    df = _get_df(df=df, chat=chat)
    users = WhatsAppChat(df).users
    # Get list of username transitions and initialize dictionary with counts (responses[sender][receiver])
    user_transitions = df[COLNAMES_DF.USERNAME].tolist()
    responses = {user: dict.fromkeys(users, 0) for user in users}
    # Fill count dictionary: each message is a response to the message right before it
    for receiver, sender in zip(user_transitions, user_transitions[1:]):
        # Self-responses are only counted when zero_own is disabled
        if not zero_own or sender != receiver:
            responses[sender][receiver] += 1
    # Outer keys (senders) become the index, inner keys (receivers) become the columns
    responses = pd.DataFrame.from_dict(responses, orient='index')

    # Normalize (NORMS.ABSOLUTE leaves the raw counts untouched)
    if norm == NORMS.JOINT:
        responses /= responses.sum().sum()
    elif norm == NORMS.RECEIVER:
        # Column sums are aligned on the column labels, so each column is divided by its own total
        responses /= responses.sum(axis=0)
    elif norm == NORMS.SENDER:
        # Row sums are aligned on the index, so each row is divided by its own total
        responses = responses.divide(responses.sum(axis=1), axis=0)
    return responses