示例#1
0
    async def show_message_edits(self, from_id, entity_id, message_id):
        """Send the stored edit history of one message to a user.

        All stored versions of the message are fetched through
        ``self.dialog_stats.get_message_edits`` and turned into a text
        report: a fixed header, the number of edits, and for every version
        its number, timestamp and tag-stripped text. Starting from the
        second version the report also contains the difference ratio and
        the replace/insert/delete counts relative to the previous version.
        The report is delivered with ``self.send_message_to_user``.

        Args:
            from_id: Recipient identifier handed to
                ``send_message_to_user``.
            entity_id: Entity identifier handed to ``get_message_edits``.
            message_id: Message identifier handed to ``get_message_edits``.
        """
        stats = self.dialog_stats
        message_edits = stats.get_message_edits(entity_id, message_id)

        # N stored versions correspond to N - 1 edits. Clamp at zero so an
        # empty history is reported as 0 edits instead of -1.
        edits_count = max(len(message_edits) - 1, 0)
        results = [
            'Кажется, ты переслал сообщение из отслеживаемого диалога',
            'Выведем доп. информацию по нему...',
            'Число правок: {}'.format(edits_count)
        ]

        previous_edit = None
        previous_text = None
        for message_edit in message_edits:
            results.append('')
            results.append('**Версия {} / {}**'.format(
                message_edit['version'], message_edit['max_version']))
            date = StatusController.datetime_from_str(
                message_edit['taken_at'], '%Y-%m-%d %H:%M:%S%z')
            results.append('Дата: {}'.format(
                StatusController.datetime_to_str(date)))

            # Strip tags once per version; the result is reused for the
            # printed text and for the ratio against the next version.
            current_text = stats.remove_message_tags(message_edit['message'])
            results.append('Сообщение: \n[{}]\n'.format(current_text))

            if previous_edit is not None:
                edit_ratio = stats.get_str_difference_ratio(
                    previous_text, current_text)
                results.append('Процент правок: {0:0.2f}%'.format(
                    100 * edit_ratio))
                # NOTE(review): the counts are computed on the raw messages
                # while the ratio above uses tag-stripped text; kept as in
                # the original -- confirm this asymmetry is intended.
                diff_counts = stats.get_str_difference_counts(
                    previous_edit['message'], message_edit['message'])
                results.append('Число замен   : {}'.format(
                    diff_counts['replaces_count_edit']))
                results.append('Число вставок : {}'.format(
                    diff_counts['inserts_count_edit']))
                results.append('Число удалений: {}'.format(
                    diff_counts['deletes_count_edit']))

            previous_edit = message_edit
            previous_text = current_text

        await self.send_message_to_user(from_id, "\n".join(results))
示例#2
0
    async def get_me_dialog_statistics(self, user_id, date_from=None, title='за всё время', only_last_dialog=False, skip_vocab=False):

        new_normal_form_cache = CacheHelper().get_from_cache('normal_forms', 'dialog_stats', False)
        if new_normal_form_cache:
            self.normal_form_cache = new_normal_form_cache

        new_word_type_form_cache = CacheHelper().get_from_cache('word_type_forms', 'dialog_stats', False)
        if new_word_type_form_cache:
            self.word_type_form_cache = new_word_type_form_cache

        days_a = '?'
        if (not date_from) and (not only_last_dialog):
            res = self.db_conn.execute(
                """
                    SELECT *
                    FROM `activity`
                    ORDER BY taken_at ASC
                """,
                [])
            rows = list(res.fetchall())
            days_a = 1
            if len(rows) > 1:
                date1 = StatusController.datetime_from_str(rows[0]['taken_at'])
                date2 = StatusController.datetime_from_str(rows[len(rows) - 1]['taken_at'])
                days_a = round((date2 - date1).total_seconds() / (24 * 60 * 60))
        results = []
        last_dialogue_date = None
        try:
            user_entity = await self.tg_client.get_entity(PeerUser(user_id))
        except:
            user_entity = None
        if user_entity and (type(user_entity) == User):
            if not date_from:
                res = self.db_conn.execute(
                    """
                        SELECT m.*, 
                        (SELECT version FROM `messages` m1 WHERE m1.`entity_id` = m.`entity_id` AND m1.message_id = m.message_id AND m1.from_id = m.from_id ORDER BY version DESC LIMIT 1) as 'max_version',
                        (SELECT removed FROM `messages` m1 WHERE m1.`entity_id` = m.`entity_id` AND m1.message_id = m.message_id AND m1.from_id = m.from_id ORDER BY version DESC LIMIT 1) as 'is_removed'
                        FROM `messages` m
                        WHERE m.`entity_id` = ? OR m.`entity_id` = ?
                        ORDER BY m.`taken_at` ASC, m.`message_id` ASC, m.`version` ASC
                    """,
                    [str(user_id), str(self.tg_client.me_user_id)]
                )
            else:
                date_from = StatusController.datetime_to_str(date_from, '%Y-%m-%d')
                res = self.db_conn.execute(
                    """
                        SELECT m.*, 
                        (SELECT version FROM `messages` m1 WHERE m1.`entity_id` = m.`entity_id` AND m1.message_id = m.message_id AND m1.from_id = m.from_id ORDER BY version DESC LIMIT 1) as 'max_version',
                        (SELECT removed FROM `messages` m1 WHERE m1.`entity_id` = m.`entity_id` AND m1.message_id = m.message_id AND m1.from_id = m.from_id ORDER BY version DESC LIMIT 1) as 'is_removed'
                        FROM `messages` m
                        WHERE (m.`entity_id` = ? OR m.`entity_id` = ?) AND (m.`taken_at` > ?)
                        ORDER BY m.`taken_at` ASC, m.`message_id` ASC, m.`version` ASC
                    """,
                    [str(user_id), str(self.tg_client.me_user_id), date_from]
                )
            rows = list(res.fetchall())

            me_name = self.tg_client.me_user_entity_name
            another_name = await self.tg_client.get_entity_name(user_id, 'User')
            dialog_name = me_name + ' <-> ' + another_name

            results.append('**Диалог '+dialog_name+' ('+title+')'+':**')
            results.append('')
            results.append('Сообщений диалога в БД: ' + str(len(rows)))
            if len(rows) > 0:
                date_start = StatusController.datetime_from_str(rows[0]['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                if not only_last_dialog:
                    results.append('Самое раннее сообщение диалога в БД: ' + StatusController.datetime_to_str(date_start))
                if len(rows) > 1:
                    date_end = StatusController.datetime_from_str(rows[len(rows) - 1]['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                    seconds_count = (date_end - date_start).total_seconds()
                    days_count = seconds_count / (24 * 60 * 60)
                    messages_count = len(rows)
                    if (not date_from) and (not only_last_dialog):
                        results.append('Длительность общения: {0:0.2f} суток'.format(days_count))
                    if not only_last_dialog:
                        results.append('Средняя частота сообщений: {0:0.2f} в сутки'.format(messages_count / days_count))

                    max_dialog_question_interval = round((24 * 60 * 60) * 1.25)
                    max_dialog_non_question_interval = round((24 * 60 * 60) * 0.75)
                    max_dialog_hello_as_second_message_offset = round((24 * 60 * 60) * 0.25)
                    dialog_hello_words = ['привет', 'приветствую', 'здравствуй', 'здравствуйте']
                    dialog_hello_phrases = ['доброе утро', 'доброго утра', 'добрый день', 'доброго дня', 'добрый вечер', 'доброго вечера']
                    dialog_hello_stop_context = ['-привет', 'всем привет', 'привет»', 'привет"']

                    msg_len_me = 0
                    msg_me_cnt = 0
                    msg_me_max_len = 0
                    msg_len_another = 0
                    msg_another_cnt = 0
                    msg_another_max_len = 0

                    me_deletes = 0
                    another_deletes = 0

                    me_hello = 0
                    another_hello = 0

                    me_words = []
                    another_words = []

                    dialogues = []
                    active_dialog = []
                    last_msg_from_id = None
                    last_date = date_start
                    last_msg_is_question = False

                    edited_messages_me = {}
                    edited_messages_another = {}
                    edited_sequence_interrupted = {}

                    last_message_row = None

                    for row in rows:
                        if not row['message']:
                            row['message'] = ''
                        if int(row['removed']) == 1 or int(row['is_removed']) == 1:
                            if int(row['version']) == int(row['max_version']):
                                if int(row['from_id']) == self.tg_client.me_user_id:
                                    me_deletes = me_deletes + 1
                                else:
                                    another_deletes = another_deletes + 1
                        else:
                            if row['message_id'] not in edited_sequence_interrupted:
                                for k_int in edited_sequence_interrupted.keys():
                                    if type(edited_sequence_interrupted[k_int]) == dict:
                                        if edited_sequence_interrupted[k_int]['from_id'] != row['from_id']:
                                            edited_sequence_interrupted[k_int]['interrupts'].append(row['message_id'])

                            if int(row['max_version']) > 1:
                                if int(row['version']) < int(row['max_version']):
                                    if row['message_id'] not in edited_sequence_interrupted:
                                        edited_sequence_interrupted[row['message_id']] = {
                                            'from_id': row['from_id'],
                                            'interrupts': []
                                        }
                                else:
                                    if row['message_id'] in edited_sequence_interrupted:
                                        edited_sequence_interrupted[row['message_id']] = len(edited_sequence_interrupted[row['message_id']]['interrupts']) > 0

                            if last_message_row and (int(last_message_row['version']) < int(last_message_row['max_version'])) and (row['from_id'] != last_message_row['from_id']):
                                edited_sequence_interrupted[row['message_id']] = True
                            if int(row['version']) == int(row['max_version']):
                                last_message_row = row

                            if int(row['max_version']) > 1:
                                if int(row['from_id']) == self.tg_client.me_user_id:
                                    if row['message_id'] not in edited_messages_me:
                                        edited_messages_me[row['message_id']] = []
                                    edited_messages_me[row['message_id']].append(row)
                                else:
                                    if row['message_id'] not in edited_messages_another:
                                        edited_messages_another[row['message_id']] = []
                                    edited_messages_another[row['message_id']].append(row)
                            if int(row['version']) == int(row['max_version']):
                                message_lower = str(row['message']).lower()
                                message_words = re.sub("[^\w]", " ", message_lower).split()
                                message_words = list(filter(lambda x: x and self.is_valid_word(x, []), message_words))

                                message_hello_words = list(filter(lambda x: x in dialog_hello_words, message_words))
                                message_stop_contexts = list(filter(lambda x: message_lower.find(x) >= 0, dialog_hello_stop_context))

                                msg_from_id = int(row['from_id'])
                                msg_is_question = str(row['message']).find('?') >= 0
                                msg_is_hello = (len(message_hello_words) > 0) and (len(message_stop_contexts) == 0)

                                if not msg_is_hello:
                                    for d_ph in dialog_hello_phrases:
                                        if message_lower.find(d_ph) >= 0:
                                            msg_is_hello = True
                                            break

                                if not skip_vocab:
                                    nform_list = [self.get_normal_form(x) for x in message_words]
                                    if msg_from_id == self.tg_client.me_user_id:
                                        me_words = me_words + nform_list
                                    else:
                                        another_words = another_words + nform_list

                                if msg_is_hello:
                                    if msg_from_id == self.tg_client.me_user_id:
                                        me_hello = me_hello + 1
                                    else:
                                        another_hello = another_hello + 1
                                msg_len = len(row['message'])
                                if msg_from_id == self.tg_client.me_user_id:
                                    msg_len_me = msg_len_me + msg_len
                                    msg_me_cnt = msg_me_cnt + 1
                                    if msg_len > msg_me_max_len:
                                        msg_me_max_len = msg_len
                                else:
                                    msg_len_another = msg_len_another + msg_len
                                    msg_another_cnt = msg_another_cnt + 1
                                    if msg_len > msg_another_max_len:
                                        msg_another_max_len = msg_len
                                msg_date = StatusController.datetime_from_str(row['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                                if (
                                        (len(active_dialog) == 0) or (
                                            last_msg_is_question and
                                            ((msg_date - last_date).total_seconds() > max_dialog_question_interval)
                                        ) or (
                                            not last_msg_is_question and
                                            ((msg_date - last_date).total_seconds() > max_dialog_non_question_interval)
                                        )
                                ):
                                    if len(active_dialog) > 0:
                                        dialogues.append(active_dialog)
                                        active_dialog = []
                                active_dialog.append(row)
                                last_date = msg_date
                                if last_msg_from_id != msg_from_id:
                                    last_msg_is_question = msg_is_question
                                else:
                                    last_msg_is_question = last_msg_is_question or msg_is_question
                                last_msg_from_id = msg_from_id

                    for k_int in edited_sequence_interrupted.keys():
                        if type(edited_sequence_interrupted[k_int]) == dict:
                            edited_sequence_interrupted[k_int] = len(edited_sequence_interrupted[k_int]['interrupts']) > 0

                    me_edit_stats = self.get_edit_stats(edited_messages_me, edited_sequence_interrupted)
                    another_edit_stats = self.get_edit_stats(edited_messages_another, edited_sequence_interrupted)

                    if len(active_dialog) > 0:
                        dialogues.append(active_dialog)
                        active_dialog = []

                    if only_last_dialog:
                        dialogues = [dialogues[len(dialogues) - 1]]
                    else:
                        if len(dialogues) > 0:
                            last_dialogue_date = StatusController.datetime_from_str(dialogues[len(dialogues) - 1][0]['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                        else:
                            last_dialogue_date = None

                    answers_me = 0
                    answers_wait_seconds_me = 0
                    answers_another = 0
                    answers_wait_seconds_another = 0

                    longest_len = 0
                    longest_dialog = None
                    shortest_len = 0

                    dia_me_start = 0
                    dia_another_start = 0

                    dia_me_finish = 0
                    dia_another_finish = 0

                    dia_between_seconds = 0
                    dia_between_max = 0
                    dia_between_max_from = None
                    dia_between_max_to = None
                    dia_between_cnt = 0

                    last_dia_end = None
                    for dial in dialogues:
                        dial_len = len(dial)

                        if (shortest_len == 0) or (dial_len < shortest_len):
                            shortest_len = dial_len
                        if (longest_len == 0) or (dial_len > longest_len):
                            longest_len = dial_len
                            longest_dialog = dial

                        if dial_len > 0:
                            first_dial = dial[0]
                            last_dial = dial[len(dial) - 1]
                            if int(first_dial['from_id']) == self.tg_client.me_user_id:
                                dia_me_start = dia_me_start + 1
                            else:
                                dia_another_start = dia_another_start + 1
                            if int(last_dial['from_id']) == self.tg_client.me_user_id:
                                dia_me_finish = dia_me_finish + 1
                            else:
                                dia_another_finish = dia_another_finish + 1
                            if last_dia_end:
                                curr_first_dia_begin = StatusController.datetime_from_str(first_dial['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                                dia_between_seconds_curr = (curr_first_dia_begin - last_dia_end).total_seconds()
                                dia_between_seconds = dia_between_seconds + dia_between_seconds_curr
                                dia_between_cnt = dia_between_cnt + 1
                                if dia_between_seconds_curr > dia_between_max:
                                    dia_between_max = dia_between_seconds_curr
                                    dia_between_max_from = last_dia_end
                                    dia_between_max_to = curr_first_dia_begin
                            last_dia_end = StatusController.datetime_from_str(last_dial['taken_at'], '%Y-%m-%d %H:%M:%S%z')

                        last_msg_id = None
                        last_msg_date = None
                        for dia in dial:
                            msg_date = StatusController.datetime_from_str(dia['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                            if last_msg_id and (last_msg_id != int(dia['from_id'])):
                                seconds_between = (msg_date - last_msg_date).total_seconds()
                                if seconds_between > (60 * 60 * 4) and ((last_msg_date.time().hour < 6) or (last_msg_date.time().hour >= 22)):
                                    # somebody just sleep
                                    last_msg_date = msg_date
                                    last_msg_id = int(dia['from_id'])
                                    continue
                                if last_msg_id == self.tg_client.me_user_id:
                                    answers_another = answers_another + 1
                                    answers_wait_seconds_another = answers_wait_seconds_another + seconds_between
                                else:
                                    answers_me = answers_me + 1
                                    answers_wait_seconds_me = answers_wait_seconds_me + seconds_between
                            last_msg_date = msg_date
                            last_msg_id = int(dia['from_id'])

                    if dia_between_cnt > 0:
                        dia_between_time = dia_between_seconds / dia_between_cnt
                        dia_between_time = "{0:0.2f} сут.".format(dia_between_time / (60 * 60 * 24))
                        dia_between_time_max = "{0:0.2f} сут.".format(dia_between_max / (60 * 60 * 24))
                        dia_between_time_max = StatusController.datetime_to_str(dia_between_max_from) + ' --- ' + StatusController.datetime_to_str(dia_between_max_to) + ' ('+dia_between_time_max+')'
                    else:
                        dia_between_time = '?'
                        dia_between_time_max = '?'

                    if answers_another > 0:
                        answers_wait_seconds_another = answers_wait_seconds_another / answers_another
                        another_answer_time = "{0:0.2f} мин.".format(answers_wait_seconds_another / 60)
                    else:
                        another_answer_time = '?'

                    if answers_me > 0:
                        answers_wait_seconds_me = answers_wait_seconds_me / answers_me
                        me_answer_time = "{0:0.2f} мин.".format(answers_wait_seconds_me / 60)
                    else:
                        me_answer_time = '?'

                    if not date_from:
                        self.tg_client.entity_controller.set_entity_answer_sec(user_id, answers_wait_seconds_me, answers_wait_seconds_another)

                    longest_dates = ''
                    longest_hours = 0
                    if longest_len > 1:
                        msg_date1 = StatusController.datetime_from_str(longest_dialog[0]['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                        msg_date2 = StatusController.datetime_from_str(longest_dialog[longest_len - 1]['taken_at'], '%Y-%m-%d %H:%M:%S%z')
                        longest_hours = (msg_date2 - msg_date1).total_seconds() / (24 * 60 * 60)
                        longest_dates = StatusController.datetime_to_str(msg_date1) + ' --- ' + StatusController.datetime_to_str(msg_date2)

                    valid_word_types = ['СУЩ', 'МЕЖД']
                    valid_word_types_str = (",".join(valid_word_types)).lower()

                    me_top_10 = None
                    me_last_cnt = None
                    me_words_count = 0
                    all_words_me = {}
                    if (not skip_vocab) and (len(me_words) > 0):
                        for word in me_words:
                            if word and (word not in all_words_me):
                                all_words_me[word] = True
                        me_words_count = len(all_words_me)

                        me_words = list(filter(lambda x: x and self.is_valid_word(x, valid_word_types), me_words))
                        wordlist = sorted(me_words)
                        wordfreq = [wordlist.count(p) for p in wordlist]
                        dic = dict(zip(wordlist, wordfreq))
                        me_words = list(sorted(dic.items(), key=lambda x: x[1], reverse=True))
                        words = me_words
                        half_words = round(len(words) / 2)
                        if half_words < 3:
                            half_words = 3
                        elif half_words > 15:
                            half_words = 15
                        top_10 = list(filter(lambda x: x[1] > 1, words[0:half_words]))
                        top_10 = list(map(lambda x: '**' + str(x[0]) + '** (' + str(x[1]) + ')', top_10))
                        last_word, last_cnt = words[len(words) - 1]
                        last_cnt_words = map(lambda x: x[0], filter(lambda s: s[1] == last_cnt, words))
                        last_cnt_words = list(sorted(last_cnt_words, key=lambda x: len(x), reverse=True))
                        last_cnt_cnt = len(last_cnt_words) - half_words
                        if last_cnt_cnt < 3:
                            last_cnt_cnt = 3
                        elif last_cnt_cnt > 10:
                            last_cnt_cnt = 10
                        last_cnt_words = last_cnt_words[0:last_cnt_cnt]
                        me_top_10 = top_10
                        me_last_cnt = last_cnt_words

                    another_top_10 = None
                    another_last_cnt = None
                    another_words_count = 0
                    all_words_another = {}
                    if (not skip_vocab) and (len(another_words) > 0):
                        for word in another_words:
                            if word and (word not in all_words_another):
                                all_words_another[word] = True
                        another_words_count = len(all_words_another)

                        another_words = list(filter(lambda x: x and self.is_valid_word(x, valid_word_types), another_words))
                        wordlist = sorted(another_words)
                        wordfreq = [wordlist.count(p) for p in wordlist]
                        dic = dict(zip(wordlist, wordfreq))
                        another_words = list(sorted(dic.items(), key=lambda x: x[1], reverse=True))
                        words = another_words
                        half_words = round(len(words) / 2)
                        if half_words < 3:
                            half_words = 3
                        elif half_words > 15:
                            half_words = 15
                        top_10 = list(filter(lambda x: x[1] > 1, words[0:half_words]))
                        top_10 = list(map(lambda x: '**' + str(x[0]) + '** (' + str(x[1]) + ')', top_10))
                        last_word, last_cnt = words[len(words) - 1]
                        last_cnt_words = map(lambda x: x[0], filter(lambda s: s[1] == last_cnt, words))
                        last_cnt_words = list(sorted(last_cnt_words, key=lambda x: len(x), reverse=True))
                        last_cnt_cnt = len(last_cnt_words) - half_words
                        if last_cnt_cnt < 3:
                            last_cnt_cnt = 3
                        elif last_cnt_cnt > 10:
                            last_cnt_cnt = 10
                        last_cnt_words = last_cnt_words[0:last_cnt_cnt]
                        another_top_10 = top_10
                        another_last_cnt = last_cnt_words

                    me_not_another_words_count = 0
                    me_not_another_top = None

                    another_not_me_words_count = 0
                    another_not_me_top = None

                    if (not skip_vocab) and (len(me_words) > 0) and (len(another_words) > 0):
                        me_not_another_top = []
                        for word in me_words:
                            if word[0] not in all_words_another:
                                me_not_another_top.append(word)

                        me_not_another_words_count = len(me_not_another_top)
                        me_not_another_top = me_not_another_top[:15]
                        me_not_another_top = list(map(lambda x: '**' + str(x[0]) + '** (' + str(x[1]) + ')', me_not_another_top))

                        another_not_me_top = []
                        for word in another_words:
                            if word[0] not in all_words_me:
                                another_not_me_top.append(word)

                        another_not_me_words_count = len(another_not_me_top)
                        another_not_me_top = another_not_me_top[:15]
                        another_not_me_top = list(map(lambda x: '**' + str(x[0]) + '** (' + str(x[1]) + ')', another_not_me_top))

                    results.append('Сообщений '+another_name+': {0} ({1:0.3f} Kb.)'.format(msg_another_cnt, msg_len_another/1024))
                    results.append('Сообщений '+me_name+': {0} ({1:0.3f} Kb.)'.format(msg_me_cnt, msg_len_me/1024))
                    if msg_another_cnt > 0:
                        results.append('Средняя длина сообщения '+another_name+': {0:0.2f} сим.'.format(msg_len_another / msg_another_cnt))
                    if msg_me_cnt > 0:
                        results.append('Средняя длина сообщения '+me_name+': {0:0.2f} сим.'.format(msg_len_me / msg_me_cnt))
                    results.append('Самое длинное сообщение '+another_name+': ' + str(msg_another_max_len) + ' сим.')
                    results.append('Самое длинное сообщение '+me_name+': ' + str(msg_me_max_len) + ' сим.')
                    results.append('Число приветствий от '+another_name+': ' + str(another_hello))
                    results.append('Число приветствий от '+me_name+': ' + str(me_hello))

                    if not only_last_dialog:
                        results.append('')
                        results.append('Число диалогов: ' + str(len(dialogues)))
                        results.append('Сообщений в самом коротком диалоге: ' + str(shortest_len))
                        results.append('Сообщений в самом длинном диалоге: ' + str(longest_len))
                        results.append('Самый длинный диалог: ' + longest_dates + ' ({0:0.3f} сут)'.format(longest_hours))
                        if len(dialogues) > 0:
                            if dia_between_time != '?':
                                results.append('Среднее время между диалогами: ' + str(dia_between_time))
                            if dia_between_time_max != '?':
                                results.append('Самое большое время между диалогами: ' + str(dia_between_time_max))
                            results.append('Инициатор диалога ' + another_name + ': {0:0.2f} %'.format(100 * dia_another_start / len(dialogues)))
                            results.append('Инициатор диалога ' + me_name + ': {0:0.2f} %'.format(100 * dia_me_start / len(dialogues)))
                            results.append('Завершитель диалога ' + another_name + ': {0:0.2f} %'.format(100 * dia_another_finish / len(dialogues)))
                            results.append('Завершитель диалога ' + me_name + ': {0:0.2f} %'.format(100 * dia_me_finish / len(dialogues)))
                    else:
                        results.append('Сообщений в диалоге: ' + str(longest_len))
                        results.append('Продолжительность диалога: ' + longest_dates + ' ({0:0.3f} сут)'.format(longest_hours))
                        if dia_me_start > 0:
                            results.append('Инициатор: ' + me_name)
                        elif dia_another_start > 0:
                            results.append('Инициатор: ' + another_name)
                        if dia_me_finish > 0:
                            results.append('Завершитель: ' + me_name)
                        elif dia_another_finish > 0:
                            results.append('Завершитель: ' + another_name)

                    if another_answer_time != '?':
                        results.append('В среднем ' + another_name + ' отвечает за: ' + another_answer_time)
                    if me_answer_time != '?':
                        results.append('В среднем ' + me_name + ' отвечает за: ' + me_answer_time)
                    results.append('')
                    if me_edit_stats['edited_messages_count'] > 0 or another_edit_stats['edited_messages_count'] > 0:
                        if (not date_from) and (not only_last_dialog):
                            results.append('За время активности скрипта ('+str(days_a)+' сут.):')
                        results.append('Отредактировано сообщений {}: {}'.format(another_name, another_edit_stats['edited_messages_count']))
                        results.append('Отредактировано сообщений {}: {}'.format(me_name, me_edit_stats['edited_messages_count']))
                        results.append('Отредактировано сообщений {} после ответа на него: {}'.format(another_name, another_edit_stats['sequence_interrupted_cnt']))
                        results.append('Отредактировано сообщений {} после ответа на него: {}'.format(me_name, me_edit_stats['sequence_interrupted_cnt']))
                        results.append('Процент редактируемых сообщений {0}: {1:0.2f}%'.format(another_name, 100 * another_edit_stats['edited_messages_count'] / msg_another_cnt))
                        results.append('Процент редактируемых сообщений {0}: {1:0.2f}%'.format(me_name, 100 * me_edit_stats['edited_messages_count'] / msg_me_cnt))
                        if another_edit_stats['max_1_message_edits'] > 0:
                            results.append('Макс. число правок одного сообщения {}: {} ("{}")'.format(another_name, another_edit_stats['max_1_message_edits'], self.cut_text(another_edit_stats['max_edits_message'])))
                        if me_edit_stats['max_1_message_edits'] > 0:
                            results.append('Макс. число правок одного сообщения {}: {} ("{}")'.format(me_name, me_edit_stats['max_1_message_edits'], self.cut_text(me_edit_stats['max_edits_message'])))
                        if another_edit_stats['max_1_message_diff_percent'] > 0:
                            results.append('Макс. процент правок одного сообщения {0}: {1:0.2f}% ("{2}")'.format(another_name, another_edit_stats['max_1_message_diff_percent'], self.cut_text(another_edit_stats['max_changed_message'])))
                        if me_edit_stats['max_1_message_diff_percent'] > 0:
                            results.append('Макс. процент правок одного сообщения {0}: {1:0.2f}% ("{2}")'.format(me_name, me_edit_stats['max_1_message_diff_percent'], self.cut_text(me_edit_stats['max_changed_message'])))
                        results.append('')
                        results.append('В среднем правок на 1 сообщение {0}: {1:0.2f}'.format(another_name, another_edit_stats['mid_1_message_edits']))
                        results.append('В среднем правок на 1 сообщение {0}: {1:0.2f}'.format(me_name, me_edit_stats['mid_1_message_edits']))
                        results.append('Средний суммарный процент изменений редактируемых сообщений {0}: {1:0.3f}%'.format(another_name, another_edit_stats['message_edit_mid_summ_percent']))
                        results.append('Средний суммарный процент изменений редактируемых сообщений {0}: {1:0.3f}%'.format(me_name, me_edit_stats['message_edit_mid_summ_percent']))
                        results.append('Среднее время между правками одного сообщения {0}: {1:0.2f} мин.'.format(another_name, another_edit_stats['message_edit_mid_time_sec'] / 60))
                        results.append('Среднее время между правками одного сообщения {0}: {1:0.2f} мин.'.format(me_name, me_edit_stats['message_edit_mid_time_sec'] / 60))
                        results.append('')
                        results.append('Средняя правка (число замен / вставок / удалений) {0}: {1:0.2f} / {2:0.2f} / {3:0.2f}'.format(another_name, another_edit_stats['replaces_count_avg'], another_edit_stats['inserts_count_avg'], another_edit_stats['deletes_count_avg']))
                        results.append('Средняя правка (число замен / вставок / удалений) {0}: {1:0.2f} / {2:0.2f} / {3:0.2f}'.format(me_name, me_edit_stats['replaces_count_avg'], me_edit_stats['inserts_count_avg'], me_edit_stats['deletes_count_avg']))
                        results.append('')
                        results.append('Удалений сообщений ' + another_name + ': ' + str(another_deletes))
                        results.append('Удалений сообщений ' + me_name + ': ' + str(me_deletes))

                    if me_top_10 or another_top_10:
                        results.append('')
                        results.append('Различных слов ' + another_name + ' в диалогах: ' + str(another_words_count))
                        results.append('Различных слов ' + me_name + ' в диалогах: ' + str(me_words_count))
                        results.append('')
                        results.append('Самые частые слова ('+valid_word_types_str+') ' + another_name + ' в диалогах: ' + (", ".join(another_top_10)) + '')
                        results.append('')
                        results.append('Самые частые слова ('+valid_word_types_str+') ' + me_name + ' в диалогах: ' + (", ".join(me_top_10)) + '')
                        results.append('')
                        results.append('Самые редкие слова ('+valid_word_types_str+') ' + another_name + ' в диалогах: ' + (", ".join(another_last_cnt)) + '')
                        results.append('')
                        results.append('Самые редкие слова ('+valid_word_types_str+') ' + me_name + ' в диалогах: ' + (", ".join(me_last_cnt)) + '')
                        if me_not_another_words_count > 0:
                            results.append('')
                            results.append('Слова ' + me_name + ' ('+valid_word_types_str+'), которые ' + another_name + ' ни разу не использовал: **' + str(me_not_another_words_count) +'** шт. Самые частые: ' + (", ".join(me_not_another_top)) + '')
                        if another_not_me_words_count > 0:
                            results.append('')
                            results.append('Слова ' + another_name + ' ('+valid_word_types_str+'), которые ' + me_name + ' ни разу не использовал: **' + str(another_not_me_words_count) +'** шт. Самые частые: ' + (", ".join(another_not_me_top)) + '')

        CacheHelper().save_to_cache('normal_forms', 'dialog_stats', self.normal_form_cache)
        CacheHelper().save_to_cache('word_type_forms', 'dialog_stats', self.word_type_form_cache)

        return {
            'results': results,
            'last_dialogue_date': last_dialogue_date
        }
示例#3
0
    def get_edit_stats(self, edited_messages: dict, edited_sequence_interrupted: dict) -> dict:
        """Aggregate edit statistics over the stored versions of edited messages.

        Args:
            edited_messages: mapping whose values are sequences of version rows
                for one message, oldest first. Each row is indexed by
                'message', 'taken_at' and 'message_id'.
            edited_sequence_interrupted: mapping from message id to a flag; a
                truthy flag increments ``sequence_interrupted_cnt`` for that
                message.

        Returns:
            A dict of counters, maxima and per-message averages (see the keys
            of the returned literal). Every "avg"/"mid" value is divided by
            ``len(edited_messages)``; percentages are scaled to 0..100.

        A message with a single stored version (i.e. no actual edits)
        contributes zero to the average time between edits instead of raising
        ZeroDivisionError. Empty version lists are skipped, but they are still
        included in ``edited_messages_count``.
        """
        date_format = '%Y-%m-%d %H:%M:%S%z'

        edits_count = 0
        max_1_message_edits = 0
        max_1_message_diff = 0
        mid_1_message_edits = 0.0
        message_edit_mid_full_percent = 0
        message_edit_mid_summ_percent = 0
        message_edit_mid_time_sec = 0
        edited_messages_count = len(edited_messages)
        max_edits_message = ''
        max_changed_message = ''

        replaces_count_avg = 0
        inserts_count_avg = 0
        deletes_count_avg = 0
        sequence_interrupted_cnt = 0

        for edit_rows in edited_messages.values():
            if len(edit_rows) == 0:
                # Nothing stored for this message: there is no version to compare.
                continue

            first_version = edit_rows[0]
            final_version = edit_rows[-1]
            final_text = self.remove_message_tags(final_version['message'])

            message_edit_cnt = len(edit_rows) - 1
            mid_1_message_edits += message_edit_cnt
            # ">=" keeps the most recently iterated message on ties.
            if message_edit_cnt >= max_1_message_edits:
                max_1_message_edits = message_edit_cnt
                max_edits_message = final_text

            edit_ratio_summ = 0
            summ_edits_time = 0
            replaces_count_edit = 0
            inserts_count_edit = 0
            deletes_count_edit = 0
            # Walk consecutive (previous, current) version pairs.
            for previous_row, current_row in zip(edit_rows, edit_rows[1:]):
                curr_time = StatusController.datetime_from_str(current_row['taken_at'], date_format)
                last_time = StatusController.datetime_from_str(previous_row['taken_at'], date_format)
                summ_edits_time += abs((curr_time - last_time).total_seconds())
                edits_count += 1
                edit_ratio_summ += self.get_str_difference_ratio(
                    self.remove_message_tags(previous_row['message']),
                    self.remove_message_tags(current_row['message']))

                # Operation counts are taken on the raw messages (tags included).
                diff_counts = self.get_str_difference_counts(previous_row['message'], current_row['message'])
                inserts_count_edit += diff_counts['inserts_count_edit']
                replaces_count_edit += diff_counts['replaces_count_edit']
                deletes_count_edit += diff_counts['deletes_count_edit']

            # Overall change between the first and the final stored version.
            edit_ratio_full = self.get_str_difference_ratio(
                self.remove_message_tags(first_version['message']), final_text)
            if edit_ratio_full >= max_1_message_diff:
                max_1_message_diff = edit_ratio_full
                max_changed_message = final_text
            message_edit_mid_full_percent += edit_ratio_full
            message_edit_mid_summ_percent += edit_ratio_summ
            # A single stored version means no edits happened: avoid dividing by zero.
            if message_edit_cnt > 0:
                message_edit_mid_time_sec += summ_edits_time / message_edit_cnt

            replaces_count_avg += replaces_count_edit
            inserts_count_avg += inserts_count_edit
            deletes_count_avg += deletes_count_edit

            if edited_sequence_interrupted.get(first_version['message_id']):
                sequence_interrupted_cnt += 1

        if edited_messages_count > 0:
            # Turn the accumulated sums into per-message averages.
            mid_1_message_edits = mid_1_message_edits / edited_messages_count
            message_edit_mid_full_percent = 100 * message_edit_mid_full_percent / edited_messages_count
            message_edit_mid_summ_percent = 100 * message_edit_mid_summ_percent / edited_messages_count
            message_edit_mid_time_sec = message_edit_mid_time_sec / edited_messages_count
            replaces_count_avg = replaces_count_avg / edited_messages_count
            inserts_count_avg = inserts_count_avg / edited_messages_count
            deletes_count_avg = deletes_count_avg / edited_messages_count
            max_1_message_diff = 100 * max_1_message_diff

        return {
            "edits_count": edits_count,
            "sequence_interrupted_cnt": sequence_interrupted_cnt,
            "edited_messages_count": edited_messages_count,
            "max_1_message_edits": max_1_message_edits,
            "max_1_message_diff_percent": max_1_message_diff,
            "mid_1_message_edits": mid_1_message_edits,
            "message_edit_mid_full_percent": message_edit_mid_full_percent,
            "message_edit_mid_summ_percent": message_edit_mid_summ_percent,
            "message_edit_mid_time_sec": message_edit_mid_time_sec,
            "max_edits_message": max_edits_message,
            "max_changed_message": max_changed_message,
            "replaces_count_avg": replaces_count_avg,
            "inserts_count_avg": inserts_count_avg,
            "deletes_count_avg": deletes_count_avg,
        }