Exemplo n.º 1
0
async def _plot_messages_distribution(msgs, your_name, target_name,
                                      results_directory):
    """Creates all "messages distribution" plots one after another.

    After every plot the coroutine awaits ``asyncio.sleep(delay)``, which
    hands control back to the event loop before the next plot is drawn.

    Args:
        msgs: Messages to analyse (passed through to the plotting functions).
        your_name (str): Your name.
        target_name (str): Target's name.
        results_directory (str): Directory the plots are saved into.
    """
    common = (msgs, results_directory)
    by_author = (msgs, your_name, target_name, results_directory)
    # barplot_non_text_messages used to be listed twice, which only
    # regenerated the very same picture; it is drawn once now.
    plots = (
        (plt.heat_map, common),
        (plt.pie_messages_per_author, by_author),
        (plt.stackplot_non_text_messages_percentage, common),
        (plt.barplot_non_text_messages, common),
        (plt.barplot_messages_per_weekday, by_author),
        (plt.barplot_messages_per_day, common),
        (plt.barplot_messages_per_minutes, common),
        (plt.distplot_messages_per_hour, common),
        (plt.distplot_messages_per_month, common),
        (plt.distplot_messages_per_day, common),
        (plt.lineplot_messages, by_author),
    )
    for plot, args in plots:
        plot(*args)
        await asyncio.sleep(delay)
    log_line("Messages distribution was analysed.")
Exemplo n.º 2
0
async def get_telegram_messages(your_name,
                                target_name,
                                loop=None,
                                target_id=None,
                                num=1000000):
    """Fetches the messages of one Telegram dialogue as MyMessage objects.

    Notes:
        Telegram secrets (id, hash etc) must be ready to use.
        When target_id is None the id is obtained via _get_target_dialog_id.
        A photo album is retrieved as distinct messages with photos.

    Args:
        your_name (str): Your name.
        target_name (str): Target's name.
        loop (optional): An event loop handed over to the client factory.
        target_id (int, optional): Target's dialogue id.
        num (int, optional): No more than num NEWEST messages are retrieved.

    Returns:
        A list of MyMessage objects (from older messages to newer).
    """
    telegram_client = await _get_client(loop=loop)
    async with telegram_client as client:
        if target_id is None:
            target_id = await _get_target_dialog_id(client)
        entity = await client.get_entity(target_id)
        log_line("Receiving Telegram messages...")
        raw_messages = await _retrieve_messages(client, entity, num)
        messages = []
        for raw in raw_messages:
            messages.append(
                _telethon_msg_to_mymessage(raw, target_id, your_name,
                                           target_name))
        log_line(f"{len(messages)} Telegram messages were received")
        return messages
Exemplo n.º 3
0
def barplot_messages_per_minutes(msgs, path_to_save, minutes=2):
    """Draws a bar per `minutes`-long interval of a day, coloured by the number of messages.

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
        minutes (int, optional): Length of one interval (one bar) in minutes.
    """
    sns.set(style="whitegrid", palette="muted")
    sns.despine(top=True)

    per_interval = stools.get_messages_per_minutes(msgs, minutes)
    counts = [len(chunk) for chunk in per_interval.values()]

    hour_labels = stools.get_hours()
    hour_ticks = [hour * 60 // minutes for hour in range(24)]

    # one shade per possible bar height, reversed order of "GnBu_d"
    lowest, highest = min(counts), max(counts)
    shades = np.array(sns.color_palette("GnBu_d", highest - lowest + 1)[::-1])

    ax = sns.barplot(x=list(range(len(counts))), y=counts,
                     edgecolor="none",
                     palette=shades[[count - lowest for count in counts]])
    _change_bar_width(ax, 1.)
    ax.set(xlabel="hour", ylabel="messages")
    ax.set_xticklabels(hour_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(hour_ticks, rotation=65)

    figure = plt.gcf()
    figure.set_size_inches(20, 10)

    picture = os.path.join(path_to_save, f"{barplot_messages_per_minutes.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{barplot_messages_per_minutes.__name__} was created.")
    plt.close("all")
Exemplo n.º 4
0
def barplot_messages_per_weekday(msgs, your_name, target_name, path_to_save):
    """Draws overlapping bars per weekday: all messages and, on top, target's messages.

    Args:
        msgs: Messages to analyse.
        your_name (str): Your name (legend label of the bars with all messages).
        target_name (str): Target's name (legend label of the overlaid bars).
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid", palette="pastel")

    buckets = list(stools.get_messages_per_weekday(msgs).values())
    day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    totals = [len(bucket) for bucket in buckets]
    ax = sns.barplot(x=day_names, y=totals, label=your_name, color="b")

    # the second layer is drawn with the "muted" meaning of the colour code "b"
    sns.set_color_codes("muted")
    target_totals = [sum(1 for msg in bucket if msg.author == target_name)
                     for bucket in buckets]
    sns.barplot(x=day_names, y=target_totals, label=target_name, color="b")

    ax.legend(ncol=2, loc="lower right", frameon=True)
    ax.set(ylabel="messages")
    sns.despine(right=True, top=True)

    figure = plt.gcf()
    figure.set_size_inches(11, 8)

    picture = os.path.join(path_to_save, f"{barplot_messages_per_weekday.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{barplot_messages_per_weekday.__name__} was created.")
    plt.close("all")
Exemplo n.º 5
0
def wordcloud(msgs, words, path_to_save):
    """Builds a circular word cloud of the given words and saves it as a picture.

    Args:
        msgs: Messages the words are counted in.
        words (list of strings): Words to put into the cloud.
        path_to_save (str): Directory the picture is saved into.
    """
    words_cnt = stools.get_words_countered(msgs)
    # we need to create a huge string which contains each word as many times as it encounters in messages.
    all_words_list = []
    for word in set(words):
        all_words_list.extend([word] * (words_cnt[word]))
    # don't forget to shuffle! The optional second argument of random.shuffle
    # was removed in Python 3.11; the default source is random.random anyway.
    random.shuffle(all_words_list)
    all_words_string = ' '.join(all_words_list)

    # the cloud will be a circle: everything outside of the radius is masked out (255).
    radius = 500
    x, y = np.ogrid[:2 * radius, :2 * radius]
    mask = (x - radius) ** 2 + (y - radius) ** 2 > radius ** 2
    mask = 255 * mask.astype(int)

    word_cloud = wc.WordCloud(background_color="white", repeat=False, mask=mask)
    word_cloud.generate(all_words_string)

    plt.axis("off")
    plt.imshow(word_cloud, interpolation="bilinear")

    word_cloud.to_file(os.path.join(path_to_save, wordcloud.__name__ + ".png"))
    # plt.show()
    plt.close()
    log_line(f"{wordcloud.__name__} was created.")
Exemplo n.º 6
0
def lineplot_messages(msgs, your_name, target_name, path_to_save):
    """Draws two filled line plots: messages per period for you and for the target.

    Args:
        msgs: Messages to analyse.
        your_name (str): Your name.
        target_name (str): Target's name.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    x, periods = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    def count_by(author):
        # number of the author's messages in every period
        return [sum(1 for msg in period if msg.author == author) for period in periods]

    y_your = count_by(your_name)
    y_target = count_by(target_name)

    plt.fill_between(x, y_your, alpha=0.3)
    ax = sns.lineplot(x=x, y=y_your, palette="denim blue", linewidth=2.5, label=your_name)
    plt.fill_between(x, y_target, alpha=0.3)
    sns.lineplot(x=x, y=y_target, linewidth=2.5, label=target_name)

    ax.set(xlabel=xlabel, ylabel="messages")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)
    ax.margins(x=0, y=0)

    figure = plt.gcf()
    figure.set_size_inches(13, 7)

    picture = os.path.join(path_to_save, f"{lineplot_messages.__name__}.png")
    figure.savefig(picture, dpi=600)
    plt.close("all")
    log_line(f"{lineplot_messages.__name__} was created.")
Exemplo n.º 7
0
def barplot_messages_per_day(msgs, path_to_save):
    """Draws one bar per day, coloured by the number of messages of that day.

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid", palette="muted")
    sns.despine(top=True)

    per_day = stools.get_messages_per_day(msgs).values()

    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    counts = [len(day) for day in per_day]
    # one shade per possible bar height, reversed order of "Greens_d"
    lowest, highest = min(counts), max(counts)
    shades = np.array(sns.color_palette("Greens_d", highest - lowest + 1)[::-1])

    ax = sns.barplot(x=list(range(len(per_day))), y=counts,
                     edgecolor="none", palette=shades[[count - lowest for count in counts]])
    _change_bar_width(ax, 1.)
    ax.set(xlabel=xlabel, ylabel="messages")
    ax.set_xticklabels(xticks_labels)

    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    plt.xticks(xticks, rotation=65)

    figure = plt.gcf()
    figure.set_size_inches(20, 10)
    picture = os.path.join(path_to_save, f"{barplot_messages_per_day.__name__}.png")
    figure.savefig(picture, dpi=500)

    log_line(f"{barplot_messages_per_day.__name__} was created.")
    plt.close("all")
Exemplo n.º 8
0
def pie_messages_per_author(msgs, your_name, target_name, path_to_save):
    """Draws a pie chart with three slices: your, target's and forwarded messages.

    Every non-forwarded message which is not yours is counted as target's.

    Args:
        msgs: Messages to analyse.
        your_name (str): Your name.
        target_name (str): Target's name.
        path_to_save (str): Directory the picture is saved into.
    """
    forwarded = sum(1 for msg in msgs if msg.is_forwarded)
    msgs = [msg for msg in msgs if not msg.is_forwarded]
    your_messages_len = sum(1 for msg in msgs if msg.author == your_name)
    target_messages_len = len(msgs) - your_messages_len

    slices = [your_messages_len, target_messages_len, forwarded]
    legend_labels = [
        f"{your_name}\n({your_messages_len})",
        f"{target_name}\n({target_messages_len})",
        f"forwarded\n({forwarded})",
    ]

    fig, ax = plt.subplots(figsize=(13, 8), subplot_kw=dict(aspect="equal"))

    # only the "forwarded" slice is pulled out of the pie
    wedges, _, autotexts = ax.pie(
        x=slices,
        explode=(.0, .0, .2),
        colors=["#4982BB", "#5C6093", "#53B8D7"],
        autopct=lambda pct: f"{pct:.1f}%",
        wedgeprops={"edgecolor": "black", "alpha": 0.8},
    )

    ax.legend(wedges, legend_labels, loc="upper right", bbox_to_anchor=(1, 0, 0.5, 1))

    plt.setp(autotexts, size=10, weight="bold")

    picture = os.path.join(path_to_save, f"{pie_messages_per_author.__name__}.png")
    fig.savefig(picture, dpi=600)
    plt.close("all")
    log_line(f"{pie_messages_per_author.__name__} was created.")
Exemplo n.º 9
0
def barplot_words(msgs, your_name, target_name, words, topn, path_to_save):
    """Draws grouped bars showing how often each author used the most frequent words.

    Args:
        msgs: Messages to analyse.
        your_name (str): Your name.
        target_name (str): Target's name.
        words (list of strings): Candidate words; the list is not modified.
        topn (int): How many of the most frequent words are plotted.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    your_msgs = [msg for msg in msgs if msg.author == your_name]
    target_msgs = [msg for msg in msgs if msg.author == target_name]

    your_words_cnt = stools.get_words_countered(your_msgs)
    target_words_cnt = stools.get_words_countered(target_msgs)

    # sorted() instead of words.sort(): the caller's list must not be
    # reordered as a side effect of drawing a plot.
    top_words = sorted(words, key=lambda w: your_words_cnt[w] + target_words_cnt[w],
                       reverse=True)[:topn]

    # two rows per word: one for you and one for the target
    df_dict = {"name": [], "word": [], "num": []}
    for word in top_words:
        df_dict["word"].extend([word, word])
        df_dict["name"].extend([your_name, target_name])
        df_dict["num"].extend([your_words_cnt[word], target_words_cnt[word]])

    ax = sns.barplot(x="word", y="num", hue="name", data=pd.DataFrame(df_dict), palette="PuBu")
    ax.legend(ncol=1, loc="upper right", frameon=True)
    ax.set(ylabel="messages", xlabel='')

    fig = plt.gcf()
    fig.set_size_inches(14, 8)

    fig.savefig(os.path.join(path_to_save, barplot_words.__name__ + ".png"), dpi=600)
    # plt.show()
    log_line(f"{barplot_words.__name__} was created.")
    plt.close("all")
Exemplo n.º 10
0
def barplot_emojis(msgs, your_name, target_name, topn, path_to_save):
    """Draws horizontal grouped bars for the most common emojis of the dialogue.

    Nothing is drawn (and nothing is logged) when there are no emojis at all.

    Args:
        msgs: Messages to analyse.
        your_name (str): Your name.
        target_name (str): Target's name.
        topn (int): How many of the most common emojis are plotted.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    mc_emojis = stools.get_emoji_countered(msgs).most_common(topn)
    if not mc_emojis:
        return

    your_emojis_cnt = stools.get_emoji_countered(
        [msg for msg in msgs if msg.author == your_name])
    target_emojis_cnt = stools.get_emoji_countered(
        [msg for msg in msgs if msg.author == target_name])

    # two rows per emoji: one for you and one for the target
    rows = {"name": [], "emoji": [], "num": []}
    for symbol, _ in mc_emojis:
        rows["emoji"] += [emoji.demojize(symbol), emoji.demojize(symbol)]
        rows["name"] += [your_name, target_name]
        rows["num"] += [your_emojis_cnt[symbol], target_emojis_cnt[symbol]]

    ax = sns.barplot(x="num", y="emoji", hue="name", data=pd.DataFrame(rows), palette="PuBu")
    ax.set(ylabel="emoji name", xlabel="emojis")
    ax.legend(ncol=1, loc="lower right", frameon=True)

    figure = plt.gcf()
    figure.set_size_inches(11, 8)
    plt.tight_layout()

    picture = os.path.join(path_to_save, f"{barplot_emojis.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{barplot_emojis.__name__} was created.")
    plt.close("all")
Exemplo n.º 11
0
async def _plot_words_distribution(msgs, your_name, target_name,
                                   results_directory, words):
    """Creates the word based plots: a bar plot of the top 10 words and a word cloud.

    Awaits ``asyncio.sleep(delay)`` after each plot.
    """
    top_words_number = 10
    steps = (
        lambda: plt.barplot_words(msgs, your_name, target_name, words,
                                  top_words_number, results_directory),
        lambda: plt.wordcloud(msgs, words, results_directory),
    )
    for draw in steps:
        draw()
        await asyncio.sleep(delay)
    log_line("Words distribution was analysed.")
Exemplo n.º 12
0
def get_telegram_secrets():
    """Reads Telegram secrets from the "telegram_secrets" section of the config file.

    Returns:
        A tuple (api_id, api_hash, phone_number, session_name) of strings;
        an option missing in the file is returned as an empty string.
    """
    config_file_name = _get_config_file_name()
    parser = configparser.ConfigParser()
    parser.read(config_file_name, encoding="utf-8-sig")

    section = "telegram_secrets"
    options = ("api_id", "api_hash", "phone_number", "session_name")
    secrets = tuple(parser.get(section, option, fallback="") for option in options)

    log_line(f"Telegram secrets were received from {config_file_name} file.")
    return secrets
Exemplo n.º 13
0
async def _plot_messages_distribution_content_based(msgs, your_name,
                                                    target_name,
                                                    results_directory):
    """Creates the content based plots: message length line plot and top 10 emojis bar plot.

    Awaits ``asyncio.sleep(delay)`` after each plot.
    """
    top_emojis_number = 10
    steps = (
        lambda: plt.lineplot_message_length(msgs, your_name, target_name,
                                            results_directory),
        lambda: plt.barplot_emojis(msgs, your_name, target_name,
                                   top_emojis_number, results_directory),
    )
    for draw in steps:
        draw()
        await asyncio.sleep(delay)
    log_line("Content based messages distribution was analysed.")
Exemplo n.º 14
0
def store_telegram_secrets(api_id, api_hash, phone_number, session_name="Message retriever"):
    """Stores Telegram secrets in the "telegram_secrets" section of the config file.

    Other sections and options which are already in the file are kept.

    Args:
        api_id (str or int): Telegram API id.
        api_hash (str): Telegram API hash.
        phone_number (str): Phone number of the account.
        session_name (str, optional): Name of the Telegram session.
    """
    config_file_name = _get_config_file_name()
    config_parser = configparser.ConfigParser()
    config_parser.read(config_file_name, encoding="utf-8-sig")

    section = "telegram_secrets"
    # ConfigParser.set() raises NoSectionError for an unknown section, which
    # is exactly the situation of the very first run (no file / no section).
    if not config_parser.has_section(section):
        config_parser.add_section(section)
    # ConfigParser accepts string values only (an int api_id would raise TypeError).
    config_parser.set(section, "api_id", str(api_id))
    config_parser.set(section, "api_hash", str(api_hash))
    config_parser.set(section, "session_name", str(session_name))
    config_parser.set(section, "phone_number", str(phone_number))

    with open(config_file_name, "w+", encoding="utf-8") as config_file:
        config_parser.write(config_file)
    log_line(f"Telegram secrets were stored in {config_file_name} file.")
Exemplo n.º 15
0
def get_session_params():
    """Reads parameters of the previous session from the "session_params" section of the config file.

    Returns:
        A tuple (dialog_id, vkopt_file, words_file, your_name, target_name).
        dialog_id is an int (-1 when it is not stored); the rest are strings
        (empty when not stored).
    """
    config_file_name = _get_config_file_name()
    parser = configparser.ConfigParser()
    parser.read(config_file_name, encoding="utf-8-sig")

    def read(option):
        return parser.get("session_params", option, fallback="")

    raw_dialog_id = read("dialog_id")
    if raw_dialog_id:
        dialog_id = int(raw_dialog_id)
    else:
        dialog_id = -1
    vkopt_file = read("vkopt_file")
    words_file = read("words_file")
    your_name = read("your_name")
    target_name = read("target_name")

    log_line(f"Session parameters were received from {config_file_name} file.")
    return dialog_id, vkopt_file, words_file, your_name, target_name
Exemplo n.º 16
0
def store_session_params(params):
    """Stores parameters of the session in the "session_params" section of the config file.

    Args:
        params (dict): Session parameters. The keys read here are "from_vk",
            "from_telegram", "dialogue", "vkopt_file", "words_file",
            "plot_words", "your_name" and "target_name". When "from_telegram"
            is truthy, "dialogue" must end with "(id=<digits>)".

    Raises:
        AssertionError: Neither source is selected or one of the names is empty.
        ValueError: A Telegram dialogue without the "(id=<digits>)" suffix.
    """
    config_file_name = _get_config_file_name()
    config_parser = configparser.ConfigParser()
    config_parser.read(config_file_name, encoding="utf-8-sig")

    section = "session_params"
    # ConfigParser.set() raises NoSectionError for an unknown section
    # (a fresh config file has none).
    if not config_parser.has_section(section):
        config_parser.add_section(section)

    assert params["from_vk"] or params["from_telegram"]
    dialog_id = ""
    if params["from_telegram"]:
        # raw string: "\(" is not a valid escape sequence of a plain literal
        found = re.search(r"\(id=([0-9]+)\)$", params["dialogue"])
        if found is None:
            raise ValueError(f"Dialogue {params['dialogue']!r} does not end with '(id=<number>)'.")
        dialog_id = found.group(1)
    config_parser.set(section, "dialog_id", dialog_id)
    config_parser.set(section, "vkopt_file", params["vkopt_file"] if params["from_vk"] else "")
    config_parser.set(section, "words_file", params["words_file"] if params["plot_words"] else "")

    assert params["your_name"] and params["target_name"]
    config_parser.set(section, "your_name", params["your_name"])
    config_parser.set(section, "target_name", params["target_name"])
    with open(config_file_name, "w+", encoding="utf-8") as config_file:
        config_parser.write(config_file)
    log_line(f"Session parameters were stored in {config_file_name} file.")
Exemplo n.º 17
0
def distplot_messages_per_hour(msgs, path_to_save):
    """Draws a histogram of messages over the hours of a day (one bin per hour).

    Args:
        msgs: Messages to analyse; the hour is taken from msg.date.hour.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    hours = [msg.date.hour for msg in msgs]
    ax = sns.distplot(hours, bins=range(25), color="m", kde=False)
    ax.set_xticklabels(stools.get_hours())
    ax.set(xlabel="hour", ylabel="messages")
    ax.margins(x=0)

    plt.xticks(range(24), rotation=65)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(11, 8)

    picture = os.path.join(path_to_save, f"{distplot_messages_per_hour.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{distplot_messages_per_hour.__name__} was created.")
    plt.close("all")
Exemplo n.º 18
0
def stackplot_non_text_messages_percentage(msgs, path_to_save):
    """Draws a stack plot with the share of every type of non-text messages per period.

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid", palette="muted")

    colors = ['y', 'b', 'c', 'r', 'g', 'm']

    x, y_total = _get_plot_data(msgs)
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    stacks = stools.get_non_text_messages_grouped(y_total)

    # Turn absolute numbers into fractions of the period's total
    # (a period without non-text messages becomes all zeros).
    for period in range(len(stacks[0]["groups"])):
        total = sum(stack["groups"][period] for stack in stacks)
        for stack in stacks:
            groups = stack["groups"]
            groups[period] = groups[period] / total if total else 0

    series = [stack["groups"] for stack in stacks]
    series_labels = [stack["type"] for stack in stacks]
    plt.stackplot(x, *series, labels=series_labels, colors=colors, alpha=0.7)

    plt.margins(0, 0)
    plt.xticks(xticks, rotation=65)
    plt.yticks([tenth / 10 for tenth in range(0, 11, 2)])

    ax = plt.gca()
    ax.set_xticklabels(xticks_labels)
    ax.set_yticklabels([f"{percent}%" for percent in range(0, 101, 20)])
    ax.tick_params(axis='x', bottom=True, color="#A9A9A9")
    ax.set(xlabel=xlabel, ylabel="non-text messages")

    # https://stackoverflow.com/a/4701285
    # Shrink the axis by 10% of its width and put the legend into the freed space on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.9, box.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    figure = plt.gcf()
    figure.set_size_inches(11, 8)

    picture = os.path.join(path_to_save, f"{stackplot_non_text_messages_percentage.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{stackplot_non_text_messages_percentage.__name__} was created.")
    plt.close("all")
Exemplo n.º 19
0
def distplot_messages_per_day(msgs, path_to_save):
    """Draws a histogram of days by the number of messages (bins are 50 messages wide).

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    day_sizes = [len(day) for day in stools.get_messages_per_day(msgs).values()]

    # 0, 50, 100, ... and the biggest day itself as the right edge of the last bin
    biggest_day = max(day_sizes)
    bin_edges = list(range(0, biggest_day, 50))
    bin_edges.append(biggest_day)

    ax = sns.distplot(day_sizes, bins=bin_edges, color="m", kde=False)
    ax.set(xlabel="messages", ylabel="days")
    ax.margins(x=0)

    figure = plt.gcf()
    figure.set_size_inches(11, 8)

    picture = os.path.join(path_to_save, f"{distplot_messages_per_day.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{distplot_messages_per_day.__name__} was created.")
    plt.close("all")
Exemplo n.º 20
0
def heat_map(msgs, path_to_save, seasons=False):
    """Draws a calendar heat map (months x days of month) of the number of messages per day.

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
        seasons (bool, optional): Draw horizontal lines which divide the map on seasons.
    """
    sns.set(style="whitegrid")

    messages_per_day = stools.get_messages_per_day(msgs)
    months = stools.date_months_to_str_months(stools.get_months(msgs))
    # One row of 31 cells per month; a cell stays NaN until a day is written into it.
    heat_calendar = {month: np.array([None] * 31, dtype=np.float64) for month in months}
    for day, d_msgs in messages_per_day.items():
        heat_calendar[stools.str_month(day)][day.day - 1] = len(d_msgs)

    # min_day = len(min(messages_per_day.values(), key=len))
    max_day = len(max(messages_per_day.values(), key=len))

    data = np.array(list(heat_calendar.values()))
    # True for NaN and for non-zero cells, i.e. False only for cells equal to 0.
    mask = np.array([np.array(arr, dtype=bool) for arr in data])

    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap
    # returns the same colormap and exists in old and new versions.
    cmap = plt.get_cmap("Purples")

    center = max_day * 0.4  # (avg([len(d) for d in messages_per_day.values()]) + (max_day - min_day) / 2) / 2

    ax = sns.heatmap(data=data, cmap=cmap, center=center, xticklabels=True, yticklabels=True,
                     square=True, linewidths=.2, cbar_kws={"shrink": .5})

    # builds a mask to highlight empty days: only unmasked (zero) cells are painted
    sns.heatmap(data, mask=mask,
                xticklabels=range(1, 32),
                yticklabels=months,
                linewidths=.2, cbar=False, cmap=mpl_colors.ListedColormap(["#ffffe6"]))

    if seasons:  # divides heatmap on seasons
        # NOTE(review): `m.month` needs the items of `months` to have a `.month`
        # attribute; if date_months_to_str_months returns plain strings this
        # branch raises AttributeError - confirm against stools.
        season_lines = [i for i, m in enumerate(months) if m.month % 3 == 0 and i != 0]
        ax.hlines(season_lines, *ax.get_xlim(), colors=["b"])
    ax.set(xlabel='day', ylabel="month")
    ax.margins(x=0)

    plt.tight_layout()
    fig = plt.gcf()
    fig.set_size_inches(11, 8)
    fig.savefig(os.path.join(path_to_save, heat_map.__name__ + ".png"), dpi=600)

    # plt.show()
    plt.close("all")
    log_line(f"{heat_map.__name__} was created.")
Exemplo n.º 21
0
def get_mymessages_from_file(your_name, target_name, opt_file_name):
    """Retrieves a list of MyMessage representations of messages from a file generated by VkOpt GChrome extension.

    Notes:
        You must firstly ensure that your_name and target_name are equal to the names in opt_file_name text file.

    Args:
        your_name (str): Your name.
        target_name (str): Target's name.
        opt_file_name (str): The name of the file to read.

    Returns:
        A list of MyMessage objects.
    """
    log_line("Start reading vkOpt messages")
    with open(opt_file_name, 'r', encoding="utf8") as f:
        lines = f.readlines()
    opt_message_list = _parse_lines(lines, your_name, target_name)
    msgs = [_opt_to_mymessage(msg) for msg in opt_message_list]
    # a single formatted string, like every other log_line call
    log_line(f"{len(opt_message_list)} vkOpt messages were received.")
    return msgs
Exemplo n.º 22
0
def distplot_messages_per_month(msgs, path_to_save):
    """Draws a histogram of messages over time; bin edges are the x ticks of _get_xticks.

    Args:
        msgs: Messages to analyse; the first and the last message define the time range.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid")

    first_day = msgs[0].date.date()
    xticks, xticks_labels, xlabel = _get_xticks(msgs)

    # every message is represented by the number of days since the first message
    day_offsets = [(msg.date.date() - first_day).days for msg in msgs]
    last_offset = (msgs[-1].date.date() - first_day).days

    ax = sns.distplot(day_offsets, bins=xticks + [last_offset], color="m", kde=False)
    ax.set_xticklabels(xticks_labels)
    ax.set(xlabel=xlabel, ylabel="messages")
    ax.margins(x=0)

    plt.xticks(xticks, rotation=65)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(11, 8)

    picture = os.path.join(path_to_save, f"{distplot_messages_per_month.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{distplot_messages_per_month.__name__} was created.")
    plt.close("all")
Exemplo n.º 23
0
async def _analyse(msgs, your_name, target_name, words_file, store_msgs=True):
    """Runs the whole analysis: creates a results directory, optionally stores the messages and plots everything.

    Args:
        msgs: Messages to analyse; nothing is done when there are none.
        your_name (str): Your name.
        target_name (str): Target's name.
        words_file: Passed through to _plot_all.
        store_msgs (bool, optional): Store the messages in "messages.txt" inside the results directory.
    """
    log_line("Start messages analysis process.")

    if len(msgs) == 0:
        log_line("No messages were received.")
        return

    # results/<date>_<your_name>_<target_name> next to the directory of this file
    timestamp = datetime.datetime.today().strftime('%d-%m-%y %H-%M-%S')
    package_dir = os.path.normpath(os.path.dirname(__file__))
    parent_dir = os.path.split(package_dir)[0]
    results_directory = os.path.join(parent_dir, "results",
                                     f"{timestamp}_{your_name}_{target_name}")

    if not os.path.exists(results_directory):
        os.makedirs(results_directory)

    await asyncio.sleep(delay)

    if store_msgs:
        msgs_path = os.path.join(results_directory, "messages.txt")
        storage.store_msgs(msgs_path, msgs)

    await asyncio.sleep(delay)

    await _plot_all(msgs, your_name, target_name, results_directory,
                    words_file)

    log_line("Done.")
Exemplo n.º 24
0
def barplot_non_text_messages(msgs, path_to_save):
    """Draws stacked-looking bars with the numbers of non-text messages of every type per period.

    Args:
        msgs: Messages to analyse.
        path_to_save (str): Directory the picture is saved into.
    """
    sns.set(style="whitegrid", palette="muted")

    colors = ['y', 'b', 'c', 'r', 'g', 'm']

    _x, y_total = _get_plot_data(msgs)
    _xticks, xticks_labels, xlabel = _get_xticks(msgs, crop=False)

    bars = stools.get_non_text_messages_grouped(y_total)

    # The bars overlap each other, so the tallest layer (sum of all types) goes first...
    heights = [0] * len(y_total)
    for bar in bars:
        heights = [height + part for height, part in zip(heights, bar["groups"])]
    # ... and every next layer is lower by the type which has just been plotted.
    for index, bar in enumerate(bars[:-1]):
        sns.barplot(x=xticks_labels, y=heights, label=bar["type"], color=colors[index])
        heights = [height - part for height, part in zip(heights, bar["groups"])]
    ax = sns.barplot(x=xticks_labels, y=heights, label=bars[-1]["type"], color=colors[-1])
    _change_bar_width(ax, 1.)

    # https://stackoverflow.com/a/4701285
    # Shrink the axis by 10% of its width and put the legend into the freed space on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.9, box.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    ax.set_xticklabels(xticks_labels, ha="right")
    ax.set(xlabel=xlabel, ylabel="messages")

    plt.xticks(rotation=65)
    figure = plt.gcf()
    figure.set_size_inches(16, 8)

    picture = os.path.join(path_to_save, f"{barplot_non_text_messages.__name__}.png")
    figure.savefig(picture, dpi=600)
    log_line(f"{barplot_non_text_messages.__name__} was created.")
    plt.close("all")
Exemplo n.º 25
0
async def _retrieve_messages(client, target_entity, num):
    """Downloads messages of target_entity batch by batch.

    Every batch except the first one is awaited with a 10 minutes timeout; on
    a timeout the messages collected so far are returned. A ConnectionError
    is logged and re-raised.

    Returns:
        No more than num messages in the reversed order of retrieval.
    """
    timeout_seconds = 10 * 60
    batch_size = min(3000, num)
    collected = []
    batch = await client.get_messages(target_entity, limit=batch_size)
    while len(batch) and len(collected) < num:
        # the next request continues from the last message of this batch
        last_id = batch[-1].id
        collected.extend(item for item in batch if isinstance(item, Message))
        try:
            batch = await asyncio.wait_for(
                client.get_messages(target_entity,
                                    limit=min(batch_size, num - len(collected)),
                                    offset_id=last_id),
                timeout_seconds)
        except ConnectionError:
            log_line("Internet connection was lost.")
            raise
        except asyncio.TimeoutError:
            log_line("Telegram timeout error.")
            break
        if len(batch):
            log_line(f"{len(collected[:num])} ({len(collected[:num])/batch.total*100:.2f}%) messages received.")
        else:
            log_line(f"{len(collected[:num])} (100%) messages received.")
    return collected[:num][::-1]
Exemplo n.º 26
0
def get_words(file_path):
    """Reads words from a text file, one word per line.

    A line is accepted when, after stripping, it is not empty and consists of
    letters, apostrophes (') and backticks (`) only.

    Args:
        file_path (str): Path to the file with words (UTF-8, BOM is allowed).

    Returns:
        A list of accepted words in the order of the file.
    """
    extra_chars = ("'", "`")
    words = []
    with open(file_path, 'r', encoding="utf-8-sig") as f:
        for line in f:
            word = line.strip()
            # all() of an empty sequence is True, so blank lines have to be
            # rejected explicitly - otherwise "" is returned as a word.
            if word and all(ch.isalpha() or ch in extra_chars for ch in word):
                words.append(word)
    log_line(f"{len(words)} words were received from {file_path} file.")
    return words
Exemplo n.º 27
0
def get_msgs(file_path):
    """Loads messages from a JSON file and converts every item with MyMessage.from_dict.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        A list with the results of MyMessage.from_dict.
    """
    with open(file_path, 'r') as source:
        raw_msgs = json.load(source)
        msgs = list(map(MyMessage.from_dict, raw_msgs))
    log_line(f"{len(msgs)} messages were received from {file_path} file.")
    return msgs
Exemplo n.º 28
0
def _parse_lines(lines, your_name, target_name, num=1000000):
    """Parses given text lines and retrieves a message list.

    Notes:
        Parses messages from vkOpt GChrome extension with a DEFAULT message format.
        Appropriate message format is "%username% (%date%):
                                       %message%"
        Appropriate datetime format is "HH:MM:ss  dd/mm/yyyy".
        More than one nested forwarded messages are counted as ONE forwarded message.
        ... As well as a message with multiple photos counts as ONE photo.
        ... As well as a message with multiple audio files ... what the heck?

    Args:
        your_name (str): Your name.
        target_name (str): Target's name.
        lines (list of strings): Text lines of the file.
        num (int): Max number of the messages to retrieve.

    Returns:
        List of dictionaries such as:
            {
                "text": text of the message (str),
                "has_forwards": flag (bool),
                "attachment": text (str) of the attachment (without first line)
            }
    """
    lines[0] = lines[0].replace('\ufeff', '')  # remove start character
    # assert lines[0].startswith(target_name) or lines[0].strip().startswith(your_name)
    date_pattern = "[0-2][0-9]:[0-5][0-9]:[0-5][0-9]  [0-3][0-9]/[0-1][0-9]/([0-9]{4})"
    date_regex = re.compile(date_pattern)
    title_ending_regex = re.compile(" \(" + date_pattern + "\):\n$")
    msg_title_regex = re.compile("^\t*(" + your_name + '|' + target_name +
                                 ") \(" + date_pattern + "\):\n$")
    msgs = []
    current_msg = {"text": "", "has_forwards": False, "attachment": ""}
    i = 0
    while i < len(lines) and len(msgs) <= num:
        line = lines[i]
        if line.startswith("Attachments:["):
            i += 1
            current_msg["attachment"] = lines[i]
        else:
            search = title_ending_regex.search(line)
            if search is not None and search.span()[1] == len(line):
                if line[0].isspace():
                    current_msg["has_forwards"] = True
                    i += 1
                else:
                    if not msg_title_regex.match(line):
                        log_line(
                            f"[{line}] DOES NOT MATCH ANY SUGGESTED NAME! NO VK OPT MESSAGES WILL BE RECEIVED!"
                        )
                        return []
                    # removing redundant spaces after the message
                    current_msg["text"] = current_msg["text"][:-3]
                    msgs.append(current_msg)
                    current_msg = {
                        "text": "",
                        "has_forwards": False,
                        "attachment": ""
                    }
                    current_msg["date"] = datetime.strptime(
                        date_regex.search(line).group(), "%H:%M:%S  %d/%m/%Y")
                    current_msg["author"] = your_name if line.startswith(
                        your_name) else target_name
            elif not current_msg["has_forwards"]:
                if current_msg["attachment"]:
                    current_msg["attachment"] += line
                else:
                    current_msg["text"] += line
        i += 1
    if i > 0:
        current_msg["text"] = current_msg["text"] if current_msg[
            "attachment"] else current_msg["text"][:-3]
        msgs.append(current_msg)
    # first message is just a template and should be removed
    return msgs[1:]
Exemplo n.º 29
0
def store_msgs(file_path, msgs):
    """Dumps msgs as JSON into file_path.

    Objects which json cannot serialise are written as their str() value.

    Args:
        file_path (str): Path to the file to (over)write.
        msgs: Messages to store.
    """
    with open(file_path, 'w') as target:
        json.dump(msgs, target, default=str)
    stored = len(msgs)
    log_line(f"{stored} messages were stored in {file_path} file.")
Exemplo n.º 30
0
def get_words(file_path):
    """Reads words from a text file, one stripped line per word.

    Args:
        file_path (str): Path to the file with words (UTF-8, BOM is allowed).

    Returns:
        A list with every line of the file stripped of surrounding whitespace.
    """
    words = []
    with open(file_path, 'r', encoding="utf-8-sig") as source:
        for line in source:
            words.append(line.strip())
    log_line(f"{len(words)} words were received from {file_path} file.")
    return words