from src.common.sql_plot import SqlPlot

tag_group = ('#thelastofus', '#thelastofus2', '#tlou', '#tlou2')

SqlPlot().show(
    sql_queries=[{
        'query': f"""
        select date_trunc('week', posts.created) as time_window, count(distinct posts.id)
        from posts
        join post_tags
            on posts.id = post_tags.post_id
                and posts.type = 1
                and posts.created > '2019-01-01'
                and post_tags.value in {tag_group}
        group by time_window
        order by time_window
    """, 'label': tag_group
    }],
    title="Количество новых постов за неделю с тегами",
    x_label="Время",
    y_label="Новые посты за неделю"
)
Exemplo n.º 2
0
from src.common.config_loader import ConfigLoader
from src.common.data_base_wrapper import DataBaseWrapper
from src.common.sql_plot import SqlPlot

subsite_ids = [64954]

config = ConfigLoader.load()
db = DataBaseWrapper(config["db"])

queries = []
for subsite_id in subsite_ids:
    subsite_name = db.execute_select_one(
        "select name from subsites where id = %s", (subsite_id, ))[0]
    queries.append({
        'query': f"""
            select date_trunc('week', created) as time_window, count(*)
            from posts
            where type = 1 and subsite_id = {subsite_id}
                and created between '2018-06-01' and '2020-07-20'
            group by time_window
            order by time_window
        """,
        'label': subsite_name
    })

SqlPlot().show(sql_queries=queries,
               title="Количество новых постов за неделю",
               x_label="Время",
               y_label="Новые посты за неделю")
from src.common.sql_plot import SqlPlot

post_id = 220958

SqlPlot().show(sql_queries=[{
    'query':
    f"""
                select request_time, comments
                from post_history
                where post_id = {post_id}
                order by id
            """
}],
               title="Комментарии",
               x_label="Время",
               y_label="Комментарии")
Exemplo n.º 4
0
from src.common.sql_plot import SqlPlot

SqlPlot().show(
    sql_queries=[
        {
            'query': """
                select date_trunc('week', created) as time_window, count(*)
                from posts
                where type = 1 and created < '2020-07-20'
                group by time_window
                order by time_window
            """, 'label': 'С мемами'
        },
        {
            'query': """
                select date_trunc('week', created) as time_window, count(*)
                from posts
                where type = 1 and subsite_id != 64966 and created < '2020-07-20'
                group by time_window
                order by time_window
            """, 'label': 'Без мемов'
        }
    ],
    title="Количество новых постов за неделю (без репостов и вакансий)",
    x_label="Время",
    y_label="Новые посты за неделю"
)
Exemplo n.º 5
0
from src.common.sql_plot import SqlPlot

SqlPlot().show(sql_queries=[{
    'query':
    """
                select date_trunc('week', created) as time_window, avg(comments_count)
                from posts
                where type = 1 and created between '2016-06-01' and '2020-07-19'
                group by time_window
                order by time_window
            """
}],
               title="Среднее количество комментариев на пост",
               x_label="Время",
               y_label="Среднее количество комментариев на пост")
from src.common.sql_plot import SqlPlot

SqlPlot().show(sql_queries=[{
    'query':
    """
                with data as (
                    select date_trunc('week', created) as time_window,
                           count(*) as new_posts
                    from posts
                    group by time_window
                )
                select time_window,
                        sum(new_posts) over (
                            order by time_window rows between unbounded preceding and current row
                        ) as total_posts
                from data
            """
}],
               title="Количество постов",
               x_label="Время",
               y_label="Посты")
from src.common.sql_plot import SqlPlot

post_id = 220958

SqlPlot().show(sql_queries=[{
    'query':
    f"""
                select request_time, hits
                from post_history
                where post_id = {post_id}
                order by id
            """
}],
               title="Просмотры",
               x_label="Время",
               y_label="Просмотры")
Exemplo n.º 8
0
    queries.append({
        'query': f"""
            with length_data as (
                select
                       posts.id as post_id,
                       posts.created as created,
                       sum(coalesce(blocks.text_length, 0)) as text_length
                from posts
                join post_blocks blocks
                    on posts.id = blocks.post_id
                        and posts.type = 1
                group by posts.id
            )
            select
                date_trunc('week', created) as time_window,
                percentile_disc({percentile}) within group (order by text_length) as percentile
            from length_data
            where created between '2016-06-01' and '2020-07-19'
            group by time_window
            order by time_window
        """,
        'label': str(int(percentile * 100)) + '-процентиль'
    })

SqlPlot().show(
    sql_queries=queries,
    title="Длина постов",
    x_label="Время",
    y_label="Процентили"
)
from src.common.sql_plot import SqlPlot

SqlPlot().show(sql_queries=[{
    'query':
    """
                select date_trunc('week', created) as time_window, sum(comments_count)
                from posts
                where type = 1 and created between '2016-06-01' and '2020-07-19'
                group by time_window
                order by time_window
            """
}],
               title="Количество новых комментариев",
               x_label="Время",
               y_label="Новые комментарии за неделю")
Exemplo n.º 10
0
SqlPlot().show(
    sql_queries=[
        {
            'query': """
                with length_data as (
                    select
                           posts.id as post_id,
                           sum(coalesce(blocks.text_length, 0)) as text_length
                    from posts
                    join post_blocks blocks
                        on posts.id = blocks.post_id
                            and posts.type = 1
                    group by posts.id
                ), percentiles as (
                    select generate_series as value
                    from generate_series(0, 0.95, 0.01)
                )
                select
                    percentile_disc(percentiles.value) within group (order by text_length),
                    percentiles.value * 100
                from percentiles cross join length_data
                where text_length < 341961
                group by percentiles.value
            """
        }
    ],
    title="Распределение длины постов",
    x_label="Количество текстовых символов в посте",
    y_label="Количество постов, %"
)
post_id = 220958

SqlPlot().show(sql_queries=[{
    'query':
    f"""
                with data as (
                    select date_trunc('hour', request_time) as hours,
                           min(request_time)                as request_time,
                           min(hits)                        as hits,
                           min(rating)                      as rating
                    from post_history
                    group by hours
                    order by hours
                ),
                     delta_data as (
                         select request_time,
                                (hits - coalesce(lag(hits, 1) over (order by hours), 0))::float                      as d_hits,
                                (rating - coalesce(lag(rating, 1) over (order by hours), 0))::float                  as d_rating,
                                extract(epoch from request_time - lag(request_time, 1) over (order by hours)) / 3600 as d_time
                         from data
                     )
                select request_time,
                       case when d_hits != 0 then d_rating / d_hits / d_time else 0 end as rate
                from delta_data
                where request_time < '2020-10-03'
            """
}],
               title="Конверсия просмотров в рейтинг",
               x_label="Время",
               y_label="Коэффициент")
from src.common.sql_plot import SqlPlot

post_id = 220958

SqlPlot().show(sql_queries=[{
    'query':
    f"""
                select request_time, rating
                from post_history
                where post_id = {post_id}
                order by id
            """
}],
               title="Рейтинг",
               x_label="Время",
               y_label="Рейтинг")
from src.common.sql_plot import SqlPlot

SqlPlot().show(
    sql_queries=[
        {
            'query': """
                with all_ids as (
                    select generate_series as user_id
                    from generate_series(1, (select max(id) from users))
                )
                select
                       round(all_ids.user_id, -3) user_id_group,
                       count(user_errors.id)
                from all_ids
                left join user_errors
                    on all_ids.user_id = user_errors.user_id
                group by user_id_group
                order by user_id_group
            """, 'label': 'Ошибки'
        }
    ],
    title="Количество ошибок API",
    x_label="ID пользователя",
    y_label="Количество ошибок на 1000 пользователей"
)
from src.common.sql_plot import SqlPlot

post_id = 220958

SqlPlot().show(sql_queries=[{
    'query':
    f"""
                with data as (
                    select date_trunc('hour', request_time) as hours,
                           min(request_time)                as request_time,
                           min(hits)                        as hits
                    from post_history
                    group by hours
                    order by hours
                )
                select request_time,
                       coalesce((hits - coalesce(lag(hits, 1) over (order by hours), 0)) 
                            / (extract(epoch from request_time - lag(request_time, 1) over (order by hours)) / 3600), 0)
                from data
            """
}],
               title="Просмотры в час",
               x_label="Время",
               y_label="Просмотры в час")
SqlPlot().show(
    sql_queries=[
        {
            'query': """
                select date_trunc('week', created) as time_window, count(distinct posts.id)
                from posts
                join post_tags tags
                    on posts.id = tags.post_id
                        and posts.type = 1
                        and posts.created between '2016-06-01' and '2020-07-20'
                        and tags.value in ('#лонг', '#лонгрид', '#longread')
                group by time_window
                order by time_window
            """, 'label': "#лонг, #лонгрид, #longread"
        },
        {
            'query': """
                select date_trunc('week', created) as time_window, count(distinct posts.id)
                from posts
                join post_tags tags
                    on posts.id = tags.post_id
                        and posts.type = 1
                        and posts.created between '2016-06-01' and '2020-07-20'
                        and tags.value in ('#лонг', '#лонгрид', '#long', '#longread')
                group by time_window
                order by time_window
            """, 'label': "#long"
        }
    ],
    title="Новые лонги",
    x_label="Время",
    y_label="Новые лонги за неделю"
)
from src.common.sql_plot import SqlPlot

SqlPlot().show(sql_queries=[{
    'query': """
                select date_trunc('week', created) as time_window, count(*)
                from users
                group by time_window
            """,
    'label': 'Новые пользователи'
}],
               title="Количество новых пользователей за неделю",
               x_label="Время",
               y_label="Новые пользователи за неделю")
Exemplo n.º 17
0
SqlPlot().show(sql_queries=[
    {
        'query': """
                select date_trunc('week', created) as time_window, count(distinct posts.id)
                from posts
                join post_tags tags
                    on posts.id = tags.post_id
                        and posts.type = 1
                        and posts.created between '2019-02-01' and '2020-07-20'
                        and posts.is_editorial = true
                        and tags.value in ('#лонг', '#лонгрид', '#long', '#longread')
                group by time_window
                order by time_window
            """,
        'label': "Лонги (редакция)"
    },
    {
        'query': """
                select date_trunc('week', created) as time_window, count(distinct posts.id)
                from posts
                join post_tags tags
                    on posts.id = tags.post_id
                        and posts.type = 1
                        and posts.created between '2019-02-01' and '2020-07-20'
                        and posts.is_editorial = false
                        and tags.value in ('#лонг', '#лонгрид', '#long', '#longread')
                group by time_window
                order by time_window
            """,
        'label': "Лонги (UGC)"
    },
],
               title="Новые лонги",
               x_label="Время",
               y_label="Новые лонги за неделю")
from src.common.sql_plot import SqlPlot

SqlPlot().show(sql_queries=[{
    'query':
    """
                select date_trunc('day', estimated_creation_time) as time_window, count(*)
                from post_errors
                where status_code = 403
                    and estimated_creation_time between '2016-01-01 00:00:00.000000' and '2021-01-01 00:00:00.000000'
                group by time_window
                order by time_window
            """
}],
               title="Посты в скрытых подсайтах",
               x_label="Время",
               y_label="Количество ошибок 403 за день")
Exemplo n.º 19
0
from src.common.sql_plot import SqlPlot

post_id = 220958

SqlPlot().show(
    sql_queries=[
        {
            'query': f"""
                select request_time, favorites
                from post_history
                where post_id = {post_id}
                order by id
            """
        }
    ],
    title="Закладки",
    x_label="Время",
    y_label="Закладки"
)