Example #1
0
def update_user_interests(date_from, date_to):
    """Recalculate implicit category interests for users active in a window.

    For each user with activity between `date_from` and `date_to`, counts the
    distinct channels they touched per category (directly, or via the channel
    of a video instance), then replaces their existing UserInterest rows with
    their top 10 categories by that count.
    """
    # Users with any activity in the window; restricts the main query below.
    active_users = readonly_session.query(UserActivity.user).filter(
        UserActivity.date_actioned.between(date_from, date_to)).subquery()
    activity_categories = readonly_session.query(
        UserActivity.user,
        Channel.category,
        func.count(func.distinct(Channel.id))
    ).outerjoin(
        VideoInstance,
        (UserActivity.object_type == 'video_instance') &
        (UserActivity.object_id == VideoInstance.id)
    ).filter(
        # Activity maps to a channel either directly or via the video's channel.
        ((UserActivity.object_type == 'channel') & (UserActivity.object_id == Channel.id)) |
        (VideoInstance.channel == Channel.id)
    ).filter(
        UserActivity.user.in_(active_users),
        # was `Channel.category != None`; isnot() is the idiomatic SQLAlchemy
        # form and matches usage elsewhere in this file.
        Channel.category.isnot(None)
    ).group_by('1, 2').order_by('1, 3 desc')

    # Rows arrive ordered by user then weight desc, so groupby yields each
    # user's categories best-first; keep only the top 10 per user.
    for user, categories in groupby(activity_categories, lambda x: x[0]):
        UserInterest.query.filter_by(user=user).delete()
        db.session.execute(UserInterest.__table__.insert(), [
            # Distinct names so the comprehension doesn't shadow `user`
            # (py2 list comprehensions leak their loop variables).
            dict(user=u, explicit=False, category=category, weight=weight)
            for u, category, weight in categories
        ][:10])
Example #2
0
    def index_old(self):
        """Render legacy retention stats: lifetime and active-day aggregates.

        Builds three tables ('all', '1day', '7day') containing the user count
        plus max/avg/stddev of lifetime (days between first and last recorded
        activity) and of distinct active days, filtered by a HAVING clause on
        lifetime.
        """
        from gviz_data_table import Table
        from rockpack.mainsite.services.user.models import User, UserActivity
        # Total registered users (with a refresh token) for the percentage cell.
        user_count = readonly_session.query(func.count(User.id)).\
            filter(User.refresh_token != '').scalar()
        header = ('user count', 'max lifetime', 'avg lifetime', 'stddev lifetime',
                  'max active days', 'avg active days', 'stddev active days')
        # Days between a user's first and last recorded activity.
        lifetime = func.date_part('days', func.max(UserActivity.date_actioned) -
                                  func.min(UserActivity.date_actioned)).label('lifetime')
        # Number of distinct calendar days with any activity.
        active_days = func.count(func.distinct(func.date(
            UserActivity.date_actioned))).label('active_days')
        activity = readonly_session.query(UserActivity.user, lifetime, active_days).\
            group_by(UserActivity.user)
        ctx = {}
        for key, having_expr in ('all', None), ('1day', lifetime > 1), ('7day', lifetime > 7):
            # from_self wraps the per-user grouped query in a subselect so the
            # aggregates below apply across users.
            data = activity.having(having_expr).from_self(
                func.count('*'),
                func.max(lifetime),
                func.avg(lifetime),
                func.stddev_samp(lifetime),
                func.max(active_days),
                func.avg(active_days),
                func.stddev_samp(active_days)
            ).one()
            table = Table([
                dict(id='metric', type=str),
                dict(id='value', type=float),
                dict(id='%', type=str),
            ])
            # Only the first metric (user count) gets a percentage-of-total cell.
            pdata = ('%d%%' % (data[0] * 100 / user_count),) + ('',) * 6
            table.extend(zip(*(header, map(float, data), pdata)))
            ctx['ret_%s_data' % key] = table.encode()

        return self.render('admin/retention_stats_old.html', **ctx)
Example #3
0
    def get_query(self, date_from, date_to):
        """Return cumulative counts of titled videos for each date in range.

        For every distinct date a video was added within [date_from,
        date_to], sums the per-day counts of all strictly earlier days.
        """
        from rockpack.mainsite.services.video.models import Video

        video_date = func.date(Video.date_added)

        # Distinct dates on which videos were added inside the window.
        dates = readonly_session.query(
            video_date.distinct().label('date_added')
        ).filter(
            Video.date_added.between(date_from, date_to)
        ).subquery()

        # Per-day counts of videos that have a link title (all time).
        daily = readonly_session.query(
            video_date.label('date_added'),
            func.count().label('count')
        ).filter(
            Video.link_title.isnot(None)
        ).group_by(video_date).subquery()

        # Cross-join and sum every earlier day's count per window date.
        return readonly_session.query(
            dates.c.date_added,
            func.sum(daily.c.count)
        ).filter(
            daily.c.date_added < dates.c.date_added
        ).group_by(dates.c.date_added)
Example #4
0
def get_users_categories(user_ids=None, start=None, stop=None):
    """Build a {User: [category, ...]} map from users' public channels.

    Counts non-deleted video instances per visible/public channel and adds
    the channel's category to the owner's list when
    `condition_for_category(user, channel, video_count)` passes; every user
    seen still gets at least an empty list.

    If `start` is given, only channels updated within [start, stop] —
    directly, via their video instances, or via their owner's update time —
    are considered.  `user_ids`, when given, restricts to those owners.
    """
    from rockpack.mainsite.services.video import models
    from rockpack.mainsite.services.user.models import User

    # One row per (user, channel) with the channel's live video count.
    query = db.session.query(User, models.Channel, func.count(models.VideoInstance.id)).outerjoin(
        models.Channel,
        (models.Channel.owner == User.id) &
        (models.Channel.deleted == False) &
        (models.Channel.visible == True) &
        (models.Channel.public == True)
    ).outerjoin(
        models.VideoInstance,
        (models.VideoInstance.channel == models.Channel.id) &
        (models.VideoInstance.deleted == False)
    ).options(
        lazyload(models.Channel.category_rel),
        contains_eager(models.Channel.owner_rel)
    )

    if start:
        # Channels touched in the window, via any of three update paths.
        updated_channels = readonly_session.query(distinct(models.Channel.id))\
            .filter(models.Channel.date_updated.between(start, stop))

        updated_instances = readonly_session.query(distinct(models.VideoInstance.channel))\
            .filter(models.VideoInstance.date_updated.between(start, stop))

        updated_users = readonly_session.query(distinct(models.Channel.id))\
            .join(User, User.id == models.Channel.owner)\
            .filter(User.date_updated.between(start, stop))

        unioned = updated_channels.union_all(updated_instances, updated_users).subquery()
        query = query.filter(models.Channel.id.in_(unioned))

    query = query.group_by(User.id, models.Channel.id).order_by(User.id)

    if user_ids:
        query = query.filter(User.id.in_(user_ids))

    category_map = {}

    for user, channel, video_count in query:
        if channel and channel.category and condition_for_category(user, channel, video_count):
            category_map.setdefault(user, []).append(channel.category)
        else:
            # Still register the user so callers see an (empty) entry.
            category_map.setdefault(user, [])

    app.logger.info('User category map size: %d', len(category_map))

    return category_map
Example #5
0
    def index(self):
        """Render user counts per (parent, child) category pair."""
        from rockpack.mainsite.services.video.models import Category
        ctx = {}

        parent_cat = aliased(Category)
        # NB: select-list order matters — rows unpack as (child, parent, id).
        categories = readonly_session.query(
            Category.name,
            parent_cat.name,
            Category.id
        ).filter(
            Category.parent == parent_cat.id
        ).group_by(
            parent_cat.name,
            Category.name,
            Category.id
        ).order_by(parent_cat.name, Category.name)

        cat_map = []
        for child_name, parent_name, child_id in categories:
            # Ask the search index for the number of users in this category;
            # paging of (1, 0) fetches no rows, only the total.
            user_search = search.UserSearch()
            user_search.add_term('category', child_id)
            user_search.set_paging(1, 0)
            user_search.users()
            link = '/admin/stats/usercategories/%s/' % child_id
            cat_map.append((parent_name, child_name, user_search.total, link))

        ctx['cat_map'] = cat_map

        return self.render('admin/user_cat_stats.html', **ctx)
Example #6
0
    def index(self):
        """Render a retention cohort table.

        Users are grouped into cohorts by join week/month; for each cohort
        the table shows, per subsequent interval, the percentage (and count)
        of cohort members with activity in that interval.  Query-string
        filters: activity source, interval, interval_count, gender, locale
        and age band.
        """
        from gviz_data_table import Table
        from rockpack.mainsite.services.user.models import User, UserActivity, UserAccountEvent
        # Choose which event stream counts as "activity".
        if request.args.get('activity') == 'activity':
            activity_model, activity_date = UserActivity, UserActivity.date_actioned
        else:
            activity_model, activity_date = UserAccountEvent, UserAccountEvent.event_date

        try:
            interval_count = int(request.args['interval_count'])
        except Exception:
            interval_count = 10

        interval = request.args.get('interval')
        if interval not in ('week', 'month'):
            interval = 'week'

        # Cohort key: week/month number of the user's join date.
        cohort = func.date_part(interval, User.date_joined)
        cohort_label = func.min(func.date(User.date_joined))
        # Whole intervals elapsed between joining and the activity.
        active_interval = (func.date_part(interval, activity_date) - cohort).label('active_interval')

        q = readonly_session.query(User).filter(
            User.date_joined > LAUNCHDATE, User.refresh_token != '')
        if request.args.get('gender') in ('m', 'f'):
            q = q.filter(User.gender == request.args['gender'])
        if request.args.get('locale') in app.config['ENABLED_LOCALES']:
            q = q.filter(User.locale == request.args['locale'])
        if request.args.get('age') in ('13-18', '18-25', '25-35', '35-45', '45-55'):
            age1, age2 = map(int, request.args['age'].split('-'))
            q = q.filter(between(
                func.age(User.date_of_birth),
                text("interval '%d years'" % age1),
                text("interval '%d years'" % age2)
            ))

        # Map (cohort, interval) -> distinct active user count.
        active_users = dict(
            ((c, int(w)), u) for c, w, u in
            q.join(
                activity_model,
                (activity_model.user == User.id) &
                (activity_date >= User.date_joined)
            ).group_by(cohort, active_interval).values(
                cohort, active_interval, func.count(func.distinct(activity_model.user))
            )
        )

        table = Table(
            [dict(id='cohort', type=date)] +
            [dict(id='%s%d' % (interval, i), type=str) for i in range(interval_count)]
        )

        # One row per cohort: label plus "pct% (count)" cells per interval.
        totals = q.group_by(cohort).order_by(cohort)
        for c, l, t in totals.values(cohort, cohort_label, func.count()):
            data = []
            for i in range(interval_count):
                a = active_users.get((c, i), '')
                # Falsy lookup -> empty cell; otherwise percent of cohort total.
                data.append(a and '%d%% (%d)' % (ceil(a * 100.0 / t), a))
            table.append([l] + data)

        return self.render('admin/retention_stats.html', data=table.encode())
Example #7
0
def create_new_repack_notifications(date_from=None, date_to=None, user_notifications=None):
    """Create notifications telling channel owners their videos were repacked.

    Finds video instances (optionally restricted to [date_from, date_to))
    that were copied from a source channel into another user's public,
    non-favourite channel, and records a repack notification for the owner
    of the source channel.

    If `user_notifications` is a dict, owners whose message type appears in
    PUSH_NOTIFICATION_MAP are added to it as keys (value None).
    """
    packer_channel = aliased(Channel, name="source_channel")
    packer_user = aliased(User, name="packer_user")
    repacker_channel = aliased(Channel, name="repacker_channel")
    repacker_user = aliased(User, name="repacker_user")

    activity_window = readonly_session.query(VideoInstance, packer_channel, repacker_channel, repacker_user).join(
        packer_channel,
        packer_channel.id == VideoInstance.source_channel
    ).join(
        packer_user,
        packer_user.id == packer_channel.owner
    ).join(
        repacker_channel,
        (repacker_channel.id == VideoInstance.channel) &
        (repacker_channel.favourite == False) &
        (repacker_channel.public == True)
    ).join(
        repacker_user,
        repacker_user.id == repacker_channel.owner
    )
    if date_from:
        activity_window = activity_window.filter(VideoInstance.date_added >= date_from)
    if date_to:
        activity_window = activity_window.filter(VideoInstance.date_added < date_to)

    # Row-local names: previously this loop rebound the aliased mapper names
    # (`packer_channel` etc.) and shadowed the `type` builtin.
    for video_instance, source_channel, repack_channel, repacker in activity_window:

        user, message_type, body = repack_message(repacker, repack_channel, video_instance)

        _add_user_notification(source_channel.owner, video_instance.date_added, message_type, body)
        if user_notifications is not None and message_type in app.config['PUSH_NOTIFICATION_MAP']:
            user_notifications.setdefault(source_channel.owner, None)
Example #8
0
    def users(self, cat_id):
        """Render the paged list of users for a single category."""
        ctx = {}

        page_size = 50
        users = search.UserSearch()
        users.add_term('category', cat_id)
        # `p` is a zero-based page index; offset into results by whole pages.
        users.set_paging(int(request.args.get('p', 0)) * page_size, page_size - 1)
        result = users.users()

        from rockpack.mainsite.services.video.models import Category

        parent = aliased(Category)
        query = readonly_session.query(
            Category.name,  # order is important
            parent.name,
            Category.id
        ).filter(
            Category.parent == parent.id,
            Category.id == cat_id
        )

        # Default `p` to 0 to match the paging above — it previously
        # defaulted to 1 here, so the link skipped a page when `p` was absent.
        ctx['next_page'] = int(request.args.get('p', 0)) + 1
        ctx['category'] = query.one()
        ctx['single_cat'] = True
        ctx['users'] = result
        return self.render('admin/user_cat_stats.html', **ctx)
Example #9
0
    def import_average_category(self, channel_ids=None, start=None, stop=None, automatic_flush=True):
        """Recompute each channel's dominant category from its video
        instances and push the result to the Elasticsearch channel index."""
        from rockpack.mainsite.services.video.models import VideoInstance, Channel

        query = readonly_session.query(VideoInstance.category, Channel.id).join(Channel, Channel.id == VideoInstance.channel).order_by(Channel.id)

        if channel_ids:
            query = query.filter(Channel.id.in_(channel_ids))

        if start:
            query = query.filter(Channel.date_updated.between(start, stop))

        # channel id -> {instance category: occurrence count}
        category_map = {}
        for instance_cat, channel_id in query:
            counts = category_map.setdefault(channel_id, {})
            counts[instance_cat] = counts.get(instance_cat, 0) + 1

        app.logger.info('Channel category map size: %d', len(category_map))

        updater = ESChannel.updater(bulk=True)
        for channel_id, cat_counts in category_map.iteritems():
            updater.set_document_id(channel_id)
            updater.add_field('category', main_category(cat_counts))
            updater.update()
            updater.reset()

        if automatic_flush:
            self.conn.flush_bulk(forced=True)
Example #10
0
    def get_query(self, date_from, date_to):
        """Build the per-channel stats query for the given date range.

        Unions user-activity rows and share-link rows into a common
        (channel, action, click_count) shape, resolving video-instance
        object ids to their owning channel, then joins to Channel and its
        owner User.  Column selection and aggregation are delegated to
        self._cols and self._agg_query.
        """
        from rockpack.mainsite.services.video.models import Channel, VideoInstance
        from rockpack.mainsite.services.user.models import User, UserActivity
        from rockpack.mainsite.services.share.models import ShareLink

        # Activity rows: object_id may be a channel or a video instance;
        # coalesce picks the instance's channel when the outer join matched.
        activity = readonly_session.query(
            func.coalesce(VideoInstance.channel, UserActivity.object_id).label('channel'),
            UserActivity.action.label('action'),
            literal(0).label('click_count')
        ).select_from(
            UserActivity
        ).outerjoin(
            VideoInstance, VideoInstance.id == UserActivity.object_id
        ).filter(
            UserActivity.date_actioned.between(date_from, date_to)
        )
        # Share rows carry their click counts and a fixed 'share' action.
        shares = readonly_session.query(
            func.coalesce(VideoInstance.channel, ShareLink.object_id).label('channel'),
            literal('share').label('action'),
            ShareLink.click_count.label('click_count')
        ).select_from(
            ShareLink
        ).outerjoin(
            VideoInstance, VideoInstance.id == ShareLink.object_id
        ).filter(
            ShareLink.date_created.between(date_from, date_to)
        )
        activity = activity.union_all(shares).subquery()

        query = readonly_session.query(
            *self._cols(Channel, User, activity)
        ).select_from(
            Channel
        ).join(
            activity, activity.c.channel == Channel.id
        ).join(
            User, User.id == Channel.owner
        )

        return self._agg_query(query, Channel, User, activity)
Example #11
0
    def get_query(self, date_from, date_to):
        """Build the per-video stats query for the given date range.

        Unions video-instance user activity and share links into
        (video_instance, action, click_count) rows, then aggregates per
        Video: total activity rows, distinct instance count, one summed
        column per action listed in self.counts[2:-1], and total share
        clicks.
        """
        from rockpack.mainsite.services.video.models import Video, VideoInstance
        from rockpack.mainsite.services.user.models import UserActivity
        from rockpack.mainsite.services.share.models import ShareLink

        activity = readonly_session.query(
            UserActivity.object_id.label('video_instance'),
            UserActivity.action.label('action'),
            literal(0).label('click_count')
        ).filter(
            UserActivity.object_type == 'video_instance',
            UserActivity.date_actioned.between(date_from, date_to)
        )
        shares = readonly_session.query(
            ShareLink.object_id.label('video_instance'),
            literal('share').label('action'),
            ShareLink.click_count.label('click_count')
        ).filter(
            ShareLink.object_type == 'video_instance',
            ShareLink.date_created.between(date_from, date_to)
        )
        activity = activity.union_all(shares).subquery()

        query = readonly_session.query(
            Video.id,
            Video.title,
            func.count(activity.c.video_instance),
            func.count(VideoInstance.id.distinct()),
            # One boolean-cast sum per action name, plus total click count.
            *([func.sum(func.cast(activity.c.action == c, Integer)) for c in self.counts[2:-1]] +
              [func.sum(activity.c.click_count)])
        ).join(
            VideoInstance, VideoInstance.video == Video.id
        ).join(
            activity, activity.c.video_instance == VideoInstance.id
        ).group_by(
            Video.id
        )

        return query
Example #12
0
 def index(self):
     """Render app download/update statistics tables.

     NOTE(review): this snippet appears truncated — the final `table`
     built here is populated further down in the original file.
     """
     from gviz_data_table import Table
     from rockpack.mainsite.services.user.models import User
     from .models import AppDownloadRecord
     ctx = {}
     for action in 'download', 'update':
         # Daily totals per action; '1' groups/orders by the first column.
         table = Table((dict(id='date', type=date), dict(id='count', type=long)))
         table.extend(
             readonly_session.query(AppDownloadRecord).filter_by(action=action).
             group_by('1').order_by('1').
             values(AppDownloadRecord.date, func.sum(AppDownloadRecord.count))
         )
         ctx['%s_data' % action] = table.encode()
     table = Table([dict(id='date', type=date)] +
                   [dict(id=i, type=long) for i in 'Total', 'US', 'UK', 'Other'])
Example #13
0
def create_unavailable_notifications(date_from=None, date_to=None, user_notifications=None):
    """Notify channel owners about videos that have become unavailable.

    Scans non-visible videos (optionally restricted to updates within
    [date_from, date_to)) and records one notification per affected
    instance.  If `user_notifications` is a dict, users whose message type
    appears in PUSH_NOTIFICATION_MAP are added to it as keys (value None).
    """
    query = readonly_session.query(VideoInstance, Video, Channel).join(
        Video,
        Video.id == VideoInstance.video
    ).join(
        Channel,
        Channel.id == VideoInstance.channel
    ).options(
        joinedload(Channel.owner_rel)
    ).filter(
        Video.visible == False
    )
    if date_from:
        query = query.filter(Video.date_updated >= date_from)
    if date_to:
        query = query.filter(Video.date_updated < date_to)

    for video_instance, video, channel in query:
        user, message_type, message = unavailable_video_message(channel, video_instance)
        _add_user_notification(user, video.date_updated, message_type, message)
        if user_notifications is not None and message_type in app.config['PUSH_NOTIFICATION_MAP']:
            user_notifications.setdefault(user, None)
Example #14
0
def _post_activity_to_recommender(date_from, date_to):
    """Feed weighted (user, channel, score) activity rows to the recommender."""
    from rockpack.mainsite.core import recommender

    # Per-action score; any action not listed scores 1.
    weights = (
        (UserActivity.action == 'subscribe', 10),
        (UserActivity.action == 'star', 5),
        (UserActivity.action == 'select', 5),
        (UserActivity.action == 'unsubscribe', -5),
    )
    query = readonly_session.query(
        UserActivity.user,
        Channel.id,
        case(weights, else_=1)
    ).outerjoin(
        VideoInstance,
        (UserActivity.object_type == 'video_instance') &
        (UserActivity.object_id == VideoInstance.id)
    ).filter(
        # The activity targets a channel directly, or via a video's channel.
        ((UserActivity.object_type == 'channel') & (UserActivity.object_id == Channel.id)) |
        (VideoInstance.channel == Channel.id)
    )
    if date_from:
        query = query.filter(UserActivity.date_actioned.between(date_from, date_to))
    # Stream rows in batches to keep memory bounded.
    recommender.load_activity(query.yield_per(1000))
Example #15
0
def set_most_influential_for_video(video_ids):
    """Flag, for each given video, the instance with the most child repacks.

    A "child" of an instance is another instance of the same video whose
    source_channel is that instance's channel.  Per video, the instance with
    the highest child count wins; ties are broken in favour of an instance
    with no source channel (an original rather than a repack).
    """
    child = aliased(VideoInstance, name="child")
    query = (
        readonly_session.query(
            VideoInstance.id, VideoInstance.video, child.source_channel, func.count(VideoInstance.id)
        )
        .outerjoin(child, (VideoInstance.video == child.video) & (VideoInstance.channel == child.source_channel))
        .join(Video, (Video.id == VideoInstance.video) & (Video.visible == True) & (VideoInstance.deleted == False))
        .join(Channel, (Channel.id == VideoInstance.channel) & (Channel.public == True))
    )

    query = query.filter(Video.id.in_(video_ids))

    query = query.group_by(VideoInstance.id, VideoInstance.video, child.source_channel)

    instance_counts = {}
    influential_index = {}

    for _id, video, source_channel, count in query.yield_per(6000):
        # Set the count for the video instance
        instance_counts[(_id, video)] = count
        # If the count is higher than for the previous instance of the same
        # video, mark this instance as the influential one for that video.
        i_id, i_count = influential_index.get(video, (None, 0))

        # The outer join makes count at least 1; an instance with no
        # children (no source_channel match) should really score 0.
        if not source_channel and count == 1:
            count = 0
        if (count > i_count) or (count == i_count) and not source_channel:
            influential_index[video] = (_id, count)

    for (_id, video), count in instance_counts.iteritems():
        # Use a tuple default so a missing entry can't raise IndexError.
        # NB: the original `break`-ed after the first match, so only a single
        # video ever got flagged; every winning instance must be updated.
        if influential_index.get(video, (None,))[0] == _id:
            VideoInstance.query.filter(VideoInstance.id == _id).update({"most_influential": True})
Example #16
0
        # Build one (date, count) table per app-store action type.
        for action in 'download', 'update':
            table = Table((dict(id='date', type=date), dict(id='count', type=long)))
            table.extend(
                readonly_session.query(AppDownloadRecord).filter_by(action=action).
                group_by('1').order_by('1').
                values(AppDownloadRecord.date, func.sum(AppDownloadRecord.count))
            )
            ctx['%s_data' % action] = table.encode()
        # Registrations per day, split into Total/US/UK/Other by locale.
        table = Table([dict(id='date', type=date)] +
                      [dict(id=i, type=long) for i in 'Total', 'US', 'UK', 'Other'])
        table.extend(
            readonly_session.query(User).filter(
                User.date_joined > LAUNCHDATE, User.refresh_token != None).
            group_by('1').order_by('1').
            values(
                func.date(User.date_joined),
                func.count('*'),
                func.sum(func.cast(User.locale == 'en-us', Integer)),
                func.sum(func.cast(User.locale == 'en-gb', Integer)),
                func.sum(func.cast(User.locale.notin_(('en-us', 'en-gb')), Integer)),
            )
        )
        ctx['reg_data'] = table.encode()
        return self.render('admin/app_stats.html', **ctx)


class ActivityStatsView(StatsView):
    @expose('/')
    def index(self):
        from gviz_data_table import Table
        from rockpack.mainsite.services.user.models import UserActivity
        table = Table([dict(id='date', type=date)] +
Example #17
0
def load_users_into_mailchimp():
    """Load records from user table to mailchimp list."""
    from mailchimp import Mailchimp
    # Users flagged as bouncing or unsubscribed are excluded from the sync.
    excluded_users = UserFlag.query.filter(
        UserFlag.flag.in_(('bouncing', 'unsub3'))).with_entities(UserFlag.user)
    # One row per (user, parent category) with the summed interest weight;
    # the user columns repeat across each user's rows.
    users = readonly_session.query(
        User.id,
        User.username,
        User.email,
        User.first_name,
        User.last_name,
        User.gender,
        User.locale,
        User.date_joined,
        ParentCategory.name.label('interest_name'),
        func.sum(UserInterest.weight).label('interest_weight'),
    ).filter(
        User.is_active == True,
        User.email != '',
        User.id.notin_(excluded_users),
    ).outerjoin(
        UserInterest, UserInterest.user == User.id
    ).outerjoin(
        Category, Category.id == UserInterest.category
    ).outerjoin(
        ParentCategory, ParentCategory.id == Category.parent
    ).group_by(
        User.id, ParentCategory.id
    )
    # TODO: chunking
    batch = []
    # Ordered by user id so groupby yields exactly one group per user.
    for userid, group in groupby(users.order_by(User.id), lambda u: u[0]):
        usergroup = list(group)
        user = usergroup[0]
        merge_vars = dict(
            fname=user.first_name,
            lname=user.last_name,
            username=user.username,
            gender={'m': 'Male', 'f': 'Female'}.get(user.gender, 'Unknown'),
            locale=user.locale,
            datejoined=datetime.strftime(user.date_joined, '%m/%d/%Y'),
        )
        # Parent-category names become Mailchimp "Interest" groupings.
        interests = [u.interest_name for u in usergroup if u.interest_name]
        if interests:
            merge_vars['groupings'] = [dict(name='Interest', groups=interests)]
        batch.append(dict(
            email=dict(email=user.email),
            email_type='html',
            merge_vars=merge_vars,
        ))
    conn = Mailchimp(app.config['MAILCHIMP_TOKEN'])
    response = conn.lists.batch_subscribe(
        app.config['MAILCHIMP_LISTID'],
        batch,
        double_optin=False,
        update_existing=True,
        replace_interests=True)
    # Only the first error is logged; counts come from the Mailchimp response.
    if response['error_count']:
        app.logger.error('Error loading users into mailchimp: %s', response['errors'][0]['error'])
    else:
        app.logger.info('Loaded users into mailchimp: %d added, %d updated', response['add_count'], response['update_count'])
Example #18
0
    def import_channel_share(self, automatic_flush=True):
        """Recompute per-locale normalised channel rank and push it to ES.

        Combines three time-decayed signals per channel — video stars,
        channel shares and video shares — each normalised against the
        locale-wide min/max, and writes the summed 'norm*' scores to
        normalised_rank['<locale>'] on the channel documents.
        """
        from rockpack.mainsite.services.share.models import ShareLink
        from rockpack.mainsite.services.user.models import UserActivity, User
        from rockpack.mainsite.services.video.models import VideoInstance, Channel

        total = 0
        missing = 0
        start_time = time.time()

        def _normalised(val, max_val, min_val):
            # Scale val into the observed range; degenerate ranges yield 0.
            try:
                return (val - min_val) / (abs(max_val) - abs(min_val))
            except (ZeroDivisionError, decimal.DivisionByZero, decimal.InvalidOperation):
                return 0

        # NOTE(review): appears unused within this method.
        def _update_channel_id(id, val, max_val, min_val):
            channel_dict[id] = channel_dict.setdefault(id, 0) + _normalised(val, max_val, min_val)

        # The strength of actions decay until any older than zulu have no effect
        # NOTE(review): zulu uses naive local time (datetime.now) while the
        # decay below uses utcnow — confirm the offset is intentional.
        zulu = datetime.now() - timedelta(days=app.config.get('CHANNEL_RANK_ZULU', 1))
        time_since_zulu = (datetime.utcnow() - zulu).total_seconds()

        for locale in ['en-gb', 'en-us']:
            app.logger.debug('starting for %s', locale)
            channel_dict = {}
            channel_shares = {}

            # Linear decay weight per action: ~1 for recent, ~0 near zulu.
            summation = func.sum(
                (time_since_zulu - (func.extract('epoch', datetime.utcnow()) - func.extract('epoch', UserActivity.date_actioned))) / time_since_zulu
            )

            # activity for channels from videos
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                VideoInstance, VideoInstance.channel == Channel.id
            ).join(
                UserActivity, UserActivity.object_id == VideoInstance.id
            ).join(
                User, User.id == UserActivity.user
            ).filter(
                UserActivity.action == 'star',
                UserActivity.object_type == 'video_instance',
                UserActivity.date_actioned > zulu,
                User.locale == locale
            ).group_by(Channel.id)

            # Locale-wide min/max of the decayed sums, for normalisation.
            summed = query.subquery().columns.summed
            q_max, q_min = UserActivity.query.session.query(func.max(summed), func.min(summed)).one()

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_dict[id]['user_activity'] = [count, _normalised(count, q_max, q_min)]
                channel_dict[id]['norm_user_activity'] = _normalised(count, q_max, q_min)

            app.logger.debug('user activity done')

            # Same decay, but over share-link creation times.
            summation = func.sum(
                (time_since_zulu - (func.extract('epoch', datetime.utcnow()) - func.extract('epoch', ShareLink.date_created))) / time_since_zulu
            )

            # activity for channel shares
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                ShareLink,
                ShareLink.object_id == Channel.id
            ).join(
                User, User.id == ShareLink.user
            ).filter(
                Channel.deleted == False,
                Channel.public == True,
                ShareLink.object_type == 'channel',
                ShareLink.date_created > zulu,
                ShareLink.click_count > 0,
                User.locale == locale
            ).group_by(Channel.id)

            summed = query.subquery().columns.summed

            q_max, q_min = ShareLink.query.session.query(func.max(summed), func.min(summed)).one()
            # Kept for combining with video-share normalisation below.
            channel_share_vals = (q_max, q_min)

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_shares[id] = count
                channel_dict[id]['share_link_channel'] = [count, _normalised(count, q_max, q_min)]

            app.logger.debug('channel shares done')
            # activity for videos shares of channels
            # (reuses the ShareLink-based `summation` defined above)
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                VideoInstance,
                VideoInstance.channel == Channel.id
            ).join(
                ShareLink,
                ShareLink.object_id == VideoInstance.id
            ).join(
                User, User.id == ShareLink.user
            ).filter(
                Channel.deleted == False,
                Channel.public == True,
                ShareLink.object_type == 'video_instance',
                ShareLink.date_created > zulu,
                ShareLink.click_count > 0,
                User.locale == locale
            ).group_by(Channel.id)

            summed = query.subquery().columns.summed

            q_max, q_min = ShareLink.query.session.query(func.max(summed), func.min(summed)).one()

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_dict[id]['share_link_video'] = [count, _normalised(count, q_max, q_min)]
                val = channel_shares.get(id, 0)
                # We may get None returned in the data
                if None in channel_share_vals:
                    channel_share_vals = [0, 0]
                # Combine channel-share and video-share counts against the
                # summed ranges of both signals.
                channel_dict[id]['norm_share_link_channel'] = channel_dict[id].setdefault('norm_share_link_channel', 0) + _normalised(count + val, q_max + channel_share_vals[0], q_min + channel_share_vals[1])

            app.logger.debug('video shares done')

            app.logger.debug('... updating elasticsearch for %s ...', locale)

            done = 1
            i_total = len(channel_dict)
            ec = ESChannel.updater(bulk=True)
            for id, _dict in channel_dict.iteritems():
                try:
                    # Only the 'norm*' keys contribute to the final score.
                    count = 0
                    for k, v in _dict.iteritems():
                        if k.startswith('norm'):
                            count += v

                    if count == 0:
                        continue

                    ec.set_document_id(id)
                    ec.add_field('normalised_rank[\'%s\']' % locale, float(count))
                    ec.update()

                except exceptions.DocumentMissingException:
                    # Channel not in the ES index; count and move on.
                    missing += 1
                finally:
                    ec.reset()
                total += 1
                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, i_total)
                done += 1

            if automatic_flush:
                ESChannel.flush()

        app.logger.debug('%s total updates in two passes. finished in %s seconds (%s channels not in es)', total, time.time() - start_time, missing)
Example #19
0
    def import_dolly_repin_counts(self, prefix=None, start=None, stop=None, automatic_flush=True):
        """Recompute repin ("child instance") counts per video instance and
        push them to Elasticsearch.

        For every visible, non-deleted VideoInstance, counts the instances
        that were repinned from it (children whose ``source_channel`` is this
        instance's channel and which share the same video).  Each ES video
        document gets ``child_instance_count`` set, and exactly one instance
        per video is flagged ``most_influential``.

        :param prefix: optional id prefix; restricts the scan to instances
            whose id starts with it (``_`` is escaped for the LIKE pattern).
        :param start: optional datetime; with `stop`, restricts to videos
            having an instance updated in [start, stop].
        :param stop: upper bound for the `start` window.
        :param automatic_flush: when True, flush ESVideo after all updates.
        """
        from rockpack.mainsite.services.video.models import VideoInstance, Video

        with app.test_request_context():
            # Self-join: `child` rows are instances repinned from the outer
            # instance's channel for the same video.  The outer join keeps
            # instances with no children (count will still be >= 1, see below).
            child = aliased(VideoInstance, name='child')
            query = readonly_session.query(
                VideoInstance.id,
                VideoInstance.video,
                child.source_channel,
                func.count(VideoInstance.id)
            ).outerjoin(
                child,
                (VideoInstance.video == child.video) &
                (VideoInstance.channel == child.source_channel)
            ).join(
                Video,
                (Video.id == VideoInstance.video) &
                (Video.visible == True) &
                (VideoInstance.deleted == False))

            if prefix:
                # Escape `_` so it is matched literally, not as a LIKE wildcard.
                query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

            if start:
                # NOTE(review): `stop` is only used when `start` is given;
                # presumably callers always pass both together — verify.
                video_ids = VideoInstance.query.filter(
                    VideoInstance.date_updated.between(start, stop)
                ).with_entities(VideoInstance.video).subquery()

                query = query.filter(Video.id.in_(video_ids))

            query = query.group_by(VideoInstance.id, VideoInstance.video, child.source_channel)

            # (instance id, video id) -> child count, written to ES below.
            instance_counts = {}
            # video id -> (instance id, count) of the current best candidate.
            influential_index = {}

            total = query.count()
            done = 1

            for _id, video, source_channel, count in query.yield_per(6000):
                # Set the count for the video instance
                instance_counts[(_id, video)] = count
                # If the count is higher than for the previous instance of the
                # same video, mark this instance as the influential one.
                i_id, i_count = influential_index.get(video, [None, 0])

                # Count will always be at least 1 (the outer join emits one
                # row even with no children) but should really be zero if
                # there are no children, so correct it here.
                if not source_channel and count == 1:
                    count = 0
                # Precedence: `(count > i_count) or ((count == i_count) and
                # not source_channel)` — ties are broken in favour of
                # instances that were not themselves repins.
                if (count > i_count) or\
                        (count == i_count) and not source_channel:
                    influential_index.update({video: (_id, count,)})

                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, total)
                done += 1

            total = len(instance_counts)
            done = 1

            ec = ESVideo.updater(bulk=True)
            for (_id, video), count in instance_counts.iteritems():
                ec.set_document_id(_id)
                ec.add_field('child_instance_count', count)
                # NOTE(review): relies on every video seen above having an
                # influential_index entry (first row always qualifies); if
                # that ever breaks, `''[0]` would raise IndexError.
                ec.add_field('most_influential', True if influential_index.get(video, '')[0] == _id else False)
                ec.update()
                ec.reset()

                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, total)
                done += 1

            if automatic_flush:
                ESVideo.flush()
Пример #20
0
    def index(self):
        """Render the admin stats page.

        Always supplies per-category video counts; for non-Dolly deployments
        also supplies per-locale channel counts and channel totals.
        """
        from rockpack.mainsite.services.video import models

        all_channels = readonly_session.query(models.Channel)
        visible_public = all_channels.join(models.ChannelLocaleMeta).filter(
            models.ChannelLocaleMeta.visible == True,
            models.Channel.public == True)

        parent_cat = aliased(models.Category)

        # Video counts per (parent category, child category), ordered for display.
        category_totals = readonly_session.query(
            models.Category.name,   # order is important
            parent_cat.name,
            func.count(models.VideoInstance.id)
        ).outerjoin(models.VideoInstance).filter(
            models.Category.parent == parent_cat.id
        ).group_by(
            parent_cat.name,
            models.Category.name
        ).order_by(parent_cat.name, models.Category.name)

        kwargs = dict(
            now=datetime.now().strftime('%Y-%m-%d'),
            cat_group=category_totals.all(),
            cat_count=category_totals.count(),
            is_dolly=app.config.get('DOLLY'),
        )

        if not app.config.get('DOLLY'):
            # Channel counts per locale and category, excluding parent id 1.
            locale_totals = readonly_session.query(
                models.ChannelLocaleMeta.locale,
                parent_cat.name,
                models.Category.name,
                func.count(models.ChannelLocaleMeta.id)
            ).filter(
                models.Category.parent == parent_cat.id,
                models.Channel.category == models.Category.id
            ).filter(
                models.Category.parent != 1
            ).join(
                models.Channel, models.Channel.id == models.ChannelLocaleMeta.channel
            ).filter(
                models.Channel.public == True
            ).group_by(
                models.Category.parent_category,
                models.Category.name,
                parent_cat.name,
                models.ChannelLocaleMeta.locale
            ).order_by(parent_cat.name.desc())

            new_today = all_channels.filter(
                models.Channel.date_added >= datetime.now().strftime('%Y-%m-%d')).count()

            kwargs.update(
                total_channels=all_channels.count(),
                total_channels_today=new_today,
                public_channels=visible_public.count(),
                channels_today=new_today,
                channel_group=locale_totals.all(),
                channel_count=locale_totals.count(),
            )

        return self.render(
            'admin/stats.html',
            **kwargs
        )