Example #1
    def import_average_category(self, channel_ids=None, start=None, stop=None, automatic_flush=True):
        from rockpack.mainsite.services.video.models import VideoInstance, Channel

        query = (
            readonly_session.query(VideoInstance.category, Channel.id)
            .join(Channel, Channel.id == VideoInstance.channel)
            .order_by(Channel.id)
        )

        if channel_ids:
            query = query.filter(Channel.id.in_(channel_ids))

        if start:
            # A start date implies a stop date; together they bound the update window
            query = query.filter(Channel.date_updated.between(start, stop))

        # Count video instances per channel, keyed by category
        category_map = {}
        for instance_cat, channel_id in query:
            channel_cat_counts = category_map.setdefault(channel_id, {})
            channel_cat_counts[instance_cat] = channel_cat_counts.get(instance_cat, 0) + 1

        app.logger.info('Channel category map size: %d', len(category_map))

        # Write the dominant category for each channel to Elasticsearch in bulk
        ec = ESChannel.updater(bulk=True)
        for channel_id, c_map in category_map.iteritems():
            ec.set_document_id(channel_id)
            ec.add_field('category', main_category(c_map))
            ec.update()
            ec.reset()

        if automatic_flush:
            self.conn.flush_bulk(forced=True)
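
The main_category helper used above is not shown in this example; a minimal sketch of what it might look like, assuming it simply picks the category with the highest instance count (the behaviour is an assumption, not confirmed by the source):

    # Hypothetical helper, not part of the example above: return the category
    # with the highest instance count for a channel; ties resolve arbitrarily.
    def main_category(category_counts):
        return max(category_counts, key=category_counts.get)
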
Example #2
    def import_channels(self, start=None, stop=None, automatic_flush=True):
        from rockpack.mainsite.services.video.models import Channel, VideoInstance, Video

        with app.test_request_context():
            channels = Channel.query.filter(
                Channel.public == True,
                Channel.visible == True,
                Channel.deleted == False
            ).options(
                joinedload(Channel.category_rel),
                joinedload(Channel.metas),
                joinedload(Channel.owner_rel)
            )

            if start:
                channels = channels.filter(Channel.date_updated.between(start, stop))

            total = channels.count()

            app.logger.info('importing %d channels', total)

            start_time = time.time()
            ec = ESChannel.inserter(bulk=True)
            count = 1

            # Pre-compute the visible, non-deleted video count per channel
            query = VideoInstance.query.join(
                Video,
                (Video.id == VideoInstance.video) &
                (Video.visible == True)
            ).filter(VideoInstance.deleted == False).group_by(VideoInstance.channel)

            if start:
                # Restrict the counts selected to the channels we want
                query = query.join(
                    Channel,
                    (Channel.id == VideoInstance.channel) &
                    (Channel.date_updated.between(start, stop))
                )

            query = query.values(VideoInstance.channel, func.count(VideoInstance.id))

            video_counts = dict(query)

            for channel in channels.yield_per(6000):
                channel._video_count = video_counts.get(channel.id) or 0
                ec.insert(channel.id, channel)
                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(count, total)
                count += 1

            if automatic_flush:
                ec.flush_bulk()

            app.logger.debug('finished in %d seconds', time.time() - start_time)
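
The print_percent_complete helper called above belongs to the importer class and is not shown; a minimal sketch, assuming it simply logs progress as a percentage (the implementation here is an assumption):

    # Hypothetical progress helper, not shown in the example: log how far
    # through the import loop we are.
    def print_percent_complete(self, done, total):
        if total:
            app.logger.debug('%.1f%% complete (%d/%d)', done * 100.0 / total, done, total)
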
Example #3
    def import_channel_share(self, automatic_flush=True):
        from rockpack.mainsite.services.share.models import ShareLink
        from rockpack.mainsite.services.user.models import UserActivity, User
        from rockpack.mainsite.services.video.models import VideoInstance, Channel

        total = 0
        missing = 0
        start_time = time.time()

        def _normalised(val, max_val, min_val):
            # Min-max style normalisation; a zero or invalid range yields 0
            try:
                return (val - min_val) / (abs(max_val) - abs(min_val))
            except (ZeroDivisionError, decimal.DivisionByZero, decimal.InvalidOperation):
                return 0

        def _update_channel_id(id, val, max_val, min_val):
            # Accumulate a normalised score against a channel id
            channel_dict[id] = channel_dict.setdefault(id, 0) + _normalised(val, max_val, min_val)

        # The strength of an action decays with age; anything older than zulu
        # has no effect
        zulu = datetime.utcnow() - timedelta(days=app.config.get('CHANNEL_RANK_ZULU', 1))
        time_since_zulu = (datetime.utcnow() - zulu).total_seconds()

        for locale in ['en-gb', 'en-us']:
            app.logger.debug('starting for %s', locale)
            channel_dict = {}
            channel_shares = {}

            summation = func.sum(
                (time_since_zulu - (func.extract('epoch', datetime.utcnow()) - func.extract('epoch', UserActivity.date_actioned))) / time_since_zulu
            )

            # activity for channels from videos
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                VideoInstance, VideoInstance.channel == Channel.id
            ).join(
                UserActivity, UserActivity.object_id == VideoInstance.id
            ).join(
                User, User.id == UserActivity.user
            ).filter(
                UserActivity.action == 'star',
                UserActivity.object_type == 'video_instance',
                UserActivity.date_actioned > zulu,
                User.locale == locale
            ).group_by(Channel.id)

            summed = query.subquery().columns.summed
            q_max, q_min = UserActivity.query.session.query(func.max(summed), func.min(summed)).one()

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_dict[id]['user_activity'] = [count, _normalised(count, q_max, q_min)]
                channel_dict[id]['norm_user_activity'] = _normalised(count, q_max, q_min)

            app.logger.debug('user activity done')

            summation = func.sum(
                (time_since_zulu - (func.extract('epoch', datetime.utcnow()) - func.extract('epoch', ShareLink.date_created))) / time_since_zulu
            )

            # activity for channel shares
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                ShareLink,
                ShareLink.object_id == Channel.id
            ).join(
                User, User.id == ShareLink.user
            ).filter(
                Channel.deleted == False,
                Channel.public == True,
                ShareLink.object_type == 'channel',
                ShareLink.date_created > zulu,
                ShareLink.click_count > 0,
                User.locale == locale
            ).group_by(Channel.id)

            summed = query.subquery().columns.summed

            q_max, q_min = ShareLink.query.session.query(func.max(summed), func.min(summed)).one()
            channel_share_vals = (q_max, q_min)

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_shares[id] = count
                channel_dict[id]['share_link_channel'] = [count, _normalised(count, q_max, q_min)]

            app.logger.debug('channel shares done')
            # activity from shares of videos within channels
            query = readonly_session.query(
                distinct(Channel.id).label('channel_id'),
                summation.label('summed')
            ).join(
                VideoInstance,
                VideoInstance.channel == Channel.id
            ).join(
                ShareLink,
                ShareLink.object_id == VideoInstance.id
            ).join(
                User, User.id == ShareLink.user
            ).filter(
                Channel.deleted == False,
                Channel.public == True,
                ShareLink.object_type == 'video_instance',
                ShareLink.date_created > zulu,
                ShareLink.click_count > 0,
                User.locale == locale
            ).group_by(Channel.id)

            summed = query.subquery().columns.summed

            q_max, q_min = ShareLink.query.session.query(func.max(summed), func.min(summed)).one()

            for id, count in query.yield_per(6000):
                channel_dict.setdefault(id, {})
                channel_dict[id]['share_link_video'] = [count, _normalised(count, q_max, q_min)]
                val = channel_shares.get(id, 0)
                # We may get None returned in the data
                if None in channel_share_vals:
                    channel_share_vals = [0, 0]
                channel_dict[id]['norm_share_link_channel'] = (
                    channel_dict[id].setdefault('norm_share_link_channel', 0) +
                    _normalised(count + val, q_max + channel_share_vals[0], q_min + channel_share_vals[1])
                )

            app.logger.debug('video shares done')

            app.logger.debug('... updating elasticsearch for %s ...', locale)

            done = 1
            i_total = len(channel_dict)
            ec = ESChannel.updater(bulk=True)
            for id, _dict in channel_dict.iteritems():
                try:
                    count = 0
                    for k, v in _dict.iteritems():
                        if k.startswith('norm'):
                            count += v

                    if count == 0:
                        continue

                    ec.set_document_id(id)
                    ec.add_field('normalised_rank[\'%s\']' % locale, float(count))
                    ec.update()

                except exceptions.DocumentMissingException:
                    missing += 1
                finally:
                    ec.reset()
                total += 1
                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, i_total)
                done += 1

            if automatic_flush:
                ESChannel.flush()

        app.logger.debug('%s total updates in two passes. finished in %s seconds (%s channels not in es)', total, time.time() - start_time, missing)
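
The weight inside each SQL summation above decays linearly with the age of the action: an action that just happened contributes 1.0, and one that happened exactly at the zulu cut-off contributes 0.0. Restated in plain Python (names are illustrative, not from the source):

    # Illustrative restatement of the summation weight used above: the
    # contribution of an action falls linearly from 1.0 (just now) to 0.0
    # (at the zulu cut-off).
    def decay_weight(action_age_seconds, time_since_zulu):
        return (time_since_zulu - action_age_seconds) / time_since_zulu

    # With the default 1-day window, an action 6 hours old contributes 0.75
    assert decay_weight(6 * 3600.0, 24 * 3600.0) == 0.75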