Пример #1
0
 def __init__(self,
              channel=None,
              from_ts=None,
              to_ts=None,
              agents=None,
              statuses=None,
              languages=None,
              group_by='agent',
              plot_by='time',
              plot_type=None,
              no_transform=False,
              **kwargs):
     """Normalize the query parameters and keep them on the instance.

     :param channel: stored as given and forwarded to ``ensure_agents``
     :param from_ts: lower time bound, converted through ``Timeslot``
     :param to_ts: upper time bound; falls back to ``from_ts`` when falsy
     :param agents: resolved through ``self.ensure_agents``
     :param statuses: resolved through ``self.ensure_statuses``
     :param languages: iterable mapped through ``get_lang_id`` (may be None)
     :param group_by: kept only if it is 'time', 'agent' or 'lang'
     :param plot_by: stored as given
     :param plot_type: stored as given
     :param no_transform: stored as given
     :param kwargs: accepted so callers may pass extra arguments; unused here
     """
     self.channel = channel
     self.from_ts = Timeslot(from_ts).timeslot
     self.to_ts = Timeslot(to_ts or from_ts).timeslot
     # Agents are resolved with the caller-supplied group_by, i.e. before the
     # value is sanitized below.
     self.agents = self.ensure_agents(agents, group_by, channel)
     self.statuses = self.ensure_statuses(statuses)
     self.languages = map(get_lang_id, languages or [])

     # The UI may send an arbitrary grouping; anything unknown is dropped.
     supported_groupings = ('time', 'agent', 'lang')
     self.group_by = group_by if group_by in supported_groupings else None

     self.plot_by = plot_by
     self.plot_type = plot_type
     self.no_transform = no_transform
Пример #2
0
def __get_conversations(data):
    """Prepare the query params and run the first bulk query for conversations.

    :param data: dict-like request parameters. Keys read here: 'level'
        ("hour" or "day"), 'channel_id', 'from', 'to', 'categories',
        'limit', 'offset' and 'sort_by'.
    :returns: the ``Conversation.objects`` query restricted to closed
        conversations inside the requested channel/time range, with
        limit/offset applied and sorted by ``last_modified`` when
        ``sort_by`` is 'time'.
    :raises AssertionError: if 'level' is neither "hour" nor "day".
    :raises Exception: if a category is neither an int nor a string.
    """
    level = data.get('level')
    assert level in ("hour", "day"), level
    channel_ts_key = "channel_ts_day" if level == "day" else "channel_ts_hour"

    channel_id = data.get('channel_id')
    channel_ts_lower_bound = make_channel_ts(
        channel_id, Timeslot(data.get('from'), level))
    channel_ts_upper_bound = make_channel_ts(
        channel_id, Timeslot(data.get('to'), level))

    query = {
        channel_ts_key + "__lte": to_binary(channel_ts_upper_bound),
        channel_ts_key + "__gte": to_binary(channel_ts_lower_bound),
        "is_closed": True}

    categories = data.get('categories')
    if categories:
        categories_param = []
        for cat in categories:
            if isinstance(cat, int):
                # Already a category code.
                categories_param.append(cat)
            elif isinstance(cat, (str, unicode)):
                # Category given by name: translate it to its code.
                categories_param.append(
                    ConversationQualityTrends.get_category_code(cat))
            else:
                # The values must be interpolated with %; passing them as
                # extra Exception arguments leaves the placeholders unfilled.
                raise Exception(
                    "Wrong type for category param; value: %s, type: %s"
                    % (cat, type(cat)))
        query["quality__in"] = categories_param

    conversations = (Conversation.objects(**query)
        .limit(data.get('limit'))
        .skip(data.get('offset')))
    if 'time' == data.get('sort_by'):
        conversations = conversations.sort(last_modified=1)
    return conversations
Пример #3
0
    def postprocess_params(self, params):
        """Resolve raw request parameters in place and return them.

        The incoming mapping is mutated: ids and raw values are replaced by
        resolved objects and the raw keys are removed afterwards.

        :param params: dict of request parameters. 'agents', 'statuses',
            'plot_type', 'sentiments', 'intentions' and 'level' are read
            unconditionally; 'channel_id', 'from' and 'to' are optional.
        :returns: the same ``params`` object, updated.
        :raises AssertionError: if both 'sentiments' and 'intentions' are set.
        """
        r = params
        if 'channel_id' in r:
            r['channel'] = get_channels(self.user, r['channel_id'])
            set_languages_param(r)

        if 'from' in r and 'to' in r:
            from_date = r['from']
            # A missing upper bound means a single-point interval.
            to_date = r['to'] or from_date
            from_dt, to_dt = parse_date_interval(from_date, to_date)
            r['from_ts'] = Timeslot(from_dt, r['level'])
            r['to_ts'] = Timeslot(to_dt, r['level'])

        r['agents'] = get_agents(self.user, r['agents'] or [])

        r['statuses'] = get_statuses(r['statuses'], r['plot_type'])

        if r['sentiments'] is not None:
            assert r[
                'intentions'] is None, 'intentions and sentiments cannot be set together'
            r['intentions'] = translate_sentiments_to_intentions(
                r['sentiments'])

        # for some reports we show only problem posts
        if is_problem(r['plot_type']):
            r['intentions'] = [SATYPE_NAME_TO_ID_MAP['problem']]

        # -- cleanup --
        # 'channel_id' is optional above, so remove it tolerantly (like
        # 'from'/'to') instead of failing with KeyError when it is absent.
        r.pop('channel_id', None)
        r.pop('from', None)
        r.pop('to', None)
        del r['sentiments']
        del r['level']

        return params
Пример #4
0
    def get_prev_timeslot_range(from_ts, to_ts):
        """Return the pair of timeslots shifted back by the range length plus one day.

        Both bounds are moved back by ``(to - from) + 24h`` seconds, each
        keeping its own level.
        """
        seconds_per_day = 24 * 60 * 60
        start_sec = from_ts.timestamp
        end_sec = to_ts.timestamp

        shift = end_sec - start_sec + seconds_per_day
        previous_from = Timeslot.from_timestamp(start_sec - shift,
                                                level=from_ts.level)
        previous_to = Timeslot.from_timestamp(end_sec - shift,
                                              level=to_ts.level)
        return (previous_from, previous_to)
Пример #5
0
 def transform_data(self, data, features):
     """Transform data we got from mongodb into data we can plot in the UI,
     based on the features list.

     :param data: aggregation output, passed through to the plot transformer
     :param features: feature list, passed through to the plot transformer
     :returns: dict with keys "ok", "level" and "list" (the transformed values)
     :raises AssertionError: if from_ts and to_ts have different levels
     :raises ValueError: if ``self.plot_by`` is not 'time' or 'distribution'
     """
     level = Timeslot(self.from_ts).level
     assert level == Timeslot(self.to_ts).level
     if self.plot_by == 'time':
         results = self.transform_time_based_plot(data, features, level)
     elif self.plot_by == 'distribution':
         results = self.transform_distribution_plot(data, features, level)
     else:
         # Without this branch `results` stays unbound and the return below
         # fails with an uninformative UnboundLocalError.
         raise ValueError("Unsupported plot_by value: %r" % (self.plot_by,))
     return {"ok": True, "level": level, "list": results.values()}
Пример #6
0
    def by_time_span(self, channel=None, parent_topic=None, intentions=None, statuses=None,
                     agents=None, languages=None, from_ts=None, to_ts=None, limit=100):
        """Build the match queries for a time span and run the aggregation pipeline.

        Author's outline of the intended aggregation (implemented in
        ``execute_pipeline``, not visible here): match on channel + slot +
        hashed_parents [+ status [+ intention_type]], group on topic,
        sort by count descending, then limit.

        :param channel: passed to ``get_channel_num``
        :param parent_topic: topic whose hash is matched; None matches ``[]``
        :param intentions: intentions to filter on; defaults to all intentions
        :param statuses: statuses to filter on; defaults to every known status
        :param agents: objects exposing ``agent_id``; defaults to all agents
        :param languages: languages mapped through ``get_lang_id``
        :param from_ts: start of the range
        :param to_ts: end of the range; falls back to ``from_ts``
        :param limit: forwarded to ``execute_pipeline``
        :raises AssertionError: if the range spans more than 7 timeslots
        """
        field = ChannelHotTopics.F

        from_ts = Timeslot(from_ts).timeslot
        to_ts = Timeslot(to_ts or from_ts).timeslot

        slots = list(gen_timeslots(from_ts, to_ts, closed_range=False))
        assert len(slots) <= 7, "Max allowed range is 7 days, got %s %s" % (len(slots), slots)

        # A single slot is matched by equality, anything else with $in.
        slot_condition = slots[0] if len(slots) == 1 else {"$in": slots}
        time_query = {field("time_slot"): slot_condition}

        channel_num = get_channel_num(channel)
        parents = [] if parent_topic is None else get_topic_hash(parent_topic)

        # De-duplicate before translating names to ids/codes.
        intention_ids = map(get_intention_id,
                            set(intentions or [ALL_INTENTIONS_ID]))
        statuses = map(get_status_code,
                       set(statuses or SpeechActMap.STATUS_NAME_MAP))
        languages = map(get_lang_id, languages or [])

        match_query_base = {
            field("channel_num"): channel_num,
            field("status"): {"$in": statuses},
            field("hashed_parents"): parents,
        }
        match_query_base.update(time_query)

        agent_ids = [agent.agent_id for agent in (agents or [])] or [ALL_AGENTS]

        match_query_filters = {
            "es.at": {"$in": agent_ids},
            "es.in": {"$in": intention_ids}
        }
        match_query_filters.update(make_lang_query(languages))

        return self.execute_pipeline(match_query_base, match_query_filters, limit)
Пример #7
0
    def test_pack_unpack_stats_id(self):
        """pack_stats_id / unpack_stats_id round-trip raw and high-level values."""
        # Raw numeric components: channel_num, topic_hash, status code, timeslot.
        raw_parts = (1234, 987654, 2, 1122334)
        self.assertEqual(raw_parts, unpack_stats_id(pack_stats_id(*raw_parts)))

        # High-level components are translated to their numeric form on packing.
        slot = Timeslot('2013-04-22 21:40')
        channel = self.channel
        topic = "Hello World"
        status = 'accepted'

        unpacked = unpack_stats_id(pack_stats_id(channel, topic, status, slot))

        self.assertEqual(channel.counter, unpacked[0])
        self.assertEqual(get_topic_hash(topic), unpacked[1])
        self.assertEqual(get_status_code(status), unpacked[2])
        self.assertEqual(slot.timeslot, unpacked[3])
Пример #8
0
 def _get_stats(self, closing_time, level, category=None):
     """Return the ConversationQualityTrends documents for one timeslot.

     :param closing_time: point in time used to build the timeslot
     :param level: timeslot level passed to ``Timeslot``
     :param category: optional category, translated to its code for filtering
     :returns: list of matching documents
     """
     query = {"time_slot": Timeslot(closing_time, level).timeslot}
     if category is not None:
         query["category"] = ConversationQualityTrends.get_category_code(
             category)
     return [doc for doc in ConversationQualityTrends.objects(**query)]
Пример #9
0
    def test_purge_none(self):
        """Purge a never-purged channel, then purge again on mocked days of the month.

        The second and third purges replace ``localtime`` in
        ``solariat_bottle.utils.purging`` with a stub exposing ``tm_mday``
        (presumably the purging code reads the day of month from it -
        confirm against that module). The original attribute is restored
        afterwards so the patch does not leak into other tests.
        """
        TWO_DAYS_AGO = now() - timedelta(days=2)
        self._make_laptops_and_icecream(TWO_DAYS_AGO)
        stats = purge_stats(self.channel)
        last_purged = stats["last_purged"]
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.channel.reload()
        self.assertEqual(
            datetime_to_timeslot(self.channel.last_purged, 'hour'),
            datetime_to_timeslot(last_purged, 'hour'))

        # Should have purged over 15 days for time slots since we never purged before
        self.assertEqual(len(days), 15)
        # Months purged depends on how far in we are to the month when we run the test
        self.assertTrue(len(months) in [2, 3])

        import solariat_bottle.utils.purging
        purging = solariat_bottle.utils.purging

        class MockLocaltimeDay6(object):
            tm_mday = 6

        class MockLocaltimeDay8(object):
            tm_mday = 8

        # Remember the real attribute (or its absence) so it can be put back.
        missing = object()
        original_localtime = getattr(purging, 'localtime', missing)
        try:
            purging.localtime = MockLocaltimeDay6
            stats = purge_stats(self.channel)
            days = stats["purge_days"]
            months = stats["purge_months"]
            self.assertEqual(len(days), 1)
            self.assertEqual(days[0],
                             decode_timeslot(Timeslot(level='day').timeslot))
            self.assertEqual(len(months), 0)

            purging.localtime = MockLocaltimeDay8
            stats = purge_stats(self.channel)
            days = stats["purge_days"]
            months = stats["purge_months"]
            self.assertEqual(len(days), 1)
            self.assertEqual(len(months), 1)
            self.assertEqual(months[0],
                             decode_timeslot(Timeslot(level='month').timeslot))
        finally:
            # Undo the monkeypatch even when an assertion above fails.
            if original_localtime is missing:
                del purging.localtime
            else:
                purging.localtime = original_localtime
Пример #10
0
    def get_stats(self, channel, agents):
        """Return a ``{status: count}`` mapping for the channel and agents.

        Every tracked status is present in the result; statuses missing from
        the aggregation output keep a count of 0.
        """
        tracked_statuses = [
            SpeechActMap.POTENTIAL,
            SpeechActMap.ACTIONABLE,
            SpeechActMap.ACTUAL,
            SpeechActMap.REJECTED,
        ]
        span_query = ChannelTopicTrends.objects.by_time_span
        rows = span_query(channel=channel,
                          from_ts=Timeslot(0),
                          to_ts=Timeslot(None),
                          statuses=tracked_statuses,
                          group_by='status',
                          plot_by='distribution',
                          plot_type='topics',
                          agents=agents,
                          no_transform=True)

        counts = dict.fromkeys(tracked_statuses, 0)
        for row in rows:
            # The group key of each row is the status code.
            counts[int(row['_id']['grp'])] = row['count']
        return counts
Пример #11
0
    def test_select_by_time_span(self):
        """One post yields a count of 1 per topic, for leaf and node stats, at hour and day level."""
        post = self._create_db_post("I need a mac laptop")
        intention_id = get_sa_type_id(post.speech_acts[0]['intention_type'])

        leaf_topics = ["mac laptop"]
        node_topics = leaf_topics + ["laptop"]
        # (topics to query, is_leaf flag used for both the query and the filter)
        cases = ((leaf_topics, True), (node_topics, False))

        for level in ('hour', 'day'):
            for topics, is_leaf in cases:
                for topic in topics:
                    res = ChannelTopicTrends.objects.by_time_span(
                        channel=self.channel,
                        topic_pairs=[[topic, is_leaf]],
                        from_ts=Timeslot(level=level))
                    self.assertEqual(len(res), 1)

                    # Check the post's own intention and the catch-all one.
                    for intention in (intention_id, ALL_INTENTIONS.oid):
                        embed = res[0].filter(is_leaf=is_leaf,
                                              intention=int(intention))[0]
                        self.assertEqual(embed.topic_count, 1)
Пример #12
0
    def postprocess_params(self, params):
        """Extend the base post-processing with trend-specific parameters.

        Caps ``to_ts`` at the current time and replaces 'topics' with
        'topic_pairs', a list of ``(topic, is_not_node)`` tuples.
        """
        params = super(TrendsView, self).postprocess_params(params)

        def as_topic_pair(entry):
            # Second element is True for every topic type except 'node'.
            return (entry['topic'], entry['topic_type'] != 'node')

        upper_bound = params['to_ts']
        current_time = now()
        if upper_bound.timestamp_ms > datetime_to_timestamp_ms(current_time):
            # Do not query into the future: clamp to the current slot.
            params['to_ts'] = Timeslot(current_time, upper_bound.level)

        params['topic_pairs'] = map(as_topic_pair, params['topics'])
        del params['topics']

        return params
Пример #13
0
    def test_select_by_time_span_2(self):
        '''
        Create several identical posts and check that the per-topic slots
        are aggregated into a single stat whose count equals the number of posts.
        '''
        content = "I need a mac laptop"
        post_count = 5

        leaf_topics = ["mac laptop"]
        node_topics = leaf_topics + ["laptop"]

        for _ in range(post_count):
            self._create_db_post(content)
            time.sleep(0.01)

        # (topics to query, is_leaf flag used for both the query and the filter)
        cases = ((leaf_topics, True), (node_topics, False))

        for level in ('hour', 'day'):
            for topics, is_leaf in cases:
                for topic in topics:
                    stats = ChannelTopicTrends.objects.by_time_span(
                        channel=self.channel,
                        topic_pairs=[[topic, is_leaf]],
                        from_ts=Timeslot(level=level),
                        to_ts=Timeslot(level=level))
                    self.assertEqual(len(stats), 1)

                    embed_stat = stats[0].filter(
                        intention=int(ALL_INTENTIONS.oid), is_leaf=is_leaf)
                    self.assertEqual(embed_stat[0].topic_count, post_count)
Пример #14
0
    def setUp(self):
        """Prepare channels, a username, two smart tags and the starting hour slot."""
        super(ConversationTest, self).setUp()

        self.start_date = now()
        self.i = self.sc.inbound_channel
        self.o = self.sc.outbound_channel
        self.sc.add_username('@test')

        # Two active smart tags on the inbound channel, each with its own keyword.
        tag_specs = (
            ('laptop_tag', 'Laptops Tag', 'laptop'),
            ('display_tag', 'Other Tag', 'display'),
        )
        for attr_name, title, keyword in tag_specs:
            tag = self._create_smart_tag(self.i, title, status='Active',
                                         keywords=[keyword])
            setattr(self, attr_name, tag)

        self.from_ts_hour = Timeslot(point=self.start_date, level='hour')
Пример #15
0
def _check_account_volume(user, account):
    ''' Send the monthly volume warning emails when a limit is reached.

        Returns True only when the "surpassed" notification had already been
        sent before this call; in every other case (no package, "Internal"
        package, or a notification sent during this call) returns False.
    '''
    if not account.package or account.package.name == "Internal":
        return False
    if account.is_threshold_surpassed_sent:
        return True

    from solariat_bottle.db.account import (account_stats,
                                            VOLUME_NOTIFICATION_THRESHOLD,
                                            THRESHOLD_WARNING,
                                            THRESHOLD_SURPASSED_WARNING)

    volume_limit = account.package.volume
    month_start, month_end = Timeslot(level='month').interval
    monthly_stats = account_stats(account,
                                  user,
                                  start_date=month_start,
                                  end_date=month_end)
    number_posts = monthly_stats.get('number_of_posts')
    warning_limit = account.volume_warning_limit
    surpassed_limit = account.volume_surpassed_limit

    # (percentage text, warning kind) of the email to send, if any.
    notice = None
    if number_posts >= surpassed_limit:
        if not account.is_threshold_surpassed_sent:
            notice = (str(VOLUME_NOTIFICATION_THRESHOLD["Surpassed"]) + "%",
                      THRESHOLD_SURPASSED_WARNING)
    elif number_posts >= warning_limit:
        if not account.is_threshold_warning_sent:
            notice = (str(VOLUME_NOTIFICATION_THRESHOLD["Warning"]) + "%",
                      THRESHOLD_WARNING)

    if notice is not None:
        percentage, warning = notice
        from solariat_bottle.utils.mailer import send_account_posts_limit_warning
        # Record the notification first, then email every account admin.
        account.set_threshold_warning(warning)
        for admin in account.admins:
            send_account_posts_limit_warning(admin, percentage, volume_limit)

    return False
Пример #16
0
def _json_trial(user, item, stats_by_account=None, with_stats=False):
    """Serialize a trial account to a JSON-ready dict.

    :param user: user passed to ``account_stats`` when stats are computed here
    :param item: the trial account
    :param stats_by_account: optional precomputed stats keyed by account;
        when truthy it takes precedence over ``with_stats``
    :param with_stats: compute the current month's stats when no
        precomputed mapping is given
    """
    payload = {
        "account_name": item.name or "Unnamed Trial",
        "account_id": str(item.id),
    }
    timestamp_fields = (
        ("start_date", item.start_date),
        ("end_date", item.end_date),
        ("created_at", item.created),
    )
    for key, moment in timestamp_fields:
        payload[key] = js_ts(moment)
    payload["status"] = item.status

    if stats_by_account:
        payload["stats"] = stats_by_account[item]
    elif with_stats:
        interval_start, interval_end = Timeslot(level='month').interval
        payload["stats"] = account_stats(item,
                                         user,
                                         start_date=interval_start,
                                         end_date=interval_end)
    return payload
Пример #17
0
def _json_account(acct,
                  user=None,
                  with_stats=False,
                  start_date=None,
                  end_date=None,
                  cache=None):
    """Serialize an account to a JSON-ready dict, or return None for a falsy account.

    :param acct: the account to serialize
    :param user: optional viewer; user-dependent flags are falsy without it
    :param with_stats: when true (and ``user`` is given) add 'stats',
        'monthly_volume', 'daily_volume' and the daily notification emails
    :param start_date: start of the interval for the 'stats' entry
    :param end_date: end of the interval for the 'stats' entry
    :param cache: optional dict of precomputed values; the keys 'admins',
        'users_count' and 'all_users_count' are honoured
    """
    if not acct:
        return None
    if cache is None:
        cache = {}

    is_sf_auth = bool(session.get('sf_oauthToken', False)
                      and acct.access_token is not None
                      and acct.account_type == 'Salesforce')

    package_name = acct.package.name if acct.package is not None else "Internal"

    manager = acct.customer_success_manager
    csm_email = None if manager is None else manager.email

    admin_info = {'first': None, 'last': None, 'email': None}
    try:
        #  Note: Taking the first element might not be best
        if 'admins' in cache:
            candidates = cache['admins']
        else:
            candidates = [
                candidate for candidate in acct.admins
                if not candidate.is_staff
            ]
        primary_admin = candidates[0]
    except IndexError:  # only staff are admins
        pass
    else:
        admin_info['first'] = primary_admin.first_name
        admin_info['last'] = primary_admin.last_name
        admin_info['email'] = primary_admin.email

    a_dict = {
        'id': str(acct.id),
        'name': acct.name,
        'channels_count': acct.get_current_channels(status__ne='Archived').count(),
        'account_type': acct.account_type,
        'package': package_name,
        'created_at': datetime_to_timestamp_ms(acct.created),
        'is_current': (user and user.current_account
                       and user.current_account.id == acct.id),
        'is_admin': user and (acct.can_edit(user) or user.is_superuser),
        'is_super': user and user.is_superuser,
        'is_staff': user and user.is_staff,
        'is_analyst': user and user.is_analyst,
        'is_only_agent': user and user.is_only_agent,
        'signature': user and user.signature_suffix,
        'is_sf_auth': is_sf_auth,
        'end_date': acct.end_date and datetime_to_timestamp_ms(acct.end_date),
        'configured_apps': acct.available_apps.keys(),
        'available_apps': CONFIGURABLE_APPS.keys(),
        'selected_app': acct.selected_app,
        'customer_success_manager': csm_email,
        'notes': acct.notes,
        'admin': admin_info,
        'is_active': acct.is_active,
        'status': acct.status,
        # Placeholder; replaced below when stats are requested.
        'monthly_volume': 0,
        'is_locked': acct.is_locked,
        'updated_at': (datetime_to_timestamp_ms(acct.updated_at)
                       if acct.updated_at else None),
        'recovery_days': acct.recovery_days,
        'event_processing_lock': acct.event_processing_lock,
    }

    # User counts exclude system users; cached values win when present.
    if 'users_count' in cache:
        a_dict['users_count'] = cache['users_count']
    else:
        a_dict['users_count'] = sum(
            1 for member in acct.get_users() if not member.is_system)

    if 'all_users_count' in cache:
        a_dict['all_users_count'] = cache['all_users_count']
    else:
        a_dict['all_users_count'] = sum(
            1 for member in acct.get_all_users() if not member.is_system)

    if user and user.is_admin:
        a_dict['gse_api_key'] = acct.gse_api_key

    if with_stats and user:
        a_dict['stats'] = account_stats(acct, user, start_date, end_date)

        right_now = dt.datetime.now()
        first_of_month = dt.datetime(right_now.year, right_now.month, 1)
        a_dict['monthly_volume'] = account_stats(acct, user, first_of_month,
                                                 right_now)

        day_start, day_end = Timeslot(level='day').interval
        a_dict['daily_volume'] = account_stats(acct, user, day_start, day_end)

        notification = acct.daily_post_volume_notification
        a_dict['daily_volume_notification_emails'] = notification.alert_emails
    return a_dict
Пример #18
0
    def by_time_span(self,
                     channel=None,
                     from_ts=None,
                     to_ts=None,
                     topic_pairs=None,
                     intentions=None,
                     statuses=None,
                     agents=None,
                     languages=None,
                     group_by='topic',
                     plot_by='time',
                     plot_type=None,
                     no_transform=False):
        """
        Aggregate topic trend stats over a time span.

        :param channel: can be a string or a sequence
        :param from_ts: starting timeslot
        :param to_ts: end timeslot; falls back to ``from_ts``
        :param topic_pairs: list of pairs (<topic:str>, <is_leaf:bool>);
            defaults to the all-topics pair
        :param intentions: intentions to filter on; defaults to all intentions
        :param statuses: a single status or list of <status:int|str>;
            defaults to every known status
        :param agents: list of <User>, where each user should have .agent_id != 0
        :param languages: list of language codes or ids
        :param group_by: <str:"topic"|"intention"|"status"|"agent"|"lang"|"time">
        :param plot_by: forwarded to pipeline assembly and post-processing
        :param plot_type: when truthy, enables the topic/filter match stage
        :param no_transform: forwarded to ``postprocess_results``

        :returns: stats by time span, or ``{'ok': False, 'error': ...}``
            when the aggregation reports a failure
        """
        agents = self.preprocess_agents(agents, group_by, channel)

        if not statuses:
            statuses = SpeechActMap.STATUS_NAME_MAP.keys()
        else:
            if not is_iterable(statuses):
                statuses = [statuses]
            statuses = map(get_status_code, statuses)

        intention_ids = map(get_intention_id, intentions
                            or []) or [ALL_INTENTIONS_INT]
        topic_pairs = topic_pairs or [[ALL_TOPICS, False]]
        languages = map(get_lang_id, languages or [])

        from_ts = Timeslot(from_ts).timeslot
        to_ts = Timeslot(to_ts or from_ts).timeslot

        # channel can be a string or a sequence; treat both uniformly
        channels = channel if isinstance(channel, seq_types) else [channel]

        # One indexed _id range per (topic, status, channel) combination.
        id_ranges = []
        for (topic, _), status in product(topic_pairs, statuses):
            for single_channel in channels:
                lower_id, upper_id = self.get_id_intervals(
                    single_channel, from_ts, to_ts, topic, status)
                id_ranges.append({"_id": {"$gte": lower_id, "$lte": upper_id}})

        indexed_match_query = (id_ranges[0] if len(id_ranges) == 1
                               else {"$or": id_ranges})
        initial_pipeline = [{"$match": indexed_match_query}]

        match_query = {}
        if plot_type:
            topic_filter = self.filter_topics(topic_pairs)
            embedded_filter = self.construct_filter_query(
                intention_ids, statuses, agents, languages)
            match_query = {"$and": [topic_filter, embedded_filter]}

        pipeline = self.assemble_pipeline(initial_pipeline, match_query,
                                          plot_type, plot_by, group_by)
        res = self.execute_pipeline(pipeline)

        if not res['ok']:
            error_msg = "Aggregate error=%s" % res
            return {'ok': False, 'error': error_msg}

        # Every candidate feature list is built up front; an unknown
        # group_by fails with KeyError on the lookup.
        features_by_grouping = {
            'agent': [(u.agent_id, u) for u in (agents or [])],
            'intention': intention_ids,
            'topic': topic_pairs,
            'status': statuses,
            'lang': make_lang_features(languages),
            'time': None
        }
        features = features_by_grouping[group_by]
        return self.postprocess_results(res, pipeline, no_transform, plot_type,
                                        from_ts, to_ts, group_by, plot_by,
                                        features)
Пример #19
0
    def by_time_span(self,
                     channel=None,
                     from_ts=None,
                     to_ts=None,
                     agents=None,
                     statuses=None,
                     languages=None,
                     group_by='agent',
                     plot_by='time',
                     plot_type=None,
                     no_transform=False):
        """
        Aggregate channel trend stats over a time span.

        :param channel: can be a string or a sequence
        :param from_ts: starting timeslot
        :param to_ts: end timeslot; falls back to ``from_ts``
        :param agents: agents, resolved through ``preprocess_agents``
        :param statuses: a single status or a list; defaults to every known status
        :param languages: passed to the filter query and the language features
        :param group_by: the type of grouping we are doing for aggregation
            ('agent', 'lang', 'time' or None)
        :param plot_by: forwarded to pipeline assembly and post-processing
        :param plot_type: when truthy, enables the filter match stage
        :param no_transform: forwarded to ``postprocess_results``

        :returns: stats by time span
        """
        agents = self.preprocess_agents(agents, group_by, channel)

        if not statuses:
            statuses = SpeechActMap.STATUS_NAME_MAP.keys()
        else:
            if not is_iterable(statuses):
                statuses = [statuses]
            statuses = map(get_status_code, statuses)

        from_ts = Timeslot(from_ts).timeslot
        to_ts = Timeslot(to_ts or from_ts).timeslot

        # channel can be a string or a sequence; treat both uniformly
        channels = channel if isinstance(channel, seq_types) else [channel]

        # One indexed _id range per (status, channel) combination.
        id_ranges = []
        for status in statuses:
            for single_channel in channels:
                lower_id, upper_id = self.get_id_intervals(
                    single_channel, status, from_ts, to_ts)
                id_ranges.append({"_id": {"$gte": lower_id, "$lte": upper_id}})

        indexed_match_query = (id_ranges[0] if len(id_ranges) == 1
                               else {"$or": id_ranges})
        initial_pipeline = [{"$match": indexed_match_query}]

        match_query = {}
        if plot_type:
            match_query = self.construct_filter_query(statuses, agents,
                                                      languages)
        pipeline = self.assemble_pipeline(initial_pipeline, match_query,
                                          plot_type, plot_by, group_by)
        res = self.execute_pipeline(pipeline)

        features = None
        if group_by is not None:
            # All candidate feature lists are built before the lookup; an
            # unknown group_by fails with KeyError.
            features_by_grouping = {
                'agent': [(u.agent_id, u) for u in (agents or [])],
                'lang': make_lang_features(languages),
                'time': None
            }
            features = features_by_grouping[group_by]
        return self.postprocess_results(res, pipeline, no_transform, plot_type,
                                        from_ts, to_ts, group_by, plot_by,
                                        features)
Пример #20
0
 def get_term_stats(term, level):
     """Return, as a tuple, the node-level trend stats of ``term`` for the current slot of ``level``."""
     trends = ChannelTopicTrends.objects
     current_slot = Timeslot(level=level)
     # ``self`` comes from the enclosing scope; [term, False] requests the
     # non-leaf (node) stats.
     found = trends.by_time_span(self.channel,
                                 topic_pairs=[[term, False]],
                                 from_ts=current_slot)
     return tuple(found)
Пример #21
0
 def _get_stats(self, level):
     """Return, as a tuple, the channel trend stats for the current slot of ``level``."""
     current_slot = Timeslot(level=level)
     found = ChannelTrends.objects.by_time_span(self.channel,
                                                from_ts=current_slot)
     return tuple(found)
Пример #22
0
def transform(data,
              from_ts=None,
              to_ts=None,
              group_by='topic',
              plot_by='time',
              plot_type='topics',
              features=None):
    """Transform aggregation data into plot data.

    :param data: iterable of aggregation rows.  Each row is a mapping with an
        ``'_id'`` mapping that may hold ``'ts'`` (time slot, default 0) and
        ``'grp'`` (group label, default ``'count'``); the plotted value is
        read from the row's ``'count'`` entry (default 0).
    :param from_ts: start of the range; passed to ``Timeslot`` and
        ``gen_timeslots``.
    :param to_ts: end of the range; must resolve to the same ``Timeslot``
        level as ``from_ts``.
    :param group_by: how rows were grouped.  ``'topic'``, ``'agent'`` and
        ``'lang'`` features are keyed by their first element,
        ``'intention'`` and ``'status'`` by ``int(feature)``; anything else
        uses the single key ``'count'``.
    :param plot_by: ``'time'`` builds one ``[timestamp, count]`` point per
        time slot; ``'distribution'`` builds one point per feature from the
        rows stored under time slot 0.  Any other value yields no series.
    :param plot_type: with ``'response-time'`` the per-series ``count`` in
        time mode is the average over slots that had data (rounded to two
        decimals) instead of the sum.
    :param features: the series to plot.  Defaults to ``['count']`` in time
        mode and to the group labels found in ``data`` in distribution mode.
        For ``group_by == 'topic'`` the second element of each feature
        selects the ``'leaf'`` / ``'node'`` topic type.
    :returns: ``{"ok": True, "level": <level>, "list": [<series>, ...]}``.
    :raises AssertionError: if ``from_ts`` and ``to_ts`` have different levels.
    """
    def group_by_timeslot_label(rows):
        # {time_slot: {label: row}}; missing slots read as an empty dict.
        by_timeslot_label = defaultdict(dict)
        for item in rows:
            time_slot = item['_id'].get('ts', 0)
            label = item['_id'].get('grp', 'count')
            by_timeslot_label[time_slot][label] = item

        return by_timeslot_label

    def _get_count(stats_data, stat_type='count'):
        return stats_data.get(stat_type, 0)

    def get_feature_key(feature):
        if group_by in ('topic', 'agent', 'lang'):
            try:
                return feature[0]
            except (TypeError, IndexError):
                return feature
        elif group_by in ('intention', 'status'):
            return int(feature)
        return 'count'

    def get_time_data(groups, y_axis):
        total_counts = defaultdict(int)
        total_items = defaultdict(int)
        series = defaultdict(list)

        for slot in gen_timeslots(from_ts, to_ts):
            timestamp = timeslot_to_timestamp_ms(slot)
            features_data = groups.get(slot, {})

            for feature in y_axis:
                feature_key = get_feature_key(feature)
                stats = features_data.get(feature_key)

                if stats:
                    count = _get_count(stats)

                    total_counts[feature_key] += count
                    total_items[feature_key] += 1
                    series[feature_key].append([timestamp, count])
                else:
                    # Keep the series continuous: empty slots plot as zero.
                    series[feature_key].append([timestamp, 0])

        if plot_type == 'response-time':
            # Report the average per populated slot rather than the sum.
            result_counts = defaultdict(float)
            for key, value in total_counts.items():
                if total_items.get(key):
                    # float() forces true division; under Python 2 two ints
                    # would be floor-divided before rounding.
                    result_counts[key] = round(
                        float(value) / total_items[key], 2)
                else:
                    result_counts[key] = 0
        else:
            result_counts = total_counts
        return series, result_counts

    results = {}
    level = Timeslot(from_ts).level
    assert level == Timeslot(to_ts).level, \
        "from_ts and to_ts must have the same timeslot level"

    if plot_by == 'time':
        groups = group_by_timeslot_label(data)
        y_axis = features or ['count']
        series, counts = get_time_data(groups, y_axis)

        for f in y_axis:
            feature = get_feature_key(f)
            # Series without any counted data are left out of the plot.
            if not counts.get(feature):
                continue
            data_series = {
                "label": translate_label(f, group_by),
                "data": series.get(feature, []),
                "level": level,
                "count": counts.get(feature, 0)
            }
            if group_by == 'topic':
                data_series['topic_type'] = 'leaf' if f[1] else 'node'
            results[feature] = data_series
    elif plot_by == 'distribution':
        # Distribution rows carry no 'ts', so they are all grouped under 0.
        groups = group_by_timeslot_label(data)[0]
        y_axis = features or groups.keys()

        # idx advances for every feature, including skipped ones, so each
        # feature keeps a fixed x position (idx * 2).
        for idx, f in enumerate(y_axis, 1):
            feature = get_feature_key(f)
            if feature not in groups:
                continue
            count = _get_count(groups[feature])
            data_series = {
                "label": translate_label(f, group_by),
                "data": [[idx * 2, count]]
            }
            if group_by == 'topic':
                data_series['topic_type'] = 'leaf' if f[1] else 'node'

            results[feature] = data_series

    return {"ok": True, "level": level, "list": list(results.values())}
Пример #23
0
    def test_topic_counts(self):
        """Check raw ``by_time_span`` output for each group_by / plot_by pair.

        Every grouping (topic, intention, status) is queried as a time line
        and as a distribution, first with explicit topic pairs and then with
        the topic pairs removed (the ALL_TOPICS case).
        """
        base_filters = {
            'channel': self.channel,
            'from_ts': Timeslot(self.start_date, self.level),
            'to_ts': Timeslot(self.end_date, self.level),
            'topic_pairs': [('laptop', True), ('display', True)],
            'intentions': ['likes', 'needs', 'recommendation', 'problem'],
            'statuses': [0, 1, 2, 3],
            'plot_type': 'topics',
            'no_transform': True,
        }

        # Expected list of dicts
        # For time line
        # {'_id': {'grp': <group_by>, 'ts': <timestamp>}, 'count': 1}
        #
        # For distribution
        # {'_id': {'grp': <group_by>}, 'count': <total_count>}

        def fetch(group_by, plot_by):
            # Reads base_filters at call time, so the later pop() applies.
            return ChannelTopicTrends.objects.by_time_span(
                group_by=group_by, plot_by=plot_by, **base_filters)

        def check(group_by, expected):
            # plot_by = time: every expected time slot must be present
            rows = fetch(group_by, 'time')
            seen_slots = set(row['_id']['ts'] for row in rows)
            self.assertEqual(seen_slots, set(self.time_slots))
            self._assert_time(rows, expected)

            # plot_by = distribution
            rows = fetch(group_by, 'distribution')
            self._assert_distribution(rows, expected)

        status = 1

        check('topic', ['laptop', 'display'])
        check('intention', [need, problem])
        check('status', [status])

        # test __ALL_topics__ case
        base_filters.pop('topic_pairs')

        check('topic', [ALL_TOPICS])
        check('intention', [need, problem])
        check('status', [status])
Пример #24
0
def more_like_post(post, channel):
    """
    Return a list of posts similar to ``post`` in the given channel.

    Candidates share one of the post's intention type ids and its language,
    and fall in the window from one day before to one hour after the post
    was created.  Note that we are looking for posts that are similar, but
    with opposite status, since we want to re-label them.

    The lookup passes ``['__ALL__']`` rather than the post's own topics, so
    topics do not narrow the result (presumably the "all topics" wildcard -
    confirm against ``Post.objects.by_time_point``).

    For a smart tag the lookup runs against the parent channel across all
    four statuses, and the result is extended with the other posts of the
    same conversation that carry the tag.

    :param post: the post whose assignment drives the re-labelling.
    :param channel: a channel, or anything ``ensure_channels`` accepts.
    :returns: list of similar posts (at most 10 from the time lookup, plus
        any conversation posts for smart tags), without duplicates.
    :raises AppException: for a non smart tag channel when the assignment of
        the post is neither in ``SpeechActMap.ASSIGNED`` nor one of
        ``'rejected'`` / ``'discarded'``.
    """
    from solariat_bottle.db.post.base    import Post
    from solariat_bottle.db.speech_act   import SpeechActMap
    from solariat_bottle.db.channel.base import Channel
    from solariat_bottle.db.conversation import Conversation

    from solariat.utils.timeslot import Timeslot, DURATION_DAY

    channel = Channel.objects.ensure_channels([channel])[0]
    assignment = post.get_assignment(channel)
    if channel.is_smart_tag:
        # for smart tags lookup similar posts in parent channel
        parent_channel = Channel.objects.get(channel.parent_channel)
        status = [SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE,
                  SpeechActMap.ACTUAL, SpeechActMap.REJECTED]
    else:
        parent_channel = channel
        status = [SpeechActMap.POTENTIAL]
        if assignment in SpeechActMap.ASSIGNED:
            # Positive assignment could cause a more precise classification
            # of a Potential post and could revert the assignment for
            # Rejected posts.
            status.append(SpeechActMap.REJECTED)
        elif assignment in {'rejected', 'discarded'}:
            # Conversely, may reject potential posts and may cause a
            # reversion of prior allocation for Actionable.
            status.append(SpeechActMap.ACTIONABLE)
        else:
            raise AppException("An internal state is not expected: %s. Please contact support for assistance." % assignment)

    intention_ids = [sa['intention_type_id'] for sa in post.speech_acts]

    # The basic post lookup that just searches for the latest objects.
    # The second return value (whether more posts are available) is not
    # needed here.
    res, _more_posts_available = Post.objects.by_time_point(
                                    parent_channel,
                                    ['__ALL__'],
                                    from_ts   = Timeslot(post.created_at-DURATION_DAY),
                                    to_ts     = Timeslot(post.created_at+timedelta(hours=1)),
                                    status    = status,
                                    intention = intention_ids,
                                    languages = [post.language],
                                    limit     = 10)
    res = set(res)

    if channel.is_smart_tag:
        # Part of new re-labeling. If tag for a post is rejected, we should
        # go through all posts from the post conversation that contain the
        # tag.
        service_channel = get_service_channel(channel)
        if service_channel:
            conversations = Conversation.objects.lookup_conversations(service_channel, [post])

            if len(conversations) == 1:
                # Extend with all other posts from this conversation that
                # have that tag assigned to them.
                post_id = str(post.id)
                tag_id = str(channel.id)
                conversation_posts = Post.objects(id__in=list(conversations[0].posts))
                res.update(p for p in conversation_posts
                           if str(p.id) != post_id and tag_id in p.tag_assignments)

    # NOTE: walking the first RESPONSE_DEPTH_FACTOR responses (for smart
    # tags and for rejected/actionable posts) is not implemented.  The
    # previous code for the non smart tag case only computed values that
    # were never used, at the cost of an extra channel lookup, so it was
    # removed.
    return list(res)