示例#1
0
    def by_time_span(self,
                     user,
                     channel,
                     start_time=None,
                     end_time=None,
                     level='hour',
                     **extra):
        """Return the stats documents visible to *user* for a time period.

        start_time defaults to TIMESLOT_EPOCH and end_time to the current
        time; both are converted to timeslots at the given `level`.  Any
        additional keyword arguments pass through as query filters.
        """
        from_ts = datetime_to_timeslot(start_time or TIMESLOT_EPOCH, level)
        to_ts = datetime_to_timeslot(end_time or get_now(), level)

        # Collapse a degenerate range into an exact-match filter.
        if from_ts == to_ts:
            extra['time_slot'] = from_ts
        else:
            extra['time_slot__gte'] = from_ts
            extra['time_slot__lte'] = to_ts

        # Accept either a single channel or any iterable of channels.
        if hasattr(channel, '__iter__'):
            extra['channel__in'] = channel
        else:
            extra['channel'] = channel

        return self.find_by_user(user, **extra)
示例#2
0
    def test_stats_retrieving(self):
        """Increment hot-topic stats over a topic tree, then read them back
        for today's day-level slot filtered to English."""
        time_slot = datetime_to_timeslot(now(), 'day')
        topics = ('laptop', 'laptop bag', 'good laptop bag', 'good laptop')
        # Record every term of each topic's tree (leaf and node entries).
        for topic in topics:
            for term, is_leaf in gen_topic_tree(topic):
                ChannelHotTopics.increment(self.channel,
                                           time_slot,
                                           term,
                                           status=0,
                                           intention_id=0,
                                           is_leaf=is_leaf,
                                           lang_id=Lang.EN,
                                           agent=1)

        stats = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            from_ts=datetime_to_timeslot(None, 'day'),
            languages=['en'])

        # Exact aggregation expected from the increments above.
        expected_result = [{
            u'term_count': 2,
            u'topic': u'laptop',
            u'topic_count': 1
        }, {
            u'term_count': 2,
            u'topic': u'bag',
            u'topic_count': 0
        }]

        self.assertListEqual(stats, expected_result)
示例#3
0
    def test_agent_search(self):
        """Agent-filtered fetch finds nothing until the post is reset to
        ACTUAL status with that agent attached."""
        content = 'I need a bike. I like Honda.'
        post = self._create_db_post(content)

        # Search ACTIONABLE posts for agent 10: nothing is attributed to
        # that agent yet, so no hits.
        posts = fetch_posts([self.channel],
                            datetime_to_timeslot(post.created_at, 'hour'),
                            datetime_to_timeslot(post.created_at, 'hour'),
                            [dict(topic='honda', topic_type='leaf')],
                            [SpeechActMap.ACTIONABLE],
                            intentions=[],
                            agents=[10],
                            min_conf=0.5)

        self.assertEqual(len(posts), 0)
        # Two sentences in the post => two speech act map entries.
        self.assertEqual(SpeechActMap.objects.count(), 2)

        # Now reset with this agent, and into an ACTUAL stats
        post.channel_assignments[str(self.channel.id)] = 'actual'
        SpeechActMap.reset(post, [self.channel], agent=10)

        # Should be the same number of speech act map entries
        self.assertEqual(SpeechActMap.objects.count(), 2)

        # And try again. We should get a hit
        posts = fetch_posts([self.channel],
                            datetime_to_timeslot(post.created_at, 'hour'),
                            datetime_to_timeslot(post.created_at, 'hour'),
                            [dict(topic='honda', topic_type='leaf')],
                            [SpeechActMap.ACTUAL],
                            intentions=[],
                            agents=[10],
                            min_conf=0.5)

        self.assertEqual(len(posts), 1)
示例#4
0
    def test_duplicate_handle_diff_channels(self):
        """Posting duplicate content into additional channels leaves the
        original channel's stats unchanged (compared against snapshots
        presumably captured in setUp — confirm against the fixture)."""

        channel3 = TwitterChannel.objects.create_by_user(
            self.user,
            title='TestChannel3',
            type='twitter',
            intention_types=SA_TYPES)

        duplicate_post = self._create_db_post(
            channels=[self.channel2, channel3],
            content=self.duplicate_content,
            url=self.url,
            twitter={
                "created_at": "Wed, 06 Aug 2014 18:38:47 +0000",
                "id": "497089420017676290"
            })

        # The duplicate is now attached to all three channels.
        self.assertEqual(len(duplicate_post.channels), 3)

        time_slot = datetime_to_timeslot(now(), 'day')
        ht_stat = ChannelHotTopics.objects.by_time_span(
            channel=self.channel2,
            from_ts=datetime_to_timeslot(None, 'day'),
        )

        tt_stat = ChannelTopicTrends(channel=self.channel2,
                                     time_slot=time_slot,
                                     topic=self.topic,
                                     status=0)

        # Stats must equal the pre-duplicate snapshots.
        self.assertEqual(ht_stat, self.hot_topic_stat)
        self.assertEqual(tt_stat, self.topic_trends_stat)
示例#5
0
    def test_select_by_time_point_3(self):
        """Hot-topic counts respect the queried slot's level and position."""
        day_dec_12 = utc(datetime(day=12, month=12, year=2013))
        day_dec_02 = utc(datetime(day=2, month=12, year=2013))
        day_oct_02 = utc(datetime(day=2, month=10, year=2013))

        for created in (day_dec_12, day_dec_02, day_oct_02):
            self._create_db_post(_created=created,
                                 channel=self.channel,
                                 content='i need some carrot')

        def term_count(point, level):
            # Top hot-topic term_count for the slot containing `point`.
            stats = ChannelHotTopics.objects.by_time_span(
                channel=self.channel,
                from_ts=datetime_to_timeslot(point, level=level))
            return stats[0]['term_count']

        # Exactly one post on that single day.
        self.assertEqual(term_count(day_dec_12, 'day'), 1)
        # Two of the posts fall within December.
        self.assertEqual(term_count(day_dec_12, 'month'), 2)
        # Only one post in October.
        self.assertEqual(term_count(day_oct_02, 'month'), 1)
示例#6
0
    def test_select_by_time_point(self):
        """Hot topics exist at day/month levels but never at hour level."""
        content = "I need a mac laptop"

        self._create_db_post(content)
        '''
        terms: mac laptop, laptop
        '''

        results = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            from_ts=datetime_to_timeslot(None, level='hour'))
        self.assertEqual(len(results),
                         0)  # we don't store HOUR hot-topics stats

        # The first test will be for root level topics.
        results = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            from_ts=datetime_to_timeslot(None, level='day'))
        expected = set(['laptop'])
        # Every returned topic must be in the expected set.
        self.assertEqual(
            set([r['topic'] for r in results]).difference(expected), set())

        # Should be the same again - despite the change in month
        results = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            from_ts=datetime_to_timeslot(None, level='month'))
        self.assertEqual(
            set([r['topic'] for r in results]).difference(expected), set())
示例#7
0
    def postprocess_params(self, params):
        """Normalize request parameters for the posts view.

        Converts the 'from'/'to' datetime strings into hour timeslots,
        delegates to the superclass, resolves the sort order into a mongo
        sort map, lifts the intention confidence threshold into 'min_conf',
        and parses the incremental-query timestamp.  Returns the processed
        parameter dict.
        """
        r = params
        from_dt = parse_datetime(r['from'])
        to_dt = parse_datetime(r['to'])

        r['from_ts'] = datetime_to_timeslot(from_dt, 'hour')
        r['to_ts'] = datetime_to_timeslot(to_dt, 'hour')
        del r['from']
        del r['to']

        r = super(PostsView, self).postprocess_params(r)

        if r['sort_by'] == 'time':
            r['sort_map'] = {'_created': -1}
        else:
            r['sort_map'] = {'intention_confidence': -1}

        r['min_conf'] = r['thresholds']['intention']
        if r['last_query_time'] is not None:
            r['last_query_time'] = timestamp_ms_to_datetime(
                r['last_query_time'])

        del r['plot_type']

        # BUG FIX: return the dict produced by the superclass call.  The
        # original returned `params`, which silently drops every change made
        # after `super().postprocess_params(r)` whenever the superclass
        # returns a new dict rather than mutating `r` in place.
        return r
示例#8
0
    def test_select_by_time_point_2(self):
        '''
        Create multiple posts and make sure the slots for
        terms get aggregated.
        '''
        content = "I need a mac laptop"

        for i in range(10):
            self._create_db_post(content)

        results = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            from_ts=datetime_to_timeslot(None, level='day'))

        # Should just be 1 at the top
        self.assertEqual(len(results), 1)

        # Make sure the aggregate count is correct
        self.assertEqual(results[0]['term_count'], 10)
        self.assertEqual(results[0]['topic_count'], 0)

        # Should be 10 below for each. Only one item but counts of 10
        # (drill down under the 'laptop' parent topic).
        results = ChannelHotTopics.objects.by_time_span(
            channel=self.channel,
            parent_topic='laptop',
            from_ts=datetime_to_timeslot(None, level='day'))

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['topic_count'], 10)
示例#9
0
def purge_outdated_trends_stats(coll, channel, level, delta):
    """Remove outdated trend-stats records from `coll`, one timeslot at a time.

    Walks backwards in time starting `delta` units ago (days for the 'hour'
    level, months for the 'day' level), deleting all of `coll`'s records for
    each slot, and stops after EMPTY_SLOTS_NUMBER slots produced no
    deletions.  Returns the total number of records removed.

    NOTE(review): `channel` does not narrow the remove query, so records for
    every channel in the slot are deleted — confirm whether per-channel
    purging was intended.
    """
    initial_timedelta_arg_name = {"hour": "days", "day": "months"}[level]
    timedelta_arg_name = {"hour": "hours", "day": "days"}[level]
    start_dt = now() - relativedelta(**{initial_timedelta_arg_name: delta})
    current_dt = start_dt
    time_step = relativedelta(**{timedelta_arg_name: 1})
    ts = datetime_to_timeslot(current_dt, level)
    zero_counts = 0
    total_records_removed = 0
    EMPTY_SLOTS_NUMBER = 10
    while zero_counts <= EMPTY_SLOTS_NUMBER:
        t0 = datetime.now()  # timing for the log line below
        # FIX: dropped the unused per-iteration call
        # `ChannelTopicTrends.make_channel_ts(channel, ts)` — its result was
        # never referenced.
        res = coll.objects.coll.remove(coll.objects.get_query(time_slot=ts))
        if res['n'] == 0:
            zero_counts += 1
        current_dt = current_dt - time_step
        total_records_removed += res['n']
        ts = datetime_to_timeslot(current_dt, level)
        LOGGER.info(
            "purging Q:: collection: %s; func: %s; timedelta: %s; date: %s; level: %s; records removed: %s",
            coll.__name__,
            inspect.stack()[0][3],
            datetime.now() - t0, current_dt, level, res['n'])
    return total_records_removed
示例#10
0
def purge_channel_stats(channel):
    """Remove aged ChannelStats records for `channel` at every timeslot level.

    Deletes stats whose slots fall in (2012-01-01, now - CHANNEL_STATS_KEEP_DAYS]
    for each level in TIMESLOT_LEVEL_NAMES; returns the total removed count.
    """
    days = get_var('CHANNEL_STATS_KEEP_DAYS')

    start_date = datetime(year=2012, month=1, day=1)
    end_date = now() - timedelta(days=days)
    # end_date   = datetime(year=end_date.year, month=end_date.month, day=1)
    # One (start_ts, end_ts) pair per timeslot level, built lazily.
    timeslots  = (
        (datetime_to_timeslot(start_date, level), datetime_to_timeslot(end_date, level)) \
        for level in TIMESLOT_LEVEL_NAMES
    )

    F = ChannelStats.F
    removed_count = 0
    for start_ts, end_ts in timeslots:
        t0 = datetime.now()  # timing for the log line below
        res = ChannelStats.objects.coll.remove({
            F('time_slot'): {
                '$lte': end_ts,
                '$gt': start_ts
            },
            F('channel'): channel.id
        })
        LOGGER.info(
            "purging Q:: channel: %s; collection: ChannelStats; func: %s; timedelta: %s"
            % (channel.title, inspect.stack()[0][3], datetime.now() - t0))
        removed_count += res['n']
    return removed_count
示例#11
0
    def test_select_by_time_span_3(self):
        """A full span finds both posts; shifting the start one slot
        forward drops the older one — at both hour and day levels."""
        past_dt = now() - relativedelta(months=1)  # big enough for all levels

        post1 = self._create_db_post(_created=past_dt,
                                     content='i need some carrot')

        post2 = self._create_db_post(content='i need some carrot')

        self.assertEqual(
            Post.objects(channels__in=[self.channel.id]).count(), 2)

        for level in ('hour', 'day'):
            # Full span covers both posts' slots.
            result = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[['carrot', True]],
                from_ts=datetime_to_timeslot(past_dt, level),
                to_ts=datetime_to_timeslot(None, level))
            self.assertEqual(len(result), 2)

            # Start moved one slot past the older post: only one hit left.
            result = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[['carrot', True]],
                from_ts=datetime_to_timeslot(
                    past_dt + relativedelta(**{level + 's': 1}), level),
                to_ts=datetime_to_timeslot(None, level))
            self.assertEqual(len(result), 1)
示例#12
0
    def test_timeslot_conversion(self):
        """A post's stored creation time maps to the same hour slot as the
        datetime it was created with."""
        week_ago = now() - timedelta(minutes=7 * 24 * 60)
        expected_slot = datetime_to_timeslot(week_ago, 'hour')

        post = self._create_db_post(_created=week_ago,
                                    content='i need some carrot')
        self.assertEqual(expected_slot,
                         datetime_to_timeslot(post.created, 'hour'))
示例#13
0
    def setUp(self):
        """Clear all hot-topic stats and cache today's month/day timeslots."""
        super(Purging, self).setUp()
        # Start Fresh
        ChannelHotTopics.objects.coll.remove()
        self.assertEqual(ChannelHotTopics.objects.count(), 0)

        # Current month/day slots for the tests below.
        self.this_month = datetime_to_timeslot(now(), 'month')
        self.this_day = datetime_to_timeslot(now(), 'day')
示例#14
0
    def test_new_post_search(self):
        """An ACTIONABLE post is found in a span covering its creation hour."""
        content = 'I need a bike. I like Honda.'
        post = self._create_db_post(content)

        # Span = [creation hour, creation hour + 1]; no agent filter.
        posts = fetch_posts([self.channel],
                            datetime_to_timeslot(post.created_at, 'hour'),
                            datetime_to_timeslot(
                                post.created_at + timedelta(hours=1), 'hour'),
                            [dict(topic='honda', topic_type='leaf')],
                            [SpeechActMap.ACTIONABLE],
                            intentions=[],
                            min_conf=0.5,
                            agents=[])

        self.assertEqual(len(posts), 1)
示例#15
0
    def test_integer_id(self):
        """Range queries over packed integer ids are correct when the
        timeslot occupies the high-order bits of the id."""
        def make_id_ts_left(time_slot, dummy):
            # Timeslot in the high 22 bits => id order follows time order.
            components = (
                (time_slot, 22),
                (dummy, 42),
            )
            id_ = pack_components(*components)
            return id_

        def make_id_ts_right(time_slot, dummy):
            # Opposite layout (timeslot in the low bits); not used below.
            components = ((dummy, 42), (time_slot, 22))
            id_ = pack_components(*components)
            return id_

        TimeSlotIntegerId.objects.coll.remove()
        from solariat.utils.timeslot import datetime_to_timeslot, parse_date_interval, timeslot_to_datetime
        start_date, end_date = parse_date_interval('02/21/2013', '05/21/2013')
        step = timedelta(hours=24)
        dates = []
        while start_date < end_date:
            dates.append(start_date)
            start_date += step
        # 90 daily steps — presumably the interval end is inclusive
        # (end-of-day); confirm against parse_date_interval.
        assert len(dates) == 90

        data = enumerate(dates[::-1], start=100)

        # Insert in reverse-chronological order with dummy cycling 0..4.
        for dummy, date in data:
            dummy %= 5
            time_slot = datetime_to_timeslot(date)
            id_ = make_id_ts_left(time_slot, dummy)
            doc = TimeSlotIntegerId(id=id_, time_slot=time_slot, dummy=dummy)
            doc.save()
        #print list(TimeSlotIntegerId.objects.coll.find())

        #fetch interval
        start_date, end_date = parse_date_interval('03/21/2013', '04/21/2013')
        #start_dummy = 0
        #end_dummy = (1L << 41) - 1
        start_id = make_id_ts_left(datetime_to_timeslot(start_date, 'hour'), 0)
        end_id = make_id_ts_left(datetime_to_timeslot(end_date, 'hour'), 0)
        # print start_id.bit_length()
        # print end_id.bit_length()
        # Every doc in the id range must fall inside the timeslot range.
        for doc in TimeSlotIntegerId.objects(id__gte=start_id, id__lte=end_id):
            print timeslot_to_datetime(doc.time_slot)
            self.assertGreaterEqual(doc.time_slot,
                                    datetime_to_timeslot(start_date, 'hour'))
            self.assertLessEqual(doc.time_slot,
                                 datetime_to_timeslot(end_date, 'hour'))
示例#16
0
    def test_speech_acts_creation(self):
        """Each sentence of a post yields a retrievable SpeechActMap entry
        keyed by the packed speech-act-map id."""
        content = 'I need a bike. I like Honda.'
        post = self._create_db_post(content)
        self.assertEqual(post.speech_acts[0]['content'], 'I need a bike.')
        self.assertEqual(post.speech_acts[1]['content'], ' I like Honda.')

        # Verify the id for the SpeechActMap, and make sure we can get it.
        sam_id = pack_speech_act_map_id(
            self.channel,
            SpeechActMap.ACTIONABLE,  # status
            datetime_to_timeslot(post.created),  # hour-timeslot of the post
            post,  # post
            0)  # index of the first speech act

        sam = SpeechActMap.objects.get(id=sam_id)
        self.assertTrue(sam)

        self.assertEqual(str(sam.post), str(post.id))
        self.assertEqual(sam.idx, 0)

        # Verify we can retrieve the speech act objects
        sam_ids = [
            m.id for m in make_objects(self.channel, post, post.speech_acts,
                                       'highlighted')
        ]

        self.assertEqual(len(sam_ids), 2)

        sams = SpeechActMap.objects(id__in=sam_ids)
        self.assertEqual(len(sams), len(sam_ids))

        sam = [m for m in sams
               if m.id != sam_id][0]  # we want the other one this time
        self.assertEqual(str(sam.post), str(post.id))
        self.assertEqual(sam.idx, 1)
示例#17
0
    def test_topics_in_speech_acts(self):
        """
        This is related to issue #1623
        https://github.com/solariat/tango/issues/1623

        At some point if post had "DellCares" the keyword "Dell" still will be identified in it.
        """
        content = '#solariat and @some_tag, How can I contact DellCares?'

        # create keyword tracking channel with keyword 'dell'
        keywords = ['dell', '#solariat', '@some_tag']
        channel = KTC.objects.create_by_user(self.user,
                                             title='Inbound Channel',
                                             keywords=keywords)

        post = self._create_db_post(content, channel=channel)

        sam_id = pack_speech_act_map_id(
            channel,
            SpeechActMap.ACTIONABLE,  # status
            datetime_to_timeslot(post.created),  # hour-timeslot of the post
            post,  # post
            0)

        sam = SpeechActMap.objects.get(id=sam_id)
        self.assertTrue('#solariat' in sam.to_dict()['topics'])
        # BUG FIX: the original read assertTrue("en", '@some_tag' in ...),
        # which asserts the always-truthy string "en" and demotes the
        # membership check to the failure message — it could never fail.
        self.assertTrue('@some_tag' in sam.to_dict()['topics'])
        self.assertTrue('dellcares' in sam.to_dict()['topics'])
        self.assertFalse('dell' in sam.to_dict()['topics'])
示例#18
0
def make_objects(channel, post, speech_acts, status, agent=0):
    """Yield one SpeechActMap per speech act of *post*.

    Each entry's id packs the channel number, status, the post's hour
    timeslot, the post and the speech-act index.
    """
    chan_num = get_channel_num(channel)
    slot = datetime_to_timeslot(post.created_at, 'hour')

    for idx, act in enumerate(speech_acts):
        tuples = get_topic_tuples(act, post, channel=channel)
        packed_id = pack_speech_act_map_id(chan_num, status, slot, post, idx)

        yield SpeechActMap(id=packed_id,
                           channel=get_channel_id(channel),
                           post=post.id,
                           agent=agent,
                           language=get_lang_id(post.language),
                           idx=idx,
                           intention_type_id=int(act['intention_type_id']),
                           intention_type_conf=act['intention_type_conf'],
                           time_slot=slot,
                           created_at=post.created_at,
                           topic_tuples=tuples,
                           message_type=post._message_type)
示例#19
0
def _update_stats(response, channel_id, n=1):
    """Update ResponseTermStats for every topic in the response's post.

    NOTE(review): currently disabled — the bare ``return`` below makes
    everything after it dead code until the TODO is resolved.
    """
    #TODO: FIX
    return
    post = response.post
    # Day and month timeslots for the post's creation time.
    timeslots = dict((level, datetime_to_timeslot(post.created, level))
                     for level in ('day', 'month'))

    def increment(topic, intention_id, is_leaf=True):
        """ Recursively increments stats for topic and all its parents
        """
        for level, timeslot in timeslots.items():
            ResponseTermStats.increment(channel_id=channel_id,
                                        topic=topic,
                                        intention_id=intention_id,
                                        timeslot=timeslot,
                                        is_leaf=is_leaf,
                                        response=response,
                                        n=n)

        # recursive calls to also update parent stats
        for term in get_largest_subtopics(topic):
            assert term, 'topic=%r, term=%r' % (topic, term)
            increment(term, intention_id, is_leaf=False)

    for sa in response.post.speech_acts:
        if sa['intention_type'] == 'DISCARDED':
            continue
        intention_id = SATYPE_TITLE_TO_ID_MAP[sa['intention_type']]

        for topic in sa['intention_topics']:
            assert topic
            increment(topic, intention_id, is_leaf=True)
示例#20
0
    def setUp(self):
        """Create a post every 2 hours across 04/24/2013 for plot-data tests."""
        super(TopicCountPlotDataCase, self).setUp()
        self.login()
        self.start_date = timeslot.parse_date('04/24/2013')
        self.end_date = timeslot.parse_date('04/25/2013')
        self.level = 'hour'

        timeline = []

        # Build the 2-hour ticks covering the day.
        start_date = timeslot.parse_date('04/24/2013')
        while start_date < self.end_date:
            timeline.append(start_date)
            start_date += timeslot.timedelta(seconds=60 * 60 *
                                             2)  #every 2 hours

        self.time_slots = map(lambda d: datetime_to_timeslot(d, 'hour'),
                              timeline)

        # Four rotating contents: two topics x two intentions.
        contents = cycle([
            'I need a laptop',  #laptop, need (intention id=2)
            'My laptop is not working out for me:(',  #laptop, problem (intention id=3)
            'I need a display',  #display, need
            'My display is not working out for me:(',  #display, problem
        ])
        posts = []

        for _created in timeline:
            post = self._create_db_post(contents.next(), _created=_created)
            #print post.speech_acts
            posts.append(post)
示例#21
0
    def test_impressions(self):
        "Test impressions stats"

        pl1 = self._create_db_matchable('foo')
        pl2 = self._create_db_matchable('bar')
        pl3 = self._create_db_matchable('baz')

        # Post content that can match all three matchables.
        response = self.do_post('posts',
                                version='v1.2',
                                channel=str(self.channel.id),
                                content='i need a foo for bar but not baz')

        post_dict = response['item']

        #matchables = post_dict['matchables']

        # Register two impressions and one reject against the post.
        response = self.do_post('postmatches',
                                version='v1.2',
                                post=post_dict['id'],
                                impressions=[str(pl1.id),
                                             str(pl2.id)],
                                rejects=[str(pl3.id)])

        self.assertEqual(response['item']['rejects'][0], str(pl3.id))

        time_slot = timeslot.datetime_to_timeslot(Post.objects()[0].created)

        response = self.do_get('channelstats',
                               version='v1.2',
                               channel=str(self.channel.id),
                               time_slot=time_slot)  # month stats object
        stats = response['list'][0]
        # Exactly the two impressions registered above.
        self.assertEqual(stats['number_of_impressions'], 2)
示例#22
0
    def test_post_creation(self):
        """by_time_point finds the post by topic and status in its hour slot,
        with and without an explicit intention filter."""
        content = 'I need a bike. I like Honda.'
        post = self._create_db_post(content)
        self.assertEqual(post.speech_acts[0]['content'], 'I need a bike.')
        self.assertEqual(post.speech_acts[1]['content'], ' I like Honda.')

        time_slot = datetime_to_timeslot(post.created, 'hour')

        # single filters
        posts, are_more_posts_available = Post.objects.by_time_point(
            self.channel,
            'bike',
            time_slot,
            status=SpeechActMap.POTENTIAL,
            intention=sa_labels.NEEDS.oid,
        )

        # verify the result has the right content
        self.assertEqual(len(posts), 1)
        self.assertEqual(posts[0].content, post.content)

        # multiple filters
        posts, are_more_posts_available = Post.objects.by_time_point(
            self.channel,
            ['bike', 'honda'],
            time_slot,
            status=SpeechActMap.POTENTIAL,
            # no explicit intention id means ALL INTENTIONS
        )

        self.assertEqual(len(posts), 1)
示例#23
0
    def test_stat_update(self):
        """Two rounds of hot-topic increments (anonymous, then a specific
        agent) roll up into the expected counter combinations."""
        Leaf = Topic = True
        Node = Term = False
        HELP = 10
        JUNK = 12
        EN = Lang.EN
        LALL = Lang.ALL

        time_slot = datetime_to_timeslot(now(), 'day')

        topic = 'laptop'
        agent_id = 12345
        hashed_parents = map(mhash, get_largest_subtopics(topic))

        stat = ChannelHotTopics(channel_num=self.channel.counter,
                                time_slot=time_slot,
                                topic=topic,
                                status=0,
                                hashed_parents=hashed_parents)

        # Anonymous-agent increments: one leaf/JUNK, one node/HELP, both EN.
        stat.compute_increments(is_leaf=True,
                                intention_id=JUNK,
                                agent=None,
                                lang_id=Lang.EN,
                                n=1)
        stat.compute_increments(is_leaf=False,
                                intention_id=HELP,
                                agent=None,
                                lang_id=Lang.EN,
                                n=1)
        stat.upsert()
        stat = ChannelHotTopics.objects.get(id=stat.id)
        # Specific-agent increment (no lang_id => language-agnostic).
        stat.compute_increments(is_leaf=True,
                                intention_id=JUNK,
                                agent=agent_id,
                                n=2)
        stat.upsert()

        stat.reload()

        expected_stats = [
            # agent | is_leaf | intent | language | topic_count
            (ALL_AGENTS, Term, ALL_INTENTIONS_INT, LALL, 1),
            (ALL_AGENTS, Term, ALL_INTENTIONS_INT, EN, 1),
            (ALL_AGENTS, Term, HELP, LALL, 1),
            (ALL_AGENTS, Term, HELP, EN, 1),
            (ALL_AGENTS, Topic, ALL_INTENTIONS_INT, LALL,
             1 + 2),  # +2 from specific agent
            (ALL_AGENTS, Topic, JUNK, LALL, 1 + 2),
            (ALL_AGENTS, Topic, JUNK, EN, 1),
            (ALL_AGENTS, Topic, ALL_INTENTIONS_INT, EN, 1),
            (agent_id, Topic, ALL_INTENTIONS_INT, LALL, 2),
            (agent_id, Topic, JUNK, LALL, 2)
        ]

        self.assert_stats(stat, expected_stats)

        self.assertFalse(stat.filter(agent=0, is_leaf=True,
                                     intention=10))  #no such combination
示例#24
0
def yield_channel_stats(channel, agent, date):
    """Yield one ServiceChannelStats per timeslot level for `date`.

    A `date` of None means the current time.
    """
    effective_date = date if date is not None else now()
    for level in TIMESLOT_LEVEL_NAMES:
        slot = datetime_to_timeslot(effective_date, level)
        yield ServiceChannelStats(channel=str(channel.id),
                                  time_slot=slot,
                                  agent=agent)
示例#25
0
    def _assert_topic_extraction(self, expect_topics):
        """Create three lawnmower posts, then assert trend result counts.

        expect_topics: iterable of (topic, is_leaf, expected_count) tuples,
        each checked at both hour and day levels over the last month.
        """
        past_dt = now() - relativedelta(months=1)
        posts = [
            "I need a riding lawnmower", "I need a lawnmower",
            "I need a side ride push lawnmower"
        ]
        for content in posts:
            self._create_db_post(content)

        for (topic, is_leaf, cnt) in expect_topics:
            for level in ('hour', 'day'):
                result = ChannelTopicTrends.objects.by_time_span(
                    channel=self.channel,
                    topic_pairs=[(topic, is_leaf)],
                    from_ts=datetime_to_timeslot(past_dt, level),
                    to_ts=datetime_to_timeslot(None, level))
                self.assertEqual(len(result), cnt)
示例#26
0
    def test_stat_update(self):
        """Topic-trend increments across anonymous and specific agents
        roll up into the expected counter combinations."""
        time_slot = datetime_to_timeslot(now(), 'hour')

        topic = 'laptop'
        agent_id = 12345

        stat = ChannelTopicTrends(channel=self.channel,
                                  time_slot=time_slot,
                                  topic=topic,
                                  status=0)

        # Anonymous increments: leaf/JUNK and node/HELP, one each.
        stat.compute_increments(is_leaf=True,
                                intention_ids=JUNK,
                                agent=None,
                                inc_dict={'topic_count': 1},
                                n=1)
        stat.compute_increments(is_leaf=False,
                                intention_ids=HELP,
                                agent=None,
                                inc_dict={'topic_count': 1},
                                n=1)
        stat.upsert()

        stat = ChannelTopicTrends.objects.get(id=stat.id)

        # Second round: a specific agent (leaf) and English-tagged (node).
        stat.compute_increments(is_leaf=True,
                                intention_ids=JUNK,
                                agent=agent_id,
                                inc_dict={'topic_count': 2},
                                n=1)

        stat.compute_increments(is_leaf=False,
                                intention_ids=HELP,
                                agent=None,
                                lang_id=EN,
                                inc_dict={'topic_count': 2},
                                n=1)
        stat.upsert()

        stat.reload()

        # (agent, is_leaf, intention, language, expected topic_count)
        expected_stats = [
            (ALL_AGENTS, Term, ALL_INTENTIONS_INT, LALL, 1 + 2),  # +2 for EN
            (ALL_AGENTS, Term, HELP, LALL, 1 + 2),
            (ALL_AGENTS, Term, ALL_INTENTIONS_INT, EN, 2),
            (ALL_AGENTS, Term, HELP, EN, 2),
            (ALL_AGENTS, Topic, ALL_INTENTIONS_INT, LALL,
             1 + 2),  # +2 from specific agent
            (ALL_AGENTS, Topic, JUNK, LALL, 1 + 2),
            (agent_id, Topic, ALL_INTENTIONS_INT, LALL, 2),
            (agent_id, Topic, JUNK, LALL, 2)
        ]

        self.assert_stats(stat, expected_stats)

        self.assertFalse(stat.filter(agent=0, is_leaf=True,
                                     intention=10))  # no such combination
示例#27
0
    def test_purge_none(self):
        """Purge bookkeeping: the first run covers the whole backlog; later
        runs depend on the (mocked) day of the month."""
        TWO_DAYS_AGO = now() - timedelta(days=2)
        self._make_laptops_and_icecream(TWO_DAYS_AGO)
        stats = purge_stats(self.channel)
        last_purged = stats["last_purged"]
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.channel.reload()
        # last_purged is persisted on the channel (compared at hour resolution).
        self.assertEqual(
            datetime_to_timeslot(self.channel.last_purged, 'hour'),
            datetime_to_timeslot(last_purged, 'hour'))

        # Should have purged over 15 days for time slots since we never urged before
        self.assertEqual(len(days), 15)
        # Months purged depends on how far in we are to the month when we run the test
        self.assertTrue(len(months) in [2, 3])

        import solariat_bottle.utils.purging

        # Patch localtime to day 6 of the month: only today gets purged.
        class MockLocaltime(object):
            tm_mday = 6

        solariat_bottle.utils.purging.localtime = MockLocaltime
        stats = purge_stats(self.channel)
        last_purged = stats["last_purged"]
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.assertEqual(len(days), 1)
        self.assertEqual(days[0],
                         decode_timeslot(Timeslot(level='day').timeslot))
        self.assertEqual(len(months), 0)

        # Patch localtime to day 8: the current month is purged as well.
        class MockLocaltime(object):
            tm_mday = 8

        solariat_bottle.utils.purging.localtime = MockLocaltime
        stats = purge_stats(self.channel)
        last_purged = stats["last_purged"]
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.assertEqual(len(days), 1)
        self.assertEqual(len(months), 1)
        self.assertEqual(months[0],
                         decode_timeslot(Timeslot(level='month').timeslot))
示例#28
0
def discard_outdated_topics_for_day_level(channel):
    """Drop day-level hot-topic records older than the retention window.

    Removes every ChannelHotTopics day slot between the epoch and
    HOT_TOPICS_DAY_STATS_KEEP_DAYS days ago for `channel`, logs the purge
    timing, and returns the number of records removed.
    """
    keep_days = get_var('HOT_TOPICS_DAY_STATS_KEEP_DAYS')
    cutoff_ts = datetime_to_timeslot(now() - timedelta(days=keep_days), 'day')
    epoch_ts = datetime_to_timeslot(TIMESLOT_EPOCH, 'day')

    started = datetime.now()
    res = ChannelHotTopics.objects.coll.remove({
        F('channel_num'): get_channel_num(channel),
        F('time_slot'): {
            '$lt': cutoff_ts,
            '$gte': epoch_ts
        }
    })
    LOGGER.info(
        "purging Q:: channel: %s; collection: ChannelHotTopics; func: %s; timedelta: %s"
        % (channel.title, inspect.stack()[0][3], datetime.now() - started))
    return res["n"]
示例#29
0
 def get_inbound_post(self):
     """Fetch and return the single inbound post, matching any SAM status."""
     statuses = [
         SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE,
         SpeechActMap.ACTUAL, SpeechActMap.REJECTED
     ]
     # Hour slot of the known inbound post's creation time.
     time_slot = datetime_to_timeslot(self.inbound_post_created_at, 'hour')
     posts, are_more_posts_available = Post.objects.by_time_point(
         self.inbound, ALL_TOPICS, time_slot, status=statuses)
     self.assertEqual(len(posts), 1)
     return posts[0]
示例#30
0
    def test_post_search(self):
        """A created post is found by topic within its creation hour slot."""
        content = 'I need a bike. I like Honda.'
        post = self._create_db_post(content)

        self.assertEqual(post.addressee, None)

        posts, are_more_posts_available = Post.objects.by_time_point(
            self.channel, 'honda',
            datetime_to_timeslot(post.created_at, 'hour'))[:]

        self.assertEqual(posts[0].id, post.id)