Exemplo n.º 1
0
    def test_facebook_post(self):
        """Check which timestamp a Facebook status post ends up with.

        The native payload carries a ``created_at`` 300 days before
        ``TIMESLOT_EPOCH`` and an ``updated_time`` of the current second.
        Both ``post.created_at`` and the creation time unpacked from the
        post id are expected to equal the ``updated_time`` value.
        """
        update_stamp = now().replace(microsecond=0)
        native_created = TIMESLOT_EPOCH - timedelta(days=300)
        caption = 'post image caption'
        native_post_id = "12345678901_123456789101"

        from solariat_bottle.db.channel.facebook import FacebookServiceChannel
        fb_channel = FacebookServiceChannel.objects.create_by_user(
            self.user, title='Fb')

        wrapped = {
            'type': 'status',
            'source_id': 'fake',
            'source_type': 'event',
            'created_at': str(native_created),
            'updated_time': str(update_stamp),
        }
        facebook_payload = {
            'facebook_post_id': native_post_id,
            'text': caption,
            'created_at': str(native_created),
            '_wrapped_data': wrapped,
        }

        post = self._create_db_post(
            content=caption,
            channels=[fb_channel],
            user_profile={'user_name': 'fb_test_user'},
            facebook=facebook_payload)

        # Only the time component of the unpacked event id is checked here.
        _, unpacked_created = unpack_event_id(post.id)
        self.assertEqual(post.created_at.replace(microsecond=0), update_stamp)
        self.assertEqual(unpacked_created.replace(microsecond=0), update_stamp)
Exemplo n.º 2
0
    def setUp(self):
        """Log in and seed one post every two hours across 04/24/2013.

        Stores the date range, the ``'hour'`` level and the hourly timeslots
        of the seeded posts on the test case.
        """
        super(TopicCountPlotDataCase, self).setUp()
        self.login()
        self.start_date = timeslot.parse_date('04/24/2013')
        self.end_date = timeslot.parse_date('04/25/2013')
        self.level = 'hour'

        # One timestamp every 2 hours from the start date up to (not
        # including) the end date.
        timeline = []
        moment = timeslot.parse_date('04/24/2013')
        while moment < self.end_date:
            timeline.append(moment)
            moment += timeslot.timedelta(seconds=60 * 60 * 2)

        self.time_slots = [datetime_to_timeslot(moment, 'hour')
                           for moment in timeline]

        samples = cycle([
            'I need a laptop',  # laptop, need (intention id=2)
            'My laptop is not working out for me:(',  # laptop, problem (intention id=3)
            'I need a display',  # display, need
            'My display is not working out for me:(',  # display, problem
        ])

        posts = []
        for stamp in timeline:
            posts.append(self._create_db_post(next(samples), _created=stamp))
Exemplo n.º 3
0
    def get_method_params(self):
        """Build the request parameters for a search from ``self.filters``.

        Starts from the parent class parameters and adds the entity flag,
        the result count, the query built from the ``keywords`` filter and
        the language (``language`` filter, falling back to ``lang``).

        Returns:
            The parameter mapping, or ``None`` when the built query is empty.
            ``since`` / ``until`` are added, formatted with
            ``self.DATE_FORMAT``, when the matching date filter is set.
        """
        lookup = self.filters.get
        params = super(SearchRequest, self).get_method_params()

        query = self.build_query(lookup('keywords'))
        language = lookup('language', lookup('lang'))
        params.update({
            "include_entities": True,
            "count": self.SEARCH_LIMIT,
            "q": query,
            "lang": language
        })

        # Nothing to search for without a query.
        if not params['q']:
            return None

        since = lookup('start_date')
        until = lookup('end_date')

        if since:
            params['since'] = since.strftime(self.DATE_FORMAT)

        if until:
            # An end date with a non-zero hour or minute is moved one day
            # forward before formatting; seconds are not looked at.
            # NOTE(review): presumably because the remote `until` bound is
            # exclusive at day granularity - confirm against the API.
            if until.hour or until.minute:
                until = until + timedelta(days=1)
            params['until'] = until.strftime(self.DATE_FORMAT)

        return params
Exemplo n.º 4
0
    def test_fetch_by_date(self):
        """Check the ``start_date`` and ``end_date`` filters of DirectMessagesFetcher.

        With a start date one second after the fake DM at index 300, 300
        statuses are expected. Adding an end date taken from the DM at index
        150 is expected to cut the result to 150 statuses, all dated inside
        the requested interval.
        """
        from solariat.utils.timeslot import parse_datetime

        tick = timedelta(seconds=1)

        # start_date only; 300 items make the fetcher issue 2 requests
        # (max response length: 200)
        FakeTwitterApi.restore_settings()
        api = FakeTwitterApi()
        lower_bound = api.DM[300][1] + tick
        LOGGER.debug('++++++ DATES: (%s)', (lower_bound, api.DM[200][1], api.DM[100][1]))

        fetcher = DirectMessagesFetcher(api, start_date=lower_bound)
        fetched = list(fetcher.fetch())
        self.assertEqual(len(fetched), 300)

        # start_date together with end_date
        FakeTwitterApi.restore_settings()
        api = FakeTwitterApi()
        lower_bound = api.DM[300][1] + tick
        upper_bound = api.DM[150][1]

        fetcher = DirectMessagesFetcher(api, start_date=lower_bound, end_date=upper_bound)
        fetched = list(fetcher.fetch())
        self.assertEqual(len(fetched), 150)

        in_range = [lower_bound <= parse_datetime(status['created_at']) <= upper_bound
                    for status in fetched]
        self.assertTrue(all(in_range))
Exemplo n.º 5
0
 def test_export_analytics(self):
     """Request an analytics export and check the response, mail and task state.

     Expects the success message for the current user's e-mail, a task
     description containing at least the id / input_filter_hash / state /
     created_at keys, a non-empty first attachment on the first recorded
     mail, and exactly one DataExport in the SUCCESS state.
     """
     window_end = self.created_at + timedelta(hours=1)
     export_filter = {
         'channel_id': str(self.channel.id),
         'from': self.created_at_str,
         'to': window_end.strftime('%Y-%m-%d %H:%M:%S'),
         'topics': [{'topic': 'laptop', 'topic_type': 'node'}],
         'intentions': [],
         'thresholds': dict(intention=.1),
         'statuses': ['actionable'],
     }
     required_task_keys = {'id', 'input_filter_hash', 'state', 'created_at'}

     with mail.record_messages() as outbox:
         resp = self.do_export_request(export_filter)
         expected_message = ExportPostsView.SUCCESS_MSG_TPL % self.user.email
         self.assertEqual(resp['message'], expected_message)
         # Strict subset: the task must carry more than just the required keys.
         self.assertTrue(required_task_keys < set(resp['task']), resp)
         first_attachment = outbox[0].attachments[0]
         self.assertTrue(first_attachment)

     self.assertEqual(DataExport.objects.count(), 1)
     export_item = DataExport.objects.get()
     self.assertEqual(export_item.state, DataExport.State.SUCCESS)
Exemplo n.º 6
0
    def setup_posts(self):
        """Create one parent tweet and five replies around ``self.created_at``.

        ``self.created_at`` is set to the current time and
        ``self.created_at_str`` to its ``'%Y-%m-%d %H:%M:%S'`` rendering.
        The parent is dated one millisecond earlier; reply ``i`` is dated
        ``i`` milliseconds later.
        """
        self.created_at = now()
        self.created_at_str = self.created_at.strftime('%Y-%m-%d %H:%M:%S')

        parent = self._create_tweet(
            'parent tweet',
            _created=self.created_at - timedelta(milliseconds=1))

        reply_texts = (
            'I need a bike. I like Honda.',
            'Can somebody recommend a sturdy laptop?',
            'I need an affordabl laptop. And a laptop bag',
            'Whatever you buy, let it be an Apple laptop',
            'I would like to have a thin and lightweight laptop.',
        )
        for offset_ms, text in enumerate(reply_texts):
            self._create_tweet(
                content=text,
                _created=self.created_at + timedelta(milliseconds=offset_ms),
                in_reply_to=parent)
Exemplo n.º 7
0
 def gen_id(padding=0, **kw):
     """Build a FacebookPost id from keyword data, optionally shifting its time.

     Args:
         padding: milliseconds added to ``kw['_created']``.
         **kw: must contain ``is_inbound``, ``actor_id`` and ``_created``;
             an optional ``facebook_data`` mapping supplies
             ``in_reply_to_status_id``.

     Returns:
         The value returned by ``FacebookPost.gen_id``.
     """
     fb_data = kw.pop('facebook_data', None)
     inbound = kw['is_inbound']
     actor = kw['actor_id']
     if fb_data:
         parent_native_id = fb_data.get('in_reply_to_status_id')
     else:
         parent_native_id = None
     shifted_created = kw['_created'] + timedelta(milliseconds=padding)
     return FacebookPost.gen_id(
         is_inbound=inbound,
         actor_id=actor,
         in_reply_to_native_id=parent_native_id,
         _created=shifted_created)
Exemplo n.º 8
0
        def __getitem__(self, index):
            """Return the ``(id, created_at)`` pair of the item at ``index``.

            Index 0 is the most recent item. Each step back lowers the id by
            ``self.id_inc`` and the timestamp by ``self.created_at_inc_sec``
            seconds.
            """
            item_id = self._last_id - index * self.id_inc
            age = timedelta(seconds=(index * self.created_at_inc_sec))
            return item_id, self._to_date - age
Exemplo n.º 9
0
        def __init__(self, init_id, data_len, from_date, to_date):
            """Set up id and timestamp counters for ``data_len`` items.

            Args:
                init_id: starting id, stored as both ``_last_id`` and ``id``.
                data_len: number of items the date range is divided into.
                from_date: start of the date range.
                to_date: end of the date range, stored as both ``_to_date``
                    and ``created_at``.
            """
            self.data_len = data_len

            # Timestamps start at the end of the range.
            self._to_date = self.created_at = to_date
            # Spacing between consecutive items, in seconds and as a timedelta.
            step_seconds = (to_date - from_date).total_seconds() / data_len
            self.created_at_inc_sec = step_seconds
            self.created_at_inc = timedelta(seconds=step_seconds)

            self._last_id = self.id = init_id
            self.id_inc = 11
Exemplo n.º 10
0
 def restore_settings(cls):
     """Reset the class-level fake-data settings and re-initialise paging state.

     Every per-source data length is set to 500, ``ALL_DATA_LENGTH`` to
     their sum, the creation window to the five days ending now, and
     ``RAISE_EXCEPTION`` to ``False``; then ``init_next_params()`` is called.
     """
     length_attrs = (
         'SEARCH_DATA_LENGTH',
         'TIMELINE_DATA_LENGTH',
         'DM_DATA_LENGTH',
         'DM_SENT_DATA_LENGTH',
     )
     for attr in length_attrs:
         setattr(cls, attr, 500)
     cls.ALL_DATA_LENGTH = sum([getattr(cls, attr) for attr in length_attrs])

     window_end = now()
     cls.CREATED_TO = window_end
     cls.CREATED_FROM = cls.CREATED_TO - timedelta(days=5)
     # if True, return first response, fail on second
     cls.RAISE_EXCEPTION = False
     cls.init_next_params()
Exemplo n.º 11
0
    def handle_rate_limit_error(self, error, path, failed_request_time, log_item):
        """Record a Facebook rate-limit failure and work out the wait deadline.

        Returns ``None`` when the error code parsed from ``error`` is not in
        ``FB_RATE_LIMIT_ERRORS``; otherwise returns the result of
        ``FacebookRateLimitInfo.objects.add_rate_limit_info``.

        The deadline is taken from the previous record for the same token,
        error code and path when that record's failure time plus its wait
        time (scaled by ``factor`` and capped at ``end``) is still later than
        ``failed_request_time``. Otherwise it is ``start`` seconds after
        ``failed_request_time``.
        """
        manager = FacebookRateLimitInfo.objects
        error_code = self._parse_fb_error_code(error)
        if error_code not in FB_RATE_LIMIT_ERRORS:
            return None

        previous = manager.get_last_rate_limit_info(
            self.access_token, error_code, path)
        limits = FacebookRateLimitInfo.LIMITS_CONFIG[error_code]

        wait_until = None
        if previous:
            scaled_seconds = min(limits.end, previous.wait_time * limits.factor)
            extended_deadline = (previous.failed_request_time +
                                 timedelta(seconds=scaled_seconds))
            if utc(extended_deadline) > utc(failed_request_time):
                wait_until = extended_deadline
        if wait_until is None:
            # No previous record, or its extended deadline has already passed.
            wait_until = timedelta(seconds=limits.start) + failed_request_time

        after = previous and previous.wait_until
        return manager.add_rate_limit_info(
            self.access_token, error_code, utc(failed_request_time), path,
            utc(wait_until), str(self._channel), after, log_item)
Exemplo n.º 12
0
    def test_twitter_post(self):
        """Check which timestamp a tweet dated before the timeslot epoch gets.

        The native ``created_at`` is one minute before ``TIMESLOT_EPOCH``.
        ``post.created_at`` is expected to match the current second, and the
        creation time unpacked from the post id to lie within one second of
        now.
        """
        native_created = TIMESLOT_EPOCH - timedelta(minutes=1)
        text = 'tweet content'
        native_id = fake_status_id()

        twitter_payload = {
            'id': native_id,
            'created_at': str(native_created),
            'text': text,
        }
        post = self._create_db_post(
            content=text,
            channels=[self.channel],
            user_profile={'user_name': 'test_user'},
            twitter=twitter_payload)

        # Only the time component of the unpacked event id is checked here.
        _, unpacked_created = unpack_event_id(post.id)
        self.assertEqual(post.created_at.replace(microsecond=0),
                         now().replace(microsecond=0))

        tolerance = timedelta(seconds=1)
        earliest = now() - tolerance
        latest = now() + tolerance
        self.assertTrue(earliest <= unpacked_created <= latest,
                        unpacked_created)
Exemplo n.º 13
0
    def test_integer_id(self):
        """Range-query documents whose integer id packs the time slot first.

        Stores one TimeSlotIntegerId per day between 02/21/2013 and
        05/21/2013, then fetches by id range for 03/21/2013 - 04/21/2013 and
        checks every returned document's time slot lies inside that interval.
        """
        from solariat.utils.timeslot import datetime_to_timeslot, parse_date_interval, timedelta, timeslot_to_datetime

        # NOTE(review): 22 and 42 are presumably the bit widths given to each
        # component - confirm against pack_components.
        def make_id_ts_left(time_slot, dummy):
            return pack_components((time_slot, 22), (dummy, 42))

        def make_id_ts_right(time_slot, dummy):
            return pack_components((dummy, 42), (time_slot, 22))

        TimeSlotIntegerId.objects.coll.remove()

        range_start, range_end = parse_date_interval('02/21/2013', '05/21/2013')
        one_day = timedelta(hours=24)
        dates = []
        cursor = range_start
        while cursor < range_end:
            dates.append(cursor)
            cursor += one_day
        assert len(dates) == 90

        # Newest date first; the counter is folded into five dummy values.
        for counter, date in enumerate(reversed(dates), start=100):
            bucket = counter % 5
            slot = datetime_to_timeslot(date)
            doc = TimeSlotIntegerId(id=make_id_ts_left(slot, bucket),
                                    time_slot=slot,
                                    dummy=bucket)
            doc.save()

        # fetch interval
        start_date, end_date = parse_date_interval('03/21/2013', '04/21/2013')
        first_slot = datetime_to_timeslot(start_date, 'hour')
        last_slot = datetime_to_timeslot(end_date, 'hour')
        start_id = make_id_ts_left(first_slot, 0)
        end_id = make_id_ts_left(last_slot, 0)

        for doc in TimeSlotIntegerId.objects(id__gte=start_id, id__lte=end_id):
            print(timeslot_to_datetime(doc.time_slot))
            self.assertGreaterEqual(doc.time_slot, first_slot)
            self.assertLessEqual(doc.time_slot, last_slot)
Exemplo n.º 14
0
    def test_conversation_recovery_throttled(self):
        """Check that conversation recovery requests are throttled.

        After the queue fetch has issued one restore-conversation request,
        ``mark_corrupted()`` is expected to return False and issue no further
        request. Once ``last_recovery_ts`` is unset, ``None`` or an hour old,
        ``mark_corrupted()`` is expected to return True with exactly one
        request, and False again on the immediate retry.
        """
        from solariat.utils.timeslot import now, timedelta

        native_data, expected_conv_id = self.posts_native_data[2]
        service_channel = FacebookServiceChannel.objects.create_by_user(
            self.user, title='FBS', posts_tracking_enabled=True)

        post = self._create_db_post(native_data['_wrapped_data']['message'],
                                    channel=service_channel,
                                    facebook=native_data)
        self.assertEqual(service_channel.get_conversation_id(post),
                         expected_conv_id)

        conv = Conversation.objects.get()
        self.login(user=self.user)

        # Stand-in for the `requests` module while it is patched into sys.modules.
        requests = mock.MagicMock()
        params = {
            'channel': str(service_channel.id),
            'limit': 10,
            'reserve_time': 30,
            'mode': 'conversation',
            'token': self.auth_token,
        }

        def assert_requests_get_called_once():
            from solariat_bottle.settings import FBOT_URL, FB_DEFAULT_TOKEN
            query = '/json/restore-conversation?token=%s&conversation=%s' % (FB_DEFAULT_TOKEN, conv.id)
            requests.request.assert_called_once_with(
                'get', FBOT_URL + query, verify=False, timeout=None)

        stale_states = [
            dict(unset__last_recovery_ts=True),
            dict(last_recovery_ts=None),
            dict(last_recovery_ts=now() - timedelta(hours=1)),
        ]

        with mock.patch.dict('sys.modules', {'requests': requests}):
            response = self.client.get(get_api_url('queue/fetch'),
                                       data=json.dumps(params),
                                       content_type='application/json',
                                       base_url='https://localhost')
            assert_requests_get_called_once()
            # A second recovery right away is throttled: no new request.
            self.assertFalse(conv.mark_corrupted())
            assert_requests_get_called_once()

            for stale_state in stale_states:
                conv.update(**stale_state)
                requests.request.reset_mock()
                self.assertTrue(conv.mark_corrupted())
                assert_requests_get_called_once()

                self.assertFalse(conv.mark_corrupted())
                assert_requests_get_called_once()
Exemplo n.º 15
0
    def last_online(self, reference):
        """Return the latest non-SUSPEND event of a stream from the last hour.

        The stream is referenced by username or stream ref. Only the newest
        event (by ``t``, then ``_id``) newer than one hour is looked at. It
        is returned as the tuple ``(m, u, t, s)`` unless it is a SUSPEND
        event, in which case (or when there is no such event) ``None`` is
        returned.

        If the last event is ONLINE or KEEP_ALIVE,
        then bot was probably killed with SIGKILL.
        """
        ref_id = self.get_ref_id(reference)
        recent_for_ref = {"t": {"$gt": now() - timedelta(hours=1)}, "u": ref_id}
        newest_first = [("t", -1), ("_id", -1)]
        cursor = self.events_coll.find(recent_for_ref).sort(newest_first).limit(1)

        latest = None
        for record in cursor:
            if record['m'] == Events.EVENT_SUSPEND:
                continue
            latest = record

        if not latest:
            return None
        return latest['m'], latest['u'], latest['t'], latest['s']
Exemplo n.º 16
0
    def test_queue_integration(self):
        """ Covers full integration from starting Subscriber,
            through TwitterTimelineRequest's fetchers, HistoricLoader
            until PostCreator.create_post().

            The fake API data sizes and the per-request limits are reduced to
            keep the run short. The request limits are class attributes, so
            they are put back in a ``finally`` block: a failing assertion
            must not leak the reduced limits into other tests.
        """
        from solariat_bottle.settings import LOGGER
        from solariat_bottle.db.channel.twitter import TwitterServiceChannel
        from solariat_bottle.db.historic_data import QueuedHistoricData
        from solariat_bottle.db.post.base import Post
        from solariat_bottle.daemons.twitter.historics.timeline_request import \
            DirectMessagesRequest, SentDirectMessagesRequest, SearchRequest, UserTimelineRequest
        from solariat_bottle.db.user_profiles.user_profile import UserProfile

        # reduce amount of data for long-running integration test
        FakeTwitterApi.SEARCH_DATA_LENGTH = 50
        FakeTwitterApi.TIMELINE_DATA_LENGTH = 50
        FakeTwitterApi.DM_DATA_LENGTH = 50
        FakeTwitterApi.DM_SENT_DATA_LENGTH = 50
        FakeTwitterApi.ALL_DATA_LENGTH = 200
        FakeTwitterApi.CREATED_FROM = FakeTwitterApi.CREATED_TO - timedelta(days=1)
        FakeTwitterApi.init_next_params()

        try:
            SearchRequest.SEARCH_LIMIT = 10
            UserTimelineRequest.FETCH_LIMIT = 20
            DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20
            SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20

            profile = UserProfile.objects.upsert('Twitter', profile_data=dict(user_name='jarvis', user_id='99188210'))
            channel = TwitterServiceChannel.objects.create_by_user(self.user, title='SC')
            channel.add_username(profile.user_name)
            channel.add_keyword(u'keywörd')

            def get_id_date_pair(post_data):
                # Accept either the raw tweet or a payload wrapping it under 'twitter'.
                if 'twitter' in post_data:
                    post_data = post_data['twitter']
                return int(post_data['id']), post_data['created_at']

            fetched_data = []

            def _save_tweets(fn):
                # Wrap push_posts so every pushed tweet is recorded first.
                def decorated(tweets, *args, **kwargs):
                    LOGGER.debug('PUSH_POSTS, len:%s', len(tweets))
                    fetched_data.extend(get_id_date_pair(t) for t in tweets)
                    return fn(tweets, *args, **kwargs)
                return decorated

            queued_data = []

            def _save_queued_data(method):
                # Wrap the loader so the queue is compared with the fetched
                # data right before loading starts.
                def _method(*args, **kwargs):
                    queued_data[:] = [
                        get_id_date_pair(i.solariat_post_data) for i in
                        QueuedHistoricData.objects(subscription=subscription)
                    ]
                    LOGGER.debug('QUEUED_POSTS, len: %s', len(queued_data))
                    self.assertEqual(len(queued_data), FakeTwitterApi.ALL_DATA_LENGTH,
                                     msg="len=%d %s" % (len(queued_data), queued_data))
                    self.assertEqual(set(queued_data), set(fetched_data),
                                     msg=u"\nqueued =%s\nfetched=%s" % (queued_data, fetched_data))
                    return method(*args, **kwargs)
                return _method

            subscription = TwitterRestHistoricalSubscription.objects.create(
                created_by=self.user,
                channel_id=channel.id,
                from_date=FakeTwitterApi.CREATED_FROM,
                to_date=FakeTwitterApi.CREATED_TO
            )
            subscriber = TwitterHistoricsSubscriber(subscription)
            subscriber.push_posts = _save_tweets(subscriber.push_posts)
            subscriber.historic_loader.load = _save_queued_data(subscriber.historic_loader.load)

            subscriber.start_historic_load()
            self.assertEqual(subscriber.get_status(), SUBSCRIPTION_FINISHED)

            self.assertEqual(Post.objects(channels__in=[
                subscription.channel.inbound,
                subscription.channel.outbound]).count(), FakeTwitterApi.ALL_DATA_LENGTH)
        finally:
            # Restore the limits even when an assertion above fails.
            # NOTE(review): these are the values this test has always put
            # back; presumably they are the class defaults - confirm.
            SearchRequest.SEARCH_LIMIT = 100
            UserTimelineRequest.FETCH_LIMIT = 200
            DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
            SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
Exemplo n.º 17
0
    def event_stream(self, n=10, data_format='json', event_type='twitter'):
        """Return an in-memory stream holding ``n`` generated events.

        Args:
            n: number of events to write.
            data_format: ``'json'`` (one JSON document per line) or ``'csv'``
                (written through ``CsvPrinter``).
            event_type: key of the event template to use.

        Returns:
            A ``StringIO`` positioned at its start.
        """
        from solariat_bottle.db.post.chat import ChatProfile

        # Templates are serialised to JSON text so the %(id)s / %(now)s
        # placeholders can be filled with plain string formatting.
        raw_templates = {
            'twitter': {'content': 'i am a tweet',
                        'user_profile': {'user_name': 'test'},
                        'twitter': {'id': '%(id)s',
                                    'created_at': '%(now)s'}},
            'facebook': {'content': 'i am a tweet',
                         'user_profile': {'user_name': 'test'},
                         'facebook': {'facebook_post_id': '%(id)s',
                                      '_wrapped_data': {'type': 'status',
                                                        'source_type': 'status',
                                                        'source_id': 'somepageid'},
                                      'created_at': '%(now)s'}},
            'web': {'content': '',
                    '_platform': 'Web',
                    'url': 'g-tel.com',
                    'element_html': 'click'},
            'faq': {'content': '',
                    'query': 'How do I dispute an item in my bill?'},
            'branch': {'content': '',
                       '_platform': 'Branch',
                       'is_inbound': True},
            'chat': {'content': 'Hello. Can anyone tell me which iPhone is best for me?',
                     'chat_data': {'created_at': '%(now)s'}},
            'voice': {'content': 'What is my account balance?',
                      'chat_data': {'created_at': '%(now)s'}},
            'email': {'content': 'Hi by email, what is my account balance?',
                      'email_data': {
                          'cc': [],
                          'sender': '*****@*****.**',
                          'recipients': ['*****@*****.**'],
                          'subject': 'Test',
                          'created_at': '%(now)s',
                      }},
            # TODO: NPSOutcome couldn't be created in general way
            # 'voc': {'content': 'Good',
            #         'score': 8,
            #         'response_type': 'Passive',
            #         'case_number': 'a1-1',
            #         'user_profile': None,
            #         '_created': '%(now)s'
            #         },
        }
        templates = {name: json.dumps(body) for name, body in raw_templates.items()}

        from six import StringIO
        from solariat_bottle.scripts.data_load.demo_helpers import CsvPrinter

        class JsonPrinter(object):
            """Write each item as one JSON document per line."""

            def __init__(self, stream):
                self.stream = stream

            def write_data(self, data):
                self.stream.write(json.dumps(data))
                self.stream.write('\n')

        buffer = StringIO()
        printer_classes = {'json': JsonPrinter, 'csv': CsvPrinter}
        printer = printer_classes[data_format](buffer)

        for idx in range(n):
            # NOTE(review): 1e6 is a float, so the id is rendered like
            # '1000000.0' - confirm this is intended.
            event_id = idx + 1e6
            # Each event is dated 0-99 seconds in the past.
            event_time = now() - timedelta(seconds=random.randrange(100))
            filled = templates[event_type] % {'id': event_id, 'now': event_time}
            printer.write_data(json.loads(filled))

        buffer.seek(0)
        return buffer