示例#1
0
    def run(self):
        # Worker loop: take tasks from the input queue and turn each one into a
        # post, until the worker is stopped or a QUIT sentinel is received.
        inp_queue = self.inp_queue
        start_time = time.time()

        while not self.stopped():
            # make sure we intercept all errors
            try:
                task = inp_queue.get()
                # Both the sentinel object and the literal string end the loop.
                if task is self.QUIT or task == 'QUIT':
                    LOGGER.debug('received QUIT signal %s' % self)
                    break
                start_time = time.time()
                self._busy = True  # Just started doing our post processing
                post_fields = self.preprocess_post(task)
                if not post_fields:
                    LOGGER.warning('no post_fields in: %s', task)
                    # NOTE(review): this path skips the `_busy = False` reset
                    # below; confirm whether the finally body takes care of it.
                    continue

                # LOGGER.debug('creating post %r %s', post_fields.get('content'), inp_queue.qsize())

                # Only create the post when channel assignment succeeds;
                # otherwise count it as skipped.
                if self.assign_channels(post_fields):
                    self.create_post(**post_fields)
                else:
                    LOGGER.info('skipping post %r' %
                                post_fields.get('content'))
                    self.inc_skipped()

                self._busy = False  # Just Finished doing our post processing
            except Exception, err:
                # Log and keep looping: one bad task must not kill the worker.
                LOGGER.error(err, exc_info=True)
                pass

            finally:
                # NOTE(review): the body of this finally clause is not part of
                # the visible excerpt.
示例#2
0
 def inner_method(self, *args, **kwargs):
     """Call the wrapped ``method`` only if a historic id is stored.

     Returns whatever ``method`` returns when
     ``self.subscription.datasift_historic_id`` is truthy; otherwise logs a
     warning and returns False without calling ``method``.
     """
     if self.subscription.datasift_historic_id:
         return method(self, *args, **kwargs)

     LOGGER.warning(
         "Subscription %s does not have a historic id stored." %
         self.subscription)
     return False
示例#3
0
    def run(self):
        """Push posts for every generated request, then run the historic loader.

        The requests are materialized up front.  Each batch of tweets yielded
        by a request is pushed and the status is set to running.  Both loops
        stop as soon as ``self.stopped()`` reports True.  Afterwards the
        historic loader is run and the final status is recorded.

        Any exception is re-raised after the status has been updated:
        ``TwitterApiRateLimitError`` marks the subscription as waiting for the
        rate limits to reset, everything else marks it as errored.
        """
        try:
            for request in list(self.gen_requests()):
                if self.stopped():
                    break
                for tweets in request:
                    if self.stopped():
                        break
                    self.push_posts(
                        tweets,
                        post_data_format=request.post_data_format,
                        post_data_dump=request.post_data_dump)
                    self.update_status(SUBSCRIPTION_RUNNING)

            self.historic_loader.load()

            if not self.stopped():
                self.update_status(SUBSCRIPTION_FINISHED)
            else:
                self.update_status(SUBSCRIPTION_STOPPED)
                # update status, then wait for creator's work done
                self.historic_loader.creator.join()
                self.aggregate_state(self.historic_loader, {'finished': True})
        except TwitterApiRateLimitError as rate_limit_error:
            LOGGER.warning(
                'Waiting rate limits reset. Restart is needed in: %s sec.' %
                rate_limit_error.wait_for)
            self.update_status(SUBSCRIPTION_WAITING_LIMITS_RESET)
            raise
        except Exception:
            LOGGER.exception('Subscriber exception:', exc_info=True)
            self.update_status(SUBSCRIPTION_ERROR)
            raise
示例#4
0
    def post_authenticated(self, url, json=None, number_of_retries=None):
        """POST ``json`` to ``url`` with an auth token, retrying on failures.

        A token is fetched through ``get_authtoken`` whenever none is held;
        the previously rejected token (if any) is handed to that call.

        :param url: target url; the token is applied via ``apply_token``
        :param json: payload forwarded to ``post``
        :param number_of_retries: ``None`` retries infrastructure errors
            forever, sleeping ``self.sleep_timeout`` between attempts; a
            positive int is decremented on each infrastructure error (without
            sleeping); any other value gives up on the first such error
        :return: the result of ``post``, or None when giving up
        """
        assert self.options and self.options.username and self.options.password

        token = None
        rejected_token = None

        while True:
            if not token:
                token = self.get_authtoken(rejected_token)
                rejected_token = None
            auth_url = self.apply_token(url, json, token)
            try:
                return self.post(auth_url, json=json)
            except ApplicationError as app_error:
                if str(app_error) != 'Auth token %s is expired' % token:
                    LOGGER.exception(app_error)
                    return None
                # Expired token: drop it so the next pass requests a new one.
                LOGGER.info(app_error)
                rejected_token, token = token, None
            except UnauthorizedRequestError as auth_error:
                LOGGER.warning(auth_error, exc_info=True)
                rejected_token, token = token, None
            except InfrastructureError as infra_error:
                LOGGER.exception(infra_error)
                if number_of_retries is None:
                    time.sleep(self.sleep_timeout)
                    continue
                has_retries_left = (isinstance(number_of_retries, int)
                                    and number_of_retries > 0)
                if not has_retries_left:
                    return None
                number_of_retries -= 1
示例#5
0
    def import_data(cls, user, channel, data_loader):
        """Import every record produced by ``data_loader`` into ``channel``.

        Records may be plain dicts or ``(event_type_id, dict)`` tuples.  Tuple
        records whose static event type is known and belongs to a different
        platform than ``channel`` are skipped and not counted.

        :param user: user on whose behalf the events are created
        :param channel: channel every imported record is assigned to; any
            'channel' / 'channels' key already in the record is discarded
        :param data_loader: object whose ``load_data()`` yields the records
        :return: dict with the number of attempted ('total') and successfully
            created ('success') records
        """
        from solariat_bottle.db.post.utils import factory_by_user
        from solariat_bottle.db.events.event_type import StaticEventType

        static_event_types_map = {et.id: et for et in StaticEventType.objects.find()}

        stats = {'total': 0, 'success': 0}
        for idx, raw_data in enumerate(data_loader.load_data()):
            if isinstance(raw_data, tuple):  # JsonDataLoader returns (event_type, data) tuples
                event_type_id, raw_data = raw_data
                # check we can import this event to this channel
                event_type = static_event_types_map.get(event_type_id)  # TODO: wrong!
                if event_type and event_type.platform != channel.platform.lower():
                    continue

            stats['total'] += 1
            raw_data.pop('channel', None)
            raw_data.pop('channels', None)
            raw_data['channel'] = channel
            try:
                factory_by_user(user, **raw_data)
            except Exception:
                # Best effort: a record that fails is logged and skipped.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit still abort the import.
                LOGGER.warning("Cannot import post #%d %s", idx, raw_data, exc_info=True)
            else:
                stats['success'] += 1
        return stats
示例#6
0
    def adapt_result(channel):
        """Build the plain-dict representation of ``channel``.

        For multilanguage channels the union of ``langs`` and ``post_langs``
        is reported; codes missing from ``LANG_MAP`` are logged and left out.
        Dispatchable channels additionally get the 'is_dispatchable' and
        'user_in_review_team' keys.
        """
        langs = []
        if isinstance(channel, MultilanguageChannelMixin):
            for lang in set(channel.langs) | set(channel.post_langs):
                if lang in LANG_MAP:
                    langs.append({
                        'code': lang,
                        'title': LANG_MAP.get(lang),
                        'is_target': lang in channel.langs,
                    })
                    continue
                LOGGER.warning(
                    "Invalid language %s was configured on channel %s from account %s. Ignoring invalid language." % (
                        lang, channel.title, channel.account.name
                    ))

        result = {
            'id': str(channel.id),
            'parent_id': str(channel.parent_channel) if channel.parent_channel else None,
            'title': channel.title,
            'type': get_type(channel),
            'is_compound': channel.is_compound,
            'platform': channel.platform,
            'langs': langs,
        }

        if channel.is_dispatchable:
            dispatch_info = {
                'is_dispatchable': channel.is_dispatchable,
                'user_in_review_team': (
                    channel.is_dispatchable
                    and channel.review_outbound
                    and (user.is_superuser
                         or user in channel.get_review_team().members)),
            }
            result.update(dispatch_info)
        return result
示例#7
0
 def _func(*args, **kwargs):
     """Call the wrapped ``func`` and publish its result as a state update.

     The result is passed to ``self.produce_state_update`` when it is a dict;
     any other result only produces a warning.  The result of ``func`` is
     returned unchanged in both cases.
     """
     state = func(*args, **kwargs)
     if isinstance(state, dict):
         self.produce_state_update(state)
     else:
         # Adjacent literals instead of a backslash continuation inside the
         # string: the continuation embedded the next line's indentation in
         # the logged message.
         LOGGER.warning(
             'produced state must be a dict instance '
             'got: %s\nargs:%s\nkwargs:%s', state, args, kwargs)
     return state
示例#8
0
    def check_unused_topics(self):
        """Warn about supported topics that no registered job uses.

        The topics of the registered jobs are collected while holding
        ``self.lock``.  A warning is logged only when at least one topic from
        ``self.config.supported_topics`` is not among them.
        """
        with self.lock:
            registered_topics = {
                job.topic
                for job in self.registry.viewvalues()
            }

        unused = set(self.config.supported_topics) - registered_topics
        if unused:
            # Nothing to report when every supported topic has a job.
            LOGGER.warning('There is unused supported topics: %s', unused)
示例#9
0
def reset_fbot_cache(channel):
    """Ask the FBot service to reset its cache for ``channel``.

    Issues a GET against the '/json/resetchannel' endpoint through
    ``async_requests.ignore``.  A ``requests.ConnectionError`` is logged as a
    warning and not propagated.
    """
    from solariat_bottle.tasks import async_requests
    import requests

    endpoint = settings.FBOT_URL + '/json/resetchannel'
    url = "%s?token=%s&channel=%s" % (endpoint, settings.FB_DEFAULT_TOKEN,
                                      channel)
    try:
        # NOTE(review): certificate verification is disabled and no timeout
        # is set for this call.
        async_requests.ignore('get', url, verify=False, timeout=None)
    except requests.ConnectionError:
        LOGGER.warning('Cannot reset fbot channel cache: ', exc_info=True)
示例#10
0
 def post_received(self, post_field):
     """ Expose post_received functionality mainly for testing purposes. Could also use
      it for loading post data directly through bot in case of historics / load_data scripts.

      Increments the received counter on ``self.checker`` and parses
      ``post_field`` as JSON; input that cannot be parsed is logged as a
      warning instead of raising. """
     self.checker.inc_received()
     try:
         # NOTE(review): `data` is not used further in the visible code.
         data = json.loads(str(post_field))
     except ValueError as err:
         # `except ... as` is valid on Python 2.6+ and Python 3.
         LOGGER.warning(err)
示例#11
0
 def archive_value(self, value):
     """
     :param value: Current value from the database before the archiving
     :return: for string values (including subclasses of ``str`` /
         ``unicode``), the archived form ``'old.<id>.<value>'``; any other
         value is returned unchanged after logging a warning
     """
     # isinstance also accepts string subclasses, which an exact type()
     # comparison would reject.
     if isinstance(value, basestring):
         return 'old.%s.%s' % (str(self.id), value)
     LOGGER.warning(
         "Archiving unique value %s on object %s. No archiving strategy.",
         value, self)
     return value
示例#12
0
 def execute_pipeline(self, pipeline):
     """ Execute aggregation pipeline, handle any division by zero and return result.

     A failed Mongo operation is replaced by an empty, successful-looking
     result; it is logged as a warning unless it is a divide by zero error.
     """
     # NOTE(review): `start_query_time` and `res` are not used further in the
     # visible code.
     start_query_time = datetime.now()
     try:
         res = self.coll.aggregate(pipeline)
     except pymongo.errors.OperationFailure as ex:
         # do not log a warning if this is divide by zero error
         if 'divide by zero' not in str(ex):
             LOGGER.warning(
                 "Mongo operation failed with error: %s. Returning empty list.",
                 ex)
         res = {u'ok': 1.0, u'result': []}
示例#13
0
 def create_indexes(self):
     """Create single-field indexes on the data collection.

     A field is indexed when its cardinality entry has a 'count' below 12,
     or when it is ``self.created_at_field``.  A Mongo ``OperationFailure``
     for one field is logged and does not stop the remaining fields.
     """
     # NOTE(review): the original comment spoke of "bigger collections",
     # while the condition selects fields whose 'count' is below 12.
     coll = self.data_coll
     for key, value in self.cardinalities.iteritems():
         small_count = 'count' in value and value['count'] < 12
         if small_count or key == self.created_at_field:
             try:
                 coll.create_index(key)
             except OperationFailure as ex:
                 LOGGER.warning(
                     "Mongo operation failed while trying to create index: %s",
                     ex)
示例#14
0
 def create_indexes(self):
     """Create single-field indexes on the data collection.

     ``self.id_field`` (when set) is indexed first; a failure there is not
     caught.  Every key of ``self.cardinalities`` is then indexed, and a
     Mongo ``OperationFailure`` for one key is logged without stopping the
     remaining keys.
     """
     coll = self.data_coll
     # For bigger collections try and create indexes
     if self.id_field:
         coll.create_index(self.id_field)
     for key, _value in self.cardinalities.iteritems():
         try:
             coll.create_index(key)
         except OperationFailure as ex:
             LOGGER.warning(
                 "Mongo operation failed while trying to create index: %s",
                 ex)
示例#15
0
def create_post(user, sync=False, **kw):
    """ Creates a proper platform Post given a user and post components.

        Special args:

        sync - <bool:default=False> forces synchronous postprocessing
        skip_acl_check - <bool:default=False> creates post w/o checking acl permissions on parent channel (e.g. bots)

        Returns None without creating anything when the detected language of
        the post is not supported.  Accounts that reached their monthly or
        daily volume threshold only produce a warning.
    """
    # Set user in thread local storage
    from solariat_bottle.db.user import set_user
    set_user(user)

    from solariat_bottle.db.post.utils import get_platform_class
    from solariat_bottle.utils.post import get_language
    from solariat_bottle.db.channel.base import Channel
    from solariat_bottle.utils.posts_tracking import log_state, PostState, get_post_natural_id

    target = kw.get('channel', kw.get('channels', None))
    log_state(target, get_post_natural_id(kw), PostState.DELIVERED_TO_TANGO)

    post_lang = get_language(kw)
    if post_lang.support_level == Support.NO:
        logger.info("Detect message for unsupported language: %s" %
                    post_lang.lang)
        logger.info("Unsupported message value is: %s" % str(kw))
        return

    kw['lang'] = post_lang
    kw = normalize_post_params(user, kw)
    klass = get_platform_class(kw['_platform'], event_type=kw['event_type'])

    # we have channels resolved in normalize_post_params
    for account in {ch.account for ch in kw['channels']}:
        if _check_account_volume(user, account):
            LOGGER.warning(
                u"Account '{} ({})' has reached its monthly volume threshold.".format(
                    account.name, account.id))
        if _check_account_daily_volume(user, account):
            LOGGER.warning(
                u"Account '{} ({})' has reached its daily volume threshold.".format(
                    account.name, account.id))

    return klass.objects.create_by_user(
        user, safe_create=True, sync=sync, **kw)
示例#16
0
    def _check_skipped(self):
        """Refresh the skipped-per-second rate and roll the check period.

        The rate is recomputed on every call.  Once ``SKIPPED_INTERVAL``
        seconds have passed since the last check, a warning is logged if the
        counter exceeds ``SKIPPED_THRESHOLD``, then the counter is reset and
        a new period starts.
        """
        now = time.time()
        elapsed = now - self.last_skipped_check
        # `or 1.0` avoids dividing by zero when no time has elapsed.
        self.skipped_rate_per_second = self.skipped / (elapsed or 1.0)

        if elapsed < self.SKIPPED_INTERVAL:
            return

        if self.skipped > self.SKIPPED_THRESHOLD:
            LOGGER.warning('too many skipped: %d > %d', self.skipped,
                           self.SKIPPED_THRESHOLD)
        self.skipped = 0  # reset skipped counter
        self.last_skipped_check = now  # start new skipped period
示例#17
0
 def push(self, post, channel_id):
     """Store ``post`` for ``channel_id`` and record the delivery state.

     A ``DuplicateKeyError`` raised while creating the entry is logged as a
     warning; in that case the delivery state is not recorded.
     """
     try:
         self.create(
             id=self.make_id(channel_id, post.id),
             channel_id=channel_id,
             created_at=datetime.utcnow(),
             post_data=post.data,
             reserved_until=datetime(1970, 1, 1))
     except DuplicateKeyError:
         LOGGER.warning("Trying to push post %s twice on channel %s" %
                        (post, channel_id))
         return

     log_state(channel_id, post.native_id,
               PostState.DELIVERED_TO_GSE_QUEUE)
示例#18
0
 def get_match(cls, profile):
     """Return the first label of the profile's account that matches it.

     Every label of the account is tested.  Returns None (with a warning)
     when nothing matches; when several labels match, a warning is logged
     and the first one is returned.
     """
     matches = [
         label for label in cls.objects(account_id=profile.account_id)
         if label.match(profile)
     ]
     if len(matches) > 1:
         LOGGER.warning(
             "Found more than one match for profile %s and class %s" %
             (profile, cls))
     elif not matches:
         LOGGER.warning("Found no match for profile %s and class %s" %
                        (profile, cls))
         return None
     return matches[0]
示例#19
0
    def _is_inbound(self, account=None):
        """Tell whether this event's channels for ``account`` are inbound.

        Returns None when no account is given, when none of ``self.channels``
        belongs to the account, or when those channels disagree on
        ``is_inbound`` (a warning is logged in that case).  Otherwise returns
        the truthiness of the single shared ``is_inbound`` value.
        """
        if account is None:
            return None

        from solariat_bottle.db.account import Account
        assert isinstance(account, Account)

        directions = set()
        for channel in Channel.objects(id__in=self.channels, account=account):
            directions.add(channel.is_inbound)

        if not directions:
            return None
        if len(directions) > 1:
            LOGGER.warning("Channel misconfiguration for account %s. "
                           "Event splitted between inbound and outbound channels." % account)
            return None
        return all(directions)
示例#20
0
    def run(self):
        """Keep the datasift client subscribed to the current datasift hash.

        Loops until stopped or a 'QUIT' command arrives.  Each pass waits for
        a command on ``self.cmd_queue`` (the wait doubles as the polling
        pause), then compares the hash stored in the db with the one currently
        subscribed and (re)subscribes when needed.  A 'CLIENT' command
        installs a new client and forgets the current hash.  Any exception
        raised during a pass is logged and the loop continues.
        """
        cmd_queue = self.cmd_queue
        cur_hash  = None
        ds_client = None

        while not self.stopped():
            # make sure we intercept all errors
            try:
                # react on commands simultaneously making a 10 sec pause
                try:
                    cmd, arg = cmd_queue.get(block=True, timeout=10 if not get_var('ON_TEST') else 1)
                    LOGGER.debug('received %s command', cmd)
                    if cmd == 'CLIENT':
                        ds_client = arg
                        cur_hash  = None
                    elif cmd == 'QUIT':
                        break
                except Empty:
                    LOGGER.debug('timeout (it\'s okay)')

                if ds_client is None:
                    continue

                if ds_client.terminated:
                    LOGGER.warning('ds_client is terminated')
                    ds_client = None
                    continue

                # get current datasift hash from the db
                ds_hash = get_datasift_hash()
                if not ds_hash:
                    continue

                # subscribe/unsubscribe if necessary
                if not cur_hash:
                    ds_client.subscribe(ds_hash)

                elif cur_hash != ds_hash:
                    ds_client.unsubscribe(cur_hash)
                    ds_client.subscribe(ds_hash)

                # remember the current hash
                cur_hash = ds_hash

            except Exception as err:
                # `except ... as` is valid on Python 2.6+ and Python 3.
                LOGGER.error(err, exc_info=True)
示例#21
0
 def execute_search(self, query, limit):
     """Run the parent search and keep only hits whose FAQ still exists.

     A hit whose id cannot be loaded through ``FAQ.objects.get`` is deleted
     from ``self.collection`` and left out of the returned list.

     Relevance scores are deliberately not normalized by the maximum score:
     normalizing introduces strange biases into the data for or against ES
     and implicitly limits the impact of ES on the search.
     """
     hits = super(DbBasedSE1, self).execute_search(query, limit)
     live_hits = []
     for hit in hits:
         try:
             FAQ.objects.get(hit['id'])
         except FAQ.DoesNotExist:
             LOGGER.warning("Removed FAQ stall entry from ES: " + str(hit['id']))
             self.collection.delete(str(hit['id']))
         else:
             live_hits.append(hit)
     return live_hits
示例#22
0
    def execute_pipeline(self, match_query_base, match_query_filters, limit):
        """Run the aggregation built from the given queries.

        :return: the 'result' list of the aggregation after
            ``fix_for_neg_value`` has been applied to its 'topic_count' and
            'term_count' fields; an empty list when ``limit`` is 0 or when the
            aggregation does not report 'ok' (a warning is logged in the
            latter case)
        """
        # Edge case handling
        if limit == 0:
            return []

        pipeline = make_pipeline(match_query_base, match_query_filters, limit)
        res = self.coll.aggregate(pipeline)

        if not res['ok']:
            LOGGER.warning(
                'ChannelHotTopics pipeline %s failed with result %s', pipeline, res
            )
            return []

        rows = res['result']
        fix_for_neg_value(rows, ['topic_count', 'term_count'], pipeline)
        return res['result']
示例#23
0
    def verify_application_key(self, app_key):
        """
        For an input application key, validate that we have a valid application token in place.

        :param app_key: A string representing an application key
        :return: The `ApplicationToken` object stored for this key when its status is valid
        :raises AuthError: when no token exists for the key, or when the token's status is not valid
        """
        try:
            app_token = self.get(app_key=app_key)
        except ApplicationToken.DoesNotExist:
            LOGGER.warning("Trying to use invalid api key %s" % app_key)
            raise AuthError("Trying to use invalid api key %s" % app_key)
        else:
            if app_token.status != ApplicationToken.STATUS_VALID:
                LOGGER.warning("App key %s is  no longer valid" % app_key)
                raise AuthError("App key %s is no longer valid" % app_key)
            return app_token
示例#24
0
 def start_historic_load(self):
     """
     Start a datasift historic load. Take into account the status of the subscription on our side.

     A pending subscription is started through ``start_subscription``; a
     newly created one starts the historic load.  Returns False when the
     subscription has any other status, and None otherwise.
     """
     if self.subscription.status == SUBSCRIPTION_PENDING:
         LOGGER.warning("Starting a pending subscription.")
         self.start_subscription()
         return None

     if self.subscription.status == SUBSCRIPTION_CREATED:
         LOGGER.info("Starting new datasift subscription.")
         self.__start_historic_load()
         LOGGER.info("Historic load started successfully.")
         return None

     LOGGER.warning(
         "This current subscription already has status: %s. Cannot start again."
         % self.subscription.status)
     return False
示例#25
0
    def run(self, task):
        """Turn a single ``task`` into a post.

        The task is preprocessed into post fields.  The post is created only
        when channels can be assigned to it; otherwise it is skipped with an
        info message.  Any exception is logged and swallowed, so this method
        never raises for a failing task.
        """
        # make sure we intercept all errors
        try:
            post_fields = self.preprocess_post(task)
            if not post_fields:
                LOGGER.warning('no post_fields in: %s', task)
                return

            if self.assign_channels(post_fields):
                self.create_post(**post_fields)
            else:
                LOGGER.info('skipping post %r' % post_fields.get('content'))

        except Exception as err:
            # `except ... as` is valid on Python 2.6+ and Python 3.
            LOGGER.error(err, exc_info=True)
示例#26
0
def get_twitter_post_users(post):
    """Returns conversation parties from the DM or public tweet.

    The result is a ``(user_ids, user_screen_names, recipient_screen_names)``
    tuple of lists.  For a post without a 'twitter' key only the
    'user_profile' dict (if any) is used.  A ``KeyError`` while reading the
    tweet is logged and whatever was collected so far is returned.
    """
    sender_ids = []
    sender_names = []
    recipient_names = []
    parties = (sender_ids, sender_names, recipient_names)

    if 'twitter' not in post:
        # compatibility for tests not passing a fully formatted tweet to the get_tracked_channels
        if 'user_profile' in post and isinstance(post['user_profile'], dict):
            sender_ids.append(post['user_profile']['user_id'])
            sender_names.append(post['user_profile']['user_name'])
        return parties

    tweet = post['twitter']

    try:
        if 'recipient' in tweet and 'sender' in tweet:
            # direct message
            sender_ids.append(tweet['sender']['id_str'])
            sender_names.append(tweet['sender']['screen_name'])
            recipient_names.append(tweet['recipient']['screen_name'])
        else:
            # all mentions
            if 'entities' in tweet and 'user_mentions' in tweet['entities']:
                for mention in tweet['entities']['user_mentions']:
                    recipient_names.append(mention['screen_name'])
            # sender
            if 'user' in tweet:
                sender_ids.append(tweet['user']['id_str'])
                sender_names.append(tweet['user']['screen_name'])
            elif 'user_profile' in post:
                sender_ids.append(post['user_profile']['user_id'])
                sender_names.append(post['user_profile']['user_name'])
            # recipient
            if 'in_reply_to_screen_name' in tweet and tweet['in_reply_to_screen_name']:
                recipient_names.append(tweet['in_reply_to_screen_name'])
    except KeyError:
        LOGGER.warning("Malformed post data {}".format(post), exc_info=True)

    # Every collected value is expected to be a string.
    for values in parties:
        assert all(isinstance(item, basestring) for item in values), values
    return parties
示例#27
0
    def upsert(self, platform, profile_data):
        """Resolve ``profile_data`` to a profile object for ``platform``.

        ``profile_data`` may be an instance of the platform's profile class
        (returned as is), a ``bytes`` / ``ObjectId`` value (looked up with
        ``find_one``), or a dict (upserted with retry).  Any other type is
        logged and the anonymous profile is returned.
        """
        profile_cls = self.profile_class(platform)

        if isinstance(profile_data, profile_cls):
            return profile_data

        if isinstance(profile_data, (bytes, ObjectId)):
            return profile_cls.objects.find_one(profile_data)

        if isinstance(profile_data, dict):
            query, update = profile_cls.objects.extract_upsert_data(
                profile_cls, profile_data)
            return self.upsert_with_retry(profile_cls, query, update)

        logger.warning("Failed to upsert UserProfile, "
                       "returning anonymous profile\nupsert("
                       "{}, {})".format(platform, profile_data))
        return profile_cls.anonymous_profile()
示例#28
0
def fb_put_comment(channel, object_id, message):
    """Put a comment on a Facebook object.

    When a ``FacebookPost`` is stored for ``object_id`` the comment is sent
    through ``fb_comment_by_page``.  If that fails with a '#1705' error and
    the post is a second level reply, the comment is sent to the parent of
    the post instead; every other failure on this path is re-raised.

    When no post is stored, the comment is put directly through the
    channel's Facebook api.

    :raises FacebookCommunicationException: when the direct call fails
    """
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException
    from solariat_bottle.db.post.facebook import FacebookPost
    try:
        fb_post = FacebookPost.objects.get(_native_id=str(object_id))
    except FacebookPost.DoesNotExist:
        LOGGER.warning(
            "No mapping post for native_id=%s was found. Defaulting to posting comment as user."
            % object_id)
    else:
        try:
            return fb_comment_by_page(fb_post, object_id, message)
        except (FacebookCommunicationException,
                facebook_driver.facebook.GraphAPIError) as exc:
            # GraphAPIError: (#1705) There was an error posting to this wall
            if '#1705' in str(exc):
                LOGGER.info("Failed sending comment to post %s with error %s" %
                            (object_id, str(exc)))
                if fb_post.is_second_level_reply:
                    try:
                        object_id = fb_post.wrapped_data['parent_id']
                    except KeyError:
                        LOGGER.error("Can't find parent for comment %s" %
                                     fb_post)
                        # Raise the original error, not the KeyError.
                        raise exc
                    LOGGER.info(
                        "Sending comment to parent %s of initial post %s %s" %
                        (object_id, fb_post, fb_post.native_id))
                    return fb_comment_by_page(fb_post, object_id, message)
            raise exc

    try:
        return get_facebook_api(channel).put_comment(
            object_id, force_bytes(message, 'utf-8', 'replace'))
    except Exception as ex:
        # `except ... as` is valid on Python 2.6+ and Python 3.
        LOGGER.error(
            "Failure posting comment to facebook. Exc: %s,  Channel: %s, Object_id: %s, Message: %s"
            % (ex, channel, object_id, message))
        # NOTE(review): `ex.message` only exists on Python 2 exceptions.
        raise FacebookCommunicationException(ex.message)
示例#29
0
    def start_historic_load(self):
        """Load the history for this subscription if it may be started.

        Returns False (after a warning) when another subscription is already
        running for the channel, or when this subscription's status is not
        the created one.  Otherwise the history is loaded and None is
        returned.
        """
        running_subscriptions = FacebookHistoricalSubscription.objects.find(
            status=SUBSCRIPTION_RUNNING, channel_id=str(self.channel.id))
        if len(running_subscriptions) > 0:
            LOGGER.warning(
                "Only single running subscription available for a channel at single moment"
            )
            return False

        if self.subscription.status == SUBSCRIPTION_CREATED:
            LOGGER.info("Starting new facebook subscription.")
            self._load_history()
            LOGGER.info("Historic load finished successfully.")
            return None

        LOGGER.warning(
            "This current subscription already has status: %s. Cannot start again."
            % self.subscription.status)
        return False
示例#30
0
    def get_user_tracking_channel(self):
        """Return the single user tracking channel matching the twitter handle.

        Candidate channel ids are collected from the post filter entries
        whose entry is one of the normalized usernames derived from
        ``self.twitter_handle``, then restricted to ``self.account``.

        :return: the only candidate channel; None when there is no twitter
            handle, no candidate, or more than one candidate (the last two
            cases are logged as warnings)
        """
        if self.twitter_handle is None:
            return None

        # case-insensitive lookup for service channel
        from solariat_bottle.db.tracking import PostFilterEntry, TrackingNLP

        raw_usernames = get_sync_usernames_list([self.twitter_handle])
        normalize = TrackingNLP.normalize_kwd
        usernames_list = [normalize(name) for name in raw_usernames]

        candidate_channel_ids = set()
        entries = PostFilterEntry.objects.coll.find(
            {PostFilterEntry.F.entry: {'$in': usernames_list}},
            fields=[PostFilterEntry.F.channels])
        for entry in entries:
            channels = entry[PostFilterEntry.F.channels]
            if not isinstance(channels, (list, tuple)):
                channels = [channels]
            for channel in channels:
                # Entries may hold channel objects or bare ids.
                candidate_channel_ids.add(
                    channel.id if hasattr(channel, 'id') else channel)

        candidates = UserTrackingChannel.objects(id__in=candidate_channel_ids,
                                                 account=self.account)[:]
        if not candidates:
            LOGGER.warning(
                "No service channel candidates were found for outbound channel %s. "
                "Some outbound channel filtering might not work.", self.title)
            return None

        if len(candidates) == 1:
            return candidates[0]

        LOGGER.warning(
            "We have multiple candidates for service channel matching for enterprise channel %s"
            % self)
        return None