def run(self):
    inp_queue = self.inp_queue
    start_time = time.time()
    while not self.stopped():
        # make sure we intercept all errors
        try:
            task = inp_queue.get()
            if task is self.QUIT or task == 'QUIT':
                LOGGER.debug('received QUIT signal %s' % self)
                break
            start_time = time.time()
            self._busy = True  # just started doing our post processing
            post_fields = self.preprocess_post(task)
            if not post_fields:
                LOGGER.warning('no post_fields in: %s', task)
                continue
            # LOGGER.debug('creating post %r %s', post_fields.get('content'), inp_queue.qsize())
            if self.assign_channels(post_fields):
                self.create_post(**post_fields)
            else:
                LOGGER.info('skipping post %r' % post_fields.get('content'))
                self.inc_skipped()
        except Exception as err:
            LOGGER.error(err, exc_info=True)
        finally:
            # clear the busy flag on every path (the `continue` and error
            # paths previously left it set)
            self._busy = False  # just finished doing our post processing
def inner_method(self, *args, **kwargs):
    if not self.subscription.datasift_historic_id:
        LOGGER.warning(
            "Subscription %s does not have a historic id stored."
            % self.subscription)
        return False
    return method(self, *args, **kwargs)
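# A body like inner_method only makes sense inside a decorator that closes
# over `method`. A minimal sketch of the presumed enclosing decorator; the
# name `historic_id_required` and the usage below are assumptions:
import functools

def historic_id_required(method):
    @functools.wraps(method)
    def inner_method(self, *args, **kwargs):
        if not self.subscription.datasift_historic_id:
            LOGGER.warning(
                "Subscription %s does not have a historic id stored."
                % self.subscription)
            return False
        return method(self, *args, **kwargs)
    return inner_method

# Hypothetical usage on a subscription-aware class:
# class HistoricLoader(object):
#     @historic_id_required
#     def stop_historic_load(self):
#         ...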
def run(self):
    try:
        requests = list(self.gen_requests())
        for idx, request in enumerate(requests):
            if self.stopped():
                break
            for tweets in request:
                if self.stopped():
                    break
                self.push_posts(tweets,
                                post_data_format=request.post_data_format,
                                post_data_dump=request.post_data_dump)
                self.update_status(SUBSCRIPTION_RUNNING)

        self.historic_loader.load()

        if self.stopped():
            # update status first, then wait for the creator's work to finish
            self.update_status(SUBSCRIPTION_STOPPED)
            self.historic_loader.creator.join()
            self.aggregate_state(self.historic_loader, {'finished': True})
        else:
            self.update_status(SUBSCRIPTION_FINISHED)
    except TwitterApiRateLimitError as e:
        LOGGER.warning('Waiting for rate limits to reset. '
                       'Restart is needed in: %s sec.' % e.wait_for)
        self.update_status(SUBSCRIPTION_WAITING_LIMITS_RESET)
        raise
    except Exception:
        LOGGER.exception('Subscriber exception:')
        self.update_status(SUBSCRIPTION_ERROR)
        raise
def post_authenticated(self, url, json=None, number_of_retries=None):
    assert self.options and self.options.username and self.options.password

    authtoken = None
    expired = None
    while True:
        if not authtoken:
            authtoken = self.get_authtoken(expired)
            expired = None
        auth_url = self.apply_token(url, json, authtoken)
        try:
            return self.post(auth_url, json=json)
        except ApplicationError as err:
            if str(err) == 'Auth token %s is expired' % authtoken:
                LOGGER.info(err)
                expired = authtoken
                authtoken = None
            else:
                LOGGER.exception(err)
                break
        except UnauthorizedRequestError as err:
            LOGGER.warning(err, exc_info=True)
            expired = authtoken
            authtoken = None
        except InfrastructureError as err:
            LOGGER.exception(err)
            if number_of_retries is None:
                time.sleep(self.sleep_timeout)
            elif isinstance(number_of_retries, int) and number_of_retries > 0:
                number_of_retries -= 1
            else:
                break
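# A hedged usage sketch: with number_of_retries=None the call above retries
# infrastructure errors forever (sleeping self.sleep_timeout between tries)
# and transparently re-authenticates on token expiry; with an int it gives up
# after that many infrastructure failures (token-expiry retries are not
# counted) and returns None. The URL and payload here are made up:
#
# client.post_authenticated('https://api.example.com/v1/posts',
#                           json={'content': 'hello'},
#                           number_of_retries=3)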
def import_data(cls, user, channel, data_loader):
    from solariat_bottle.db.post.utils import factory_by_user
    from solariat_bottle.db.events.event_type import StaticEventType

    static_event_types_map = {et.id: et for et in StaticEventType.objects.find()}
    stats = {'total': 0, 'success': 0}

    for idx, raw_data in enumerate(data_loader.load_data()):
        if isinstance(raw_data, tuple):
            # JsonDataLoader returns (event_type, data) tuples
            event_type_id, raw_data = raw_data
            # check we can import this event to this channel
            event_type = static_event_types_map.get(event_type_id)
            # TODO: wrong!
            if event_type and event_type.platform != channel.platform.lower():
                continue

        stats['total'] += 1
        raw_data.pop('channel', None)
        raw_data.pop('channels', None)
        raw_data['channel'] = channel
        try:
            factory_by_user(user, **raw_data)
        except Exception:
            LOGGER.warning("Cannot import post #%d %s", idx, raw_data, exc_info=True)
        else:
            stats['success'] += 1
    return stats
def adapt_result(channel):
    # NOTE: `user` is presumably supplied by the enclosing scope
    # (e.g. the request context).
    langs = []
    if isinstance(channel, MultilanguageChannelMixin):
        unprocessed_langs = set(channel.langs) | set(channel.post_langs)
        for lang in unprocessed_langs:
            if lang not in LANG_MAP:
                warning = ("Invalid language %s was configured on channel %s "
                           "from account %s. Ignoring invalid language." % (
                               lang, channel.title, channel.account.name))
                LOGGER.warning(warning)
            else:
                langs.append({'code': lang,
                              'title': LANG_MAP.get(lang),
                              'is_target': lang in channel.langs})

    result = dict(
        id=str(channel.id),
        parent_id=str(channel.parent_channel) if channel.parent_channel else None,
        title=channel.title,
        type=get_type(channel),
        is_compound=channel.is_compound,
        platform=channel.platform,
        langs=langs)

    if channel.is_dispatchable:
        result.update(dict(
            is_dispatchable=channel.is_dispatchable,
            user_in_review_team=channel.is_dispatchable and channel.review_outbound and (
                user.is_superuser or user in channel.get_review_team().members)))
    return result
def _func(*args, **kwargs):
    state = func(*args, **kwargs)
    if isinstance(state, dict):
        self.produce_state_update(state)
    else:
        LOGGER.warning('produced state must be a dict instance, '
                       'got: %s\nargs:%s\nkwargs:%s',
                       state, args, kwargs)
    return state
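# Like inner_method above, _func closes over both `func` and `self`, so it
# only makes sense inside an instance method acting as a decorator. A minimal
# sketch under that assumption; the class and method names are made up:
import functools

class StateProducer(object):  # hypothetical host class
    def produce_state_update(self, state):
        pass  # push the state dict to wherever state updates go

    def track_state(self, func):  # hypothetical decorator-factory method
        @functools.wraps(func)
        def _func(*args, **kwargs):
            state = func(*args, **kwargs)
            if isinstance(state, dict):
                self.produce_state_update(state)
            else:
                LOGGER.warning('produced state must be a dict instance, '
                               'got: %s\nargs:%s\nkwargs:%s',
                               state, args, kwargs)
            return state
        return _func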
def check_unused_topics(self):
    with self.lock:
        registered_topics = {job.topic for job in self.registry.viewvalues()}
        unused = set(self.config.supported_topics) - registered_topics
        if unused:  # only warn when something is actually unused
            LOGGER.warning('There are unused supported topics: %s', unused)
def reset_fbot_cache(channel):
    from solariat_bottle.tasks import async_requests
    import requests

    url = "%s?token=%s&channel=%s" % (settings.FBOT_URL + '/json/resetchannel',
                                      settings.FB_DEFAULT_TOKEN,
                                      channel)
    try:
        async_requests.ignore('get', url, verify=False, timeout=None)
    except requests.ConnectionError:
        LOGGER.warning('Cannot reset fbot channel cache: ', exc_info=True)
def post_received(self, post_field):
    """ Expose post_received functionality, mainly for testing purposes.

    Could also be used for loading post data directly through the bot in
    case of historics / load_data scripts.
    """
    self.checker.inc_received()
    try:
        data = json.loads(str(post_field))
    except ValueError as err:
        LOGGER.warning(err)
def archive_value(self, value):
    """
    :param value: current value from the database before archiving
    :return: if a translation is possible based on the type of the value,
        translate it into archived form
    """
    if type(value) in (unicode, str):
        return 'old.%s.%s' % (str(self.id), value)
    LOGGER.warning(
        "Archiving unique value %s on object %s. No archiving strategy." % (value, self))
    return value
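# A hedged usage sketch (the id and value are made up): archiving a string
# prefixes it with 'old.<object id>.', which frees the unique-index slot
# while keeping the original value recoverable:
#
# obj.archive_value('user@example.com')  # -> 'old.42.user@example.com'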
def execute_pipeline(self, pipeline):
    """ Execute aggregation pipeline, handle any division by zero
    and return result
    """
    start_query_time = datetime.now()
    try:
        res = self.coll.aggregate(pipeline)
    except pymongo.errors.OperationFailure as ex:
        # do not log a warning if this is a divide-by-zero error
        if 'divide by zero' not in str(ex):
            LOGGER.warning(
                "Mongo operation failed with error: %s. Returning empty list.", ex)
        res = {u'ok': 1.0, u'result': []}
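# Note (an assumption about the driver version): the dict fallback above
# mirrors the pre-3.0 pymongo aggregate() return shape, i.e. the raw command
# response {u'ok': 1.0, u'result': [...]}. From pymongo 3.x on, aggregate()
# returns a CommandCursor instead, so the equivalent fallback would be:
#
# res = {u'ok': 1.0, u'result': list(self.coll.aggregate(pipeline))}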
def create_indexes(self):
    coll = self.data_coll
    # For bigger collections try and create indexes
    for key, value in self.cardinalities.iteritems():
        if ('count' in value and value['count'] < 12) or key == self.created_at_field:
            try:
                coll.create_index(key)
            except OperationFailure as ex:
                LOGGER.warning(
                    "Mongo operation failed while trying to create index: %s", ex)
def create_indexes(self):
    coll = self.data_coll
    # For bigger collections try and create indexes
    if self.id_field:
        coll.create_index(self.id_field)
    for key, value in self.cardinalities.iteritems():
        try:
            coll.create_index(key)
        except OperationFailure as ex:
            LOGGER.warning(
                "Mongo operation failed while trying to create index: %s", ex)
def create_post(user, sync=False, **kw):
    """ Creates a proper platform Post given a user and post components.

    Special args:
        sync           - <bool:default=False> forces synchronous postprocessing
        skip_acl_check - <bool:default=False> creates post w/o checking acl
                         permissions on parent channel (e.g. bots)
    """
    # Set user in thread local storage
    from solariat_bottle.db.user import set_user
    set_user(user)

    from solariat_bottle.db.post.utils import get_platform_class
    from solariat_bottle.utils.post import get_language
    from solariat_bottle.db.channel.base import Channel
    from solariat_bottle.utils.posts_tracking import log_state, PostState, get_post_natural_id

    log_state(kw.get('channel', kw.get('channels', None)),
              get_post_natural_id(kw),
              PostState.DELIVERED_TO_TANGO)

    post_lang = get_language(kw)
    if post_lang.support_level == Support.NO:
        logger.info("Detected message in unsupported language: %s" % post_lang.lang)
        logger.info("Unsupported message value is: %s" % str(kw))
        return
    kw['lang'] = post_lang

    kw = normalize_post_params(user, kw)
    klass = get_platform_class(kw['_platform'], event_type=kw['event_type'])

    # we have channels resolved in normalize_post_params
    channels = kw['channels']
    accounts = set([ch.account for ch in channels])
    for account in accounts:
        if _check_account_volume(user, account):
            msg = u"Account '{} ({})' has reached its monthly volume threshold.".format(
                account.name, account.id)
            LOGGER.warning(msg)
        if _check_account_daily_volume(user, account):
            msg = u"Account '{} ({})' has reached its daily volume threshold.".format(
                account.name, account.id)
            LOGGER.warning(msg)

    return klass.objects.create_by_user(user, safe_create=True, sync=sync, **kw)
def _check_skipped(self):
    """ check skipped counter """
    cur_time = time.time()
    interval = cur_time - self.last_skipped_check
    self.skipped_rate_per_second = self.skipped / (interval or 1.0)
    if interval >= self.SKIPPED_INTERVAL:
        if self.skipped > self.SKIPPED_THRESHOLD:
            LOGGER.warning('too many skipped: %d > %d',
                           self.skipped, self.SKIPPED_THRESHOLD)
        self.skipped = 0                    # reset skipped counter
        self.last_skipped_check = cur_time  # start a new skipped period
def push(self, post, channel_id):
    try:
        self.create(id=self.make_id(channel_id, post.id),
                    channel_id=channel_id,
                    created_at=datetime.utcnow(),
                    post_data=post.data,
                    reserved_until=datetime(1970, 1, 1))
    except DuplicateKeyError:
        LOGGER.warning("Trying to push post %s twice on channel %s" % (post, channel_id))
    else:
        log_state(channel_id, post.native_id, PostState.DELIVERED_TO_GSE_QUEUE)
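# A hedged usage sketch: because the id is derived deterministically from
# (channel_id, post.id), pushing the same post twice is a no-op guarded by
# DuplicateKeyError (the queue instance here is made up):
#
# queue.push(post, channel_id)  # inserted; delivery state logged
# queue.push(post, channel_id)  # duplicate -> warning, no second entry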
def get_match(cls, profile):
    matches = []
    for label in cls.objects(account_id=profile.account_id):
        if label.match(profile):
            matches.append(label)

    if not matches:
        LOGGER.warning("Found no match for profile %s and class %s" % (profile, cls))
        return None
    if len(matches) > 1:
        LOGGER.warning(
            "Found more than one match for profile %s and class %s" % (profile, cls))
    return matches[0]
def _is_inbound(self, account=None):
    if account is None:
        return None

    from solariat_bottle.db.account import Account
    assert isinstance(account, Account)

    account_channels = Channel.objects(id__in=self.channels, account=account)
    is_inbound = {channel.is_inbound for channel in account_channels}
    if len(is_inbound) == 0:
        return None
    if len(is_inbound) != 1:
        LOGGER.warning("Channel misconfiguration for account %s. "
                       "Event split between inbound and outbound channels." % account)
        return None
    return all(is_inbound)
def run(self):
    cmd_queue = self.cmd_queue
    cur_hash = None
    ds_client = None

    while not self.stopped():
        # make sure we intercept all errors
        try:
            # react on commands, simultaneously making a 10 sec pause
            try:
                cmd, arg = cmd_queue.get(
                    block=True,
                    timeout=10 if not get_var('ON_TEST') else 1)
                LOGGER.debug('received %s command', cmd)
                if cmd == 'CLIENT':
                    ds_client = arg
                    cur_hash = None
                elif cmd == 'QUIT':
                    break
            except Empty:
                LOGGER.debug("timeout (it's okay)")

            if ds_client is None:
                continue

            if ds_client.terminated:
                LOGGER.warning('ds_client is terminated')
                ds_client = None
                continue

            # get current datasift hash from the db
            ds_hash = get_datasift_hash()
            if not ds_hash:
                continue

            # subscribe/unsubscribe if necessary
            if not cur_hash:
                ds_client.subscribe(ds_hash)
            elif cur_hash != ds_hash:
                ds_client.unsubscribe(cur_hash)
                ds_client.subscribe(ds_hash)

            # remember the current hash
            cur_hash = ds_hash
        except Exception as err:
            LOGGER.error(err, exc_info=True)
def execute_search(self, query, limit):
    results = super(DbBasedSE1, self).execute_search(query, limit)
    valid_results = []
    for entry in results:
        try:
            FAQ.objects.get(entry['id'])
            valid_results.append(entry)
        except FAQ.DoesNotExist:
            LOGGER.warning("Removed stale FAQ entry from ES: " + str(entry['id']))
            self.collection.delete(str(entry['id']))

    # Removing normalization because it introduces strange biases into the
    # data for or against ES. It implicitly limits the impact of ES on the search.
    # max_score = max([r['relevance'] for r in valid_results])
    # for result in valid_results:
    #     result['relevance'] = result['relevance'] / max_score
    return valid_results
def execute_pipeline(self, match_query_base, match_query_filters, limit):
    # Edge case handling
    if limit == 0:
        return []

    pipeline = make_pipeline(match_query_base, match_query_filters, limit)
    res = self.coll.aggregate(pipeline)
    if res['ok']:
        fix_for_neg_value(res['result'], ['topic_count', 'term_count'], pipeline)
        return res['result']
    else:
        LOGGER.warning('ChannelHotTopics pipeline %s failed with result %s',
                       pipeline, res)
        return []
def verify_application_key(self, app_key):
    """ For an input application key, validate that we have a valid
    application token in place.

    :param app_key: a string representing an application key
    :return: an `ApplicationToken` object if a valid one exists for this key
    :raises AuthError: if the key is unknown or no longer valid
    """
    try:
        app_token = self.get(app_key=app_key)
        if app_token.status != ApplicationToken.STATUS_VALID:
            LOGGER.warning("App key %s is no longer valid" % app_key)
            raise AuthError("App key %s is no longer valid" % app_key)
        return app_token
    except ApplicationToken.DoesNotExist:
        LOGGER.warning("Trying to use invalid api key %s" % app_key)
        raise AuthError("Trying to use invalid api key %s" % app_key)
def start_historic_load(self):
    """ Start a datasift historic load, taking into account the status
    of the subscription on our side.
    """
    if self.subscription.status == SUBSCRIPTION_PENDING:
        LOGGER.warning("Starting a pending subscription.")
        self.start_subscription()
    elif self.subscription.status == SUBSCRIPTION_CREATED:
        LOGGER.info("Starting new datasift subscription.")
        self.__start_historic_load()
        LOGGER.info("Historic load started successfully.")
    else:
        LOGGER.warning(
            "This subscription already has status: %s. Cannot start again."
            % self.subscription.status)
        return False
def run(self, task):
    # make sure we intercept all errors
    try:
        post_fields = self.preprocess_post(task)
        if not post_fields:
            LOGGER.warning('no post_fields in: %s', task)
            return
        if self.assign_channels(post_fields):
            self.create_post(**post_fields)
        else:
            LOGGER.info('skipping post %r' % post_fields.get('content'))
    except Exception as err:
        LOGGER.error(err, exc_info=True)
def get_twitter_post_users(post):
    """Returns conversation parties from the DM or public tweet"""
    user_ids, user_screen_names, recipient_screen_names = [], [], []

    if 'twitter' not in post:
        # compatibility for tests not passing a fully formatted tweet
        # to get_tracked_channels
        if 'user_profile' in post and isinstance(post['user_profile'], dict):
            user_ids.append(post['user_profile']['user_id'])
            user_screen_names.append(post['user_profile']['user_name'])
        return user_ids, user_screen_names, recipient_screen_names

    tweet_json = post['twitter']
    is_direct_message = 'recipient' in tweet_json and 'sender' in tweet_json
    try:
        if is_direct_message:
            user_ids.append(tweet_json['sender']['id_str'])
            user_screen_names.append(tweet_json['sender']['screen_name'])
            recipient_screen_names.append(tweet_json['recipient']['screen_name'])
        else:
            # all mentions
            if 'entities' in tweet_json and 'user_mentions' in tweet_json['entities']:
                for user_data in tweet_json['entities']['user_mentions']:
                    recipient_screen_names.append(user_data['screen_name'])
            # sender
            if 'user' in tweet_json:
                user_ids.append(tweet_json['user']['id_str'])
                user_screen_names.append(tweet_json['user']['screen_name'])
            elif 'user_profile' in post:
                user_ids.append(post['user_profile']['user_id'])
                user_screen_names.append(post['user_profile']['user_name'])
            # recipient
            if 'in_reply_to_screen_name' in tweet_json and tweet_json['in_reply_to_screen_name']:
                recipient_screen_names.append(tweet_json['in_reply_to_screen_name'])
    except KeyError:
        LOGGER.warning("Malformed post data {}".format(post), exc_info=True)

    all_str = lambda iterable: all(isinstance(x, basestring) for x in iterable)
    assert all_str(user_ids), user_ids
    assert all_str(user_screen_names), user_screen_names
    assert all_str(recipient_screen_names), recipient_screen_names
    return user_ids, user_screen_names, recipient_screen_names
def upsert(self, platform, profile_data):
    ProfileCls = self.profile_class(platform)
    if isinstance(profile_data, ProfileCls):
        profile_object = profile_data
    elif isinstance(profile_data, (bytes, ObjectId)):
        profile_object = ProfileCls.objects.find_one(profile_data)
    elif isinstance(profile_data, dict):
        query, update = ProfileCls.objects.extract_upsert_data(ProfileCls, profile_data)
        profile_object = self.upsert_with_retry(ProfileCls, query, update)
    else:
        logger.warning("Failed to upsert UserProfile, "
                       "returning anonymous profile\n"
                       "upsert({}, {})".format(platform, profile_data))
        profile_object = ProfileCls.anonymous_profile()
    return profile_object
def fb_put_comment(channel, object_id, message):
    """put a comment on some object"""
    # func = lambda: get_facebook_api(channel).put_comment(object_id, message.encode('utf-8', 'replace'))
    # return __try_execute_safely(func, 5, 3600)
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.tasks.exceptions import FacebookCommunicationException
    from solariat_bottle.db.post.facebook import FacebookPost

    try:
        fb_post = FacebookPost.objects.get(_native_id=str(object_id))
    except FacebookPost.DoesNotExist:
        LOGGER.warning(
            "No mapping post for native_id=%s was found. "
            "Defaulting to posting comment as user." % object_id)
    else:
        try:
            return fb_comment_by_page(fb_post, object_id, message)
        except (FacebookCommunicationException,
                facebook_driver.facebook.GraphAPIError) as exc:
            # GraphAPIError: (#1705) There was an error posting to this wall
            if '#1705' in str(exc):
                LOGGER.info("Failed sending comment to post %s with error %s"
                            % (object_id, str(exc)))
                if fb_post.is_second_level_reply:
                    try:
                        object_id = fb_post.wrapped_data['parent_id']
                    except KeyError:
                        LOGGER.error("Can't find parent for comment %s" % fb_post)
                        raise exc
                    LOGGER.info("Sending comment to parent %s of initial post %s %s"
                                % (object_id, fb_post, fb_post.native_id))
                    return fb_comment_by_page(fb_post, object_id, message)
            raise exc

    try:
        return get_facebook_api(channel).put_comment(
            object_id, force_bytes(message, 'utf-8', 'replace'))
    except Exception as ex:
        LOGGER.error("Failure posting comment to facebook. "
                     "Exc: %s, Channel: %s, Object_id: %s, Message: %s"
                     % (ex, channel, object_id, message))
        raise FacebookCommunicationException(ex.message)
def start_historic_load(self):
    running_subscriptions = FacebookHistoricalSubscription.objects.find(
        status=SUBSCRIPTION_RUNNING,
        channel_id=str(self.channel.id))
    if len(running_subscriptions) > 0:
        LOGGER.warning(
            "Only a single running subscription is allowed per channel "
            "at any given moment.")
        return False

    if self.subscription.status == SUBSCRIPTION_CREATED:
        LOGGER.info("Starting new facebook subscription.")
        self._load_history()
        LOGGER.info("Historic load finished successfully.")
    else:
        LOGGER.warning(
            "This subscription already has status: %s. Cannot start again."
            % self.subscription.status)
        return False
def get_user_tracking_channel(self):
    if self.twitter_handle is None:
        return None

    # candidates = UserTrackingChannel.objects.find(
    #     usernames__in=get_sync_usernames_list([self.twitter_handle]),
    #     account=self.account)[:]

    # case-insensitive lookup for service channel
    from solariat_bottle.db.tracking import PostFilterEntry, TrackingNLP

    usernames_list = get_sync_usernames_list([self.twitter_handle])
    usernames_list = map(TrackingNLP.normalize_kwd, usernames_list)
    candidate_channel_ids = set()
    for pfe in PostFilterEntry.objects.coll.find(
            {PostFilterEntry.F.entry: {'$in': usernames_list}},
            fields=[PostFilterEntry.F.channels]):
        chs = pfe[PostFilterEntry.F.channels]
        if not isinstance(chs, (list, tuple)):
            chs = [chs]
        for ch in chs:
            if hasattr(ch, 'id'):
                candidate_channel_ids.add(ch.id)
            else:
                candidate_channel_ids.add(ch)

    candidates = UserTrackingChannel.objects(id__in=candidate_channel_ids,
                                             account=self.account)[:]
    if candidates:
        if len(candidates) == 1:
            return candidates[0]
        LOGGER.warning(
            "We have multiple candidates for service channel matching "
            "for enterprise channel %s" % self)
        return None

    LOGGER.warning(
        "No service channel candidates were found for outbound channel %s. "
        "Some outbound channel filtering might not work.", self.title)
    return None