class Gallery(Document):
    """Collection of predefined widget models for one dashboard type.

    A gallery is specific to a single ``DashboardType`` and references the
    ``WidgetModel`` documents that belong to it.
    """

    dashboard_type = fields.ReferenceField(DashboardType, required=True)
    widget_models = fields.ListField(fields.ReferenceField(WidgetModel))
    created = fields.DateTimeField(default=datetime.now)

    def to_dict(self):
        """Return the base dict extended with the dashboard type's details.

        Adds ``display_name`` and ``type``, both read from the referenced
        dashboard type.
        """
        result = super(Gallery, self).to_dict()
        dashboard_type = self.dashboard_type
        result['display_name'] = dashboard_type.display_name
        result['type'] = dashboard_type.type
        return result
class ApplicationToken(Document):
    """Non-expiring key tied to a user who was granted an application."""

    # State a token is in from the moment it was requested by a user
    # to the moment the request was accepted by a superuser.
    STATUS_REQUESTED = 'requested'
    # State a token is in since it was created/accepted by a superuser
    # until the moment it is specifically invalidated.
    STATUS_VALID = 'valid'
    # State once the token has been revoked by a superuser.
    STATUS_INVALID = 'invalid'

    # Cannot create accounts/users, only basic access (nlp endpoints).
    TYPE_BASIC = 'basic'
    # Can create one account, has admin level access on that account.
    TYPE_ACCOUNT = 'account'
    # Can create multiple accounts, has staff level access across them.
    TYPE_CORPORATE = 'corporate'

    manager = ApplicationTokenManager

    creator = fields.ReferenceField(User)
    status = fields.StringField(
        choices=[STATUS_INVALID, STATUS_VALID, STATUS_REQUESTED])
    type = fields.StringField(
        choices=[TYPE_BASIC, TYPE_CORPORATE, TYPE_ACCOUNT])
    app_key = fields.StringField()

    def _store_status(self, status):
        """Assign ``status`` to the token and save the document."""
        self.status = status
        self.save()

    def validate(self):
        """Switch the token to ``STATUS_VALID`` and save it."""
        self._store_status(self.STATUS_VALID)

    def invalidate(self):
        """Switch the token to ``STATUS_INVALID`` and save it."""
        self._store_status(self.STATUS_INVALID)
class TaskMessage(Document):
    """Internal structure representing the integration data structure
    with a data stream provider.
    """

    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and every document would share that one timestamp.
    _created = fields.DateTimeField(db_field='ca', default=datetime.now)
    content = fields.StringField(db_field='ct', required=True)
    type = fields.StringField(db_field='tp', required=True)
    user = fields.ListField(fields.ReferenceField(User))

    manager = TaskMessageManager

    # NOTE(review): the methods below use ``entry_count``, ``_update_item`` and
    # the ``datasift_hash`` / ``last_sync`` fields, none of which are declared
    # on this class in the visible source -- confirm they are provided
    # elsewhere before relying on these methods.

    def add_item(self):
        """Increment counters."""
        self._update_item(1)

    def remove_item(self):
        """Decrement counters, or delete the document if it would be empty."""
        if self.entry_count >= 2:
            self._update_item(-1)
        else:
            self.delete()

    def set_datasift_hash(self, datasift_hash):
        """Atomically set the datasift hash and update ``last_sync``.

        Issues a single ``find_and_modify`` on the underlying collection and
        returns its result (requested with ``new=True``).
        """
        return self.objects.coll.find_and_modify(
            query={'_id': self.id},
            update={
                '$set': {
                    self.fields['datasift_hash'].db_field: datasift_hash,
                    self.fields['last_sync'].db_field: datetime.now()
                }
            },
            new=True)
class TextChannelFilterItem(Document):
    """A piece of text attached to an ``AuthTextClassifier`` filter.

    Each item carries its raw content, a vector dict, and whether it was
    'rejected' or 'accepted'.
    """

    content = fields.StringField(db_field='ct')
    channel_filter = fields.ReferenceField(AuthTextClassifier, db_field='cr')
    vector = fields.DictField(db_field='vr')
    filter_type = fields.StringField(
        db_field='fe',
        choices=['rejected', 'accepted'],
        default='rejected',
    )
class QueuedHistoricData(Document):
    """Raw post payload queued for a historical subscription.

    ``post_data`` holds a JSON string; ``post_data_format`` records which
    source format it is in so that ``solariat_post_data`` can pick the
    matching converter.
    """

    DATASIFT_DEFAULT = 0
    TWITTER_API_DM = 1
    SOLARIAT_POST_DATA = 2
    TWITTER_API_PUBLIC = 3

    # TWITTER_API_PUBLIC is listed too: ``solariat_post_data`` has a converter
    # for it, so the field's ``choices`` must accept it.
    DATA_FORMATS = (DATASIFT_DEFAULT, TWITTER_API_DM, SOLARIAT_POST_DATA,
                    TWITTER_API_PUBLIC)

    subscription = fields.ReferenceField(BaseHistoricalSubscription,
                                         db_field='sub')
    timestamp = fields.NumField(db_field='tsp')
    post_data = fields.StringField(db_field='pd')
    post_data_format = fields.NumField(choices=DATA_FORMATS,
                                       default=DATASIFT_DEFAULT,
                                       db_field='fmt')

    indexes = [('subscription', 'timestamp')]

    @property
    def solariat_post_data(self):
        """Return ``post_data`` decoded and converted by its format's converter.

        If the converter raises ``KeyError``, the unconverted decoded data is
        returned with ``'_transform_error'`` set to True.

        Raises:
            KeyError: if ``post_data_format`` has no registered converter.
        """
        data = json.loads(self.post_data)
        transform = {
            self.SOLARIAT_POST_DATA: lambda x: x,
            self.DATASIFT_DEFAULT: datasift_to_post_dict,
            self.TWITTER_API_DM: twitter_dm_to_post_dict,
            self.TWITTER_API_PUBLIC: twitter_status_to_post_dict
        }[self.post_data_format]
        try:
            data = transform(data)
        except KeyError:
            # The payload lacked a key the converter needs; hand back the raw
            # data flagged as untransformed instead of failing.
            data['_transform_error'] = True
        return data
class FAQDocumentInfo(ArchivingAuthDocument):
    """Per-channel FAQ document statistics stored as JSON strings.

    ``answer_df``, ``query_df`` and ``stemmer`` are exposed as properties
    that JSON-encode on assignment and JSON-decode on access; the encoded
    strings are stored in the underscore-prefixed fields.
    """

    collection = 'FAQDocInfo'

    channel = fields.ReferenceField('Channel', db_field='ch')
    _answer_df = fields.StringField()
    _query_df = fields.StringField()
    _stemmer = fields.StringField()
    query_count = fields.NumField()

    indexes = ['channel']
    # indexes = [('channel'), (('_query_df', TEXT), None, 'english')]

    @property
    def answer_df(self):
        """Decoded value of the ``_answer_df`` JSON string."""
        return json.loads(self._answer_df)

    @answer_df.setter
    def answer_df(self, answer_df):
        self._answer_df = json.dumps(answer_df)

    @property
    def query_df(self):
        """Decoded value of the ``_query_df`` JSON string."""
        return json.loads(self._query_df)

    @query_df.setter
    def query_df(self, query_df):
        self._query_df = json.dumps(query_df)

    @property
    def stemmer(self):
        """Decoded value of the ``_stemmer`` JSON string."""
        return json.loads(self._stemmer)

    @stemmer.setter
    def stemmer(self, stemmer):
        self._stemmer = json.dumps(stemmer)
class PostFilterEntryPassive(Document):
    """A uniquely indexed filter entry with its channels and twitter handles."""

    entry = fields.StringField(db_field='kd')
    channels = fields.ListField(
        fields.ReferenceField('Channel'),
        db_field='cs',
    )
    twitter_handles = fields.ListField(
        fields.StringField(),
        db_field='th',
    )

    # Each ``entry`` value may be stored only once.
    indexes = [Index('entry', unique=True)]
class VoicePost(ChatPost):
    """Chat-style post for the voice platform.

    The parent reference points at another ``VoicePost`` and the class has
    its own manager and profile class.
    """

    manager = VoiceChatManager
    PROFILE_CLASS = UserProfile

    _parent_post = fields.ReferenceField('VoicePost', db_field='pp')

    @property
    def platform(self):
        """Name of the platform this post belongs to."""
        return 'Voice'
class ChatChannel(Channel):
    """Channel for the chat platform; always dispatchable."""

    review_outbound = fields.BooleanField(db_field='ro', default=False)
    review_team = fields.ReferenceField(Group, db_field='rg')

    @property
    def platform(self):
        """Name of the platform this channel belongs to."""
        return 'Chat'

    @property
    def is_dispatchable(self):
        """Chat channels always report themselves as dispatchable."""
        return True
class PostFilterEntry(Document):
    """A single filter entry of a ``PostFilter``.

    The combination (filter_type_id, entry, lang) is unique.
    """

    entry = fields.StringField(db_field='kd')
    filter_type_id = fields.NumField(choices=FILTER_TYPE_IDS, db_field='ee')
    post_filter = fields.ReferenceField(PostFilter, db_field='pr')
    twitter_handles = fields.ListField(fields.StringField(), db_field='th')
    channels = fields.ListField(fields.ReferenceField('Channel'),
                                db_field='cs')
    lang = fields.StringField(default=LangCode.EN)

    indexes = [
        ('entry', 'channels', 'lang'),
        Index(('filter_type_id', 'entry', 'lang'), unique=True),
        ('channels', ),
    ]

    @property
    def filter_type(self):
        """``FILTER_TYPES`` entry for ``filter_type_id``, or None when unset."""
        type_id = self.filter_type_id
        if type_id is None:
            return None
        return FILTER_TYPES[int(type_id)]
class Funnel(AuthDocument):
    """A uniquely named, ordered list of step ids for one journey type."""

    name = fields.StringField(required=True, unique=True)
    description = fields.StringField()
    journey_type = fields.ObjectIdField()
    steps = fields.ListField(fields.ObjectIdField(), required=True)
    owner = fields.ReferenceField(User)
    created = fields.DateTimeField(default=datetime.now)

    def to_dict(self, fields_to_show=None):
        """Return the base dict with ``steps`` rendered as a list of strings.

        ``fields_to_show`` is accepted but not used by this implementation.
        """
        result = super(Funnel, self).to_dict()
        result['steps'] = [str(step_id) for step_id in self.steps]
        return result
class StreamRef(Document):
    """Reference to a stream identified by a hash of its filters.

    The document id is derived from the (track, follow, languages) filters
    and is filled in on save.
    """

    QUEUED = 'queued'
    RUNNING = 'running'
    ERROR = 'error'
    STOPPED = 'stopped'
    STREAM_STATUSES = [QUEUED, RUNNING, ERROR, STOPPED]

    id = fields.BytesField(db_field='_id', unique=True, required=True)
    track = fields.ListField(fields.StringField())
    follow = fields.ListField(fields.StringField())  # user_ids
    languages = fields.ListField(fields.StringField(), db_field='lng')

    status = fields.StringField(choices=STREAM_STATUSES)
    log = fields.ReferenceField('StreamLog')

    manager = StreamRefManager
    indexes = [('status', )]

    def is_stopped(self):
        """Truthy when status is STOPPED or the log has a ``stopped_at``."""
        if self.status == self.STOPPED:
            return True
        return self.log and self.log.stopped_at is not None

    @property
    def key(self):
        """The document id, computed from ``filters`` on first use."""
        if not self.id:
            self.id = mhash(self.filters, n=128)
        return self.id

    @property
    def filters(self):
        """Frozen (track, follow, languages) triple used as hash input."""
        return (
            freeze(self.track),
            freeze(self.follow),
            freeze(self.languages),
        )

    def set_added(self):
        """Mark the stream as running and stamp the log's start time."""
        self.update(status=self.RUNNING)
        self.log.update(started_at=now())

    def set_removed(self):
        """Mark the stream as stopped and stamp the log's stop time."""
        self.update(status=self.STOPPED)
        self.log.update(stopped_at=now())

    def save(self, **kw):
        """Fill in the hash id, then save via the base class."""
        self.id = self.key  # fill hash id
        super(StreamRef, self).save(**kw)
class AuthToken(Document):
    """Temporary key for user authentication."""

    manager = AuthTokenManager
    collection = 'authtoken'

    # Token lifetime in hours.
    VALID_PERIOD = get_var('TOKEN_VALID_PERIOD', 24)

    user = fields.ReferenceField(User)
    digest = fields.StringField(unique=True)
    app_key = fields.ObjectIdField(required=False)

    @property
    def is_valid(self):
        """True while the token is not expired.

        The token is valid when ``created`` is later than the current UTC
        time minus ``VALID_PERIOD`` hours.
        """
        return (datetime.utcnow() - timedelta(hours=self.VALID_PERIOD)
                < self.created)

    def to_dict(self):
        """Return the dict exposed through the HTTP API."""
        return dict(token=self.digest)
class DashboardWidget(Document):
    """A titled widget with free-form settings, placed on one dashboard."""

    created = fields.DateTimeField(db_field='c', default=datetime.now)
    settings = fields.DictField(db_field='s')
    order = fields.NumField(db_field='o')
    title = fields.StringField(db_field='t', required=True)
    user = fields.ReferenceField(User, db_field='u')
    dashboard_id = fields.ObjectIdField(required=True)

    manager = DashboardWidgetManager

    def to_dict(self):
        """Return title, order and ids, overlaid with the widget settings.

        Settings are applied last, so a settings key with the same name as
        a base key replaces it.
        """
        result = {
            'title': self.title,
            'order': self.order,
            'id': str(self.id),
            'dashboard_id': str(self.dashboard_id),
        }
        result.update(self.settings)
        return result

    def copy_to(self, dashboard):
        """Create and return a copy of this widget on ``dashboard``.

        The copy takes this widget's title and settings and the target
        dashboard's owner and id.
        """
        payload = dict(title=self.title,
                       user=dashboard.owner,
                       dashboard_id=dashboard.id)
        payload.update(self.settings)
        return DashboardWidget.objects.create_by_user(**payload)

    def delete(self):
        """Detach the widget from its dashboard, then delete the widget."""
        owning_dashboard = Dashboard.objects.get_by_user(
            self.user, id=self.dashboard_id)
        owning_dashboard._remove_widget(self)
        super(DashboardWidget, self).delete()

    def __repr__(self):
        return "<DashboardWidget: %s; id: %s>" % (self.title, self.id)
class DashboardType(Document):
    """A kind of dashboard with a unique type key and display name."""

    collection = 'DashboardType'
    manager = DashboardTypeManager

    type = fields.StringField(required=True, unique=True)
    display_name = fields.StringField(required=True, unique=True)
    owner = fields.ReferenceField(User)
    created = fields.DateTimeField(default=datetime.now)

    def __repr__(self):
        return "<DashboardType: %s; id: %s>" % (self.display_name, self.id)

    def to_dict(self):
        """Return the base dict plus the owner's name and email.

        Both ``owner_name`` and ``email`` are empty strings when the type
        has no owner.
        """
        result = super(DashboardType, self).to_dict()
        owner = self.owner
        if owner:
            owner_name = '%s %s' % (owner.first_name or '',
                                    owner.last_name or '')
            email = owner.email
        else:
            owner_name = email = ''
        result['owner_name'] = owner_name
        result['email'] = email
        return result
class ChannelFilterItem(Document, ESMixin):
    """A post or response attached to a ``ChannelFilter``.

    Items are either 'rejected' or 'starred'; the pair
    (channel_filter, item_id) is unique.
    """

    ESCollection = ChannelFilterItemCollection()
    manager = ChannelFilterItemManager

    item_id = PostOrResponseId(db_field='it')
    channel_filter = fields.ReferenceField(ChannelFilter, db_field='cr')
    content = fields.StringField(db_field='ct')
    vector = fields.DictField(db_field='vr')
    filter_type = fields.StringField(db_field='fe',
                                     choices=['rejected', 'starred'],
                                     default='rejected')
    is_active = fields.BooleanField(db_field='ia', default=True)

    indexes = [Index(('channel_filter', 'item_id'), unique=True)]

    def to_dict(self):
        """Return the dict used for pushing to ES.

        ``content`` is taken from the vector, and the filter type, channel
        id and item id are rendered as strings.
        """
        doc = super(ChannelFilterItem, self).to_dict()
        doc.update({
            'content': self.vector['content'],
            'filter_type': str(self.filter_type),
            'channel': str(self.channel_filter.channel.id),
        })
        doc['item_id'] = str(doc['item_id'])
        return doc
class PredictorModel(Document):
    """Base document for a versioned model owned by a predictor.

    ``score``, ``feedback`` and ``search`` are no-op hooks here.
    """

    collection = 'PredictorModel'
    allow_inheritance = True

    version = fields.NumField()
    predictor = fields.ReferenceField('BasePredictor')
    parent = fields.ObjectIdField()
    weight = fields.NumField()
    display_name = fields.StringField()
    description = fields.StringField()
    # is_active = fields.BooleanField(default=False)
    task_data = fields.EmbeddedDocumentField(TaskData)
    last_run = fields.DateTimeField()
    context_features = fields.ListField(fields.DictField())
    action_features = fields.ListField(fields.DictField())
    train_data_percentage = fields.NumField(default=80)
    n_rows = fields.NumField()
    min_samples_thresould = fields.NumField(default=1)

    from_dt = fields.DateTimeField()
    to_dt = fields.DateTimeField()

    def score(self, *args, **kwargs):
        """No-op in the base class."""
        return None

    def feedback(self, *args, **kwargs):
        """No-op in the base class."""
        return None

    def search(self, *args, **kwargs):
        """No-op in the base class."""
        return None

    def to_json(self, *args, **kwargs):
        """Return the base JSON passed through the predictor conversion."""
        # Imported here rather than at module level, as in the original.
        from solariat_bottle.db.predictors.base_predictor import PredictorConfigurationConversion
        raw = super(PredictorModel, self).to_json(*args, **kwargs)
        return PredictorConfigurationConversion.python_to_json(raw)
class FAQ(ArchivingAuthDocument):
    """This is the customer message we search for.

    An FAQ holds a question/answer pair for a channel and a classifier that
    is trained from relevance feedback. Each feedback entry is a dict of the
    form ``{'query': <raw query>, 'is_relevant': <0 or 1>}``, as written by
    ``train``.
    """

    collection = 'FAQ'
    allow_inheritance = True
    manager = FAQManager

    channel = fields.ReferenceField('Channel')
    question = fields.StringField(db_field='q')
    answer = fields.StringField(db_field='a')
    queries = fields.ListField(fields.StringField())
    feedback = fields.ListField(fields.DictField(), db_field='f')
    packed_clf = fields.BinaryField(db_field='clf')
    is_active = fields.BooleanField(default=True, db_field='ia')

    indexes = [('channel'), ]
    admin_roles = [ADMIN, STAFF]

    relevance = 0
    vector = {}

    @property
    def classifier_class(self):
        """So we can easily plugin other classifier classes if we want."""
        return FilterClassifier

    def _new_classifier(self, model=None):
        """Build a classifier configured the way ``train`` expects.

        Queries are translated before training, so feature extraction is the
        identity function.
        """
        def extract(s):
            return s

        clf = FilterClassifier(model=model) if model else FilterClassifier()
        clf.extract_features = extract
        # NOTE(review): inside this class body the attribute name is mangled
        # to ``_FAQ__vectorizer``; if the intent was to replace the
        # classifier's own private vectorizer this assignment has no effect --
        # confirm against FilterClassifier.
        clf.__vectorizer = HashingVectorizer(n_features=20,
                                             lowercase=True,
                                             binary=True)
        return clf

    @property
    def clf(self):
        """Classifier for this FAQ, built on first access.

        It is restored from ``packed_clf`` when that is set, otherwise it is
        created empty.
        """
        if not hasattr(self, '_clf') or not self._clf:
            self._clf = self._new_classifier(self.packed_clf)
        return self._clf

    def process_queries(self):
        """Train the classifier with every stored query as relevant."""
        for query in self.queries:
            self.train(query, True)

    def save(self, update_es=False):
        """Pack the classifier, save the document and optionally push to ES."""
        self.packed_clf = self.clf.packed_model
        super(FAQ, self).save()
        if update_es:
            self.put_to_es(refresh=True)

    def delete(self):
        """Delete the document and recompile the channel's FAQs."""
        super(FAQ, self).delete()
        DbBasedSE1(self.channel).compile_faqs()

    def train(self, query, is_relevant):
        """Record one feedback entry, train on it and save.

        :param query: raw query text
        :param is_relevant: relevance score; must be <= 1
        """
        assert is_relevant <= 1, "GOT FEEDBACK WITH SCORE " + str(is_relevant)
        self.feedback.append(dict(query=query, is_relevant=int(is_relevant)))
        query_vec = DbBasedSE1.translate(query)
        self.clf.train([query_vec], [is_relevant])
        self.save()

    def retrain(self):
        """Rebuild the classifier from scratch using all stored feedback.

        Feedback entries are dicts, so they are read by key, and queries are
        translated exactly as ``train`` does so the rebuilt model sees the
        same kind of input.
        """
        queries = [DbBasedSE1.translate(entry['query'])
                   for entry in self.feedback]
        values = [int(entry['is_relevant']) for entry in self.feedback]
        clf = self._new_classifier()
        clf.train(queries, values)
        self._clf = clf
        self.save()

    def feedback_to_es(self):
        """Return the feedback as (translated query, relevance) pairs."""
        return [(DbBasedSE1.translate(entry['query']),
                 int(entry['is_relevant']))
                for entry in self.feedback]

    def make_index_entry(self):
        """Build the document that is stored in elastic search."""
        return dict(id=str(self.id),
                    question_vector=DbBasedSE1.translate(self.question),
                    answer_vector=DbBasedSE1.translate(self.answer),
                    feedback=self.feedback_to_es(),
                    channels=[str(self.channel.id)],
                    faq=self.to_dict())

    def put_to_es(self, refresh=True):
        """Put an encoded form of the document to elastic search."""
        doc = self.make_index_entry()
        self.es_collection.put(str(self.id), doc)
        self.is_active = True
        self.save()
        if refresh:
            self.es_collection.index.refresh()

    def to_dict(self, fields_to_show=None):
        """Return the base dict without the packed classifier blob."""
        full_dict = super(FAQ, self).to_dict()
        # Tolerate a base dict that does not include the blob.
        full_dict.pop('packed_clf', None)
        return full_dict

    def deploy(self, refresh=False):
        """Push the FAQ to elastic search."""
        self.put_to_es(refresh)

    def withdraw(self, refresh=False):
        """Deactivate an active FAQ and remove it from elastic search."""
        if self.is_active:
            self.is_active = False
            self.save()
            self.es_collection.delete(str(self.id))
            if refresh:
                self.es_collection.index.refresh()
class EnterpriseFacebookChannel(FacebookUserMixin, Channel):
    """Channel with facebook specific information for daemon."""

    # user access_token for EnterpriseFacebookChannel
    facebook_access_token = fields.StringField(db_field='fat')
    facebook_handle_id = fields.StringField(db_field='fid')
    facebook_screen_name = fields.StringField(db_field='fsn')
    user_access_token = fields.StringField(db_field='uat')
    # Keep track of all the page accounts this user has access to
    facebook_account_ids = fields.ListField(fields.StringField())
    # monitored facebook pages
    facebook_page_ids = fields.ListField(fields.StringField())
    tracked_fb_group_ids = fields.ListField(fields.StringField())
    tracked_fb_event_ids = fields.ListField(fields.StringField())

    review_outbound = fields.BooleanField(db_field='ro', default=False)
    review_team = fields.ReferenceField(Group, db_field='rg')
    is_inbound = fields.BooleanField(db_field='in', default=False)

    @property
    def is_authenticated(self):
        """The stored access token; truthy when one is set."""
        return self.facebook_access_token

    @property
    def type_id(self):
        return 2

    @property
    def type_name(self):
        return "Enterprise Facebook"

    @property
    def base_url(self):
        return "https://facebook.com"

    @property
    def platform(self):
        return "Facebook"

    @property
    def is_dispatchable(self):
        return True

    @property
    def is_dispatch_channel(self):
        return True

    def get_attached_service_channels(self):
        """Service channels of this account that dispatch through this channel."""
        return FacebookServiceChannel.objects(account=self.account,
                                              _dispatch_channel=self)[:]

    def get_service_channel(self, lookup_by_page_ids=True):
        """Return the service channel matching this enterprise channel.

        Attached service channels win; when several are attached a warning
        is logged and the first is returned. If none is attached and
        ``lookup_by_page_ids`` is true, service channels are looked up by
        access token and then by page ids; that fallback returns a channel
        only when exactly one matches. Returns None otherwise.
        """
        attached = self.get_attached_service_channels()

        if attached or not lookup_by_page_ids:
            if len(attached) > 1:
                LOGGER.warn("We have multiple candidates for service channel matching "
                            "for enterprise channel %s" % self)
            return attached[0] if attached else None

        # Fallback to lookup by token/page ids
        matches = attached
        if self.facebook_access_token:
            matches = FacebookServiceChannel.objects.find(
                account=self.account,
                facebook_access_token=self.facebook_access_token)[:]
        if not matches:
            matches = FacebookServiceChannel.objects.find(
                account=self.account,
                facebook_page_ids__in=self.facebook_page_ids)[:]

        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]
        LOGGER.error(
            "We have multiple candidates for service channel matching for enterprise channel %s"
            % self)
        return None

    def send_message(self, dry_run, creative, post, user, direct_message=False):
        """Reply to ``post`` with ``creative``.

        TODO: for now we always respond publicly; this will need to be
        extended once we want to integrate private messages based on
        response type.

        NOTE(review): ``dry_run``, ``user`` and ``direct_message`` are not
        used by this implementation.
        """
        native = post.native_data
        if post.can_comment:
            post_id = native['facebook_post_id']
        elif post.parent:
            # The parent of the post was also picked up, so the reply is
            # issued on the parent instead.
            post_id = post.parent.native_data['facebook_post_id']
        else:
            graph = facebook_driver.GraphAPI(self.facebook_access_token,
                                             channel=self)
            comment = graph.get_object(native['facebook_post_id'],
                                       fields='object.fields(id)')
            post_id = comment['parent']['id']

        LOGGER.info("Sending '%s' to %s using %s" % (creative, post_id, self))

        if post.is_pm:
            fb_answer_pm.ignore(post, creative)
            return
        fb_comment_by_page.ignore(post, post_id, creative)

    def get_outbound_channel(self, user):
        """This channel is its own outbound channel."""
        return self
class Dashboard(AuthDocument):
    """A titled collection of widgets of one dashboard type.

    ``widgets`` holds the ids of the ``DashboardWidget`` documents shown on
    the dashboard, and ``shared_to`` holds object ids the dashboard is
    shared with.
    """

    collection = 'Dashboard'
    manager = DashboardManager

    type_id = fields.ObjectIdField(required=True)
    title = fields.StringField(required=True)
    description = fields.StringField()
    owner = fields.ReferenceField(User)
    author = fields.ReferenceField(User)
    widgets = fields.ListField(fields.ObjectIdField())
    shared_to = fields.ListField(fields.ObjectIdField())
    filters = fields.DictField()
    created = fields.DateTimeField(default=datetime.now)

    admin_roles = {STAFF, ADMIN, REVIEWER, ANALYST}

    @staticmethod
    def _display_name(user):
        """'<first> <last>' for ``user``, or '' when there is no user."""
        if not user:
            return ''
        return '%s %s' % (user.first_name or '', user.last_name or '')

    def to_dict(self, fields_to_show=None):
        """Return the base dict plus id lists, owner/author details and type.

        ``owner`` and ``author`` are optional references; when one is unset
        its name, email and (for the owner) account id are emitted as empty
        strings, matching ``DashboardType.to_dict``.
        ``fields_to_show`` is accepted but not used.
        """
        rv = super(Dashboard, self).to_dict()
        rv['widgets'] = [str(widget_id) for widget_id in self.widgets]
        rv['shared_to'] = [str(user_id) for user_id in self.shared_to]
        owner, author = self.owner, self.author
        rv['owner_name'] = self._display_name(owner)
        rv['author_name'] = self._display_name(author)
        rv['owner_email'] = owner.email if owner else ''
        rv['author_email'] = author.email if author else ''
        rv['account_id'] = str(owner.account.id) if owner else ''
        rv['type'] = DashboardType.objects.get(self.type_id).type
        return rv

    def __repr__(self):
        return "<Dashboard: %s; id: %s>" % (self.title, self.id)

    def _add_widget(self, widget):
        """Append the widget's id to this dashboard and save."""
        self.widgets.append(widget.id)
        self.save()

    def _remove_widget(self, widget):
        """Remove the widget's id from this dashboard and save.

        The widget is not automatically deleted. To delete, use
        `.delete_widget()` instead.
        `widget.dashboard_id` will still point to this dashboard.
        """
        self.widgets.remove(widget.id)
        self.save()

    def delete_widget(self, widget):
        """Delete ``widget``, given either as a document or as its id."""
        if isinstance(widget, (basestring, fields.ObjectId)):
            widget = DashboardWidget.objects.get(widget)
        widget.delete()

    def delete(self):
        """Delete every widget of the dashboard, then the dashboard itself."""
        # Iterate over a snapshot: deleting a widget removes its id from the
        # dashboard's widget list, and mutating a list during iteration skips
        # elements.
        for widget_id in list(self.widgets):
            self.delete_widget(widget_id)
        super(Dashboard, self).delete_by_user(self.owner)

    def copy_to(self, user, title=None, description=None):
        """Create a copy of this dashboard owned by ``user`` and return it.

        The copy keeps the type and filters, records the current owner as
        author, starts unshared, and receives a copy of every widget.
        ``title`` and ``description`` override the originals when given.
        """
        dashboard_data = {
            'type_id': self.type_id,
            'title': title or self.title,
            'description': description or self.description,
            'author': self.owner,
            'owner': user,
            'widgets': [],
            'shared_to': [],
            'filters': self.filters,
        }
        # FIX: create_by_user is having role error
        dashboard = Dashboard.objects.create_by_user(user, **dashboard_data)
        for widget_id in self.widgets:
            widget = DashboardWidget.objects.get(widget_id)
            widget.copy_to(dashboard)
        return dashboard
class BaseHistoricalSubscription(Document):
    """Base document for a historical data subscription on a channel.

    In this base class ``get_progress`` and ``validate_recovery_range`` do
    nothing, and subscriptions are neither stoppable nor resumable.
    """

    collection = 'BaseHistoricalSubscription'
    allow_inheritance = True

    channel_id = fields.ObjectIdField(db_field='cid')
    status = fields.StringField(choices=STATUS_CHOICES, db_field='sts')
    from_date = fields.DateTimeField(db_field='fd')
    to_date = fields.DateTimeField(db_field='td')
    created_by = fields.ReferenceField('User', db_field='cb')

    indexes = [('channel_id', )]

    def __repr__(self):
        details = (self.id, self.status, self.from_date, self.to_date)
        return "<Historic subscription: id:%s, status:%s, from_date:%s, to_date:%s>" % details

    @cached_property
    def channel(self):
        """Channel looked up by ``channel_id``."""
        return Channel.objects.find_one(self.channel_id)

    @cached_property
    def service_channel(self):
        """Service channel of ``channel``, or None without a channel."""
        from solariat_bottle.utils.post import get_service_channel
        channel = self.channel
        return get_service_channel(channel) if channel else None

    @cached_property
    def outbound_channel(self):
        """Outbound channel for the creator, or None if it cannot be found.

        None is returned when there is no channel or when the lookup raises
        ``AppException``.
        """
        channel = self.channel
        if not channel:
            return None
        try:
            outbound = channel.get_outbound_channel(self.created_by)
        except AppException:
            outbound = None
        return outbound

    def is_active(self):
        """True when the status is one of ``STATUS_ACTIVE``."""
        return self.status in STATUS_ACTIVE

    def get_progress(self):
        """No progress information in the base class."""
        return None

    @property
    def is_stoppable(self):
        return False

    @property
    def is_resumable(self):
        return False

    def to_dict(self, fields2show=None):
        """Return the base dict plus type, activity, stoppability and progress."""
        result = super(BaseHistoricalSubscription, self).to_dict(fields2show)
        result.update(
            type=self.__class__.__name__,
            is_active=self.is_active(),
            is_stoppable=self.is_stoppable,
            progress=self.get_progress(),
        )
        return result

    @classmethod
    def validate_recovery_range(cls, from_date, to_date):
        """No validation in the base class."""
        return None
class UserProfile(Document):
    """Profile of a contact, keyed by the id native to its platform."""

    collection = 'UserProfile'
    allow_inheritance = True

    _created = fields.DateTimeField(default=now)
    updated_at = fields.DateTimeField(default=now, db_field='ts')
    # Note: ui is a name for UserProfile.user_id field
    native_id = fields.StringField(db_field='ui', required=False)
    # All Channels this user has been engaged through
    engaged_channels = fields.ListField(fields.ReferenceField(Channel))
    platform_data = fields.DictField(db_field='pd')
    actor_num = AutoIncrementField(counter_name="ActorCounter", db_field='ar')

    manager = UserProfileManager

    indexes = [('native_id', ), ('engaged_channels', )]

    def __init__(self, data=None, **kw):
        """For compatibility with untyped UserProfile.

        When the profile id is a string, the instance's class is switched to
        the platform-specific profile class derived from that id. This
        constructor can be deleted once all profiles in the UserProfile
        collection have the type information in the _t field.
        """
        default_class = self.__class__

        def _resolve_class(raw_id):
            from solariat_bottle.db.user_profiles.social_profile import DELIMITER, TwitterProfile, FacebookProfile
            start = unicode(raw_id).rfind(DELIMITER) + 1
            if start == 0:
                # No delimiter in the id: keep the current class.
                return default_class

            try:
                index = int(raw_id[start:])
            except ValueError:
                logger.info(
                    u"Could not obtain platform from profile id: {}".format(
                        raw_id))
                platform = None
            else:
                platform = PLATFORM_BY_INDEX.get(index)

            by_platform = {
                TwitterProfile.platform: TwitterProfile,
                FacebookProfile.platform: FacebookProfile
            }
            return by_platform.get(platform, default_class)

        profile_id = data.get('_id') if data else kw.get('id')
        if isinstance(profile_id, basestring):
            self.__class__ = _resolve_class(profile_id)
        super(UserProfile, self).__init__(data, **kw)

    @property
    def screen_name(self):
        """The native id doubles as the screen name."""
        return self.native_id

    @staticmethod
    def extract_native_id(data):
        """Return the native id found in ``data`` as a string.

        Looks at ``data['platform_data']['id']`` first, then ``data['id']``
        (falling back to ``data['native_id']``), and finally generates a new
        ``ObjectId``.
        """
        assert isinstance(data, dict), u"%s is not dict" % repr(data)
        from_platform = None
        if 'platform_data' in data:
            from_platform = data['platform_data'].get('id')
        native_id = (from_platform
                     or data.get('id', data.get('native_id'))
                     or ObjectId())
        return str(native_id)

    @property
    def created(self):
        """Creation time passed through ``utc``."""
        return utc(self._created)

    @classmethod
    def anonymous_profile(cls, platform=None):
        """Get or create the profile with id 'anonymous'.

        ``platform`` is accepted but not used.
        """
        return cls.objects.get_or_create(id='anonymous')

    @classmethod
    def non_existing_profile(cls):
        """Get, creating it if needed, the removed-profile placeholder."""
        try:
            return cls.objects.get(native_id=NATIVE_REMOVED_PROFILE)
        except cls.DoesNotExist:
            return cls.objects.create(native_id=NATIVE_REMOVED_PROFILE)

    def update_history(self, channel):
        """Record ``channel`` as engaged and save, unless already recorded."""
        if channel in self.engaged_channels:
            return
        self.engaged_channels.append(channel)
        self.save()

    def has_history(self, channel):
        """Truthy when this profile was engaged through ``channel``.

        For a channel that is not a dispatch channel, its service channel
        counts as well. A falsy ``channel`` is returned unchanged.
        """
        service_channel = None
        if channel and not channel.is_dispatch_channel:
            service_channel = get_service_channel(channel)
        if not channel:
            return channel
        engaged = self.engaged_channels
        return channel in engaged or service_channel in engaged

    def get_conversations(self, user, channel=None):
        """All conversations for this contact - subject to access controls.

        The result is ordered by ``last_modified`` and, when ``channel`` is
        given, limited to conversations that include that channel.
        """
        from solariat_bottle.db.conversation import Conversation
        conversations = list(
            Conversation.objects.find_by_user(user, contacts=self.id))
        conversations.sort(key=lambda conv: conv.last_modified)
        if channel is None:
            return conversations
        return [conv for conv in conversations
                if str(channel.id) in conv.channels]

    def to_dict(self, fields2show=None):
        """Return the base dict plus ``_type`` and ``screen_name``."""
        result = super(UserProfile, self).to_dict(fields2show)
        result.update(_type=self.__class__.__name__,
                      screen_name=self.screen_name)
        return result
class DataExport(ArchivingAuthDocument):
    """A data export request that moves through a fixed sequence of states.

    Every state change is appended to ``states_log``.
    """

    STATES = dict(CREATED=0,
                  FETCHING=1,
                  FETCHED=2,
                  GENERATING=3,
                  GENERATED=4,
                  SENDING=5,
                  SENT=6,
                  SUCCESS=7,
                  ERROR=8,
                  CANCELLED=9)
    State = enum(**STATES)

    account = fields.ReferenceField('Account', db_field='at')
    created_by = fields.ReferenceField('User', db_field='ur')
    recipients = fields.ListField(fields.ReferenceField('User'),
                                  db_field='rs')
    recipient_emails = fields.ListField(fields.StringField(), db_field='rse')
    state = fields.NumField(db_field='se',
                            choices=STATES.values(),
                            default=State.CREATED)
    created_at = fields.DateTimeField(db_field='ct', default=now)
    _input_filter = fields.DictField(db_field='ir')
    input_filter_hash = fields.BytesField(db_field='irh')
    states_log = fields.ListField(fields.DictField(), db_field='sg')

    indexes = [('acl', 'input_filter_hash')]
    manager = DataExportManager

    def set_input_filter(self, data):
        """Store the input filter together with its hash."""
        self._input_filter = data
        self.input_filter_hash = hash_dict(data)

    input_filter = property(lambda self: self._input_filter, set_input_filter)

    def _log_state_change(self, from_state, to_state, extra_info):
        """Append a transition record to ``states_log``.

        Returns the update keyword that pushes the same record.
        """
        record = {"from": from_state, "to": to_state, "ts": now()}
        if extra_info:
            record["info"] = extra_info
        self.states_log.append(record)
        return {"push__states_log": record}

    def change_state(self, new_state, **kwargs):
        """Switch to ``new_state`` and log the transition.

        Switching to ERROR or CANCELLED is always accepted; any other target
        must be at most two steps ahead of the current state. Keyword
        arguments are stored as extra info on the log record.
        """
        previous = self.state
        assert \
            new_state in {self.State.ERROR, self.State.CANCELLED} \
            or new_state - previous <= 2, \
            "Cannot switch to state %s from state %s" % (
                new_state, previous)

        self.state = new_state
        updates = self._log_state_change(previous, new_state, kwargs)
        updates['set__state'] = new_state
        self.update(**updates)

    def to_json(self, fields_to_show=None):
        """Return id, filter hash (as a string), state and creation time.

        ``fields_to_show`` is accepted but not used; the field list is fixed.
        """
        shown = ('id', 'input_filter_hash', 'state', 'created_at')
        data = super(DataExport, self).to_json(fields_to_show=shown)
        data['input_filter_hash'] = str(data['input_filter_hash'])
        return data

    def process(self, user, params=None):
        """Run the export pipeline: fetch, generate, zip and send.

        Each step receives the previous step's result as its arguments (a
        tuple result is unpacked). On any exception the state is switched to
        ERROR with the exception text and the exception is raised again.
        """
        S = DataExport.State
        switch_to = self.change_state

        pipeline = [(S.FETCHING, fetch_posts),
                    (S.GENERATING, PostsCsvGenerator.generate_csv),
                    (None, create_zip_attachments),
                    (S.SENDING, DataExportMailer(self).send_email)]

        try:
            args = (user, params)
            for step, command in pipeline:
                if step:
                    switch_to(step)
                outcome = command(*args)
                args = outcome if isinstance(outcome, tuple) else (outcome, )
            switch_to(S.SUCCESS)
        except Exception as exc:
            switch_to(S.ERROR, exception=unicode(exc))
            raise exc
class ChatPost(Post):
    """A post that belongs to a chat session.

    Chat-specific data is read from ``extra_fields['chat']``. The parent
    post is resolved from the session's conversation on first access.
    """

    manager = ChatPostManager

    _parent_post = fields.ReferenceField('ChatPost', db_field='pp')
    session_id = fields.StringField()

    PROFILE_CLASS = ChatProfile

    @property
    def platform(self):
        return 'Chat'

    @property
    def parent(self):
        """The previous post in the conversation, resolved on first access.

        An ``UntrackedChatPost`` parent is returned without being stored on
        the post.
        """
        if self._parent_post is None:
            post = self._get_parent_post()
            if isinstance(post, UntrackedChatPost):
                return post
            self._parent_post = post
        return self._parent_post

    @property
    def conversation(self):
        """Conversation for this post's session, or None if there is none.

        Every access performs a lookup by ``session_id``.
        """
        from solariat_bottle.db.conversation import SessionBasedConversation
        try:
            res = SessionBasedConversation.objects.get(
                session_id=self.session_id)
        except SessionBasedConversation.DoesNotExist:
            res = None
        return res

    def _get_parent_post(self):
        """Find the parent chat post of the current post.

        Returns the latest conversation post created before this one, or
        None when there is no conversation or no earlier post.
        """
        conversation = self.conversation
        if not conversation:
            return None
        # We have no direct way to get a parent from a chat session. Just
        # iterate the posts in the conversation and return the one that
        # precedes this post.
        candidates = sorted(conversation.query_posts()[:],
                            key=lambda x: x.created_at)
        for candidate in reversed(candidates):
            if candidate.created_at < self.created_at:
                return candidate
        return None

    @property
    def view_url_link(self):
        return 'View Chat Message'

    @property
    def is_amplifier(self):
        return False

    def platform_specific_data(self, outbound_channel=None):
        """Any post info that is specific only per platform goes here."""
        return {'has_location': self.has_location}

    @property
    def has_attachments(self):
        return False

    @property
    def has_location(self):
        """Value of the chat data's 'location' entry, False when absent."""
        return self._chat_data.get('location', False)

    @property
    def parent_post_id(self):
        """Value of the chat data's 'in_reply_to_status_id', None when absent."""
        return self._chat_data.get('in_reply_to_status_id', None)

    @property
    def _chat_data(self):
        return self.extra_fields.get('chat', {})

    def is_root_post(self):
        """True when the post has no parent."""
        return not self.parent

    @property
    def _chat_created_at(self):
        return self._chat_data.get('created_at', None)

    def parse_created_at(self):
        """Parsed creation time from the chat data, or None when absent."""
        return utc(parse_datetime(
            self._chat_created_at)) if self._chat_created_at else None

    def _set_url(self, url=None):
        """Set ``url`` on the post when one is given; does not save."""
        if url is not None:
            self.url = url

    def set_url(self, url=None):
        """Set ``url`` on the post when one is given, then save."""
        self._set_url(url)
        self.save()

    def to_dict(self, fields2show=None, include_summary=True):
        """Return the base dict, optionally with the conversation summary.

        ``summary`` is None when the post has no conversation.
        """
        # NOTE(review): EventTag is not used below; the import is kept in
        # case it is needed for its import side effects -- confirm.
        from solariat_bottle.db.predictors.multi_channel_smart_tag import EventTag
        base_dict = super(ChatPost, self).to_dict(fields2show=fields2show)
        if include_summary:
            # Look the conversation up once and tolerate its absence:
            # ``conversation`` returns None when no session conversation
            # exists.
            conversation = self.conversation
            base_dict['summary'] = (conversation.get_summary()
                                    if conversation is not None else None)
        return base_dict
class TwitterChannel(Channel):
    "Channel with twitter specific information for crawler"

    # monitored twitter users.
    # THIS SHOULD BE DEPRECATED. WILL NOT SCALE
    twitter_usernames = fields.ListField(fields.StringField())

    # twitter credentials for replying
    access_token_key = fields.EncryptedField(fields.StringField(),
                                             allow_db_plain_text=True)
    access_token_secret = fields.EncryptedField(fields.StringField(),
                                                allow_db_plain_text=True)

    review_outbound = fields.BooleanField(default=False, db_field='ro')
    review_team = fields.ReferenceField(Group, db_field='rg')

    @property
    def is_authenticated(self):
        ''' Over-ride in derived classes. '''
        # Coerce to bool so the property never hands the raw token secret
        # back to the caller (a bare `a and b` evaluates to `b`).
        return bool(self.access_token_key and self.access_token_secret)

    @property
    def type_id(self):
        return 8

    @property
    def type_name(self):
        return "Twitter"

    @property
    def platform(self):
        return "Twitter"

    @property
    def is_dispatchable(self):
        return True

    def get_outbound_channel(self, user):
        return get_twitter_outbound_channel(user, self)

    def has_direct_messages(self, conversation, contact=None):
        """
        If any post from this conversation that is not replied is a direct message,
        that this conversation still has open direct messages, and any reply will
        be as a direct message.

        :param conversation: the conversation we are checking for direct messages
        :param contact: if provided, will only check for open direct messages from this contact.
        """
        for post in conversation.query_posts():
            if (post.get_post_status(self) == 'actual'
                    and post.message_type == 'direct'
                    and (contact is None
                         or post.user_profile.user_name == contact)):
                return True
        return False

    def send_message(self, dry_run, creative, post, user, direct_message=None):
        """Reply to `post` with `creative`, publicly or as a direct message.

        :param dry_run: when true (or when the ON_TEST variable is set) no
            twitter task is triggered; an outbound post is created instead.
        :param creative: the reply text.
        :param post: the post being replied to.
        :param user: the user the outbound post is created for.
        :param direct_message: explicit DM flag; when None the decision is
            based on `post.message_type`.
        :raises AppException: if the resulting status exceeds 140 characters.
        """
        # self.sync_contacts(post.user_profile)
        from solariat_bottle.tasks.twitter import tw_normal_reply, tw_direct_reply

        if direct_message is not None:
            # If we specifically passed the fact that we want a direct message, use DM
            is_dm = direct_message
        else:
            # Otherwise decide based on post type
            is_dm = post.message_type == 'direct'

        if not is_dm:
            status = "@%s %s" % (post.user_profile.user_name, creative)
        else:
            status = creative

        if len(status) > 140:
            msg = (
                'Sorry, you have exceeded your 140 character limit by %d characters. '
                'Please correct your reply and try again.') % (len(status) - 140)
            raise AppException(msg)

        status_id = post.native_id

        # Update the engagement history
        post.user_profile.update_history(self)

        LOGGER.debug("For current message, direct message flag is %s", is_dm)
        if not dry_run and not get_var('ON_TEST'):
            if is_dm:
                tw_direct_reply.ignore(self,
                                       status=status,
                                       screen_name=post.user_profile.user_name)
            else:
                tw_normal_reply.ignore(self,
                                       status=status,
                                       status_id=status_id,
                                       post=post)
        else:
            create_outbound_post(user, self, creative, post)

        LOGGER.debug("Sent '%s' to %s using %s", creative,
                     post.user_profile.user_name, self.title)

    def share_post(self, post, user, dry_run=False):
        """Retweet `post`.

        The twitter task is only triggered when `dry_run` is False, ON_TEST
        is not set and APP_MODE is 'prod'; otherwise an "RT: ..." outbound
        post is created instead.
        """
        # self.sync_contacts(post.user_profile)
        from solariat_bottle.tasks.twitter import tw_share_post

        post_content = post.plaintext_content
        status_id = post.native_id

        if dry_run is False and not get_var('ON_TEST') and get_var(
                'APP_MODE') == 'prod':
            tw_share_post.ignore(self,
                                 status_id=status_id,
                                 screen_name=post.user_profile.user_name)
        else:
            create_outbound_post(user, self, "RT: %s" % post_content, post)

        LOGGER.debug("Retweet '%s' using %s", post_content, self)

    def has_private_access(self, sender_handle, recipient_handle):
        """Only the recipient handle is checked; `sender_handle` is unused."""
        return self._dm_access_check(recipient_handle)

    def _dm_access_check(self, twitter_handle):
        """
        Check if a channels should actually have permissions to a direct message.

        :param twitter_handle: the twitter handle the direct message is
            checked against.
        :returns: True as soon as one of the account's outbound channels for
            this handle is active, bound to this handle and shares at least
            one acl entry with this channel; False otherwise.
        """
        if not self.account:
            LOGGER.info("Channel %s rejected because no account.", self.title)
            return False

        outbounds = self.account.get_outbounds_for_handle(twitter_handle)
        if not outbounds:
            LOGGER.info(
                "Channel %s rejected because no outbound channel is set for the handle %s.",
                self.title, twitter_handle)
            return False

        # Go through all the outbounds and see if there is any valid for this inbound channel
        for outbound_channel in outbounds:
            if not outbound_channel:
                # There is no outbound channel configured for twitter.
                # At this point no channel from the account should have access.
                # Skip the entry: every check below dereferences it.
                LOGGER.info(
                    "Channel %s rejected because no outbound channel is set.",
                    self.title)
                continue

            LOGGER.info("Validating %s against outbound %s.", self.title,
                        outbound_channel.title)
            checks_passed = True

            if (outbound_channel.status != 'Active'
                    or outbound_channel.twitter_handle != twitter_handle):
                # Another reason why a channel might not have acces is if twitter
                # outbound channel is no longer active or if it's active for another
                # handle.
                LOGGER.info(
                    "Expected active outbound: %s with handle %s but got: (%s, %s)",
                    outbound_channel.title, twitter_handle,
                    outbound_channel.status, outbound_channel.twitter_handle)
                checks_passed = False

            if not set(self.acl).intersection(set(outbound_channel.acl)):
                LOGGER.info("Channel %s rejected due to acl conflicts.",
                            self.title)
                LOGGER.info("ACL for %s: %s, while ACL for %s: %s",
                            self.title, set(self.acl),
                            outbound_channel.title, set(outbound_channel.acl))
                checks_passed = False

            if checks_passed:
                return True
        return False

    def get_service_channel(self):
        return None

    def list_outbound_channels(self, user):
        # NOTE(review): this reads `self.usernames`, while the field declared
        # on this class is `twitter_usernames` - presumably `usernames` is
        # provided by a base class or subclass; confirm.
        return EnterpriseTwitterChannel.objects.find_by_user(
            user, account=self.account, twitter_handle__in=self.usernames)

    def patch_user(self, user):
        """Create and save an empty TwitterProfile and an agent profile for
        this channel's account, then attach the TwitterProfile to `user`."""
        from solariat_bottle.db.user_profiles.social_profile import TwitterProfile
        # TODO: [gsejop] Why do we need to create dummy Agent/UserProfiles
        # when User instance has no user_profile?
        up = TwitterProfile()
        up.save()
        AgentProfile = user.account.get_agent_profile_class()
        ap = AgentProfile(account_id=self.account.id)
        ap.save()
        #ap.add_profile(up)
        user.user_profile = up
        user.save()
class ChannelType(ArchivingAuthDocument):
    """Account-scoped, schema-driven channel type.

    The channel class for a type is generated on demand from `schema` and
    cached on the instance; `apply_sync` re-applies the schema types to the
    stored documents.
    """

    STATUSES = IN_SYNC, SYNCING, OUT_OF_SYNC = 'IN_SYNC', 'SYNCING', 'OUT_OF_SYNC'

    manager = ChannelTypeManager

    # from base.Channel
    account = fields.ReferenceField(Account, db_field='at', required=True)

    name = fields.StringField(required=True)
    description = fields.StringField()
    schema = fields.ListField(fields.DictField())
    sync_status = fields.StringField(choices=STATUSES, default=IN_SYNC)
    is_locked = fields.BooleanField(default=False)
    mongo_collection = fields.StringField()
    created_at = fields.DateTimeField(default=now)
    updated_at = fields.DateTimeField(default=now)

    # Lazily built data class, see get_channel_class().
    _channel_class = None

    @property
    def data_class_name(self):
        # keep classname unique system wide, to exclude collisions in MetaDocument.Registry[name]
        # when creating instance of Channel for different accounts
        return '%s%s' % (self.name.encode('utf8'), self.account.id)

    def get_channel_class(self):
        """Return the schema-based channel class, creating it on first use."""
        if self._channel_class is None:
            newclass = SchemaBased.create_data_class(
                self.data_class_name,
                self.schema,
                self.mongo_collection,
                inherit_from=DynamicEventsImporterChannel,
                _platform=self.name)
            self._channel_class = newclass
        return self._channel_class

    def update(self, *args, **kwargs):
        """Update the document, dropping the cached channel class when the
        schema is being changed."""
        if 'schema' in kwargs and kwargs['schema'] != self.schema:
            self._channel_class = None
            from solariat.db.abstract import MetaDocument
            try:
                del MetaDocument.Registry[self.data_class_name]
            except Exception:
                # Best effort: the class may simply not be registered. Unlike
                # a bare `except`, this lets KeyboardInterrupt/SystemExit
                # propagate.
                LOGGER.debug('Could not drop data class from registry',
                             exc_info=True)
        return super(ChannelType, self).update(*args, **kwargs)

    def apply_sync(self, user):
        """Re-apply the schema types to every stored document of this type.

        Converted documents are first written to a temporary "Sync"
        collection; only if every document converted cleanly are they copied
        back over the originals.

        :param user: its account id is used in the temporary collection name.
        :returns: `{}` on success, otherwise the collected sync errors.
        :raises ImproperStateError: if the type is not OUT_OF_SYNC.
        """
        if self.sync_status != self.OUT_OF_SYNC:
            raise ImproperStateError(self)

        self.update(sync_status=self.SYNCING)
        sync_errors = defaultdict(list)
        sync_coll = self.mongo_collection + 'Sync' + str(user.account.id)

        ChClass = self.get_channel_class()
        SyncClass = SchemaBased.create_data_class(
            self.data_class_name,
            self.schema,
            sync_coll,
            inherit_from=DynamicEventsImporterChannel,
            _platform=self.name)
        temp_coll = SyncClass.objects.coll

        # NOTE(review): if no document matches, execute() below may raise on
        # an empty bulk operation - confirm against the pymongo version used.
        bulk_insert = temp_coll.initialize_unordered_bulk_op()
        for doc in ChClass.objects.coll.find({'channel_type_id': self.id}):
            synced_data = {}
            for fname, field in ChClass.fields.iteritems():
                val = doc.get(field.db_field)
                if val is None:
                    continue
                synced_data[fname] = field.to_python(val)

            # Reset per document so the error handler never reports a stale
            # column/value from a previous loop, nor hits an unbound name
            # when the failure happens before the first column is read.
            col_name = val = None
            try:
                for col in self.schema:
                    if KEY_EXPRESSION in col:
                        continue
                    col_name = col[KEY_NAME]
                    val = doc.get(col_name)
                    synced_data[col_name] = apply_shema_type(
                        val, col[KEY_TYPE])
                bulk_insert.insert(SyncClass(**synced_data).data)
            except Exception as ex:
                LOGGER.info('Sync error:\n\n %s', ex, exc_info=True)
                SchemaBased._put_sync_error(sync_errors, col_name, val, ex)

        if not sync_errors:
            try:
                bulk_insert.execute()
            except Exception as ex:
                LOGGER.info('Error inserting synced data %s', ex,
                            exc_info=True)
                self.update(sync_status=self.OUT_OF_SYNC)
                temp_coll.drop()
                raise
            else:
                # Copy the converted documents back over the originals.
                bulk_update = ChClass.objects.coll.initialize_unordered_bulk_op()
                for doc in temp_coll.find():
                    bulk_update.find({'_id': doc['_id']}).replace_one(doc)
                bulk_update.execute()
                temp_coll.drop()

            self.update(sync_status=self.IN_SYNC, updated_at=utc(now()))
            return {}

        self.update(sync_status=self.OUT_OF_SYNC)
        temp_coll.drop()
        return sync_errors
class ChannelFilter(Document):
    ''' Base channel filter class (abstract) '''

    collection = 'ChannelFilter'
    allow_inheritance = True

    channel = fields.ReferenceField('Channel', db_field='cl')

    manager = ChannelFilterManager

    # Methods for channel classification on inbound and outbound actions

    def _raise_unimplemented(self):
        ''' Shared failure path for members a subclass has to provide. '''
        raise AppException(
            'unimplemented method, to be overrided in a subclass')

    @property
    def accepted_items(self):
        ''' Must be provided by a subclass. '''
        self._raise_unimplemented()

    @property
    def rejected_items(self):
        ''' Must be provided by a subclass. '''
        self._raise_unimplemented()

    @property
    def requires_refresh(self):
        return False

    @property
    def inclusion_threshold(self):
        ''' Threshold taken from the referenced channel. '''
        return self.channel.inclusion_threshold

    @property
    def exclusion_threshold(self):
        ''' Threshold taken from the referenced channel. '''
        return self.channel.exclusion_threshold

    @property
    def reject_count(self):
        return 0

    @property
    def accept_count(self):
        return 0

    def _predict_fit(self, item):
        ''' Must be provided by a subclass. '''
        self._raise_unimplemented()

    def handle_accept(self, item):
        ''' Must be provided by a subclass. '''
        self._raise_unimplemented()

    def handle_reject(self, item):
        ''' Must be provided by a subclass. '''
        self._raise_unimplemented()

    def retrain(self):
        ''' For forcing a retraining of the filter.'''

    def reset(self):
        ''' Remove all related items '''

    def extract_features(self, item):
        ''' Return the list of features for the given item'''
        return []
class EmailPost(Post):
    """Post that originates from an email.

    Email specific data is read from ``extra_fields['email']``.
    """

    manager = EmailPostManager

    _parent_post = fields.ReferenceField('EmailPost', db_field='pp')

    @property
    def platform(self):
        return 'Email'

    @property
    def parent(self):
        """Return the parent post, resolving and caching it on first access.

        An ``UntrackedEmailPost`` placeholder is returned as-is and is not
        stored in ``_parent_post``.
        """
        if self._parent_post is None:
            post = self._get_parent_post()
            if isinstance(post, UntrackedEmailPost):
                return post
            self._parent_post = post
        return self._parent_post

    def _get_parent_post(self):
        """Look up the post this email replies to.

        :returns: None when the email has no parent id, the matching
            ``EmailPost`` when it exists, otherwise an ``UntrackedEmailPost``
            placeholder dated one minute before this post.
        """
        # The raw id is used for the lookup; a former `make_id(...)` result
        # was always overwritten by it, so that dead call has been dropped.
        parent_post_id = self.parent_post_id
        if not parent_post_id:
            return None
        try:
            #should be at most only one Post unless we search against user mentions
            return EmailPost.objects.get(id=parent_post_id)
        except Post.DoesNotExist:
            return UntrackedEmailPost(
                id=parent_post_id,
                created_at=self.created_at - timedelta(minutes=1))

    @property
    def parent_post_id(self):
        return self._email_data.get('in_reply_to_status_id', None)

    @property
    def session_id(self):
        return self._email_data.get('session_id', None)

    @property
    def view_url_link(self):
        return 'View Email'

    @property
    def is_amplifier(self):
        return False

    @property
    def has_attachments(self):
        return False

    def platform_specific_data(self, outbound_channel=None):
        """ Any post info that is specific only per platform goes here """
        return {'has_attachments': self.has_attachments}

    @property
    def _email_data(self):
        """Email payload stored under ``extra_fields['email']``.

        Defaults to ``{}`` when the key is absent, the same way
        ``ChatPost._chat_data`` does, so the accessors built on it return
        None instead of raising KeyError.
        """
        return self.extra_fields.get('email', {})

    def is_root_post(self):
        return not self.parent

    @property
    def _email_created_at(self):
        # NOTE(review): `parse_datetime` receives None when the payload has
        # no 'created_at' - confirm that it tolerates that.
        return parse_datetime(self._email_data.get('created_at', None))

    def parse_created_at(self):
        return self._email_created_at or None

    def _set_url(self, url=None):
        '''Utility to set the external url

        TODO: Adapt for chat'''
        if url is not None:
            self.url = url

    def set_url(self, url=None):
        """Set the url (when one is given) and save the post."""
        self._set_url(url)
        self.save()