class NumberSequences(AuthDocument):
    """Named auto-increment counters, advanced atomically via find-and-modify."""

    name = StringField(required=True, unique=True)  # counter identifier
    _next = NumField(required=True)                 # next value to be handed out
    indexes = [("name",)]

    def __repr__(self):
        return '<%s: "%s", %s>' % (self.__class__.__name__, self.name, self._next)

    @classmethod
    def advance(cls, seq_name, first=1):
        """
        Returns the next number in the <seq_name> sequence advancing it forward

        Example:
            num = NumberSequences.advance('channels')
        """
        # NOTE(review): `first` is currently unused — a freshly upserted
        # counter always starts at 1 because $inc on a missing field yields 1.
        # Confirm whether honoring `first` was ever intended.
        updated = cls.objects.coll.find_and_modify(
            {'name': seq_name},
            {'$inc': {'_next': 1}},
            upsert=True,
            new=True,
        )
        return updated['_next']
class FacebookTracking(Document):
    """Holds tracked facebook page and event ids.

    Used for facebook channels lookup.
    """
    object_id = StringField()                        # facebook object id
    object_type = NumField(choices=[PAGE, EVENT])    # page or event marker
    channels = ListField(ReferenceField(Channel))    # channels tracking this object
    manager = FacebookTrackingManager

    indexes = ['channels', 'object_id']
class ResponseTag(Document):
    """One tag assignment for a response (keyed by the response's own id)."""

    # This will be the same as the response so we can quickly get them.
    response_id = StringField(db_field='r_id', required=True)
    channel = ReferenceField(Channel, db_field='cl')
    post = ObjectIdField(db_field='pt', required=True)
    tag = ObjectIdField(db_field='tc', required=True)
    assignee = ObjectIdField(db_field='ur')
    post_date = NumField(db_field='ts', required=True)
    assignment_expires_at = DateTimeField(db_field='ae')
    status = StringField(db_field='ss', default='pending')
    intention_name = StringField(db_field='in')
    skipped_list = ListField(ObjectIdField(), db_field='sl')
    intention_confidence = NumField(db_field='ic', default=0.0)
    punks = ListField(StringField(), db_field='ps')
    starred = ListField(ObjectIdField(), db_field='sd')
    message_type = NumField(db_field='mtp', default=0)
    relevance = NumField(db_field='re', default=0.0)
    actionability = NumField(db_field='ay', default=0.0)

    # NOTE(review): ('response_id') is a plain string, not a 1-tuple —
    # equivalent at runtime here, but ("response_id",) would be clearer.
    indexes = [('response_id'), ('tag')]
class Transactional(object): version = NumField(db_field='_v') def upsert(self): _v = Transactional.version.db_field find_query = self._query find_query.pop(_v, None) def get_current_version(): doc = self.objects.coll.find_one({"_id": find_query["_id"]}) if doc: version = doc[_v] else: version = 1 if get_var('_TEST_TRANSACTION_FAILURE'): time.sleep(1) return version if hasattr(self, '_upsert_data'): update_query = self._upsert_data if "$inc" in update_query: #if there are other $inc queries - add version as another value update_query["$inc"][_v] = 1 else: update_query["$inc"] = {_v: 1} else: update_query = {"$inc": {_v: 1}} tries_counter = 15 #must be >= number of simultaneous processes while tries_counter: version = get_current_version() find_query[_v] = version LOGGER.error("Tries count %s", tries_counter) try: self.objects.coll.update(find_query, update_query, upsert=True, w=1) #safe=True except DuplicateKeyError, e: #log error LOGGER.error("%s\nfind query=%s\nupdate query=%s", e, find_query, update_query) time.sleep(0.5) except Exception, e: LOGGER.error("Exception: %s", e) else:
class ChannelHotTopics(ChannelTopicsBase):
    '''
    Each document tracks specific topic/term stats during a specific timeslot
    (only days and months are being tracked, not hours).

    The main purpose of this collection is to keep track of the most
    frequently occurring topics and terms (terms being unigrams, bigrams and
    trigrams of topics).
    '''
    manager = ChannelHotTopicsManager

    channel_num = NumField(db_field='cl', required=True)
    topic = StringField(db_field='tc', required=True)
    # hashed parent topics <[int]> (they always have one word fewer)
    hashed_parents = ListField(NumField(), db_field='hp', required=True)
    status = NumField(db_field='ss', required=True)
    embedded_stats = ListField(EmbeddedDocumentField('ExtendedEmbeddedStats'),
                               db_field='es')

    indexes = [('channel_num', 'time_slot', 'status', 'hashed_parents'),
               ('gc_counter')]

    def __init__(self, data=None, **kwargs):
        # Accept either a full Channel object or a raw channel counter; when
        # only the counter is given, look the Channel up.
        if data is None:
            self.channel = kwargs.pop('channel', None)
            if self.channel:
                kwargs['channel_num'] = self.channel.counter
            else:
                assert 'channel_num' in kwargs, \
                    'Channel object or channel number must be provided'
                from solariat_bottle.db.channel.base import Channel
                self.channel = Channel.objects.get(counter=kwargs['channel_num'])
        super(ChannelHotTopics, self).__init__(data, **kwargs)

    def compute_increments(self, is_leaf=True, intention_id=None, agent=None,
                           lang_id=None, n=1):
        """Compute required increments to embedded stats for this stat instance."""
        update_dict = {'topic_count': n}
        self.update_embedded_stats(intention_id, is_leaf, agent, lang_id, update_dict)

    @classmethod
    def increment(cls, channel=None, time_slot=None, topic=None, status=None,
                  intention_id=None, is_leaf=True, agent=None, lang_id=None, n=1):
        """Deprecated """
        assert channel is not None and intention_id is not None \
            and topic is not None and time_slot is not None, vars()

        hashed_parents = [get_topic_hash(sub) for sub in get_subtopics(topic)]
        stat = cls(channel=channel,
                   time_slot=time_slot,
                   topic=topic,
                   status=status,
                   hashed_parents=hashed_parents)
        stat.compute_increments(is_leaf, intention_id, agent, lang_id, n)
        stat.upsert()
        return stat

    def __repr__(self):
        return '<%s: id=%s channel=%s, topic=%s, hashed_parents=%s, time_slot=%s>' % (
            self.__class__.__name__,
            self.id,
            self.channel_num,
            self.topic,
            self.hashed_parents,
            self.time_slot)

    @property
    def datetime(self):
        # Decoded timestamp of this document's timeslot.
        return timeslot_to_datetime(self.time_slot)

    @property
    def level(self):
        # Granularity (e.g. day/month) encoded in the timeslot.
        _, level = decode_timeslot(self.time_slot)
        return level

    def to_dict(self):
        return dict(id=self.id,
                    channel_num=self.channel_num,
                    topic=self.topic,
                    hashed_parents=self.hashed_parents,
                    time_slot=self.time_slot,
                    status=self.status)
class SpeechActMap(Document):
    """
    Efficient structure to allow searching by intention and also to provide
    an efficient sharding key that distributes well and also optimizes query
    on a single shard because the shard key will usually be part of the query.
    """
    # Packed speech-act-map-id (bit-string):
    # pack_speech_act_map_id / unpack_speech_act_map_id
    id = BytesField(db_field='_id', unique=True, required=True)
    channel = ObjectIdField(db_field='cl', required=True)
    post = EventIdField(db_field='pt', required=True)
    idx = NumField(db_field='ix', required=True)
    agent = NumField(db_field='at', default=0)
    language = NumField(db_field='le', default=Lang.EN)
    intention_type_conf = NumField(db_field='ic', required=True)
    intention_type_id = NumField(db_field='ii', required=True)
    time_slot = NumField(db_field='ts', required=True)
    created_at = DateTimeField(db_field='ca', required=True)
    topic_tuples = ListField(DictField(), db_field='tt')
    message_type = NumField(db_field='mtp')

    # --- status constants ---
    # Defining the status values and encodings from channel
    POTENTIAL = 0   # Not sure about the fit between post and channel
    ACTIONABLE = 1  # Confident of fit between post and channel
    REJECTED = 2    # Confident of the lack of fit between post and channel
    ACTUAL = 3      # Extension of actionable for posts that were confirmed by reply

    # Define the assignment modes and their mappings to status. These reflect
    # how the link between post and stats was set. Sometimes it is predicted,
    # and sometimes it is inferred directly from a user action.
    STATUS_MAP = {
        'potential':   POTENTIAL,   # Predicted
        'assigned':    POTENTIAL,   # Predicted
        'rejected':    REJECTED,    # Given
        'discarded':   REJECTED,    # Predicted
        'actionable':  ACTIONABLE,  # Predicted
        'starred':     ACTIONABLE,  # Given
        'accepted':    ACTIONABLE,  # Given
        'highlighted': ACTIONABLE,  # Predicted
        'replied':     ACTUAL,      # Given
        'actual':      ACTUAL,      # Given
    }

    # Reverse lookup to a display name by status code
    STATUS_NAME_MAP = {
        POTENTIAL: "potential",
        ACTIONABLE: "actionable",
        REJECTED: "rejected",
        ACTUAL: "actual",
    }

    # ASSIGNED IF ANY ONE OF THESE!
    ASSIGNED = {'actionable', 'starred', 'accepted',
                'highlighted', 'replied', 'actual'}

    PREDICTED = {'potential', 'assigned', 'discarded',
                 'actionable', 'highlighted'}

    # LOOKUP Constants to support agent based access
    NO_AGENT = -1
    ANY_AGENT = 0

    indexes = [('post',)]

    def to_dict(self):
        return dict(id=self.id,
                    channel=self.channel,
                    intention_type_conf=self.intention_type_conf,
                    intention_type_id=self.intention_type_id,
                    time_slot=self.time_slot,
                    post_id=self.post,
                    topics=self.topics,
                    content=self.content,
                    agent=self.agent,
                    language=self.language,
                    status=self.status,
                    message_type=self.message_type)

    @property
    def created(self):
        return timeslot_to_datetime(self.time_slot)

    @property
    def status(self):
        # The status code is packed into the document id.
        return self.unpacked[1]

    @property
    def post_obj(self):
        # Lazily fetch and cache the owning Post document.
        if not hasattr(self, '_post'):
            from solariat_bottle.db.post.base import Post
            self._post = Post.objects.get(id=self.post)
        return self._post

    @property
    def topics(self):
        '''Extract out the topics'''
        return [pair['t'] for pair in self.topic_tuples]

    @property
    def content(self):
        return self.post_obj.speech_acts[self.idx]['content']

    @property
    def unpacked(self):
        return unpack_speech_act_map_id(self.id)

    @classmethod
    def reset(cls, post, channels, agent=None, reset_outbound=False,
              action='update'):
        '''
        Clears and set keys for all the given channels. Used when assignment
        between post and channel changes, or when assignment between post and
        agent changes. First removes all the old keys, and then generates new
        ones. We do not bother updating existing documents.
        '''
        # NOTE(review): reset_outbound is currently unused here — confirm
        # against callers before removing.
        if channels == [] or channels is None:
            raise AppException(
                "Oh no! There are no channels provided for synchronizing keys. "
                "This should never happen. Please ask support to have a look at your data."
            )

        # Remove Old Speech Act Keys
        sams = []
        agents_by_channel = {}
        for chan in channels:
            # Initialize agent mapping
            agents_by_channel[get_channel_id(chan)] = cls.ANY_AGENT
            # Now generate all possible ids for all status values
            for status in set(cls.STATUS_MAP.values()):
                sams.extend(make_objects(chan, post, post.speech_acts, status))

        # Retrieve the speech act data for agent wherever it exists so we do
        # not lose it.
        for sam in cls.objects(id__in=[sam.id for sam in sams]):
            agents_by_channel[get_channel_id(sam.channel)] = sam.agent

        # Nuke the old values. We reset them. Shard key must be immutable so
        # we cannot just change the status value.
        cls.objects.remove(id__in=[sam.id for sam in sams])

        if action == 'remove':
            return []

        # Generate New Speech Act Keys
        sams = []
        for chan in channels:
            # Skip regeneration of keys if this is for a smart tag and it is
            # no longer accepted.
            if chan.is_smart_tag and chan not in post.accepted_smart_tags:
                continue
            status = cls.STATUS_MAP[post.get_assignment(chan)]
            old_agent = agents_by_channel[get_channel_id(chan)]
            sams.extend(make_objects(chan, post, post.speech_acts, status,
                                     agent or old_agent))

        for sam in sams:
            try:
                sam.save()
            except DuplicateKeyError:
                LOGGER.error(
                    "There is already an speech act with the same ID = %s.",
                    sam.id)
        return sams
class Doc(Document, Transactional):
    # Minimal document mixing in Transactional — presumably a test fixture;
    # TODO confirm intended use.
    field1 = NumField()
class Transactional(object):
    # NOTE(review): re-declares a Transactional with a bare `version` field
    # (no db_field alias) — presumably a simplified fixture shadowing the
    # full mixin; verify this duplication is intentional.
    version = NumField()
class Child(Base):
    # Subclass adding a single numeric field on top of Base.
    field1 = NumField()
class Base(Document):
    # Versioned base document; stored under the short db field 'v'.
    version = NumField(db_field='v')
class TimeSlotIntegerId(Document):
    # Document with a caller-supplied integer primary key plus a timeslot
    # and a filler field — presumably a test fixture; TODO confirm.
    id = NumField(db_field='_id', unique=True, required=True)
    time_slot = NumField()
    dummy = NumField()
class IdEntity(Document):
    # Document keyed by a caller-supplied bytes id, carrying one counter.
    id = BytesField(db_field='_id', unique=True, required=True)
    count = NumField()