Пример #1
0
class NumberSequences(AuthDocument):
    """Named counters, one document per sequence.

    ``name`` identifies the sequence and ``_next`` holds the value most
    recently returned by :meth:`advance`.
    """
    name = StringField(required=True, unique=True)
    _next = NumField(required=True)

    indexes = [("name",)]

    def __repr__(self):
        details = (self.__class__.__name__, self.name, self._next)
        return '<%s: "%s", %s>' % details

    @classmethod
    def advance(cls, seq_name, first=1):
        """Increment the ``seq_name`` sequence by one and return its new value.

        The update is issued as a single ``find_and_modify`` call with
        ``upsert=True`` and ``new=True``, so the returned document is the
        one *after* the increment, and a missing sequence document is
        created by the same call.

        :param seq_name: value matched against the ``name`` key.
        :param first: accepted for interface compatibility; this method
            never reads it.
        :returns: the ``_next`` value of the updated document.

        Example::

            num = NumberSequences.advance('channels')
        """
        selector = {'name': seq_name}
        bump = {'$inc': {'_next': 1}}
        updated = cls.objects.coll.find_and_modify(
            selector, bump, upsert=True, new=True)
        return updated['_next']
Пример #2
0
class FacebookTracking(Document):
    """Holds tracked Facebook page and event ids. Used for Facebook channels lookup."""
    # Identifier of the tracked Facebook object, kept as a string.
    object_id = StringField()
    # Kind of tracked object; limited to the PAGE and EVENT constants.
    object_type = NumField(choices=[PAGE, EVENT])
    # References to the Channel documents associated with this object.
    channels = ListField(ReferenceField(Channel))

    # Manager class used for this document type.
    manager = FacebookTrackingManager
    # Fields listed for indexing.
    indexes = ['channels', 'object_id']
Пример #3
0
class ResponseTag(Document):
    """Per-tag record attached to a response.

    Every field is stored under a short ``db_field`` key; the attribute
    names below are the readable aliases.
    """

    # Same value as the id of the response, so matching tags can be
    # fetched quickly.
    response_id = StringField(db_field='r_id', required=True)
    channel = ReferenceField(Channel, db_field='cl')
    post = ObjectIdField(db_field='pt', required=True)
    tag = ObjectIdField(db_field='tc', required=True)
    assignee = ObjectIdField(db_field='ur')
    post_date = NumField(db_field='ts', required=True)
    assignment_expires_at = DateTimeField(db_field='ae')
    status = StringField(db_field='ss', default='pending')
    intention_name = StringField(db_field='in')
    skipped_list = ListField(ObjectIdField(), db_field='sl')
    intention_confidence = NumField(db_field='ic', default=0.0)
    punks = ListField(StringField(), db_field='ps')
    starred = ListField(ObjectIdField(), db_field='sd')
    message_type = NumField(db_field='mtp', default=0)
    relevance = NumField(db_field='re', default=0.0)
    actionability = NumField(db_field='ay', default=0.0)

    # Two single-field index entries, each given as a plain field name.
    indexes = ['response_id', 'tag']
Пример #4
0
# Base class adding a numeric version field and a version-checked upsert.
# NOTE(review): this excerpt is cut off inside the retry loop (the final
# `else:` has no body here), so the success path is not visible.
class Transactional(object):
    # Version counter, declared with db_field='_v'.
    version = NumField(db_field='_v')

    # Upsert this object's data while bumping the version field by one.
    # Reads self._query, self.objects.coll and, when present,
    # self._upsert_data.
    def upsert(self):
        # Storage key of the version field.
        _v = Transactional.version.db_field
        find_query = self._query
        # Drop any version already in the query; the current one is re-read
        # on every attempt below.
        # NOTE(review): if self._query returns a shared dict this mutates it
        # in place - confirm.
        find_query.pop(_v, None)

        # Look the document up by _id and return its stored version, or 1
        # when no such document exists.
        def get_current_version():
            doc = self.objects.coll.find_one({"_id": find_query["_id"]})

            if doc:
                version = doc[_v]
            else:
                version = 1

            # Test hook: delay for one second when the flag is set.
            if get_var('_TEST_TRANSACTION_FAILURE'):
                time.sleep(1)
            return version


        # Build the update so that it always increments the version by one.
        if hasattr(self, '_upsert_data'):
            update_query = self._upsert_data
            if "$inc" in update_query:
                # There are other $inc entries already - add the version
                # increment next to them.
                update_query["$inc"][_v] = 1
            else:
                update_query["$inc"] = {_v: 1}
        else:
            update_query = {"$inc": {_v: 1}}

        # Retry budget. The counter is not decremented anywhere in the
        # visible part of the loop.
        tries_counter = 15  #must be >= number of simultaneous processes
        while tries_counter:
            # Match only on the version that was just read.
            version = get_current_version()
            find_query[_v] = version

            # NOTE(review): this is emitted at ERROR level on every attempt.
            LOGGER.error("Tries count %s", tries_counter)
            try:
                self.objects.coll.update(find_query, update_query, upsert=True, w=1)  #safe=True
            except DuplicateKeyError, e:
                # Log the failing queries, then wait half a second before the
                # next iteration.
                LOGGER.error("%s\nfind query=%s\nupdate query=%s", e, find_query, update_query)
                time.sleep(0.5)
            except Exception, e:
                # Any other error is only logged here.
                LOGGER.error("Exception: %s", e)
            else:
Пример #5
0
class ChannelHotTopics(ChannelTopicsBase):
    """Per-timeslot statistics for a single topic/term of a channel.

    Only day and month timeslots are tracked, not hours.

    The collection exists to keep track of the most frequently occurring
    topics and terms (terms being unigrams, bigrams and trigrams of topics).
    """
    manager = ChannelHotTopicsManager

    channel_num = NumField(db_field='cl', required=True)
    topic = StringField(db_field='tc', required=True)
    # Hashed parent topics <[int]> (they always have one word fewer).
    hashed_parents = ListField(NumField(), db_field='hp', required=True)
    status = NumField(db_field='ss', required=True)
    embedded_stats = ListField(EmbeddedDocumentField('ExtendedEmbeddedStats'),
                               db_field='es')

    # One compound entry plus the plain 'gc_counter' field name.
    indexes = [
        ('channel_num', 'time_slot', 'status', 'hashed_parents'),
        'gc_counter',
    ]

    def __init__(self, data=None, **kwargs):
        """Create a stat document.

        When ``data`` is None the channel is resolved from the keyword
        arguments: either a ``channel`` object (its ``counter`` becomes
        ``channel_num``) or a ``channel_num`` that is looked up through
        ``Channel.objects``. When ``data`` is given, the keyword
        arguments are passed through untouched.
        """
        if data is None:
            self.channel = kwargs.pop('channel', None)
            if not self.channel:
                assert 'channel_num' in kwargs, 'Channel object or channel number must be provided'
                # Imported here rather than at module level.
                from solariat_bottle.db.channel.base import Channel
                self.channel = Channel.objects.get(counter=kwargs['channel_num'])
            else:
                kwargs['channel_num'] = self.channel.counter
        super(ChannelHotTopics, self).__init__(data, **kwargs)

    def compute_increments(self, is_leaf=True, intention_id=None, agent=None, lang_id=None, n=1):
        """Apply a ``topic_count`` increment of ``n`` to the embedded stats."""
        increments = {'topic_count': n}
        self.update_embedded_stats(intention_id, is_leaf, agent, lang_id, increments)

    @classmethod
    def increment(cls, channel=None, time_slot=None, topic=None, status=None,
                  intention_id=None, is_leaf=True, agent=None, lang_id=None, n=1):
        """Deprecated.

        Builds a stat for ``topic``, computes its increments, upserts it
        and returns it. ``channel``, ``intention_id``, ``topic`` and
        ``time_slot`` must not be None.
        """
        assert (channel is not None and intention_id is not None
                and topic is not None and time_slot is not None), vars()

        stat = cls(
            channel=channel,
            time_slot=time_slot,
            topic=topic,
            status=status,
            hashed_parents=map(get_topic_hash, get_subtopics(topic)),
        )
        stat.compute_increments(is_leaf, intention_id, agent, lang_id, n)
        stat.upsert()
        return stat

    def __repr__(self):
        template = '<%s: id=%s channel=%s, topic=%s, hashed_parents=%s, time_slot=%s>'
        return template % (self.__class__.__name__, self.id, self.channel_num,
                           self.topic, self.hashed_parents, self.time_slot)

    @property
    def datetime(self):
        """``time_slot`` converted with ``timeslot_to_datetime``."""
        return timeslot_to_datetime(self.time_slot)

    @property
    def level(self):
        """Second element of the pair returned by ``decode_timeslot``."""
        _ignored, slot_level = decode_timeslot(self.time_slot)
        return slot_level

    def to_dict(self):
        """Return the identifying fields of this stat as a plain dict."""
        return {
            'id': self.id,
            'channel_num': self.channel_num,
            'topic': self.topic,
            'hashed_parents': self.hashed_parents,
            'time_slot': self.time_slot,
            'status': self.status,
        }
Пример #6
0
class SpeechActMap(Document):
    """ Efficient structure to allow searching by intention and also
        to provide an efficient sharding key that distributes well and
        also optimizes query on a single shard because the shard key
        will usually be part of the query.
    """

    # Packed speech-act-map-id (bit-string): pack_speech_act_map_id, unpack_speech_act_map_id
    id = BytesField(db_field='_id', unique=True, required=True)
    channel = ObjectIdField(db_field='cl', required=True)
    post = EventIdField(db_field='pt', required=True)
    idx = NumField(db_field='ix', required=True)
    agent = NumField(db_field='at', default=0)
    language = NumField(db_field='le', default=Lang.EN)
    intention_type_conf = NumField(db_field='ic', required=True)
    intention_type_id = NumField(db_field='ii', required=True)
    time_slot = NumField(db_field='ts', required=True)
    created_at = DateTimeField(db_field='ca', required=True)
    topic_tuples = ListField(DictField(), db_field='tt')
    message_type = NumField(db_field='mtp')

    # --- status constants ---

    # Defining the status values and encodings from channel

    POTENTIAL = 0  # Not sure about the fit between post and channel
    ACTIONABLE = 1  # Confident of fit between post and channel
    REJECTED = 2  # Confident of the lack of fit between post and channel
    ACTUAL = 3  # Special extension of actionable for posts that were confirmed by reply

    # Define the assignment modes and their mappings to status. These reflect how
    # the link between post and stats was set. Sometimes it is predicted, and
    # sometimes it is inferred directly from a user action.

    STATUS_MAP = {
        'potential': POTENTIAL,  # Predicted
        'assigned': POTENTIAL,  # Predicted
        'rejected': REJECTED,  # Given
        'discarded': REJECTED,  # Predicted
        'actionable': ACTIONABLE,  # Predicted
        'starred': ACTIONABLE,  # Given
        'accepted': ACTIONABLE,  # Given
        'highlighted': ACTIONABLE,  # Predicted
        'replied': ACTUAL,  # Given
        'actual': ACTUAL,  # Given
    }

    # Reverse lookup to a display name by status code
    STATUS_NAME_MAP = {
        POTENTIAL: "potential",
        ACTIONABLE: "actionable",
        REJECTED: "rejected",
        ACTUAL: "actual"
    }

    # ASSIGNED IF ANY ONE OF THESE!
    ASSIGNED = {
        'actionable', 'starred', 'accepted', 'highlighted', 'replied', 'actual'
    }
    PREDICTED = {
        'potential', 'assigned', 'discarded', 'actionable', 'highlighted'
    }

    # LOOKUP Constants to support agent based access
    NO_AGENT = -1
    ANY_AGENT = 0

    indexes = [('post', )]

    def to_dict(self):
        """Return the stored fields plus the derived ``topics``, ``content``
        and ``status`` values as a plain dict.

        Reading ``content`` goes through ``post_obj``, which may fetch the
        post.
        """
        return dict(id=self.id,
                    channel=self.channel,
                    intention_type_conf=self.intention_type_conf,
                    intention_type_id=self.intention_type_id,
                    time_slot=self.time_slot,
                    post_id=self.post,
                    topics=self.topics,
                    content=self.content,
                    agent=self.agent,
                    language=self.language,
                    status=self.status,
                    message_type=self.message_type)

    @property
    def created(self):
        """``time_slot`` converted with ``timeslot_to_datetime``."""
        return timeslot_to_datetime(self.time_slot)

    @property
    def status(self):
        """Second element of the unpacked id."""
        return self.unpacked[1]

    @property
    def post_obj(self):
        """The referenced Post, fetched on first access and cached on ``_post``."""
        if not hasattr(self, '_post'):
            # Imported here rather than at module level.
            from solariat_bottle.db.post.base import Post
            self._post = Post.objects.get(id=self.post)
        return self._post

    @property
    def topics(self):
        '''Extract out the topics (the ``'t'`` entry of every topic tuple).'''
        return [t['t'] for t in self.topic_tuples]

    @property
    def content(self):
        """``'content'`` of the post's speech act at position ``idx``."""
        return self.post_obj.speech_acts[self.idx]['content']

    @property
    def unpacked(self):
        """The id decoded with ``unpack_speech_act_map_id``."""
        return unpack_speech_act_map_id(self.id)

    @classmethod
    def reset(cls,
              post,
              channels,
              agent=None,
              reset_outbound=False,
              action='update'):
        '''
        Clears and sets keys for all the given channels. Used when assignment between post
        and channel changes, or when assignment between post and agent changes. First
        removes all the old keys, and then generates new ones. We do not bother updating
        existing documents.

        :param post: post whose speech acts the keys are built from.
        :param channels: non-empty collection of channels to synchronize.
        :param agent: agent to store on the regenerated keys; when falsy, the
            agent found on the existing keys of that channel is kept
            (``ANY_AGENT`` if there was none).
        :param reset_outbound: accepted for interface compatibility; not read
            by this method.
        :param action: ``'remove'`` only deletes the old keys and returns
            ``[]``; any other value also regenerates them.
        :returns: list of the newly generated objects (including any whose
            save hit a duplicate key, which is logged and otherwise ignored).
        :raises AppException: when ``channels`` is ``None`` or empty.
        '''

        # Truthiness covers None and every kind of empty collection, not just
        # the empty list.
        if not channels:
            raise AppException(
                "Oh no! There are no channels provided for synchronizing keys. "
                "This should never happen. Please ask support to have a look at your data."
            )
        # Remove Old Speech Act Keys
        sams = []
        agents_by_channel = {}
        for chan in channels:
            # Initialize agent mapping
            agents_by_channel[get_channel_id(chan)] = cls.ANY_AGENT

            # Now generate all possible ids for all status values
            for status in set(cls.STATUS_MAP.values()):
                sams.extend(make_objects(chan, post, post.speech_acts, status))

        # Built once; used both for the lookup and for the removal below.
        old_ids = [candidate.id for candidate in sams]

        # Now, retrieve the speech act data for agent wherever it exists so we do not
        # lose it.
        for sam in cls.objects(id__in=old_ids):
            # Retrieve actual setting if available
            agents_by_channel[get_channel_id(sam.channel)] = sam.agent

        # Nuke the old values. We reset them. Shard key must be immutable so cannot just
        # change the status value.
        cls.objects.remove(id__in=old_ids)

        if action == 'remove':
            return []

        # Generate New Speech Act Keys
        sams = []
        for chan in channels:
            # Skip regeneration of keys if this is for a smart tag and it is no longer
            # accepted.....
            if chan.is_smart_tag and chan not in post.accepted_smart_tags:
                continue

            status = cls.STATUS_MAP[post.get_assignment(chan)]
            old_agent = agents_by_channel[get_channel_id(chan)]

            sams.extend(
                make_objects(chan, post, post.speech_acts, status, agent
                             or old_agent))

        for sam in sams:
            try:
                sam.save()
            except DuplicateKeyError:
                # Best effort: a clashing key is reported and the loop moves on.
                LOGGER.error(
                    "There is already an speech act with the same ID = %s.",
                    sam.id)
        return sams
Пример #7
0
 class Doc(Document, Transactional):
     # Inherits from both Document and Transactional; declares one numeric field.
     field1 = NumField()
Пример #8
0
 class Transactional(object):
     # Plain object subclass carrying a numeric version field; no db_field is given.
     version = NumField()
Пример #9
0
 class Child(Base):
     # Subclass of Base that adds one numeric field of its own.
     field1 = NumField()
Пример #10
0
 class Base(Document):
     # Numeric version field stored under the short key 'v'.
     version = NumField(db_field='v')
Пример #11
0
class TimeSlotIntegerId(Document):
    # Document whose primary key ('_id') is a required, unique number.
    id = NumField(db_field='_id', unique=True, required=True)
    # Optional numeric fields.
    time_slot = NumField()
    dummy = NumField()
Пример #12
0
class IdEntity(Document):
    # Document whose primary key ('_id') is a required, unique bytes value.
    id = BytesField(db_field='_id', unique=True, required=True)
    # Optional numeric field.
    count = NumField()