Пример #1
0
 def __init__(self, user_uuid, queryList=None):
     """Query the section database and keep the resulting cursor.

     The filter is the client-specific clauses for ``user_uuid`` (as
     returned by ``getClientSpecificQueryFilter``) followed by the
     caller-supplied clauses, all combined with a logical AND.

     Args:
         user_uuid: Identifier handed to ``getClientSpecificQueryFilter``.
         queryList: Optional list of additional filter clauses (dicts).
             ``None`` or an empty list means "no additional clauses".

     The cursor returned by ``find`` is stored in ``self.cur``.
     """
     # A fresh list per call avoids sharing one mutable default between
     # invocations, and copying keeps the caller's list untouched.
     extraClauses = [] if queryList is None else list(queryList)
     completeQuery = getClientSpecificQueryFilter(user_uuid) + extraClauses
     Sections = get_section_db()
     # The operator is spelled "$and"; a plain "and" key is interpreted as
     # a document field name, so none of the clauses would be applied.
     # MongoDB rejects an empty "$and" array, so an empty clause list falls
     # back to the empty filter (the vacuous conjunction).
     # NOTE(review): the empty filter matches every section -- confirm that
     # is acceptable when neither the client filter nor queryList adds a clause.
     queryFilter = {"$and": completeQuery} if completeQuery else {}
     self.cur = Sections.find(queryFilter)
Пример #2
0
def queryUnclassifiedSections(uuid):
    """Return a cursor over the user's recent sections awaiting confirmation.

    A section is selected when it comes from source 'Shankari', belongs to
    ``uuid``, has a ``predicted_mode`` field, has an empty ``confirmed_mode``,
    is marked ``retained``, is of type 'move', ended less than a week ago and
    also satisfies the clauses from ``getClientSpecificQueryFilter``.

    As a side effect, the fraction of shown sections (relative to all of the
    user's unconfirmed 'move' sections) is recorded through
    ``stats.storeServerEntry``.
    """
    oneWeekAgo = datetime.now() - timedelta(weeks=1)

    clientFilters = getClientSpecificQueryFilter(uuid)
    sectionDb = get_section_db()
    logging.debug('section.count = %s' % sectionDb.count())

    # The 'predicted_mode' clause guards the race window in which trips have
    # been read for a user but the classifier has not processed them yet.
    shownFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'predicted_mode': {'$exists': True}},
        {'confirmed_mode': ''},
        {'retained': True},
        {'type': 'move'},
        {'section_end_datetime': {"$gt": oneWeekAgo}},
    ]
    shownCursor = sectionDb.find({"$and": shownFilters + clientFilters})

    # Debugging aid only: every unconfirmed 'move' section of this user,
    # ignoring age, retention, prediction and client-specific filters.
    allUnconfirmedFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'confirmed_mode': ''},
        {'type': 'move'},
    ]
    allUnconfirmedCursor = sectionDb.find({"$and": allUnconfirmedFilters})

    shownCount = shownCursor.count()
    allUnconfirmedCount = allUnconfirmedCursor.count()
    logging.debug('Unsec.count = %s' % shownCount)
    logging.debug('Total Unsec.count = %s' % allUnconfirmedCount)

    # Record what share of the unconfirmed sections survives the filtering.
    # Sections drop out when they are too old, when the client-specific
    # filter strips them, or when they are not marked as retained.
    if allUnconfirmedCount == 0:
        shownFraction = 0
    else:
        shownFraction = float(shownCount) / allUnconfirmedCount
    stats.storeServerEntry(uuid, stats.STAT_TRIP_MGR_PCT_SHOWN, time.time(),
                           shownFraction)
    return shownCursor
Пример #3
0
def getClassifiedRatio(uuid, start, end):
    """Return the fraction of the user's sections that have a confirmed mode.

    Only sections from source 'Shankari' of type 'move' that carry a
    ``predicted_mode`` field, whose ``section_start_datetime`` lies strictly
    between ``start`` and ``end``, and that pass the clauses from
    ``getClientSpecificQueryFilter`` are considered.

    Returns ``classified / total`` as a float, or ``0`` when no section
    matches. An ``AssertionError`` is raised if the classified and
    unclassified counts do not add up to the total.
    """
    baseFilters = [
        {'source': 'Shankari'},
        {'user_id': uuid},
        {'predicted_mode': {'$exists': True}},
        {'type': 'move'},
        {'section_start_datetime': {'$gt': start, '$lt': end}},
    ]
    completeQueryList = baseFilters + getClientSpecificQueryFilter(uuid)
    logging.debug("completeQueryList = %s" % completeQueryList)

    def countMatching(filters):
        # Each count fetches the section collection anew, as before.
        return get_section_db().find({'$and': filters}).count()

    # An empty 'confirmed_mode' marks a section as not yet classified.
    unclassifiedCount = countMatching(
        completeQueryList + [{'confirmed_mode': ''}])
    classifiedCount = countMatching(
        completeQueryList + [{'confirmed_mode': {"$ne": ''}}])
    totalCount = countMatching(completeQueryList)

    logging.info(
        "unclassifiedCount = %s, classifiedCount = %s, totalCount = %s" %
        (unclassifiedCount, classifiedCount, totalCount))
    # The two partial queries are expected to partition the complete one.
    assert (unclassifiedCount + classifiedCount == totalCount)

    if totalCount <= 0:
        return 0
    return float(classifiedCount) / totalCount
Пример #4
0
def queryUnclassifiedSections(uuid):
    """Fetch the sections of ``uuid`` that still need a confirmed mode.

    Returns the cursor produced by ``find`` for sections that match all of:
    source 'Shankari', owned by ``uuid``, ``predicted_mode`` present,
    ``confirmed_mode`` empty, ``retained`` true, type 'move', ended within
    the last week, plus whatever ``getClientSpecificQueryFilter`` adds.

    Also stores, via ``stats.storeServerEntry``, the ratio between the number
    of returned sections and the number of all unconfirmed 'move' sections
    of the user (0 when there are none).
    """
    now = datetime.now()
    earliestEnd = now - timedelta(weeks=1)

    extraFilters = getClientSpecificQueryFilter(uuid)
    collection = get_section_db()
    logging.debug('section.count = %s' % collection.count())

    sourceClause = {'source': 'Shankari'}
    ownerClause = {'user_id': uuid}
    unconfirmedClause = {'confirmed_mode': ''}
    moveClause = {'type': 'move'}

    # 'predicted_mode' must exist: trips may already have been read for a
    # user before the classifier has run on them, and that window only grows
    # with the number of users.
    filteredQuery = {
        "$and": [
            sourceClause,
            ownerClause,
            {'predicted_mode': {'$exists': True}},
            unconfirmedClause,
            {'retained': True},
            moveClause,
            {'section_end_datetime': {"$gt": earliestEnd}},
        ] + extraFilters
    }
    unclassifiedSections = collection.find(filteredQuery)

    # Unfiltered total, kept for debugging until the filtering is trusted.
    unfilteredQuery = {
        "$and": [sourceClause, ownerClause, unconfirmedClause, moveClause]
    }
    everyUnconfirmed = collection.find(unfilteredQuery)

    keptCount = unclassifiedSections.count()
    overallCount = everyUnconfirmed.count()
    logging.debug('Unsec.count = %s' % keptCount)
    logging.debug('Total Unsec.count = %s' % overallCount)

    # Track the percentage of sections that is shown rather than stripped.
    # Sections get stripped because they are too old, because the
    # client-specific filter removes them, or because they are not retained.
    pctShown = float(keptCount) / overallCount if overallCount != 0 else 0
    stats.storeServerEntry(uuid, stats.STAT_TRIP_MGR_PCT_SHOWN, time.time(),
                           pctShown)
    return unclassifiedSections