def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_analysis_timeseries_db().remove(
         {"user_id": {
             "$in": self.testUUIDList
         }})
     edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_uuid_db().remove({"user_id": {"$in": self.testUUIDList}})
Example #2
 def fromEmail(userEmail):
     email2UUID = get_uuid_db().find_one({'user_email': userEmail})
     if email2UUID is None:
         return None
     user = User(email2UUID['uuid'])
     user.__email = userEmail
     return user
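A short usage sketch (the address is hypothetical); fromEmail returns None when no mapping exists, so callers must check before dereferencing:

user = User.fromEmail("test@example.com")  # hypothetical address
if user is None:
    print("no account registered for this email")
else:
    print("found user with uuid %s" % user.uuid)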
Example #3
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        import emission.core.get_database as edb

        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")

    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)

    logging.info("For uuid = %s, deleting entries from the analysis_timeseries" % curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)

    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)

    if is_purge_state:
        logging.info("For uuid %s, deleting entries from the pipeline_state_db" % curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
Example #5
def getUUID(request, inHeader=False):
    retUUID = None
    if skipAuth:
        if 'User' in request.headers or 'user' in request.json:
            # skipAuth = true, so the email will be sent in plaintext
            userEmail = __getToken__(request, inHeader)
            retUUID = __getUUIDFromEmail__(userEmail)
            logging.debug(
                "skipAuth = %s, returning UUID directly from email %s" %
                (skipAuth, retUUID))
        else:
            # Return a random user to make it easy to experiment without having to specify a user
            # TODO: Remove this if it is not actually used
            from get_database import get_uuid_db
            user_uuid = get_uuid_db().find_one()['uuid']
            retUUID = user_uuid
            logging.debug("skipAuth = %s, returning arbitrary UUID %s" %
                          (skipAuth, retUUID))
        if Client("choice").getClientKey() is None:
            Client("choice").update(createKey=True)
    else:
        userToken = __getToken__(request, inHeader)
        retUUID = getUUIDFromToken(userToken)
    if retUUID is None:
        raise HTTPError(403, "token is valid, but no account found for user")
    request.params.user_uuid = retUUID
    return retUUID
Example #7
 def setUp(self):
   # Make sure we start with a clean slate every time
   self.serverName = 'localhost'
   common.dropAllCollections(get_db())
   logging.info("After setup, client count = %d, profile count = %d, uuid count = %d" % 
     (get_client_db().find().count(), get_profile_db().count(), get_uuid_db().count()))
   common.loadTable(self.serverName, "Stage_Modes", "emission/tests/data/modes.json")
Example #9
def find_inactive_users():
    inactive_users = []
    inactive_users_new_consent = ""
    inactive_users_old_consent = ""
    inactive_users_before_september = ""
    inactive_users_after_september = ""
    one_week_ago_ts = arrow.utcnow().replace(weeks=-1).timestamp
    september_first = arrow.get('2016-09-01').timestamp
    for user in edb.get_uuid_db().find():
        db = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("stats/server_api_time", time_query=None)
        new_consent = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("config/consent", time_query=None)
        signup_date = arrow.get(user['update_ts'])
        if db.empty:
            inactive_users.append((user['user_email'], signup_date.date(), ()))
            if new_consent.empty:
                inactive_users_new_consent+=str(user['user_email'])+', '
            else:
                inactive_users_old_consent+=str(user['user_email'])+', '
            if signup_date.timestamp < september_first:
                inactive_users_before_september+=str(user['user_email'])+', '
            else:
                inactive_users_after_september+=str(user['user_email'])+', '
        else:
            #check last usercache call: 
            #the user is inactive if there are no calls or if the last one was before one_week_ago_ts
            last_usercache_call = db[db['name'].str.contains('usercache', case=False)].tail(1)
            if last_usercache_call.empty:
                inactive_users.append((user['user_email'], signup_date.date(), ()))
                if new_consent.empty:
                    inactive_users_new_consent+=str(user['user_email'])+', '
                else:
                    inactive_users_old_consent+=str(user['user_email'])+', '
                if signup_date.timestamp < september_first:
                    inactive_users_before_september+=str(user['user_email'])+', '
                else:
                    inactive_users_after_september+=str(user['user_email'])+', '
            else:
                if last_usercache_call.iloc[0]['ts'] < one_week_ago_ts:
                    inactive_users.append((user['user_email'], signup_date.date(), arrow.get(last_usercache_call.iloc[0]['ts']).date()))
                    if new_consent.empty:
                        inactive_users_new_consent+=str(user['user_email'])+', '
                    else:
                        inactive_users_old_consent+=str(user['user_email'])+', '
                    if signup_date.timestamp < september_first:
                        inactive_users_before_september+=str(user['user_email'])+', '
                    else:
                        inactive_users_after_september+=str(user['user_email'])+', '
    inactive_users_table = pd.DataFrame(inactive_users, columns=['Email', 'Last Sign Up Date', 'Last Usercache Call'])
    print "\nList of inactive users emails and date they signed up:"
    print inactive_users_table
    print "\nEmails of inactive users who consented to the new IRB protocol:"
    print inactive_users_new_consent[:-2]
    print "\nEmails of inactive users who did not consent to the new IRB protocol:"
    print inactive_users_old_consent[:-2]
    print "\nEmails of inactive users who signed up before September 1st:"
    print inactive_users_before_september[:-2]
    print "\nEmails of inactive users who signed up after September 1st:"
    print inactive_users_after_september[:-2]   
    return
Example #10
def map_uuid_to_label(uuid, label):
    ur = edb.get_uuid_db().update_one({"uuid": uuid},
                                      {"$set": {
                                          "user_email": label
                                      }})
    print(ur.raw_result)
    assert (ur.matched_count == 1)
    assert (ur.modified_count == 1)
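A usage sketch (the uuid and label are hypothetical). The asserts make the call fail loudly if the uuid is unknown (matched_count == 0) or if the label was already set to the same value (modified_count == 0):

from uuid import UUID
map_uuid_to_label(UUID("11111111-2222-3333-4444-555555555555"),  # hypothetical uuid
                  "participant_001")  # hypothetical label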
Example #11
 def fromUUID(user_uuid):
   user = User(user_uuid)
   uuid2Email = get_uuid_db().find_one({'uuid': user_uuid})
   # Remove once we remove obsolete code/tests that doesn't create an email ->
   # uuid mapping
   if uuid2Email is not None and 'user_email' in uuid2Email:
     user.__email = uuid2Email['user_email']
   return user
Example #13
  def registerWithUUID(userEmail, anonUUID):
    from datetime import datetime
    from emission.core.wrapper.client import Client

    # We are accessing three databases here:
    # - The list of pending registrations (people who have filled out demographic
    # information but not installed the app)
    # - The mapping from the userEmail to the user UUID
    # - The mapping from the UUID to other profile information about the user
    # The first two are indexed by the user email. We will use the same field
    # name in both to indicate that it is a shared key. This also allows us to
    # have a simple query that we can reuse.
    userEmailQuery = {'user_email': userEmail}

    # First, we construct the email -> uuid mapping and store it in the appropriate database.
    # At this point, we don't know or care whether the user is part of a study
    # We also store a create timestamp just because that's always a good idea
    # What happens if the user calls register() again? Do we want to generate a new UUID?
    # Do we want to update the create timestamp?
    # For now, let's assume that the answer to both of those questions is yes,
    # because that allows us to use upsert :)
    # A bonus fix is that if something is messed up in the DB, calling create again will fix it.


    emailUUIDObject = {'user_email': userEmail, 'uuid': anonUUID, 'update_ts': datetime.now()}
    writeResultMap = get_uuid_db().replace_one(userEmailQuery, emailUUIDObject, upsert=True)
    # Note, if we did want the create_ts to not be overwritten, we can use the
    # writeResult to decide how to deal with the values

    # Now, we look to see if the user is part of a study. We can either store
    # this information in the profile database, or the mapping, or both. For now,
    # let us store this in the profile database since it is sufficient for it to
    # be associated with the UUID, we anticipate using it for customization, and
    # we assume that other customization stuff will be stored in the profile.

    # We could also assume that we will create the profile if we created the map
    # and update if we updated. But that has some reliability issues. For
    # example, what if creating the map succeeded but creating the profile
    # failed? Subsequently calling the method again to try and fix the profile
    # will continue to fail because we will be trying to update.
    # Much better to deal with it separately by doing a separate upsert

    # Second decision: what do we do if the user is not part of a study? Create a
    # profile anyway with an empty list, or defer the creation of the profile?
    # 
    # Decision: create profile with empty list for two reasons:
    # a) for most of the functions, we want to use the profile data. We should
    # only use the email -> uuid map in the API layer to get the UUID, and use
    # the UUID elsewhere. So we need to have profiles for non-study participants
    # as well.
    # b) it will also make the scripts to update the profile in the background
    # easier to write. They won't have to query the email -> UUID database and
    # create the profile if it doesn't exist - they can just work off the profile
    # database.
    # TODO: Write a script that periodically goes through and identifies maps
    # that don't have an associated profile and fix them
    writeResultProfile = User.createProfile(anonUUID, datetime.now())
    return User.fromUUID(anonUUID)
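The comments above hinge on replace_one(..., upsert=True): the first call for an email inserts, and every later call replaces the whole document, which is exactly why update_ts (and any create_ts) gets overwritten on re-registration. A minimal standalone sketch of that behavior, reusing the names from the example (the email is hypothetical):

query = {'user_email': 'a@example.com'}
doc = {'user_email': 'a@example.com', 'uuid': anonUUID, 'update_ts': datetime.now()}

res1 = get_uuid_db().replace_one(query, doc, upsert=True)
# no match yet, so this is an insert:
# res1.matched_count == 0 and res1.upserted_id is set

res2 = get_uuid_db().replace_one(query, doc, upsert=True)
# now the existing document is replaced wholesale:
# res2.matched_count == 1 and res2.upserted_id is None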
Example #15
def create_party_leaders():
    ju_email = "*****@*****.**"
    ecwu.User.register(ju_email)
    ju_uuid = edb.get_uuid_db().find_one({'user_email': ju_email})['uuid']
    logging.debug("Found Juliana's uuid %s" % ju_uuid)
    proxy.habiticaRegister("Juliana", ju_email, "autogenerate_me", ju_uuid)

    su_email = "*****@*****.**"
    ecwu.User.register(su_email)
    su_uuid = edb.get_uuid_db().find_one({'user_email': su_email})['uuid']
    logging.debug("Found Sunil's uuid %s" % su_uuid)
    proxy.habiticaRegister("Sunil", su_email, "autogenerate_me", su_uuid)

    sh_email = "*****@*****.**"
    ecwu.User.register(sh_email)
    sh_uuid = edb.get_uuid_db().find_one({'user_email': sh_email})['uuid']
    logging.debug("Found Shankari's uuid %s" % sh_uuid)
    proxy.habiticaRegister("Shankari", sh_email, "autogenerate_me", sh_uuid)
Example #16
 def register(userEmail):
     import uuid
     # This is the UUID that will be stored in the trip database
     # in order to do some fig leaf of anonymity
     # Since we now generate truly anonymized UUIDs, and we expect that the
     # register operation is idempotent, we need to check and ensure that we don't
     # change the UUID if it already exists.
     existing_entry = get_uuid_db().find_one({"user_email": userEmail})
     if existing_entry is None:
         anonUUID = uuid.uuid4()
     else:
         anonUUID = existing_entry['uuid']
     return User.registerWithUUID(userEmail, anonUUID)
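Because of the existence check, register is idempotent with respect to the uuid; a sketch (hypothetical address):

u1 = User.register("rider@example.com")
u2 = User.register("rider@example.com")
assert u1.uuid == u2.uuid  # the existing uuid is reused, not regenerated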
Example #17
  def setUp(self):
    # Make sure we start with a clean slate every time
    self.serverName = 'localhost'
    common.dropAllCollections(edb._get_current_db())

    import shutil
    self.config_path = "conf/clients/testclient.settings.json"
    shutil.copyfile("%s.sample" % self.config_path,
                    self.config_path)

    logging.info("After setup, client count = %d, profile count = %d, uuid count = %d" % 
      (get_client_db().find().count(), get_profile_db().count(), get_uuid_db().count()))
    common.loadTable(self.serverName, "Stage_Modes", "emission/tests/data/modes.json")
Example #19
def get_split_uuid_lists(n_splits, is_public_pipeline):
    get_count = lambda u: enua.UserCache.getUserCache(u).getMessageCount()
    """
    This is the count of messages in the usercache. While we can use it for the
    scheduling, it is just a hint, because having a zero count here is no guarantee
    of zero count in the processing. And in particular, having a zero count here is
    no indication that there are no pending entries in the long-term cache. I think
    that's why we used to have the long-term cache and the user cache uuid list separately.

    That technique is no longer feasible because it requires coordination after the
    usercache is processed instead of parallelizing the entire pipeline.

    In general, this should be a pretty good hint, but I am not sure that it is perfect,
    and I am not ready to try this out weeks before the big deployment, with one day's
    testing. I also don't want to use UserCache.getMessage() since it returns the entire
    list instead of just the count. So we use the count as a hint and don't filter out
    users based on that.
    """

    all_uuids = [e["uuid"] for e in edb.get_uuid_db().find()]
    if is_public_pipeline:
        sel_uuids = [u for u in all_uuids if u in estag.TEST_PHONE_IDS]
    else:
        sel_uuids = [u for u in all_uuids if u not in estag.TEST_PHONE_IDS]
        # Add back the test phones for now so that we can test the data
        # collection changes before deploying them in the wild
        sel_uuids.extend(TEMP_HANDLED_PUBLIC_PHONES)

    sel_jobs = [(u, get_count(u)) for u in sel_uuids]
    # non_zero_jobs = [j for j in sel_jobs if j[1] !=0 ]
    # Not filtering for now
    non_zero_jobs = sel_jobs
    logging.debug(
        "all_uuids = %s, sel_uuids = %s, sel_jobs = %s, non_zero_jobs = %s" %
        (len(all_uuids), len(sel_uuids), len(sel_jobs), len(non_zero_jobs)))

    non_zero_jobs_df = pd.DataFrame(non_zero_jobs,
                                    columns=['user_id', 'count']).sort("count")
    ret_splits = []
    for i in range(0, n_splits):
        ret_splits.append([])

    col = 0
    for i, nzj in enumerate(non_zero_jobs_df.to_dict('records')):
        ret_splits[col].append(nzj['user_id'])
        col = col + 1
        if col == n_splits:
            logging.debug("reached n_splits, setting to zero")
            col = 0
    logging.debug("Split values are %s" % ret_splits)
    return ret_splits
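The split itself is just a round-robin over the count-sorted jobs (note that DataFrame.sort was removed in modern pandas; sort_values("count") is the current equivalent). A self-contained sketch of the same assignment logic on toy data:

jobs = [("u1", 5), ("u2", 0), ("u3", 9), ("u4", 2), ("u5", 7)]
n_splits = 2
splits = [[] for _ in range(n_splits)]
for i, (user_id, count) in enumerate(sorted(jobs, key=lambda j: j[1])):
    splits[i % n_splits].append(user_id)
# splits == [['u2', 'u1', 'u3'], ['u4', 'u5']]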
Example #21
def getResult(user_uuid):
  # This is in here, as opposed to the top level as recommended by the PEP
  # because then we don't have to worry about loading bottle in the unit tests
  from bottle import template
  (prevScore, currScore) = getStoredScore(User.fromUUID(user_uuid))
  (level, sublevel) = getLevel(currScore)

  otherCurrScoreList = []
  for user_uuid_dict in get_uuid_db().find({}, {'uuid': 1, '_id': 0}):
    (currPrevScore, currCurrScore) = getStoredScore(User.fromUUID(user_uuid_dict['uuid']))
    otherCurrScoreList.append(currCurrScore)

  otherCurrScoreList.sort()
  renderedTemplate = template("clients/leaderboard/result_template.html",
                               level_picture_filename = getFileName(level, sublevel),
                               prevScore = prevScore,
                               currScore = currScore,
                               otherCurrScoreList = otherCurrScoreList)
  return renderedTemplate
Example #22
def get_aggregate_analytics():
    df = pd.DataFrame()
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("stats/server_api_time", time_query=None)
        if not user_df.empty:
            df = df.append(user_df, ignore_index = True)
            
    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    df.ix[df.reading>1, 'reading'] = 1
    fig, ax = plt.subplots()
    ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
    plt.ylabel('Response time')
    plt.title('App Analytics')

    f_df = df[df.name == "POST_/result/metrics/timestamp"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='g', label='Dashboard')

    f_df = df[df.name == "POST_/usercache/put"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='b', label='Usercache_put')

    f_df = df[df.name == "POST_/usercache/get"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='r', label='Usercache_get')

    f_df = df[df.name == "POST_/stats/set"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='black', label='Stats_set')

    f_df = df[df.name == "POST_/habiticaRegister"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='orange', label='Habitica Sign up_Login')

    f_df = df[df.name == "POST_/habiticaProxy"]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='aqua', label='Habitica')

    f_df = df[df.name.str.contains("POST_/timeline/getTrips")]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='m', label='Diary')

    plt.legend()
     
    fig.savefig('app_analytics.png')
    fig.savefig('app_analytics.eps', format='eps', dpi=1000)
    return
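df.ix and DataFrame.append were both removed in modern pandas; a sketch of the same aggregation and clipping with current APIs, assuming the same edb/esta helpers:

frames = []
for user in edb.get_uuid_db().find():
    user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
        "stats/server_api_time", time_query=None)
    if not user_df.empty:
        frames.append(user_df)
df = pd.concat(frames, ignore_index=True)
df.loc[df.reading > 1, 'reading'] = 1  # .loc replaces the removed .ix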
Example #24
def getUUID(request, inHeader=False):
  retUUID = None
  if skipAuth:
    if 'User' in request.headers or 'user' in request.json:
        # skipAuth = true, so the email will be sent in plaintext
        userEmail = __getToken__(request, inHeader)
        retUUID = __getUUIDFromEmail__(userEmail)
        logging.debug("skipAuth = %s, returning UUID directly from email %s" % (skipAuth, retUUID))
    else:
        # Return a random user to make it easy to experiment without having to specify a user
        # TODO: Remove this if it is not actually used
        from get_database import get_uuid_db
        user_uuid = get_uuid_db().find_one()['uuid']
        retUUID = user_uuid
        logging.debug("skipAuth = %s, returning arbitrary UUID %s" % (skipAuth, retUUID))
    if Client("choice").getClientKey() is None:
        Client("choice").update(createKey = True)
  else:
    userToken = __getToken__(request, inHeader)
    retUUID = getUUIDFromToken(userToken)
  request.params.user_uuid = retUUID
  return retUUID
Example #25
    def testQueryMatching(self):
        # Load data for the Bay Area
        dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
        ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFileba)
        testUUIDba = self.testUUID
        edb.get_uuid_db().insert_one({
            "uuid": testUUIDba,
            "user_email": "*****@*****.**"
        })
        etc.runIntakePipeline(testUUIDba)
        logging.debug("uuid for the bay area = %s " % testUUIDba)

        # Load data for Hawaii
        dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
        ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})

        etc.setupRealExample(self, dataFilehi)
        testUUIDhi = self.testUUID
        edb.get_uuid_db().insert_one({
            "uuid": testUUIDhi,
            "user_email": "*****@*****.**"
        })
        etc.runIntakePipeline(testUUIDhi)

        logging.debug("uuid for hawaii = %s " % testUUIDhi)

        self.testUUIDList = [testUUIDba, testUUIDhi]

        air_query_spec = {
            "time_type": "local_date",
            "from_local_date": {"year": 2016, "month": 2},
            "to_local_date": {"year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [
                {
                    "modes": ['WALKING', 'ON_FOOT'],
                    "metric": "count",
                    "threshold": {"$gt": 5}
                },
                {
                    "modes": ['AIR_OR_HSR'],
                    "metric": "count",
                    "threshold": {"$gt": 1}
                }
            ]
        }

        # Since this requires at least one air trip, this will only return the
        # hawaii trip
        self.assertEqual(tripmetrics.query(air_query_spec), [testUUIDhi])

        walk_drive_spec = {
            "time_type": "local_date",
            "from_local_date": {"year": 2016, "month": 2},
            "to_local_date": {"year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [
                {
                    "modes": ['WALKING', 'ON_FOOT'],
                    "metric": "count",
                    "threshold": {"$gt": 5}
                },
                {
                    "modes": ['IN_VEHICLE'],
                    "metric": "count",
                    "threshold": {"$gt": 1}
                }
            ]
        }

        # Since this only requires walk and bike, will return both trips
        # We can't just do a simple equals check since the uuids may not always
        # be returned in the same order
        walk_drive_result = tripmetrics.query(walk_drive_spec)
        self.assertEqual(len(walk_drive_result), 2)
        self.assertIn(testUUIDhi, walk_drive_result)
        self.assertIn(testUUIDba, walk_drive_result)
Example #26
def get_app_analytics():
    df = pd.DataFrame()
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("stats/server_api_time", time_query=None)
        if not user_df.empty:
            df = df.append(user_df, ignore_index = True)

    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    df.ix[df.reading>1, 'reading'] = 1
    fig, ax = plt.subplots()
    ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
    plt.ylabel('Response time')

    dashboard_df = df[df.name == "POST_/result/metrics/timestamp"]
    dashboard_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Dashboard')
    fig.savefig('Dashboard.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    cache_put_df = df[df.name == "POST_/usercache/put"]
    cache_put_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Usercache_put')
    fig.savefig('Usercache_put.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    cache_get_df = df[df.name == "POST_/usercache/get"]
    cache_get_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Usercache_get')
    fig.savefig('Usercache_get.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    stats_set_df = df[df.name == "POST_/stats/set"]
    stats_set_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Stats_set')
    fig.savefig('Stats_set.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    habitica_intro_df = df[df.name == "POST_/habiticaRegister"]
    habitica_intro_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Habitica Sign up and Login')
    fig.savefig('Habitica Sign up_Login.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    habitica_df = df[df.name == "POST_/habiticaProxy"]
    habitica_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Habitica')
    fig.savefig('Habitica.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    diary_df = df[df.name.str.contains("POST_/timeline/getTrips")]
    diary_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Diary')
    fig.savefig('Diary.png')
    plt.close(fig)
    return
Example #27
 def precomputeResults(self):
     for user_uuid_dict in get_uuid_db().find({}, {'uuid': 1, '_id': 0}):
         logging.info("Computing precomputed results for %s" % user_uuid_dict['uuid'])
         userclient.runClientSpecificBackgroundTasks(user_uuid_dict['uuid'])
Example #28
import emission.core.get_database as edb
import pandas as pd
from uuid import UUID
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.trip_queries as esdt
import emission.storage.timeseries.timequery as estt

all_users = pd.DataFrame(
    list(edb.get_uuid_db().find({}, {
        "user_email": 1,
        "uuid": 1,
        "_id": 0
    })))
test_user_id = all_users.iloc[60].uuid

ts = esta.TimeSeries.get_time_series(test_user_id)

# Get all cleaned trips for the first user
ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)

#Get GeoJson for trip
first_trip_for_user = ct_df.iloc[0]
first_trip_start_ts = first_trip_for_user.start_ts
first_trip_end_ts = first_trip_for_user.end_ts
trip_start_end_fuzz = 10  # seconds
trips_geojson_list = gfc.get_geojson_for_ts(
    test_user_id, first_trip_start_ts - trip_start_end_fuzz,
    first_trip_end_ts + trip_start_end_fuzz)
Example #29
import logging
import attrdict as ad
import uuid

import emission.core.get_database as edb
import emission.storage.timeseries.aggregate_timeseries as estag

def reset_collection(coll, old_uuid, new_uuid):
    logging.debug(coll.update({"user_id": old_uuid},
              {"$set": {"user_id": new_uuid}}, multi=True))

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    for user_dict in edb.get_uuid_db().find():
        user = ad.AttrDict(user_dict)
        if user.uuid in estag.TEST_PHONE_IDS:
            logging.debug("Found test phone, skipping reset")
        else:
            new_uuid = uuid.uuid4()
            logging.debug("Mapping %s -> %s" % (new_uuid, user.uuid))
            edb.get_uuid_db().update({"uuid" : user.uuid},
                                     {"$set": {"uuid" : new_uuid}})
            logging.debug("Resetting alternatives...")
            reset_collection(edb.get_alternatives_db(), user.uuid, new_uuid)
            logging.debug("Resetting analysis...")
            reset_collection(edb.get_analysis_timeseries_db(), user.uuid, new_uuid)
            logging.debug("Resetting client...")
            reset_collection(edb.get_client_db(), user.uuid, new_uuid)
            logging.debug("Resetting client_stats_backup...")
            reset_collection(edb.get_client_stats_db_backup(), user.uuid, new_uuid)
            logging.debug("Resetting server_stats_backup...")
Example #30
 def unregister(userEmail):
   user = User.fromEmail(userEmail)
   uuid = user.uuid
   get_uuid_db().delete_one({'user_email': userEmail})
   get_profile_db().delete_one({'user_id': uuid})
   return uuid
Example #31
 def isRegistered(userEmail):
     email2UUID = get_uuid_db().find_one({'user_email': userEmail})
     if email2UUID is None:
         return False
     else:
         return True
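isRegistered, register, and unregister together form a simple account lifecycle; a sketch with a hypothetical address:

test_email = "volunteer@example.com"  # hypothetical
assert not User.isRegistered(test_email)
user = User.register(test_email)
assert User.isRegistered(test_email)
removed_uuid = User.unregister(test_email)
assert removed_uuid == user.uuid
assert not User.isRegistered(test_email)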
Example #33
# extract_timeline_for_day_range_and_user.py script
# The channel is stored in the "client" field of the profile

import emission.core.wrapper.user as ecwu

import sys
import argparse
import logging
import json
import bson.json_util as bju

import emission.core.get_database as edb

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(prog="get_users_for_channel")

    parser.add_argument("channel", help="the channel that the users signed in to")
    parser.add_argument("-o", "--outfile", help="the output filename (default: stdout)")

    args = parser.parse_args()

    matched_profiles_it = edb.get_profile_db().find({"client": args.channel})
    matched_uuids_it = [p["user_id"] for p in matched_profiles_it]
    matched_email2uuid_it = [edb.get_uuid_db().find_one({"uuid": u}) for u in matched_uuids_it]

    logging.debug("Mapped %d entries for channel %s" % (len(matched_email2uuid_it), args.channel)) 

    out_fd = sys.stdout if args.outfile is None else open(args.outfile, "w")
    json.dump(matched_email2uuid_it, out_fd, default=bju.default)
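Assuming this script is saved as get_users_for_channel.py (a guess based on the argparse prog name), a typical invocation would be python get_users_for_channel.py <channel> -o matching_users.json, after which the dumped email-to-uuid entries can be fed to the extract_timeline_for_day_range_and_user.py script mentioned above.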
Example #34
 def unregister(userEmail):
     user = User.fromEmail(userEmail)
     uuid = user.uuid
     get_uuid_db().remove({'user_email': userEmail})
     get_profile_db().remove({'user_id': uuid})
     return uuid
        help="after how many lines we should print a status message.")

    parser.add_argument("-i", "--info-only", default=False, action='store_true',
        help="only print entry analysis")

    parser.add_argument("-p", "--pipeline-purge", default=False, action='store_true',
        help="purge the pipeline state as well")

    args = parser.parse_args()
    fn = args.timeline_filename
    logging.info("Loading file or prefix %s" % fn)
    sel_file_list = common.read_files_with_prefix(fn)

    ts_db = edb.get_timeseries_db()
    ats_db = edb.get_analysis_timeseries_db()
    udb = edb.get_uuid_db()
    psdb = edb.get_pipeline_state_db()
    db_array = [ts_db, ats_db, udb, psdb]

    for i, filename in enumerate(sel_file_list):
        if "pipelinestate" in filename:
            continue

        logging.info("=" * 50)
        logging.info("Deleting data from file %s" % filename)

        entries = json.load(gzip.open(filename), object_hook = bju.object_hook)

        # Obtain uuid and rerun information from entries
        curr_uuid_list, needs_rerun = common.analyse_timeline(entries)
        if len(curr_uuid_list) > 1:
Example #36
def calculate_single_suggestion(uuid):
    #Given a single UUID, create a suggestion for them
    return_obj = {
        'message': "Good job walking and biking! No suggestion to show.",
        'savings': "0",
        'start_lat': '0.0',
        'start_lon': '0.0',
        'end_lat': '0.0',
        'end_lon': '0.0',
        'method': 'bike'
    }
    all_users = pd.DataFrame(
        list(edb.get_uuid_db().find({}, {
            "uuid": 1,
            "_id": 0
        })))
    user_id = all_users.iloc[all_users[all_users.uuid == uuid].index.tolist()
                             [0]].uuid
    time_series = esta.TimeSeries.get_time_series(user_id)
    cleaned_sections = time_series.get_data_df("analysis/inferred_section",
                                               time_query=None)
    suggestion_trips = edb.get_suggestion_trips_db()
    #Go in reverse order because we check by most recent trip
    counter = 40
    if len(cleaned_sections) == 0:
        return_obj['message'] = 'Suggestions will appear once you start taking trips!'
        return return_obj
    for i in range(len(cleaned_sections) - 1, -1, -1):
        counter -= 1
        if counter < 0:
            #Iterate 20 trips back
            return return_obj
        if cleaned_sections.iloc[i]["end_ts"] - cleaned_sections.iloc[i][
                "start_ts"] < 5 * 60:
            continue
        distance_in_miles = cleaned_sections.iloc[i]["distance"] * 0.000621371
        mode = cleaned_sections.iloc[i]["sensed_mode"]
        start_loc = cleaned_sections.iloc[i]["start_loc"]["coordinates"]
        # GeoJSON coordinates are [longitude, latitude]
        start_lon = str(start_loc[0])
        start_lat = str(start_loc[1])
        trip_id = cleaned_sections.iloc[i]['trip_id']
        tripDict = suggestion_trips.find_one({'uuid': uuid})
        end_loc = cleaned_sections.iloc[i]["end_loc"]["coordinates"]
        end_lon = str(end_loc[0])
        end_lat = str(end_loc[1])
        if mode == 5 and distance_in_miles >= 5 and distance_in_miles <= 15:
            logging.debug("15 >= distance >= 5 so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest bus if it is car and distance between 5 and 15
            default_message = return_obj['message']
            try:
                message = "Try public transportation from " + return_address_from_location(start_lon + "," + start_lat) + \
                " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                #savings per month, .465 kg co2/mile for car, 0.14323126 kg co2/mile for bus
                savings = str(
                    int(distance_in_miles * 30 * .465 -
                        0.14323126 * distance_in_miles * 30))
                # record the suggestion before returning, matching the
                # bike/walk branches below
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'public'
                }
            except ValueError as e:
                return_obj['message'] = default_message
                continue
        elif mode in (3, 4, 5) and 1 <= distance_in_miles < 5:
            logging.debug("5 > distance >= 1 so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest bike if it is car/bus/train and distance between 5 and 1
            try:
                message = "Try biking from " + return_address_from_location(start_lon + "," + start_lat) + \
                " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                savings = str(int(distance_in_miles * 30 *
                                  .465))  #savings per month, .465 kg co2/mile
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'bike'
                }
            except:
                continue
        elif mode in (3, 4, 5) and distance_in_miles < 1:
            logging.debug("1 > distance so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest walking if it is car/bus/train and distance less than 1
            try:
                message = "Try walking/biking from " + return_address_from_location(start_lon + "," + start_lat) + \
                " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                savings = str(int(distance_in_miles * 30 *
                                  .465))  #savings per month, .465 kg co2/mile
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'walk'
                }
            except:
                continue
    return return_obj
Example #37
def calculate_yelp_server_suggestion(uuid):
    #Given a single UUID, create a suggestion for them
    return_obj = {
        'message': "Good job walking and biking! No suggestion to show.",
        'savings': "0",
        'start_lat': '0.0',
        'start_lon': '0.0',
        'end_lat': '0.0',
        'end_lon': '0.0',
        'method': 'bike'
    }
    all_users = pd.DataFrame(
        list(edb.get_uuid_db().find({}, {
            "uuid": 1,
            "_id": 0
        })))
    user_id = all_users.iloc[all_users[all_users.uuid == uuid].index.tolist()
                             [0]].uuid
    time_series = esta.TimeSeries.get_time_series(user_id)
    cleaned_sections = time_series.get_data_df("analysis/cleaned_trip",
                                               time_query=None)
    yelp_suggestion_trips = edb.get_yelp_db()
    # print(cleaned_sections)
    #Go in reverse order because we check by most recent trip
    counter = 40
    if len(cleaned_sections) == 0:
        return_obj['message'] = 'Suggestions will appear once you start taking trips!'
        return return_obj
    for i in range(len(cleaned_sections) - 1, -1, -1):
        counter -= 1
        if counter < 0:
            return return_obj
        # skip sections shorter than 5 minutes (end_ts - start_ts < 300 seconds)
        if cleaned_sections.iloc[i]["end_ts"] - cleaned_sections.iloc[i]["start_ts"] < 5 * 60:
            continue
        #Change distance in meters to miles
        distance_in_miles = cleaned_sections.iloc[i]["distance"] * 0.000621371
        mode = cleaned_sections.iloc[i]["sensed_mode"]
        start_loc = cleaned_sections.iloc[i]["start_loc"]["coordinates"]
        start_lon = str(start_loc[0])
        start_lat = str(start_loc[1])
        start_lat_lon = start_lat + ',' + start_lon
        trip_id = cleaned_sections.iloc[i]['trip_id']
        tripDict = yelp_suggestion_trips.find_one({'uuid': uuid})
        #print(tripDict)
        end_loc = cleaned_sections.iloc[i]["end_loc"]["coordinates"]
        end_lon = str(end_loc[0])
        end_lat = str(end_loc[1])
        end_lat_lon = end_lat + ',' + end_lon
        print(end_lat_lon)
        endpoint_categories = category_of_business(end_lat_lon)
        business_locations = {}
        if len(return_address_from_location_yelp(start_lat_lon)) == 1:
            begin_address = return_address_from_location_yelp(start_lat_lon)
        else:
            begin_address = return_address_from_location_yelp(start_lat_lon)[2]
        if len(return_address_from_location_yelp(end_lat_lon)) == 1:
            continue
        city = return_address_from_location_yelp(end_lat_lon)[1]
        address = return_address_from_location_yelp(end_lat_lon)[2]
        #ALREADY CALCULATED BY DISTANCE_IN_MILES
        #comp_distance = distance(dummy, end_lat_lon)
        location_review = review_start_loc(end_lat_lon)
        ratings_bus = {}
        error_message = 'Sorry, unable to retrieve datapoint'
        error_message_categor = 'Sorry, unable to retrieve datapoint because datapoint is a house or datapoint does not belong in service categories'
        if (endpoint_categories):
            for categor in endpoint_categories:
                queried_bus = search(API_KEY, categor, city)['businesses']
                for q in queried_bus:
                    if q['rating'] >= location_review:
                        #'Coordinates' come out as two elements, latitude and longitude
                        ratings_bus[q['name']] = q['rating']
                        obtained = q['location']['display_address'][0] + q[
                            'location']['display_address'][1]
                        obtained = obtained.replace(' ', '+')  # str.replace returns a new string
                        business_locations[q['name']] = obtained
        else:
            return {'message': error_message_categor, 'method': 'bike'}
        for a in business_locations:
            calculate_distance = distance(start_lat_lon, business_locations[a])
            #Will check which mode the trip was taking for the integrated calculate yelp suggestion
            if calculate_distance < distance_in_miles and calculate_distance < 5 and calculate_distance >= 1:
                try:
                    message = "Why didn't you bike from " + begin_address + " to " + a + " (tap me to view) " + a + \
                    " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    #Not sure to include the amount of carbon saved
                    #Still looking to see what to return with this message, because currently my latitude and longitudes are stacked together in one string
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips,
                                   uuid)
                    return {'message': message, 'method': 'bike'}

                    #insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                except ValueError as e:
                    continue
            elif calculate_distance < distance_in_miles and calculate_distance < 1:
                try:
                    message = "Why didn't you walk from " + begin_address + " to " + a + " (tap me to view) " + a + \
                    " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips,
                                   uuid)
                    return {'message': message, 'method': 'walk'}
                except ValueError as e:
                    continue
            elif calculate_distance < distance_in_miles and calculate_distance >= 5 and calculate_distance <= 15:
                try:
                    message = "Why didn't you check out public transportation from " + begin_address + " to " + a + " (tap me to view) " + a + \
                    " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips,
                                   uuid)
                    return {'message': message, 'method': 'public'}
                except ValueError as e:
                    continue
Пример #38
0
def get_app_analytics():
    df = pd.DataFrame()
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
            "stats/server_api_time", time_query=None)
        if not user_df.empty:
            df = df.append(user_df, ignore_index=True)

    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    df.ix[df.reading > 1, 'reading'] = 1
    fig, ax = plt.subplots()
    ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
    plt.ylabel('Response time')

    dashboard_df = df[df.name == "POST_/result/metrics/timestamp"]
    dashboard_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Dashboard')
    fig.savefig('Dashboard.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    cache_put_df = df[df.name == "POST_/usercache/put"]
    cache_put_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Usercache_put')
    fig.savefig('Usercache_put.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    cache_get_df = df[df.name == "POST_/usercache/get"]
    cache_get_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Usercache_get')
    fig.savefig('Usercache_get.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    stats_set_df = df[df.name == "POST_/stats/set"]
    stats_set_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Stats_set')
    fig.savefig('Stats_set.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    habitica_intro_df = df[df.name == "POST_/habiticaRegister"]
    habitica_intro_df.plot(x="datetime",
                           y="reading",
                           ax=ax,
                           style='+',
                           legend=None)
    plt.title('Habitica Sign up and Login')
    fig.savefig('Habitica Sign up_Login.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    habitica_df = df[df.name == "POST_/habiticaProxy"]
    habitica_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Habitica')
    fig.savefig('Habitica.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    diary_df = df[df.name.str.contains("POST_/timeline/getTrips")]
    diary_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title('Diary')
    fig.savefig('Diary.png')
    plt.close(fig)
    return
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_analysis_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_uuid_db().remove({"user_id": {"$in": self.testUUIDList}})
Пример #40
0
import arrow
import pandas as pd

import emission.core.get_database as edb
import emission.storage.timeseries.abstract_timeseries as esta


def find_inactive_users():
    inactive_users = []
    inactive_users_new_consent = ""
    inactive_users_old_consent = ""
    inactive_users_before_september = ""
    inactive_users_after_september = ""
    one_week_ago_ts = arrow.utcnow().shift(weeks=-1).timestamp()
    september_first = arrow.get('2016-09-01').timestamp()
    for user in edb.get_uuid_db().find():
        api_calls_df = esta.TimeSeries.get_time_series(
            user['uuid']).get_data_df("stats/server_api_time", time_query=None)
        new_consent = esta.TimeSeries.get_time_series(
            user['uuid']).get_data_df("config/consent", time_query=None)
        signup_date = arrow.get(user['update_ts'])

        # Check the last usercache call: the user is inactive if there are no
        # calls at all, or if the last one was before one_week_ago_ts
        if api_calls_df.empty:
            last_usercache_call = pd.DataFrame()
        else:
            last_usercache_call = api_calls_df[api_calls_df['name'].str.contains(
                'usercache', case=False)].tail(1)

        if last_usercache_call.empty:
            last_call_date = ()
        elif last_usercache_call.iloc[0]['ts'] < one_week_ago_ts:
            last_call_date = arrow.get(
                last_usercache_call.iloc[0]['ts']).date()
        else:
            # The last call was within the past week, so the user is active
            continue

        inactive_users.append(
            (user['user_email'], signup_date.date(), last_call_date))
        # An empty config/consent dataframe means that the user never
        # consented to the new IRB protocol
        if new_consent.empty:
            inactive_users_old_consent += str(user['user_email']) + ', '
        else:
            inactive_users_new_consent += str(user['user_email']) + ', '
        if signup_date.timestamp() < september_first:
            inactive_users_before_september += str(user['user_email']) + ', '
        else:
            inactive_users_after_september += str(user['user_email']) + ', '

    inactive_users_table = pd.DataFrame(
        inactive_users,
        columns=['Email', 'Last Sign Up Date', 'Last Usercache Call'])
    print("\nList of inactive users' emails and the dates they signed up:")
    print(inactive_users_table)
    print("\nEmails of inactive users who consented to the new IRB protocol:")
    print(inactive_users_new_consent[:-2])
    print("\nEmails of inactive users who did not consent to the new IRB protocol:")
    print(inactive_users_old_consent[:-2])
    print("\nEmails of inactive users who signed up before September 1st:")
    print(inactive_users_before_september[:-2])
    print("\nEmails of inactive users who signed up after September 1st:")
    print(inactive_users_after_september[:-2])
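The comma-joined report strings above could also be built with lists and a single join, which avoids the trailing ", " and the [:-2] slicing. A small self-contained sketch of the same bucketing; the function and key names are illustrative, not from the source.

def bucket_emails(users):
    # Hypothetical variant: users is an iterable of
    # (email, consented, signed_up_before_september) tuples
    new_consent, old_consent = [], []
    before_sept, after_sept = [], []
    for email, consented, before_september in users:
        (new_consent if consented else old_consent).append(email)
        (before_sept if before_september else after_sept).append(email)
    return {label: ", ".join(bucket) for label, bucket in [
        ("new_consent", new_consent), ("old_consent", old_consent),
        ("before_september", before_sept), ("after_september", after_sept)]}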
    def testQueryMatching(self):
        # Load data for the Bay Area
        dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
        ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFileba)
        testUUIDba = self.testUUID
        edb.get_uuid_db().insert({"uuid": testUUIDba, "user_email": "*****@*****.**"})
        etc.runIntakePipeline(testUUIDba)
        logging.debug("uuid for the bay area = %s " % testUUIDba)

        # Load data for Hawaii
        dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
        ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})

        etc.setupRealExample(self, dataFilehi)
        testUUIDhi = self.testUUID
        edb.get_uuid_db().insert({"uuid": testUUIDhi, "user_email": "*****@*****.**"})
        etc.runIntakePipeline(testUUIDhi)

        logging.debug("uuid for hawaii = %s " % testUUIDhi)

        self.testUUIDList = [testUUIDba, testUUIDhi]

        air_query_spec = {
            "time_type": "local_date",
            "from_local_date": { "year": 2016, "month": 2},
            "to_local_date": { "year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [
                {
                    "modes": ['WALKING', 'ON_FOOT'],
                    "metric": "count",
                    "threshold": {"$gt": 5}
                },
                {
                    "modes": ['AIR_OR_HSR'],
                    "metric": "count",
                    "threshold": {"$gt": 1}
                }
            ]
        }

        # Since this requires at least one air trip, the query will only
        # return the Hawaii user
        self.assertEqual(tripmetrics.query(air_query_spec), [testUUIDhi])

        walk_drive_spec = {
            "time_type": "local_date",
            "from_local_date": { "year": 2016, "month": 2},
            "to_local_date": { "year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [
                {
                    "modes": ['WALKING', 'ON_FOOT'],
                    "metric": "count",
                    "threshold": {"$gt": 5}
                },
                {
                    "modes": ['IN_VEHICLE'],
                    "metric": "count",
                    "threshold": {"$gt": 1}
                }
            ]
        }

        # Since this only requires walking and driving, the query will return
        # both users. We can't just do a simple equals check since the uuids
        # may not always be returned in the same order
        walk_drive_result = tripmetrics.query(walk_drive_spec)
        self.assertEqual(len(walk_drive_result), 2)
        self.assertIn(testUUIDhi, walk_drive_result)
        self.assertIn(testUUIDba, walk_drive_result)
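The threshold values in the specs above use Mongo-style comparison operators. A small illustrative helper (hypothetical; tripmetrics' actual evaluation may differ) shows how such a threshold could be applied to a computed per-mode metric:

import operator

THRESHOLD_OPS = {"$gt": operator.gt, "$gte": operator.ge,
                 "$lt": operator.lt, "$lte": operator.le}

def check_threshold(value, threshold):
    # threshold is a one-entry dict in Mongo query style, e.g. {"$gt": 5}
    (op_name, limit), = threshold.items()
    return THRESHOLD_OPS[op_name](value, limit)

# e.g. check_threshold(7, {"$gt": 5}) -> True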
Example #42
import argparse
import sys
import logging

import emission.core.get_database as edb
import emission.net.ext_service.habitica.proxy as proxy

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument("user_email",
                        help="the email address of the user whose habitica "
                             "account you want to clean up")

    args = parser.parse_args()
    uuid_entry = edb.get_uuid_db().find_one({'user_email': args.user_email})
    if uuid_entry is None:
        logging.error("No uuid found for email %s" % args.user_email)
        sys.exit(1)
    del_uuid = uuid_entry['uuid']
    logging.debug("Found uuid %s" % del_uuid)
    del_habitica_creds = edb.get_habitica_db().find_one({'user_id': del_uuid})
    if del_habitica_creds is None:
        logging.error("No habitica credentials found for uuid %s" % del_uuid)
        sys.exit(1)
    logging.debug("del_habitica_creds = %s" % del_habitica_creds)
    del_result = proxy.habiticaProxy(
        del_uuid, "DELETE", "/api/v3/user",
        {'password': del_habitica_creds['habitica_password']})
    logging.debug("delete result = %s" % del_result)
                        help="only print entry analysis")

    parser.add_argument("-p",
                        "--pipeline-purge",
                        default=False,
                        action='store_true',
                        help="purge the pipeline state as well")

    args = parser.parse_args()
    fn = args.timeline_filename
    logging.info("Loading file or prefix %s" % fn)
    sel_file_list = common.read_files_with_prefix(fn)

    ts_db = edb.get_timeseries_db()
    ats_db = edb.get_analysis_timeseries_db()
    udb = edb.get_uuid_db()
    psdb = edb.get_pipeline_state_db()

    for i, filename in enumerate(sel_file_list):
        logging.info("=" * 50)
        logging.info("Deleting data from file %s" % filename)

        entries = json.load(gzip.open(filename), object_hook=bju.object_hook)

        # Obtain uuid and rerun information from entries
        curr_uuid_list, needs_rerun = common.analyse_timeline(entries)
        if len(curr_uuid_list) > 1:
            logging.warning("Found %d users, %s in filename, aborting! " %
                            (len(curr_uuid_list), curr_uuid_list))
            raise RuntimeException(
                "Found %d users, %s in filename, expecting 1, %s" %
import argparse
import sys
import logging
import json
import bson.json_util as bju

import emission.core.get_database as edb

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(prog="get_users_for_channel")

    parser.add_argument("channel",
                        help="the channel that the users signed in to")
    parser.add_argument("-o",
                        "--outfile",
                        help="the output filename (default: stdout)")

    args = parser.parse_args()

    matched_profiles_it = edb.get_profile_db().find({"client": args.channel})
    matched_uuids_it = [p["user_id"] for p in matched_profiles_it]
    matched_email2uuid_it = [
        edb.get_uuid_db().find_one({"uuid": u}) for u in matched_uuids_it
    ]

    logging.debug("Mapped %d entries for channel %s" %
                  (len(matched_email2uuid_it), args.channel))

    out_fd = sys.stdout if args.outfile is None else open(args.outfile, "w")
    json.dump(matched_email2uuid_it, out_fd, default=bju.default)
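Because the dump is written with bju.default, BSON types such as UUIDs and ObjectIds should survive the round trip; a short sketch of reading it back (the filename is illustrative):

import json
import bson.json_util as bju

# Hypothetical reader for a dump produced by the script above
with open("channel_users.json") as fd:
    entries = json.load(fd, object_hook=bju.object_hook)
for e in entries:
    print(e["user_email"], e["uuid"])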
import logging
import uuid

import attrdict as ad

import emission.core.get_database as edb
import emission.storage.timeseries.aggregate_timeseries as estag


def reset_collection(coll, old_uuid, new_uuid):
    # Remap all entries in this collection from the old uuid to the new one
    logging.debug(
        coll.update({"user_id": old_uuid}, {"$set": {
            "user_id": new_uuid
        }},
                    multi=True))


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    for user_dict in edb.get_uuid_db().find():
        user = ad.AttrDict(user_dict)
        if user.uuid in estag.TEST_PHONE_IDS:
            logging.debug("Found test phone, skipping reset")
        else:
            new_uuid = uuid.uuid4()
            logging.debug("Mapping %s -> %s" % (user.uuid, new_uuid))
            edb.get_uuid_db().update({"uuid": user.uuid},
                                     {"$set": {
                                         "uuid": new_uuid
                                     }})
            logging.debug("Resetting alternatives...")
            reset_collection(edb.get_alternatives_db(), user.uuid, new_uuid)
            logging.debug("Resetting analysis...")
            reset_collection(edb.get_analysis_timeseries_db(), user.uuid,
                             new_uuid)
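The same helper could plausibly reset the other per-user collections as well; a fragment (an assumption, not in the source) that would slot into the else branch above:

            # hypothetical: also remap the raw timeseries and usercache entries
            logging.debug("Resetting timeseries and usercache...")
            for coll in [edb.get_timeseries_db(), edb.get_usercache_db()]:
                reset_collection(coll, user.uuid, new_uuid)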
Example #46
def isRegistered(userEmail):
    email2UUID = get_uuid_db().find_one({'user_email': userEmail})
    return email2UUID is not None
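A quick usage sketch, assuming isRegistered is exposed as a static method on a User class (the email value is illustrative):

if User.isRegistered("user@example.com"):
    print("user is already in the uuid db")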
Example #47
def get_all_uuids():
    all_uuids = [e["uuid"] for e in edb.get_uuid_db().find()]
    return all_uuids
Example #49
def precomputeResults(self):
    for user_uuid_dict in get_uuid_db().find({}, {'uuid': 1, '_id': 0}):
        logging.info("Computing precomputed results for %s" %
                     user_uuid_dict['uuid'])
        userclient.runClientSpecificBackgroundTasks(user_uuid_dict['uuid'])
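A slightly more defensive, module-level variant (a sketch, not the source's implementation) isolates per-user failures so one bad user does not abort the whole sweep:

def precompute_results_safe():
    # Hypothetical variant of precomputeResults above: log and continue
    # if one user's background tasks fail
    for user_uuid_dict in get_uuid_db().find({}, {'uuid': 1, '_id': 0}):
        curr_uuid = user_uuid_dict['uuid']
        try:
            userclient.runClientSpecificBackgroundTasks(curr_uuid)
        except Exception:
            logging.exception("Background tasks failed for %s, continuing",
                              curr_uuid)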