def clearRelatedDb(self): edb.get_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_pipeline_state_db().remove({"user_id": self.androidUUID}) edb.get_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
def testEmptyCall(self):
    # Check call to the entire filter accuracy with a zero length timeseries
    import emission.core.get_database as edb
    edb.get_timeseries_db().remove({"user_id": self.testUUID})
    # We expect that this should not throw
    eaicf.filter_accuracy(self.testUUID)
    self.assertEqual(len(self.ts.get_data_df("background/location")), 0)
def move_all_filters_to_data():
    for entry in edb.get_timeseries_db().find():
        if "filter" in entry["metadata"]:
            curr_filter = entry["metadata"]["filter"]
            if is_location_entry(entry):
                entry["data"]["filter"] = curr_filter
                logging.debug("for entry %s, found key %s, moved filter %s into data" %
                              (entry["_id"], get_curr_key(entry), curr_filter))
            # For all cases, including the location one, we want to delete the
            # filter from metadata
            del entry["metadata"]["filter"]
            edb.get_timeseries_db().save(entry)
            logging.debug("for entry %s, for key %s, deleted filter %s from metadata" %
                          (entry["_id"], get_curr_key(entry), curr_filter))
        else:
            pass
            # logging.warning("No filter found for entry %s, skipping" % entry)

        if "filter" not in entry["data"] and is_location_entry(entry):
            # This must be an entry from before the time that we started sending
            # entries to the server. At that time, we only sent time entries,
            # so set it to time in this case
            entry["data"]["filter"] = "time"
            logging.debug("No entry found in either data or metadata, for key %s setting to 'time'" %
                          entry["metadata"]["key"])
            edb.get_timeseries_db().save(entry)
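# Hedged before/after sketch of the migration above. The field names come from
# the code; the concrete values are invented for illustration only.
migration_example_before = {"metadata": {"key": "background/location", "filter": "time"},
                            "data": {"ts": 1440658800.0}}
migration_example_after = {"metadata": {"key": "background/location"},
                           "data": {"ts": 1440658800.0, "filter": "time"}}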
def tearDown(self): edb.get_timeseries_db().remove({"user_id": self.testUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.testUUID}) edb.get_timeseries_db().remove({"user_id": "new_fake"}) edb.get_analysis_timeseries_db().remove({"user_id": "new_fake"}) edb.get_common_trip_db().drop() edb.get_common_place_db().drop()
def tearDown(self): edb.get_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_usercache_db().remove({"user_id": self.androidUUID}) edb.get_usercache_db().remove({"user_id": self.iosUUID}) edb.get_place_db().remove() edb.get_trip_new_db().remove()
def clearRelatedDb(self):
    edb.get_timeseries_db().remove()
    edb.get_place_db().remove()
    edb.get_stop_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()
def clearRelatedDb(self): edb.get_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID}) edb.get_timeseries_db().delete_many({"user_id": self.testUUID1}) edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID1}) edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
def tearDown(self): edb.get_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_usercache_db().remove({"user_id": self.androidUUID}) edb.get_usercache_db().remove({"user_id": self.iosUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
def getPublicData():
    ids = request.json['phone_ids']
    # list comprehensions instead of map() so the sequences can be iterated
    # more than once under Python 3
    all_uuids = [UUID(id) for id in ids]
    uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

    from_ts = request.query.from_ts
    to_ts = request.query.to_ts

    time_range = estt.TimeQuery("metadata.write_ts", float(from_ts), float(to_ts))
    time_query = time_range.get_query()

    user_queries = [{'user_id': uuid} for uuid in uuids]
    for q in user_queries:
        q.update(time_query)

    num_entries = [edb.get_timeseries_db().find(q).count() for q in user_queries]
    total_entries = sum(num_entries)
    logging.debug("Total entries requested: %d" % total_entries)

    threshold = 200000
    if total_entries > threshold:
        data_list = None
    else:
        data_list = [list(edb.get_timeseries_db().find(q).sort("metadata.write_ts"))
                     for q in user_queries]

    return {'phone_data': data_list}
def clearRelatedDb(self):
    edb.get_timeseries_db().remove({'user_id': self.testUUID})
    edb.get_place_db().remove({'user_id': self.testUUID})
    edb.get_stop_db().remove({'user_id': self.testUUID})
    edb.get_trip_new_db().remove({'user_id': self.testUUID})
    edb.get_section_new_db().remove({'user_id': self.testUUID})
def clearRelatedDb(self): edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}}) edb.get_analysis_timeseries_db().remove( {"user_id": { "$in": self.testUUIDList }}) edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
def testLocalRangeRolloverQuery(self):
    """
    Search for all entries between 8:18 and 9:08 local time, both inclusive
    """
    start_local_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'hour': 8, 'minute': 18})
    end_local_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'hour': 9, 'minute': 8})
    final_query = {"user_id": self.testUUID}
    final_query.update(esdl.get_range_query("data.local_dt", start_local_dt, end_local_dt))

    entries = edb.get_timeseries_db().find(final_query).sort('data.ts', pymongo.ASCENDING)
    self.assertEqual(448, edb.get_timeseries_db().count_documents(final_query))

    entries_list = list(entries)

    # Note that since this is a set of filters, as opposed to a range, this
    # returns all entries between 18 and 8 in both hours.
    # so 8:18 is valid, but so is 9:57
    self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.hour, 8)
    self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.minute, 18)
    self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.hour, 9)
    self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.minute, 57)
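# Hedged illustration of the "set of filters" note in the test above: the
# local_dt range is (we assume here) decomposed into independent per-component
# bounds rather than a single lexicographic range, so every minute of hours 8
# and 9 matches. This dict is a sketch, not the actual esdl.get_range_query output.
illustrative_local_dt_query = {
    "data.local_dt.year": {"$gte": 2015, "$lte": 2015},
    "data.local_dt.month": {"$gte": 8, "$lte": 8},
    "data.local_dt.hour": {"$gte": 8, "$lte": 9},  # admits 8:18 and 9:57 alike
}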
def tearDown(self):
    os.remove(self.analysis_conf_path)
    edb.get_timeseries_db().remove({"user_id": self.androidUUID})
    edb.get_timeseries_db().remove({"user_id": self.iosUUID})
    edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
    edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
    edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
    edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
def loadPointsForTrip(self, trip_id):
    import emission.core.get_database as edb
    entries = json.load(open("emission/tests/data/smoothing_data/%s" % trip_id),
                        object_hook=bju.object_hook)
    for entry in entries:
        entry["user_id"] = self.testUUID
        edb.get_timeseries_db().save(entry)
def clearRelatedDb(self): edb.get_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_analysis_timeseries_db().delete_many( {"user_id": self.testUUID}) edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID}) edb.get_timeseries_db().delete_many({"user_id": self.testUUID1}) edb.get_analysis_timeseries_db().delete_many( {"user_id": self.testUUID1}) edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
def convert_collection(collection, key):
    result_cursor = collection.find()
    logging.info("About to convert %s entries" % result_cursor.count())
    for i, wrapper in enumerate(result_cursor):
        entry = convert_wrapper_to_entry(key, wrapper)
        if entry.get_id() != wrapper["_id"]:
            logging.warning("entry.id = %s, wrapper.id = %s" %
                            (entry.get_id(), wrapper["_id"]))
        if i % 10000 == 0:
            print("converted %s -> %s" % (wrapper, entry))
        edb.get_timeseries_db().insert(entry)
def testInsertFilters(self):
    edb.get_timeseries_db().remove({"user_id": self.testUUID,
                                    "metadata.key": "background/filtered_location"})
    for entry in edb.get_timeseries_db().find({'user_id': self.testUUID,
                                               'metadata.filter': 'time',
                                               "metadata.key": "background/location"}):
        del entry["_id"]
        del entry["metadata"]["filter"]
        entry["metadata"]["key"] = "background/filtered_location"
        edb.get_timeseries_db().insert(entry)

    # At this point, all the filtered_location entries will not have any filters
    self.assertEqual(edb.get_timeseries_db().find({'user_id': self.testUUID,
        'metadata.filter': 'time',
        "metadata.key": "background/filtered_location"}).count(), 0)
    self.assertEqual(edb.get_timeseries_db().find({'user_id': self.testUUID,
        'metadata.filter': 'distance',
        "metadata.key": "background/filtered_location"}).count(), 0)

    # Now, move all filters
    estfm.move_all_filters_to_data()

    # The entries should now be set to "time"
    self.assertEqual(edb.get_timeseries_db().find({'user_id': self.testUUID,
        'data.filter': 'distance',
        "metadata.key": "background/filtered_location"}).count(), 0)
    self.assertEqual(edb.get_timeseries_db().find({'user_id': self.testUUID,
        'data.filter': 'time',
        "metadata.key": "background/filtered_location"}).count(), 738)
def testOneOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440700000
    edb.get_timeseries_db().insert(cfg_1)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 1)
    logging.debug(saved_entries[0])
    self.assertEqual(saved_entries[0]['data']['is_duty_cycling'],
                     cfg_1['data']['is_duty_cycling'])
def insert(self, entry):
    """
    Insert the entry into this user's timeseries, stamping it with our
    user_id if it does not already carry one.
    """
    logging.debug("insert called")
    if "user_id" not in entry:
        entry["user_id"] = self.user_id
    elif entry["user_id"] != self.user_id:
        raise AttributeError("Saving entry for %s in timeseries for %s" %
                             (entry["user_id"], self.user_id))
    else:
        logging.debug("entry was fine, no need to fix it")

    logging.debug("Inserting entry %s into timeseries" % entry)
    edb.get_timeseries_db().insert(entry)
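# Hypothetical usage sketch for insert() above; the key and payload are made
# up, and `ts` is assumed to be an already-constructed timeseries for a user.
def insert_example(ts):
    ts.insert({"metadata": {"key": "background/battery", "write_ts": 1440658800},
               "data": {"battery_level_pct": 85.0}})
    # An entry that already carries a different user_id would raise AttributeError.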
def setupRealExampleWithEntries(testObj):
    tsdb = edb.get_timeseries_db()
    for entry in testObj.entries:
        entry["user_id"] = testObj.testUUID
        # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"],
        #                                                        entry["data"]["fmt_time"])
        edb.save(tsdb, entry)

    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    logging.debug("First few entries = %s" %
                  [e["data"]["fmt_time"] if "fmt_time" in e["data"] else e["metadata"]["write_fmt_time"]
                   for e in list(edb.get_timeseries_db().find({"user_id": testObj.testUUID})
                                 .sort("data.write_ts", pymongo.ASCENDING).limit(10))])
def testOldOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440500000
    edb.get_timeseries_db().insert(cfg_1)

    cfg_2 = copy.copy(self.dummy_config)
    cfg_2['metadata']['write_ts'] = 1440610000
    edb.get_timeseries_db().insert(cfg_2)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 0)
def setupRealExampleWithEntries(testObj):
    tsdb = edb.get_timeseries_db()
    for entry in testObj.entries:
        entry["user_id"] = testObj.testUUID
        # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"],
        #                                                        entry["data"]["fmt_time"])
        tsdb.save(entry)

    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    logging.debug("First few entries = %s" %
                  [e["data"]["fmt_time"] if "fmt_time" in e["data"] else e["metadata"]["write_fmt_time"]
                   for e in list(edb.get_timeseries_db().find({"user_id": testObj.testUUID})
                                 .sort("data.write_ts", pymongo.ASCENDING).limit(10))])
def setUp(self):
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    self.testUUID = uuid.uuid4()
    self.entries = json.load(open("emission/tests/data/smoothing_data/tablet_2015-11-03"),
                             object_hook=bju.object_hook)
    for entry in self.entries:
        entry["user_id"] = self.testUUID
        edb.get_timeseries_db().save(entry)
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
def clearRelatedDb(self):
    edb.get_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_analysis_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_usercache_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_uuid_db().delete_many({"user_id": {"$in": self.testUUIDList}})
def setUp(self):
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    self.testUUID = uuid.uuid4()
    self.entries = json.load(open("emission/tests/data/smoothing_data/tablet_2015-11-03"),
                             object_hook=bju.object_hook)
    for entry in self.entries:
        entry["user_id"] = self.testUUID
        edb.get_timeseries_db().save(entry)
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
def setupRealExample(testObj, dump_file): logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count()) testObj.entries = json.load(open(dump_file), object_hook = bju.object_hook) testObj.testUUID = uuid.uuid4() for entry in testObj.entries: entry["user_id"] = testObj.testUUID # print "Saving entry with write_ts = %s and ts = %s" % (entry["metadata"]["write_fmt_time"], # entry["data"]["fmt_time"]) edb.get_timeseries_db().save(entry) logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count()) logging.debug("First few entries = %s" % [e["data"]["fmt_time"] for e in list(edb.get_timeseries_db().find({"user_id": testObj.testUUID}).sort("data.write_ts", pymongo.ASCENDING).limit(10))])
def post_check(unique_user_list, all_rerun_list):
    import emission.core.get_database as edb
    import numpy as np

    logging.info("For %s users, loaded %s raw entries, %s processed entries and %s pipeline states" %
                 (len(unique_user_list),
                  edb.get_timeseries_db().count_documents(
                      {"user_id": {"$in": list(unique_user_list)}}),
                  edb.get_analysis_timeseries_db().count_documents(
                      {"user_id": {"$in": list(unique_user_list)}}),
                  edb.get_pipeline_state_db().count_documents(
                      {"user_id": {"$in": list(unique_user_list)}})))

    all_rerun_arr = np.array(all_rerun_list)

    # want to check if no entry needs a rerun? In this case we are done
    # no entry needs a rerun = all entries are false, not(all entries) are true
    if np.all(np.logical_not(all_rerun_arr)):
        logging.info("all entries in the timeline contain analysis results, "
                     "no need to run the intake pipeline")
    # if all entries need to be re-run, we must have had raw data throughout
    elif np.all(all_rerun_arr):
        logging.info("all entries in the timeline contain only raw data, "
                     "need to run the intake pipeline")
    else:
        logging.info("timeline contains a mixture of analysis results and raw data"
                     " - complain to shankari!")
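# Minimal sketch of the three-way rerun check above, assuming numpy as np:
import numpy as np
assert np.all(np.logical_not([False, False]))  # no entry needs a rerun -> done
assert np.all([True, True])                    # all raw -> run the intake pipeline
mixed = [True, False]                          # mixture -> neither branch fires
assert not np.all(mixed) and not np.all(np.logical_not(mixed))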
def fix_trips_or_sections(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        start_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                         'metadata.key': 'background/location',
                                         'data.ts': entry['start_ts']})
        end_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                       'metadata.key': 'background/location',
                                       'data.ts': entry['end_ts']})

        if start_loc_entry is not None:
            start_tz = start_loc_entry['metadata']['time_zone']
        else:
            logging.warning("No start_loc_entry found for trip %s, returning default" % entry)
            start_tz = "America/Los_Angeles"

        if end_loc_entry is not None:
            end_tz = end_loc_entry['metadata']['time_zone']
        else:
            logging.warning("No end_loc_entry found for trip %s, returning default" % entry)
            end_tz = "America/Los_Angeles"

        logging.debug("Found entries with metadata = %s, %s" % (start_tz, end_tz))

        entry['start_local_dt'] = get_local_date(entry['start_fmt_time'], start_tz)
        entry['end_local_dt'] = get_local_date(entry['end_fmt_time'], end_tz)
        collection.save(entry)
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                             'metadata.key': 'background/location',
                                             'data.ts': entry['enter_ts']})
            if enter_loc_entry is not None:
                enter_tz = enter_loc_entry['metadata']['time_zone']
            else:
                enter_tz = "America/Los_Angeles"
            logging.debug("entry metadata timezone = %s" % enter_tz)
            entry['enter_local_dt'] = get_local_date(entry['enter_fmt_time'], enter_tz)
        else:
            logging.warning("No enter timestamp found, skipping")

        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                            'metadata.key': 'background/location',
                                            'data.ts': entry['exit_ts']})
            if exit_loc_entry is not None:
                exit_tz = exit_loc_entry['metadata']['time_zone']
            else:
                exit_tz = "America/Los_Angeles"
            logging.debug("exit metadata timezone = %s" % exit_tz)
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
        else:
            logging.warning("No exit timestamp found, skipping")

        collection.save(entry)
def setUp(self):
    etc.setupRealExample(self,
                         "emission/tests/data/real_examples/shankari_2015-aug-27")
    # eaicf.filter_accuracy(self.testUUID)
    etc.runIntakePipeline(self.testUUID)
    # estfm.move_all_filters_to_data()
    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())

    self.day_start_ts = 1440658800
    self.day_end_ts = 1440745200
    self.day_start_dt = esdldq.get_local_date(self.day_start_ts, "America/Los_Angeles")
    self.day_end_dt = esdldq.get_local_date(self.day_end_ts, "America/Los_Angeles")

    # If we don't delete the time components, we end up with the upper and
    # lower bounds = 0, which basically matches nothing.
    del self.day_start_dt['hour']
    del self.day_end_dt['hour']
    del self.day_start_dt['minute']
    del self.day_end_dt['minute']
    del self.day_start_dt['second']
    del self.day_end_dt['second']
def getPublicData():
    ids = request.json['phone_ids']
    # list comprehensions instead of map() so the sequences can be iterated
    # more than once under Python 3
    all_uuids = [UUID(id) for id in ids]
    # only serve data for the well-known test phones
    uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

    from_ts = request.query.from_ts
    to_ts = request.query.to_ts

    time_range = estt.TimeQuery("metadata.write_ts", float(from_ts), float(to_ts))
    time_query = time_range.get_query()

    user_queries = [{'user_id': uuid} for uuid in uuids]
    for q in user_queries:
        q.update(time_query)

    num_entries_ts = [edb.get_timeseries_db().find(q).count() for q in user_queries]
    num_entries_uc = [edb.get_usercache_db().find(q).count() for q in user_queries]
    total_entries = sum(num_entries_ts + num_entries_uc)
    logging.debug("Total entries requested: %d" % total_entries)

    threshold = 200000
    if total_entries > threshold:
        data_list = None
    else:
        data_list = [esdc.find_entries(u, None, time_range) for u in uuids]

    return {'phone_data': data_list}
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        # initialize to None so that a missing timestamp does not leave the
        # variable unbound when we log below
        enter_loc_entry = None
        exit_loc_entry = None

        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({
                'user_id': entry['user_id'],
                'metadata.key': 'background/location',
                'data.ts': entry['enter_ts']
            })
        else:
            logging.info("No enter timestamp found, skipping")

        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({
                'user_id': entry['user_id'],
                'metadata.key': 'background/location',
                'data.ts': entry['exit_ts']
            })
        else:
            logging.info("No exit timestamp found, skipping")

        logging.debug("Found entries with metadata = %s, %s" %
                      (enter_loc_entry['metadata']['time_zone'] if enter_loc_entry is not None else None,
                       exit_loc_entry['metadata']['time_zone'] if exit_loc_entry is not None else None))

        if 'enter_local_dt' in entry:
            entry['enter_local_dt'] = get_local_date(entry['enter_fmt_time'],
                                                     enter_loc_entry['metadata']['time_zone'])
        if 'exit_local_dt' in entry:
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'],
                                                    exit_loc_entry['metadata']['time_zone'])
        collection.save(entry)
def export_timeline(user_id_str, day_str, file_name):
    logging.info("Extracting timeline for user %s day %s and saving to file %s" %
                 (user_id_str, day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d")
    logging.debug("day_dt is %s" % day_dt)
    day_end_dt = day_dt + pydt.timedelta(days=1)

    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    entry_list = list(edb.get_timeseries_db().find({
        'user_id': uuid.UUID(user_id_str),
        'metadata.write_local_dt': {'$gt': day_dt, "$lt": day_end_dt}
    }))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list, open(file_name, "w"),
              default=bju.default, allow_nan=False, indent=4)
def setupRealExample(testObj, dump_file): logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count()) with open(dump_file) as dfp: testObj.entries = json.load(dfp, object_hook = bju.object_hook) testObj.testUUID = uuid.uuid4() print("Setting up real example for %s" % testObj.testUUID) setupRealExampleWithEntries(testObj)
def fix_stops_or_places(collection):
    tsdb = edb.get_timeseries_db()
    for entry in collection.find():
        # initialize to None so that a missing timestamp does not leave the
        # variable unbound when we log below
        enter_loc_entry = None
        exit_loc_entry = None

        if 'enter_ts' in entry:
            enter_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                             'metadata.key': 'background/location',
                                             'data.ts': entry['enter_ts']})
        else:
            logging.info("No enter timestamp found, skipping")

        if 'exit_ts' in entry:
            exit_loc_entry = tsdb.find_one({'user_id': entry['user_id'],
                                            'metadata.key': 'background/location',
                                            'data.ts': entry['exit_ts']})
        else:
            logging.info("No exit timestamp found, skipping")

        logging.debug("Found entries with metadata = %s, %s" %
                      (enter_loc_entry['metadata']['time_zone'] if enter_loc_entry is not None else None,
                       exit_loc_entry['metadata']['time_zone'] if exit_loc_entry is not None else None))

        if 'enter_local_dt' in entry:
            entry['enter_local_dt'] = get_local_date(entry['enter_fmt_time'],
                                                     enter_loc_entry['metadata']['time_zone'])
        if 'exit_local_dt' in entry:
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'],
                                                    exit_loc_entry['metadata']['time_zone'])
        collection.save(entry)
def export_timeline(user_id_str, day_str, file_name):
    logging.info("Extracting timeline for user %s day %s and saving to file %s" %
                 (user_id_str, day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d")
    logging.debug("day_dt is %s" % day_dt)

    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    user_query = {'user_id': uuid.UUID(user_id_str)}
    date_query = {
        'metadata.write_local_dt.year': day_dt.year,
        'metadata.write_local_dt.month': day_dt.month,
        'metadata.write_local_dt.day': day_dt.day
    }
    final_query = user_query
    final_query.update(date_query)

    entry_list = list(edb.get_timeseries_db().find(final_query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list, open(file_name, "w"),
              default=bju.default, allow_nan=False, indent=4)
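# Hypothetical invocation of export_timeline above; the UUID string and output
# path are made up for illustration, so the call is guarded from accidental runs.
if __name__ == "__main__":
    export_timeline("00000000-0000-0000-0000-000000000000", "2015-08-27",
                    "/tmp/timeline_2015-08-27.json")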
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        import emission.core.get_database as edb
        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")

    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)

    logging.info("For uuid = %s, deleting entries from the analysis_timeseries" % curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)

    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)

    if is_purge_state:
        logging.info("For uuid %s, deleting entries from the pipeline_state_db" % curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
def __init__(self, user_id):
    super(BuiltinTimeSeries, self).__init__(user_id)
    self.key_query = lambda key: {"metadata.key": key}
    self.type_query = lambda entry_type: {"metadata.type": entry_type}
    self.user_query = {"user_id": self.user_id}  # UUID is mandatory for this version
    self.timeseries_db = edb.get_timeseries_db()
    self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
    self.ts_map = {
        "background/location": self.timeseries_db,
        "background/filtered_location": self.timeseries_db,
        "background/motion_activity": self.timeseries_db,
        "background/battery": self.timeseries_db,
        "statemachine/transition": self.timeseries_db,
        "config/sensor_config": self.timeseries_db,
        "segmentation/raw_trip": self.analysis_timeseries_db,
        "segmentation/raw_place": self.analysis_timeseries_db,
        "segmentation/raw_section": self.analysis_timeseries_db,
        "segmentation/raw_stop": self.analysis_timeseries_db,
        "analysis/smoothing": self.analysis_timeseries_db,
        "analysis/cleaned_trip": self.analysis_timeseries_db,
        "analysis/cleaned_place": self.analysis_timeseries_db,
        "analysis/cleaned_section": self.analysis_timeseries_db,
        "analysis/cleaned_stop": self.analysis_timeseries_db,
        "analysis/recreated_location": self.analysis_timeseries_db,
    }
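# Sketch of the routing that ts_map above enables: raw sensed keys resolve to
# the main timeseries collection, pipeline outputs to the analysis collection
# (`ts` is assumed to be a constructed BuiltinTimeSeries instance).
def routing_example(ts):
    assert ts.ts_map["background/location"] is ts.timeseries_db
    assert ts.ts_map["analysis/cleaned_trip"] is ts.analysis_timeseries_db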
def setupRealExample(testObj, dump_file): logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count()) with open(dump_file) as dfp: testObj.entries = json.load(dfp, object_hook=bju.object_hook) testObj.testUUID = uuid.uuid4() setupRealExampleWithEntries(testObj)
def testMoveToLongTerm(self):
    # 5 mins of data, every 30 secs = 10 entries per entry type. There are
    # 3 entry types, so 30 entries

    # First all the entries are in the usercache
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(list(self.ts1.find_entries())), 0)

    self.assertEqual(len(self.uc2.getMessage()), 30)
    self.assertEqual(len(list(self.ts2.find_entries())), 0)

    self.assertEqual(len(self.ucios.getMessage()), 30)
    self.assertEqual(len(list(self.tsios.find_entries())), 0)

    # Then we move entries for user1 into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # So we end up with all user1 entries in longterm
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    # Then, we move entries for the ios user into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUIDios).moveToLongTerm()
    self.assertEqual(len(self.ucios.getMessage()), 0)
    self.assertEqual(len(list(self.tsios.find_entries())), 30)

    # 30 entries from android + 30 entries from ios = 60
    self.assertEqual(edb.get_timeseries_db().find().count(), 60)
    self.assertEqual(edb.get_timeseries_error_db().find().count(), 0)

    # But all existing entries still in usercache for the second user
    self.assertEqual(len(self.uc2.getMessage()), 30)
    self.assertEqual(len(list(self.ts2.find_entries())), 0)
def __init__(self, user_id):
    super(BuiltinTimeSeries, self).__init__(user_id)
    self.key_query = lambda key: {"metadata.key": key}
    self.type_query = lambda entry_type: {"metadata.type": entry_type}
    self.user_query = {"user_id": self.user_id}  # UUID is mandatory for this version
    self.timeseries_db = edb.get_timeseries_db()
    self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
    self.ts_map = {
        "background/location": self.timeseries_db,
        "background/filtered_location": self.timeseries_db,
        "background/motion_activity": self.timeseries_db,
        "background/battery": self.timeseries_db,
        "statemachine/transition": self.timeseries_db,
        "config/sensor_config": self.timeseries_db,
        "segmentation/raw_trip": self.analysis_timeseries_db,
        "segmentation/raw_place": self.analysis_timeseries_db,
        "segmentation/raw_section": self.analysis_timeseries_db,
        "segmentation/raw_stop": self.analysis_timeseries_db,
        "analysis/smoothing": self.analysis_timeseries_db,
        "analysis/cleaned_trip": self.analysis_timeseries_db,
        "analysis/cleaned_place": self.analysis_timeseries_db,
        "analysis/cleaned_section": self.analysis_timeseries_db,
        "analysis/cleaned_stop": self.analysis_timeseries_db,
        "analysis/recreated_location": self.analysis_timeseries_db,
    }
def request_data(server_url, from_ts, to_ts, phone_ids, debug):
    url = server_url + "/eval/publicData/timeseries?from_ts=" + str(from_ts) + "&to_ts=" + str(to_ts)
    ids = {'phone_ids': phone_ids}
    headers = {'Content-Type': 'application/json'}
    r = requests.get(url, data=json.dumps(ids), headers=headers)
    dic = json.loads(r.text, object_hook=bju.object_hook)
    phone_list = dic['phone_data']

    if phone_list is None:
        print("Requested amount of data exceeds the threshold value.")
    else:
        # Load data to the local server
        tsdb = edb.get_timeseries_db()
        for index, entry_list in enumerate(phone_list):
            if debug:
                logging.debug("phone" + str(index + 1) + " first entry (in Pacific Time):")
                if len(entry_list) == 0:
                    logging.debug("...has no data...")
                else:
                    logging.debug(str(entry_list[0].get('metadata').get('write_fmt_time')))
            for entry in entry_list:
                tsdb.save(entry)
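# Hypothetical client call for request_data above; the server URL and phone id
# are made up, and the timestamps span one day in epoch seconds.
if __name__ == "__main__":
    request_data("http://localhost:8080", 1440658800, 1440745200,
                 ["00000000-0000-0000-0000-000000000000"], debug=True)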
def setUp(self):
    self.clearRelatedDb()
    etc.setupRealExample(self,
                         "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
    self.day_start_ts = 1440658800
    self.day_end_ts = 1440745200
def fix_key(check_field, new_key):
    print("First entry for " + new_key + " is %s" %
          list(edb.get_timeseries_db().find(
              {"metadata.key": "config/sensor_config",
               check_field: {"$exists": True}}).sort("metadata.write_ts").limit(1)))
    udb = edb.get_usercache_db()
    tdb = edb.get_timeseries_db()
    for i, entry in enumerate(edb.get_timeseries_db().find(
            {"metadata.key": "config/sensor_config",
             check_field: {"$exists": True}})):
        entry["metadata"]["key"] = new_key
        if i % 10000 == 0:
            print(udb.insert(entry))
            print(tdb.remove(entry["_id"]))
        else:
            udb.insert(entry)
            tdb.remove(entry["_id"])
def get_data_df(self, key, time_query=None):
    sort_key = self._get_sort_key(time_query)
    logging.debug("curr_query = %s, sort_key = %s" %
                  (self._get_query([key], time_query), sort_key))
    result_it = edb.get_timeseries_db().find(
        self._get_query([key], time_query),
        {"data": True, "metadata.write_ts": True}).sort(sort_key, pymongo.ASCENDING)
    logging.debug("Found %s results" % result_it.count())
    # Dataframe doesn't like to work off an iterator - it wants everything in memory
    return pd.DataFrame([BuiltinTimeSeries._to_df_entry(e) for e in list(result_it)])
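# Hedged usage sketch for get_data_df above: with no time_query it returns all
# entries for the key as a pandas dataframe (`ts` is assumed to be a constructed
# timeseries instance).
def dataframe_example(ts):
    df = ts.get_data_df("background/location")
    return len(df)  # number of matching location points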