def tearDown(self):
    edb.get_timeseries_db().remove({"user_id": self.androidUUID})
    edb.get_timeseries_db().remove({"user_id": self.iosUUID})
    edb.get_usercache_db().remove({"user_id": self.androidUUID})
    edb.get_usercache_db().remove({"user_id": self.iosUUID})
    edb.get_place_db().remove()
    edb.get_trip_new_db().remove()
def clearRelatedDb(self):
    edb.get_timeseries_db().remove()
    edb.get_place_db().remove()
    edb.get_stop_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()
def testSavePlace(self):
    new_place = esdp.create_new_place(self.testUserId)
    new_place.enter_ts = 5
    esdp.save_place(new_place)
    self.assertEqual(edb.get_place_db().find({"enter_ts": 5}).count(), 1)
    self.assertEqual(edb.get_place_db().find_one({"enter_ts": 5})["_id"],
                     new_place.get_id())
    self.assertEqual(edb.get_place_db().find_one({"enter_ts": 5})["user_id"],
                     self.testUserId)
def clearRelatedDb(self):
    edb.get_timeseries_db().remove({'user_id': self.testUUID})
    edb.get_place_db().remove({'user_id': self.testUUID})
    edb.get_stop_db().remove({'user_id': self.testUUID})
    edb.get_trip_new_db().remove({'user_id': self.testUUID})
    edb.get_section_new_db().remove({'user_id': self.testUUID})
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    if args.date is None:
        trip_query = del_query
        place_query = del_query
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        trip_query = copy.copy(del_query)
        trip_query.update({"start_ts": {"$gt": day_ts}})
        place_query = copy.copy(del_query)
        place_query.update({"exit_ts": {"$gt": day_ts}})

    print "trip_query = %s" % trip_query
    print "place_query = %s" % place_query

    # Since sections have the same basic structure as trips, and stops have
    # the same basic structure as places, we can reuse the queries
    print "Deleting trips for %s after %s" % (args.user_id, args.date)
    print edb.get_trip_new_db().remove(trip_query)
    print "Deleting sections for %s after %s" % (args.user_id, args.date)
    print edb.get_section_new_db().remove(trip_query)
    print "Deleting places for %s after %s" % (args.user_id, args.date)
    print edb.get_place_db().remove(place_query)
    print "Deleting stops for %s after %s" % (args.user_id, args.date)
    print edb.get_stop_db().remove(place_query)
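# A minimal sketch of how del_objects might be wired up from the command
# line; the argument names mirror the args.user_id / args.date fields used
# above, but this argparse setup is an assumption, not the real script's.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("user_id",
                        help="the user to delete objects for, or 'all'")
    parser.add_argument("-d", "--date", default=None,
                        help="only delete objects after this date (YYYY-MM-DD)")
    del_objects(parser.parse_args())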
def get_places(user_id, time_query):
    curr_query = _get_ts_query(time_query)
    curr_query.update({"user_id": user_id})
    place_doc_cursor = edb.get_place_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    logging.debug("%d places found in database" % place_doc_cursor.count())
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecwp.Place(doc) for doc in place_doc_cursor]
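# A usage sketch, assuming the TimeQuery wrapper from
# emission.storage.timeseries.timequery takes (timeType, startTs, endTs);
# the user id and unix timestamps below are illustrative placeholders.
import uuid
import emission.storage.timeseries.timequery as estt

test_user_id = uuid.uuid4()  # stand-in for a real user id
tq = estt.TimeQuery("enter_ts", 1440658800, 1440745200)
for place in get_places(test_user_id, tq):
    logging.debug("found place %s" % place.get_id())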
def set_up_trips(list_of_cluster_data, user_id):
    # Import here to avoid recursive imports
    # TODO: This should really be moved to a separate class that creates the
    # entire graph at one time
    import emission.storage.decorations.common_place_queries as esdcpq

    clear_existing_trips(user_id)
    for dct in list_of_cluster_data:
        start_loc = gj.Point(dct['start_coords'].coordinate_list())
        end_loc = gj.Point(dct['end_coords'].coordinate_list())
        start_place_id = esdcpq.get_common_place_at_location(start_loc).get_id()
        end_place_id = esdcpq.get_common_place_at_location(end_loc).get_id()
        # Build a day-of-week x hour-of-day histogram of the section starts
        probabilites = np.zeros((DAYS_IN_WEEK, HOURS_IN_DAY))
        for sec in dct["sections"]:
            probabilites[get_day(sec), get_start_hour(sec)] += 1
        trip = make_new_common_trip()
        trip.user_id = user_id
        trip.start_place = start_place_id
        trip.end_place = end_place_id
        trip.start_loc = start_loc
        trip.end_loc = end_loc
        trip.probabilites = probabilites
        trip.trips = [unc_trip.get_id() for unc_trip in dct["sections"]]
        save_common_trip(trip)
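# Hypothetical sketches of the get_day / get_start_hour helpers assumed
# above; the real implementations live elsewhere in this module. These
# assume sections carry a start_local_dt wrapper with weekday/hour fields.
def get_day(section):
    return section.start_local_dt.weekday  # 0 = Monday ... 6 = Sunday

def get_start_hour(section):
    return section.start_local_dt.hour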
def get_aggregate_places(time_query, box=None):
    curr_query = _get_ts_query(time_query)
    if box:
        curr_query.update({"location": {"$geoWithin": {"$box": box}}})
    place_doc_cursor = edb.get_place_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    return [ecwp.Place(doc) for doc in place_doc_cursor]
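# A usage sketch: MongoDB's $box operator takes the [[sw_lon, sw_lat],
# [ne_lon, ne_lat]] corners of a bounding box, so this pulls places for all
# users inside an (illustrative) box around downtown Berkeley.
import emission.storage.timeseries.timequery as estt

tq = estt.TimeQuery("enter_ts", 1440658800, 1440745200)
berkeley_box = [[-122.288, 37.864], [-122.253, 37.885]]
agg_places = get_aggregate_places(tq, box=berkeley_box)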
def get_all_place_objs(common_place):
    # Assumes common_place.trips is a list of trip wrappers whose
    # start_place / end_place fields hold place document ids
    place_db = edb.get_place_db()
    start_places = []
    end_places = []
    for t in common_place.trips:
        start = place_db.find_one({"_id": t.start_place})
        end = place_db.find_one({"_id": t.end_place})
        start_places.append(start)
        end_places.append(end)
    return start_places, end_places
def get_last_place(user_id):
    """
    There are many ways to find the last place. One would be to find the one
    with the max enter_ts. But that is not performant, because we would need
    to retrieve all the enter_ts values and find their max, which is expensive.
    Instead, we use the property that we process data in chunks of trips, so
    the last place would have been created and entered but not exited.
    """
    ret_place_doc = edb.get_place_db().find_one({"user_id": user_id,
                                                 "exit_ts": {"$exists": False}})
    logging.debug("last place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwp.Place(ret_place_doc)
    assert "exit_ts" not in ret_place
    assert "exit_fmt_time" not in ret_place
    assert "starting_trip" not in ret_place
    return ret_place
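# A usage sketch of the invariant documented above, for some known user_id
# (assumed to be a uuid.UUID): the chunked pipeline leaves exactly one place
# per user that has been entered but not exited, and that is what this finds.
last_place = get_last_place(user_id)
if last_place is None:
    logging.debug("user %s has no places yet, starting a new chain" % user_id)
else:
    logging.debug("resuming from place %s" % last_place.get_id())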
logging.info("About to convert %s entries" % result_cursor.count()) for i, wrapper in enumerate(result_cursor): entry = convert_wrapper_to_entry(key, wrapper) if entry.get_id() != wrapper["_id"]: logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"])) if i % 10000 == 0: print "converted %s -> %s" % (wrapper, entry) edb.get_timeseries_db().insert(entry) collection.remove(wrapper) def move_ts_entries(key): tdb = edb.get_timeseries_db() atdb = edb.get_analysis_timeseries_db() result_cursor = tdb.find({'metadata.key': key}) logging.info("About to convert %s entries" % result_cursor.count()) for i, entry_doc in enumerate(result_cursor): if i % 10000 == 0: print "moved %s from one ts to the other" % (entry_doc) atdb.insert(entry_doc) tdb.remove(entry_doc) if __name__ == '__main__': # No arguments - muahahahaha. Just going to copy known fields over. convert_collection(edb.get_trip_new_db(), "segmentation/raw_trip") convert_collection(edb.get_place_db(), "segmentation/raw_place") convert_collection(edb.get_section_new_db(), "segmentation/raw_section") convert_collection(edb.get_stop_db(), "segmentation/raw_stop") move_ts_entries("analysis/smoothing")
def create_new_place(user_id):
    _id = edb.get_place_db().save({'user_id': user_id})
    logging.debug("Created new place %s for user %s" % (_id, user_id))
    return ecwp.Place({"_id": _id, 'user_id': user_id})
def save_place(place):
    edb.get_place_db().save(place)
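# A round-trip sketch mirroring testSavePlace above: create a stub place,
# set a field on the wrapper, and persist it again with save_place.
import uuid

place = create_new_place(uuid.uuid4())  # stand-in for a real user id
place.enter_ts = 5
save_place(place)
assert edb.get_place_db().find_one({"_id": place.get_id()})["enter_ts"] == 5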
def get_place(place_id):
    return ecwp.Place(edb.get_place_db().find_one({"_id": place_id}))
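# A usage sketch: round-trip a freshly created place through its id.
import uuid

place = create_new_place(uuid.uuid4())
fetched = get_place(place.get_id())
assert fetched.get_id() == place.get_id()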
        else:
            exit_tz = "America/Los_Angeles"
        logging.debug("exit metadata timezone = %s" % exit_tz)
        entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
    else:
        logging.warning("No exit timestamp found, skipping")
    collection.save(entry)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("key",
                        help="the key representing the stream that we want to fix")
    parser.add_argument("-f", "--filename",
                        help="a saved timeline whose local_dt needs to be fixed. "
                             "If this is specified, key is ignored")
    args = parser.parse_args()

    if args.filename is not None:
        fix_file(args.filename)
    elif args.key == "trips":
        fix_trips_or_sections(edb.get_trip_new_db())
    elif args.key == "sections":
        fix_trips_or_sections(edb.get_section_new_db())
    elif args.key == "places":
        fix_stops_or_places(edb.get_place_db())
    elif args.key == "stops":
        fix_stops_or_places(edb.get_stop_db())
    else:
        fix_timeseries(args.key)
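# Hypothetical sketch of the get_local_date helper used above, assuming it
# parses the ISO fmt_time string and explodes it into local date components;
# the real helper in the codebase may have a different shape.
import dateutil.parser
import pytz

def get_local_date(fmt_time, tz_str):
    local_dt = dateutil.parser.parse(fmt_time).astimezone(pytz.timezone(tz_str))
    return {"year": local_dt.year, "month": local_dt.month,
            "day": local_dt.day, "hour": local_dt.hour,
            "minute": local_dt.minute, "second": local_dt.second,
            "weekday": local_dt.weekday(), "timezone": tz_str}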
def setUp(self):
    self.testUserId = uuid.uuid4()
    edb.get_place_db().remove()